generated from Nekojimi/JavaMavenTemplate
Made WebScraperSearcher able to parse different result formats (HTML/JSON) and supply query parameters dynamically.
parent
039a91ed40
commit
b8336c26fb
@ -0,0 +1,70 @@ |
||||
/* |
||||
* To change this license header, choose License Headers in Project Properties. |
||||
* To change this template file, choose Tools | Templates |
||||
* and open the template in the editor. |
||||
*/ |
||||
package moe.nekojimi.musicsearcher.parsers; |
||||
|
||||
import java.io.IOException; |
||||
import java.io.InputStream; |
||||
import java.net.MalformedURLException; |
||||
import java.net.URL; |
||||
import java.util.Collection; |
||||
import java.util.logging.Level; |
||||
import java.util.logging.Logger; |
||||
import org.jsoup.Jsoup; |
||||
import org.jsoup.nodes.Document; |
||||
import org.jsoup.nodes.Element; |
||||
|
||||
|
||||
public class HTMLParser extends Parser<Document, Element> |
||||
{ |
||||
@Override |
||||
public Document getDocument(InputStream input) |
||||
{ |
||||
try |
||||
{ |
||||
Document doc = Jsoup.parse(new String(input.readAllBytes())); |
||||
// System.out.println(doc.outerHtml());
|
||||
return doc; |
||||
} |
||||
catch (IOException ex) |
||||
{ |
||||
Logger.getLogger(HTMLParser.class.getName()).log(Level.SEVERE, null, ex); |
||||
return null; |
||||
} |
||||
} |
||||
|
||||
@Override |
||||
public Collection<Element> getResults(Document document, String selector) |
||||
{ |
||||
return document.select(selector); |
||||
} |
||||
|
||||
@Override |
||||
public String getField(Element object, String selector) |
||||
{ |
||||
Element ele = object.selectFirst(selector); |
||||
return ele.text(); |
||||
} |
||||
|
||||
@Override |
||||
public URL getURLField(Element object, URL baseURL, String selector) throws MalformedURLException |
||||
{ |
||||
Element ele = object.selectFirst(selector); |
||||
|
||||
String link; |
||||
if (ele.hasAttr("href")) |
||||
link = ele.attr("href"); |
||||
else |
||||
link = ele.text(); |
||||
|
||||
URL url; |
||||
if(!link.startsWith("http")) |
||||
url = new URL(baseURL, link); |
||||
else |
||||
url = new URL(link); |
||||
|
||||
return url; |
||||
} |
||||
} |
@ -0,0 +1,72 @@ |
||||
/* |
||||
* To change this license header, choose License Headers in Project Properties. |
||||
* To change this template file, choose Tools | Templates |
||||
* and open the template in the editor. |
||||
*/ |
||||
package moe.nekojimi.musicsearcher.parsers; |
||||
|
||||
import java.io.InputStream; |
||||
import java.net.MalformedURLException; |
||||
import java.net.URL; |
||||
import java.util.Collection; |
||||
import java.util.List; |
||||
import javax.json.Json; |
||||
import javax.json.JsonObject; |
||||
import javax.json.JsonReader; |
||||
import javax.json.JsonStructure; |
||||
import javax.json.JsonValue; |
||||
|
||||
/** |
||||
* |
||||
* @author jim |
||||
*/ |
||||
public class JSONParser extends Parser<JsonStructure, JsonObject> |
||||
{ |
||||
|
||||
@Override |
||||
public JsonStructure getDocument(InputStream input) |
||||
{ |
||||
JsonReader reader = Json.createReader(input); |
||||
return reader.read(); |
||||
} |
||||
|
||||
@Override |
||||
public Collection<JsonObject> getResults(JsonStructure document, String selector) |
||||
{ |
||||
JsonValue value = document.getValue(selector); |
||||
if (value.getValueType() == JsonValue.ValueType.ARRAY) |
||||
{ |
||||
return value.asJsonArray().getValuesAs(JsonObject.class); |
||||
} |
||||
else if (value.getValueType() == JsonValue.ValueType.OBJECT) |
||||
{ |
||||
return List.of(value.asJsonObject()); |
||||
} |
||||
return List.of(); |
||||
} |
||||
|
||||
@Override |
||||
public String getField(JsonObject object, String selector) |
||||
{ |
||||
return object.getValue(selector).toString(); |
||||
} |
||||
|
||||
@Override |
||||
public URL getURLField(JsonObject object, URL baseURL, String selector) throws MalformedURLException |
||||
{ |
||||
String link = getField(object, selector); |
||||
|
||||
URL url; |
||||
if(!link.startsWith("http")) |
||||
url = new URL(baseURL, link); |
||||
else |
||||
url = new URL(link); |
||||
|
||||
return url; |
||||
} |
||||
|
||||
private JsonValue navigate(JsonObject from, String selector) |
||||
{ |
||||
return from.getValue(selector); |
||||
} |
||||
} |
@ -0,0 +1,30 @@ |
||||
/* |
||||
* To change this license header, choose License Headers in Project Properties. |
||||
* To change this template file, choose Tools | Templates |
||||
* and open the template in the editor. |
||||
*/ |
||||
package moe.nekojimi.musicsearcher.parsers; |
||||
|
||||
import java.io.InputStream; |
||||
import java.net.MalformedURLException; |
||||
import java.net.URL; |
||||
import java.util.Collection; |
||||
|
||||
/**
 * Base class for parsers that turn a raw response stream into a document,
 * select result objects from that document, and read individual fields from
 * each result.
 *
 * @param <D> type of the parsed document
 * @param <O> type of a single result object within the document
 * @author jim
 */
public abstract class Parser<D,O>
{
    /**
     * Parses the stream into a document.
     *
     * @param input stream containing the raw response
     * @return the parsed document; implementations may return {@code null}
     *         when the input cannot be read
     */
    public abstract D getDocument(InputStream input);

    /**
     * Selects the result objects from an already parsed document.
     *
     * @param document parsed document
     * @param selector implementation-specific selector locating the results
     * @return the selected result objects
     */
    public abstract Collection<O> getResults(D document, String selector);

    /**
     * Convenience overload: parses the stream with
     * {@link #getDocument(InputStream)} and selects the results from it.
     *
     * @param input    stream containing the raw response
     * @param selector implementation-specific selector locating the results
     * @return the selected result objects; empty if the stream could not be
     *         parsed into a document
     */
    public Collection<O> getResults(InputStream input, String selector)
    {
        D document = getDocument(input);
        if (document == null)
        {
            // A failed parse means "no results", not a NullPointerException
            // inside the subclass's selector logic.
            return java.util.Collections.emptyList();
        }
        return getResults(document, selector);
    }

    /**
     * Reads a single field from a result object as text.
     *
     * @param object   result object to read from
     * @param selector implementation-specific selector locating the field
     * @return the field's text
     */
    public abstract String getField(O object, String selector);

    /**
     * Reads a single field from a result object as a URL.
     *
     * @param object   result object to read from
     * @param baseURL  URL against which relative links are resolved
     * @param selector implementation-specific selector locating the field
     * @return the resolved URL
     * @throws MalformedURLException if the field cannot be turned into a URL
     */
    public abstract URL getURLField(O object, URL baseURL, String selector) throws MalformedURLException;

}
Loading…
Reference in new issue