generated from Nekojimi/JavaMavenTemplate
Webscraper: set source when returning results, and trim result fields.
This commit is contained in:
parent b10f9d3573
commit eff1f5bfdc
|
@ -35,15 +35,15 @@ public class WebScraperSearcher extends Searcher
|
||||||
{
|
{
|
||||||
protected String searchUrl;
|
protected String searchUrl;
|
||||||
protected URL rootURL;
|
protected URL rootURL;
|
||||||
|
|
||||||
protected String resultSelector;
|
protected String resultSelector;
|
||||||
protected String resultArtistSelector;
|
protected String resultArtistSelector;
|
||||||
protected String resultTitleSelector;
|
protected String resultTitleSelector;
|
||||||
protected String resultLinkSelector;
|
protected String resultLinkSelector;
|
||||||
protected String resultAlbumArtistSelector;
|
protected String resultAlbumArtistSelector;
|
||||||
|
|
||||||
protected Map<String,String> searchFields = new HashMap<>();
|
protected Map<String,String> searchFields = new HashMap<>();
|
||||||
|
|
||||||
protected Parser<?,?> parser;
|
protected Parser<?,?> parser;
|
||||||
|
|
||||||
public WebScraperSearcher(String name)
|
public WebScraperSearcher(String name)
|
||||||
|
@ -51,7 +51,7 @@ public class WebScraperSearcher extends Searcher
|
||||||
super(name);
|
super(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException
|
public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException
|
||||||
{
|
{
|
||||||
super(yaml);
|
super(yaml);
|
||||||
searchUrl = yaml.string("search_url");
|
searchUrl = yaml.string("search_url");
|
||||||
|
@ -59,7 +59,7 @@ public class WebScraperSearcher extends Searcher
|
||||||
// rootURL = fillURL(Query.fullText(""));
|
// rootURL = fillURL(Query.fullText(""));
|
||||||
// rootURL = new URL(rootURL.getProtocol(), rootURL.getHost(), "");
|
// rootURL = new URL(rootURL.getProtocol(), rootURL.getHost(), "");
|
||||||
resultSelector = yaml.string("result_selector");
|
resultSelector = yaml.string("result_selector");
|
||||||
|
|
||||||
YamlMapping fields = yaml.yamlMapping("result_fields");
|
YamlMapping fields = yaml.yamlMapping("result_fields");
|
||||||
if (fields != null)
|
if (fields != null)
|
||||||
{
|
{
|
||||||
|
@ -68,11 +68,11 @@ public class WebScraperSearcher extends Searcher
|
||||||
resultLinkSelector = fields.string("link");
|
resultLinkSelector = fields.string("link");
|
||||||
resultAlbumArtistSelector = fields.string("album_artist");
|
resultAlbumArtistSelector = fields.string("album_artist");
|
||||||
}
|
}
|
||||||
|
|
||||||
YamlMapping searchFieldMap = yaml.yamlMapping("search_fields");
|
YamlMapping searchFieldMap = yaml.yamlMapping("search_fields");
|
||||||
for (YamlNode key: searchFieldMap.keys())
|
for (YamlNode key: searchFieldMap.keys())
|
||||||
searchFields.put(key.asScalar().value(), searchFieldMap.string(key));
|
searchFields.put(key.asScalar().value(), searchFieldMap.string(key));
|
||||||
|
|
||||||
String formatName = yaml.string("format");
|
String formatName = yaml.string("format");
|
||||||
switch(formatName)
|
switch(formatName)
|
||||||
{
|
{
|
||||||
|
@ -86,21 +86,21 @@ public class WebScraperSearcher extends Searcher
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Result> doSearch(Query query)
|
protected List<Result> doSearch(Query query)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
URL url = fillURL(query);
|
URL url = fillURL(query);
|
||||||
InputStream input = url.openStream();
|
InputStream input = url.openStream();
|
||||||
return processResults(parser, input);
|
return processResults(parser, input);
|
||||||
} catch (IOException ex)
|
} catch (IOException ex)
|
||||||
{
|
{
|
||||||
Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex);
|
Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex);
|
||||||
return List.of();
|
return List.of();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected URL fillURL(Query query) throws MalformedURLException
|
protected URL fillURL(Query query) throws MalformedURLException
|
||||||
{
|
{
|
||||||
// URL url = new URL(searchUrl.replaceAll("\\$QUERY", URLEncoder.encode(query.getTextSearch(), Charset.forName("utf-8"))));
|
// URL url = new URL(searchUrl.replaceAll("\\$QUERY", URLEncoder.encode(query.getTextSearch(), Charset.forName("utf-8"))));
|
||||||
try
|
try
|
||||||
|
@ -122,12 +122,12 @@ public class WebScraperSearcher extends Searcher
|
||||||
throw new MalformedURLException();
|
throw new MalformedURLException();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected String transformSearchString(String search)
|
protected String transformSearchString(String search)
|
||||||
{
|
{
|
||||||
return search;
|
return search;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected <E> List<Result> processResults(Parser<?,E> parser, InputStream input)
|
protected <E> List<Result> processResults(Parser<?,E> parser, InputStream input)
|
||||||
{
|
{
|
||||||
Collection<E> resultEles = parser.getResults(input, resultSelector);
|
Collection<E> resultEles = parser.getResults(input, resultSelector);
|
||||||
|
@ -142,20 +142,21 @@ public class WebScraperSearcher extends Searcher
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
Result res = new Result();
|
Result res = new Result();
|
||||||
|
res.setSource(name, abbr);
|
||||||
// Artist
|
// Artist
|
||||||
if (resultArtistSelector != null)
|
if (resultArtistSelector != null)
|
||||||
res.setArtist(parser.getField(ele, resultArtistSelector));
|
res.setArtist(parser.getField(ele, resultArtistSelector).trim());
|
||||||
// Title
|
// Title
|
||||||
if (resultTitleSelector != null)
|
if (resultTitleSelector != null)
|
||||||
res.setTitle(parser.getField(ele, resultTitleSelector));
|
res.setTitle(parser.getField(ele, resultTitleSelector).trim());
|
||||||
// Link
|
// Link
|
||||||
if (resultLinkSelector != null)
|
if (resultLinkSelector != null)
|
||||||
res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector));
|
res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector));
|
||||||
|
|
||||||
// Artist + Album
|
// Artist + Album
|
||||||
if (resultAlbumArtistSelector != null)
|
if (resultAlbumArtistSelector != null)
|
||||||
res.setAlbumArtist(parser.getField(ele, resultAlbumArtistSelector));
|
res.setAlbumArtist(parser.getField(ele, resultAlbumArtistSelector).trim());
|
||||||
|
|
||||||
// Artist + Title
|
// Artist + Title
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
|
|
Loading…
Reference in New Issue