Webscraper: set source when returning results, and trim result fields.

master
Jim 3 years ago
parent b10f9d3573
commit eff1f5bfdc
  1. 37
      src/main/java/moe/nekojimi/musicsearcher/providers/WebScraperSearcher.java

@ -35,15 +35,15 @@ public class WebScraperSearcher extends Searcher
{ {
protected String searchUrl; protected String searchUrl;
protected URL rootURL; protected URL rootURL;
protected String resultSelector; protected String resultSelector;
protected String resultArtistSelector; protected String resultArtistSelector;
protected String resultTitleSelector; protected String resultTitleSelector;
protected String resultLinkSelector; protected String resultLinkSelector;
protected String resultAlbumArtistSelector; protected String resultAlbumArtistSelector;
protected Map<String,String> searchFields = new HashMap<>(); protected Map<String,String> searchFields = new HashMap<>();
protected Parser<?,?> parser; protected Parser<?,?> parser;
public WebScraperSearcher(String name) public WebScraperSearcher(String name)
@ -51,7 +51,7 @@ public class WebScraperSearcher extends Searcher
super(name); super(name);
} }
public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException
{ {
super(yaml); super(yaml);
searchUrl = yaml.string("search_url"); searchUrl = yaml.string("search_url");
@ -59,7 +59,7 @@ public class WebScraperSearcher extends Searcher
// rootURL = fillURL(Query.fullText("")); // rootURL = fillURL(Query.fullText(""));
// rootURL = new URL(rootURL.getProtocol(), rootURL.getHost(), ""); // rootURL = new URL(rootURL.getProtocol(), rootURL.getHost(), "");
resultSelector = yaml.string("result_selector"); resultSelector = yaml.string("result_selector");
YamlMapping fields = yaml.yamlMapping("result_fields"); YamlMapping fields = yaml.yamlMapping("result_fields");
if (fields != null) if (fields != null)
{ {
@ -68,11 +68,11 @@ public class WebScraperSearcher extends Searcher
resultLinkSelector = fields.string("link"); resultLinkSelector = fields.string("link");
resultAlbumArtistSelector = fields.string("album_artist"); resultAlbumArtistSelector = fields.string("album_artist");
} }
YamlMapping searchFieldMap = yaml.yamlMapping("search_fields"); YamlMapping searchFieldMap = yaml.yamlMapping("search_fields");
for (YamlNode key: searchFieldMap.keys()) for (YamlNode key: searchFieldMap.keys())
searchFields.put(key.asScalar().value(), searchFieldMap.string(key)); searchFields.put(key.asScalar().value(), searchFieldMap.string(key));
String formatName = yaml.string("format"); String formatName = yaml.string("format");
switch(formatName) switch(formatName)
{ {
@ -86,21 +86,21 @@ public class WebScraperSearcher extends Searcher
} }
@Override @Override
protected List<Result> doSearch(Query query) protected List<Result> doSearch(Query query)
{ {
try try
{ {
URL url = fillURL(query); URL url = fillURL(query);
InputStream input = url.openStream(); InputStream input = url.openStream();
return processResults(parser, input); return processResults(parser, input);
} catch (IOException ex) } catch (IOException ex)
{ {
Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex); Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex);
return List.of(); return List.of();
} }
} }
protected URL fillURL(Query query) throws MalformedURLException protected URL fillURL(Query query) throws MalformedURLException
{ {
// URL url = new URL(searchUrl.replaceAll("\\$QUERY", URLEncoder.encode(query.getTextSearch(), Charset.forName("utf-8")))); // URL url = new URL(searchUrl.replaceAll("\\$QUERY", URLEncoder.encode(query.getTextSearch(), Charset.forName("utf-8"))));
try try
@ -122,12 +122,12 @@ public class WebScraperSearcher extends Searcher
throw new MalformedURLException(); throw new MalformedURLException();
} }
} }
/**
 * Returns the search string to use for the request; this implementation
 * hands {@code search} back unchanged.
 * NOTE(review): presumably an extension point for subclasses that need to
 * rewrite the query text — confirm against overriding classes.
 *
 * @param search the search text
 * @return the same {@code search} value, unmodified
 */
protected String transformSearchString(String search) protected String transformSearchString(String search)
{ {
return search; return search;
} }
protected <E> List<Result> processResults(Parser<?,E> parser, InputStream input) protected <E> List<Result> processResults(Parser<?,E> parser, InputStream input)
{ {
Collection<E> resultEles = parser.getResults(input, resultSelector); Collection<E> resultEles = parser.getResults(input, resultSelector);
@ -142,20 +142,21 @@ public class WebScraperSearcher extends Searcher
try try
{ {
Result res = new Result(); Result res = new Result();
res.setSource(name, abbr);
// Artist // Artist
if (resultArtistSelector != null) if (resultArtistSelector != null)
res.setArtist(parser.getField(ele, resultArtistSelector)); res.setArtist(parser.getField(ele, resultArtistSelector).trim());
// Title // Title
if (resultTitleSelector != null) if (resultTitleSelector != null)
res.setTitle(parser.getField(ele, resultTitleSelector)); res.setTitle(parser.getField(ele, resultTitleSelector).trim());
// Link // Link
if (resultLinkSelector != null) if (resultLinkSelector != null)
res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector)); res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector));
// Artist + Album // Artist + Album
if (resultAlbumArtistSelector != null) if (resultAlbumArtistSelector != null)
res.setAlbumArtist(parser.getField(ele, resultAlbumArtistSelector)); res.setAlbumArtist(parser.getField(ele, resultAlbumArtistSelector).trim());
// Artist + Title // Artist + Title
return res; return res;

Loading…
Cancel
Save