Webscraper: set source when returning results, and trim result fields.

master
Jim 3 years ago
parent b10f9d3573
commit eff1f5bfdc
  1. 37
      src/main/java/moe/nekojimi/musicsearcher/providers/WebScraperSearcher.java

@ -35,15 +35,15 @@ public class WebScraperSearcher extends Searcher
{
protected String searchUrl;
protected URL rootURL;
protected String resultSelector;
protected String resultArtistSelector;
protected String resultTitleSelector;
protected String resultLinkSelector;
protected String resultAlbumArtistSelector;
protected Map<String,String> searchFields = new HashMap<>();
protected Parser<?,?> parser;
public WebScraperSearcher(String name)
@ -51,7 +51,7 @@ public class WebScraperSearcher extends Searcher
super(name);
}
public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException
public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException
{
super(yaml);
searchUrl = yaml.string("search_url");
@ -59,7 +59,7 @@ public class WebScraperSearcher extends Searcher
// rootURL = fillURL(Query.fullText(""));
// rootURL = new URL(rootURL.getProtocol(), rootURL.getHost(), "");
resultSelector = yaml.string("result_selector");
YamlMapping fields = yaml.yamlMapping("result_fields");
if (fields != null)
{
@ -68,11 +68,11 @@ public class WebScraperSearcher extends Searcher
resultLinkSelector = fields.string("link");
resultAlbumArtistSelector = fields.string("album_artist");
}
YamlMapping searchFieldMap = yaml.yamlMapping("search_fields");
for (YamlNode key: searchFieldMap.keys())
searchFields.put(key.asScalar().value(), searchFieldMap.string(key));
String formatName = yaml.string("format");
switch(formatName)
{
@ -86,21 +86,21 @@ public class WebScraperSearcher extends Searcher
}
@Override
protected List<Result> doSearch(Query query)
protected List<Result> doSearch(Query query)
{
try
try
{
URL url = fillURL(query);
InputStream input = url.openStream();
return processResults(parser, input);
} catch (IOException ex)
} catch (IOException ex)
{
Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex);
return List.of();
}
}
}
protected URL fillURL(Query query) throws MalformedURLException
protected URL fillURL(Query query) throws MalformedURLException
{
// URL url = new URL(searchUrl.replaceAll("\\$QUERY", URLEncoder.encode(query.getTextSearch(), Charset.forName("utf-8"))));
try
@ -122,12 +122,12 @@ public class WebScraperSearcher extends Searcher
throw new MalformedURLException();
}
}
/**
 * Returns the given search string unchanged.
 *
 * NOTE(review): the call site is not visible in this excerpt; being
 * protected and an identity function, this is presumably a hook that
 * subclasses override to adjust the search text — confirm against callers.
 *
 * @param search the search text to transform
 * @return the same string that was passed in
 */
protected String transformSearchString(String search)
{
return search;
}
protected <E> List<Result> processResults(Parser<?,E> parser, InputStream input)
{
Collection<E> resultEles = parser.getResults(input, resultSelector);
@ -142,20 +142,21 @@ public class WebScraperSearcher extends Searcher
try
{
Result res = new Result();
res.setSource(name, abbr);
// Artist
if (resultArtistSelector != null)
res.setArtist(parser.getField(ele, resultArtistSelector));
res.setArtist(parser.getField(ele, resultArtistSelector).trim());
// Title
if (resultTitleSelector != null)
res.setTitle(parser.getField(ele, resultTitleSelector));
res.setTitle(parser.getField(ele, resultTitleSelector).trim());
// Link
if (resultLinkSelector != null)
res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector));
// Artist + Album
if (resultAlbumArtistSelector != null)
res.setAlbumArtist(parser.getField(ele, resultAlbumArtistSelector));
res.setAlbumArtist(parser.getField(ele, resultAlbumArtistSelector).trim());
// Artist + Title
return res;

Loading…
Cancel
Save