From eff1f5bfdc143694006d3ad14ef11da7b78ec230 Mon Sep 17 00:00:00 2001 From: Jim Date: Fri, 1 Oct 2021 13:19:37 +0100 Subject: [PATCH] Webscraper: set source when returning results, and trim result fields. --- .../providers/WebScraperSearcher.java | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/src/main/java/moe/nekojimi/musicsearcher/providers/WebScraperSearcher.java b/src/main/java/moe/nekojimi/musicsearcher/providers/WebScraperSearcher.java index 748d21e..0c6fd09 100644 --- a/src/main/java/moe/nekojimi/musicsearcher/providers/WebScraperSearcher.java +++ b/src/main/java/moe/nekojimi/musicsearcher/providers/WebScraperSearcher.java @@ -35,15 +35,15 @@ public class WebScraperSearcher extends Searcher { protected String searchUrl; protected URL rootURL; - + protected String resultSelector; protected String resultArtistSelector; protected String resultTitleSelector; protected String resultLinkSelector; protected String resultAlbumArtistSelector; - + protected Map searchFields = new HashMap<>(); - + protected Parser parser; public WebScraperSearcher(String name) @@ -51,7 +51,7 @@ public class WebScraperSearcher extends Searcher super(name); } - public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException + public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException { super(yaml); searchUrl = yaml.string("search_url"); @@ -59,7 +59,7 @@ public class WebScraperSearcher extends Searcher // rootURL = fillURL(Query.fullText("")); // rootURL = new URL(rootURL.getProtocol(), rootURL.getHost(), ""); resultSelector = yaml.string("result_selector"); - + YamlMapping fields = yaml.yamlMapping("result_fields"); if (fields != null) { @@ -68,11 +68,11 @@ public class WebScraperSearcher extends Searcher resultLinkSelector = fields.string("link"); resultAlbumArtistSelector = fields.string("album_artist"); } - + YamlMapping searchFieldMap = yaml.yamlMapping("search_fields"); for (YamlNode key: searchFieldMap.keys()) searchFields.put(key.asScalar().value(), searchFieldMap.string(key)); - + String formatName = yaml.string("format"); switch(formatName) { @@ -86,21 +86,21 @@ public class WebScraperSearcher extends Searcher } @Override - protected List doSearch(Query query) + protected List doSearch(Query query) { - try + try { URL url = fillURL(query); InputStream input = url.openStream(); return processResults(parser, input); - } catch (IOException ex) + } catch (IOException ex) { Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex); return List.of(); - } + } } - protected URL fillURL(Query query) throws MalformedURLException + protected URL fillURL(Query query) throws MalformedURLException { // URL url = new URL(searchUrl.replaceAll("\\$QUERY", URLEncoder.encode(query.getTextSearch(), Charset.forName("utf-8")))); try @@ -122,12 +122,12 @@ public class WebScraperSearcher extends Searcher throw new MalformedURLException(); } } - + protected String transformSearchString(String search) { return search; } - + protected List processResults(Parser parser, InputStream input) { Collection resultEles = parser.getResults(input, resultSelector); @@ -142,20 +142,21 @@ public class WebScraperSearcher extends Searcher try { Result res = new Result(); + res.setSource(name, abbr); // Artist if (resultArtistSelector != null) - res.setArtist(parser.getField(ele, resultArtistSelector)); + res.setArtist(parser.getField(ele, resultArtistSelector).trim()); // Title if (resultTitleSelector != null) - res.setTitle(parser.getField(ele, resultTitleSelector)); + res.setTitle(parser.getField(ele, resultTitleSelector).trim()); // Link if (resultLinkSelector != null) res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector)); // Artist + Album if (resultAlbumArtistSelector != null) - res.setAlbumArtist(parser.getField(ele, resultAlbumArtistSelector)); - + res.setAlbumArtist(parser.getField(ele, resultAlbumArtistSelector).trim()); + // Artist + Title return res;