|
|
@ -35,15 +35,15 @@ public class WebScraperSearcher extends Searcher |
|
|
|
{ |
|
|
|
{ |
|
|
|
protected String searchUrl; |
|
|
|
protected String searchUrl; |
|
|
|
protected URL rootURL; |
|
|
|
protected URL rootURL; |
|
|
|
|
|
|
|
|
|
|
|
protected String resultSelector; |
|
|
|
protected String resultSelector; |
|
|
|
protected String resultArtistSelector; |
|
|
|
protected String resultArtistSelector; |
|
|
|
protected String resultTitleSelector; |
|
|
|
protected String resultTitleSelector; |
|
|
|
protected String resultLinkSelector; |
|
|
|
protected String resultLinkSelector; |
|
|
|
protected String resultAlbumArtistSelector; |
|
|
|
protected String resultAlbumArtistSelector; |
|
|
|
|
|
|
|
|
|
|
|
protected Map<String,String> searchFields = new HashMap<>(); |
|
|
|
protected Map<String,String> searchFields = new HashMap<>(); |
|
|
|
|
|
|
|
|
|
|
|
protected Parser<?,?> parser; |
|
|
|
protected Parser<?,?> parser; |
|
|
|
|
|
|
|
|
|
|
|
public WebScraperSearcher(String name) |
|
|
|
public WebScraperSearcher(String name) |
|
|
@ -51,7 +51,7 @@ public class WebScraperSearcher extends Searcher |
|
|
|
super(name); |
|
|
|
super(name); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException |
|
|
|
public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException |
|
|
|
{ |
|
|
|
{ |
|
|
|
super(yaml); |
|
|
|
super(yaml); |
|
|
|
searchUrl = yaml.string("search_url"); |
|
|
|
searchUrl = yaml.string("search_url"); |
|
|
@ -59,7 +59,7 @@ public class WebScraperSearcher extends Searcher |
|
|
|
// rootURL = fillURL(Query.fullText(""));
|
|
|
|
// rootURL = fillURL(Query.fullText(""));
|
|
|
|
// rootURL = new URL(rootURL.getProtocol(), rootURL.getHost(), "");
|
|
|
|
// rootURL = new URL(rootURL.getProtocol(), rootURL.getHost(), "");
|
|
|
|
resultSelector = yaml.string("result_selector"); |
|
|
|
resultSelector = yaml.string("result_selector"); |
|
|
|
|
|
|
|
|
|
|
|
YamlMapping fields = yaml.yamlMapping("result_fields"); |
|
|
|
YamlMapping fields = yaml.yamlMapping("result_fields"); |
|
|
|
if (fields != null) |
|
|
|
if (fields != null) |
|
|
|
{ |
|
|
|
{ |
|
|
@ -68,11 +68,11 @@ public class WebScraperSearcher extends Searcher |
|
|
|
resultLinkSelector = fields.string("link"); |
|
|
|
resultLinkSelector = fields.string("link"); |
|
|
|
resultAlbumArtistSelector = fields.string("album_artist"); |
|
|
|
resultAlbumArtistSelector = fields.string("album_artist"); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
YamlMapping searchFieldMap = yaml.yamlMapping("search_fields"); |
|
|
|
YamlMapping searchFieldMap = yaml.yamlMapping("search_fields"); |
|
|
|
for (YamlNode key: searchFieldMap.keys()) |
|
|
|
for (YamlNode key: searchFieldMap.keys()) |
|
|
|
searchFields.put(key.asScalar().value(), searchFieldMap.string(key)); |
|
|
|
searchFields.put(key.asScalar().value(), searchFieldMap.string(key)); |
|
|
|
|
|
|
|
|
|
|
|
String formatName = yaml.string("format"); |
|
|
|
String formatName = yaml.string("format"); |
|
|
|
switch(formatName) |
|
|
|
switch(formatName) |
|
|
|
{ |
|
|
|
{ |
|
|
@ -86,21 +86,21 @@ public class WebScraperSearcher extends Searcher |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
@Override |
|
|
|
@Override |
|
|
|
protected List<Result> doSearch(Query query) |
|
|
|
protected List<Result> doSearch(Query query) |
|
|
|
{ |
|
|
|
{ |
|
|
|
try |
|
|
|
try |
|
|
|
{ |
|
|
|
{ |
|
|
|
URL url = fillURL(query); |
|
|
|
URL url = fillURL(query); |
|
|
|
InputStream input = url.openStream(); |
|
|
|
InputStream input = url.openStream(); |
|
|
|
return processResults(parser, input); |
|
|
|
return processResults(parser, input); |
|
|
|
} catch (IOException ex) |
|
|
|
} catch (IOException ex) |
|
|
|
{ |
|
|
|
{ |
|
|
|
Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex); |
|
|
|
Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex); |
|
|
|
return List.of(); |
|
|
|
return List.of(); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
protected URL fillURL(Query query) throws MalformedURLException |
|
|
|
protected URL fillURL(Query query) throws MalformedURLException |
|
|
|
{ |
|
|
|
{ |
|
|
|
// URL url = new URL(searchUrl.replaceAll("\\$QUERY", URLEncoder.encode(query.getTextSearch(), Charset.forName("utf-8"))));
|
|
|
|
// URL url = new URL(searchUrl.replaceAll("\\$QUERY", URLEncoder.encode(query.getTextSearch(), Charset.forName("utf-8"))));
|
|
|
|
try |
|
|
|
try |
|
|
@ -122,12 +122,12 @@ public class WebScraperSearcher extends Searcher |
|
|
|
throw new MalformedURLException(); |
|
|
|
throw new MalformedURLException(); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
protected String transformSearchString(String search) |
|
|
|
protected String transformSearchString(String search) |
|
|
|
{ |
|
|
|
{ |
|
|
|
return search; |
|
|
|
return search; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
protected <E> List<Result> processResults(Parser<?,E> parser, InputStream input) |
|
|
|
protected <E> List<Result> processResults(Parser<?,E> parser, InputStream input) |
|
|
|
{ |
|
|
|
{ |
|
|
|
Collection<E> resultEles = parser.getResults(input, resultSelector); |
|
|
|
Collection<E> resultEles = parser.getResults(input, resultSelector); |
|
|
@ -142,20 +142,21 @@ public class WebScraperSearcher extends Searcher |
|
|
|
try |
|
|
|
try |
|
|
|
{ |
|
|
|
{ |
|
|
|
Result res = new Result(); |
|
|
|
Result res = new Result(); |
|
|
|
|
|
|
|
res.setSource(name, abbr); |
|
|
|
// Artist
|
|
|
|
// Artist
|
|
|
|
if (resultArtistSelector != null) |
|
|
|
if (resultArtistSelector != null) |
|
|
|
res.setArtist(parser.getField(ele, resultArtistSelector)); |
|
|
|
res.setArtist(parser.getField(ele, resultArtistSelector).trim()); |
|
|
|
// Title
|
|
|
|
// Title
|
|
|
|
if (resultTitleSelector != null) |
|
|
|
if (resultTitleSelector != null) |
|
|
|
res.setTitle(parser.getField(ele, resultTitleSelector)); |
|
|
|
res.setTitle(parser.getField(ele, resultTitleSelector).trim()); |
|
|
|
// Link
|
|
|
|
// Link
|
|
|
|
if (resultLinkSelector != null) |
|
|
|
if (resultLinkSelector != null) |
|
|
|
res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector)); |
|
|
|
res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector)); |
|
|
|
|
|
|
|
|
|
|
|
// Artist + Album
|
|
|
|
// Artist + Album
|
|
|
|
if (resultAlbumArtistSelector != null) |
|
|
|
if (resultAlbumArtistSelector != null) |
|
|
|
res.setAlbumArtist(parser.getField(ele, resultAlbumArtistSelector)); |
|
|
|
res.setAlbumArtist(parser.getField(ele, resultAlbumArtistSelector).trim()); |
|
|
|
|
|
|
|
|
|
|
|
// Artist + Title
|
|
|
|
// Artist + Title
|
|
|
|
|
|
|
|
|
|
|
|
return res; |
|
|
|
return res; |
|
|
|