/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package moe.nekojimi.musicsearcher.providers; import com.amihaiemil.eoyaml.YamlMapping; import com.amihaiemil.eoyaml.YamlNode; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URISyntaxException; import java.net.URL; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; import java.util.stream.Collectors; import moe.nekojimi.musicsearcher.Query; import moe.nekojimi.musicsearcher.Result; import moe.nekojimi.musicsearcher.SecretStore; import moe.nekojimi.musicsearcher.parsers.HTMLParser; import moe.nekojimi.musicsearcher.parsers.JSONParser; import moe.nekojimi.musicsearcher.parsers.Parser; import org.apache.http.client.utils.URIBuilder; /** * * @author jim */ public class WebScraperSearcher extends Searcher { protected String searchUrl; protected URL rootURL; protected String resultSelector; protected String resultArtistSelector; protected String resultTitleSelector; protected String resultLinkSelector; protected String resultAlbumArtistSelector; protected Map searchFields = new HashMap<>(); protected Parser parser; public WebScraperSearcher(String name) { super(name); } public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException { super(yaml); searchUrl = yaml.string("search_url"); rootURL = new URL(searchUrl); // rootURL = fillURL(Query.fullText("")); // rootURL = new URL(rootURL.getProtocol(), rootURL.getHost(), ""); resultSelector = yaml.string("result_selector"); YamlMapping fields = yaml.yamlMapping("result_fields"); if (fields != null) { resultArtistSelector = fields.string("artist"); resultTitleSelector = fields.string("title"); resultLinkSelector = fields.string("link"); resultAlbumArtistSelector = fields.string("album_artist"); } YamlMapping searchFieldMap = yaml.yamlMapping("search_fields"); for (YamlNode key: searchFieldMap.keys()) searchFields.put(key.asScalar().value(), searchFieldMap.string(key)); String formatName = yaml.string("format"); switch(formatName) { case "html": parser = new HTMLParser(); break; case "json": parser = new JSONParser(); break; default: throw new IllegalArgumentException("Format " + formatName + " is unknown."); } } @Override protected List doSearch(Query query) { try { URL url = fillURL(query); InputStream input = url.openStream(); return processResults(parser, input); } catch (IOException ex) { Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex); return List.of(); } } protected URL fillURL(Query query) throws MalformedURLException { // URL url = new URL(searchUrl.replaceAll("\\$QUERY", URLEncoder.encode(query.getTextSearch(), Charset.forName("utf-8")))); try { URIBuilder builder = new URIBuilder(rootURL.toURI()); if (query.getTextSearch() != null) { if (searchFields.containsKey("query")) builder.addParameter(searchFields.get("query"), transformSearchString(query.getTextSearch())); } if (searchFields.containsKey("secret")) { builder.addParameter(searchFields.get("secret"), SecretStore.get().getSecret(name)); } return builder.build().toURL(); } catch (URISyntaxException ex) { throw new MalformedURLException(); } } protected String transformSearchString(String search) { return search; } protected List processResults(Parser parser, InputStream input) { Collection resultEles = parser.getResults(input, resultSelector); return resultEles.stream() .map((ele)->parseResultElement(parser, ele)) .filter((res)->res!=null) .collect(Collectors.toList()); } protected Result parseResultElement(Parser parser, E ele) { try { Result res = new Result(); // Artist if (resultArtistSelector != null) res.setArtist(parser.getField(ele, resultArtistSelector)); // Title if (resultTitleSelector != null) res.setTitle(parser.getField(ele, resultTitleSelector)); // Link if (resultLinkSelector != null) res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector)); // Artist + Album if (resultAlbumArtistSelector != null) res.setAlbumArtist(parser.getField(ele, resultAlbumArtistSelector)); // Artist + Title return res; } catch (Exception ex) { Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex); return null; } } }