You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

169 lines
5.5 KiB

/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package moe.nekojimi.musicsearcher.providers;
import com.amihaiemil.eoyaml.YamlMapping;
import com.amihaiemil.eoyaml.YamlNode;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import moe.nekojimi.musicsearcher.Query;
import moe.nekojimi.musicsearcher.Result;
import moe.nekojimi.musicsearcher.SecretStore;
import moe.nekojimi.musicsearcher.parsers.HTMLParser;
import moe.nekojimi.musicsearcher.parsers.JSONParser;
import moe.nekojimi.musicsearcher.parsers.Parser;
import org.apache.http.client.utils.URIBuilder;
/**
 * Searcher that sends a query to a web endpoint and turns the response into
 * {@link Result}s by applying configured selectors through a {@link Parser}.
 * <p>
 * When built from YAML the following keys are read: {@code search_url},
 * {@code result_selector}, {@code result_fields} (optional mapping with
 * {@code artist}, {@code title}, {@code link}, {@code album_artist}),
 * {@code search_fields} (optional mapping) and {@code format}
 * ({@code html} or {@code json}).
 *
 * @author jim
 */
public class WebScraperSearcher extends Searcher
{
    private static final Logger LOGGER = Logger.getLogger(WebScraperSearcher.class.getName());

    /** Raw value of the {@code search_url} configuration key. */
    protected String searchUrl;
    /** Parsed form of {@link #searchUrl}; base for request URLs and passed to the parser for link fields. */
    protected URL rootURL;
    /** Selector handed to the parser to locate the individual result elements. */
    protected String resultSelector;
    /** Selector for the artist field of a result element, or {@code null} to skip it. */
    protected String resultArtistSelector;
    /** Selector for the title field of a result element, or {@code null} to skip it. */
    protected String resultTitleSelector;
    /** Selector for the link field of a result element, or {@code null} to skip it. */
    protected String resultLinkSelector;
    /** Selector for the combined album/artist field of a result element, or {@code null} to skip it. */
    protected String resultAlbumArtistSelector;
    /**
     * Maps a logical field name (the code looks up {@code "query"} and
     * {@code "secret"}) to the request parameter name used by the remote site.
     */
    protected Map<String,String> searchFields = new HashMap<>();
    /** Parser used to read the response; chosen from the {@code format} key. */
    protected Parser<?,?> parser;

    /**
     * Creates an unconfigured searcher. URL, selectors and parser are left
     * unset and must be assigned before searching.
     *
     * @param name provider name, forwarded to {@link Searcher}
     */
    public WebScraperSearcher(String name)
    {
        super(name);
    }

    /**
     * Creates a searcher from a YAML provider definition.
     *
     * @param yaml provider definition
     * @throws MalformedURLException if {@code search_url} is not a valid URL
     * @throws IllegalArgumentException if {@code format} is missing or is
     *         neither {@code html} nor {@code json}
     */
    public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException
    {
        super(yaml);

        searchUrl = yaml.string("search_url");
        rootURL = new URL(searchUrl);
        resultSelector = yaml.string("result_selector");

        YamlMapping fields = yaml.yamlMapping("result_fields");
        if (fields != null)
        {
            resultArtistSelector = fields.string("artist");
            resultTitleSelector = fields.string("title");
            resultLinkSelector = fields.string("link");
            resultAlbumArtistSelector = fields.string("album_artist");
        }

        // Treated as optional, like result_fields: a definition without it
        // simply sends no extra request parameters instead of failing with a
        // NullPointerException.
        YamlMapping searchFieldMap = yaml.yamlMapping("search_fields");
        if (searchFieldMap != null)
        {
            for (YamlNode key : searchFieldMap.keys())
            {
                searchFields.put(key.asScalar().value(), searchFieldMap.string(key));
            }
        }

        String formatName = yaml.string("format");
        if (formatName == null)
        {
            // switch on a null String would throw a bare NullPointerException.
            throw new IllegalArgumentException("No format specified; expected \"html\" or \"json\".");
        }
        switch (formatName)
        {
            case "html":
                parser = new HTMLParser();
                break;
            case "json":
                parser = new JSONParser();
                break;
            default:
                throw new IllegalArgumentException("Format " + formatName + " is unknown.");
        }
    }

    /**
     * Runs the query against the remote endpoint and parses the response.
     * I/O failures are logged and reported as an empty result list.
     *
     * @param query the query to run
     * @return the parsed results; empty if the request failed
     */
    @Override
    protected List<Result> doSearch(Query query)
    {
        // try-with-resources: the response stream is closed on every path.
        // processResults collects into a list before returning, so nothing
        // reads from the stream after it has been closed.
        try (InputStream input = fillURL(query).openStream())
        {
            return processResults(parser, input);
        }
        catch (IOException ex)
        {
            // The request URL is deliberately not logged: it may contain the
            // provider secret as a parameter.
            LOGGER.log(Level.SEVERE, "Search request failed for provider " + name, ex);
            return List.of();
        }
    }

    /**
     * Builds the request URL by appending the configured parameters to
     * {@link #rootURL}: the (transformed) text search under the
     * {@code "query"} mapping, and the provider secret under the
     * {@code "secret"} mapping, each only if the mapping is configured.
     *
     * @param query the query supplying the text search
     * @return the URL to request
     * @throws MalformedURLException if the URL cannot be built; the underlying
     *         {@link URISyntaxException}, if any, is attached as the cause
     */
    protected URL fillURL(Query query) throws MalformedURLException
    {
        try
        {
            URIBuilder builder = new URIBuilder(rootURL.toURI());

            String text = query.getTextSearch();
            if (text != null && searchFields.containsKey("query"))
            {
                builder.addParameter(searchFields.get("query"), transformSearchString(text));
            }

            if (searchFields.containsKey("secret"))
            {
                builder.addParameter(searchFields.get("secret"), SecretStore.get().getSecret(name));
            }

            return builder.build().toURL();
        }
        catch (URISyntaxException ex)
        {
            // MalformedURLException has no (message, cause) constructor, so the
            // cause is attached afterwards rather than being dropped.
            MalformedURLException wrapped = new MalformedURLException(
                    "Could not build search URL for provider " + name + ": " + ex.getReason());
            wrapped.initCause(ex);
            throw wrapped;
        }
    }

    /**
     * Hook for adapting the text search before it is sent. This
     * implementation returns the input unchanged.
     *
     * @param search the text search from the query
     * @return the string to send as the query parameter
     */
    protected String transformSearchString(String search)
    {
        return search;
    }

    /**
     * Extracts the result elements from the response using
     * {@link #resultSelector} and converts each to a {@link Result}. Elements
     * that fail to convert are dropped.
     *
     * @param <E> element type produced by the parser
     * @param parser parser used to read the response
     * @param input the response stream
     * @return the successfully converted results
     */
    protected <E> List<Result> processResults(Parser<?,E> parser, InputStream input)
    {
        Collection<E> resultEles = parser.getResults(input, resultSelector);
        return resultEles.stream()
                .map(ele -> parseResultElement(parser, ele))
                .filter(res -> res != null)
                .collect(Collectors.toList());
    }

    /**
     * Converts one result element to a {@link Result}, filling in each field
     * whose selector is configured. This is best-effort: any exception is
     * logged and reported as {@code null} so one bad element does not abort
     * the whole search.
     *
     * @param <E> element type produced by the parser
     * @param parser parser used to read fields from the element
     * @param ele the result element
     * @return the populated result, or {@code null} if extraction failed
     */
    protected <E> Result parseResultElement(Parser<?,E> parser, E ele)
    {
        try
        {
            Result res = new Result();
            if (resultArtistSelector != null)
            {
                res.setArtist(parser.getField(ele, resultArtistSelector));
            }
            if (resultTitleSelector != null)
            {
                res.setTitle(parser.getField(ele, resultTitleSelector));
            }
            if (resultLinkSelector != null)
            {
                // rootURL is passed along with the selector, presumably so the
                // parser can resolve relative links - confirm against Parser.
                res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector));
            }
            if (resultAlbumArtistSelector != null)
            {
                res.setAlbumArtist(parser.getField(ele, resultAlbumArtistSelector));
            }
            return res;
        }
        catch (Exception ex)
        {
            LOGGER.log(Level.SEVERE, "Could not parse a result element for provider " + name, ex);
            return null;
        }
    }
}