generated from Nekojimi/JavaMavenTemplate
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
169 lines
5.5 KiB
169 lines
5.5 KiB
/*
|
|
* To change this license header, choose License Headers in Project Properties.
|
|
* To change this template file, choose Tools | Templates
|
|
* and open the template in the editor.
|
|
*/
|
|
package moe.nekojimi.musicsearcher.providers;
|
|
|
|
import com.amihaiemil.eoyaml.YamlMapping;
|
|
import com.amihaiemil.eoyaml.YamlNode;
|
|
import java.io.IOException;
|
|
import java.io.InputStream;
|
|
import java.net.MalformedURLException;
|
|
import java.net.URISyntaxException;
|
|
import java.net.URL;
|
|
import java.util.Collection;
|
|
import java.util.HashMap;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.logging.Level;
|
|
import java.util.logging.Logger;
|
|
import java.util.stream.Collectors;
|
|
import moe.nekojimi.musicsearcher.Query;
|
|
import moe.nekojimi.musicsearcher.Result;
|
|
import moe.nekojimi.musicsearcher.SecretStore;
|
|
import moe.nekojimi.musicsearcher.parsers.HTMLParser;
|
|
import moe.nekojimi.musicsearcher.parsers.JSONParser;
|
|
import moe.nekojimi.musicsearcher.parsers.Parser;
|
|
import org.apache.http.client.utils.URIBuilder;
|
|
|
|
/**
|
|
*
|
|
* @author jim
|
|
*/
|
|
public class WebScraperSearcher extends Searcher
|
|
{
|
|
protected String searchUrl;
|
|
protected URL rootURL;
|
|
|
|
protected String resultSelector;
|
|
protected String resultArtistSelector;
|
|
protected String resultTitleSelector;
|
|
protected String resultLinkSelector;
|
|
protected String resultAlbumArtistSelector;
|
|
|
|
protected Map<String,String> searchFields = new HashMap<>();
|
|
|
|
protected Parser<?,?> parser;
|
|
|
|
public WebScraperSearcher(String name)
|
|
{
|
|
super(name);
|
|
}
|
|
|
|
public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException
|
|
{
|
|
super(yaml);
|
|
searchUrl = yaml.string("search_url");
|
|
rootURL = new URL(searchUrl);
|
|
// rootURL = fillURL(Query.fullText(""));
|
|
// rootURL = new URL(rootURL.getProtocol(), rootURL.getHost(), "");
|
|
resultSelector = yaml.string("result_selector");
|
|
|
|
YamlMapping fields = yaml.yamlMapping("result_fields");
|
|
if (fields != null)
|
|
{
|
|
resultArtistSelector = fields.string("artist");
|
|
resultTitleSelector = fields.string("title");
|
|
resultLinkSelector = fields.string("link");
|
|
resultAlbumArtistSelector = fields.string("album_artist");
|
|
}
|
|
|
|
YamlMapping searchFieldMap = yaml.yamlMapping("search_fields");
|
|
for (YamlNode key: searchFieldMap.keys())
|
|
searchFields.put(key.asScalar().value(), searchFieldMap.string(key));
|
|
|
|
String formatName = yaml.string("format");
|
|
switch(formatName)
|
|
{
|
|
case "html":
|
|
parser = new HTMLParser(); break;
|
|
case "json":
|
|
parser = new JSONParser(); break;
|
|
default:
|
|
throw new IllegalArgumentException("Format " + formatName + " is unknown.");
|
|
}
|
|
}
|
|
|
|
@Override
|
|
protected List<Result> doSearch(Query query)
|
|
{
|
|
try
|
|
{
|
|
URL url = fillURL(query);
|
|
InputStream input = url.openStream();
|
|
return processResults(parser, input);
|
|
} catch (IOException ex)
|
|
{
|
|
Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex);
|
|
return List.of();
|
|
}
|
|
}
|
|
|
|
protected URL fillURL(Query query) throws MalformedURLException
|
|
{
|
|
// URL url = new URL(searchUrl.replaceAll("\\$QUERY", URLEncoder.encode(query.getTextSearch(), Charset.forName("utf-8"))));
|
|
try
|
|
{
|
|
URIBuilder builder = new URIBuilder(rootURL.toURI());
|
|
if (query.getTextSearch() != null)
|
|
{
|
|
if (searchFields.containsKey("query"))
|
|
builder.addParameter(searchFields.get("query"), transformSearchString(query.getTextSearch()));
|
|
}
|
|
if (searchFields.containsKey("secret"))
|
|
{
|
|
builder.addParameter(searchFields.get("secret"), SecretStore.get().getSecret(name));
|
|
}
|
|
return builder.build().toURL();
|
|
}
|
|
catch (URISyntaxException ex)
|
|
{
|
|
throw new MalformedURLException();
|
|
}
|
|
}
|
|
|
|
protected String transformSearchString(String search)
|
|
{
|
|
return search;
|
|
}
|
|
|
|
protected <E> List<Result> processResults(Parser<?,E> parser, InputStream input)
|
|
{
|
|
Collection<E> resultEles = parser.getResults(input, resultSelector);
|
|
return resultEles.stream()
|
|
.map((ele)->parseResultElement(parser, ele))
|
|
.filter((res)->res!=null)
|
|
.collect(Collectors.toList());
|
|
}
|
|
|
|
protected <E> Result parseResultElement(Parser<?,E> parser, E ele)
|
|
{
|
|
try
|
|
{
|
|
Result res = new Result();
|
|
// Artist
|
|
if (resultArtistSelector != null)
|
|
res.setArtist(parser.getField(ele, resultArtistSelector));
|
|
// Title
|
|
if (resultTitleSelector != null)
|
|
res.setTitle(parser.getField(ele, resultTitleSelector));
|
|
// Link
|
|
if (resultLinkSelector != null)
|
|
res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector));
|
|
|
|
// Artist + Album
|
|
if (resultAlbumArtistSelector != null)
|
|
res.setAlbumArtist(parser.getField(ele, resultAlbumArtistSelector));
|
|
|
|
// Artist + Title
|
|
|
|
return res;
|
|
}
|
|
catch (Exception ex)
|
|
{
|
|
Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex);
|
|
return null;
|
|
}
|
|
}
|
|
}
|
|
|