|
|
|
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
|
|
|
|
package moe.nekojimi.musicsearcher.providers;
|
|
|
|
|
|
|
|
import com.amihaiemil.eoyaml.YamlMapping;
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.net.MalformedURLException;
|
|
|
|
import java.net.URL;
|
|
|
|
import java.net.URLEncoder;
|
|
|
|
import java.nio.charset.Charset;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.logging.Level;
|
|
|
|
import java.util.logging.Logger;
|
|
|
|
import java.util.stream.Collectors;
|
|
|
|
import moe.nekojimi.musicsearcher.Result;
|
|
|
|
import org.jsoup.Jsoup;
|
|
|
|
import org.jsoup.nodes.Document;
|
|
|
|
import org.jsoup.nodes.Element;
|
|
|
|
import org.jsoup.select.Elements;
|
|
|
|
|
|
|
|
/**
|
|
|
|
*
|
|
|
|
* @author jim
|
|
|
|
*/
|
|
|
|
public class WebScraperSearcher extends Searcher
|
|
|
|
{
|
|
|
|
private String searchUrl;
|
|
|
|
private URL rootURL;
|
|
|
|
|
|
|
|
private String resultSelector;
|
|
|
|
private String resultArtistSelector;
|
|
|
|
private String resultTitleSelector;
|
|
|
|
private String resultLinkSelector;
|
|
|
|
private String resultAlbumArtistSelector;
|
|
|
|
|
|
|
|
public WebScraperSearcher(String name)
|
|
|
|
{
|
|
|
|
super(name);
|
|
|
|
}
|
|
|
|
|
|
|
|
public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException
|
|
|
|
{
|
|
|
|
super(yaml);
|
|
|
|
searchUrl = yaml.string("search_url");
|
|
|
|
rootURL = fillURL("");
|
|
|
|
rootURL = new URL(rootURL.getProtocol(), rootURL.getHost(), "");
|
|
|
|
resultSelector = yaml.string("result_selector");
|
|
|
|
|
|
|
|
YamlMapping fields = yaml.yamlMapping("result_field_selectors");
|
|
|
|
if (fields != null)
|
|
|
|
{
|
|
|
|
resultArtistSelector = fields.string("artist");
|
|
|
|
resultTitleSelector = fields.string("title");
|
|
|
|
resultLinkSelector = fields.string("link");
|
|
|
|
resultAlbumArtistSelector = fields.string("album_artist");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected List<Result> doSearch(String query)
|
|
|
|
{
|
|
|
|
try
|
|
|
|
{
|
|
|
|
URL url = fillURL(query);
|
|
|
|
Document doc = Jsoup.parse(url, 10000);
|
|
|
|
System.out.println("Document from " + name + ":" + doc.html());
|
|
|
|
Elements resultEles = doc.select(resultSelector);
|
|
|
|
return resultEles.stream()
|
|
|
|
.map((ele)->parseResultElement(ele))
|
|
|
|
.filter((res)->res!=null)
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
} catch (IOException ex)
|
|
|
|
{
|
|
|
|
Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex);
|
|
|
|
return List.of();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
protected URL fillURL(String query) throws MalformedURLException
|
|
|
|
{
|
|
|
|
URL url = new URL(searchUrl.replaceAll("\\$QUERY", URLEncoder.encode(query, Charset.forName("utf-8"))));
|
|
|
|
return url;
|
|
|
|
}
|
|
|
|
|
|
|
|
protected Result parseResultElement(Element ele)
|
|
|
|
{
|
|
|
|
try
|
|
|
|
{
|
|
|
|
Result res = new Result();
|
|
|
|
// Artist
|
|
|
|
if (resultArtistSelector != null)
|
|
|
|
{
|
|
|
|
Element artistEle = ele.selectFirst(resultArtistSelector);
|
|
|
|
if (artistEle != null)
|
|
|
|
res.setArtist(artistEle.text());
|
|
|
|
}
|
|
|
|
// Title
|
|
|
|
if (resultTitleSelector != null)
|
|
|
|
{
|
|
|
|
Element titleEle = ele.selectFirst(resultTitleSelector);
|
|
|
|
if (titleEle != null)
|
|
|
|
res.setTitle(titleEle.text());
|
|
|
|
}
|
|
|
|
// Link
|
|
|
|
if (resultLinkSelector != null)
|
|
|
|
{
|
|
|
|
Element linkEle = ele.selectFirst(resultLinkSelector);
|
|
|
|
if (linkEle != null)
|
|
|
|
{
|
|
|
|
String link;
|
|
|
|
if (linkEle.hasAttr("href"))
|
|
|
|
link = linkEle.attr("href");
|
|
|
|
else
|
|
|
|
link = linkEle.text();
|
|
|
|
|
|
|
|
URL url;
|
|
|
|
if(!link.startsWith("http"))
|
|
|
|
url = new URL(rootURL, link);
|
|
|
|
else
|
|
|
|
url = new URL(link);
|
|
|
|
|
|
|
|
res.setLink(url);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Artist + Album
|
|
|
|
if (resultAlbumArtistSelector != null)
|
|
|
|
{
|
|
|
|
Element alArtEle = ele.selectFirst(resultAlbumArtistSelector);
|
|
|
|
if (alArtEle != null)
|
|
|
|
{
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Artist + Title
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
catch (Exception ex)
|
|
|
|
{
|
|
|
|
Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex);
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|