generated from Nekojimi/JavaMavenTemplate
Compare commits
5 Commits
d82b54ccf2
...
b8336c26fb
Author | SHA1 | Date |
---|---|---|
|
b8336c26fb | |
|
039a91ed40 | |
|
fe210ad8d0 | |
|
d3f083e0d5 | |
|
ba9fde9cac |
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* To change this license header, choose License Headers in Project Properties.
|
||||
* To change this template file, choose Tools | Templates
|
||||
* and open the template in the editor.
|
||||
*/
|
||||
package moe.nekojimi.musicsearcher;
|
||||
|
||||
/**
 * Describes a search request. At present the only criterion carried is a
 * free-text search string.
 *
 * @author jim
 */
public class Query
{
    private String textSearch;

    /**
     * @return the free-text search string, or {@code null} if none was set
     */
    public String getTextSearch()
    {
        return this.textSearch;
    }

    /**
     * @param textSearch the free-text search string to store
     */
    public void setTextSearch(String textSearch)
    {
        this.textSearch = textSearch;
    }

    /**
     * Builds a query whose text search is the given string.
     *
     * @param text the free-text search string
     * @return a new query carrying {@code text}
     */
    public static Query fullText(String text)
    {
        final Query created = new Query();
        created.setTextSearch(text);
        return created;
    }
}
|
|
@ -0,0 +1,22 @@
|
|||
/*
|
||||
* To change this license header, choose License Headers in Project Properties.
|
||||
* To change this template file, choose Tools | Templates
|
||||
* and open the template in the editor.
|
||||
*/
|
||||
package moe.nekojimi.musicsearcher;
|
||||
|
||||
/**
 * Thrown when a query uses a field that a searcher does not support.
 *
 * @author jim
 */
public class QueryFieldUnsupportedException extends RuntimeException
{
    private final String field;

    /**
     * @param field name of the unsupported query field; it is appended to
     *              the exception message
     */
    public QueryFieldUnsupportedException(String field)
    {
        super("Searcher doesn't support query field: " + field);
        this.field = field;
    }

    /**
     * @return the name of the unsupported query field, as passed to the
     *         constructor
     */
    public String getField()
    {
        return field;
    }
}
|
|
@ -54,6 +54,33 @@ public class Result
|
|||
public String toString() {
|
||||
return "Result{" + "link=" + link + ", artist=" + artist + ", album=" + album + ", title=" + title + '}';
|
||||
}
|
||||
|
||||
public void setAlbumArtist(String field)
|
||||
{
|
||||
// System.out.println("Parsing album-artist: " + field);
|
||||
String fieldLower = field.toLowerCase();
|
||||
if (fieldLower.contains("from") || field.toLowerCase().contains("by"))
|
||||
{
|
||||
artist = "";
|
||||
album = "";
|
||||
String[] words = field.split("\\s+");
|
||||
boolean readingArtist = false;
|
||||
for (String word: words)
|
||||
{
|
||||
if (word.equals("from"))
|
||||
readingArtist = false;
|
||||
else if (word.equals("by"))
|
||||
readingArtist = true;
|
||||
else
|
||||
{
|
||||
if (readingArtist)
|
||||
artist += word + " ";
|
||||
else
|
||||
album += word + " ";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
package moe.nekojimi.musicsearcher;
|
||||
|
||||
import com.amihaiemil.eoyaml.Yaml;
|
||||
import com.amihaiemil.eoyaml.YamlMapping;
|
||||
import com.amihaiemil.eoyaml.YamlNode;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
/*
|
||||
* To change this license header, choose License Headers in Project Properties.
|
||||
* To change this template file, choose Tools | Templates
|
||||
* and open the template in the editor.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
* @author jim
|
||||
*/
|
||||
public class SecretStore
|
||||
{
|
||||
private static SecretStore secretStore;
|
||||
|
||||
public static SecretStore get()
|
||||
{
|
||||
if (secretStore == null)
|
||||
secretStore = new SecretStore();
|
||||
return secretStore;
|
||||
}
|
||||
|
||||
private Map<String,String> secrets = new HashMap<>();
|
||||
|
||||
private SecretStore()
|
||||
{
|
||||
try
|
||||
{
|
||||
File file = new File("secrets.yml");
|
||||
if (!file.exists())
|
||||
{
|
||||
System.out.println("WARNING: couldn't find secrets.yml. No API secrets available.");
|
||||
return;
|
||||
}
|
||||
|
||||
YamlMapping yaml = Yaml.createYamlInput(file).readYamlMapping();
|
||||
for (YamlNode key : yaml.keys())
|
||||
secrets.put(key.asScalar().value(), yaml.string(key));
|
||||
} catch (IOException ex) {
|
||||
Logger.getLogger(SecretStore.class.getName()).log(Level.SEVERE, null, ex);
|
||||
}
|
||||
}
|
||||
|
||||
public String getSecret(String key)
|
||||
{
|
||||
return secrets.getOrDefault(key,"");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,70 @@
|
|||
/*
|
||||
* To change this license header, choose License Headers in Project Properties.
|
||||
* To change this template file, choose Tools | Templates
|
||||
* and open the template in the editor.
|
||||
*/
|
||||
package moe.nekojimi.musicsearcher.parsers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.Collection;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
|
||||
|
||||
public class HTMLParser extends Parser<Document, Element>
|
||||
{
|
||||
@Override
|
||||
public Document getDocument(InputStream input)
|
||||
{
|
||||
try
|
||||
{
|
||||
Document doc = Jsoup.parse(new String(input.readAllBytes()));
|
||||
// System.out.println(doc.outerHtml());
|
||||
return doc;
|
||||
}
|
||||
catch (IOException ex)
|
||||
{
|
||||
Logger.getLogger(HTMLParser.class.getName()).log(Level.SEVERE, null, ex);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<Element> getResults(Document document, String selector)
|
||||
{
|
||||
return document.select(selector);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getField(Element object, String selector)
|
||||
{
|
||||
Element ele = object.selectFirst(selector);
|
||||
return ele.text();
|
||||
}
|
||||
|
||||
@Override
|
||||
public URL getURLField(Element object, URL baseURL, String selector) throws MalformedURLException
|
||||
{
|
||||
Element ele = object.selectFirst(selector);
|
||||
|
||||
String link;
|
||||
if (ele.hasAttr("href"))
|
||||
link = ele.attr("href");
|
||||
else
|
||||
link = ele.text();
|
||||
|
||||
URL url;
|
||||
if(!link.startsWith("http"))
|
||||
url = new URL(baseURL, link);
|
||||
else
|
||||
url = new URL(link);
|
||||
|
||||
return url;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,72 @@
|
|||
/*
|
||||
* To change this license header, choose License Headers in Project Properties.
|
||||
* To change this template file, choose Tools | Templates
|
||||
* and open the template in the editor.
|
||||
*/
|
||||
package moe.nekojimi.musicsearcher.parsers;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import javax.json.Json;
|
||||
import javax.json.JsonObject;
|
||||
import javax.json.JsonReader;
|
||||
import javax.json.JsonStructure;
|
||||
import javax.json.JsonValue;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author jim
|
||||
*/
|
||||
public class JSONParser extends Parser<JsonStructure, JsonObject>
|
||||
{
|
||||
|
||||
@Override
|
||||
public JsonStructure getDocument(InputStream input)
|
||||
{
|
||||
JsonReader reader = Json.createReader(input);
|
||||
return reader.read();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<JsonObject> getResults(JsonStructure document, String selector)
|
||||
{
|
||||
JsonValue value = document.getValue(selector);
|
||||
if (value.getValueType() == JsonValue.ValueType.ARRAY)
|
||||
{
|
||||
return value.asJsonArray().getValuesAs(JsonObject.class);
|
||||
}
|
||||
else if (value.getValueType() == JsonValue.ValueType.OBJECT)
|
||||
{
|
||||
return List.of(value.asJsonObject());
|
||||
}
|
||||
return List.of();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getField(JsonObject object, String selector)
|
||||
{
|
||||
return object.getValue(selector).toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public URL getURLField(JsonObject object, URL baseURL, String selector) throws MalformedURLException
|
||||
{
|
||||
String link = getField(object, selector);
|
||||
|
||||
URL url;
|
||||
if(!link.startsWith("http"))
|
||||
url = new URL(baseURL, link);
|
||||
else
|
||||
url = new URL(link);
|
||||
|
||||
return url;
|
||||
}
|
||||
|
||||
private JsonValue navigate(JsonObject from, String selector)
|
||||
{
|
||||
return from.getValue(selector);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
/*
|
||||
* To change this license header, choose License Headers in Project Properties.
|
||||
* To change this template file, choose Tools | Templates
|
||||
* and open the template in the editor.
|
||||
*/
|
||||
package moe.nekojimi.musicsearcher.parsers;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
 * Extracts search results and their fields from a response document.
 *
 * @param <D> type of the parsed document
 * @param <O> type of a single result within the document
 * @author jim
 */
public abstract class Parser<D,O>
{
    /**
     * Parses the raw response into a document.
     *
     * @param input stream holding the response
     * @return the parsed document
     */
    public abstract D getDocument(InputStream input);

    /**
     * @param document the parsed response
     * @param selector implementation-specific expression locating the results
     * @return the results found in {@code document}
     */
    public abstract Collection<O> getResults(D document, String selector);

    /**
     * Parses {@code input} with {@link #getDocument(InputStream)} and
     * extracts the results from it.
     *
     * @param input    stream holding the response
     * @param selector implementation-specific expression locating the results
     * @return the results, or an empty collection if no document could be
     *         parsed ({@code getDocument} returned {@code null})
     */
    public Collection<O> getResults(InputStream input, String selector)
    {
        D document = getDocument(input);
        if (document == null)
            return java.util.Collections.emptyList();
        return getResults(document, selector);
    }

    /**
     * @param object   a single result
     * @param selector implementation-specific expression locating the field
     * @return the field's text
     */
    public abstract String getField(O object, String selector);

    /**
     * @param object   a single result
     * @param baseURL  base URL supplied for resolving links
     * @param selector implementation-specific expression locating the link
     * @return the link as a URL
     * @throws MalformedURLException if the link cannot be turned into a URL
     */
    public abstract URL getURLField(O object, URL baseURL, String selector) throws MalformedURLException;

}
|
|
@ -5,26 +5,22 @@
|
|||
*/
|
||||
package moe.nekojimi.musicsearcher.providers;
|
||||
|
||||
import com.amihaiemil.eoyaml.YamlMapping;
|
||||
import java.util.List;
|
||||
import moe.nekojimi.musicsearcher.Result;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author jim
|
||||
*/
|
||||
public class ApiSearcher extends Searcher
|
||||
{
|
||||
|
||||
public ApiSearcher(YamlMapping yaml)
|
||||
{
|
||||
super(yaml);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<Result> doSearch(String query)
|
||||
{
|
||||
throw new UnsupportedOperationException("NYI");
|
||||
}
|
||||
|
||||
}
|
||||
//public class ApiSearcher extends Searcher
|
||||
//{
|
||||
//
|
||||
// public ApiSearcher(YamlMapping yaml)
|
||||
// {
|
||||
// super(yaml);
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// protected List<Result> doSearch(String query)
|
||||
// {
|
||||
// throw new UnsupportedOperationException("NYI");
|
||||
// }
|
||||
//
|
||||
//}
|
||||
|
|
|
@ -12,6 +12,8 @@ import java.util.concurrent.ExecutionException;
|
|||
import java.util.concurrent.ForkJoinPool;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
import moe.nekojimi.musicsearcher.Query;
|
||||
import moe.nekojimi.musicsearcher.QueryFieldUnsupportedException;
|
||||
import moe.nekojimi.musicsearcher.Result;
|
||||
|
||||
/**
|
||||
|
@ -40,7 +42,7 @@ public abstract class Searcher
|
|||
return name;
|
||||
}
|
||||
|
||||
public List<Result> searchAndWait(String query) throws InterruptedException, ExecutionException
|
||||
public List<Result> searchAndWait(Query query) throws InterruptedException, ExecutionException
|
||||
{
|
||||
try
|
||||
{
|
||||
|
@ -51,19 +53,19 @@ public abstract class Searcher
|
|||
}
|
||||
}
|
||||
|
||||
public List<Result> searchAndWait(String query, long limit, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException
|
||||
public List<Result> searchAndWait(Query query, long limit, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException
|
||||
{
|
||||
return search(query).get(limit, unit);
|
||||
}
|
||||
|
||||
public CompletableFuture<List<Result>> search(String query)
|
||||
public CompletableFuture<List<Result>> search(Query query)
|
||||
{
|
||||
CompletableFuture<List<Result>> future = new CompletableFuture<>();
|
||||
future.completeAsync(()->doSearch(query), executor);
|
||||
return future;
|
||||
}
|
||||
|
||||
protected abstract List<Result> doSearch(String query);
|
||||
protected abstract List<Result> doSearch(Query query) throws QueryFieldUnsupportedException;
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
|
|
@ -6,20 +6,26 @@
|
|||
package moe.nekojimi.musicsearcher.providers;
|
||||
|
||||
import com.amihaiemil.eoyaml.YamlMapping;
|
||||
import com.amihaiemil.eoyaml.YamlNode;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.net.URLEncoder;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.stream.Collectors;
|
||||
import moe.nekojimi.musicsearcher.Query;
|
||||
import moe.nekojimi.musicsearcher.Result;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import moe.nekojimi.musicsearcher.SecretStore;
|
||||
import moe.nekojimi.musicsearcher.parsers.HTMLParser;
|
||||
import moe.nekojimi.musicsearcher.parsers.JSONParser;
|
||||
import moe.nekojimi.musicsearcher.parsers.Parser;
|
||||
import org.apache.http.client.utils.URIBuilder;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -27,14 +33,18 @@ import org.jsoup.select.Elements;
|
|||
*/
|
||||
public class WebScraperSearcher extends Searcher
|
||||
{
|
||||
private String searchUrl;
|
||||
private URL rootURL;
|
||||
protected String searchUrl;
|
||||
protected URL rootURL;
|
||||
|
||||
private String resultSelector;
|
||||
private String resultArtistSelector;
|
||||
private String resultTitleSelector;
|
||||
private String resultLinkSelector;
|
||||
private String resultAlbumArtistSelector;
|
||||
protected String resultSelector;
|
||||
protected String resultArtistSelector;
|
||||
protected String resultTitleSelector;
|
||||
protected String resultLinkSelector;
|
||||
protected String resultAlbumArtistSelector;
|
||||
|
||||
protected Map<String,String> searchFields = new HashMap<>();
|
||||
|
||||
protected Parser<?,?> parser;
|
||||
|
||||
public WebScraperSearcher(String name)
|
||||
{
|
||||
|
@ -45,11 +55,12 @@ public class WebScraperSearcher extends Searcher
|
|||
{
|
||||
super(yaml);
|
||||
searchUrl = yaml.string("search_url");
|
||||
rootURL = fillURL("");
|
||||
rootURL = new URL(rootURL.getProtocol(), rootURL.getHost(), "");
|
||||
rootURL = new URL(searchUrl);
|
||||
// rootURL = fillURL(Query.fullText(""));
|
||||
// rootURL = new URL(rootURL.getProtocol(), rootURL.getHost(), "");
|
||||
resultSelector = yaml.string("result_selector");
|
||||
|
||||
YamlMapping fields = yaml.yamlMapping("result_field_selectors");
|
||||
YamlMapping fields = yaml.yamlMapping("result_fields");
|
||||
if (fields != null)
|
||||
{
|
||||
resultArtistSelector = fields.string("artist");
|
||||
|
@ -57,84 +68,93 @@ public class WebScraperSearcher extends Searcher
|
|||
resultLinkSelector = fields.string("link");
|
||||
resultAlbumArtistSelector = fields.string("album_artist");
|
||||
}
|
||||
|
||||
YamlMapping searchFieldMap = yaml.yamlMapping("search_fields");
|
||||
for (YamlNode key: searchFieldMap.keys())
|
||||
searchFields.put(key.asScalar().value(), searchFieldMap.string(key));
|
||||
|
||||
String formatName = yaml.string("format");
|
||||
switch(formatName)
|
||||
{
|
||||
case "html":
|
||||
parser = new HTMLParser(); break;
|
||||
case "json":
|
||||
parser = new JSONParser(); break;
|
||||
default:
|
||||
throw new IllegalArgumentException("Format " + formatName + " is unknown.");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<Result> doSearch(String query)
|
||||
protected List<Result> doSearch(Query query)
|
||||
{
|
||||
try
|
||||
{
|
||||
URL url = fillURL(query);
|
||||
Document doc = Jsoup.parse(url, 10000);
|
||||
System.out.println("Document from " + name + ":" + doc.html());
|
||||
Elements resultEles = doc.select(resultSelector);
|
||||
return resultEles.stream()
|
||||
.map((ele)->parseResultElement(ele))
|
||||
.filter((res)->res!=null)
|
||||
.collect(Collectors.toList());
|
||||
InputStream input = url.openStream();
|
||||
return processResults(parser, input);
|
||||
} catch (IOException ex)
|
||||
{
|
||||
Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex);
|
||||
return List.of();
|
||||
}
|
||||
}
|
||||
|
||||
protected URL fillURL(Query query) throws MalformedURLException
|
||||
{
|
||||
// URL url = new URL(searchUrl.replaceAll("\\$QUERY", URLEncoder.encode(query.getTextSearch(), Charset.forName("utf-8"))));
|
||||
try
|
||||
{
|
||||
URIBuilder builder = new URIBuilder(rootURL.toURI());
|
||||
if (query.getTextSearch() != null)
|
||||
{
|
||||
if (searchFields.containsKey("query"))
|
||||
builder.addParameter(searchFields.get("query"), transformSearchString(query.getTextSearch()));
|
||||
}
|
||||
if (searchFields.containsKey("secret"))
|
||||
{
|
||||
builder.addParameter(searchFields.get("secret"), SecretStore.get().getSecret(name));
|
||||
}
|
||||
return builder.build().toURL();
|
||||
}
|
||||
catch (URISyntaxException ex)
|
||||
{
|
||||
throw new MalformedURLException();
|
||||
}
|
||||
}
|
||||
|
||||
protected URL fillURL(String query) throws MalformedURLException
|
||||
|
||||
protected String transformSearchString(String search)
|
||||
{
|
||||
URL url = new URL(searchUrl.replaceAll("\\$QUERY", URLEncoder.encode(query, Charset.forName("utf-8"))));
|
||||
return url;
|
||||
return search;
|
||||
}
|
||||
|
||||
protected <E> List<Result> processResults(Parser<?,E> parser, InputStream input)
|
||||
{
|
||||
Collection<E> resultEles = parser.getResults(input, resultSelector);
|
||||
return resultEles.stream()
|
||||
.map((ele)->parseResultElement(parser, ele))
|
||||
.filter((res)->res!=null)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
protected Result parseResultElement(Element ele)
|
||||
protected <E> Result parseResultElement(Parser<?,E> parser, E ele)
|
||||
{
|
||||
try
|
||||
{
|
||||
Result res = new Result();
|
||||
// Artist
|
||||
if (resultArtistSelector != null)
|
||||
{
|
||||
Element artistEle = ele.selectFirst(resultArtistSelector);
|
||||
if (artistEle != null)
|
||||
res.setArtist(artistEle.text());
|
||||
}
|
||||
res.setArtist(parser.getField(ele, resultArtistSelector));
|
||||
// Title
|
||||
if (resultTitleSelector != null)
|
||||
{
|
||||
Element titleEle = ele.selectFirst(resultTitleSelector);
|
||||
if (titleEle != null)
|
||||
res.setTitle(titleEle.text());
|
||||
}
|
||||
res.setTitle(parser.getField(ele, resultTitleSelector));
|
||||
// Link
|
||||
if (resultLinkSelector != null)
|
||||
{
|
||||
Element linkEle = ele.selectFirst(resultLinkSelector);
|
||||
if (linkEle != null)
|
||||
{
|
||||
String link;
|
||||
if (linkEle.hasAttr("href"))
|
||||
link = linkEle.attr("href");
|
||||
else
|
||||
link = linkEle.text();
|
||||
|
||||
URL url;
|
||||
if(!link.startsWith("http"))
|
||||
url = new URL(rootURL, link);
|
||||
else
|
||||
url = new URL(link);
|
||||
|
||||
res.setLink(url);
|
||||
}
|
||||
}
|
||||
res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector));
|
||||
|
||||
// Artist + Album
|
||||
if (resultAlbumArtistSelector != null)
|
||||
{
|
||||
Element alArtEle = ele.selectFirst(resultAlbumArtistSelector);
|
||||
if (alArtEle != null)
|
||||
{
|
||||
|
||||
}
|
||||
}
|
||||
res.setAlbumArtist(parser.getField(ele, resultAlbumArtistSelector));
|
||||
|
||||
// Artist + Title
|
||||
|
||||
|
@ -146,5 +166,4 @@ public class WebScraperSearcher extends Searcher
|
|||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue