generated from Nekojimi/JavaMavenTemplate
Implemented web scraper search.
This commit is contained in:
parent
4d7eccd1e0
commit
68869a149e
|
@ -1,23 +1,53 @@
|
|||
- name: Soundcloud
|
||||
type: WebScraperProvider
|
||||
search_url: https://soundcloud.com/search/sounds?q=$QUERY
|
||||
result_item: li.searchList__item
|
||||
result_fields:
|
||||
artist: .soundTitle__username
|
||||
title: .soundTitle__title
|
||||
link_href: a.soundTitle__title
|
||||
#- name: Soundcloud
|
||||
#abbr: SC
|
||||
#type: WebScraperSearcher
|
||||
#search_url: https://soundcloud.com/search/sounds?q=$QUERY
|
||||
#result_selector: li.searchList__item
|
||||
#result_field_selectors:
|
||||
#artist: .soundTitle__username
|
||||
#title: .soundTitle__title
|
||||
#link: a.soundTitle__title
|
||||
#- name: Soundcloud
|
||||
#abbr:SC
|
||||
#type: ApiSearcher
|
||||
#search_url: https://api.soundcloud.com/tracks?q=$QUERY&access=playable&limit=10&linked_partitioning=true
|
||||
#format: json
|
||||
#result_array: collection
|
||||
#result_field_names:
|
||||
#title: title
|
||||
#link: uri
|
||||
- name: Bandcamp
|
||||
type: WebScraperProvider
|
||||
abbr: BC
|
||||
type: WebScraperSearcher
|
||||
search_url: https://bandcamp.com/search?q=$QUERY&item_type
|
||||
result_item: li.searchresult
|
||||
result_fields:
|
||||
result_selector: li.searchresult
|
||||
result_field_selectors:
|
||||
title: .heading
|
||||
link_href: .heading a
|
||||
link: .heading a
|
||||
album_artist: .subhead
|
||||
# - name: Youtube
|
||||
# search_url:
|
||||
# result_item:
|
||||
# result_fields:
|
||||
# title:
|
||||
# link_href:
|
||||
# album_artist:
|
||||
#- name: Youtube
|
||||
#abbr: YT
|
||||
#search_url: https://www.youtube.com/results?search_query=$QUERY
|
||||
#result_item: ytd-video-renderer.ytd-item-section-renderer
|
||||
#result_fields:
|
||||
#title:
|
||||
#link_href:
|
||||
#album_artist:
|
||||
#- name: Jamendo
|
||||
#abbr: JM
|
||||
#type: WebScraperSearcher
|
||||
#search_url: https://www.jamendo.com/search/tracks?q=$QUERY
|
||||
#result_selector: li.active-result
|
||||
#result_field_selectors:
|
||||
#link: a.js-search-item-link
|
||||
#title_artist: a.js-search-item-link
|
||||
#- Name: Jamendo
|
||||
#abbr: JM
|
||||
#type: ApiSearcher
|
||||
#search_url: https://api.jamendo.com/v3.0/tracks/?client_id=$SECRET&format=jsonpretty&limit=10&include=musicinfo&groupby=artist_id&search=$QUERY
|
||||
#format: json
|
||||
#result_array: results
|
||||
#result_field_names:
|
||||
#title: name
|
||||
#artist: artist_name
|
||||
#album: album_name
|
||||
|
|
|
@ -5,17 +5,69 @@
|
|||
*/
|
||||
package moe.nekojimi.musicsearcher;
|
||||
|
||||
import com.amihaiemil.eoyaml.Yaml;
|
||||
import com.amihaiemil.eoyaml.YamlInput;
|
||||
import com.amihaiemil.eoyaml.YamlMapping;
|
||||
import com.amihaiemil.eoyaml.YamlSequence;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import moe.nekojimi.musicsearcher.providers.Searcher;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author jim
|
||||
*/
|
||||
public class Main
|
||||
{
|
||||
private static final Map<String, Searcher> searchers = new HashMap<>();
|
||||
|
||||
/**
|
||||
* @param args the command line arguments
|
||||
*/
|
||||
public static void main(String[] args)
|
||||
public static void main(String[] args) throws IOException
|
||||
{
|
||||
System.out.println("Hello world!");
|
||||
// System.out.println("Hello world!");
|
||||
YamlInput input = Yaml.createYamlInput(new File("searchproviders.yml"));
|
||||
YamlSequence seq = input.readYamlSequence();
|
||||
for (int i = 0; i < seq.size(); i++)
|
||||
{
|
||||
try
|
||||
{
|
||||
YamlMapping map = seq.yamlMapping(i);
|
||||
String type = map.string("type");
|
||||
Class<? extends Searcher> clazz = (Class<? extends Searcher>) Class.forName("moe.nekojimi.musicsearcher.providers." + type);
|
||||
Constructor<? extends Searcher> constructor = clazz.getConstructor(YamlMapping.class);
|
||||
Searcher searcher = constructor.newInstance(map);
|
||||
searchers.put(searcher.getName(), searcher);
|
||||
|
||||
} catch (ClassNotFoundException | NoSuchMethodException | SecurityException | InstantiationException | IllegalAccessException | IllegalArgumentException | InvocationTargetException ex) {
|
||||
Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex);
|
||||
}
|
||||
}
|
||||
|
||||
System.out.println(searchers);
|
||||
String query = "Test";
|
||||
for (Searcher searcher: searchers.values())
|
||||
{
|
||||
System.out.println("Searching " + searcher.getName() + " for " + query);
|
||||
try
|
||||
{
|
||||
List<Result> results = searcher.searchAndWait(query);
|
||||
for (Result result: results)
|
||||
{
|
||||
System.out.println("\t" + result);
|
||||
}
|
||||
} catch (InterruptedException | ExecutionException ex) {
|
||||
Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,10 +5,55 @@
|
|||
*/
|
||||
package moe.nekojimi.musicsearcher;
|
||||
|
||||
import java.net.URL;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author jim
|
||||
*/
|
||||
public class Result {
|
||||
public class Result
|
||||
{
|
||||
private URL link;
|
||||
private String artist;
|
||||
private String album;
|
||||
private String title;
|
||||
|
||||
public URL getLink() {
|
||||
return link;
|
||||
}
|
||||
|
||||
public void setLink(URL link) {
|
||||
this.link = link;
|
||||
}
|
||||
|
||||
public String getArtist() {
|
||||
return artist;
|
||||
}
|
||||
|
||||
public void setArtist(String artist) {
|
||||
this.artist = artist;
|
||||
}
|
||||
|
||||
public String getAlbum() {
|
||||
return album;
|
||||
}
|
||||
|
||||
public void setAlbum(String album) {
|
||||
this.album = album;
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Result{" + "link=" + link + ", artist=" + artist + ", album=" + album + ", title=" + title + '}';
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -5,8 +5,11 @@
|
|||
*/
|
||||
package moe.nekojimi.musicsearcher.providers;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import moe.nekojimi.musicsearcher.Result;
|
||||
|
||||
/**
|
||||
|
@ -16,7 +19,9 @@ import moe.nekojimi.musicsearcher.Result;
|
|||
public class MetaSearcher extends Searcher
|
||||
{
|
||||
|
||||
private final Set<Searcher> searchers;
|
||||
private final Set<Searcher> searchers = new HashSet<>();
|
||||
private int minSearchTime = 10000; // ms
|
||||
private int maxSearchTime = 30000; // ms
|
||||
|
||||
public MetaSearcher()
|
||||
{
|
||||
|
@ -24,9 +29,26 @@ public class MetaSearcher extends Searcher
|
|||
}
|
||||
|
||||
@Override
|
||||
public List<Result> search(String query)
|
||||
protected List<Result> doSearch(String query)
|
||||
{
|
||||
throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
|
||||
List<Result> results = new ArrayList<>();
|
||||
List<CompletableFuture<List<Result>>> searches = new ArrayList<>();
|
||||
for (Searcher searcher: searchers)
|
||||
{
|
||||
CompletableFuture<List<Result>> search = searcher.search(query);
|
||||
searches.add(search);
|
||||
search.whenComplete((t, u) ->
|
||||
{
|
||||
if (u == null)
|
||||
{
|
||||
results.addAll(t);
|
||||
// searches.remove(search);
|
||||
}
|
||||
});
|
||||
}
|
||||
CompletableFuture.allOf((CompletableFuture<?>[]) searches.toArray());
|
||||
return results;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -5,11 +5,11 @@
|
|||
*/
|
||||
package moe.nekojimi.musicsearcher.providers;
|
||||
|
||||
import com.amihaiemil.eoyaml.YamlMapping;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ForkJoinPool;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.FutureTask;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
import moe.nekojimi.musicsearcher.Result;
|
||||
|
@ -20,7 +20,7 @@ import moe.nekojimi.musicsearcher.Result;
|
|||
*/
|
||||
public abstract class Searcher
|
||||
{
|
||||
private final String name;
|
||||
final String name;
|
||||
private final ForkJoinPool executor;
|
||||
|
||||
public Searcher(String name)
|
||||
|
@ -29,6 +29,17 @@ public abstract class Searcher
|
|||
this.executor = new ForkJoinPool();
|
||||
}
|
||||
|
||||
public Searcher(YamlMapping yaml)
|
||||
{
|
||||
this(yaml.string("name"));
|
||||
assert yaml.string("type").equals(this.getClass().getSimpleName());
|
||||
}
|
||||
|
||||
public String getName()
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
public List<Result> searchAndWait(String query) throws InterruptedException, ExecutionException
|
||||
{
|
||||
try
|
||||
|
@ -45,15 +56,19 @@ public abstract class Searcher
|
|||
return search(query).get(limit, unit);
|
||||
}
|
||||
|
||||
public Future<List<Result>> search(String query)
|
||||
public CompletableFuture<List<Result>> search(String query)
|
||||
{
|
||||
FutureTask<List<Result>> task = new FutureTask<>(() ->
|
||||
{
|
||||
return doSearch(query);
|
||||
});
|
||||
executor.execute(task);
|
||||
return task;
|
||||
CompletableFuture<List<Result>> future = new CompletableFuture<>();
|
||||
future.completeAsync(()->doSearch(query), executor);
|
||||
return future;
|
||||
}
|
||||
|
||||
protected abstract List<Result> doSearch(String query);
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return this.getClass().getSimpleName() + "{" + "name=" + name + '}';
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -5,9 +5,21 @@
|
|||
*/
|
||||
package moe.nekojimi.musicsearcher.providers;
|
||||
|
||||
import com.amihaiemil.eoyaml.YamlMapping;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.net.URLEncoder;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.List;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.stream.Collectors;
|
||||
import moe.nekojimi.musicsearcher.Result;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -16,22 +28,123 @@ import org.jsoup.Jsoup;
|
|||
public class WebScraperSearcher extends Searcher
|
||||
{
|
||||
private String searchUrl;
|
||||
private URL rootURL;
|
||||
|
||||
private String resultItem;
|
||||
private String artistItem;
|
||||
private String titleItem;
|
||||
private String linkHrefItem;
|
||||
private String albumArtistItem;
|
||||
private String resultSelector;
|
||||
private String resultArtistSelector;
|
||||
private String resultTitleSelector;
|
||||
private String resultLinkSelector;
|
||||
private String resultAlbumArtistSelector;
|
||||
|
||||
public WebScraperSearcher(String name)
|
||||
{
|
||||
super(name);
|
||||
}
|
||||
|
||||
public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException
|
||||
{
|
||||
super(yaml);
|
||||
searchUrl = yaml.string("search_url");
|
||||
rootURL = fillURL("");
|
||||
rootURL = new URL(rootURL.getProtocol(), rootURL.getHost(), "");
|
||||
resultSelector = yaml.string("result_selector");
|
||||
|
||||
YamlMapping fields = yaml.yamlMapping("result_field_selectors");
|
||||
if (fields != null)
|
||||
{
|
||||
resultArtistSelector = fields.string("artist");
|
||||
resultTitleSelector = fields.string("title");
|
||||
resultLinkSelector = fields.string("link");
|
||||
resultAlbumArtistSelector = fields.string("album_artist");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<Result> doSearch(String query)
|
||||
{
|
||||
Jsoup.
|
||||
try
|
||||
{
|
||||
URL url = fillURL(query);
|
||||
Document doc = Jsoup.parse(url, 10000);
|
||||
System.out.println("Document from " + name + ":" + doc.html());
|
||||
Elements resultEles = doc.select(resultSelector);
|
||||
return resultEles.stream()
|
||||
.map((ele)->parseResultElement(ele))
|
||||
.filter((res)->res!=null)
|
||||
.collect(Collectors.toList());
|
||||
} catch (IOException ex)
|
||||
{
|
||||
Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex);
|
||||
return List.of();
|
||||
}
|
||||
}
|
||||
|
||||
protected URL fillURL(String query) throws MalformedURLException
|
||||
{
|
||||
URL url = new URL(searchUrl.replaceAll("\\$QUERY", URLEncoder.encode(query, Charset.forName("utf-8"))));
|
||||
return url;
|
||||
}
|
||||
|
||||
protected Result parseResultElement(Element ele)
|
||||
{
|
||||
try
|
||||
{
|
||||
Result res = new Result();
|
||||
// Artist
|
||||
if (resultArtistSelector != null)
|
||||
{
|
||||
Element artistEle = ele.selectFirst(resultArtistSelector);
|
||||
if (artistEle != null)
|
||||
res.setArtist(artistEle.text());
|
||||
}
|
||||
// Title
|
||||
if (resultTitleSelector != null)
|
||||
{
|
||||
Element titleEle = ele.selectFirst(resultTitleSelector);
|
||||
if (titleEle != null)
|
||||
res.setTitle(titleEle.text());
|
||||
}
|
||||
// Link
|
||||
if (resultLinkSelector != null)
|
||||
{
|
||||
Element linkEle = ele.selectFirst(resultLinkSelector);
|
||||
if (linkEle != null)
|
||||
{
|
||||
String link;
|
||||
if (linkEle.hasAttr("href"))
|
||||
link = linkEle.attr("href");
|
||||
else
|
||||
link = linkEle.text();
|
||||
|
||||
URL url;
|
||||
if(!link.startsWith("http"))
|
||||
url = new URL(rootURL, link);
|
||||
else
|
||||
url = new URL(link);
|
||||
|
||||
res.setLink(url);
|
||||
}
|
||||
}
|
||||
|
||||
// Artist + Album
|
||||
if (resultAlbumArtistSelector != null)
|
||||
{
|
||||
Element alArtEle = ele.selectFirst(resultAlbumArtistSelector);
|
||||
if (alArtEle != null)
|
||||
{
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// Artist + Title
|
||||
|
||||
return res;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue