From 3ceab2659baad195992f4f34e2c0857f06da686a Mon Sep 17 00:00:00 2001
From: Nekojimi
Date: Sat, 2 Oct 2021 15:28:29 +0100
Subject: [PATCH] Web Searcher: Add header fields and link transformation support.

---

Notes:
    $SECRET (in search_headers values) and $LINK (in link_format) are
    substituted with String.replace(), i.e. literally. String.replaceAll()
    would interpret '$' and '\' inside the secret or the scraped link as
    group references / escapes and either throw or corrupt the value.

 .../providers/WebScraperSearcher.java         | 76 +++++++++++++++++--
 1 file changed, 68 insertions(+), 8 deletions(-)

diff --git a/src/main/java/moe/nekojimi/musicsearcher/providers/WebScraperSearcher.java b/src/main/java/moe/nekojimi/musicsearcher/providers/WebScraperSearcher.java
index 0c6fd09..570211e 100644
--- a/src/main/java/moe/nekojimi/musicsearcher/providers/WebScraperSearcher.java
+++ b/src/main/java/moe/nekojimi/musicsearcher/providers/WebScraperSearcher.java
@@ -9,13 +9,18 @@ import com.amihaiemil.eoyaml.YamlMapping;
 import com.amihaiemil.eoyaml.YamlNode;
 import java.io.IOException;
 import java.io.InputStream;
+import java.net.Authenticator;
 import java.net.MalformedURLException;
 import java.net.URISyntaxException;
 import java.net.URL;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 import java.util.stream.Collectors;
@@ -42,13 +47,22 @@ public class WebScraperSearcher extends Searcher
     protected String resultLinkSelector;
     protected String resultAlbumArtistSelector;
 
+    protected String linkFormat;
+
     protected Map<String, String> searchFields = new HashMap<>();
+    protected Map<String, String> searchHeaders = new HashMap<>();
+
+    protected Parser parser;
 
-    protected Parser parser;
+    private final HttpClient client;
 
     public WebScraperSearcher(String name)
     {
         super(name);
+        client = HttpClient.newBuilder()
+                .followRedirects(HttpClient.Redirect.ALWAYS)
+                .authenticator(Authenticator.getDefault())
+                .build();
     }
 
     public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException
@@ -70,8 +84,18 @@ public class WebScraperSearcher extends Searcher
         }
 
         YamlMapping searchFieldMap = yaml.yamlMapping("search_fields");
-        for (YamlNode key: searchFieldMap.keys())
-            searchFields.put(key.asScalar().value(), searchFieldMap.string(key));
+        if (searchFieldMap != null)
+        {
+            for (YamlNode key : searchFieldMap.keys())
+                searchFields.put(key.asScalar().value(), searchFieldMap.string(key));
+        }
+
+        YamlMapping searchHeaderMap = yaml.yamlMapping("search_headers");
+        if (searchHeaderMap != null)
+        {
+            for (YamlNode key : searchHeaderMap.keys())
+                searchHeaders.put(key.asScalar().value(), searchHeaderMap.string(key));
+        }
 
         String formatName = yaml.string("format");
         switch(formatName)
@@ -83,6 +107,13 @@ public class WebScraperSearcher extends Searcher
             default:
                 throw new IllegalArgumentException("Format " + formatName + " is unknown.");
         }
+
+        linkFormat = yaml.string("link_format");
+
+        client = HttpClient.newBuilder()
+                .followRedirects(HttpClient.Redirect.ALWAYS)
+                // .authenticator(Authenticator.getDefault())
+                .build();
     }
 
     @Override
@@ -91,9 +122,19 @@ public class WebScraperSearcher extends Searcher
         try
         {
             URL url = fillURL(query);
-            InputStream input = url.openStream();
-            return processResults(parser, input);
-        } catch (IOException ex)
+            HttpRequest.Builder builder = HttpRequest.newBuilder(url.toURI()).GET();
+
+            for (Entry<String, String> header : searchHeaders.entrySet())
+                builder = builder.header(header.getKey(), fillHeader(header.getValue()));
+
+            HttpRequest request = builder.build();
+            HttpResponse<InputStream> response = client.send(request, HttpResponse.BodyHandlers.ofInputStream());
+
+            if (response.statusCode() == 200)
+                return processResults(parser, response.body());
+            else
+                throw new RuntimeException("Got status code " + response.statusCode() + new String(response.body().readAllBytes()));
+        } catch (IOException | URISyntaxException | InterruptedException ex)
         {
             Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex);
             return List.of();
@@ -113,7 +154,7 @@ public class WebScraperSearcher extends Searcher
             }
             if (searchFields.containsKey("secret"))
             {
-                builder.addParameter(searchFields.get("secret"), SecretStore.get().getSecret(name));
+                builder.addParameter(searchFields.get("secret"), getSecret());
             }
             return builder.build().toURL();
         }
@@ -123,6 +164,20 @@ public class WebScraperSearcher extends Searcher
         }
     }
 
+    private String getSecret()
+    {
+        return SecretStore.get().getSecret(name);
+    }
+
+    private String fillHeader(String value)
+    {
+        String ret = value;
+        if (ret.contains("$SECRET"))
+            ret = ret.replace("$SECRET", getSecret());
+
+        return ret;
+    }
+
     protected String transformSearchString(String search)
     {
         return search;
@@ -151,7 +206,12 @@ public class WebScraperSearcher extends Searcher
             res.setTitle(parser.getField(ele, resultTitleSelector).trim());
             // Link
             if (resultLinkSelector != null)
-                res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector));
+            {
+                if (linkFormat == null)
+                    res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector));
+                else
+                    res.setLink(new URL(linkFormat.replace("$LINK", parser.getField(ele, resultLinkSelector))));
+            }
 
             // Artist + Album
             if (resultAlbumArtistSelector != null)