Web Searcher: Add header fields and link transformation support.

master
Nekojimi 3 years ago
parent e6b109125e
commit 3ceab2659b
  1. 76
      src/main/java/moe/nekojimi/musicsearcher/providers/WebScraperSearcher.java

@ -9,13 +9,18 @@ import com.amihaiemil.eoyaml.YamlMapping;
import com.amihaiemil.eoyaml.YamlNode; import com.amihaiemil.eoyaml.YamlNode;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.net.Authenticator;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.net.URL; import java.net.URL;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry;
import java.util.logging.Level; import java.util.logging.Level;
import java.util.logging.Logger; import java.util.logging.Logger;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@ -42,13 +47,22 @@ public class WebScraperSearcher extends Searcher
protected String resultLinkSelector; protected String resultLinkSelector;
protected String resultAlbumArtistSelector; protected String resultAlbumArtistSelector;
protected String linkFormat;
protected Map<String,String> searchFields = new HashMap<>(); protected Map<String,String> searchFields = new HashMap<>();
protected Map<String, String> searchHeaders = new HashMap<>();
protected Parser<?, ?> parser;
protected Parser<?,?> parser; private final HttpClient client;
public WebScraperSearcher(String name) public WebScraperSearcher(String name)
{ {
super(name); super(name);
client = HttpClient.newBuilder()
.followRedirects(HttpClient.Redirect.ALWAYS)
.authenticator(Authenticator.getDefault())
.build();
} }
public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException
@ -70,8 +84,18 @@ public class WebScraperSearcher extends Searcher
} }
YamlMapping searchFieldMap = yaml.yamlMapping("search_fields"); YamlMapping searchFieldMap = yaml.yamlMapping("search_fields");
for (YamlNode key: searchFieldMap.keys()) if (searchFieldMap != null)
searchFields.put(key.asScalar().value(), searchFieldMap.string(key)); {
for (YamlNode key : searchFieldMap.keys())
searchFields.put(key.asScalar().value(), searchFieldMap.string(key));
}
YamlMapping searchHeaderMap = yaml.yamlMapping("search_headers");
if (searchHeaderMap != null)
{
for (YamlNode key : searchHeaderMap.keys())
searchHeaders.put(key.asScalar().value(), searchHeaderMap.string(key));
}
String formatName = yaml.string("format"); String formatName = yaml.string("format");
switch(formatName) switch(formatName)
@ -83,6 +107,13 @@ public class WebScraperSearcher extends Searcher
default: default:
throw new IllegalArgumentException("Format " + formatName + " is unknown."); throw new IllegalArgumentException("Format " + formatName + " is unknown.");
} }
linkFormat = yaml.string("link_format");
client = HttpClient.newBuilder()
.followRedirects(HttpClient.Redirect.ALWAYS)
// .authenticator(Authenticator.getDefault())
.build();
} }
@Override @Override
@ -91,9 +122,19 @@ public class WebScraperSearcher extends Searcher
try try
{ {
URL url = fillURL(query); URL url = fillURL(query);
InputStream input = url.openStream(); HttpRequest.Builder builder = HttpRequest.newBuilder(url.toURI()).GET();
return processResults(parser, input);
} catch (IOException ex) for (Entry<String, String> header : searchHeaders.entrySet())
builder = builder.header(header.getKey(), fillHeader(header.getValue()));
HttpRequest request = builder.build();
HttpResponse<InputStream> response = client.send(request, HttpResponse.BodyHandlers.ofInputStream());
if (response.statusCode() == 200)
return processResults(parser, response.body());
else
throw new RuntimeException("Got status code " + response.statusCode() + new String(response.body().readAllBytes()));
} catch (IOException | URISyntaxException | InterruptedException ex)
{ {
Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex); Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex);
return List.of(); return List.of();
@ -113,7 +154,7 @@ public class WebScraperSearcher extends Searcher
} }
if (searchFields.containsKey("secret")) if (searchFields.containsKey("secret"))
{ {
builder.addParameter(searchFields.get("secret"), SecretStore.get().getSecret(name)); builder.addParameter(searchFields.get("secret"), getSecret());
} }
return builder.build().toURL(); return builder.build().toURL();
} }
@ -123,6 +164,20 @@ public class WebScraperSearcher extends Searcher
} }
} }
/**
 * Fetches the secret registered for this searcher.
 *
 * @return whatever {@code SecretStore} holds under this searcher's {@code name}
 */
private String getSecret()
{
final String secret = SecretStore.get().getSecret(name);
return secret;
}
/**
 * Expands placeholders in a configured header value before it is sent.
 *
 * <p>Every literal occurrence of {@code $SECRET} is substituted with the value returned by
 * {@link #getSecret()}. Values without the placeholder are returned unchanged and the
 * secret is not looked up at all.
 *
 * @param value the raw header value as configured
 * @return the header value with {@code $SECRET} replaced by the secret
 */
private String fillHeader(String value)
{
if (!value.contains("$SECRET"))
return value;
// Use the literal String.replace rather than replaceAll: replaceAll interprets '$' and '\'
// in the replacement as group references / escapes, so a secret containing either
// character would throw or be silently mangled.
return value.replace("$SECRET", getSecret());
}
protected String transformSearchString(String search) protected String transformSearchString(String search)
{ {
return search; return search;
@ -151,7 +206,12 @@ public class WebScraperSearcher extends Searcher
res.setTitle(parser.getField(ele, resultTitleSelector).trim()); res.setTitle(parser.getField(ele, resultTitleSelector).trim());
// Link // Link
if (resultLinkSelector != null) if (resultLinkSelector != null)
res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector)); {
if (linkFormat == null)
res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector));
else
res.setLink(new URL(linkFormat.replaceAll("\\$LINK", parser.getField(ele, resultLinkSelector))));
}
// Artist + Album // Artist + Album
if (resultAlbumArtistSelector != null) if (resultAlbumArtistSelector != null)

Loading…
Cancel
Save