generated from Nekojimi/JavaMavenTemplate
Web Searcher: Add header fields and link transformation support.
This commit is contained in:
parent
e6b109125e
commit
3ceab2659b
|
@ -9,13 +9,18 @@ import com.amihaiemil.eoyaml.YamlMapping;
|
|||
import com.amihaiemil.eoyaml.YamlNode;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.Authenticator;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.net.http.HttpClient;
|
||||
import java.net.http.HttpRequest;
|
||||
import java.net.http.HttpResponse;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.stream.Collectors;
|
||||
|
@ -42,13 +47,22 @@ public class WebScraperSearcher extends Searcher
|
|||
protected String resultLinkSelector;
|
||||
protected String resultAlbumArtistSelector;
|
||||
|
||||
protected Map<String,String> searchFields = new HashMap<>();
|
||||
protected String linkFormat;
|
||||
|
||||
protected Parser<?,?> parser;
|
||||
protected Map<String,String> searchFields = new HashMap<>();
|
||||
protected Map<String, String> searchHeaders = new HashMap<>();
|
||||
|
||||
protected Parser<?, ?> parser;
|
||||
|
||||
private final HttpClient client;
|
||||
|
||||
public WebScraperSearcher(String name)
|
||||
{
|
||||
super(name);
|
||||
client = HttpClient.newBuilder()
|
||||
.followRedirects(HttpClient.Redirect.ALWAYS)
|
||||
.authenticator(Authenticator.getDefault())
|
||||
.build();
|
||||
}
|
||||
|
||||
public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException
|
||||
|
@ -70,8 +84,18 @@ public class WebScraperSearcher extends Searcher
|
|||
}
|
||||
|
||||
YamlMapping searchFieldMap = yaml.yamlMapping("search_fields");
|
||||
for (YamlNode key: searchFieldMap.keys())
|
||||
searchFields.put(key.asScalar().value(), searchFieldMap.string(key));
|
||||
if (searchFieldMap != null)
|
||||
{
|
||||
for (YamlNode key : searchFieldMap.keys())
|
||||
searchFields.put(key.asScalar().value(), searchFieldMap.string(key));
|
||||
}
|
||||
|
||||
YamlMapping searchHeaderMap = yaml.yamlMapping("search_headers");
|
||||
if (searchHeaderMap != null)
|
||||
{
|
||||
for (YamlNode key : searchHeaderMap.keys())
|
||||
searchHeaders.put(key.asScalar().value(), searchHeaderMap.string(key));
|
||||
}
|
||||
|
||||
String formatName = yaml.string("format");
|
||||
switch(formatName)
|
||||
|
@ -83,6 +107,13 @@ public class WebScraperSearcher extends Searcher
|
|||
default:
|
||||
throw new IllegalArgumentException("Format " + formatName + " is unknown.");
|
||||
}
|
||||
|
||||
linkFormat = yaml.string("link_format");
|
||||
|
||||
client = HttpClient.newBuilder()
|
||||
.followRedirects(HttpClient.Redirect.ALWAYS)
|
||||
// .authenticator(Authenticator.getDefault())
|
||||
.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -91,9 +122,19 @@ public class WebScraperSearcher extends Searcher
|
|||
try
|
||||
{
|
||||
URL url = fillURL(query);
|
||||
InputStream input = url.openStream();
|
||||
return processResults(parser, input);
|
||||
} catch (IOException ex)
|
||||
HttpRequest.Builder builder = HttpRequest.newBuilder(url.toURI()).GET();
|
||||
|
||||
for (Entry<String, String> header : searchHeaders.entrySet())
|
||||
builder = builder.header(header.getKey(), fillHeader(header.getValue()));
|
||||
|
||||
HttpRequest request = builder.build();
|
||||
HttpResponse<InputStream> response = client.send(request, HttpResponse.BodyHandlers.ofInputStream());
|
||||
|
||||
if (response.statusCode() == 200)
|
||||
return processResults(parser, response.body());
|
||||
else
|
||||
throw new RuntimeException("Got status code " + response.statusCode() + new String(response.body().readAllBytes()));
|
||||
} catch (IOException | URISyntaxException | InterruptedException ex)
|
||||
{
|
||||
Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex);
|
||||
return List.of();
|
||||
|
@ -113,7 +154,7 @@ public class WebScraperSearcher extends Searcher
|
|||
}
|
||||
if (searchFields.containsKey("secret"))
|
||||
{
|
||||
builder.addParameter(searchFields.get("secret"), SecretStore.get().getSecret(name));
|
||||
builder.addParameter(searchFields.get("secret"), getSecret());
|
||||
}
|
||||
return builder.build().toURL();
|
||||
}
|
||||
|
@ -123,6 +164,20 @@ public class WebScraperSearcher extends Searcher
|
|||
}
|
||||
}
|
||||
|
||||
private String getSecret()
|
||||
{
|
||||
return SecretStore.get().getSecret(name);
|
||||
}
|
||||
|
||||
private String fillHeader(String value)
|
||||
{
|
||||
String ret = value;
|
||||
if (ret.contains("$SECRET"))
|
||||
ret = ret.replaceAll("\\$SECRET", getSecret());
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
protected String transformSearchString(String search)
|
||||
{
|
||||
return search;
|
||||
|
@ -151,7 +206,12 @@ public class WebScraperSearcher extends Searcher
|
|||
res.setTitle(parser.getField(ele, resultTitleSelector).trim());
|
||||
// Link
|
||||
if (resultLinkSelector != null)
|
||||
res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector));
|
||||
{
|
||||
if (linkFormat == null)
|
||||
res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector));
|
||||
else
|
||||
res.setLink(new URL(linkFormat.replaceAll("\\$LINK", parser.getField(ele, resultLinkSelector))));
|
||||
}
|
||||
|
||||
// Artist + Album
|
||||
if (resultAlbumArtistSelector != null)
|
||||
|
|
Loading…
Reference in New Issue