generated from Nekojimi/JavaMavenTemplate
Web Searcher: Add header fields and link transformation support.
This commit is contained in:
parent
e6b109125e
commit
3ceab2659b
|
@ -9,13 +9,18 @@ import com.amihaiemil.eoyaml.YamlMapping;
|
||||||
import com.amihaiemil.eoyaml.YamlNode;
|
import com.amihaiemil.eoyaml.YamlNode;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.net.Authenticator;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.net.http.HttpClient;
|
||||||
|
import java.net.http.HttpRequest;
|
||||||
|
import java.net.http.HttpResponse;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Map.Entry;
|
||||||
import java.util.logging.Level;
|
import java.util.logging.Level;
|
||||||
import java.util.logging.Logger;
|
import java.util.logging.Logger;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
@ -42,13 +47,22 @@ public class WebScraperSearcher extends Searcher
|
||||||
protected String resultLinkSelector;
|
protected String resultLinkSelector;
|
||||||
protected String resultAlbumArtistSelector;
|
protected String resultAlbumArtistSelector;
|
||||||
|
|
||||||
protected Map<String,String> searchFields = new HashMap<>();
|
protected String linkFormat;
|
||||||
|
|
||||||
protected Parser<?,?> parser;
|
protected Map<String,String> searchFields = new HashMap<>();
|
||||||
|
protected Map<String, String> searchHeaders = new HashMap<>();
|
||||||
|
|
||||||
|
protected Parser<?, ?> parser;
|
||||||
|
|
||||||
|
private final HttpClient client;
|
||||||
|
|
||||||
public WebScraperSearcher(String name)
|
public WebScraperSearcher(String name)
|
||||||
{
|
{
|
||||||
super(name);
|
super(name);
|
||||||
|
client = HttpClient.newBuilder()
|
||||||
|
.followRedirects(HttpClient.Redirect.ALWAYS)
|
||||||
|
.authenticator(Authenticator.getDefault())
|
||||||
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException
|
public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException
|
||||||
|
@ -70,8 +84,18 @@ public class WebScraperSearcher extends Searcher
|
||||||
}
|
}
|
||||||
|
|
||||||
YamlMapping searchFieldMap = yaml.yamlMapping("search_fields");
|
YamlMapping searchFieldMap = yaml.yamlMapping("search_fields");
|
||||||
for (YamlNode key: searchFieldMap.keys())
|
if (searchFieldMap != null)
|
||||||
searchFields.put(key.asScalar().value(), searchFieldMap.string(key));
|
{
|
||||||
|
for (YamlNode key : searchFieldMap.keys())
|
||||||
|
searchFields.put(key.asScalar().value(), searchFieldMap.string(key));
|
||||||
|
}
|
||||||
|
|
||||||
|
YamlMapping searchHeaderMap = yaml.yamlMapping("search_headers");
|
||||||
|
if (searchHeaderMap != null)
|
||||||
|
{
|
||||||
|
for (YamlNode key : searchHeaderMap.keys())
|
||||||
|
searchHeaders.put(key.asScalar().value(), searchHeaderMap.string(key));
|
||||||
|
}
|
||||||
|
|
||||||
String formatName = yaml.string("format");
|
String formatName = yaml.string("format");
|
||||||
switch(formatName)
|
switch(formatName)
|
||||||
|
@ -83,6 +107,13 @@ public class WebScraperSearcher extends Searcher
|
||||||
default:
|
default:
|
||||||
throw new IllegalArgumentException("Format " + formatName + " is unknown.");
|
throw new IllegalArgumentException("Format " + formatName + " is unknown.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
linkFormat = yaml.string("link_format");
|
||||||
|
|
||||||
|
client = HttpClient.newBuilder()
|
||||||
|
.followRedirects(HttpClient.Redirect.ALWAYS)
|
||||||
|
// .authenticator(Authenticator.getDefault())
|
||||||
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -91,9 +122,19 @@ public class WebScraperSearcher extends Searcher
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
URL url = fillURL(query);
|
URL url = fillURL(query);
|
||||||
InputStream input = url.openStream();
|
HttpRequest.Builder builder = HttpRequest.newBuilder(url.toURI()).GET();
|
||||||
return processResults(parser, input);
|
|
||||||
} catch (IOException ex)
|
for (Entry<String, String> header : searchHeaders.entrySet())
|
||||||
|
builder = builder.header(header.getKey(), fillHeader(header.getValue()));
|
||||||
|
|
||||||
|
HttpRequest request = builder.build();
|
||||||
|
HttpResponse<InputStream> response = client.send(request, HttpResponse.BodyHandlers.ofInputStream());
|
||||||
|
|
||||||
|
if (response.statusCode() == 200)
|
||||||
|
return processResults(parser, response.body());
|
||||||
|
else
|
||||||
|
throw new RuntimeException("Got status code " + response.statusCode() + new String(response.body().readAllBytes()));
|
||||||
|
} catch (IOException | URISyntaxException | InterruptedException ex)
|
||||||
{
|
{
|
||||||
Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex);
|
Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex);
|
||||||
return List.of();
|
return List.of();
|
||||||
|
@ -113,7 +154,7 @@ public class WebScraperSearcher extends Searcher
|
||||||
}
|
}
|
||||||
if (searchFields.containsKey("secret"))
|
if (searchFields.containsKey("secret"))
|
||||||
{
|
{
|
||||||
builder.addParameter(searchFields.get("secret"), SecretStore.get().getSecret(name));
|
builder.addParameter(searchFields.get("secret"), getSecret());
|
||||||
}
|
}
|
||||||
return builder.build().toURL();
|
return builder.build().toURL();
|
||||||
}
|
}
|
||||||
|
@ -123,6 +164,20 @@ public class WebScraperSearcher extends Searcher
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String getSecret()
|
||||||
|
{
|
||||||
|
return SecretStore.get().getSecret(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String fillHeader(String value)
|
||||||
|
{
|
||||||
|
String ret = value;
|
||||||
|
if (ret.contains("$SECRET"))
|
||||||
|
ret = ret.replaceAll("\\$SECRET", getSecret());
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
protected String transformSearchString(String search)
|
protected String transformSearchString(String search)
|
||||||
{
|
{
|
||||||
return search;
|
return search;
|
||||||
|
@ -151,7 +206,12 @@ public class WebScraperSearcher extends Searcher
|
||||||
res.setTitle(parser.getField(ele, resultTitleSelector).trim());
|
res.setTitle(parser.getField(ele, resultTitleSelector).trim());
|
||||||
// Link
|
// Link
|
||||||
if (resultLinkSelector != null)
|
if (resultLinkSelector != null)
|
||||||
res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector));
|
{
|
||||||
|
if (linkFormat == null)
|
||||||
|
res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector));
|
||||||
|
else
|
||||||
|
res.setLink(new URL(linkFormat.replaceAll("\\$LINK", parser.getField(ele, resultLinkSelector))));
|
||||||
|
}
|
||||||
|
|
||||||
// Artist + Album
|
// Artist + Album
|
||||||
if (resultAlbumArtistSelector != null)
|
if (resultAlbumArtistSelector != null)
|
||||||
|
|
Loading…
Reference in New Issue