|
|
|
@ -9,13 +9,18 @@ import com.amihaiemil.eoyaml.YamlMapping; |
|
|
|
|
import com.amihaiemil.eoyaml.YamlNode; |
|
|
|
|
import java.io.IOException; |
|
|
|
|
import java.io.InputStream; |
|
|
|
|
import java.net.Authenticator; |
|
|
|
|
import java.net.MalformedURLException; |
|
|
|
|
import java.net.URISyntaxException; |
|
|
|
|
import java.net.URL; |
|
|
|
|
import java.net.http.HttpClient; |
|
|
|
|
import java.net.http.HttpRequest; |
|
|
|
|
import java.net.http.HttpResponse; |
|
|
|
|
import java.util.Collection; |
|
|
|
|
import java.util.HashMap; |
|
|
|
|
import java.util.List; |
|
|
|
|
import java.util.Map; |
|
|
|
|
import java.util.Map.Entry; |
|
|
|
|
import java.util.logging.Level; |
|
|
|
|
import java.util.logging.Logger; |
|
|
|
|
import java.util.stream.Collectors; |
|
|
|
@ -42,13 +47,22 @@ public class WebScraperSearcher extends Searcher |
|
|
|
|
protected String resultLinkSelector; |
|
|
|
|
protected String resultAlbumArtistSelector; |
|
|
|
|
|
|
|
|
|
protected String linkFormat; |
|
|
|
|
|
|
|
|
|
protected Map<String,String> searchFields = new HashMap<>(); |
|
|
|
|
protected Map<String, String> searchHeaders = new HashMap<>(); |
|
|
|
|
|
|
|
|
|
protected Parser<?, ?> parser; |
|
|
|
|
|
|
|
|
|
private final HttpClient client; |
|
|
|
|
|
|
|
|
|
public WebScraperSearcher(String name) |
|
|
|
|
{ |
|
|
|
|
super(name); |
|
|
|
|
client = HttpClient.newBuilder() |
|
|
|
|
.followRedirects(HttpClient.Redirect.ALWAYS) |
|
|
|
|
.authenticator(Authenticator.getDefault()) |
|
|
|
|
.build(); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
public WebScraperSearcher(YamlMapping yaml) throws MalformedURLException |
|
|
|
@ -70,8 +84,18 @@ public class WebScraperSearcher extends Searcher |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
YamlMapping searchFieldMap = yaml.yamlMapping("search_fields"); |
|
|
|
|
if (searchFieldMap != null) |
|
|
|
|
{ |
|
|
|
|
for (YamlNode key : searchFieldMap.keys()) |
|
|
|
|
searchFields.put(key.asScalar().value(), searchFieldMap.string(key)); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
YamlMapping searchHeaderMap = yaml.yamlMapping("search_headers"); |
|
|
|
|
if (searchHeaderMap != null) |
|
|
|
|
{ |
|
|
|
|
for (YamlNode key : searchHeaderMap.keys()) |
|
|
|
|
searchHeaders.put(key.asScalar().value(), searchHeaderMap.string(key)); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
String formatName = yaml.string("format"); |
|
|
|
|
switch(formatName) |
|
|
|
@ -83,6 +107,13 @@ public class WebScraperSearcher extends Searcher |
|
|
|
|
default: |
|
|
|
|
throw new IllegalArgumentException("Format " + formatName + " is unknown."); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
linkFormat = yaml.string("link_format"); |
|
|
|
|
|
|
|
|
|
client = HttpClient.newBuilder() |
|
|
|
|
.followRedirects(HttpClient.Redirect.ALWAYS) |
|
|
|
|
// .authenticator(Authenticator.getDefault())
|
|
|
|
|
.build(); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@Override |
|
|
|
@ -91,9 +122,19 @@ public class WebScraperSearcher extends Searcher |
|
|
|
|
try |
|
|
|
|
{ |
|
|
|
|
URL url = fillURL(query); |
|
|
|
|
InputStream input = url.openStream(); |
|
|
|
|
return processResults(parser, input); |
|
|
|
|
} catch (IOException ex) |
|
|
|
|
HttpRequest.Builder builder = HttpRequest.newBuilder(url.toURI()).GET(); |
|
|
|
|
|
|
|
|
|
for (Entry<String, String> header : searchHeaders.entrySet()) |
|
|
|
|
builder = builder.header(header.getKey(), fillHeader(header.getValue())); |
|
|
|
|
|
|
|
|
|
HttpRequest request = builder.build(); |
|
|
|
|
HttpResponse<InputStream> response = client.send(request, HttpResponse.BodyHandlers.ofInputStream()); |
|
|
|
|
|
|
|
|
|
if (response.statusCode() == 200) |
|
|
|
|
return processResults(parser, response.body()); |
|
|
|
|
else |
|
|
|
|
throw new RuntimeException("Got status code " + response.statusCode() + new String(response.body().readAllBytes())); |
|
|
|
|
} catch (IOException | URISyntaxException | InterruptedException ex) |
|
|
|
|
{ |
|
|
|
|
Logger.getLogger(WebScraperSearcher.class.getName()).log(Level.SEVERE, null, ex); |
|
|
|
|
return List.of(); |
|
|
|
@ -113,7 +154,7 @@ public class WebScraperSearcher extends Searcher |
|
|
|
|
} |
|
|
|
|
if (searchFields.containsKey("secret")) |
|
|
|
|
{ |
|
|
|
|
builder.addParameter(searchFields.get("secret"), SecretStore.get().getSecret(name)); |
|
|
|
|
builder.addParameter(searchFields.get("secret"), getSecret()); |
|
|
|
|
} |
|
|
|
|
return builder.build().toURL(); |
|
|
|
|
} |
|
|
|
@ -123,6 +164,20 @@ public class WebScraperSearcher extends Searcher |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
private String getSecret() |
|
|
|
|
{ |
|
|
|
|
return SecretStore.get().getSecret(name); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
private String fillHeader(String value) |
|
|
|
|
{ |
|
|
|
|
String ret = value; |
|
|
|
|
if (ret.contains("$SECRET")) |
|
|
|
|
ret = ret.replaceAll("\\$SECRET", getSecret()); |
|
|
|
|
|
|
|
|
|
return ret; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
protected String transformSearchString(String search) |
|
|
|
|
{ |
|
|
|
|
return search; |
|
|
|
@ -151,7 +206,12 @@ public class WebScraperSearcher extends Searcher |
|
|
|
|
res.setTitle(parser.getField(ele, resultTitleSelector).trim()); |
|
|
|
|
// Link
|
|
|
|
|
if (resultLinkSelector != null) |
|
|
|
|
{ |
|
|
|
|
if (linkFormat == null) |
|
|
|
|
res.setLink(parser.getURLField(ele, rootURL, resultLinkSelector)); |
|
|
|
|
else |
|
|
|
|
res.setLink(new URL(linkFormat.replaceAll("\\$LINK", parser.getField(ele, resultLinkSelector)))); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Artist + Album
|
|
|
|
|
if (resultAlbumArtistSelector != null) |
|
|
|
|