Add Function for drpy

pull/94/head
FongMi 10 months ago
parent 4fb8666f89
commit 6f03a1323d
  1. 6
      app/build.gradle
  2. 1
      app/proguard-rules.pro
  3. 54
      app/src/main/java/com/github/catvod/js/Function.java
  4. 32
      app/src/main/java/com/github/catvod/js/bean/Cache.java
  5. 42
      app/src/main/java/com/github/catvod/js/bean/Info.java
  6. 16
      app/src/main/java/com/github/catvod/js/utils/JSUtil.java
  7. 171
      app/src/main/java/com/github/catvod/js/utils/Parser.java
  8. 231
      app/src/main/java/com/github/catvod/utils/UriUtil.java
  9. BIN
      jar/custom_spider.jar
  10. 2
      jar/custom_spider.jar.md5
  11. 2
      jar/genJar.bat

@ -42,9 +42,9 @@ android {
dependencies {
implementation 'com.squareup.okhttp3:okhttp:' + okhttpVersion
implementation 'com.github.thegrizzlylabs:sardine-android:0.9'
implementation 'wang.harlon.quickjs:wrapper-android:2.4.0'
implementation 'com.google.code.gson:gson:2.11.0'
implementation 'wang.harlon.quickjs:wrapper-android:2.4.3'
implementation 'com.google.code.gson:gson:2.12.1'
implementation 'cn.wanghaomiao:JsoupXpath:2.5.1'
implementation 'com.orhanobut:logger:2.2.0'
implementation 'org.jsoup:jsoup:1.15.3'
implementation 'org.jsoup:jsoup:1.17.2'
}

@ -15,6 +15,7 @@
# Spider
-keep class com.github.catvod.crawler.* { *; }
-keep class com.github.catvod.spider.* { public <methods>; }
-keep class com.github.catvod.js.Function { *; }
# OkHttp
-dontwarn okhttp3.**

@ -0,0 +1,54 @@
package com.github.catvod.js;
import com.github.catvod.js.utils.Parser;
import com.github.catvod.js.utils.JSUtil;
import com.whl.quickjs.wrapper.JSArray;
import com.whl.quickjs.wrapper.JSMethod;
import com.whl.quickjs.wrapper.QuickJSContext;
import java.lang.reflect.Method;
/**
 * Bridges the DOM-rule helpers of {@link Parser} into a QuickJS context.
 *
 * <p>On construction, every public method of this class that carries the
 * {@link JSMethod} annotation is registered on the context's global object
 * under the method's own name.
 */
public class Function {

    private final QuickJSContext ctx;
    private final Parser parser;

    /**
     * Creates the bridge and registers the annotated methods on {@code ctx}.
     *
     * @param ctx The QuickJS context whose global object receives the functions.
     */
    public Function(QuickJSContext ctx) {
        this.ctx = ctx;
        this.parser = new Parser();
        setProperty();
    }

    /** Registers each {@link JSMethod}-annotated public method as a global property. */
    private void setProperty() {
        for (Method candidate : getClass().getMethods()) {
            if (candidate.isAnnotationPresent(JSMethod.class)) {
                ctx.getGlobalObject().setProperty(candidate.getName(), args -> invokeQuietly(candidate, args));
            }
        }
    }

    /**
     * Invokes {@code target} reflectively on this instance.
     *
     * @return The method's result, or {@code null} when the invocation fails for any
     *     reason; failures are intentionally not propagated to the caller.
     */
    private Object invokeQuietly(Method target, Object[] args) {
        try {
            return target.invoke(this, args);
        } catch (Exception e) {
            return null;
        }
    }

    /** Delegates to {@link Parser#parseDomForUrl} with the given {@code urlKey}. */
    @JSMethod
    public String pd(String html, String rule, String urlKey) {
        String value = parser.parseDomForUrl(html, rule, urlKey);
        return value;
    }

    /** Delegates to {@link Parser#parseDomForUrl} with an empty URL key. */
    @JSMethod
    public String pdfh(String html, String rule) {
        String value = parser.parseDomForUrl(html, rule, "");
        return value;
    }

    /** Delegates to {@link Parser#parseDomForArray} and converts the result to a JS array. */
    @JSMethod
    public JSArray pdfa(String html, String rule) {
        return JSUtil.toArray(ctx, parser.parseDomForArray(html, rule));
    }

    /** Delegates to {@link Parser#parseDomForList} and converts the result to a JS array. */
    @JSMethod
    public JSArray pdfl(String html, String rule, String texts, String urls, String urlKey) {
        return JSUtil.toArray(ctx, parser.parseDomForList(html, rule, texts, urls, urlKey));
    }
}

@ -0,0 +1,32 @@
package com.github.catvod.js.bean;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
/**
 * Remembers the most recently parsed HTML string and its jsoup {@link Document},
 * with one slot for the "pdfh" path and a separate slot for the "pdfa" path.
 *
 * <p>A slot is re-parsed only when the incoming HTML differs from the string
 * stored in that slot.
 */
public class Cache {

    private String pdfhHtml;
    private String pdfaHtml;
    private Document pdfhDoc;
    private Document pdfaDoc;

    /**
     * Returns the document for {@code html} from the pdfh slot, parsing it first
     * when it is not the HTML currently held by that slot.
     *
     * @param html The HTML source; must not be {@code null}.
     */
    public Document getPdfh(String html) {
        if (!html.equals(pdfhHtml)) {
            pdfhHtml = html;
            pdfhDoc = Jsoup.parse(html);
        }
        return pdfhDoc;
    }

    /**
     * Returns the document for {@code html} from the pdfa slot, parsing it first
     * when it is not the HTML currently held by that slot.
     *
     * @param html The HTML source; must not be {@code null}.
     */
    public Document getPdfa(String html) {
        if (!html.equals(pdfaHtml)) {
            pdfaHtml = html;
            pdfaDoc = Jsoup.parse(html);
        }
        return pdfaDoc;
    }
}

@ -0,0 +1,42 @@
package com.github.catvod.js.bean;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Holds the pieces of a single selector step: the selector itself, an element
 * index taken from an {@code eq(n)} expression, and selectors to exclude that
 * were appended with {@code --}.
 */
public class Info {

    public int index;
    public String rule;
    public List<String> excludes;

    /**
     * @param rule The initial selector text.
     */
    public Info(String rule) {
        this.rule = rule;
    }

    public void setRule(String rule) {
        this.rule = rule;
    }

    /**
     * Reads the index from {@code pos} (for example {@code "eq(2)"}) and extracts any
     * {@code --} exclusions. Exclusions attached to the current rule take precedence;
     * only when the rule has none is {@code pos} inspected for them.
     *
     * @param pos The position expression, optionally followed by {@code --selector} parts.
     */
    public void setInfo(String pos) {
        String position = pos;
        if (rule.contains("--")) {
            String[] parts = rule.split("--");
            setExcludes(parts);
            setRule(parts[0]);
        } else if (position.contains("--")) {
            String[] parts = position.split("--");
            setExcludes(parts);
            position = parts[0];
        }
        index = parseIndex(position);
    }

    /**
     * Stores every element of {@code rules} except the first as the exclusion list.
     *
     * @param rules The result of splitting on {@code --}; element 0 is the selector itself.
     */
    public void setExcludes(String[] rules) {
        excludes = new ArrayList<>(Arrays.asList(rules));
        // Drop the leading selector so that only the exclusions remain.
        excludes.remove(0);
    }

    /** Strips the {@code eq(} / {@code )} wrapper and parses the number, falling back to 0. */
    private static int parseIndex(String position) {
        try {
            String digits = position.replace("eq(", "").replace(")", "");
            return Integer.parseInt(digits);
        } catch (Exception ignored) {
            // Anything that is not a plain integer selects the first element.
            return 0;
        }
    }
}

@ -0,0 +1,16 @@
package com.github.catvod.js.utils;
import com.whl.quickjs.wrapper.JSArray;
import com.whl.quickjs.wrapper.QuickJSContext;
import java.util.List;
/** Helpers for converting Java values into QuickJS values. */
public class JSUtil {

    /**
     * Copies {@code items} into a newly created JS array, preserving order.
     *
     * @param ctx The context used to create the array.
     * @param items The strings to copy; may be {@code null} or empty.
     * @return A new JS array, empty when {@code items} is {@code null} or empty.
     */
    public static JSArray toArray(QuickJSContext ctx, List<String> items) {
        JSArray result = ctx.createNewJSArray();
        if (items != null) {
            int position = 0;
            for (String item : items) {
                result.set(item, position);
                position++;
            }
        }
        return result;
    }
}

@ -0,0 +1,171 @@
package com.github.catvod.js.utils;
import android.text.TextUtils;
import com.github.catvod.js.bean.Cache;
import com.github.catvod.js.bean.Info;
import com.github.catvod.utils.UriUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Evaluates {@code &&}-separated DOM rules against HTML using jsoup.
 *
 * <p>A rule is a chain of selector steps joined by {@code &&}. For the single-value
 * entry point the last segment may be an option: {@code Text}, {@code Html}, or one or
 * more attribute names. A step may carry {@code :eq(n)} to pick one match (negative
 * {@code n} counts from the end) and {@code --selector} parts to strip matching
 * descendants from the result.
 */
public class Parser {

    /** Captures the argument of a CSS {@code url(...)} expression. */
    private static final Pattern URL = Pattern.compile("url\\((.*?)\\)", Pattern.MULTILINE | Pattern.DOTALL);
    /** Steps matching this pattern are left as-is instead of getting {@code :eq(0)} appended. */
    private static final Pattern NO_ADD = Pattern.compile(":eq|:lt|:gt|:first|:last|:not|:even|:odd|:has|:contains|:matches|:empty|^body$|^#");
    /** Attribute names whose values are treated as URLs and joined with the base URL. */
    private static final Pattern JOIN_URL = Pattern.compile("(url|src|href|-original|-src|-play|-url|style)$|^(data-|url-|src-)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);
    /** Values with these schemes are returned untouched rather than joined. */
    private static final Pattern SPEC_URL = Pattern.compile("^(ftp|magnet|thunder|ws):", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);

    private final Cache cache;

    public Parser() {
        cache = new Cache();
    }

    /** Splits one selector step into its selector, {@code :eq} index and exclusions. */
    private Info getParseInfo(String rule) {
        Info info = new Info(rule);
        if (rule.contains(":eq")) {
            String[] parts = rule.split(":");
            info.setRule(parts[0]);
            info.setInfo(parts[1]);
        } else if (rule.contains("--")) {
            String[] rules = rule.split("--");
            info.setExcludes(rules);
            info.setRule(rules[0]);
        }
        return info;
    }

    /**
     * Converts an {@code &&}-separated rule into space-separated selector steps,
     * appending {@code :eq(0)} to steps that do not already carry a positional or
     * filtering pseudo-selector (see {@link #NO_ADD}).
     *
     * @param parse The rule to convert.
     * @param first When {@code true} every eligible step gets {@code :eq(0)}; when
     *     {@code false} the last step is left unrestricted so it can match many elements.
     */
    private String parseHikerToJq(String parse, boolean first) {
        if (!parse.contains("&&")) {
            String[] split = parse.split(" ");
            Matcher m = NO_ADD.matcher(split[split.length - 1]);
            if (!m.find() && first) parse = parse + ":eq(0)";
            return parse;
        }
        String[] parses = parse.split("&&");
        List<String> items = new ArrayList<>();
        for (int i = 0; i < parses.length; i++) {
            String[] split = parses[i].split(" ");
            if (NO_ADD.matcher(split[split.length - 1]).find()) {
                items.add(parses[i]);
            } else {
                if (!first && i >= parses.length - 1) items.add(parses[i]);
                else items.add(parses[i] + ":eq(0)");
            }
        }
        return TextUtils.join(" ", items);
    }

    /**
     * Applies each space-separated step of {@code jqRule} in turn, starting from
     * {@code doc}.
     *
     * @return The elements matched by the whole chain, or an empty collection as soon
     *     as any step matches nothing.
     */
    private Elements selectAll(Document doc, String jqRule) {
        Elements elements = new Elements();
        for (String parse : jqRule.split(" ")) {
            elements = parseOneRule(doc, parse, elements);
            if (elements.isEmpty()) break;
        }
        return elements;
    }

    /**
     * Evaluates {@code rule} against {@code html} and returns a single string.
     *
     * @param html The HTML to parse. {@code null} yields an empty string.
     * @param rule The rule; {@code Text}/{@code body&&Text} and {@code Html}/{@code body&&Html}
     *     return the whole document's text or HTML. {@code null} yields an empty string.
     * @param addUrl The base URL used to resolve URL-like attribute values; empty or
     *     {@code null} disables resolution.
     * @return The text, HTML, outer HTML or attribute value selected by the rule, or an
     *     empty string when nothing matches.
     */
    public String parseDomForUrl(String html, String rule, String addUrl) {
        // Missing arguments mean "nothing to extract" rather than a NullPointerException.
        if (html == null || rule == null) return "";
        if (addUrl == null) addUrl = "";
        Document doc = cache.getPdfh(html);
        if ("body&&Text".equals(rule) || "Text".equals(rule)) {
            return doc.text();
        } else if ("body&&Html".equals(rule) || "Html".equals(rule)) {
            return doc.html();
        }
        String option = "";
        if (rule.contains("&&")) {
            // The last segment is the option; everything before it is the selector chain.
            String[] rs = rule.split("&&");
            option = rs[rs.length - 1];
            List<String> excludes = new ArrayList<>(Arrays.asList(rs));
            excludes.remove(rs.length - 1);
            rule = TextUtils.join("&&", excludes);
        }
        Elements elements = selectAll(doc, parseHikerToJq(rule, true));
        if (elements.isEmpty()) return "";
        if (TextUtils.isEmpty(option)) return elements.outerHtml();
        if ("Text".equals(option)) return elements.text();
        if ("Html".equals(option)) return elements.html();
        return parseAttr(elements, option, addUrl);
    }

    /**
     * Returns the first non-empty value among the attribute names listed in
     * {@code option}, resolving URL-like values against {@code addUrl}.
     */
    private String parseAttr(Elements elements, String option, String addUrl) {
        String result = "";
        // NOTE: "[||]" is a character class, so this splits on every single '|'; the empty
        // names produced by "a||b" simply yield empty values and are skipped.
        for (String s : option.split("[||]")) {
            result = elements.attr(s);
            if (s.toLowerCase().contains("style") && result.contains("url(")) {
                // Pull the target out of a CSS url(...) and strip surrounding quotes.
                Matcher m = URL.matcher(result);
                if (m.find()) result = m.group(1);
                result = result.replaceAll("^['|\"](.*)['|\"]$", "$1");
            }
            if (!result.isEmpty() && !addUrl.isEmpty()) {
                if (JOIN_URL.matcher(s).find() && !SPEC_URL.matcher(result).find()) {
                    if (result.contains("http")) {
                        result = result.substring(result.indexOf("http"));
                    } else {
                        result = UriUtil.resolve(addUrl, result);
                    }
                }
            }
            if (!result.isEmpty()) {
                return result;
            }
        }
        return result;
    }

    /**
     * Evaluates {@code rule} against {@code html} and returns the outer HTML of every match.
     *
     * @param html The HTML to parse. {@code null} yields an empty list.
     * @param rule The rule. {@code null} yields an empty list.
     * @return A new list with one entry per matched element; empty when nothing matches.
     */
    public List<String> parseDomForArray(String html, String rule) {
        List<String> items = new ArrayList<>();
        if (html == null || rule == null) return items;
        Document doc = cache.getPdfa(html);
        for (Element element : selectAll(doc, parseHikerToJq(rule, false))) {
            items.add(element.outerHtml());
        }
        return items;
    }

    /**
     * Applies one selector step.
     *
     * @param doc The document searched when {@code elements} is empty (the first step).
     * @param parse The step, possibly with {@code :eq(n)} and {@code --} exclusions.
     * @param elements The matches of the previous step.
     * @return The matches of this step; empty when nothing matches or the index is out of range.
     */
    private Elements parseOneRule(Document doc, String parse, Elements elements) {
        Info info = getParseInfo(parse);
        if (elements.isEmpty()) {
            elements = doc.select(info.rule);
        } else {
            elements = elements.select(info.rule);
        }
        if (parse.contains(":eq")) {
            // Negative indices count from the end. An index that is still negative after
            // the adjustment is out of range and must yield no match instead of being
            // passed to Elements.eq, which would throw.
            int index = info.index < 0 ? elements.size() + info.index : info.index;
            elements = index < 0 ? new Elements() : elements.eq(index);
        }
        if (info.excludes != null && !elements.isEmpty()) {
            // Work on a copy so that removing excluded nodes does not alter the cached document.
            elements = elements.clone();
            for (int i = 0; i < info.excludes.size(); i++) {
                elements.select(info.excludes.get(i)).remove();
            }
        }
        return elements;
    }

    /**
     * Evaluates {@code rule} against {@code html} and, for every match, builds a
     * {@code text$url} entry from the {@code texts} and {@code urls} sub-rules.
     *
     * @param html The HTML to parse. {@code null} yields an empty list.
     * @param rule The rule selecting the list items. {@code null} yields an empty list.
     * @param texts The rule evaluated inside each item for the left-hand side.
     * @param urls The rule evaluated inside each item for the right-hand side.
     * @param urlKey The base URL used to resolve the right-hand side.
     * @return One {@code text$url} string per matched element.
     */
    public List<String> parseDomForList(String html, String rule, String texts, String urls, String urlKey) {
        if (html == null || rule == null) return Collections.emptyList();
        Elements elements = selectAll(cache.getPdfa(html), parseHikerToJq(rule, false));
        if (elements.isEmpty()) return Collections.emptyList();
        List<String> items = new ArrayList<>();
        for (Element element : elements) {
            String itemHtml = element.outerHtml();
            items.add(parseDomForUrl(itemHtml, texts, "").trim() + '$' + parseDomForUrl(itemHtml, urls, urlKey));
        }
        return items;
    }
}

@ -0,0 +1,231 @@
package com.github.catvod.utils;
import android.text.TextUtils;
import androidx.annotation.Nullable;
/**
* Utility methods for manipulating URIs.
*/
public final class UriUtil {
// All members are static; the class keeps no state of its own.
/**
* The length of arrays returned by {@link #getUriIndices(String)}.
*/
private static final int INDEX_COUNT = 4;
/**
* An index into an array returned by {@link #getUriIndices(String)}.
*
* <p>The value at this position in the array is the index of the ':' after the scheme. Equals -1
* if the URI is a relative reference (no scheme). The hier-part starts at (schemeColon + 1),
* including when the URI has no scheme.
*/
private static final int SCHEME_COLON = 0;
/**
* An index into an array returned by {@link #getUriIndices(String)}.
*
* <p>The value at this position in the array is the index of the path part. Equals (schemeColon +
* 1) if no authority part, (schemeColon + 3) if the authority part consists of just "//", and
* (query) if no path part. The characters starting at this index can be "//" only if the
* authority part is non-empty (in this case the double-slash means the first segment is empty).
*/
private static final int PATH = 1;
/**
* An index into an array returned by {@link #getUriIndices(String)}.
*
* <p>The value at this position in the array is the index of the query part, including the '?'
* before the query. Equals fragment if no query part, and (fragment - 1) if the query part is a
* single '?' with no data.
*/
private static final int QUERY = 2;
/**
* An index into an array returned by {@link #getUriIndices(String)}.
*
* <p>The value at this position in the array is the index of the fragment part, including the '#'
* before the fragment. Equal to the length of the URI if no fragment part, and (length - 1) if
* the fragment part is a single '#' with no data.
*/
private static final int FRAGMENT = 3;
/**
* Performs relative resolution of a {@code referenceUri} with respect to a {@code baseUri}.
*
* <p>The resolution is performed as specified by RFC-3986.
*
* <p>For example, resolving {@code "d"} against {@code "http://a.com/b/c"} replaces the last
* path segment of the base and yields {@code "http://a.com/b/d"}.
*
* @param baseUri The base URI. {@code null} is treated as the empty string.
* @param referenceUri The reference URI to resolve. {@code null} is treated as the empty string.
* @return The resolved URI. An absolute reference is returned on its own (with dot segments
*     removed from its path), independent of {@code baseUri}.
*/
public static String resolve(@Nullable String baseUri, @Nullable String referenceUri) {
StringBuilder uri = new StringBuilder();
// Map null onto empty string, to make the following logic simpler.
baseUri = baseUri == null ? "" : baseUri;
referenceUri = referenceUri == null ? "" : referenceUri;
int[] refIndices = getUriIndices(referenceUri);
if (refIndices[SCHEME_COLON] != -1) {
// The reference is absolute. The target Uri is the reference.
uri.append(referenceUri);
removeDotSegments(uri, refIndices[PATH], refIndices[QUERY]);
return uri.toString();
}
int[] baseIndices = getUriIndices(baseUri);
if (refIndices[FRAGMENT] == 0) {
// The reference is empty or contains just the fragment part, then the target Uri is the
// concatenation of the base Uri without its fragment, and the reference.
return uri.append(baseUri, 0, baseIndices[FRAGMENT]).append(referenceUri).toString();
}
if (refIndices[QUERY] == 0) {
// The reference starts with the query part. The target is the base up to (but excluding) the
// query, plus the reference.
return uri.append(baseUri, 0, baseIndices[QUERY]).append(referenceUri).toString();
}
if (refIndices[PATH] != 0) {
// The reference has authority. The target is the base scheme plus the reference.
int baseLimit = baseIndices[SCHEME_COLON] + 1;
uri.append(baseUri, 0, baseLimit).append(referenceUri);
// The reference was appended after baseLimit characters, so its indices shift by baseLimit.
return removeDotSegments(uri, baseLimit + refIndices[PATH], baseLimit + refIndices[QUERY]);
}
if (referenceUri.charAt(refIndices[PATH]) == '/') {
// The reference path is rooted. The target is the base scheme and authority (if any), plus
// the reference.
uri.append(baseUri, 0, baseIndices[PATH]).append(referenceUri);
return removeDotSegments(uri, baseIndices[PATH], baseIndices[PATH] + refIndices[QUERY]);
}
// The target Uri is the concatenation of the base Uri up to (but excluding) the last segment,
// and the reference. This can be split into 2 cases:
if (baseIndices[SCHEME_COLON] + 2 < baseIndices[PATH] && baseIndices[PATH] == baseIndices[QUERY]) {
// Case 1: The base hier-part is just the authority, with an empty path. An additional '/' is
// needed after the authority, before appending the reference.
uri.append(baseUri, 0, baseIndices[PATH]).append('/').append(referenceUri);
// The extra +1 on the limit accounts for the '/' inserted above.
return removeDotSegments(uri, baseIndices[PATH], baseIndices[PATH] + refIndices[QUERY] + 1);
} else {
// Case 2: Otherwise, find the last '/' in the base hier-part and append the reference after
// it. If base hier-part has no '/', it could only mean that it is completely empty or
// contains only one segment, in which case the whole hier-part is excluded and the reference
// is appended right after the base scheme colon without an added '/'.
int lastSlashIndex = baseUri.lastIndexOf('/', baseIndices[QUERY] - 1);
int baseLimit = lastSlashIndex == -1 ? baseIndices[PATH] : lastSlashIndex + 1;
uri.append(baseUri, 0, baseLimit).append(referenceUri);
return removeDotSegments(uri, baseIndices[PATH], baseLimit + refIndices[QUERY]);
}
}
/**
* Removes dot segments from the path of a URI.
*
* <p>The builder is edited in place; characters outside {@code [offset, limit)} are not
* examined for dot segments.
*
* @param uri A {@link StringBuilder} containing the URI.
* @param offset The index of the start of the path in {@code uri}.
* @param limit The limit (exclusive) of the path in {@code uri}.
* @return The contents of {@code uri} after the dot segments have been removed.
*/
private static String removeDotSegments(StringBuilder uri, int offset, int limit) {
if (offset >= limit) {
// Nothing to do.
return uri.toString();
}
if (uri.charAt(offset) == '/') {
// If the path starts with a /, always retain it.
offset++;
}
// The first character of the current path segment.
int segmentStart = offset;
int i = offset;
// i == limit is visited as well so that the final segment, which has no trailing '/', is
// examined too.
while (i <= limit) {
int nextSegmentStart;
if (i == limit) {
nextSegmentStart = i;
} else if (uri.charAt(i) == '/') {
nextSegmentStart = i + 1;
} else {
// Still inside a segment; keep scanning.
i++;
continue;
}
// We've encountered the end of a segment or the end of the path. If the final segment was
// "." or "..", remove the appropriate segments of the path.
if (i == segmentStart + 1 && uri.charAt(segmentStart) == '.') {
// Given "abc/def/./ghi", remove "./" to get "abc/def/ghi".
uri.delete(segmentStart, nextSegmentStart);
// The path shrank, so the exclusive limit moves left by the number of deleted characters.
limit -= nextSegmentStart - segmentStart;
i = segmentStart;
} else if (i == segmentStart + 2 && uri.charAt(segmentStart) == '.' && uri.charAt(segmentStart + 1) == '.') {
// Given "abc/def/../ghi", remove "def/../" to get "abc/ghi".
int prevSegmentStart = uri.lastIndexOf("/", segmentStart - 2) + 1;
// Never delete anything that lies before the start of the path.
int removeFrom = Math.max(prevSegmentStart, offset);
uri.delete(removeFrom, nextSegmentStart);
limit -= nextSegmentStart - removeFrom;
segmentStart = prevSegmentStart;
i = prevSegmentStart;
} else {
// An ordinary segment: step past the separator and start the next segment.
i++;
segmentStart = i;
}
}
return uri.toString();
}
/**
* Calculates indices of the constituent components of a URI.
*
* @param uriString The URI as a string.
* @return The corresponding indices, addressed by {@link #SCHEME_COLON}, {@link #PATH},
*     {@link #QUERY} and {@link #FRAGMENT}. When {@code TextUtils.isEmpty(uriString)} holds,
*     the scheme-colon entry is -1 and the other three entries are 0.
*/
private static int[] getUriIndices(String uriString) {
int[] indices = new int[INDEX_COUNT];
if (TextUtils.isEmpty(uriString)) {
indices[SCHEME_COLON] = -1;
return indices;
}
// Determine outer structure from right to left.
// Uri = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
int length = uriString.length();
int fragmentIndex = uriString.indexOf('#');
if (fragmentIndex == -1) {
fragmentIndex = length;
}
int queryIndex = uriString.indexOf('?');
if (queryIndex == -1 || queryIndex > fragmentIndex) {
// '#' before '?': '?' is within the fragment.
queryIndex = fragmentIndex;
}
// Slashes are allowed only in hier-part so any colon after the first slash is part of the
// hier-part, not the scheme colon separator.
int schemeIndexLimit = uriString.indexOf('/');
if (schemeIndexLimit == -1 || schemeIndexLimit > queryIndex) {
schemeIndexLimit = queryIndex;
}
int schemeIndex = uriString.indexOf(':');
if (schemeIndex > schemeIndexLimit) {
// '/' before ':'
schemeIndex = -1;
}
// Determine hier-part structure: hier-part = "//" authority path / path
// This block can also cope with schemeIndex == -1.
boolean hasAuthority = schemeIndex + 2 < queryIndex && uriString.charAt(schemeIndex + 1) == '/' && uriString.charAt(schemeIndex + 2) == '/';
int pathIndex;
if (hasAuthority) {
pathIndex = uriString.indexOf('/', schemeIndex + 3); // find first '/' after "://"
if (pathIndex == -1 || pathIndex > queryIndex) {
// No '/' before the query: the path is empty and starts where the query starts.
pathIndex = queryIndex;
}
} else {
pathIndex = schemeIndex + 1;
}
indices[SCHEME_COLON] = schemeIndex;
indices[PATH] = pathIndex;
indices[QUERY] = queryIndex;
indices[FRAGMENT] = fragmentIndex;
return indices;
}
}

Binary file not shown.

@ -1 +1 @@
6fc8fb3791e3d877fa06ed91c23a77f4
41d830d74afd5a31464b2f6678cd3f2e

@ -6,12 +6,14 @@ rd /s/q "%~dp0\Smali_classes"
java -jar "%~dp0\3rd\apktool_2.11.0.jar" d -f --only-main-classes "%~dp0\..\app\build\outputs\apk\release\app-release-unsigned.apk" -o "%~dp0\Smali_classes"
rd /s/q "%~dp0\spider.jar\smali\com\github\catvod\spider"
rd /s/q "%~dp0\spider.jar\smali\com\github\catvod\js"
rd /s/q "%~dp0\spider.jar\smali\org\slf4j\"
if not exist "%~dp0\spider.jar\smali\com\github\catvod\" md "%~dp0\spider.jar\smali\com\github\catvod\"
if not exist "%~dp0\spider.jar\smali\org\slf4j\" md "%~dp0\spider.jar\smali\org\slf4j\"
move "%~dp0\Smali_classes\smali\com\github\catvod\spider" "%~dp0\spider.jar\smali\com\github\catvod\"
move "%~dp0\Smali_classes\smali\com\github\catvod\js" "%~dp0\spider.jar\smali\com\github\catvod\"
move "%~dp0\Smali_classes\smali\org\slf4j" "%~dp0\spider.jar\smali\org\slf4j\"
java -jar "%~dp0\3rd\apktool_2.11.0.jar" b "%~dp0\spider.jar" -c

Loading…
Cancel
Save