pull/585/head
FongMi 1 year ago
parent 622f19a23a
commit ad8fc1e527
  1. 1
      quickjs/src/main/assets/js/lib/cheerio.min.js
  2. 6191
      quickjs/src/main/assets/js/lib/crypto-js.js
  3. 65
      quickjs/src/main/assets/js/lib/gbk.js
  4. 1
      quickjs/src/main/assets/js/lib/similarity.js
  5. 32
      quickjs/src/main/java/com/fongmi/quickjs/bean/Cache.java
  6. 42
      quickjs/src/main/java/com/fongmi/quickjs/bean/Info.java
  7. 4
      quickjs/src/main/java/com/fongmi/quickjs/crawler/Spider.java
  8. 40
      quickjs/src/main/java/com/fongmi/quickjs/method/Global.java
  9. 2
      quickjs/src/main/java/com/fongmi/quickjs/utils/JSUtil.java
  10. 171
      quickjs/src/main/java/com/fongmi/quickjs/utils/Parser.java

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

@ -1 +0,0 @@
/**
 * Scores how alike two strings are by comparing their adjacent character
 * pairs. All whitespace is stripped from both inputs first.
 *
 * @param {string} first
 * @param {string} second
 * @returns {number} 1 for identical (whitespace-stripped) strings, 0 when
 *   either stripped string is shorter than two characters, otherwise
 *   2 * sharedPairs / (totalPairs of both strings).
 */
function compareTwoStrings(first, second) {
  first = first.replace(/\s+/g, "");
  second = second.replace(/\s+/g, "");

  if (first === second) {
    return 1;
  }
  if (first.length < 2 || second.length < 2) {
    return 0;
  }

  // Count every two-character window of the first string.
  const pairCounts = new Map();
  for (let i = 0; i < first.length - 1; i++) {
    const pair = first.substring(i, i + 2);
    pairCounts.set(pair, (pairCounts.get(pair) || 0) + 1);
  }

  // Consume one count per matching window of the second string so that a
  // repeated pair is only credited as often as it occurs in both strings.
  let shared = 0;
  for (let i = 0; i < second.length - 1; i++) {
    const pair = second.substring(i, i + 2);
    const remaining = pairCounts.get(pair) || 0;
    if (remaining > 0) {
      pairCounts.set(pair, remaining - 1);
      shared++;
    }
  }

  return (2 * shared) / (first.length + second.length - 2);
}

/**
 * Rates every candidate against mainString with compareTwoStrings.
 *
 * @param {string} mainString
 * @param {string[]} targetStrings
 * @returns {{ratings: {target: string, rating: number}[], bestMatch: object, bestMatchIndex: number}}
 *   bestMatch is the first entry holding the highest rating.
 */
function findBestMatch(mainString, targetStrings) {
  const ratings = [];
  let bestMatchIndex = 0;

  for (let idx = 0; idx < targetStrings.length; idx += 1) {
    const target = targetStrings[idx];
    const rating = compareTwoStrings(mainString, target);
    ratings.push({ target: target, rating: rating });
    if (rating > ratings[bestMatchIndex].rating) {
      bestMatchIndex = idx;
    }
  }

  return {
    ratings: ratings,
    bestMatch: ratings[bestMatchIndex],
    bestMatchIndex: bestMatchIndex,
  };
}

/**
 * Finds the longest run of characters that appears contiguously in both
 * strings, using a table of matching-suffix lengths.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {{length: number, sequence: string, offset: number|null}}
 *   offset is the start of the run inside str1; it is 0 when either input
 *   is falsy and null when the strings share no character.
 */
function lcs(str1, str2) {
  if (!str1 || !str2) {
    return { length: 0, sequence: "", offset: 0 };
  }

  const rows = str1.length;
  const cols = str2.length;

  // suffixLen[i][j] = length of the common run ending at str1[i] / str2[j].
  const suffixLen = [];
  for (let r = 0; r < rows; r++) {
    suffixLen.push(new Array(cols).fill(0));
  }

  let sequence = "";
  let longest = 0;
  let trackedStart = 0;
  let currentStart = null;

  for (let i = 0; i < rows; i++) {
    for (let j = 0; j < cols; j++) {
      if (str1[i] !== str2[j]) {
        suffixLen[i][j] = 0;
        continue;
      }

      suffixLen[i][j] = i === 0 || j === 0 ? 1 : suffixLen[i - 1][j - 1] + 1;
      if (suffixLen[i][j] <= longest) {
        continue;
      }

      longest = suffixLen[i][j];
      currentStart = i - suffixLen[i][j] + 1;
      if (trackedStart === currentStart) {
        // Same run grew by one character: extend what we already have.
        sequence += str1[i];
      } else {
        // A different run took the lead: rebuild the text from its start.
        trackedStart = currentStart;
        sequence = str1.substr(trackedStart, i + 1 - trackedStart);
      }
    }
  }

  return { length: longest, sequence: sequence, offset: currentStart };
}

/**
 * Runs lcs(mainString, target) for every candidate.
 *
 * @param {string} mainString
 * @param {string[]} targetStrings
 * @returns {{allLCS: {target: string, lcs: object}[], bestMatch: object, bestMatchIndex: number}}
 *   bestMatch is the first entry with the greatest lcs.length.
 */
function findBestLCS(mainString, targetStrings) {
  const results = [];
  let bestMatchIndex = 0;

  for (let idx = 0; idx < targetStrings.length; idx += 1) {
    const target = targetStrings[idx];
    const found = lcs(mainString, target);
    results.push({ target: target, lcs: found });
    if (found.length > results[bestMatchIndex].lcs.length) {
      bestMatchIndex = idx;
    }
  }

  return {
    allLCS: results,
    bestMatch: results[bestMatchIndex],
    bestMatchIndex: bestMatchIndex,
  };
}

export { compareTwoStrings, findBestMatch, findBestLCS };

@ -1,32 +0,0 @@
package com.fongmi.quickjs.bean;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
/**
 * Remembers the most recently parsed HTML string and its jsoup
 * {@link Document}, separately for the "pdfh" and "pdfa" lookups, so that
 * repeated queries against the same HTML do not re-parse it.
 *
 * <p>Each slot holds exactly one entry: asking for a different HTML string
 * replaces the previous document.
 */
public class Cache {

    /** HTML text that {@link #pdfhDoc} was parsed from. */
    public String pdfhHtml;
    /** HTML text that {@link #pdfaDoc} was parsed from. */
    public String pdfaHtml;
    /** Parsed form of {@link #pdfhHtml}. */
    public Document pdfhDoc;
    /** Parsed form of {@link #pdfaHtml}. */
    public Document pdfaDoc;

    /**
     * Returns the document for {@code html} from the "pdfh" slot, parsing it
     * only when it differs from the text parsed last time.
     *
     * @param html markup to parse; {@code null} is treated as an empty string
     * @return the parsed document, never {@code null}
     */
    public Document getPdfh(String html) {
        updatePdfh(html);
        return pdfhDoc;
    }

    /**
     * Returns the document for {@code html} from the "pdfa" slot, parsing it
     * only when it differs from the text parsed last time.
     *
     * @param html markup to parse; {@code null} is treated as an empty string
     * @return the parsed document, never {@code null}
     */
    public Document getPdfa(String html) {
        updatePdfa(html);
        return pdfaDoc;
    }

    /** Re-parses the "pdfh" slot unless it already holds {@code html}. */
    private void updatePdfh(String html) {
        // Normalise null so neither equals() nor Jsoup.parse() sees it.
        String source = html == null ? "" : html;
        // The fields are public, so also re-parse if the document went missing.
        if (pdfhDoc != null && source.equals(pdfhHtml)) return;
        pdfhDoc = Jsoup.parse(pdfhHtml = source);
    }

    /** Re-parses the "pdfa" slot unless it already holds {@code html}. */
    private void updatePdfa(String html) {
        String source = html == null ? "" : html;
        if (pdfaDoc != null && source.equals(pdfaHtml)) return;
        pdfaDoc = Jsoup.parse(pdfaHtml = source);
    }
}

@ -1,42 +0,0 @@
package com.fongmi.quickjs.bean;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class Info {
public int index;
public String rule;
public List<String> excludes;
public Info(String rule) {
this.rule = rule;
}
public void setRule(String rule) {
this.rule = rule;
}
public void setInfo(String pos) {
if (rule.contains("--")) {
String[] rules = rule.split("--");
setExcludes(rules);
setRule(rules[0]);
} else if (pos.contains("--")) {
String[] rules = pos.split("--");
setExcludes(rules);
pos = rules[0];
}
try {
index = Integer.parseInt(pos.replace("eq(", "").replace(")", ""));
} catch (Exception ignored) {
index = 0;
}
}
public void setExcludes(String[] rules) {
excludes = new ArrayList<>(Arrays.asList(rules));
excludes.remove(0);
}
}

@ -78,7 +78,7 @@ public class Spider extends com.github.catvod.crawler.Spider {
@Override
public String categoryContent(String tid, String pg, boolean filter, HashMap<String, String> extend) throws Exception {
JSObject obj = submit(() -> JSUtil.toObj(ctx, extend)).get();
JSObject obj = submit(() -> JSUtil.toObject(ctx, extend)).get();
return (String) call("category", tid, pg, filter, obj);
}
@ -190,7 +190,7 @@ public class Spider extends com.github.catvod.crawler.Spider {
}
private Object[] proxy1(Map<String, String> params) throws Exception {
JSObject object = JSUtil.toObj(ctx, params);
JSObject object = JSUtil.toObject(ctx, params);
JSONArray array = new JSONArray(((JSArray) jsObject.getJSFunction("proxy").call(object)).stringify());
Map<String, String> headers = array.length() > 3 ? Json.toMap(array.optString(3)) : null;
boolean base64 = array.length() > 4 && array.optInt(4) == 1;

@ -6,13 +6,10 @@ import androidx.annotation.NonNull;
import com.fongmi.quickjs.bean.Req;
import com.fongmi.quickjs.utils.Connect;
import com.fongmi.quickjs.utils.Crypto;
import com.fongmi.quickjs.utils.JSUtil;
import com.fongmi.quickjs.utils.Parser;
import com.github.catvod.Proxy;
import com.github.catvod.utils.Trans;
import com.github.catvod.utils.UriUtil;
import com.orhanobut.logger.Logger;
import com.whl.quickjs.wrapper.JSArray;
import com.whl.quickjs.wrapper.JSFunction;
import com.whl.quickjs.wrapper.JSMethod;
import com.whl.quickjs.wrapper.JSObject;
@ -21,9 +18,6 @@ import com.whl.quickjs.wrapper.QuickJSContext;
import java.io.IOException;
import java.lang.reflect.Method;
import java.net.URLEncoder;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.ExecutorService;
@ -36,7 +30,6 @@ public class Global {
private final ExecutorService executor;
private final QuickJSContext ctx;
private final Parser parser;
private final Timer timer;
public static Global create(QuickJSContext ctx, ExecutorService executor) {
@ -44,7 +37,6 @@ public class Global {
}
private Global(QuickJSContext ctx, ExecutorService executor) {
this.parser = new Parser();
this.executor = executor;
this.timer = new Timer();
this.ctx = ctx;
@ -121,44 +113,12 @@ public class Global {
}
}
/**
 * Script-facing lookup of a single value in {@code html}.
 * Forwards all three arguments unchanged to {@code parser.parseDomForUrl}.
 *
 * @param html   markup to search
 * @param rule   selection rule
 * @param urlKey passed as the third argument of {@code parseDomForUrl}
 * @return whatever {@code parseDomForUrl} returns
 */
@Keep
@JSMethod
public String pd(String html, String rule, String urlKey) {
    final String value = parser.parseDomForUrl(html, rule, urlKey);
    return value;
}
/**
 * Script-facing lookup of a single value in {@code html}.
 * Calls {@code parser.parseDomForUrl} with an empty string as its third
 * argument.
 *
 * @param html markup to search
 * @param rule selection rule
 * @return whatever {@code parseDomForUrl} returns
 */
@Keep
@JSMethod
public String pdfh(String html, String rule) {
    final String value = parser.parseDomForUrl(html, rule, "");
    return value;
}
/**
 * Script-facing lookup of every match of {@code rule} in {@code html}.
 * The strings produced by {@code parser.parseDomForArray} are converted to
 * a script array with {@code JSUtil.toArray}.
 *
 * @param html markup to search
 * @param rule selection rule
 * @return script array built from the parser's result
 */
@Keep
@JSMethod
public JSArray pdfa(String html, String rule) {
    final java.util.List<String> matches = parser.parseDomForArray(html, rule);
    return JSUtil.toArray(ctx, matches);
}
/**
 * Script-facing list extraction. Forwards every argument unchanged to
 * {@code parser.parseDomForList} and converts the resulting strings to a
 * script array with {@code JSUtil.toArray}.
 *
 * @param html   markup to search
 * @param rule   rule selecting the list entries
 * @param texts  rule forwarded as the parser's {@code texts} argument
 * @param urls   rule forwarded as the parser's {@code urls} argument
 * @param urlKey forwarded as the parser's {@code urlKey} argument
 * @return script array built from the parser's result
 */
@Keep
@JSMethod
public JSArray pdfl(String html, String rule, String texts, String urls, String urlKey) {
    final java.util.List<String> entries = parser.parseDomForList(html, rule, texts, urls, urlKey);
    return JSUtil.toArray(ctx, entries);
}
/**
 * Script-facing wrapper around {@code UriUtil.resolve(parent, child)}.
 *
 * @param parent first argument handed to {@code UriUtil.resolve}
 * @param child  second argument handed to {@code UriUtil.resolve}
 * @return the value returned by {@code UriUtil.resolve}
 */
@Keep
@JSMethod
public String joinUrl(String parent, String child) {
    final String joined = UriUtil.resolve(parent, child);
    return joined;
}
/**
 * Decodes GBK-encoded bytes into a string and logs the input and result at
 * debug level.
 *
 * @param buffer encoded bytes
 * @return the decoded text
 * @throws CharacterCodingException if {@code buffer} contains a byte
 *         sequence that is malformed or unmappable in GBK
 */
@Keep
@JSMethod
public String gbkDecode(byte[] buffer) throws CharacterCodingException {
    // Decode as GBK, matching the method name: GBK extends GB2312, so the
    // previous "GB2312" decoder rejected characters that exist only in GBK.
    String result = Charset.forName("GBK").newDecoder().decode(ByteBuffer.wrap(buffer)).toString();
    // Render the bytes explicitly; formatting a byte[] with %s prints only
    // its identity ("[B@...").
    Logger.t("gbkDecode").d("text:%s\nresult:\n%s", java.util.Arrays.toString(buffer), result);
    return result;
}
@Keep
@JSMethod
public String md5X(String text) {

@ -16,7 +16,7 @@ public class JSUtil {
return array;
}
public static JSObject toObj(QuickJSContext ctx, Map<String, String> map) {
public static JSObject toObject(QuickJSContext ctx, Map<String, String> map) {
JSObject obj = ctx.createNewJSObject();
if (map == null || map.isEmpty()) return obj;
for (String s : map.keySet()) obj.setProperty(s, map.get(s));

@ -1,171 +0,0 @@
package com.fongmi.quickjs.utils;
import android.text.TextUtils;
import com.fongmi.quickjs.bean.Cache;
import com.fongmi.quickjs.bean.Info;
import com.github.catvod.utils.UriUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Evaluates compact DOM rules against HTML using jsoup.
 *
 * <p>A rule is a chain of selector segments joined by {@code &&}. For the
 * single-value lookup ({@link #parseDomForUrl}) the last segment is an
 * option instead of a selector: {@code Text}, {@code Html}, or one or more
 * attribute names separated by {@code ||}. A segment may end in
 * {@code :eq(n)} to pick one match (negative {@code n} counts from the end)
 * and may carry {@code --selector} suffixes whose matches are removed from
 * the result.
 *
 * <p>Parsed documents are memoised in a {@link Cache}, so an instance keeps
 * mutable state between calls.
 */
public class Parser {

    /** Captures the argument of a CSS {@code url(...)} expression. */
    private static final Pattern URL = Pattern.compile("url\\((.*?)\\)", Pattern.MULTILINE | Pattern.DOTALL);
    /** Selector tokens that must not get an implicit {@code :eq(0)} appended. */
    private static final Pattern NO_ADD = Pattern.compile(":eq|:lt|:gt|:first|:last|:not|:even|:odd|:has|:contains|:matches|:empty|^body$|^#");
    /** Attribute names whose values are resolved against the base URL. */
    private static final Pattern JOIN_URL = Pattern.compile("(url|src|href|-original|-src|-play|-url|style)$|^(data-|url-|src-)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);
    /** URL schemes that are returned as-is instead of being resolved. */
    private static final Pattern SPEC_URL = Pattern.compile("^(ftp|magnet|thunder|ws):", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);

    private final Cache cache;

    public Parser() {
        cache = new Cache();
    }

    /**
     * Splits one selector token into selector text, {@code :eq} index and
     * {@code --} exclusions.
     */
    private Info getParseInfo(String rule) {
        Info info = new Info(rule);
        if (rule.contains(":eq")) {
            info.setRule(rule.split(":")[0]);
            info.setInfo(rule.split(":")[1]);
        } else if (rule.contains("--")) {
            String[] rules = rule.split("--");
            info.setExcludes(rules);
            info.setRule(rules[0]);
        }
        return info;
    }

    /** Returns the last space-separated token of {@code text}, or "" if there is none. */
    private String lastToken(String text) {
        String[] tokens = text.split(" ");
        return tokens.length == 0 ? "" : tokens[tokens.length - 1];
    }

    /**
     * Converts an {@code &&}-joined rule into a space-separated selector
     * chain, appending {@code :eq(0)} to every segment whose last token does
     * not match {@link #NO_ADD}.
     *
     * @param parse rule text (without a trailing option segment)
     * @param first when {@code true} every segment is narrowed to its first
     *              match; when {@code false} the final segment is left
     *              unnarrowed so that all of its matches are kept
     */
    private String parseHikerToJq(String parse, boolean first) {
        if (!parse.contains("&&")) {
            boolean positioned = NO_ADD.matcher(lastToken(parse)).find();
            return !positioned && first ? parse + ":eq(0)" : parse;
        }
        // Drop blank segments ("a&&&&b", "a&& &&b"): they would otherwise
        // turn into a bare ":eq(0)" token with no selector in front of it.
        List<String> segments = new ArrayList<>();
        for (String segment : parse.split("&&")) {
            if (!segment.trim().isEmpty()) segments.add(segment);
        }
        List<String> items = new ArrayList<>();
        for (int i = 0; i < segments.size(); i++) {
            String segment = segments.get(i);
            boolean positioned = NO_ADD.matcher(lastToken(segment)).find();
            boolean keepAllMatches = !first && i == segments.size() - 1;
            items.add(positioned || keepAllMatches ? segment : segment + ":eq(0)");
        }
        return TextUtils.join(" ", items);
    }

    /**
     * Applies every space-separated step of {@code jqRule} in turn.
     *
     * @return the surviving elements; empty as soon as one step matches
     *         nothing or when the rule contains no usable step
     */
    private Elements selectAll(Document doc, String jqRule) {
        Elements elements = new Elements();
        for (String step : jqRule.split(" ")) {
            // Consecutive spaces produce empty tokens; an empty selector
            // cannot be evaluated, so skip them.
            if (step.isEmpty()) continue;
            elements = parseOneRule(doc, step, elements);
            if (elements.isEmpty()) break;
        }
        return elements;
    }

    /**
     * Extracts a single value from {@code html}.
     *
     * @param html   markup to search
     * @param rule   {@code Text}/{@code Html} (optionally prefixed with
     *               {@code body&&}) for the whole document, or selector
     *               segments followed by an option segment
     * @param addUrl base URL for resolving URL-like attribute values; empty
     *               or {@code null} disables resolving
     * @return the extracted text, markup or attribute value; "" when nothing
     *         matches or the rule has no selector part
     */
    public String parseDomForUrl(String html, String rule, String addUrl) {
        Document doc = cache.getPdfh(html);
        if ("body&&Text".equals(rule) || "Text".equals(rule)) {
            return doc.text();
        } else if ("body&&Html".equals(rule) || "Html".equals(rule)) {
            return doc.html();
        }
        if (rule == null) return "";
        String option = "";
        if (rule.contains("&&")) {
            String[] rs = rule.split("&&");
            // A rule made only of separators splits into nothing.
            if (rs.length == 0) return "";
            option = rs[rs.length - 1];
            rule = TextUtils.join("&&", Arrays.asList(rs).subList(0, rs.length - 1));
        }
        if (rule.trim().isEmpty()) return "";
        Elements elements = selectAll(doc, parseHikerToJq(rule, true));
        if (elements.isEmpty()) return "";
        if (TextUtils.isEmpty(option)) return elements.outerHtml();
        if ("Text".equals(option)) return elements.text();
        if ("Html".equals(option)) return elements.html();
        return readAttribute(elements, option, addUrl);
    }

    /**
     * Returns the first non-empty attribute value among the alternatives
     * listed in {@code option}, unwrapping {@code url(...)} in style
     * attributes and resolving URL-like attributes against {@code addUrl}.
     */
    private String readAttribute(Elements elements, String option, String addUrl) {
        String result = "";
        // "[||]" is a character class matching a single '|'; "a||b" therefore
        // yields an empty middle token, which reads as an empty attribute.
        for (String s : option.split("[||]")) {
            result = elements.attr(s);
            if (s.toLowerCase().contains("style") && result.contains("url(")) {
                Matcher m = URL.matcher(result);
                if (m.find()) result = m.group(1);
                // Strip one pair of surrounding quotes left inside url(...).
                result = result.replaceAll("^['|\"](.*)['|\"]$", "$1");
            }
            if (!result.isEmpty() && !TextUtils.isEmpty(addUrl)) {
                if (JOIN_URL.matcher(s).find() && !SPEC_URL.matcher(result).find()) {
                    if (result.contains("http")) {
                        // Already absolute somewhere inside: cut to that point.
                        result = result.substring(result.indexOf("http"));
                    } else {
                        result = UriUtil.resolve(addUrl, result);
                    }
                }
            }
            if (!result.isEmpty()) {
                return result;
            }
        }
        return result;
    }

    /**
     * Returns the outer HTML of every element matched by {@code rule}.
     *
     * @param html markup to search
     * @param rule selector segments joined by {@code &&}
     * @return a new mutable list, empty when nothing matches
     */
    public List<String> parseDomForArray(String html, String rule) {
        Document doc = cache.getPdfa(html);
        List<String> items = new ArrayList<>();
        if (rule == null || rule.trim().isEmpty()) return items;
        for (Element element : selectAll(doc, parseHikerToJq(rule, false))) {
            items.add(element.outerHtml());
        }
        return items;
    }

    /**
     * Runs one selector step against the document (when {@code elements} is
     * still empty) or against the elements matched so far, then applies the
     * step's {@code :eq} index and {@code --} exclusions.
     */
    private Elements parseOneRule(Document doc, String parse, Elements elements) {
        Info info = getParseInfo(parse);
        if (elements.isEmpty()) {
            elements = doc.select(info.rule);
        } else {
            elements = elements.select(info.rule);
        }
        if (parse.contains(":eq")) {
            // Negative positions count back from the end. Anything that still
            // falls outside the matches selects nothing instead of being
            // handed to eq() as an invalid position.
            int position = info.index < 0 ? elements.size() + info.index : info.index;
            elements = position >= 0 && position < elements.size() ? elements.eq(position) : new Elements();
        }
        if (info.excludes != null && !elements.isEmpty()) {
            // Work on a copy so removals do not alter the cached document.
            elements = elements.clone();
            for (int i = 0; i < info.excludes.size(); i++) {
                elements.select(info.excludes.get(i)).remove();
            }
        }
        return elements;
    }

    /**
     * For every element matched by {@code rule}, evaluates {@code texts} and
     * {@code urls} against that element's outer HTML and joins the two
     * results as {@code text$url}.
     *
     * @param html   markup to search
     * @param rule   selector segments choosing the list entries
     * @param texts  rule producing the left-hand part of each entry (trimmed)
     * @param urls   rule producing the right-hand part of each entry
     * @param urlKey base URL passed on when evaluating {@code urls}
     * @return one string per matched element; an immutable empty list when
     *         nothing matches
     */
    public List<String> parseDomForList(String html, String rule, String texts, String urls, String urlKey) {
        if (rule == null || rule.trim().isEmpty()) return Collections.emptyList();
        Elements elements = selectAll(cache.getPdfa(html), parseHikerToJq(rule, false));
        if (elements.isEmpty()) return Collections.emptyList();
        List<String> items = new ArrayList<>();
        for (Element element : elements) {
            String fragment = element.outerHtml();
            items.add(parseDomForUrl(fragment, texts, "").trim() + '$' + parseDomForUrl(fragment, urls, urlKey));
        }
        return items;
    }
}
Loading…
Cancel
Save