diff --git a/quickjs/src/main/java/com/fongmi/quickjs/bean/Info.java b/quickjs/src/main/java/com/fongmi/quickjs/bean/Info.java index 95d9fb4a0..74fd4da18 100644 --- a/quickjs/src/main/java/com/fongmi/quickjs/bean/Info.java +++ b/quickjs/src/main/java/com/fongmi/quickjs/bean/Info.java @@ -29,8 +29,9 @@ public class Info { pos = rules[0]; } try { - index = Integer.parseInt(pos.split("\\(")[1].split("\\)")[0]); + index = Integer.parseInt(pos.replace("eq(", "").replace(")", "")); } catch (Exception ignored) { + index = 0; } } diff --git a/quickjs/src/main/java/com/fongmi/quickjs/method/Global.java b/quickjs/src/main/java/com/fongmi/quickjs/method/Global.java index a7fc38879..b39d47f31 100644 --- a/quickjs/src/main/java/com/fongmi/quickjs/method/Global.java +++ b/quickjs/src/main/java/com/fongmi/quickjs/method/Global.java @@ -2,6 +2,7 @@ package com.fongmi.quickjs.method; import androidx.annotation.Keep; import androidx.annotation.NonNull; +import androidx.media3.common.util.UriUtil; import com.fongmi.quickjs.bean.Req; import com.fongmi.quickjs.utils.Connect; @@ -121,31 +122,31 @@ public class Global { @Keep @JSMethod public String pd(String html, String rule, String urlKey) { - return parser.pdfh(html, rule, urlKey); + return parser.parseDomForUrl(html, rule, urlKey); } @Keep @JSMethod public String pdfh(String html, String rule) { - return parser.pdfh(html, rule, ""); + return parser.parseDomForUrl(html, rule, ""); } @Keep @JSMethod public JSArray pdfa(String html, String rule) { - return JSUtil.toArray(ctx, parser.pdfa(html, rule)); + return JSUtil.toArray(ctx, parser.parseDomForArray(html, rule)); } @Keep @JSMethod public JSArray pdfl(String html, String rule, String texts, String urls, String urlKey) { - return JSUtil.toArray(ctx, parser.pdfl(html, rule, texts, urls, urlKey)); + return JSUtil.toArray(ctx, parser.parseDomForList(html, rule, texts, urls, urlKey)); } @Keep @JSMethod public String joinUrl(String parent, String child) { - return parser.joinUrl(parent, child); + return UriUtil.resolve(parent, child); } @Keep diff --git a/quickjs/src/main/java/com/fongmi/quickjs/utils/Parser.java b/quickjs/src/main/java/com/fongmi/quickjs/utils/Parser.java index 5fc2ac321..1aece436d 100644 --- a/quickjs/src/main/java/com/fongmi/quickjs/utils/Parser.java +++ b/quickjs/src/main/java/com/fongmi/quickjs/utils/Parser.java @@ -20,7 +20,7 @@ import java.util.regex.Pattern; public class Parser { - private final Pattern p1 = Pattern.compile("url\\((.*?)\\)", Pattern.MULTILINE | Pattern.DOTALL); + private final Pattern URL = Pattern.compile("url\\((.*?)\\)", Pattern.MULTILINE | Pattern.DOTALL); private final Pattern NO_ADD = Pattern.compile(":eq|:lt|:gt|:first|:last|:not|:even|:odd|:has|:contains|:matches|:empty|^body$|^#"); private final Pattern JOIN_URL = Pattern.compile("(url|src|href|-original|-src|-play|-url|style)$|^(data-|url-|src-)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE); private final Pattern SPEC_URL = Pattern.compile("^(ftp|magnet|thunder|ws):", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE); @@ -47,7 +47,9 @@ public class Parser { private String parseHikerToJq(String parse, boolean first) { if (!parse.contains("&&")) { String[] split = parse.split(" "); - return (NO_ADD.matcher(split[split.length - 1]).find() || !first) ? parse : parse + ":eq(0)"; + Matcher m = NO_ADD.matcher(split[split.length - 1]); + if (!m.find() && first) parse = parse + ":eq(0)"; + return parse; } String[] parses = parse.split("&&"); List items = new ArrayList<>(); @@ -63,59 +65,7 @@ public class Parser { return TextUtils.join(" ", items); } - private Elements parseOneRule(Document doc, String parse, Elements elements) { - Info info = getParseInfo(parse); - if (parse.contains(":eq")) { - if (elements.isEmpty()) { - if (info.index < 0) { - Elements r = doc.select(info.rule); - elements = r.eq(r.size() + info.index); - } else { - elements = doc.select(info.rule).eq(info.index); - } - } else { - if (info.index < 0) { - Elements r = elements.select(info.rule); - elements = r.eq(r.size() + info.index); - } else { - elements = elements.select(info.rule).eq(info.index); - } - } - } else { - if (elements.isEmpty()) { - elements = doc.select(parse); - } else { - elements = elements.select(parse); - } - } - if (info.excludes != null && !elements.isEmpty()) { - elements = elements.clone(); - for (String exclude : info.excludes) { - elements.select(exclude).remove(); - } - } - return elements; - } - - public String joinUrl(String parent, String child) { - return UriUtil.resolve(parent, child); - } - - public List pdfa(String html, String rule) { - Document doc = cache.getPdfa(html); - rule = parseHikerToJq(rule, false); - String[] parses = rule.split(" "); - Elements elements = new Elements(); - for (String parse : parses) { - elements = parseOneRule(doc, parse, elements); - if (elements.isEmpty()) return Collections.emptyList(); - } - List items = new ArrayList<>(); - for (Element element : elements) items.add(element.outerHtml()); - return items; - } - - public String pdfh(String html, String rule, String addUrl) { + public String parseDomForUrl(String html, String rule, String addUrl) { Document doc = cache.getPdfh(html); if ("body&&Text".equals(rule) || "Text".equals(rule)) { return doc.text(); @@ -143,23 +93,69 @@ public class Parser { } else if ("Html".equals(option)) { return elements.html(); } else { - String result = elements.attr(option); - if (option.toLowerCase().contains("style") && result.contains("url(")) { - Matcher matcher = p1.matcher(result); - if (matcher.find()) result = matcher.group(1); - if (result != null) result = result.replaceAll("^['|\"](.*)['|\"]$", "$1"); - } - if (!TextUtils.isEmpty(result) && !TextUtils.isEmpty(addUrl)) { - if (JOIN_URL.matcher(option).find() && !SPEC_URL.matcher(result).find()) { - if (result.contains("http")) result = result.substring(result.indexOf("http")); - else result = joinUrl(addUrl, result); + String result = ""; + for (String s : option.split("[||]")) { + result = elements.attr(s); + if (s.toLowerCase().contains("style") && result.contains("url(")) { + Matcher m = URL.matcher(result); + if (m.find()) result = m.group(1); + result = result.replaceAll("^['|\"](.*)['|\"]$", "$1"); + } + if (!result.isEmpty() && !addUrl.isEmpty()) { + if (JOIN_URL.matcher(s).find() && !SPEC_URL.matcher(result).find()) { + if (result.contains("http")) { + result = result.substring(result.indexOf("http")); + } else { + result = UriUtil.resolve(addUrl, result); + } + } + } + if (!result.isEmpty()) { + return result; } } return result; } } - public List pdfl(String html, String rule, String texts, String urls, String urlKey) { + public List parseDomForArray(String html, String rule) { + Document doc = cache.getPdfa(html); + rule = parseHikerToJq(rule, false); + String[] parses = rule.split(" "); + Elements elements = new Elements(); + for (String parse : parses) { + elements = parseOneRule(doc, parse, elements); + if (elements.isEmpty()) return new ArrayList<>(); + } + List items = new ArrayList<>(); + for (Element element : elements) items.add(element.outerHtml()); + return items; + } + + private Elements parseOneRule(Document doc, String parse, Elements elements) { + Info info = getParseInfo(parse); + if (elements.isEmpty()) { + elements = doc.select(info.rule); + } else { + elements = elements.select(info.rule); + } + if (parse.contains(":eq")) { + if (info.index < 0) { + elements = elements.eq(elements.size() + info.index); + } else { + elements = elements.eq(info.index); + } + } + if (info.excludes != null && !elements.isEmpty()) { + elements = elements.clone(); + for (int i = 0; i < info.excludes.size(); i++) { + elements.select(info.excludes.get(i)).remove(); + } + } + return elements; + } + + public List parseDomForList(String html, String rule, String texts, String urls, String urlKey) { String[] parses = parseHikerToJq(rule, false).split(" "); Elements elements = new Elements(); for (String parse : parses) { @@ -169,7 +165,7 @@ public class Parser { List items = new ArrayList<>(); for (Element element : elements) { html = element.outerHtml(); - items.add(pdfh(html, texts, "").trim() + '$' + pdfh(html, urls, urlKey)); + items.add(parseDomForUrl(html, texts, "").trim() + '$' + parseDomForUrl(html, urls, urlKey)); } return items; }