mirror of https://github.com/FongMi/TV.git
parent
622f19a23a
commit
ad8fc1e527
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
@ -1 +0,0 @@ |
||||
// Dice-coefficient similarity between two strings (whitespace is stripped first).
// Returns a number in [0, 1]; 1 means the strings are identical after stripping.
function compareTwoStrings(first, second) {
    first = first.replace(/\s+/g, "");
    second = second.replace(/\s+/g, "");
    if (first === second) return 1;
    if (first.length < 2 || second.length < 2) return 0;

    // Count bigrams of the first string.
    const firstBigrams = new Map();
    for (let i = 0; i < first.length - 1; i++) {
        const bigram = first.substring(i, i + 2);
        const count = firstBigrams.has(bigram) ? firstBigrams.get(bigram) + 1 : 1;
        firstBigrams.set(bigram, count);
    }

    // Consume matching bigrams from the second string.
    let intersectionSize = 0;
    for (let i = 0; i < second.length - 1; i++) {
        const bigram = second.substring(i, i + 2);
        const count = firstBigrams.has(bigram) ? firstBigrams.get(bigram) : 0;
        if (count > 0) {
            firstBigrams.set(bigram, count - 1);
            intersectionSize++;
        }
    }

    return (2 * intersectionSize) / (first.length + second.length - 2);
}

// Rates mainString against every candidate in targetStrings.
// Returns { ratings, bestMatch, bestMatchIndex }.
function findBestMatch(mainString, targetStrings) {
    const ratings = [];
    let bestMatchIndex = 0;
    for (let i = 0; i < targetStrings.length; i++) {
        const target = targetStrings[i];
        const rating = compareTwoStrings(mainString, target);
        ratings.push({ target: target, rating: rating });
        if (rating > ratings[bestMatchIndex].rating) bestMatchIndex = i;
    }
    return { ratings: ratings, bestMatch: ratings[bestMatchIndex], bestMatchIndex: bestMatchIndex };
}

// Longest common substring via dynamic programming.
// Returns { length, sequence, offset }; offset is the start index within str1.
// NOTE: when the inputs share no characters, offset stays null (as in the original).
function lcs(str1, str2) {
    if (!str1 || !str2) return { length: 0, sequence: "", offset: 0 };

    const str1Length = str1.length;
    const str2Length = str2.length;

    // num[i][j] = length of the common suffix ending at str1[i] / str2[j].
    const num = new Array(str1Length);
    for (let i = 0; i < str1Length; i++) {
        num[i] = new Array(str2Length).fill(0);
    }

    let sequence = "";
    let maxlen = 0;
    let lastSubsBegin = 0;
    let thisSubsBegin = null;

    for (let i = 0; i < str1Length; i++) {
        for (let j = 0; j < str2Length; j++) {
            if (str1[i] !== str2[j]) {
                num[i][j] = 0;
            } else {
                num[i][j] = (i === 0 || j === 0) ? 1 : 1 + num[i - 1][j - 1];
                if (num[i][j] > maxlen) {
                    maxlen = num[i][j];
                    thisSubsBegin = i - num[i][j] + 1;
                    if (lastSubsBegin === thisSubsBegin) {
                        // Same run as before: extend the tracked substring.
                        sequence += str1[i];
                    } else {
                        // New, longer run: restart the tracked substring.
                        lastSubsBegin = thisSubsBegin;
                        sequence = str1.substr(lastSubsBegin, i + 1 - lastSubsBegin);
                    }
                }
            }
        }
    }

    return { length: maxlen, sequence: sequence, offset: thisSubsBegin };
}

// Runs lcs() of mainString against every candidate; the best match is the one
// with the longest common substring. Returns { allLCS, bestMatch, bestMatchIndex }.
function findBestLCS(mainString, targetStrings) {
    const results = [];
    let bestMatchIndex = 0;
    for (let i = 0; i < targetStrings.length; i++) {
        const target = targetStrings[i];
        const currentLCS = lcs(mainString, target);
        results.push({ target: target, lcs: currentLCS });
        if (currentLCS.length > results[bestMatchIndex].lcs.length) bestMatchIndex = i;
    }
    return { allLCS: results, bestMatch: results[bestMatchIndex], bestMatchIndex: bestMatchIndex };
}

export { compareTwoStrings, findBestMatch, findBestLCS };
||||
@ -1,32 +0,0 @@ |
||||
package com.fongmi.quickjs.bean; |
||||
|
||||
import org.jsoup.Jsoup; |
||||
import org.jsoup.nodes.Document; |
||||
|
||||
public class Cache { |
||||
|
||||
public String pdfhHtml; |
||||
public String pdfaHtml; |
||||
public Document pdfhDoc; |
||||
public Document pdfaDoc; |
||||
|
||||
public Document getPdfh(String html) { |
||||
updatePdfh(html); |
||||
return pdfhDoc; |
||||
} |
||||
|
||||
public Document getPdfa(String html) { |
||||
updatePdfa(html); |
||||
return pdfaDoc; |
||||
} |
||||
|
||||
private void updatePdfh(String html) { |
||||
if (html.equals(pdfhHtml)) return; |
||||
pdfhDoc = Jsoup.parse(pdfhHtml = html); |
||||
} |
||||
|
||||
private void updatePdfa(String html) { |
||||
if (html.equals(pdfaHtml)) return; |
||||
pdfaDoc = Jsoup.parse(pdfaHtml = html); |
||||
} |
||||
} |
||||
@ -1,42 +0,0 @@ |
||||
package com.fongmi.quickjs.bean; |
||||
|
||||
import java.util.ArrayList; |
||||
import java.util.Arrays; |
||||
import java.util.List; |
||||
|
||||
public class Info { |
||||
|
||||
public int index; |
||||
public String rule; |
||||
public List<String> excludes; |
||||
|
||||
public Info(String rule) { |
||||
this.rule = rule; |
||||
} |
||||
|
||||
public void setRule(String rule) { |
||||
this.rule = rule; |
||||
} |
||||
|
||||
public void setInfo(String pos) { |
||||
if (rule.contains("--")) { |
||||
String[] rules = rule.split("--"); |
||||
setExcludes(rules); |
||||
setRule(rules[0]); |
||||
} else if (pos.contains("--")) { |
||||
String[] rules = pos.split("--"); |
||||
setExcludes(rules); |
||||
pos = rules[0]; |
||||
} |
||||
try { |
||||
index = Integer.parseInt(pos.replace("eq(", "").replace(")", "")); |
||||
} catch (Exception ignored) { |
||||
index = 0; |
||||
} |
||||
} |
||||
|
||||
public void setExcludes(String[] rules) { |
||||
excludes = new ArrayList<>(Arrays.asList(rules)); |
||||
excludes.remove(0); |
||||
} |
||||
} |
||||
@ -1,171 +0,0 @@ |
||||
package com.fongmi.quickjs.utils; |
||||
|
||||
import android.text.TextUtils; |
||||
|
||||
import com.fongmi.quickjs.bean.Cache; |
||||
import com.fongmi.quickjs.bean.Info; |
||||
import com.github.catvod.utils.UriUtil; |
||||
|
||||
import org.jsoup.nodes.Document; |
||||
import org.jsoup.nodes.Element; |
||||
import org.jsoup.select.Elements; |
||||
|
||||
import java.util.ArrayList; |
||||
import java.util.Arrays; |
||||
import java.util.Collections; |
||||
import java.util.List; |
||||
import java.util.regex.Matcher; |
||||
import java.util.regex.Pattern; |
||||
|
||||
/**
 * Jsoup-backed evaluator for Hiker/CatVod style DOM-selection rules.
 * A rule is a space-separated chain of jsoup selectors, each optionally
 * carrying ":eq(n)" indexing or "--selector" exclusions, with an optional
 * trailing "&&option" that picks Text, Html, or an attribute value.
 * Parsed Documents are memoized in {@link Cache} keyed by the html string.
 */
public class Parser {

    // Extracts the payload of a CSS "url(...)" value.
    private final Pattern URL = Pattern.compile("url\\((.*?)\\)", Pattern.MULTILINE | Pattern.DOTALL);
    // Segment endings that must NOT receive an implicit ":eq(0)".
    private final Pattern NO_ADD = Pattern.compile(":eq|:lt|:gt|:first|:last|:not|:even|:odd|:has|:contains|:matches|:empty|^body$|^#");
    // Attribute names whose values should be resolved against the base url.
    private final Pattern JOIN_URL = Pattern.compile("(url|src|href|-original|-src|-play|-url|style)$|^(data-|url-|src-)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);
    // Schemes that are already absolute and must be left untouched.
    private final Pattern SPEC_URL = Pattern.compile("^(ftp|magnet|thunder|ws):", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);

    // Per-parser memoization of the last parsed html (one slot per channel).
    private final Cache cache;

    public Parser() {
        cache = new Cache();
    }

    /**
     * Splits one space-delimited rule segment into selector, ":eq(n)" index,
     * and "--" exclusion list. ":eq" takes precedence over "--" handling
     * (Info.setInfo also strips "--" found after the colon).
     */
    private Info getParseInfo(String rule) {
        Info info = new Info(rule);
        if (rule.contains(":eq")) {
            // "sel:eq(n)" -> selector before the first ':', position after it.
            info.setRule(rule.split(":")[0]);
            info.setInfo(rule.split(":")[1]);
        } else if (rule.contains("--")) {
            // "sel--ex1--ex2" -> selector plus exclusion selectors.
            String[] rules = rule.split("--");
            info.setExcludes(rules);
            info.setRule(rules[0]);
        }
        return info;
    }

    /**
     * Normalizes a Hiker rule into jq/jsoup form: each "&&"-joined segment
     * that does not already end in a positional/filter pseudo-selector gets
     * ":eq(0)" appended. When {@code first} is false, the final segment is
     * left un-indexed so it can match a list of elements.
     */
    private String parseHikerToJq(String parse, boolean first) {
        if (!parse.contains("&&")) {
            String[] split = parse.split(" ");
            Matcher m = NO_ADD.matcher(split[split.length - 1]);
            if (!m.find() && first) parse = parse + ":eq(0)";
            return parse;
        }
        String[] parses = parse.split("&&");
        List<String> items = new ArrayList<>();
        for (int i = 0; i < parses.length; i++) {
            String[] split = parses[i].split(" ");
            if (NO_ADD.matcher(split[split.length - 1]).find()) {
                items.add(parses[i]);
            } else {
                // In list mode the last segment stays un-indexed.
                if (!first && i >= parses.length - 1) items.add(parses[i]);
                else items.add(parses[i] + ":eq(0)");
            }
        }
        return TextUtils.join(" ", items);
    }

    /**
     * "pdfh": selects a single value from {@code html}. The rule may end in
     * "&&Text", "&&Html", or "&&attr[|attr2...]"; attribute values that look
     * like urls are resolved against {@code addUrl}. Returns "" when any
     * selector segment matches nothing.
     */
    public String parseDomForUrl(String html, String rule, String addUrl) {
        Document doc = cache.getPdfh(html);
        // Whole-document shortcuts.
        if ("body&&Text".equals(rule) || "Text".equals(rule)) {
            return doc.text();
        } else if ("body&&Html".equals(rule) || "Html".equals(rule)) {
            return doc.html();
        }
        String option = "";
        if (rule.contains("&&")) {
            // The last "&&" segment is the output option; the rest is the selector.
            String[] rs = rule.split("&&");
            option = rs[rs.length - 1];
            List<String> excludes = new ArrayList<>(Arrays.asList(rs));
            excludes.remove(rs.length - 1);
            rule = TextUtils.join("&&", excludes);
        }
        rule = parseHikerToJq(rule, true);
        String[] parses = rule.split(" ");
        Elements elements = new Elements();
        for (String parse : parses) {
            elements = parseOneRule(doc, parse, elements);
            if (elements.isEmpty()) return "";
        }
        if (TextUtils.isEmpty(option)) return elements.outerHtml();
        if ("Text".equals(option)) {
            return elements.text();
        } else if ("Html".equals(option)) {
            return elements.html();
        } else {
            String result = "";
            // NOTE(review): "[||]" is a regex CHARACTER CLASS, so this splits on
            // every single '|' and yields empty tokens for "a||b". Empty tokens
            // make elements.attr("") return "" and are skipped by the emptiness
            // check below — confirm this matches upstream intent before changing.
            for (String s : option.split("[||]")) {
                result = elements.attr(s);
                if (s.toLowerCase().contains("style") && result.contains("url(")) {
                    // Pull the raw url out of a CSS "url(...)" value, then strip
                    // one layer of surrounding single/double quotes.
                    Matcher m = URL.matcher(result);
                    if (m.find()) result = m.group(1);
                    result = result.replaceAll("^['|\"](.*)['|\"]$", "$1");
                }
                if (!result.isEmpty() && !addUrl.isEmpty()) {
                    if (JOIN_URL.matcher(s).find() && !SPEC_URL.matcher(result).find()) {
                        if (result.contains("http")) {
                            // Keep from the embedded absolute url onward.
                            result = result.substring(result.indexOf("http"));
                        } else {
                            result = UriUtil.resolve(addUrl, result);
                        }
                    }
                }
                // First attribute that produced a non-empty value wins.
                if (!result.isEmpty()) {
                    return result;
                }
            }
            return result;
        }
    }

    /**
     * "pdfa": selects a list of elements and returns each one's outer html.
     * Returns an empty list as soon as any selector segment matches nothing.
     */
    public List<String> parseDomForArray(String html, String rule) {
        Document doc = cache.getPdfa(html);
        rule = parseHikerToJq(rule, false);
        String[] parses = rule.split(" ");
        Elements elements = new Elements();
        for (String parse : parses) {
            elements = parseOneRule(doc, parse, elements);
            if (elements.isEmpty()) return new ArrayList<>();
        }
        List<String> items = new ArrayList<>();
        for (Element element : elements) items.add(element.outerHtml());
        return items;
    }

    /**
     * Applies one rule segment: select (from the document on the first step,
     * from the previous result afterwards), then apply ":eq(n)" indexing
     * (negative n counts from the end), then remove exclusions. Exclusions
     * operate on a clone so the cached Document is never mutated.
     */
    private Elements parseOneRule(Document doc, String parse, Elements elements) {
        Info info = getParseInfo(parse);
        if (elements.isEmpty()) {
            elements = doc.select(info.rule);
        } else {
            elements = elements.select(info.rule);
        }
        if (parse.contains(":eq")) {
            if (info.index < 0) {
                // Negative index counts back from the end of the match list.
                elements = elements.eq(elements.size() + info.index);
            } else {
                elements = elements.eq(info.index);
            }
        }
        if (info.excludes != null && !elements.isEmpty()) {
            elements = elements.clone();
            for (int i = 0; i < info.excludes.size(); i++) {
                elements.select(info.excludes.get(i)).remove();
            }
        }
        return elements;
    }

    /**
     * For each element matched by {@code rule}, builds a "text$url" entry
     * using the {@code texts} and {@code urls} sub-rules; {@code urlKey} is
     * the base url handed to the url sub-rule for relative-url resolution.
     * Returns an empty list when any selector segment matches nothing.
     */
    public List<String> parseDomForList(String html, String rule, String texts, String urls, String urlKey) {
        String[] parses = parseHikerToJq(rule, false).split(" ");
        Elements elements = new Elements();
        for (String parse : parses) {
            elements = parseOneRule(cache.getPdfa(html), parse, elements);
            if (elements.isEmpty()) return Collections.emptyList();
        }
        List<String> items = new ArrayList<>();
        for (Element element : elements) {
            // Reuse of `html` here deliberately re-keys the pdfh cache per element.
            html = element.outerHtml();
            items.add(parseDomForUrl(html, texts, "").trim() + '$' + parseDomForUrl(html, urls, urlKey));
        }
        return items;
    }
}
||||
Loading…
Reference in new issue