|
|
|
|
@ -60,12 +60,16 @@ public class M3u8 { |
|
|
|
|
linesplit = "\r\n"; |
|
|
|
|
String[] lines = m3u8content.split(linesplit); |
|
|
|
|
|
|
|
|
|
HashMap<String, Integer> preUrlMap = new HashMap<String, Integer>(); |
|
|
|
|
for (int i = 0; i < lines.length; i++) { |
|
|
|
|
String line = lines[i]; |
|
|
|
|
if (line.length() == 0 || line.charAt(0) == '#') continue; |
|
|
|
|
// 第一阶段:按去掉文件后缀后统计各前缀出现次数
|
|
|
|
|
HashMap<String, Integer> preUrlMap = new HashMap<>(); |
|
|
|
|
for (String line : lines) { |
|
|
|
|
if (line.length() == 0 || line.charAt(0) == '#') { |
|
|
|
|
continue; |
|
|
|
|
} |
|
|
|
|
int ilast = line.lastIndexOf('.'); |
|
|
|
|
if (ilast <= 4) continue; |
|
|
|
|
if (ilast <= 4) { |
|
|
|
|
continue; |
|
|
|
|
} |
|
|
|
|
String preUrl = line.substring(0, ilast - 4); |
|
|
|
|
Integer cnt = preUrlMap.get(preUrl); |
|
|
|
|
if (cnt != null) { |
|
|
|
|
@ -74,17 +78,22 @@ public class M3u8 { |
|
|
|
|
preUrlMap.put(preUrl, 1); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (preUrlMap.size() <= 1) return null; |
|
|
|
|
boolean domainFiltering = false; |
|
|
|
|
if (maxPercent(preUrlMap) < 0.8) { |
|
|
|
|
// 用域名重建 preUrlMap
|
|
|
|
|
// 尝试判断域名,取同域名最多的链接,其它域名当作广告去除
|
|
|
|
|
preUrlMap.clear(); |
|
|
|
|
for (int i = 0; i < lines.length; i++) { |
|
|
|
|
String line = lines[i]; |
|
|
|
|
if (line.length() == 0 || line.charAt(0) == '#') continue; |
|
|
|
|
if (!line.startsWith("http://") && !line.startsWith("https://")) return null; |
|
|
|
|
int ifirst = line.indexOf('/', 9); |
|
|
|
|
if (ifirst <= 0) continue; |
|
|
|
|
for (String line : lines) { |
|
|
|
|
if (line.length() == 0 || line.charAt(0) == '#') { |
|
|
|
|
continue; |
|
|
|
|
} |
|
|
|
|
if (!line.startsWith("http://") && !line.startsWith("https://")) { |
|
|
|
|
return null; |
|
|
|
|
} |
|
|
|
|
int ifirst = line.indexOf('/', 9); // skip http:// 或 https://
|
|
|
|
|
if (ifirst <= 0) { |
|
|
|
|
continue; |
|
|
|
|
} |
|
|
|
|
String preUrl = line.substring(0, ifirst); |
|
|
|
|
Integer cnt = preUrlMap.get(preUrl); |
|
|
|
|
if (cnt != null) { |
|
|
|
|
@ -93,21 +102,14 @@ public class M3u8 { |
|
|
|
|
preUrlMap.put(preUrl, 1); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (preUrlMap.size() <= 1) return null; |
|
|
|
|
|
|
|
|
|
// 如果所有域名都超过timesNoAd条,说明没有广告,直接返回 null
|
|
|
|
|
boolean allExceed6 = true; |
|
|
|
|
for (Map.Entry<String, Integer> entry : preUrlMap.entrySet()) { |
|
|
|
|
if (entry.getValue() <= timesNoAd) { |
|
|
|
|
allExceed6 = false; |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
if (maxPercent(preUrlMap) < 0.8) { |
|
|
|
|
return null; //视频非广告片断占比不够大
|
|
|
|
|
} |
|
|
|
|
if (allExceed6) return null; |
|
|
|
|
domainFiltering = true; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// 找出出现次数最多的域名前缀
|
|
|
|
|
// 找出出现次数最多的 key(文件前缀或域名均适用)
|
|
|
|
|
int maxTimes = 0; |
|
|
|
|
String maxTimesPreUrl = ""; |
|
|
|
|
for (Map.Entry<String, Integer> entry : preUrlMap.entrySet()) { |
|
|
|
|
@ -120,6 +122,7 @@ public class M3u8 { |
|
|
|
|
|
|
|
|
|
boolean dealedExtXKey = false; |
|
|
|
|
for (int i = 0; i < lines.length; ++i) { |
|
|
|
|
// 处理解密KEY的绝对路径拼接
|
|
|
|
|
if (!dealedExtXKey && lines[i].startsWith("#EXT-X-KEY")) { |
|
|
|
|
String keyUrl = ""; |
|
|
|
|
int start = lines[i].indexOf("URI=\""); |
|
|
|
|
@ -132,55 +135,58 @@ public class M3u8 { |
|
|
|
|
if (!keyUrl.startsWith("http://") && !keyUrl.startsWith("https://")) { |
|
|
|
|
String newKeyUrl; |
|
|
|
|
if (keyUrl.charAt(0) == '/') { |
|
|
|
|
int ifirst = tsUrlPre.indexOf('/', 9); |
|
|
|
|
int ifirst = tsUrlPre.indexOf('/', 9); //skip https://, http://
|
|
|
|
|
newKeyUrl = tsUrlPre.substring(0, ifirst) + keyUrl; |
|
|
|
|
} else { |
|
|
|
|
} else |
|
|
|
|
newKeyUrl = tsUrlPre + keyUrl; |
|
|
|
|
} |
|
|
|
|
lines[i] = lines[i].replace("URI=\"" + keyUrl + "\"", "URI=\"" + newKeyUrl + "\""); |
|
|
|
|
} |
|
|
|
|
dealedExtXKey = true; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (lines[i].length() == 0 || lines[i].charAt(0) == '#') { |
|
|
|
|
continue; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
boolean keep = false; |
|
|
|
|
if (lines[i].startsWith("http://") || lines[i].startsWith("https://")) { |
|
|
|
|
int ifirst = lines[i].indexOf('/', 9); |
|
|
|
|
String domain; |
|
|
|
|
if (ifirst > 0) { |
|
|
|
|
domain = lines[i].substring(0, ifirst); |
|
|
|
|
// 根据判断方式过滤
|
|
|
|
|
if (!domainFiltering) { |
|
|
|
|
if (lines[i].startsWith(maxTimesPreUrl)) { |
|
|
|
|
if (!lines[i].startsWith("http://") && !lines[i].startsWith("https://")) { |
|
|
|
|
if (lines[i].charAt(0) == '/') { |
|
|
|
|
int ifirst = tsUrlPre.indexOf('/', 9); //skip https://, http://
|
|
|
|
|
lines[i] = tsUrlPre.substring(0, ifirst) + lines[i]; |
|
|
|
|
} else |
|
|
|
|
lines[i] = tsUrlPre + lines[i]; |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
domain = lines[i]; |
|
|
|
|
} |
|
|
|
|
Integer count = preUrlMap.get(domain); |
|
|
|
|
if (count != null && count > timesNoAd) { |
|
|
|
|
keep = true; |
|
|
|
|
} else if (domain.equals(maxTimesPreUrl)) { |
|
|
|
|
keep = true; |
|
|
|
|
if (i > 0 && lines[i - 1].length() > 0 && lines[i - 1].charAt(0) == '#') { |
|
|
|
|
lines[i - 1] = ""; |
|
|
|
|
} |
|
|
|
|
lines[i] = ""; |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
// 相对路径默认保留
|
|
|
|
|
keep = true; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (keep) { |
|
|
|
|
if (!lines[i].startsWith("http://") && !lines[i].startsWith("https://")) { |
|
|
|
|
if (lines[i].charAt(0) == '/') { |
|
|
|
|
// 域名过滤模式:先转换为绝对 URL
|
|
|
|
|
String absoluteUrl = lines[i]; |
|
|
|
|
if (!absoluteUrl.startsWith("http://") && !absoluteUrl.startsWith("https://")) { |
|
|
|
|
if (absoluteUrl.charAt(0) == '/') { |
|
|
|
|
int ifirst = tsUrlPre.indexOf('/', 9); |
|
|
|
|
lines[i] = tsUrlPre.substring(0, ifirst) + lines[i]; |
|
|
|
|
absoluteUrl = tsUrlPre.substring(0, ifirst) + absoluteUrl; |
|
|
|
|
} else { |
|
|
|
|
lines[i] = tsUrlPre + lines[i]; |
|
|
|
|
absoluteUrl = tsUrlPre + absoluteUrl; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
if (i > 0 && lines[i - 1].length() > 0 && lines[i - 1].charAt(0) == '#') { |
|
|
|
|
lines[i - 1] = ""; |
|
|
|
|
// 提取域名部分(http://xxx或https://xxx)
|
|
|
|
|
int ifirst = absoluteUrl.indexOf('/', 9); |
|
|
|
|
String domain = (ifirst > 0) ? absoluteUrl.substring(0, ifirst) : absoluteUrl; |
|
|
|
|
// 保留条件:域名等于出现次数最多的,或者该域名出现次数超过timesNoAd次
|
|
|
|
|
Integer cnt = preUrlMap.get(domain); |
|
|
|
|
if (domain.equals(maxTimesPreUrl) || (cnt != null && cnt >= timesNoAd)) { |
|
|
|
|
lines[i] = absoluteUrl; |
|
|
|
|
} else { |
|
|
|
|
if (i > 0 && lines[i - 1].length() > 0 && lines[i - 1].charAt(0) == '#') { |
|
|
|
|
lines[i - 1] = ""; |
|
|
|
|
} |
|
|
|
|
lines[i] = ""; |
|
|
|
|
} |
|
|
|
|
lines[i] = ""; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
return String.join(linesplit, lines); |
|
|
|
|
|