|
|
|
|
@ -25,12 +25,14 @@ public class M3u8 { |
|
|
|
|
private static final Pattern REGEX_X_DISCONTINUITY = Pattern.compile("#EXT-X-DISCONTINUITY[\\s\\S]*?(?=#EXT-X-DISCONTINUITY|$)"); |
|
|
|
|
private static final Pattern REGEX_MEDIA_DURATION = Pattern.compile(TAG_MEDIA_DURATION + ":([\\d\\.]+)\\b"); |
|
|
|
|
private static final Pattern REGEX_URI = Pattern.compile("URI=\"(.+?)\""); |
|
|
|
|
public static int currentAdCount; |
|
|
|
|
|
|
|
|
|
public static boolean isAd(String regex) { |
|
|
|
|
return regex.contains(TAG_DISCONTINUITY) || regex.contains(TAG_MEDIA_DURATION) || regex.contains(TAG_ENDLIST) || regex.contains(TAG_KEY) || M3u8.isDouble(regex); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
public static String purify(String tsUrlPre, String m3u8content) { |
|
|
|
|
currentAdCount = 0; |
|
|
|
|
if (null == m3u8content || m3u8content.length() == 0) return null; |
|
|
|
|
if (!m3u8content.startsWith("#EXTM3U")) return null; |
|
|
|
|
String result = removeMinorityUrl(tsUrlPre, m3u8content); |
|
|
|
|
@ -106,6 +108,14 @@ public class M3u8 { |
|
|
|
|
if (maxPercent(preUrlMap) < 0.8) { |
|
|
|
|
return null; //视频非广告片断占比不够大
|
|
|
|
|
} |
|
|
|
|
boolean allDomainsExceedThreshold = true; |
|
|
|
|
for (Integer count : preUrlMap.values()) { |
|
|
|
|
if (count <= 15) { |
|
|
|
|
allDomainsExceedThreshold = false; |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
if (allDomainsExceedThreshold) return null; |
|
|
|
|
domainFiltering = true; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@ -162,6 +172,7 @@ public class M3u8 { |
|
|
|
|
lines[i - 1] = ""; |
|
|
|
|
} |
|
|
|
|
lines[i] = ""; |
|
|
|
|
currentAdCount+=1; |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
// 域名过滤模式:先转换为绝对 URL
|
|
|
|
|
@ -186,6 +197,7 @@ public class M3u8 { |
|
|
|
|
lines[i - 1] = ""; |
|
|
|
|
} |
|
|
|
|
lines[i] = ""; |
|
|
|
|
currentAdCount+=1; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
@ -224,19 +236,47 @@ public class M3u8 { |
|
|
|
|
|
|
|
|
|
private static String scan(String line, List<String> ads) { |
|
|
|
|
Matcher m1 = REGEX_X_DISCONTINUITY.matcher(line); |
|
|
|
|
List<String> needRemoveAd = new ArrayList<>(); |
|
|
|
|
while (m1.find()) { |
|
|
|
|
String group = m1.group(); |
|
|
|
|
BigDecimal t = BigDecimal.ZERO; |
|
|
|
|
String groupCleaned = group.replace(TAG_ENDLIST, ""); |
|
|
|
|
Matcher m2 = REGEX_MEDIA_DURATION.matcher(group); |
|
|
|
|
if (m2.find()) t = t.add(new BigDecimal(m2.group(1))); |
|
|
|
|
for (String ad : ads) if (t.toString().startsWith(ad)) line = line.replace(group.replace(TAG_ENDLIST, ""), ""); |
|
|
|
|
BigDecimal ft = BigDecimal.ZERO; |
|
|
|
|
BigDecimal lt = BigDecimal.ZERO; |
|
|
|
|
int tCount = 0; |
|
|
|
|
while (m2.find()) { |
|
|
|
|
if (ft.equals(BigDecimal.ZERO))ft = new BigDecimal(m2.group(1)); |
|
|
|
|
lt = new BigDecimal(m2.group(1)); |
|
|
|
|
tCount+=1; |
|
|
|
|
} |
|
|
|
|
String ftStr = ft.toString(); |
|
|
|
|
String ltStr = lt.toString(); |
|
|
|
|
for (String ad : ads) { |
|
|
|
|
if (ad.startsWith("-")) { |
|
|
|
|
String adClean = ad.substring(1); |
|
|
|
|
if (ltStr.startsWith(adClean)) { |
|
|
|
|
needRemoveAd.add(groupCleaned); |
|
|
|
|
currentAdCount+=tCount; |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
if (ftStr.startsWith(ad)) { |
|
|
|
|
needRemoveAd.add(groupCleaned); |
|
|
|
|
currentAdCount+=tCount; |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
for (String rem : needRemoveAd) { |
|
|
|
|
line = line.replace(rem, ""); |
|
|
|
|
} |
|
|
|
|
return line; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
private static boolean isDouble(String ad) { |
|
|
|
|
try { |
|
|
|
|
return Double.parseDouble(ad) > 0; |
|
|
|
|
return Double.parseDouble(ad) != 0; |
|
|
|
|
} catch (Exception e) { |
|
|
|
|
return false; |
|
|
|
|
} |
|
|
|
|
|