You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
FONGMITV/chaquo/src/main/python/base/spider.py

131 lines
4.0 KiB

import re
import os
import json
import time
import requests
from lxml import etree
from com.chaquo.python import Python
from abc import abstractmethod, ABCMeta
from importlib.machinery import SourceFileLoader
from com.github.catvod import Proxy
class Spider(metaclass=ABCMeta):
_instance = None
def __new__(cls, *args, **kwargs):
if cls._instance:
return cls._instance
else:
cls._instance = super().__new__(cls)
return cls._instance
@abstractmethod
def init(self, extend=""):
pass
def homeContent(self, filter):
pass
def homeVideoContent(self):
pass
def categoryContent(self, tid, pg, filter, extend):
pass
def detailContent(self, ids):
pass
def searchContent(self, key, quick, pg="1"):
pass
def playerContent(self, flag, id, vipFlags):
pass
def liveContent(self):
pass
def localProxy(self, param):
pass
def isVideoFormat(self, url):
pass
def manualVideoCheck(self):
pass
def destroy(self):
pass
def getName(self):
pass
def getDependence(self):
return []
def loadSpider(self, name):
return self.loadModule(name).Spider()
def loadModule(self, name):
cache_dir = Python.getPlatform().getApplication().getCacheDir().getAbsolutePath()
path = os.path.join(os.path.join(cache_dir, 'py'), f'{name}.py')
return SourceFileLoader(name, path).load_module()
def regStr(self, reg, src, group=1):
m = re.search(reg, src)
src = ''
if m:
src = m.group(group)
return src
def removeHtmlTags(self, src):
clean = re.compile('<.*?>')
return re.sub(clean, '', src)
def cleanText(self, src):
clean = re.sub('[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF\U0001F680-\U0001F6FF\U0001F1E0-\U0001F1FF]', '', src)
return clean
def fetch(self, url, params=None, cookies=None, headers=None, timeout=5, verify=True, stream=False, allow_redirects = True):
rsp = requests.get(url, params=params, cookies=cookies, headers=headers, timeout=timeout, verify=verify, stream=stream, allow_redirects=allow_redirects)
rsp.encoding = 'utf-8'
return rsp
def post(self, url, params=None, data=None, json=None, cookies=None, headers=None, timeout=5, verify=True, stream=False, allow_redirects = True):
rsp = requests.post(url, params=params, data=data, json=json, cookies=cookies, headers=headers, timeout=timeout, verify=verify, stream=stream, allow_redirects=allow_redirects)
rsp.encoding = 'utf-8'
return rsp
def html(self, content):
return etree.HTML(content)
def getProxyUrl(self, local=True):
return f'{Proxy.getUrl(local)}?do=py'
def getCache(self, key):
value = self.fetch(f'http://127.0.0.1:{Proxy.getPort()}/cache?do=get&key={key}', timeout=5).text
if len(value) > 0:
if value.startswith('{') and value.endswith('}') or value.startswith('[') and value.endswith(']'):
value = json.loads(value)
if type(value) == dict:
if not 'expiresAt' in value or value['expiresAt'] >= int(time.time()):
return value
else:
self.delCache(key)
return None
return value
else:
return None
def setCache(self, key, value):
if type(value) in [int, float]:
value = str(value)
if len(value) > 0:
if type(value) == dict or type(value) == list:
value = json.dumps(value, ensure_ascii=False)
r = self.post(f'http://127.0.0.1:{Proxy.getPort()}/cache?do=set&key={key}', data={"value": value}, timeout=5)
return 'succeed' if r.status_code == 200 else 'failed'
def delCache(self, key):
r = self.fetch(f'http://127.0.0.1:{Proxy.getPort()}/cache?do=del&key={key}', timeout=5)
return 'succeed' if r.status_code == 200 else 'failed'