diff --git a/pyramid/src/main/python/base/spider.py b/pyramid/src/main/python/base/spider.py index db8613304..64f64e2fb 100644 --- a/pyramid/src/main/python/base/spider.py +++ b/pyramid/src/main/python/base/spider.py @@ -43,6 +43,7 @@ class Spider(metaclass=ABCMeta): def searchContent(self, key, quick): pass + @abstractmethod def searchContentPage(self, key, quick, pg): pass @@ -66,6 +67,7 @@ class Spider(metaclass=ABCMeta): def getName(self): pass + @abstractmethod def destroy(self): pass @@ -80,6 +82,13 @@ class Spider(metaclass=ABCMeta): path = os.path.join(os.path.join(cache_dir, 'py'), f'{name}.py') return SourceFileLoader(name, path).load_module() + def regStr(self, reg, src, group=1): + m = re.search(reg, src) + src = '' + if m: + src = m.group(group) + return src + def removeHtmlTags(self, src): clean = re.compile('<.*?>') return re.sub(clean, '', src)