init
This commit is contained in:
79
spider/normalutils/spider.py
Normal file
79
spider/normalutils/spider.py
Normal file
@@ -0,0 +1,79 @@
|
||||
import logging
from typing import Callable, Optional, Type

import httpx
from fake_useragent import UserAgent

import httpchoices
from validator import Validator
|
||||
|
||||
|
||||
class NoValidator(Validator):
    """Validator that accepts any data without inspecting it."""

    def is_valid(self, raise_error: bool = False) -> bool:
        """Report the data as valid unconditionally.

        Args:
            raise_error: Accepted for call compatibility and ignored, because
                this validator never rejects anything. Defaults to ``False``
                so the method can also be called without arguments.

        Returns:
            Always ``True``.
        """
        return True
|
||||
|
||||
|
||||
class Spider:
    """Fetch a URL, parse the response text, and validate the parsed data.

    Subclasses configure the behaviour through two class attributes:

    * ``validator_class`` -- class that is instantiated with the parsed data;
      the result of its ``is_valid(raise_exception)`` call decides whether the
      spider is marked as verified. Defaults to ``NoValidator``.
    * ``parser`` -- callable that receives the response text and returns the
      parsed data. It is looked up on the class (not the instance), so a plain
      function assigned in the class body is called with the text only.

    The HTTP request is sent lazily the first time the data is needed, and the
    response text and parsed data are cached on the instance.
    """

    validator_class: Type[Validator] = NoValidator
    parser: Optional[Callable[[str], dict]] = None

    def __init__(
        self,
        url,
        method: str = "GET",
        request_data: Optional[dict] = None,
        params: Optional[dict] = None,
    ) -> None:
        """Record the request description for a later fetch.

        Args:
            url: Address to request.
            method: HTTP method name; checked with
                ``httpchoices.HttpMethod.is_valid(method, True)``.
            request_data: Passed to the HTTP client as ``data``.
            params: Passed to the HTTP client as ``params``.

        NOTE(review): the ``True`` flag presumably makes ``HttpMethod.is_valid``
        raise for an unsupported method -- confirm against ``httpchoices``.
        """
        self.useragent = UserAgent()
        # Every spider sends a randomly chosen User-Agent header.
        self.headers = {"User-Agent": self.useragent.random}
        self.__data = {}
        self.__html = ""
        self.url = url
        self.method = method
        self.has_verified = False
        httpchoices.HttpMethod.is_valid(method, True)
        self.__request_parameters = {
            "data": request_data,
            "params": params,
        }

    def get_parser(self):
        """Return the parser callable configured on the class.

        The attribute is read from the class so that a plain function defined
        in the class body is returned unbound and can be called with the
        response text alone.

        Raises:
            NotImplementedError: If the class does not define ``parser``.
        """
        parser = self.__class__.parser
        # An explicit raise (rather than ``assert``) still runs under ``-O``
        # and checks the very attribute that is returned.
        if parser is None:
            raise NotImplementedError(
                f"{self.__class__.__name__} must define a class-level 'parser'"
            )
        return parser

    async def __get_html(self) -> str:
        """Return the response text, requesting it on first use.

        A non-empty cached text is returned as-is. When the server answers
        with an error status, the failure is logged and the cached text
        (an empty string unless a previous fetch succeeded) is returned.
        """
        if self.__html != "":
            return self.__html
        async with httpx.AsyncClient() as client:
            response = await client.request(
                self.method,
                self.url,
                headers=self.headers,
                **self.__request_parameters,
            )
            try:
                response.raise_for_status()
            except httpx.HTTPStatusError as exc:
                # Best effort: keep going with the empty text, but leave a
                # trace instead of hiding the failure completely.
                logging.getLogger(__name__).warning(
                    "Request %s %s failed with HTTP status %s",
                    self.method,
                    self.url,
                    exc.response.status_code,
                )
            else:
                self.__html = response.text
        return self.__html

    async def __get_data(self) -> dict:
        """Return the parsed data, fetching and parsing when nothing is cached.

        An empty dict or empty list counts as "nothing cached", so an empty
        parse result triggers a new fetch/parse on the next call.
        """
        if self.__data == {} or self.__data == []:
            # Resolve the parser first so a misconfigured subclass fails
            # before any network traffic is generated.
            parser = self.get_parser()
            html = await self.__get_html()
            self.__data = parser(html)
        return self.__data

    async def is_valid(self, raise_exception=False) -> bool:
        """Validate the parsed data with ``validator_class``.

        Args:
            raise_exception: Forwarded to the validator's ``is_valid``.

        Returns:
            The validator's verdict. A truthy verdict also sets
            ``has_verified`` so that ``data()`` skips re-validation.
        """
        data = await self.__get_data()
        validator = self.validator_class(data)
        ans = validator.is_valid(raise_exception)
        if ans:
            self.has_verified = True
        return ans

    async def data(self) -> dict:
        """Return the parsed data, validating it first if not yet verified.

        Validation is requested with ``raise_exception=True``; the data is
        returned afterwards whenever that call does not raise.
        """
        if not self.has_verified:
            await self.is_valid(True)
        return self.__data
|
||||
Reference in New Issue
Block a user