SYMBOL INDEX (1180 symbols across 72 files) FILE: agent-skill/Scrapling-Skill/examples/04_spider.py class QuotesSpider (line 21) | class QuotesSpider(Spider): method parse (line 26) | async def parse(self, response: Response): FILE: benchmarks.py function benchmark (line 22) | def benchmark(func): function test_lxml (line 47) | def test_lxml(): function test_bs4_lxml (line 59) | def test_bs4_lxml(): function test_bs4_html5lib (line 64) | def test_bs4_html5lib(): function test_pyquery (line 69) | def test_pyquery(): function test_scrapling (line 74) | def test_scrapling(): function test_parsel (line 82) | def test_parsel(): function test_mechanicalsoup (line 87) | def test_mechanicalsoup(): function test_selectolax (line 94) | def test_selectolax(): function display (line 98) | def display(results): function test_scrapling_text (line 111) | def test_scrapling_text(request_html): function test_autoscraper (line 116) | def test_autoscraper(request_html): FILE: cleanup.py function clean (line 6) | def clean(): FILE: scrapling/__init__.py function __getattr__ (line 27) | def __getattr__(name: str) -> Any: function __dir__ (line 36) | def __dir__() -> list[str]: FILE: scrapling/cli.py function __Execute (line 23) | def __Execute(cmd: List[str], help_line: str) -> None: # pragma: no cover function __ParseJSONData (line 29) | def __ParseJSONData(json_string: Optional[str] = None) -> Optional[Dict[... function __Request_and_Save (line 40) | def __Request_and_Save( function __ParseExtractArguments (line 60) | def __ParseExtractArguments( function __BuildRequest (line 82) | def __BuildRequest(headers: List[str], cookies: str, params: str, json: ... function install (line 115) | def install(force): # pragma: no cover function mcp (line 156) | def mcp(http, host, port): function shell (line 182) | def shell(code, level): function extract (line 192) | def extract(): function get (line 239) | def get( function post (line 335) | def post( function put (line 432) | def put( function delete (line 527) | def delete( function fetch (line 624) | def fetch( function stealthy_fetch (line 748) | def stealthy_fetch( function main (line 818) | def main(): FILE: scrapling/core/_types.py class SetCookieParam (line 46) | class SetCookieParam(TypedDict, total=False): FILE: scrapling/core/ai.py class ResponseModel (line 32) | class ResponseModel(BaseModel): function _content_translator (line 40) | def _content_translator(content: Generator[str, None, None], page: _Scra... function _normalize_credentials (line 45) | def _normalize_credentials(credentials: Optional[Dict[str, str]]) -> Opt... class ScraplingMCPServer (line 59) | class ScraplingMCPServer: method get (line 61) | def get( method bulk_get (line 142) | async def bulk_get( method fetch (line 231) | async def fetch( method bulk_fetch (line 313) | async def bulk_fetch( method stealthy_fetch (line 400) | async def stealthy_fetch( method bulk_stealthy_fetch (line 497) | async def bulk_stealthy_fetch( method serve (line 597) | def serve(self, http: bool, host: str, port: int): FILE: scrapling/core/custom_types.py class TextHandler (line 29) | class TextHandler(str): method __getitem__ (line 34) | def __getitem__(self, key: SupportsIndex | slice) -> "TextHandler": #... method split (line 38) | def split(self, sep: str | None = None, maxsplit: SupportsIndex = -1) ... method strip (line 41) | def strip(self, chars: str | None = None) -> Union[str, "TextHandler"]... method lstrip (line 44) | def lstrip(self, chars: str | None = None) -> Union[str, "TextHandler"... method rstrip (line 47) | def rstrip(self, chars: str | None = None) -> Union[str, "TextHandler"... method capitalize (line 50) | def capitalize(self) -> Union[str, "TextHandler"]: # pragma: no cover method casefold (line 53) | def casefold(self) -> Union[str, "TextHandler"]: # pragma: no cover method center (line 56) | def center(self, width: SupportsIndex, fillchar: str = " ") -> Union[s... method expandtabs (line 59) | def expandtabs(self, tabsize: SupportsIndex = 8) -> Union[str, "TextHa... method format (line 62) | def format(self, *args: object, **kwargs: object) -> Union[str, "TextH... method format_map (line 65) | def format_map(self, mapping) -> Union[str, "TextHandler"]: # pragma:... method join (line 68) | def join(self, iterable: Iterable[str]) -> Union[str, "TextHandler"]: ... method ljust (line 71) | def ljust(self, width: SupportsIndex, fillchar: str = " ") -> Union[st... method rjust (line 74) | def rjust(self, width: SupportsIndex, fillchar: str = " ") -> Union[st... method swapcase (line 77) | def swapcase(self) -> Union[str, "TextHandler"]: # pragma: no cover method title (line 80) | def title(self) -> Union[str, "TextHandler"]: # pragma: no cover method translate (line 83) | def translate(self, table) -> Union[str, "TextHandler"]: # pragma: no... method zfill (line 86) | def zfill(self, width: SupportsIndex) -> Union[str, "TextHandler"]: #... method replace (line 89) | def replace(self, old: str, new: str, count: SupportsIndex = -1) -> Un... method upper (line 92) | def upper(self) -> Union[str, "TextHandler"]: method lower (line 95) | def lower(self) -> Union[str, "TextHandler"]: method sort (line 100) | def sort(self, reverse: bool = False) -> Union[str, "TextHandler"]: method clean (line 104) | def clean(self, remove_entities=False) -> Union[str, "TextHandler"]: method get (line 112) | def get(self, default=None): # pragma: no cover method get_all (line 115) | def get_all(self): # pragma: no cover method json (line 121) | def json(self) -> Dict: method re (line 128) | def re( method re (line 139) | def re( method re (line 148) | def re( method re_first (line 184) | def re_first( class TextHandlers (line 210) | class TextHandlers(List[TextHandler]): method __getitem__ (line 218) | def __getitem__(self, pos: SupportsIndex) -> TextHandler: # pragma: n... method __getitem__ (line 222) | def __getitem__(self, pos: slice) -> "TextHandlers": # pragma: no cover method __getitem__ (line 225) | def __getitem__(self, pos: SupportsIndex | slice) -> Union[TextHandler... method re (line 231) | def re( method re_first (line 249) | def re_first( method get (line 272) | def get(self, default=None): method extract (line 278) | def extract(self): class AttributesHandler (line 285) | class AttributesHandler(Mapping[str, _TextHandlerType]): method __init__ (line 292) | def __init__(self, mapping: Any = None, **kwargs: Any) -> None: method get (line 307) | def get(self, key: str, default: Any = None) -> _TextHandlerType: method search_values (line 311) | def search_values(self, keyword: str, partial: bool = False) -> Genera... method json_string (line 325) | def json_string(self) -> bytes: method __getitem__ (line 329) | def __getitem__(self, key: str) -> _TextHandlerType: method __iter__ (line 332) | def __iter__(self): method __len__ (line 335) | def __len__(self): method __repr__ (line 338) | def __repr__(self): method __str__ (line 341) | def __str__(self): method __contains__ (line 344) | def __contains__(self, key): FILE: scrapling/core/mixins.py class SelectorsGeneration (line 4) | class SelectorsGeneration: method _general_selection (line 15) | def _general_selection(self: Any, selection: str = "css", full_path: b... method generate_css_selector (line 60) | def generate_css_selector(self: Any) -> str: method generate_full_css_selector (line 67) | def generate_full_css_selector(self: Any) -> str: method generate_xpath_selector (line 74) | def generate_xpath_selector(self: Any) -> str: method generate_full_xpath_selector (line 81) | def generate_full_xpath_selector(self: Any) -> str: FILE: scrapling/core/shell.py class NoExitArgumentParser (line 72) | class NoExitArgumentParser(ArgumentParser): # pragma: no cover method error (line 73) | def error(self, message): method exit (line 77) | def exit(self, status=0, message=None): class CurlParser (line 84) | class CurlParser: method __init__ (line 87) | def __init__(self) -> None: method parse (line 135) | def parse(self, curl_command: str) -> Optional[Request]: method convert2fetcher (line 286) | def convert2fetcher(self, curl_command: Request | str) -> Optional[Res... function _unpack_signature (line 319) | def _unpack_signature(func, signature_name=None): function show_page_in_browser (line 353) | def show_page_in_browser(page: Selector): # pragma: no cover class CustomShell (line 370) | class CustomShell: method __init__ (line 373) | def __init__(self, code, log_level="debug"): method init_components (line 414) | def init_components(self): method banner (line 428) | def banner(): method update_page (line 456) | def update_page(self, result): # pragma: no cover method create_wrapper (line 472) | def create_wrapper( method get_namespace (line 490) | def get_namespace(self): method show_help (line 529) | def show_help(self): # pragma: no cover method start (line 533) | def start(self): # pragma: no cover class Convertor (line 559) | class Convertor: method _convert_to_markdown (line 569) | def _convert_to_markdown(cls, body: TextHandler) -> str: method _strip_noise_tags (line 576) | def _strip_noise_tags(cls, page: Selector) -> Selector: method _extract_content (line 584) | def _extract_content( method write_content_to_file (line 624) | def write_content_to_file(cls, page: Selector, filename: str, css_sele... FILE: scrapling/core/storage.py class StorageSystemMixin (line 14) | class StorageSystemMixin(ABC): # pragma: no cover method __init__ (line 16) | def __init__(self, url: Optional[str] = None): method _get_base_url (line 24) | def _get_base_url(self, default_value: str = "default") -> str: method save (line 42) | def save(self, element: HtmlElement, identifier: str) -> None: method retrieve (line 52) | def retrieve(self, identifier: str) -> Optional[Dict]: method _get_hash (line 63) | def _get_hash(identifier: str) -> str: class SQLiteStorageSystem (line 74) | class SQLiteStorageSystem(StorageSystemMixin): method __init__ (line 79) | def __init__(self, storage_file: str, url: Optional[str] = None): method _setup_database (line 97) | def _setup_database(self) -> None: method save (line 109) | def save(self, element: HtmlElement, identifier: str) -> None: method retrieve (line 129) | def retrieve(self, identifier: str) -> Optional[Dict[str, Any]]: method close (line 147) | def close(self): method __del__ (line 154) | def __del__(self): FILE: scrapling/core/translator.py class XPathExpr (line 20) | class XPathExpr(OriginalXPathExpr): method from_xpath (line 25) | def from_xpath( method __str__ (line 36) | def __str__(self) -> str: method join (line 53) | def join( class TranslatorProtocol (line 72) | class TranslatorProtocol(Protocol): method xpath_element (line 73) | def xpath_element(self, selector: Element) -> OriginalXPathExpr: # py... method css_to_xpath (line 76) | def css_to_xpath(self, css: str, prefix: str = ...) -> str: # pyright... class TranslatorMixin (line 80) | class TranslatorMixin: method xpath_element (line 86) | def xpath_element(self: TranslatorProtocol, selector: Element) -> XPat... method xpath_pseudo_element (line 91) | def xpath_pseudo_element(self, xpath: OriginalXPathExpr, pseudo_elemen... method xpath_attr_functional_pseudo_element (line 110) | def xpath_attr_functional_pseudo_element(xpath: OriginalXPathExpr, fun... method xpath_text_simple_pseudo_element (line 117) | def xpath_text_simple_pseudo_element(xpath: OriginalXPathExpr) -> XPat... class HTMLTranslator (line 122) | class HTMLTranslator(TranslatorMixin, OriginalHTMLTranslator): method css_to_xpath (line 123) | def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::")... function css_to_xpath (line 132) | def css_to_xpath(query: str) -> str: FILE: scrapling/core/utils/_shell.py function _CookieParser (line 11) | def _CookieParser(cookie_string): function _ParseHeaders (line 19) | def _ParseHeaders(header_lines: List[str], parse_cookies: bool = True) -... FILE: scrapling/core/utils/_utils.py function setup_logger (line 20) | def setup_logger(): class LoggerProxy (line 43) | class LoggerProxy: method __getattr__ (line 44) | def __getattr__(self, name: str): function set_logger (line 51) | def set_logger(logger: logging.Logger) -> Token: function reset_logger (line 56) | def reset_logger(token: Token) -> None: function flatten (line 61) | def flatten(lst: Iterable[Any]) -> List[Any]: function _is_iterable (line 65) | def _is_iterable(obj: Any) -> bool: class _StorageTools (line 76) | class _StorageTools: method __clean_attributes (line 78) | def __clean_attributes(element: html.HtmlElement, forbidden: tuple = (... method element_to_dict (line 84) | def element_to_dict(cls, element: html.HtmlElement) -> Dict: method _get_element_path (line 112) | def _get_element_path(cls, element: html.HtmlElement): function clean_spaces (line 118) | def clean_spaces(string): FILE: scrapling/engines/_browsers/_base.py class SyncSession (line 46) | class SyncSession: method _build_context_with_proxy (line 50) | def _build_context_with_proxy(self, proxy: Optional[ProxyType] = None)... method __init__ (line 53) | def __init__(self, max_pages: int = 1): method start (line 62) | def start(self) -> None: method close (line 65) | def close(self): # pragma: no cover method __enter__ (line 84) | def __enter__(self): method __exit__ (line 88) | def __exit__(self, exc_type, exc_val, exc_tb): method _initialize_context (line 91) | def _initialize_context(self, config: PlaywrightConfig | StealthConfig... method _get_page (line 101) | def _get_page( method get_pool_stats (line 125) | def get_pool_stats(self) -> Dict[str, int]: method _wait_for_networkidle (line 134) | def _wait_for_networkidle(page: Page | Frame, timeout: Optional[int] =... method _wait_for_page_stability (line 141) | def _wait_for_page_stability(self, page: Page | Frame, load_dom: bool,... method _create_response_handler (line 149) | def _create_response_handler(page_info: PageInfo[Page], response_conta... method _page_generator (line 168) | def _page_generator( class AsyncSession (line 200) | class AsyncSession: method _build_context_with_proxy (line 204) | def _build_context_with_proxy(self, proxy: Optional[ProxyType] = None)... method __init__ (line 207) | def __init__(self, max_pages: int = 1): method start (line 217) | async def start(self) -> None: method close (line 220) | async def close(self): method __aenter__ (line 239) | async def __aenter__(self): method __aexit__ (line 243) | async def __aexit__(self, exc_type, exc_val, exc_tb): method _initialize_context (line 246) | async def _initialize_context( method _get_page (line 258) | async def _get_page( method get_pool_stats (line 296) | def get_pool_stats(self) -> Dict[str, int]: method _wait_for_networkidle (line 305) | async def _wait_for_networkidle(page: AsyncPage | AsyncFrame, timeout:... method _wait_for_page_stability (line 312) | async def _wait_for_page_stability(self, page: AsyncPage | AsyncFrame,... method _create_response_handler (line 320) | def _create_response_handler(page_info: PageInfo[AsyncPage], response_... method _page_generator (line 339) | async def _page_generator( class BaseSessionMixin (line 373) | class BaseSessionMixin: method __validate_routine__ (line 377) | def __validate_routine__(self, params: Dict, model: type[StealthConfig... method __validate_routine__ (line 380) | def __validate_routine__(self, params: Dict, model: type[PlaywrightCon... method __validate_routine__ (line 382) | def __validate_routine__( method __generate_options__ (line 401) | def __generate_options__(self, extra_flags: Tuple | None = None) -> None: method _build_context_with_proxy (line 439) | def _build_context_with_proxy(self, proxy: Optional[ProxyType] = None)... class DynamicSessionMixin (line 456) | class DynamicSessionMixin(BaseSessionMixin): method __validate__ (line 457) | def __validate__(self, **params): class StealthySessionMixin (line 462) | class StealthySessionMixin(BaseSessionMixin): method __validate__ (line 463) | def __validate__(self, **params): method __generate_stealth_options (line 479) | def __generate_stealth_options(self) -> None: method _detect_cloudflare (line 502) | def _detect_cloudflare(page_content: str) -> str | None: FILE: scrapling/engines/_browsers/_controllers.py class DynamicSession (line 22) | class DynamicSession(SyncSession, DynamicSessionMixin): method __init__ (line 38) | def __init__(self, **kwargs: Unpack[PlaywrightSession]): method start (line 71) | def start(self): method fetch (line 101) | def fetch(self, url: str, **kwargs: Unpack[PlaywrightFetchParams]) -> ... class AsyncDynamicSession (line 193) | class AsyncDynamicSession(AsyncSession, DynamicSessionMixin): method __init__ (line 204) | def __init__(self, **kwargs: Unpack[PlaywrightSession]): method start (line 238) | async def start(self) -> None: method fetch (line 267) | async def fetch(self, url: str, **kwargs: Unpack[PlaywrightFetchParams... FILE: scrapling/engines/_browsers/_page.py class PageInfo (line 14) | class PageInfo(Generic[PageType]): method mark_busy (line 22) | def mark_busy(self, url: str = ""): method mark_error (line 27) | def mark_error(self): method __repr__ (line 31) | def __repr__(self): method __eq__ (line 34) | def __eq__(self, other_page): class PagePool (line 41) | class PagePool: method __init__ (line 46) | def __init__(self, max_pages: int = 5): method add_page (line 52) | def add_page(self, page: SyncPage) -> PageInfo[SyncPage]: ... method add_page (line 55) | def add_page(self, page: AsyncPage) -> PageInfo[AsyncPage]: ... method add_page (line 57) | def add_page(self, page: SyncPage | AsyncPage) -> PageInfo[SyncPage] |... method pages_count (line 74) | def pages_count(self) -> int: method busy_count (line 79) | def busy_count(self) -> int: method cleanup_error_pages (line 84) | def cleanup_error_pages(self): FILE: scrapling/engines/_browsers/_stealth.py class StealthySession (line 26) | class StealthySession(SyncSession, StealthySessionMixin): method __init__ (line 42) | def __init__(self, **kwargs: Unpack[StealthSession]): method start (line 79) | def start(self) -> None: method _cloudflare_solver (line 110) | def _cloudflare_solver(self, page: Page) -> None: # pragma: no cover method fetch (line 187) | def fetch(self, url: str, **kwargs: Unpack[StealthFetchParams]) -> Res... class AsyncStealthySession (line 285) | class AsyncStealthySession(AsyncSession, StealthySessionMixin): method __init__ (line 296) | def __init__(self, **kwargs: Unpack[StealthSession]): method start (line 333) | async def start(self) -> None: method _cloudflare_solver (line 363) | async def _cloudflare_solver(self, page: async_Page) -> None: # pragm... method fetch (line 440) | async def fetch(self, url: str, **kwargs: Unpack[StealthFetchParams]) ... FILE: scrapling/engines/_browsers/_types.py class RequestsSession (line 30) | class RequestsSession(TypedDict, total=False): class GetRequestParams (line 50) | class GetRequestParams(RequestsSession, total=False): class DataRequestParams (line 57) | class DataRequestParams(GetRequestParams, total=False): class PlaywrightSession (line 63) | class PlaywrightSession(TypedDict, total=False): class PlaywrightFetchParams (line 94) | class PlaywrightFetchParams(TypedDict, total=False): class StealthSession (line 110) | class StealthSession(PlaywrightSession, total=False): class StealthFetchParams (line 117) | class StealthFetchParams(PlaywrightFetchParams, total=False): FILE: scrapling/engines/_browsers/_validators.py function _is_invalid_file_path (line 29) | def _is_invalid_file_path(value: str) -> bool | str: # pragma: no cover function _is_invalid_cdp_url (line 42) | def _is_invalid_cdp_url(cdp_url: str) -> bool | str: class PlaywrightConfig (line 59) | class PlaywrightConfig(Struct, kw_only=True, frozen=False, weakref=True): method __post_init__ (line 91) | def __post_init__(self): # pragma: no cover class StealthConfig (line 122) | class StealthConfig(PlaywrightConfig, kw_only=True, frozen=False, weakre... method __post_init__ (line 128) | def __post_init__(self): class _fetch_params (line 137) | class _fetch_params: function validate_fetch (line 155) | def validate_fetch( function _filter_defaults (line 209) | def _filter_defaults(params: Dict, model: str) -> Dict: function validate (line 216) | def validate(params: Dict, model: type[StealthConfig]) -> StealthConfig:... function validate (line 220) | def validate(params: Dict, model: type[PlaywrightConfig]) -> PlaywrightC... function validate (line 223) | def validate(params: Dict, model: type[PlaywrightConfig] | type[StealthC... FILE: scrapling/engines/static.py function _select_random_browser (line 34) | def _select_random_browser(impersonate: ImpersonateType) -> Optional[Bro... class _ConfigurationLogic (line 48) | class _ConfigurationLogic(ABC): method __init__ (line 70) | def __init__(self, **kwargs: Unpack[RequestsSession]): method _get_param (line 96) | def _get_param(kwargs: Dict, key: str, default: Any) -> Any: method _merge_request_args (line 100) | def _merge_request_args(self, **method_kwargs) -> Dict[str, Any]: method _headers_job (line 165) | def _headers_job(self, url, headers: Dict, stealth: bool, impersonate_... class _SyncSessionLogic (line 191) | class _SyncSessionLogic(_ConfigurationLogic): method __init__ (line 194) | def __init__(self, **kwargs: Unpack[RequestsSession]): method __enter__ (line 198) | def __enter__(self): method __exit__ (line 207) | def __exit__(self, exc_type, exc_val, exc_tb): method _make_request (line 221) | def _make_request(self, method: SUPPORTED_HTTP_METHODS, stealth: Optio... method get (line 275) | def get(self, url: str, **kwargs: Unpack[GetRequestParams]) -> Response: method post (line 305) | def post(self, url: str, **kwargs: Unpack[DataRequestParams]) -> Respo... method put (line 337) | def put(self, url: str, **kwargs: Unpack[DataRequestParams]) -> Response: method delete (line 369) | def delete(self, url: str, **kwargs: Unpack[DataRequestParams]) -> Res... class _ASyncSessionLogic (line 404) | class _ASyncSessionLogic(_ConfigurationLogic): method __init__ (line 407) | def __init__(self, **kwargs: Unpack[RequestsSession]): method __aenter__ (line 411) | async def __aenter__(self): # pragma: no cover method __aexit__ (line 420) | async def __aexit__(self, exc_type, exc_val, exc_tb): method _make_request (line 434) | async def _make_request(self, method: SUPPORTED_HTTP_METHODS, stealth:... method get (line 492) | def get(self, url: str, **kwargs: Unpack[GetRequestParams]) -> Awaitab... method post (line 522) | def post(self, url: str, **kwargs: Unpack[DataRequestParams]) -> Await... method put (line 554) | def put(self, url: str, **kwargs: Unpack[DataRequestParams]) -> Awaita... method delete (line 586) | def delete(self, url: str, **kwargs: Unpack[DataRequestParams]) -> Awa... class FetcherSession (line 621) | class FetcherSession: method __init__ (line 653) | def __init__( method __enter__ (line 710) | def __enter__(self) -> _SyncSessionLogic: method __exit__ (line 723) | def __exit__(self, exc_type, exc_val, exc_tb): method __aenter__ (line 731) | async def __aenter__(self) -> _ASyncSessionLogic: method __aexit__ (line 744) | async def __aexit__(self, exc_type, exc_val, exc_tb): class FetcherClient (line 753) | class FetcherClient(_SyncSessionLogic): method __init__ (line 756) | def __init__(self, **kwargs: Any) -> None: class AsyncFetcherClient (line 763) | class AsyncFetcherClient(_ASyncSessionLogic): method __init__ (line 766) | def __init__(self, **kwargs: Any) -> None: FILE: scrapling/engines/toolbelt/convertor.py class ResponseFactory (line 16) | class ResponseFactory: method __extract_browser_encoding (line 28) | def __extract_browser_encoding(cls, content_type: str | None, default:... method _process_response_history (line 39) | def _process_response_history(cls, first_response: SyncResponse, parse... method from_playwright_response (line 82) | def from_playwright_response( method _async_process_response_history (line 145) | async def _async_process_response_history( method _get_page_content (line 190) | def _get_page_content(cls, page: SyncPage) -> str: method _get_async_page_content (line 205) | async def _get_async_page_content(cls, page: AsyncPage) -> str: method from_async_playwright_response (line 220) | async def from_async_playwright_response( method from_http_request (line 283) | def from_http_request(response: CurlResponse, parser_arguments: Dict, ... FILE: scrapling/engines/toolbelt/custom.py class Response (line 28) | class Response(Selector): method __init__ (line 31) | def __init__( method body (line 72) | def body(self) -> bytes: method follow (line 76) | def follow( method __str__ (line 134) | def __str__(self) -> str: class BaseFetcher (line 138) | class BaseFetcher: method __init__ (line 157) | def __init__(self, *args, **kwargs): method display_config (line 170) | def display_config(cls): method configure (line 182) | def configure(cls, **kwargs): method _generate_parser_arguments (line 202) | def _generate_parser_arguments(cls) -> Dict: class StatusText (line 218) | class StatusText: method get (line 293) | def get(cls, status_code: int) -> str: FILE: scrapling/engines/toolbelt/fingerprints.py function get_os_name (line 21) | def get_os_name() -> OSName | Tuple: function generate_headers (line 37) | def generate_headers(browser_mode: bool | str = False) -> Dict: FILE: scrapling/engines/toolbelt/navigation.py class ProxyDict (line 16) | class ProxyDict(Struct): function create_intercept_handler (line 22) | def create_intercept_handler(disable_resources: bool, blocked_domains: O... function create_async_intercept_handler (line 49) | def create_async_intercept_handler(disable_resources: bool, blocked_doma... function construct_proxy_dict (line 76) | def construct_proxy_dict(proxy_string: str | Dict[str, str] | Tuple) -> ... FILE: scrapling/engines/toolbelt/proxy_rotation.py function _get_proxy_key (line 18) | def _get_proxy_key(proxy: ProxyType) -> str: function is_proxy_error (line 27) | def is_proxy_error(error: Exception) -> bool: function cyclic_rotation (line 33) | def cyclic_rotation(proxies: List[ProxyType], current_index: int) -> Tup... class ProxyRotator (line 39) | class ProxyRotator: method __init__ (line 51) | def __init__( method get_proxy (line 88) | def get_proxy(self) -> ProxyType: method proxies (line 95) | def proxies(self) -> List[ProxyType]: method __len__ (line 99) | def __len__(self) -> int: method __repr__ (line 103) | def __repr__(self) -> str: FILE: scrapling/fetchers/__init__.py function __getattr__ (line 37) | def __getattr__(name: str) -> Any: function __dir__ (line 46) | def __dir__() -> list[str]: FILE: scrapling/fetchers/chrome.py class DynamicFetcher (line 7) | class DynamicFetcher(BaseFetcher): method fetch (line 11) | def fetch(cls, url: str, **kwargs: Unpack[PlaywrightSession]) -> Respo... method async_fetch (line 51) | async def async_fetch(cls, url: str, **kwargs: Unpack[PlaywrightSessio... FILE: scrapling/fetchers/requests.py class Fetcher (line 13) | class Fetcher(BaseFetcher): class AsyncFetcher (line 22) | class AsyncFetcher(BaseFetcher): FILE: scrapling/fetchers/stealth_chrome.py class StealthyFetcher (line 7) | class StealthyFetcher(BaseFetcher): method fetch (line 14) | def fetch(cls, url: str, **kwargs: Unpack[StealthSession]) -> Response: method async_fetch (line 63) | async def async_fetch(cls, url: str, **kwargs: Unpack[StealthSession])... FILE: scrapling/parser.py class Selector (line 64) | class Selector(SelectorsGeneration): method __init__ (line 80) | def __init__( method __getitem__ (line 183) | def __getitem__(self, key: str) -> TextHandler: method __contains__ (line 188) | def __contains__(self, key: str) -> bool: method _is_text_node (line 195) | def _is_text_node( method __element_convertor (line 206) | def __element_convertor(self, element: HtmlElement | _ElementUnicodeRe... method __elements_convertor (line 219) | def __elements_convertor(self, elements: List[HtmlElement | _ElementUn... method __handle_elements (line 243) | def __handle_elements(self, result: List[HtmlElement | _ElementUnicode... method __getstate__ (line 250) | def __getstate__(self) -> Any: method tag (line 260) | def tag(self) -> str: method text (line 269) | def text(self) -> TextHandler: method get_all_text (line 279) | def get_all_text( method urljoin (line 331) | def urljoin(self, relative_url: str) -> str: method attrib (line 336) | def attrib(self) -> AttributesHandler: method html_content (line 345) | def html_content(self) -> TextHandler: method body (line 355) | def body(self) -> str | bytes: method prettify (line 361) | def prettify(self) -> TextHandler: method has_class (line 376) | def has_class(self, class_name: str) -> bool: method parent (line 386) | def parent(self) -> Optional["Selector"]: method below_elements (line 392) | def below_elements(self) -> "Selectors": method children (line 400) | def children(self) -> "Selectors": method siblings (line 411) | def siblings(self) -> "Selectors": method iterancestors (line 417) | def iterancestors(self) -> Generator["Selector", None, None]: method find_ancestor (line 424) | def find_ancestor(self, func: Callable[["Selector"], bool]) -> Optiona... method path (line 435) | def path(self) -> "Selectors": method next (line 441) | def next(self) -> Optional["Selector"]: method previous (line 453) | def previous(self) -> Optional["Selector"]: method get (line 464) | def get(self) -> TextHandler: method getall (line 473) | def getall(self) -> TextHandlers: method __str__ (line 480) | def __str__(self) -> str: method __repr__ (line 485) | def __repr__(self) -> str: method relocate (line 510) | def relocate( method relocate (line 515) | def relocate( method relocate (line 519) | def relocate( method css (line 564) | def css( method xpath (line 624) | def xpath( method find_all (line 694) | def find_all( method find (line 788) | def find( method __calculate_similarity_score (line 803) | def __calculate_similarity_score(self, original: Dict, candidate: Html... method __calculate_dict_diff (line 871) | def __calculate_dict_diff(dict1: Dict, dict2: Dict) -> float: method save (line 877) | def save(self, element: HtmlElement, identifier: str) -> None: method retrieve (line 898) | def retrieve(self, identifier: str) -> Optional[Dict[str, Any]]: method json (line 913) | def json(self) -> Dict: method re (line 929) | def re( method re_first (line 945) | def re_first( method __get_attributes (line 964) | def __get_attributes(element: HtmlElement, ignore_attributes: List | T... method __are_alike (line 968) | def __are_alike( method find_similar (line 1009) | def find_similar( method find_by_text (line 1071) | def find_by_text( method find_by_text (line 1081) | def find_by_text( method find_by_text (line 1090) | def find_by_text( method find_by_regex (line 1139) | def find_by_regex( method find_by_regex (line 1148) | def find_by_regex( method find_by_regex (line 1156) | def find_by_regex( class Selectors (line 1196) | class Selectors(List[Selector]): method __getitem__ (line 1204) | def __getitem__(self, pos: SupportsIndex) -> Selector: method __getitem__ (line 1208) | def __getitem__(self, pos: slice) -> "Selectors": method __getitem__ (line 1211) | def __getitem__(self, pos: SupportsIndex | slice) -> Union[Selector, "... method xpath (line 1218) | def xpath( method css (line 1249) | def css( method re (line 1277) | def re( method re_first (line 1295) | def re_first( method search (line 1317) | def search(self, func: Callable[["Selector"], bool]) -> Optional["Sele... method filter (line 1327) | def filter(self, func: Callable[["Selector"], bool]) -> "Selectors": method get (line 1335) | def get(self) -> Optional[TextHandler]: ... method get (line 1338) | def get(self, default: _T) -> Union[TextHandler, _T]: ... method get (line 1340) | def get(self, default=None): method getall (line 1348) | def getall(self) -> TextHandlers: method first (line 1356) | def first(self) -> Optional[Selector]: method last (line 1361) | def last(self) -> Optional[Selector]: method length (line 1366) | def length(self) -> int: method __getstate__ (line 1370) | def __getstate__(self) -> Any: # pragma: no cover FILE: scrapling/spiders/checkpoint.py class CheckpointData (line 16) | class CheckpointData: class CheckpointManager (line 23) | class CheckpointManager: method __init__ (line 28) | def __init__(self, crawldir: str | Path | AsyncPath, interval: float =... method has_checkpoint (line 38) | async def has_checkpoint(self) -> bool: method save (line 42) | async def save(self, data: CheckpointData) -> None: method load (line 63) | async def load(self) -> Optional[CheckpointData]: method cleanup (line 83) | async def cleanup(self) -> None: FILE: scrapling/spiders/engine.py function _dump (line 21) | def _dump(obj: Dict) -> str: class CrawlerEngine (line 25) | class CrawlerEngine: method __init__ (line 28) | def __init__( method _is_domain_allowed (line 60) | def _is_domain_allowed(self, request: Request) -> bool: method _rate_limiter (line 71) | def _rate_limiter(self, domain: str) -> CapacityLimiter: method _normalize_request (line 79) | def _normalize_request(self, request: Request) -> None: method _process_request (line 88) | async def _process_request(self, request: Request) -> None: method _task_wrapper (line 158) | async def _task_wrapper(self, request: Request) -> None: method request_pause (line 165) | def request_pause(self) -> None: method _save_checkpoint (line 184) | async def _save_checkpoint(self) -> None: method _is_checkpoint_time (line 191) | def _is_checkpoint_time(self) -> bool: method _restore_from_checkpoint (line 202) | async def _restore_from_checkpoint(self) -> bool: method crawl (line 222) | async def crawl(self) -> CrawlStats: method items (line 309) | def items(self) -> ItemList: method __aiter__ (line 313) | def __aiter__(self) -> AsyncGenerator[dict, None]: method _stream (line 316) | async def _stream(self) -> AsyncGenerator[dict, None]: FILE: scrapling/spiders/request.py function _convert_to_bytes (line 16) | def _convert_to_bytes(value: str | bytes) -> bytes: class Request (line 25) | class Request: method __init__ (line 26) | def __init__( method copy (line 47) | def copy(self) -> "Request": method domain (line 61) | def domain(self) -> str: method update_fingerprint (line 64) | def update_fingerprint( method __repr__ (line 115) | def __repr__(self) -> str: method __str__ (line 119) | def __str__(self) -> str: method __lt__ (line 122) | def __lt__(self, other: object) -> bool: method __gt__ (line 128) | def __gt__(self, other: object) -> bool: method __eq__ (line 134) | def __eq__(self, other: object) -> bool: method __getstate__ (line 142) | def __getstate__(self) -> dict[str, Any]: method __setstate__ (line 149) | def __setstate__(self, state: dict[str, Any]) -> None: method _restore_callback (line 154) | def _restore_callback(self, spider: "Spider") -> None: FILE: scrapling/spiders/result.py class ItemList (line 10) | class ItemList(list): method to_json (line 13) | def to_json(self, path: Union[str, Path], *, indent: bool = False): method to_jsonl (line 28) | def to_jsonl(self, path: Union[str, Path]): class CrawlStats (line 42) | class CrawlStats: method elapsed_seconds (line 65) | def elapsed_seconds(self) -> float: method requests_per_second (line 69) | def requests_per_second(self) -> float: method increment_status (line 74) | def increment_status(self, status: int) -> None: method increment_response_bytes (line 77) | def increment_response_bytes(self, domain: str, count: int) -> None: method increment_requests_count (line 81) | def increment_requests_count(self, sid: str) -> None: method to_dict (line 85) | def to_dict(self) -> dict[str, Any]: class CrawlResult (line 109) | class CrawlResult: method completed (line 117) | def completed(self) -> bool: method __len__ (line 121) | def __len__(self) -> int: method __iter__ (line 124) | def __iter__(self) -> Iterator[dict[str, Any]]: FILE: scrapling/spiders/scheduler.py class Scheduler (line 12) | class Scheduler: method __init__ (line 20) | def __init__(self, include_kwargs: bool = False, include_headers: bool... method enqueue (line 30) | async def enqueue(self, request: Request) -> bool: method dequeue (line 47) | async def dequeue(self) -> Request: method __len__ (line 53) | def __len__(self) -> int: method is_empty (line 57) | def is_empty(self) -> bool: method snapshot (line 60) | def snapshot(self) -> Tuple[List[Request], Set[bytes]]: method restore (line 66) | def restore(self, data: "CheckpointData") -> None: FILE: scrapling/spiders/session.py class SessionManager (line 12) | class SessionManager: method __init__ (line 15) | def __init__(self) -> None: method add (line 22) | def add(self, session_id: str, session: Session, *, default: bool = Fa... method remove (line 43) | def remove(self, session_id: str) -> None: method pop (line 50) | def pop(self, session_id: str) -> Session: method default_session_id (line 68) | def default_session_id(self) -> str: method session_ids (line 74) | def session_ids(self) -> list[str]: method get (line 77) | def get(self, session_id: str) -> Session: method start (line 83) | async def start(self) -> None: method close (line 94) | async def close(self) -> None: method fetch (line 101) | async def fetch(self, request: Request) -> Response: method __aenter__ (line 132) | async def __aenter__(self) -> "SessionManager": method __aexit__ (line 136) | async def __aexit__(self, *exc) -> None: method __contains__ (line 139) | def __contains__(self, session_id: str) -> bool: method __len__ (line 143) | def __len__(self) -> int: FILE: scrapling/spiders/spider.py class LogCounterHandler (line 21) | class LogCounterHandler(logging.Handler): method __init__ (line 24) | def __init__(self): method emit (line 34) | def emit(self, record: logging.LogRecord) -> None: method get_counts (line 48) | def get_counts(self) -> Dict[str, int]: class SessionConfigurationError (line 59) | class SessionConfigurationError(Exception): class Spider (line 65) | class Spider(ABC): method __init__ (line 92) | def __init__(self, crawldir: Optional[Union[str, Path, AsyncPath]] = N... method start_requests (line 141) | async def start_requests(self) -> AsyncGenerator[Request, None]: method parse (line 159) | async def parse(self, response: "Response") -> AsyncGenerator[Dict[str... method on_start (line 164) | async def on_start(self, resuming: bool = False) -> None: method on_close (line 174) | async def on_close(self) -> None: method on_error (line 178) | async def on_error(self, request: Request, error: Exception) -> None: method on_scraped_item (line 186) | async def on_scraped_item(self, item: Dict[str, Any]) -> Dict[str, Any... method is_blocked (line 190) | async def is_blocked(self, response: "Response") -> bool: method retry_blocked_request (line 196) | async def retry_blocked_request(self, request: Request, response: "Res... method __repr__ (line 200) | def __repr__(self) -> str: method configure_sessions (line 204) | def configure_sessions(self, manager: SessionManager) -> None: method pause (line 218) | def pause(self): method _setup_signal_handler (line 225) | def _setup_signal_handler(self) -> None: method _restore_signal_handler (line 240) | def _restore_signal_handler(self) -> None: method __run (line 248) | async def __run(self) -> CrawlResult: method start (line 264) | def start(self, use_uvloop: bool = False, **backend_options: Any) -> C... method stream (line 290) | async def stream(self) -> AsyncGenerator[Dict[str, Any], None]: method stats (line 312) | def stats(self) -> CrawlStats: FILE: tests/ai/test_ai_mcp.py class TestMCPServer (line 8) | class TestMCPServer: method test_url (line 12) | def test_url(self, httpbin): method server (line 16) | def server(self): method test_get_tool (line 19) | def test_get_tool(self, server, test_url): method test_bulk_get_tool (line 27) | async def test_bulk_get_tool(self, server, test_url): method test_fetch_tool (line 35) | async def test_fetch_tool(self, server, test_url): method test_bulk_fetch_tool (line 42) | async def test_bulk_fetch_tool(self, server, test_url): method test_stealthy_fetch_tool (line 48) | async def test_stealthy_fetch_tool(self, server, test_url): method test_bulk_stealthy_fetch_tool (line 55) | async def test_bulk_stealthy_fetch_tool(self, server, test_url): FILE: tests/cli/test_cli.py function configure_selector_mock (line 13) | def configure_selector_mock(): class TestCLI (line 24) | class TestCLI: method html_url (line 28) | def html_url(self, httpbin): method runner (line 32) | def runner(self): method test_shell_command (line 35) | def test_shell_command(self, runner): method test_mcp_command (line 45) | def test_mcp_command(self, runner): method test_extract_get_command (line 55) | def test_extract_get_command(self, runner, tmp_path, html_url): method test_extract_post_command (line 89) | def test_extract_post_command(self, runner, tmp_path, html_url): method test_extract_put_command (line 108) | def test_extract_put_command(self, runner, tmp_path, html_url): method test_extract_delete_command (line 127) | def test_extract_delete_command(self, runner, tmp_path, html_url): method test_extract_fetch_command (line 144) | def test_extract_fetch_command(self, runner, tmp_path, html_url): method test_extract_stealthy_fetch_command (line 163) | def test_extract_stealthy_fetch_command(self, runner, tmp_path, html_u... method test_invalid_arguments (line 183) | def test_invalid_arguments(self, runner, html_url): method test_impersonate_comma_separated (line 195) | def test_impersonate_comma_separated(self, runner, tmp_path, html_url): method test_impersonate_single_browser (line 219) | def test_impersonate_single_browser(self, runner, tmp_path, html_url): FILE: tests/cli/test_shell_functionality.py class TestCurlParser (line 8) | class TestCurlParser: method parser (line 12) | def parser(self): method test_basic_curl_parse (line 15) | def test_basic_curl_parse(self, parser): method test_curl_with_headers (line 25) | def test_curl_with_headers(self, parser): method test_curl_with_data (line 36) | def test_curl_with_data(self, parser): method test_curl_with_cookies (line 51) | def test_curl_with_cookies(self, parser): method test_curl_with_proxy (line 63) | def test_curl_with_proxy(self, parser): method test_curl2fetcher (line 70) | def test_curl2fetcher(self, parser): method test_invalid_curl_commands (line 81) | def test_invalid_curl_commands(self, parser): class TestConvertor (line 88) | class TestConvertor: method sample_html (line 92) | def sample_html(self): method test_extract_markdown (line 104) | def test_extract_markdown(self, sample_html): method test_extract_html (line 112) | def test_extract_html(self, sample_html): method test_extract_text (line 120) | def test_extract_text(self, sample_html): method test_extract_with_selector (line 129) | def test_extract_with_selector(self, sample_html): method test_write_to_file (line 140) | def test_write_to_file(self, sample_html, tmp_path): method test_invalid_operations (line 159) | def test_invalid_operations(self, sample_html): class TestCustomShell (line 176) | class TestCustomShell: method test_shell_initialization (line 179) | def test_shell_initialization(self): method test_shell_namespace (line 187) | def test_shell_namespace(self): FILE: tests/core/test_shell_core.py class TestCookieParser (line 11) | class TestCookieParser: method test_simple_cookie_parsing (line 14) | def test_simple_cookie_parsing(self): method test_multiple_cookies_parsing (line 21) | def test_multiple_cookies_parsing(self): method test_cookie_with_attributes (line 31) | def test_cookie_with_attributes(self): method test_empty_cookie_string (line 38) | def test_empty_cookie_string(self): method test_malformed_cookie_handling (line 43) | def test_malformed_cookie_handling(self): class TestParseHeaders (line 50) | class TestParseHeaders: method test_simple_headers (line 53) | def test_simple_headers(self): method test_headers_with_cookies (line 67) | def test_headers_with_cookies(self): method test_headers_without_colons (line 80) | def test_headers_without_colons(self): method test_invalid_header_format (line 92) | def test_invalid_header_format(self): method test_headers_with_multiple_colons (line 102) | def test_headers_with_multiple_colons(self): method test_headers_with_whitespace (line 113) | def test_headers_with_whitespace(self): method test_parse_cookies_disabled (line 125) | def test_parse_cookies_disabled(self): method test_empty_header_lines (line 137) | def test_empty_header_lines(self): class TestRequestNamedTuple (line 144) | class TestRequestNamedTuple: method test_request_creation (line 147) | def test_request_creation(self): method test_request_defaults (line 167) | def test_request_defaults(self): method test_request_field_access (line 187) | def test_request_field_access(self): class TestLoggingLevels (line 209) | class TestLoggingLevels: method test_known_logging_levels (line 212) | def test_known_logging_levels(self): method test_logging_level_values (line 220) | def test_logging_level_values(self): method test_level_hierarchy (line 231) | def test_level_hierarchy(self): FILE: tests/core/test_storage_core.py class TestSQLiteStorageSystem (line 7) | class TestSQLiteStorageSystem: method test_sqlite_storage_creation (line 10) | def test_sqlite_storage_creation(self): method test_sqlite_storage_with_file (line 16) | def test_sqlite_storage_with_file(self): method test_sqlite_storage_initialization_args (line 33) | def test_sqlite_storage_initialization_args(self): FILE: tests/fetchers/async/test_dynamic.py class TestDynamicFetcherAsync (line 10) | class TestDynamicFetcherAsync: method fetcher (line 12) | def fetcher(self): method urls (line 16) | def urls(self, httpbin): method test_basic_fetch (line 28) | async def test_basic_fetch(self, fetcher, urls): method test_cookies_loading (line 34) | async def test_cookies_loading(self, fetcher, urls): method test_automation (line 41) | async def test_automation(self, fetcher, urls): method test_properties (line 73) | async def test_properties(self, fetcher, urls, kwargs): method test_cdp_url_invalid (line 79) | async def test_cdp_url_invalid(self, fetcher, urls): FILE: tests/fetchers/async/test_dynamic_session.py class TestAsyncDynamicSession (line 11) | class TestAsyncDynamicSession: method urls (line 16) | def urls(self, httpbin): method test_concurrent_async_requests (line 22) | async def test_concurrent_async_requests(self, urls): method test_page_pool_management (line 52) | async def test_page_pool_management(self, urls): method test_dynamic_session_with_options (line 70) | async def test_dynamic_session_with_options(self, urls): method test_error_handling_in_fetch (line 80) | async def test_error_handling_in_fetch(self, urls): FILE: tests/fetchers/async/test_requests.py class TestAsyncFetcher (line 11) | class TestAsyncFetcher: method fetcher (line 13) | def fetcher(self): method urls (line 17) | def urls(self, httpbin): method test_basic_get (line 29) | async def test_basic_get(self, fetcher, urls): method test_get_properties (line 35) | async def test_get_properties(self, fetcher, urls): method test_post_properties (line 53) | async def test_post_properties(self, fetcher, urls): method test_put_properties (line 81) | async def test_put_properties(self, fetcher, urls): method test_delete_properties (line 110) | async def test_delete_properties(self, fetcher, urls): FILE: tests/fetchers/async/test_requests_session.py class TestFetcherSession (line 6) | class TestFetcherSession: method test_async_fetcher_client_creation (line 9) | def test_async_fetcher_client_creation(self): FILE: tests/fetchers/async/test_stealth.py class TestStealthyFetcher (line 11) | class TestStealthyFetcher: method fetcher (line 13) | def fetcher(self): method urls (line 17) | def urls(self, httpbin): method test_basic_fetch (line 29) | async def test_basic_fetch(self, fetcher, urls): method test_cookies_loading (line 35) | async def test_cookies_loading(self, fetcher, urls): method test_automation (line 41) | async def test_automation(self, fetcher, urls): method test_properties (line 73) | async def test_properties(self, fetcher, urls, kwargs): FILE: tests/fetchers/async/test_stealth_session.py class TestAsyncStealthySession (line 12) | class TestAsyncStealthySession: method urls (line 17) | def urls(self, httpbin): method test_concurrent_async_requests (line 23) | async def test_concurrent_async_requests(self, urls): method test_page_pool_management (line 53) | async def test_page_pool_management(self, urls): method test_stealthy_session_with_options (line 71) | async def test_stealthy_session_with_options(self, urls): method test_error_handling_in_fetch (line 81) | async def test_error_handling_in_fetch(self, urls): FILE: tests/fetchers/sync/test_dynamic.py class TestDynamicFetcher (line 10) | class TestDynamicFetcher: method fetcher (line 12) | def fetcher(self): method setup_urls (line 17) | def setup_urls(self, httpbin): method test_basic_fetch (line 27) | def test_basic_fetch(self, fetcher): method test_cookies_loading (line 34) | def test_cookies_loading(self, fetcher): method test_automation (line 40) | def test_automation(self, fetcher): method test_properties (line 71) | def test_properties(self, fetcher, kwargs): method test_cdp_url_invalid (line 76) | def test_cdp_url_invalid(self, fetcher): FILE: tests/fetchers/sync/test_requests.py class TestFetcher (line 10) | class TestFetcher: method fetcher (line 12) | def fetcher(self): method setup_urls (line 17) | def setup_urls(self, httpbin): method test_basic_get (line 28) | def test_basic_get(self, fetcher): method test_get_properties (line 34) | def test_get_properties(self, fetcher): method test_post_properties (line 49) | def test_post_properties(self, fetcher): method test_put_properties (line 79) | def test_put_properties(self, fetcher): method test_delete_properties (line 108) | def test_delete_properties(self, fetcher): FILE: tests/fetchers/sync/test_requests_session.py class TestFetcherSession (line 7) | class TestFetcherSession: method test_fetcher_session_creation (line 10) | def test_fetcher_session_creation(self): method test_fetcher_session_context_manager (line 21) | def test_fetcher_session_context_manager(self): method test_fetcher_session_double_enter (line 31) | def test_fetcher_session_double_enter(self): method test_fetcher_client_creation (line 39) | def test_fetcher_client_creation(self): FILE: tests/fetchers/sync/test_stealth_session.py class TestStealthConstants (line 8) | class TestStealthConstants: method test_cf_pattern_regex (line 11) | def test_cf_pattern_regex(self): class TestStealthySession (line 38) | class TestStealthySession: method setup_urls (line 42) | def setup_urls(self, httpbin): method test_session_creation (line 52) | def test_session_creation(self): FILE: tests/fetchers/test_base.py class TestBaseFetcher (line 6) | class TestBaseFetcher: method test_default_configuration (line 9) | def test_default_configuration(self): method test_configure_single_parameter (line 18) | def test_configure_single_parameter(self): method test_configure_multiple_parameters (line 28) | def test_configure_multiple_parameters(self): method test_configure_invalid_parameter (line 48) | def test_configure_invalid_parameter(self): method test_configure_no_parameters (line 53) | def test_configure_no_parameters(self): method test_configure_non_parser_keyword (line 58) | def test_configure_non_parser_keyword(self): method test_generate_parser_arguments (line 65) | def test_generate_parser_arguments(self): FILE: tests/fetchers/test_constants.py class TestConstants (line 4) | class TestConstants: method test_default_disabled_resources (line 7) | def test_default_disabled_resources(self): method test_harmful_default_args (line 14) | def test_harmful_default_args(self): method test_flags (line 19) | def test_flags(self): FILE: tests/fetchers/test_impersonate_list.py class TestRandomBrowserSelection (line 11) | class TestRandomBrowserSelection: method test_select_random_browser_with_single_string (line 14) | def test_select_random_browser_with_single_string(self): method test_select_random_browser_with_none (line 19) | def test_select_random_browser_with_none(self): method test_select_random_browser_with_list (line 24) | def test_select_random_browser_with_list(self): method test_select_random_browser_with_empty_list (line 30) | def test_select_random_browser_with_empty_list(self): method test_select_random_browser_with_single_item_list (line 35) | def test_select_random_browser_with_single_item_list(self): class TestFetcherWithImpersonateList (line 42) | class TestFetcherWithImpersonateList: method setup_urls (line 46) | def setup_urls(self, httpbin): method test_get_with_impersonate_list (line 50) | def test_get_with_impersonate_list(self): method test_get_with_single_impersonate (line 56) | def test_get_with_single_impersonate(self): method test_post_with_impersonate_list (line 61) | def test_post_with_impersonate_list(self): method test_put_with_impersonate_list (line 68) | def test_put_with_impersonate_list(self): method test_delete_with_impersonate_list (line 75) | def test_delete_with_impersonate_list(self): class TestFetcherSessionWithImpersonateList (line 84) | class TestFetcherSessionWithImpersonateList: method setup_urls (line 88) | def setup_urls(self, httpbin): method test_session_init_with_impersonate_list (line 92) | def test_session_init_with_impersonate_list(self): method test_session_request_with_impersonate_list (line 98) | def test_session_request_with_impersonate_list(self): method test_session_multiple_requests_with_impersonate_list (line 105) | def test_session_multiple_requests_with_impersonate_list(self): method test_session_request_level_impersonate_override (line 114) | def test_session_request_level_impersonate_override(self): method test_session_request_level_impersonate_list_override (line 123) | def test_session_request_level_impersonate_list_override(self): class TestImpersonateTypeValidation (line 133) | class TestImpersonateTypeValidation: method test_impersonate_accepts_string (line 136) | def test_impersonate_accepts_string(self): method test_impersonate_accepts_list (line 142) | def test_impersonate_accepts_list(self): method test_impersonate_accepts_none (line 149) | def test_impersonate_accepts_none(self): FILE: tests/fetchers/test_pages.py class TestPageInfo (line 6) | class TestPageInfo: method test_page_info_creation (line 9) | def test_page_info_creation(self): method test_page_info_marking (line 18) | def test_page_info_marking(self): method test_page_info_equality (line 30) | def test_page_info_equality(self): method test_page_info_repr (line 43) | def test_page_info_repr(self): class TestPagePool (line 53) | class TestPagePool: method test_page_pool_creation (line 56) | def test_page_pool_creation(self): method test_add_page (line 64) | def test_add_page(self): method test_add_page_limit_exceeded (line 76) | def test_add_page_limit_exceeded(self): method test_cleanup_error_pages (line 89) | def test_cleanup_error_pages(self): FILE: tests/fetchers/test_proxy_rotation.py class TestCyclicRotationStrategy (line 9) | class TestCyclicRotationStrategy: method test_cyclic_rotation_cycles_through_proxies (line 12) | def test_cyclic_rotation_cycles_through_proxies(self): method test_cyclic_rotation_wraps_index (line 28) | def test_cyclic_rotation_wraps_index(self): class TestProxyRotatorCreation (line 38) | class TestProxyRotatorCreation: method test_create_with_string_proxies (line 41) | def test_create_with_string_proxies(self): method test_create_with_dict_proxies (line 49) | def test_create_with_dict_proxies(self): method test_create_with_mixed_proxies (line 60) | def test_create_with_mixed_proxies(self): method test_empty_proxies_raises_error (line 70) | def test_empty_proxies_raises_error(self): method test_dict_without_server_raises_error (line 75) | def test_dict_without_server_raises_error(self): method test_invalid_proxy_type_raises_error (line 80) | def test_invalid_proxy_type_raises_error(self): method test_non_callable_strategy_raises_error (line 88) | def test_non_callable_strategy_raises_error(self): class TestProxyRotatorRotation (line 97) | class TestProxyRotatorRotation: method test_get_proxy_cyclic_rotation (line 100) | def test_get_proxy_cyclic_rotation(self): method test_get_proxy_single_proxy (line 115) | def test_get_proxy_single_proxy(self): method test_get_proxy_with_dict_proxies (line 122) | def test_get_proxy_with_dict_proxies(self): class TestCustomStrategies (line 135) | class TestCustomStrategies: method test_random_strategy (line 138) | def test_random_strategy(self): method test_sticky_strategy (line 150) | def test_sticky_strategy(self): method test_weighted_strategy (line 163) | def test_weighted_strategy(self): method test_lambda_strategy (line 183) | def test_lambda_strategy(self): class TestProxyRotatorProperties (line 194) | class TestProxyRotatorProperties: method test_proxies_property_returns_copy (line 197) | def test_proxies_property_returns_copy(self): method test_len_returns_proxy_count (line 209) | def test_len_returns_proxy_count(self): method test_repr (line 215) | def test_repr(self): class TestProxyRotatorThreadSafety (line 221) | class TestProxyRotatorThreadSafety: method test_concurrent_get_proxy (line 224) | def test_concurrent_get_proxy(self): method test_thread_pool_concurrent_access (line 244) | def test_thread_pool_concurrent_access(self): class TestIsProxyError (line 257) | class TestIsProxyError: method test_proxy_errors_detected (line 270) | def test_proxy_errors_detected(self, error_msg): method test_non_proxy_errors_not_detected (line 283) | def test_non_proxy_errors_not_detected(self, error_msg): method test_case_insensitive_detection (line 287) | def test_case_insensitive_detection(self): method test_empty_error_message (line 293) | def test_empty_error_message(self): method test_custom_exception_types (line 297) | def test_custom_exception_types(self): FILE: tests/fetchers/test_response_handling.py class TestResponseFactory (line 7) | class TestResponseFactory: method test_response_from_curl (line 10) | def test_response_from_curl(self): method test_response_history_processing (line 34) | def test_response_history_processing(self): class TestErrorScenarios (line 60) | class TestErrorScenarios: method test_invalid_html_handling (line 63) | def test_invalid_html_handling(self): method test_empty_responses (line 82) | def test_empty_responses(self): FILE: tests/fetchers/test_utils.py function content_type_map (line 16) | def content_type_map(): function status_map (line 66) | def status_map(): function test_parsing_response_status (line 133) | def test_parsing_response_status(status_map): function test_unknown_status_code (line 139) | def test_unknown_status_code(): class TestConstructProxyDict (line 144) | class TestConstructProxyDict: method test_proxy_string_basic (line 147) | def test_proxy_string_basic(self): method test_proxy_string_with_auth (line 158) | def test_proxy_string_with_auth(self): method test_proxy_dict_input (line 169) | def test_proxy_dict_input(self): method test_proxy_dict_minimal (line 180) | def test_proxy_dict_minimal(self): method test_invalid_proxy_string (line 192) | def test_invalid_proxy_string(self): method test_invalid_proxy_dict (line 197) | def test_invalid_proxy_dict(self): class TestFingerprintFunctions (line 203) | class TestFingerprintFunctions: method test_get_os_name (line 206) | def test_get_os_name(self): method test_generate_headers_basic (line 214) | def test_generate_headers_basic(self): method test_generate_headers_browser_mode (line 222) | def test_generate_headers_browser_mode(self): class TestResponse (line 230) | class TestResponse: method test_response_creation (line 233) | def test_response_creation(self): method test_response_with_bytes_content (line 251) | def test_response_with_bytes_content(self): class _MockRequest (line 269) | class _MockRequest: method __init__ (line 271) | def __init__(self, url: str, resource_type: str = "document"): class _MockRoute (line 276) | class _MockRoute: method __init__ (line 278) | def __init__(self, url: str, resource_type: str = "document"): method abort (line 283) | def abort(self): method continue_ (line 286) | def continue_(self): class _AsyncMockRoute (line 290) | class _AsyncMockRoute: method __init__ (line 292) | def __init__(self, url: str, resource_type: str = "document"): method abort (line 297) | async def abort(self): method continue_ (line 300) | async def continue_(self): class TestCreateInterceptHandler (line 304) | class TestCreateInterceptHandler: method test_blocks_disabled_resource_types (line 307) | def test_blocks_disabled_resource_types(self): method test_continues_allowed_resource_types (line 313) | def test_continues_allowed_resource_types(self): method test_blocks_exact_domain (line 319) | def test_blocks_exact_domain(self): method test_blocks_subdomain (line 325) | def test_blocks_subdomain(self): method test_continues_non_blocked_domain (line 331) | def test_continues_non_blocked_domain(self): method test_resource_blocking_takes_priority_over_domain (line 337) | def test_resource_blocking_takes_priority_over_domain(self): method test_domain_blocking_with_resources_disabled (line 344) | def test_domain_blocking_with_resources_disabled(self): method test_no_blocking_continues (line 351) | def test_no_blocking_continues(self): method test_does_not_block_partial_domain_match (line 357) | def test_does_not_block_partial_domain_match(self): method test_multiple_blocked_domains (line 364) | def test_multiple_blocked_domains(self): class TestCreateAsyncInterceptHandler (line 377) | class TestCreateAsyncInterceptHandler: method test_blocks_disabled_resource_types (line 381) | async def test_blocks_disabled_resource_types(self): method test_blocks_domain (line 388) | async def test_blocks_domain(self): method test_continues_non_blocked (line 395) | async def test_continues_non_blocked(self): method test_blocks_subdomain (line 402) | async def test_blocks_subdomain(self): method test_does_not_block_partial_domain_match (line 409) | async def test_does_not_block_partial_domain_match(self): FILE: tests/fetchers/test_validator.py class TestValidators (line 9) | class TestValidators: method test_playwright_config_valid (line 12) | def test_playwright_config_valid(self): method test_playwright_config_invalid_max_pages (line 28) | def test_playwright_config_invalid_max_pages(self): method test_playwright_config_invalid_timeout (line 40) | def test_playwright_config_invalid_timeout(self): method test_playwright_config_invalid_cdp_url (line 47) | def test_playwright_config_invalid_cdp_url(self): method test_stealth_config_valid (line 54) | def test_stealth_config_valid(self): method test_stealth_config_cloudflare_timeout (line 70) | def test_stealth_config_cloudflare_timeout(self): method test_playwright_config_blocked_domains (line 81) | def test_playwright_config_blocked_domains(self): method test_playwright_config_blocked_domains_default_none (line 89) | def test_playwright_config_blocked_domains_default_none(self): method test_stealth_config_blocked_domains (line 95) | def test_stealth_config_blocked_domains(self): FILE: tests/parser/test_adaptive.py class TestParserAdaptive (line 8) | class TestParserAdaptive: method test_element_relocation (line 9) | def test_element_relocation(self): method test_element_relocation_async (line 60) | async def test_element_relocation_async(self): FILE: tests/parser/test_attributes_handler.py class TestAttributesHandler (line 8) | class TestAttributesHandler: method sample_html (line 12) | def sample_html(self): method attributes (line 51) | def attributes(self, sample_html): method test_basic_attribute_access (line 56) | def test_basic_attribute_access(self, attributes): method test_iteration_methods (line 72) | def test_iteration_methods(self, attributes): method test_json_parsing (line 93) | def test_json_parsing(self, attributes): method test_json_error_handling (line 112) | def test_json_error_handling(self, attributes): method test_json_string_property (line 122) | def test_json_string_property(self, attributes): method test_search_values (line 133) | def test_search_values(self, attributes): method test_special_attribute_types (line 160) | def test_special_attribute_types(self, sample_html): method test_attribute_modification (line 177) | def test_attribute_modification(self, sample_html): method test_string_representation (line 196) | def test_string_representation(self, attributes): method test_edge_cases (line 207) | def test_edge_cases(self, sample_html): method test_url_attribute (line 228) | def test_url_attribute(self, attributes): method test_comparison_operations (line 236) | def test_comparison_operations(self, sample_html): method test_complex_search_patterns (line 249) | def test_complex_search_patterns(self, attributes): method test_attribute_filtering (line 265) | def test_attribute_filtering(self, attributes): method test_performance_with_many_attributes (line 277) | def test_performance_with_many_attributes(self): method test_unicode_attributes (line 294) | def test_unicode_attributes(self): method test_malformed_attributes (line 317) | def test_malformed_attributes(self): FILE: tests/parser/test_general.py function html_content (line 13) | def html_content(): function page (line 82) | def page(html_content): class TestCSSSelectors (line 87) | class TestCSSSelectors: method test_basic_product_selection (line 88) | def test_basic_product_selection(self, page): method test_in_stock_product_selection (line 93) | def test_in_stock_product_selection(self, page): class TestXPathSelectors (line 102) | class TestXPathSelectors: method test_high_rating_reviews (line 103) | def test_high_rating_reviews(self, page): method test_high_priced_products (line 110) | def test_high_priced_products(self, page): class TestTextMatching (line 120) | class TestTextMatching: method test_regex_multiple_matches (line 121) | def test_regex_multiple_matches(self, page): method test_regex_first_match (line 126) | def test_regex_first_match(self, page): method test_partial_text_match (line 133) | def test_partial_text_match(self, page): method test_exact_text_match (line 138) | def test_exact_text_match(self, page): class TestSimilarElements (line 147) | class TestSimilarElements: method test_finding_similar_products (line 148) | def test_finding_similar_products(self, page): method test_finding_similar_reviews (line 154) | def test_finding_similar_reviews(self, page): class TestErrorHandling (line 166) | class TestErrorHandling: method test_invalid_selector_initialization (line 167) | def test_invalid_selector_initialization(self): method test_invalid_storage (line 176) | def test_invalid_storage(self, page, html_content): method test_bad_selectors (line 181) | def test_bad_selectors(self, page): class TestPicklingAndRepresentation (line 191) | class TestPicklingAndRepresentation: method test_unpickleable_objects (line 192) | def test_unpickleable_objects(self, page): method test_string_representations (line 198) | def test_string_representations(self, page): class TestElementNavigation (line 208) | class TestElementNavigation: method test_basic_navigation_properties (line 209) | def test_basic_navigation_properties(self, page): method test_parent_and_sibling_navigation (line 216) | def test_parent_and_sibling_navigation(self, page): method test_child_navigation (line 225) | def test_child_navigation(self, page): method test_next_and_previous_navigation (line 231) | def test_next_and_previous_navigation(self, page): method test_ancestor_finding (line 240) | def test_ancestor_finding(self, page): class TestJSONAndAttributes (line 251) | class TestJSONAndAttributes: method test_json_conversion (line 252) | def test_json_conversion(self, page): method test_attribute_operations (line 260) | def test_attribute_operations(self, page): function test_large_html_parsing_performance (line 287) | def test_large_html_parsing_performance(): function test_selectors_generation (line 310) | def test_selectors_generation(page): function test_getting_all_text (line 325) | def test_getting_all_text(page): function test_regex_on_text (line 330) | def test_regex_on_text(page): FILE: tests/parser/test_parser_advanced.py class TestSelectorAdvancedFeatures (line 10) | class TestSelectorAdvancedFeatures: method test_adaptive_initialization_with_storage (line 13) | def test_adaptive_initialization_with_storage(self): method test_adaptive_initialization_with_default_storage_args (line 28) | def test_adaptive_initialization_with_default_storage_args(self): method test_adaptive_with_existing_storage (line 43) | def test_adaptive_with_existing_storage(self): class TestAdvancedSelectors (line 58) | class TestAdvancedSelectors: method complex_html (line 62) | def complex_html(self): method test_comment_and_cdata_handling (line 84) | def test_comment_and_cdata_handling(self, complex_html): method test_advanced_xpath_variables (line 105) | def test_advanced_xpath_variables(self, complex_html): method test_pseudo_elements (line 117) | def test_pseudo_elements(self, complex_html): method test_complex_attribute_operations (line 131) | def test_complex_attribute_operations(self, complex_html): method test_url_joining (line 144) | def test_url_joining(self): method test_find_operations_edge_cases (line 153) | def test_find_operations_edge_cases(self, complex_html): method test_text_operations_edge_cases (line 170) | def test_text_operations_edge_cases(self, complex_html): method test_get_all_text_preserves_interleaved_text_nodes (line 186) | def test_get_all_text_preserves_interleaved_text_nodes(self): class TestTextHandlerAdvanced (line 214) | class TestTextHandlerAdvanced: method test_text_handler_operations (line 217) | def test_text_handler_operations(self): method test_text_handler_regex (line 234) | def test_text_handler_regex(self): method test_text_handlers_operations (line 253) | def test_text_handlers_operations(self): class TestSelectorsAdvanced (line 270) | class TestSelectorsAdvanced: method test_selectors_filtering (line 273) | def test_selectors_filtering(self): method test_selectors_properties (line 294) | def test_selectors_properties(self): FILE: tests/spiders/test_checkpoint.py class TestCheckpointData (line 14) | class TestCheckpointData: method test_default_values (line 17) | def test_default_values(self): method test_with_requests_and_seen (line 24) | def test_with_requests_and_seen(self): method test_pickle_roundtrip (line 38) | def test_pickle_roundtrip(self): class TestCheckpointManagerInit (line 52) | class TestCheckpointManagerInit: method test_init_with_string_path (line 55) | def test_init_with_string_path(self): method test_init_with_pathlib_path (line 62) | def test_init_with_pathlib_path(self): method test_init_with_custom_interval (line 69) | def test_init_with_custom_interval(self): method test_init_with_zero_interval (line 74) | def test_init_with_zero_interval(self): method test_init_with_negative_interval_raises (line 79) | def test_init_with_negative_interval_raises(self): method test_init_with_invalid_interval_type_raises (line 84) | def test_init_with_invalid_interval_type_raises(self): method test_checkpoint_file_path (line 89) | def test_checkpoint_file_path(self): class TestCheckpointManagerOperations (line 97) | class TestCheckpointManagerOperations: method temp_dir (line 101) | def temp_dir(self): method test_has_checkpoint_false_when_no_file (line 107) | async def test_has_checkpoint_false_when_no_file(self, temp_dir: Path): method test_save_creates_checkpoint_file (line 116) | async def test_save_creates_checkpoint_file(self, temp_dir: Path): method test_save_creates_directory_if_not_exists (line 132) | async def test_save_creates_directory_if_not_exists(self, temp_dir: Pa... method test_has_checkpoint_true_after_save (line 143) | async def test_has_checkpoint_true_after_save(self, temp_dir: Path): method test_load_returns_none_when_no_checkpoint (line 154) | async def test_load_returns_none_when_no_checkpoint(self, temp_dir: Pa... method test_save_and_load_roundtrip (line 163) | async def test_save_and_load_roundtrip(self, temp_dir: Path): method test_save_is_atomic (line 185) | async def test_save_is_atomic(self, temp_dir: Path): method test_cleanup_removes_checkpoint_file (line 202) | async def test_cleanup_removes_checkpoint_file(self, temp_dir: Path): method test_cleanup_no_error_when_no_file (line 220) | async def test_cleanup_no_error_when_no_file(self, temp_dir: Path): method test_load_returns_none_on_corrupt_file (line 228) | async def test_load_returns_none_on_corrupt_file(self, temp_dir: Path): method test_multiple_saves_overwrite (line 243) | async def test_multiple_saves_overwrite(self, temp_dir: Path): class TestCheckpointManagerEdgeCases (line 270) | class TestCheckpointManagerEdgeCases: method temp_dir (line 274) | def temp_dir(self): method test_save_empty_checkpoint (line 280) | async def test_save_empty_checkpoint(self, temp_dir: Path): method test_save_large_checkpoint (line 294) | async def test_save_large_checkpoint(self, temp_dir: Path): method test_requests_preserve_metadata (line 315) | async def test_requests_preserve_metadata(self, temp_dir: Path): FILE: tests/spiders/test_engine.py class MockResponse (line 22) | class MockResponse: method __init__ (line 25) | def __init__(self, status: int = 200, body: bytes = b"ok", url: str = ... method __str__ (line 32) | def __str__(self) -> str: class MockSession (line 36) | class MockSession: method __init__ (line 39) | def __init__(self, name: str = "mock", response: MockResponse | None =... method __aenter__ (line 45) | async def __aenter__(self): method __aexit__ (line 49) | async def __aexit__(self, *args): method fetch (line 52) | async def fetch(self, url: str, **kwargs): class ErrorSession (line 58) | class ErrorSession(MockSession): method __init__ (line 61) | def __init__(self, error: Exception | None = None): method fetch (line 65) | async def fetch(self, url: str, **kwargs): class MockSpider (line 69) | class MockSpider: method __init__ (line 72) | def __init__( method parse (line 113) | async def parse(self, response) -> AsyncGenerator[Dict[str, Any] | Req... method on_start (line 116) | async def on_start(self, resuming: bool = False) -> None: method on_close (line 119) | async def on_close(self) -> None: method on_error (line 122) | async def on_error(self, request: Request, error: Exception) -> None: method on_scraped_item (line 125) | async def on_scraped_item(self, item: Dict[str, Any]) -> Dict[str, Any... method is_blocked (line 131) | async def is_blocked(self, response) -> bool: method retry_blocked_request (line 136) | async def retry_blocked_request(self, request: Request, response) -> R... method start_requests (line 142) | async def start_requests(self) -> AsyncGenerator[Request, None]: class _LogCounterStub (line 146) | class _LogCounterStub: method get_counts (line 149) | def get_counts(self) -> Dict[str, int]: function _make_engine (line 153) | def _make_engine( class TestDumpHelper (line 171) | class TestDumpHelper: method test_dump_returns_json_string (line 172) | def test_dump_returns_json_string(self): method test_dump_handles_nested (line 176) | def test_dump_handles_nested(self): class TestCrawlerEngineInit (line 187) | class TestCrawlerEngineInit: method test_default_initialisation (line 188) | def test_default_initialisation(self): method test_checkpoint_system_disabled_by_default (line 199) | def test_checkpoint_system_disabled_by_default(self): method test_checkpoint_system_enabled_with_crawldir (line 203) | def test_checkpoint_system_enabled_with_crawldir(self): method test_global_limiter_uses_concurrent_requests (line 208) | def test_global_limiter_uses_concurrent_requests(self): method test_allowed_domains_from_spider (line 213) | def test_allowed_domains_from_spider(self): class TestIsDomainAllowed (line 224) | class TestIsDomainAllowed: method test_all_allowed_when_empty (line 225) | def test_all_allowed_when_empty(self): method test_exact_domain_match (line 230) | def test_exact_domain_match(self): method test_subdomain_match (line 237) | def test_subdomain_match(self): method test_partial_name_not_matched (line 244) | def test_partial_name_not_matched(self): method test_multiple_allowed_domains (line 251) | def test_multiple_allowed_domains(self): class TestRateLimiter (line 265) | class TestRateLimiter: method test_returns_global_limiter_when_per_domain_disabled (line 266) | def test_returns_global_limiter_when_per_domain_disabled(self): method test_returns_per_domain_limiter_when_enabled (line 271) | def test_returns_per_domain_limiter_when_enabled(self): method test_same_domain_returns_same_limiter (line 279) | def test_same_domain_returns_same_limiter(self): method test_different_domains_get_different_limiters (line 287) | def test_different_domains_get_different_limiters(self): class TestNormalizeRequest (line 301) | class TestNormalizeRequest: method test_sets_default_sid_when_empty (line 302) | def test_sets_default_sid_when_empty(self): method test_preserves_existing_sid (line 310) | def test_preserves_existing_sid(self): class TestProcessRequest (line 323) | class TestProcessRequest: method test_successful_fetch_updates_stats (line 325) | async def test_successful_fetch_updates_stats(self): method test_failed_fetch_increments_failed_count (line 338) | async def test_failed_fetch_increments_failed_count(self): method test_failed_fetch_does_not_increment_requests_count (line 351) | async def test_failed_fetch_does_not_increment_requests_count(self): method test_blocked_response_triggers_retry (line 363) | async def test_blocked_response_triggers_retry(self): method test_blocked_response_max_retries_exceeded (line 375) | async def test_blocked_response_max_retries_exceeded(self): method test_retry_request_has_dont_filter (line 388) | async def test_retry_request_has_dont_filter(self): method test_retry_clears_proxy_kwargs (line 400) | async def test_retry_clears_proxy_kwargs(self): method test_callback_yielding_dict_increments_items (line 412) | async def test_callback_yielding_dict_increments_items(self): method test_callback_yielding_request_enqueues (line 423) | async def test_callback_yielding_request_enqueues(self): method test_callback_yielding_offsite_request_filtered (line 436) | async def test_callback_yielding_offsite_request_filtered(self): method test_dropped_item_when_on_scraped_item_returns_none (line 450) | async def test_dropped_item_when_on_scraped_item_returns_none(self): method test_callback_exception_calls_on_error (line 462) | async def test_callback_exception_calls_on_error(self): method test_proxy_tracked_in_stats (line 477) | async def test_proxy_tracked_in_stats(self): method test_proxies_dict_tracked_in_stats (line 487) | async def test_proxies_dict_tracked_in_stats(self): method test_uses_parse_when_no_callback (line 499) | async def test_uses_parse_when_no_callback(self): class TestTaskWrapper (line 521) | class TestTaskWrapper: method test_decrements_active_tasks (line 523) | async def test_decrements_active_tasks(self): method test_decrements_even_on_error (line 533) | async def test_decrements_even_on_error(self): class TestRequestPause (line 551) | class TestRequestPause: method test_first_call_sets_pause_requested (line 552) | def test_first_call_sets_pause_requested(self): method test_second_call_sets_force_stop (line 560) | def test_second_call_sets_force_stop(self): method test_third_call_after_force_stop_is_noop (line 569) | def test_third_call_after_force_stop_is_noop(self): class TestCheckpointMethods (line 584) | class TestCheckpointMethods: method test_is_checkpoint_time_false_when_disabled (line 585) | def test_is_checkpoint_time_false_when_disabled(self): method test_save_and_restore_checkpoint (line 590) | async def test_save_and_restore_checkpoint(self): method test_restore_when_no_checkpoint_returns_false (line 607) | async def test_restore_when_no_checkpoint_returns_false(self): method test_restore_from_checkpoint_raises_when_disabled (line 614) | async def test_restore_from_checkpoint_raises_when_disabled(self): class TestCrawl (line 625) | class TestCrawl: method test_basic_crawl_returns_stats (line 627) | async def test_basic_crawl_returns_stats(self): method test_crawl_calls_on_start_and_on_close (line 638) | async def test_crawl_calls_on_start_and_on_close(self): method test_crawl_sets_stats_timing (line 649) | async def test_crawl_sets_stats_timing(self): method test_crawl_sets_concurrency_stats (line 660) | async def test_crawl_sets_concurrency_stats(self): method test_crawl_processes_multiple_start_urls (line 670) | async def test_crawl_processes_multiple_start_urls(self): method test_crawl_follows_yielded_requests (line 688) | async def test_crawl_follows_yielded_requests(self): method test_crawl_with_download_delay (line 709) | async def test_crawl_with_download_delay(self): method test_crawl_filters_offsite_requests (line 719) | async def test_crawl_filters_offsite_requests(self): method test_crawl_cleans_up_checkpoint_on_completion (line 734) | async def test_crawl_cleans_up_checkpoint_on_completion(self): method test_crawl_handles_fetch_error_gracefully (line 745) | async def test_crawl_handles_fetch_error_gracefully(self): method test_crawl_log_levels_populated (line 757) | async def test_crawl_log_levels_populated(self): method test_crawl_resets_state_on_each_run (line 766) | async def test_crawl_resets_state_on_each_run(self): class TestItemsProperty (line 785) | class TestItemsProperty: method test_items_returns_item_list (line 786) | def test_items_returns_item_list(self): method test_items_initially_empty (line 790) | def test_items_initially_empty(self): method test_items_populated_after_crawl (line 795) | async def test_items_populated_after_crawl(self): class TestStreaming (line 806) | class TestStreaming: method test_stream_yields_items (line 808) | async def test_stream_yields_items(self): method test_stream_processes_follow_up_requests (line 820) | async def test_stream_processes_follow_up_requests(self): method test_stream_items_not_stored_in_items_list (line 841) | async def test_stream_items_not_stored_in_items_list(self): class TestPauseDuringCrawl (line 860) | class TestPauseDuringCrawl: method test_pause_stops_crawl_gracefully (line 862) | async def test_pause_stops_crawl_gracefully(self): method test_pause_with_checkpoint_sets_paused (line 885) | async def test_pause_with_checkpoint_sets_paused(self): method test_pause_without_checkpoint_does_not_set_paused (line 907) | async def test_pause_without_checkpoint_does_not_set_paused(self): FILE: tests/spiders/test_request.py class TestRequestCreation (line 11) | class TestRequestCreation: method test_basic_request_creation (line 14) | def test_basic_request_creation(self): method test_request_with_all_parameters (line 27) | def test_request_with_all_parameters(self): method test_request_meta_default_is_empty_dict (line 54) | def test_request_meta_default_is_empty_dict(self): class TestRequestProperties (line 65) | class TestRequestProperties: method test_domain_extraction (line 68) | def test_domain_extraction(self): method test_domain_with_port (line 73) | def test_domain_with_port(self): method test_domain_with_subdomain (line 78) | def test_domain_with_subdomain(self): method test_fingerprint_returns_bytes (line 83) | def test_fingerprint_returns_bytes(self): method test_fingerprint_is_deterministic (line 90) | def test_fingerprint_is_deterministic(self): method test_fingerprint_different_urls (line 96) | def test_fingerprint_different_urls(self): class TestRequestCopy (line 103) | class TestRequestCopy: method test_copy_creates_independent_request (line 106) | def test_copy_creates_independent_request(self): method test_copy_meta_is_independent (line 139) | def test_copy_meta_is_independent(self): class TestRequestComparison (line 151) | class TestRequestComparison: method test_priority_less_than (line 154) | def test_priority_less_than(self): method test_priority_greater_than (line 162) | def test_priority_greater_than(self): method test_equality_by_fingerprint (line 170) | def test_equality_by_fingerprint(self): method test_equality_different_priorities_same_fingerprint (line 184) | def test_equality_different_priorities_same_fingerprint(self): method test_comparison_with_non_request (line 195) | def test_comparison_with_non_request(self): class TestRequestStringRepresentation (line 204) | class TestRequestStringRepresentation: method test_str_returns_url (line 207) | def test_str_returns_url(self): method test_repr_without_callback (line 212) | def test_repr_without_callback(self): method test_repr_with_callback (line 222) | def test_repr_with_callback(self): class TestRequestPickling (line 234) | class TestRequestPickling: method test_pickle_without_callback (line 237) | def test_pickle_without_callback(self): method test_pickle_with_callback_stores_name (line 255) | def test_pickle_with_callback_stores_name(self): method test_pickle_with_none_callback (line 268) | def test_pickle_with_none_callback(self): method test_setstate_stores_callback_name (line 276) | def test_setstate_stores_callback_name(self): method test_pickle_roundtrip_preserves_session_kwargs (line 296) | def test_pickle_roundtrip_preserves_session_kwargs(self): class TestRequestRestoreCallback (line 315) | class TestRequestRestoreCallback: method test_restore_callback_from_spider (line 318) | def test_restore_callback_from_spider(self): method test_restore_callback_falls_back_to_parse (line 337) | def test_restore_callback_falls_back_to_parse(self): method test_restore_callback_with_none_name (line 353) | def test_restore_callback_with_none_name(self): method test_restore_callback_without_callback_name_attr (line 369) | def test_restore_callback_without_callback_name_attr(self): FILE: tests/spiders/test_result.py class TestItemList (line 12) | class TestItemList: method test_itemlist_is_list (line 15) | def test_itemlist_is_list(self): method test_itemlist_basic_operations (line 21) | def test_itemlist_basic_operations(self): method test_to_json_creates_file (line 31) | def test_to_json_creates_file(self): method test_to_json_creates_parent_directory (line 47) | def test_to_json_creates_parent_directory(self): method test_to_json_with_indent (line 58) | def test_to_json_with_indent(self): method test_to_jsonl_creates_file (line 71) | def test_to_jsonl_creates_file(self): method test_to_jsonl_one_object_per_line (line 93) | def test_to_jsonl_one_object_per_line(self): class TestCrawlStats (line 109) | class TestCrawlStats: method test_default_values (line 112) | def test_default_values(self): method test_elapsed_seconds (line 128) | def test_elapsed_seconds(self): method test_requests_per_second (line 134) | def test_requests_per_second(self): method test_requests_per_second_zero_elapsed (line 144) | def test_requests_per_second_zero_elapsed(self): method test_increment_status (line 154) | def test_increment_status(self): method test_increment_response_bytes (line 164) | def test_increment_response_bytes(self): method test_increment_requests_count (line 178) | def test_increment_requests_count(self): method test_to_dict (line 189) | def test_to_dict(self): method test_custom_stats (line 209) | def test_custom_stats(self): class TestCrawlResult (line 219) | class TestCrawlResult: method test_basic_creation (line 222) | def test_basic_creation(self): method test_completed_property_true_when_not_paused (line 234) | def test_completed_property_true_when_not_paused(self): method test_completed_property_false_when_paused (line 244) | def test_completed_property_false_when_paused(self): method test_len_returns_item_count (line 254) | def test_len_returns_item_count(self): method test_iter_yields_items (line 263) | def test_iter_yields_items(self): method test_result_with_stats (line 274) | def test_result_with_stats(self): class TestCrawlResultIntegration (line 292) | class TestCrawlResultIntegration: method test_full_workflow (line 295) | def test_full_workflow(self): FILE: tests/spiders/test_scheduler.py class TestSchedulerInit (line 10) | class TestSchedulerInit: method test_scheduler_starts_empty (line 13) | def test_scheduler_starts_empty(self): class TestSchedulerEnqueue (line 21) | class TestSchedulerEnqueue: method test_enqueue_single_request (line 25) | async def test_enqueue_single_request(self): method test_enqueue_multiple_requests (line 37) | async def test_enqueue_multiple_requests(self): method test_enqueue_duplicate_filtered (line 48) | async def test_enqueue_duplicate_filtered(self): method test_enqueue_duplicate_allowed_with_dont_filter (line 63) | async def test_enqueue_duplicate_allowed_with_dont_filter(self): method test_enqueue_different_methods_not_duplicate (line 78) | async def test_enqueue_different_methods_not_duplicate(self): class TestSchedulerDequeue (line 93) | class TestSchedulerDequeue: method test_dequeue_returns_request (line 97) | async def test_dequeue_returns_request(self): method test_dequeue_respects_priority_order (line 108) | async def test_dequeue_respects_priority_order(self): method test_dequeue_fifo_for_same_priority (line 131) | async def test_dequeue_fifo_for_same_priority(self): method test_dequeue_updates_length (line 149) | async def test_dequeue_updates_length(self): class TestSchedulerSnapshot (line 166) | class TestSchedulerSnapshot: method test_snapshot_empty_scheduler (line 170) | async def test_snapshot_empty_scheduler(self): method test_snapshot_captures_pending_requests (line 180) | async def test_snapshot_captures_pending_requests(self): method test_snapshot_captures_seen_set (line 197) | async def test_snapshot_captures_seen_set(self): method test_snapshot_returns_copies (line 213) | async def test_snapshot_returns_copies(self): method test_snapshot_excludes_dequeued_requests (line 231) | async def test_snapshot_excludes_dequeued_requests(self): class TestSchedulerRestore (line 250) | class TestSchedulerRestore: method test_restore_requests (line 254) | async def test_restore_requests(self): method test_restore_seen_set (line 271) | async def test_restore_seen_set(self): method test_restore_maintains_priority_order (line 287) | async def test_restore_maintains_priority_order(self): method test_restore_empty_checkpoint (line 308) | async def test_restore_empty_checkpoint(self): class TestSchedulerIntegration (line 319) | class TestSchedulerIntegration: method test_snapshot_and_restore_roundtrip (line 323) | async def test_snapshot_and_restore_roundtrip(self): method test_partial_processing_then_checkpoint (line 351) | async def test_partial_processing_then_checkpoint(self): method test_deduplication_after_restore (line 370) | async def test_deduplication_after_restore(self): FILE: tests/spiders/test_session.py class MockSession (line 9) | class MockSession: # type: ignore[type-arg] method __init__ (line 12) | def __init__(self, name: str = "mock"): method __aenter__ (line 18) | async def __aenter__(self): method __aexit__ (line 23) | async def __aexit__(self, *args): method fetch (line 27) | async def fetch(self, url: str, **kwargs): class TestSessionManagerInit (line 31) | class TestSessionManagerInit: method test_manager_starts_empty (line 34) | def test_manager_starts_empty(self): method test_manager_no_default_session_when_empty (line 40) | def test_manager_no_default_session_when_empty(self): class TestSessionManagerAdd (line 48) | class TestSessionManagerAdd: method test_add_single_session (line 51) | def test_add_single_session(self): method test_first_session_becomes_default (line 62) | def test_first_session_becomes_default(self): method test_add_multiple_sessions (line 71) | def test_add_multiple_sessions(self): method test_explicit_default_session (line 84) | def test_explicit_default_session(self): method test_add_duplicate_id_raises (line 93) | def test_add_duplicate_id_raises(self): method test_add_returns_self_for_chaining (line 101) | def test_add_returns_self_for_chaining(self): method test_method_chaining (line 109) | def test_method_chaining(self): method test_add_lazy_session (line 117) | def test_add_lazy_session(self): class TestSessionManagerRemove (line 127) | class TestSessionManagerRemove: method test_remove_session (line 130) | def test_remove_session(self): method test_remove_nonexistent_raises (line 140) | def test_remove_nonexistent_raises(self): method test_pop_returns_session (line 147) | def test_pop_returns_session(self): method test_remove_default_updates_default (line 158) | def test_remove_default_updates_default(self): method test_remove_lazy_session_cleans_up (line 170) | def test_remove_lazy_session_cleans_up(self): class TestSessionManagerGet (line 180) | class TestSessionManagerGet: method test_get_existing_session (line 183) | def test_get_existing_session(self): method test_get_nonexistent_raises_with_available (line 193) | def test_get_nonexistent_raises_with_available(self): class TestSessionManagerContains (line 203) | class TestSessionManagerContains: method test_contains_existing (line 206) | def test_contains_existing(self): method test_not_contains_missing (line 213) | def test_not_contains_missing(self): class TestSessionManagerAsyncContext (line 221) | class TestSessionManagerAsyncContext: method test_start_activates_sessions (line 225) | async def test_start_activates_sessions(self): method test_start_skips_lazy_sessions (line 237) | async def test_start_skips_lazy_sessions(self): method test_close_deactivates_sessions (line 252) | async def test_close_deactivates_sessions(self): method test_async_context_manager (line 266) | async def test_async_context_manager(self): method test_start_idempotent (line 278) | async def test_start_idempotent(self): class TestSessionManagerProperties (line 290) | class TestSessionManagerProperties: method test_session_ids_returns_list (line 293) | def test_session_ids_returns_list(self): method test_len_returns_session_count (line 305) | def test_len_returns_session_count(self): class TestSessionManagerIntegration (line 318) | class TestSessionManagerIntegration: method test_realistic_setup (line 321) | def test_realistic_setup(self): method test_lifecycle_management (line 335) | async def test_lifecycle_management(self): FILE: tests/spiders/test_spider.py class TestLogCounterHandler (line 16) | class TestLogCounterHandler: method test_initial_counts_are_zero (line 19) | def test_initial_counts_are_zero(self): method test_counts_debug_messages (line 30) | def test_counts_debug_messages(self): method test_counts_info_messages (line 48) | def test_counts_info_messages(self): method test_counts_warning_messages (line 65) | def test_counts_warning_messages(self): method test_counts_error_messages (line 82) | def test_counts_error_messages(self): method test_counts_critical_messages (line 99) | def test_counts_critical_messages(self): method test_counts_multiple_levels (line 116) | def test_counts_multiple_levels(self): class TestBlockedCodes (line 151) | class TestBlockedCodes: method test_blocked_codes_contains_expected_values (line 154) | def test_blocked_codes_contains_expected_values(self): method test_blocked_codes_does_not_contain_success (line 166) | def test_blocked_codes_does_not_contain_success(self): class ConcreteSpider (line 175) | class ConcreteSpider(Spider): method parse (line 181) | async def parse(self, response) -> AsyncGenerator[Dict[str, Any] | Req... class TestSpiderInit (line 185) | class TestSpiderInit: method test_spider_requires_name (line 188) | def test_spider_requires_name(self): method test_spider_initializes_logger (line 198) | def test_spider_initializes_logger(self): method test_spider_logger_has_log_counter (line 205) | def test_spider_logger_has_log_counter(self): method test_spider_with_crawldir (line 212) | def test_spider_with_crawldir(self): method test_spider_without_crawldir (line 219) | def test_spider_without_crawldir(self): method test_spider_custom_interval (line 225) | def test_spider_custom_interval(self): method test_spider_default_interval (line 231) | def test_spider_default_interval(self): method test_spider_repr (line 237) | def test_spider_repr(self): class TestSpiderClassAttributes (line 247) | class TestSpiderClassAttributes: method test_default_concurrent_requests (line 250) | def test_default_concurrent_requests(self): method test_default_concurrent_requests_per_domain (line 254) | def test_default_concurrent_requests_per_domain(self): method test_default_download_delay (line 258) | def test_default_download_delay(self): method test_default_max_blocked_retries (line 262) | def test_default_max_blocked_retries(self): method test_default_logging_level (line 266) | def test_default_logging_level(self): method test_default_allowed_domains_empty (line 270) | def test_default_allowed_domains_empty(self): class TestSpiderSessionConfiguration (line 275) | class TestSpiderSessionConfiguration: method test_default_configure_sessions (line 278) | def test_default_configure_sessions(self): method test_configure_sessions_error_raises_custom_exception (line 284) | def test_configure_sessions_error_raises_custom_exception(self): method test_configure_sessions_no_sessions_raises (line 299) | def test_configure_sessions_no_sessions_raises(self): class TestSpiderStartRequests (line 315) | class TestSpiderStartRequests: method test_start_requests_yields_from_start_urls (line 319) | async def test_start_requests_yields_from_start_urls(self): method test_start_requests_no_urls_raises (line 342) | async def test_start_requests_no_urls_raises(self): method test_start_requests_uses_default_session (line 359) | async def test_start_requests_uses_default_session(self): class TestSpiderHooks (line 369) | class TestSpiderHooks: method test_on_start_default (line 373) | async def test_on_start_default(self): method test_on_close_default (line 382) | async def test_on_close_default(self): method test_on_error_default (line 390) | async def test_on_error_default(self): method test_on_scraped_item_default_returns_item (line 400) | async def test_on_scraped_item_default_returns_item(self): method test_is_blocked_default_checks_status_codes (line 410) | async def test_is_blocked_default_checks_status_codes(self): method test_retry_blocked_request_default_returns_request (line 429) | async def test_retry_blocked_request_default_returns_request(self): class TestSpiderPause (line 443) | class TestSpiderPause: method test_pause_without_engine_raises (line 446) | def test_pause_without_engine_raises(self): class TestSpiderStats (line 454) | class TestSpiderStats: method test_stats_without_engine_raises (line 457) | def test_stats_without_engine_raises(self): class TestSpiderCustomization (line 465) | class TestSpiderCustomization: method test_custom_concurrent_requests (line 468) | def test_custom_concurrent_requests(self): method test_custom_allowed_domains (line 482) | def test_custom_allowed_domains(self): method test_custom_download_delay (line 497) | def test_custom_download_delay(self): class TestSpiderLogging (line 512) | class TestSpiderLogging: method test_custom_logging_level (line 515) | def test_custom_logging_level(self): method test_log_file_creates_handler (line 529) | def test_log_file_creates_handler(self): method test_logger_does_not_propagate (line 554) | def test_logger_does_not_propagate(self): class TestSessionConfigurationError (line 561) | class TestSessionConfigurationError: method test_exception_message (line 564) | def test_exception_message(self): method test_exception_is_exception (line 570) | def test_exception_is_exception(self):