Repository: g1879/DrissionPage Branch: master Commit: 4bebd1354f61 Files: 166 Total size: 1.3 MB Directory structure: gitextract_pk4tkf1v/ ├── .github/ │ └── FUNDING.yml ├── .gitignore ├── DrissionPage/ │ ├── __init__.py │ ├── __init__.pyi │ ├── _base/ │ │ ├── base.py │ │ ├── base.pyi │ │ ├── chromium.py │ │ ├── chromium.pyi │ │ ├── driver.py │ │ └── driver.pyi │ ├── _configs/ │ │ ├── chromium_options.py │ │ ├── chromium_options.pyi │ │ ├── configs.ini │ │ ├── options_manage.py │ │ ├── options_manage.pyi │ │ ├── session_options.py │ │ └── session_options.pyi │ ├── _elements/ │ │ ├── chromium_element.py │ │ ├── chromium_element.pyi │ │ ├── none_element.py │ │ ├── none_element.pyi │ │ ├── session_element.py │ │ └── session_element.pyi │ ├── _functions/ │ │ ├── browser.py │ │ ├── browser.pyi │ │ ├── by.py │ │ ├── cli.py │ │ ├── cookies.py │ │ ├── cookies.pyi │ │ ├── elements.py │ │ ├── elements.pyi │ │ ├── keys.py │ │ ├── keys.pyi │ │ ├── locator.py │ │ ├── locator.pyi │ │ ├── settings.py │ │ ├── settings.pyi │ │ ├── texts.py │ │ ├── tools.py │ │ ├── tools.pyi │ │ ├── web.py │ │ └── web.pyi │ ├── _pages/ │ │ ├── chromium_base.py │ │ ├── chromium_base.pyi │ │ ├── chromium_frame.py │ │ ├── chromium_frame.pyi │ │ ├── chromium_page.py │ │ ├── chromium_page.pyi │ │ ├── chromium_tab.py │ │ ├── chromium_tab.pyi │ │ ├── mix_tab.py │ │ ├── mix_tab.pyi │ │ ├── session_page.py │ │ ├── session_page.pyi │ │ ├── web_page.py │ │ └── web_page.pyi │ ├── _units/ │ │ ├── actions.py │ │ ├── actions.pyi │ │ ├── clicker.py │ │ ├── clicker.pyi │ │ ├── console.py │ │ ├── console.pyi │ │ ├── cookies_setter.py │ │ ├── cookies_setter.pyi │ │ ├── downloader.py │ │ ├── downloader.pyi │ │ ├── listener.py │ │ ├── listener.pyi │ │ ├── rect.py │ │ ├── rect.pyi │ │ ├── screencast.py │ │ ├── screencast.pyi │ │ ├── scroller.py │ │ ├── scroller.pyi │ │ ├── selector.py │ │ ├── selector.pyi │ │ ├── setter.py │ │ ├── setter.pyi │ │ ├── states.py │ │ ├── states.pyi │ │ ├── waiter.py │ │ └── waiter.pyi │ ├── common.py │ ├── errors.py │ ├── items.py │ └── version.py ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs_en/ │ ├── .nojekyll │ ├── ChromiumPage/ │ │ ├── actions.md │ │ ├── browser_options.md │ │ ├── create_page_object.md │ │ ├── element_operation.md │ │ ├── frame_operation.md │ │ ├── get_element_info.md │ │ ├── get_elements.md │ │ ├── get_page_info.md │ │ ├── introduction.md │ │ ├── network_listener.md │ │ ├── page_operation.md │ │ ├── screen_recording.md │ │ ├── tab_operation.md │ │ ├── upload_files.md │ │ ├── visit_web_page.md │ │ └── waiting.md │ ├── MixPage/ │ │ └── introduction.md │ ├── Q&A.md │ ├── README_en.md │ ├── SessionPage/ │ │ ├── create_page_object.md │ │ ├── get_element_info.md │ │ ├── get_elements.md │ │ ├── get_page_info.md │ │ ├── introduction.md │ │ ├── session_options.md │ │ ├── set_session.md │ │ └── visit_web_page.md │ ├── WebPage/ │ │ ├── create_page_object.md │ │ ├── introduction.md │ │ ├── mode_switch.md │ │ └── webpage_function.md │ ├── advance/ │ │ ├── accelerate_reading.md │ │ ├── commands.md │ │ ├── errors.md │ │ ├── ini_file.md │ │ ├── packaging.md │ │ ├── settings.md │ │ └── tools.md │ ├── cooperation.md │ ├── demos/ │ │ ├── douban_book_pics.md │ │ ├── login_gitee.md │ │ ├── maoyan_TOP100.md │ │ ├── multithreading_with_tabs.md │ │ └── starbucks_pics.md │ ├── download/ │ │ ├── DownloadKit.md │ │ ├── browser.md │ │ └── introduction.md │ ├── features/ │ │ ├── features_demos/ │ │ │ ├── compare_with_requests.md │ │ │ ├── compare_with_selenium.md │ │ │ ├── download_file.md │ │ │ ├── get_element_attributes.md │ │ │ └── switch_mode.md │ │ └── intimate_design.md │ ├── get_elements/ │ │ ├── cheat_sheet.md │ │ ├── introduction.md │ │ ├── more.md │ │ ├── not_found.md │ │ ├── simplify.md │ │ └── usage.md │ ├── get_start/ │ │ ├── basic_concept.md │ │ ├── before_start.md │ │ ├── examples/ │ │ │ ├── control_browser.md │ │ │ ├── data_packets.md │ │ │ └── switch_mode.md │ │ ├── import.md │ │ └── installation.md │ ├── history/ │ │ ├── 1.x.md │ │ ├── 2.x.md │ │ ├── 3.x.md │ │ ├── 4.x.md │ │ └── statement.md │ ├── usage_introduction.md │ └── whatsnew/ │ ├── 3_2.md │ └── 4_0.md ├── requirements.txt └── setup.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/FUNDING.yml ================================================ # These are supported funding model platforms github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] patreon: # Replace with a single Patreon username open_collective: # Replace with a single Open Collective username ko_fi: # Replace with a single Ko-fi username tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry liberapay: # Replace with a single Liberapay username issuehunt: # Replace with a single IssueHunt username lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry polar: # Replace with a single Polar username buy_me_a_coffee: # Replace with a single Buy Me a Coffee username thanks_dev: # Replace with a single thanks.dev username custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ ================================================ FILE: DrissionPage/__init__.py ================================================ # -*- coding:utf-8 -*- """ @Author : g1879 @Contact : g1879@qq.com @Website : https://DrissionPage.cn @Copyright: (c) 2020 by g1879, Inc. All Rights Reserved. 允许任何人以个人身份使用或分发本项目源代码,但仅限于学习和合法非盈利目的。 个人或组织如未获得版权持有人授权,不得将本项目以源代码或二进制形式用于商业行为。 使用本项目需满足以下条款,如使用过程中出现违反任意一项条款的情形,授权自动失效。 * 禁止将DrissionPage应用到任何可能违反当地法律规定和道德约束的项目中 * 禁止将DrissionPage用于任何可能有损他人利益的项目中 * 禁止将DrissionPage用于攻击与骚扰行为 * 遵守Robots协议,禁止将DrissionPage用于采集法律或系统Robots协议不允许的数据 使用DrissionPage发生的一切行为均由使用人自行负责。 因使用DrissionPage进行任何行为所产生的一切纠纷及后果均与版权持有人无关, 版权持有人不承担任何使用DrissionPage带来的风险和损失。 版权持有人不对DrissionPage可能存在的缺陷导致的任何损失负任何责任。 """ from ._base.chromium import Chromium from ._configs.chromium_options import ChromiumOptions from ._configs.session_options import SessionOptions from ._pages.chromium_page import ChromiumPage from ._pages.session_page import SessionPage from ._pages.web_page import WebPage from .version import __version__ ================================================ FILE: DrissionPage/__init__.pyi ================================================ # -*- coding:utf-8 -*- """ @Author : g1879 @Contact : g1879@qq.com @Website : https://DrissionPage.cn @Copyright: (c) 2020 by g1879, Inc. All Rights Reserved. """ from ._base.chromium import Chromium from ._configs.chromium_options import ChromiumOptions from ._configs.session_options import SessionOptions from ._pages.chromium_page import ChromiumPage from ._pages.session_page import SessionPage from ._pages.web_page import WebPage from .version import __version__ __all__ = ['WebPage', 'ChromiumPage', 'Chromium', 'ChromiumOptions', 'SessionOptions', 'SessionPage', '__version__'] ================================================ FILE: DrissionPage/_base/base.py ================================================ # -*- coding:utf-8 -*- """ @Author : g1879 @Contact : g1879@qq.com @Website : https://DrissionPage.cn @Copyright: (c) 2020 by g1879, Inc. All Rights Reserved. """ from abc import abstractmethod from copy import copy from pathlib import Path from re import sub from urllib.parse import quote from DrissionGet import DrissionGet from requests import Session from .._configs.session_options import SessionOptions from .._elements.none_element import NoneElement from .._functions.elements import get_frame, get_eles from .._functions.locator import get_loc from .._functions.settings import Settings as _S from .._functions.web import format_html from ..errors import ElementNotFoundError, LocatorError class BaseParser(object): def __call__(self, locator): return self.ele(locator) def ele(self, locator, index=1, timeout=None): return self._ele(locator, timeout, index=index, method='ele()') def eles(self, locator, timeout=None): return self._ele(locator, timeout, index=None) def find(self, locators, any_one=True, first_ele=True, timeout=None): if 'Session' in self._type: timeout = 0 if timeout is None: timeout = self.timeout r = get_eles(locators, self, any_one, first_ele, timeout) if any_one: for ele in r: if r[ele]: return ele, r[ele] return None, None return r # ----------------以下属性或方法待后代实现---------------- @property def html(self): return '' def s_ele(self, locator=None): pass def s_eles(self, locator): pass def _ele(self, locator, timeout=None, index=1, raise_err=None, method=None): pass def _find_elements(self, locator, timeout, index=1, relative=False, raise_err=None): pass class BaseElement(BaseParser): def __init__(self, owner=None): self.owner = owner self._type = 'BaseElement' def get_frame(self, loc_or_ind, timeout=None): if not isinstance(loc_or_ind, (int, str, tuple)): raise ValueError(_S._lang.join(_S._lang.INCORRECT_TYPE_, 'loc_or_ind', ALLOW_TYPE=_S._lang.LOC_OR_IND, CURR_VAL=loc_or_ind)) return get_frame(self, loc_ind_ele=loc_or_ind, timeout=timeout) def _ele(self, locator, timeout=None, index=1, relative=False, raise_err=None, method=None): if hasattr(locator, '_type'): return locator if timeout is None: timeout = self.timeout r = self._find_elements(locator, timeout=timeout, index=index, relative=relative, raise_err=raise_err) if r or isinstance(r, (list, float, int)): return r if raise_err is True or (_S.raise_when_ele_not_found and raise_err is None): raise ElementNotFoundError(METHOD=method, ARGS={'locator': locator, 'index': index, 'timeout': timeout}) r.method = method r.args = {'locator': locator, 'index': index, 'timeout': timeout} return r @property def timeout(self): return self.owner.timeout if self.owner else 10 @property def child_count(self): return int(self._ele('xpath:count(./*)')) # ----------------以下属性或方法由后代实现---------------- @property def tag(self): return def parent(self, level_or_loc=1): pass def next(self, index=1): pass def nexts(self): pass class DrissionElement(BaseElement): @property def link(self): return self.attr('href') or self.attr('src') @property def css_path(self): return self._get_ele_path(xpath=False) @property def xpath(self): return self._get_ele_path() @property def comments(self): return self.eles('xpath:.//comment()') def texts(self, text_node_only=False): texts = self.eles('xpath:/text()') if text_node_only else [x if isinstance(x, str) else x.text for x in self.eles('xpath:./text() | *')] return [format_html(x.strip(' ').rstrip('\n')) for x in texts if x and sub('[\r\n\t ]', '', x) != ''] def parent(self, level_or_loc=1, index=1, timeout=None): if isinstance(level_or_loc, int): loc = f'xpath:./ancestor::*[{level_or_loc}]' elif isinstance(level_or_loc, (tuple, str)): loc = get_loc(level_or_loc, True) if loc[0] == 'css selector': raise LocatorError(_S._lang.UNSUPPORTED_CSS_SYNTAX) loc = f'xpath:./ancestor::{loc[1].lstrip(". / ")}[{index}]' else: raise ValueError(_S._lang.join(_S._lang.INCORRECT_TYPE_, 'level_or_loc', ALLOW_TYPE='tuple, int, str', CURR_VAL=level_or_loc)) return self._ele(loc, timeout=timeout, relative=True, raise_err=False, method='parent()') def child(self, locator='', index=1, timeout=None, ele_only=True): if isinstance(locator, int): index = locator locator = '' if not locator: loc = '*' if ele_only else 'node()' else: loc = get_loc(locator, True) # 把定位符转换为xpath if loc[0] == 'css selector': raise LocatorError(_S._lang.UNSUPPORTED_CSS_SYNTAX) loc = loc[1].lstrip('./') node = self._ele(f'xpath:./{loc}', timeout=timeout, index=index, relative=True, raise_err=False) return node if node else NoneElement(self.owner, 'child()', {'locator': locator, 'index': index, 'ele_only': ele_only}) def prev(self, locator='', index=1, timeout=None, ele_only=True): return self._get_relative('prev()', 'preceding', True, locator, index, timeout, ele_only) def next(self, locator='', index=1, timeout=None, ele_only=True): return self._get_relative('next()', 'following', True, locator, index, timeout, ele_only) def before(self, locator='', index=1, timeout=None, ele_only=True): return self._get_relative('before()', 'preceding', False, locator, index, timeout, ele_only) def after(self, locator='', index=1, timeout=None, ele_only=True): return self._get_relative('after()', 'following', False, locator, index, timeout, ele_only) def children(self, locator='', timeout=None, ele_only=True): if not locator: loc = '*' if ele_only else 'node()' else: loc = get_loc(locator, True) # 把定位符转换为xpath if loc[0] == 'css selector': raise LocatorError(_S._lang.UNSUPPORTED_CSS_SYNTAX) loc = loc[1].lstrip('./') loc = f'xpath:./{loc}' nodes = self._ele(loc, timeout=timeout, index=None, relative=True) return [e for e in nodes if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')] def prevs(self, locator='', timeout=None, ele_only=True): return self._get_relatives(locator=locator, direction='preceding', timeout=timeout, ele_only=ele_only) def nexts(self, locator='', timeout=None, ele_only=True): return self._get_relatives(locator=locator, direction='following', timeout=timeout, ele_only=ele_only) def befores(self, locator='', timeout=None, ele_only=True): return self._get_relatives(locator=locator, direction='preceding', brother=False, timeout=timeout, ele_only=ele_only) def afters(self, locator='', timeout=None, ele_only=True): return self._get_relatives(locator=locator, direction='following', brother=False, timeout=timeout, ele_only=ele_only) def _get_relative(self, func, direction, brother, locator='', index=1, timeout=None, ele_only=True): if isinstance(locator, int): index = locator locator = '' node = self._get_relatives(index, locator, direction, brother, timeout, ele_only) return node if node else NoneElement(self.owner, func, {'locator': locator, 'index': index, 'ele_only': ele_only}) def _get_relatives(self, index=None, locator='', direction='following', brother=True, timeout=.5, ele_only=True): brother = '-sibling' if brother else '' if not locator: loc = '*' if ele_only else 'node()' else: loc = get_loc(locator, True) # 把定位符转换为xpath if loc[0] == 'css selector': raise LocatorError(_S._lang.UNSUPPORTED_CSS_SYNTAX) loc = loc[1].lstrip('./') loc = f'xpath:./{direction}{brother}::{loc}' if index is not None: index = index if direction == 'following' else -index nodes = self._ele(loc, timeout=timeout, index=index, relative=True, raise_err=False) if isinstance(nodes, list): nodes = [e for e in nodes if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')] return nodes # ----------------以下属性或方法由后代实现---------------- @property def attrs(self): return @property def text(self): return @property def raw_text(self): return @abstractmethod def attr(self, name): return '' def _get_ele_path(self, xpath=True): return '' def _find_elements(self, locator, timeout, index=1, relative=False, raise_err=None): pass class BasePage(BaseParser): def __init__(self): self._url = None self._url_available = None self.retry_times = 3 self.retry_interval = 2 self._downloader = None self._download_path = None self._none_ele_return_value = False self._none_ele_value = None self._session = None self._headers = None self._session_options = None self._type = 'BasePage' @property def title(self): ele = self._ele('xpath://title', raise_err=False, method='title') return ele.text if ele else None @property def url_available(self): return self._url_available @property def download_path(self): return self._download_path @property def download(self): if self._downloader is None: if not self._session: self._create_session() self._downloader = DrissionGet(driver=self, save_path=self.download_path) return self._downloader def _before_connect(self, url, retry, interval): is_file = False if isinstance(url, Path) or ('://' not in url and ':\\\\' not in url): p = Path(url) if p.exists(): url = str(p.absolute()) is_file = True self._url = url if is_file else quote(url, safe='-_.~!*\'"();:@&=+$,/\\?#[]%') retry = retry if retry is not None else self.retry_times interval = interval if interval is not None else self.retry_interval return retry, interval, is_file def _set_session_options(self, session_or_options=None): if not session_or_options: self._session_options = SessionOptions(session_or_options) elif isinstance(session_or_options, SessionOptions): self._session_options = session_or_options elif isinstance(session_or_options, Session): self._session_options = SessionOptions() self._session = copy(session_or_options) self._headers = self._session.headers self._session.headers = None def _create_session(self): if not self._session_options: self._set_session_options() self._session, self._headers = self._session_options.make_session() # ----------------以下属性或方法由后代实现---------------- @property def url(self): return @property def json(self): return @property def user_agent(self): return @abstractmethod def get(self, url, show_errmsg=False, retry=None, interval=None): pass def _ele(self, locator, timeout=None, index=1, raise_err=None, method=None): if not locator: raise ElementNotFoundError(METHOD=method, ARGS={'locator': locator, 'index': index, 'timeout': timeout}) if timeout is None: timeout = self.timeout r = self._find_elements(locator, timeout=timeout, index=index, raise_err=raise_err) if r or isinstance(r, list): return r if raise_err is True or (_S.raise_when_ele_not_found and raise_err is None): raise ElementNotFoundError(METHOD=method, ARGS={'locator': locator, 'index': index, 'timeout': timeout}) r.method = method r.args = {'locator': locator, 'index': index, 'timeout': timeout} return r ================================================ FILE: DrissionPage/_base/base.pyi ================================================ # -*- coding:utf-8 -*- """ @Author : g1879 @Contact : g1879@qq.com @Website : https://DrissionPage.cn @Copyright: (c) 2020 by g1879, Inc. All Rights Reserved. """ from abc import abstractmethod from typing import Union, Tuple, List, Any, Optional, Dict from DrissionGet import DrissionGet from requests import Session from requests.structures import CaseInsensitiveDict from .._configs.session_options import SessionOptions from .._elements.chromium_element import ChromiumElement from .._elements.none_element import NoneElement from .._elements.session_element import SessionElement from .._functions.elements import SessionElementsList from .._pages.chromium_frame import ChromiumFrame from .._pages.chromium_page import ChromiumPage from .._pages.session_page import SessionPage from .._pages.web_page import WebPage class BaseParser(object): """所有页面、元素类的基类""" _type: str timeout: float def __call__(self, locator: Union[Tuple[str, str], str], index: int = 1): ... def ele(self, locator: Union[Tuple[str, str], str, BaseElement], index: int = 1, timeout: float = None): ... def eles(self, locator: Union[Tuple[str, str], str], timeout=None): ... def find(self, locators: Union[str, List[str], tuple], any_one: bool = True, first_ele: bool = True, timeout: float = None) -> Union[Dict[str, ChromiumElement], Dict[str, SessionElement], Dict[str, List[ChromiumElement]], Dict[str, List[SessionElement]], Tuple[str, SessionElement], Tuple[str, ChromiumElement]]: """传入多个定位符,获取多个ele :param locators: 定位符组成的列表 :param any_one: 是否任何一个定位符找到结果即返回 :param first_ele: 每个定位符是否只获取第一个元素 :param timeout: 超时时间(秒) :return: any_one为True时,返回一个找到的元素定位符和对象组成的元组,格式:(loc, ele),全都没找到返回(None, None) any_one为False时,返回dict格式,key为定位符,value为找到的元素或列表 """ ... # ----------------以下属性或方法待后代实现---------------- @property def html(self) -> str: ... def s_ele(self, locator: Union[Tuple[str, str], str, BaseElement, None] = None, index: int = 1) -> SessionElement: ... def s_eles(self, locator: Union[Tuple[str, str], str]) -> SessionElementsList: ... def _ele(self, locator: Union[Tuple[str, str], str], timeout: float = None, index: Optional[int] = 1, raise_err: bool = None, method: str = None): ... def _find_elements(self, locator: Union[Tuple[str, str], str], timeout: float, index: Optional[int] = 1, relative: bool = False, raise_err: bool = None): ... class BaseElement(BaseParser): """各元素类的基类""" owner: BasePage = ... def __init__(self, owner: BasePage = None): ... @property def timeout(self) -> float: """返回其查找元素时超时时间""" ... @property def child_count(self) -> int: """返回直接子元素的个数""" ... # ----------------以下属性或方法由后代实现---------------- @property def tag(self) -> str: ... def parent(self, level_or_loc: Union[tuple, str, int] = 1): ... def prev(self, index: int = 1) -> None: ... def prevs(self) -> None: ... def next(self, index: int = 1): ... def nexts(self): ... def get_frame(self, loc_or_ind, timeout=None) -> ChromiumFrame: """获取元素中一个frame对象 :param loc_or_ind: 定位符、iframe序号,序号从1开始,可传入负数获取倒数第几个 :param timeout: 查找元素超时时间(秒) :return: ChromiumFrame对象 """ ... def _ele(self, locator: Union[Tuple[str, str], str], timeout: float = None, index: Optional[int] = 1, relative: bool = False, raise_err: bool = None, method: str = None): """调用获取元素的方法 :param locator: 定位符 :param timeout: 超时时间(秒) :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 :param relative: 是否相对定位 :param raise_err: 找不到时是否抛出异常 :param method: 调用的方法名 :return: 元素对象或它们组成的列表 """ ... class DrissionElement(BaseElement): """ChromiumElement 和 SessionElement的基类,但不是ShadowRoot的基类""" def __init__(self, owner: BasePage = None): ... @property def link(self) -> str: """返回href或src绝对url""" ... @property def css_path(self) -> str: """返回css path路径""" ... @property def xpath(self) -> str: """返回xpath路径""" ... @property def comments(self) -> list: """返回元素注释文本组成的列表""" ... def texts(self, text_node_only: bool = False) -> list: """返回元素内所有直接子节点的文本,包括元素和文本节点 :param text_node_only: 是否只返回文本节点 :return: 文本列表 """ ... def parent(self, level_or_loc: Union[tuple, str, int] = 1, index: int = 1, timeout: float = None) -> Union[DrissionElement, None]: """返回上面某一级父元素,可指定层数或用查询语法定位 :param level_or_loc: 第几级父元素,1开始,或定位符 :param index: 当level_or_loc传入定位符,使用此参数选择第几个结果,1开始 :param timeout: 时间(秒) :return: 上级元素对象 """ ... def child(self, locator: Union[Tuple[str, str], str, int] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: """返回直接子元素元素或节点组成的列表,可用查询语法筛选 :param locator: 用于筛选的查询语法 :param index: 第几个查询结果,1开始 :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 直接子元素或节点文本组成的列表 """ ... def prev(self, locator: Union[Tuple[str, str], str, int] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 :param locator: 用于筛选的查询语法 :param index: 前面第几个查询结果,1开始 :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素 """ ... def next(self, locator: Union[Tuple[str, str], str, int] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 :param locator: 用于筛选的查询语法 :param index: 后面第几个查询结果,1开始 :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素 """ ... def before(self, locator: Union[Tuple[str, str], str, int] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 :param locator: 用于筛选的查询语法 :param index: 前面第几个查询结果,1开始 :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的某个元素或节点 """ ... def after(self, locator: Union[Tuple[str, str], str, int] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> Union[DrissionElement, str, NoneElement]: """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 :param locator: 用于筛选的查询语法 :param index: 后面第几个查询结果,1开始 :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素后面的某个元素或节点 """ ... def children(self, locator: Union[Tuple[str, str], str] = '', timeout: float = None, ele_only: bool = True) -> List[Union[DrissionElement, str]]: """返回直接子元素元素或节点组成的列表,可用查询语法筛选 :param locator: 用于筛选的查询语法 :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 直接子元素或节点文本组成的列表 """ ... def prevs(self, locator: Union[Tuple[str, str], str] = '', timeout: float = None, ele_only: bool = True) -> List[Union[DrissionElement, str]]: """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 :param locator: 用于筛选的查询语法 :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素或节点文本组成的列表 """ ... def nexts(self, locator: Union[Tuple[str, str], str] = '', timeout: float = None, ele_only: bool = True) -> List[Union[DrissionElement, str]]: """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 :param locator: 用于筛选的查询语法 :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 兄弟元素或节点文本组成的列表 """ ... def befores(self, locator: Union[Tuple[str, str], str] = '', timeout: float = None, ele_only: bool = True) -> List[Union[DrissionElement, str]]: """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 :param locator: 用于筛选的查询语法 :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的元素或节点组成的列表 """ ... def afters(self, locator: Union[Tuple[str, str], str] = '', timeout: float = None, ele_only: bool = True) -> List[Union[DrissionElement, str]]: """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 :param locator: 用于筛选的查询语法 :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素后面的元素或节点组成的列表 """ ... def _get_relative(self, func: str, direction: str, brother: bool, locator: Union[Tuple[str, str], str] = '', index: int = 1, timeout: float = None, ele_only: bool = True) -> DrissionElement: """获取一个亲戚元素或节点,可用查询语法筛选,可指定返回筛选结果的第几个 :param func: 方法名称 :param direction: 方向,'following' 或 'preceding' :param locator: 用于筛选的查询语法 :param index: 前面第几个查询结果,1开始 :param timeout: 查找节点的超时时间(秒) :param ele_only: 是否只获取元素,为False时把文本、注释节点也纳入 :return: 本元素前面的某个元素或节点 """ ... def _get_relatives(self, index: int = None, locator: Union[Tuple[str, str], str] = '', direction: str = 'following', brother: bool = True, timeout: float = 0.5, ele_only: bool = True) -> List[Union[DrissionElement, str]]: """按要求返回兄弟元素或节点组成的列表 :param index: 获取第几个,该参数不为None时只获取该编号的元素 :param locator: 用于筛选的查询语法 :param direction: 'following' 或 'preceding',查找的方向 :param brother: 查找范围,在同级查找还是整个dom前后查找 :param timeout: 查找等待时间(秒) :return: 元素对象或字符串 """ ... # ----------------以下属性或方法由后代实现---------------- @property def attrs(self) -> dict: ... @property def text(self) -> str: ... @property def raw_text(self) -> str: ... @abstractmethod def attr(self, name: str) -> str: ... def _get_ele_path(self, xpath: bool = True) -> str: ... class BasePage(BaseParser): """页面类的基类""" _url_available: Optional[bool] = ... retry_times: int = ... retry_interval: float = ... _download_path: Optional[str] = ... _downloader: Optional[DrissionGet] = ... _none_ele_return_value: bool = ... _none_ele_value: Any = ... _page: Union[ChromiumPage, SessionPage, WebPage] = ... _session: Optional[Session] = ... _headers: Optional[CaseInsensitiveDict] = ... _session_options: Optional[SessionOptions] = ... def __init__(self): ... @property def title(self) -> Union[str, None]: """返回网页title""" ... @property def url_available(self) -> bool: """返回当前访问的url有效性""" ... @property def download_path(self) -> str: """返回默认下载路径""" ... @property def download(self) -> DrissionGet: """返回下载器对象""" ... def _before_connect(self, url: str, retry: int, interval: float) -> tuple: """连接前的准备 :param url: 要访问的url :param retry: 重试次数 :param interval: 重试间隔 :return: 重试次数、间隔、是否文件组成的tuple """ ... def _set_session_options(self, session_or_options: Union[Session, SessionOptions] = None) -> None: """启动配置 :param session_or_options: Session、SessionOptions对象 :return: None """ ... def _create_session(self) -> None: """创建内建Session对象""" ... # ----------------以下属性或方法由后代实现---------------- @property def url(self) -> str: ... @property def json(self) -> dict: ... @property def user_agent(self) -> str: ... @abstractmethod def get(self, url: str, show_errmsg: bool = False, retry: int = None, interval: float = None): ... def _ele(self, locator, timeout: float = None, index: Optional[int] = 1, raise_err: bool = None, method: str = None): """调用获取元素的方法 :param locator: 定位符 :param timeout: 超时时间(秒) :param index: 获取第几个,从1开始,可传入负数获取倒数第几个 :param raise_err: 找不到时是否抛出异常 :param method: 调用的方法名 :return: 元素对象或它们组成的列表 """ ... ================================================ FILE: DrissionPage/_base/chromium.py ================================================ # -*- coding:utf-8 -*- """ @Author : g1879 @Contact : g1879@qq.com @Website : https://DrissionPage.cn @Copyright: (c) 2020 by g1879, Inc. All Rights Reserved. """ from pathlib import Path from re import match from shutil import rmtree from threading import Lock from time import sleep, perf_counter from requests import Session from websocket import WebSocketBadStatusException from .driver import BrowserDriver, Driver from .._configs.chromium_options import ChromiumOptions from .._functions.browser import connect_browser from .._functions.cookies import CookiesList from .._functions.settings import Settings as _S from .._functions.tools import PortFinder, raise_error from .._pages.chromium_base import Timeout from .._pages.chromium_tab import ChromiumTab from .._pages.mix_tab import MixTab from .._units.downloader import DownloadManager from .._units.setter import BrowserSetter from .._units.states import BrowserStates from .._units.waiter import BrowserWaiter from ..errors import BrowserConnectError, CDPError, PageDisconnectedError, IncorrectURLError __ERROR__ = 'error' class Chromium(object): _BROWSERS = {} _lock = Lock() def __new__(cls, addr_or_opts=None, session_options=None): opt = handle_options(addr_or_opts) with cls._lock: is_headless, browser_id, is_exists, ws_only, driver = run_browser(opt) if browser_id in cls._BROWSERS: return cls._BROWSERS[browser_id] r = object.__new__(cls) driver.owner = r r._driver = driver r._chromium_options = opt r._is_headless = is_headless r._is_exists = is_exists r._ws_only = ws_only r.id = browser_id cls._BROWSERS[browser_id] = r return r def __init__(self, addr_or_opts=None, session_options=None): if hasattr(self, '_created'): return self._created = True self._type = 'Chromium' self._frames = {} self._drivers = {} self._all_drivers = {} self._relation = {} self._newest_tab_id = None self._set = None self._wait = None self._states = None self._timeouts = Timeout(**self._chromium_options.timeouts) self._load_mode = self._chromium_options.load_mode self._download_path = str(Path(self._chromium_options.download_path).absolute()) self._auto_handle_alert = None self._none_ele_return_value = False self._none_ele_value = None self.retry_times = self._chromium_options.retry_times self.retry_interval = self._chromium_options.retry_interval self.address = self._chromium_options.address self._ws_address = (self._chromium_options.ws_address if self._chromium_options.ws_address else f'ws://{self.address}/devtools/browser/{self.id}') self._disconnect_flag = False if (not self._ws_only and (not self._chromium_options._ua_set and self._is_headless != self._chromium_options.is_headless) or (self._is_exists and self._chromium_options._new_env)): self.quit(3, True) connect_browser(self._chromium_options) s = Session() s.trust_env = False s.keep_alive = False ws = s.get(f'http://{self.address}/json/version', headers={'Connection': 'close'}) self.id = ws.json()['webSocketDebuggerUrl'].split('/')[-1] self._driver = BrowserDriver(self.id, self._ws_address, self) ws.close() s.close() self._is_exists = False self._frames = {} self._drivers = {} self._all_drivers = {} self.version = self._run_cdp('Browser.getVersion')['product'] self._process_id = None try: r = self._run_cdp('SystemInfo.getProcessInfo') for i in r.get('processInfo', []): if i['type'] == 'browser': self._process_id = i['id'] break except: pass self._run_cdp('Target.setDiscoverTargets', discover=True) self._driver.set_callback('Target.targetDestroyed', self._onTargetDestroyed) self._driver.set_callback('Target.targetCreated', self._onTargetCreated) self._dl_mgr = DownloadManager(self) self._session_options = session_options @property def user_data_path(self): return self._chromium_options.user_data_path @property def process_id(self): return self._process_id @property def timeout(self): return self._timeouts.base @property def timeouts(self): return self._timeouts @property def load_mode(self): return self._load_mode @property def download_path(self): return self._download_path @property def set(self): if self._set is None: self._set = BrowserSetter(self) return self._set @property def states(self): if self._states is None: self._states = BrowserStates(self) return self._states @property def wait(self): if self._wait is None: self._wait = BrowserWaiter(self) return self._wait @property def tabs_count(self): j = self._run_cdp('Target.getTargets')['targetInfos'] # 不要改用get,避免卡死 return len([i for i in j if i['type'] in ('page', 'webview') and not i['url'].startswith('devtools://')]) @property def tab_ids(self): if self._ws_only: return [i['targetId'] for i in self._run_cdp('Target.getTargets')['targetInfos'] if i['type'] in ('page', 'webview') and not i['url'].startswith('devtools://')] else: return [i['id'] for i in self._driver.get(f'http://{self.address}/json').json() if i['type'] in ('page', 'webview') and not i['url'].startswith('devtools://')] @property def latest_tab(self): return self._get_tab(id_or_num=self.tab_ids[0], as_id=not _S.singleton_tab_obj) def cookies(self, all_info=False): cks = self._run_cdp(f'Storage.getCookies')['cookies'] r = cks if all_info else [{'name': c['name'], 'value': c['value'], 'domain': c['domain']} for c in cks] return CookiesList(r) def new_tab(self, url=None, new_window=False, background=False, new_context=False): return self._new_tab(True, url=url, new_window=new_window, background=background, new_context=new_context) def get_tab(self, id_or_num=None, title=None, url=None, tab_type='page'): t = self._get_tab(id_or_num=id_or_num, title=title, url=url, tab_type=tab_type, mix=True, as_id=False) if t._type != 'MixTab': raise RuntimeError(_S._lang.join(_S._lang.TAB_OBJ_EXISTS)) return t def get_tabs(self, title=None, url=None, tab_type='page'): return self._get_tabs(title=title, url=url, tab_type=tab_type, mix=True, as_id=False) def close_tabs(self, tabs_or_ids, others=False): if isinstance(tabs_or_ids, str): tabs = {tabs_or_ids} elif isinstance(tabs_or_ids, ChromiumTab): tabs = {tabs_or_ids.tab_id} elif isinstance(tabs_or_ids, (list, tuple)): tabs = set(i.tab_id if isinstance(i, ChromiumTab) else i for i in tabs_or_ids) else: raise ValueError(_S._lang.join(_S._lang.INCORRECT_TYPE_, 'tabs_or_ids', ALLOW_TYPE=_S._lang.TAB_OR_ID, CURR_VAL=tabs_or_ids)) all_tabs = set(self.tab_ids) if others: tabs = all_tabs - tabs if len(all_tabs - tabs) > 0: for tab in tabs: self._close_tab(tab=tab) else: self.quit() def _close_tab(self, tab): if isinstance(tab, str): tab = self.get_tab(tab) tab._run_cdp('Target.closeTarget', targetId=tab.tab_id) while tab.driver.is_running and tab.tab_id in self._all_drivers: sleep(.01) def activate_tab(self, id_ind_tab): if isinstance(id_ind_tab, int): id_ind_tab += -1 if id_ind_tab else 1 id_ind_tab = self.tab_ids[id_ind_tab] elif isinstance(id_ind_tab, ChromiumTab): id_ind_tab = id_ind_tab.tab_id self._run_cdp('Target.activateTarget', targetId=id_ind_tab) def reconnect(self): self._disconnect_flag = True self._driver.stop() BrowserDriver.BROWSERS.pop(self.id) self._driver = BrowserDriver(self.id, self._ws_address, self) self._run_cdp('Target.setDiscoverTargets', discover=True) self._driver.set_callback('Target.targetDestroyed', self._onTargetDestroyed) self._driver.set_callback('Target.targetCreated', self._onTargetCreated) self._disconnect_flag = False def clear_cache(self, cache=True, cookies=True): if cache: self.latest_tab.run_cdp('Network.clearBrowserCache') if cookies: self._run_cdp('Storage.clearCookies') def quit(self, timeout=5, force=False, del_data=False): try: self._run_cdp('Browser.close') except PageDisconnectedError: pass self._driver.stop() drivers = list(self._all_drivers.values()) for tab in drivers: for driver in tab: driver.stop() if not self.address.startswith('127.0.0.1'): return if force: pids = None try: pids = [pid['id'] for pid in self._run_cdp('SystemInfo.getProcessInfo')['processInfo']] except: pass if pids: from psutil import Process for pid in pids: try: Process(pid).kill() except: pass from os import popen from platform import system end_time = perf_counter() + timeout while perf_counter() < end_time: ok = True for pid in pids: txt = f'tasklist | findstr {pid}' if system().lower() == 'windows' else f'ps -ef | grep {pid}' p = popen(txt) sleep(.05) try: if f' {pid} ' in p.read(): ok = False break except TypeError: pass if ok: break if del_data and not self._chromium_options.is_auto_port and self._chromium_options.user_data_path: path = Path(self._chromium_options.user_data_path) rmtree(path, True) def _new_tab(self, mix=True, url=None, new_window=False, background=False, new_context=False): tab_type = MixTab if mix else ChromiumTab tab = None if new_context: tab = self._run_cdp('Target.createBrowserContext')['browserContextId'] kwargs = {'url': ''} if new_window: kwargs['newWindow'] = True if background: kwargs['background'] = True if tab: kwargs['browserContextId'] = tab if self.states.is_incognito and not new_context: return _new_tab_by_js(self, url, tab_type, new_window) else: try: tab = self._run_cdp('Target.createTarget', **kwargs)['targetId'] except CDPError: return _new_tab_by_js(self, url, tab_type, new_window) while self.states.is_alive: if tab in self._drivers: break sleep(.01) else: raise BrowserConnectError(_S._lang.BROWSER_DISCONNECTED) tab = tab_type(self, tab) if url: tab.get(url) return tab def _get_tab(self, id_or_num=None, title=None, url=None, tab_type='page', mix=True, as_id=False): if id_or_num is not None: if isinstance(id_or_num, int): id_or_num = self.tab_ids[id_or_num - 1 if id_or_num > 0 else id_or_num] elif isinstance(id_or_num, ChromiumTab): return id_or_num.tab_id if as_id else ChromiumTab(self, id_or_num.tab_id) else: j = self._run_cdp('Target.getTargets')['targetInfos'] if id_or_num not in [i['targetId'] for i in j]: raise RuntimeError(_S._lang.join(_S._lang.NO_SUCH_TAB, ARG=id_or_num, ALL_TABS=self.tab_ids)) elif title == url is None and tab_type == 'page': id_or_num = self.tab_ids[0] else: tabs = self._get_tabs(title=title, url=url, tab_type=tab_type, as_id=True) if tabs: id_or_num = tabs[0] else: raise RuntimeError(_S._lang.join(_S._lang.NO_SUCH_TAB, ARGS={'id_or_num': id_or_num, 'title': title, 'url': url, 'tab_type': tab_type})) if as_id: return id_or_num with self._lock: return MixTab(self, id_or_num) if mix else ChromiumTab(self, id_or_num) def _get_tabs(self, title=None, url=None, tab_type='page', mix=True, as_id=False): if self._ws_only: tabs = self._run_cdp('Target.getTargets')['targetInfos'] _id = 'targetId' else: tabs = self._driver.get(f'http://{self.address}/json').json() # 不要改用cdp _id = 'id' if isinstance(tab_type, str): tab_type = {tab_type} elif isinstance(tab_type, (list, tuple, set)): tab_type = set(tab_type) elif tab_type is not None: raise ValueError(_S._lang.join(_S._lang.INCORRECT_TYPE_, 'tab_type', ALLOW_TYPE='set, list, tuple, str, None', CURR_VAL=tab_type)) tabs = [i for i in tabs if ((title is None or title in i['title']) and (url is None or url in i['url']) and (tab_type is None or i['type'] in tab_type) and i['title'] != 'chrome-extension://neajdppkdcdipfabeoofebfddakdcjhd/audio.html')] if as_id: return [tab[_id] for tab in tabs] with self._lock: if mix: return [MixTab(self, tab[_id]) for tab in tabs] else: return [ChromiumTab(self, tab[_id]) for tab in tabs] def _run_cdp(self, cmd, **cmd_args): ignore = cmd_args.pop('_ignore', None) r = self._driver.run(cmd, **cmd_args) return r if __ERROR__ not in r else raise_error(r, self, ignore) def _get_driver(self, tab_id, owner=None): d = self._drivers.pop(tab_id, None) if not d: if self._ws_only: d = Driver(tab_id, self._ws_address) d.session_id = d.run('Target.attachToTarget', targetId=tab_id, flatten=True)['sessionId'] else: d = Driver(tab_id, f'ws://{self.address}/devtools/page/{tab_id}') d.owner = owner self._all_drivers.setdefault(tab_id, set()).add(d) return d def _onTargetCreated(self, **kwargs): if (kwargs['targetInfo']['type'] in ('page', 'webview') and kwargs['targetInfo']['targetId'] not in self._all_drivers and not kwargs['targetInfo']['url'].startswith('devtools://')): try: tab_id = kwargs['targetInfo']['targetId'] self._frames[tab_id] = tab_id if self._ws_only: d = Driver(tab_id, self._ws_address) d.session_id = d.run('Target.attachToTarget', targetId=tab_id, flatten=True)['sessionId'] else: d = Driver(tab_id, f'ws://{self.address}/devtools/page/{tab_id}') self._relation[tab_id] = kwargs['targetInfo'].get('openerId', None) self._drivers[tab_id] = d self._all_drivers.setdefault(tab_id, set()).add(d) self._newest_tab_id = tab_id except WebSocketBadStatusException: pass def _onTargetDestroyed(self, **kwargs): tab_id = kwargs['targetId'] self._dl_mgr.clear_tab_info(tab_id) for key in [k for k, i in self._frames.items() if i == tab_id]: self._frames.pop(key, None) for d in self._all_drivers.get(tab_id, tuple()): d.stop() self._drivers.pop(tab_id, None) self._all_drivers.pop(tab_id, None) self._relation.pop(tab_id, None) def _on_disconnect(self): if not self._disconnect_flag: Chromium._BROWSERS.pop(self.id, None) if self._chromium_options.is_auto_port and self._chromium_options.user_data_path: path = Path(self._chromium_options.user_data_path) end_time = perf_counter() + 7 while perf_counter() < end_time: if not path.exists(): break try: rmtree(path) break except (PermissionError, FileNotFoundError, OSError): pass sleep(.03) def handle_options(addr_or_opts): if not addr_or_opts: _chromium_options = ChromiumOptions(addr_or_opts) if _chromium_options.is_auto_port: port, path = PortFinder(_chromium_options.tmp_path).get_port(_chromium_options.is_auto_port) _chromium_options._address = f'127.0.0.1:{port}' _chromium_options.set_user_data_path(path) elif isinstance(addr_or_opts, ChromiumOptions): if addr_or_opts.is_auto_port: port, path = PortFinder(addr_or_opts.tmp_path).get_port(addr_or_opts.is_auto_port) addr_or_opts._address = f'127.0.0.1:{port}' addr_or_opts.set_user_data_path(path) _chromium_options = addr_or_opts elif isinstance(addr_or_opts, str) and ':' in addr_or_opts: _chromium_options = ChromiumOptions() _chromium_options.set_address(addr_or_opts) elif isinstance(addr_or_opts, int): _chromium_options = ChromiumOptions() _chromium_options.set_local_port(addr_or_opts) else: raise ValueError(_S._lang.join(_S._lang.INCORRECT_VAL_, 'addr_or_opts', ALLOW_TYPE=_S._lang.IP_OR_OPTIONS, CURR_VAL=addr_or_opts)) return _chromium_options def run_browser(chromium_options): ws_only = False if chromium_options.ws_address: try: driver = BrowserDriver('', chromium_options.ws_address, None) except EnvironmentError: raise except Exception as e: raise BrowserConnectError(_S._lang.join(_S._lang.BROWSER_CONNECT_ERR2, INFO=str(e))) browser_id = driver.run('Target.getTargets') if 'error' in browser_id: raise BrowserConnectError(_S._lang.join(_S._lang.BROWSER_CONNECT_ERR2, INFO=browser_id['error'])) browser_id = browser_id['targetInfos'][0]['browserContextId'] is_headless = 'headless' in driver.run('Browser.getVersion')['userAgent'].lower() driver.id = browser_id if 'devtools/browser' not in chromium_options.ws_address: return is_headless, browser_id, True, True, driver s = Session() s.trust_env = False s.keep_alive = False try: ws = s.get(f'http://{chromium_options.address}/json/version', headers={'Connection': 'close'}, timeout=2) if not ws: ws_only = True else: ws.close() except: ws_only = True s.close() return is_headless, browser_id, True, ws_only, driver is_exists = connect_browser(chromium_options) try: s = Session() s.trust_env = False s.keep_alive = False ws = s.get(f'http://{chromium_options.address}/json/version', headers={'Connection': 'close'}, timeout=2) if not ws and not chromium_options.ws_address: raise BrowserConnectError(_S._lang.BROWSER_CONNECT_ERR2) json = ws.json() browser_id = json['webSocketDebuggerUrl'].split('/')[-1] is_headless = 'headless' in json['User-Agent'].lower() ws.close() s.close() except KeyError: raise BrowserConnectError(_S._lang.BROWSER_NOT_FOR_CONTROL) except: raise BrowserConnectError(_S._lang.BROWSER_CONNECT_ERR2) driver = BrowserDriver('', f'ws://{chromium_options.address}/devtools/browser/{browser_id}', None) browser_id = driver.run('Target.getTargets') if 'error' in browser_id: raise BrowserConnectError(_S._lang.join(_S._lang.BROWSER_CONNECT_ERR2, INFO=browser_id['error'])) browser_id = browser_id['targetInfos'][0]['browserContextId'] return is_headless, browser_id, is_exists, ws_only, driver def _new_tab_by_js(browser: Chromium, url, tab_type, new_window): mix = tab_type == MixTab tab = browser._get_tab(mix=mix) if url and not match(r'^.*?://.*', url): raise IncorrectURLError(_S._lang.INVALID_URL, url=url) url = f'"{url}"' if url else '""' new = 'target="_new"' if new_window else 'target="_blank"' tid = browser._newest_tab_id tab.run_js(f'window.open({url}, {new})') tid = browser.wait.new_tab(curr_tab=tid) return browser._get_tab(tid, mix=mix) ================================================ FILE: DrissionPage/_base/chromium.pyi ================================================ # -*- coding:utf-8 -*- """ @Author : g1879 @Contact : g1879@qq.com @Website : https://DrissionPage.cn @Copyright: (c) 2020 by g1879, Inc. All Rights Reserved. """ from threading import Lock from typing import List, Optional, Set, Dict, Union, Tuple, Literal, Any from .driver import BrowserDriver, Driver from .._configs.chromium_options import ChromiumOptions from .._configs.session_options import SessionOptions from .._functions.cookies import CookiesList from .._pages.chromium_base import Timeout, ChromiumBase from .._pages.chromium_tab import ChromiumTab from .._pages.mix_tab import MixTab from .._units.downloader import DownloadManager from .._units.setter import BrowserSetter from .._units.states import BrowserStates from .._units.waiter import BrowserWaiter class Chromium(object): _BROWSERS: dict = ... _lock: Lock = ... id: str = ... address: str = ... _ws_address: str = ... version: str = ... retry_times: int = ... retry_interval: float = ... _set: Optional[BrowserSetter] = ... _wait: Optional[BrowserWaiter] = ... _states: Optional[BrowserStates] = ... _chromium_options: ChromiumOptions = ... _session_options: SessionOptions = ... _driver: BrowserDriver = ... _frames: dict = ... _drivers: Dict[str, Driver] = ... _all_drivers: Dict[str, Set[Driver]] = ... _relation: Dict[str, Optional[str]] = ... _process_id: Optional[int] = ... _dl_mgr: DownloadManager = ... _timeouts: Timeout = ... _load_mode: str = ... _download_path: str = ... _auto_handle_alert: Optional[bool] = ... _is_exists: bool = ... _is_headless: bool = ... _ws_only: bool = ... _disconnect_flag: bool = ... _none_ele_return_value: bool = ... _none_ele_value: Any = ... _newest_tab_id: Optional[str] = ... def __new__(cls, addr_or_opts: Union[str, int, ChromiumOptions] = None, session_options: Union[SessionOptions, None, False] = None): """ :param addr_or_opts: 浏览器地址:端口、ws地址、ChromiumOptions对象或端口数字(int) :param session_options: 使用双模Tab时使用的默认Session配置,为None使用ini文件配置,为False不从ini读取 """ ... def __init__(self, addr_or_opts: Union[str, int, ChromiumOptions] = None, session_options: Union[SessionOptions, None, False] = None): """ :param addr_or_opts: 浏览器地址:端口、ws地址、ChromiumOptions对象或端口数字(int) :param session_options: 使用双模Tab时使用的默认Session配置,为None使用ini文件配置,为False不从ini读取 """ ... @property def user_data_path(self) -> str: """返回用户文件夹路径""" ... @property def process_id(self) -> Optional[int]: """返回浏览器进程id""" ... @property def timeout(self) -> float: """返回基础超时设置""" ... @property def timeouts(self) -> Timeout: """返回所有超时设置""" ... @property def load_mode(self) -> Literal['none', 'normal', 'eager']: """返回页面加载模式,包括 'none', 'normal', 'eager' 三种""" ... @property def download_path(self) -> str: """返回默认下载路径""" ... @property def set(self) -> BrowserSetter: """返回用于设置的对象""" ... @property def states(self) -> BrowserStates: """返回用于获取状态的对象""" ... @property def wait(self) -> BrowserWaiter: """返回用于等待的对象""" ... @property def tabs_count(self) -> int: """返回标签页数量,只统计page、webview类型""" ... @property def tab_ids(self) -> List[str]: """返回所有标签页id组成的列表,只统计page、webview类型""" ... @property def latest_tab(self) -> Union[MixTab, str]: """返回最新的标签页,最新标签页指最后创建或最后被激活的 当Settings.singleton_tab_obj==True时返回Tab对象,否则返回tab id""" ... def cookies(self, all_info: bool = False) -> CookiesList: """以list格式返回所有域名的cookies :param all_info: 是否返回所有内容,False则只返回name, value, domain :return: cookies组成的列表 """ ... def new_tab(self, url: str = None, new_window: bool = False, background: bool = False, new_context: bool = False) -> MixTab: """新建一个标签页 :param url: 新标签页跳转到的网址,为None时新建空标签页 :param new_window: 是否在新窗口打开标签页,隐身模式下无效 :param background: 是否不激活新标签页,隐身模式和访客模式及new_window为True时无效 :param new_context: 是否创建独立环境,隐身模式和访客模式下无效 :return: 新标签页对象 """ ... def get_tab(self, id_or_num: Union[str, int] = None, title: str = None, url: str = None, tab_type: Union[str, list, tuple] = 'page', as_id: bool = False) -> Union[MixTab, str]: """获取一个标签页对象,id_or_num不为None时,后面几个参数无效 :param id_or_num: 要获取的标签页id或序号,序号从1开始,可传入负数获取倒数第几个,不是视觉排列顺序,而是激活顺序 :param title: 要匹配title的文本,模糊匹配,为None则匹配所有 :param url: 要匹配url的文本,模糊匹配,为None则匹配所有 :param tab_type: tab类型,可用列表输入多个,如 'page', 'iframe' 等,为None则匹配所有 :param as_id: 是否返回标签页id而不是标签页对象 :return: Tab对象 """ ... def get_tabs(self, title: str = None, url: str = None, tab_type: Union[str, list, tuple] = 'page', as_id: bool = False) -> List[MixTab, str]: """查找符合条件的tab,返回它们组成的列表,title和url是与关系 :param title: 要匹配title的文本 :param url: 要匹配url的文本 :param tab_type: tab类型,可用列表输入多个 :param as_id: 是否返回标签页id而不是标签页对象 :return: Tab对象列表 """ ... def close_tabs(self, tabs_or_ids: Union[str, ChromiumTab, List[Union[str, ChromiumTab]], Tuple[Union[str, ChromiumTab]]], others: bool = False) -> None: """关闭传入的标签页,可传入多个 :param tabs_or_ids: 指定的标签页对象或id,可用列表或元组传入多个 :param others: 是否关闭指定标签页之外的 :return: None """ ... def _close_tab(self, tab: Union[ChromiumBase, str]): """关闭一个标签页 :param tab: 标签页对象或id :return: None """ def activate_tab(self, id_ind_tab: Union[int, str, ChromiumTab]) -> None: """使一个标签页显示到前端 :param id_ind_tab: 标签页id(str)、Tab对象或标签页序号(int),序号从1开始 :return: None """ ... def reconnect(self) -> None: """断开重连""" ... def clear_cache(self, cache: bool = True, cookies: bool = True) -> None: """清除缓存,可选要清除的项 :param cache: 是否清除cache :param cookies: 是否清除cookies :return: None """ ... def quit(self, timeout: float = 5, force: bool = False, del_data: bool = False) -> None: """关闭浏览器 :param timeout: 等待浏览器关闭超时时间(秒) :param force: 是否立刻强制终止进程 :param del_data: 是否删除用户文件夹 :return: None """ ... def _new_tab(self, mix: bool = True, url: str = None, new_window: bool = False, background: bool = False, new_context: bool = False) -> Union[ChromiumTab, MixTab]: """新建一个标签页 :param mix: 是否创建MixTab :param url: 新标签页跳转到的网址 :param new_window: 是否在新窗口打开标签页 :param background: 是否不激活新标签页,如new_window为True则无效 :param new_context: 是否创建新的上下文 :return: 新标签页对象 """ ... def _get_tab(self, id_or_num: Union[str, int] = None, title: str = None, url: str = None, tab_type: Union[str, list, tuple] = 'page', mix: bool = True, as_id: bool = False) -> Union[ChromiumTab, str]: """获取一个标签页对象,id_or_num不为None时,后面几个参数无效 :param id_or_num: 要获取的标签页id或序号,序号从1开始,可传入负数获取倒数第几个,不是视觉排列顺序,而是激活顺序 :param title: 要匹配title的文本,模糊匹配,为None则匹配所有 :param url: 要匹配url的文本,模糊匹配,为None则匹配所有 :param tab_type: tab类型,可用列表输入多个,如 'page', 'iframe' 等,为None则匹配所有 :param mix: 是否返回可切换模式的Tab对象 :param as_id: 是否返回标签页id而不是标签页对象,mix=False时无效 :return: Tab对象 """ ... def _get_tabs(self, title: str = None, url: str = None, tab_type: Union[str, list, tuple] = 'page', mix: bool = True, as_id: bool = False) -> List[ChromiumTab, str]: """查找符合条件的tab,返回它们组成的列表,title和url是与关系 :param title: 要匹配title的文本 :param url: 要匹配url的文本 :param tab_type: tab类型,可用列表输入多个 :param mix: 是否返回可切换模式的Tab对象 :param as_id: 是否返回标签页id而不是标签页对象,mix=False时无效 :return: Tab对象列表 """ ... def _run_cdp(self, cmd, **cmd_args) -> dict: """执行Chrome DevTools Protocol语句 :param cmd: 协议项目 :param cmd_args: 参数 :return: 执行的结果 """ ... def _get_driver(self, tab_id: str, owner=None) -> Driver: """新建并返回指定tab id的Driver :param tab_id: 标签页id :param owner: 使用该驱动的对象 :return: Driver对象 """ ... def _onTargetCreated(self, **kwargs): ... def _onTargetDestroyed(self, **kwargs): ... def _on_disconnect(self): ... def handle_options(addr_or_opts): """设置浏览器启动属性 :param addr_or_opts: 'ip:port'、ChromiumOptions、Driver :return: 返回ChromiumOptions对象 """ ... def run_browser(chromium_options)->Tuple[bool, str, bool, bool, BrowserDriver]: """连接浏览器 :param chromium_options: ChromiumOptions对象 :return: 返回(是否无头, 浏览器id, 是否接管已存在, 是否只能用ws连接, 浏览器连接驱动) """ ... ================================================ FILE: DrissionPage/_base/driver.py ================================================ # -*- coding: utf-8 -*- """ @Author : g1879 @Contact : g1879@qq.com @Website : https://DrissionPage.cn @Copyright: (c) 2020 by g1879, Inc. All Rights Reserved. """ from json import dumps, loads, JSONDecodeError from queue import Queue, Empty from threading import Thread from time import perf_counter, sleep from requests import Session from requests import adapters from websocket import (WebSocketTimeoutException, WebSocketConnectionClosedException, create_connection, WebSocketException, WebSocketBadStatusException) from .._functions.settings import Settings as _S from ..errors import PageDisconnectedError, BrowserConnectError adapters.DEFAULT_RETRIES = 5 class Driver(object): def __init__(self, _id, address, owner=None): self.id = _id self.address = address self.owner = owner self.alert_flag = False # 标记alert出现,跳过一条请求后复原 self._cur_id = 0 self._ws = None self._recv_th = Thread(target=self._recv_loop) self._handle_event_th = Thread(target=self._handle_event_loop) self._recv_th.daemon = True self._handle_event_th.daemon = True self._handle_immediate_event_th = None self.is_running = False self.session_id = None self.event_handlers = {} self.immediate_event_handlers = {} self.method_results = {} self.event_queue = Queue() self.immediate_event_queue = Queue() self.start() def _send(self, message, timeout=None): self._cur_id += 1 ws_id = self._cur_id message['id'] = ws_id message_json = dumps(message) end_time = perf_counter() + timeout if timeout is not None else None self.method_results[ws_id] = Queue() try: self._ws.send(message_json) if timeout == 0: self.method_results.pop(ws_id, None) return {'id': ws_id, 'result': {}} except (OSError, WebSocketConnectionClosedException): self.method_results.pop(ws_id, None) return {'error': {'message': 'connection disconnected'}, 'type': 'connection_error'} while self.is_running: try: result = self.method_results[ws_id].get(timeout=.2) self.method_results.pop(ws_id, None) return result except Empty: if self.alert_flag and message['method'].startswith(('Input.', 'Runtime.')): return {'error': {'message': 'alert exists.'}, 'type': 'alert_exists'} if timeout is not None and perf_counter() > end_time: self.method_results.pop(ws_id, None) return {'error': {'message': 'alert exists.'}, 'type': 'alert_exists'} \ if self.alert_flag else {'error': {'message': 'timeout'}, 'type': 'timeout'} continue return {'error': {'message': 'connection disconnected'}, 'type': 'connection_error'} def _recv_loop(self): while self.is_running: try: # self._ws.settimeout(1) msg_json = self._ws.recv() msg = loads(msg_json) except WebSocketTimeoutException: continue except (WebSocketException, OSError, WebSocketConnectionClosedException, JSONDecodeError): self._stop() return if 'method' in msg: if msg['method'].startswith('Page.javascriptDialog'): self.alert_flag = msg['method'].endswith('Opening') function = self.immediate_event_handlers.get(msg['method']) if function: self._handle_immediate_event(function, msg['params']) else: self.event_queue.put(msg) elif msg.get('id') in self.method_results: self.method_results[msg['id']].put(msg) def _handle_event_loop(self): while self.is_running: try: event = self.event_queue.get(timeout=1) except Empty: continue function = self.event_handlers.get(event['method']) if function: function(**event['params']) self.event_queue.task_done() def _handle_immediate_event_loop(self): while not self.immediate_event_queue.empty(): function, kwargs = self.immediate_event_queue.get(timeout=1) try: function(**kwargs) except PageDisconnectedError: pass def _handle_immediate_event(self, function, kwargs): self.immediate_event_queue.put((function, kwargs)) if self._handle_immediate_event_th is None or not self._handle_immediate_event_th.is_alive(): self._handle_immediate_event_th = Thread(target=self._handle_immediate_event_loop) self._handle_immediate_event_th.daemon = True self._handle_immediate_event_th.start() def run(self, _method, **kwargs): if not self.is_running: return {'error': 'connection disconnected', 'type': 'connection_error'} timeout = kwargs.pop('_timeout', _S.cdp_timeout) if self.session_id: result = self._send({'method': _method, 'params': kwargs, 'sessionId': self.session_id}, timeout=timeout) else: result = self._send({'method': _method, 'params': kwargs}, timeout=timeout) if 'result' not in result and 'error' in result: kwargs['_timeout'] = timeout return {'error': result['error']['message'], 'type': result.get('type', 'call_method_error'), 'method': _method, 'args': kwargs, 'data': result['error'].get('data')} else: return result['result'] def start(self): self.is_running = True try: self._ws = create_connection(self.address, enable_multithread=True, suppress_origin=True) except WebSocketBadStatusException as e: if 'Handshake status 403 Forbidden' in str(e): raise EnvironmentError(_S._lang.join(_S._lang.UPGRADE_WS)) else: raise except ConnectionRefusedError: raise BrowserConnectError(_S._lang.BROWSER_NOT_EXIST) self._recv_th.start() self._handle_event_th.start() return True def stop(self): self._stop() while self._handle_event_th.is_alive() or self._recv_th.is_alive(): sleep(.01) return True def _stop(self): if not self.is_running: return False self.is_running = False if self._ws: self._ws.close() self._ws = None self.event_handlers.clear() self.method_results.clear() self.event_queue.queue.clear() if hasattr(self.owner, '_on_disconnect'): self.owner._on_disconnect() def set_callback(self, event, callback, immediate=False): handler = self.immediate_event_handlers if immediate else self.event_handlers if callback: handler[event] = callback else: handler.pop(event, None) class BrowserDriver(Driver): BROWSERS = {} def __new__(cls, _id, address, owner): if address in cls.BROWSERS: return cls.BROWSERS[address] return object.__new__(cls) def __init__(self, _id, address, owner): if hasattr(self, '_created'): return self._created = True BrowserDriver.BROWSERS[address] = self super().__init__(_id, address, owner) def __repr__(self): return f'' @staticmethod def get(url): s = Session() s.trust_env = False s.keep_alive = False r = s.get(url, headers={'Connection': 'close'}) r.close() s.close() return r ================================================ FILE: DrissionPage/_base/driver.pyi ================================================ # -*- coding: utf-8 -*- """ @Author : g1879 @Contact : g1879@qq.com @Website : https://DrissionPage.cn @Copyright: (c) 2020 by g1879, Inc. All Rights Reserved. """ from queue import Queue from threading import Thread from typing import Union, Callable, Dict, Optional from requests import Response from websocket import WebSocket from .._base.chromium import Chromium class Driver(object): id: str address: str owner = ... alert_flag: bool _cur_id: int _ws: Optional[WebSocket] _recv_th: Thread _handle_event_th: Thread _handle_immediate_event_th: Optional[Thread] session_id: Optional[str] = ... is_running: bool event_handlers: dict immediate_event_handlers: dict method_results: dict event_queue: Queue immediate_event_queue: Queue def __init__(self, _id: str, address: str, owner=None): """ :param _id: 标签页id :param address: 浏览器连接地址 :param owner: 创建这个驱动的对象 """ ... def _send(self, message: dict, timeout: float = None) -> dict: """发送信息到浏览器,并返回浏览器返回的信息 :param message: 发送给浏览器的数据 :param timeout: 超时时间,为None表示无限 :return: 浏览器返回的数据 """ ... def _recv_loop(self) -> None: """接收浏览器信息的守护线程方法""" ... def _handle_event_loop(self) -> None: """当接收到浏览器信息,执行已绑定的方法""" ... def _handle_immediate_event_loop(self): ... def _handle_immediate_event(self, function: Callable, kwargs: dict): """处理立即执行的动作 :param function: 要运行下方法 :param kwargs: 方法参数 :return: None """ ... def run(self, _method: str, **kwargs) -> dict: """执行cdp方法 :param _method: cdp方法名 :param kwargs: cdp参数 :return: 执行结果 """ ... def start(self) -> bool: """启动连接""" ... def stop(self) -> bool: """中断连接""" ... def _stop(self) -> None: """中断连接""" ... def set_callback(self, event: str, callback: Union[Callable, None], immediate: bool = False) -> None: """绑定cdp event和回调方法 :param event: cdp event :param callback: 绑定到cdp event的回调方法 :param immediate: 是否要立即处理的动作 :return: None """ ... class BrowserDriver(Driver): BROWSERS: Dict[str, Driver] = ... owner: Chromium = ... def __new__(cls, _id: str, address: str, owner: Chromium): """ :param _id: 浏览器id :param address: 浏览器连接地址 :param owner: 浏览器对象 """ ... def __init__(self, _id: str, address: str, owner: Optional[Chromium]): """ :param _id: 浏览器id :param address: 浏览器连接地址 :param owner: 浏览器对象 """ ... @staticmethod def get(url) -> Response: """ :param url: 要访问的链接 :return: Response对象 """ ... ================================================ FILE: DrissionPage/_configs/chromium_options.py ================================================ # -*- coding:utf-8 -*- """ @Author : g1879 @Contact : g1879@qq.com @Website : https://DrissionPage.cn @Copyright: (c) 2020 by g1879, Inc. All Rights Reserved. """ from pathlib import Path from re import search from urllib.parse import urlparse from .options_manage import OptionsManager from .._functions.settings import Settings as _S class ChromiumOptions(object): def __init__(self, read_file=True, ini_path=None): self._user_data_path = None self._user = 'Default' self._prefs_to_del = [] self.clear_file_flags = False self._is_headless = False self._ua_set = False self.ws_address = '' if read_file is False: ini_path = False self.ini_path = None elif ini_path: ini_path = Path(ini_path).absolute() if not ini_path.exists(): raise FileNotFoundError(_S._lang.join(_S._lang.INI_NOT_FOUND, PATH=ini_path)) self.ini_path = str(ini_path) else: self.ini_path = str(Path(__file__).parent / 'configs.ini') om = OptionsManager(ini_path) options = om.chromium_options self._download_path = om.paths.get('download_path', '.') or '.' self._tmp_path = om.paths.get('tmp_path', None) or None self._arguments = options.get('arguments', []) self._browser_path = options.get('browser_path', '') self._extensions = options.get('extensions', []) self._prefs = options.get('prefs', {}) self._flags = options.get('flags', {}) self._address = options.get('address', '') self._load_mode = options.get('load_mode', 'normal') self._system_user_path = options.get('system_user_path', False) self._existing_only = options.get('existing_only', False) self._new_env = options.get('new_env', False) for i in self._arguments: if i.startswith('--headless'): self._is_headless = True break self._proxy = om.proxies.get('http', None) or om.proxies.get('https', None) user_path = user = False for arg in self._arguments: if arg.startswith('--user-data-dir='): self.set_user_data_path(arg[16:]) user_path = True if arg.startswith('--profile-directory='): self.set_user(arg[20:]) user = True if user and user_path: break timeouts = om.timeouts self._timeouts = {'base': timeouts['base'], 'page_load': timeouts['page_load'], 'script': timeouts['script']} self._auto_port = options.get('auto_port', False) others = om.others self._retry_times = others.get('retry_times', 3) self._retry_interval = others.get('retry_interval', 2) return def __repr__(self): return f'' @property def download_path(self): return self._download_path @property def browser_path(self): return self._browser_path @property def user_data_path(self): return self._user_data_path @property def tmp_path(self): return self._tmp_path @property def user(self): return self._user @property def load_mode(self): return self._load_mode @property def timeouts(self): return self._timeouts @property def proxy(self): return self._proxy @property def address(self): return self._address @property def arguments(self): return self._arguments @property def extensions(self): return self._extensions @property def preferences(self): return self._prefs @property def flags(self): return self._flags @property def system_user_path(self): return self._system_user_path @property def is_existing_only(self): return self._existing_only @property def is_auto_port(self): return self._auto_port @property def retry_times(self): return self._retry_times @property def retry_interval(self): return self._retry_interval @property def is_headless(self): return self._is_headless def set_retry(self, times=None, interval=None): if times is not None: self._retry_times = times if interval is not None: self._retry_interval = interval return self def set_argument(self, arg, value=None): self.remove_argument(arg) if value is not False: if arg == '--headless': if value == 'false': self._is_headless = False else: if value is None: value = 'new' self._arguments.append(f'--headless={value}') self._is_headless = True else: arg_str = arg if value is None else f'{arg}={value}' self._arguments.append(arg_str) elif arg == '--headless': self._is_headless = False return self def remove_argument(self, value): elements_to_delete = [arg for arg in self._arguments if arg == value or arg.startswith(f'{value}=')] if not elements_to_delete: return self if len(elements_to_delete) == 1: self._arguments.remove(elements_to_delete[0]) else: self._arguments = [arg for arg in self._arguments if arg not in elements_to_delete] return self def add_extension(self, path): self._extensions.append(path) return self def remove_extensions(self): self._extensions = [] return self def set_pref(self, arg, value): self._prefs[arg] = value return self def remove_pref(self, arg): self._prefs.pop(arg, None) return self def remove_pref_from_file(self, arg): self._prefs_to_del.append(arg) return self def set_flag(self, flag, value=None): if value is False: self._flags.pop(flag, None) else: self._flags[flag] = value return self def clear_flags_in_file(self): self.clear_file_flags = True return self def clear_flags(self): self._flags = {} return self def clear_arguments(self): self._arguments = [] return self def clear_prefs(self): self._prefs = {} return self def set_timeouts(self, base=None, page_load=None, script=None): if base is not None: self._timeouts['base'] = base if page_load is not None: self._timeouts['page_load'] = page_load if script is not None: self._timeouts['script'] = script return self def set_user(self, user='Default'): self.set_argument('--profile-directory', user) self._user = user return self def headless(self, on_off=True): on_off = 'new' if on_off else on_off return self.set_argument('--headless', on_off) def no_imgs(self, on_off=True): on_off = None if on_off else False return self.set_argument('--blink-settings=imagesEnabled=false', on_off) def no_js(self, on_off=True): on_off = None if on_off else False return self.set_argument('--disable-javascript', on_off) def mute(self, on_off=True): on_off = None if on_off else False return self.set_argument('--mute-audio', on_off) def incognito(self, on_off=True): on_off = None if on_off else False self.set_argument('--incognito', on_off) return self.set_argument('--inprivate', on_off) # edge def new_env(self, on_off=True): self._new_env = on_off return self def ignore_certificate_errors(self, on_off=True): on_off = None if on_off else False return self.set_argument('--ignore-certificate-errors', on_off) def set_user_agent(self, user_agent): return self.set_argument('--user-agent', user_agent) def set_proxy(self, proxy): if search(r'.*?:.*?@.*?\..*', proxy): print(_S._lang.UNSUPPORTED_USER_PROXY) if proxy.lower().startswith('socks'): print(_S._lang.UNSUPPORTED_SOCKS_PROXY) self._proxy = proxy return self.set_argument('--proxy-server', proxy) def set_load_mode(self, value): if value not in ('normal', 'eager', 'none'): raise ValueError(_S._lang.join(_S._lang.INCORRECT_VAL_, 'value', ALLOW_VAL="'normal', 'eager', 'none'", CURR_VAL=value)) self._load_mode = value.lower() return self def set_paths(self, browser_path=None, local_port=None, address=None, download_path=None, user_data_path=None, cache_path=None): """快捷的路径设置函数,即将废弃 :param browser_path: 浏览器可执行文件路径 :param local_port: 本地端口号 :param address: 调试浏览器地址,例:127.0.0.1:9222 :param download_path: 下载文件路径 :param user_data_path: 用户数据路径 :param cache_path: 缓存路径 :return: 当前对象 """ if browser_path is not None: self.set_browser_path(browser_path) if local_port is not None: self.set_local_port(local_port) if address is not None: self.set_address(address) if download_path is not None: self.set_download_path(download_path) if user_data_path is not None: self.set_user_data_path(user_data_path) if cache_path is not None: self.set_cache_path(cache_path) return self def set_local_port(self, port): self._address = f'127.0.0.1:{port}' self._auto_port = False self.ws_address = '' return self def set_address(self, address): address = address.replace('localhost', '127.0.0.1') self.ws_address = '' if address.startswith('http'): address = address.lstrip('htps:/') elif address.startswith(('ws:', 'wss:')): self.ws_address = address address = urlparse(address).netloc self._address = address self._auto_port = False return self def set_browser_path(self, path): self._browser_path = str(path) return self def set_download_path(self, path): self._download_path = '.' if path is None else str(path) return self def set_tmp_path(self, path): self._tmp_path = str(path) return self def set_user_data_path(self, path): u = str(path) self.set_argument('--user-data-dir', u) self._user_data_path = u self._auto_port = False return self def set_cache_path(self, path): self.set_argument('--disk-cache-dir', str(path)) return self def use_system_user_path(self, on_off=True): self._system_user_path = on_off return self def auto_port(self, on_off=True, scope=None): if on_off: self._auto_port = scope if scope else (9600, 59600) else: self._auto_port = False self._address = '' self.ws_address = '' return self def existing_only(self, on_off=True): self._existing_only = on_off return self def save(self, path=None): if path == 'default': path = (Path(__file__).parent / 'configs.ini').absolute() elif path is None: if self.ini_path: path = Path(self.ini_path).absolute() else: path = (Path(__file__).parent / 'configs.ini').absolute() else: path = Path(path).absolute() path = path / 'config.ini' if path.is_dir() else path if path.exists(): om = OptionsManager(path) else: om = OptionsManager(self.ini_path or (Path(__file__).parent / 'configs.ini')) # 设置chromium_options attrs = ('address', 'browser_path', 'arguments', 'extensions', 'user', 'load_mode', 'auto_port', 'system_user_path', 'existing_only', 'flags', 'new_env') for i in attrs: om.set_item('chromium_options', i, self.__getattribute__(f'_{i}')) # 设置代理 om.set_item('proxies', 'http', self._proxy or '') om.set_item('proxies', 'https', self._proxy or '') # 设置路径 om.set_item('paths', 'download_path', self._download_path or '') om.set_item('paths', 'tmp_path', self._tmp_path or '') # 设置timeout om.set_item('timeouts', 'base', self._timeouts['base']) om.set_item('timeouts', 'page_load', self._timeouts['page_load']) om.set_item('timeouts', 'script', self._timeouts['script']) # 设置重试 om.set_item('others', 'retry_times', self.retry_times) om.set_item('others', 'retry_interval', self.retry_interval) # 设置prefs om.set_item('chromium_options', 'prefs', self._prefs) path = str(path) om.save(path) return path def save_to_default(self): return self.save('default') ================================================ FILE: DrissionPage/_configs/chromium_options.pyi ================================================ # -*- coding:utf-8 -*- """ @Author : g1879 @Contact : g1879@qq.com @Website : https://DrissionPage.cn @Copyright: (c) 2020 by g1879, Inc. All Rights Reserved. """ from pathlib import Path from typing import Union, Any, Literal, Optional, Tuple class ChromiumOptions(object): ini_path: Optional[str] = ... _user_data_path: Optional[str] = ... _download_path: str = ... _tmp_path: str = ... _arguments: list = ... _browser_path: str = ... _user: str = ... _load_mode: str = ... _timeouts: dict = ... _proxy: str = ... _address: str = ... _extensions: list = ... _prefs: dict = ... _flags: dict = ... _prefs_to_del: list = ... _new_env: bool = ... clear_file_flags: bool = ... _auto_port: Union[Tuple[int, int], False] = ... _system_user_path: bool = ... _existing_only: bool = ... _retry_times: int = ... _retry_interval: float = ... _is_headless: bool = ... _ua_set: bool = ... ws_address: Optional[str] = ... def __init__(self, read_file: [bool, None] = True, ini_path: Union[str, Path] = None): """ :param read_file: 是否从默认ini文件中读取配置信息 :param ini_path: ini文件路径,为None则读取默认ini文件 """ ... @property def download_path(self) -> str: """默认下载路径文件路径""" ... @property def browser_path(self) -> str: """浏览器启动文件路径""" ... @property def user_data_path(self) -> str: """返回用户数据文件夹路径""" ... @property def tmp_path(self) -> Optional[str]: """返回临时文件夹路径""" ... @property def user(self) -> str: """返回用户配置文件夹名称""" ... @property def load_mode(self) -> str: """返回页面加载策略,'normal', 'eager', 'none'""" ... @property def timeouts(self) -> dict: """返回timeouts设置""" ... @property def proxy(self) -> str: """返回代理设置""" ... @property def address(self) -> str: """返回浏览器地址,ip:port""" ... @property def arguments(self) -> list: """返回浏览器命令行设置列表""" ... @property def extensions(self) -> list: """以list形式返回要加载的插件路径""" ... @property def preferences(self) -> dict: """返回用户首选项配置""" ... @property def flags(self) -> dict: """返回实验项配置""" ... @property def system_user_path(self) -> bool: """返回是否使用系统安装的浏览器所使用的用户数据文件夹""" ... @property def is_existing_only(self) -> bool: """返回是否只接管现有浏览器方式""" ... @property def is_auto_port(self) -> Union[bool, Tuple[int, int]]: """返回是否使用自动端口和用户文件,如指定范围则返回范围tuple""" ... @property def retry_times(self) -> int: """返回连接失败时的重试次数""" ... @property def retry_interval(self) -> float: """返回连接失败时的重试间隔(秒)""" ... @property def is_headless(self) -> bool: """返回是否无头模式""" ... def set_retry(self, times: int = None, interval: float = None) -> ChromiumOptions: """设置连接失败时的重试操作 :param times: 重试次数 :param interval: 重试间隔 :return: 当前对象 """ ... def set_argument(self, arg: str, value: Union[str, None, bool] = None) -> ChromiumOptions: """设置浏览器配置的argument属性 :param arg: 属性名 :param value: 属性值,有值的属性传入值,没有的传入None,如传入False,删除该项 :return: 当前对象 """ ... def remove_argument(self, value: str) -> ChromiumOptions: """移除一个argument项 :param value: 设置项名,有值的设置项传入设置名称即可 :return: 当前对象 """ ... def add_extension(self, path: Union[str, Path]) -> ChromiumOptions: """添加插件 :param path: 插件路径,可指向文件夹 :return: 当前对象 """ ... def remove_extensions(self) -> ChromiumOptions: """移除所有插件 :return: 当前对象 """ ... def set_pref(self, arg: str, value: Any) -> ChromiumOptions: """设置Preferences文件中的用户设置项 :param arg: 设置项名称 :param value: 设置项值 :return: 当前对象 """ ... def remove_pref(self, arg: str) -> ChromiumOptions: """删除用户首选项设置,不能删除已设置到文件中的项 :param arg: 设置项名称 :return: 当前对象 """ ... def remove_pref_from_file(self, arg: str) -> ChromiumOptions: """删除用户配置文件中已设置的项 :param arg: 设置项名称 :return: 当前对象 """ ... def set_flag(self, flag: str, value: Union[int, str, bool] = None) -> ChromiumOptions: """设置实验项 :param flag: 设置项名称 :param value: 设置项的值,为False则删除该项 :return: 当前对象 """ ... def clear_flags_in_file(self) -> ChromiumOptions: """删除浏览器配置文件中已设置的实验项""" ... def clear_flags(self) -> ChromiumOptions: """清空本对象已设置的flag参数""" ... def clear_arguments(self) -> ChromiumOptions: """清空本对象已设置的argument参数""" ... def clear_prefs(self) -> ChromiumOptions: """清空本对象已设置的pref参数""" ... def set_timeouts(self, base: float = None, page_load: float = None, script: float = None) -> ChromiumOptions: """设置超时时间,单位为秒 :param base: 默认超时时间 :param page_load: 页面加载超时时间 :param script: 脚本运行超时时间 :return: 当前对象 """ ... def set_user(self, user: str = 'Default') -> ChromiumOptions: """设置使用哪个用户配置文件夹 :param user: 用户文件夹名称 :return: 当前对象 """ ... def headless(self, on_off: bool = True) -> ChromiumOptions: """设置是否隐藏浏览器界面 :param on_off: 开或关 :return: 当前对象 """ ... def no_imgs(self, on_off: bool = True) -> ChromiumOptions: """设置是否加载图片 :param on_off: 开或关 :return: 当前对象 """ ... def no_js(self, on_off: bool = True) -> ChromiumOptions: """设置是否禁用js :param on_off: 开或关 :return: 当前对象 """ ... def mute(self, on_off: bool = True) -> ChromiumOptions: """设置是否静音 :param on_off: 开或关 :return: 当前对象 """ ... def incognito(self, on_off: bool = True) -> ChromiumOptions: """设置是否使用无痕模式启动 :param on_off: 开或关 :return: 当前对象 """ ... def new_env(self, on_off: bool = True) -> ChromiumOptions: """设置是否使用全新浏览器环境 :param on_off: 开或关 :return: 当前对象 """ ... def ignore_certificate_errors(self, on_off=True) -> ChromiumOptions: """设置是否忽略证书错误 :param on_off: 开或关 :return: 当前对象 """ ... def set_user_agent(self, user_agent: str) -> ChromiumOptions: """设置user agent :param user_agent: user agent文本 :return: 当前对象 """ ... def set_proxy(self, proxy: str) -> ChromiumOptions: """设置代理 :param proxy: 代理url和端口 :return: 当前对象 """ ... def set_load_mode(self, value: Literal['normal', 'eager', 'none']) -> ChromiumOptions: """设置load_mode,可接收 'normal', 'eager', 'none' normal:默认情况下使用, 等待所有资源下载完成 eager:DOM访问已准备就绪, 但其他资源 (如图像) 可能仍在加载中 none:完全不阻塞 :param value: 可接收 'normal', 'eager', 'none' :return: 当前对象 """ ... def set_local_port(self, port: Union[str, int]) -> ChromiumOptions: """设置本地启动端口,与set_address()和auto_port()互斥 :param port: 端口号 :return: 当前对象 """ ... def set_address(self, address: str) -> ChromiumOptions: """设置浏览器地址,格式'ip:port',与auto_port()和set_local_port()互斥 :param address: 浏览器地址 :return: 当前对象 """ ... def set_browser_path(self, path: Union[str, Path]) -> ChromiumOptions: """设置浏览器可执行文件路径 :param path: 浏览器路径 :return: 当前对象 """ ... def set_download_path(self, path: Union[str, Path]) -> ChromiumOptions: """设置下载文件保存路径 :param path: 下载路径 :return: 当前对象 """ ... def set_tmp_path(self, path: Union[str, Path]) -> ChromiumOptions: """设置临时文件文件保存路径 :param path: 下载路径 :return: 当前对象 """ ... def set_user_data_path(self, path: Union[str, Path]) -> ChromiumOptions: """设置用户文件夹路径 :param path: 用户文件夹路径 :return: 当前对象 """ ... def set_cache_path(self, path: Union[str, Path]) -> ChromiumOptions: """设置缓存路径 :param path: 缓存路径 :return: 当前对象 """ ... def use_system_user_path(self, on_off: bool = True) -> ChromiumOptions: """设置是否使用系统安装的浏览器默认用户文件夹 :param on_off: 开或关 :return: 当前对象 """ ... def auto_port(self, on_off: bool = True, scope: Tuple[int, int] = None) -> ChromiumOptions: """自动获取可用端口,与set_address()和set_local_port()互斥 :param on_off: 是否开启自动获取端口号 :param scope: 指定端口范围,不含最后的数字,为None则使用[9600-59600) :return: 当前对象 """ ... def existing_only(self, on_off: bool = True) -> ChromiumOptions: """设置只接管已有浏览器,不自动启动新的 :param on_off: 是否开启自动获取端口号 :return: 当前对象 """ ... def save(self, path: Union[str, Path] = None) -> str: """保存设置到文件 :param path: ini文件的路径, None 保存到当前读取的配置文件,传入 'default' 保存到默认ini文件 :return: 保存文件的绝对路径 """ ... def save_to_default(self) -> str: """保存当前配置到默认ini文件""" ... ================================================ FILE: DrissionPage/_configs/configs.ini ================================================ [paths] download_path = tmp_path = [chromium_options] address = 127.0.0.1:9222 browser_path = chrome arguments = ['--no-default-browser-check', '--disable-suggestions-ui', '--no-first-run', '--disable-infobars', '--disable-popup-blocking', '--hide-crash-restore-bubble', '--disable-features=PrivacySandboxSettings4'] extensions = [] prefs = {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}} flags = {} load_mode = normal user = Default auto_port = False system_user_path = False existing_only = False new_env = False [session_options] headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8', 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'connection': 'keep-alive', 'accept-charset': 'GB2312,utf-8;q=0.7,*;q=0.7'} [timeouts] base = 10 page_load = 30 script = 30 [proxies] http = https = [others] retry_times = 3 retry_interval = 2 ================================================ FILE: DrissionPage/_configs/options_manage.py ================================================ # -*- coding:utf-8 -*- """ @Author : g1879 @Contact : g1879@qq.com @Website : https://DrissionPage.cn @Copyright: (c) 2020 by g1879, Inc. All Rights Reserved. """ from configparser import RawConfigParser, NoSectionError, NoOptionError from pathlib import Path from pprint import pprint from .._functions.settings import Settings as _S class OptionsManager(object): def __init__(self, path=None): if path is False: self.ini_path = None else: default_configs = Path(__file__).parent / 'configs.ini' if path is None: dp_configs = Path('dp_configs.ini') if dp_configs.exists(): self.ini_path = dp_configs else: self.ini_path = default_configs elif path == 'default': self.ini_path = default_configs elif isinstance(path, Path): self.ini_path = path else: self.ini_path = Path(path) self._conf = RawConfigParser() if path is not False and self.ini_path.exists(): self.file_exists = True self._conf.read(self.ini_path, encoding='utf-8') else: self.file_exists = False self._conf.add_section('paths') self._conf.add_section('chromium_options') self._conf.add_section('session_options') self._conf.add_section('timeouts') self._conf.add_section('proxies') self._conf.add_section('others') self.set_item('paths', 'download_path', '') self.set_item('paths', 'tmp_path', '') self.set_item('chromium_options', 'address', '127.0.0.1:9222') self.set_item('chromium_options', 'browser_path', 'chrome') self.set_item('chromium_options', 'arguments', "['--no-default-browser-check', '--disable-suggestions-ui', " "'--no-first-run', '--disable-infobars', " "'--disable-popup-blocking', '--hide-crash-restore-bubble', " "'--disable-features=PrivacySandboxSettings4']") self.set_item('chromium_options', 'extensions', '[]') self.set_item('chromium_options', 'prefs', "{'profile.default_content_settings.popups': 0, " "'profile.default_content_setting_values': " "{'notifications': 2}}") self.set_item('chromium_options', 'flags', '{}') self.set_item('chromium_options', 'load_mode', 'normal') self.set_item('chromium_options', 'user', 'Default') self.set_item('chromium_options', 'auto_port', 'False') self.set_item('chromium_options', 'system_user_path', 'False') self.set_item('chromium_options', 'existing_only', 'False') self.set_item('chromium_options', 'new_env', 'False') self.set_item('session_options', 'headers', "{'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X " "10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10." "1.2 Safari/603.3.8', 'accept': 'text/html,application/xhtml" "+xml,application/xml;q=0.9,*/*;q=0.8', 'connection': " "'keep-alive', 'accept-charset': 'GB2312,utf-8;q=0.7,*;q=0.7'}") self.set_item('timeouts', 'base', '10') self.set_item('timeouts', 'page_load', '30') self.set_item('timeouts', 'script', '30') self.set_item('proxies', 'http', '') self.set_item('proxies', 'https', '') self.set_item('others', 'retry_times', '3') self.set_item('others', 'retry_interval', '2') def __getattr__(self, item): return self.get_option(item) def get_value(self, section, item): try: return eval(self._conf.get(section, item)) except (SyntaxError, NameError): return self._conf.get(section, item) except NoSectionError and NoOptionError: return None def get_option(self, section): items = self._conf.items(section) option = dict() for j in items: try: option[j[0]] = eval(self._conf.get(section, j[0])) except Exception: option[j[0]] = self._conf.get(section, j[0]) return option def set_item(self, section, item, value): self._conf.set(section, item, str(value)) self.__setattr__(f'_{section}', None) return self def remove_item(self, section, item): self._conf.remove_option(section, item) return self def save(self, path=None): default_path = (Path(__file__).parent / 'configs.ini').absolute() if path == 'default': path = default_path elif path is None: if self.ini_path is None: raise RuntimeError(_S._lang.join(_S._lang.INI_NOT_SET)) path = self.ini_path.absolute() else: path = Path(path).absolute() path = path / 'config.ini' if path.is_dir() else path path.parent.mkdir(exist_ok=True, parents=True) path = str(path) self._conf.write(open(path, 'w', encoding='utf-8')) print(f'{_S._lang.OPTIONS_HAVE_SAVED}: {path}') if path == str(default_path): print(_S._lang.AUTO_LOAD_TIP) self.file_exists = True return path def save_to_default(self): return self.save('default') def show(self): for i in self._conf.sections(): print(f'[{i}]') pprint(self.get_option(i)) print() ================================================ FILE: DrissionPage/_configs/options_manage.pyi ================================================ # -*- coding:utf-8 -*- """ @Author : g1879 @Contact : g1879@qq.com @Website : https://DrissionPage.cn @Copyright: (c) 2020 by g1879, Inc. All Rights Reserved. """ from configparser import RawConfigParser from pathlib import Path from typing import Any, Optional, Union class OptionsManager(object): """管理配置文件内容的类""" ini_path: Optional[Path] = ... file_exists: bool = ... _conf: RawConfigParser = ... def __init__(self, path: Union[Path, str] = None): """初始化,读取配置文件,如没有设置临时文件夹,则设置并新建 :param path: ini文件的路径,为None则找项目文件夹下的,找不到则读取模块文件夹下的 """ ... def __getattr__(self, item) -> dict: """以dict形似返回获取大项信息 :param item: 项名 :return: None """ ... def get_value(self, section: str, item: str) -> Any: """获取配置的值 :param section: 段名 :param item: 项名 :return: 项值 """ ... def get_option(self, section: str) -> dict: """把section内容以字典方式返回 :param section: 段名 :return: 段内容生成的字典 """ ... def set_item(self, section: str, item: str, value: Any) -> None: """设置配置值 :param section: 段名 :param item: 项名 :param value: 项值 :return: None """ ... def remove_item(self, section: str, item: str) -> None: """删除配置值 :param section: 段名 :param item: 项名 :return: None """ ... def save(self, path: str = None) -> str: """保存配置文件 :param path: ini文件的路径,传入 'default' 保存到默认ini文件 :return: 保存路径 """ ... def save_to_default(self) -> str: """保存当前配置到默认ini文件""" ... def show(self) -> None: """打印所有设置信息""" ... ================================================ FILE: DrissionPage/_configs/session_options.py ================================================ # -*- coding:utf-8 -*- """ @Author : g1879 @Contact : g1879@qq.com @Website : https://DrissionPage.cn @Copyright: (c) 2020 by g1879, Inc. All Rights Reserved. """ from copy import copy from pathlib import Path from requests import Session from requests.structures import CaseInsensitiveDict from .options_manage import OptionsManager from .._functions.cookies import cookies_to_tuple, set_session_cookies from .._functions.settings import Settings as _S from .._functions.web import format_headers class SessionOptions(object): def __init__(self, read_file=True, ini_path=None): self.ini_path = None self._download_path = '.' self._timeout = 10 self._del_set = set() # 记录要从ini文件删除的参数 if read_file is False: ini_path = False self.ini_path = None elif ini_path: ini_path = Path(ini_path).absolute() if not ini_path.exists(): raise FileNotFoundError(_S._lang.join(_S._lang.INI_NOT_FOUND, PATH=ini_path)) self.ini_path = str(ini_path) else: self.ini_path = str(Path(__file__).parent / 'configs.ini') om = OptionsManager(ini_path) self._headers = None self._cookies = None self._auth = None self._proxies = None self._hooks = None self._params = None self._verify = None self._cert = None self._adapters = None self._stream = None self._trust_env = None self._max_redirects = None options = om.session_options if options.get('headers', None) is not None: self.set_headers(options['headers']) if options.get('cookies', None) is not None: self.set_cookies(options['cookies']) if options.get('auth', None) is not None: self._auth = options['auth'] if options.get('params', None) is not None: self._params = options['params'] if options.get('verify', None) is not None: self._verify = options['verify'] if options.get('cert', None) is not None: self._cert = options['cert'] if options.get('stream', None) is not None: self._stream = options['stream'] if options.get('trust_env', None) is not None: self._trust_env = options['trust_env'] if options.get('max_redirects', None) is not None: self._max_redirects = options['max_redirects'] self.set_proxies(om.proxies.get('http', None), om.proxies.get('https', None)) self._timeout = om.timeouts.get('base', 10) self._download_path = om.paths.get('download_path', '.') or '.' others = om.others self._retry_times = others.get('retry_times', 3) self._retry_interval = others.get('retry_interval', 2) def __repr__(self): return f'' # ===========须独立处理的项开始============ @property def download_path(self): return self._download_path def set_download_path(self, path): self._download_path = '.' if path is None else str(path) return self @property def timeout(self): return self._timeout def set_timeout(self, second): self._timeout = second return self @property def proxies(self): if self._proxies is None: self._proxies = {} return self._proxies def set_proxies(self, http=None, https=None): self._sets('proxies', {'http': http, 'https': https}) return self @property def retry_times(self): return self._retry_times @property def retry_interval(self): return self._retry_interval def set_retry(self, times=None, interval=None): if times is not None: self._retry_times = times if interval is not None: self._retry_interval = interval return self # ===========须独立处理的项结束============ @property def headers(self): if self._headers is None: self._headers = {} return self._headers def set_headers(self, headers): if headers is None: self._headers = None self._del_set.add('headers') else: headers = format_headers(headers) self._headers = {key.lower(): headers[key] for key in headers} return self def set_a_header(self, name, value): if self._headers is None: self._headers = {} self._headers[name.lower()] = value return self def remove_a_header(self, name): if self._headers is None: return self self._headers.pop(name.lower(), None) return self def clear_headers(self): self._headers = None self._del_set.add('headers') @property def cookies(self): if self._cookies is None: self._cookies = [] return self._cookies def set_cookies(self, cookies): cookies = cookies if cookies is None else list(cookies_to_tuple(cookies)) self._sets('cookies', cookies) return self @property def auth(self): return self._auth def set_auth(self, auth): self._sets('auth', auth) return self @property def hooks(self): if self._hooks is None: self._hooks = {} return self._hooks def set_hooks(self, hooks): self._hooks = hooks return self @property def params(self): if self._params is None: self._params = {} return self._params def set_params(self, params): self._sets('params', params) return self @property def verify(self): return self._verify def set_verify(self, on_off): self._sets('verify', on_off) return self @property def cert(self): return self._cert def set_cert(self, cert): self._sets('cert', cert) return self @property def adapters(self): if self._adapters is None: self._adapters = [] return self._adapters def add_adapter(self, url, adapter): self._adapters.append((url, adapter)) return self @property def stream(self): return self._stream def set_stream(self, on_off): self._sets('stream', on_off) return self @property def trust_env(self): return self._trust_env def set_trust_env(self, on_off): self._sets('trust_env', on_off) return self @property def max_redirects(self): return self._max_redirects def set_max_redirects(self, times): self._sets('max_redirects', times) return self def _sets(self, arg, val): if val is None: self.__setattr__(f'_{arg}', None) self._del_set.add(arg) else: self.__setattr__(f'_{arg}', val) if arg in self._del_set: self._del_set.remove(arg) def save(self, path=None): if path == 'default': path = (Path(__file__).parent / 'configs.ini').absolute() elif path is None: if self.ini_path: path = Path(self.ini_path).absolute() else: path = (Path(__file__).parent / 'configs.ini').absolute() else: path = Path(path).absolute() path = path / 'config.ini' if path.is_dir() else path if path.exists(): om = OptionsManager(path) else: om = OptionsManager(self.ini_path or (Path(__file__).parent / 'configs.ini')) options = session_options_to_dict(self) for i in options: if i not in ('download_path', 'timeout', 'proxies'): om.set_item('session_options', i, options[i]) om.set_item('paths', 'download_path', self.download_path or '') om.set_item('timeouts', 'base', self.timeout) om.set_item('proxies', 'http', self.proxies.get('http', '')) om.set_item('proxies', 'https', self.proxies.get('https', '')) om.set_item('others', 'retry_times', self.retry_times) om.set_item('others', 'retry_interval', self.retry_interval) for i in self._del_set: if i == 'download_path': om.set_item('paths', 'download_path', '') elif i == 'proxies': om.set_item('proxies', 'http', '') om.set_item('proxies', 'https', '') else: om.remove_item('session_options', i) path = str(path) om.save(path) return path def save_to_default(self): return self.save('default') def as_dict(self): return session_options_to_dict(self) def make_session(self): s = Session() h = CaseInsensitiveDict(self.headers) if self.headers else CaseInsensitiveDict() if self.cookies: set_session_cookies(s, self.cookies) if self.adapters: for url, adapter in self.adapters: s.mount(url, adapter) for i in ['auth', 'proxies', 'hooks', 'params', 'verify', 'cert', 'stream', 'trust_env', 'max_redirects']: attr = self.__getattribute__(i) if attr: s.__setattr__(i, attr) return s, h def from_session(self, session, headers=None): self._headers = CaseInsensitiveDict(copy(session.headers).update(headers)) if headers else session.headers self._cookies = session.cookies self._auth = session.auth self._proxies = session.proxies self._hooks = session.hooks self._params = session.params self._verify = session.verify self._cert = session.cert self._stream = session.stream self._trust_env = session.trust_env self._max_redirects = session.max_redirects if session.adapters: self._adapters = [(k, i) for k, i in session.adapters.items()] return self def session_options_to_dict(options): if options in (False, None): return SessionOptions(read_file=False).as_dict() if isinstance(options, dict): return options re_dict = dict() attrs = ['headers', 'cookies', 'proxies', 'params', 'verify', 'stream', 'trust_env', 'cert', 'max_redirects', 'timeout', 'download_path'] for attr in attrs: val = options.__getattribute__(f'_{attr}') if val is not None: re_dict[attr] = val return re_dict ================================================ FILE: DrissionPage/_configs/session_options.pyi ================================================ # -*- coding:utf-8 -*- """ @Author : g1879 @Contact : g1879@qq.com @Website : https://DrissionPage.cn @Copyright: (c) 2020 by g1879, Inc. All Rights Reserved. """ from http.cookiejar import CookieJar, Cookie from pathlib import Path from typing import Any, Union, Tuple, Optional from requests import Session from requests.adapters import HTTPAdapter from requests.auth import HTTPBasicAuth from requests.cookies import RequestsCookieJar from requests.structures import CaseInsensitiveDict class SessionOptions(object): """requests的Session对象配置类""" ini_path: Optional[str] = ... _download_path: str = ... _headers: Union[dict, CaseInsensitiveDict, None] = ... _cookies: Union[list, RequestsCookieJar, None] = ... _auth: Optional[tuple] = ... _proxies: Optional[dict] = ... _hooks: Optional[dict] = ... _params: Union[dict, None] = ... _verify: Optional[bool] = ... _cert: Union[str, tuple, None] = ... _adapters: Optional[list] = ... _stream: Optional[bool] = ... _trust_env: Optional[bool] = ... _max_redirects: Optional[int] = ... _timeout: float = ... _del_set: set = ... _retry_times: int = ... _retry_interval: float = ... def __init__(self, read_file: [bool, None] = True, ini_path: Union[str, Path] = None): """ :param read_file: 是否从文件读取配置 :param ini_path: ini文件路径 """ ... @property def download_path(self) -> str: """返回默认下载路径属性信息""" ... def set_download_path(self, path: Union[str, Path]) -> SessionOptions: """设置默认下载路径 :param path: 下载路径 :return: 返回当前对象 """ ... @property def timeout(self) -> float: """返回timeout属性信息""" ... def set_timeout(self, second: float) -> SessionOptions: """设置超时信息 :param second: 秒数 :return: 返回当前对象 """ ... @property def proxies(self) -> dict: """返回proxies设置信息""" ... def set_proxies(self, http: Union[str, None], https: Union[str, None] = None) -> SessionOptions: """设置proxies参数 :param http: http代理地址 :param https: https代理地址 :return: 返回当前对象 """ ... @property def retry_times(self) -> int: """返回连接失败时的重试次数""" ... @property def retry_interval(self) -> float: """返回连接失败时的重试间隔(秒)""" ... def set_retry(self, times: int = None, interval: float = None) -> SessionOptions: """设置连接失败时的重试操作 :param times: 重试次数 :param interval: 重试间隔 :return: 当前对象 """ ... @property def headers(self) -> dict: """返回headers设置信息""" ... def set_headers(self, headers: Union[dict, str, None]) -> SessionOptions: """设置headers参数 :param headers: 参数值,传入None可在ini文件标记删除 :return: 返回当前对象 """ ... def set_a_header(self, name: str, value: str) -> SessionOptions: """设置headers中一个项 :param name: 设置名称 :param value: 设置值 :return: 返回当前对象 """ ... def remove_a_header(self, name: str) -> SessionOptions: """从headers中删除一个设置 :param name: 要删除的设置 :return: 返回当前对象 """ ... def clear_headers(self) -> SessionOptions: """清空已设置的header参数""" ... @property def cookies(self) -> list: """以list形式返回cookies""" ... def set_cookies(self, cookies: Union[Cookie, CookieJar, list, tuple, str, dict, None]) -> SessionOptions: """设置一个或多个cookies信息 :param cookies: cookies,可为Cookie, CookieJar, list, tuple, str, dict,传入None可在ini文件标记删除 :return: 返回当前对象 """ ... @property def auth(self) -> Union[Tuple[str, str], HTTPBasicAuth]: """返回认证设置信息""" ... def set_auth(self, auth: Union[Tuple[str, str], HTTPBasicAuth, None]) -> SessionOptions: """设置认证元组或对象 :param auth: 认证元组或对象 :return: 返回当前对象 """ ... @property def hooks(self) -> dict: """返回回调方法""" ... def set_hooks(self, hooks: Union[dict, None]) -> SessionOptions: """设置回调方法 :param hooks: 回调方法 :return: 返回当前对象 """ ... @property def params(self) -> dict: """返回连接参数设置信息""" ... def set_params(self, params: Union[dict, None]) -> SessionOptions: """设置查询参数字典 :param params: 查询参数字典 :return: 返回当前对象 """ ... @property def verify(self) -> bool: """返回是否验证SSL证书设置""" ... def set_verify(self, on_off: Union[bool, None]) -> SessionOptions: """设置是否验证SSL证书 :param on_off: 是否验证 SSL 证书 :return: 返回当前对象 """ ... @property def cert(self) -> Union[str, tuple]: """返回SSL证书设置信息""" ... def set_cert(self, cert: Union[str, Tuple[str, str], None]) -> SessionOptions: """SSL客户端证书文件的路径(.pem格式),或('cert', 'key')元组 :param cert: 证书路径或元组 :return: 返回当前对象 """ ... @property def adapters(self) -> list: """返回适配器设置信息""" ... def add_adapter(self, url: str, adapter: HTTPAdapter) -> SessionOptions: """添加适配器 :param url: 适配器对应url :param adapter: 适配器对象 :return: 返回当前对象 """ ... @property def stream(self) -> bool: """返回是否使用流式响应内容设置信息""" ... def set_stream(self, on_off: Union[bool, None]) -> SessionOptions: """设置是否使用流式响应内容 :param on_off: 是否使用流式响应内容 :return: 返回当前对象 """ ... @property def trust_env(self) -> bool: """返回是否信任环境设置信息""" ... def set_trust_env(self, on_off: Union[bool, None]) -> SessionOptions: """设置是否信任环境 :param on_off: 是否信任环境 :return: 返回当前对象 """ ... @property def max_redirects(self) -> int: """返回最大重定向次数""" ... def set_max_redirects(self, times: Union[int, None]) -> SessionOptions: """设置最大重定向次数 :param times: 最大重定向次数 :return: 返回当前对象 """ ... def _sets(self, arg: str, val: Any) -> None: """给属性赋值或标记删除 :param arg: 属性名称 :param val: 参数值 :return: None """ ... def save(self, path: str = None) -> str: """保存设置到文件 :param path: ini文件的路径,传入 'default' 保存到默认ini文件 :return: 保存文件的绝对路径 """ ... def save_to_default(self) -> str: """保存当前配置到默认ini文件""" ... def as_dict(self) -> dict: """以字典形式返回本对象""" ... def make_session(self) -> Tuple[Session, Optional[CaseInsensitiveDict]]: """根据内在的配置生成Session对象,headers从对象中分离""" ... def from_session(self, session: Session, headers: CaseInsensitiveDict = None) -> SessionOptions: """从Session对象中读取配置 :param session: Session对象 :param headers: headers :return: 当前对象 """ ... def session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Union[dict, None]: """把session配置对象转换为字典 :param options: session配置对象或字典 :return: 配置字典 """ ... ================================================ FILE: DrissionPage/_elements/chromium_element.py ================================================ # -*- coding:utf-8 -*- """ @Author : g1879 @Contact : g1879@qq.com @Website : https://DrissionPage.cn @Copyright: (c) 2020 by g1879, Inc. All Rights Reserved. """ from json import loads from os.path import basename from pathlib import Path from platform import system from re import search from time import perf_counter, sleep from DataRecorder.tools import get_usable_path, make_valid_name from .none_element import NoneElement from .session_element import make_session_ele from .._base.base import DrissionElement, BaseElement from .._functions.elements import ChromiumElementsList, SessionElementsList from .._functions.keys import input_text_or_keys, Keys from .._functions.locator import get_loc, locator_to_tuple from .._functions.settings import Settings as _S from .._functions.web import make_absolute_link, get_ele_txt, format_html, is_js_func, get_blob from .._units.clicker import Clicker from .._units.rect import ElementRect from .._units.scroller import ElementScroller from .._units.selector import SelectElement from .._units.setter import ChromiumElementSetter from .._units.states import ElementStates, ShadowRootStates from .._units.waiter import ElementWaiter from ..errors import (ContextLostError, ElementLostError, JavaScriptError, CDPError, NoResourceError, AlertExistsError, NoRectError, LocatorError) __FRAME_ELEMENT__ = ('iframe', 'frame') class ChromiumElement(DrissionElement): def __init__(self, owner, node_id=None, obj_id=None, backend_id=None): super().__init__(owner) self.tab = self.owner._tab self._select = None self._scroll = None self._rect = None self._set = None self._states = None self._pseudo = None self._clicker = None self._tag = None self._wait = None self._type = 'ChromiumElement' self._doc_id = None if node_id and obj_id and backend_id: self._node_id = node_id self._obj_id = obj_id self._backend_id = backend_id elif node_id: self._node_id = node_id self._obj_id = self._get_obj_id(node_id) self._backend_id = self._get_backend_id(self._node_id) elif obj_id: self._node_id = self._get_node_id(obj_id) self._obj_id = obj_id self._backend_id = self._get_backend_id(self._node_id) elif backend_id: self._obj_id = self._get_obj_id(backend_id=backend_id) self._node_id = self._get_node_id(obj_id=self._obj_id) self._backend_id = backend_id else: raise ElementLostError def __call__(self, locator, index=1, timeout=None): return self.ele(locator, index=index, timeout=timeout) def __repr__(self): attrs = [f"{k}='{v}'" for k, v in self.attrs.items()] return f'' def __eq__(self, other): return self._backend_id == getattr(other, '_backend_id', None) @property def tag(self): if self._tag is None: self._tag = self.owner._run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node']['localName'].lower() return self._tag @property def html(self): return self.owner._run_cdp('DOM.getOuterHTML', backendNodeId=self._backend_id)['outerHTML'] @property def inner_html(self): return self._run_js('return this.innerHTML;') @property def attrs(self): try: attrs = self.owner._run_cdp('DOM.getAttributes', nodeId=self._node_id)['attributes'] return {attrs[i]: attrs[i + 1] for i in range(0, len(attrs), 2)} except ElementLostError: self._refresh_id() attrs = self.owner._run_cdp('DOM.getAttributes', nodeId=self._node_id)['attributes'] return {attrs[i]: attrs[i + 1] for i in range(0, len(attrs), 2)} except CDPError: # 文档根元素不能调用此方法 return {} @property def text(self): return get_ele_txt(make_session_ele(self.html)) @property def raw_text(self): return self.property('innerText') # -----------------d模式独有属性------------------- @property def set(self): if self._set is None: self._set = ChromiumElementSetter(self) return self._set @property def states(self): if self._states is None: self._states = ElementStates(self) return self._states @property def pseudo(self): if self._pseudo is None: self._pseudo = Pseudo(self) return self._pseudo @property def rect(self): if self._rect is None: self._rect = ElementRect(self) return self._rect @property def sr(self): end_time = perf_counter() + self.timeout while perf_counter() < end_time: info = self.owner._run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node'] if info.get('shadowRoots', None): return ShadowRoot(self, backend_id=info['shadowRoots'][0]['backendNodeId']) return None @property def shadow_root(self): return self.sr @property def scroll(self): if self._scroll is None: self._scroll = ElementScroller(self) return self._scroll @property def click(self): if self._clicker is None: self._clicker = Clicker(self) return self._clicker @property def wait(self): if self._wait is None: self._wait = ElementWaiter(self) return self._wait @property def select(self): if self._select is None: if self.tag != 'select': self._select = False else: self._select = SelectElement(self) return self._select @property def value(self): return self.property('value') def check(self, uncheck=False, by_js=False): is_checked = self.states.is_checked if by_js: js = None if is_checked and uncheck: js = 'this.checked=false' elif not is_checked and not uncheck: js = 'this.checked=true' if js: self._run_js(js) self._run_js('this.dispatchEvent(new Event("change", {bubbles: true}));') else: if (is_checked and uncheck) or (not is_checked and not uncheck): self.click() return self def parent(self, level_or_loc=1, index=1, timeout=0): return super().parent(level_or_loc, index, timeout=timeout) def child(self, locator='', index=1, timeout=None, ele_only=True): return super().child(locator, index, timeout, ele_only=ele_only) def prev(self, locator='', index=1, timeout=None, ele_only=True): return super().prev(locator, index, timeout, ele_only=ele_only) def next(self, locator='', index=1, timeout=None, ele_only=True): return super().next(locator, index, timeout, ele_only=ele_only) def before(self, locator='', index=1, timeout=None, ele_only=True): return super().before(locator, index, timeout, ele_only=ele_only) def after(self, locator='', index=1, timeout=None, ele_only=True): return super().after(locator, index, timeout, ele_only=ele_only) def children(self, locator='', timeout=None, ele_only=True): return ChromiumElementsList(self.owner, super().children(locator, timeout, ele_only=ele_only)) def prevs(self, locator='', timeout=None, ele_only=True): return ChromiumElementsList(self.owner, super().prevs(locator, timeout, ele_only=ele_only)) def nexts(self, locator='', timeout=None, ele_only=True): return ChromiumElementsList(self.owner, super().nexts(locator, timeout, ele_only=ele_only)) def befores(self, locator='', timeout=None, ele_only=True): return ChromiumElementsList(self.owner, super().befores(locator, timeout, ele_only=ele_only)) def afters(self, locator='', timeout=None, ele_only=True): return ChromiumElementsList(self.owner, super().afters(locator, timeout, ele_only=ele_only)) def over(self, timeout=None): if timeout is None: timeout = self.timeout bid = self.states.is_covered end_time = perf_counter() + timeout while not bid and perf_counter() < end_time: bid = self.states.is_covered return (ChromiumElement(owner=self.owner, backend_id=bid) if bid else NoneElement(page=self.owner, method='over()', args={'timeout': timeout})) def offset(self, locator=None, x=None, y=None, timeout=None): if locator and not (isinstance(locator, str) and not locator.startswith( ('x:', 'xpath:', 'x=', 'xpath=', 'c:', 'css:', 'c=', 'css='))): raise LocatorError(ALLOW_TYPE=_S._lang.STR_ONLY, CURR_VAL=locator) if x == y is None: x, y = self.rect.midpoint x = int(x) y = int(y) else: nx, ny = self.rect.location nx += x if x else 0 ny += y if y else 0 x = int(nx) y = int(ny) loc_data = locator_to_tuple(locator) if locator else None if timeout is None: timeout = self.timeout end_time = perf_counter() + timeout try: ele = ChromiumElement(owner=self.owner, backend_id=self.owner._run_cdp('DOM.getNodeForLocation', x=x, y=y, includeUserAgentShadowDOM=True, ignorePointerEventsNone=False)['backendNodeId']) except CDPError: ele = False if ele and (loc_data is None or _check_ele(ele, loc_data)): return ele while perf_counter() < end_time: try: ele = ChromiumElement(owner=self.owner, backend_id=self.owner._run_cdp('DOM.getNodeForLocation', x=x, y=y, includeUserAgentShadowDOM=True, ignorePointerEventsNone=False)['backendNodeId']) except CDPError: ele = False if ele and (loc_data is None or _check_ele(ele, loc_data)): return ele sleep(.01) return NoneElement(page=self.owner, method='offset()', args={'locator': locator, 'offset_x': x, 'offset_y': y, 'timeout': timeout}) def east(self, loc_or_pixel=None, index=1): return self._get_relative_eles(mode='east', locator=loc_or_pixel, index=index) def south(self, loc_or_pixel=None, index=1): return self._get_relative_eles(mode='south', locator=loc_or_pixel, index=index) def west(self, loc_or_pixel=None, index=1): return self._get_relative_eles(mode='west', locator=loc_or_pixel, index=index) def north(self, loc_or_pixel=None, index=1): return self._get_relative_eles(mode='north', locator=loc_or_pixel, index=index) def _get_relative_eles(self, mode='north', locator=None, index=1): if locator and not (isinstance(locator, str) and not locator.startswith( ('x:', 'xpath:', 'x=', 'xpath=', 'c:', 'css:', 'c=', 'css=')) or isinstance(locator, int)): raise LocatorError(ALLOW_TYPE=_S._lang.STR_ONLY, CURR_VAL=locator) rect = self.states.has_rect if not rect: raise NoRectError if mode == 'east': cdp_data = {'x': int(rect[1][0]), 'y': int(self.rect.midpoint[1]), 'includeUserAgentShadowDOM': True, 'ignorePointerEventsNone': False} variable = 'x' minus = False elif mode == 'south': cdp_data = {'x': int(self.rect.midpoint[0]), 'y': int(rect[2][1]), 'includeUserAgentShadowDOM': True, 'ignorePointerEventsNone': False} variable = 'y' minus = False elif mode == 'west': cdp_data = {'x': int(rect[0][0]), 'y': int(self.rect.midpoint[1]), 'includeUserAgentShadowDOM': True, 'ignorePointerEventsNone': False} variable = 'x' minus = True else: # north cdp_data = {'x': int(self.rect.midpoint[0]), 'y': int(rect[0][1]), 'includeUserAgentShadowDOM': True, 'ignorePointerEventsNone': False} variable = 'y' minus = True if isinstance(locator, int): if minus: cdp_data[variable] -= locator else: cdp_data[variable] += locator try: return ChromiumElement(owner=self.owner, backend_id=self.owner._run_cdp('DOM.getNodeForLocation', **cdp_data)['backendNodeId']) except CDPError: return NoneElement(page=self.owner, method=f'{mode}()', args={'locator': locator}) num = 0 value = -8 if minus else 8 size = self.owner.rect.size max_len = size[0] if mode == 'east' else size[1] loc_data = locator_to_tuple(locator) if locator else None curr_ele = None while 0 < cdp_data[variable] < max_len: cdp_data[variable] += value try: bid = self.owner._run_cdp('DOM.getNodeForLocation', **cdp_data)['backendNodeId'] if bid == curr_ele: continue else: curr_ele = bid ele = ChromiumElement(self.owner, backend_id=bid) if loc_data is None or _check_ele(ele, loc_data): num += 1 if num == index: return ele except: pass return NoneElement(page=self.owner, method=f'{mode}()', args={'locator': locator}) def attr(self, name): attrs = self.attrs if name == 'href': link = attrs.get('href') if not link or link.lower().startswith(('javascript:', 'mailto:')): return link else: return make_absolute_link(link, self.property('baseURI')) elif name == 'src': return make_absolute_link(attrs.get('src'), self.property('baseURI')) elif name == 'text': return self.text elif name == 'innerText': return self.raw_text elif name in ('html', 'outerHTML'): return self.html elif name == 'innerHTML': return self.inner_html else: return attrs.get(name, None) def remove_attr(self, name): self._run_js(f'this.removeAttribute("{name}");') return self def property(self, name): try: value = self._run_js(f'return this.{name};') return format_html(value) if isinstance(value, str) else value except: return None def run_js(self, script, *args, as_expr=False, timeout=None): return self._run_js(script, *args, as_expr=as_expr, timeout=timeout) def _run_js(self, script, *args, as_expr=False, timeout=None): return run_js(self, script, as_expr, self.owner.timeouts.script if timeout is None else timeout, args) def run_async_js(self, script, *args, as_expr=False): run_js(self, script, as_expr, 0, args) def ele(self, locator, index=1, timeout=None): return self._ele(locator, timeout, index=index, method='ele()') def eles(self, locator, timeout=None): return self._ele(locator, timeout=timeout, index=None) def s_ele(self, locator=None, index=1, timeout=None): return (make_session_ele(self, locator, index=index, method='s_ele()') if locator is None or self.ele(locator, index=index, timeout=timeout) else NoneElement(self.owner, method='s_ele()', args={'locator': locator, 'index': index})) def s_eles(self, locator=None, timeout=None): return (make_session_ele(self, locator, index=None) if self.ele(locator, timeout=timeout) else SessionElementsList()) def _find_elements(self, locator, timeout, index=1, relative=False, raise_err=None): return find_in_chromium_ele(self, locator, index, timeout, relative=relative) def style(self, style, pseudo_ele=''): if pseudo_ele: pseudo_ele = f', "{pseudo_ele}"' return self._run_js(f'return window.getComputedStyle(this{pseudo_ele}).getPropertyValue("{style}");') def src(self, timeout=None, base64_to_bytes=True): if timeout is None: timeout = self.timeout if self.tag == 'img': # 等待图片加载完成 js = ('return this.complete && typeof this.naturalWidth != "undefined" ' '&& this.naturalWidth > 0 && typeof this.naturalHeight != "undefined" ' '&& this.naturalHeight > 0') end_time = perf_counter() + timeout while not self._run_js(js) and perf_counter() < end_time: sleep(.05) src = self.attr('href') if self.tag == 'link' else self.attr('src') if not src: raise RuntimeError(_S._lang.join(_S._lang.NO_SRC_ATTR)) if src.lower().startswith('data:image'): if base64_to_bytes: from base64 import b64decode return b64decode(src.split(',', 1)[-1]) else: return src.split(',', 1)[-1] is_blob = src.startswith('blob') result = None end_time = perf_counter() + timeout if is_blob: while perf_counter() < end_time: result = get_blob(self.owner, src, base64_to_bytes) if result: break sleep(.05) else: while perf_counter() < end_time: src = self.attr('href') if self.tag == 'link' else self.property('currentSrc') or self.property('src') if not src: sleep(.01) continue node = self.owner._run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node'] frame = node.get('frameId', None) or self.owner._frame_id try: result = self.owner._run_cdp('Page.getResourceContent', frameId=frame, url=src) break except CDPError: pass sleep(.05) if not result: return None elif is_blob: return result elif result['base64Encoded'] and base64_to_bytes: from base64 import b64decode return b64decode(result['content']) else: return result['content'] def save(self, path=None, name=None, timeout=None, rename=True): data = self.src(timeout=timeout) if not data: raise NoResourceError path = path or '.' if not name and self.tag == 'img': src = self.attr('src') if src.lower().startswith('data:image'): r = search(r'data:image/(.*?);base64,', src) name = f'img.{r.group(1)}' if r else None path = Path(path) / make_valid_name(name or basename(self.property('currentSrc'))) if not path.suffix: path = path.with_suffix('.jpg') if rename: path = get_usable_path(path) path.parent.mkdir(parents=True, exist_ok=True) path = path.absolute() write_type = 'wb' if isinstance(data, bytes) else 'w' with open(path, write_type) as f: f.write(data) return str(path) def get_screenshot(self, path=None, name=None, as_bytes=None, as_base64=None, scroll_to_center=True): if self.tag == 'img': # 等待图片加载完成 js = ('return this.complete && typeof this.naturalWidth != "undefined" && this.naturalWidth > 0 ' '&& typeof this.naturalHeight != "undefined" && this.naturalHeight > 0') end_time = perf_counter() + self.timeout while not self._run_js(js) and perf_counter() < end_time: sleep(.05) if scroll_to_center: self.scroll.to_see(center=True) left, top = self.rect.location width, height = self.rect.size left_top = (left, top) right_bottom = (left + width, top + height) if not name: name = f'{self.tag}.jpg' return self.owner._get_screenshot(path, name, as_bytes=as_bytes, as_base64=as_base64, full_page=False, left_top=left_top, right_bottom=right_bottom, ele=self) def input(self, vals, clear=False, by_js=False): if self.tag == 'input' and self.attr('type') == 'file': return self._set_file_input(vals) if by_js: if clear: self.clear(True) if isinstance(vals, (list, tuple)): vals = ''.join([str(i) for i in vals]) self.set.property('value', str(vals)) self._run_js('this.dispatchEvent(new Event("change", {bubbles: true}));') return self self.wait.clickable(wait_moved=False, timeout=.5) if clear and vals not in ('\n', '\ue007', '\ue006'): self.clear(by_js=False) else: self._input_focus() if isinstance(vals, str) and vals not in ('\ue003', '\ue017', '\ue010', '\ue011', '\ue012', '\ue013', '\ue014', '\ue015',): input_text_or_keys(self.owner, vals) else: self.owner.actions.type(vals) return self def clear(self, by_js=False): if by_js or system().lower() in ('macos', 'darwin'): self._run_js("this.value='';") self._run_js('this.dispatchEvent(new Event("change", {bubbles: true}));') return self self._input_focus() self.input((Keys.CTRL_A, Keys.DEL), clear=False) return self def _input_focus(self): try: self.owner._run_cdp('DOM.focus', backendNodeId=self._backend_id) except Exception: self.click(by_js=None) def focus(self): try: self.owner._run_cdp('DOM.focus', backendNodeId=self._backend_id) except Exception: self._run_js('this.focus();') return self def hover(self, offset_x=None, offset_y=None): self.owner.actions.move_to(self, offset_x=offset_x, offset_y=offset_y, duration=.1) return self def drag(self, offset_x=0, offset_y=0, duration=.5): curr_x, curr_y = self.rect.midpoint offset_x += curr_x offset_y += curr_y self.drag_to((offset_x, offset_y), duration) return self def drag_to(self, ele_or_loc, duration=.5): if isinstance(ele_or_loc, ChromiumElement): ele_or_loc = ele_or_loc.rect.midpoint elif not isinstance(ele_or_loc, (list, tuple)): raise ValueError(_S._lang.join(_S._lang.INCORRECT_TYPE_, 'ele_or_loc', ALLOW_TYPE=_S._lang.ELE_LOC_FORMAT, CURR_VAL=ele_or_loc)) self.owner.actions.hold(self).move_to(ele_or_loc, duration=duration).release() return self def _get_obj_id(self, node_id=None, backend_id=None): if node_id: return self.owner._run_cdp('DOM.resolveNode', nodeId=node_id)['object']['objectId'] else: return self.owner._run_cdp('DOM.resolveNode', backendNodeId=backend_id)['object']['objectId'] def _get_node_id(self, obj_id=None, backend_id=None): if obj_id: return self.owner._run_cdp('DOM.requestNode', objectId=obj_id)['nodeId'] else: n = self.owner._run_cdp('DOM.describeNode', backendNodeId=backend_id)['node'] self._tag = n['localName'] return n['nodeId'] def _get_backend_id(self, node_id): n = self.owner._run_cdp('DOM.describeNode', nodeId=node_id)['node'] self._tag = n['localName'] return n['backendNodeId'] def _refresh_id(self): self._obj_id = self._get_obj_id(backend_id=self._backend_id) self._node_id = self._get_node_id(obj_id=self._obj_id) def _get_ele_path(self, xpath=True): if xpath: txt1 = 'let tag = el.nodeName.toLowerCase();' txt3 = ''' && sib.nodeName.toLowerCase()===tag''' txt4 = '''path = '/' + tag + '[' + nth + ']' + path;''' txt5 = '''return path;''' else: txt1 = ''' let i = el.getAttribute("id"); if (i){path = '>' + el.tagName.toLowerCase() + "#" + i + path; el = el.parentNode; continue;} ''' txt3 = '' txt4 = '''path = '>' + el.tagName.toLowerCase() + ":nth-child(" + nth + ")" + path;''' txt5 = '''return path.substr(1);''' js = '''function(){ function e(el) { if (!(el instanceof Element)) return; let path = ''; while (el.nodeType === Node.ELEMENT_NODE) { ''' + txt1 + ''' let sib = el, nth = 0; while (sib) { if(sib.nodeType === Node.ELEMENT_NODE''' + txt3 + '''){nth += 1;} sib = sib.previousSibling; } ''' + txt4 + ''' el = el.parentNode; } ''' + txt5 + ''' } return e(this);} ''' return self._run_js(js) def _set_file_input(self, files): if isinstance(files, str): files = files.split('\n') files = [str(Path(i).absolute()) for i in files] self.owner._run_cdp('DOM.setFileInputFiles', files=files, backendNodeId=self._backend_id) return self class ShadowRoot(BaseElement): def __init__(self, parent_ele, obj_id=None, backend_id=None): super().__init__(parent_ele.owner) self.tab = self.owner._tab self.parent_ele = parent_ele if backend_id: self._backend_id = backend_id self._obj_id = self._get_obj_id(backend_id) self._node_id = self._get_node_id(self._obj_id) elif obj_id: self._obj_id = obj_id self._node_id = self._get_node_id(obj_id) self._backend_id = self._get_backend_id(self._node_id) self._states = None self._type = 'ShadowRoot' def __call__(self, locator, index=1, timeout=None): return self.ele(locator, index=index, timeout=timeout) def __repr__(self): return f'' def __eq__(self, other): return self._backend_id == getattr(other, '_backend_id', None) @property def tag(self): return 'shadow-root' @property def html(self): return f'{self.inner_html}' @property def inner_html(self): return self._run_js('return this.innerHTML;') @property def states(self): if self._states is None: self._states = ShadowRootStates(self) return self._states def run_js(self, script, *args, as_expr=False, timeout=None): return self._run_js(script, *args, as_expr=as_expr, timeout=timeout) def _run_js(self, script, *args, as_expr=False, timeout=None): return run_js(self, script, as_expr, self.owner.timeouts.script if timeout is None else timeout, args) def run_async_js(self, script, *args, as_expr=False, timeout=None): from threading import Thread Thread(target=run_js, args=(self, script, as_expr, self.owner.timeouts.script if timeout is None else timeout, args)).start() def parent(self, level_or_loc=1, index=1, timeout=0): if isinstance(level_or_loc, int): loc = f'xpath:./ancestor-or-self::*[{level_or_loc}]' elif isinstance(level_or_loc, (tuple, str)): loc = get_loc(level_or_loc, True) if loc[0] == 'css selector': raise LocatorError(_S._lang.UNSUPPORTED_CSS_SYNTAX) loc = f'xpath:./ancestor-or-self::{loc[1].lstrip(". / ")}[{index}]' else: raise ValueError(_S._lang.join(_S._lang.INCORRECT_TYPE_, 'level_or_loc', ALLOW_TYPE=_S._lang.LOC_OR_IND, CURR_VAL=level_or_loc)) return self.parent_ele._ele(loc, timeout=timeout, relative=True, raise_err=False, method='parent()') def child(self, locator='', index=1, timeout=None): if not locator: loc = '*' else: loc = get_loc(locator, True) # 把定位符转换为xpath if loc[0] == 'css selector': raise LocatorError(_S._lang.UNSUPPORTED_CSS_SYNTAX) loc = loc[1].lstrip('./') loc = f'xpath:./{loc}' ele = self._ele(loc, index=index, relative=True, timeout=timeout) return ele if ele else NoneElement(self.owner, 'child()', {'locator': locator, 'index': index, 'timeout': timeout}) def next(self, locator='', index=1, timeout=None): loc = get_loc(locator, True) if loc[0] == 'css selector': raise LocatorError(_S._lang.UNSUPPORTED_CSS_SYNTAX) loc = loc[1].lstrip('./') xpath = f'xpath:./{loc}' ele = self.parent_ele._ele(xpath, index=index, relative=True, timeout=timeout) return ele if ele else NoneElement(self.owner, 'next()', {'locator': locator, 'index': index, 'timeout': timeout}) def before(self, locator='', index=1, timeout=None): loc = get_loc(locator, True) if loc[0] == 'css selector': raise LocatorError(_S._lang.UNSUPPORTED_CSS_SYNTAX) loc = loc[1].lstrip('./') xpath = f'xpath:./preceding::{loc}' ele = self.parent_ele._ele(xpath, index=index, relative=True, timeout=timeout) return ele if ele else NoneElement(self.owner, 'before()', {'locator': locator, 'index': index, 'timeout': timeout}) def after(self, locator='', index=1, timeout=None): nodes = self.afters(locator=locator, timeout=timeout) return nodes[index - 1] if nodes else NoneElement(self.owner, 'after()', {'locator': locator, 'index': index, 'timeout': timeout}) def children(self, locator='', timeout=None): if not locator: loc = '*' else: loc = get_loc(locator, True) # 把定位符转换为xpath if loc[0] == 'css selector': raise LocatorError(_S._lang.UNSUPPORTED_CSS_SYNTAX) loc = loc[1].lstrip('./') loc = f'xpath:./{loc}' return self._ele(loc, index=None, relative=True, timeout=timeout) def nexts(self, locator='', timeout=None): loc = get_loc(locator, True) if loc[0] == 'css selector': raise LocatorError(_S._lang.UNSUPPORTED_CSS_SYNTAX) loc = loc[1].lstrip('./') xpath = f'xpath:./{loc}' return self.parent_ele._ele(xpath, index=None, relative=True, timeout=timeout) def befores(self, locator='', timeout=None): loc = get_loc(locator, True) if loc[0] == 'css selector': raise LocatorError(_S._lang.UNSUPPORTED_CSS_SYNTAX) loc = loc[1].lstrip('./') xpath = f'xpath:./preceding::{loc}' return self.parent_ele._ele(xpath, index=None, relative=True, timeout=timeout) def afters(self, locator='', timeout=None): eles1 = self.nexts(locator) loc = get_loc(locator, True)[1].lstrip('./') xpath = f'xpath:./following::{loc}' return eles1 + self.parent_ele._ele(xpath, index=None, relative=True, timeout=timeout) def ele(self, locator, index=1, timeout=None): return self._ele(locator, timeout, index=index, method='ele()') def eles(self, locator, timeout=None): return self._ele(locator, timeout=timeout, index=None) def s_ele(self, locator=None, index=1, timeout=None): return (make_session_ele(self, locator, index=index, method='s_ele()') if locator is None or self.ele(locator, index=index, timeout=timeout) else NoneElement(self.owner, method='s_ele()', args={'locator': locator, 'index': index})) def s_eles(self, locator, timeout=None): return (make_session_ele(self, locator, index=None) if self.ele(locator, timeout=timeout) else SessionElementsList()) def _find_elements(self, locator, timeout, index=1, relative=False, raise_err=None): loc = get_loc(locator, css_mode=False) if loc[0] == 'css selector' and str(loc[1]).startswith(':root'): loc = loc[0], loc[1][5:] def do_find(): if loc[0] == 'css selector': if index == 1: nod_id = self.owner._run_cdp('DOM.querySelector', nodeId=self._node_id, selector=loc[1])['nodeId'] if nod_id: r = make_chromium_eles(self.owner, _ids=nod_id, is_obj_id=False) return None if r is False else r else: nod_ids = self.owner._run_cdp('DOM.querySelectorAll', nodeId=self._node_id, selector=loc[1])['nodeIds'] r = make_chromium_eles(self.owner, _ids=nod_ids, index=index, is_obj_id=False) return None if r is False else r else: eles = make_session_ele(self, loc, index=None) if isinstance(eles, (float, str, int)): return eles elif not eles: return None css = [] for i in eles: if hasattr(i, 'css_path'): c = i.css_path if c in ('html:nth-child(1)', 'html:nth-child(1)>body:nth-child(1)', 'html:nth-child(1)>body:nth-child(1)>shadow_root:nth-child(1)'): continue elif c.startswith('html:nth-child(1)>body:nth-child(1)>shadow_root:nth-child(1)'): c = c[61:] css.append((True, c)) else: css.append((False, i)) if index is not None: try: c = css[index - 1] if c[0] is False: return c[1] node_id = self.owner._run_cdp('DOM.querySelector', nodeId=self._node_id, selector=c[1])['nodeId'] r = make_chromium_eles(self.owner, _ids=node_id, is_obj_id=False) return None if r is False else r except IndexError: return None else: r = [] for i in css: if i[0] is False: r.append(i[1]) else: node_id = self.owner._run_cdp('DOM.querySelector', nodeId=self._node_id, selector=i[1])['nodeId'] if node_id: e = make_chromium_eles(self.owner, _ids=node_id, is_obj_id=False) if e is False: return None r.append(e) return None if not r else r end_time = perf_counter() + timeout result = do_find() while result is None and perf_counter() <= end_time: sleep(.01) result = do_find() if result or isinstance(result, (str, float, int)): return result return NoneElement(self.owner) if index is not None else ChromiumElementsList(self.owner) def _get_node_id(self, obj_id): return self.owner._run_cdp('DOM.requestNode', objectId=obj_id)['nodeId'] def _get_obj_id(self, back_id): return self.owner._run_cdp('DOM.resolveNode', backendNodeId=back_id)['object']['objectId'] def _get_backend_id(self, node_id): r = self.owner._run_cdp('DOM.describeNode', nodeId=node_id)['node'] self._tag = r['localName'].lower() return r['backendNodeId'] def find_in_chromium_ele(ele, locator, index=1, timeout=None, relative=True): # ---------------处理定位符--------------- if isinstance(locator, (str, tuple)): loc = get_loc(locator) else: raise LocatorError(ALLOW_TYPE=_S._lang.LOC_FORMAT, CURR_VAL=locator) loc_str = loc[1] if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'): loc_str = f'.{loc_str}' elif loc[0] == 'css selector' and loc[1].lstrip().startswith('>'): loc_str = f'{ele.css_path}{loc[1]}' loc = loc[0], loc_str if timeout is None: timeout = ele.timeout # ---------------执行查找----------------- if loc[0] == 'xpath': return find_by_xpath(ele, loc[1], index, timeout, relative=relative) else: return find_by_css(ele, loc[1], index, timeout) def find_by_xpath(ele, xpath, index, timeout, relative=True): type_txt = '9' if index == 1 else '7' node_txt = 'this.contentDocument' if ele.tag in __FRAME_ELEMENT__ and not relative else 'this' js = make_js_for_find_ele_by_xpath(xpath, type_txt, node_txt) ele.owner.wait.doc_loaded() def do_find(): res = ele.owner._run_cdp('Runtime.callFunctionOn', functionDeclaration=js, objectId=ele._obj_id, returnByValue=False, awaitPromise=True, userGesture=True) if res['result']['type'] == 'string': return res['result']['value'] if 'exceptionDetails' in res: if 'The result is not a node set' in res['result']['description']: js1 = make_js_for_find_ele_by_xpath(xpath, '1', node_txt) res = ele.owner._run_cdp('Runtime.callFunctionOn', functionDeclaration=js1, objectId=ele._obj_id, returnByValue=False, awaitPromise=True, userGesture=True) return res['result']['value'] elif 'is not a valid XPath expression' in res['result']['description']: raise LocatorError(_S._lang.INVALID_XPATH_, xpath) else: raise LocatorError(_S._lang.FIND_ELE_ERR, INFO=res) if res['result']['subtype'] == 'null' or res['result']['description'] in ('NodeList(0)', 'Array(0)'): return None if index == 1: r = make_chromium_eles(ele.owner, _ids=res['result']['objectId'], is_obj_id=True) return None if r is False else r else: res = ele.owner._run_cdp('Runtime.getProperties', objectId=res['result']['objectId'], ownProperties=True)['result'][:-1] if index is None: r = ChromiumElementsList(owner=ele.owner) for i in res: if i['value']['type'] == 'object': r.append(make_chromium_eles(ele.owner, _ids=i['value']['objectId'], is_obj_id=True)) else: r.append(i['value']['value']) return None if False in r else r else: eles_count = len(res) if eles_count == 0 or abs(index) > eles_count: return None index1 = eles_count + index + 1 if index < 0 else index res = res[index1 - 1] if res['value']['type'] == 'object': r = make_chromium_eles(ele.owner, _ids=res['value']['objectId'], is_obj_id=True) else: r = res['value']['value'] return None if r is False else r end_time = perf_counter() + timeout result = do_find() while result is None and perf_counter() < end_time: sleep(.01) result = do_find() if result: return result return NoneElement(ele.owner) if index is not None else ChromiumElementsList(owner=ele.owner) def find_by_css(ele, selector, index, timeout): selector = selector.replace('"', r'\"') find_all = '' if index == 1 else 'All' node_txt = 'this.contentDocument' if ele.tag in ('iframe', 'frame', 'shadow-root') else 'this' js = f'function(){{return {node_txt}.querySelector{find_all}("{selector}");}}' ele.owner.wait.doc_loaded() def do_find(): res = ele.owner._run_cdp('Runtime.callFunctionOn', functionDeclaration=js, objectId=ele._obj_id, returnByValue=False, awaitPromise=True, userGesture=True) if 'exceptionDetails' in res: if 'is not a valid selector' in res['result']['description']: raise LocatorError(_S._lang.INVALID_CSS_, selector) else: raise LocatorError(_S._lang.FIND_ELE_ERR, INFO=res) if res['result']['subtype'] == 'null' or res['result']['description'] in ('NodeList(0)', 'Array(0)'): return None if index == 1: r = make_chromium_eles(ele.owner, _ids=res['result']['objectId'], is_obj_id=True) return None if r is False else r else: obj_ids = [i['value']['objectId'] for i in ele.owner._run_cdp('Runtime.getProperties', objectId=res['result']['objectId'], ownProperties=True)['result']] r = make_chromium_eles(ele.owner, _ids=obj_ids, index=index, is_obj_id=True) return None if r is False else r end_time = perf_counter() + timeout result = do_find() while result is None and perf_counter() < end_time: sleep(.01) result = do_find() if result: return result return NoneElement(ele.owner) if index is not None else ChromiumElementsList(owner=ele.owner) def make_chromium_eles(page, _ids, index=1, is_obj_id=True, ele_only=False): if is_obj_id: get_node_func = _get_node_by_obj_id else: get_node_func = _get_node_by_node_id if not isinstance(_ids, (list, tuple)): _ids = (_ids,) if index is not None: # 获取一个 if ele_only: for obj_id in _ids: tmp = get_node_func(page, obj_id, ele_only) if tmp is not None: return tmp return False else: obj_id = _ids[index - 1] return get_node_func(page, obj_id, ele_only) else: # 获取全部 nodes = ChromiumElementsList(owner=page) for obj_id in _ids: # if obj_id == 0: # continue tmp = get_node_func(page, obj_id, ele_only) if tmp is False: return False elif tmp is not None: nodes.append(tmp) return nodes def _get_node_info(page, id_type, _id): if not _id: return False arg = {id_type: _id} node = page.driver.run('DOM.describeNode', **arg) if 'error' in node: return False return node def _get_node_by_obj_id(page, obj_id, ele_only): """根据obj id返回元素对象或文本,ele_only时如果是文本返回None,出错返回False""" node = _get_node_info(page, 'objectId', obj_id) if node is False: return False if node['node']['nodeName'] in ('#text', '#comment'): return None if ele_only else node['node']['nodeValue'] else: return _make_ele(page, obj_id, node) def _get_node_by_node_id(page, node_id, ele_only): """根据node id返回元素对象或文本,ele_only时如果是文本返回None,出错返回False""" node = _get_node_info(page, 'nodeId', node_id) if node is False: return False if node['node']['nodeName'] in ('#text', '#comment'): return None if ele_only else node['node']['nodeValue'] else: obj_id = page.driver.run('DOM.resolveNode', nodeId=node_id) if 'error' in obj_id: return False obj_id = obj_id['object']['objectId'] return _make_ele(page, obj_id, node) def _make_ele(page, obj_id, node): ele = ChromiumElement(page, obj_id=obj_id, node_id=node['node']['nodeId'], backend_id=node['node']['backendNodeId']) if ele.tag in __FRAME_ELEMENT__: from .._pages.chromium_frame import ChromiumFrame ele = ChromiumFrame(page, ele, node) return ele def make_js_for_find_ele_by_xpath(xpath, type_txt, node_txt): for_txt = '' # 获取第一个元素、节点或属性 if type_txt == '9': return_txt = ''' if(e.singleNodeValue==null){return null;} else if(e.singleNodeValue.constructor.name=="Text"){return e.singleNodeValue.data;} else if(e.singleNodeValue.constructor.name=="Attr"){return e.singleNodeValue.nodeValue;} else if(e.singleNodeValue.constructor.name=="Comment"){return e.singleNodeValue.nodeValue;} else{return e.singleNodeValue;}''' # 按顺序获取所有元素、节点或属性 elif type_txt == '7': for_txt = """ let a=new Array(); for(let i = 0; i ChromiumElement: """在内部查找元素 :param locator: 元素的定位信息,可以是loc元组,或查询字符串 :param timeout: 超时时间(秒) :return: ChromiumElement对象或属性、文本 """ ... def __repr__(self) -> str: ... def __eq__(self, other: ChromiumElement) -> bool: ... @property def tag(self) -> str: """返回元素tag""" ... @property def html(self) -> str: """返回元素outerHTML文本""" ... @property def inner_html(self) -> str: """返回元素innerHTML文本""" ... @property def attrs(self) -> dict: """返回元素所有attribute属性""" ... @property def text(self) -> str: """返回元素内所有文本,文本已格式化""" ... @property def raw_text(self) -> str: """返回未格式化处理的元素内文本""" ... @property def set(self) -> ChromiumElementSetter: """返回用于设置元素属性的对象""" ... @property def states(self) -> ElementStates: """返回用于获取元素状态的对象""" ... @property def pseudo(self) -> Pseudo: """返回用于获取伪元素内容的对象""" ... @property def rect(self) -> ElementRect: """返回用于获取元素位置的对象""" ... @property def shadow_root(self) -> Union[None, ShadowRoot]: """返回当前元素的shadow_root元素对象""" ... @property def sr(self) -> Union[None, ShadowRoot]: """返回当前元素的shadow_root元素对象""" ... @property def scroll(self) -> ElementScroller: """用于滚动滚动条的对象""" ... @property def click(self) -> Clicker: """返回用于点击的对象""" ... @property def wait(self) -> ElementWaiter: """返回用于等待的对象""" ... @property def select(self) -> Union[SelectElement, False]: """返回专门处理下拉列表的Select类,非元素项目 :param equal: 是否匹配被选择的元素,False匹配不被选择的 :return: 筛选结果 """ ... def enabled(self, equal: bool = True) -> ChromiumElement: """以是否可用为条件筛选元素 :param equal: 是否匹配可用的元素,False表示匹配disabled状态的 :return: 筛选结果 """ ... def clickable(self, equal: bool = True) -> ChromiumElement: """以是否可点击为条件筛选元素 :param equal: 是否匹配可点击的元素,False表示匹配不是可点击的 :return: 筛选结果 """ ... def have_rect(self, equal: bool = True) -> ChromiumElement: """以是否有大小为条件筛选元素 :param equal: 是否匹配有大小的元素,False表示匹配没有大小的 :return: 筛选结果 """ ... def style(self, name: str, value: str, equal: bool = True) -> ChromiumElement: """以是否拥有某个style值为条件筛选元素 :param name: 属性名称 :param value: 属性值 :param equal: True表示匹配name值为value值的元素,False表示匹配name值不为value值的 :return: 筛选结果 """ ... def property(self, name: str, value: str, equal: bool = True) -> ChromiumElement: """以是否拥有某个property值为条件筛选元素 :param name: 属性名称 :param value: 属性值 :param equal: True表示匹配name值为value值的元素,False表示匹配name值不为value值的 :return: 筛选结果 """ ... def _get_attr(self, name: str, value: str, method: str, equal: bool = True) -> ChromiumElement: """返回通过某个方法可获得某个值的元素 :param name: 属性名称 :param value: 属性值 :param method: 方法名称 :return: 筛选结果 """ ... def _any_state(self, name: str, equal: bool = True) -> ChromiumElement: """ :param name: 状态名称 :param equal: 是否是指定状态,False表示否定状态 :return: 选中的列表 """ ... class ChromiumFilter(ChromiumFilterOne): def __iter__(self) -> Iterable[ChromiumElement]: ... def __next__(self) -> ChromiumElement: ... def __len__(self) -> int: ... def __getitem__(self, item: int) -> ChromiumElement: ... @property def get(self) -> Getter: """返回用于获取元素属性的对象""" ... def tag(self, name: str, equal: bool = True) -> ChromiumFilter: """筛选某种元素 :param name: 标签页名称 :param equal: True表示匹配这种元素,False表示匹配非这种元素 :return: 筛选结果 """ ... def attr(self, name: str, value: str, equal: bool = True) -> ChromiumFilter: """以是否拥有某个attribute值为条件筛选元素 :param name: 属性名称 :param value: 属性值 :param equal: True表示匹配name值为value值的元素,False表示匹配name值不为value值的 :return: 筛选结果 """ ... def text(self, text: str, fuzzy: bool = True, contain: bool = True) -> ChromiumFilter: """以是否含有指定文本为条件筛选元素 :param text: 用于匹配的文本 :param fuzzy: 是否模糊匹配 :param contain: 是否包含该字符串,False表示不包含 :return: 筛选结果 """ ... def displayed(self, equal: bool = True) -> ChromiumFilter: """以是否显示为条件筛选元素 :param equal: 是否匹配显示的元素,False匹配不显示的 :return: 筛选结果 """ ... def checked(self, equal: bool = True) -> ChromiumFilter: """以是否被选中为条件筛选元素 :param equal: 是否匹配被选中的元素,False匹配不被选中的 :return: 筛选结果 """ ... def selected(self, equal: bool = True) -> ChromiumFilter: """以是否被选择为条件筛选元素,用于元素输入路径的方法设置。' NO_SUCH_KEY_ = '没有这个按键: {}' NO_NEW_TAB = '没有等到新标签页。' NO_SUCH_TAB = '没有找到指定标签页。' NEED_DOMAIN = '需设置domain或url值。如设置url值,需以http开头。' NEED_DOMAIN2 = 'cookie必须带有"domain"或"url"字段。' NEED_ARG_ = '{}必须设置。' SAVE_PATH_MUST_BE_FOLDER = 'save_path必须为文件夹。' GET_PDF_FAILED = '保存失败,可能浏览器版本不支持。' GET_BLOB_FAILED = '无法获取该资源。' NO_SRC_ATTR = '元素没有src值或该值为空。' D_MODE_ONLY = 'url、domain、path参数只有d模式下有效。' S_MODE_ONLY = '以下参数在s模式下才会生效:' STATUS_CODE_ = '状态码: {}' TAB_OBJ_EXISTS = '该标签页已有非MixTab版本,如需多对象共用标签页请设置Settings.set_singleton_tab_obj(False)。' ONLY_ENGLISH = '转换成视频仅支持英文路径和文件名。' SELECT_ONLY = 'select方法只能在 element.' NO_SUCH_KEY_ = 'There is no button: {}' NO_NEW_TAB = 'Failed to wait for new tab.' NO_SUCH_TAB = 'The specified tab was not found.' NEED_DOMAIN = 'You need to set a domain or url value. If the url value is set, it must start with http.' NEED_DOMAIN2 = 'The cookie must have a "domain" or "url" field.' NEED_ARG_ = '{} must be set.' SAVE_PATH_MUST_BE_FOLDER = 'save_path must be a folder.' GET_PDF_FAILED = 'The save fails because the browser version may not support it.' GET_BLOB_FAILED = 'The resource cannot be retrieved.' NO_SRC_ATTR = 'The element does not have a src value or the value is empty.' D_MODE_ONLY = 'The url, domain, and path parameters are valid only in d mode.' S_MODE_ONLY = 'The following parameters take effect only in s mode:' STATUS_CODE_ = 'Status Code: {}' TAB_OBJ_EXISTS = ('There is already a non-mixtab version of this tab. If multiple objects are common, ' 'use Settings.set_singleton_tab_obj(False).') ONLY_ENGLISH = 'Only English path and file name are supported when converting to video.' SELECT_ONLY = 'The select method can only be used on