Repository: stanfordio/truthbrush Branch: main Commit: c238b780fa57 Files: 11 Total size: 54.5 KB Directory structure: gitextract_u2j8s6ci/ ├── .github/ │ └── workflows/ │ └── publish-to-pypi.yml ├── .gitignore ├── CITATION.cff ├── LICENSE ├── README.md ├── SECURITY.md ├── pyproject.toml ├── test/ │ └── test_api.py └── truthbrush/ ├── __init__.py ├── api.py └── cli.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/publish-to-pypi.yml ================================================ name: Publish to PyPI on: release: types: [published] jobs: publish: runs-on: ubuntu-latest environment: release permissions: # IMPORTANT: this permission is mandatory for trusted publishing id-token: write steps: - uses: actions/checkout@v4 with: fetch-depth: 1 - name: Set up Python 3.14 uses: actions/setup-python@v5 with: python-version: "3.14" - name: Install Poetry uses: snok/install-poetry@v1 with: virtualenvs-create: true virtualenvs-in-project: true - name: Install Dependencies run: poetry install - name: Build package run: poetry build - name: Publish package distributions to PyPI uses: pypa/gh-action-pypi-publish@release/v1 sync-citation: runs-on: ubuntu-latest permissions: contents: write steps: - uses: actions/checkout@v4 with: ref: main fetch-depth: 1 - name: Sync CITATION.cff to release tag run: | set -eu version="${GITHUB_REF_NAME#v}" date=$(date -u +%Y-%m-%d) sed -i "s/^version: .*/version: $version/" CITATION.cff sed -i "s/^date-released: .*/date-released: $date/" CITATION.cff if git diff --quiet CITATION.cff; then echo "CITATION.cff already in sync at $version / $date" exit 0 fi git config user.name "github-actions[bot]" git config user.email "41898282+github-actions[bot]@users.noreply.github.com" git add CITATION.cff git commit -m "Sync CITATION.cff to $GITHUB_REF_NAME" git push origin HEAD:main ================================================ FILE: .gitignore ================================================ out/* # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ ================================================ FILE: CITATION.cff ================================================ cff-version: 1.2.0 message: "Feel free to cite this software in your research." authors: - family-names: McCain given-names: Miles - family-names: Thiel given-names: David orcid: https://orcid.org/0000-0002-0947-5921 title: "Truthbrush" version: 0.4.1 date-released: 2026-04-25 url: https://github.com/stanfordio/truthbrush ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ ### Please see maintained fork at: https://github.com/w2rc/truthbrush # truthbrush Truthbrush is an API client for Truth Social. Currently, this tool can: * Search for users, statuses, groups, or hashtags * Pull a user's statuses * Pull the list of "People to Follow" or suggested users * Pull "trending" hashtags * Pull "trending" Truth posts * Pull ads * Pull a user's metadata * Pull the list of users who liked a post * Pull the list of comments on a post * Pull "trending" groups * Pull list of suggested groups * Pull "trending" group hashtags * Pull posts from group timeline Truthbrush is designed for academic research, open source intelligence gathering, and data archival. It pulls all data from the publicly accessible API. ## Installation From PyPi: ```sh pip install truthbrush ``` From git: * To install it, run `pip install git+https://github.com/stanfordio/truthbrush.git` From source: * Clone the repository and run `pip3 install .`. Provided your `pip` is setup correctly, this will make `truthbrush` available both as a command and as a Python package. After installation, you will need to set your Truth Social username and password as environmental variables. `export TRUTHSOCIAL_USERNAME=foo` `export TRUTHSOCIAL_PASSWORD=bar` If you encounter login issues, you can instead extract your login token from the truth:auth Local Storage store and export it in `TRUTHSOCIAL_TOKEN`. You may also set these variables in a `.env` file in the directory from which you are running Truthbrush. ### Public mode (no credentials) Some Truth Social endpoints are readable without authentication. To run Truthbrush against only those endpoints, pass `--no-auth` on the CLI or construct the client with `require_auth=False`: ```sh truthbrush --no-auth trends truthbrush --no-auth user realDonaldTrump ``` ```py from truthbrush import Api api = Api(require_auth=False) print(api.trending()) ``` Endpoints that require authentication will return an API error (typically HTTP 401) when called in public mode. Which endpoints are publicly accessible is determined by Truth Social and may change without notice. ## CLI Usage ```text Usage: truthbrush [OPTIONS] COMMAND [ARGS]... Options: --no-auth Run without authentication. Only public endpoints will succeed. --help Show this message and exit. Commands: search Search for users, statuses or hashtags. statuses Pull a user's statuses. suggestions Pull the list of suggested users. tags Pull trendy tags. trends Pull trendy Truths. ads Pull ads. user Pull a user's metadata. likes Pull the list of users who liked a post comments Pull the list of oldest comments on a post groupposts Pull posts from a groups's timeline grouptags Pull trending group tags. grouptrends Pull trending groups. groupsuggestions Pull list of suggested groups. ``` **Search for users, statuses, groups, or hashtags** ```bash truthbrush search --searchtype [accounts|statuses|hashtags|groups] QUERY ``` Restrict status results to a date window: ```bash truthbrush search --searchtype statuses --start-date 2024-11-01 --end-date 2024-11-07 QUERY ``` **Pull all statuses (posts) from a user** ```bash truthbrush statuses HANDLE ``` Restrict to a date window (UTC assumed when no timezone is given): ```bash truthbrush statuses --created-after 2024-11-01 --created-before 2024-11-07 HANDLE ``` **Pull "People to Follow" (suggested) users** ```bash truthbrush suggestions ``` **Pull trendy tags** ```bash truthbrush tags ``` **Pull ads** ```bash truthbrush ads ``` **Pull all of a user's metadata** ```bash truthbrush user HANDLE ``` **Pull the list of users who liked a post** ```bash truthbrush likes POST --includeall TOP_NUM ``` **Pull the list of oldest comments on a post** ```bash truthbrush comments POST --includeall --onlyfirst TOP_NUM ``` **Pull trending group tags** ```bash truthbrush grouptags ``` **Pull trending groups** ```bash truthbrush grouptrends ``` **Pull list of suggested groups** ```bash truthbrush groupsuggestions ``` **Pull posts from a group's timeline** ```bash truthbrush groupposts GROUP_ID ``` ## Contributing Contributions are encouraged! For small bug fixes and minor improvements, feel free to just open a PR. For larger changes, please open an issue first so that other contributors can discuss your plan, avoid duplicated work, and ensure it aligns with the goals of the project. Be sure to also follow the [code of conduct](CODE_OF_CONDUCT.md). Thanks! Development setup (ensure you have [Poetry](https://python-poetry.org/) installed): ```sh poetry install poetry shell truthbrush --help # will use your local copy of truthbrush ``` To run the tests: ```sh pytest # optionally run tests with verbose logging outputs: pytest --log-cli-level=DEBUG -s ``` Please format and lint your code with `ruff`, and run `ty` to check types: ```sh ruff format . ruff check . ty check truthbrush/ ``` ================================================ FILE: SECURITY.md ================================================ # Security Policy ## Reporting a Vulnerability If you believe you have found a vulnerability, please send an email to [internetobservatory@stanford.edu](mailto:internetobservatory@stanford.edu) with information on what the vulnerability is, steps to reproduce, and estimated severity. We will strive to get back to you as soon as possible. Please do not open GitHub issues for anything you suspect may be a security vulnerability. ================================================ FILE: pyproject.toml ================================================ [tool.poetry] name = "truthbrush" version = "0.4.1" description = "API client for Truth Social" authors = ["R. Miles McCain ", "David Thiel"] license = "Apache 2.0" readme = "README.md" [tool.poetry.scripts] truthbrush = "truthbrush.cli:cli" [tool.poetry.dependencies] python = "^3.14" click = "^8.3.0" loguru = "^0.7.3" python-dotenv = "^1.2.0" python-dateutil = "^2.9.0" curl_cffi = "^0.15.0" [tool.poetry.group.dev.dependencies] pytest = "^9.0.3" ruff = "^0.15.0" ty = ">=0.0.31,<1.0" [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" [tool.ruff] target-version = "py314" line-length = 100 [tool.ruff.lint] select = ["E", "F", "I", "UP", "B"] ignore = ["E501"] [tool.ty.rules] unresolved-import = "warn" ================================================ FILE: test/test_api.py ================================================ from datetime import UTC import pytest from dateutil import parser as date_parse from truthbrush.api import Api, LoginErrorException @pytest.fixture(scope="module") def api(): return Api() def as_datetime(date_str): """Datetime formatter function. Ensures timezone is UTC. Consider moving to Api class.""" return date_parse.parse(date_str).replace(tzinfo=UTC) def test_lookup(api): user = api.lookup(user_handle="realDonaldTrump") assert list(user.keys()) == [ "id", "username", "acct", "display_name", "locked", "bot", "discoverable", "group", "created_at", "note", "url", "avatar", "avatar_static", "header", "header_static", "followers_count", "following_count", "statuses_count", "last_status_at", "verified", "location", "website", "accepting_messages", "chats_onboarded", "feeds_onboarded", "show_nonmember_group_statuses", "pleroma", "emojis", "fields", ] assert isinstance(user["id"], str) def test_pull_statuses(api): username = "truthsocial" # COMPLETE PULLS # it fetches a timeline of the user's posts: full_timeline = list(api.pull_statuses(username=username, replies=False, verbose=True)) assert len(full_timeline) > 25 # more than one page # the posts are in reverse chronological order: latest, earliest = full_timeline[0], full_timeline[-1] latest_at, earliest_at = as_datetime(latest["created_at"]), as_datetime(earliest["created_at"]) assert earliest_at < latest_at # EMPTY PULLS # can use created_after param for filtering out posts: next_pull = list( api.pull_statuses(username=username, replies=False, created_after=latest_at, verbose=True) ) assert not any(next_pull) # can use since_id param for filtering out posts: next_pull = list( api.pull_statuses(username=username, replies=False, since_id=latest["id"], verbose=True) ) assert not any(next_pull) # PARTIAL PULLS n_posts = 50 # two and a half pages worth, to verify everything is ok recent = full_timeline[n_posts] recent_at = as_datetime(recent["created_at"]) # can use created_after param for filtering out posts: partial_pull = list( api.pull_statuses(username=username, replies=False, created_after=recent_at, verbose=True) ) assert len(partial_pull) == n_posts assert recent["id"] not in [post["id"] for post in partial_pull] # can use since_id param for filtering out posts: partial_pull = list( api.pull_statuses(username=username, replies=False, since_id=recent["id"], verbose=True) ) assert len(partial_pull) == n_posts assert recent["id"] not in [post["id"] for post in partial_pull] # POST INFO # contains status info assert list(latest.keys()) == [ "id", "created_at", "in_reply_to_id", "quote_id", "in_reply_to_account_id", "sensitive", "spoiler_text", "visibility", "language", "uri", "url", "content", "account", "media_attachments", "mentions", "tags", "card", "group", "quote", "in_reply_to", "reblog", "sponsored", "replies_count", "reblogs_count", "favourites_count", "favourited", "reblogged", "muted", "pinned", "bookmarked", "poll", "emojis", "_pulled", ] assert isinstance(latest["id"], str) def test_get_auth_id_raises_login_error_exception(api): with pytest.raises(LoginErrorException): api.get_auth_id("invalid_username", "invalid_password") def test_public_mode_does_not_require_credentials(monkeypatch): monkeypatch.delenv("TRUTHSOCIAL_USERNAME", raising=False) monkeypatch.delenv("TRUTHSOCIAL_PASSWORD", raising=False) monkeypatch.delenv("TRUTHSOCIAL_TOKEN", raising=False) public_api = Api(username=None, password=None, token=None, require_auth=False) assert public_api.auth_id is None # user_likes calls __check_login then short-circuits on top_num < 1 before any HTTP. assert list(public_api.user_likes("abc", top_num=0)) == [] def test_strict_mode_still_raises_without_credentials(monkeypatch): monkeypatch.delenv("TRUTHSOCIAL_USERNAME", raising=False) monkeypatch.delenv("TRUTHSOCIAL_PASSWORD", raising=False) monkeypatch.delenv("TRUTHSOCIAL_TOKEN", raising=False) strict_api = Api(username=None, password=None, token=None) with pytest.raises(LoginErrorException): strict_api.lookup(user_handle="realDonaldTrump") ================================================ FILE: truthbrush/__init__.py ================================================ from truthbrush.api import Api, CFBlockException, GeoblockException, LoginErrorException __all__ = ["Api", "CFBlockException", "GeoblockException", "LoginErrorException"] ================================================ FILE: truthbrush/api.py ================================================ import json import logging import os from collections.abc import Iterator from datetime import UTC, datetime from time import sleep from typing import Any, Literal, cast import curl_cffi from curl_cffi import requests from dateutil import parser as date_parse from dotenv import load_dotenv from loguru import logger load_dotenv() # take environment variables from .env. _DEBUG_ENV = os.getenv("DEBUG") or "" logging.basicConfig( level=logging.DEBUG if _DEBUG_ENV.lower() not in ("", "false") else logging.INFO ) BASE_URL = "https://truthsocial.com" API_BASE_URL = "https://truthsocial.com/api" USER_AGENT: str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36" IMPERSONATE_TARGET: str = "chrome146" # Oauth client credentials, from https://truthsocial.com/packs/js/application-d77ef3e9148ad1d0624c.js CLIENT_ID = "9X1Fdd-pxNsAgEDNi_SfhJWi8T-vLuV2WVzKIbkTCw4" CLIENT_SECRET = "ozF8jzI4968oTKFkEnsBC-UbLPCdrSv0MkXGQu2o_-M" proxies = {"http": os.getenv("http_proxy"), "https": os.getenv("https_proxy")} TRUTHSOCIAL_USERNAME = os.getenv("TRUTHSOCIAL_USERNAME") TRUTHSOCIAL_PASSWORD = os.getenv("TRUTHSOCIAL_PASSWORD") TRUTHSOCIAL_TOKEN = os.getenv("TRUTHSOCIAL_TOKEN") class LoginErrorException(Exception): pass class GeoblockException(LoginErrorException): """Raised when Truth Social blocks access due to geographic restrictions""" pass class CFBlockException(LoginErrorException): """Raised when Cloudflare blocks the request""" pass def date_to_bound(dt_input: str | datetime, bound: Literal["start", "end"]) -> int: if isinstance(dt_input, str): dt_input = datetime.fromisoformat(dt_input) if dt_input.hour or dt_input.minute or dt_input.second or dt_input.microsecond: raise ValueError( "date string must not include a time component. Pass in datetime object for time-specific bounds." ) if dt_input.tzinfo is None: dt_input = dt_input.replace(tzinfo=UTC) if bound == "start": dt = dt_input.replace(hour=0, minute=0, second=0, microsecond=0) ms = int(dt.timestamp() * 1000) return (ms << 16) | 0x0000 else: dt = dt_input.replace(hour=23, minute=59, second=59, microsecond=999999) ms = int(dt.timestamp() * 1000) return (ms << 16) | 0xFFFF class Api: def __init__( self, username: str | None = TRUTHSOCIAL_USERNAME, password: str | None = TRUTHSOCIAL_PASSWORD, token: str | None = TRUTHSOCIAL_TOKEN, *, require_auth: bool = True, ): self.ratelimit_max = 300 self.ratelimit_remaining: int | None = None self.ratelimit_reset: datetime | None = None self.__username = username self.__password = password self.auth_id = token self.require_auth = require_auth def __check_login(self): """Runs before any login-walled function to check for login credentials and generates an auth ID token""" if self.auth_id is None: if not self.require_auth: return if self.__username is None: raise LoginErrorException("Username is missing.") if self.__password is None: raise LoginErrorException("Password is missing.") self.auth_id = self.get_auth_id(self.__username, self.__password) logger.warning(f"Using token {self.auth_id}") def _make_session(self): s = requests.Session() return s def _check_ratelimit(self, resp): if resp.headers.get("x-ratelimit-limit") is not None: self.ratelimit_max = int(resp.headers.get("x-ratelimit-limit")) if resp.headers.get("x-ratelimit-remaining") is not None: self.ratelimit_remaining = int(resp.headers.get("x-ratelimit-remaining")) if resp.headers.get("x-ratelimit-reset") is not None: self.ratelimit_reset = date_parse.parse(resp.headers.get("x-ratelimit-reset")) if ( self.ratelimit_remaining is not None and self.ratelimit_remaining <= 50 and self.ratelimit_reset is not None ): # We do 50 to be safe; their tracking is a bit stochastic... it can jump down quickly now = datetime.now(UTC) time_to_sleep = (self.ratelimit_reset.replace(tzinfo=UTC) - now).total_seconds() logger.warning(f"Approaching rate limit; sleeping for {time_to_sleep} seconds...") if time_to_sleep > 0: sleep(time_to_sleep) else: sleep(10) def _get(self, url: str, params: dict | None = None) -> Any: headers = {"User-Agent": USER_AGENT} if self.auth_id is not None: headers["Authorization"] = "Bearer " + self.auth_id try: resp = self._make_session().get( API_BASE_URL + url, params=params, proxies=proxies, impersonate=IMPERSONATE_TARGET, headers=headers, ) except curl_cffi.curl.CurlError as e: logger.error(f"Curl error: {e}") return None # Will also sleep self._check_ratelimit(resp) try: r = resp.json() except json.JSONDecodeError: body = resp.text if "Just a moment" in body or "cdn-cgi/challenge-platform" in body: raise CFBlockException( "Cloudflare challenge page received instead of JSON. " "Source IP is likely flagged; try a different network." ) from None logger.error(f"Failed to decode JSON: {body}") r = None return r def _get_paginated( self, url: str, params: dict | None = None, resume: str | None = None ) -> Any: next_link: str | None = API_BASE_URL + url headers = {"User-Agent": USER_AGENT} if self.auth_id is not None: headers["Authorization"] = "Bearer " + self.auth_id if resume is not None: next_link += f"?max_id={resume}" while next_link is not None: resp = self._make_session().get( next_link, params=params, proxies=proxies, impersonate=IMPERSONATE_TARGET, headers=headers, ) link_header = resp.headers.get("Link", "") next_link = None for link in link_header.split(","): parts = link.split(";") if len(parts) == 2 and parts[1].strip() == 'rel="next"': next_link = parts[0].strip("<>") break logger.info(f"Next: {next_link}, resp: {resp}, headers: {resp.headers}") yield resp.json() # Will also sleep self._check_ratelimit(resp) def user_likes(self, post: str, include_all: bool = False, top_num: int = 40) -> Iterator[dict]: """Return the top_num most recent (or all) users who liked the post.""" self.__check_login() top_num = int(top_num) if top_num < 1: return post = post.split("/")[-1] n_output = 0 for followers_batch in self._get_paginated( f"/v1/statuses/{post}/favourited_by", resume=None, params=dict(limit=80) ): for f in followers_batch: yield f n_output += 1 if not include_all and n_output >= top_num: return def pull_comments( self, post: str, include_all: bool = False, only_first: bool = False, top_num: int = 40, ) -> Iterator[dict]: """Return the top_num oldest (or all) replies to a post.""" self.__check_login() top_num = int(top_num) if top_num < 1: return post = post.split("/")[-1] n_output = 0 for followers_batch in self._get_paginated( f"/v1/statuses/{post}/context/descendants", resume=None, params=dict(sort="oldest"), ): # TO-DO: sort by sort=controversial, sort=newest, sort=oldest, sort=trending for f in followers_batch: if (only_first and f["in_reply_to_id"] == post) or not only_first: yield f n_output += 1 if not include_all and n_output >= top_num: return def lookup(self, user_handle: str | None = None) -> dict | None: """Lookup a user's information.""" self.__check_login() assert user_handle is not None return self._get("/v1/accounts/lookup", params=dict(acct=user_handle)) def search( self, searchtype: str | None = None, query: str | None = None, limit: int = 40, resolve: bool = True, offset: int = 0, min_id: str = "0", max_id: str | None = None, start_date: str | datetime | None = None, end_date: str | datetime | None = None, ) -> Iterator[dict]: """Search users, statuses or hashtags.""" self.__check_login() assert query is not None and searchtype is not None # error handling for date and id bounds if min_id != "0" and start_date is not None: raise ValueError("Cannot specify both min_id and start_date") if max_id is not None and end_date is not None: raise ValueError("Cannot specify both max_id and end_date") if start_date is not None: min_id = str(date_to_bound(start_date, "start")) if end_date is not None: max_id = str(date_to_bound(end_date, "end")) if max_id is not None: assert min_id < max_id, "min_id must be less than max_id" # Truth Social's /v2/search caps each page at ~20 regardless of `limit`, # and pagination state is not shared across backend nodes — so the same # offset can return data on one call and an empty page on the next. PAGE_SIZE = 20 MAX_EMPTY_RETRIES = 1 total_yielded = 0 empty_streak = 0 while total_yielded < limit: fetch_size = min(PAGE_SIZE, limit - total_yielded) params = dict( q=query, resolve=resolve, limit=fetch_size, type=searchtype, offset=offset, min_id=min_id, ) if max_id is not None: params["max_id"] = max_id resp = self._get("/v2/search", params=params) if not resp: break page_count = len(resp.get(searchtype) or []) if page_count == 0: empty_streak += 1 if empty_streak > MAX_EMPTY_RETRIES: break # Advance by PAGE_SIZE (not page_count, which is 0) so the # retry probes a different offset rather than re-polling. offset += PAGE_SIZE sleep(1) continue empty_streak = 0 yield resp total_yielded += page_count offset += page_count def hashtag( self, tag: str | None = None, limit: int = 100, ) -> Iterator[list[dict]]: """Collect posts with a specific hashtag.""" self.__check_login() assert tag is not None if tag.startswith("#"): # Remove the hashtag symbol tag = tag[1:] num_results = 0 params: dict = dict() while num_results < limit: logger.info(f"Collecting posts with hashtag: {tag}, max_id: {params.get('max_id')}") resp = self._get( f"/v1/timelines/tag/{tag}", params=params, ) if not resp: break # Filter out empty results results = [value for value in resp if value] if not results: break num_results += len(results) params["max_id"] = results[-1]["id"] yield results def trending(self, limit=10): """Return trending truths. Optional arg limit<20 specifies number to return.""" self.__check_login() return self._get(f"/v1/truth/trending/truths?limit={limit}") def group_posts(self, group_id: str, limit: int = 20) -> list[dict]: self.__check_login() timeline: list[dict] = [] posts = self._get(f"/v1/timelines/group/{group_id}?limit={limit}") while posts: timeline += posts limit = limit - len(posts) if limit <= 0: break max_id = posts[-1]["id"] posts = self._get(f"/v1/timelines/group/{group_id}?max_id={max_id}&limit={limit}") return timeline def tags(self): """Return trending tags.""" self.__check_login() return self._get("/v1/trends") def suggested(self, maximum: int = 50) -> Any: """Return a list of suggested users to follow.""" self.__check_login() return self._get(f"/v2/suggestions?limit={maximum}") def trending_groups(self, limit=10): """Return trending group truths. Optional arg limit<20 specifies number to return.""" self.__check_login() return self._get(f"/v1/truth/trends/groups?limit={limit}") def group_tags(self): """Return trending group tags.""" self.__check_login() return self._get("/v1/groups/tags") def suggested_groups(self, maximum: int = 50) -> Any: """Return a list of suggested groups to follow.""" self.__check_login() return self._get(f"/v1/truth/suggestions/groups?limit={maximum}") def ads(self, device: str = "desktop") -> Any: """Return a list of ads from Rumble's Ad Platform via Truth Social API.""" self.__check_login() return self._get(f"/v3/truth/ads?device={device}") def user_followers( self, user_handle: str | None = None, user_id: str | None = None, maximum: int = 1000, resume: str | None = None, ) -> Iterator[dict]: assert user_handle is not None or user_id is not None if user_id is None: user = self.lookup(user_handle) assert user is not None, "lookup returned no user" user_id = user["id"] n_output = 0 for followers_batch in self._get_paginated( f"/v1/accounts/{user_id}/followers", resume=resume ): for f in followers_batch: yield f n_output += 1 if maximum is not None and n_output >= maximum: return def user_following( self, user_handle: str | None = None, user_id: str | None = None, maximum: int = 1000, resume: str | None = None, ) -> Iterator[dict]: assert user_handle is not None or user_id is not None if user_id is None: user = self.lookup(user_handle) assert user is not None, "lookup returned no user" user_id = user["id"] n_output = 0 for followers_batch in self._get_paginated( f"/v1/accounts/{user_id}/following", resume=resume ): for f in followers_batch: yield f n_output += 1 if maximum is not None and n_output >= maximum: return def pull_statuses( self, username: str | None = None, replies: bool = False, verbose: bool = False, created_after: datetime | None = None, since_id: str | int | None = None, pinned: bool = False, created_before: datetime | None = None, *, user_id: str | None = None, ) -> Iterator[dict]: """Pull the given user's statuses. Pass either `username` or `user_id`. Supplying `user_id` directly skips an extra `lookup` call, which matters when `lookup` is not available (e.g. in public mode, if Truth Social gates that endpoint). Params: created_after : timezone aware datetime object (lower bound, exclusive) created_before : timezone aware datetime object (upper bound). The time component is rounded up to end-of-day UTC, so passing `2024-11-07T15:30:00Z` widens to `2024-11-07T23:59:59.999999Z`. since_id : number or string Yields posts in reverse chronological order. """ self.__check_login() if user_id is None: if username is None: raise ValueError("pull_statuses requires either `username` or `user_id`.") user = self.lookup(username) if user is None: return user_id = user["id"] params: dict = {} if created_before is not None: # Mastodon snowflake ids encode the timestamp in the high bits, so a # date upper bound translates directly to a `max_id` filter — the # server walks back from that point instead of us fetching newer # posts only to discard them in the loop below. params["max_id"] = str(date_to_bound(created_before, "end")) page_counter = 0 keep_going = True while keep_going: try: url = f"/v1/accounts/{user_id}/statuses" if pinned: url += "?pinned=true&with_muted=true" elif not replies: url += "?exclude_replies=true" if verbose: logger.debug("--------------------------") logger.debug(f"{url} {params}") result = self._get(url, params=params) page_counter += 1 except json.JSONDecodeError as e: logger.error(f"Unable to pull user #{user_id}'s statuses': {e}") break except CFBlockException: raise except Exception as e: logger.error(f"Misc. error while pulling statuses for {user_id}: {e}") break if result is None: break if isinstance(result, dict) and "error" in result: logger.error( f"API returned an error while pulling user #{user_id}'s statuses: {result}" ) break if not isinstance(result, list): logger.error(f"Result is not a list (it's a {type(result)}): {result}") break if len(result) == 0: break posts: list[dict] = sorted( cast(list[dict], result), key=lambda k: k["id"], reverse=True ) # reverse chronological order (recent first, older last) params["max_id"] = posts[-1][ "id" ] # when pulling the next page, get posts before this (the oldest) if verbose: logger.debug(f"PAGE: {page_counter}") if pinned: # assume single page keep_going = False for post in posts: post["_pulled"] = datetime.now().isoformat() # only keep posts created after the specified date # exclude posts created before the specified date # since the page is listed in reverse chronology, we don't need any remaining posts on this page either post_at = date_parse.parse(post["created_at"]).replace(tzinfo=UTC) if (created_after and post_at <= created_after) or ( since_id and int(post["id"]) <= int(since_id) ): keep_going = False # stop the loop, request no more pages break # do not yeild this post or remaining (older) posts on this page if verbose: logger.debug(f"{post['id']} {post['created_at']}") yield post def get_auth_id(self, username: str, password: str) -> str: """Logs in to Truth account and returns the session token""" url = BASE_URL + "/oauth/v2/token" try: payload = { "client_id": CLIENT_ID, "client_secret": CLIENT_SECRET, "grant_type": "password", "username": username, "password": password, "redirect_uri": "urn:ietf:wg:oauth:2.0:oob", "scope": "read", } sess_req = requests.request( "POST", url, json=payload, proxies=proxies, impersonate=IMPERSONATE_TARGET, headers={ "User-Agent": USER_AGENT, }, ) # Check for 403 errors and identify the specific type if sess_req.status_code == 403: response_text = sess_req.text.lower() # Check for geographic restriction if "unavailable in your area" in response_text: logger.error("Geographic restriction detected") raise GeoblockException("Truth Social is unavailable in your area.") # Check for Cloudflare block if "you have been blocked" in response_text: logger.error("Cloudflare block detected") raise CFBlockException("Request blocked by Cloudflare.") # Generic 403 error logger.error(f"403 Forbidden: {response_text[:200]}") raise LoginErrorException( f"Authentication forbidden (403). Response: {response_text[:200]}" ) sess_req.raise_for_status() except requests.RequestsError as e: logger.error(f"Failed login request: {e!s}") raise LoginErrorException("Cannot authenticate to .") from e if not sess_req.json()["access_token"]: raise ValueError("Invalid truthsocial.com credentials provided!") return sess_req.json()["access_token"] ================================================ FILE: truthbrush/cli.py ================================================ """Defines the CLI for Truthbrush.""" import datetime import json import click from .api import Api @click.group() @click.option( "--no-auth", is_flag=True, default=False, help="Run without authentication. Only public endpoints will succeed.", ) @click.pass_context def cli(ctx: click.Context, no_auth: bool): """This is an API client for Truth Social.""" ctx.ensure_object(dict) ctx.obj["api"] = Api(require_auth=not no_auth) @cli.command() @click.argument("group_id") @click.option("--limit", default=20, help="Limit the number of items returned", type=int) @click.pass_context def groupposts(ctx: click.Context, group_id: str, limit: int): """Pull posts from group timeline""" print(json.dumps(ctx.obj["api"].group_posts(group_id, limit))) @cli.command() @click.pass_context def trends(ctx: click.Context): """Pull trendy Truths.""" print(json.dumps(ctx.obj["api"].trending())) @cli.command() @click.pass_context def tags(ctx: click.Context): """Pull trendy tags.""" print(json.dumps(ctx.obj["api"].tags())) @cli.command() @click.pass_context def grouptags(ctx: click.Context): """Pull group tags.""" print(json.dumps(ctx.obj["api"].group_tags())) @cli.command() @click.pass_context def grouptrends(ctx: click.Context): """Pull group trends.""" print(json.dumps(ctx.obj["api"].trending_groups())) @cli.command() @click.pass_context def groupsuggest(ctx: click.Context): """Pull group suggestions.""" print(json.dumps(ctx.obj["api"].suggested_groups())) @cli.command() @click.argument("handle") @click.pass_context def user(ctx: click.Context, handle: str): """Pull a user's metadata.""" print(json.dumps(ctx.obj["api"].lookup(handle))) @cli.command() @click.argument("query") @click.option( "--searchtype", help="Type of search query (accounts, statuses, groups, or hashtags)", type=click.Choice(["accounts", "statuses", "hashtags", "groups"]), ) @click.option("--limit", default=40, help="Limit the number of items returned", type=int) @click.option("--resolve", help="Resolve", type=bool) @click.option( "--start-date", default=None, help="Start date for search results (e.g. 2026-01-01)", type=str ) @click.option( "--end-date", default=None, help="End date for search results (e.g. 2026-03-01)", type=str ) @click.pass_context def search( ctx: click.Context, searchtype: str, query: str, limit: int, resolve: bool, start_date: str, end_date: str, ): """Search for users, statuses, groups, or hashtags.""" for page in ctx.obj["api"].search( searchtype, query, limit, resolve, start_date=start_date, end_date=end_date ): print(json.dumps(page[searchtype])) @cli.command() @click.pass_context def suggestions(ctx: click.Context): """Pull the list of suggested users.""" print(json.dumps(ctx.obj["api"].suggested())) @cli.command() @click.pass_context def ads(ctx: click.Context): """Pull ads.""" print(json.dumps(ctx.obj["api"].ads())) # @cli.command() # @click.argument("handle") # @click.option("--maximum", help="the maximum number of followers to pull", type=int) # @click.option( # "--resume", # help="the `max_id` cursor to resume from, if necessary (pull this from logs to resume a failed/stalled export)", # type=str, # ) # def followers(handle: str, maximum: int = None, resume: str = None): # """Pull a user's followers.""" # for follower in api.user_followers(handle, maximum=maximum, resume=resume): # print(json.dumps(follower)) # @cli.command() # @click.argument("handle") # @click.option( # "--maximum", help="the maximum number of followed users to pull", type=int # ) # @click.option( # "--resume", # help="the `max_id` cursor to resume from, if necessary (pull this from logs to resume a failed/stalled export)", # type=str, # ) # def following(handle: str, maximum: int = None, resume: str = None): # """Pull users a given user follows.""" # for followed in api.user_following(handle, maximum=maximum, resume=resume): # print(json.dumps(followed)) @cli.command() @click.argument("username") @click.option( "--replies/--no-replies", default=False, help="Include replies when pulling posts (defaults to no replies)", ) @click.option( "--created-after", default=None, help="Only pull posts created on or after the specified datetime, e.g. 2021-10-02 or 2011-11-04T00:05:23+04:00 (defaults to none). If a timezone is not specified, UTC is assumed.", type=datetime.datetime.fromisoformat, ) @click.option( "--created-before", default=None, help="Only pull posts created on or before the specified datetime, e.g. 2021-10-02 or 2011-11-04T00:05:23+04:00 (defaults to none). If a timezone is not specified, UTC is assumed.", type=datetime.datetime.fromisoformat, ) @click.option("--pinned/--all", default=False, help="Only pull pinned posts (defaults to all)") @click.pass_context def statuses( ctx: click.Context, username: str, replies: bool = False, created_after: datetime.datetime | None = None, created_before: datetime.datetime | None = None, pinned: bool = False, ): """Pull a user's statuses""" # Assume UTC if no timezone is specified if created_after is not None and created_after.tzinfo is None: created_after = created_after.replace(tzinfo=datetime.UTC) if created_before is not None and created_before.tzinfo is None: created_before = created_before.replace(tzinfo=datetime.UTC) for page in ctx.obj["api"].pull_statuses( username, created_after=created_after, created_before=created_before, replies=replies, pinned=pinned, ): print(json.dumps(page)) @cli.command() @click.argument("post") @click.option("--includeall", is_flag=True, help="return all comments on post.") @click.argument("top_num") @click.pass_context def likes(ctx: click.Context, post: str, includeall: bool, top_num: int): """Pull the top_num most recent users who liked the post.""" for page in ctx.obj["api"].user_likes(post, includeall, top_num): print(json.dumps(page)) @cli.command() @click.argument("post") @click.option("--includeall", is_flag=True, help="return all comments on post. Overrides top_num.") @click.option("--onlyfirst", is_flag=True, help="return only direct replies to specified post") @click.argument("top_num") @click.pass_context def comments(ctx: click.Context, post: str, includeall: bool, onlyfirst: bool, top_num: int = 40): """Pull the top_num comments on a post (defaults to all users, including replies).""" for page in ctx.obj["api"].pull_comments(post, includeall, onlyfirst, top_num): print(page)