Repository: soxoj/maigret
Branch: main
Commit: 83a9dafe55cd
Files: 98
Total size: 2.1 MB
Directory structure:
gitextract_52ofrfho/
├── .dockerignore
├── .githooks/
│ └── pre-commit
├── .github/
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE/
│ │ ├── add-a-site.md
│ │ ├── bug.md
│ │ └── report-false-result.md
│ ├── dependabot.yml
│ └── workflows/
│ ├── build-docker-image.yml
│ ├── codeql-analysis.yml
│ ├── pyinstaller.yml
│ ├── python-package.yml
│ ├── python-publish.yml
│ └── update-site-data.yml
├── .gitignore
├── .readthedocs.yaml
├── CHANGELOG.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Dockerfile
├── Installer.bat
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── docs/
│ ├── Makefile
│ ├── make.bat
│ ├── requirements.txt
│ └── source/
│ ├── command-line-options.rst
│ ├── conf.py
│ ├── development.rst
│ ├── features.rst
│ ├── index.rst
│ ├── installation.rst
│ ├── philosophy.rst
│ ├── quick-start.rst
│ ├── settings.rst
│ ├── supported-identifier-types.rst
│ ├── tags.rst
│ └── usage-examples.rst
├── maigret/
│ ├── __init__.py
│ ├── __main__.py
│ ├── __version__.py
│ ├── activation.py
│ ├── checking.py
│ ├── errors.py
│ ├── executors.py
│ ├── maigret.py
│ ├── notify.py
│ ├── permutator.py
│ ├── report.py
│ ├── resources/
│ │ ├── data.json
│ │ ├── simple_report.tpl
│ │ ├── simple_report_pdf.css
│ │ └── simple_report_pdf.tpl
│ ├── result.py
│ ├── settings.py
│ ├── sites.py
│ ├── submit.py
│ ├── types.py
│ ├── utils.py
│ └── web/
│ ├── app.py
│ └── templates/
│ ├── base.html
│ ├── index.html
│ ├── results.html
│ └── status.html
├── pyinstaller/
│ ├── maigret_standalone.py
│ ├── maigret_standalone.spec
│ └── requirements.txt
├── pyproject.toml
├── pytest.ini
├── sites.md
├── snapcraft.yaml
├── static/
│ ├── recursive_search.md
│ └── report_alexaimephotographycars.html
├── tests/
│ ├── __init__.py
│ ├── conftest.py
│ ├── db.json
│ ├── local.json
│ ├── test_activation.py
│ ├── test_checking.py
│ ├── test_cli.py
│ ├── test_data.py
│ ├── test_errors.py
│ ├── test_executors.py
│ ├── test_maigret.py
│ ├── test_notify.py
│ ├── test_permutator.py
│ ├── test_report.py
│ ├── test_sites.py
│ ├── test_submit.py
│ └── test_utils.py
├── utils/
│ ├── __init__.py
│ ├── add_tags.py
│ ├── check_engines.py
│ ├── import_sites.py
│ ├── sites_diff.py
│ └── update_site_data.py
└── wizard.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .dockerignore
================================================
.git/
.vscode/
static/
tests/
*.txt
!/requirements.txt
venv/
================================================
FILE: .githooks/pre-commit
================================================
#!/bin/sh
echo 'Activating update_sitesmd hook script...'
poetry run update_sitesmd
================================================
FILE: .github/FUNDING.yml
================================================
# These are supported funding model platforms
patreon: soxoj
github: soxoj
buy_me_a_coffee: soxoj
================================================
FILE: .github/ISSUE_TEMPLATE/add-a-site.md
================================================
---
name: Add a site
about: I want to add a new site for Maigret checks
title: New site
labels: new-site
assignees: soxoj
---
Link to the site main page: https://example.com
Link to an existing account: https://example.com/users/john
Link to a nonexistent account: https://example.com/users/noonewouldeverusethis7
Tags: photo, us, ...
================================================
FILE: .github/ISSUE_TEMPLATE/bug.md
================================================
---
name: Maigret bug report
about: I want to report a bug in Maigret functionality
title: ''
labels: bug
assignees: soxoj
---
## Checklist
- [ ] I'm reporting a bug in Maigret functionality
- [ ] I've checked for similar bug reports including closed ones
- [ ] I've checked for pull requests that attempt to fix this bug
## Description
Info about Maigret version you are running and environment (`--version`, operation system, ISP provider):
Мы не нашли страницу': CheckError( 'Resolving', 'MegaFon 404 page' ), 'Доступ к информационному ресурсу ограничен на основании Федерального закона': CheckError( 'Censorship', 'MGTS' ), 'Incapsula incident ID': CheckError('Bot protection', 'Incapsula'), 'Сайт заблокирован хостинг-провайдером': CheckError( 'Site-specific', 'Site is disabled (Beget)' ), 'Generated by cloudfront (CloudFront)': CheckError('Request blocked', 'Cloudflare'), '/cdn-cgi/challenge-platform/h/b/orchestrate/chl_page': CheckError( 'Just a moment: bot redirect challenge', 'Cloudflare' ), } ERRORS_TYPES = { 'Captcha': 'Try to switch to another IP address or to use service cookies', 'Bot protection': 'Try to switch to another IP address', 'Censorship': 'Switch to another internet service provider', 'Request timeout': 'Try to increase timeout or to switch to another internet service provider', 'Connecting failure': 'Try to decrease number of parallel connections (e.g. -n 10)', } # TODO: checking for reason ERRORS_REASONS = { 'Login required': 'Add authorization cookies through `--cookies-jar-file` (see cookies.txt)', } TEMPORARY_ERRORS_TYPES = [ 'Request timeout', 'Unknown', 'Request failed', 'Connecting failure', 'HTTP', 'Proxy', 'Interrupted', 'Connection lost', ] THRESHOLD = 3 # percent def is_important(err_data): return err_data['perc'] >= THRESHOLD def is_permanent(err_type): return err_type not in TEMPORARY_ERRORS_TYPES def detect(text): for flag, err in COMMON_ERRORS.items(): if flag in text: return err return None def solution_of(err_type) -> str: return ERRORS_TYPES.get(err_type, '') def extract_and_group(search_res: QueryResultWrapper) -> List[Dict[str, Any]]: errors_counts: Dict[str, int] = {} for r in search_res.values(): if r and isinstance(r, dict) and r.get('status'): if not isinstance(r['status'], MaigretCheckResult): continue err = r['status'].error if not err: continue errors_counts[err.type] = errors_counts.get(err.type, 0) + 1 counts = [] for err, count in sorted(errors_counts.items(), key=lambda x: x[1], reverse=True): counts.append( { 'err': err, 'count': count, 'perc': round(count / len(search_res), 2) * 100, } ) return counts def notify_about_errors( search_results: QueryResultWrapper, query_notify, show_statistics=False ) -> List[Tuple]: """ Prepare error notifications in search results, text + symbol, to be displayed by notify object. Example: [ ("Too many errors of type "timeout" (50.0%)", "!") ("Verbose error statistics:", "-") ] """ results = [] errs = extract_and_group(search_results) was_errs_displayed = False for e in errs: if not is_important(e): continue text = f'Too many errors of type "{e["err"]}" ({round(e["perc"],2)}%)' solution = solution_of(e['err']) if solution: text = '. '.join([text, solution.capitalize()]) results.append((text, '!')) was_errs_displayed = True if show_statistics: results.append(('Verbose error statistics:', '-')) for e in errs: text = f'{e["err"]}: {round(e["perc"],2)}%' results.append((text, '!')) if was_errs_displayed: results.append( ('You can see detailed site check errors with a flag `--print-errors`', '-') ) return results ================================================ FILE: maigret/executors.py ================================================ import asyncio import sys import time from typing import Any, Iterable, List, Callable import alive_progress from alive_progress import alive_bar from .types import QueryDraft def create_task_func(): if sys.version_info.minor > 6: create_asyncio_task = asyncio.create_task else: loop = asyncio.get_event_loop() create_asyncio_task = loop.create_task return create_asyncio_task class AsyncExecutor: # Deprecated: will be removed soon, don't use it def __init__(self, *args, **kwargs): self.logger = kwargs['logger'] async def run(self, tasks: Iterable[QueryDraft]): start_time = time.time() results = await self._run(tasks) self.execution_time = time.time() - start_time self.logger.debug(f'Spent time: {self.execution_time}') return results async def _run(self, tasks: Iterable[QueryDraft]): await asyncio.sleep(0) class AsyncioSimpleExecutor(AsyncExecutor): # Deprecated: will be removed soon, don't use it def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.semaphore = asyncio.Semaphore(kwargs.get('in_parallel', 100)) async def _run(self, tasks: Iterable[QueryDraft]): async def sem_task(f, args, kwargs): async with self.semaphore: return await f(*args, **kwargs) futures = [sem_task(f, args, kwargs) for f, args, kwargs in tasks] return await asyncio.gather(*futures) class AsyncioProgressbarExecutor(AsyncExecutor): # Deprecated: will be removed soon, don't use it def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) async def _run(self, tasks: Iterable[QueryDraft]): futures = [f(*args, **kwargs) for f, args, kwargs in tasks] total_tasks = len(futures) results = [] # Use alive_bar for progress tracking with alive_bar(total_tasks, title='Searching', force_tty=True) as progress: # Chunk progress updates for efficiency async def track_task(task): result = await task progress() # Update progress bar once task completes return result # Use gather to run tasks concurrently and track progress results = await asyncio.gather(*(track_task(f) for f in futures)) return results class AsyncioProgressbarSemaphoreExecutor(AsyncExecutor): # Deprecated: will be removed soon, don't use it def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.semaphore = asyncio.Semaphore(kwargs.get('in_parallel', 1)) async def _run(self, tasks: Iterable[QueryDraft]): async def _wrap_query(q: QueryDraft): async with self.semaphore: f, args, kwargs = q return await f(*args, **kwargs) async def semaphore_gather(tasks: Iterable[QueryDraft]): coros = [_wrap_query(q) for q in tasks] results = [] # Use alive_bar correctly as a context manager with alive_bar(len(coros), title='Searching', force_tty=True) as progress: for f in asyncio.as_completed(coros): results.append(await f) progress() # Update the progress bar return results return await semaphore_gather(tasks) class AsyncioProgressbarQueueExecutor(AsyncExecutor): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.workers_count = kwargs.get('in_parallel', 10) self.queue = asyncio.Queue(self.workers_count) self.timeout = kwargs.get('timeout') # Pass a progress function; alive_bar by default self.progress_func = kwargs.get('progress_func', alive_bar) self.progress = None # TODO: tests async def increment_progress(self, count): """Update progress by calling the provided progress function.""" if self.progress: if asyncio.iscoroutinefunction(self.progress): await self.progress(count) else: self.progress(count) await asyncio.sleep(0) # TODO: tests async def stop_progress(self): """Stop the progress tracking.""" if hasattr(self.progress, "close") and self.progress: close_func = self.progress.close if asyncio.iscoroutinefunction(close_func): await close_func() else: close_func() await asyncio.sleep(0) async def worker(self): """Consume tasks from the queue and process them.""" while True: try: f, args, kwargs = self.queue.get_nowait() except asyncio.QueueEmpty: return query_future = f(*args, **kwargs) query_task = create_task_func()(query_future) try: result = await asyncio.wait_for(query_task, timeout=self.timeout) except asyncio.TimeoutError: result = kwargs.get('default') self.results.append(result) if self.progress: await self.increment_progress(1) self.queue.task_done() async def _run(self, queries: Iterable[QueryDraft]): """Main runner function to execute tasks with progress tracking.""" self.results: List[Any] = [] queries_list = list(queries) min_workers = min(len(queries_list), self.workers_count) workers = [create_task_func()(self.worker()) for _ in range(min_workers)] # Initialize the progress bar if self.progress_func: with self.progress_func( len(queries_list), title="Searching", force_tty=True ) as bar: self.progress = bar # Assign alive_bar's callable to self.progress # Add tasks to the queue for t in queries_list: await self.queue.put(t) # Wait for tasks to complete await self.queue.join() # Cancel any remaining workers for w in workers: w.cancel() return self.results class AsyncioQueueGeneratorExecutor: # Deprecated: will be removed soon, don't use it def __init__(self, *args, **kwargs): self.workers_count = kwargs.get('in_parallel', 10) self.queue = asyncio.Queue() self.timeout = kwargs.get('timeout') self.logger = kwargs['logger'] self._results = asyncio.Queue() self._stop_signal = object() async def worker(self): """Process tasks from the queue and put results into the results queue.""" while True: task = await self.queue.get() if task is self._stop_signal: self.queue.task_done() break try: f, args, kwargs = task query_future = f(*args, **kwargs) query_task = create_task_func()(query_future) try: result = await asyncio.wait_for(query_task, timeout=self.timeout) except asyncio.TimeoutError: result = kwargs.get('default') await self._results.put(result) except Exception as e: self.logger.error(f"Error in worker: {e}") finally: self.queue.task_done() async def run(self, queries: Iterable[Callable[..., Any]]): """Run workers to process queries in parallel.""" start_time = time.time() # Add tasks to the queue for t in queries: await self.queue.put(t) # Create workers workers = [ asyncio.create_task(self.worker()) for _ in range(self.workers_count) ] # Add stop signals for _ in range(self.workers_count): await self.queue.put(self._stop_signal) try: while any(w.done() is False for w in workers) or not self._results.empty(): try: result = await asyncio.wait_for(self._results.get(), timeout=1) yield result except asyncio.TimeoutError: pass finally: # Ensure all workers are awaited await asyncio.gather(*workers) self.execution_time = time.time() - start_time self.logger.debug(f"Spent time: {self.execution_time}") ================================================ FILE: maigret/maigret.py ================================================ """ Maigret main module """ import ast import asyncio import logging import os import sys import platform import re from argparse import ArgumentParser, RawDescriptionHelpFormatter from typing import List, Tuple import os.path as path from socid_extractor import extract, parse from .__version__ import __version__ from .checking import ( timeout_check, SUPPORTED_IDS, self_check, BAD_CHARS, maigret, ) from . import errors from .notify import QueryNotifyPrint from .report import ( save_csv_report, save_xmind_report, save_html_report, save_pdf_report, generate_report_context, save_txt_report, SUPPORTED_JSON_REPORT_FORMATS, save_json_report, get_plaintext_report, sort_report_by_data_points, save_graph_report, ) from .sites import MaigretDatabase from .submit import Submitter from .types import QueryResultWrapper from .utils import get_dict_ascii_tree from .settings import Settings from .permutator import Permute def extract_ids_from_page(url, logger, timeout=5) -> dict: results = {} # url, headers reqs: List[Tuple[str, set]] = [(url, set())] try: # temporary workaround for URL mutations MVP from socid_extractor import mutate_url reqs += list(mutate_url(url)) except Exception as e: logger.warning(e) for req in reqs: url, headers = req print(f'Scanning webpage by URL {url}...') page, _ = parse(url, cookies_str='', headers=headers, timeout=timeout) logger.debug(page) info = extract(page) if not info: print('Nothing extracted') else: print(get_dict_ascii_tree(info.items(), new_line=False), ' ') for k, v in info.items(): # TODO: merge with the same functionality in checking module if 'username' in k and not 'usernames' in k: results[v] = 'username' elif 'usernames' in k: try: tree = ast.literal_eval(v) if type(tree) == list: for n in tree: results[n] = 'username' except Exception as e: logger.warning(e) if k in SUPPORTED_IDS: results[v] = k return results def extract_ids_from_results(results: QueryResultWrapper, db: MaigretDatabase) -> dict: ids_results = {} for website_name in results: dictionary = results[website_name] # TODO: fix no site data issue if not dictionary: continue new_usernames = dictionary.get('ids_usernames') if new_usernames: for u, utype in new_usernames.items(): ids_results[u] = utype for url in dictionary.get('ids_links', []): ids_results.update(db.extract_ids_from_url(url)) return ids_results def setup_arguments_parser(settings: Settings): from aiohttp import __version__ as aiohttp_version from requests import __version__ as requests_version from socid_extractor import __version__ as socid_version version_string = '\n'.join( [ f'%(prog)s {__version__}', f'Socid-extractor: {socid_version}', f'Aiohttp: {aiohttp_version}', f'Requests: {requests_version}', f'Python: {platform.python_version()}', ] ) parser = ArgumentParser( formatter_class=RawDescriptionHelpFormatter, description=f"Maigret v{__version__}\n" "Documentation: https://maigret.readthedocs.io/\n" "All settings are also configurable through files, see docs.", ) parser.add_argument( "username", nargs='*', metavar="USERNAMES", help="One or more usernames to search by.", ) parser.add_argument( "--version", action="version", version=version_string, help="Display version information and dependencies.", ) parser.add_argument( "--timeout", action="store", metavar='TIMEOUT', dest="timeout", type=timeout_check, default=settings.timeout, help="Time in seconds to wait for response to requests " f"(default {settings.timeout}s). " "A longer timeout will be more likely to get results from slow sites. " "On the other hand, this may cause a long delay to gather all results. ", ) parser.add_argument( "--retries", action="store", type=int, metavar='RETRIES', default=settings.retries_count, help="Attempts to restart temporarily failed requests.", ) parser.add_argument( "-n", "--max-connections", action="store", type=int, dest="connections", default=settings.max_connections, help=f"Allowed number of concurrent connections (default {settings.max_connections}).", ) parser.add_argument( "--no-recursion", action="store_true", dest="disable_recursive_search", default=(not settings.recursive_search), help="Disable recursive search by additional data extracted from pages.", ) parser.add_argument( "--no-extracting", action="store_true", dest="disable_extracting", default=(not settings.info_extracting), help="Disable parsing pages for additional data and other usernames.", ) parser.add_argument( "--id-type", dest="id_type", default='username', choices=SUPPORTED_IDS, help="Specify identifier(s) type (default: username).", ) parser.add_argument( "--permute", action="store_true", default=False, help="Permute at least 2 usernames to generate more possible usernames.", ) parser.add_argument( "--db", metavar="DB_FILE", dest="db_file", default=settings.sites_db_path, help="Load Maigret database from a JSON file or HTTP web resource.", ) parser.add_argument( "--cookies-jar-file", metavar="COOKIE_FILE", dest="cookie_file", default=settings.cookie_jar_file, help="File with cookies.", ) parser.add_argument( "--ignore-ids", action="append", metavar='IGNORED_IDS', dest="ignore_ids_list", default=settings.ignore_ids_list, help="Do not make search by the specified username or other ids.", ) # reports options parser.add_argument( "--folderoutput", "-fo", dest="folderoutput", default=settings.reports_path, metavar="PATH", help="If using multiple usernames, the output of the results will be saved to this folder.", ) parser.add_argument( "--proxy", "-p", metavar='PROXY_URL', action="store", dest="proxy", default=settings.proxy_url, help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080", ) parser.add_argument( "--tor-proxy", metavar='TOR_PROXY_URL', action="store", default=settings.tor_proxy_url, help="Specify URL of your Tor gateway. Default is socks5://127.0.0.1:9050", ) parser.add_argument( "--i2p-proxy", metavar='I2P_PROXY_URL', action="store", default=settings.i2p_proxy_url, help="Specify URL of your I2P gateway. Default is http://127.0.0.1:4444", ) parser.add_argument( "--with-domains", action="store_true", default=settings.domain_search, help="Enable (experimental) feature of checking domains on usernames.", ) filter_group = parser.add_argument_group( 'Site filtering', 'Options to set site search scope' ) filter_group.add_argument( "-a", "--all-sites", action="store_true", dest="all_sites", default=settings.scan_all_sites, help="Use all sites for scan.", ) filter_group.add_argument( "--top-sites", action="store", default=settings.top_sites_count, metavar="N", type=int, help="Count of sites for scan ranked by Alexa Top (default: 500).", ) filter_group.add_argument( "--tags", dest="tags", default='', help="Specify tags of sites (see `--stats`)." ) filter_group.add_argument( "--site", action="append", metavar='SITE_NAME', dest="site_list", default=settings.scan_sites_list, help="Limit analysis to just the specified sites (multiple option).", ) filter_group.add_argument( "--use-disabled-sites", action="store_true", default=settings.scan_disabled_sites, help="Use disabled sites to search (may cause many false positives).", ) modes_group = parser.add_argument_group( 'Operating modes', 'Various functions except the default search by a username. ' 'Modes are executed sequentially in the order of declaration.', ) modes_group.add_argument( "--parse", dest="parse_url", default='', metavar='URL', help="Parse page by URL and extract username and IDs to use for search.", ) modes_group.add_argument( "--submit", metavar='URL', type=str, dest="new_site_to_submit", default=False, help="URL of existing profile in new site to submit.", ) modes_group.add_argument( "--self-check", action="store_true", default=settings.self_check_enabled, help="Do self check for sites and database and disable non-working ones.", ) modes_group.add_argument( "--stats", action="store_true", default=False, help="Show database statistics (most frequent sites engines and tags).", ) modes_group.add_argument( "--web", metavar='PORT', type=int, nargs='?', # Optional PORT value const=5000, # Default PORT if `--web` is provided without a value default=None, # Explicitly set default to None help="Launch the web interface on the specified port (default: 5000 if no PORT is provided).", ) output_group = parser.add_argument_group( 'Output options', 'Options to change verbosity and view of the console output' ) output_group.add_argument( "--print-not-found", action="store_true", dest="print_not_found", default=settings.print_not_found, help="Print sites where the username was not found.", ) output_group.add_argument( "--print-errors", action="store_true", dest="print_check_errors", default=settings.print_check_errors, help="Print errors messages: connection, captcha, site country ban, etc.", ) output_group.add_argument( "--verbose", "-v", action="store_true", dest="verbose", default=False, help="Display extra information and metrics.", ) output_group.add_argument( "--info", "-vv", action="store_true", dest="info", default=False, help="Display extra/service information and metrics.", ) output_group.add_argument( "--debug", "-vvv", "-d", action="store_true", dest="debug", default=False, help="Display extra/service/debug information and metrics, save responses in debug.log.", ) output_group.add_argument( "--no-color", action="store_true", dest="no_color", default=(not settings.colored_print), help="Don't color terminal output", ) output_group.add_argument( "--no-progressbar", action="store_true", dest="no_progressbar", default=(not settings.show_progressbar), help="Don't show progressbar.", ) report_group = parser.add_argument_group( 'Report formats', 'Supported formats of report files' ) report_group.add_argument( "-T", "--txt", action="store_true", dest="txt", default=settings.txt_report, help="Create a TXT report (one report per username).", ) report_group.add_argument( "-C", "--csv", action="store_true", dest="csv", default=settings.csv_report, help="Create a CSV report (one report per username).", ) report_group.add_argument( "-H", "--html", action="store_true", dest="html", default=settings.html_report, help="Create an HTML report file (general report on all usernames).", ) report_group.add_argument( "-X", "--xmind", action="store_true", dest="xmind", default=settings.xmind_report, help="Generate an XMind 8 mindmap report (one report per username).", ) report_group.add_argument( "-P", "--pdf", action="store_true", dest="pdf", default=settings.pdf_report, help="Generate a PDF report (general report on all usernames).", ) report_group.add_argument( "-G", "--graph", action="store_true", dest="graph", default=settings.graph_report, help="Generate a graph report (general report on all usernames).", ) report_group.add_argument( "-J", "--json", action="store", metavar='TYPE', dest="json", default=settings.json_report_type, choices=SUPPORTED_JSON_REPORT_FORMATS, help=f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}" " (one report per username).", ) parser.add_argument( "--reports-sorting", default=settings.report_sorting, choices=('default', 'data'), help="Method of results sorting in reports (default: in order of getting the result)", ) return parser async def main(): # Logging log_level = logging.ERROR logging.basicConfig( format='[%(filename)s:%(lineno)d] %(levelname)-3s %(asctime)s %(message)s', datefmt='%H:%M:%S', level=log_level, ) logger = logging.getLogger('maigret') logger.setLevel(log_level) # Load settings settings = Settings() settings_loaded, err = settings.load() if not settings_loaded: logger.error(err) sys.exit(3) arg_parser = setup_arguments_parser(settings) args = arg_parser.parse_args() # Re-set logging level based on args if args.debug: log_level = logging.DEBUG elif args.info: log_level = logging.INFO elif args.verbose: log_level = logging.WARNING logger.setLevel(log_level) # Usernames initial list usernames = { u: args.id_type for u in args.username if u and u not in ['-'] and u not in args.ignore_ids_list } original_usernames = "" if args.permute and len(usernames) > 1 and args.id_type == 'username': original_usernames = " ".join(usernames.keys()) usernames = Permute(usernames).gather(method='strict') parsing_enabled = not args.disable_extracting recursive_search_enabled = not args.disable_recursive_search # Make prompts if args.proxy is not None: print("Using the proxy: " + args.proxy) if args.parse_url: extracted_ids = extract_ids_from_page( args.parse_url, logger, timeout=args.timeout ) usernames.update(extracted_ids) if args.tags: args.tags = list(set(str(args.tags).split(','))) db_file = args.db_file \ if (args.db_file.startswith("http://") or args.db_file.startswith("https://")) \ else path.join(path.dirname(path.realpath(__file__)), args.db_file) if args.top_sites == 0 or args.all_sites: args.top_sites = sys.maxsize # Create notify object for query results. query_notify = QueryNotifyPrint( result=None, verbose=args.verbose, print_found_only=not args.print_not_found, skip_check_errors=not args.print_check_errors, color=not args.no_color, ) # Create object with all information about sites we are aware of. db = MaigretDatabase().load_from_path(db_file) get_top_sites_for_id = lambda x: db.ranked_sites_dict( top=args.top_sites, tags=args.tags, names=args.site_list, disabled=args.use_disabled_sites, id_type=x, ) site_data = get_top_sites_for_id(args.id_type) if args.new_site_to_submit: submitter = Submitter(db=db, logger=logger, settings=settings, args=args) is_submitted = await submitter.dialog(args.new_site_to_submit, args.cookie_file) if is_submitted: db.save_to_file(db_file) await submitter.close() # Database self-checking if args.self_check: if len(site_data) == 0: query_notify.warning( 'No sites to self-check with the current filters! Exiting...' ) return query_notify.success( f'Maigret sites database self-check started for {len(site_data)} sites...' ) is_need_update = await self_check( db, site_data, logger, proxy=args.proxy, max_connections=args.connections, tor_proxy=args.tor_proxy, i2p_proxy=args.i2p_proxy, ) if is_need_update: if input('Do you want to save changes permanently? [Yn]\n').lower() in ( 'y', '', ): db.save_to_file(db_file) print('Database was successfully updated.') else: print('Updates will be applied only for current search session.') if args.verbose or args.debug: query_notify.info( 'Scan sessions flags stats: ' + str(db.get_scan_stats(site_data)) ) # Database statistics if args.stats: print(db.get_db_stats()) report_dir = path.join(os.getcwd(), args.folderoutput) # Make reports folder is not exists os.makedirs(report_dir, exist_ok=True) # Define one report filename template report_filepath_tpl = path.join(report_dir, 'report_{username}{postfix}') # Web interface if args.web is not None: from maigret.web.app import app app.config["MAIGRET_DB_FILE"] = db_file port = ( args.web if args.web else 5000 ) # args.web is either the specified port or 5000 by default # Host configuration: secure by default, but allow override via environment host = os.getenv('FLASK_HOST', '127.0.0.1') app.run(host=host, port=port) return if usernames == {}: # magic params to exit after init query_notify.warning('No usernames to check, exiting.') sys.exit(0) if len(usernames) > 1 and args.permute and args.id_type == 'username': query_notify.warning( f"{len(usernames)} permutations from {original_usernames} to check..." + get_dict_ascii_tree(usernames, prepend="\t") ) if not site_data: query_notify.warning('No sites to check, exiting!') sys.exit(2) query_notify.warning( f'Starting a search on top {len(site_data)} sites from the Maigret database...' ) if not args.all_sites: query_notify.warning( 'You can run search by full list of sites with flag `-a`', '!' ) already_checked = set() general_results = [] while usernames: username, id_type = list(usernames.items())[0] del usernames[username] if username.lower() in already_checked: continue already_checked.add(username.lower()) if username in args.ignore_ids_list: query_notify.warning( f'Skip a search by username {username} cause it\'s marked as ignored.' ) continue # check for characters do not supported by sites generally found_unsupported_chars = set(BAD_CHARS).intersection(set(username)) if found_unsupported_chars: pretty_chars_str = ','.join( map(lambda s: f'"{s}"', found_unsupported_chars) ) query_notify.warning( f'Found unsupported URL characters: {pretty_chars_str}, skip search by username "{username}"' ) continue sites_to_check = get_top_sites_for_id(id_type) results = await maigret( username=username, site_dict=dict(sites_to_check), query_notify=query_notify, proxy=args.proxy, tor_proxy=args.tor_proxy, i2p_proxy=args.i2p_proxy, timeout=args.timeout, is_parsing_enabled=parsing_enabled, id_type=id_type, debug=args.verbose, logger=logger, cookies=args.cookie_file, forced=args.use_disabled_sites, max_connections=args.connections, no_progressbar=args.no_progressbar, retries=args.retries, check_domains=args.with_domains, ) errs = errors.notify_about_errors( results, query_notify, show_statistics=args.verbose ) for e in errs: query_notify.warning(*e) if args.reports_sorting == "data": results = sort_report_by_data_points(results) general_results.append((username, id_type, results)) # TODO: tests if recursive_search_enabled: extracted_ids = extract_ids_from_results(results, db) query_notify.warning(f'Extracted IDs: {extracted_ids}') usernames.update(extracted_ids) # reporting for a one username if args.xmind: username = username.replace('/', '_') filename = report_filepath_tpl.format(username=username, postfix='.xmind') save_xmind_report(filename, username, results) query_notify.warning(f'XMind report for {username} saved in {filename}') if args.csv: username = username.replace('/', '_') filename = report_filepath_tpl.format(username=username, postfix='.csv') save_csv_report(filename, username, results) query_notify.warning(f'CSV report for {username} saved in {filename}') if args.txt: username = username.replace('/', '_') filename = report_filepath_tpl.format(username=username, postfix='.txt') save_txt_report(filename, username, results) query_notify.warning(f'TXT report for {username} saved in {filename}') if args.json: username = username.replace('/', '_') filename = report_filepath_tpl.format( username=username, postfix=f'_{args.json}.json' ) save_json_report(filename, username, results, report_type=args.json) query_notify.warning( f'JSON {args.json} report for {username} saved in {filename}' ) # reporting for all the result if general_results: if args.html or args.pdf: query_notify.warning('Generating report info...') report_context = generate_report_context(general_results) # determine main username username = report_context['username'] if args.html: username = username.replace('/', '_') filename = report_filepath_tpl.format( username=username, postfix='_plain.html' ) save_html_report(filename, report_context) query_notify.warning(f'HTML report on all usernames saved in {filename}') if args.pdf: username = username.replace('/', '_') filename = report_filepath_tpl.format(username=username, postfix='.pdf') save_pdf_report(filename, report_context) query_notify.warning(f'PDF report on all usernames saved in {filename}') if args.graph: username = username.replace('/', '_') filename = report_filepath_tpl.format( username=username, postfix='_graph.html' ) save_graph_report(filename, general_results, db) query_notify.warning(f'Graph report on all usernames saved in {filename}') text_report = get_plaintext_report(report_context) if text_report: query_notify.info('Short text report:') print(text_report) # update database db.save_to_file(db_file) def run(): try: if sys.version_info.minor >= 10: asyncio.run(main()) else: loop = asyncio.get_event_loop() loop.run_until_complete(main()) except KeyboardInterrupt: print('Maigret is interrupted.') sys.exit(1) if __name__ == "__main__": run() ================================================ FILE: maigret/notify.py ================================================ """Sherlock Notify Module This module defines the objects for notifying the caller about the results of queries. """ import sys from colorama import Fore, Style, init from .result import MaigretCheckStatus from .utils import get_dict_ascii_tree class QueryNotify: """Query Notify Object. Base class that describes methods available to notify the results of a query. It is intended that other classes inherit from this base class and override the methods to implement specific functionality. """ def __init__(self, result=None): """Create Query Notify Object. Contains information about a specific method of notifying the results of a query. Keyword Arguments: self -- This object. result -- Object of type QueryResult() containing results for this query. Return Value: Nothing. """ self.result = result return def start(self, message=None, id_type="username"): """Notify Start. Notify method for start of query. This method will be called before any queries are performed. This method will typically be overridden by higher level classes that will inherit from it. Keyword Arguments: self -- This object. message -- Object that is used to give context to start of query. Default is None. Return Value: Nothing. """ return def update(self, result): """Notify Update. Notify method for query result. This method will typically be overridden by higher level classes that will inherit from it. Keyword Arguments: self -- This object. result -- Object of type QueryResult() containing results for this query. Return Value: Nothing. """ self.result = result return def finish(self, message=None): """Notify Finish. Notify method for finish of query. This method will be called after all queries have been performed. This method will typically be overridden by higher level classes that will inherit from it. Keyword Arguments: self -- This object. message -- Object that is used to give context to start of query. Default is None. Return Value: Nothing. """ return def __str__(self): """Convert Object To String. Keyword Arguments: self -- This object. Return Value: Nicely formatted string to get information about this object. """ result = str(self.result) return result class QueryNotifyPrint(QueryNotify): """Query Notify Print Object. Query notify class that prints results. """ def __init__( self, result=None, verbose=False, print_found_only=False, skip_check_errors=False, color=True, ): """Create Query Notify Print Object. Contains information about a specific method of notifying the results of a query. Keyword Arguments: self -- This object. result -- Object of type QueryResult() containing results for this query. verbose -- Boolean indicating whether to give verbose output. print_found_only -- Boolean indicating whether to only print found sites. color -- Boolean indicating whether to color terminal output Return Value: Nothing. """ # Colorama module's initialization. init(autoreset=True) super().__init__(result) self.verbose = verbose self.print_found_only = print_found_only self.skip_check_errors = skip_check_errors self.color = color return def make_colored_terminal_notify( self, status, text, status_color, text_color, appendix ): text = [ f"{Style.BRIGHT}{Fore.WHITE}[{status_color}{status}{Fore.WHITE}]" + f"{text_color} {text}: {Style.RESET_ALL}" + f"{appendix}" ] return "".join(text) def make_simple_terminal_notify( self, status, text, status_color, text_color, appendix ): return f"[{status}] {text}: {appendix}" def make_terminal_notify(self, *args): if self.color: return self.make_colored_terminal_notify(*args) else: return self.make_simple_terminal_notify(*args) def start(self, message, id_type): """Notify Start. Will print the title to the standard output. Keyword Arguments: self -- This object. message -- String containing username that the series of queries are about. Return Value: Nothing. """ title = f"Checking {id_type}" if self.color: print( Style.BRIGHT + Fore.GREEN + "[" + Fore.YELLOW + "*" + Fore.GREEN + f"] {title}" + Fore.WHITE + f" {message}" + Fore.GREEN + " on:" ) else: print(f"[*] {title} {message} on:") def _colored_print(self, fore_color, msg): if self.color: print(Style.BRIGHT + fore_color + msg) else: print(msg) def success(self, message, symbol="+"): msg = f"[{symbol}] {message}" self._colored_print(Fore.GREEN, msg) def warning(self, message, symbol="-"): msg = f"[{symbol}] {message}" self._colored_print(Fore.YELLOW, msg) def info(self, message, symbol="*"): msg = f"[{symbol}] {message}" self._colored_print(Fore.BLUE, msg) def update(self, result, is_similar=False): """Notify Update. Will print the query result to the standard output. Keyword Arguments: self -- This object. result -- Object of type QueryResult() containing results for this query. Return Value: Nothing. """ notify = None self.result = result ids_data_text = "" if self.result.ids_data: ids_data_text = get_dict_ascii_tree(self.result.ids_data.items(), " ") # Output to the terminal is desired. if result.status == MaigretCheckStatus.CLAIMED: color = Fore.BLUE if is_similar else Fore.GREEN status = "?" if is_similar else "+" notify = self.make_terminal_notify( status, result.site_name, color, color, result.site_url_user + ids_data_text, ) elif result.status == MaigretCheckStatus.AVAILABLE: if not self.print_found_only: notify = self.make_terminal_notify( "-", result.site_name, Fore.RED, Fore.YELLOW, "Not found!" + ids_data_text, ) elif result.status == MaigretCheckStatus.UNKNOWN: if not self.skip_check_errors: notify = self.make_terminal_notify( "?", result.site_name, Fore.RED, Fore.RED, str(self.result.error) + ids_data_text, ) elif result.status == MaigretCheckStatus.ILLEGAL: if not self.print_found_only: text = "Illegal Username Format For This Site!" notify = self.make_terminal_notify( "-", result.site_name, Fore.RED, Fore.YELLOW, text + ids_data_text, ) else: # It should be impossible to ever get here... raise ValueError( f"Unknown Query Status '{str(result.status)}' for " f"site '{self.result.site_name}'" ) if notify: sys.stdout.write("\x1b[1K\r") print(notify) return notify def __str__(self): """Convert Object To String. Keyword Arguments: self -- This object. Return Value: Nicely formatted string to get information about this object. """ result = str(self.result) return result ================================================ FILE: maigret/permutator.py ================================================ # License MIT. by balestek https://github.com/balestek from itertools import permutations class Permute: def __init__(self, elements: dict): self.separators = ["", "_", "-", "."] self.elements = elements def gather(self, method: str = "strict" or "all") -> dict: permutations_dict = {} for i in range(1, len(self.elements) + 1): for subset in permutations(self.elements, i): if i == 1: if method == "all": permutations_dict[subset[0]] = self.elements[subset[0]] permutations_dict["_" + subset[0]] = self.elements[subset[0]] permutations_dict[subset[0] + "_"] = self.elements[subset[0]] else: for separator in self.separators: perm = separator.join(subset) permutations_dict[perm] = self.elements[subset[0]] if separator == "": permutations_dict["_" + perm] = self.elements[subset[0]] permutations_dict[perm + "_"] = self.elements[subset[0]] return permutations_dict ================================================ FILE: maigret/report.py ================================================ import ast import csv import io import json import logging import os from datetime import datetime from typing import Dict, Any import xmind from dateutil.tz import gettz from dateutil.parser import parse as parse_datetime_str from jinja2 import Template from .checking import SUPPORTED_IDS from .result import MaigretCheckStatus from .sites import MaigretDatabase from .utils import is_country_tag, CaseConverter, enrich_link_str ADDITIONAL_TZINFO = {"CDT": gettz("America/Chicago")} SUPPORTED_JSON_REPORT_FORMATS = [ "simple", "ndjson", ] """ UTILS """ def filter_supposed_data(data): # interesting fields allowed_fields = ["fullname", "gender", "location", "age"] filtered_supposed_data = { CaseConverter.snake_to_title(k): v[0] for k, v in data.items() if k in allowed_fields } return filtered_supposed_data def sort_report_by_data_points(results): return dict( sorted( results.items(), key=lambda x: len( (x[1].get('status') and x[1]['status'].ids_data or {}).keys() ), reverse=True, ) ) """ REPORTS SAVING """ def save_csv_report(filename: str, username: str, results: dict): with open(filename, "w", newline="", encoding="utf-8") as f: generate_csv_report(username, results, f) def save_txt_report(filename: str, username: str, results: dict): with open(filename, "w", encoding="utf-8") as f: generate_txt_report(username, results, f) def save_html_report(filename: str, context: dict): template, _ = generate_report_template(is_pdf=False) filled_template = template.render(**context) with open(filename, "w", encoding="utf-8") as f: f.write(filled_template) def save_pdf_report(filename: str, context: dict): template, css = generate_report_template(is_pdf=True) filled_template = template.render(**context) # moved here to speed up the launch of Maigret from xhtml2pdf import pisa with open(filename, "w+b") as f: pisa.pisaDocument(io.StringIO(filled_template), dest=f, default_css=css) def save_json_report(filename: str, username: str, results: dict, report_type: str): with open(filename, "w", encoding="utf-8") as f: generate_json_report(username, results, f, report_type=report_type) class MaigretGraph: other_params = {'size': 10, 'group': 3} site_params = {'size': 15, 'group': 2} username_params = {'size': 20, 'group': 1} def __init__(self, graph): self.G = graph def add_node(self, key, value, color=None): node_name = f'{key}: {value}' params = dict(self.other_params) if key in SUPPORTED_IDS: params = dict(self.username_params) elif value.startswith('http'): params = dict(self.site_params) params['title'] = node_name if color: params['color'] = color self.G.add_node(node_name, **params) return node_name def link(self, node1_name, node2_name): self.G.add_edge(node1_name, node2_name, weight=2) def save_graph_report(filename: str, username_results: list, db: MaigretDatabase): import networkx as nx G = nx.Graph() graph = MaigretGraph(G) base_site_nodes = {} site_account_nodes = {} processed_values = {} # Track processed values to avoid duplicates for username, id_type, results in username_results: # Add username node, using normalized version directly if different norm_username = username.lower() username_node_name = graph.add_node(id_type, norm_username) for website_name, dictionary in results.items(): if not dictionary or dictionary.get("is_similar"): continue status = dictionary.get("status") if not status or status.status != MaigretCheckStatus.CLAIMED: continue # base site node site_base_url = website_name if site_base_url not in base_site_nodes: base_site_nodes[site_base_url] = graph.add_node( 'site', site_base_url, color='#28a745' ) # Green color site_base_node_name = base_site_nodes[site_base_url] # account node account_url = dictionary.get('url_user', f'{site_base_url}/{norm_username}') account_node_id = f"{site_base_url}: {account_url}" if account_node_id not in site_account_nodes: site_account_nodes[account_node_id] = graph.add_node( 'account', account_url ) account_node_name = site_account_nodes[account_node_id] # link username → account → site graph.link(username_node_name, account_node_name) graph.link(account_node_name, site_base_node_name) def process_ids(parent_node, ids): for k, v in ids.items(): if ( k.endswith('_count') or k.startswith('is_') or k.endswith('_at') or k in 'image' ): continue # Normalize value if string norm_v = v.lower() if isinstance(v, str) else v value_key = f"{k}:{norm_v}" if value_key in processed_values: ids_data_name = processed_values[value_key] else: v_data = v if isinstance(v, str) and v.startswith('['): try: v_data = ast.literal_eval(v) except Exception as e: logging.error(e) continue if isinstance(v_data, list): list_node_name = graph.add_node(k, site_base_url) processed_values[value_key] = list_node_name for vv in v_data: data_node_name = graph.add_node(vv, site_base_url) graph.link(list_node_name, data_node_name) add_ids = { a: b for b, a in db.extract_ids_from_url(vv).items() } if add_ids: process_ids(data_node_name, add_ids) ids_data_name = list_node_name else: ids_data_name = graph.add_node(k, norm_v) processed_values[value_key] = ids_data_name if 'username' in k or k in SUPPORTED_IDS: new_username_key = f"username:{norm_v}" if new_username_key not in processed_values: new_username_node_name = graph.add_node( 'username', norm_v ) processed_values[new_username_key] = ( new_username_node_name ) graph.link(ids_data_name, new_username_node_name) add_ids = { k: v for v, k in db.extract_ids_from_url(v).items() } if add_ids: process_ids(ids_data_name, add_ids) graph.link(parent_node, ids_data_name) if status.ids_data: process_ids(account_node_name, status.ids_data) # Remove overly long nodes nodes_to_remove = [node for node in G.nodes if len(str(node)) > 100] G.remove_nodes_from(nodes_to_remove) # Remove site nodes with only one connection single_degree_sites = [ n for n, deg in G.degree() if n.startswith("site:") and deg <= 1 ] G.remove_nodes_from(single_degree_sites) # Generate interactive visualization from pyvis.network import Network nt = Network(notebook=True, height="750px", width="100%") nt.from_nx(G) nt.show(filename) def get_plaintext_report(context: dict) -> str: output = (context['brief'] + " ").replace('. ', '.\n') interests = list(map(lambda x: x[0], context.get('interests_tuple_list', []))) countries = list(map(lambda x: x[0], context.get('countries_tuple_list', []))) if countries: output += f'Countries: {", ".join(countries)}\n' if interests: output += f'Interests (tags): {", ".join(interests)}\n' return output.strip() """ REPORTS GENERATING """ def generate_report_template(is_pdf: bool): """ HTML/PDF template generation """ def get_resource_content(filename): return open(os.path.join(maigret_path, "resources", filename)).read() maigret_path = os.path.dirname(os.path.realpath(__file__)) if is_pdf: template_content = get_resource_content("simple_report_pdf.tpl") css_content = get_resource_content("simple_report_pdf.css") else: template_content = get_resource_content("simple_report.tpl") css_content = None template = Template(template_content) template.globals["title"] = CaseConverter.snake_to_title # type: ignore template.globals["detect_link"] = enrich_link_str # type: ignore return template, css_content def generate_report_context(username_results: list): brief_text = [] usernames = {} extended_info_count = 0 tags: Dict[str, int] = {} supposed_data: Dict[str, Any] = {} first_seen = None # moved here to speed up the launch of Maigret import pycountry for username, id_type, results in username_results: found_accounts = 0 new_ids = [] usernames[username] = {"type": id_type} for website_name in results: dictionary = results[website_name] # TODO: fix no site data issue if not dictionary: continue if dictionary.get("is_similar"): continue status = dictionary.get("status") if not status: # FIXME: currently in case of timeout continue if status.ids_data: dictionary["ids_data"] = status.ids_data extended_info_count += 1 # detect first seen created_at = status.ids_data.get("created_at") if created_at: if first_seen is None: first_seen = created_at else: try: known_time = parse_datetime_str( first_seen, tzinfos=ADDITIONAL_TZINFO ) new_time = parse_datetime_str( created_at, tzinfos=ADDITIONAL_TZINFO ) if new_time < known_time: first_seen = created_at except Exception as e: logging.debug( "Problems with converting datetime %s/%s: %s", first_seen, created_at, str(e), exc_info=True, ) for k, v in status.ids_data.items(): # suppose target data field = "fullname" if k == "name" else k if field not in supposed_data: supposed_data[field] = [] supposed_data[field].append(v) # suppose country if k in ["country", "locale"]: try: if is_country_tag(k): tag = pycountry.countries.get(alpha_2=v).alpha_2.lower() else: tag = pycountry.countries.search_fuzzy(v)[ 0 ].alpha_2.lower() # TODO: move countries to another struct tags[tag] = tags.get(tag, 0) + 1 except Exception as e: logging.debug( "Pycountry exception: %s", str(e), exc_info=True ) new_usernames = dictionary.get("ids_usernames") if new_usernames: for u, utype in new_usernames.items(): if u not in usernames: new_ids.append((u, utype)) usernames[u] = {"type": utype} if status.status == MaigretCheckStatus.CLAIMED: found_accounts += 1 dictionary["found"] = True else: continue # ignore non-exact search results if status.tags: for t in status.tags: tags[t] = tags.get(t, 0) + 1 brief_text.append( f"Search by {id_type} {username} returned {found_accounts} accounts." ) if new_ids: ids_list = [] for u, t in new_ids: ids_list.append(f"{u} ({t})" if t != "username" else u) brief_text.append("Found target's other IDs: " + ", ".join(ids_list) + ".") brief_text.append(f"Extended info extracted from {extended_info_count} accounts.") brief = " ".join(brief_text).strip() tuple_sort = lambda d: sorted(d, key=lambda x: x[1], reverse=True) if "global" in tags: # remove tag 'global' useless for country detection del tags["global"] first_username = username_results[0][0] countries_lists = list(filter(lambda x: is_country_tag(x[0]), tags.items())) interests_list = list(filter(lambda x: not is_country_tag(x[0]), tags.items())) filtered_supposed_data = filter_supposed_data(supposed_data) return { "username": first_username, # TODO: return brief list "brief": brief, "results": username_results, "first_seen": first_seen, "interests_tuple_list": tuple_sort(interests_list), "countries_tuple_list": tuple_sort(countries_lists), "supposed_data": filtered_supposed_data, "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), } def generate_csv_report(username: str, results: dict, csvfile): writer = csv.writer(csvfile) writer.writerow( ["username", "name", "url_main", "url_user", "exists", "http_status"] ) for site in results: # TODO: fix the reason status = 'Unknown' if "status" in results[site]: status = str(results[site]["status"].status) writer.writerow( [ username, site, results[site].get("url_main", ""), results[site].get("url_user", ""), status, results[site].get("http_status", 0), ] ) def generate_txt_report(username: str, results: dict, file): exists_counter = 0 for website_name in results: dictionary = results[website_name] # TODO: fix no site data issue if not dictionary: continue if ( dictionary.get("status") and dictionary["status"].status == MaigretCheckStatus.CLAIMED ): exists_counter += 1 file.write(dictionary["url_user"] + "\n") file.write(f"Total Websites Username Detected On : {exists_counter}") def generate_json_report(username: str, results: dict, file, report_type): is_report_per_line = report_type.startswith("ndjson") all_json = {} for sitename in results: site_result = results[sitename] # TODO: fix no site data issue if not site_result or not site_result.get("status"): continue if site_result["status"].status != MaigretCheckStatus.CLAIMED: continue data = dict(site_result) data["status"] = data["status"].json() data["site"] = data["site"].json for field in ["future", "checker"]: if field in data: del data[field] if is_report_per_line: data["sitename"] = sitename file.write(json.dumps(data) + "\n") else: all_json[sitename] = data if not is_report_per_line: file.write(json.dumps(all_json)) """ XMIND 8 Functions """ def save_xmind_report(filename, username, results): if os.path.exists(filename): os.remove(filename) workbook = xmind.load(filename) sheet = workbook.getPrimarySheet() design_xmind_sheet(sheet, username, results) xmind.save(workbook, path=filename) def add_xmind_subtopic(userlink, k, v, supposed_data): currentsublabel = userlink.addSubTopic() field = "fullname" if k == "name" else k if field not in supposed_data: supposed_data[field] = [] supposed_data[field].append(v) currentsublabel.setTitle("%s: %s" % (k, v)) def design_xmind_sheet(sheet, username, results): alltags = {} supposed_data = {} sheet.setTitle("%s Analysis" % (username)) root_topic1 = sheet.getRootTopic() root_topic1.setTitle("%s" % (username)) undefinedsection = root_topic1.addSubTopic() undefinedsection.setTitle("Undefined") alltags["undefined"] = undefinedsection for website_name in results: dictionary = results[website_name] if not dictionary: continue result_status = dictionary.get("status") # TODO: fix the reason if not result_status or result_status.status != MaigretCheckStatus.CLAIMED: continue stripped_tags = list(map(lambda x: x.strip(), result_status.tags)) normalized_tags = list( filter(lambda x: x and not is_country_tag(x), stripped_tags) ) category = None for tag in normalized_tags: if tag in alltags.keys(): continue tagsection = root_topic1.addSubTopic() tagsection.setTitle(tag) alltags[tag] = tagsection category = tag section = alltags[category] if category else undefinedsection userlink = section.addSubTopic() userlink.addLabel(result_status.site_url_user) ids_data = result_status.ids_data or {} for k, v in ids_data.items(): # suppose target data if isinstance(v, list): for currentval in v: add_xmind_subtopic(userlink, k, currentval, supposed_data) else: add_xmind_subtopic(userlink, k, v, supposed_data) # add supposed data filtered_supposed_data = filter_supposed_data(supposed_data) if len(filtered_supposed_data) > 0: undefinedsection = root_topic1.addSubTopic() undefinedsection.setTitle("SUPPOSED DATA") for k, v in filtered_supposed_data.items(): currentsublabel = undefinedsection.addSubTopic() currentsublabel.setTitle("%s: %s" % (k, v)) ================================================ FILE: maigret/resources/data.json ================================================ { "sites": { "0-3.RU": { "tags": [ "forum", "ru" ], "engine": "XenForo", "alexaRank": 4046374, "urlMain": "http://0-3.ru", "usernameClaimed": "donna", "usernameUnclaimed": "noonewouldeverusethis7" }, "101010.pl": { "checkType": "status_code", "urlMain": "https://101010.pl/", "url": "https://101010.pl/@{username}", "alexaRank": 1500240, "usernameClaimed": "ueh_kon", "usernameUnclaimed": "noonewouldeverusethis7" }, "0k.clan.su": { "tags": [ "ru" ], "engine": "uCoz", "alexaRank": 8930061, "urlMain": "http://0k.clan.su", "usernameClaimed": "eruzz", "usernameUnclaimed": "noonewouldeverusethis7" }, "discussions.ubisoft.com": { "tags": [ "forum", "gaming" ], "checkType": "message", "presenseStrs": [ "Block User" ], "absenceStrs": [ "You seem to have stumbled upon a page that does not exist. Return to the" ], "url": "https://discussions.ubisoft.com/user/{username}?lang=en-US", "usernameClaimed": "ubi-pingu", "usernameUnclaimed": "noonewouldeverusethis7" }, "1001mem.ru": { "tags": [ "ru" ], "regexCheck": "^[^.]{1,}$", "checkType": "message", "absenceStrs": [ "\u042d\u0442\u043e\u0442 \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u043d\u0435 \u0441\u0443\u0449\u0435\u0441\u0442\u0432\u0443\u0435\u0442, \u0438\u043b\u0438 \u0437\u0430\u0431\u043b\u043e\u043a\u0438\u0440\u043e\u0432\u0430\u043d." ], "alexaRank": 1155058, "urlMain": "http://1001mem.ru", "url": "http://1001mem.ru/{username}", "usernameClaimed": "adam", "usernameUnclaimed": "noonewouldeverusethis7" }, "1001tracklists": { "tags": [ "us" ], "checkType": "message", "presenseStrs": [ "Info Page" ], "absenceStrs": [ "Sorry, the requested user is not valid!" ], "alexaRank": 36590, "urlMain": "https://www.1001tracklists.com", "url": "https://www.1001tracklists.com/user/{username}/index.html", "usernameClaimed": "JacoWilles", "usernameUnclaimed": "noonewouldeverusethis7" }, "101xp.com": { "tags": [ "forum", "gaming", "ru" ], "engine": "XenForo", "alexaRank": 43529, "urlMain": "https://forum-ru.101xp.com", "usernameClaimed": "aida", "usernameUnclaimed": "noonewouldeverusethis7" }, "11x2": { "checkType": "status_code", "alexaRank": 1429974, "urlMain": "https://11x2.com", "url": "https://11x2.com/user/home/{username}", "usernameClaimed": "hazelamy", "usernameUnclaimed": "noonewouldeverusethis7" }, "123rf": { "tags": [ "photo", "ru", "us" ], "checkType": "response_url", "alexaRank": 1151, "urlMain": "https://ru.123rf.com", "url": "https://ru.123rf.com/profile_{username}", "usernameClaimed": "rawpixel", "usernameUnclaimed": "noonewouldeverusethis7" }, "1337x": { "tags": [ "torrent" ], "checkType": "message", "absenceStrs": [ "Bad Username." ], "presenseStrs": [ "Join Date" ], "alexaRank": 492, "urlMain": "https://1337x.to", "url": "https://1337x.to/user/{username}/", "usernameClaimed": "adam", "usernameUnclaimed": "noonewouldeverusethis7" }, "1xforum": { "tags": [ "forum", "ru" ], "engine": "vBulletin", "alexaRank": 1172921, "urlMain": "https://1xforum.com", "usernameClaimed": "adam", "usernameUnclaimed": "noonewouldeverusethis7", "disabled": true }, "247sports": { "tags": [ "news", "sport" ], "checkType": "status_code", "alexaRank": 2084, "urlMain": "https://247sports.com", "url": "https://247sports.com/user/{username}/", "usernameClaimed": "adam", "usernameUnclaimed": "noonewouldeverusethis7" }, "24open": { "disabled": true, "tags": [ "dating", "ru", "us" ], "checkType": "status_code", "alexaRank": 50670, "urlMain": "https://24open.ru", "url": "https://24open.ru/user/{username}/", "usernameClaimed": "niko3193", "usernameUnclaimed": "noonewouldeverusethis7" }, "2Dimensions": { "checkType": "status_code", "alexaRank": 8413056, "urlMain": "https://2Dimensions.com/", "url": "https://2Dimensions.com/a/{username}", "usernameClaimed": "blue", "usernameUnclaimed": "noonewouldeverusethis7" }, "2berega.spb.ru": { "tags": [ "ru" ], "checkType": "message", "absenceStrs": [ "\u041f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d" ], "alexaRank": 1128372, "urlMain": "https://2berega.spb.ru", "url": "https://2berega.spb.ru/user/{username}", "usernameClaimed": "adam", "usernameUnclaimed": "noonewouldeverusethis7" }, "2d-3d": { "tags": [ "ru" ], "checkType": "status_code", "alexaRank": 455230, "urlMain": "https://www.2d-3d.ru", "url": "https://www.2d-3d.ru/user/{username}/", "usernameClaimed": "adam", "usernameUnclaimed": "noonewouldeverusethis7" }, "2fast4u": { "disabled": true, "tags": [ "nl" ], "checkType": "message", "absenceStrs": [ "Deze gebruiker is niet geregistreerd, zodat je zijn of haar profiel niet kunt bekijken." ], "alexaRank": 1325758, "urlMain": "https://www.2fast4u.be", "url": "https://www.2fast4u.be/members/?username={username}", "usernameClaimed": "Schussboelie", "usernameUnclaimed": "noonewouldeverusethis7" }, "33bru": { "tags": [ "ru", "ua" ], "regexCheck": "^[a-zA-Z0-9-]{3,}$", "checkType": "message", "presenseStrs": [ "\u041f\u0440\u043e\u0444\u0438\u043b\u044c \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044f" ], "absenceStrs": [ "\u0418\u0437\u0432\u0438\u043d\u0438\u0442\u0435, \u0442\u0430\u043a\u043e\u0433\u043e \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044f \u043d\u0435 \u0441\u0443\u0449\u0435\u0441\u0442\u0432\u0443\u0435\u0442" ], "alexaRank": 1261462, "urlMain": "http://33bru.com/", "url": "http://{username}.33bru.com/", "usernameClaimed": "adam", "usernameUnclaimed": "noonewouldeverusethis7", "disabled": true }, "3DMir.ru": { "checkType": "message", "presenseStrs": [ "