Repository: Mohammedcha/gplay-scraper Branch: main Commit: 304dcd3d3546 Files: 81 Total size: 567.4 KB Directory structure: gitextract_ihudd3jc/ ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ └── feature_request.md │ ├── pull_request_template.md │ └── workflows/ │ ├── docs.yml │ └── test.yml ├── .gitignore ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README/ │ ├── APP_METHODS.md │ ├── DEVELOPER_METHODS.md │ ├── LIST_METHODS.md │ ├── README.md │ ├── REVIEWS_METHODS.md │ ├── SEARCH_METHODS.md │ ├── SIMILAR_METHODS.md │ └── SUGGEST_METHODS.md ├── README.md ├── SECURITY.md ├── build_docs.py ├── docs/ │ ├── README.md │ ├── api/ │ │ ├── app.rst │ │ ├── developer.rst │ │ ├── list.rst │ │ ├── reviews.rst │ │ ├── search.rst │ │ ├── similar.rst │ │ └── suggest.rst │ ├── conf.py │ ├── configuration.rst │ ├── error_handling.rst │ ├── examples.rst │ ├── fields.rst │ ├── index.rst │ ├── installation.rst │ ├── quickstart.rst │ └── requirements.txt ├── examples/ │ ├── README.md │ ├── app_methods_example.py │ ├── developer_methods_example.py │ ├── list_methods_example.py │ ├── reviews_methods_example.py │ ├── search_methods_example.py │ ├── similar_methods_example.py │ └── suggest_methods_example.py ├── gplay_scraper/ │ ├── __init__.py │ ├── app.py │ ├── config.py │ ├── core/ │ │ ├── __init__.py │ │ ├── gplay_methods.py │ │ ├── gplay_parser.py │ │ └── gplay_scraper.py │ ├── exceptions.py │ ├── models/ │ │ ├── __init__.py │ │ └── element_specs.py │ └── utils/ │ ├── __init__.py │ ├── constants.py │ ├── error_handling.py │ ├── helpers.py │ └── http_client.py ├── output/ │ ├── app_example.json │ ├── developer_example.json │ ├── list_example.json │ ├── reviews_example.json │ ├── search_example.json │ ├── similar_example.json │ └── suggest_example.json ├── requirements.txt ├── setup.py └── tests/ ├── __init__.py ├── test_app_methods.py ├── test_basic.py ├── test_developer_methods.py ├── test_list_methods.py ├── test_package.py ├── test_reviews_methods.py ├── test_search_methods.py ├── test_similar_methods.py └── test_suggest_methods.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a report to help us improve title: '[BUG] ' labels: bug assignees: '' --- **Describe the bug** A clear and concise description of what the bug is. **To Reproduce** Steps to reproduce the behavior: 1. Use app ID '...' 2. Call method '....' 3. See error **Expected behavior** A clear and concise description of what you expected to happen. **Code Example** ```python from gplay_scraper import GPlayScraper scraper = GPlayScraper() # Your code here ``` **Error Output** ``` Paste the full error message here ``` **Environment:** - OS: [e.g. Windows 10, macOS, Linux] - Python version: [e.g. 3.8.5] - Library version: [e.g. 1.0.2] **Additional context** Add any other context about the problem here. ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea for this project title: '[FEATURE] ' labels: enhancement assignees: '' --- **Is your feature request related to a problem? Please describe.** A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 
**Describe the solution you'd like** A clear and concise description of what you want to happen. **Describe alternatives you've considered** A clear and concise description of any alternative solutions or features you've considered. **Use Case** Describe how this feature would be used: ```python # Example of how the new feature would work scraper = GPlayScraper() result = scraper.new_method(app_id) ``` **Additional context** Add any other context or screenshots about the feature request here. ================================================ FILE: .github/pull_request_template.md ================================================ # Pull Request ## Description Brief description of changes made. ## Type of Change - [ ] Bug fix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) - [ ] Documentation update ## Testing - [ ] I have tested my changes locally - [ ] I have added tests for new functionality - [ ] All existing tests pass ## Code Quality - [ ] My code follows the project's style guidelines - [ ] I have performed a self-review of my own code - [ ] I have commented my code, particularly in hard-to-understand areas - [ ] I have made corresponding changes to the documentation ## Related Issues Fixes #(issue number) ## Additional Notes Any additional information about the changes. ================================================ FILE: .github/workflows/docs.yml ================================================ name: Build and Deploy Documentation on: push: branches: [ main ] permissions: contents: write jobs: docs: runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 with: python-version: '3.11' - name: Install dependencies run: | python -m pip install --upgrade pip pip install -r docs/requirements.txt - name: Build documentation run: | cd docs sphinx-build -b html . _build/html touch _build/html/.nojekyll - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@v4 if: github.ref == 'refs/heads/main' with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: ./docs/_build/html force_orphan: true ================================================ FILE: .github/workflows/test.yml ================================================ name: Tests on: push: branches: [ main, develop ] pull_request: branches: [ main ] jobs: test: runs-on: ubuntu-latest strategy: matrix: python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] fail-fast: false steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip pip install -r requirements.txt pip install pytest pytest-cov - name: Run package and basic functionality tests run: | python -m unittest tests.test_package tests.test_basic -v - name: Run network-dependent tests (optional) continue-on-error: true timeout-minutes: 15 run: | echo "Running network-dependent tests with delays (failures expected due to rate limiting)..." 
python -m unittest tests.test_app_methods -v || echo "App methods test completed" python -m unittest tests.test_search_methods -v || echo "Search methods test completed" python -m unittest tests.test_reviews_methods -v || echo "Reviews methods test completed" python -m unittest tests.test_developer_methods -v || echo "Developer methods test completed" python -m unittest tests.test_list_methods -v || echo "List methods test completed" python -m unittest tests.test_similar_methods -v || echo "Similar methods test completed" python -m unittest tests.test_suggest_methods -v || echo "Suggest methods test completed" ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ !docs/.nojekyll # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv Pipfile.lock # PEP 582 __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # IDE .vscode/ .idea/ *.swp *.swo # OS .DS_Store Thumbs.db # Project specific backup/ temp/ *.tmp # Publishing scripts (local use only) publish_to_github.bat publish_to_github.sh publish_to_pypi.bat publish_to_pypi.sh update_github.bat update_github.sh update_pypi.bat update_pypi.sh # Test and debug files xx.py x_fallback.py test_all_methods.py debug_limbo*.py limbo_ds5_raw.txt appbrain_scraper.py # Documentation folders (local use only) wiki/ community/ # Chrome extensions chrome-extension/ chrome-extension-new/ firefox-extensions/ firefox-extension-new/ edge-extensions/ edge-extension-new/ opera-extensions/ opera-extension-new/ webstore-upload/ webstore-upload-new/ webstore-upload-chrome/ webstore-upload-firefox/ webstore-upload-edge/ webstore-upload-opera/ webstore-upload-chrome-new/ webstore-upload-firefox-new/ webstore-upload-edge-new/ webstore-upload-opera-new/ build-extensions/ build-extension/ build-extension-chrome/ build-extension-firefox/ build-extension-edge/ build-extension-opera/ build-extension-chrome-new/ build-extension-firefox-new/ build-extension-edge-new/ build-extension-opera-new/ dist-extensions/ dist-extension/ dist-extension-chrome/ dist-extension-firefox/ dist-extension-edge/ dist-extension-opera/ dist-extension-chrome-new/ dist-extension-firefox-new/ dist-extension-edge-new/ dist-extension-opera-new/ release/ release-chrome/ release-firefox/ release-edge/ release-opera/ release-chrome-new/ release-firefox-new/ release-edge-new/ release-opera-new/ temp-extensions/ temp-extension/ 
temp-extension-chrome/ temp-extension-firefox/ temp-extension-edge/ temp-extension-opera/ temp-extension-chrome-new/ ================================================ FILE: CHANGELOG.md ================================================ # Changelog All notable changes to this project will be documented in this file. ## [1.0.6] - 2025-11-16 ### Bug Fixes - **Reviews Pagination Fix**: Fixed critical issue when requesting more reviews than available - Resolved 'NoneType' object is not subscriptable error - Improved token extraction logic for empty review responses - Now gracefully returns available reviews instead of crashing - Enhanced error handling in ReviewsScraper and ReviewsParser - **Empty Response Handling**: Better handling of apps with limited reviews - Safe bounds checking for pagination tokens - Proper null checking for empty data structures - Graceful degradation when no more reviews are available ### Acknowledgments - Thanks to [@PhamDinhThienVu](https://github.com/PhamDinhThienVu) for reporting the reviews pagination bug ## [1.0.5] - 2025-10-18 ### New Features - **Publisher Country Detection**: Added `publisherCountry` field to app data - Automatically detects developer's country from phone number and address - Uses international phone prefixes and address parsing - Returns country names like "United States", "Germany", "Japan", etc. - Handles multiple countries when phone and address differ (e.g., "United States/Germany") ### Removed Features - **Removed updatedTimestamp**: Removed deprecated timestamp field that was causing confusion ### Bug Fixes - **Enhanced Error Handling**: Improved error handling and retry mechanisms - Better HTTP client fallback when requests fail - More robust JSON parsing with multiple fallback strategies - Improved handling of network timeouts and connection errors - **Retry Mechanism**: Fixed automatic retry logic for failed requests - Exponential backoff for rate limiting - Automatic HTTP client switching on failures - Better error recovery for temporary network issues - **General Bug Fixes**: Fixed various edge cases and improved stability - Better handling of malformed JSON responses - Improved data extraction for apps with missing fields - Enhanced Unicode handling for international app data ## [1.0.4] - 2025-10-16 ### New Features - **Assets Parameter**: Added configurable image sizes for all app methods - `SMALL` (512px width) - `MEDIUM` (1024px width) - Default - `LARGE` (2048px width) - `ORIGINAL` (Maximum size) - Available in all app methods: `app_analyze()`, `app_get_field()`, `app_get_fields()`, `app_print_field()`, `app_print_fields()`, `app_print_all()` - Affects icon, headerImage, screenshots, and videoImage URLs ### Bug Fixes - **Release Date Fallback**: Fixed missing release dates when using language/country parameters - Added automatic fallback request without `hl`/`gl` parameters when release date is null - Ensures release date extraction for apps in all regions - **Path Resolution**: Fixed various path-related issues in data extraction - **Image URL Processing**: Improved image URL formatting with proper size parameters ### Usage Examples ```python # Use different asset sizes data = scraper.app_analyze("com.whatsapp", assets="LARGE") icon = scraper.app_get_field("com.whatsapp", "icon", assets="SMALL") scraper.app_print_all("com.whatsapp", assets="ORIGINAL") ``` ## [1.0.3] - 2025-10-15 ### New Features - **Enhanced Search Pagination**: Now able to fetch unlimited search results (300+) with automatic pagination, not limited to 50 
results anymore - **Improved Search Performance**: Optimized search result fetching with better token handling and batch processing ### Bug Fixes & Code Quality Improvements - **Code Review**: Addressed security vulnerabilities and code quality issues - **Error Handling**: Improved error handling patterns across all modules - **Performance**: Optimized JSON parsing and HTTP client fallback logic - **Security**: Fixed potential SSRF and injection vulnerabilities - **Maintainability**: Enhanced code readability and documentation ## [1.0.2] - 2025-01-15 ### Major Release - Complete Library Redesign 🚀 This version represents a complete rewrite of GPlay Scraper with a focus on modularity, extensibility, and comprehensive data extraction across all Google Play Store features. ### New Features #### 7 Method Types with 42 Functions - **App Methods** - Extract 65+ data fields from any app (ratings, installs, pricing, permissions, screenshots, etc.) - **Search Methods** - Search Google Play Store apps with comprehensive filtering and pagination - **Reviews Methods** - Extract user reviews with ratings, timestamps, helpful votes, and detailed feedback - **Developer Methods** - Get all apps published by a specific developer using developer ID - **List Methods** - Access top charts (TOP_FREE, TOP_PAID, TOP_GROSSING) by category with 54 categories - **Similar Methods** - Find similar/competitor apps for market research and competitive analysis - **Suggest Methods** - Get search suggestions and autocomplete for ASO keyword research Each method type includes 6 functions: - `analyze()` - Get all data as dictionary/list - `get_field()` - Get single field value - `get_fields()` - Get multiple fields as dictionary - `print_field()` - Print single field to console - `print_fields()` - Print multiple fields to console - `print_all()` - Print all data as formatted JSON #### 7 HTTP Clients with Automatic Fallback - **requests** (default) - Standard Python HTTP library, reliable and well-tested - **curl_cffi** - Browser impersonation with TLS fingerprinting, best for avoiding detection - **tls_client** - Custom TLS fingerprinting, good for bypassing restrictions - **httpx** - Modern async-capable HTTP client with HTTP/2 support - **urllib3** - Low-level HTTP client with connection pooling - **cloudscraper** - Cloudflare bypass capabilities - **aiohttp** - Async HTTP client for high-performance concurrent requests Automatic fallback system tries clients in order until one succeeds, ensuring maximum reliability. #### Multi-Language & Multi-Region Support - Support for 100+ languages (en, es, fr, de, ja, ko, zh, ar, etc.) - Support for 150+ countries (us, gb, ca, au, in, br, jp, etc.) 
- Get localized app data, reviews, and search results - Region-specific pricing and availability information #### Comprehensive Data Extraction - **65+ App Fields**: title, developer, ratings, installs, price, screenshots, permissions, release date, update date, size, version, content rating, privacy policy, and more - **Review Data**: user name, rating, review text, timestamp, app version, helpful votes, developer reply - **Search Results**: app ID, title, developer, rating, price, icon, screenshots, description snippet - **Developer Portfolio**: all apps from a developer with complete metadata - **Top Charts**: ranked lists with install counts, ratings, and trending data - **Similar Apps**: competitor analysis with relevance scoring - **Search Suggestions**: popular keywords and autocomplete terms #### Enhanced Architecture - **Modular Design**: Separate classes for methods, scrapers, and parsers - **Core Modules**: `gplay_methods.py`, `gplay_scraper.py`, `gplay_parser.py` - **HTTP Client Abstraction**: `HttpClient` class with pluggable client support - **Element Specs**: Reusable CSS selector specifications for data extraction - **Helper Utilities**: Text processing, date parsing, JSON cleaning, age calculation - **Exception Hierarchy**: 6 custom exception types for specific error scenarios #### Documentation & Testing - **Comprehensive Docstrings**: All 42 methods, 7 scrapers, 7 parsers, and utility functions documented - **Sphinx Documentation**: Professional HTML documentation with examples, API reference, and guides - **HTTP Clients Guide**: Detailed documentation on when and how to use each HTTP client - **Fields Reference**: Complete reference of all 65+ fields, categories, and parameters - **Unit Tests**: Complete test coverage for all 7 method types - **Examples**: Real-world usage examples for each method type #### Configuration & Customization - **Configurable Parameters**: Language, country, count, sort order, collection type - **Rate Limiting**: Built-in delays to prevent blocking (configurable) - **Error Handling**: Graceful fallbacks and informative error messages - **Logging**: Detailed logging for debugging and monitoring - **Timeout Control**: Configurable request timeouts - **Retry Logic**: Automatic retries with exponential backoff ### Breaking Changes - Complete API redesign - not backward compatible with v1.0.1 - Method names changed from `get_app_details()` to `app_analyze()` - New parameter structure for all methods - HTTP client must be specified or uses automatic fallback - Exception types renamed and reorganized ### Migration Guide Old (v1.0.1): ```python scraper = GPlayScraper() data = scraper.get_app_details("com.whatsapp") ``` New (v1.0.2): ```python scraper = GPlayScraper() data = scraper.app_analyze("com.whatsapp") ``` ### Performance Improvements - Faster JSON parsing with optimized regex patterns - Reduced memory usage with streaming parsers - Better caching of HTTP client instances - Parallel request support with async clients ### Bug Fixes - Fixed JSON parsing for apps with special characters in descriptions - Fixed review extraction for apps with no reviews - Fixed developer ID extraction from developer pages - Fixed category parsing for apps in multiple categories - Fixed price parsing for apps with regional pricing - Fixed screenshot URL extraction for apps with video previews ## [1.0.1] - 2025-10-07 ### Added - **Paid App Support**: Fixed JSON parsing issues for paid apps with malformed data structures - **Reviews Extraction**: Successfully 
extracts user reviews for both free and paid apps - **Organized Output**: Restructured JSON output with logical field grouping: - Basic Information - Category & Genre - Release & Updates - Media Content - Install Statistics - Ratings & Reviews - Advertising - Technical Details - Content Rating - Privacy & Security - Pricing & Monetization - Developer Information - ASO Analysis - **Enhanced JSON Parser**: Bracket-matching algorithm for complex nested structures - **Original Price Field**: Added `originalPrice` field for sale price tracking ### Fixed - **JSON Parsing Errors**: Resolved "Expecting ',' delimiter" errors for paid apps - **Reviews Data**: Fixed empty reviews arrays by implementing alternative parsing methods - **Malformed Data Handling**: Improved handling of unquoted keys and malformed JSON from Play Store ### Improved - **Error Handling**: Better fallback mechanisms for JSON parsing failures - **Data Extraction**: More robust extraction for apps with complex pricing structures - **Code Organization**: Cleaner separation of parsing logic and error recovery ## [1.0.0] - 2025-10-06 ### Added - Initial release of GPlay Scraper - Complete Google Play Store app data extraction - ASO (App Store Optimization) analysis - Modular architecture with separate core modules - Support for 60+ data fields including: - Basic app information - Install statistics and metrics - Ratings and reviews data - Technical specifications - Developer information - Media content (screenshots, videos, icons) - Pricing and monetization details - ASO keyword analysis - Multiple access methods: - `analyze()` - Complete app analysis - `get_field()` - Single field retrieval - `get_fields()` - Multiple field retrieval - `print_field()` - Direct field printing - `print_fields()` - Multiple field printing - `print_all()` - Complete data printing - Comprehensive documentation and examples - Error handling and logging - Rate limiting considerations - Cross-platform compatibility ### Features - Web scraping of Google Play Store pages - JSON data extraction and parsing - Automatic install metrics calculation - Keyword frequency analysis - Readability scoring - Review data extraction - Image URL processing - Date parsing and age calculation ================================================ FILE: CODE_OF_CONDUCT.md ================================================ # Contributor Covenant Code of Conduct ## Our Pledge We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. 
## Our Standards Examples of behavior that contributes to a positive environment for our community include: * Demonstrating empathy and kindness toward other people * Being respectful of differing opinions, viewpoints, and experiences * Giving and gracefully accepting constructive feedback * Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience * Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: * The use of sexualized language or imagery, and sexual attention or advances of any kind * Trolling, insulting or derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or email address, without their explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Enforcement Responsibilities Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. ## Scope This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement through GitHub Issues. All complaints will be reviewed and investigated promptly and fairly. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. [homepage]: https://www.contributor-covenant.org ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing to GPlay Scraper Thank you for your interest in contributing! ## Development Setup 1. Fork the repository 2. Clone your fork: `git clone https://github.com/yourusername/gplay-scraper.git` 3. Install in development mode: `pip install -e .` 4. Install dev dependencies: `pip install pytest` ## Running Tests ```bash python -m pytest tests/ -v ``` ## Code Style - Follow PEP 8 - Add docstrings to new functions - Include type hints where appropriate ## Submitting Changes 1. Create a feature branch: `git checkout -b feature-name` 2. Make your changes 3. Add tests for new functionality 4. Run tests to ensure they pass 5. Submit a pull request ## Reporting Issues Please use GitHub Issues to report bugs or request features. ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2025 Mohammed Cha Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: MANIFEST.in ================================================ include README.md include LICENSE include requirements.txt include CHANGELOG.md include CONTRIBUTING.md include SECURITY.md recursive-include examples *.py recursive-include tests *.py ================================================ FILE: README/APP_METHODS.md ================================================ # App Methods Extract detailed information about individual Google Play Store apps. ## Quick Start ```python from gplay_scraper import GPlayScraper scraper = GPlayScraper() # Get all data data = scraper.app_analyze("com.whatsapp") print(data['title'], data['score'], data['installs']) # Get specific fields title = scraper.app_get_field("com.whatsapp", "title") print(title) # WhatsApp Messenger # Get multiple fields info = scraper.app_get_fields("com.whatsapp", ["title", "score", "developer"]) print(info) ``` --- ## HTTP Clients The library supports 7 HTTP clients with automatic fallback. If one fails, it tries the next. ### Supported Clients 1. **requests** (default) - Standard Python HTTP library 2. **curl_cffi** - cURL with browser impersonation 3. **tls_client** - Advanced TLS fingerprinting 4. **httpx** - Modern async-capable HTTP client 5. **urllib3** - Low-level HTTP client 6. **cloudscraper** - Cloudflare bypass 7. **aiohttp** - Async HTTP client ### Usage ```python # Default (tries requests first, then others) scraper = GPlayScraper() # Specify a client scraper = GPlayScraper(http_client="curl_cffi") scraper = GPlayScraper(http_client="tls_client") scraper = GPlayScraper(http_client="httpx") ``` ### Installation ```bash # Default pip install requests # Advanced clients (optional) pip install curl-cffi pip install tls-client pip install httpx pip install urllib3 pip install cloudscraper pip install aiohttp ``` **Note:** The library automatically falls back to available clients if your preferred one fails. --- ## Methods ### `app_analyze(app_id, lang='en', country='us', assets=None)` Returns all 65+ fields as a dictionary. ```python data = scraper.app_analyze("com.whatsapp") # Returns: {'appId': 'com.whatsapp', 'title': 'WhatsApp Messenger', ...} # With custom image sizes data = scraper.app_analyze("com.whatsapp", assets="LARGE") # Returns same data but with larger image URLs (2048px) ``` ### `app_get_field(app_id, field, lang='en', country='us', assets=None)` Returns a single field value. ```python score = scraper.app_get_field("com.whatsapp", "score") # Returns: 4.2 # Get high-quality icon icon = scraper.app_get_field("com.whatsapp", "icon", assets="ORIGINAL") # Returns: URL with maximum image quality ``` ### `app_get_fields(app_id, fields, lang='en', country='us', assets=None)` Returns multiple fields as a dictionary. 
```python data = scraper.app_get_fields("com.whatsapp", ["title", "score", "installs"]) # Returns: {'title': 'WhatsApp Messenger', 'score': 4.2, 'installs': '5,000,000,000+'} # Get media with custom sizes media = scraper.app_get_fields("com.whatsapp", ["icon", "screenshots"], assets="SMALL") # Returns: Media URLs with 512px width ``` ### `app_print_field(app_id, field, lang='en', country='us', assets=None)` Prints a single field to console. ```python scraper.app_print_field("com.whatsapp", "title") # Output: title: WhatsApp Messenger # Print large icon URL scraper.app_print_field("com.whatsapp", "icon", assets="LARGE") # Output: icon: https://...=w2048 ``` ### `app_print_fields(app_id, fields, lang='en', country='us', assets=None)` Prints multiple fields to console. ```python scraper.app_print_fields("com.whatsapp", ["title", "score"]) # Output: # title: WhatsApp Messenger # score: 4.2 # Print media with original quality scraper.app_print_fields("com.whatsapp", ["icon", "screenshots"], assets="ORIGINAL") # Output: URLs with maximum image quality ``` ### `app_print_all(app_id, lang='en', country='us', assets=None)` Prints all fields as formatted JSON. ```python scraper.app_print_all("com.whatsapp") # Output: Full JSON with all 65+ fields # Print with high-quality images scraper.app_print_all("com.whatsapp", assets="LARGE") # Output: Full JSON with 2048px image URLs ``` --- ## Available Fields (65+) ### Basic Information - `appId` - Package name (e.g., "com.whatsapp") - `title` - App name - `summary` - Short description - `description` - Full description - `appUrl` - Play Store URL ### Ratings & Reviews - `score` - Average rating (1-5) - `ratings` - Total number of ratings - `reviews` - Total number of reviews - `histogram` - Rating distribution [1★, 2★, 3★, 4★, 5★] ### Install Metrics - `installs` - Install range (e.g., "10,000,000+") - `minInstalls` - Minimum installs - `realInstalls` - Estimated real installs - `dailyInstalls` - Estimated daily installs - `monthlyInstalls` - Estimated monthly installs - `minDailyInstalls` - Minimum daily installs - `realDailyInstalls` - Real estimated daily installs - `minMonthlyInstalls` - Minimum monthly installs - `realMonthlyInstalls` - Real estimated monthly installs ### Pricing - `price` - Price in currency (0 if free) - `currency` - Currency code (e.g., "USD") - `free` - Boolean, true if free - `offersIAP` - Has in-app purchases - `inAppProductPrice` - IAP price range - `sale` - Currently on sale - `originalPrice` - Original price if on sale ### Media - `icon` - App icon URL - `headerImage` - Header image URL - `screenshots` - List of screenshot URLs - `video` - Promo video URL - `videoImage` - Video thumbnail URL ### Developer - `developer` - Developer name - `developerId` - Developer ID - `developerEmail` - Contact email - `developerWebsite` - Website URL - `developerAddress` - Physical address - `developerPhone` - Contact phone - `privacyPolicy` - Privacy policy URL - `publisherCountry` - Developer's country ### Category - `genre` - Primary category (e.g., "Communication") - `genreId` - Category ID (e.g., "COMMUNICATION") - `categories` - List of categories ### Technical - `version` - Current version - `androidVersion` - Required Android version - `minAndroidApi` - Minimum API level - `maxAndroidApi` - Maximum API level - `appBundle` - App bundle name ### Dates - `released` - Release date (e.g., "Feb 24, 2009") - `appAgeDays` - Age in days - `lastUpdated` - Last update date ### Content - `contentRating` - Age rating (e.g., "Everyone") - 
`contentRatingDescription` - Rating description - `whatsNew` - Recent changes list - `permissions` - Required permissions dict - `dataSafety` - Data safety info list ### Advertising - `adSupported` - Contains ads - `containsAds` - Shows advertisements ### Availability - `available` - App is available --- ## Practical Examples ### Competitive Analysis ```python apps = ["com.whatsapp", "com.telegram", "com.viber"] for app_id in apps: data = scraper.app_get_fields(app_id, ["title", "score", "realInstalls"]) print(f"{data['title']}: {data['score']}★ - {data['realInstalls']:,} installs") ``` ### Monitor App Updates ```python app_id = "com.whatsapp" data = scraper.app_get_fields(app_id, ["version", "lastUpdated", "whatsNew"]) print(f"Version: {data['version']}") print(f"Updated: {data['lastUpdated']}") print(f"Changes: {data['whatsNew']}") ``` ### Extract Developer Info ```python app_id = "com.whatsapp" dev_info = scraper.app_get_fields(app_id, [ "developer", "developerEmail", "developerWebsite" ]) print(dev_info) ``` ### Get High-Quality Media ```python app_id = "com.whatsapp" # Get original quality images media = scraper.app_get_fields(app_id, ["icon", "screenshots"], assets="ORIGINAL") print(f"Icon: {media['icon']}") # Maximum quality print(f"Screenshots: {len(media['screenshots'])} images") # Get small thumbnails for faster loading thumbnails = scraper.app_get_fields(app_id, ["icon", "headerImage"], assets="SMALL") print(f"Small icon: {thumbnails['icon']}") # 512px ``` ### Check Monetization ```python app_id = "com.whatsapp" money = scraper.app_get_fields(app_id, [ "free", "price", "offersIAP", "containsAds" ]) print(f"Free: {money['free']}") print(f"Has IAP: {money['offersIAP']}") print(f"Has Ads: {money['containsAds']}") ``` --- ## Parameters ### Initialization - `http_client` (str, optional) - HTTP client to use: "requests", "curl_cffi", "tls_client", "httpx", "urllib3", "cloudscraper", "aiohttp" (default: "requests") ### Method Parameters - `app_id` (str, required) - App package name from Play Store URL - `lang` (str, optional) - Language code (default: 'en') - `country` (str, optional) - Country code (default: 'us') - `assets` (str, optional) - Image size: 'SMALL', 'MEDIUM', 'LARGE', 'ORIGINAL' (default: 'MEDIUM') - `field` (str) - Single field name - `fields` (List[str]) - List of field names ### Assets Parameter (Image Sizes) - **SMALL** - 512px width (`w512`) - **MEDIUM** - 1024px width (`w1024`) - Default - **LARGE** - 2048px width (`w2048`) - **ORIGINAL** - Maximum size (`w9999`) Affects these fields: `icon`, `headerImage`, `screenshots`, `videoImage` ```python # Different image qualities small_icon = scraper.app_get_field("com.whatsapp", "icon", assets="SMALL") # Returns: https://...=w512 large_icon = scraper.app_get_field("com.whatsapp", "icon", assets="LARGE") # Returns: https://...=w2048 original_icon = scraper.app_get_field("com.whatsapp", "icon", assets="ORIGINAL") # Returns: https://...=w9999 ``` ### Finding App IDs From Play Store URL: `https://play.google.com/store/apps/details?id=com.whatsapp` The app_id is: `com.whatsapp` ### Language & Country Codes - **Language**: 'en', 'es', 'fr', 'de', 'ja', 'ko', 'pt', 'ru', 'zh', etc. - **Country**: 'us', 'gb', 'ca', 'au', 'in', 'br', 'jp', 'kr', 'de', 'fr', etc. 
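### Extracting App IDs from URLs

If you start from raw Play Store URLs rather than package names, a small helper can pull out the `id` query parameter before calling the app methods. This is a minimal sketch using only the standard library; `extract_app_id` is a hypothetical helper shown for illustration, not part of the library.

```python
from urllib.parse import urlparse, parse_qs

def extract_app_id(play_store_url):
    """Return the `id` query parameter from a Play Store details URL."""
    query = parse_qs(urlparse(play_store_url).query)
    return query.get("id", [""])[0]

app_id = extract_app_id("https://play.google.com/store/apps/details?id=com.whatsapp")
print(app_id)  # com.whatsapp
```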
--- ## When to Use Each Method - **`app_analyze()`** - Need all data for comprehensive analysis - **`app_get_field()`** - Need just one specific value - **`app_get_fields()`** - Need several specific fields (more efficient than multiple get_field calls) - **`app_print_field()`** - Quick debugging/console output - **`app_print_fields()`** - Quick debugging of multiple values - **`app_print_all()`** - Explore available data structure --- ## Advanced Features ### Rate Limiting Built-in rate limiting (1 second delay between requests) prevents blocking. ### Error Handling ```python from gplay_scraper import GPlayScraper, AppNotFoundError, NetworkError scraper = GPlayScraper() try: data = scraper.app_analyze("invalid.app.id") except AppNotFoundError: print("App not found") except NetworkError: print("Network error occurred") ``` ### Multi-Region Data ```python # Get data from different regions us_data = scraper.app_analyze("com.whatsapp", country="us") uk_data = scraper.app_analyze("com.whatsapp", country="gb") jp_data = scraper.app_analyze("com.whatsapp", country="jp", lang="ja") ``` ================================================ FILE: README/DEVELOPER_METHODS.md ================================================ # Developer Methods Get all apps published by a specific developer on Google Play Store. ## Quick Start ```python from gplay_scraper import GPlayScraper scraper = GPlayScraper() # Get all apps from a developer apps = scraper.developer_analyze("5700313618786177705") for app in apps: print(f"{app['title']}: {app['score']}★") # Get specific fields titles = scraper.developer_get_field("5700313618786177705", "title") print(titles) # Get multiple fields apps = scraper.developer_get_fields("5700313618786177705", ["title", "score", "free"]) print(apps) ``` --- ## HTTP Clients The library supports 7 HTTP clients with automatic fallback. If one fails, it tries the next. ### Supported Clients 1. **requests** (default) - Standard Python HTTP library 2. **curl_cffi** - cURL with browser impersonation 3. **tls_client** - Advanced TLS fingerprinting 4. **httpx** - Modern async-capable HTTP client 5. **urllib3** - Low-level HTTP client 6. **cloudscraper** - Cloudflare bypass 7. **aiohttp** - Async HTTP client ### Usage ```python # Default (tries requests first, then others) scraper = GPlayScraper() # Specify a client scraper = GPlayScraper(http_client="curl_cffi") scraper = GPlayScraper(http_client="tls_client") scraper = GPlayScraper(http_client="httpx") ``` ### Installation ```bash # Default pip install requests # Advanced clients (optional) pip install curl-cffi pip install tls-client pip install httpx pip install urllib3 pip install cloudscraper pip install aiohttp ``` **Note:** The library automatically falls back to available clients if your preferred one fails. --- ## Methods ### `developer_analyze(dev_id, count=100, lang='en', country='us')` Returns all apps from a developer as a list of dictionaries. ```python apps = scraper.developer_analyze("5700313618786177705", count=50) # Returns: [{'appId': '...', 'title': '...', 'score': 4.5, ...}, ...] ``` ### `developer_get_field(dev_id, field, count=100, lang='en', country='us')` Returns a specific field from all developer apps. ```python titles = scraper.developer_get_field("5700313618786177705", "title") # Returns: ['App 1', 'App 2', 'App 3', ...] ``` ### `developer_get_fields(dev_id, fields, count=100, lang='en', country='us')` Returns multiple fields from all developer apps. 
```python apps = scraper.developer_get_fields("5700313618786177705", ["title", "score", "free"]) # Returns: [{'title': 'App 1', 'score': 4.5, 'free': True}, ...] ``` ### `developer_print_field(dev_id, field, count=100, lang='en', country='us')` Prints a specific field from all developer apps. ```python scraper.developer_print_field("5700313618786177705", "title") # Output: # 1. title: App 1 # 2. title: App 2 # 3. title: App 3 ``` ### `developer_print_fields(dev_id, fields, count=100, lang='en', country='us')` Prints multiple fields from all developer apps. ```python scraper.developer_print_fields("5700313618786177705", ["title", "score"]) # Output: # 1. title: App 1, score: 4.5 # 2. title: App 2, score: 4.2 ``` ### `developer_print_all(dev_id, count=100, lang='en', country='us')` Prints all data for all developer apps as formatted JSON. ```python scraper.developer_print_all("5700313618786177705") # Output: Full JSON array with all apps ``` --- ## Available Fields - `appId` - App package name (e.g., "com.example.app") - `title` - App name - `description` - App description - `icon` - App icon URL - `url` - Play Store URL - `developer` - Developer name - `score` - Average rating (1-5) - `scoreText` - Rating as text (e.g., "4.5") - `currency` - Price currency (e.g., "USD") - `price` - App price (0 if free) - `free` - Boolean, true if free --- ## Practical Examples ### Analyze Developer Portfolio ```python dev_id = "5700313618786177705" apps = scraper.developer_analyze(dev_id) print(f"Total apps: {len(apps)}") print(f"Average rating: {sum(a['score'] for a in apps if a['score']) / len(apps):.2f}") print(f"Free apps: {sum(1 for a in apps if a['free'])}") print(f"Paid apps: {sum(1 for a in apps if not a['free'])}") ``` ### Find Top-Rated Apps ```python dev_id = "5700313618786177705" apps = scraper.developer_get_fields(dev_id, ["title", "score"]) # Sort by rating top_apps = sorted(apps, key=lambda x: x['score'] or 0, reverse=True)[:5] for i, app in enumerate(top_apps, 1): print(f"{i}. 
{app['title']}: {app['score']}★") ``` ### Compare Free vs Paid Apps ```python dev_id = "5700313618786177705" apps = scraper.developer_get_fields(dev_id, ["title", "free", "price", "score"]) free_apps = [a for a in apps if a['free']] paid_apps = [a for a in apps if not a['free']] print(f"Free apps: {len(free_apps)} (avg rating: {sum(a['score'] or 0 for a in free_apps)/len(free_apps):.2f})") print(f"Paid apps: {len(paid_apps)} (avg rating: {sum(a['score'] or 0 for a in paid_apps)/len(paid_apps):.2f})") ``` ### Export Developer Apps ```python import json dev_id = "5700313618786177705" apps = scraper.developer_analyze(dev_id) with open('developer_apps.json', 'w') as f: json.dump(apps, f, indent=2) print(f"Exported {len(apps)} apps to developer_apps.json") ``` --- ## Parameters ### Initialization - `http_client` (str, optional) - HTTP client to use: "requests", "curl_cffi", "tls_client", "httpx", "urllib3", "cloudscraper", "aiohttp" (default: "requests") ### Method Parameters - `dev_id` (str, required) - Developer ID (numeric or string) - `count` (int, optional) - Maximum number of apps to return (default: 100) - `lang` (str, optional) - Language code (default: 'en') - `country` (str, optional) - Country code (default: 'us') - `field` (str) - Single field name - `fields` (List[str]) - List of field names ### Finding Developer IDs **Method 1: From Developer Page URL** - Numeric ID: `https://play.google.com/store/apps/dev?id=5700313618786177705` - Developer ID: `5700313618786177705` - String ID: `https://play.google.com/store/apps/developer?id=Google+LLC` - Developer ID: `Google+LLC` or `Google LLC` **Method 2: From App Page** 1. Go to any app by the developer 2. Click on the developer name 3. Extract ID from the URL ### Language & Country Codes - **Language**: 'en', 'es', 'fr', 'de', 'ja', 'ko', 'pt', 'ru', 'zh', etc. - **Country**: 'us', 'gb', 'ca', 'au', 'in', 'br', 'jp', 'kr', 'de', 'fr', etc. --- ## When to Use Each Method - **`developer_analyze()`** - Need complete data for all apps - **`developer_get_field()`** - Need just one field from all apps - **`developer_get_fields()`** - Need specific fields from all apps (more efficient) - **`developer_print_field()`** - Quick debugging/console output - **`developer_print_fields()`** - Quick debugging of multiple fields - **`developer_print_all()`** - Explore available data structure --- ## Advanced Features ### Rate Limiting Built-in rate limiting (1 second delay between requests) prevents blocking. 
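The built-in delay covers the requests the library makes internally; if you loop over many developers yourself, an extra client-side pause between iterations can further reduce the chance of being rate limited. A minimal sketch, assuming the developer IDs below are placeholders:

```python
import time
from gplay_scraper import GPlayScraper

scraper = GPlayScraper()
dev_ids = ["5700313618786177705", "another_dev_id"]  # placeholder IDs

for dev_id in dev_ids:
    apps = scraper.developer_get_fields(dev_id, ["title", "score"], count=20)
    print(f"{dev_id}: {len(apps)} apps retrieved")
    time.sleep(2)  # optional extra delay between developers
```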
### Error Handling ```python from gplay_scraper import GPlayScraper, AppNotFoundError, NetworkError scraper = GPlayScraper() try: apps = scraper.developer_analyze("invalid_dev_id") except AppNotFoundError: print("Developer not found") except NetworkError: print("Network error occurred") ``` ### Multi-Region Data ```python # Get developer apps from different regions us_apps = scraper.developer_analyze("5700313618786177705", country="us") uk_apps = scraper.developer_analyze("5700313618786177705", country="gb") jp_apps = scraper.developer_analyze("5700313618786177705", country="jp", lang="ja") ``` ### Pagination ```python # Get first 50 apps apps_batch1 = scraper.developer_analyze("5700313618786177705", count=50) # Get more apps (library handles this automatically up to count limit) apps_all = scraper.developer_analyze("5700313618786177705", count=200) ``` ================================================ FILE: README/LIST_METHODS.md ================================================ # List Methods Get top charts from Google Play Store (top free, top paid, top grossing). ## Quick Start ```python from gplay_scraper import GPlayScraper scraper = GPlayScraper() # Get top free apps top_free = scraper.list_analyze("TOP_FREE", "GAME", count=50) for app in top_free[:10]: print(f"{app['title']}: {app['installs']} installs") # Get specific fields titles = scraper.list_get_field("TOP_FREE", "title", "APPLICATION") print(titles) # Get multiple fields apps = scraper.list_get_fields("TOP_PAID", ["title", "price", "score"], "GAME") print(apps) ``` --- ## HTTP Clients The library supports 7 HTTP clients with automatic fallback. If one fails, it tries the next. ### Supported Clients 1. **requests** (default) - Standard Python HTTP library 2. **curl_cffi** - cURL with browser impersonation 3. **tls_client** - Advanced TLS fingerprinting 4. **httpx** - Modern async-capable HTTP client 5. **urllib3** - Low-level HTTP client 6. **cloudscraper** - Cloudflare bypass 7. **aiohttp** - Async HTTP client ### Usage ```python # Default (tries requests first, then others) scraper = GPlayScraper() # Specify a client scraper = GPlayScraper(http_client="curl_cffi") scraper = GPlayScraper(http_client="tls_client") scraper = GPlayScraper(http_client="httpx") ``` ### Installation ```bash # Default pip install requests # Advanced clients (optional) pip install curl-cffi pip install tls-client pip install httpx pip install urllib3 pip install cloudscraper pip install aiohttp ``` **Note:** The library automatically falls back to available clients if your preferred one fails. --- ## Methods ### `list_analyze(collection='TOP_FREE', category='APPLICATION', count=100, lang='en', country='us')` Returns top chart apps as a list of dictionaries. ```python apps = scraper.list_analyze("TOP_FREE", "GAME", count=50) # Returns: [{'appId': '...', 'title': '...', 'installs': '...', ...}, ...] ``` ### `list_get_field(collection, field, category='APPLICATION', count=100, lang='en', country='us')` Returns a specific field from all chart apps. ```python titles = scraper.list_get_field("TOP_FREE", "title", "APPLICATION") # Returns: ['App 1', 'App 2', 'App 3', ...] ``` ### `list_get_fields(collection, fields, category='APPLICATION', count=100, lang='en', country='us')` Returns multiple fields from all chart apps. ```python apps = scraper.list_get_fields("TOP_PAID", ["title", "price", "score"], "GAME") # Returns: [{'title': 'App 1', 'price': 4.99, 'score': 4.5}, ...] 
``` ### `list_print_field(collection, field, category='APPLICATION', count=100, lang='en', country='us')` Prints a specific field from all chart apps. ```python scraper.list_print_field("TOP_FREE", "title", "APPLICATION", count=20) # Output: # 1. title: App 1 # 2. title: App 2 # 3. title: App 3 ``` ### `list_print_fields(collection, fields, category='APPLICATION', count=100, lang='en', country='us')` Prints multiple fields from all chart apps. ```python scraper.list_print_fields("TOP_FREE", ["title", "score"], "GAME", count=20) # Output: # 1. title: App 1, score: 4.5 # 2. title: App 2, score: 4.2 ``` ### `list_print_all(collection='TOP_FREE', category='APPLICATION', count=100, lang='en', country='us')` Prints all data for all chart apps as formatted JSON. ```python scraper.list_print_all("TOP_FREE", "GAME", count=50) # Output: Full JSON array with all apps ``` --- ## Available Fields - `appId` - App package name (e.g., "com.example.app") - `title` - App name - `description` - App description - `icon` - App icon URL - `screenshots` - List of screenshot URLs - `url` - Play Store URL - `developer` - Developer name - `genre` - App category - `score` - Average rating (1-5) - `scoreText` - Rating as text (e.g., "4.5") - `installs` - Install count (e.g., "10,000,000+") - `currency` - Price currency (e.g., "USD") - `price` - App price (0 if free) - `free` - Boolean, true if free --- ## Collection Types ### Available Collections - **`TOP_FREE`** - Top free apps (most popular free apps) - **`TOP_PAID`** - Top paid apps (most popular paid apps) - **`TOP_GROSSING`** - Top grossing apps (highest revenue apps) --- ## Categories ### App Categories (36) - `APPLICATION` - All apps (default) - `ANDROID_WEAR` - Android Wear apps - `ART_AND_DESIGN` - Art & design - `AUTO_AND_VEHICLES` - Auto & vehicles - `BEAUTY` - Beauty - `BOOKS_AND_REFERENCE` - Books & reference - `BUSINESS` - Business - `COMICS` - Comics - `COMMUNICATION` - Communication - `DATING` - Dating - `EDUCATION` - Education - `ENTERTAINMENT` - Entertainment - `EVENTS` - Events - `FINANCE` - Finance - `FOOD_AND_DRINK` - Food & drink - `HEALTH_AND_FITNESS` - Health & fitness - `HOUSE_AND_HOME` - House & home - `LIBRARIES_AND_DEMO` - Libraries & demo - `LIFESTYLE` - Lifestyle - `MAPS_AND_NAVIGATION` - Maps & navigation - `MEDICAL` - Medical - `MUSIC_AND_AUDIO` - Music & audio - `NEWS_AND_MAGAZINES` - News & magazines - `PARENTING` - Parenting - `PERSONALIZATION` - Personalization - `PHOTOGRAPHY` - Photography - `PRODUCTIVITY` - Productivity - `SHOPPING` - Shopping - `SOCIAL` - Social - `SPORTS` - Sports - `TOOLS` - Tools - `TRAVEL_AND_LOCAL` - Travel & local - `VIDEO_PLAYERS` - Video players & editors - `WATCH_FACE` - Watch faces - `WEATHER` - Weather - `FAMILY` - Family ### Game Categories (18) - `GAME` - All games - `GAME_ACTION` - Action games - `GAME_ADVENTURE` - Adventure games - `GAME_ARCADE` - Arcade games - `GAME_BOARD` - Board games - `GAME_CARD` - Card games - `GAME_CASINO` - Casino games - `GAME_CASUAL` - Casual games - `GAME_EDUCATIONAL` - Educational games - `GAME_MUSIC` - Music games - `GAME_PUZZLE` - Puzzle games - `GAME_RACING` - Racing games - `GAME_ROLE_PLAYING` - Role playing games - `GAME_SIMULATION` - Simulation games - `GAME_SPORTS` - Sports games - `GAME_STRATEGY` - Strategy games - `GAME_TRIVIA` - Trivia games - `GAME_WORD` - Word games --- ## Practical Examples ### Top Free Games Analysis ```python top_games = scraper.list_analyze("TOP_FREE", "GAME", count=100) print(f"Total games: {len(top_games)}") print(f"Average rating: 
{sum(a['score'] for a in top_games if a['score']) / len(top_games):.2f}") print(f"\nTop 5 games:") for i, game in enumerate(top_games[:5], 1): print(f"{i}. {game['title']} - {game['score']}★ - {game['installs']} installs") ``` ### Compare Free vs Paid Apps ```python top_free = scraper.list_get_fields("TOP_FREE", ["title", "score", "installs"], "APPLICATION", count=50) top_paid = scraper.list_get_fields("TOP_PAID", ["title", "score", "price"], "APPLICATION", count=50) free_avg = sum(a['score'] or 0 for a in top_free) / len(top_free) paid_avg = sum(a['score'] or 0 for a in top_paid) / len(top_paid) print(f"Top Free Apps - Avg Rating: {free_avg:.2f}") print(f"Top Paid Apps - Avg Rating: {paid_avg:.2f}") ``` ### Find Highest Grossing Apps ```python top_grossing = scraper.list_get_fields("TOP_GROSSING", ["title", "developer", "genre"], "APPLICATION", count=20) print("Top 10 Highest Grossing Apps:") for i, app in enumerate(top_grossing[:10], 1): print(f"{i}. {app['title']} by {app['developer']} ({app['genre']})") ``` ### Category Comparison ```python categories = ["GAME", "SOCIAL", "PRODUCTIVITY", "ENTERTAINMENT"] for category in categories: apps = scraper.list_get_fields("TOP_FREE", ["title", "score"], category, count=10) avg_score = sum(a['score'] or 0 for a in apps) / len(apps) print(f"{category}: {avg_score:.2f}★ average") ``` ### Game Genre Analysis ```python game_genres = ["GAME_ACTION", "GAME_PUZZLE", "GAME_CASUAL", "GAME_STRATEGY"] for genre in game_genres: games = scraper.list_get_fields("TOP_FREE", ["title", "score", "installs"], genre, count=5) print(f"\n{genre}:") for i, game in enumerate(games, 1): print(f" {i}. {game['title']} - {game['score']}★") ``` ### Export Top Charts ```python import json top_free = scraper.list_analyze("TOP_FREE", "GAME", count=100) with open('top_free_games.json', 'w') as f: json.dump(top_free, f, indent=2) print(f"Exported {len(top_free)} games to top_free_games.json") ``` ### Track Chart Positions ```python import time import json from datetime import datetime def track_charts(): snapshot = { "timestamp": datetime.now().isoformat(), "top_free": scraper.list_get_fields("TOP_FREE", ["title", "score"], "GAME", count=10), "top_paid": scraper.list_get_fields("TOP_PAID", ["title", "price"], "GAME", count=10) } with open(f'charts_{datetime.now().strftime("%Y%m%d")}.json', 'w') as f: json.dump(snapshot, f, indent=2) print(f"Snapshot saved at {snapshot['timestamp']}") track_charts() ``` --- ## Parameters ### Initialization - `http_client` (str, optional) - HTTP client to use: "requests", "curl_cffi", "tls_client", "httpx", "urllib3", "cloudscraper", "aiohttp" (default: "requests") ### Method Parameters - `collection` (str) - Chart type: "TOP_FREE", "TOP_PAID", "TOP_GROSSING" (default: "TOP_FREE") - `category` (str, optional) - Category filter (default: "APPLICATION") - `count` (int, optional) - Maximum number of apps to return (default: 100) - `lang` (str, optional) - Language code (default: 'en') - `country` (str, optional) - Country code (default: 'us') - `field` (str) - Single field name - `fields` (List[str]) - List of field names ### Language & Country Codes - **Language**: 'en', 'es', 'fr', 'de', 'ja', 'ko', 'pt', 'ru', 'zh', etc. - **Country**: 'us', 'gb', 'ca', 'au', 'in', 'br', 'jp', 'kr', 'de', 'fr', etc. 
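### Combining with App Methods

Chart entries only carry the lightweight fields listed above; for deeper data you can feed the returned `appId` values into the app methods. The sketch below is illustrative (it assumes the default `lang`/`country` and keeps `count` small so the built-in per-request delay stays manageable):

```python
from gplay_scraper import GPlayScraper

scraper = GPlayScraper()

# Take the top 5 free games from the chart, then enrich each with app-level fields
app_ids = scraper.list_get_field("TOP_FREE", "appId", "GAME", count=5)

for app_id in app_ids:
    details = scraper.app_get_fields(app_id, ["title", "score", "lastUpdated"])
    print(f"{details['title']}: {details['score']}★ (updated {details['lastUpdated']})")
```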
--- ## When to Use Each Method - **`list_analyze()`** - Need complete data for all chart apps - **`list_get_field()`** - Need just one field from all apps - **`list_get_fields()`** - Need specific fields from all apps (more efficient) - **`list_print_field()`** - Quick debugging/console output - **`list_print_fields()`** - Quick debugging of multiple fields - **`list_print_all()`** - Explore available data structure --- ## Advanced Features ### Rate Limiting Built-in rate limiting (1 second delay between requests) prevents blocking. ### Error Handling ```python from gplay_scraper import GPlayScraper, AppNotFoundError, NetworkError scraper = GPlayScraper() try: apps = scraper.list_analyze("INVALID_COLLECTION", "GAME") except AppNotFoundError: print("Collection not found") except NetworkError: print("Network error occurred") ``` ### Multi-Region Charts ```python # Get charts from different regions us_charts = scraper.list_analyze("TOP_FREE", "GAME", country="us") uk_charts = scraper.list_analyze("TOP_FREE", "GAME", country="gb") jp_charts = scraper.list_analyze("TOP_FREE", "GAME", country="jp", lang="ja") print(f"US Top Game: {us_charts[0]['title']}") print(f"UK Top Game: {uk_charts[0]['title']}") print(f"JP Top Game: {jp_charts[0]['title']}") ``` ### Batch Analysis ```python # Analyze multiple collections at once collections = ["TOP_FREE", "TOP_PAID", "TOP_GROSSING"] results = {} for collection in collections: apps = scraper.list_get_fields(collection, ["title", "score"], "GAME", count=10) results[collection] = apps print(f"{collection}: {len(apps)} apps retrieved") ``` ================================================ FILE: README/README.md ================================================ # GPlay Scraper Documentation Complete documentation for all 7 method types in GPlay Scraper. ## 📚 Method Documentation ### [App Methods](APP_METHODS.md) Extract comprehensive app data with 65+ fields including ratings, installs, pricing, screenshots, permissions, and technical details. **Key Features:** - 65+ data fields per app - Basic info, ratings, installs, pricing - Media content (screenshots, videos, icons) - Technical specs (version, size, Android version) - Developer information and contact details **Use Cases:** App analysis, competitive research, market intelligence, data collection --- ### [Search Methods](SEARCH_METHODS.md) Search Google Play Store apps by keyword with filtering and pagination. **Key Features:** - Search by keyword, app name, or category - Filter and paginate results - Get app titles, developers, ratings, prices - Multi-language and multi-region support **Use Cases:** App discovery, market research, competitor analysis, trend tracking --- ### [Reviews Methods](REVIEWS_METHODS.md) Extract user reviews with ratings, timestamps, and detailed feedback for sentiment analysis. **Key Features:** - Get reviews with ratings (1-5 stars) - Review text, timestamps, app versions - Reviewer names and helpful vote counts - Sort by newest, relevant, or highest rated **Use Cases:** Sentiment analysis, user feedback, app improvement, competitive monitoring --- ### [Developer Methods](DEVELOPER_METHODS.md) Get all apps published by a specific developer using their developer ID. 
**Key Features:** - Complete app portfolio for any developer - Track developer's app performance - Analyze ratings and install counts - Monitor developer's market presence **Use Cases:** Developer research, portfolio analysis, competitive intelligence, market tracking --- ### [List Methods](LIST_METHODS.md) Access Google Play Store top charts including top free, top paid, and top grossing apps by category. **Key Features:** - Top free, top paid, top grossing charts - 54 categories (36 app + 18 game) - Ranked lists with install counts and ratings - Trending apps and market leaders **Use Cases:** Market trends, category analysis, competitive benchmarking, app discovery --- ### [Similar Methods](SIMILAR_METHODS.md) Find apps similar to a reference app for competitive analysis and market research. **Key Features:** - Discover competitor apps - Find similar/related apps - Get titles, developers, ratings, pricing - Competitive analysis and positioning **Use Cases:** Competitive analysis, market research, app discovery, positioning strategy --- ### [Suggest Methods](SUGGEST_METHODS.md) Get search suggestions and autocomplete from Google Play Store for keyword discovery and ASO. **Key Features:** - Autocomplete suggestions - Popular search terms - Nested keyword discovery - Multi-language support **Use Cases:** Keyword research, ASO optimization, content strategy, market insights --- ## 🚀 Quick Start ```python from gplay_scraper import GPlayScraper scraper = GPlayScraper() # App Methods scraper.app_print_all("com.whatsapp") # Search Methods scraper.search_print_all("fitness tracker", count=20) # Reviews Methods scraper.reviews_print_all("com.whatsapp", count=100, sort="NEWEST") # Developer Methods scraper.developer_print_all("5700313618786177705", count=50) # List Methods scraper.list_print_all("TOP_FREE", "GAME", count=50) # Similar Methods scraper.similar_print_all("com.whatsapp", count=30) # Suggest Methods scraper.suggest_print_all("photo editor", count=10) ``` ## 📖 Method Pattern Each method type follows the same pattern with 6 functions: - **`analyze()`** - Get all data as dictionary/list - **`get_field()`** - Get single field value - **`get_fields()`** - Get multiple fields as dictionary - **`print_field()`** - Print single field to console - **`print_fields()`** - Print multiple fields to console - **`print_all()`** - Print all data as formatted JSON ## 🌍 Multi-Language & Multi-Region All methods support multi-language and multi-region parameters: ```python # Get data in Spanish from Spain scraper.app_analyze("com.whatsapp", lang="es", country="es") # Get data in Japanese from Japan scraper.search_analyze("game", count=20, lang="ja", country="jp") # Get data in French from France scraper.reviews_analyze("com.whatsapp", count=50, lang="fr", country="fr") ``` **Supported:** - **Languages:** 100+ (en, es, fr, de, ja, ko, zh, ar, pt, ru, etc.) - **Countries:** 150+ (us, gb, ca, au, in, br, jp, kr, de, fr, etc.) ## 🔧 HTTP Clients All methods support 7 HTTP clients with automatic fallback: ```python # Default (requests) scraper = GPlayScraper() # Specify client scraper = GPlayScraper(http_client="curl_cffi") scraper = GPlayScraper(http_client="tls_client") scraper = GPlayScraper(http_client="httpx") ``` **Available Clients:** 1. **requests** (default) - Standard Python HTTP library 2. **curl_cffi** - Browser impersonation with TLS fingerprinting 3. **tls_client** - Custom TLS fingerprinting 4. **httpx** - Modern async-capable HTTP client 5. **urllib3** - Low-level HTTP client 6. 
**cloudscraper** - Cloudflare bypass capabilities 7. **aiohttp** - Async HTTP client ## 📊 What Can You Scrape? ### App Data (65+ Fields) - Basic: title, developer, description, category, genre - Ratings: score, ratings count, histogram - Installs: install count ranges, statistics - Pricing: free/paid, price, in-app purchases - Media: icon, screenshots, video, header image - Technical: version, size, Android version, dates - Content: age rating, privacy policy, contact info - Features: permissions, what's new, website ### Search & Discovery - Search apps by keyword - Get search suggestions - Find similar/competitor apps - Access top charts by category ### Developer Intelligence - Complete app portfolio - Performance tracking - Market presence analysis ### User Reviews - Reviews with ratings and text - Timestamps and app versions - Reviewer names and votes - Filter by sort options ### Market Research - Multi-language support (100+ languages) - Multi-region data (150+ countries) - Localized pricing and availability - Competitive analysis ## 🎯 Use Cases **Market Research** - Analyze competitor apps - Track market trends - Identify opportunities - Benchmark performance **App Development** - Monitor user feedback - Track app performance - Analyze competitors - Optimize app store presence **Data Analysis** - Collect app data for research - Sentiment analysis from reviews - Market intelligence reports - Machine learning datasets **Business Intelligence** - Competitive monitoring - Market positioning - Trend analysis - Strategic planning ## 📄 License This project is licensed under the MIT License. --- **For detailed documentation on each method type, click the links above.** ================================================ FILE: README/REVIEWS_METHODS.md ================================================ # Reviews Methods Extract user reviews from Google Play Store apps with ratings, content, and metadata. ## Quick Start ```python from gplay_scraper import GPlayScraper scraper = GPlayScraper() # Get reviews reviews = scraper.reviews_analyze("com.whatsapp", count=100, sort="NEWEST") for review in reviews[:5]: print(f"{review['userName']}: {review['score']}★") print(f" {review['content'][:100]}...") # Get specific fields scores = scraper.reviews_get_field("com.whatsapp", "score", count=100) print(f"Average: {sum(scores)/len(scores):.2f}★") # Get multiple fields reviews = scraper.reviews_get_fields("com.whatsapp", ["userName", "score", "content"], count=50) print(reviews) ``` --- ## HTTP Clients The library supports 7 HTTP clients with automatic fallback. If one fails, it tries the next. ### Supported Clients 1. **requests** (default) - Standard Python HTTP library 2. **curl_cffi** - cURL with browser impersonation 3. **tls_client** - Advanced TLS fingerprinting 4. **httpx** - Modern async-capable HTTP client 5. **urllib3** - Low-level HTTP client 6. **cloudscraper** - Cloudflare bypass 7. **aiohttp** - Async HTTP client ### Usage ```python # Default (tries requests first, then others) scraper = GPlayScraper() # Specify a client scraper = GPlayScraper(http_client="curl_cffi") scraper = GPlayScraper(http_client="tls_client") scraper = GPlayScraper(http_client="httpx") ``` ### Installation ```bash # Default pip install requests # Advanced clients (optional) pip install curl-cffi pip install tls-client pip install httpx pip install urllib3 pip install cloudscraper pip install aiohttp ``` **Note:** The library automatically falls back to available clients if your preferred one fails. 
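If you prefer to pick a client explicitly, you can first check which of the optional packages are importable. This is only a sketch using the Python standard library; the automatic fallback described above makes it unnecessary (note that import names such as `curl_cffi` and `tls_client` differ from the pip package names listed above):

```python
import importlib.util

from gplay_scraper import GPlayScraper

# Optional clients in rough order of preference (import names, not pip names)
preferred = ["curl_cffi", "tls_client", "httpx", "requests"]

# Pick the first client whose package is installed; "requests" is the final fallback
chosen = next((name for name in preferred if importlib.util.find_spec(name)), "requests")

scraper = GPlayScraper(http_client=chosen)
print(f"Using HTTP client: {chosen}")
```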
--- ## Methods ### `reviews_analyze(app_id, count=100, lang='en', country='us', sort='NEWEST')` Returns reviews as a list of dictionaries. ```python reviews = scraper.reviews_analyze("com.whatsapp", count=100, sort="NEWEST") # Returns: [{'reviewId': '...', 'userName': '...', 'score': 5, 'content': '...', ...}, ...] ``` ### `reviews_get_field(app_id, field, count=100, lang='en', country='us', sort='NEWEST')` Returns a specific field from all reviews. ```python scores = scraper.reviews_get_field("com.whatsapp", "score", count=100) # Returns: [5, 4, 5, 3, 4, ...] ``` ### `reviews_get_fields(app_id, fields, count=100, lang='en', country='us', sort='NEWEST')` Returns multiple fields from all reviews. ```python reviews = scraper.reviews_get_fields("com.whatsapp", ["userName", "score", "content"], count=50) # Returns: [{'userName': 'John', 'score': 5, 'content': 'Great app!'}, ...] ``` ### `reviews_print_field(app_id, field, count=100, lang='en', country='us', sort='NEWEST')` Prints a specific field from all reviews. ```python scraper.reviews_print_field("com.whatsapp", "content", count=20) # Output: # 1. content: Great app! # 2. content: Love it # 3. content: Needs improvement ``` ### `reviews_print_fields(app_id, fields, count=100, lang='en', country='us', sort='NEWEST')` Prints multiple fields from all reviews. ```python scraper.reviews_print_fields("com.whatsapp", ["userName", "score"], count=20) # Output: # userName: John, score: 5 # userName: Jane, score: 4 ``` ### `reviews_print_all(app_id, count=100, lang='en', country='us', sort='NEWEST')` Prints all review data as formatted JSON. ```python scraper.reviews_print_all("com.whatsapp", count=50) # Output: Full JSON array with all reviews ``` --- ## Available Fields - `reviewId` - Unique review ID - `userName` - Reviewer name - `userImage` - Reviewer avatar URL - `score` - Review rating (1-5 stars) - `content` - Review text/comment - `thumbsUpCount` - Number of helpful votes - `appVersion` - App version reviewed - `at` - Review timestamp (ISO 8601 format) --- ## Sort Options - **`NEWEST`** (default) - Most recent reviews first - **`RELEVANT`** - Most relevant/helpful reviews - **`RATING`** - Sorted by rating (highest/lowest) --- ## Practical Examples ### Sentiment Analysis ```python reviews = scraper.reviews_get_fields("com.whatsapp", ["score", "content"], count=200) # Rating distribution rating_dist = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0} for review in reviews: rating_dist[review['score']] += 1 print("Rating Distribution:") for rating, count in rating_dist.items(): print(f"{rating}★: {'█' * count} ({count})") # Average rating avg = sum(r['score'] for r in reviews) / len(reviews) print(f"\nAverage: {avg:.2f}★") ``` ### Find Common Issues ```python reviews = scraper.reviews_get_fields("com.whatsapp", ["score", "content"], count=100, sort="RATING") # Get low-rated reviews low_rated = [r for r in reviews if r['score'] <= 2] print(f"Found {len(low_rated)} low-rated reviews:") for review in low_rated[:10]: print(f"- {review['content'][:100]}...") ``` ### Track Review Trends ```python from datetime import datetime reviews = scraper.reviews_get_fields("com.whatsapp", ["at", "score"], count=500, sort="NEWEST") # Group by month monthly_scores = {} for review in reviews: date = datetime.fromisoformat(review['at']) month_key = date.strftime("%Y-%m") if month_key not in monthly_scores: monthly_scores[month_key] = [] monthly_scores[month_key].append(review['score']) # Calculate monthly averages for month, scores in sorted(monthly_scores.items()): avg = 
sum(scores) / len(scores) print(f"{month}: {avg:.2f}★ ({len(scores)} reviews)") ``` ### Compare App Versions ```python reviews = scraper.reviews_get_fields("com.whatsapp", ["appVersion", "score"], count=300) # Group by version version_scores = {} for review in reviews: version = review['appVersion'] or "Unknown" if version not in version_scores: version_scores[version] = [] version_scores[version].append(review['score']) # Show version ratings for version, scores in sorted(version_scores.items()): if len(scores) >= 5: # Only versions with 5+ reviews avg = sum(scores) / len(scores) print(f"v{version}: {avg:.2f}★ ({len(scores)} reviews)") ``` ### Export Reviews to CSV ```python import csv reviews = scraper.reviews_analyze("com.whatsapp", count=500) with open('reviews.csv', 'w', newline='', encoding='utf-8') as f: writer = csv.DictWriter(f, fieldnames=['userName', 'score', 'content', 'at', 'appVersion']) writer.writeheader() for review in reviews: writer.writerow({ 'userName': review['userName'], 'score': review['score'], 'content': review['content'], 'at': review['at'], 'appVersion': review['appVersion'] }) print(f"Exported {len(reviews)} reviews to reviews.csv") ``` ### Identify Top Reviewers ```python reviews = scraper.reviews_get_fields("com.whatsapp", ["userName", "thumbsUpCount"], count=200) # Sort by helpful votes top_reviewers = sorted(reviews, key=lambda x: x['thumbsUpCount'] or 0, reverse=True)[:10] print("Top 10 Most Helpful Reviewers:") for i, review in enumerate(top_reviewers, 1): print(f"{i}. {review['userName']}: {review['thumbsUpCount']} helpful votes") ``` ### Monitor Recent Feedback ```python import time from datetime import datetime def monitor_reviews(app_id, interval=3600): """Check for new reviews every hour""" last_check = datetime.now() while True: reviews = scraper.reviews_get_fields(app_id, ["at", "score", "content"], count=50, sort="NEWEST") new_reviews = [r for r in reviews if datetime.fromisoformat(r['at']) > last_check] if new_reviews: print(f"\n{len(new_reviews)} new reviews:") for review in new_reviews: print(f"- {review['score']}★: {review['content'][:80]}...") last_check = datetime.now() time.sleep(interval) # Run monitor (Ctrl+C to stop) # monitor_reviews("com.whatsapp") ``` ### Keyword Analysis ```python from collections import Counter import re reviews = scraper.reviews_get_field("com.whatsapp", "content", count=500) # Extract words words = [] for content in reviews: if content: words.extend(re.findall(r'\b\w+\b', content.lower())) # Remove common words stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'is', 'are', 'was', 'were', 'in', 'on', 'at', 'to', 'for'} filtered_words = [w for w in words if w not in stop_words and len(w) > 3] # Top keywords top_keywords = Counter(filtered_words).most_common(20) print("Top Keywords in Reviews:") for word, count in top_keywords: print(f"{word}: {count}") ``` --- ## Parameters ### Initialization - `http_client` (str, optional) - HTTP client to use: "requests", "curl_cffi", "tls_client", "httpx", "urllib3", "cloudscraper", "aiohttp" (default: "requests") ### Method Parameters - `app_id` (str, required) - App package name - `count` (int, optional) - Maximum number of reviews to return (default: 100) - `lang` (str, optional) - Language code (default: 'en') - `country` (str, optional) - Country code (default: 'us') - `sort` (str, optional) - Sort order: "NEWEST", "RELEVANT", "RATING" (default: "NEWEST") - `field` (str) - Single field name - `fields` (List[str]) - List of field names ### Language & Country Codes - 
**Language**: 'en', 'es', 'fr', 'de', 'ja', 'ko', 'pt', 'ru', 'zh', etc. - **Country**: 'us', 'gb', 'ca', 'au', 'in', 'br', 'jp', 'kr', 'de', 'fr', etc. --- ## When to Use Each Method - **`reviews_analyze()`** - Need complete review data for analysis - **`reviews_get_field()`** - Need just one field (e.g., all scores) - **`reviews_get_fields()`** - Need specific fields (more efficient) - **`reviews_print_field()`** - Quick debugging/console output - **`reviews_print_fields()`** - Quick debugging of multiple fields - **`reviews_print_all()`** - Explore available data structure --- ## Advanced Features ### Rate Limiting Built-in rate limiting (1 second delay between requests) prevents blocking. ### Batch Fetching Reviews are fetched in batches of 50. The library automatically handles pagination. ```python # Fetch 500 reviews (10 batches of 50) reviews = scraper.reviews_analyze("com.whatsapp", count=500) print(f"Fetched {len(reviews)} reviews") ``` ### Error Handling ```python from gplay_scraper import GPlayScraper, AppNotFoundError, NetworkError scraper = GPlayScraper() try: reviews = scraper.reviews_analyze("invalid.app.id") except AppNotFoundError: print("App not found") except NetworkError: print("Network error occurred") ``` ### Multi-Region Reviews ```python # Get reviews from different regions us_reviews = scraper.reviews_analyze("com.whatsapp", country="us", count=100) uk_reviews = scraper.reviews_analyze("com.whatsapp", country="gb", count=100) jp_reviews = scraper.reviews_analyze("com.whatsapp", country="jp", lang="ja", count=100) print(f"US avg: {sum(r['score'] for r in us_reviews)/len(us_reviews):.2f}★") print(f"UK avg: {sum(r['score'] for r in uk_reviews)/len(uk_reviews):.2f}★") print(f"JP avg: {sum(r['score'] for r in jp_reviews)/len(jp_reviews):.2f}★") ``` ### Sort Comparison ```python # Compare different sort orders newest = scraper.reviews_get_fields("com.whatsapp", ["score"], count=100, sort="NEWEST") relevant = scraper.reviews_get_fields("com.whatsapp", ["score"], count=100, sort="RELEVANT") rating = scraper.reviews_get_fields("com.whatsapp", ["score"], count=100, sort="RATING") print(f"Newest avg: {sum(r['score'] for r in newest)/len(newest):.2f}★") print(f"Relevant avg: {sum(r['score'] for r in relevant)/len(relevant):.2f}★") print(f"Rating avg: {sum(r['score'] for r in rating)/len(rating):.2f}★") ``` ================================================ FILE: README/SEARCH_METHODS.md ================================================ # Search Methods Search for apps on Google Play Store by keyword, app name, or category. ## Quick Start ```python from gplay_scraper import GPlayScraper scraper = GPlayScraper() # Search for apps results = scraper.search_analyze("social media", count=20) for app in results: print(f"{app['title']}: {app['score']}★ by {app['developer']}") # Get specific fields titles = scraper.search_get_field("fitness tracker", "title") print(titles) # Get multiple fields apps = scraper.search_get_fields("photo editor", ["title", "score", "free"]) print(apps) ``` --- ## HTTP Clients The library supports 7 HTTP clients with automatic fallback. If one fails, it tries the next. ### Supported Clients 1. **requests** (default) - Standard Python HTTP library 2. **curl_cffi** - cURL with browser impersonation 3. **tls_client** - Advanced TLS fingerprinting 4. **httpx** - Modern async-capable HTTP client 5. **urllib3** - Low-level HTTP client 6. **cloudscraper** - Cloudflare bypass 7. 
**aiohttp** - Async HTTP client ### Usage ```python # Default (tries requests first, then others) scraper = GPlayScraper() # Specify a client scraper = GPlayScraper(http_client="curl_cffi") scraper = GPlayScraper(http_client="tls_client") scraper = GPlayScraper(http_client="httpx") ``` ### Installation ```bash # Default pip install requests # Advanced clients (optional) pip install curl-cffi pip install tls-client pip install httpx pip install urllib3 pip install cloudscraper pip install aiohttp ``` **Note:** The library automatically falls back to available clients if your preferred one fails. --- ## Methods ### `search_analyze(query, count=100, lang='en', country='us')` Returns search results as a list of dictionaries. ```python results = scraper.search_analyze("social media", count=20) # Returns: [{'appId': '...', 'title': '...', 'score': 4.5, ...}, ...] ``` ### `search_get_field(query, field, count=100, lang='en', country='us')` Returns a specific field from all search results. ```python titles = scraper.search_get_field("fitness tracker", "title") # Returns: ['App 1', 'App 2', 'App 3', ...] ``` ### `search_get_fields(query, fields, count=100, lang='en', country='us')` Returns multiple fields from all search results. ```python apps = scraper.search_get_fields("photo editor", ["title", "score", "free"]) # Returns: [{'title': 'App 1', 'score': 4.5, 'free': True}, ...] ``` ### `search_print_field(query, field, count=100, lang='en', country='us')` Prints a specific field from all search results. ```python scraper.search_print_field("social media", "title", count=10) # Output: # 0. title: App 1 # 1. title: App 2 # 2. title: App 3 ``` ### `search_print_fields(query, fields, count=100, lang='en', country='us')` Prints multiple fields from all search results. ```python scraper.search_print_fields("social media", ["title", "score"], count=10) # Output: # 0. title: App 1, score: 4.5 # 1. title: App 2, score: 4.2 ``` ### `search_print_all(query, count=100, lang='en', country='us')` Prints all data for all search results as formatted JSON. ```python scraper.search_print_all("social media", count=20) # Output: Full JSON array with all search results ``` --- ## Available Fields - `appId` - App package name (e.g., "com.example.app") - `title` - App name - `description` - App description/summary - `icon` - App icon URL - `url` - Play Store URL - `developer` - Developer name - `score` - Average rating (1-5) - `scoreText` - Rating as text (e.g., "4.5") - `currency` - Price currency (e.g., "USD") - `price` - App price (0 if free) - `free` - Boolean, true if free --- ## Practical Examples ### Find Top-Rated Apps ```python results = scraper.search_get_fields("productivity", ["title", "score", "developer"], count=50) # Filter high-rated apps top_rated = [app for app in results if app['score'] and app['score'] >= 4.5] top_rated.sort(key=lambda x: x['score'], reverse=True) print("Top-Rated Productivity Apps:") for i, app in enumerate(top_rated[:10], 1): print(f"{i}. 
{app['title']}: {app['score']}★ by {app['developer']}") ``` ### Compare Free vs Paid Apps ```python results = scraper.search_get_fields("photo editor", ["title", "free", "price", "score"], count=50) free_apps = [app for app in results if app['free']] paid_apps = [app for app in results if not app['free']] free_avg = sum(app['score'] or 0 for app in free_apps) / len(free_apps) if free_apps else 0 paid_avg = sum(app['score'] or 0 for app in paid_apps) / len(paid_apps) if paid_apps else 0 print(f"Free apps: {len(free_apps)} (avg: {free_avg:.2f}★)") print(f"Paid apps: {len(paid_apps)} (avg: {paid_avg:.2f}★)") ``` ### Market Research ```python keywords = ["fitness", "meditation", "diet", "sleep tracker"] for keyword in keywords: results = scraper.search_get_fields(keyword, ["title", "score"], count=10) avg_score = sum(app['score'] or 0 for app in results) / len(results) print(f"{keyword}: {len(results)} apps, avg {avg_score:.2f}★") ``` ### Find Competitors ```python query = "task manager" results = scraper.search_get_fields(query, ["title", "developer", "score", "free"], count=30) print(f"Competitors for '{query}':") for i, app in enumerate(results[:15], 1): price = "Free" if app['free'] else f"${app.get('price', 'N/A')}" print(f"{i}. {app['title']} by {app['developer']} - {app['score']}★ ({price})") ``` ### Export Search Results ```python import json query = "language learning" results = scraper.search_analyze(query, count=100) with open(f'search_{query.replace(" ", "_")}.json', 'w') as f: json.dump(results, f, indent=2) print(f"Exported {len(results)} results for '{query}'") ``` ### Multi-Keyword Search ```python keywords = ["vpn", "proxy", "security"] all_results = {} for keyword in keywords: results = scraper.search_get_fields(keyword, ["appId", "title", "score"], count=20) all_results[keyword] = results print(f"{keyword}: {len(results)} apps found") # Find apps appearing in multiple searches app_ids = {} for keyword, results in all_results.items(): for app in results: app_id = app['appId'] if app_id not in app_ids: app_ids[app_id] = {'title': app['title'], 'keywords': []} app_ids[app_id]['keywords'].append(keyword) # Apps in multiple categories multi_category = {aid: data for aid, data in app_ids.items() if len(data['keywords']) > 1} print(f"\nApps in multiple categories: {len(multi_category)}") for app_id, data in list(multi_category.items())[:5]: print(f"- {data['title']}: {', '.join(data['keywords'])}") ``` ### Analyze Developer Presence ```python from collections import Counter query = "puzzle game" results = scraper.search_get_field(query, "developer", count=100) # Count apps per developer developer_counts = Counter(results) top_developers = developer_counts.most_common(10) print(f"Top Developers in '{query}':") for developer, count in top_developers: print(f"{developer}: {count} apps") ``` ### Price Range Analysis ```python query = "premium photo editor" results = scraper.search_get_fields(query, ["title", "price", "free"], count=50) paid_apps = [app for app in results if not app['free'] and app['price']] if paid_apps: prices = [app['price'] for app in paid_apps] print(f"Price Analysis for '{query}':") print(f" Min: ${min(prices):.2f}") print(f" Max: ${max(prices):.2f}") print(f" Avg: ${sum(prices)/len(prices):.2f}") print(f" Total paid apps: {len(paid_apps)}") ``` --- ## Parameters ### Initialization - `http_client` (str, optional) - HTTP client to use: "requests", "curl_cffi", "tls_client", "httpx", "urllib3", "cloudscraper", "aiohttp" (default: "requests") ### Method Parameters - 
`query` (str, required) - Search keyword or phrase - `count` (int, optional) - Maximum number of results to return (default: 100) - `lang` (str, optional) - Language code (default: 'en') - `country` (str, optional) - Country code (default: 'us') - `field` (str) - Single field name - `fields` (List[str]) - List of field names ### Search Query Tips - Use specific keywords: "fitness tracker" vs "fitness" - Try app categories: "puzzle game", "photo editor" - Search by functionality: "vpn", "password manager" - Use brand names: "google", "microsoft" - Combine terms: "free music player" ### Language & Country Codes - **Language**: 'en', 'es', 'fr', 'de', 'ja', 'ko', 'pt', 'ru', 'zh', etc. - **Country**: 'us', 'gb', 'ca', 'au', 'in', 'br', 'jp', 'kr', 'de', 'fr', etc. --- ## When to Use Each Method - **`search_analyze()`** - Need complete data for all search results - **`search_get_field()`** - Need just one field from all results - **`search_get_fields()`** - Need specific fields from all results (more efficient) - **`search_print_field()`** - Quick debugging/console output - **`search_print_fields()`** - Quick debugging of multiple fields - **`search_print_all()`** - Explore available data structure --- ## Advanced Features ### Rate Limiting Built-in rate limiting (1 second delay between requests) prevents blocking. ### Error Handling ```python from gplay_scraper import GPlayScraper, AppNotFoundError, NetworkError scraper = GPlayScraper() try: results = scraper.search_analyze("") except ValueError: print("Query cannot be empty") except NetworkError: print("Network error occurred") ``` ### Multi-Region Search ```python # Search in different regions us_results = scraper.search_analyze("vpn", country="us", count=20) uk_results = scraper.search_analyze("vpn", country="gb", count=20) jp_results = scraper.search_analyze("vpn", country="jp", lang="ja", count=20) print(f"US: {len(us_results)} results") print(f"UK: {len(uk_results)} results") print(f"JP: {len(jp_results)} results") ``` ### Pagination ```python # Get more results results_20 = scraper.search_analyze("game", count=20) results_50 = scraper.search_analyze("game", count=50) results_100 = scraper.search_analyze("game", count=100) print(f"20 results: {len(results_20)}") print(f"50 results: {len(results_50)}") print(f"100 results: {len(results_100)}") ``` ### Search Result Filtering ```python results = scraper.search_analyze("music player", count=50) # Filter by rating high_rated = [app for app in results if app['score'] and app['score'] >= 4.0] # Filter by price free_apps = [app for app in results if app['free']] # Filter by developer google_apps = [app for app in results if 'google' in app['developer'].lower()] print(f"High rated: {len(high_rated)}") print(f"Free: {len(free_apps)}") print(f"Google: {len(google_apps)}") ``` ================================================ FILE: README/SIMILAR_METHODS.md ================================================ # Similar Methods Find similar and related apps on Google Play Store based on a reference app. 
## Quick Start ```python from gplay_scraper import GPlayScraper scraper = GPlayScraper() # Get similar apps similar = scraper.similar_analyze("com.whatsapp", count=20) for app in similar: print(f"{app['title']}: {app['score']}★ by {app['developer']}") # Get specific fields titles = scraper.similar_get_field("com.whatsapp", "title") print(titles) # Get multiple fields apps = scraper.similar_get_fields("com.whatsapp", ["title", "score", "free"]) print(apps) ``` --- ## HTTP Clients The library supports 7 HTTP clients with automatic fallback. If one fails, it tries the next. ### Supported Clients 1. **requests** (default) - Standard Python HTTP library 2. **curl_cffi** - cURL with browser impersonation 3. **tls_client** - Advanced TLS fingerprinting 4. **httpx** - Modern async-capable HTTP client 5. **urllib3** - Low-level HTTP client 6. **cloudscraper** - Cloudflare bypass 7. **aiohttp** - Async HTTP client ### Usage ```python # Default (tries requests first, then others) scraper = GPlayScraper() # Specify a client scraper = GPlayScraper(http_client="curl_cffi") scraper = GPlayScraper(http_client="tls_client") scraper = GPlayScraper(http_client="httpx") ``` ### Installation ```bash # Default pip install requests # Advanced clients (optional) pip install curl-cffi pip install tls-client pip install httpx pip install urllib3 pip install cloudscraper pip install aiohttp ``` **Note:** The library automatically falls back to available clients if your preferred one fails. --- ## Methods ### `similar_analyze(app_id, count=100, lang='en', country='us')` Returns similar apps as a list of dictionaries. ```python similar = scraper.similar_analyze("com.whatsapp", count=20) # Returns: [{'appId': '...', 'title': '...', 'score': 4.5, ...}, ...] ``` ### `similar_get_field(app_id, field, count=100, lang='en', country='us')` Returns a specific field from all similar apps. ```python titles = scraper.similar_get_field("com.whatsapp", "title") # Returns: ['App 1', 'App 2', 'App 3', ...] ``` ### `similar_get_fields(app_id, fields, count=100, lang='en', country='us')` Returns multiple fields from all similar apps. ```python apps = scraper.similar_get_fields("com.whatsapp", ["title", "score", "free"]) # Returns: [{'title': 'App 1', 'score': 4.5, 'free': True}, ...] ``` ### `similar_print_field(app_id, field, count=100, lang='en', country='us')` Prints a specific field from all similar apps. ```python scraper.similar_print_field("com.whatsapp", "title", count=10) # Output: # 1. title: App 1 # 2. title: App 2 # 3. title: App 3 ``` ### `similar_print_fields(app_id, fields, count=100, lang='en', country='us')` Prints multiple fields from all similar apps. ```python scraper.similar_print_fields("com.whatsapp", ["title", "score"], count=10) # Output: # 1. title: App 1, score: 4.5 # 2. title: App 2, score: 4.2 ``` ### `similar_print_all(app_id, count=100, lang='en', country='us')` Prints all data for all similar apps as formatted JSON. 
```python scraper.similar_print_all("com.whatsapp", count=20) # Output: Full JSON array with all similar apps ``` --- ## Available Fields - `appId` - App package name (e.g., "com.example.app") - `title` - App name - `description` - App description - `icon` - App icon URL - `url` - Play Store URL - `developer` - Developer name - `score` - Average rating (1-5) - `scoreText` - Rating as text (e.g., "4.5") - `currency` - Price currency (e.g., "USD") - `price` - App price (0 if free) - `free` - Boolean, true if free --- ## Practical Examples ### Competitive Analysis ```python app_id = "com.whatsapp" similar = scraper.similar_get_fields(app_id, ["title", "score", "developer"], count=30) print(f"Competitors of {app_id}:") for i, app in enumerate(similar[:10], 1): print(f"{i}. {app['title']}: {app['score']}★ by {app['developer']}") # Calculate average competitor rating avg_score = sum(app['score'] or 0 for app in similar) / len(similar) print(f"\nAverage competitor rating: {avg_score:.2f}★") ``` ### Find Better Alternatives ```python app_id = "com.example.app" my_app = scraper.app_get_field(app_id, "score") similar = scraper.similar_get_fields(app_id, ["title", "score", "url"], count=50) # Find apps with higher ratings better_apps = [app for app in similar if app['score'] and app['score'] > my_app] better_apps.sort(key=lambda x: x['score'], reverse=True) print(f"Apps better than {app_id} ({my_app}★):") for app in better_apps[:10]: print(f"- {app['title']}: {app['score']}★") ``` ### Market Positioning ```python app_id = "com.whatsapp" similar = scraper.similar_get_fields(app_id, ["title", "free", "price", "score"], count=50) free_apps = [app for app in similar if app['free']] paid_apps = [app for app in similar if not app['free']] print(f"Market Analysis for {app_id}:") print(f" Free competitors: {len(free_apps)}") print(f" Paid competitors: {len(paid_apps)}") if free_apps: print(f" Free avg rating: {sum(a['score'] or 0 for a in free_apps)/len(free_apps):.2f}★") if paid_apps: print(f" Paid avg rating: {sum(a['score'] or 0 for a in paid_apps)/len(paid_apps):.2f}★") ``` ### Developer Overlap Analysis ```python from collections import Counter app_id = "com.whatsapp" similar = scraper.similar_get_field(app_id, "developer", count=50) # Count apps per developer developer_counts = Counter(similar) top_developers = developer_counts.most_common(5) print(f"Top developers in similar apps to {app_id}:") for developer, count in top_developers: print(f"{developer}: {count} apps") ``` ### Export Similar Apps ```python import json app_id = "com.whatsapp" similar = scraper.similar_analyze(app_id, count=50) with open(f'similar_to_{app_id}.json', 'w') as f: json.dump(similar, f, indent=2) print(f"Exported {len(similar)} similar apps to similar_to_{app_id}.json") ``` ### Compare Multiple Apps ```python apps_to_compare = ["com.whatsapp", "com.telegram", "com.viber"] all_similar = {} for app_id in apps_to_compare: similar = scraper.similar_get_fields(app_id, ["appId", "title"], count=20) all_similar[app_id] = [app['appId'] for app in similar] print(f"{app_id}: {len(similar)} similar apps") # Find common competitors common = set(all_similar[apps_to_compare[0]]) for app_id in apps_to_compare[1:]: common &= set(all_similar[app_id]) print(f"\nCommon competitors: {len(common)}") for app_id in list(common)[:5]: title = scraper.app_get_field(app_id, "title") print(f"- {title}") ``` ### Feature Gap Analysis ```python app_id = "com.whatsapp" similar = scraper.similar_get_fields(app_id, ["title", "score"], count=30) # Get 
top-rated competitors top_competitors = sorted(similar, key=lambda x: x['score'] or 0, reverse=True)[:5] print(f"Top-rated competitors of {app_id}:") for i, app in enumerate(top_competitors, 1): print(f"{i}. {app['title']}: {app['score']}★") # You can then analyze these apps individually for features ``` ### Price Comparison ```python app_id = "com.example.paidapp" my_price = scraper.app_get_field(app_id, "price") similar = scraper.similar_get_fields(app_id, ["title", "price", "free"], count=50) paid_similar = [app for app in similar if not app['free'] and app['price']] if paid_similar: prices = [app['price'] for app in paid_similar] print(f"Price Comparison:") print(f" Your app: ${my_price:.2f}") print(f" Competitor min: ${min(prices):.2f}") print(f" Competitor max: ${max(prices):.2f}") print(f" Competitor avg: ${sum(prices)/len(prices):.2f}") ``` --- ## Parameters ### Initialization - `http_client` (str, optional) - HTTP client to use: "requests", "curl_cffi", "tls_client", "httpx", "urllib3", "cloudscraper", "aiohttp" (default: "requests") ### Method Parameters - `app_id` (str, required) - App package name to find similar apps for - `count` (int, optional) - Maximum number of similar apps to return (default: 100) - `lang` (str, optional) - Language code (default: 'en') - `country` (str, optional) - Country code (default: 'us') - `field` (str) - Single field name - `fields` (List[str]) - List of field names ### Language & Country Codes - **Language**: 'en', 'es', 'fr', 'de', 'ja', 'ko', 'pt', 'ru', 'zh', etc. - **Country**: 'us', 'gb', 'ca', 'au', 'in', 'br', 'jp', 'kr', 'de', 'fr', etc. --- ## When to Use Each Method - **`similar_analyze()`** - Need complete data for all similar apps - **`similar_get_field()`** - Need just one field from all similar apps - **`similar_get_fields()`** - Need specific fields from all similar apps (more efficient) - **`similar_print_field()`** - Quick debugging/console output - **`similar_print_fields()`** - Quick debugging of multiple fields - **`similar_print_all()`** - Explore available data structure --- ## Use Cases ### Competitive Intelligence - Identify direct competitors - Monitor competitor ratings and pricing - Track market positioning - Discover new entrants in your category ### Market Research - Understand market landscape - Analyze pricing strategies - Identify market gaps - Study successful competitors ### Product Development - Find feature inspiration - Identify differentiation opportunities - Benchmark against competitors - Discover user expectations ### Marketing Strategy - Identify target audience overlap - Study competitor positioning - Find partnership opportunities - Analyze market trends --- ## Advanced Features ### Rate Limiting Built-in rate limiting (1 second delay between requests) prevents blocking. 
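For large batch runs you can widen the gap between requests through the library's `Config` object (see the configuration documentation); a minimal sketch:

```python
from gplay_scraper import GPlayScraper, Config

# Slow down to one request every 2 seconds for large batch jobs (default is 1.0)
Config.RATE_LIMIT_DELAY = 2.0

scraper = GPlayScraper()
similar = scraper.similar_analyze("com.whatsapp", count=50)
print(f"Fetched {len(similar)} similar apps")
```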
### Error Handling ```python from gplay_scraper import GPlayScraper, AppNotFoundError, NetworkError scraper = GPlayScraper() try: similar = scraper.similar_analyze("invalid.app.id") except AppNotFoundError: print("App not found or no similar apps available") except NetworkError: print("Network error occurred") ``` ### Multi-Region Similar Apps ```python # Get similar apps from different regions us_similar = scraper.similar_analyze("com.whatsapp", country="us", count=20) uk_similar = scraper.similar_analyze("com.whatsapp", country="gb", count=20) jp_similar = scraper.similar_analyze("com.whatsapp", country="jp", lang="ja", count=20) print(f"US similar apps: {len(us_similar)}") print(f"UK similar apps: {len(uk_similar)}") print(f"JP similar apps: {len(jp_similar)}") ``` ### Filtering Results ```python similar = scraper.similar_analyze("com.whatsapp", count=50) # Filter by rating high_rated = [app for app in similar if app['score'] and app['score'] >= 4.0] # Filter by price free_apps = [app for app in similar if app['free']] # Filter by developer exclude_dev = [app for app in similar if app['developer'] != "WhatsApp LLC"] print(f"High rated: {len(high_rated)}") print(f"Free: {len(free_apps)}") print(f"Other developers: {len(exclude_dev)}") ``` ### Batch Analysis ```python # Analyze similar apps for multiple apps apps = ["com.whatsapp", "com.telegram", "com.viber"] results = {} for app_id in apps: similar = scraper.similar_get_fields(app_id, ["title", "score"], count=10) results[app_id] = similar print(f"{app_id}: {len(similar)} similar apps found") ``` ================================================ FILE: README/SUGGEST_METHODS.md ================================================ # Suggest Methods Get search suggestions and autocomplete from Google Play Store for keyword discovery and ASO. ## Quick Start ```python from gplay_scraper import GPlayScraper scraper = GPlayScraper() # Get suggestions suggestions = scraper.suggest_analyze("video", count=5) print(suggestions) # ['video player', 'video editor', 'video downloader', 'video maker', 'video call'] # Get nested suggestions nested = scraper.suggest_nested("video", count=3) for term, suggestions in nested.items(): print(f"{term}: {suggestions}") ``` --- ## HTTP Clients The library supports 7 HTTP clients with automatic fallback. If one fails, it tries the next. ### Supported Clients 1. **requests** (default) - Standard Python HTTP library 2. **curl_cffi** - cURL with browser impersonation 3. **tls_client** - Advanced TLS fingerprinting 4. **httpx** - Modern async-capable HTTP client 5. **urllib3** - Low-level HTTP client 6. **cloudscraper** - Cloudflare bypass 7. **aiohttp** - Async HTTP client ### Usage ```python # Default (tries requests first, then others) scraper = GPlayScraper() # Specify a client scraper = GPlayScraper(http_client="curl_cffi") scraper = GPlayScraper(http_client="tls_client") scraper = GPlayScraper(http_client="httpx") ``` ### Installation ```bash # Default pip install requests # Advanced clients (optional) pip install curl-cffi pip install tls-client pip install httpx pip install urllib3 pip install cloudscraper pip install aiohttp ``` **Note:** The library automatically falls back to available clients if your preferred one fails. --- ## Methods ### `suggest_analyze(term, count=5, lang='en', country='us')` Returns search suggestions as a list of strings. 
```python suggestions = scraper.suggest_analyze("video", count=5) # Returns: ['video player', 'video editor', 'video downloader', 'video maker', 'video call'] ``` ### `suggest_nested(term, count=5, lang='en', country='us')` Returns nested suggestions (suggestions for each suggestion). ```python nested = scraper.suggest_nested("video", count=3) # Returns: { # 'video player': ['video player hd', 'video player all format', 'video player pro'], # 'video editor': ['video editor pro', 'video editor free', 'video editor app'], # 'video downloader': ['video downloader for facebook', 'video downloader hd', ...] # } ``` ### `suggest_print_all(term, count=5, lang='en', country='us')` Prints suggestions as formatted JSON. ```python scraper.suggest_print_all("video", count=5) # Output: ["video player", "video editor", "video downloader", "video maker", "video call"] ``` ### `suggest_print_nested(term, count=5, lang='en', country='us')` Prints nested suggestions as formatted JSON. ```python scraper.suggest_print_nested("video", count=3) # Output: Full JSON object with nested suggestions ``` --- ## Return Formats ### Simple Suggestions (List) ```python ['video player', 'video editor', 'video downloader', 'video maker', 'video call'] ``` ### Nested Suggestions (Dictionary) ```python { 'video player': ['video player hd', 'video player all format', 'video player pro'], 'video editor': ['video editor pro', 'video editor free', 'video editor app'], 'video downloader': ['video downloader for facebook', 'video downloader hd'] } ``` --- ## Practical Examples ### Autocomplete Feature ```python def autocomplete(user_input): """Provide autocomplete suggestions as user types""" if len(user_input) < 2: return [] suggestions = scraper.suggest_analyze(user_input, count=10) return suggestions # Usage print(autocomplete("gam")) # ['game', 'games', 'gaming', ...] print(autocomplete("photo")) # ['photo editor', 'photo collage', ...] 
``` ### Keyword Research ```python base_keywords = ["fitness", "workout", "exercise"] all_keywords = set() for keyword in base_keywords: suggestions = scraper.suggest_analyze(keyword, count=10) all_keywords.update(suggestions) print(f"{keyword}: {len(suggestions)} suggestions") print(f"\nTotal unique keywords: {len(all_keywords)}") print("Sample keywords:", list(all_keywords)[:10]) ``` ### Deep Keyword Mining ```python term = "photo editor" nested = scraper.suggest_nested(term, count=5) print(f"Keyword tree for '{term}':") for parent, children in nested.items(): print(f"\n{parent}:") for child in children: print(f" - {child}") ``` ### ASO Keyword Discovery ```python import json def discover_keywords(seed_term, depth=2): """Discover keywords with specified depth""" keywords = {} # Level 1 level1 = scraper.suggest_analyze(seed_term, count=10) keywords[seed_term] = level1 if depth > 1: # Level 2 for term in level1[:5]: # Limit to avoid too many requests level2 = scraper.suggest_analyze(term, count=5) keywords[term] = level2 return keywords keywords = discover_keywords("game", depth=2) print(json.dumps(keywords, indent=2)) ``` ### Trending Search Terms ```python categories = ["game", "social", "productivity", "photo", "music"] trending = {} for category in categories: suggestions = scraper.suggest_analyze(category, count=5) trending[category] = suggestions print(f"{category}: {', '.join(suggestions[:3])}...") ``` ### Long-Tail Keywords ```python short_term = "vpn" suggestions = scraper.suggest_analyze(short_term, count=10) # Filter for long-tail (3+ words) long_tail = [s for s in suggestions if len(s.split()) >= 3] print(f"Long-tail keywords for '{short_term}':") for keyword in long_tail: print(f"- {keyword}") ``` ### Competitor Keyword Analysis ```python competitor_apps = ["whatsapp", "telegram", "signal"] all_suggestions = {} for app in competitor_apps: suggestions = scraper.suggest_analyze(app, count=10) all_suggestions[app] = suggestions print(f"{app}: {len(suggestions)} suggestions") # Find common keywords common = set(all_suggestions[competitor_apps[0]]) for app in competitor_apps[1:]: common &= set(all_suggestions[app]) print(f"\nCommon keywords: {common}") ``` ### Export Keyword Map ```python import json term = "fitness" nested = scraper.suggest_nested(term, count=10) with open(f'keywords_{term}.json', 'w') as f: json.dump(nested, f, indent=2) print(f"Exported keyword map for '{term}'") print(f"Total parent keywords: {len(nested)}") print(f"Total child keywords: {sum(len(v) for v in nested.values())}") ``` ### Search Volume Estimation ```python term = "photo editor" suggestions = scraper.suggest_analyze(term, count=20) # Suggestions appear in order of popularity (roughly) print(f"Top suggestions for '{term}' (by estimated popularity):") for i, suggestion in enumerate(suggestions[:10], 1): print(f"{i}. 
{suggestion}") ``` --- ## Parameters ### Initialization - `http_client` (str, optional) - HTTP client to use: "requests", "curl_cffi", "tls_client", "httpx", "urllib3", "cloudscraper", "aiohttp" (default: "requests") ### Method Parameters - `term` (str, required) - Search term or keyword - `count` (int, optional) - Number of suggestions to return (default: 5, max: ~10) - `lang` (str, optional) - Language code (default: 'en') - `country` (str, optional) - Country code (default: 'us') ### Search Term Tips - Use partial words: "gam" → "game", "games", "gaming" - Try categories: "fitness", "photo", "music" - Test variations: "vpn", "vpn free", "vpn app" - Use brand names: "whatsapp", "instagram" - Combine terms: "photo editor free" ### Language & Country Codes - **Language**: 'en', 'es', 'fr', 'de', 'ja', 'ko', 'pt', 'ru', 'zh', etc. - **Country**: 'us', 'gb', 'ca', 'au', 'in', 'br', 'jp', 'kr', 'de', 'fr', etc. --- ## When to Use Each Method - **`suggest_analyze()`** - Get simple list of suggestions for autocomplete or keyword research - **`suggest_nested()`** - Deep keyword mining with two levels of suggestions - **`suggest_print_all()`** - Quick debugging/console output of suggestions - **`suggest_print_nested()`** - Quick debugging/console output of nested suggestions --- ## Use Cases ### App Store Optimization (ASO) - Discover high-traffic keywords - Find long-tail keyword opportunities - Analyze competitor keywords - Optimize app title and description ### Market Research - Identify trending search terms - Understand user search behavior - Discover niche markets - Track keyword trends over time ### Content Strategy - Generate content ideas - Find related topics - Optimize metadata - Improve discoverability ### Competitive Analysis - Discover competitor keywords - Find keyword gaps - Identify market opportunities - Track competitor positioning --- ## Advanced Features ### Rate Limiting Built-in rate limiting (1 second delay between requests) prevents blocking. 
### Error Handling ```python from gplay_scraper import GPlayScraper, NetworkError scraper = GPlayScraper() try: suggestions = scraper.suggest_analyze("") except ValueError: print("Term cannot be empty") except NetworkError: print("Network error occurred") ``` ### Multi-Region Suggestions ```python # Get suggestions from different regions us_suggestions = scraper.suggest_analyze("game", country="us") uk_suggestions = scraper.suggest_analyze("game", country="gb") jp_suggestions = scraper.suggest_analyze("game", country="jp", lang="ja") print(f"US: {us_suggestions[:3]}") print(f"UK: {uk_suggestions[:3]}") print(f"JP: {jp_suggestions[:3]}") ``` ### Batch Processing ```python terms = ["fitness", "diet", "workout", "yoga", "meditation"] all_suggestions = {} for term in terms: suggestions = scraper.suggest_analyze(term, count=10) all_suggestions[term] = suggestions print(f"{term}: {len(suggestions)} suggestions") # Find overlapping keywords all_keywords = set() for suggestions in all_suggestions.values(): all_keywords.update(suggestions) print(f"\nTotal unique keywords: {len(all_keywords)}") ``` ### Recursive Keyword Expansion ```python def expand_keywords(term, max_depth=2, current_depth=0): """Recursively expand keywords""" if current_depth >= max_depth: return [] suggestions = scraper.suggest_analyze(term, count=5) all_keywords = suggestions.copy() if current_depth < max_depth - 1: for suggestion in suggestions[:2]: # Limit to avoid explosion child_keywords = expand_keywords(suggestion, max_depth, current_depth + 1) all_keywords.extend(child_keywords) return all_keywords keywords = expand_keywords("game", max_depth=2) print(f"Expanded to {len(set(keywords))} unique keywords") ``` ### Suggestion Filtering ```python term = "game" suggestions = scraper.suggest_analyze(term, count=20) # Filter by length short = [s for s in suggestions if len(s.split()) <= 2] long = [s for s in suggestions if len(s.split()) > 2] # Filter by keyword free_games = [s for s in suggestions if 'free' in s.lower()] print(f"Short keywords: {len(short)}") print(f"Long keywords: {len(long)}") print(f"Free games: {len(free_games)}") ``` ================================================ FILE: README.md ================================================ # Google Play Scraper - Python Library 📱 [PyPI](https://badge.fury.io/py/gplay-scraper) [Python](https://www.python.org/downloads/) [License: MIT](https://opensource.org/licenses/MIT) [Documentation](https://mohammedcha.github.io/gplay-scraper/) [Downloads](https://pepy.tech/project/gplay-scraper) [GitHub Stars](https://github.com/Mohammedcha/gplay-scraper/stargazers) [Issues](https://github.com/Mohammedcha/gplay-scraper/issues)
Built with ❤️ using Sphinx Book Theme
", "search_bar_text": "Search documentation...", "icon_links": [ { "name": "GitHub", "url": "https://github.com/mohammedcha/gplay-scraper", "icon": "fa-brands fa-github", "type": "fontawesome", }, { "name": "PyPI", "url": "https://pypi.org/project/gplay-scraper/", "icon": "fa-brands fa-python", "type": "fontawesome", }, ], } pygments_style = 'monokai' pygments_dark_style = 'monokai' html_title = "GPlay Scraper" html_static_path = ['_static'] html_logo = "_static/logo.png" html_favicon = "_static/favicon.png" if 'sphinx_copybutton' in extensions: copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: " copybutton_prompt_is_regexp = True ================================================ FILE: docs/configuration.rst ================================================ Configuration ============= Advanced configuration options for GPlay Scraper. HTTP Client Selection --------------------- Choose from 7 HTTP clients with automatic fallback. .. code-block:: python from gplay_scraper import GPlayScraper # Default (requests) scraper = GPlayScraper() # Use curl_cffi (best for bypassing blocks) scraper = GPlayScraper(http_client='curl_cffi') # Use tls_client (advanced TLS fingerprinting) scraper = GPlayScraper(http_client='tls_client') # Use httpx (modern HTTP/2) scraper = GPlayScraper(http_client='httpx') Available HTTP Clients ^^^^^^^^^^^^^^^^^^^^^^ 1. **requests** - Default, most compatible 2. **curl_cffi** - Best for anti-bot bypass (Chrome 110 impersonation) 3. **tls_client** - Advanced TLS fingerprinting (Chrome 112) 4. **urllib3** - Low-level HTTP with connection pooling 5. **cloudscraper** - Cloudflare bypass 6. **aiohttp** - Async HTTP support 7. **httpx** - Modern HTTP/2 client The library automatically falls back to the next available client if one fails. Rate Limiting ------------- Configure delay between requests to avoid rate limits. .. code-block:: python from gplay_scraper import Config # Set rate limit delay (seconds) Config.RATE_LIMIT_DELAY = 2.0 # 2 seconds between requests # Or use default (1.0 second) Config.RATE_LIMIT_DELAY = 1.0 Language & Region ----------------- Set default language and country for all requests. .. code-block:: python from gplay_scraper import Config # Set default language Config.DEFAULT_LANGUAGE = 'es' # Spanish # Set default country Config.DEFAULT_COUNTRY = 'mx' # Mexico Common Language Codes ^^^^^^^^^^^^^^^^^^^^^^ * ``en`` - English * ``es`` - Spanish * ``fr`` - French * ``de`` - German * ``it`` - Italian * ``pt`` - Portuguese * ``ja`` - Japanese * ``ko`` - Korean * ``zh`` - Chinese * ``ru`` - Russian * ``ar`` - Arabic * ``hi`` - Hindi Common Country Codes ^^^^^^^^^^^^^^^^^^^^^ * ``us`` - United States * ``gb`` - United Kingdom * ``ca`` - Canada * ``de`` - Germany * ``fr`` - France * ``es`` - Spain * ``mx`` - Mexico * ``jp`` - Japan * ``kr`` - South Korea * ``cn`` - China * ``in`` - India * ``br`` - Brazil Request Timeout --------------- Configure HTTP request timeout. .. code-block:: python from gplay_scraper import Config # Set timeout (seconds) Config.DEFAULT_TIMEOUT = 30 # 30 seconds # Or use default (10 seconds) Config.DEFAULT_TIMEOUT = 10 Retry Configuration ------------------- Configure automatic retry behavior. .. code-block:: python from gplay_scraper import Config # Set number of retries Config.DEFAULT_RETRY_COUNT = 5 # Try 5 times # Or use default (3 retries) Config.DEFAULT_RETRY_COUNT = 3 Image Asset Sizes ----------------- Configure default image size for all requests. .. 
code-block:: python scraper = GPlayScraper() # Small images (512px) app = scraper.app_analyze('com.whatsapp', assets='SMALL') # Medium images (1024px) - default app = scraper.app_analyze('com.whatsapp', assets='MEDIUM') # Large images (2048px) app = scraper.app_analyze('com.whatsapp', assets='LARGE') # Original size (maximum) app = scraper.app_analyze('com.whatsapp', assets='ORIGINAL') Per-Request Configuration -------------------------- Override defaults for specific requests. .. code-block:: python scraper = GPlayScraper() # Per-request language app_es = scraper.app_analyze('com.whatsapp', lang='es') app_fr = scraper.app_analyze('com.whatsapp', lang='fr') # Per-request country app_uk = scraper.app_analyze('com.whatsapp', country='gb') app_de = scraper.app_analyze('com.whatsapp', country='de') # Per-request images app_large = scraper.app_analyze('com.whatsapp', assets='LARGE') Logging ------- Configure logging level for debugging. .. code-block:: python import logging # Enable debug logging logging.basicConfig(level=logging.DEBUG) # Enable info logging logging.basicConfig(level=logging.INFO) # Disable logging logging.basicConfig(level=logging.ERROR) Complete Configuration Example ------------------------------- .. code-block:: python from gplay_scraper import GPlayScraper, Config import logging # Configure library Config.RATE_LIMIT_DELAY = 2.0 Config.DEFAULT_LANGUAGE = 'en' Config.DEFAULT_COUNTRY = 'us' Config.DEFAULT_TIMEOUT = 30 Config.DEFAULT_RETRY_COUNT = 5 # Configure logging logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s' ) # Initialize with preferred HTTP client scraper = GPlayScraper(http_client='curl_cffi') # Use the scraper app = scraper.app_analyze('com.whatsapp') Environment Variables --------------------- You can also use environment variables for configuration. .. code-block:: bash # Set in your shell or .env file export GPLAY_HTTP_CLIENT=curl_cffi export GPLAY_RATE_LIMIT=2.0 export GPLAY_LANGUAGE=en export GPLAY_COUNTRY=us Best Practices -------------- 1. **Use curl_cffi or tls_client** for better success rates 2. **Set rate limiting** to 2+ seconds for large batch operations 3. **Use field filtering** to reduce data transfer and parsing time 4. **Enable logging** during development, disable in production 5. **Handle exceptions** gracefully for production use 6. **Reuse scraper instance** instead of creating new ones See Also -------- * :doc:`error_handling` - Error handling guide * :doc:`examples` - Practical examples ================================================ FILE: docs/error_handling.rst ================================================ Error Handling ============== Guide to handling errors and exceptions in GPlay Scraper. Exception Types --------------- GPlay Scraper provides 6 custom exception types: AppNotFoundError ^^^^^^^^^^^^^^^^ Raised when an app, developer, or resource is not found. .. code-block:: python from gplay_scraper import GPlayScraper from gplay_scraper.exceptions import AppNotFoundError scraper = GPlayScraper() try: app = scraper.app_analyze('invalid.app.id') except AppNotFoundError as e: print(f"App not found: {e}") NetworkError ^^^^^^^^^^^^ Raised when network or HTTP errors occur. .. code-block:: python from gplay_scraper.exceptions import NetworkError try: app = scraper.app_analyze('com.whatsapp') except NetworkError as e: print(f"Network error: {e}") DataParsingError ^^^^^^^^^^^^^^^^ Raised when JSON parsing or data extraction fails. .. 
code-block:: python from gplay_scraper.exceptions import DataParsingError try: app = scraper.app_analyze('com.whatsapp') except DataParsingError as e: print(f"Parsing error: {e}") RateLimitError ^^^^^^^^^^^^^^ Raised when rate limits are exceeded. .. code-block:: python from gplay_scraper.exceptions import RateLimitError try: # Making too many requests too quickly for i in range(1000): app = scraper.app_analyze(f'com.app{i}') except RateLimitError as e: print(f"Rate limited: {e}") InvalidAppIdError ^^^^^^^^^^^^^^^^^ Raised when input validation fails. .. code-block:: python from gplay_scraper.exceptions import InvalidAppIdError try: app = scraper.app_analyze('') # Empty app ID except InvalidAppIdError as e: print(f"Invalid input: {e}") GPlayScraperError ^^^^^^^^^^^^^^^^^ Base exception for all library errors. .. code-block:: python from gplay_scraper.exceptions import GPlayScraperError try: app = scraper.app_analyze('com.whatsapp') except GPlayScraperError as e: print(f"Library error: {e}") Comprehensive Error Handling ----------------------------- Handle all common exceptions. .. code-block:: python from gplay_scraper import GPlayScraper from gplay_scraper.exceptions import ( AppNotFoundError, NetworkError, DataParsingError, RateLimitError, InvalidAppIdError, GPlayScraperError ) scraper = GPlayScraper() try: app = scraper.app_analyze('com.whatsapp') except InvalidAppIdError as e: print(f"Invalid app ID: {e}") except AppNotFoundError as e: print(f"App not found: {e}") except NetworkError as e: print(f"Network error: {e}") except DataParsingError as e: print(f"Parsing error: {e}") except RateLimitError as e: print(f"Rate limited: {e}") except GPlayScraperError as e: print(f"Unknown library error: {e}") Automatic Retries ----------------- The library automatically retries failed requests with HTTP client fallback. .. code-block:: python from gplay_scraper import Config # Configure retries Config.DEFAULT_RETRY_COUNT = 5 # Try 5 times scraper = GPlayScraper() app = scraper.app_analyze('com.whatsapp') # Automatically retries up to 5 times if it fails # Switches HTTP clients between retries Graceful Degradation -------------------- Methods return None or empty lists on failure instead of crashing. .. code-block:: python scraper = GPlayScraper() # Returns None if app not found (after retries) app = scraper.app_analyze('invalid.app') if app is None: print("App not found") # Returns empty list if search fails results = scraper.search_analyze('invalid query') if not results: print("No results found") Production Error Handling -------------------------- Example for production use. .. 
code-block:: python import logging from gplay_scraper import GPlayScraper, Config from gplay_scraper.exceptions import GPlayScraperError # Configure logging logging.basicConfig( level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s', filename='gplay_scraper.log' ) logger = logging.getLogger(__name__) # Configure retries Config.DEFAULT_RETRY_COUNT = 5 scraper = GPlayScraper(http_client='curl_cffi') def safe_analyze_app(app_id): """Safely analyze an app with error handling.""" try: return scraper.app_analyze(app_id) except GPlayScraperError as e: logger.error(f"Failed to analyze {app_id}: {e}") return None # Use in production app_ids = ['com.app1', 'com.app2', 'com.app3'] results = [] for app_id in app_ids: app = safe_analyze_app(app_id) if app: results.append(app) print(f"Successfully analyzed {len(results)}/{len(app_ids)} apps") Batch Processing with Error Handling ------------------------------------- .. code-block:: python from gplay_scraper import GPlayScraper from gplay_scraper.exceptions import GPlayScraperError scraper = GPlayScraper() app_ids = ['com.app1', 'com.app2', 'invalid.app', 'com.app3'] successful = [] failed = [] for app_id in app_ids: try: app = scraper.app_analyze(app_id) if app: successful.append(app) except GPlayScraperError as e: failed.append((app_id, str(e))) print(f"Successful: {len(successful)}") print(f"Failed: {len(failed)}") if failed: print("\nFailed apps:") for app_id, error in failed: print(f" {app_id}: {error}") Best Practices -------------- 1. **Always handle exceptions** in production code 2. **Use specific exceptions** when possible instead of catching all 3. **Log errors** for debugging and monitoring 4. **Implement retries** for transient failures 5. **Use graceful degradation** - continue processing even if some items fail 6. **Monitor error rates** to detect issues early See Also -------- * :doc:`configuration` - Configuration options * :doc:`examples` - More practical examples ================================================ FILE: docs/examples.rst ================================================ Examples ======== Practical examples of using GPlay Scraper for common tasks. App Analytics Dashboard ----------------------- Track key metrics for your app. .. code-block:: python from gplay_scraper import GPlayScraper scraper = GPlayScraper() app = scraper.app_analyze('com.myapp') print("=== App Analytics Dashboard ===") print(f"App: {app['title']}") print(f"Developer: {app['developer']}") print(f"Rating: {app['score']}/5 ({app['ratings']:,} ratings)") print(f"\nInstall Metrics:") print(f" Total Installs: {app['realInstalls']:,}") print(f" Daily Installs: {app['dailyInstalls']:,}") print(f" Monthly Installs: {app['monthlyInstalls']:,}") print(f" App Age: {app['appAgeDays']} days") print(f"\nRating Distribution:") hist = app['histogram'] for i, count in enumerate(hist, 1): print(f" {i}★: {count:,}") Market Research --------------- Analyze a market segment. .. code-block:: python scraper = GPlayScraper() # Search for fitness apps results = scraper.search_analyze('fitness tracker', count=100) # Filter by rating high_rated = [app for app in results if app['score'] >= 4.5] free_apps = [app for app in high_rated if app['free']] print(f"Total fitness tracker apps: {len(results)}") print(f"High-rated (4.5+): {len(high_rated)}") print(f"High-rated & Free: {len(free_apps)}") print("\nTop 5 Free High-Rated Apps:") for app in free_apps[:5]: print(f" {app['title']}: {app['score']}/5") Competitor Monitoring --------------------- Track your competitors. 
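The example below pulls each competitor's full profile and its 100 most recent reviews, so looping over several app IDs fires a burst of requests. As a small, hedged sketch (the 2-second figure is an assumption borrowed from the rate-limiting advice in the configuration guide, not a requirement), the built-in delay can be raised before running the comparison:

.. code-block:: python

    from gplay_scraper import Config

    # Assumption: ~2 seconds between requests is a comfortable margin for
    # multi-app comparison loops; tune to your own volume.
    Config.RATE_LIMIT_DELAY = 2.0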
.. code-block:: python scraper = GPlayScraper() competitors = ['com.competitor1', 'com.competitor2', 'com.competitor3'] print("Competitor Analysis") print("-" * 60) for app_id in competitors: app = scraper.app_analyze(app_id) reviews = scraper.reviews_analyze(app_id, count=100, sort='NEWEST') avg_recent_rating = sum(r['score'] for r in reviews) / len(reviews) print(f"\n{app['title']}") print(f" Overall Rating: {app['score']}/5") print(f" Recent Rating: {avg_recent_rating:.2f}/5") print(f" Daily Installs: {app['dailyInstalls']:,}") print(f" Total Installs: {app['realInstalls']:,}") Review Sentiment Analysis -------------------------- Analyze user feedback. .. code-block:: python scraper = GPlayScraper() reviews = scraper.reviews_analyze('com.myapp', count=500) # Categorize by rating positive = [r for r in reviews if r['score'] >= 4] neutral = [r for r in reviews if r['score'] == 3] negative = [r for r in reviews if r['score'] <= 2] print("Review Sentiment Analysis") print(f"Total Reviews: {len(reviews)}") print(f"Positive (4-5★): {len(positive)} ({len(positive)/len(reviews)*100:.1f}%)") print(f"Neutral (3★): {len(neutral)} ({len(neutral)/len(reviews)*100:.1f}%)") print(f"Negative (1-2★): {len(negative)} ({len(negative)/len(reviews)*100:.1f}%)") # Show recent negative reviews print("\nRecent Negative Reviews:") for review in negative[:5]: print(f" {review['userName']}: {review['score']}/5") print(f" {review['content'][:100]}...") Top Charts Tracking ------------------- Monitor top charts positions. .. code-block:: python scraper = GPlayScraper() # Track top free games top_games = scraper.list_analyze('TOP_FREE', category='GAME', count=50) # Find your app's position my_app_id = 'com.mygame' position = next((i for i, app in enumerate(top_games, 1) if app['appId'] == my_app_id), None) if position: print(f"Your game is ranked #{position} in top free games!") else: print("Your game is not in top 50") # Show top 10 print("\nTop 10 Free Games:") for i, app in enumerate(top_games[:10], 1): print(f"{i}. {app['title']} - {app['score']}/5") Developer Portfolio Overview ----------------------------- Analyze a developer's entire portfolio. .. code-block:: python scraper = GPlayScraper() apps = scraper.developer_analyze('Google LLC') # Calculate metrics avg_rating = sum(app['score'] for app in apps) / len(apps) free_count = sum(1 for app in apps if app['free']) high_rated = [app for app in apps if app['score'] >= 4.5] print(f"Developer: Google LLC") print(f"Total Apps: {len(apps)}") print(f"Average Rating: {avg_rating:.2f}/5") print(f"Free Apps: {free_count}/{len(apps)}") print(f"High-Rated Apps (4.5+): {len(high_rated)}") # Best rated apps sorted_apps = sorted(apps, key=lambda x: x['score'], reverse=True) print("\nTop 5 Highest Rated:") for app in sorted_apps[:5]: print(f" {app['title']}: {app['score']}/5") Batch Data Collection ---------------------- Collect data for multiple apps efficiently. .. 
code-block:: python import json from gplay_scraper import GPlayScraper scraper = GPlayScraper() app_ids = [ 'com.whatsapp', 'org.telegram.messenger', 'org.thoughtcrime.securesms', 'com.discord' ] results = [] for app_id in app_ids: # Get only the fields you need fields = ['title', 'developer', 'score', 'realInstalls', 'dailyInstalls'] data = scraper.app_get_fields(app_id, fields) results.append(data) # Save to JSON with open('messaging_apps.json', 'w') as f: json.dump(results, f, indent=2) print(f"Collected data for {len(results)} apps") Multi-Language Content ---------------------- Get localized app information. .. code-block:: python scraper = GPlayScraper() languages = { 'en': 'English', 'es': 'Spanish', 'fr': 'French', 'de': 'German', 'ja': 'Japanese' } for lang_code, lang_name in languages.items(): app = scraper.app_analyze('com.whatsapp', lang=lang_code) print(f"\n{lang_name} ({lang_code}):") print(f" Title: {app['title']}") print(f" Summary: {app['summary']}") Trend Discovery --------------- Discover trending apps in a category. .. code-block:: python scraper = GPlayScraper() # Get top free apps top_free = scraper.list_analyze('TOP_FREE', category='PRODUCTIVITY', count=100) # Heuristic: flag apps that mention 'New' in their description (list results carry no release date) new_apps = [app for app in top_free if 'New' in app.get('description', '')] print(f"Apps flagged as new: {len(new_apps)}") # Get apps with high install velocity trending = [] for app in top_free[:20]: full_data = scraper.app_analyze(app['appId']) if full_data['dailyInstalls'] > 10000: trending.append(full_data) print("Trending Productivity Apps:") for app in trending: print(f" {app['title']}") print(f" Daily Installs: {app['dailyInstalls']:,}") print(f" Rating: {app['score']}/5") See Also -------- * :doc:`quickstart` - Basic usage guide * :doc:`api/app` - Complete API reference * :doc:`configuration` - Configuration options ================================================ FILE: docs/fields.rst ================================================ Field Reference =============== Complete reference of all 112 fields returned by GPlay Scraper.
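Most integrations only consume a handful of these fields. As a minimal, hedged sketch (using only field names documented in the tables that follow), a subset can be requested with ``app_get_fields`` rather than handling the full 57-field dictionary:

.. code-block:: python

    from gplay_scraper import GPlayScraper

    scraper = GPlayScraper()

    # Pull a few fields from the tables below: basic info, install
    # statistics, and ratings.
    fields = ['title', 'realInstalls', 'dailyInstalls', 'appAgeDays', 'score']
    data = scraper.app_get_fields('com.whatsapp', fields)

    for name, value in data.items():
        print(f"{name}: {value}")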
App Fields (57 Fields) ----------------------- Basic Information (5 fields) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * ``appId`` (string) - Package identifier (e.g., "com.whatsapp") * ``title`` (string) - App name * ``summary`` (string) - Short description * ``description`` (string) - Full description * ``appUrl`` (string) - Play Store URL Category (4 fields) ^^^^^^^^^^^^^^^^^^^ * ``genre`` (string) - Primary category * ``genreId`` (string) - Category ID * ``categories`` (array) - All categories * ``available`` (boolean) - Availability status Release & Updates (3 fields) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * ``released`` (string) - Release date (e.g., "Oct 18, 2010") * ``appAgeDays`` (integer) - Days since release (computed) * ``lastUpdated`` (string) - Last update date Media (5 fields) ^^^^^^^^^^^^^^^^ * ``icon`` (string) - App icon URL * ``headerImage`` (string) - Header image URL * ``screenshots`` (array) - Screenshot URLs * ``video`` (string or null) - Promotional video URL * ``videoImage`` (string or null) - Video thumbnail URL Install Statistics (10 fields) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * ``installs`` (string) - Install range (e.g., "10,000,000,000+") * ``minInstalls`` (integer) - Minimum installs * ``realInstalls`` (integer) - Exact install count * ``dailyInstalls`` (integer) - Average daily installs (computed) * ``minDailyInstalls`` (integer) - Min daily installs (computed) * ``realDailyInstalls`` (integer) - Real daily installs (computed) * ``monthlyInstalls`` (integer) - Average monthly installs (computed) * ``minMonthlyInstalls`` (integer) - Min monthly installs (computed) * ``realMonthlyInstalls`` (integer) - Real monthly installs (computed) Ratings (4 fields) ^^^^^^^^^^^^^^^^^^ * ``score`` (float) - Average rating (0-5) * ``ratings`` (integer) - Total ratings count * ``reviews`` (integer) - Total reviews count * ``histogram`` (array) - Rating distribution [1★, 2★, 3★, 4★, 5★] Ads (2 fields) ^^^^^^^^^^^^^^ * ``adSupported`` (boolean) - Supports ads * ``containsAds`` (boolean) - Contains ads Technical (7 fields) ^^^^^^^^^^^^^^^^^^^^ * ``version`` (string) - Current version * ``androidVersion`` (string) - Minimum Android version * ``maxAndroidApi`` (integer) - Maximum Android API * ``minAndroidApi`` (string or integer) - Minimum Android API * ``appBundle`` (string) - Bundle identifier * ``contentRating`` (string) - Age rating * ``contentRatingDescription`` (string) - Rating description Updates (1 field) ^^^^^^^^^^^^^^^^^ * ``whatsNew`` (array) - Changelog entries Privacy (2 fields) ^^^^^^^^^^^^^^^^^^ * ``permissions`` (object) - Required permissions * ``dataSafety`` (array) - Data safety information Pricing (7 fields) ^^^^^^^^^^^^^^^^^^ * ``price`` (number) - App price * ``currency`` (string) - Currency code * ``free`` (boolean) - Is free * ``offersIAP`` (boolean) - Has in-app purchases * ``inAppProductPrice`` (string or null) - IAP price range * ``sale`` (boolean) - On sale * ``originalPrice`` (number or null) - Original price if on sale Developer (8 fields) ^^^^^^^^^^^^^^^^^^^^ * ``developer`` (string) - Developer name * ``developerId`` (string) - Developer ID * ``developerEmail`` (string) - Contact email * ``developerWebsite`` (string) - Website URL * ``developerAddress`` (string) - Physical address * ``developerPhone`` (string or null) - Contact phone * ``publisherCountry`` (string) - Publisher country (computed) * ``privacyPolicy`` (string) - Privacy policy URL Search Fields (11 Fields) -------------------------- * ``title`` - App name * ``appId`` - Package identifier * ``url`` - Play 
Store URL * ``icon`` - App icon URL * ``developer`` - Developer name * ``summary`` - Short description * ``score`` - Average rating (0-5) * ``scoreText`` - Rating as text * ``price`` - App price * ``free`` - Is free (boolean) * ``currency`` - Currency code Review Fields (8 Fields) ------------------------- * ``reviewId`` - Unique review ID * ``userName`` - Reviewer name * ``userImage`` - Reviewer image URL * ``content`` - Review text * ``score`` - Rating (1-5) * ``thumbsUpCount`` - Helpful votes * ``at`` - Review date (ISO format) * ``appVersion`` - App version reviewed Developer App Fields (11 Fields) --------------------------------- Same as Search Fields. Similar App Fields (11 Fields) ------------------------------- Same as Search Fields. List (Top Charts) Fields (14 Fields) ------------------------------------- * ``title`` - App name * ``appId`` - Package identifier * ``url`` - Play Store URL * ``icon`` - App icon URL * ``screenshots`` - Screenshot URLs (array) * ``developer`` - Developer name * ``genre`` - Category/genre * ``installs`` - Install count * ``description`` - App description * ``score`` - Average rating * ``scoreText`` - Rating as text * ``price`` - App price * ``free`` - Is free (boolean) * ``currency`` - Currency code Computed Fields --------------- The following 8 fields are computed at runtime: **appAgeDays** Calculated as: ``(current_date - release_date).days`` **dailyInstalls** Calculated as: ``total_installs / days_since_release`` **minDailyInstalls** Calculated as: ``min_installs / days_since_release`` **realDailyInstalls** Calculated as: ``real_installs / days_since_release`` **monthlyInstalls** Calculated as: ``total_installs / (days_since_release / 30.44)`` **minMonthlyInstalls** Calculated as: ``min_installs / months_since_release`` **realMonthlyInstalls** Calculated as: ``real_installs / months_since_release`` **publisherCountry** Extracted from developer phone prefix or address Field Count Summary ------------------- * App: 57 fields * Search: 11 fields * Reviews: 8 fields * Developer: 11 fields * Similar: 11 fields * List: 14 fields * **Total: 112 unique fields** ================================================ FILE: docs/index.rst ================================================ GPlay Scraper Documentation ============================ A comprehensive Python library for scraping Google Play Store data with 40 methods across 7 categories. Features -------- * **57 app fields** including install analytics * **40 methods** for different data types * **7 HTTP clients** with automatic fallback * **Multi-language** and **multi-region** support * **Automatic retries** and error handling * **Rate limiting** built-in Quick Example ------------- .. code-block:: python from gplay_scraper import GPlayScraper scraper = GPlayScraper() # Get complete app data (57 fields) app = scraper.app_analyze('com.whatsapp') print(app['title']) # WhatsApp Messenger print(app['realInstalls']) # 10931553905 print(app['dailyInstalls']) # 1815870 print(app['publisherCountry']) # United States Table of Contents ----------------- .. toctree:: :maxdepth: 2 :caption: Getting Started installation quickstart examples .. toctree:: :maxdepth: 2 :caption: API Reference api/app api/search api/reviews api/developer api/similar api/list api/suggest .. 
toctree:: :maxdepth: 2 :caption: Advanced configuration error_handling fields Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` ================================================ FILE: docs/installation.rst ================================================ Installation ============ Requirements ------------ * Python 3.7 or higher * pip package manager Basic Installation ------------------ Install using pip: .. code-block:: bash pip install gplay-scraper This installs the library with the default HTTP client (requests). Optional Dependencies --------------------- For better performance and anti-bot protection, install additional HTTP clients: .. code-block:: bash # Install all optional HTTP clients pip install httpx curl-cffi tls-client aiohttp cloudscraper # Or install individually as needed pip install httpx # Modern HTTP/2 client pip install curl-cffi # Best for bypassing blocks pip install tls-client # Advanced TLS fingerprinting pip install aiohttp # Async support pip install cloudscraper # Cloudflare bypass Verify Installation ------------------- Test your installation: .. code-block:: python from gplay_scraper import GPlayScraper scraper = GPlayScraper() app = scraper.app_analyze('com.whatsapp') print(f"Successfully installed! Got: {app['title']}") Development Installation ------------------------ To install from source: .. code-block:: bash git clone https://github.com/yourusername/gplay-scraper.git cd gplay-scraper pip install -e . Upgrading --------- To upgrade to the latest version: .. code-block:: bash pip install --upgrade gplay-scraper Troubleshooting --------------- ImportError ^^^^^^^^^^^ If you get an ImportError, ensure the package is installed: .. code-block:: bash pip show gplay-scraper HTTP Client Issues ^^^^^^^^^^^^^^^^^^ If you encounter HTTP errors, try installing alternative clients: .. code-block:: bash pip install curl-cffi Then specify the client: .. code-block:: python from gplay_scraper import GPlayScraper scraper = GPlayScraper(http_client='curl_cffi') Next Steps ---------- * :doc:`quickstart` - Get started with basic usage * :doc:`examples` - See practical examples * :doc:`api/app` - Explore the API reference ================================================ FILE: docs/quickstart.rst ================================================ Quick Start Guide ================= This guide will get you started with GPlay Scraper in 5 minutes. Basic Usage ----------- Initialize the Scraper ^^^^^^^^^^^^^^^^^^^^^^ .. code-block:: python from gplay_scraper import GPlayScraper # Initialize once scraper = GPlayScraper() Get App Data ^^^^^^^^^^^^ Extract complete app information with 57 fields: .. code-block:: python # Get all app data app = scraper.app_analyze('com.whatsapp') # Access the data print(app['title']) # App name print(app['developer']) # Developer name print(app['score']) # Rating (0-5) print(app['realInstalls']) # Exact install count print(app['dailyInstalls']) # Average daily installs print(app['publisherCountry']) # Publisher country Get Specific Fields ^^^^^^^^^^^^^^^^^^^ If you only need certain fields: .. code-block:: python # Get single field title = scraper.app_get_field('com.whatsapp', 'title') # Get multiple fields fields = scraper.app_get_fields('com.whatsapp', ['title', 'score', 'dailyInstalls']) Search for Apps ^^^^^^^^^^^^^^^ Search the Play Store by keyword: .. 
code-block:: python # Search for apps results = scraper.search_analyze('messaging', count=10) # Iterate through results for app in results: print(f"{app['title']} by {app['developer']}") print(f" Rating: {app['score']}/5") print(f" Free: {app['free']}") Get Reviews ^^^^^^^^^^^ Extract user reviews with ratings: .. code-block:: python # Get newest reviews reviews = scraper.reviews_analyze('com.whatsapp', count=50, sort='NEWEST') # Process reviews for review in reviews: print(f"{review['userName']}: {review['score']}/5") print(f" {review['content'][:100]}...") Get Developer Apps ^^^^^^^^^^^^^^^^^^ Find all apps from a developer: .. code-block:: python # Get all apps from Google apps = scraper.developer_analyze('Google LLC') for app in apps: print(f"{app['title']} - {app['score']}/5") Find Similar Apps ^^^^^^^^^^^^^^^^^ Discover competitor or similar apps: .. code-block:: python # Find apps similar to WhatsApp similar = scraper.similar_analyze('com.whatsapp', count=20) for app in similar: print(f"{app['title']} - {app['score']}/5") Get Top Charts ^^^^^^^^^^^^^^ Access top free, paid, or grossing apps: .. code-block:: python # Top free games top_free = scraper.list_analyze('TOP_FREE', category='GAME', count=100) # Top paid apps top_paid = scraper.list_analyze('TOP_PAID', category='APPLICATION', count=50) # Top grossing top_grossing = scraper.list_analyze('TOP_GROSSING', count=100) Get Search Suggestions ^^^^^^^^^^^^^^^^^^^^^^ Get autocomplete suggestions: .. code-block:: python # Get suggestions suggestions = scraper.suggest_analyze('mine', count=10) print(suggestions) # ['minecraft', 'minesweeper', 'mineplex', ...] Multi-Language Support ---------------------- Get data in different languages: .. code-block:: python # Spanish app = scraper.app_analyze('com.whatsapp', lang='es') # French app = scraper.app_analyze('com.whatsapp', lang='fr') # Japanese app = scraper.app_analyze('com.whatsapp', lang='ja') Regional Data ------------- Get region-specific data: .. code-block:: python # UK data app = scraper.app_analyze('com.whatsapp', country='gb') # Germany app = scraper.app_analyze('com.whatsapp', country='de') # Japan app = scraper.app_analyze('com.whatsapp', country='jp') Image Sizes ----------- Control image quality: .. code-block:: python # Small images (512px) app = scraper.app_analyze('com.whatsapp', assets='SMALL') # Medium images (1024px) - default app = scraper.app_analyze('com.whatsapp', assets='MEDIUM') # Large images (2048px) app = scraper.app_analyze('com.whatsapp', assets='LARGE') # Original size app = scraper.app_analyze('com.whatsapp', assets='ORIGINAL') Error Handling -------------- Handle errors gracefully: .. code-block:: python from gplay_scraper import GPlayScraper from gplay_scraper.exceptions import ( AppNotFoundError, InvalidAppIdError, NetworkError ) scraper = GPlayScraper() try: app = scraper.app_analyze('invalid.app.id') except InvalidAppIdError: print("Invalid app ID format") except AppNotFoundError: print("App not found on Play Store") except NetworkError: print("Network error occurred") Common Patterns --------------- Batch Processing ^^^^^^^^^^^^^^^^ .. code-block:: python app_ids = ['com.whatsapp', 'com.telegram', 'com.signal'] for app_id in app_ids: app = scraper.app_analyze(app_id) print(f"{app['title']}: {app['realInstalls']:,} installs") Market Research ^^^^^^^^^^^^^^^ .. 
code-block:: python # Find highly-rated messaging apps results = scraper.search_analyze('messaging', count=100) high_rated = [app for app in results if app['score'] >= 4.5] for app in high_rated: print(f"{app['title']}: {app['score']}/5") Competitor Analysis ^^^^^^^^^^^^^^^^^^^ .. code-block:: python # Analyze your app vs competitors my_app = scraper.app_analyze('com.myapp') competitors = scraper.similar_analyze('com.myapp', count=10) print(f"My App: {my_app['score']}/5") print("\nCompetitors:") for comp in competitors: print(f" {comp['title']}: {comp['score']}/5") Review Monitoring ^^^^^^^^^^^^^^^^^ .. code-block:: python # Monitor negative reviews reviews = scraper.reviews_analyze('com.myapp', count=100, sort='NEWEST') negative = [r for r in reviews if r['score'] <= 2] for review in negative: print(f"{review['userName']}: {review['score']}/5") print(f" {review['content']}") Next Steps ---------- * :doc:`examples` - See more detailed examples * :doc:`api/app` - Complete API reference * :doc:`configuration` - Advanced configuration options * :doc:`fields` - All available fields reference ================================================ FILE: docs/requirements.txt ================================================ sphinx>=7.0.0 sphinx-book-theme>=1.0.0 sphinx-copybutton>=0.5.0 sphinx-autobuild>=2021.3.14 ================================================ FILE: examples/README.md ================================================ # Examples This folder contains example scripts demonstrating all methods for each of the 7 method types. ## Files ### 1. app_methods_example.py Demonstrates all 6 app methods: - `app_analyze()` - Get all data as dictionary - `app_get_field()` - Get single field value - `app_get_fields()` - Get multiple fields - `app_print_field()` - Print single field to console - `app_print_fields()` - Print multiple fields to console - `app_print_all()` - Print all data as JSON ### 2. search_methods_example.py Demonstrates all 6 search methods: - `search_analyze()` - Get all search results - `search_get_field()` - Get single field from results - `search_get_fields()` - Get multiple fields from results - `search_print_field()` - Print single field from results - `search_print_fields()` - Print multiple fields from results - `search_print_all()` - Print all results as JSON ### 3. reviews_methods_example.py Demonstrates all 6 reviews methods: - `reviews_analyze()` - Get all reviews - `reviews_get_field()` - Get single field from reviews - `reviews_get_fields()` - Get multiple fields from reviews - `reviews_print_field()` - Print single field from reviews - `reviews_print_fields()` - Print multiple fields from reviews - `reviews_print_all()` - Print all reviews as JSON ### 4. developer_methods_example.py Demonstrates all 6 developer methods: - `developer_analyze()` - Get all developer apps - `developer_get_field()` - Get single field from apps - `developer_get_fields()` - Get multiple fields from apps - `developer_print_field()` - Print single field from apps - `developer_print_fields()` - Print multiple fields from apps - `developer_print_all()` - Print all apps as JSON ### 5. list_methods_example.py Demonstrates all 6 list methods: - `list_analyze()` - Get all top chart apps - `list_get_field()` - Get single field from apps - `list_get_fields()` - Get multiple fields from apps - `list_print_field()` - Print single field from apps - `list_print_fields()` - Print multiple fields from apps - `list_print_all()` - Print all apps as JSON ### 6. 
similar_methods_example.py Demonstrates all 6 similar methods: - `similar_analyze()` - Get all similar apps - `similar_get_field()` - Get single field from apps - `similar_get_fields()` - Get multiple fields from apps - `similar_print_field()` - Print single field from apps - `similar_print_fields()` - Print multiple fields from apps - `similar_print_all()` - Print all apps as JSON ### 7. suggest_methods_example.py Demonstrates all 4 suggest methods: - `suggest_analyze()` - Get search suggestions - `suggest_nested()` - Get nested suggestions - `suggest_print_all()` - Print suggestions as JSON - `suggest_print_nested()` - Print nested suggestions as JSON ## Running Examples ```bash # Run any example python examples/app_methods_example.py python examples/search_methods_example.py python examples/reviews_methods_example.py python examples/developer_methods_example.py python examples/list_methods_example.py python examples/similar_methods_example.py python examples/suggest_methods_example.py ``` ## Note These examples are simple demonstrations. For more advanced use cases, check the documentation in the `README/` folder. ================================================ FILE: examples/app_methods_example.py ================================================ """ App Methods Example Demonstrates all 6 app methods for extracting app details Parameters: - app_id: App package name - lang: Language code (default: 'en') - country: Country code (default: 'us') """ from gplay_scraper import GPlayScraper scraper = GPlayScraper() app_id = "com.whatsapp" lang = "en" country = "us" print("=== App Methods Example ===\n") # 1. app_analyze() - Get all data as dictionary print("1. app_analyze(app_id, lang='en', country='us')") data = scraper.app_analyze(app_id, lang=lang, country=country) print(f" Retrieved {len(data)} fields") print(f" Title: {data['title']}") print(f" Score: {data['score']}") # 2. app_get_field() - Get single field print("\n2. app_get_field(app_id, field, lang='en', country='us')") title = scraper.app_get_field(app_id, "title", lang=lang, country=country) print(f" Title: {title}") # 3. app_get_fields() - Get multiple fields print("\n3. app_get_fields(app_id, fields, lang='en', country='us')") fields = scraper.app_get_fields(app_id, ["title", "score", "installs"], lang=lang, country=country) print(f" {fields}") # 4. app_print_field() - Print single field print("\n4. app_print_field(app_id, field, lang='en', country='us')") scraper.app_print_field(app_id, "developer", lang=lang, country=country) # 5. app_print_fields() - Print multiple fields print("\n5. app_print_fields(app_id, fields, lang='en', country='us')") scraper.app_print_fields(app_id, ["title", "score", "free"], lang=lang, country=country) # 6. app_print_all() - Print all data as JSON print("\n6. app_print_all(app_id, lang='en', country='us')") scraper.app_print_all(app_id, lang=lang, country=country) ================================================ FILE: examples/developer_methods_example.py ================================================ """ Developer Methods Example Demonstrates all 6 developer methods for getting developer's apps Parameters: - dev_id: Developer ID (numeric or string) - count: Number of apps (default: 100) - lang: Language code (default: 'en') - country: Country code (default: 'us') """ from gplay_scraper import GPlayScraper scraper = GPlayScraper() dev_id = "5700313618786177705" # Google LLC count = 20 lang = "en" country = "us" print("=== Developer Methods Example ===\n") # 1. 
developer_analyze() - Get all developer apps print("1. developer_analyze(dev_id, count=100, lang='en', country='us')") apps = scraper.developer_analyze(dev_id, count=count, lang=lang, country=country) print(f" Found {len(apps)} apps") print(f" First app: {apps[0]['title']}") # 2. developer_get_field() - Get single field from all apps print("\n2. developer_get_field(dev_id, field, count=100, lang='en', country='us')") titles = scraper.developer_get_field(dev_id, "title", count=count, lang=lang, country=country) print(f" Titles: {titles[:3]}") # 3. developer_get_fields() - Get multiple fields from all apps print("\n3. developer_get_fields(dev_id, fields, count=100, lang='en', country='us')") apps_data = scraper.developer_get_fields(dev_id, ["title", "score"], count=10, lang=lang, country=country) print(f" First 2 apps: {apps_data[:2]}") # 4. developer_print_field() - Print single field from all apps print("\n4. developer_print_field(dev_id, field, count=100, lang='en', country='us')") scraper.developer_print_field(dev_id, "title", count=5, lang=lang, country=country) # 5. developer_print_fields() - Print multiple fields from all apps print("\n5. developer_print_fields(dev_id, fields, count=100, lang='en', country='us')") scraper.developer_print_fields(dev_id, ["title", "score"], count=5, lang=lang, country=country) # 6. developer_print_all() - Print all developer apps as JSON print("\n6. developer_print_all(dev_id, count=100, lang='en', country='us')") scraper.developer_print_all(dev_id, count=5, lang=lang, country=country) ================================================ FILE: examples/list_methods_example.py ================================================ """ List Methods Example Demonstrates all 6 list methods for getting top charts Parameters: - collection: Chart type - 'TOP_FREE', 'TOP_PAID', 'TOP_GROSSING' (default: 'TOP_FREE') - category: Category filter (default: 'APPLICATION') - count: Number of apps (default: 100) - lang: Language code (default: 'en') - country: Country code (default: 'us') """ from gplay_scraper import GPlayScraper scraper = GPlayScraper() collection = "TOP_FREE" category = "GAME" count = 20 lang = "en" country = "us" print("=== List Methods Example ===\n") # 1. list_analyze() - Get all top chart apps print("1. list_analyze(collection='TOP_FREE', category='APPLICATION', count=100, lang='en', country='us')") apps = scraper.list_analyze(collection, category, count=count, lang=lang, country=country) print(f" Found {len(apps)} apps") print(f" First app: {apps[0]['title']}") # 2. list_get_field() - Get single field from all apps print("\n2. list_get_field(collection, field, category='APPLICATION', count=100, lang='en', country='us')") titles = scraper.list_get_field(collection, "title", category, count=count, lang=lang, country=country) print(f" Titles: {titles[:3]}") # 3. list_get_fields() - Get multiple fields from all apps print("\n3. list_get_fields(collection, fields, category='APPLICATION', count=100, lang='en', country='us')") apps_data = scraper.list_get_fields(collection, ["title", "score"], category, count=10, lang=lang, country=country) print(f" First 2 apps: {apps_data[:2]}") # 4. list_print_field() - Print single field from all apps print("\n4. list_print_field(collection, field, category='APPLICATION', count=100, lang='en', country='us')") scraper.list_print_field(collection, "title", category, count=5, lang=lang, country=country) # 5. list_print_fields() - Print multiple fields from all apps print("\n5. 
list_print_fields(collection, fields, category='APPLICATION', count=100, lang='en', country='us')") scraper.list_print_fields(collection, ["title", "score"], category, count=5, lang=lang, country=country) # 6. list_print_all() - Print all top chart apps as JSON print("\n6. list_print_all(collection='TOP_FREE', category='APPLICATION', count=100, lang='en', country='us')") scraper.list_print_all(collection, category, count=5, lang=lang, country=country) ================================================ FILE: examples/reviews_methods_example.py ================================================ """ Reviews Methods Example Demonstrates all 6 reviews methods for extracting user reviews Parameters: - app_id: App package name - count: Number of reviews (default: 100) - lang: Language code (default: 'en') - country: Country code (default: 'us') - sort: Sort order - 'NEWEST', 'RELEVANT', 'RATING' (default: 'NEWEST') """ from gplay_scraper import GPlayScraper scraper = GPlayScraper() app_id = "com.whatsapp" count = 20 lang = "en" country = "us" sort = "NEWEST" print("=== Reviews Methods Example ===\n") # 1. reviews_analyze() - Get all reviews print("1. reviews_analyze(app_id, count=100, lang='en', country='us', sort='NEWEST')") reviews = scraper.reviews_analyze(app_id, count=count, lang=lang, country=country, sort=sort) print(f" Retrieved {len(reviews)} reviews") print(f" First review score: {reviews[0]['score']}") # 2. reviews_get_field() - Get single field from all reviews print("\n2. reviews_get_field(app_id, field, count=100, lang='en', country='us', sort='NEWEST')") scores = scraper.reviews_get_field(app_id, "score", count=count, lang=lang, country=country, sort=sort) print(f" Scores: {scores[:5]}") # 3. reviews_get_fields() - Get multiple fields from all reviews print("\n3. reviews_get_fields(app_id, fields, count=100, lang='en', country='us', sort='NEWEST')") review_data = scraper.reviews_get_fields(app_id, ["userName", "score"], count=10, lang=lang, country=country, sort=sort) print(f" First 2 reviews: {review_data[:2]}") # 4. reviews_print_field() - Print single field from all reviews print("\n4. reviews_print_field(app_id, field, count=100, lang='en', country='us', sort='NEWEST')") scraper.reviews_print_field(app_id, "score", count=5, lang=lang, country=country, sort=sort) # 5. reviews_print_fields() - Print multiple fields from all reviews print("\n5. reviews_print_fields(app_id, fields, count=100, lang='en', country='us', sort='NEWEST')") scraper.reviews_print_fields(app_id, ["userName", "score"], count=5, lang=lang, country=country, sort=sort) # 6. reviews_print_all() - Print all reviews as JSON print("\n6. reviews_print_all(app_id, count=100, lang='en', country='us', sort='NEWEST')") scraper.reviews_print_all(app_id, count=5, lang=lang, country=country, sort=sort) ================================================ FILE: examples/search_methods_example.py ================================================ """ Search Methods Example Demonstrates all 6 search methods for finding apps Parameters: - query: Search keyword - count: Number of results (default: 100) - lang: Language code (default: 'en') - country: Country code (default: 'us') """ from gplay_scraper import GPlayScraper scraper = GPlayScraper() query = "social media" count = 10 lang = "en" country = "us" print("=== Search Methods Example ===\n") # 1. search_analyze() - Get all search results print("1. 
search_analyze(query, count=100, lang='en', country='us')") results = scraper.search_analyze(query, count=count, lang=lang, country=country) print(f" Found {len(results)} apps") print(f" First app: {results[0]['title']}") # 2. search_get_field() - Get single field from all results print("\n2. search_get_field(query, field, count=100, lang='en', country='us')") titles = scraper.search_get_field(query, "title", count=count, lang=lang, country=country) print(f" Titles: {titles[:3]}") # 3. search_get_fields() - Get multiple fields from all results print("\n3. search_get_fields(query, fields, count=100, lang='en', country='us')") apps = scraper.search_get_fields(query, ["title", "score"], count=count, lang=lang, country=country) print(f" First 2 apps: {apps[:2]}") # 4. search_print_field() - Print single field from all results print("\n4. search_print_field(query, field, count=100, lang='en', country='us')") scraper.search_print_field(query, "title", count=5, lang=lang, country=country) # 5. search_print_fields() - Print multiple fields from all results print("\n5. search_print_fields(query, fields, count=100, lang='en', country='us')") scraper.search_print_fields(query, ["title", "developer"], count=5, lang=lang, country=country) # 6. search_print_all() - Print all search results as JSON print("\n6. search_print_all(query, count=100, lang='en', country='us')") scraper.search_print_all(query, count=5, lang=lang, country=country) ================================================ FILE: examples/similar_methods_example.py ================================================ """ Similar Methods Example Demonstrates all 6 similar methods for finding related apps Parameters: - app_id: App package name - count: Number of similar apps (default: 100) - lang: Language code (default: 'en') - country: Country code (default: 'us') """ from gplay_scraper import GPlayScraper scraper = GPlayScraper() app_id = "com.whatsapp" count = 20 lang = "en" country = "us" print("=== Similar Methods Example ===\n") # 1. similar_analyze() - Get all similar apps print("1. similar_analyze(app_id, count=100, lang='en', country='us')") apps = scraper.similar_analyze(app_id, count=count, lang=lang, country=country) print(f" Found {len(apps)} similar apps") print(f" First app: {apps[0]['title']}") # 2. similar_get_field() - Get single field from all similar apps print("\n2. similar_get_field(app_id, field, count=100, lang='en', country='us')") titles = scraper.similar_get_field(app_id, "title", count=count, lang=lang, country=country) print(f" Titles: {titles[:3]}") # 3. similar_get_fields() - Get multiple fields from all similar apps print("\n3. similar_get_fields(app_id, fields, count=100, lang='en', country='us')") apps_data = scraper.similar_get_fields(app_id, ["title", "score"], count=10, lang=lang, country=country) print(f" First 2 apps: {apps_data[:2]}") # 4. similar_print_field() - Print single field from all similar apps print("\n4. similar_print_field(app_id, field, count=100, lang='en', country='us')") scraper.similar_print_field(app_id, "title", count=5, lang=lang, country=country) # 5. similar_print_fields() - Print multiple fields from all similar apps print("\n5. similar_print_fields(app_id, fields, count=100, lang='en', country='us')") scraper.similar_print_fields(app_id, ["title", "score"], count=5, lang=lang, country=country) # 6. similar_print_all() - Print all similar apps as JSON print("\n6. 
similar_print_all(app_id, count=100, lang='en', country='us')") scraper.similar_print_all(app_id, count=5, lang=lang, country=country) ================================================ FILE: examples/suggest_methods_example.py ================================================ """ Suggest Methods Example Demonstrates all 4 suggest methods for getting search suggestions Parameters: - term: Search term - count: Number of suggestions (default: 5) - lang: Language code (default: 'en') - country: Country code (default: 'us') """ from gplay_scraper import GPlayScraper scraper = GPlayScraper() term = "fitness" count = 5 lang = "en" country = "us" print("=== Suggest Methods Example ===\n") # 1. suggest_analyze() - Get search suggestions print("1. suggest_analyze(term, count=5, lang='en', country='us')") suggestions = scraper.suggest_analyze(term, count=count, lang=lang, country=country) print(f" Suggestions: {suggestions}") # 2. suggest_nested() - Get nested suggestions print("\n2. suggest_nested(term, count=5, lang='en', country='us')") nested = scraper.suggest_nested(term, count=count, lang=lang, country=country) print(f" Nested suggestions (first 2):") for i, (key, values) in enumerate(list(nested.items())[:2]): print(f" {key}: {values}") # 3. suggest_print_all() - Print suggestions as JSON print("\n3. suggest_print_all(term, count=5, lang='en', country='us')") scraper.suggest_print_all(term, count=count, lang=lang, country=country) # 4. suggest_print_nested() - Print nested suggestions as JSON print("\n4. suggest_print_nested(term, count=5, lang='en', country='us')") scraper.suggest_print_nested(term, count=count, lang=lang, country=country) ================================================ FILE: gplay_scraper/__init__.py ================================================ """GPlay Scraper - Google Play Store scraping library. This package provides comprehensive tools for scraping Google Play Store data including: - App details (65+ fields) - Search results - User reviews - Developer portfolios - Similar apps - Top charts - Search suggestions """ import logging # Import main scraper class from .app import GPlayScraper # Import all method classes from .core.gplay_methods import AppMethods, SearchMethods, ReviewsMethods, DeveloperMethods, SimilarMethods, ListMethods, SuggestMethods # Import configuration from .config import Config # Import custom exceptions from .exceptions import ( GPlayScraperError, InvalidAppIdError, AppNotFoundError, RateLimitError, NetworkError, DataParsingError, ) # Configure logging to use NullHandler by default logging.getLogger(__name__).addHandler(logging.NullHandler()) # Package metadata __version__ = "1.0.6" # Public API exports __all__ = [ "GPlayScraper", "AppMethods", "SearchMethods", "ReviewsMethods", "DeveloperMethods", "SimilarMethods", "ListMethods", "SuggestMethods", "Config", "GPlayScraperError", "InvalidAppIdError", "AppNotFoundError", "RateLimitError", "NetworkError", "DataParsingError", ] ================================================ FILE: gplay_scraper/app.py ================================================ """Main GPlayScraper class that provides unified access to all scraping methods. This module contains the main GPlayScraper class which aggregates all 7 method types and provides 42 functions for interacting with Google Play Store data. 
""" from .core.gplay_methods import AppMethods, SearchMethods, ReviewsMethods, DeveloperMethods, SimilarMethods, ListMethods, SuggestMethods from .config import Config from typing import Any, List, Dict class GPlayScraper: """Main scraper class providing access to all Google Play Store scraping methods. This class aggregates 7 method types: - App Methods: Extract 65+ fields from any app - Search Methods: Search for apps by keyword - Reviews Methods: Extract user reviews and ratings - Developer Methods: Get all apps from a developer - List Methods: Get top charts (free, paid, grossing) - Similar Methods: Find similar/competitor apps - Suggest Methods: Get search suggestions Args: http_client: HTTP client to use (requests, curl_cffi, tls_client, httpx, urllib3, cloudscraper, aiohttp) """ def __init__(self, http_client: str = None): """Initialize GPlayScraper with all method types. Args: http_client: Optional HTTP client name. Defaults to 'requests' with automatic fallback. """ # Initialize all 7 method types self.app_methods = AppMethods(http_client) self.search_methods = SearchMethods(http_client) self.reviews_methods = ReviewsMethods(http_client) self.developer_methods = DeveloperMethods(http_client) self.similar_methods = SimilarMethods(http_client) self.list_methods = ListMethods(http_client) self.suggest_methods = SuggestMethods(http_client) # ==================== App Methods ==================== def app_analyze(self, app_id: str, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, assets: str = None) -> Dict: """Get complete app data with 65+ fields. Args: app_id: Google Play app ID (e.g., 'com.whatsapp') lang: Language code (default: 'en') country: Country code (default: 'us') assets: Asset size (SMALL=512px, MEDIUM=1024px, LARGE=2048px, ORIGINAL=max) Returns: Dictionary containing all app data """ return self.app_methods.app_analyze(app_id, lang, country, assets) def app_get_field(self, app_id: str, field: str, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, assets: str = None) -> Any: """Get single field value from app data. Args: app_id: Google Play app ID field: Field name to retrieve lang: Language code country: Country code assets: Asset size (SMALL, MEDIUM, LARGE, ORIGINAL) Returns: Value of the requested field """ return self.app_methods.app_get_field(app_id, field, lang, country, assets) def app_get_fields(self, app_id: str, fields: List[str], lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, assets: str = None) -> Dict[str, Any]: """Get multiple field values from app data. Args: app_id: Google Play app ID fields: List of field names to retrieve lang: Language code country: Country code assets: Asset size (SMALL, MEDIUM, LARGE, ORIGINAL) Returns: Dictionary with requested fields and values """ return self.app_methods.app_get_fields(app_id, fields, lang, country, assets) def app_print_field(self, app_id: str, field: str, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, assets: str = None) -> None: """Print single field value to console. Args: app_id: Google Play app ID field: Field name to print lang: Language code country: Country code assets: Asset size (SMALL, MEDIUM, LARGE, ORIGINAL) """ return self.app_methods.app_print_field(app_id, field, lang, country, assets) def app_print_fields(self, app_id: str, fields: List[str], lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, assets: str = None) -> None: """Print multiple field values to console. 
Args: app_id: Google Play app ID fields: List of field names to print lang: Language code country: Country code assets: Asset size (SMALL, MEDIUM, LARGE, ORIGINAL) """ return self.app_methods.app_print_fields(app_id, fields, lang, country, assets) def app_print_all(self, app_id: str, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, assets: str = None) -> None: """Print all app data as JSON to console. Args: app_id: Google Play app ID lang: Language code country: Country code assets: Asset size (SMALL, MEDIUM, LARGE, ORIGINAL) """ return self.app_methods.app_print_all(app_id, lang, country, assets) # ==================== Search Methods ==================== def search_analyze(self, query: str, count: int = Config.DEFAULT_SEARCH_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Dict]: """Search for apps and get complete results. Args: query: Search query string count: Number of results to return lang: Language code country: Country code Returns: List of dictionaries containing app data """ return self.search_methods.search_analyze(query, count, lang, country) def search_get_field(self, query: str, field: str, count: int = Config.DEFAULT_SEARCH_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Any]: """Get single field from search results. Args: query: Search query string field: Field name to retrieve count: Number of results lang: Language code country: Country code Returns: List of field values """ return self.search_methods.search_get_field(query, field, count, lang, country) def search_get_fields(self, query: str, fields: List[str], count: int = Config.DEFAULT_SEARCH_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Dict[str, Any]]: """Get multiple fields from search results. Args: query: Search query string fields: List of field names count: Number of results lang: Language code country: Country code Returns: List of dictionaries with requested fields """ return self.search_methods.search_get_fields(query, fields, count, lang, country) def search_print_field(self, query: str, field: str, count: int = Config.DEFAULT_SEARCH_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print single field from search results. Args: query: Search query string field: Field name to print count: Number of results lang: Language code country: Country code """ return self.search_methods.search_print_field(query, field, count, lang, country) def search_print_fields(self, query: str, fields: List[str], count: int = Config.DEFAULT_SEARCH_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print multiple fields from search results. Args: query: Search query string fields: List of field names count: Number of results lang: Language code country: Country code """ return self.search_methods.search_print_fields(query, fields, count, lang, country) def search_print_all(self, query: str, count: int = Config.DEFAULT_SEARCH_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print all search results as JSON. 
Args: query: Search query string count: Number of results lang: Language code country: Country code """ return self.search_methods.search_print_all(query, count, lang, country) # ==================== Reviews Methods ==================== def reviews_analyze(self, app_id: str, count: int = Config.DEFAULT_REVIEWS_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, sort: str = Config.DEFAULT_REVIEWS_SORT) -> List[Dict]: """Get user reviews for an app. Args: app_id: Google Play app ID count: Number of reviews to fetch lang: Language code country: Country code sort: Sort order (NEWEST, RELEVANT, RATING) Returns: List of review dictionaries """ return self.reviews_methods.reviews_analyze(app_id, count, lang, country, sort) def reviews_get_field(self, app_id: str, field: str, count: int = Config.DEFAULT_REVIEWS_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, sort: str = Config.DEFAULT_REVIEWS_SORT) -> List[Any]: """Get single field from reviews. Args: app_id: Google Play app ID field: Field name to retrieve count: Number of reviews lang: Language code country: Country code sort: Sort order Returns: List of field values """ return self.reviews_methods.reviews_get_field(app_id, field, count, lang, country, sort) def reviews_get_fields(self, app_id: str, fields: List[str], count: int = Config.DEFAULT_REVIEWS_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, sort: str = Config.DEFAULT_REVIEWS_SORT) -> List[Dict[str, Any]]: """Get multiple fields from reviews. Args: app_id: Google Play app ID fields: List of field names count: Number of reviews lang: Language code country: Country code sort: Sort order Returns: List of dictionaries with requested fields """ return self.reviews_methods.reviews_get_fields(app_id, fields, count, lang, country, sort) def reviews_print_field(self, app_id: str, field: str, count: int = Config.DEFAULT_REVIEWS_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, sort: str = Config.DEFAULT_REVIEWS_SORT) -> None: """Print single field from reviews. Args: app_id: Google Play app ID field: Field name to print count: Number of reviews lang: Language code country: Country code sort: Sort order """ return self.reviews_methods.reviews_print_field(app_id, field, count, lang, country, sort) def reviews_print_fields(self, app_id: str, fields: List[str], count: int = Config.DEFAULT_REVIEWS_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, sort: str = Config.DEFAULT_REVIEWS_SORT) -> None: """Print multiple fields from reviews. Args: app_id: Google Play app ID fields: List of field names count: Number of reviews lang: Language code country: Country code sort: Sort order """ return self.reviews_methods.reviews_print_fields(app_id, fields, count, lang, country, sort) def reviews_print_all(self, app_id: str, count: int = Config.DEFAULT_REVIEWS_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, sort: str = Config.DEFAULT_REVIEWS_SORT) -> None: """Print all reviews as JSON. 
Args: app_id: Google Play app ID count: Number of reviews lang: Language code country: Country code sort: Sort order """ return self.reviews_methods.reviews_print_all(app_id, count, lang, country, sort) # ==================== Developer Methods ==================== def developer_analyze(self, dev_id: str, count: int = Config.DEFAULT_DEVELOPER_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Dict]: """Get all apps from a developer. Args: dev_id: Developer ID (numeric or string) count: Number of apps to return lang: Language code country: Country code Returns: List of app dictionaries """ return self.developer_methods.developer_analyze(dev_id, count, lang, country) def developer_get_field(self, dev_id: str, field: str, count: int = Config.DEFAULT_DEVELOPER_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Any]: """Get single field from developer apps. Args: dev_id: Developer ID field: Field name to retrieve count: Number of apps lang: Language code country: Country code Returns: List of field values """ return self.developer_methods.developer_get_field(dev_id, field, count, lang, country) def developer_get_fields(self, dev_id: str, fields: List[str], count: int = Config.DEFAULT_DEVELOPER_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Dict[str, Any]]: """Get multiple fields from developer apps. Args: dev_id: Developer ID fields: List of field names count: Number of apps lang: Language code country: Country code Returns: List of dictionaries with requested fields """ return self.developer_methods.developer_get_fields(dev_id, fields, count, lang, country) def developer_print_field(self, dev_id: str, field: str, count: int = Config.DEFAULT_DEVELOPER_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print single field from developer apps. Args: dev_id: Developer ID field: Field name to print count: Number of apps lang: Language code country: Country code """ return self.developer_methods.developer_print_field(dev_id, field, count, lang, country) def developer_print_fields(self, dev_id: str, fields: List[str], count: int = Config.DEFAULT_DEVELOPER_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print multiple fields from developer apps. Args: dev_id: Developer ID fields: List of field names count: Number of apps lang: Language code country: Country code """ return self.developer_methods.developer_print_fields(dev_id, fields, count, lang, country) def developer_print_all(self, dev_id: str, count: int = Config.DEFAULT_DEVELOPER_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print all developer apps as JSON. Args: dev_id: Developer ID count: Number of apps lang: Language code country: Country code """ return self.developer_methods.developer_print_all(dev_id, count, lang, country) # ==================== Similar Methods ==================== def similar_analyze(self, app_id: str, count: int = Config.DEFAULT_SIMILAR_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Dict]: """Get similar/competitor apps. 
Args: app_id: Google Play app ID count: Number of similar apps to return lang: Language code country: Country code Returns: List of similar app dictionaries """ return self.similar_methods.similar_analyze(app_id, count, lang, country) def similar_get_field(self, app_id: str, field: str, count: int = Config.DEFAULT_SIMILAR_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Any]: """Get single field from similar apps. Args: app_id: Google Play app ID field: Field name to retrieve count: Number of similar apps lang: Language code country: Country code Returns: List of field values """ return self.similar_methods.similar_get_field(app_id, field, count, lang, country) def similar_get_fields(self, app_id: str, fields: List[str], count: int = Config.DEFAULT_SIMILAR_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Dict[str, Any]]: """Get multiple fields from similar apps. Args: app_id: Google Play app ID fields: List of field names count: Number of similar apps lang: Language code country: Country code Returns: List of dictionaries with requested fields """ return self.similar_methods.similar_get_fields(app_id, fields, count, lang, country) def similar_print_field(self, app_id: str, field: str, count: int = Config.DEFAULT_SIMILAR_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print single field from similar apps. Args: app_id: Google Play app ID field: Field name to print count: Number of similar apps lang: Language code country: Country code """ return self.similar_methods.similar_print_field(app_id, field, count, lang, country) def similar_print_fields(self, app_id: str, fields: List[str], count: int = Config.DEFAULT_SIMILAR_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print multiple fields from similar apps. Args: app_id: Google Play app ID fields: List of field names count: Number of similar apps lang: Language code country: Country code """ return self.similar_methods.similar_print_fields(app_id, fields, count, lang, country) def similar_print_all(self, app_id: str, count: int = Config.DEFAULT_SIMILAR_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print all similar apps as JSON. Args: app_id: Google Play app ID count: Number of similar apps lang: Language code country: Country code """ return self.similar_methods.similar_print_all(app_id, count, lang, country) # ==================== List Methods ==================== def list_analyze(self, collection: str = Config.DEFAULT_LIST_COLLECTION, category: str = Config.DEFAULT_LIST_CATEGORY, count: int = Config.DEFAULT_LIST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Dict]: """Get top charts (top free, top paid, top grossing). Args: collection: Collection type (TOP_FREE, TOP_PAID, TOP_GROSSING) category: App category count: Number of apps to return lang: Language code country: Country code Returns: List of app dictionaries from top charts """ return self.list_methods.list_analyze(collection, category, count, lang, country) def list_get_field(self, collection: str, field: str, category: str = Config.DEFAULT_LIST_CATEGORY, count: int = Config.DEFAULT_LIST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Any]: """Get single field from top charts. 
Args: collection: Collection type field: Field name to retrieve category: App category count: Number of apps lang: Language code country: Country code Returns: List of field values """ return self.list_methods.list_get_field(collection, field, category, count, lang, country) def list_get_fields(self, collection: str, fields: List[str], category: str = Config.DEFAULT_LIST_CATEGORY, count: int = Config.DEFAULT_LIST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Dict[str, Any]]: """Get multiple fields from top charts. Args: collection: Collection type fields: List of field names category: App category count: Number of apps lang: Language code country: Country code Returns: List of dictionaries with requested fields """ return self.list_methods.list_get_fields(collection, fields, category, count, lang, country) def list_print_field(self, collection: str, field: str, category: str = Config.DEFAULT_LIST_CATEGORY, count: int = Config.DEFAULT_LIST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print single field from top charts. Args: collection: Collection type field: Field name to print category: App category count: Number of apps lang: Language code country: Country code """ return self.list_methods.list_print_field(collection, field, category, count, lang, country) def list_print_fields(self, collection: str, fields: List[str], category: str = Config.DEFAULT_LIST_CATEGORY, count: int = Config.DEFAULT_LIST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print multiple fields from top charts. Args: collection: Collection type fields: List of field names category: App category count: Number of apps lang: Language code country: Country code """ return self.list_methods.list_print_fields(collection, fields, category, count, lang, country) def list_print_all(self, collection: str = Config.DEFAULT_LIST_COLLECTION, category: str = Config.DEFAULT_LIST_CATEGORY, count: int = Config.DEFAULT_LIST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print all top charts as JSON. Args: collection: Collection type category: App category count: Number of apps lang: Language code country: Country code """ return self.list_methods.list_print_all(collection, category, count, lang, country) # ==================== Suggest Methods ==================== def suggest_analyze(self, term: str, count: int = Config.DEFAULT_SUGGEST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[str]: """Get search suggestions for a term. Args: term: Search term count: Number of suggestions to return lang: Language code country: Country code Returns: List of suggestion strings """ return self.suggest_methods.suggest_analyze(term, count, lang, country) def suggest_nested(self, term: str, count: int = Config.DEFAULT_SUGGEST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> Dict[str, List[str]]: """Get nested suggestions (suggestions for suggestions). Args: term: Search term count: Number of suggestions lang: Language code country: Country code Returns: Dictionary mapping terms to their suggestions """ return self.suggest_methods.suggest_nested(term, count, lang, country) def suggest_print_all(self, term: str, count: int = Config.DEFAULT_SUGGEST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print all suggestions as JSON. 
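# Illustrative usage sketch (not part of the source file): the wrapper methods above
# delegate to the per-type method classes, so one facade object covers developer,
# similar, list and suggest lookups. Assumes GPlayScraper is exported from the
# package root; the IDs and search term below are hypothetical placeholders.
from gplay_scraper import GPlayScraper

scraper = GPlayScraper()
portfolio = scraper.developer_analyze("1234567890123456789", count=10)          # numeric dev ID
rivals = scraper.similar_get_fields("com.example.app", ["appId", "score"], count=10)
scraper.list_print_field("TOP_FREE", "title", count=5)                          # top-chart titles
related_terms = scraper.suggest_nested("fitness", count=3)                      # term -> suggestions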
Args: term: Search term count: Number of suggestions lang: Language code country: Country code """ return self.suggest_methods.suggest_print_all(term, count, lang, country) def suggest_print_nested(self, term: str, count: int = Config.DEFAULT_SUGGEST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print nested suggestions as JSON. Args: term: Search term count: Number of suggestions lang: Language code country: Country code """ return self.suggest_methods.suggest_print_nested(term, count, lang, country) ================================================ FILE: gplay_scraper/config.py ================================================ """Configuration module for GPlay Scraper. Contains all constants, default values, URLs, and error messages. """ import random from typing import Dict, Any class Config: """Configuration class containing all settings and constants.""" # HTTP request settings DEFAULT_TIMEOUT = 30 # Request timeout in seconds RATE_LIMIT_DELAY = 1.0 # Delay between requests in seconds DEFAULT_RETRY_COUNT = 3 # Number of retries for failed requests # User agent strings for HTTP requests USER_AGENTS = [ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Safari/605.1.15", ] # Google Play Store URLs PLAY_STORE_BASE_URL = "https://play.google.com" APP_DETAILS_ENDPOINT = "/store/apps/details" # App details page BATCHEXECUTE_ENDPOINT = "/_/PlayStoreUi/data/batchexecute" # Batch API endpoint DEVELOPER_NUMERIC_ENDPOINT = "/store/apps/dev" # Developer page (numeric ID) DEVELOPER_STRING_ENDPOINT = "/store/apps/developer" # Developer page (string ID) # Default parameters DEFAULT_LANGUAGE = "en" # Default language code DEFAULT_COUNTRY = "" # Default country code DEFAULT_REVIEWS_SORT = "NEWEST" # Options: NEWEST, RELEVANT, RATING DEFAULT_HTTP_CLIENT = "requests" # Options: requests, httpx, curl-cffi, tls-client, aiohttp, urllib3, cloudscraper # Default collection and category for list methods DEFAULT_LIST_COLLECTION = "TOP_FREE" # Options: TOP_FREE, TOP_PAID, TOP_GROSSING DEFAULT_LIST_CATEGORY = "APPLICATION" # Default category # Default count values for different methods DEFAULT_LIST_COUNT = 100 # Number of apps to fetch from lists DEFAULT_REVIEWS_COUNT = 100 # Number of reviews to fetch DEFAULT_REVIEWS_BATCH_SIZE = 50 # Reviews per batch request DEFAULT_SUGGEST_COUNT = 5 # Number of suggestions to fetch DEFAULT_SIMILAR_COUNT = 100 # Number of similar apps to fetch DEFAULT_DEVELOPER_COUNT = 100 # Number of developer apps to fetch DEFAULT_SEARCH_COUNT = 100 # Number of search results to fetch # Image size configurations IMAGE_SIZES = { "SMALL": "w512", # 512px width "MEDIUM": "w1024", # 1024px width "LARGE": "w2048", # 2048px width "ORIGINAL": "w9999" # Original/max size } DEFAULT_IMAGE_SIZE = "MEDIUM" # Default image size # Error message templates ERROR_MESSAGES = { "INVALID_APP_ID": "app_id must be a non-empty string", "INVALID_DEV_ID": "dev_id must be a non-empty string", "INVALID_QUERY": "query must be a non-empty string", "NO_DS5_DATA": "No data found in dataset", 
"DS5_NOT_FOUND": "Could not find data", "JSON_PARSE_FAILED": "Failed to parse JSON: {error}", "APP_FETCH_FAILED": "Failed to fetch app page for {app_id}: {error}", "SEARCH_FETCH_FAILED": "Failed to fetch search results for '{query}': {error}", "REVIEWS_FETCH_FAILED": "Failed to fetch reviews batch for {app_id}: {error}", "REVIEWS_SCRAPE_FAILED": "Failed to scrape reviews for {app_id}: {error}", "DEVELOPER_FETCH_FAILED": "Failed to fetch developer page for {dev_id}: {error}", "CLUSTER_FETCH_FAILED": "Failed to fetch cluster page: {error}", "LIST_FETCH_FAILED": "Failed to fetch list page: {error}", "SUGGEST_FETCH_FAILED": "Failed to fetch suggestions for '{term}': {error}", "RATE_LIMIT_SLEEP": "Rate limiting: sleeping for {sleep_time:.2f} seconds", "HTTP_CLIENT_NOT_AVAILABLE": "{client} not available", "HTTP_ERROR": "HTTP {status_code} Error", "NO_HTTP_CLIENT": "No HTTP client libraries found", "CLIENT_FAILED_TRYING_NEXT": "{client_type} failed, trying next client: {error}", "UNKNOWN_CLIENT_TYPE": "Unknown client type: {client_type}", "APP_NOT_FOUND": "App not found: {app_id}", "SEARCH_NOT_FOUND": "Search not found: {query}", "REVIEWS_NOT_FOUND": "Reviews not found for app: {app_id}", "DEVELOPER_NOT_FOUND": "Developer not found: {dev_id}", "CLUSTER_NOT_FOUND": "Cluster not found: {cluster_url}", "LIST_NOT_FOUND": "List not found: {collection}/{category}", "SUGGEST_NOT_FOUND": "Suggestions not found for: {term}", "NO_DS3_DATA": "No data found in dataset", "DS3_NOT_FOUND": "Could not find data", "DS3_JSON_PARSE_FAILED": "Failed to parse JSON: {error}", "SEARCH_PAGINATION_FAILED": "Failed to fetch paginated search results: {error}" } @classmethod def get_headers(cls, user_agent: str = None) -> Dict[str, str]: """Generate HTTP headers with random or specified user agent. Args: user_agent: Optional custom user agent string Returns: Dictionary containing HTTP headers """ return { "User-Agent": user_agent or random.choice(cls.USER_AGENTS) } @classmethod def get_image_size(cls, size: str = None) -> str: """Get image size parameter. Args: size: Size name (SMALL, MEDIUM, LARGE, ORIGINAL) or None for default Returns: Image size parameter string """ size = size or cls.DEFAULT_IMAGE_SIZE return cls.IMAGE_SIZES.get(size.upper(), cls.IMAGE_SIZES[cls.DEFAULT_IMAGE_SIZE]) ================================================ FILE: gplay_scraper/core/__init__.py ================================================ """Core module containing all 7 method classes for Google Play Store scraping.""" from .gplay_methods import AppMethods, SearchMethods, ReviewsMethods, DeveloperMethods, SimilarMethods, ListMethods, SuggestMethods __all__ = ['AppMethods', 'SearchMethods', 'ReviewsMethods', 'DeveloperMethods', 'SimilarMethods', 'ListMethods', 'SuggestMethods'] ================================================ FILE: gplay_scraper/core/gplay_methods.py ================================================ """Method classes for all 7 scraping types. 
This module contains 7 method classes, each providing 6 functions (except Suggest with 4): - analyze(): Get all data - get_field(): Get single field - get_fields(): Get multiple fields - print_field(): Print single field - print_fields(): Print multiple fields - print_all(): Print all data as JSON """ import json from typing import Any, List, Dict import logging from .gplay_scraper import AppScraper, SearchScraper, ReviewsScraper, DeveloperScraper, SimilarScraper, ListScraper, SuggestScraper from .gplay_parser import AppParser, SearchParser, ReviewsParser, DeveloperParser, SimilarParser, ListParser, SuggestParser from ..config import Config from ..exceptions import InvalidAppIdError, AppNotFoundError from ..utils.error_handling import comprehensive_error_handler, safe_print # Configure logging if not logging.getLogger().handlers: logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) class AppMethods: """Methods for extracting app details with 65+ fields.""" def __init__(self, http_client: str = None): """Initialize AppMethods with scraper and parser. Args: http_client: Optional HTTP client name """ self.scraper = AppScraper(http_client=http_client) self.parser = AppParser() @comprehensive_error_handler() def app_analyze(self, app_id: str, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, assets: str = None) -> Dict: """Get complete app data with all 65+ fields. Args: app_id: Google Play app ID lang: Language code country: Country code assets: Asset size (SMALL, MEDIUM, LARGE, ORIGINAL) Returns: Dictionary with all app data or None if app not found after retries Raises: InvalidAppIdError: If app_id is invalid """ if not app_id or not isinstance(app_id, str): raise InvalidAppIdError(Config.ERROR_MESSAGES["INVALID_APP_ID"]) dataset = self.scraper.scrape_play_store_data(app_id, lang, country) app_details = self.parser.parse_app_data(dataset, app_id, self.scraper, assets) return self.parser.format_app_data(app_details) @comprehensive_error_handler() def app_get_field(self, app_id: str, field: str, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, assets: str = None) -> Any: """Get single field value from app data. Args: app_id: Google Play app ID field: Field name to retrieve lang: Language code country: Country code assets: Asset size (SMALL, MEDIUM, LARGE, ORIGINAL) Returns: Value of the requested field """ return self.app_analyze(app_id, lang, country, assets).get(field) @comprehensive_error_handler() def app_get_fields(self, app_id: str, fields: List[str], lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, assets: str = None) -> Dict[str, Any]: """Get multiple field values from app data. Args: app_id: Google Play app ID fields: List of field names to retrieve lang: Language code country: Country code assets: Asset size (SMALL, MEDIUM, LARGE, ORIGINAL) Returns: Dictionary with requested fields and values """ data = self.app_analyze(app_id, lang, country, assets) return {field: data.get(field) for field in fields} @safe_print() def app_print_field(self, app_id: str, field: str, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, assets: str = None) -> None: """Print single field value to console. 
Args: app_id: Google Play app ID field: Field name to print lang: Language code country: Country code assets: Asset size (SMALL, MEDIUM, LARGE, ORIGINAL) """ value = self.app_get_field(app_id, field, lang, country, assets) try: print(f"{field}: {value}") except UnicodeEncodeError: print(f"{field}: {repr(value)}") @safe_print() def app_print_fields(self, app_id: str, fields: List[str], lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, assets: str = None) -> None: """Print multiple field values to console. Args: app_id: Google Play app ID fields: List of field names to print lang: Language code country: Country code assets: Asset size (SMALL, MEDIUM, LARGE, ORIGINAL) """ data = self.app_get_fields(app_id, fields, lang, country, assets) for field, value in data.items(): try: print(f"{field}: {value}") except UnicodeEncodeError: print(f"{field}: {repr(value)}") @safe_print() def app_print_all(self, app_id: str, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, assets: str = None) -> None: """Print all app data as JSON to console. Args: app_id: Google Play app ID lang: Language code country: Country code assets: Asset size (SMALL, MEDIUM, LARGE, ORIGINAL) """ data = self.app_analyze(app_id, lang, country, assets) try: print(json.dumps(data, indent=2, ensure_ascii=False)) except UnicodeEncodeError: print(json.dumps(data, indent=2, ensure_ascii=True)) class SearchMethods: """Methods for searching apps by keyword.""" def __init__(self, http_client: str = None): """Initialize SearchMethods with scraper and parser. Args: http_client: Optional HTTP client name """ self.scraper = SearchScraper(http_client=http_client) self.parser = SearchParser() @comprehensive_error_handler(return_empty=True) def search_analyze(self, query: str, count: int = Config.DEFAULT_SEARCH_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Dict]: """Search for apps and get complete results with pagination support. Args: query: Search query string count: Number of results to return lang: Language code country: Country code Returns: List of dictionaries containing app data Raises: InvalidAppIdError: If query is invalid """ if not query or not isinstance(query, str): raise InvalidAppIdError(Config.ERROR_MESSAGES["INVALID_QUERY"]) dataset = self.scraper.scrape_play_store_data(query, count, lang, country) raw_results = self.parser.parse_search_results(dataset, count) return [self.parser.format_search_result(result) for result in raw_results] @comprehensive_error_handler(return_empty=True) def search_get_field(self, query: str, field: str, count: int = Config.DEFAULT_SEARCH_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Any]: """Get single field from all search results. Args: query: Search query string field: Field name to retrieve count: Number of results lang: Language code country: Country code Returns: List of field values from all results """ results = self.search_analyze(query, count, lang, country) return [app.get(field) for app in results] @comprehensive_error_handler(return_empty=True) def search_get_fields(self, query: str, fields: List[str], count: int = Config.DEFAULT_SEARCH_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Dict[str, Any]]: """Get multiple fields from all search results. 
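# Illustrative sketch (not part of the source file): using AppMethods directly, as
# exported by gplay_scraper.core. "com.example.app" is a placeholder ID; the field
# names match the keys emitted by AppParser.format_app_data, and assets selects the
# image-size suffix (SMALL, MEDIUM, LARGE, ORIGINAL).
from gplay_scraper.core import AppMethods

app = AppMethods()                       # an HTTP client name may optionally be passed
basics = app.app_get_fields("com.example.app", ["title", "score", "installs"])
icon_url = app.app_get_field("com.example.app", "icon", assets="LARGE")
app.app_print_all("com.example.app")     # dumps the full record as indented JSON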
Args: query: Search query string fields: List of field names to retrieve count: Number of results lang: Language code country: Country code Returns: List of dictionaries with requested fields """ results = self.search_analyze(query, count, lang, country) return [{field: app.get(field) for field in fields} for app in results] @safe_print() def search_print_field(self, query: str, field: str, count: int = Config.DEFAULT_SEARCH_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print single field from all search results. Args: query: Search query string field: Field name to print count: Number of results lang: Language code country: Country code """ values = self.search_get_field(query, field, count, lang, country) for i, value in enumerate(values): try: print(f"{i}. {field}: {value}") except UnicodeEncodeError: print(f"{i}. {field}: {repr(value)}") @safe_print() def search_print_fields(self, query: str, fields: List[str], count: int = Config.DEFAULT_SEARCH_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print multiple fields from all search results. Args: query: Search query string fields: List of field names to print count: Number of results lang: Language code country: Country code """ data = self.search_get_fields(query, fields, count, lang, country) for i, app_data in enumerate(data): try: field_str = ', '.join(f'{field}: {value}' for field, value in app_data.items()) print(f"{i}. {field_str}") except UnicodeEncodeError: field_str = ', '.join(f'{field}: {repr(value)}' for field, value in app_data.items()) print(f"{i}. {field_str}") @safe_print() def search_print_all(self, query: str, count: int = Config.DEFAULT_SEARCH_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print all search results as JSON. Args: query: Search query string count: Number of results lang: Language code country: Country code """ results = self.search_analyze(query, count, lang, country) for i, result in enumerate(results): try: print(json.dumps(result, indent=2, ensure_ascii=False)) except UnicodeEncodeError: print(json.dumps(result, indent=2, ensure_ascii=True)) class ReviewsMethods: """Methods for extracting user reviews and ratings.""" def __init__(self, http_client: str = None): """Initialize ReviewsMethods with scraper and parser. Args: http_client: Optional HTTP client name """ self.scraper = ReviewsScraper(http_client=http_client) self.parser = ReviewsParser() @comprehensive_error_handler(return_empty=True) def reviews_analyze(self, app_id: str, count: int = Config.DEFAULT_REVIEWS_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, sort: str = Config.DEFAULT_REVIEWS_SORT) -> List[Dict]: """Get user reviews for an app. 
Args: app_id: Google Play app ID count: Number of reviews to fetch lang: Language code country: Country code sort: Sort order (NEWEST, RELEVANT, RATING) Returns: List of review dictionaries Raises: InvalidAppIdError: If app_id is invalid """ if not app_id or not isinstance(app_id, str): raise InvalidAppIdError(Config.ERROR_MESSAGES["INVALID_APP_ID"]) if count <= 0: return [] try: dataset = self.scraper.scrape_reviews_data(app_id, count, lang, country, sort) reviews_data = self.parser.parse_multiple_responses(dataset) except Exception as e: logger.error(Config.ERROR_MESSAGES["REVIEWS_SCRAPE_FAILED"].format(app_id=app_id, error=e)) raise return self.parser.format_reviews_data(reviews_data) @comprehensive_error_handler(return_empty=True) def reviews_get_field(self, app_id: str, field: str, count: int = Config.DEFAULT_REVIEWS_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, sort: str = Config.DEFAULT_REVIEWS_SORT) -> List[Any]: """Get single field from all reviews. Args: app_id: Google Play app ID field: Field name to retrieve count: Number of reviews lang: Language code country: Country code sort: Sort order Returns: List of field values from all reviews """ reviews_data = self.reviews_analyze(app_id, count, lang, country, sort) return [review.get(field) for review in reviews_data] @comprehensive_error_handler(return_empty=True) def reviews_get_fields(self, app_id: str, fields: List[str], count: int = Config.DEFAULT_REVIEWS_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, sort: str = Config.DEFAULT_REVIEWS_SORT) -> List[Dict[str, Any]]: """Get multiple fields from all reviews. Args: app_id: Google Play app ID fields: List of field names to retrieve count: Number of reviews lang: Language code country: Country code sort: Sort order Returns: List of dictionaries with requested fields """ reviews_data = self.reviews_analyze(app_id, count, lang, country, sort) return [{field: review.get(field) for field in fields} for review in reviews_data] @safe_print() def reviews_print_field(self, app_id: str, field: str, count: int = Config.DEFAULT_REVIEWS_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, sort: str = Config.DEFAULT_REVIEWS_SORT) -> None: """Print single field from all reviews. Args: app_id: Google Play app ID field: Field name to print count: Number of reviews lang: Language code country: Country code sort: Sort order """ field_values = self.reviews_get_field(app_id, field, count, lang, country, sort) for i, value in enumerate(field_values): try: print(f"{i+1}. {field}: {value}") except UnicodeEncodeError: print(f"{i+1}. {field}: {repr(value)}") @safe_print() def reviews_print_fields(self, app_id: str, fields: List[str], count: int = Config.DEFAULT_REVIEWS_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, sort: str = Config.DEFAULT_REVIEWS_SORT) -> None: """Print multiple fields from all reviews. 
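# Illustrative sketch (not part of the source file): fetching review slices with
# ReviewsMethods. The app ID is a placeholder; sort accepts NEWEST, RELEVANT or
# RATING per the docstrings above, and a count of zero or less returns an empty list.
from gplay_scraper.core import ReviewsMethods

reviews = ReviewsMethods()
latest = reviews.reviews_get_fields(
    "com.example.app", ["userName", "score", "content"], count=20, sort="NEWEST"
)
scores = reviews.reviews_get_field("com.example.app", "score", count=50)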
Args: app_id: Google Play app ID fields: List of field names to print count: Number of reviews lang: Language code country: Country code sort: Sort order """ reviews_data = self.reviews_get_fields(app_id, fields, count, lang, country, sort) for i, review in enumerate(reviews_data): for field, value in review.items(): try: print(f"{field}: {value}") except UnicodeEncodeError: print(f"{field}: {repr(value)}") @safe_print() def reviews_print_all(self, app_id: str, count: int = Config.DEFAULT_REVIEWS_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, sort: str = Config.DEFAULT_REVIEWS_SORT) -> None: """Print all reviews as JSON. Args: app_id: Google Play app ID count: Number of reviews lang: Language code country: Country code sort: Sort order """ reviews_data = self.reviews_analyze(app_id, count, lang, country, sort) try: print(json.dumps(reviews_data, indent=2, ensure_ascii=False)) except UnicodeEncodeError: print(json.dumps(reviews_data, indent=2, ensure_ascii=True)) class DeveloperMethods: """Methods for getting all apps from a developer.""" def __init__(self, http_client: str = None): """Initialize DeveloperMethods with scraper and parser. Args: http_client: Optional HTTP client name """ self.scraper = DeveloperScraper(http_client=http_client) self.parser = DeveloperParser() @comprehensive_error_handler(return_empty=True) def developer_analyze(self, dev_id: str, count: int = Config.DEFAULT_DEVELOPER_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Dict]: """Get all apps from a developer. Args: dev_id: Developer ID (numeric or string) count: Number of apps to return lang: Language code country: Country code Returns: List of app dictionaries Raises: InvalidAppIdError: If dev_id is invalid """ if not dev_id or not isinstance(dev_id, str): raise InvalidAppIdError(Config.ERROR_MESSAGES["INVALID_DEV_ID"]) dataset = self.scraper.scrape_play_store_data(dev_id, lang, country) apps_data = self.parser.parse_developer_data(dataset, dev_id) return self.parser.format_developer_data(apps_data)[:count] @comprehensive_error_handler(return_empty=True) def developer_get_field(self, dev_id: str, field: str, count: int = Config.DEFAULT_DEVELOPER_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Any]: """Get single field from all developer apps. Args: dev_id: Developer ID field: Field name to retrieve count: Number of apps lang: Language code country: Country code Returns: List of field values from all apps """ results = self.developer_analyze(dev_id, count, lang, country) return [app.get(field) for app in results] @comprehensive_error_handler(return_empty=True) def developer_get_fields(self, dev_id: str, fields: List[str], count: int = Config.DEFAULT_DEVELOPER_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Dict[str, Any]]: """Get multiple fields from all developer apps. Args: dev_id: Developer ID fields: List of field names to retrieve count: Number of apps lang: Language code country: Country code Returns: List of dictionaries with requested fields """ results = self.developer_analyze(dev_id, count, lang, country) return [{field: app.get(field) for field in fields} for app in results] @safe_print() def developer_print_field(self, dev_id: str, field: str, count: int = Config.DEFAULT_DEVELOPER_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print single field from all developer apps. 
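# Illustrative sketch (not part of the source file): DeveloperMethods accepts both
# numeric and string developer IDs (DeveloperParser branches on dev_id.isdigit()).
# The IDs below are placeholders; results are truncated to count after parsing.
from gplay_scraper.core import DeveloperMethods

dev = DeveloperMethods()
numeric_portfolio = dev.developer_analyze("1234567890123456789", count=25)
studio_app_ids = dev.developer_get_field("Example Studio", "appId", count=25)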
Args: dev_id: Developer ID field: Field name to print count: Number of apps lang: Language code country: Country code """ values = self.developer_get_field(dev_id, field, count, lang, country) for i, value in enumerate(values): try: print(f"{i+1}. {field}: {value}") except UnicodeEncodeError: print(f"{i+1}. {field}: {repr(value)}") @safe_print() def developer_print_fields(self, dev_id: str, fields: List[str], count: int = Config.DEFAULT_DEVELOPER_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print multiple fields from all developer apps. Args: dev_id: Developer ID fields: List of field names to print count: Number of apps lang: Language code country: Country code """ data = self.developer_get_fields(dev_id, fields, count, lang, country) for i, app_data in enumerate(data): try: field_str = ', '.join(f'{field}: {value}' for field, value in app_data.items()) print(f"{i+1}. {field_str}") except UnicodeEncodeError: field_str = ', '.join(f'{field}: {repr(value)}' for field, value in app_data.items()) print(f"{i+1}. {field_str}") @safe_print() def developer_print_all(self, dev_id: str, count: int = Config.DEFAULT_DEVELOPER_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print all developer apps as JSON. Args: dev_id: Developer ID count: Number of apps lang: Language code country: Country code """ results = self.developer_analyze(dev_id, count, lang, country) try: print(json.dumps(results, indent=2, ensure_ascii=False)) except UnicodeEncodeError: print(json.dumps(results, indent=2, ensure_ascii=True)) class SimilarMethods: """Methods for finding similar/competitor apps.""" def __init__(self, http_client: str = None): """Initialize SimilarMethods with scraper and parser. Args: http_client: Optional HTTP client name """ self.scraper = SimilarScraper(http_client=http_client) self.parser = SimilarParser() @comprehensive_error_handler(return_empty=True) def similar_analyze(self, app_id: str, count: int = Config.DEFAULT_SIMILAR_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Dict]: """Get similar/competitor apps. Args: app_id: Google Play app ID count: Number of similar apps to return lang: Language code country: Country code Returns: List of similar app dictionaries Raises: InvalidAppIdError: If app_id is invalid """ if not app_id or not isinstance(app_id, str): raise InvalidAppIdError(Config.ERROR_MESSAGES["INVALID_APP_ID"]) dataset = self.scraper.scrape_play_store_data(app_id, lang, country) apps_data = self.parser.parse_similar_data(dataset) return self.parser.format_similar_data(apps_data)[:count] @comprehensive_error_handler(return_empty=True) def similar_get_field(self, app_id: str, field: str, count: int = Config.DEFAULT_SIMILAR_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Any]: """Get single field from all similar apps. Args: app_id: Google Play app ID field: Field name to retrieve count: Number of similar apps lang: Language code country: Country code Returns: List of field values from all similar apps """ results = self.similar_analyze(app_id, count, lang, country) return [app.get(field) for app in results] @comprehensive_error_handler(return_empty=True) def similar_get_fields(self, app_id: str, fields: List[str], count: int = Config.DEFAULT_SIMILAR_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Dict[str, Any]]: """Get multiple fields from all similar apps. 
Args: app_id: Google Play app ID fields: List of field names to retrieve count: Number of similar apps lang: Language code country: Country code Returns: List of dictionaries with requested fields """ results = self.similar_analyze(app_id, count, lang, country) return [{field: app.get(field) for field in fields} for app in results] @safe_print() def similar_print_field(self, app_id: str, field: str, count: int = Config.DEFAULT_SIMILAR_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print single field from all similar apps. Args: app_id: Google Play app ID field: Field name to print count: Number of similar apps lang: Language code country: Country code """ values = self.similar_get_field(app_id, field, count, lang, country) for i, value in enumerate(values): try: print(f"{i+1}. {field}: {value}") except UnicodeEncodeError: print(f"{i+1}. {field}: {repr(value)}") @safe_print() def similar_print_fields(self, app_id: str, fields: List[str], count: int = Config.DEFAULT_SIMILAR_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print multiple fields from all similar apps. Args: app_id: Google Play app ID fields: List of field names to print count: Number of similar apps lang: Language code country: Country code """ data = self.similar_get_fields(app_id, fields, count, lang, country) for i, app_data in enumerate(data): try: field_str = ', '.join(f'{field}: {value}' for field, value in app_data.items()) print(f"{i+1}. {field_str}") except UnicodeEncodeError: field_str = ', '.join(f'{field}: {repr(value)}' for field, value in app_data.items()) print(f"{i+1}. {field_str}") @safe_print() def similar_print_all(self, app_id: str, count: int = Config.DEFAULT_SIMILAR_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print all similar apps as JSON. Args: app_id: Google Play app ID count: Number of similar apps lang: Language code country: Country code """ results = self.similar_analyze(app_id, count, lang, country) try: print(json.dumps(results, indent=2, ensure_ascii=False)) except UnicodeEncodeError: print(json.dumps(results, indent=2, ensure_ascii=True)) class ListMethods: """Methods for getting top charts (free, paid, grossing).""" def __init__(self, http_client: str = None): """Initialize ListMethods with scraper and parser. Args: http_client: Optional HTTP client name """ self.scraper = ListScraper(http_client=http_client) self.parser = ListParser() @comprehensive_error_handler(return_empty=True) def list_analyze(self, collection: str = Config.DEFAULT_LIST_COLLECTION, category: str = Config.DEFAULT_LIST_CATEGORY, count: int = Config.DEFAULT_LIST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Dict]: """Get top charts (top free, top paid, top grossing). 
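# Illustrative sketch (not part of the source file): a quick competitor comparison
# with SimilarMethods. The app ID is a placeholder; the field names mirror the keys
# produced by SimilarParser.format_similar_data.
from gplay_scraper.core import SimilarMethods

similar = SimilarMethods()
competitors = similar.similar_get_fields("com.example.app", ["appId", "title", "score"], count=10)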
Args: collection: Collection type (TOP_FREE, TOP_PAID, TOP_GROSSING) category: App category count: Number of apps to return lang: Language code country: Country code Returns: List of app dictionaries from top charts """ dataset = self.scraper.scrape_play_store_data(collection, category, count, lang, country) apps_data = self.parser.parse_list_data(dataset, count) return self.parser.format_list_data(apps_data) @comprehensive_error_handler(return_empty=True) def list_get_field(self, collection: str, field: str, category: str = Config.DEFAULT_LIST_CATEGORY, count: int = Config.DEFAULT_LIST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Any]: """Get single field from all list apps. Args: collection: Collection type field: Field name to retrieve category: App category count: Number of apps lang: Language code country: Country code Returns: List of field values from all apps """ results = self.list_analyze(collection, category, count, lang, country) return [app.get(field) for app in results] @comprehensive_error_handler(return_empty=True) def list_get_fields(self, collection: str, fields: List[str], category: str = Config.DEFAULT_LIST_CATEGORY, count: int = Config.DEFAULT_LIST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[Dict[str, Any]]: """Get multiple fields from all list apps. Args: collection: Collection type fields: List of field names to retrieve category: App category count: Number of apps lang: Language code country: Country code Returns: List of dictionaries with requested fields """ results = self.list_analyze(collection, category, count, lang, country) return [{field: app.get(field) for field in fields} for app in results] @safe_print() def list_print_field(self, collection: str, field: str, category: str = Config.DEFAULT_LIST_CATEGORY, count: int = Config.DEFAULT_LIST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print single field from all list apps. Args: collection: Collection type field: Field name to print category: App category count: Number of apps lang: Language code country: Country code """ values = self.list_get_field(collection, field, category, count, lang, country) for i, value in enumerate(values): try: print(f"{i+1}. {field}: {value}") except UnicodeEncodeError: print(f"{i+1}. {field}: {repr(value)}") @safe_print() def list_print_fields(self, collection: str, fields: List[str], category: str = Config.DEFAULT_LIST_CATEGORY, count: int = Config.DEFAULT_LIST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print multiple fields from all list apps. Args: collection: Collection type fields: List of field names to print category: App category count: Number of apps lang: Language code country: Country code """ data = self.list_get_fields(collection, fields, category, count, lang, country) for i, app_data in enumerate(data): try: field_str = ', '.join(f'{field}: {value}' for field, value in app_data.items()) print(f"{i+1}. {field_str}") except UnicodeEncodeError: field_str = ', '.join(f'{field}: {repr(value)}' for field, value in app_data.items()) print(f"{i+1}. {field_str}") @safe_print() def list_print_all(self, collection: str = Config.DEFAULT_LIST_COLLECTION, category: str = Config.DEFAULT_LIST_CATEGORY, count: int = Config.DEFAULT_LIST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print all list apps as JSON. 
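# Illustrative sketch (not part of the source file): ListMethods for top charts.
# Collection names follow the documented options (TOP_FREE, TOP_PAID, TOP_GROSSING);
# the category defaults to APPLICATION.
from gplay_scraper.core import ListMethods

charts = ListMethods()
top_grossing = charts.list_get_fields("TOP_GROSSING", ["title", "installs"], count=10)
charts.list_print_field("TOP_PAID", "price", count=10)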
Args: collection: Collection type category: App category count: Number of apps lang: Language code country: Country code """ results = self.list_analyze(collection, category, count, lang, country) try: print(json.dumps(results, indent=2, ensure_ascii=False)) except UnicodeEncodeError: print(json.dumps(results, indent=2, ensure_ascii=True)) class SuggestMethods: """Methods for getting search suggestions and autocomplete.""" def __init__(self, http_client: str = None): """Initialize SuggestMethods with scraper and parser. Args: http_client: Optional HTTP client name """ self.scraper = SuggestScraper(http_client=http_client) self.parser = SuggestParser() @comprehensive_error_handler(return_empty=True) def suggest_analyze(self, term: str, count: int = Config.DEFAULT_SUGGEST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> List[str]: """Get search suggestions for a term. Args: term: Search term count: Number of suggestions to return lang: Language code country: Country code Returns: List of suggestion strings Raises: InvalidAppIdError: If term is invalid """ if not term or not isinstance(term, str): raise InvalidAppIdError(Config.ERROR_MESSAGES["INVALID_QUERY"]) dataset = self.scraper.scrape_suggestions(term, lang, country) suggestions = self.parser.parse_suggestions(dataset) return self.parser.format_suggestions(suggestions[:count]) @comprehensive_error_handler() def suggest_nested(self, term: str, count: int = Config.DEFAULT_SUGGEST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> Dict[str, List[str]]: """Get nested suggestions (suggestions for suggestions). Args: term: Search term count: Number of suggestions per level lang: Language code country: Country code Returns: Dictionary mapping suggestions to their nested suggestions Raises: InvalidAppIdError: If term is invalid """ if not term or not isinstance(term, str): raise InvalidAppIdError(Config.ERROR_MESSAGES["INVALID_QUERY"]) first_level = self.suggest_analyze(term, count, lang, country) results = {} for suggestion in first_level: second_level = self.suggest_analyze(suggestion, count, lang, country) results[suggestion] = second_level return results @safe_print() def suggest_print_all(self, term: str, count: int = Config.DEFAULT_SUGGEST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print all suggestions as JSON. Args: term: Search term count: Number of suggestions lang: Language code country: Country code """ suggestions = self.suggest_analyze(term, count, lang, country) try: print(json.dumps(suggestions, indent=2, ensure_ascii=False)) except UnicodeEncodeError: print(json.dumps(suggestions, indent=2, ensure_ascii=True)) @safe_print() def suggest_print_nested(self, term: str, count: int = Config.DEFAULT_SUGGEST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> None: """Print nested suggestions as JSON. Args: term: Search term count: Number of suggestions per level lang: Language code country: Country code """ nested = self.suggest_nested(term, count, lang, country) try: print(json.dumps(nested, indent=2, ensure_ascii=False)) except UnicodeEncodeError: print(json.dumps(nested, indent=2, ensure_ascii=True)) ================================================ FILE: gplay_scraper/core/gplay_parser.py ================================================ """Parser classes for extracting and formatting data from raw responses. 
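# Illustrative sketch (not part of the source file): suggest_nested fans out, issuing
# one request for the seed term plus one per first-level suggestion, so count=5 can
# mean up to six suggestion requests. "fitness" is a placeholder term.
from gplay_scraper.core import SuggestMethods

suggest = SuggestMethods()
flat = suggest.suggest_analyze("fitness", count=5)      # ["fitness app", ...]
tree = suggest.suggest_nested("fitness", count=3)       # {"fitness app": [...], ...}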
This module contains 7 parser classes that handle JSON/HTML parsing and data formatting for all scraping methods. """ import json import re from datetime import datetime, timezone from typing import Dict, Any, List, Optional, Tuple from ..models.element_specs import ElementSpecs, nested_lookup, format_image_url from ..utils.helpers import clean_json_string, alternative_json_clean, calculate_app_age, calculate_daily_installs, calculate_monthly_installs, tamp_to_date, get_publisher_country from ..config import Config from ..exceptions import DataParsingError from ..utils.error_handling import handle_parsing_errors from ..utils.helpers import pho_count, add_count class AppParser: """Parser for extracting and formatting app data.""" @handle_parsing_errors() def parse_app_data(self, dataset: Dict, app_id: str, scraper=None, assets: str = None) -> Dict[str, Any]: """Parse raw app data from dataset with fallback for missing release date. Args: dataset: Raw dataset from scraper app_id: Google Play app ID scraper: AppScraper instance for fallback requests Returns: Dictionary with parsed app details Raises: DataParsingError: If parsing fails """ ds5_data = dataset.get("ds:5", "") if not ds5_data: raise DataParsingError(Config.ERROR_MESSAGES["NO_DS5_DATA"]) json_str_cleaned = clean_json_string(ds5_data) try: data = json.loads(json_str_cleaned) except json.JSONDecodeError as e: try: alternative_cleaned = alternative_json_clean(ds5_data) data = json.loads(alternative_cleaned) except Exception: raise DataParsingError(Config.ERROR_MESSAGES["JSON_PARSE_FAILED"].format(error=str(e))) app_details = {} for key, spec in ElementSpecs.App.items(): value = spec.extract_content(data.get("data", data)) if key in ["icon", "headerImage", "videoImage"] and value: app_details[key] = format_image_url(value, assets) elif key == "screenshots" and value: app_details[key] = [format_image_url(url, assets) for url in value if url] else: app_details[key] = value app_details['appId'] = app_id app_details['url'] = f"{Config.PLAY_STORE_BASE_URL}{Config.APP_DETAILS_ENDPOINT}?id={app_id}" app_details['publisherCountry'] = get_publisher_country(app_details.get('developerPhone'), app_details.get('developerAddress')) rating_fields = ["released", "score", "ratings", "reviews", "histogram"] missing_rating_fields = [] for key in rating_fields: value = app_details.get(key) if key == "histogram": if not value or (isinstance(value, list) and all(x == 0 for x in value)): missing_rating_fields.append(key) elif not value: missing_rating_fields.append(key) if missing_rating_fields and scraper: try: country_code = None phone = app_details.get("developerPhone") if phone: country_code = pho_count(phone) if not country_code: address = app_details.get("developerAddress") if address: country_code = add_count(address) if country_code: fallback_dataset = scraper.fetch_fallback_data(app_id, gl=country_code) suffix = f"fallback_{country_code}" else: fallback_dataset = scraper.fetch_fallback_data(app_id, no_locale=True) suffix = "fallback_no_locale" if fallback_dataset and fallback_dataset.get("ds:5"): fallback_cleaned = clean_json_string(fallback_dataset["ds:5"]) try: fallback_data = json.loads(fallback_cleaned) for field in missing_rating_fields: if field in ElementSpecs.App: spec = ElementSpecs.App[field] fallback_value = spec.extract_content(fallback_data.get("data", fallback_data)) if fallback_value: app_details[field] = fallback_value except: pass except: pass if not app_details.get("score"): app_details["score"] = 0 if not 
app_details.get("ratings"): app_details["ratings"] = 0 if not app_details.get("reviews"): app_details["reviews"] = 0 if not app_details.get("installs"): app_details["installs"] = 0 if not app_details.get("minInstalls"): app_details["minInstalls"] = 0 current_date = datetime.now(timezone.utc) release_date_str = app_details.get("released") if release_date_str: app_details["appAge"] = calculate_app_age(release_date_str, current_date) app_details["dailyInstalls"] = calculate_daily_installs(app_details.get("installs"), release_date_str, current_date) app_details["minDailyInstalls"] = calculate_daily_installs(app_details.get("minInstalls"), release_date_str, current_date) app_details["realDailyInstalls"] = calculate_daily_installs(app_details.get("realInstalls"), release_date_str, current_date) app_details["monthlyInstalls"] = calculate_monthly_installs(app_details.get("installs"), release_date_str, current_date) app_details["minMonthlyInstalls"] = calculate_monthly_installs(app_details.get("minInstalls"), release_date_str, current_date) app_details["realMonthlyInstalls"] = calculate_monthly_installs(app_details.get("realInstalls"), release_date_str, current_date) else: metric_keys = [ "appAge", "dailyInstalls", "minDailyInstalls", "realDailyInstalls", "monthlyInstalls", "minMonthlyInstalls", "realMonthlyInstalls" ] for key in metric_keys: app_details[key] = 0 return app_details @handle_parsing_errors() def format_app_data(self, details: dict) -> dict: """Format parsed app data into final structure. Args: details: Parsed app details Returns: Formatted dictionary with all app fields """ return { "appId": details.get("appId"), "title": details.get("title"), "summary": details.get("summary"), "description": details.get("description"), "genre": details.get("genre"), "genreId": details.get("genreId"), "categories": details.get("categories"), "available": details.get("available"), "released": details.get("released"), "appAgeDays": details.get("appAge"), "lastUpdated": tamp_to_date(details.get("updated")), "icon": details.get("icon"), "headerImage": details.get("headerImage"), "screenshots": details.get("screenshots"), "video": details.get("video"), "videoImage": details.get("videoImage"), "installs": details.get("installs"), "minInstalls": details.get("minInstalls"), "realInstalls": details.get("realInstalls"), "dailyInstalls": details.get("dailyInstalls"), "minDailyInstalls": details.get("minDailyInstalls"), "realDailyInstalls": details.get("realDailyInstalls"), "monthlyInstalls": details.get("monthlyInstalls"), "minMonthlyInstalls": details.get("minMonthlyInstalls"), "realMonthlyInstalls": details.get("realMonthlyInstalls"), "score": details.get("score"), "ratings": details.get("ratings"), "reviews": details.get("reviews"), "histogram": details.get("histogram"), "adSupported": details.get("adSupported"), "containsAds": details.get("containsAds"), "version": details.get("version"), "androidVersion": details.get("androidVersion"), "maxAndroidApi": details.get("maxandroidapi"), "minAndroidApi": details.get("minandroidapi"), "appBundle": details.get("appBundle"), "contentRating": details.get("contentRating"), "contentRatingDescription": details.get("contentRatingDescription"), "whatsNew": details.get("whatsNew"), "permissions": details.get("permissions"), "dataSafety": details.get("dataSafety"), "price": details.get("price"), "currency": details.get("currency"), "free": details.get("free"), "offersIAP": details.get("offersIAP"), "inAppProductPrice": details.get("inAppProductPrice"), "sale": 
details.get("sale"), "originalPrice": details.get("originalPrice"), "developer": details.get("developer"), "developerId": details.get("developerId"), "developerEmail": details.get("developerEmail"), "developerWebsite": details.get("developerWebsite"), "developerAddress": details.get("developerAddress"), "developerPhone": details.get("developerPhone"), "publisherCountry": details.get("publisherCountry"), "privacyPolicy": details.get("privacyPolicy"), "appUrl": details.get("url"), } class SearchParser: """Parser for extracting and formatting search results.""" @handle_parsing_errors(return_empty=True) def parse_search_results(self, dataset: Dict, count: int) -> List[Dict]: """Parse search results from dataset. Args: dataset: Raw dataset from scraper count: Maximum number of results to parse Returns: List of parsed search result dictionaries """ if "ds:1" not in dataset: return [] search_data = nested_lookup(dataset.get("ds:1", {}), [0, 1, 0, 0, 0]) if not search_data: return [] results = [] n_apps = min(len(search_data), count) for i in range(n_apps): app = self.extract_search_result(search_data[i]) if app: results.append(app) return results[:count] @handle_parsing_errors() def extract_search_result(self, data) -> Dict: """Extract single search result from raw data. Args: data: Raw search result data Returns: Dictionary with extracted search result or None if extraction fails """ try: result = {} for key, spec in ElementSpecs.Search.items(): result[key] = spec.extract_content(data) return result except Exception: return None @handle_parsing_errors() def format_search_result(self, result: dict) -> dict: """Format parsed search result into final structure. Args: result: Parsed search result Returns: Formatted dictionary with search result fields """ return { "appId": result.get("appId"), "title": result.get("title"), "description": result.get("summary"), "icon": result.get("icon"), "developer": result.get("developer"), "score": result.get("score"), "scoreText": result.get("scoreText"), "currency": result.get("currency"), "price": result.get("price"), "free": result.get("free"), "url": result.get("url"), } @handle_parsing_errors() def extract_pagination_token(self, dataset: Dict) -> str: """Extract pagination token from search dataset. Args: dataset: Search dataset Returns: Pagination token or None """ sections = nested_lookup(dataset.get("ds:1", {}), [0, 1, 0, 0]) if not sections: return None for section in sections: if isinstance(section, list) and len(section) > 1: potential_token = nested_lookup(section, [1]) if isinstance(potential_token, str): return potential_token return None @handle_parsing_errors() def parse_html_content(self, html_content: str) -> Dict: """Extract datasets from search page HTML. Args: html_content: HTML content of search page Returns: Dictionary containing all datasets Raises: DataParsingError: If no datasets found """ script_regex = re.compile(r"AF_initDataCallback[\s\S]*? Tuple[List[Dict], Optional[str]]: """Parse reviews from API response content. 
Args: content: Raw API response content Returns: Tuple of (list of review dictionaries, next page token) """ if not content or not isinstance(content, str): return [], None regex = re.compile(r"\)]}'\n\n([\s\S]+)") matches = regex.findall(content) if not matches: return [], None try: data = json.loads(matches[0]) if not data or len(data) == 0 or len(data[0]) < 3: return [], None reviews_data = json.loads(data[0][2]) # Handle case where reviews_data is None or empty if not reviews_data: return [], None next_token = None try: if (isinstance(reviews_data, list) and len(reviews_data) >= 2 and reviews_data[-2] and isinstance(reviews_data[-2], list) and len(reviews_data[-2]) > 0): potential_token = reviews_data[-2][-1] if isinstance(potential_token, str): next_token = potential_token except (IndexError, TypeError, AttributeError): pass # Check if we have actual reviews data if (not isinstance(reviews_data, list) or len(reviews_data) == 0 or not isinstance(reviews_data[0], list) or len(reviews_data[0]) == 0): return [], None reviews = [] for review_raw in reviews_data[0]: if review_raw: # Make sure review_raw is not None review = self.extract_review_data(review_raw) if review: reviews.append(review) return reviews, next_token except (json.JSONDecodeError, IndexError, KeyError, TypeError, AttributeError): return [], None @handle_parsing_errors() def extract_review_data(self, review_raw) -> Optional[Dict]: """Extract single review from raw data. Args: review_raw: Raw review data array Returns: Dictionary with extracted review data or None if extraction fails """ try: review = { "reviewId": review_raw[0] if len(review_raw) > 0 else None, "userName": review_raw[1][0] if len(review_raw) > 1 and review_raw[1] else None, "userImage": None, "content": review_raw[4] if len(review_raw) > 4 else None, "score": review_raw[2] if len(review_raw) > 2 else None, "thumbsUpCount": review_raw[6] if len(review_raw) > 6 else None, "at": datetime.fromtimestamp(review_raw[5][0]).isoformat() if len(review_raw) > 5 and review_raw[5] else None, "appVersion": review_raw[10] if len(review_raw) > 10 else None, } try: if len(review_raw) > 1 and review_raw[1] and len(review_raw[1]) > 1 and review_raw[1][1]: review["userImage"] = review_raw[1][1][3][2] except: pass return review except Exception: return None @handle_parsing_errors(return_empty=True) def parse_multiple_responses(self, dataset: Dict) -> List[Dict]: """Parse multiple review responses. Args: dataset: Dataset containing multiple review responses Returns: List of all parsed reviews """ if not dataset or not isinstance(dataset, dict): return [] responses = dataset.get("reviews", []) if not responses or not isinstance(responses, list): return [] all_reviews = [] for response in responses: if response and isinstance(response, str): try: reviews, _ = self.parse_reviews_response(response) if reviews: # Only extend if we got actual reviews all_reviews.extend(reviews) except Exception: continue # Skip this response if it fails return all_reviews @handle_parsing_errors(return_empty=True) def format_reviews_data(self, reviews_data: List[Dict]) -> List[Dict]: """Format parsed reviews into final structure. 
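# Illustrative sketch (not part of the source file): the positional layout that
# extract_review_data above expects. The array below is fabricated solely to show
# which index feeds which output field; real batchexecute payloads carry more data.
from gplay_scraper.core.gplay_parser import ReviewsParser

sample_raw = [
    "review-id-123",      # [0]  reviewId (placeholder)
    ["Jane Doe", None],   # [1]  [1][0] userName; [1][1][3][2] avatar URL when present
    5,                    # [2]  score
    None,                 # [3]  not used here
    "Great app!",         # [4]  content
    [1700000000],         # [5]  [5][0] epoch seconds, converted to an ISO timestamp
    12,                   # [6]  thumbsUpCount
    None, None, None,     # [7..9] not used here
    "2.4.1",              # [10] appVersion
]
review = ReviewsParser().extract_review_data(sample_raw)
# -> {"reviewId": "review-id-123", "userName": "Jane Doe", "userImage": None,
#     "content": "Great app!", "score": 5, "thumbsUpCount": 12,
#     "at": <ISO string for epoch 1700000000>, "appVersion": "2.4.1"}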
Args: reviews_data: List of parsed reviews Returns: List of formatted review dictionaries """ formatted_reviews = [] for review in reviews_data: formatted_review = { "reviewId": review.get("reviewId"), "userName": review.get("userName"), "userImage": review.get("userImage"), "score": review.get("score"), "content": review.get("content"), "thumbsUpCount": review.get("thumbsUpCount"), "appVersion": review.get("appVersion"), "at": review.get("at"), } formatted_reviews.append(formatted_review) return formatted_reviews class DeveloperParser: """Parser for extracting and formatting developer apps.""" @handle_parsing_errors(return_empty=True) def parse_developer_data(self, dataset: Dict, dev_id: str) -> List[Dict]: """Parse developer apps from dataset. Args: dataset: Raw dataset from scraper dev_id: Developer ID (numeric or string) Returns: List of parsed app dictionaries Raises: DataParsingError: If parsing fails """ ds3_data = dataset.get("ds:3", "") if not ds3_data: raise DataParsingError(Config.ERROR_MESSAGES["NO_DS3_DATA"]) json_str_cleaned = clean_json_string(ds3_data) try: data = json.loads(json_str_cleaned) except json.JSONDecodeError as e: try: alternative_cleaned = alternative_json_clean(ds3_data) data = json.loads(alternative_cleaned) except Exception: raise DataParsingError(Config.ERROR_MESSAGES["DS3_JSON_PARSE_FAILED"].format(error=str(e))) # Navigate to apps array based on dev_id type is_numeric = dev_id.isdigit() if is_numeric: apps_path = [0, 1, 0, 21, 0] else: apps_path = [0, 1, 0, 22, 0] apps_data = nested_lookup(data.get("data", data), apps_path) if not apps_data: return [] apps = [] for app_data in apps_data: app_details = {} for key, spec in ElementSpecs.Developer.items(): app_details[key] = spec.extract_content(app_data) if app_details.get("title"): apps.append(app_details) return apps @handle_parsing_errors(return_empty=True) def format_developer_data(self, apps_data: List[Dict]) -> List[Dict]: """Format parsed developer apps into final structure. Args: apps_data: List of parsed apps Returns: List of formatted app dictionaries """ formatted_apps = [] for app in apps_data: formatted_app = { "appId": app.get("appId"), "title": app.get("title"), "description": app.get("description"), "icon": app.get("icon"), "developer": app.get("developer"), "score": app.get("score"), "scoreText": app.get("scoreText"), "currency": app.get("currency"), "price": app.get("price"), "free": app.get("free"), "url": app.get("url"), } formatted_apps.append(formatted_app) return formatted_apps class SimilarParser: """Parser for extracting and formatting similar apps.""" @handle_parsing_errors(return_empty=True) def parse_similar_data(self, dataset: Dict) -> List[Dict]: """Parse similar apps from dataset. 
Args: dataset: Raw dataset from scraper Returns: List of parsed similar app dictionaries """ ds3_data = dataset.get("ds:3", "") if not ds3_data: return [] json_str_cleaned = clean_json_string(ds3_data) try: data = json.loads(json_str_cleaned) except json.JSONDecodeError as e: try: alternative_cleaned = alternative_json_clean(ds3_data) data = json.loads(alternative_cleaned) except Exception: return [] apps_data = nested_lookup(data.get("data", data), [0, 1, 0, 21, 0]) if not apps_data: return [] apps = [] for app_data in apps_data: app_details = {} for key, spec in ElementSpecs.Similar.items(): app_details[key] = spec.extract_content(app_data) if app_details.get("title"): apps.append(app_details) return apps @handle_parsing_errors(return_empty=True) def format_similar_data(self, apps_data: List[Dict]) -> List[Dict]: """Format parsed similar apps into final structure. Args: apps_data: List of parsed apps Returns: List of formatted app dictionaries """ formatted_apps = [] for app in apps_data: formatted_app = { "appId": app.get("appId"), "title": app.get("title"), "description": app.get("description"), "icon": app.get("icon"), "developer": app.get("developer"), "score": app.get("score"), "scoreText": app.get("scoreText"), "currency": app.get("currency"), "price": app.get("price"), "free": app.get("free"), "url": app.get("url"), } formatted_apps.append(formatted_app) return formatted_apps class ListParser: """Parser for extracting and formatting top chart apps.""" @handle_parsing_errors(return_empty=True) def parse_list_data(self, dataset: Dict, count: int) -> List[Dict]: """Parse top chart apps from dataset. Args: dataset: Raw dataset from scraper count: Maximum number of apps to parse Returns: List of parsed app dictionaries """ collection_data = dataset.get("collection_data") if not collection_data: return [] apps_data = nested_lookup(collection_data, [0, 1, 0, 28, 0]) if not apps_data: return [] apps = [] for app_data in apps_data[:count]: app_details = {} for key, spec in ElementSpecs.List.items(): app_details[key] = spec.extract_content(app_data) if app_details.get("title"): apps.append(app_details) return apps @handle_parsing_errors(return_empty=True) def format_list_data(self, apps_data: List[Dict]) -> List[Dict]: """Format parsed list apps into final structure. Args: apps_data: List of parsed apps Returns: List of formatted app dictionaries """ formatted_apps = [] for app in apps_data: formatted_app = { "appId": app.get("appId"), "title": app.get("title"), "description": app.get("description"), "icon": app.get("icon"), "screenshots": app.get("screenshots"), "developer": app.get("developer"), "genre": app.get("genre"), "score": app.get("score"), "scoreText": app.get("scoreText"), "installs": app.get("installs"), "currency": app.get("currency"), "price": app.get("price"), "free": app.get("free"), "url": app.get("url"), } formatted_apps.append(formatted_app) return formatted_apps class SuggestParser: """Parser for extracting and formatting search suggestions.""" @handle_parsing_errors(return_empty=True) def parse_suggestions(self, dataset: Dict) -> List[str]: """Parse suggestions from dataset. Args: dataset: Raw dataset from scraper Returns: List of suggestion strings """ return dataset.get("suggestions", []) @handle_parsing_errors(return_empty=True) def format_suggestions(self, suggestions: List[str]) -> List[str]: """Format suggestions (pass-through for strings). 
Args: suggestions: List of suggestion strings Returns: Same list of suggestion strings """ return suggestions ================================================ FILE: gplay_scraper/core/gplay_scraper.py ================================================ import json import re import logging from typing import Dict from ..utils.http_client import HttpClient from ..config import Config from ..exceptions import DataParsingError, InvalidAppIdError, AppNotFoundError from ..utils.error_handling import handle_network_errors, handle_parsing_errors, validate_inputs from urllib.parse import quote from .gplay_parser import SearchParser from ..utils.constants import SORT_NAMES, CLUSTER_NAMES logger = logging.getLogger(__name__) class AppScraper: """Scraper for fetching app details from Google Play Store. Handles the extraction of comprehensive app information including ratings, reviews, install counts, pricing, and metadata. Supports fallback data fetching when primary requests fail to retrieve certain fields. Features: - Primary app data extraction from HTML pages - Fallback data fetching for missing fields (release dates, ratings) - Multiple locale support for regional data - Automatic retry with different parameters """ def __init__(self, rate_limit_delay: float = None, http_client: str = None): """Initialize AppScraper with HTTP client. Args: rate_limit_delay: Delay between requests in seconds (default: 1.0) http_client: HTTP client to use (requests, curl_cffi, etc.) """ self.http_client = HttpClient(rate_limit_delay, http_client) def fetch_playstore_page(self, app_id: str, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> str: """Fetch app page HTML from Google Play Store. Args: app_id: Google Play app ID lang: Language code country: Country code Returns: HTML content of app page """ return self.http_client.fetch_app_page(app_id, lang, country) def fetch_fallback_data(self, app_id: str, gl: str = None, no_locale: bool = False) -> Dict: """Fetch app data with specific country or without locale parameters. Args: app_id: Google Play app ID gl: Country code for fallback request no_locale: If True, fetch without hl and gl parameters Returns: Dictionary containing ds:5 dataset from fallback request """ if no_locale: html_content = self.http_client.fetch_app_page_no_locale(app_id) elif gl: html_content = self.http_client.fetch_app_page(app_id, lang=Config.DEFAULT_LANGUAGE, country=gl) else: html_content = self.http_client.fetch_app_page_no_locale(app_id) ds_match = re.search(r'AF_initDataCallback\s*\(\s*({\s*key:\s*["\']ds:5["\'][\s\S]*?})\s*\)\s*;', html_content, re.DOTALL) if ds_match: ds5_data = ds_match.group(1) else: all_callbacks = re.findall(r'AF_initDataCallback\s*\(\s*({[\s\S]*?})\s*\)\s*;', html_content, re.DOTALL) ds5_data = "" for callback in all_callbacks: if "'ds:5'" in callback or '"ds:5"' in callback: ds5_data = callback break return {"ds:5": ds5_data} if ds5_data else None @validate_inputs() @handle_network_errors() @handle_parsing_errors() def scrape_play_store_data(self, app_id: str, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> Dict: """Extract dataset from app page HTML. 
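
        Illustrative usage (a sketch; the app ID is a placeholder and a live
        request to the Play Store is made):

            >>> scraper = AppScraper()
            >>> dataset = scraper.scrape_play_store_data("com.example.app")
            >>> sorted(dataset.keys())
            ['ds:5', 'fallback_needed']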
Args: app_id: Google Play app ID lang: Language code country: Country code Returns: Dictionary containing ds:5 dataset Raises: DataParsingError: If dataset not found AppNotFoundError: If app not found """ html_content = self.fetch_playstore_page(app_id, lang, country) ds_match = re.search(r'AF_initDataCallback\s*\(\s*({\s*key:\s*["\']ds:5["\'][\s\S]*?})\s*\)\s*;', html_content, re.DOTALL) if ds_match: ds5_data = ds_match.group(1) else: all_callbacks = re.findall(r'AF_initDataCallback\s*\(\s*({[\s\S]*?})\s*\)\s*;', html_content, re.DOTALL) ds5_data = "" for callback in all_callbacks: if "'ds:5'" in callback or '"ds:5"' in callback: ds5_data = callback break if not ds5_data: raise DataParsingError(Config.ERROR_MESSAGES["DS5_NOT_FOUND"]) return {"ds:5": ds5_data, "fallback_needed": False} class SearchScraper: """Scraper for fetching search results from Google Play Store. Handles app search functionality with support for pagination to retrieve large numbers of search results. Integrates with SearchParser for data extraction. Features: - Initial search page fetching - Automatic pagination for large result sets - Token-based continuation for additional results - Configurable result limits """ def __init__(self, rate_limit_delay: float = None, http_client: str = None): """Initialize SearchScraper with HTTP client and parser. Args: rate_limit_delay: Delay between requests in seconds http_client: HTTP client to use for requests """ self.http_client = HttpClient(rate_limit_delay, http_client) self.parser = SearchParser() def fetch_playstore_search(self, query: str, count: int, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> str: """Fetch search page HTML from Google Play Store. Args: query: Search query string count: Number of results needed lang: Language code country: Country code Returns: HTML content of search page Raises: InvalidAppIdError: If query is invalid """ if not query or not isinstance(query, str): raise InvalidAppIdError(Config.ERROR_MESSAGES["INVALID_QUERY"]) if count <= 0: return "" return self.http_client.fetch_search_page(query=query, lang=lang, country=country) @validate_inputs() @handle_network_errors() @handle_parsing_errors() def scrape_play_store_data(self, query: str, count: int = Config.DEFAULT_SEARCH_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> Dict: """Scrape search results with automatic pagination support. 
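
        Illustrative usage (a sketch; the query is a placeholder and results
        depend on live Play Store data):

            >>> scraper = SearchScraper()
            >>> dataset = scraper.scrape_play_store_data("podcast player", count=60)

        Results fetched through pagination tokens are merged back into the
        "ds:1" entry before the dataset is returned.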
Args: query: Search query string count: Total number of results to fetch lang: Language code country: Country code Returns: Dictionary containing all search results Raises: DataParsingError: If parsing fails """ html_content = self.fetch_playstore_search(query, count, lang, country) dataset = self.parser.parse_html_content(html_content) if count <= Config.DEFAULT_SEARCH_COUNT // 5: return dataset token = self.parser.extract_pagination_token(dataset) all_results = [] initial_results = self._get_nested_value(dataset.get("ds:1", []), [0, 1, 0, 0, 0], []) all_results.extend(initial_results) while len(all_results) < count and token: needed = min(Config.DEFAULT_REVIEWS_BATCH_SIZE * 2, count - len(all_results)) try: response_text = self.http_client.fetch_search_page(token=token, needed=needed, lang=lang, country=country) data = json.loads(response_text[5:]) parsed_data = json.loads(data[0][2]) if parsed_data: paginated_results = self._get_nested_value(parsed_data, [0, 0, 0], []) all_results.extend(paginated_results) token = self._get_nested_value(parsed_data, [0, 0, 7, 1]) else: break except (json.JSONDecodeError, IndexError, KeyError, Exception): break if "ds:1" in dataset: dataset["ds:1"][0][1][0][0][0] = all_results[:count] return dataset def _get_nested_value(self, data, path, default=None): """Safely get nested value from data structure. Args: data: Data structure to traverse path: List of keys/indices to follow default: Default value if path not found Returns: Value at path or default """ try: for key in path: data = data[key] return data except (KeyError, IndexError, TypeError): return default class ReviewsScraper: """Scraper for fetching user reviews from Google Play Store. Handles extraction of user reviews using Google Play's internal API. Supports different sorting options and pagination for large review sets. Features: - Multiple sort orders (newest, relevant, rating) - Batch processing for large review counts - Pagination token management - Configurable batch sizes """ def __init__(self, rate_limit_delay: float = None, http_client: str = None): """Initialize ReviewsScraper with HTTP client. Args: rate_limit_delay: Delay between requests in seconds http_client: HTTP client to use for API requests """ self.http_client = HttpClient(rate_limit_delay, http_client) def fetch_reviews_batch(self, app_id: str, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, sort: int = Config.DEFAULT_REVIEWS_SORT, batch_count: int = Config.DEFAULT_REVIEWS_BATCH_SIZE, token: str = None) -> str: """Fetch single batch of reviews from API. Args: app_id: Google Play app ID lang: Language code country: Country code sort: Sort order (NEWEST, RELEVANT, RATING) batch_count: Number of reviews per batch token: Pagination token for next batch Returns: Raw API response content """ sort_value = SORT_NAMES.get(sort, sort) if isinstance(sort, str) else sort return self.http_client.fetch_reviews_batch(app_id, lang, country, sort_value, batch_count, token) @validate_inputs() @handle_network_errors() @handle_parsing_errors() def scrape_reviews_data(self, app_id: str, count: int = Config.DEFAULT_REVIEWS_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY, sort: int = Config.DEFAULT_REVIEWS_SORT) -> Dict: """Scrape multiple batches of reviews. 
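
        Illustrative usage (a sketch; the app ID is a placeholder):

            >>> scraper = ReviewsScraper()
            >>> raw = scraper.scrape_reviews_data("com.example.app", count=300)
            >>> isinstance(raw["reviews"], list)  # raw batch responses, not parsed reviews
            True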
Args: app_id: Google Play app ID count: Total number of reviews to fetch lang: Language code country: Country code sort: Sort order Returns: Dictionary containing all review responses """ all_responses = [] token = None batch_size = Config.DEFAULT_REVIEWS_BATCH_SIZE while len(all_responses) * batch_size < count: remaining = count - (len(all_responses) * batch_size) fetch_count = min(batch_size, remaining) response = self.fetch_reviews_batch(app_id, lang, country, sort, fetch_count, token) if not response: break all_responses.append(response) try: regex = re.compile(r"\)]}'\n\n([\s\S]+)") matches = regex.findall(response) if matches: data = json.loads(matches[0]) parsed_data = json.loads(data[0][2]) # Check if we got any reviews in this batch if not parsed_data or len(parsed_data) == 0 or (len(parsed_data) > 0 and len(parsed_data[0]) == 0): break # Extract next token safely try: if len(parsed_data) >= 2 and parsed_data[-2] and len(parsed_data[-2]) > 0: token = parsed_data[-2][-1] else: token = None except (IndexError, TypeError, AttributeError): token = None if not token or isinstance(token, list) or not isinstance(token, str): break else: break except (json.JSONDecodeError, IndexError, KeyError, TypeError): break return {"reviews": all_responses if all_responses else []} class DeveloperScraper: """Scraper for fetching developer portfolio from Google Play Store. Extracts all apps published by a specific developer, supporting both numeric developer IDs and string-based developer names. Features: - Numeric developer ID support (e.g., '5700313618786177705') - String developer name support (e.g., 'Google LLC') - Complete app portfolio extraction - Developer metadata collection """ def __init__(self, rate_limit_delay: float = None, http_client: str = None): """Initialize DeveloperScraper with HTTP client. Args: rate_limit_delay: Delay between requests in seconds http_client: HTTP client to use for requests """ self.http_client = HttpClient(rate_limit_delay, http_client) def fetch_developer_page(self, dev_id: str, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> str: """Fetch developer page HTML from Google Play Store. Args: dev_id: Developer ID (numeric or string) lang: Language code country: Country code Returns: HTML content of developer page """ return self.http_client.fetch_developer_page(dev_id, lang, country) @validate_inputs() @handle_network_errors() @handle_parsing_errors() def scrape_play_store_data(self, dev_id: str, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> Dict: """Extract dataset from developer page HTML. Args: dev_id: Developer ID lang: Language code country: Country code Returns: Dictionary containing ds:3 dataset and dev_id Raises: DataParsingError: If dataset not found """ html_content = self.fetch_developer_page(dev_id, lang, country) ds_match = re.search(r'AF_initDataCallback\s*\(\s*({\s*key:\s*["\']ds:3["\'][\s\S]*?})\s*\)\s*;', html_content, re.DOTALL) if ds_match: ds3_data = ds_match.group(1) else: all_callbacks = re.findall(r'AF_initDataCallback\s*\(\s*({[\s\S]*?})\s*\)\s*;', html_content, re.DOTALL) ds3_data = "" for callback in all_callbacks: if "'ds:3'" in callback or '"ds:3"' in callback: ds3_data = callback break if not ds3_data: raise DataParsingError(Config.ERROR_MESSAGES["DS3_NOT_FOUND"]) return {"ds:3": ds3_data, "dev_id": dev_id} class SimilarScraper: """Scraper for fetching similar apps from Google Play Store. 
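
    Typical usage (an illustrative sketch; the app ID is a placeholder, details below):

        >>> scraper = SimilarScraper()
        >>> dataset = scraper.scrape_play_store_data("com.example.app")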

    Extracts similar/related apps by finding cluster URLs from app pages
    and fetching the corresponding collection pages.

    Features:
    - Cluster URL extraction from app pages
    - Similar app collection fetching
    - Related app recommendations
    - Competitive analysis data
    """

    def __init__(self, rate_limit_delay: float = None, http_client: str = None):
        """Initialize SimilarScraper with HTTP client.

        Args:
            rate_limit_delay: Delay between requests in seconds
            http_client: HTTP client to use for requests
        """
        self.http_client = HttpClient(rate_limit_delay, http_client)

    def fetch_similar_page(self, app_id: str, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> str:
        """Fetch app page HTML to extract similar apps cluster URL.

        Args:
            app_id: Google Play app ID
            lang: Language code
            country: Country code

        Returns:
            HTML content of app page
        """
        return self.http_client.fetch_app_page(app_id, lang, country)

    @validate_inputs()
    @handle_network_errors()
    @handle_parsing_errors()
    def scrape_play_store_data(self, app_id: str, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> Dict:
        """Extract similar apps dataset from cluster page.

        Args:
            app_id: Google Play app ID
            lang: Language code
            country: Country code

        Returns:
            Dictionary containing ds:3 dataset

        Raises:
            DataParsingError: If dataset not found
        """
        html_content = self.fetch_similar_page(app_id, lang, country)

        pattern1 = r'"(/store/apps/collection/cluster\?gsr=[^&]+)"'
        matches1 = re.findall(pattern1, html_content)
        pattern2 = r'"(/store/apps/collection/cluster\?gsr=[^"]+)"'
        matches2 = re.findall(pattern2, html_content)
        all_matches = list(set(matches1 + matches2))

        if not all_matches:
            return {"ds:3": None}

        # Unescape HTML-encoded ampersands in the extracted cluster URL before fetching.
        cluster_url = all_matches[0].replace('&amp;', '&')
        cluster_html = self.http_client.fetch_cluster_page(cluster_url, lang, country)

        ds_match = re.search(r'AF_initDataCallback\s*\(\s*({\s*key:\s*["\']ds:3["\'][\s\S]*?})\s*\)\s*;', cluster_html, re.DOTALL)
        if ds_match:
            ds3_data = ds_match.group(1)
        else:
            all_callbacks = re.findall(r'AF_initDataCallback\s*\(\s*({[\s\S]*?})\s*\)\s*;', cluster_html, re.DOTALL)
            ds3_data = ""
            for callback in all_callbacks:
                if "'ds:3'" in callback or '"ds:3"' in callback:
                    ds3_data = callback
                    break

        if not ds3_data:
            raise DataParsingError(Config.ERROR_MESSAGES["DS3_NOT_FOUND"])

        return {"ds:3": ds3_data}


class ListScraper:
    """Scraper for fetching top charts from Google Play Store.

    Handles extraction of ranked app lists including top free, top paid,
    and top grossing apps across different categories.

    Features:
    - Multiple collection types (free, paid, grossing)
    - Category-specific charts (games, social, productivity, etc.)
    - Configurable result counts
    - Regional chart variations
    """

    def __init__(self, rate_limit_delay: float = None, http_client: str = None):
        """Initialize ListScraper with HTTP client.

        Args:
            rate_limit_delay: Delay between requests in seconds
            http_client: HTTP client to use for API requests
        """
        self.http_client = HttpClient(rate_limit_delay, http_client)

    @handle_network_errors()
    @handle_parsing_errors()
    def scrape_play_store_data(self, collection: str, category: str = Config.DEFAULT_LIST_CATEGORY, count: int = Config.DEFAULT_LIST_COUNT, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> Dict:
        """Scrape top charts data from Google Play Store.
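
        Illustrative usage (a sketch; collection and category names follow the
        constants in gplay_scraper.utils.constants, and a live request is made):

            >>> scraper = ListScraper()
            >>> dataset = scraper.scrape_play_store_data("TOP_FREE", category="GAME", count=50)
            >>> "collection_data" in dataset
            True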

        Args:
            collection: Collection type (TOP_FREE, TOP_PAID, TOP_GROSSING)
            category: App category (e.g., GAME, SOCIAL)
            count: Number of apps to fetch
            lang: Language code
            country: Country code

        Returns:
            Dictionary containing collection data

        Raises:
            DataParsingError: If JSON parsing fails
        """
        cluster = CLUSTER_NAMES.get(collection, collection)
        response_text = self.http_client.fetch_list_page(cluster, category, count, lang, country)

        try:
            lines = response_text.strip().split('\n')
            data = json.loads(lines[2])
            collection_data = json.loads(data[0][2])
            return {"collection_data": collection_data}
        except (json.JSONDecodeError, IndexError, KeyError) as e:
            raise DataParsingError(Config.ERROR_MESSAGES["JSON_PARSE_FAILED"].format(error=str(e)))


class SuggestScraper:
    """Scraper for fetching search suggestions from Google Play Store.

    Provides autocomplete functionality for search terms, useful for
    keyword research and ASO (App Store Optimization) analysis.

    Features:
    - Real-time search suggestions
    - Keyword research capabilities
    - ASO optimization data
    - Popular search term discovery
    """

    def __init__(self, rate_limit_delay: float = None, http_client: str = None):
        """Initialize SuggestScraper with HTTP client.

        Args:
            rate_limit_delay: Delay between requests in seconds
            http_client: HTTP client to use for API requests
        """
        self.http_client = HttpClient(rate_limit_delay, http_client)

    @validate_inputs()
    @handle_network_errors()
    @handle_parsing_errors()
    def scrape_suggestions(self, term: str, lang: str = Config.DEFAULT_LANGUAGE, country: str = Config.DEFAULT_COUNTRY) -> Dict:
        """Scrape search suggestions from Google Play Store.

        Args:
            term: Search term for suggestions
            lang: Language code
            country: Country code

        Returns:
            Dictionary containing list of suggestions

        Raises:
            DataParsingError: If JSON parsing fails
        """
        if not term:
            return {"suggestions": []}

        response_text = self.http_client.fetch_suggest_page(term, lang, country)

        try:
            input_data = json.loads(response_text[5:])
            data = json.loads(input_data[0][2])
            if data is None:
                return {"suggestions": []}
            suggestions = [s[0] for s in data[0][0]]
            return {"suggestions": suggestions}
        except (json.JSONDecodeError, IndexError, KeyError, TypeError) as e:
            raise DataParsingError(Config.ERROR_MESSAGES["JSON_PARSE_FAILED"].format(error=str(e)))



================================================
FILE: gplay_scraper/exceptions.py
================================================
"""Custom exceptions for GPlay Scraper.

This module defines all custom exceptions used throughout the library.
"""


class GPlayScraperError(Exception):
    """Base exception for all GPlay Scraper errors."""
    pass


class InvalidAppIdError(GPlayScraperError):
    """Raised when an invalid app ID, dev ID, or query is provided."""
    pass


class AppNotFoundError(GPlayScraperError):
    """Raised when an app, developer, or resource is not found (404 error)."""
    pass


class RateLimitError(GPlayScraperError):
    """Raised when rate limiting is triggered by Google Play Store."""
    pass


class NetworkError(GPlayScraperError):
    """Raised when network requests fail."""
    pass


class DataParsingError(GPlayScraperError):
    """Raised when parsing JSON or HTML data fails."""
    pass



================================================
FILE: gplay_scraper/models/__init__.py
================================================



================================================
FILE: gplay_scraper/models/element_specs.py
================================================
"""Element specifications for data extraction from Google Play Store.
This module defines ElementSpec class and ElementSpecs for all 7 method types. Each spec defines how to extract specific fields from raw JSON data. """ from typing import Any, Callable, List, Optional, Dict, Union import html from datetime import datetime from ..utils.helpers import unescape_text from ..config import Config def parse_permissions(perms_data: Any) -> Dict[str, List[str]]: """Parse permissions from various Google Play Store data formats. Google Play Store uses complex nested data structures for app permissions. This function handles all known formats and extracts human-readable permission descriptions organized by category. Data Structure Patterns: - Format 1: [[[category, [...], [[null, description]], [...]]]] - Format 2: [[category, [...], [description1, description2]]] - Format 3: Mixed with "Other" category for uncategorized permissions - Format 4: Empty/null data for apps with no permissions Args: perms_data: Raw permissions data from Play Store JSON (nested lists/dicts) Returns: Dictionary mapping permission categories to lists of permission descriptions Example: {"Location": ["GPS access"], "Storage": ["Read files", "Write files"]} Examples: >>> parse_permissions(None) {} >>> parse_permissions([[[["Location", [...], [[null, "GPS access"]], [...]]]]) {"Location": ["GPS access"]} >>> parse_permissions([["Storage", [...], ["Read files", "Write files"]]]) {"Storage": ["Read files", "Write files"]} """ if not perms_data: return {} permissions = {} try: if isinstance(perms_data, list) and len(perms_data) > 2: sections = perms_data[2] if len(perms_data) > 2 else [] if isinstance(sections, list): for section in sections: if not isinstance(section, list): continue for perm_group in section: if not isinstance(perm_group, list) or len(perm_group) < 3: continue category = None if isinstance(perm_group[0], str): category = perm_group[0] elif isinstance(perm_group[0], list) and len(perm_group[0]) > 0: category = perm_group[0][0] if isinstance(perm_group[0][0], str) else "Other" if not category: category = "Other" details = [] perm_details = perm_group[2] if len(perm_group) > 2 else [] if isinstance(perm_details, list): for detail in perm_details: if isinstance(detail, list) and len(detail) > 1: if detail[1] and isinstance(detail[1], str): details.append(detail[1]) elif isinstance(detail, str): details.append(detail) if details: if category in permissions: permissions[category].extend(details) else: permissions[category] = details if len(sections) > 2: additional_perms = sections[2] if len(sections) > 2 else [] if isinstance(additional_perms, list): other_perms = [] for item in additional_perms: if isinstance(item, list) and len(item) > 1 and isinstance(item[1], str): other_perms.append(item[1]) if other_perms: if "Other" in permissions: permissions["Other"].extend(other_perms) else: permissions["Other"] = other_perms elif isinstance(perms_data, list): for item in perms_data: if isinstance(item, list) and len(item) > 2: category = item[0] if isinstance(item[0], str) else "Other" details = [] if isinstance(item[2], list): for detail in item[2]: if isinstance(detail, list) and len(detail) > 1 and isinstance(detail[1], str): details.append(detail[1]) if details: permissions[category] = details permissions = {k: v for k, v in permissions.items() if v} except (IndexError, KeyError, TypeError, AttributeError): pass return permissions def nested_lookup(obj: Any, key_list: List) -> Any: """Safely navigate nested dictionary/list structure. 
Traverses complex nested data structures (mix of dicts and lists) following a path of keys/indices. Returns None if any step in the path fails. Args: obj: Object to navigate (dict, list, or any nested structure) key_list: List of keys/indices to follow (e.g., [0, 'data', 1, 'title']) Returns: Value at the nested location or None if path doesn't exist Examples: >>> data = {'users': [{'name': 'John'}, {'name': 'Jane'}]} >>> nested_lookup(data, ['users', 1, 'name']) 'Jane' >>> nested_lookup(data, ['users', 5, 'name']) # Index out of range None >>> nested_lookup(data, ['invalid', 'path']) None """ current = obj for key in key_list: try: current = current[key] except (IndexError, KeyError, TypeError): return None return current def format_image_url(url: str, size: str = None) -> str: """Format image URL with size parameter. Google Play Store images can be resized by appending size parameters. This function adds the appropriate size parameter to get images in desired resolution. Args: url: Base image URL from Google Play Store size: Size parameter - SMALL (512px), MEDIUM (1024px), LARGE (2048px), ORIGINAL (max) Returns: Formatted URL with size parameter appended, or None if url is empty Examples: >>> format_image_url('https://play-lh.googleusercontent.com/abc123', 'LARGE') 'https://play-lh.googleusercontent.com/abc123=w2048' >>> format_image_url('https://example.com/image.jpg', 'SMALL') 'https://example.com/image.jpg=w512' >>> format_image_url('', 'LARGE') None """ if not url: return None size_param = Config.get_image_size(size) return f"{url}={size_param}" class ElementSpec: """Specification for extracting a single field from raw data. Defines how to extract a specific piece of information from Google Play Store's complex nested JSON data structures. Each spec contains a navigation path and optional processing logic. The extraction process: 1. Navigate through nested data using data_map path 2. Apply post_processor function if specified 3. Return fallback_value if extraction fails 4. Handle asset sizing for image URLs Attributes: ds_num: Dataset number (legacy, kept for compatibility) data_map: List of keys/indices to navigate to the field (e.g., [1, 2, 0, 0]) post_processor: Optional function to process extracted value (e.g., unescape_text) fallback_value: Value to return if extraction fails (can be another ElementSpec) assets: Asset size parameter for image URLs Examples: # Simple field extraction title_spec = ElementSpec("raw", [1, 2, 0, 0]) # With post-processing price_spec = ElementSpec("raw", [1, 2, 57, 0], lambda x: x / 1000000) # With fallback version_spec = ElementSpec("raw", [1, 2, 140, 0], fallback_value="Unknown") """ def __init__( self, ds_num: Optional[int], data_map: List[int], post_processor: Callable = None, fallback_value: Any = None, assets: str = None, ): """Initialize ElementSpec with extraction parameters.""" self.ds_num = ds_num self.data_map = data_map self.post_processor = post_processor self.fallback_value = fallback_value self.assets = assets def extract_content(self, source: dict, assets: str = None) -> Any: """Extract content from source using data_map. Performs the actual data extraction by following the navigation path, applying post-processing, and handling fallbacks. Args: source: Source dictionary/list (Google Play Store JSON data) assets: Override asset size for this extraction (SMALL, MEDIUM, LARGE, ORIGINAL) Returns: Extracted and processed value, or fallback_value if extraction fails Process: 1. Navigate through source data using data_map path 2. 
Apply post_processor function if available 3. Handle image URL formatting for asset-related fields 4. Return fallback_value if any step fails Examples: >>> spec = ElementSpec("raw", [1, 2, 0, 0]) >>> spec.extract_content({'1': {'2': [['App Title']]}) 'App Title' >>> spec.extract_content({'invalid': 'data'}) None # or fallback_value if specified """ try: result = nested_lookup(source, self.data_map) if self.post_processor is not None: try: if hasattr(self.post_processor, '__name__') and 'image' in self.post_processor.__name__: result = self.post_processor(result, assets or self.assets) else: result = self.post_processor(result) except Exception: pass except (KeyError, IndexError, TypeError, AttributeError): result = None if result is None and self.fallback_value is not None: if isinstance(self.fallback_value, ElementSpec): result = self.fallback_value.extract_content(source, assets) else: result = self.fallback_value return result class ElementSpecs: """Collection of element specifications for all method types. Central registry of data extraction specifications for all Google Play Store data types. Each specification defines exactly how to extract specific fields from the complex nested JSON structures returned by Google's APIs. Data Categories: - App: 65+ fields for complete app details (ratings, installs, permissions, etc.) - Search: Fields for search results (title, developer, price, etc.) - Review: Fields for user reviews (content, rating, timestamp, etc.) - Developer: Fields for developer app listings - Similar: Fields for similar/related apps - List: Fields for top chart apps (rankings, categories, etc.) Usage Pattern: Each category contains ElementSpec objects that define: - Navigation path through JSON data - Post-processing functions for data transformation - Fallback values for missing data - Asset sizing for images Example: >>> app_title = ElementSpecs.App['title'].extract_content(app_data) >>> search_results = [ElementSpecs.Search['title'].extract_content(item) for item in results] """ # App Data Specifications - 65+ fields for complete app analysis App = { "title": ElementSpec("raw", [1, 2, 0, 0]), "description": ElementSpec( "raw", [1, 2], lambda s: (lambda desc_text: unescape_text(desc_text) if desc_text else None)( nested_lookup(s, [72, 0, 0]) or nested_lookup(s, [72, 0, 1]) ), ), "summary": ElementSpec("raw", [1, 2, 73, 0, 1], unescape_text), "installs": ElementSpec("raw", [1, 2, 13, 0]), "minInstalls": ElementSpec("raw", [1, 2, 13, 1]), "realInstalls": ElementSpec("raw", [1, 2, 13, 2]), "score": ElementSpec("raw", [1, 2, 51, 0, 1]), "ratings": ElementSpec("raw", [1, 2, 51, 2, 1]), "reviews": ElementSpec("raw", [1, 2, 51, 3, 1]), "histogram": ElementSpec( "raw", [1, 2, 51, 1], lambda container: [ container[1][1], container[2][1], container[3][1], container[4][1], container[5][1], ], [0, 0, 0, 0, 0], ), "price": ElementSpec( "raw", [1, 2, 57, 0, 0, 0, 0, 1, 0, 0], lambda price: (price / 1000000) or 0 ), "free": ElementSpec("raw", [1, 2, 57, 0, 0, 0, 0, 1, 0, 0], lambda s: s == 0), "currency": ElementSpec("raw", [1, 2, 57, 0, 0, 0, 0, 1, 0, 1]), "sale": ElementSpec("raw", [1, 2, 57, 0, 0, 0, 0, 14, 0, 0], bool, False), "originalPrice": ElementSpec("raw", [1, 2, 57, 0, 0, 0, 0, 1, 1, 0], lambda price: (price / 1000000) if price else None), "offersIAP": ElementSpec("raw", [1, 2, 19, 0], bool, False), "inAppProductPrice": ElementSpec("raw", [1, 2, 19, 0]), "developer": ElementSpec("raw", [1, 2, 68, 0]), "developerId": ElementSpec("raw", [1, 2, 68, 1, 4, 2], lambda s: 
s.split("id=")[1] if s and "id=" in s else None), "developerEmail": ElementSpec("raw", [1, 2, 69, 1, 0]), "developerWebsite": ElementSpec("raw", [1, 2, 69, 0, 5, 2]), "developerAddress": ElementSpec("raw", [1, 2, 69, 4, 2, 0]), "developerPhone": ElementSpec("raw", [1, 2, 69, 4, 3]), "privacyPolicy": ElementSpec("raw", [1, 2, 99, 0, 5, 2]), "genre": ElementSpec("raw", [1, 2, 79, 0, 0, 0]), "genreId": ElementSpec("raw", [1, 2, 79, 0, 0, 2]), "categories": ElementSpec("raw", [1, 2, 79, 0, 0, 0], lambda cat: [cat] if cat else [], []), "icon": ElementSpec("raw", [1, 2, 95, 0, 3, 2]), "headerImage": ElementSpec("raw", [1, 2, 96, 0, 3, 2]), "screenshots": ElementSpec("raw", [1, 2, 78, 0], lambda container: [item[3][2] for item in container] if container else [], []), "video": ElementSpec("raw", [1, 2, 100, 0, 0, 3, 2]), "videoImage": ElementSpec("raw", [1, 2, 100, 1, 0, 3, 2]), "contentRating": ElementSpec("raw", [1, 2, 9, 0]), "contentRatingDescription": ElementSpec("raw", [1, 2, 9, 6, 1], fallback_value=ElementSpec("raw", [1, 2, 9, 2, 1], fallback_value=ElementSpec("raw", [1, 2, 9, 0]))), "appId": ElementSpec("raw", [1, 2, 1, 0, 0]), "adSupported": ElementSpec("raw", [1, 2, 48], bool), "containsAds": ElementSpec("raw", [1, 2, 48], bool, False), "released": ElementSpec("raw", [1, 2, 10, 0]), "updated": ElementSpec("raw", [1, 2, 145, 0, 1, 0], fallback_value=ElementSpec("raw", [1, 2, 103, "146", 0, 0], fallback_value=ElementSpec("raw", [1, 2, 145, 0, 0], fallback_value=ElementSpec("raw", [1, 2, 112, "146", 0, 0], fallback_value=ElementSpec("raw", [1, 2, 103, "146", 0, 1,0], fallback_value="Never updated"))))), "version": ElementSpec("raw", [1, 2, 140, 0, 0, 0], fallback_value=ElementSpec("raw", [1, 2, 103, "141", 0, 0, 0], fallback_value="Varies with device")), "androidVersion": ElementSpec("raw", [1, 2, 140, 1, 1, 0, 0, 1], fallback_value=ElementSpec("raw", [1, 2, 103, "155", 1, 2], fallback_value=ElementSpec("raw", [1, 2, 112, "141", 1, 1, 0, 0, 0], fallback_value="Varies with device"))), "permissions": ElementSpec("raw", [1, 2, 74], parse_permissions), "dataSafety": ElementSpec("raw", [1, 2, 136], lambda data: [item[1] for item in data[1] if item and len(item) > 1] if data and len(data) > 1 and data[1] else []), "appBundle": ElementSpec("raw", [1, 2, 77, 0]), "maxandroidapi": ElementSpec("raw", [1, 2, 140, 1, 0, 0, 0], fallback_value=ElementSpec("raw", [1, 2, 103, "141", 1, 0 , 0, 0], fallback_value=ElementSpec("raw", [1, 2, 112, "141", 1, 0, 0, 0], fallback_value="Varies with device"))), "minandroidapi": ElementSpec("raw", [1, 2, 140, 1, 1, 0, 0, 0], fallback_value=ElementSpec("raw", [1, 2, 103, "141", 1, 1, 0, 0, 0], fallback_value=ElementSpec("raw", [1, 2, 112, "141", 1, 1, 0, 0, 0], fallback_value="Varies with device"))), "whatsNew": ElementSpec("raw", [1, 2, 144, 1, 1], lambda x: [line.strip() for line in html.unescape(x).split('