[
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n.hypothesis/\n.pytest_cache/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# pyenv\n.python-version\n\n# celery beat schedule file\ncelerybeat-schedule\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n\n*.env\n"
  },
  {
    "path": "README.md",
    "content": "# PacktPub Downloader\n\nScript to download all your PacktPub books, inspired by https://github.com/ozzieperez/packtpub-library-downloader\n\nSince PacktPub restructured their website, [packtpub-library-downloader](https://github.com/ozzieperez/packtpub-library-downloader) became obsolete because it relied on web scraping. PacktPub now uses a REST API, so I found which endpoints are needed to download books and wrote a simple script around them. Feel free to fork and open a PR to improve it. PacktPub's API isn't documented :'(\n\n## Usage:\n    pip install -r requirements.txt\n    python main.py -e <email> -p <password> [-d <directory> -b <book file types> -s -v -q]\n\n##### Example: Download books in every available format (PDF, EPUB, MOBI and source code)\n    python main.py -e hello@world.com -p p@ssw0rd -d ~/Desktop/packt -b pdf,epub,mobi,code\n\n## Docker integration\n\nYou must put your credentials in the `data.env` file.\n\n```\nmv data.env-sample data.env\n```\n\nand replace the sample data with your login credentials.\n\n```\ndocker-compose up\n```\n\nAfter the execution, you can find the downloaded content in the `book` directory.\n\n\n## Commandline Options\n- *-e*, *--email* = Your login email\n- *-p*, *--pass* = Your login password\n- *-d*, *--directory* = Directory to download into. Default is \"media/\" in the current directory\n- *-b*, *--books* = Assets to download. Options are: *pdf,mobi,epub,code*\n- *-s*, *--separate* = Create a separate directory for each book\n- *-v*, *--verbose* = Show more detailed information\n- *-q*, *--quiet* = Don't show information or progress bars (cannot be combined with *-v*)\n\n**Book File Types**\n\n- *pdf*: PDF format\n- *mobi*: MOBI format\n- *epub*: EPUB format\n- *code*: Accompanying source code, saved as .zip files\n\nDeveloped and tested with Python 3.6.0.\n"
  },
  {
    "path": "config.py",
    "content": "# -*- coding: utf-8 -*-\n\n'''\n    This file contains all the URL endpoints.\n'''\n\n# TODO: consider replacing these separate variables with one big JSON/dict of URLs.\n\n# Base URL that every request is sent to\nBASE_URL = \"https://services.packtpub.com/\"\n\n# URL to request a JWT token; username and password are sent by POST, returns the JWT token\nAUTH_ENDPOINT = \"auth-v1/users/tokens\"\n\n# URL to get all your books; the two params that change are offset and limit, method GET\nPRODUCTS_ENDPOINT = \"entitlements-v1/users/me/products?sort=createdAt:DESC&offset={offset}&limit={limit}\"\n\n# URL to get the available file types of a book; param is the book id, method GET\nURL_BOOK_TYPES_ENDPOINT = \"products-v1/products/{book_id}/types\"\n\n# URL to get the download URL of a file; params are the book id and the file format (can be pdf, epub, etc.), method GET\nURL_BOOK_ENDPOINT = \"products-v1/products/{book_id}/files/{format}\"\n"
  },
  {
    "path": "data.env-sample",
    "content": "EMAIL=email@example.com\nPASSWORD=example$password"
  },
  {
    "path": "docker-compose.yml",
    "content": "version: '3.3'\n\nservices:\n  packtpub-downloader:\n    image: python:3.6.0\n    container_name: \"packtpub-downloader\"\n    env_file:\n      - data.env\n    volumes: \n      - \"./:/app\"\n    command: \"/bin/bash /app/entrypoint.sh\""
  },
  {
    "path": "entrypoint.sh",
    "content": "pip install -r /app/requirements.txt\npython /app/main.py -e $EMAIL -p $PASSWORD -d /app/book -b pdf,mobi,epub,code"
  },
  {
    "path": "main.py",
    "content": "# -*- coding: utf-8 -*-\n#!/usr/bin/python\n\nfrom __future__ import print_function\nimport os\nimport sys\nimport glob\nimport math\nimport getopt\nimport requests\nfrom tqdm import tqdm, trange\nfrom config import BASE_URL, PRODUCTS_ENDPOINT, URL_BOOK_TYPES_ENDPOINT, URL_BOOK_ENDPOINT\nfrom user import User\n\n\n#TODO: I should do a function that his only purpose is to request and return data\ndef book_request(user, offset=0, limit=10, verbose=False):\n    data = []\n    url = BASE_URL + PRODUCTS_ENDPOINT.format(offset=offset, limit=limit)\n    if verbose:\n        print(url)\n    r = requests.get(url, headers=user.get_header())\n    data += r.json().get('data', [])\n\n    return url, r, data\n\ndef get_books(user, offset=0, limit=10, is_verbose=False, is_quiet=False):\n    '''\n        Request all your books, return json with info of all your books\n        Params\n        ...\n        header : str\n        offset : int\n        limit : int\n            how many book wanna get by request\n    '''\n    # TODO: given x time jwt expired and should refresh the header, user.refresh_header()\n    \n    url, r, data = book_request(user, offset, limit)\n    \n    print(f'You have {str(r.json()[\"count\"])} books')\n    print(\"Getting list of books...\")\n    \n    if not is_quiet:\n        pages_list = trange(r.json()['count'] // limit, unit='Pages')\n    else:\n        pages_list = range(r.json()['count'] // limit)\n    for i in pages_list:\n        offset += limit\n        data += book_request(user, offset, limit, is_verbose)[2]\n    return data\n\n\ndef get_url_book(user, book_id, format='pdf'):\n    '''\n        Return url of the book to download\n    '''\n    \n    url = BASE_URL + URL_BOOK_ENDPOINT.format(book_id=book_id, format=format)\n    r = requests.get(url, headers=user.get_header())\n\n    if r.status_code == 200: # success\n        return r.json().get('data', '')\n\n    elif r.status_code == 401: # jwt expired \n        
user.refresh_header() # refresh token \n        get_url_book(user, book_id, format)  # call recursive \n    \n    print('ERROR (please copy and paste in the issue)')\n    print(r.json())\n    print(r.status_code)\n    return ''\n\n\ndef get_book_file_types(user, book_id):\n    '''\n        Return a list with file types of a book\n    '''\n\n    url = BASE_URL + URL_BOOK_TYPES_ENDPOINT.format(book_id=book_id)\n    r = requests.get(url, headers=user.get_header())\n\n    if  (r.status_code == 200): # success\n        return r.json()['data'][0].get('fileTypes', [])\n    \n    elif (r.status_code == 401): # jwt expired \n        user.refresh_header() # refresh token \n        get_book_file_types(user, book_id, format)  # call recursive \n    \n    print('ERROR (please copy and paste in the issue)')\n    print(r.json())\n    print(r.status_code)\n    return []\n\n\n# TODO: i'd like that this functions be async and download faster\ndef download_book(filename, url):\n    '''\n        Download your book\n    '''\n    print('Starting to download ' + filename)\n\n    with open(filename, 'wb') as f:\n        r = requests.get(url, stream=True)\n        total = r.headers.get('content-length')\n        if total is None:\n            f.write(response.content)\n        else:\n            total = int(total)\n            # TODO: read more about tqdm\n            for chunk in tqdm(r.iter_content(chunk_size=1024), total=math.ceil(total//1024), unit='KB', unit_scale=True):\n                if chunk:  # filter out keep-alive new chunks\n                    f.write(chunk)\n                    f.flush()\n            print('Finished ' + filename)\n\n\ndef make_zip(filename):\n    if filename[-4:] == 'code':\n        os.replace(filename, filename[:-4] + 'zip')\n\n\ndef move_current_files(root, book):\n    sub_dir = f'{root}/{book}'\n    does_dir_exist(sub_dir)\n    for f in glob.iglob(sub_dir + '.*'):\n        try:\n            os.rename(f, f'{sub_dir}/{book}' + f[f.index('.'):])\n        
except OSError:\n            os.rename(f, f'{sub_dir}/{book}' + '_1' + f[f.index('.'):])\n        except ValueError as e:\n            print(e)\n            print('Skipping')\n\n\ndef does_dir_exist(directory):\n    if not os.path.exists(directory):\n        try:\n            os.makedirs(directory)\n        except Exception as e:\n            print(e)\n            sys.exit(2)\n\n\ndef main(argv):\n    # thanks to https://github.com/ozzieperez/packtpub-library-downloader/blob/master/downloader.py\n    email = None\n    password = None\n    root_directory = 'media' \n    book_file_types = ['pdf', 'mobi', 'epub', 'code']\n    separate = None\n    verbose = None\n    quiet = None\n    errorMessage = 'Usage: main.py -e <email> -p <password> [-d <directory> -b <book file types> -s -v -q]'\n\n    # get the command line arguments/options\n    try:\n        opts, args = getopt.getopt(\n            argv, 'e:p:d:b:svq', ['email=', 'pass=', 'directory=', 'books=', 'separate', 'verbose', 'quiet'])\n    except getopt.GetoptError:\n        print(errorMessage)\n        sys.exit(2)\n\n    # hold the values of the command line options\n    for opt, arg in opts:\n        if opt in ('-e', '--email'):\n            email = arg\n        elif opt in ('-p', '--pass'):\n            password = arg\n        elif opt in ('-d', '--directory'):\n            root_directory = os.path.expanduser(\n                arg) if '~' in arg else os.path.abspath(arg)\n        elif opt in ('-b', '--books'):\n            book_file_types = arg.split(',')\n        elif opt in ('-s', '--separate'):\n            separate = True\n        elif opt in ('-v', '--verbose'):\n            verbose = True\n        elif opt in ('-q', '--quiet'):\n            quiet = True\n\n    if verbose and quiet:\n        print(\"Verbose and quiet cannot be used together.\")\n        sys.exit(2)\n\n    # do we have the minimum required info?\n    if not email or not password:\n        print(errorMessage)\n        sys.exit(2)\n\n    # 
check if not exists dir and create\n    does_dir_exist(root_directory)\n\n    # create user with his properly header\n    user = User(email, password)\n\n    # get all your books\n    books = get_books(user, is_verbose=verbose, is_quiet=quiet)\n    print('Downloading books...')\n    if not quiet:\n        books_iter = tqdm(books, unit='Book')\n    else:\n        books_iter = books\n    for book in books_iter:\n        # get the different file type of current book\n        file_types = get_book_file_types(user, book['productId'])\n        for file_type in file_types:\n            if file_type in book_file_types:  # check if the file type entered is available by the current book\n                book_name = book['productName'].replace(' ', '_').replace('.', '_').replace(':', '_').replace('/','')\n                if separate:\n                    filename = f'{root_directory}/{book_name}/{book_name}.{file_type}'\n                    move_current_files(root_directory, book_name)\n                else:\n                    filename = f'{root_directory}/{book_name}.{file_type}'\n                # get url of the book to download\n                url = get_url_book(user, book['productId'], file_type)\n                if not os.path.exists(filename) and not os.path.exists(filename.replace('.code', '.zip')):\n                    download_book(filename, url)\n                    make_zip(filename)\n                else:\n                    if verbose:\n                        tqdm.write(f'{filename} already exists, skipping.')\n\n\nif __name__ == '__main__':\n    main(sys.argv[1:])\n"
  },
  {
    "path": "requirements.txt",
    "content": "aiofiles==0.4.0\naiohttp==3.5.4\nasync-timeout==3.0.1\nattrs==18.2.0\ncertifi==2018.11.29\nchardet==3.0.4\nidna==2.8\nidna-ssl==1.1.0\nmultidict==4.5.2\nrequests==2.21.0\ntqdm==4.30.0\ntyping-extensions==3.7.2\nurllib3==1.24.1\nyarl==1.3.0\n"
  },
  {
    "path": "user.py",
    "content": "# -*- coding: utf-8 -*-\n\nfrom __future__ import print_function\nimport sys\nimport requests\nfrom config import BASE_URL, AUTH_ENDPOINT\n\nclass User:\n    \"\"\"\n        User object that contain his header \n    \"\"\"\n    username = \"\"\n    password = \"\"\n    # need to fill Authoritazion with current token provide by api\n    header = {\n        \"User-Agent\": \"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 \" +\n        \"(KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36\",\n        \"Authorization\":\"\"\n        }\n    \n    def __init__(self, username, password):\n        self.username = username\n        self.password = password\n        self.header[\"Authorization\"] = self.get_token()\n    \n    def get_token(self):\n        \"\"\"\n            Request auth endpoint and return user token  \n        \"\"\"\n        url = BASE_URL+AUTH_ENDPOINT\n        # use json paramenter because for any reason they send user and pass in plain text :'(  \n        r = requests.post(url, json={'username':self.username, 'password':self.password})\n        if r.status_code == 200:\n            print(\"You are in!\")\n            return 'Bearer ' + r.json()['data']['access']\n    \n        # except should happend when user and pass are incorrect \n        print(\"Error login,  check user and password\")\n        print(\"Error {}\".format(e))\n        sys.exit(2)\n\n    def get_header(self):\n        return self.header\n\n    def refresh_header(self):\n        \"\"\"\n            Refresh jwt because it expired and returned\n        \"\"\"\n        self.header[\"Authorization\"] = self.get_token()\n\n        return self.header\n\n"
  }
]