Repository: TRoboto/datacamp-downloader Branch: master Commit: 5347e77ae527 Files: 20 Total size: 68.2 KB Directory structure: gitextract_6dyxc1ui/ ├── .gitattributes ├── .github/ │ └── workflows/ │ └── release.yml ├── .gitignore ├── LICENSE ├── README.md ├── docs.md ├── pyproject.toml ├── requirements.txt ├── setup.py └── src/ └── datacamp_downloader/ ├── __init__.py ├── constants.py ├── datacamp_utils.py ├── downloader.py ├── helper.py ├── session.py └── templates/ ├── course.py ├── exercise.py ├── lang.py ├── track.py └── video.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitattributes ================================================ # Auto detect text files and perform LF normalization * text=auto ================================================ FILE: .github/workflows/release.yml ================================================ name: Upload Python Package on: push: tags: - "V*" jobs: deploy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 - name: Install pypa/build run: python -m pip install build --user - name: Build a binary wheel and a source tarball run: python -m build --sdist --wheel --outdir dist/ . 
- name: Publish distribution 📦 to PyPI uses: pypa/gh-action-pypi-publish@master with: password: ${{ secrets.PYPI_API_TOKEN }} ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ .idea/ .vscode/ Datacamp/ ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2020 Mohammad Al-Fetyani Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: README.md ================================================ # Datacamp Downloader [](https://github.com/TRoboto/datacamp-downloader/blob/master/LICENSE) [](https://pypi.org/project/datacamp-downloader/) [](https://github.com/TRoboto/datacamp-downloader/blob/master/docs.md) [](https://pepy.tech/project/datacamp-downloader) [](https://github.com/TRoboto/datacamp-downloader/stargazers) [](https://github.com/TRoboto/datacamp-downloader/network/members) [](https://github.com/TRoboto/datacamp-downloader/graphs/contributors) ## Table of Contents - [Datacamp Downloader](#datacamp-downloader) - [Table of Contents](#table-of-contents) - [Description](#description) - [Installation](#installation) - [PIP](#pip) - [From source](#from-source) - [Autocompletion](#autocompletion) - [Documentation](#documentation) - [Getting Started](#getting-started) - [Login](#login) - [Download](#download) - [User Privacy](#user-privacy) - [Disclaimer](#disclaimer) ## Update Datacamp Downloader V3.2 is now available. The major change is that the tool now uses selenium for the backend. See changelog for version [3.0](https://github.com/TRoboto/datacamp-downloader/pull/39), [3.1](https://github.com/TRoboto/datacamp-downloader/pull/42) and [3.2](https://github.com/TRoboto/datacamp-downloader/pull/47). ## Description Datacamp Downloader is a command-line interface tool developed in Python in order to help you download your completed contents on [Datacamp](https://datacamp.com) and keep them locally on your computer. Datacamp Downloader helps you download all videos, slides, audios, exercises, transcripts, datasets and subtitles in organized folders. The design and development of this tool was inspired by [udacimak](https://github.com/udacimak/udacimak) **Datacampers!** If you find this CLI helpful, please support the developers by starring this repository. 
## Installation ### PIP If you use pip, you can install datacamp-downloader with: ``` pip install datacamp-downloader ``` ### From source You can directly clone this repo and install the tool with: ``` pip install git+https://github.com/TRoboto/datacamp-downloader.git ``` ### Autocompletion To allow command autocompletion with `[TAB][TAB]`, run: ``` datacamp --install-completion [bash|zsh|fish|powershell|pwsh] ``` Then restart the terminal. **Note:** autocompletion might not be supported by all operating systems. ## Documentation The available commands with full documentation can be found in [docs](https://github.com/TRoboto/datacamp-downloader/blob/master/docs.md) ## Getting Started ### Login - To log in using your username and password, run: ``` datacamp login -u [USERNAME] -p [PASSWORD] ``` or simply run: ``` datacamp login ``` - To log in using your Datacamp authentication token, run: ``` datacamp set-token [TOKEN] ``` Datacamp authentication token can be found in Datacamp website browser _cookies_. To get your Datacamp authentication token, follow these steps: **Firefox** 1. Visit [datacamp.com](https://datacamp.com) and log in. 2. Open the **Developer Tools** (press `Cmd + Opt + J` on MacOS or `F12` on Windows). 3. Go to **Storage tab**, then **Cookies** > `https://www.datacamp.com` 4. Find `_dct` key, its **Value** is the Datacamp authentication token. **Chrome** 1. Visit [datacamp.com](https://datacamp.com) and log in. 2. Open the **Developer Tools** (press `Cmd + Opt + J` on MacOS or `F12` on Windows). 3. Go to **Application tab**, then **Storage** > **Cookies** > `https://www.datacamp.com` 4. Find `_dct` key, its **Value** is the Datacamp authentication token. --- **Security Note** Datacamp authentication token is a secret key and is unique to you. **You should not share it publicly**. --- If you provided valid credentials, you should see the following: ``` Hi, YOUR_NAME Active subscription found ``` > Active subscription is not required anymore. 
### Download First, you should list your completed courses/track. To list your completed **courses**, run: ``` datacamp courses ``` To list your completed **tracks**, run: ``` datacamp tracks ``` Similar output to this should appear with your completed courses/tracks: ``` +--------+------------------------------------------+------------+------------+------------+ | ID | Title | Datasets | Exercises | Videos | +--------+------------------------------------------+------------+------------+------------+ | 1 | Introduction to Python | 2 | 46 | 11 | +--------+------------------------------------------+------------+------------+------------+ | 2 | Introduction to SQL | 1 | 40 | 1 | +--------+------------------------------------------+------------+------------+------------+ | 3 | Intermediate Python | 3 | 69 | 18 | +--------+------------------------------------------+------------+------------+------------+ | 4 | Introduction to Data Science in Python | 0 | 31 | 13 | +--------+------------------------------------------+------------+------------+------------+ | 5 | Data Science for Everyone | 0 | 33 | 15 | +--------+------------------------------------------+------------+------------+------------+ | 6 | Joining Data in SQL | 3 | 40 | 13 | +--------+------------------------------------------+------------+------------+------------+ | 7 | Data Manipulation with pandas | 4 | 41 | 15 | +--------+------------------------------------------+------------+------------+------------+ | 8 | Supervised Learning with scikit-learn | 7 | 37 | 17 | +--------+------------------------------------------+------------+------------+------------+ | 9 | Machine Learning for Everyone | 0 | 25 | 12 | +--------+------------------------------------------+------------+------------+------------+ | 10 | Python Data Science Toolbox (Part 1) | 1 | 34 | 12 | +--------+------------------------------------------+------------+------------+------------+ ``` Now, you can download any of the courses/tracks with: 
``` datacamp download id1 id2 id3 ``` For example to download the first and second course, run: ``` datacamp download 1 2 ``` - To download all your completed courses, run: ``` datacamp download all ``` - To download all your completed tracks, run: ``` datacamp download all-t ``` This by default will download **videos**, **slides**, **datasets**, **exercises**, **english subtitles** and **transcripts** in organized folders in the **current directory**. To customize this behavior see `datacamp download` command in the [docs](https://github.com/TRoboto/datacamp-downloader/blob/master/docs.md). ## User Privacy `datacamp` creates a session file with your credentials saved in the temp folder. If you no longer need to use the tool, it is preferable to reset the session, which will remove the saved file, with: ``` datacamp reset ``` ## Disclaimer This CLI is provided to help you download Datacamp courses/tracks for personal use only. Sharing the content of the courses is strictly prohibited under [Datacamp's Terms of Use](https://www.datacamp.com/terms-of-use/). By using this CLI, the developers of this CLI are not responsible for any law infringement caused by the users of this CLI. ================================================ FILE: docs.md ================================================ # `datacamp` **Usage**: ```console $ datacamp [OPTIONS] COMMAND [ARGS]... ``` **Options**: - `--version`: Show version. - `--install-completion`: Install completion for the current shell. - `--show-completion`: Show completion for the current shell, to copy it or customize the installation. - `--help`: Show this message and exit. **Commands**: - `courses`: List your completed courses. - `download`: Download courses/tracks given their ids. - `login`: Log in to Datacamp using your username and password - `reset`: Restart the session. - `set-token`: Log in to Datacamp using your token. - `tracks`: List your completed tracks. 
## `datacamp login` Log in to Datacamp using your username and password. **Usage**: ```console $ datacamp login [OPTIONS] ``` **Options**: - `-u, --username TEXT`: [required] - `-p, --password TEXT`: [required] - `--help`: Show this message and exit. ## `datacamp set-token` Log in to Datacamp using your token. **Usage**: ```console $ datacamp set-token [OPTIONS] TOKEN ``` **Arguments**: - `TOKEN`: [required] **Options**: - `--help`: Show this message and exit. ## `datacamp courses` List your completed courses. **Usage**: ```console $ datacamp courses [OPTIONS] ``` **Options**: - `-r, --refresh`: Refresh completed courses. [default: False] - `--help`: Show this message and exit. ## `datacamp tracks` List your completed tracks. **Usage**: ```console $ datacamp tracks [OPTIONS] ``` **Options**: - `-r, --refresh`: Refresh completed tracks. [default: False] - `--help`: Show this message and exit. ## `datacamp download` Download courses/tracks given their ids. Example: `datacamp download id1 id2 id3` To download all your completed courses run: `datacamp download all` To download all your completed tracks run: `datacamp download all-t` **Usage**: ```console $ datacamp download [OPTIONS] IDS... ``` **Arguments**: - `IDS...`: IDs for courses/tracks to download or `all` to download all your completed courses or `all-t` to download all your completed tracks. [required] **Options**: - `-p, --path DIRECTORY`: Path to the download directory. [default: `current_directory/Datacamp`] - `--slides / --no-slides`: Download slides. [default: True] - `--datasets / --no-datasets`: Download datasets. [default: True] - `--videos / --no-videos`: Download videos. [default: True] - `--exercises / --no-exercises`: Download exercises. [default: True] - `-st, --subtitles [en|zh|fr|de|it|ja|ko|pt|ru|es|none]`: Choose subtitles to download. [default: en] - `--audios / --no-audios`: Download audio files. 
[default: False] - `--scripts, --transcript / --no-scripts, --no-transcript`: Download scripts or transcripts. [default: True] - `--python-file / --no-python-file`: Download your own solution as a python file if available. [default: True] - `--no-warnings`: Disable warnings. [default: True] - `-w, --overwrite`: Overwrite files if exist. [default: False] - `--help`: Show this message and exit. ## `datacamp reset` Restart the session. **Usage**: ```console $ datacamp reset [OPTIONS] ``` **Options**: - `--help`: Show this message and exit. ================================================ FILE: pyproject.toml ================================================ [build-system] requires = [ "setuptools", "wheel" ] build-backend = "setuptools.build_meta" ================================================ FILE: requirements.txt ================================================ beautifulsoup4==4.13.5 requests==2.32.5 selenium==4.35.0 undetected-chromedriver==3.2.1 webdriver-manager==4.0.2 texttable==1.6.3 termcolor==1.1.0 colorama==0.4.4 tomd==0.1.3 typer==0.3.2 setuptools==80.9.0 ================================================ FILE: setup.py ================================================ from setuptools import find_packages, setup with open("README.md", "r", encoding="utf-8") as fh: long_description = fh.read() with open("requirements.txt", "r", encoding="utf-8") as fh: required = fh.read().splitlines() setup( name="datacamp-downloader", version="3.3", author="Mohammad Al-Fetyani", author_email="m4bh@hotmail.com", description="Download your completed courses on Datacamp easily!", long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/TRoboto/datacamp-downloader", project_urls={ "Bug Tracker": "https://github.com/TRoboto/datacamp-downloader/issues", }, classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ], package_dir={"": "src"}, 
import tempfile

# --- Local state -----------------------------------------------------------
# Session file path inside the system temp directory.
SESSION_FILE = "".join([tempfile.gettempdir(), "/.datacamp.v3"])

# --- datacamp.com pages and account endpoints --------------------------------
HOME_PAGE = "https://www.datacamp.com/"
LOGIN_URL = "https://www.datacamp.com/users/sign_in"
LOGIN_DETAILS_URL = "https://www.datacamp.com/api/users/signed_in"
PROFILE_URL = "https://www.datacamp.com/profile/{slug}"
PROFILE_DATA_URL = "https://www.datacamp.com/api/public/users/{slug}"

# --- Content APIs (templates are filled in with ``str.format``) --------------
COURSE_DETAILS_API = "https://campus-api.datacamp.com/api/courses/{id}/"
EXERCISE_DETAILS_API = "https://campus-api.datacamp.com/api/exercise/{id}"
PROGRESS_API = "https://campus-api.datacamp.com/api/courses/{course_id}/chapters/{chapter_id}/progress"
VIDEO_DETAILS_API = "https://projector.datacamp.com/api/videos/{hash}"

# --- Subtitle language codes mapped to their display names -------------------
LANGMAP = dict(
    [
        ("en", "English"),
        ("zh", "Chinese simplified"),
        ("fr", "French"),
        ("de", "German"),
        ("it", "Italian"),
        ("ja", "Japanese"),
        ("ko", "Korean"),
        ("pt", "Portuguese"),
        ("ru", "Russian"),
        ("es", "Spanish"),
    ]
)
def login_required(f):
    """Decorate a ``Datacamp`` method so it only runs when logged in.

    The wrapper treats its first positional argument as the instance. If
    that argument is not a ``Datacamp`` object, or its ``loggedin`` flag is
    falsy, an error is logged and ``None`` is returned without calling ``f``.
    """
    # Function-scope import: keeps this block self-contained.
    from functools import wraps

    @wraps(f)  # preserve the method's name/docstring for stacked decorators
    def wrapper(*args, **kwargs):
        self = args[0]
        if not isinstance(self, Datacamp):
            Logger.error(f"{login_required.__name__} can only decorate Datacamp class.")
            return
        if not self.loggedin:
            Logger.error("Login first!")
            return
        return f(*args, **kwargs)

    return wrapper


def try_except_request(f):
    """Decorate a ``Datacamp`` method so exceptions are logged, not raised.

    Any ``Exception`` raised while calling ``f`` is logged through
    ``Logger.error`` (exceptions with an empty message are dropped silently)
    and the wrapper returns ``None`` instead.

    NOTE: this only guards the *call*. If ``f`` is a generator function the
    call merely creates the generator, so errors raised while iterating are
    not caught here.
    """
    from functools import wraps

    @wraps(f)
    def wrapper(*args, **kwargs):
        self = args[0]
        if not isinstance(self, Datacamp):
            Logger.error(
                f"{try_except_request.__name__} can only decorate Datacamp class."
            )
            return
        try:
            return f(*args, **kwargs)
        except Exception as e:
            # An empty message is used by callers to signal "fail quietly".
            if str(e):
                Logger.error(e)
        return

    return wrapper


class Datacamp:
    """Account-level operations on top of a browser-backed ``session``.

    Handles logging in (credentials or token), listing the completed
    courses/tracks of the logged-in profile and downloading their material.
    All network access goes through ``self.session``.
    """

    def __init__(self, session: "session.Session") -> None:
        self.session = session
        self.init()

    def init(self):
        """Reset every piece of account state to its logged-out default."""
        self.username = None
        self.password = None
        self.token = None
        self.has_active_subscription = False
        self.loggedin = False
        self.login_data = None
        self.profile_data = None
        self.courses = []
        self.tracks = []
        self.not_found_courses = set()

    def _save_screenshot(self, filename):
        """Best-effort debugging screenshot of the current browser page."""
        try:
            self.session.driver.save_screenshot(filename)
        except Exception:
            # Deliberately ignored: a failed screenshot must never mask the
            # login error that is being reported.
            pass

    def _type_password(self, password_field, password):
        """Enter ``password`` into ``password_field``; return True on success.

        Strategies are tried in order: ActionChains, plain ``send_keys``,
        then setting the value through JavaScript. If all three fail the
        collected errors are logged and a screenshot is saved.
        """
        driver = self.session.driver
        errors = []

        try:
            # ActionChains to focus and type
            ActionChains(driver).move_to_element(password_field).click().send_keys(
                password
            ).perform()
            Logger.info("Password typed via ActionChains")
            return True
        except Exception as e:
            errors.append(e)

        try:
            password_field.clear()
            password_field.send_keys(password)
            Logger.info("Password typed via send_keys")
            return True
        except Exception as e:
            errors.append(e)

        try:
            # Last resort: set value via JS
            driver.execute_script(
                "arguments[0].value = arguments[1]; arguments[0].dispatchEvent(new Event('input'));",
                password_field,
                password,
            )
            Logger.info("Password set via JS")
            return True
        except Exception as e:
            errors.append(e)

        Logger.error(
            "Cannot type password into the field. Details:\n"
            + "\n".join(map(str, errors))
        )
        self._save_screenshot("login_error_password.png")
        return False

    @animate_wait
    @try_except_request
    def login(self, username, password):
        """Log in through the sign-in web form with ``username``/``password``.

        Drives the browser step by step (email, continue, password, submit).
        On any failed step an error is logged, a debugging screenshot is
        attempted and the method returns early. On success the ``_dct``
        cookie is stored as ``self.token`` and the profile is loaded.
        """
        # quick guard
        if username == self.username and self.password == password and self.loggedin:
            Logger.info("Already logged in!")
            return

        self.init()
        self.username = username
        self.password = password

        # open signin page (this calls self.session.start() internally)
        req = self.session.get(LOGIN_URL)
        if not req:
            Logger.error("Cannot access datacamp website!")
            return

        driver = self.session.driver

        try:
            # Wait for the email input to be present and clickable
            wd = WebDriverWait(driver, 15)
            wd.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#user_email")))
            email = driver.find_element(By.ID, "user_email")
            email.clear()
            email.click()
            email.send_keys(username)
            Logger.info("Filled email")
        except Exception as e:
            Logger.error(f"Cannot find/fill email field: {e}")
            self._save_screenshot("login_error_email.png")
            return

        # Click the next/continue button (try a couple of selectors)
        try:
            try:
                next_button = driver.find_element(By.XPATH, '//button[@tabindex="2"]')
            except Exception:
                # fallback: any submit button in a form
                next_button = driver.find_element(
                    By.CSS_SELECTOR, "button[type='submit'], input[type='submit']"
                )
            next_button.click()
        except Exception as e:
            Logger.error(f"Cannot click next/continue button: {e}")
            self._save_screenshot("login_error_next.png")
            return

        # Wait for password input to be clickable
        try:
            wd = WebDriverWait(driver, 15)
            password_field = wd.until(
                EC.element_to_be_clickable((By.ID, "user_password"))
            )
        except Exception as e:
            Logger.error(
                f"Password field not found or not clickable (maybe SSO-only login?): {e}"
            )
            self._save_screenshot("login_error_no_password.png")
            return

        if not self._type_password(password_field, password):
            return

        # Submit the form (try button or ENTER)
        try:
            try:
                submit_button = driver.find_element(By.XPATH, '//input[@tabindex="4"]')
                submit_button.click()
            except Exception:
                # fallback: hit Enter on password field
                password_field.send_keys(Keys.RETURN)
            Logger.info("Submitted login form, waiting for result...")
        except Exception as e:
            Logger.error(f"Cannot submit login form: {e}")
            self._save_screenshot("login_error_submit.png")
            return

        # Give the page up to 10s to leave the sign-up/invalid state.
        try:
            WebDriverWait(driver, 10).until(
                lambda d: "/users/sign_up" not in d.page_source
                and "Invalid" not in d.page_source
            )
        except Exception:
            # Not a fatal error here, proceed to check token / page content
            pass

        # obtain token cookie if login succeeded
        try:
            token_cookie = driver.get_cookie("_dct")
            if not token_cookie:
                Logger.error(
                    "Login did not produce a _dct cookie (likely login failed or SSO-only)."
                )
                self._save_screenshot("login_no_token.png")
                return
            self.token = token_cookie["value"]
            self._set_profile()
            Logger.info("Login flow completed")
        except Exception as e:
            Logger.error("Error after login attempt: " + str(e))
            self._save_screenshot("login_error_final.png")
            return

    @animate_wait
    @try_except_request
    def set_token(self, token):
        """Log in with an existing authentication ``token``."""
        if self.token == token and self.loggedin:
            Logger.info("Already logged in!")
            return
        self.init()
        self.session.start()
        self.token = token
        self.session.add_token(token)
        self._set_profile()

    def get_profile_data(self, refresh=False):
        """Return the public profile JSON of the logged-in user.

        The result is cached in ``self.profile_data``. Pass ``refresh=True``
        to discard the cache and fetch again.
        """
        if refresh:
            self.profile_data = None
        if not self.profile_data:
            self.profile_data = self.session.get_json(
                PROFILE_DATA_URL.format(slug=self.login_data["slug"])
            )
            self.session.driver.minimize_window()
        return self.profile_data

    @login_required
    @animate_wait
    def list_completed_tracks(self, refresh):
        """Print a table of completed tracks, one row at a time as fetched."""
        table = get_table()
        table.set_cols_width([6, 40, 10])
        table.add_row(["ID", "Title", "Courses"])
        table_so_far = table.draw()
        Logger.clear_and_print(table_so_far)
        for track in self.get_completed_tracks(refresh):
            table.add_row([track.id, track.title, len(track.courses)])
            table_str = table.draw()
            # Only print what was added since the previous draw.
            Logger.clear_and_print(table_str.replace(table_so_far, "").strip())
            table_so_far = table_str

    @login_required
    @animate_wait
    def list_completed_courses(self, refresh):
        """Print a table of completed courses and assign each its order.

        The 1-based row number is stored in ``course.order``; that number is
        what ``get_course_by_order`` (and so ``download``) looks up later.
        """
        table = get_table()
        table.set_cols_width([6, 40, 10, 10, 10])
        table.add_row(["ID", "Title", "Datasets", "Exercises", "Videos"])
        table_so_far = table.draw()
        Logger.clear_and_print(table_so_far)
        for i, course in enumerate(self.get_completed_courses(refresh), 1):
            all_exercises_count = sum(c.nb_exercises for c in course.chapters)
            videos_count = sum(c.number_of_videos for c in course.chapters)
            course.order = i
            table.add_row(
                [
                    i,
                    course.title,
                    len(course.datasets),
                    all_exercises_count - videos_count,
                    videos_count,
                ]
            )
            table_str = table.draw()
            Logger.clear_and_print(table_str.replace(table_so_far, "").strip())
            table_so_far = table_str

    @login_required
    def download(self, ids, directory, **kwargs):
        """Download the courses/tracks selected by ``ids`` into ``directory``.

        ``ids`` may contain ``"all-t"`` (every fetched track), ``"all"``
        (every fetched course), track ids (any id containing ``"t"``) or
        numeric course order numbers. ``kwargs`` carries the content flags,
        which are forwarded to the per-course/per-track downloaders;
        ``overwrite`` is also remembered on ``self.overwrite``.
        """
        self.overwrite = kwargs.get("overwrite")
        if "all-t" in ids:
            if not self.tracks:
                Logger.error(
                    "No tracks to download! Maybe run `datacamp tracks` first!"
                )
                return
            to_download = self.tracks
        elif "all" in ids:
            if not self.courses:
                Logger.error(
                    "No courses to download! Maybe run `datacamp courses` first!"
                )
                return
            to_download = self.courses
        else:
            to_download = []
            for item_id in ids:
                if "t" in item_id:
                    track = self.get_track(item_id)
                    if not track:
                        Logger.warning(f"Track {item_id} is not fetched. Ignoring it.")
                        continue
                    to_download.append(track)
                elif item_id.isnumeric():
                    course = self.get_course_by_order(int(item_id))
                    if not course:
                        Logger.warning(f"Course {item_id} is not fetched. Ignoring it.")
                        continue
                    to_download.append(course)

        if not to_download:
            Logger.error("No courses/tracks to download!")
            return

        path = Path(directory) if not isinstance(directory, Path) else directory

        self.session.start()
        self.session.driver.minimize_window()

        for i, material in enumerate(to_download, 1):
            if not material:
                continue
            Logger.info(
                f"[{i}/{len(to_download)}] Start to download ({material.id}) {material.title}"
            )
            if isinstance(material, Course):
                self.download_course(material, path, **kwargs)
            else:
                self.download_track(material, path, **kwargs)

    def download_normal_exercise(
        self, exercise: Exercise, path: Path, include_last_attempt: bool = False
    ):
        """Save a non-video exercise (and its sub-exercises) as text files.

        ``path`` is the target ``*.md`` file. When ``include_last_attempt``
        is set and the exercise is a Python one with a recorded attempt, the
        attempt is saved next to it with a ``.py`` extension. Sub-exercises
        are fetched and written as ``<name>_sub<i>.md``.
        """
        save_text(path, str(exercise), self.overwrite)
        # Callers in this class always pass a "*.md" path; drop that suffix.
        base_name = path.name[:-3]
        if include_last_attempt and exercise.is_python and exercise.last_attempt:
            save_text(
                path.parent / (base_name + ".py"),
                exercise.last_attempt,
                self.overwrite,
            )
        subexs = exercise.data.subexercises
        if subexs:
            for i, subexercise in enumerate(subexs, 1):
                sub = self._get_exercise(subexercise)
                if not sub:
                    # _get_exercise returns None when the fetch failed
                    # (the error has already been logged).
                    continue
                self.download_normal_exercise(
                    sub, path.parent / (base_name + f"_sub{i}.md")
                )

    def download_track(self, track: Track, path: Path, **kwargs):
        """Download every course of ``track`` into a folder named after it."""
        path = path / correct_path(track.title)
        for i, course in enumerate(track.courses, 1):
            Logger.info(
                f"[{i}/{len(track.courses)}] Download ({course.id}) {course.title} from ({track.title} Track)"
            )
            # Prefix the course folder with its position inside the track.
            self.download_course(course, path, f"{i}-", **kwargs)

    def download_course(self, course: Course, path: Path, index="", **kwargs):
        """Download datasets, slides and per-chapter material of ``course``.

        The course folder is ``path / (index + slug)``; when the course has
        no slug the lower-cased, dash-separated title is used instead.
        """
        download_path = path / (
            index + correct_path(course.slug or course.title.lower().replace(" ", "-"))
        )
        if kwargs.get("datasets") and course.datasets:
            for i, dataset in enumerate(course.datasets, 1):
                print_progress(i, len(course.datasets), "datasets")
                if dataset.asset_url:
                    download_file(
                        dataset.asset_url,
                        download_path
                        / "datasets"
                        / correct_path(dataset.asset_url.split("/")[-1]),
                        False,
                        overwrite=self.overwrite,
                    )
            sys.stdout.write("\n")
        for chapter in course.chapters:
            cpath = download_path / self._get_chapter_name(chapter)
            if kwargs.get("slides") and chapter.slides_link:
                download_file(
                    chapter.slides_link,
                    cpath / correct_path(chapter.slides_link.split("/")[-1]),
                    overwrite=self.overwrite,
                )
            if (
                kwargs.get("exercises")
                or kwargs.get("videos")
                or kwargs.get("audios")
                or kwargs.get("scripts")
            ):
                self.download_others(course.id, chapter, cpath, **kwargs)

    def download_others(self, course_id, chapter: Chapter, path: Path, **kwargs):
        """Download exercises, videos, audios, scripts and subtitles of a chapter.

        Which kinds are written is controlled by the matching ``kwargs``
        flags. Exercises that cannot be fetched are skipped.
        """
        exercises = kwargs.get("exercises")
        videos = kwargs.get("videos")
        audios = kwargs.get("audios")
        scripts = kwargs.get("scripts")
        subtitles = kwargs.get("subtitles")
        last_attempt = kwargs.get("last_attempt")
        # Both helpers return None when their request failed (the error is
        # already logged by try_except_request); fall back to empty values
        # instead of crashing on enumerate(None) / None[...].
        ids = self._get_exercises_ids(course_id, chapter.id) or []
        last_attempts = self.get_exercises_last_attempt(course_id, chapter.id) or {}
        exercise_counter = 1
        video_counter = 1
        for i, exercise_id in enumerate(ids, 1):
            print_progress(i, len(ids), f"chapter {chapter.number}")
            exercise = self._get_exercise(exercise_id)
            # Check before touching the object: a failed fetch yields None.
            if not exercise:
                continue
            exercise.last_attempt = last_attempts.get(exercise_id)
            if exercises and not exercise.is_video:
                self.download_normal_exercise(
                    exercise,
                    path / "exercises" / f"ex{exercise_counter}.md",
                    last_attempt,
                )
                exercise_counter += 1
            if exercise.is_video:
                video = self._get_video(exercise.data.get("projector_key"))
                if not video:
                    continue
                video_path = path / "videos" / f"ch{chapter.number}_{video_counter}"
                if videos and video.video_mp4_link:
                    download_file(
                        video.video_mp4_link,
                        video_path.with_suffix(".mp4"),
                        overwrite=self.overwrite,
                    )
                if audios and video.audio_link:
                    download_file(
                        video.audio_link,
                        path / "audios" / f"ch{chapter.number}_{video_counter}.mp3",
                        False,
                        overwrite=self.overwrite,
                    )
                if scripts and video.script_link:
                    download_file(
                        video.script_link,
                        path / "scripts" / (video_path.name + "_script.md"),
                        False,
                        overwrite=self.overwrite,
                    )
                if subtitles and video.subtitles:
                    for sub in subtitles:
                        subtitle = self._get_subtitle(sub, video)
                        if not subtitle:
                            continue
                        download_file(
                            subtitle.link,
                            video_path.parent / (video_path.name + f"_{sub}.vtt"),
                            False,
                            overwrite=self.overwrite,
                        )
                video_counter += 1
            print_progress(i, len(ids), f"chapter {chapter.number}")
        sys.stdout.write("\n")

    def get_completed_tracks(self, refresh=False):
        """Yield the completed tracks, fetching them when needed.

        Cached tracks are yielded unless ``refresh`` is true. A fresh fetch
        also re-reads the profile, adds any new courses found in the tracks
        to ``self.courses`` and saves the session. Tracks for which no
        course could be resolved are not yielded.
        """
        if self.tracks and not refresh:
            yield from self.tracks
            return

        self.tracks = []
        # Pass refresh through, otherwise the cached profile is reused and
        # newly completed tracks never show up.
        data = self.get_profile_data(refresh)
        completed_tracks = data["completed_tracks"]
        for i, track in enumerate(completed_tracks, 1):
            self.tracks.append(Track(f"t{i}", track["title"].strip(), track["url"]))

        all_courses = set()
        # add courses
        for track in self.tracks:
            courses = list(self._get_courses_from_link(fix_track_link(track.link)))
            if not courses:
                continue
            track.courses = courses
            all_courses.update(track.courses)
            yield track

        # add to courses
        current_ids = {c.id for c in self.courses}
        for course in all_courses:
            if course.id not in current_ids:
                self.courses.append(course)
        self.session.save()

    def get_completed_courses(self, refresh=False):
        """Yield the completed courses, fetching them when needed.

        Cached courses are yielded unless ``refresh`` is true. The session
        is saved after a fetch that produced at least one course.
        """
        if self.courses and not refresh:
            yield from self.courses
            return

        self.courses = []
        data = self.get_profile_data(refresh)
        completed_courses = data["completed_courses"]
        for course in completed_courses:
            fetched_course = self.get_course(course["id"])
            if not fetched_course:
                continue
            self.session.driver.minimize_window()
            self.courses.append(fetched_course)
            yield fetched_course

        if not self.courses:
            return
        self.session.save()

    def get_course(self, id):
        """Return the course with ``id`` from the cache or the API.

        Returns ``None`` for ids already known to be missing or when the
        fetch fails.
        """
        if id in self.not_found_courses:
            return
        for course in self.courses:
            if course.id == id:
                return course
        return self._get_course(id)

    def get_course_by_order(self, order):
        """Return the cached course whose listing number is ``order``, if any."""
        for course in self.courses:
            if course.order == order and course.id not in self.not_found_courses:
                return course

    @try_except_request
    def get_exercises_last_attempt(self, course_id, chapter_id):
        """Return ``{exercise_id: last_attempt}`` for a chapter.

        Returns ``None`` (after logging) when the progress request fails.
        """
        data = self.session.get_json(
            PROGRESS_API.format(course_id=course_id, chapter_id=chapter_id)
        )
        if "error" in data:
            raise ValueError(
                f"Cannot get exercises for course {course_id}, chapter {chapter_id}."
            )
        last_attempt = {e["exercise_id"]: e["last_attempt"] for e in data}
        return last_attempt

    def get_track(self, id):
        """Return the cached track with ``id`` (e.g. ``"t1"``), if any."""
        for track in self.tracks:
            if track.id == id:
                return track

    def _get_courses_from_link(self, link: str):
        """Yield the courses listed on the track page at ``link``.

        This is a generator, so ``try_except_request`` cannot protect it
        (the decorator only guards the call that creates the generator).
        Errors are therefore handled here with the same convention: log a
        non-empty message and stop yielding.
        """
        try:
            html = self.session.get(link)
            self.session.driver.minimize_window()
            soup = BeautifulSoup(html, "html.parser")
            course_tags = soup.findAll("article", {"class": re.compile("^js-async")})
            for id_tag in course_tags:
                course_id = id_tag.get("data-id")
                if not course_id:
                    continue
                course = self.get_course(int(course_id))
                if course:
                    yield course
        except Exception as e:
            if str(e):
                Logger.error(e)

    def _get_chapter_name(self, chapter: Chapter):
        """Return the folder name to use for ``chapter``."""
        if chapter.title and chapter.title_meta:
            return correct_path(chapter.slug)
        if chapter.title:
            return correct_path(
                f"chapter-{chapter.number}-{chapter.title.replace(' ', '-').lower()}"
            )
        return f"chapter-{chapter.number}"

    def _set_profile(self):
        """Load the signed-in user details and mark the instance logged in.

        Logs a greeting and the subscription status, stores the details in
        ``self.login_data`` and saves the session. If the details cannot be
        fetched an error is logged and the instance stays logged out.
        """
        try:
            data = self.session.get_json(LOGIN_DETAILS_URL)
        except Exception:
            Logger.error("Incorrect input token!")
            return

        # Fall back to an empty name so a profile without name/email does
        # not abort the login with a str + None TypeError.
        display_name = (
            data.get("first_name") or data.get("last_name") or data.get("email") or ""
        )
        Logger.info("Hi, " + display_name)

        # New API: 'has_active_subscription' may not exist anymore
        has_sub = False
        if "has_active_subscription" in data:
            has_sub = data["has_active_subscription"]
        elif "active_products" in data:
            # bool() also copes with a null value, unlike len().
            has_sub = bool(data["active_products"])

        if has_sub:
            Logger.info("Active subscription found")
        else:
            Logger.warning("No active subscription found")

        self.loggedin = True
        self.login_data = data
        self.has_active_subscription = has_sub
        self.session.save()

    def _get_subtitle(self, sub, video: Video):
        """Return the subtitle of ``video`` for language code ``sub``, if any."""
        if not LANGMAP.get(sub):
            return
        for subtitle in video.subtitles:
            if subtitle.language == LANGMAP[sub]:
                return subtitle

    @try_except_request
    def _get_video(self, id):
        """Fetch video details by projector key; ``None`` on failure."""
        if not id:
            raise ValueError("ID tag not found.")
        res = self.session.get_json(VIDEO_DETAILS_API.format(hash=id))
        if "error" in res:
            # Empty message: try_except_request drops it without logging.
            raise ValueError()
        return Video(**res)

    @try_except_request
    def _get_exercises_ids(self, course_id, chapter_id):
        """Return the exercise ids of a chapter; ``None`` on failure."""
        if not course_id or not chapter_id:
            raise ValueError("ID tags not found.")
        data = self.session.get_json(
            PROGRESS_API.format(course_id=course_id, chapter_id=chapter_id)
        )
        if "error" in data:
            raise ValueError(
                f"Cannot get exercises for course {course_id}, chapter {chapter_id}."
            )
        ids = [e["exercise_id"] for e in data]
        return ids

    @try_except_request
    def _get_exercise(self, id):
        """Fetch one exercise by id; ``None`` on failure."""
        if not id:
            raise ValueError("ID tag not found.")
        res = self.session.get_json(EXERCISE_DETAILS_API.format(id=id))
        if "error" in res:
            raise ValueError(f"Cannot get exercise with id: {id}.")
        return Exercise(**res)

    @try_except_request
    def _get_course(self, id):
        """Fetch one course by id; ``None`` on failure.

        Ids that are empty or rejected by the API are remembered in
        ``self.not_found_courses`` so they are not requested again.
        """
        if not id:
            self.not_found_courses.add(id)
            raise ValueError("ID tag not found.")
        res = self.session.get_json(COURSE_DETAILS_API.format(id=id))
        if "error" in res:
            self.not_found_courses.add(id)
            # Empty message: try_except_request drops it without logging.
            raise ValueError()

        # Normalize time field
        time_needed = res.get("time_needed")
        if not time_needed and res.get("time_needed_in_hours") is not None:
            time_needed = f"{res['time_needed_in_hours']} hours"
        elif not time_needed and res.get("duration_minutes") is not None:
            hours = res["duration_minutes"] / 60
            time_needed = f"{hours:.1f} hours"

        return Course(
            id=res["id"],
            title=res["title"],
            description=res.get("description", ""),
            slug=res.get("slug"),
            datasets=res.get("datasets", []),
            chapters=res.get("chapters", []),
            time_needed=time_needed,
        )
import active_session, datacamp from .helper import Logger from .templates.lang import Language __version__ = "3.3.0" def version_callback(value: bool): if value: typer.echo(f"Datacamp Downloader CLI Version: {__version__}") raise typer.Exit() def main( version: Optional[bool] = typer.Option( None, "--version", callback=version_callback, is_eager=True, help="Show version.", ), ): pass app = typer.Typer(callback=main) @app.command() def login( username: str = typer.Option(..., "-u", "--username", prompt=True), password: str = typer.Option(..., "-p", "--password", prompt=True, hide_input=True), ): """Log in to Datacamp using your username and password.""" datacamp.login(username, password) @app.command() def set_token(token: str = typer.Argument(...)): """Log in to Datacamp using your token.""" datacamp.set_token(token) @app.command() def tracks( refresh: Optional[bool] = typer.Option( False, "--refresh", "-r", is_flag=True, help="Refresh completed tracks." ) ): """List your completed tracks.""" datacamp.list_completed_tracks(refresh) @app.command() def courses( refresh: Optional[bool] = typer.Option( False, "--refresh", "-r", is_flag=True, help="Refresh completed courses." 
) ): """List your completed courses.""" datacamp.list_completed_courses(refresh) @app.command() def download( ids: List[str] = typer.Argument( ..., help="IDs for courses/tracks to download or `all` to download all your completed courses or `all-t` to download all your completed tracks.", ), path: Path = typer.Option( Path(os.getcwd() + "/Datacamp"), "--path", "-p", help="Path to the download directory.", dir_okay=True, file_okay=False, ), slides: Optional[bool] = typer.Option( True, "--slides/--no-slides", help="Download slides.", ), datasets: Optional[bool] = typer.Option( True, "--datasets/--no-datasets", help="Download datasets.", ), videos: Optional[bool] = typer.Option( True, "--videos/--no-videos", help="Download videos.", ), exercises: Optional[bool] = typer.Option( True, "--exercises/--no-exercises", help="Download exercises.", ), subtitles: Optional[List[Language]] = typer.Option( [Language.EN.value], "--subtitles", "-st", help="Choose subtitles to download.", case_sensitive=False, ), audios: Optional[bool] = typer.Option( False, "--audios/--no-audios", help="Download audio files.", ), scripts: Optional[bool] = typer.Option( True, "--scripts/--no-scripts", "--transcript/--no-transcript", show_default=True, help="Download scripts or transcripts.", ), python_file: Optional[bool] = typer.Option( True, "--python-file/--no-python-file", show_default=True, help="Download your own solution as a python file if available.", ), warnings: Optional[bool] = typer.Option( True, "--no-warnings", flag_value=False, is_flag=True, help="Disable warnings.", ), overwrite: Optional[bool] = typer.Option( False, "--overwrite", "-w", flag_value=True, is_flag=True, help="Overwrite files if exist.", ), ): """Download courses/tracks given their ids. 
Example: `datacamp download id1 id2 id3`\n To download all your completed courses run: \t`datacamp download all`\n To download all your completed tracks run: \t`datacamp download all-t` """ Logger.show_warnings = warnings datacamp.download( ids, path, slides=slides, datasets=datasets, videos=videos, exercises=exercises, subtitles=subtitles, audios=audios, scripts=scripts, overwrite=overwrite, last_attempt=python_file, ) @app.command() def reset(): """Restart the session.""" active_session.reset() ================================================ FILE: src/datacamp_downloader/helper.py ================================================ import itertools import re import sys import threading import time from pathlib import Path import requests from termcolor import colored from texttable import Texttable class Logger: show_warnings = True is_writing = False @classmethod def error(cls, text): Logger.print(text, "ERROR:", "red") @classmethod def clear(cls): sys.stdout.write("\r" + " " * 100 + "\r") @classmethod def warning(cls, text): if cls.show_warnings: Logger.print(text, "WARNING:", "yellow") @classmethod def info(cls, text): Logger.print(text, "INFO:", "green") @classmethod def print(cls, text, head, color=None, background=None, end="\n"): cls.is_writing = True Logger.clear() print(colored(f"{head}", color, background), text, end=end, flush=True) cls.is_writing = False @classmethod def clear_and_print(cls, text): cls.is_writing = True Logger.clear() print(text, flush=True) cls.is_writing = False def get_table(): table = Texttable() return table def animate_wait(f): done = False def animate(): for c in itertools.cycle(list("/—\|")): if done: Logger.clear() break if not Logger.is_writing: print("\rPlease wait " + c, end="", flush=True) time.sleep(0.1) def wrapper(*args): nonlocal done done = False t = threading.Thread(target=animate) t.daemon = True t.start() output = f(*args) done = True return output return wrapper def correct_path(path: str): return 
re.sub("[^-a-zA-Z0-9_.() /]+", "", path) def download_file(link: str, path: Path, progress=True, max_retry=10, overwrite=False): # start = time.clock() if not overwrite and path.exists(): Logger.warning(f"{path.absolute()} is already downloaded") return for i in range(max_retry): try: response = requests.get(link, stream=True) i = -1 break except Exception: Logger.print(f"", f"Retry [{i+1}/{max_retry}]", "magenta", end="") if i != -1: Logger.error(f"Failed to download {link}") return path.parent.mkdir(exist_ok=True, parents=True) total_length = response.headers.get("content-length") with path.open("wb") as f: if total_length is None: # no content length header f.write(response.content) else: dl = 0 total_length = int(total_length) for data in response.iter_content(chunk_size=1024 * 1024): # 1MB dl += len(data) f.write(data) if progress: print_progress(dl, total_length, path.name) if progress: sys.stdout.write("\n") def print_progress(progress, total, name, max=50): done = int(max * progress / total) Logger.print( "[%s%s] %d%%" % ("=" * done, " " * (max - done), done * 2), f"Downloading [{name}]", "blue", end="\r", ) sys.stdout.flush() def save_text(path: Path, content: str, overwrite=False): if not path.is_file: Logger.error(f"{path.absolute()} isn't a file") return if not overwrite and path.exists(): Logger.warning(f"{path.absolute()} is already downloaded") return path.parent.mkdir(exist_ok=True, parents=True) path.write_text(content, encoding="utf8") # Logger.info(f"{path.name} has been saved.") def fix_track_link(link): if "?" 
in link: link += "&embedded=true" else: link += "?embedded=true" return link ================================================ FILE: src/datacamp_downloader/session.py ================================================ import json import os import pickle import json from webdriver_manager.chrome import ChromeDriverManager import re from bs4 import BeautifulSoup import os from pathlib import Path # Prefer top-level undetected_chromedriver (works with Selenium 4); fallback to v2. try: import undetected_chromedriver as uc except Exception: import undetected_chromedriver.v2 as uc # Selenium helper imports (we use these to create Service/options safely) from selenium import webdriver from selenium.webdriver.chrome.service import Service as ChromeService from selenium.webdriver.chrome.options import Options as ChromeOptions from selenium.webdriver.common.by import By from selenium.webdriver.remote.webelement import WebElement from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait from .constants import HOME_PAGE, SESSION_FILE from .datacamp_utils import Datacamp class Session: def __init__(self) -> None: self.savefile = Path(SESSION_FILE) self.datacamp = self.load_datacamp() def save(self): self.datacamp.session = None pickled = pickle.dumps(self.datacamp) self.savefile.write_bytes(pickled) def load_datacamp(self): if self.savefile.exists(): datacamp = pickle.load(self.savefile.open("rb")) datacamp.session = self return datacamp return Datacamp(self) def reset(self): try: os.remove(SESSION_FILE) except: pass def _setup_driver(self, headless=True): try: options = uc.ChromeOptions() except Exception: options = ChromeOptions() try: options.headless = headless except Exception: if headless: options.add_argument("--headless=new") # existing flags... 
options.add_argument("--no-first-run") options.add_argument("--no-service-autorun") options.add_argument("--password-store=basic") options.add_argument("--disable-extensions") options.add_argument("--disable-browser-side-navigation") options.add_argument("--disable-infobars") options.add_argument("--disable-popup-blocking") options.add_argument("--disable-gpu") options.add_argument("--disable-notifications") options.add_argument("--content-shell-hide-toolbar") options.add_argument("--top-controls-hide-threshold") options.add_argument("--force-app-mode") options.add_argument("--hide-scrollbars") options.add_argument("--no-sandbox") options.add_argument("--disable-dev-shm-usage") # get the absolute path of the installed package package_dir = os.path.dirname(os.path.abspath(__file__)) # create a chrome profile folder inside the package directory profile_dir = os.path.join(package_dir, "dc_chrome_profile") # make sure it exists os.makedirs(profile_dir, exist_ok=True) # tell Chrome to use it options.add_argument(f"--user-data-dir={profile_dir}") service = ChromeService(executable_path=ChromeDriverManager().install()) try: self.driver = uc.Chrome(service=service, options=options) return except Exception: self.driver = webdriver.Chrome(service=service, options=options) def start(self, headless=False): if hasattr(self, "driver"): return self._setup_driver(headless) self.driver.get(HOME_PAGE) self.bypass_cloudflare(HOME_PAGE) if self.datacamp.token: self.add_token(self.datacamp.token) def bypass_cloudflare(self, url): try: self.get_element_by_id("cf-spinner-allow-5-secs") with self.driver: self.driver.get(url) except: pass def get(self, url): self.start() self.driver.get(url) self.bypass_cloudflare(url) return self.driver.page_source def get_json(self, url): page = self.get(url).strip() # Parse with BeautifulSoup soup = BeautifulSoup(page, "html.parser") pre = soup.find("pre") if pre: page = pre.text # ✅ grab only the JSON inside
else:
page = page # maybe raw JSON already
# Debug
#print("\n\n[DEBUG get_json cleaned] First 200 chars:\n", page[:200], "\n\n")
return json.loads(page)
def to_json(self, page: str):
    """Decode the JSON text in ``page`` and return the resulting Python object."""
    decoded = json.loads(page)
    return decoded
def get_element_by_id(self, id: str) -> WebElement:
    """Return the element on the driver's current page located by ``By.ID``."""
    locator = (By.ID, id)
    return self.driver.find_element(*locator)
def get_element_by_xpath(self, xpath: str) -> WebElement:
    """Return the element on the driver's current page located by ``By.XPATH``."""
    locator = (By.XPATH, xpath)
    return self.driver.find_element(*locator)
def click_element(self, id: str):
    """Find the element with the given ``id`` and click it."""
    target = self.get_element_by_id(id)
    target.click()
def wait_for_element_by_css_selector(self, *css: str, timeout: int = 10):
    """Wait until an element matching any of the given CSS selectors is visible.

    All selectors are joined with commas into a single selector group, and
    ``timeout`` is handed unchanged to ``WebDriverWait``.
    """
    waiter = WebDriverWait(self.driver, timeout)
    selector_group = ",".join(css)
    condition = EC.visibility_of_any_elements_located(
        (By.CSS_SELECTOR, selector_group)
    )
    waiter.until(condition)
def add_token(self, token: str):
    """Add ``token`` to the driver as the ``_dct`` cookie and return ``self``."""
    self.driver.add_cookie(
        dict(name="_dct", value=token, domain=".datacamp.com", secure=True)
    )
    return self
================================================
FILE: src/datacamp_downloader/templates/course.py
================================================
# Generated by https://quicktype.io
from enum import Enum
from typing import Any, List, Optional
class TypeEnum(Enum):
    """Exercise kinds, each mapped to the type string it is identified by."""

    MULTIPLE_CHOICE_EXERCISE = "MultipleChoiceExercise"
    NORMAL_EXERCISE = "NormalExercise"
    VIDEO_EXERCISE = "VideoExercise"
class Exercise:
    """Summary record for one exercise, as built by ``Chapter`` from its list.

    Keyword arguments other than the named fields are accepted and dropped.
    """

    type: TypeEnum
    title: str
    aggregate_xp: int
    number: int
    url: str

    def __init__(
        self,
        type: TypeEnum,
        title: str,
        aggregate_xp: int,
        number: int,
        url: str,
        **kwargs
    ) -> None:
        # Values are stored exactly as given; ``type`` is not converted.
        self.type, self.title = type, title
        self.aggregate_xp, self.number = aggregate_xp, number
        self.url = url
class Chapter:
    """One chapter of a course together with its exercise summaries.

    Every named field is required; any additional keyword arguments are
    accepted and ignored.
    """

    id: int
    title_meta: str
    title: str
    description: str
    number: int
    slug: str
    nb_exercises: int
    badge_completed_url: str
    badge_uncompleted_url: str
    last_updated_on: str
    slides_link: str
    free_preview: Optional[bool]
    xp: int
    number_of_videos: int
    exercises: List[Exercise]

    def __init__(
        self,
        id: int,
        title_meta: str,
        title: str,
        description: str,
        number: int,
        slug: str,
        nb_exercises: int,
        badge_completed_url: str,
        badge_uncompleted_url: str,
        last_updated_on: str,
        slides_link: str,
        free_preview: Optional[bool],
        xp: int,
        number_of_videos: int,
        exercises: List[Exercise],
        **kwargs
    ) -> None:
        # Identity and descriptive text.
        self.id, self.title_meta, self.title = id, title_meta, title
        self.description, self.number, self.slug = description, number, slug
        self.nb_exercises = nb_exercises
        # Badge images and bookkeeping fields.
        self.badge_completed_url = badge_completed_url
        self.badge_uncompleted_url = badge_uncompleted_url
        self.last_updated_on, self.slides_link = last_updated_on, slides_link
        self.free_preview, self.xp = free_preview, xp
        self.number_of_videos = number_of_videos
        # Each raw exercise mapping is expanded into an Exercise object.
        parsed = []
        for raw in exercises:
            parsed.append(Exercise(**raw))
        self.exercises = parsed
class Collaborator:
    """Avatar URL and full name of a course collaborator."""

    avatar_url: str
    full_name: str

    def __init__(self, avatar_url: str, full_name: str) -> None:
        self.avatar_url, self.full_name = avatar_url, full_name
class Dataset:
    """A course dataset: the URL of the asset and its display name."""

    asset_url: str
    name: str

    def __init__(self, asset_url: str, name: str) -> None:
        self.asset_url, self.name = asset_url, name
class Instructor:
    """Profile details of a course instructor.

    Keyword arguments other than the named fields are accepted and dropped.
    """

    id: int
    marketing_biography: str
    biography: str
    avatar_url: str
    full_name: str
    instructor_path: str

    def __init__(
        self,
        id: int,
        marketing_biography: str,
        biography: str,
        avatar_url: str,
        full_name: str,
        instructor_path: str,
        **kwargs
    ) -> None:
        self.id = id
        self.marketing_biography, self.biography = marketing_biography, biography
        self.avatar_url, self.full_name = avatar_url, full_name
        self.instructor_path = instructor_path
class SharingLinks:
    """Holds the ``twitter`` and ``facebook`` sharing links."""

    twitter: str
    facebook: str

    def __init__(self, twitter: str, facebook: str) -> None:
        self.twitter, self.facebook = twitter, facebook
class Track:
    """Reference to a track a course belongs to: its path and display title."""

    path: str
    title_with_subtitle: str

    def __init__(self, path: str, title_with_subtitle: str) -> None:
        self.path, self.title_with_subtitle = path, title_with_subtitle
class Course:
    """A course assembled from a course-details mapping.

    Only ``id`` and ``title`` are required. Every other field is optional,
    and unknown keyword arguments are captured in ``extra`` rather than
    raising, so added or missing API fields do not break construction.
    """

    def __init__(
        self,
        id: int,
        title: str,
        description: str = "",
        slug: str = None,
        chapters: List[dict] = None,
        datasets: List[dict] = None,
        time_needed_in_hours: int = None,
        **kwargs
    ) -> None:
        """Build the course and its nested objects.

        Args:
            id: Course identifier.
            title: Course title.
            description: Course description.
            slug: Course slug; falls back to ``str(id)`` when empty or None.
            chapters: Raw chapter mappings, each expanded into a ``Chapter``.
            datasets: Raw dataset mappings, each expanded into a ``Dataset``.
            time_needed_in_hours: Used for ``time_needed`` when no truthy
                ``time_needed`` keyword is supplied.
            **kwargs: Optional fields (``time_needed``, ``xp``,
                ``difficulty_level``, ``state``, ``short_description``,
                ``image_url``, ``image_thumbnail_url``, ``last_updated_on``,
                ``link``, ``programming_language``, ``instructors``,
                ``collaborators``, ``tracks``); anything else is only kept
                in ``extra``.
        """
        self.id = id
        self.title = title
        self.description = description
        # ``slug`` is a named parameter, so it can never appear in kwargs;
        # a single assignment is sufficient.
        self.slug = slug or str(id)

        # Build nested objects; a None list is treated as empty.
        self.chapters = [Chapter(**c) for c in (chapters or [])]
        self.datasets = [Dataset(**c) for c in (datasets or [])]

        # Support both the old and the new API key for the duration.
        self.time_needed = kwargs.get("time_needed") or time_needed_in_hours
        self.xp = kwargs.get("xp", 0)
        self.difficulty_level = kwargs.get("difficulty_level", None)
        self.state = kwargs.get("state", "unknown")

        # Optional descriptive fields.
        self.short_description = kwargs.get("short_description", "")
        self.image_url = kwargs.get("image_url", "")
        self.image_thumbnail_url = kwargs.get("image_thumbnail_url", "")
        self.last_updated_on = kwargs.get("last_updated_on", "")
        self.link = kwargs.get("link", "")
        self.programming_language = kwargs.get("programming_language", "unknown")

        # ``or []`` covers a key that is present with a None value, which a
        # plain ``kwargs.get(key, [])`` would pass through and fail to iterate.
        self.instructors = [
            Instructor(**c) for c in (kwargs.get("instructors") or [])
        ]
        self.collaborators = [
            Collaborator(**c) for c in (kwargs.get("collaborators") or [])
        ]
        self.tracks = [Track(**c) for c in (kwargs.get("tracks") or [])]

        # Keep every extra keyword so nothing from the payload is lost.
        self.extra = kwargs
================================================
FILE: src/datacamp_downloader/templates/exercise.py
================================================
# Generated by https://quicktype.io
from typing import Any, List, Optional
import tomd
from .course import TypeEnum
class Data:
    """Detailed content of a non-video exercise.

    Only ``id`` and ``type`` are required; every other field defaults to
    None. Keyword arguments other than the named fields are accepted and
    dropped.
    """

    id: int
    type: str
    assignment: Optional[str]
    title: Optional[str]
    sample_code: str
    instructions: Optional[str]
    number: int
    sct: str
    pre_exercise_code: str
    solution: str
    hint: Optional[str]
    attachments: None
    xp: int
    possible_answers: List[Any]
    feedbacks: List[Any]
    question: str
    subexercises: Optional[List["Data"]]
    course_id: Optional[int]
    chapter_id: Optional[int]
    runtime_config: Optional[str]
    language: Optional[str]

    def __init__(
        self,
        id: int,
        type: str,
        assignment: Optional[str] = None,
        title: Optional[str] = None,
        number: int = None,
        hint: Optional[str] = None,
        xp: int = None,
        possible_answers: List[Any] = None,
        feedbacks: List[Any] = None,
        course_id: Optional[int] = None,
        chapter_id: Optional[int] = None,
        runtime_config: Optional[str] = None,
        language: Optional[str] = None,
        subexercises: Optional[List["Data"]] = None,
        instructions: Optional[str] = None,
        attachments: None = None,
        sample_code: str = None,
        pre_exercise_code: str = None,
        solution: str = None,
        sct: str = None,
        question: str = None,
        **kwargs,
    ) -> None:
        # Identity and statement of the exercise.
        self.id, self.type = id, type
        self.assignment, self.title = assignment, title
        self.sample_code, self.instructions = sample_code, instructions
        self.number = number
        # Checking code, setup code and reference solution.
        self.sct = sct
        self.pre_exercise_code = pre_exercise_code
        self.solution = solution
        self.hint, self.attachments, self.xp = hint, attachments, xp
        # Multiple-choice material.
        self.possible_answers, self.feedbacks = possible_answers, feedbacks
        self.question = question
        # Sub-exercises are reduced to their ids; an empty or missing list
        # is stored as None.
        if subexercises:
            self.subexercises = [child.get("id") for child in subexercises]
        else:
            self.subexercises = None
        self.course_id, self.chapter_id = course_id, chapter_id
        self.runtime_config, self.language = runtime_config, language
class Exercise:
data: Any
id: int
type: str
version: str
last_attempt: Optional[str]
def __init__(
    self,
    data: Data,
    id: int,
    type: str,
    version: str,
    last_attempt: str = None,
    **kwargs,
) -> None:
    """Store the exercise fields, parsing ``data`` unless this is a video.

    Keyword arguments other than the named fields are accepted and dropped.
    """
    self.id = id
    self.type = type
    self.version = version
    # ``is_video`` reads ``self.type``, so it must be set before this line.
    # Video exercises keep the raw payload; all others are parsed into Data.
    self.data = data if self.is_video else Data(**data)
    self.last_attempt = last_attempt
@property
def is_video(self):
    """Whether ``type`` equals the video-exercise type string."""
    video_type = TypeEnum.VIDEO_EXERCISE.value
    return self.type == video_type
@property
def is_python(self):
    """Whether the exercise data's ``language`` is ``"python"``."""
    language = self.data.language
    return language == "python"
def __str__(self) -> str:
html = (
" {}
\nExercise ID {}
\n Assignment
{}\n".format(
self.data.title, self.id, self.data.assignment
)
+ self.get_pre_exercise_code()
+ self.get_instructions()
+ self.get_sample_code()
+ self.get_anwsers()
+ self.get_hints()
+ self.get_solution()
)
return tomd.convert(html)
def get_hints(self):
code = " Hints
{}"
if self.data.hint:
return code.format(self.data.hint)
return ""
def get_anwsers(self):
code = " Answers
{}"
if self.data.possible_answers:
return code.format(self._get_ordered_list(self.data.possible_answers))
# return code.format("No answers were found.")
return ""
def get_instructions(self):
code = " Instructions
{}"
if self.data.instructions:
return code.format(self.data.instructions)
return ""
def _get_ordered_list(self, list):
return "{}
".format("\n".join(f"No solution was found.
") def get_sample_code(self): if self.data.sample_code: return self._get_code(self.data.sample_code) return "" def get_pre_exercise_code(self): code = "{code}"
================================================
FILE: src/datacamp_downloader/templates/lang.py
================================================
from enum import Enum
class Language(str, Enum):
    """Subtitle language choices; members are also plain strings.

    ``NONE`` is the literal value ``"none"``.
    """

    EN = "en"
    ZH = "zh"
    FR = "fr"
    DE = "de"
    IT = "it"
    JA = "ja"
    KO = "ko"
    PT = "pt"
    RU = "ru"
    ES = "es"
    NONE = "none"
================================================
FILE: src/datacamp_downloader/templates/track.py
================================================
from typing import List
from .course import Course
class Track:
    """A track identified by id, title and link, with a list of courses.

    ``courses`` always starts empty.
    """

    id: int
    title: str
    link: str
    courses: List[Course]

    def __init__(self, id: int, title: str, link: str) -> None:
        self.id, self.title, self.link = id, title, link
        self.courses = list()
================================================
FILE: src/datacamp_downloader/templates/video.py
================================================
# Generated by https://quicktype.io
from enum import Enum
from typing import Any, List, Optional
class TypeEnum(Enum):
    """Slide kinds, each mapped to the type string it is identified by."""

    FINAL_SLIDE = "FinalSlide"
    FULL_SLIDE = "FullSlide"
    TITLE_SLIDE = "TitleSlide"
class Structure:
    """One slide entry of a slide deck's structure.

    Every named field is required; any additional keyword arguments are
    accepted and ignored.
    """

    number: int
    type: TypeEnum
    key: str
    script: str
    title: str
    instructor_name: Optional[str]
    instructor_title: Optional[str]
    technology: Optional[str]
    citations: List[Any]
    code_zoom: int
    disable_transition: bool
    hide_slide_in_video: bool
    hide_title: bool
    use_full_width: bool
    part1: Optional[str]

    def __init__(
        self,
        number: int,
        type: TypeEnum,
        key: str,
        script: str,
        title: str,
        instructor_name: Optional[str],
        instructor_title: Optional[str],
        technology: Optional[str],
        citations: List[Any],
        code_zoom: int,
        disable_transition: bool,
        hide_slide_in_video: bool,
        hide_title: bool,
        use_full_width: bool,
        part1: Optional[str],
        **kwargs
    ) -> None:
        # Slide identity and text; values are stored exactly as given.
        self.number, self.type, self.key = number, type, key
        self.script, self.title = script, title
        # Instructor and technology details.
        self.instructor_name = instructor_name
        self.instructor_title = instructor_title
        self.technology, self.citations = technology, citations
        # Display options.
        self.code_zoom = code_zoom
        self.disable_transition = disable_transition
        self.hide_slide_in_video = hide_slide_in_video
        self.hide_title, self.use_full_width = hide_title, use_full_width
        self.part1 = part1
class SlideDeck:
    """A slide deck: its plain-video links, timings and parsed structure."""

    key: str
    plain_video_hls_link: str
    plain_video_mp4_link: str
    plain_video_raw_link: None
    structure: List[Structure]
    timings: str
    title: str
    transformations: str

    def __init__(
        self,
        key: str,
        plain_video_hls_link: str,
        plain_video_mp4_link: str,
        plain_video_raw_link: None,
        structure: List[Structure],
        timings: str,
        title: str,
        transformations: str,
    ) -> None:
        self.key = key
        self.plain_video_hls_link = plain_video_hls_link
        self.plain_video_mp4_link = plain_video_mp4_link
        self.plain_video_raw_link = plain_video_raw_link
        # Each raw structure mapping is expanded into a Structure object.
        slides = []
        for raw in structure:
            slides.append(Structure(**raw))
        self.structure = slides
        self.timings, self.title = timings, title
        self.transformations = transformations
class Subtitle:
    """A subtitle entry: its language and the link to it."""

    language: str
    link: str

    def __init__(self, language: str, link: str) -> None:
        self.language, self.link = language, link
class Video:
    """Details of a video, built from the keys of a video-details mapping.

    All named fields are required. Unknown keyword arguments are accepted
    and ignored, so an extra key in the mapping does not raise ``TypeError``;
    this matches the other template classes that take ``**kwargs``.
    """

    audio_link: str
    key: str
    render_dynamically: int
    script_link: str
    slide_deck: "SlideDeck"
    slides_link: str
    subtitle_vtt_link: str
    subtitles: List["Subtitle"]
    thumbnail_link: None
    transcript_timings: None
    type: str
    video_hls_link: None
    video_mp4_link: str
    video_raw_link: None

    def __init__(
        self,
        audio_link: str,
        key: str,
        render_dynamically: int,
        script_link: str,
        slide_deck: "SlideDeck",
        slides_link: str,
        subtitle_vtt_link: str,
        subtitles: List["Subtitle"],
        thumbnail_link: None,
        transcript_timings: None,
        type: str,
        video_hls_link: None,
        video_mp4_link: str,
        video_raw_link: None,
        **kwargs
    ) -> None:
        """Store the video fields.

        ``subtitles`` is a list of raw mappings, each expanded into a
        ``Subtitle``. ``slide_deck`` is stored exactly as passed and is not
        converted. ``**kwargs`` absorbs any keys this class does not know.
        """
        self.audio_link = audio_link
        self.key = key
        self.render_dynamically = render_dynamically
        self.script_link = script_link
        self.slide_deck = slide_deck
        self.slides_link = slides_link
        self.subtitle_vtt_link = subtitle_vtt_link
        self.subtitles = [Subtitle(**s) for s in subtitles]
        self.thumbnail_link = thumbnail_link
        self.transcript_timings = transcript_timings
        self.type = type
        self.video_hls_link = video_hls_link
        self.video_mp4_link = video_mp4_link
        self.video_raw_link = video_raw_link