Repository: nicobrenner/commandjobs Branch: master Commit: 152e333b84f2 Files: 41 Total size: 176.8 KB Directory structure: gitextract_3tl60gtl/ ├── .dockerignore ├── .gitignore ├── LICENSE ├── README.md ├── config/ │ ├── base_resume.sample │ ├── requirements.txt │ └── sample.env ├── docker/ │ ├── Dockerfile │ ├── Dockerfile.base │ ├── docker-compose.yml │ └── docker-entrypoint.sh ├── job_scraper/ │ ├── __init__.py │ ├── hacker_news/ │ │ ├── __init__.py │ │ └── scraper.py │ ├── scraper_selectors/ │ │ ├── __init__.py │ │ └── workday_selectors.py │ ├── utils.py │ ├── waas/ │ │ ├── __init__.py │ │ └── work_startup_scraper.py │ └── workday/ │ ├── __init__.py │ └── scraper.py ├── src/ │ ├── __init__.py │ ├── database_manager.py │ ├── display_all_jobs.py │ ├── display_applications.py │ ├── display_matching_table.py │ ├── display_table.py │ ├── gpt_processor.py │ ├── menu.py │ ├── migrations/ │ │ ├── 000_create_initial_tables.py │ │ ├── 001_add_discarded_applied.py │ │ ├── 002_create_application_notes.py │ │ ├── 003_add_applied_date.py │ │ ├── 004_migrate_applications_table.py │ │ ├── 005_migrate_old_notes.py │ │ ├── 006_unique_applications_job_id.py │ │ └── 007_add_scraped_at_timestamp.py │ ├── test_menu.py │ └── truncate_tables.py └── tests/ ├── __init__.py └── test_workday_scraper.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .dockerignore ================================================ # Ignore Python bytecode __pycache__/ *.pyc *.pyo *.pyd # Ignore logs and databases *.log # Ignore environments venv/ .env .env.* # Ignore OS + IDE files .DS_Store .idea/ .vscode/ *.egg-info/ node_modules/ # Ignore git + tests .git tests/ ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: # .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # poetry # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control #poetry.lock # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. #pdm.lock # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it # in version control. # https://pdm.fming.dev/#use-with-ide .pdm.toml # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # PyCharm # JetBrains specific template is maintained in a separate JetBrains.gitignore that can # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ # Mac files .AppleDouble/ .DS_Store # Vim files *.swp # Ignore user-generated files base_resume.txt job_listings.db job_listings.db-shm job_listings.db-wal *.csv # Ignore vscode configuration .vscode/ test.py ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================
██████╗ ██████╗ ███╗ ███╗███╗ ███╗ █████╗ ███╗ ██╗██████╗ ██╔════╝██╔═══██╗████╗ ████║████╗ ████║██╔══██╗████╗ ██║██╔══██╗ ██║ ██║ ██║██╔████╔██║██╔████╔██║███████║██╔██╗ ██║██║ ██║ ██║ ██║ ██║██║╚██╔╝██║██║╚██╔╝██║██╔══██║██║╚██╗██║██║ ██║ ╚██████╗╚██████╔╝██║ ╚═╝ ██║██║ ╚═╝ ██║██║ ██║██║ ╚████║██████╔╝ ╚═════╝ ╚═════╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═══╝╚═════╝ ██╗ ██████╗ ██████╗ ███████╗ ██║██╔═══██╗██╔══██╗██╔════╝ ██║██║ ██║██████╔╝███████╗ ██ ██║██║ ██║██╔══██╗╚════██║ ╚█████╔╝╚██████╔╝██████╔╝███████║ ╚════╝ ╚═════╝ ╚═════╝ ╚══════╝

📺 Use AI to find the best jobs for your resume and preferences

🧘🏻 A distraction-free, local-first, command line interface to scrape online jobs, and filter them to your needs

   
Using AI, Command Jobs makes sure to find only the absolute best matches for your experience, skills and job preferences Stop wasting your time with online tools that are not built for you, the job finder Command Jobs is the only job searching tool that runs from where you work, the terminal. And yes, it also doesn't make you read through hundreds of job listings just to find a couple of good matches This is just starting out! Follow along as we improve it To get started, check out [Quick Start](#quick-start), [Configuration](#configuration) and [Usage](#usage) 🙏🏼🤗❤️ Note: If you want to add another source of job listings, [go to this issue](https://github.com/nicobrenner/commandjobs/issues/23) and add it as a suggested source ## Updates * Optimized docker building and running * Added new scraper for Workday, currently scraping NVIDIA, CROWDSTRIKE, RED HAT and SALESFORCE. * The scraper currently scrapes for all countries on posts no older than a **week** back! * Building in public: * ❤️ If you want to contribute to this project and want to take a crack at writing tests for it, it would be amazing! 🤗 Here's a ticket to write a new test, and a walk-through of the current test code: [Request to create: Test displaying the resume text](https://github.com/nicobrenner/commandjobs/issues/48) 🙏🏼 * Video walkthrough, from `git clone` all the way to finding the best matches * [![Command Jobs Walkthrough](https://cdn.loom.com/sessions/thumbnails/8034361163004b3e95ada50c91da0143-with-play.gif)](https://www.loom.com/share/8034361163004b3e95ada50c91da0143) * Here's a little bit of the internals of the application. Very high level overview of the features as well as the database. If you want to see more, or would like a deeper explanation, please create an Issue, thank you * [![Command Jobs Internals](https://cdn.loom.com/sessions/thumbnails/cf1ad06f82a344f18e3e5a569857d60b-with-play.gif)](https://www.loom.com/share/cf1ad06f82a344f18e3e5a569857d60b) * Just wrote the first test! 😅 And it's in no small part thanks to Agentic's [Glide](https://glide.agenticlabs.com/task/IqHd0RV), which they recently launched ([see announcement here](https://news.ycombinator.com/item?id=39682183)). I was about to switch from ncurses to [python-prompt-toolkit](https://github.com/prompt-toolkit/python-prompt-toolkit), and failing that from python to Go, so I could build Command Jobs using [Bubble Tea](https://github.com/charmbracelet/bubbletea) 🤩😍🤤 * [![First test with Glide](https://cdn.loom.com/sessions/thumbnails/afd0733ac8dd477cbeea63c8ea6cb363-with-play.gif)](https://www.loom.com/share/afd0733ac8dd477cbeea63c8ea6cb363) * Check out the amazing [ancv](https://github.com/alexpovel/ancv), a tool for building a really cool ascii version of your resume on the terminal! 🤗 (love the joke with the Venn diagram). Will need to integrate it as a library with Command Jobs * Tried out [ShellGPT](https://github.com/mattvr/ShellGPT) and made a small PR to highlight its chat interface in the `README`. It's a pretty cool tool to use GPT from the terminal. Next I want to try coding a bit with [aider](https://github.com/paul-gauthier/aider) * [![ShellGPT](https://cdn.loom.com/sessions/thumbnails/7f415a53cb404cb0a059a9a065addce8-with-play.gif)](https://www.loom.com/share/7f415a53cb404cb0a059a9a065addce8) * Decided to try to build this project as openly as possible, in that spirit, I just recorded a coding session in which I go through the process of trying to resolve a bug ([issue #12](https://github.com/nicobrenner/commandjobs/issues/12)), and finding 3 other bugs instead! If you are just getting started with coding, it's also a pretty good overview of a basic software project management. In the video I show the whole workflow of not only writing code, but also managing an environment, dealing with errors, documenting the process in Github, managing git and branches, commiting, pushing and merging code, updating documentation (like now), and sharing/promoting * [![Trying to solve #12](https://cdn.loom.com/sessions/thumbnails/82196bfcbf0a41d58885c5b3ddc69492-with-play.gif)](https://www.loom.com/share/82196bfcbf0a41d58885c5b3ddc69492) * Thank you to the Hacker News community for the encouragement, enthusiasm and support. Check out this thread: [Show HN: Tech jobs on the command line](https://news.ycombinator.com/item?id=39621373) ## Features - View and navigate AI-matched job listings directly from the terminal !["AI job matches"](docs/commandjobs-ai-matches.png) - Scrape job listings from "Ask HN: Who's hiring?" posts on Hacker News !["Ask HN: Who's hiring?" March 2024](docs/hn-ask-hn-whos-hiring-march-5-wide-optimized.gif) - Process listings with GPT to find the best matches for you * The app asks GPT for each job listing, if it's a good fit for your resume * The prompt includes the resume, the job listing, a section for json formating the results, a role description, a job preferences section, and some additional questions * You get a filtered list of the best matches for your resume and preferences ## In the works - Track job applications directly in the terminal - Scrape job listings from additional sources - Add cronjob that runs periodically to scrape - Alerts about new matches found - Anything you'd like to see? Please add a ticket ## Usage !["Command Jobs main menu"](docs/commandjobs-main-menu.png) After going through the Configuration and successfully running Command Jobs You will get a menu with the options below. To navigate the menu, just use the arrow keys and select options with Enter. You can quit at any time by pressing `q` When first running the app, open the Edit Resume section and paste the text of your resume, no need to include your name or contact info (you can see an example resume on `config/base_resume.sample`. Alternatively, you can paste your resume text directly into a `base_resume.txt` file on the base folder of the code Then, get some job listings into the app by running Scrape "Ask HN: Who's hiring?". You can see the first few listings in the Navigate jobs in the local db section (if you want to see more, you can also open `job_listings.db` directly with sqlite3 and check out the contents) For the next step, make sure you've reviewed your `.env` file and have adapted the prompts to your preferences for job matching Once you have your Resume ready, jobs in the local db and the prompts configured, run Find best matches for resume with AI. That will run through the listings to find a match of your resume and job preferences (for now, it is limited at 5 checks per run, you can modify that through changing the `LIMIT` in the query within `fetch_job_listings()` in `src/database_manager.py`) When the GPT analysis is done, you get access to the AI found X listings match your resume option, where you can navigate the best matches found The menu includes: - **Edit Resume**: Add or replace the text of your resume for AI matching - **Scrape "Ask HN: Who's hiring?"**: Scrape job listings from Hacker News - **Navigate jobs in the local db**: Browse listings stored locally - **Find best matches for resume with AI**: Match listings to your resume using AI - **AI found X listings match your resume**: Review personalized job matches To exit the application, press `q` ## Quick Start Video walkthrough, from `git clone` all the way to finding the best matches * [![Command Jobs Walkthrough](https://cdn.loom.com/sessions/thumbnails/8034361163004b3e95ada50c91da0143-with-play.gif)](https://www.loom.com/share/8034361163004b3e95ada50c91da0143) Below is the step by step * Clone the repository: - `git clone https://github.com/nicobrenner/commandjobs.git` - `cd commandjobs` * Run via Docker 1. Build the Docker image: - `docker-compose -f docker/docker-compose.yml build` 2. Run the Docker container (make sure you've setup your OpenAI API key in your `.env` file - see [Configuration](#configuration) section below): - `docker-compose -f docker/docker-compose.yml run --rm app` * (if you don't want to use Docker) Run with Python in a Virtual Environment 1. Set up a Python virtual environment and activate it: - `python3 -m venv venv` - `source venv/bin/activate` 2. Install the dependencies: - `pip install -r config/requirements.txt` 3. Run the application (make sure you've setup your OpenAI API key in your `.env` file - see [Configuration](#configuration) section below): - `python src/menu.py` ## Configuration 1. Create a `.env` file in the root directory of the project by copying the `config/sample.env` file, and adding your OpenAI API key: `cp config/sample.env .env` edit the .env file to add your OpenAI API key ``` OPENAI_API_KEY=your_openai_api_key_here OPENAI_GPT_MODEL=gpt-4.1-turbo BASE_RESUME_PATH=base_resume.txt HN_START_URL=https://news.ycombinator.com/item?id=45438503&p=1 ... ``` Note: the above HN_START_URL is for October 2025 ### Obtaining an OpenAI API Key If you don't have an OpenAI API key, [follow these instructions](https://openai.com/blog/openai-api) to obtain one. 2. Modify the prompt so that it matches your preferences. The prompt has 5 sections: * `COMMANDJOBS_ROLE`: list the roles that you are looking for ``` COMMANDJOBS_ROLE=backend engineer, or fullstack engineer, or senior engineer, or senior tech lead, or engineering manager, or senior enginering manager, or founding engineer, or founding fullstack engineer, or something similar ``` * `COMMANDJOBS_IDEAL_JOB_QUESTIONS`: explain what is a good fit for you ``` COMMANDJOBS_IDEAL_JOB_QUESTIONS=and the company uses either Ruby, Rails, Ruby on Rails, or Python, the position doesn't require any knowledge or experience in any of the following: {job_requirement_exclusions}, the position is remote, it's for the US and the description matches the resume? (Yes or No), justify the Yes or No about the role being a good fit for the experience of the resume in one sentence. ``` * `COMMANDJOBS_EXCLUSIONS`: list things to avoid (this takes some trial and error to get right, iterating with the matches you get each time) ``` COMMANDJOBS_EXCLUSIONS=VMS (video management systems), computer vision systems, Java, C++, C#, Grails, ML, Machine Learning, PyTorch, training models ``` * `COMMANDJOBS_PROMPT`: the prompt includes all the other elements as well as the questions that we want answers about from GPT ``` COMMANDJOBS_PROMPT=Given the below job listing html, and resume text. Listing:\n{job_html}\n\nResume:\n{resume}\n\nPlease provide the following information about the listing: brief 2 sentence summary of the listing, company name, [list of available positions, with individual corresponding links if available], tech stack description, do they use rails? (Yes or No), do they use python? (Yes or No), are the positions remote (not hybrid, not onsite)? (Yes or No), are they hiring in the US? (Yes or No), how to apply to the job? (provide 1 sentence max description, include link or email address if necessary), Does the role prioritize candidates with a background in a specific industry sector (e.g., tech, finance, healthcare)?, does the job seem like a good fit for the resume (Only say Yes if the role is for {roles} {ideal_job_questions}\n\nProvide output in JSON format, use this example for reference, always with the same keys, but replace the values with the answers for the previous requests for information: \n{output_format} ``` * `COMMANDJOBS_OUTPUT_FORMAT`: this specifies the output format for the prompt, including an example to follow - it's important that the structure and fields of the format matches the questions from the prompt ``` COMMANDJOBS_OUTPUT_FORMAT="{\n \"small_summary\": \"Wine and Open Source developers for C-language systems programming\",\n \"company_name\": \"CodeWeavers\",\n \"available_positions\": [\n {\n \"position\": \"Wine and General Open Source Developers\",\n \"link\": \"https://www.codeweavers.com/about/jobs\"\n }\n ],\n \"tech_stack_description\": \"C-language systems programming\",\n \"use_rails\": \"No\",\n \"use_python\": \"No\",\n \"remote_positions\": \"Yes\",\n \"hiring_in_us\": \"Yes\",\n \"how_to_apply\": \"Apply through our website, here is the link: https://www.codeweavers.com/about/jobs\",\n \"back_ground_with_priority\": null,\n \"fit_for_resume\": \"No\",\n \"fit_justification\": \"The position is for Wine and Open Source developers, neither of which the resume has experience with. The job is remote in the US\"\n }" ``` 3. Modify the query with filters for matching jobs. In the file `src/display_matching_table.py`, the method `__init__` has a variable (`self.good_match_filters`) with the following SQL conditions: ```sql json_valid(gi.answer) = 1 AND json_extract(gi.answer, '$.fit_for_resume') = 'Yes' AND json_extract(gi.answer, '$.remote_positions') = 'Yes' AND json_extract(gi.answer, '$.hiring_in_us') <> 'No' ``` These 3 conditions represent the default criteria for filtering AI-found matches. Below is the breakdown of the 3 default requirements for a good match: 1. The AI determined the listing a good match for the resume and preferences ```sql AND json_extract(gi.answer, '$.fit_for_resume') = 'Yes' ``` 2. The role is, or can be, remote ```sql AND json_extract(gi.answer, '$.remote_positions') = 'Yes' ``` 3. The role is hiring in the US (the value can be either Yes or NULL or '', so the condition checks that the field `'$.hiring_in_us'` is not `'No'`) ```sql AND json_extract(gi.answer, '$.hiring_in_us') <> 'No' ``` Note: the database is a sqlite3 database, so you can also just open it `sqlite3 job_listings.db` and then try out a query like the one below, and then experiment to see what you find. Regardless of filtering, all the answers and prompts should be stored in the `gpt_interactions` table (checkout the latest update video about the internals): ```sql SELECT COUNT(gi.job_id) FROM gpt_interactions gi JOIN job_listings jl ON gi.job_id = jl.id WHERE json_valid(gi.answer) = 1 AND json_extract(gi.answer, '$.fit_for_resume') = 'Yes' AND json_extract(gi.answer, '$.remote_positions') = 'Yes' AND json_extract(gi.answer, '$.hiring_in_us') <> 'No' ``` You should adjust that to your preferences and you can mix and match with the questions/answers you want to get from your prompt 4. Increase the limit of listings to check per batch The option `COMMANDJOBS_LISTINGS_PER_BATCH` (which should be in your `.env` file, see `sample.env`) determines how many listings are processed each time the menu option "Find best matches with AI" is executed. If you are using the default of 10, it means that every time you run the option "Find best matches", Command Jobs will make 10 requests to `gpt`. Once you trust the app, I recommend setting the limit to 500, so that the app can process all scraped listings in one go ## Contributing Priority * ❤️ If you want to contribute to this project and want to take a crack at writing tests for it, it would be amazing! 🤗 Here's a ticket to write a new test, and a walk-through of the current test code: [Request to create: Test displaying the resume text](https://github.com/nicobrenner/commandjobs/issues/48) 🙏🏼 We welcome contributions, especially in improving scrapers and enhancing user experience. If you'd like to help, please file an issue or pull request on [our GitHub repository](https://github.com/nicobrenner/commandjobs/issues) Here's an overview of some of the internals of the app * [![Command Jobs Internals](https://cdn.loom.com/sessions/thumbnails/cf1ad06f82a344f18e3e5a569857d60b-with-play.gif)](https://www.loom.com/share/cf1ad06f82a344f18e3e5a569857d60b) ## Issues Encounter any issues? Please file them on the [project's GitHub repo](https://github.com/nicobrenner/commandjobs/issues). We appreciate your feedback and contributions to making Command Jobs better! ## License This project is open-source and available under the [Apache 2.0 License](LICENSE). ## Related projects * [ancv](https://github.com/alexpovel/ancv), get a fancy version of your resume in your terminal, very cool ================================================ FILE: config/base_resume.sample ================================================ Skills 10+ years: Ruby on Rails | Backend | Frontend | Full-stack | AWS | Postgres | Redis | CI/CD | CircleCI | Javascript | RSpec 5+ years: Docker | Python | SMS | Twilio | VOIP | SIP Experience CTO/Co-founder AutopilotReviews San Francisco / Los Angeles 10/2014 - Present · Cultivated a robust engineering culture, leading to the successful recruitment and management of a high-performing team · Pioneered the integration of Twilio + A2P10DLC for delivering millions of text messages · Spearheaded the development of a highly scalable survey SaaS product (Ruby on Rails/PostgreSQL/Redis + JavaScript) · Led and managed team to setup AWS infrastructure, CI/CD and Agile processes for development · Drove the development of multiple backend integrations using Python and Selenium Founding Engineer Padlet (YC W13) San Francisco 09/2013 - 09/2014 · Instrumental in scaling the infrastructure of a Ruby on Rails/PostgreSQL + Angular + Node/Redis stack to support over 1 million registered users and 5,000 concurrent connections · Played a key role in enhancing team capabilities through strategic recruitment and fostering a collaborative environment · Optimized Postgres performance for high-speed data processing and management, directly contributing to the platform's scalability and efficiency · Built and deployed in-house sensitive media detector, which was fundamental to Padlet’s capacity to grow CTO/Co-founder ClickFono Santiago, Chile 03/2008 - 08/2013 · Led the architectural design and server infrastructure setup, incorporating SIP/voice integrations with telecom providers to create the most advanced online SaaS phone platform in Latin America at the time · Built a REST API for voice applications, using Ruby on Rails/PostgreSQL · Typical clients were top brands in Insurance, Banking, Finance, Retail, Telecommunications Founding Engineer Needish (acquired by Groupon) Santiago, Chile 03/2007 - 01/2008 · First hire, wrote first few versions of the application using CakePHP, setup Postgres database and server infrastructure as well as testing Education UC Berkeley 2004-2005 1 year EAP program in CS / IEOR PUC, Chile 2000-2006 Double major CS and IEOR engineering degree Supervised Machine Learning, by Andrew Ng - Coursera 2017 ================================================ FILE: config/requirements.txt ================================================ beautifulsoup4==4.9.3 requests==2.25.1 openai python-dotenv windows-curses; sys_platform == 'win32' selenium==4.25.0 webdriver-manager==4.0.2 ================================================ FILE: config/sample.env ================================================ OPENAI_API_KEY=your_openai_api_key_here OPENAI_GPT_MODEL=gpt-4.1-nano BASE_RESUME_PATH=base_resume.txt HN_START_URL=https://news.ycombinator.com/item?id=45438503&p=1 COMMANDJOBS_LISTINGS_PER_BATCH=10 COMMANDJOBS_ROLE=backend engineer, or fullstack engineer, or senior engineer, or senior tech lead, or engineering manager, or senior enginering manager, or founding engineer, or founding fullstack engineer, or something similar COMMANDJOBS_IDEAL_JOB_QUESTIONS=and the company uses either Ruby, Rails, Ruby on Rails, or Python, the position doesn't require any knowledge or experience in any of the following: {job_requirement_exclusions}, the position is remote, it's for the US and the description matches the resume? (Yes or No), justify the Yes or No about the role being a good fit for the experience of the resume in one sentence. COMMANDJOBS_EXCLUSIONS=VMS (video management systems), computer vision systems, Java, C++, C#, Grails, ML, Machine Learning, PyTorch, training models COMMANDJOBS_PROMPT=Given the below job listing html, and resume text. Listing:\n{job_html}\n\nResume:\n{resume}\n\nPlease provide the following information about the listing: brief 2 sentence summary of the listing, company name, [list of available positions, with individual corresponding links if available], tech stack description, do they use rails? (Yes or No), do they use python? (Yes or No), are the positions remote (not hybrid, not onsite)? (Yes or No), are they hiring in the US? (Yes or No), how to apply to the job? (provide 1 sentence max description, include link or email address if necessary), Does the role prioritize candidates with a background in a specific industry sector (e.g., tech, finance, healthcare)?, does the job seem like a good fit for the resume (Only say Yes if the role is for {roles} {ideal_job_questions}\n\nProvide output in JSON format, use this example for reference, always with the same keys, but replace the values with the answers for the previous requests for information: \n{output_format} COMMANDJOBS_OUTPUT_FORMAT="{\n \"small_summary\": \"Wine and Open Source developers for C-language systems programming\",\n \"company_name\": \"CodeWeavers\",\n \"available_positions\": [\n {\n \"position\": \"Wine and General Open Source Developers\",\n \"link\": \"https://www.codeweavers.com/about/jobs\"\n }\n ],\n \"tech_stack_description\": \"C-language systems programming\",\n \"use_rails\": \"No\",\n \"use_python\": \"No\",\n \"remote_positions\": \"Yes\",\n \"hiring_in_us\": \"Yes\",\n \"how_to_apply\": \"Apply through our website, here is the link: https://www.codeweavers.com/about/jobs\",\n \"back_ground_with_priority\": null,\n \"fit_for_resume\": \"No\",\n \"fit_justification\": \"The position is for Wine and Open Source developers, neither of which the resume has experience with. The job is remote in the US\"\n }" ================================================ FILE: docker/Dockerfile ================================================ # docker/Dockerfile # Use your prebuilt base image FROM commandjobs-base WORKDIR /commandjobs # Copy only your actual source code COPY . /commandjobs # Default command CMD ["python3", "src/menu.py"] ================================================ FILE: docker/Dockerfile.base ================================================ # docker/Dockerfile.base FROM python:3.12 # Install system dependencies just once! RUN apt-get update && \ apt-get install -y wget unzip chromium chromium-driver && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* # Set a working directory WORKDIR /commandjobs # Install project Python dependencies (this will still cache very well) COPY config/requirements.txt /commandjobs/config/requirements.txt RUN pip3 install --no-cache-dir -r config/requirements.txt ================================================ FILE: docker/docker-compose.yml ================================================ services: base: build: context: .. dockerfile: docker/Dockerfile.base image: commandjobs-base:latest app: # Set container & image name container_name: commandjobs image: commandjobs:1.0 build: context: .. dockerfile: docker/Dockerfile depends_on: - base # Set environment variables environment: - MENU_APP=src/menu.py - PYTHONPATH=/commandjobs - TERM=xterm-256color env_file: - ../.env # Mount entire project into docker container under /repo volumes: - ..:/commandjobs # Use host network mode (may require changes depending on Docker environment) network_mode: host tty: true # Allocate a pseudo-TTY stdin_open: true # Keep STDIN open working_dir: /commandjobs entrypoint: ["sh", "/commandjobs/docker/docker-entrypoint.sh"] ================================================ FILE: docker/docker-entrypoint.sh ================================================ #!/bin/bash set -e # Exit immediately if any command fails echo "Starting the application..." echo ">>> Installing dependencies..." pip3 install -r config/requirements.txt || echo "Error, could not install requirements.txt $?" echo ">>> Running database migrations..." # Loop through every .py in src/migrations, sorted by filename for migration in src/migrations/*.py; do echo "----> Applying $(basename "$migration")" python3 "$migration" done echo ">>> Launching application..." exec python3 src/menu.py || echo "Python script exited with error code $?" echo "Application has terminated." ================================================ FILE: job_scraper/__init__.py ================================================ ================================================ FILE: job_scraper/hacker_news/__init__.py ================================================ ================================================ FILE: job_scraper/hacker_news/scraper.py ================================================ import requests from bs4 import BeautifulSoup import sqlite3 # Define a new exception for interrupting scraping class ScrapingInterrupt(Exception): pass class HNScraper: def __init__(self, db_path='job_listings.db'): self.db_path = db_path # Define the base URL for Ask HN: Who's hiring self.base_url = 'https://news.ycombinator.com/item?id=45438503&p=1' self.new_entries_count = 0 # Initialize counter for new entries def save_to_database(self, original_text, original_html, source, external_id): """Save a job listing to the SQLite database.""" from datetime import datetime conn = sqlite3.connect(self.db_path) conn.execute("PRAGMA journal_mode=WAL;") c = conn.cursor() # Get current timestamp scraped_at = datetime.now().isoformat() # Use INSERT OR IGNORE to skip existing records with the same external_id c.execute("INSERT OR IGNORE INTO job_listings (original_text, original_html, source, external_id, scraped_at) VALUES (?, ?, ?, ?, ?)", (original_text, original_html, source, external_id, scraped_at)) conn.commit() conn.close() return c.rowcount > 0 # True if the listing was inserted def scrape_hn_jobs(self, start_url, stdscr, update_func=None, done_event=None, result_queue=None): """Scrape job listings from Hacker News and save them to the database.""" url = start_url update_func(f"Scraping: {start_url}") while url: try: response = requests.get(url, timeout=10) soup = BeautifulSoup(response.text, 'html.parser') comments = soup.find_all('tr', class_='athing comtr') for comment in comments: ind_cell = comment.find('td', class_='ind') img = ind_cell.find('img') if ind_cell else None if img and img.get('width') == "0": # Top-level comment job_description = comment.find('div', class_='commtext c00') if job_description: original_text = job_description.text original_html = job_description.prettify() # Extract the external_id from the comment element comment_id = comment.get('id') external_id = f"https://news.ycombinator.com/item?id={comment_id}" source = "Hacker News" inserted = self.save_to_database(original_text, original_html, source, external_id) if inserted: # if the row was inserted self.new_entries_count += 1 # Increment the new entries count # Check for updates and interrupts if update_func: update_func(original_text[:100]) # Call the update function with truncated text if update_func: update_func(f"Scraping: {source}") more_link = soup.find('a', class_='morelink') if more_link: url = 'https://news.ycombinator.com/' + more_link['href'] if update_func: update_func(f"Page complete, loading next... {self.new_entries_count} listings added so far") else: url = None except requests.exceptions.Timeout as e: if update_func: update_func("Request timed out. Try again later.") break except requests.exceptions.RequestException as e: if update_func: update_func(f"Request failed: {str(e)}") break # Handle user interrupts except ScrapingInterrupt: if update_func: update_func(f"Scraping interrupted by user. {self.new_entries_count} new listings added") break if update_func: # Put the result into the queue result_queue.put(self.new_entries_count) if done_event: done_event.set() # Set the event to signal that scraping is done if __name__ == "__main__": db_path = 'job_listings.db' scraper = HNScraper(db_path) start_url = 'https://news.ycombinator.com/item?id=45438503&p=1' scraper.scrape_hn_jobs(start_url) ================================================ FILE: job_scraper/scraper_selectors/__init__.py ================================================ ================================================ FILE: job_scraper/scraper_selectors/workday_selectors.py ================================================ from enum import StrEnum class WorkDaySelectors(StrEnum): JOB_LISTING_XPATH = '//li[@class="css-1q2dra3"]' JOB_TITLE_XPATH = './/h3/a' JOB_ID_XPATH = './/ul[@data-automation-id="subtitle"]/li' POSTED_ON_XAPTH = './/dd[@class="css-129m7dg"][preceding-sibling::dt[contains(text(),"posted on")]]' JOB_DESCRIPTION_XPATH = '//div[@data-automation-id="jobPostingDescription"]' NEXT_PAGE_XPATH = "//button[@data-uxi-element-id='next']" ================================================ FILE: job_scraper/utils.py ================================================ def get_workday_company_urls() -> dict: urls = { 'NVIDIA': 'https://nvidia.wd5.myworkdayjobs.com/NVIDIAExternalCareerSite?jobFamilyGroup=0c40f6bd1d8f10ae43ffaefd46dc7e78', 'SALESFORCE': 'https://salesforce.wd12.myworkdayjobs.com/en-US/External_Career_Site/details/Lead-Marketing-Cloud-Solution-Engineer_JR268932?jobFamilyGroup=14fa3452ec7c1011f90d0002a2100000', 'RED_HAT': 'https://redhat.wd5.myworkdayjobs.com/Jobs', 'CROWDSTRIKE': 'https://crowdstrike.wd5.myworkdayjobs.com/crowdstrikecareers' } return urls def get_workday_post_time_range() -> list[str]: return ['posted today', 'posted yesterday', 'posted 2 days ago', 'posted 3 days ago', 'posted 4 days ago', 'posted 5 days ago', 'posted 6 days ago', 'posted 7 days ago'] ================================================ FILE: job_scraper/waas/__init__.py ================================================ ================================================ FILE: job_scraper/waas/work_startup_scraper.py ================================================ import sqlite3 import requests from bs4 import BeautifulSoup import json class ScrapingInterrupt(Exception): pass class WorkStartupScraper: def __init__(self, db_path='job_listings.db'): self.db_path = db_path # Define the base URL for Ask HN: Who's hiring self.base_url = 'https://www.workatastartup.com/jobs' self.new_entries_count = 0 # Initialize counter for new entries def get_company_links(self): response = requests.get(self.base_url) soup = BeautifulSoup(response.content, 'html.parser') company_links_set = set() company_links = [] for a in soup.select('a[target="company"]'): company_url = a['href'] if company_url not in company_links_set: company_links.append(company_url) company_links_set.add(company_url) return company_links def get_job_links(self, company_url): # Fetch the HTML content from the URL response = requests.get(company_url) soup = BeautifulSoup(response.content, 'html.parser') # Find all elements with a data-page attribute data_page_elements = soup.find_all(attrs={"data-page": True}) # Initialize a list to store matching links job_links = [] # Find the div with the data-page attribute div = soup.find('div', {'data-page': True}) if div: # Extract the JSON-like content from the data-page attribute data_page_content = div['data-page'] # Parse the JSON content data = json.loads(data_page_content) # Extract job links for job in data['props']['rawCompany']['jobs']: job_link = job['show_path'] job_links.append(job_link) return job_links def get_job_details(self, job_url): response = requests.get(job_url) soup = BeautifulSoup(response.content, 'html.parser') # Find the "About the role" section and extract content until "How you'll contribute" about_section = soup.find(string="About the role") if about_section: # Find the parent element of "About the role" about_div = about_section.find_parent('div') if about_div: # Extract content between "About the role" and "How you'll contribute" extracted_content = [] for sibling in about_div.next_siblings: if sibling.name == 'div' and sibling.find(string="How you'll contribute"): break extracted_content.append(str(sibling)) # Join the extracted content extracted_content_str = ''.join(extracted_content).strip() # Get original text and HTML original_text = BeautifulSoup(extracted_content_str, 'html.parser').get_text(strip=True) original_html = extracted_content_str # Extract external ID from job URL external_id = job_url source = "Work at a startup" return { 'original_text': original_text, 'original_html': original_html, 'source': source, 'external_id': external_id } else: print(f"No parent element found for 'About the role' in {job_url}") else: print(f"'About the role' section not found in {job_url}") return None def scrape_jobs(self, stdscr, update_func=None, done_event=None, result_queue=None): """Scrape job listings from Work at a Startup and save them to the database.""" jobs_list = [] update_func(f"Scraping: {self.base_url}") try: company_links = self.get_company_links() count = 0 flag1 = False flag2 = False flag3 = False for company_link in company_links: count += 1 job_links = self.get_job_links(company_link) for job_link in job_links: job_details = self.get_job_details(job_link) if job_details: jobs_list.append(job_details) if update_func: update_func(f"Scraping: {company_link}") # Updates the progress of the scraping if count / len(company_links)>= 0.25 and not flag1: update_func("Scraping: 25% of companies completed") flag1 = True elif count / len(company_links)>= 0.5 and not flag2: update_func("Scraping: 50% of companies completed") flag2 = True elif count / len(company_links)>= 0.75: update_func("Scraping: 75% of companies completed") flag3 = True for job in jobs_list: inserted= self.save_to_database(job['original_text'], job['original_html'], job['source'], job['external_id']) if inserted: self.new_entries_count += 1 if job==jobs_list[-1]: if done_event: result_queue.put(self.new_entries_count) done_event.set() # Set the event to signal that scraping is done except requests.exceptions.Timeout as e: if update_func: update_func("Request timed out. Try again later.") except requests.exceptions.RequestException as e: if update_func: update_func(f"Request failed: {str(e)}") # Handle user interrupts except ScrapingInterrupt: if update_func: update_func(f"Scraping interrupted by user. {self.new_entries_count} new listings added") def save_to_database(self, original_text, original_html, source, external_id): """Save a job listing to the SQLite database.""" from datetime import datetime conn = sqlite3.connect(self.db_path) conn.execute("PRAGMA journal_mode=WAL;") c = conn.cursor() # Get current timestamp scraped_at = datetime.now().isoformat() # Use INSERT OR IGNORE to skip existing records with the same external_id c.execute("INSERT OR IGNORE INTO job_listings (original_text, original_html, source, external_id, scraped_at) VALUES (?, ?, ?, ?, ?)", (original_text, original_html, source, external_id, scraped_at)) conn.commit() conn.close() return c.rowcount > 0 # True if the listing was inserted ================================================ FILE: job_scraper/workday/__init__.py ================================================ ================================================ FILE: job_scraper/workday/scraper.py ================================================ import sqlite3 import time from selenium import webdriver from selenium.common.exceptions import TimeoutException, StaleElementReferenceException from webdriver_manager.chrome import ChromeDriverManager from selenium.webdriver.chrome.service import Service from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait from job_scraper.scraper_selectors.workday_selectors import WorkDaySelectors from job_scraper.utils import get_workday_post_time_range, get_workday_company_urls class WorkdayScraper: def __init__(self, db_path='job_listings.db', update_func=None, done_event=None, result_queue=None): self.db_path = db_path self.driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=self.get_selenium_configs()) self.one_week_span_text = get_workday_post_time_range() self.company_urls = get_workday_company_urls() self.new_entries_count = 0 self.done_event = done_event self.result_queue = result_queue self.update_func = update_func self.job_listings = [] @staticmethod def get_selenium_configs() -> Options: chrome_options = Options() chrome_options.add_argument("--headless") chrome_options.add_argument("--no-sandbox") chrome_options.add_argument("--disable-dev-shm-usage") chrome_options.add_argument("--disable-gpu") return chrome_options def save_to_database(self, original_text, original_html, source, external_id): from datetime import datetime conn = sqlite3.connect(self.db_path) conn.execute("PRAGMA journal_mode=WAL;") c = conn.cursor() # Get current timestamp scraped_at = datetime.now().isoformat() c.execute("INSERT OR IGNORE INTO job_listings (original_text, original_html, source, external_id, scraped_at) VALUES (?, ?, ?, ?, ?)", (original_text, original_html, source, external_id, scraped_at)) conn.commit() conn.close() return c.rowcount > 0 def save_new_job_listing(self, job_description, job_description_html, job_url, job_id): if not job_description: return if not job_description_html: return if not job_url: return if not job_id: return self.job_listings.append({ 'original_text': job_description, 'original_html': job_description_html, 'source': job_url, 'external_id': job_id }) def save_job_listings_to_db(self): for job in self.job_listings: inserted = self.save_to_database( job['original_text'], job['original_html'], job['source'], job['external_id'] ) if inserted: self.new_entries_count += 1 if self.done_event: self.result_queue.put(self.new_entries_count) self.done_event.set() def scrape(self): self.update_func(f"Scraping Workday companies:\t{", ".join(self.company_urls.keys())}") for company_name, company_url in self.company_urls.items(): self.driver.get(company_url) wait = WebDriverWait(self.driver, 10) posted_this_week = True while posted_this_week: try: wait.until(EC.presence_of_element_located((By.XPATH, WorkDaySelectors.JOB_LISTING_XPATH))) except TimeoutException: self.update_func("Job Listing Element not found. Try again later") break job_elements = self.driver.find_elements(By.XPATH, WorkDaySelectors.JOB_LISTING_XPATH) for job_element in job_elements: try: self.update_func(f"Scraping {company_name}: {self.driver.current_url}") job_title_element = job_element.find_element(By.XPATH, WorkDaySelectors.JOB_TITLE_XPATH) job_id_element = job_element.find_element(By.XPATH, WorkDaySelectors.JOB_ID_XPATH) job_id = job_id_element.text posted_on_element = job_element.find_element(By.XPATH, WorkDaySelectors.POSTED_ON_XAPTH) posted_on = posted_on_element.text if posted_on.lower() in self.one_week_span_text: job_url = job_title_element.get_attribute('href') job_title_element.click() job_description_element = wait.until( EC.presence_of_element_located((By.XPATH, WorkDaySelectors.JOB_DESCRIPTION_XPATH)) ) job_description = job_description_element.text job_description_html = job_description_element.get_attribute("innerHTML") self.save_new_job_listing(job_description, job_description_html, job_url, job_id) else: posted_this_week = False break except StaleElementReferenceException: continue if not posted_this_week: break try: next_page_button = wait.until( EC.element_to_be_clickable((By.XPATH, WorkDaySelectors.NEXT_PAGE_XPATH)) ) next_page_button.click() except TimeoutException: self.update_func("TimeoutException. Please try again later!") break self.save_job_listings_to_db() self.update_func("Scraping completed for all companies.") ================================================ FILE: src/__init__.py ================================================ ================================================ FILE: src/database_manager.py ================================================ import sqlite3 import asyncio class DatabaseManager: def __init__(self, db_path): self.conn = sqlite3.connect(db_path) self.conn.execute("PRAGMA journal_mode=WAL;") self.cursor = self.conn.cursor() self.initialize_db() def initialize_db(self): self.cursor.execute(''' CREATE TABLE IF NOT EXISTS job_listings ( id INTEGER PRIMARY KEY AUTOINCREMENT, original_text TEXT, original_html TEXT, source TEXT, external_id TEXT UNIQUE ) ''') self.conn.commit() self.cursor.execute(''' CREATE TABLE IF NOT EXISTS gpt_interactions ( id INTEGER PRIMARY KEY, job_id INTEGER, prompt TEXT, answer TEXT ) ''') self.conn.commit() def fetch_job_listings(self, listings_per_batch): # The LIMIT here is effectively throttling GPT usage # every time the AI processing runs, # it only checks {listings_per_batch} listings # 10 by default listings_per_batch = listings_per_batch or 10 query = f""" SELECT jl.id, jl.original_text, jl.original_html FROM job_listings jl LEFT JOIN gpt_interactions gi ON jl.id = gi.job_id WHERE gi.job_id IS NULL LIMIT {listings_per_batch} """ self.cursor.execute(query) return self.cursor.fetchall() def fetch_processed_listings_count(self): query = "SELECT COUNT(id) FROM gpt_interactions" self.cursor.execute(query) result = self.cursor.fetchone() # Fetch the first row of the result set if result: return result[0] # Return the first element of the tuple, which is the count else: return 0 # Return 0 if no rows are found, for safety def fetch_applied_listings_count(self): """Return the total number of listings the user has marked as applied.""" query = "SELECT COUNT(*) FROM applications WHERE status = 'Open'" self.cursor.execute(query) result = self.cursor.fetchone() return result[0] if result else 0 def save_gpt_interaction(self, job_id, prompt, answer): self.cursor.execute("INSERT INTO gpt_interactions (job_id, prompt, answer) VALUES (?, ?, ?)", (job_id, prompt, answer)) self.conn.commit() def close(self): self.conn.close() ================================================ FILE: src/display_all_jobs.py ================================================ import locale import sqlite3 import curses import textwrap import logging import json from datetime import datetime from display_applications import ApplicationsDisplay locale.setlocale(locale.LC_ALL, '') class AllJobsDisplay: def __init__(self, stdscr, db_path): self.stdscr = stdscr self.db_path = db_path self.highlighted_row_index = 0 self.current_page = 1 self.total_pages = 0 self.rows_per_page = 3 self.search_term = "" logging.basicConfig(filename='all_jobs_display.log', level=logging.DEBUG) def log(self, message): """Log a message for debugging.""" logging.debug(message) def format_scraped_date(self, scraped_at): """Format scraped_at timestamp for display.""" try: if scraped_at: # Parse the ISO timestamp and format for display dt = datetime.fromisoformat(scraped_at) return dt.strftime("%Y-%m-%d") return "Unknown" except (ValueError, TypeError): return "Unknown" def get_search_filters(self): """Build additional WHERE conditions for search filtering.""" if not self.search_term: return "" # Search in company name, summary, job description, and available positions search_conditions = [ f"lower(json_extract(gi.answer, '$.company_name')) LIKE '%{self.search_term.lower()}%'", f"lower(json_extract(gi.answer, '$.small_summary')) LIKE '%{self.search_term.lower()}%'", f"lower(jl.original_text) LIKE '%{self.search_term.lower()}%'", f"lower(json_extract(gi.answer, '$.available_positions')) LIKE '%{self.search_term.lower()}%'" ] return " AND (" + " OR ".join(search_conditions) + ")" def prompt_search(self): """Prompt user for search term and update search filters.""" max_y, max_x = self.stdscr.getmaxyx() # Create input window input_win = curses.newwin(3, max_x - 4, max_y - 5, 2) input_win.box() input_win.addstr(1, 2, f"Search (current: '{self.search_term}'): ") input_win.refresh() # Enable echo and get input curses.echo() curses.curs_set(1) # Show cursor # Get user input try: search_input = input_win.getstr(1, len(f"Search (current: '{self.search_term}'): ") + 2, 50).decode('utf-8') self.search_term = search_input.strip() except: pass # Handle any input errors finally: curses.noecho() curses.curs_set(0) # Hide cursor # Reset pagination self.current_page = 1 self.highlighted_row_index = 0 # Clear the input window input_win.clear() input_win.refresh() del input_win def fetch_total_entries(self): try: conn = sqlite3.connect(self.db_path) cur = conn.cursor() search_filters = self.get_search_filters() cur.execute(f""" SELECT COUNT(gi.job_id) FROM gpt_interactions gi JOIN job_listings jl ON gi.job_id = jl.id WHERE json_valid(gi.answer) = 1 AND (jl.discarded IS NULL OR jl.discarded = 0) AND (jl.applied IS NULL OR jl.applied = 0){search_filters} """) total_entries = cur.fetchone()[0] conn.close() return total_entries except (sqlite3.OperationalError, sqlite3.DatabaseError): return 0 def fetch_job(self, offset=None): if offset is None: offset = (self.current_page - 1) * self.rows_per_page + self.highlighted_row_index try: conn = sqlite3.connect(self.db_path) cur = conn.cursor() search_filters = self.get_search_filters() query = f""" SELECT json_extract(gi.answer, '$.company_name') AS company_name, json_extract(gi.answer, '$.available_positions') AS available_positions, json_extract(gi.answer, '$.small_summary') AS summary, json_extract(gi.answer, '$.fit_for_resume') AS fit_for_resume, json_extract(gi.answer, '$.fit_justification') AS fit_justification, gi.job_id, jl.original_text, jl.external_id, jl.scraped_at FROM gpt_interactions gi JOIN job_listings jl ON gi.job_id = jl.id WHERE json_valid(gi.answer) = 1 AND (jl.discarded IS NULL OR jl.discarded = 0) AND (jl.applied IS NULL OR jl.applied = 0){search_filters} ORDER BY jl.scraped_at DESC, jl.id DESC LIMIT 1 OFFSET {offset} """ self.log(f"Executing query: {query}") # Log the query cur.execute(query) data = cur.fetchone() self.log(f"Fetched 1 row") # Log the number of results conn.close() return data except (sqlite3.OperationalError, sqlite3.DatabaseError): return None def fetch_data(self, page_num): offset = (page_num - 1) * self.rows_per_page try: conn = sqlite3.connect(self.db_path) cur = conn.cursor() search_filters = self.get_search_filters() query = f""" SELECT json_extract(gi.answer, '$.company_name') AS company_name, json_extract(gi.answer, '$.available_positions') AS available_positions, json_extract(gi.answer, '$.small_summary') AS summary, json_extract(gi.answer, '$.fit_for_resume') AS fit_for_resume, json_extract(gi.answer, '$.fit_justification') AS fit_justification, gi.job_id, jl.original_text, jl.external_id, jl.scraped_at FROM gpt_interactions gi JOIN job_listings jl ON gi.job_id = jl.id WHERE json_valid(gi.answer) = 1 AND (jl.discarded IS NULL OR jl.discarded = 0) AND (jl.applied IS NULL OR jl.applied = 0){search_filters} ORDER BY jl.scraped_at DESC, jl.id DESC LIMIT {self.rows_per_page} OFFSET {offset} """ self.log(f"Executing query: {query}") # Log the query cur.execute(query) data = cur.fetchall() self.log(f"Fetched {len(data)} rows") # Log the number of results conn.close() return data except (sqlite3.OperationalError, sqlite3.DatabaseError): return None def draw_page(self, current_page): max_y, max_x = self.stdscr.getmaxyx() data = self.fetch_data(page_num=current_page) # Column widths for processed jobs column_widths = { "Company": 15, "Position": 25, "Summary": 50, "Why?": 30 # Fit justification } self.stdscr.clear() header = " ".join(title.center(column_widths[title]) for title in column_widths.keys()) self.stdscr.attron(curses.color_pair(4)) self.stdscr.addstr(0, 0, header) self.stdscr.attroff(curses.color_pair(4)) y_offset = 2 # Start below the header for idx, listing in enumerate(data): if idx == self.highlighted_row_index: self.stdscr.attron(curses.color_pair(3)) max_height_wrapped_text = 1 for i, key in enumerate(column_widths.keys()): if key == "Company": field = listing[0] # company_name from AI analysis elif key == "Position": # Parse JSON positions and extract titles try: positions = json.loads(listing[1]) or [] titles = [pos.get("position") for pos in positions if isinstance(pos.get("position"), str)] field = ", ".join(titles) if titles else "Various" except (json.JSONDecodeError, TypeError): field = "Various" elif key == "Summary": field = listing[2] # small_summary from AI analysis elif key == "Why?": # Show fit status and brief justification fit_status = listing[3] if listing[3] else "Unknown" justification = listing[4] if listing[4] else "" field = f"{fit_status}: {justification[:100]}" if justification else fit_status width = column_widths[key] # For the 'Company' column, add scraped date underneath if key == "Company": scraped_at = listing[8] if len(listing) > 8 else None formatted_date = self.format_scraped_date(scraped_at) field = f"{field}\n({formatted_date})" # This part takes a field content and wraps it in width wrapped_text = textwrap.wrap(str(field), width=width) for j, line in enumerate(wrapped_text): line_pos = sum(column_widths[title] for title in list(column_widths.keys())[:i]) + i * 3 if line_pos + width <= max_x and y_offset + j < max_y - 1: self.stdscr.addstr(y_offset + j, line_pos, line.ljust(width)) if j > max_height_wrapped_text: max_height_wrapped_text = j y_offset += max_height_wrapped_text + 2 if y_offset >= max_y - 3: # Check if we've reached the end of the screen break # Stop drawing if there's no more space on the screen if idx == self.highlighted_row_index: self.stdscr.attroff(curses.color_pair(3)) # --- footer line: pagination + controls --- footer_y = max_y - 2 # 1) Draw pagination (flush-left) search_status = f" (filtered: '{self.search_term}')" if self.search_term else "" pagination = f"Page {self.current_page} of {self.total_pages} ({self.total_entries} job listings{search_status} 📋)" self.stdscr.attron(curses.color_pair(5)) self.stdscr.addstr(footer_y, 0, pagination.ljust(max_x)) self.stdscr.attroff(curses.color_pair(5)) # 2) Prepare controls text controls_text = "[↑↓] Move [←→ ] Page [Enter] View [d] Discard [a] Apply [s] Search [c] Clear [q] Back" # 3) Clear the next line so no overlap self.stdscr.move(footer_y + 1, 0) self.stdscr.clrtoeol() # 4) Draw controls (same left alignment) self.stdscr.attron(curses.color_pair(7)) self.stdscr.addstr(footer_y + 1, 0, controls_text[: max_x - 1]) self.stdscr.attroff(curses.color_pair(7)) self.stdscr.refresh() def draw_table(self): self.total_entries = self.fetch_total_entries() self.total_pages = (self.total_entries + self.rows_per_page - 1) // self.rows_per_page self.draw_page(self.current_page) while True: key = self.stdscr.getch() if key == curses.KEY_DOWN: self.highlighted_row_index = min(self.highlighted_row_index + 1, self.rows_per_page - 1) self.draw_page(self.current_page) elif key == curses.KEY_UP: self.highlighted_row_index = max(0, self.highlighted_row_index - 1) self.draw_page(self.current_page) elif key == curses.KEY_RIGHT: if self.current_page < self.total_pages: self.current_page += 1 self.highlighted_row_index = 0 # Reset highlighted row for the new page self.draw_page(self.current_page) elif key == curses.KEY_LEFT: if self.current_page > 1: self.current_page -= 1 self.highlighted_row_index = 0 # Reset highlighted row for the new page self.draw_page(self.current_page) elif key in [curses.KEY_ENTER, 10, 13]: self.show_job_detail(self.highlighted_row_index + (self.current_page - 1) * self.rows_per_page) self.draw_page(self.current_page) # Redraw the table after returning from the detail view elif key == ord('d'): # Discard current job job = self.fetch_job(self.highlighted_row_index + (self.current_page - 1) * self.rows_per_page) if job: self.discard_listing(job[5]) # job[5] = job_id self.total_entries = self.fetch_total_entries() self.total_pages = (self.total_entries + self.rows_per_page - 1) // self.rows_per_page self.draw_page(self.current_page) elif key == ord('s'): # Search functionality self.prompt_search() self.total_entries = self.fetch_total_entries() self.total_pages = (self.total_entries + self.rows_per_page - 1) // self.rows_per_page self.draw_page(self.current_page) elif key == ord('c'): # Clear search if self.search_term: self.search_term = "" self.current_page = 1 self.highlighted_row_index = 0 self.total_entries = self.fetch_total_entries() self.total_pages = (self.total_entries + self.rows_per_page - 1) // self.rows_per_page self.draw_page(self.current_page) elif key == ord('q'): break # Exit the table view elif key == ord('a'): # Apply to current job job = self.fetch_job(self.highlighted_row_index + (self.current_page - 1) * self.rows_per_page) if job: self.apply_to_listing(job[5]) # job[5] = job_id # Show post-apply dialog choice = self.show_post_apply_dialog() if choice == 'a': # Go to applications view apps = ApplicationsDisplay(self.stdscr, self.db_path) apps.draw_board() return # If 'q', just return to table view self.total_entries = self.fetch_total_entries() self.total_pages = (self.total_entries + self.rows_per_page - 1) // self.rows_per_page self.draw_page(self.current_page) def discard_listing(self, job_id): try: conn = sqlite3.connect(self.db_path) cur = conn.cursor() cur.execute("UPDATE job_listings SET discarded = 1 WHERE id = ?", (job_id,)) conn.commit() conn.close() self.log(f"Discarded job {job_id}") except Exception as e: self.log(f"Error discarding job {job_id}: {e}") def apply_to_listing(self, job_id): try: conn = sqlite3.connect(self.db_path) conn.execute("PRAGMA journal_mode=WAL;") cur = conn.cursor() # 1) mark the listing itself as applied from datetime import date today = date.today().isoformat() # e.g. "2025-05-14" cur.execute(""" UPDATE job_listings SET applied = 1, applied_date = ? WHERE id = ? """, (today, job_id)) # 2) upsert into applications cur.execute("SELECT id FROM applications WHERE job_id = ?", (job_id,)) row = cur.fetchone() if row: application_id = row[0] cur.execute(""" UPDATE applications SET status = 'Open', created_at = ?, updated_at = ? WHERE id = ? """, (today, today, application_id)) else: cur.execute(""" INSERT INTO applications (job_id, status, created_at, updated_at) VALUES (?, 'Open', ?, ?) """, (job_id, today, today)) conn.commit() conn.close() self.log(f"Applied to job {job_id} (and created application record)") except Exception as e: self.log(f"Error marking job {job_id} as applied: {e}") def show_job_detail(self, job_index): self.total_entries = self.fetch_total_entries() # Get total number of entries for cycling # Enter a loop to allow cycling through job details while True: job = self.fetch_job(job_index) # Fetch job details if not job: return # If no job is found, simply return if job: self.stdscr.clear() # Screen dimensions max_y, max_x = self.stdscr.getmaxyx() # Set maximum content width content_width = min(76, max_x) start_col = max(0, (max_x - content_width) // 2) # Calculate start position for centered text y_offset = 1 # Start from the second row for better visibility # Show: Company, Position, Summary, Why it's a fit, Job Description, External Link company_name = job[0] positions_json = job[1] try: positions = json.loads(positions_json) or [] position_titles = [pos.get("position") for pos in positions if isinstance(pos.get("position"), str)] position_text = ", ".join(position_titles) if position_titles else "Various" except (json.JSONDecodeError, TypeError): position_text = "Various" details = [company_name, position_text, job[2], job[4], job[7], job[6]] # company, positions, summary, fit_justification, external_id, original_text headers = ["Company", "Position", "Summary", "Why it's a good fit", "External Link", "Job Description"] for idx, detail in enumerate(details): self.log(f'{idx} {detail}') header = headers[idx] # Calculate the position for left-aligned headers within the content area header_lines = textwrap.wrap(header, content_width) # Header with background self.stdscr.attron(curses.color_pair(4)) header_start_col = max(0, start_col - 2) # Ensure we don't go negative header_line = f' {header_lines[0]} ' if y_offset < max_y - 1 and header_start_col + len(header_line) < max_x: self.stdscr.addstr(y_offset, header_start_col, header_line) header_width = len(header_line) y_offset += 1 self.stdscr.attroff(curses.color_pair(4)) y_offset += 1 # Detail text - special handling for job description to prevent overflow detail_text = detail if detail is not None else "" if header == "External Link": # Display external link with underline formatting, consistent with other sections detail_lines = textwrap.wrap(detail_text, content_width) for line in detail_lines: if y_offset < max_y - 3: # Check to avoid writing beyond the screen detail_start_col = max(start_col, (max_x - len(line)) // 2) # Center detail text self.stdscr.addstr(y_offset, detail_start_col, line, curses.A_UNDERLINE) y_offset += 1 else: break elif header == "Job Description": # Limit job description to prevent screen overflow remaining_lines = max_y - y_offset - 4 # Leave space for controls detail_lines = textwrap.wrap(detail_text, content_width) # Show only as many lines as will fit, with truncation indicator if len(detail_lines) > remaining_lines: display_lines = detail_lines[:remaining_lines-1] display_lines.append("... [See full description on External Link above]") else: display_lines = detail_lines for line in display_lines: if y_offset < max_y - 3: # Leave more space for controls detail_start_col = max(start_col, (max_x - len(line)) // 2) # Center detail text self.stdscr.addstr(y_offset, detail_start_col, line) y_offset += 1 else: break else: # Normal processing for other sections detail_lines = textwrap.wrap(detail_text, content_width) for line in detail_lines: if y_offset < max_y - 3: # Check to avoid writing beyond the screen detail_start_col = max(start_col, (max_x - len(line)) // 2) # Center detail text self.stdscr.addstr(y_offset, detail_start_col, line) y_offset += 1 else: break y_offset += 1 # Extra space between sections self.stdscr.refresh() # Ensure getch() waits for input by disabling nodelay mode self.stdscr.nodelay(False) while True: # Draw control hints at the bottom center controls = "[← ] Prev [→ ] Next [q] Back [a] Apply" self.stdscr.attron(curses.color_pair(7)) control_x = max(0, (max_x - len(controls)) // 2) control_y = max_y - 2 if control_y >= 0 and control_x + len(controls) < max_x: self.stdscr.addstr(control_y, control_x, controls) self.stdscr.attroff(curses.color_pair(7)) self.stdscr.refresh() ch = self.stdscr.getch() if ch == ord('q'): return # Quit the detail view elif ch == curses.KEY_LEFT: job_index = (job_index - 1) % self.total_entries # Move to the previous job or wrap around break # Break the inner loop to refresh the job detail view with the new index elif ch == curses.KEY_RIGHT: job_index = (job_index + 1) % self.total_entries # Move to the next job or wrap around break # Break the inner loop to refresh the job detail view with the new index elif ch == ord('a'): # Apply directly from detail view job_id = job[5] self.apply_to_listing(job_id) # Show post-apply dialog choice = self.show_post_apply_dialog() if choice == 'a': # Go to applications view apps = ApplicationsDisplay(self.stdscr, self.db_path) apps.draw_board() return # If 'q', just return to detail view break def show_post_apply_dialog(self): """ Display a centered dialog offering [q] Keep browsing or [a] Go to applications. Returns 'q' or 'a'. """ max_y, max_x = self.stdscr.getmaxyx() text = "[q] Keep browsing [a] Go to applications?" width = len(text) + 4 height = 3 start_y = (max_y - height) // 2 start_x = (max_x - width) // 2 win = curses.newwin(height, width, start_y, start_x) win.box() win.attron(curses.color_pair(7)) win.addstr(1, 2, text) win.attroff(curses.color_pair(7)) win.refresh() # Immediately listen for q or a (no Enter required) while True: ch = win.getch() if ch in (ord('q'), ord('a')): break # Clear dialog and refresh underlying screen win.clear() self.stdscr.touchwin() self.stdscr.refresh() return chr(ch) ================================================ FILE: src/display_applications.py ================================================ import locale import curses import base64 import sys import sqlite3 import textwrap import json import datetime # make sure we’re in a UTF-8 locale so curses can handle wide chars: locale.setlocale(locale.LC_ALL, '') class ApplicationsDisplay: def __init__(self, stdscr, db_path): self.stdscr = stdscr self.db_path = db_path self.cursor = 0 # Pane state self.active_pane = 'applications' # or 'notes' self.note_cursor = 0 # Data self.applications = [] # (application_id, job_id, company, applied_date, status) self.notes = [] # (id, note, created_at) self.job_detail = None self.show_finalized_only = False def fetch_applications(self): """ Load self.applications = [ (application_id, job_id, company_name, applied_date, status, last_activity), ... ], ordered by last_activity DESC. """ conn = sqlite3.connect(self.db_path) conn.execute("PRAGMA journal_mode=WAL;") cur = conn.cursor() base_query = """ SELECT a.id AS application_id, a.job_id AS job_id, json_extract(gi.answer, '$.company_name') AS company_name, a.created_at AS applied_date, a.status AS status, COALESCE( ( SELECT MAX(created_at) FROM application_notes WHERE application_id = a.id ), a.created_at ) AS last_activity FROM applications AS a JOIN gpt_interactions AS gi ON gi.job_id = a.job_id """ if not self.show_finalized_only: base_query += " WHERE a.status = 'Open'" else: base_query += " WHERE a.status <> 'Open'" base_query += " ORDER BY last_activity DESC" cur.execute(base_query) self.applications = cur.fetchall() conn.close() def fetch_notes(self, application_id): """ Load self.notes = [(id, note, created_at), ...] for the given application_id. """ conn = sqlite3.connect(self.db_path) cur = conn.cursor() cur.execute( "SELECT id, note, created_at FROM application_notes WHERE application_id = ? ORDER BY created_at DESC", (application_id,) ) self.notes = cur.fetchall() conn.close() # clamp cursor if self.note_cursor >= len(self.notes): self.note_cursor = max(0, len(self.notes) - 1) def fetch_job_detail(self, job_id): """ Return a dict with positions_list, Summary, How to Apply, and Listing Link. """ conn = sqlite3.connect(self.db_path) conn.execute("PRAGMA journal_mode=WAL;") cur = conn.cursor() cur.execute( """ SELECT json_extract(gi.answer, '$.available_positions'), json_extract(gi.answer, '$.small_summary'), json_extract(gi.answer, '$.how_to_apply'), jl.external_id FROM gpt_interactions gi JOIN job_listings jl ON gi.job_id = jl.id WHERE jl.id = ? """, (job_id,) ) row = cur.fetchone() conn.close() detail = {"positions_list": [], "Summary": "", "How to Apply": "", "Listing Link": ""} if not row: return detail raw_positions, summary, apply, link = row try: detail["positions_list"] = json.loads(raw_positions) or [] except Exception: detail["positions_list"] = [] detail["Summary"] = summary or "" detail["How to Apply"] = apply or "" detail["Listing Link"] = link or "" return detail def delete_note(self, note_id): """ Prompt for confirmation, delete if confirmed. """ h, w = self.stdscr.getmaxyx() prompt = "Delete this note? [y/N]: " self.stdscr.attron(curses.color_pair(5)) self.stdscr.addstr(h - 3, 2, prompt) self.stdscr.attroff(curses.color_pair(5)) self.stdscr.refresh() curses.echo() choice = self.stdscr.getstr(h - 3, 3 + len(prompt), 1).decode('utf-8').lower() curses.noecho() # clear prompt line self.stdscr.move(h - 3, 0) self.stdscr.clrtoeol() self.stdscr.refresh() if choice == 'y': conn = sqlite3.connect(self.db_path) cur = conn.cursor() cur.execute("DELETE FROM application_notes WHERE id = ?", (note_id,)) conn.commit() conn.close() def add_note(self, application_id, job_id): """ Multi-line note entry with visible cursor. Save with Ctrl-G, cancel with Ctrl-D. """ # ─── show cursor ──────────────────────────────────────────── curses.curs_set(1) # ─── compute box dims ─────────────────────────────────────── h, w = self.stdscr.getmaxyx() box_h, box_w = h - 6, w - 8 start_y, start_x = 3, 4 # ─── draw outer border & title ───────────────────────────── win = curses.newwin(box_h, box_w, start_y, start_x) win.keypad(True) win.box() win.addstr(0, 2, " Enter note ([Ctrl-G] Save / [Ctrl-D] Cancel) ") win.refresh() # ─── create a pad big enough for huge pastes ─────────────── pad_h = max(10000, box_h * 20) # allow up to 2048 columns before running out of space pad_w = max(2048, box_w - 2) pad = curses.newpad(pad_h, pad_w) pad.scrollok(True) pad.idlok(True) pad.keypad(True) pad_row = 0 cur_y, cur_x = 0, 0 saved = False # ─── edit loop ───────────────────────────────────────────── while True: # redraw border & title win.box() win.addstr(0, 2, " Enter note ([Ctrl-G] Save / [Ctrl-D] Cancel) ") win.refresh() # show the pad slice pad.refresh( pad_row, 0, start_y + 1, start_x + 1, start_y + box_h - 2, start_x + box_w - 2 ) # use get_wch to receive wide characters properly: try: ch = pad.get_wch() except curses.error: continue # Ctrl-G → save, Ctrl-D → cancel if ch == '\x07': saved = True break if ch == '\x04': saved = False break # ── printable wide‐char (string) ───────────────────────── if isinstance(ch, str): if ch == '\n': cur_y += 1 cur_x = 0 else: try: pad.addstr(cur_y, cur_x, ch) except curses.error: pass cur_x += 1 else: # it's an integer key‐code: arrows, backspace, etc. if ch == curses.KEY_UP and pad_row > 0: pad_row -= 1 elif ch == curses.KEY_DOWN and cur_y - pad_row >= box_h - 2: pad_row += 1 elif ch == curses.KEY_LEFT and cur_x > 0: cur_x -= 1 elif ch == curses.KEY_RIGHT: cur_x += 1 elif ch in (curses.KEY_BACKSPACE, 127): if cur_x > 0: cur_x -= 1 try: pad.delch(cur_y, cur_x) except curses.error: pass elif cur_y > 0: cur_y -= 1 line = pad.instr(cur_y, 0, pad_w).decode('utf-8').rstrip('\x00') cur_x = len(line) # ignore other int‐codes # ─── auto-scroll vertically ─────────────────────────────── if cur_y - pad_row > box_h - 3: pad_row = cur_y - (box_h - 3) elif cur_y < pad_row: pad_row = cur_y pad_row = max(0, min(pad_row, pad_h - (box_h - 2))) # ─── move the hardware cursor ───────────────────────────── real_y = start_y + 1 + (cur_y - pad_row) real_x = start_x + 1 + cur_x curses.setsyx(real_y, real_x) curses.doupdate() # ─── hide cursor & clear any leftover input ──────────────── curses.curs_set(0) # try the built-in flush… curses.flushinp() # …and as a fallback, nodelay‐drain stdscr self.stdscr.nodelay(True) while True: if self.stdscr.getch() == curses.ERR: break self.stdscr.nodelay(False) if not saved: return # cancelled # ─── grab all lines from the pad ─────────────────────────── lines = [] for y in range(cur_y + 1): raw = pad.instr(y, 0, pad_w).decode('utf-8').rstrip('\x00') lines.append(raw.rstrip()) note_text = "\n".join(lines).strip() if not note_text: return # ─── persist to DB ──────────────────────────────────────── conn = sqlite3.connect(self.db_path) conn.execute("PRAGMA journal_mode=WAL;") cur = conn.cursor() if application_id is None: now = datetime.datetime.utcnow().isoformat() cur.execute( "INSERT INTO applications (job_id, status, created_at, updated_at) " "VALUES (?, 'Open', ?, ?)", (job_id, now, now), ) application_id = cur.lastrowid cur.execute( "INSERT INTO application_notes (application_id, note) VALUES (?, ?)", (application_id, note_text) ) conn.commit() conn.close() def view_note(self, note_text): """ Display a read-only note with a one-cell margin inside the box. Ctrl-K copies to clipboard. """ full_text = note_text curses.curs_set(0) h, w = self.stdscr.getmaxyx() box_h, box_w = h - 6, w - 8 start_y, start_x = 3, 4 # outer frame win = curses.newwin(box_h, box_w, start_y, start_x) win.keypad(True) # —— prepare wrapped lines using the *inner* width (box_w - 4) —— inner_w = box_w - 4 # leave one‐cell on left + right lines = [] for paragraph in note_text.split('\n'): wrapped = textwrap.wrap(paragraph, inner_w) lines.extend(wrapped if wrapped else ['']) pad_h = max(len(lines), box_h - 4) pad = curses.newpad(pad_h, inner_w) for idx, line in enumerate(lines): try: pad.addnstr(idx, 0, line, inner_w) except curses.error: pass pad_pos = 0 title = " View note: [↑↓] Scroll | [q/Esc] Close | [k] Copy to clipboard " # compute the *inner* viewport coordinates top = start_y + 1 left = start_x + 1 bottom = start_y + box_h - 2 right = start_x + box_w - 2 while True: # redraw frame and title win.erase() win.box() win.addstr(0, 2, title) win.refresh() # refresh the pad inside the 1-cell margin pad.refresh(pad_pos, 0, top + 1, # shift down one for margin left + 1, # shift right one for margin bottom - 1, # shift up one for margin right - 1) # shift left one for margin ch = win.getch() if ch in (ord('q'), 27): break elif ch == curses.KEY_UP and pad_pos > 0: pad_pos -= 1 elif ch == curses.KEY_DOWN and pad_pos < len(lines) - (box_h - 4): pad_pos += 1 elif ch == ord('k'): # lowercase “k” to copy try: # 1) base64-encode the full text b64 = base64.b64encode(full_text.encode('utf-8')).decode('ascii') # 2) build OSC52 sequence (c = clipboard) seq = f"\033]52;c;{b64}\a" # 3) temporarily end curses so we can write raw escapes curses.def_prog_mode() curses.endwin() # 4) send the sequence to the terminal sys.stdout.write(seq) sys.stdout.flush() # 5) resume curses curses.reset_prog_mode() curses.doupdate() curses.curs_set(0) # 6) flash confirmation on the last interior line h, w = self.stdscr.getmaxyx() prompt_row = h - 3 msg = "Copied note to clipboard" self.stdscr.attron(curses.color_pair(5)) self.stdscr.addstr(prompt_row, 5, msg) self.stdscr.attroff(curses.color_pair(5)) self.stdscr.refresh() curses.napms(1000) # clear that prompt line self.stdscr.move(prompt_row, 0) self.stdscr.clrtoeol() self.stdscr.refresh() except Exception: # (if something really weird happens) msg = "Copy failed" win.addstr(box_h - 2, 2, msg, curses.A_BOLD) win.refresh() curses.napms(1000) win.addstr(box_h - 2, 2, " " * len(msg)) win.refresh() # when we break out, draw_board will redraw the main screen def finalize(self, application_id, job_id): # unchanged curses.echo() prompt_row = curses.LINES - 4 self.stdscr.attron(curses.color_pair(5)) prompt_txt = " 👉 Finalize reason ([h] Hired / [r] Rejected / [a] Abandoned / [k] Keep Open):" self.stdscr.addstr(prompt_row, 0, prompt_txt) self.stdscr.attroff(curses.color_pair(5)) choice = self.stdscr.getkey().lower() curses.noecho() mapping = {'h': 'Hired', 'r': 'Rejected', 'a': 'Abandoned'} if choice not in mapping: return status = mapping[choice] now = datetime.datetime.now().isoformat(sep=' ', timespec='seconds') conn = sqlite3.connect(self.db_path) conn.execute("PRAGMA journal_mode=WAL;") cur = conn.cursor() if application_id is not None: cur.execute("UPDATE applications SET status=?,updated_at=? WHERE id=?", (status, now, application_id)) else: cur.execute( "INSERT INTO applications (job_id,status,created_at,updated_at) VALUES (?,?,?,?)", (job_id, status, now, now) ) application_id = cur.lastrowid cur.execute( "INSERT INTO application_notes (application_id,note) VALUES (?,?)", (application_id, f"FINALIZED: {status}") ) conn.commit() conn.close() # Refresh applications list and adjust cursor self.fetch_applications() if self.cursor >= len(self.applications) and len(self.applications) > 0: self.cursor = len(self.applications) - 1 elif len(self.applications) == 0: self.cursor = 0 def draw_board(self): while True: self.stdscr.clear() h, w = self.stdscr.getmaxyx() left_w, mid_w = w // 4, w // 3 right_w = w - left_w - mid_w - 4 # Header titles = ["Company".center(left_w), "Notes".center(mid_w), "Details".center(right_w)] header_line = " ".join(titles) self.stdscr.attron(curses.color_pair(4)) self.stdscr.addstr(0, 0, header_line[:w]) self.stdscr.attroff(curses.color_pair(4)) base_y = 2 # Load data self.fetch_applications() # Left pane for idx, (app_id, job_id, company, adate, status, last_activity) in enumerate(self.applications): y = base_y + idx label = f" {company} ({adate.split(' ')[0]}) [{status[0]}] " attr = curses.A_REVERSE if self.active_pane == 'applications' and idx == self.cursor else curses.A_NORMAL self.stdscr.addnstr(y, 0, label, left_w - 1, attr) # Middle pane (Notes) if self.applications and self.cursor < len(self.applications): application_id, job_id, *_ = self.applications[self.cursor] self.fetch_notes(application_id) note_y = base_y for idx, (nid, note, ts) in enumerate(self.notes): display = f"{ts.split(' ')[0]}: {note.replace('\n',' ')[:mid_w-6]}" y = note_y + idx if y < h - 4: attr = curses.A_REVERSE if self.active_pane == 'notes' and idx == self.note_cursor else curses.A_NORMAL self.stdscr.addnstr(y, left_w + 2, display, mid_w - 2, attr) hint = "[n] add note" if self.active_pane == 'notes': hint += " [d] delete note [Enter] view note" self.stdscr.addstr(min(h - 5, note_y + len(self.notes) + 1), left_w + 2, hint, curses.A_DIM) # Right pane (Details) x0, y0 = left_w + mid_w + 4, base_y detail = self.fetch_job_detail(job_id) # Available Positions self.stdscr.addstr(y0, x0, "Available Positions:", curses.A_BOLD) y0 += 1 for p in detail["positions_list"]: for wrapped in textwrap.wrap(f"{p.get('position','')} — {p.get('link','')}", right_w - 1): if y0 < h - 4: self.stdscr.addstr(y0, x0, wrapped) y0 += 1 y0 += 1 # Summary self.stdscr.addstr(y0, x0, "Summary:", curses.A_BOLD) y0 += 1 for wrapped in textwrap.wrap(detail["Summary"], right_w - 1): if y0 < h - 4: self.stdscr.addstr(y0, x0, wrapped) y0 += 1 y0 += 1 # How to Apply self.stdscr.addstr(y0, x0, "How to Apply:", curses.A_BOLD) y0 += 1 for wrapped in textwrap.wrap(detail["How to Apply"], right_w - 1): if y0 < h - 4: self.stdscr.addstr(y0, x0, wrapped) y0 += 1 y0 += 1 # Listing Link self.stdscr.addstr(y0, x0, "Listing Link:", curses.A_BOLD) y0 += 1 for wrapped in textwrap.wrap(detail["Listing Link"], right_w - 1): if y0 < h - 4: self.stdscr.addstr(y0, x0, wrapped) y0 += 1 # Help line help_txt = "[←→ ] Switch pane [↑↓] Move [space] Toggle Finalized [n] Note [f] Finalize [q] Back" sx = max(0, (w - len(help_txt)) // 2) self.stdscr.attron(curses.color_pair(7)) self.stdscr.addnstr(h - 2, sx, help_txt, len(help_txt)) self.stdscr.attroff(curses.color_pair(7)) self.stdscr.refresh() # Key handling c = self.stdscr.getch() # Pane switching if c == curses.KEY_RIGHT and self.applications: self.active_pane = 'notes' if self.active_pane == 'applications' else 'applications' elif c == curses.KEY_LEFT: self.active_pane = 'applications' # Within applications pane elif self.active_pane == 'applications': if c == curses.KEY_UP and self.cursor > 0: self.cursor -= 1 self.note_cursor = 0 elif c == curses.KEY_DOWN and self.cursor < len(self.applications) - 1: self.cursor += 1 self.note_cursor = 0 elif c == ord(' '): self.show_finalized_only = not self.show_finalized_only self.cursor = 0 elif c == ord('n'): self.add_note(*self.applications[self.cursor][:2]) elif c == ord('f'): self.finalize(*self.applications[self.cursor][:2]) elif c in (ord('q'), 27): break # Within notes pane elif self.active_pane == 'notes': if c == curses.KEY_UP and self.note_cursor > 0: self.note_cursor -= 1 elif c == curses.KEY_DOWN and self.note_cursor < len(self.notes) - 1: self.note_cursor += 1 elif c == ord('d') and self.notes: nid = self.notes[self.note_cursor][0]; self.delete_note(nid) elif c in (ord('\n'), curses.KEY_ENTER) and self.notes: note_text = self.notes[self.note_cursor][1] self.view_note(note_text) elif c == ord('n'): self.add_note(*self.applications[self.cursor][:2]) elif c == ord('f'): self.finalize(*self.applications[self.cursor][:2]) elif c in (ord('q'), 27): self.active_pane = 'applications' ================================================ FILE: src/display_matching_table.py ================================================ import locale import sqlite3 import curses import textwrap import logging import json from datetime import date, datetime from display_applications import ApplicationsDisplay locale.setlocale(locale.LC_ALL, '') class MatchingTableDisplay: def __init__(self, stdscr, db_path): self.stdscr = stdscr self.db_path = db_path self.highlighted_row_index = 0 self.current_page = 1 self.total_pages = 0 self.rows_per_page = 3 self.search_term = "" logging.basicConfig(filename='matching_table_display.log', level=logging.DEBUG) self.good_match_filters = ''' json_valid(gi.answer) = 1 AND json_extract(gi.answer, '$.fit_for_resume') = 'Yes' AND json_extract(gi.answer, '$.remote_positions') = 'Yes' AND json_extract(gi.answer, '$.hiring_in_us') <> 'No' AND (jl.discarded IS NULL OR jl.discarded = 0) AND (jl.applied IS NULL OR jl.applied = 0) ''' def log(self, message): """Log a message for debugging.""" logging.debug(message) def format_scraped_date(self, scraped_at): """Format scraped_at timestamp for display.""" try: if scraped_at: # Parse the ISO timestamp and format for display dt = datetime.fromisoformat(scraped_at) return dt.strftime("%Y-%m-%d") return "Unknown" except (ValueError, TypeError): return "Unknown" def get_search_filters(self): """Build additional WHERE conditions for search filtering.""" if not self.search_term: return "" # Search in company name, summary, job description, and available positions search_conditions = [ f"lower(json_extract(gi.answer, '$.company_name')) LIKE '%{self.search_term.lower()}%'", f"lower(json_extract(gi.answer, '$.small_summary')) LIKE '%{self.search_term.lower()}%'", f"lower(jl.original_text) LIKE '%{self.search_term.lower()}%'", f"lower(json_extract(gi.answer, '$.available_positions')) LIKE '%{self.search_term.lower()}%'" ] return " AND (" + " OR ".join(search_conditions) + ")" def prompt_search(self): """Prompt user for search term and update search filters.""" max_y, max_x = self.stdscr.getmaxyx() # Create input window input_win = curses.newwin(3, max_x - 4, max_y - 5, 2) input_win.box() input_win.addstr(1, 2, f"Search (current: '{self.search_term}'): ") input_win.refresh() # Enable echo and get input curses.echo() curses.curs_set(1) # Show cursor # Get user input try: search_input = input_win.getstr(1, len(f"Search (current: '{self.search_term}'): ") + 2, 50).decode('utf-8') self.search_term = search_input.strip() except: pass # Handle any input errors finally: curses.noecho() curses.curs_set(0) # Hide cursor # Reset pagination self.current_page = 1 self.highlighted_row_index = 0 # Clear the input window input_win.clear() input_win.refresh() del input_win def fetch_total_entries(self): try: conn = sqlite3.connect(self.db_path) cur = conn.cursor() search_filters = self.get_search_filters() cur.execute(f""" SELECT COUNT(gi.job_id) FROM gpt_interactions gi JOIN job_listings jl ON gi.job_id = jl.id WHERE {self.good_match_filters}{search_filters} """) total_entries = cur.fetchone()[0] conn.close() return total_entries except (sqlite3.OperationalError, sqlite3.DatabaseError): return 0 def fetch_job(self, offset=None): if offset is None: offset = (self.current_page - 1) * self.rows_per_page + self.highlighted_row_index try: conn = sqlite3.connect(self.db_path) cur = conn.cursor() query = f""" SELECT json_extract(gi.answer, '$.company_name') AS company_name, json_extract(gi.answer, '$.available_positions') AS available_positions, json_extract(gi.answer, '$.small_summary') AS summary, json_extract(gi.answer, '$.fit_for_resume') AS fit_for_resume, json_extract(gi.answer, '$.fit_justification') AS fit_justification, json_extract(gi.answer, '$.how_to_apply') AS how_to_apply, json_extract(gi.answer, '$.remote_positions') AS remote_positions, json_extract(gi.answer, '$.hiring_in_us') AS hiring_in_us, gi.job_id, jl.original_text, jl.external_id, jl.scraped_at FROM gpt_interactions gi JOIN job_listings jl ON gi.job_id = jl.id WHERE {self.good_match_filters}{self.get_search_filters()} ORDER BY jl.scraped_at DESC, jl.id DESC LIMIT 1 OFFSET {offset} """ self.log(f"Executing query: {query}") # Log the query cur.execute(query) data = cur.fetchone() self.log(f"Fetched {len(data)} rows") # Log the number of results conn.close() return data except (sqlite3.OperationalError, sqlite3.DatabaseError): return None def fetch_data(self, page_num): offset = (page_num - 1) * self.rows_per_page try: conn = sqlite3.connect(self.db_path) cur = conn.cursor() query = f""" SELECT json_extract(gi.answer, '$.company_name') AS company_name, json_extract(gi.answer, '$.available_positions') AS available_positions, json_extract(gi.answer, '$.small_summary') AS summary, json_extract(gi.answer, '$.fit_for_resume') AS fit_for_resume, json_extract(gi.answer, '$.fit_justification') AS fit_justification, json_extract(gi.answer, '$.how_to_apply') AS how_to_apply, json_extract(gi.answer, '$.remote_positions') AS remote_positions, json_extract(gi.answer, '$.hiring_in_us') AS hiring_in_us, gi.job_id, jl.original_text, jl.scraped_at FROM gpt_interactions gi JOIN job_listings jl ON gi.job_id = jl.id WHERE {self.good_match_filters}{self.get_search_filters()} ORDER BY jl.scraped_at DESC, jl.id DESC LIMIT {self.rows_per_page} OFFSET {offset} """ self.log(f"Executing query: {query}") # Log the query cur.execute(query) data = cur.fetchall() self.log(f"Fetched {len(data)} rows") # Log the number of results conn.close() return data except (sqlite3.OperationalError, sqlite3.DatabaseError): return None def draw_page(self, current_page): max_y, max_x = self.stdscr.getmaxyx() data = self.fetch_data(page_num=current_page) # Adjusted column widths column_widths = { "Company": 15, "Position": 20, # Assign 1/4 screen width to Position for JSON data "Summary": 40, # Summary could be long, so assign 1/4 screen width "Good Fit?": 10, "Why?": 30, "How to Apply?": 20 } self.stdscr.clear() header = " ".join(title.center(column_widths[title]) for title in column_widths.keys()) self.stdscr.attron(curses.color_pair(4)) self.stdscr.addstr(0, 0, header) self.stdscr.attroff(curses.color_pair(4)) y_offset = 2 # Start below the header for idx, listing in enumerate(data): if idx == self.highlighted_row_index: self.stdscr.attron(curses.color_pair(3)) max_height_wrapped_text = 1 for i, key in enumerate(column_widths.keys()): field = listing[i] width = column_widths[key] # Parse JSON for the 'Position' column and extract position titles if key == "Position": try: positions = json.loads(field) or [] # keep only those with a real string for "position" titles = [pos.get("position") for pos in positions if isinstance(pos.get("position"), str)] field = ", ".join(titles) if titles else "" except (json.JSONDecodeError, TypeError): # JSON was bad, or field was None field = "Invalid data" # For the 'Company' column, add scraped date underneath if key == "Company": # listing has scraped_at as the last field (index 10 in fetch_data, index 11 in fetch_job) scraped_at = listing[10] if len(listing) > 10 else None formatted_date = self.format_scraped_date(scraped_at) field = f"{field}\n({formatted_date})" # This part takes a field content and wraps it in width # then it loops through it line by line, and wrapped_text = textwrap.wrap(str(field), width=width) for j, line in enumerate(wrapped_text): line_pos = sum(column_widths[title] for title in list(column_widths.keys())[:i]) + i * 3 # if line_pos + len(line) < max_x and y_offset + j < max_y: if line_pos + width <= max_x and y_offset + j < max_y - 1: self.stdscr.addstr(y_offset + j, line_pos, line.ljust(width)) if j > max_height_wrapped_text: max_height_wrapped_text = j y_offset += max_height_wrapped_text + 2 if y_offset >= max_y - 3: # Check if we've reached the end of the screen break # Stop drawing if there's no more space on the screen if idx == self.highlighted_row_index: self.stdscr.attroff(curses.color_pair(3)) # Pagination info (this section is duplicated below and can be removed) # pagination_info = f"Page {self.current_page} of {self.total_pages} ({self.total_entries} great matches for your resume 😁)" # self.stdscr.attron(curses.color_pair(5)) # self.stdscr.addstr(max_y - 2, 0, pagination_info) # self.stdscr.attroff(curses.color_pair(5)) # --- new controls hint bar --- # --- footer line: pagination + controls --- footer_y = max_y - 2 # 1) Draw pagination (flush-left) search_status = f" (filtered: '{self.search_term}')" if self.search_term else "" pagination = f"Page {self.current_page} of {self.total_pages} ({self.total_entries} great matches{search_status} 😁)" self.stdscr.attron(curses.color_pair(5)) self.stdscr.addstr(footer_y, 0, pagination.ljust(max_x)) self.stdscr.attroff(curses.color_pair(5)) # 2) Prepare controls text controls_text = "[↑↓] Move [←→ ] Page [Enter] View [d] Discard [a] Apply [s] Search [c] Clear [q] Back" # 3) Clear the next line so no overlap self.stdscr.move(footer_y + 1, 0) self.stdscr.clrtoeol() # 4) Draw controls (same left alignment) self.stdscr.attron(curses.color_pair(7)) self.stdscr.addstr(footer_y + 1, 0, controls_text[: max_x - 1]) self.stdscr.attroff(curses.color_pair(7)) self.stdscr.refresh() def draw_table(self): self.total_entries = self.fetch_total_entries() self.total_pages = (self.total_entries + self.rows_per_page - 1) // self.rows_per_page self.draw_page(self.current_page) while True: key = self.stdscr.getch() if key == curses.KEY_DOWN: self.highlighted_row_index = min(self.highlighted_row_index + 1, self.rows_per_page - 1) self.draw_page(self.current_page) elif key == curses.KEY_UP: self.highlighted_row_index = max(0, self.highlighted_row_index - 1) self.draw_page(self.current_page) elif key == curses.KEY_RIGHT: if self.current_page < self.total_pages: self.current_page += 1 self.highlighted_row_index = 0 # Reset highlighted row for the new page self.draw_page(self.current_page) elif key == curses.KEY_LEFT: if self.current_page > 1: self.current_page -= 1 self.highlighted_row_index = 0 # Reset highlighted row for the new page self.draw_page(self.current_page) elif key in [curses.KEY_ENTER, 10, 13]: self.show_job_detail(self.highlighted_row_index + (self.current_page - 1) * self.rows_per_page) self.draw_page(self.current_page) # Redraw the table after returning from the detail view elif key == ord('d'): # Discard current job job = self.fetch_job(self.highlighted_row_index + (self.current_page - 1) * self.rows_per_page) if job: self.discard_listing(job[8]) # job[8] = job_id self.total_entries = self.fetch_total_entries() self.total_pages = (self.total_entries + self.rows_per_page - 1) // self.rows_per_page # self.highlighted_row_index = 0 self.draw_page(self.current_page) elif key == ord('s'): # Search functionality self.prompt_search() self.total_entries = self.fetch_total_entries() self.total_pages = (self.total_entries + self.rows_per_page - 1) // self.rows_per_page self.draw_page(self.current_page) elif key == ord('c'): # Clear search if self.search_term: self.search_term = "" self.current_page = 1 self.highlighted_row_index = 0 self.total_entries = self.fetch_total_entries() self.total_pages = (self.total_entries + self.rows_per_page - 1) // self.rows_per_page self.draw_page(self.current_page) elif key == ord('q'): # Clear search when exiting to avoid confusing menu display self.search_term = "" break # Exit the table view elif key == ord('a'): # Apply to current job job = self.fetch_job(self.highlighted_row_index + (self.current_page - 1) * self.rows_per_page) if job: self.apply_to_listing(job[8]) # job[8] = job_id # Show post-apply dialog choice = self.show_post_apply_dialog() if choice == 'a': # Clear search when navigating to applications self.search_term = "" # Go to applications view apps = ApplicationsDisplay(self.stdscr, self.db_path) apps.draw_board() return # If 'q', just return to table view self.total_entries = self.fetch_total_entries() self.total_pages = (self.total_entries + self.rows_per_page - 1) // self.rows_per_page # self.highlighted_row_index = 0 self.draw_page(self.current_page) def discard_listing(self, job_id): try: conn = sqlite3.connect(self.db_path) cur = conn.cursor() cur.execute("UPDATE job_listings SET discarded = 1 WHERE id = ?", (job_id,)) conn.commit() conn.close() self.log(f"Discarded job {job_id}") except Exception as e: self.log(f"Error discarding job {job_id}: {e}") def apply_to_listing(self, job_id): try: conn = sqlite3.connect(self.db_path) conn.execute("PRAGMA journal_mode=WAL;") cur = conn.cursor() # 1) mark the listing itself as applied today = date.today().isoformat() # e.g. "2025-05-14" cur.execute(""" UPDATE job_listings SET applied = 1, applied_date = ? WHERE id = ? """, (today, job_id)) # 2) upsert into applications # - if an application already exists, just refresh its timestamps/status # - otherwise insert a new one cur.execute("SELECT id FROM applications WHERE job_id = ?", (job_id,)) row = cur.fetchone() if row: application_id = row[0] cur.execute(""" UPDATE applications SET status = 'Open', created_at = ?, -- in case you want created_at to match apply date updated_at = ? WHERE id = ? """, (today, today, application_id)) else: cur.execute(""" INSERT INTO applications (job_id, status, created_at, updated_at) VALUES (?, 'Open', ?, ?) """, (job_id, today, today)) conn.commit() conn.close() self.log(f"Applied to job {job_id} (and created application record)") except Exception as e: self.log(f"Error marking job {job_id} as applied: {e}") def show_job_detail(self, job_index): self.total_entries = self.fetch_total_entries() # Get total number of entries for cycling # Enter a loop to allow cycling through job details while True: job = self.fetch_job(job_index) # Fetch job details if not job: return # If no job is found, simply return if job: self.stdscr.clear() # Screen dimensions max_y, max_x = self.stdscr.getmaxyx() # Set maximum content width content_width = min(76, max_x) start_col = max(0, (max_x - content_width) // 2) # Calculate start position for centered text y_offset = 1 # Start from the second row for better visibility for idx, detail in enumerate([job[0], job[1], job[4], job[5], job[9]]): self.log(f'{idx} {detail}') header = ["Company", "Position", "Why it's a good fit", "How to Apply", "Job Description"][idx] if header == "Position": try: # if detail is None or "null", coerce to empty list positions = json.loads(detail) or [] # only keep real strings titles = [ p.get("position") for p in positions if isinstance(p.get("position"), str) ] detail = ", ".join(titles) except (json.JSONDecodeError, TypeError): # fall back to blank if we can’t parse detail = "" # Calculate the position for left-aligned headers within the content area header_lines = textwrap.wrap(header, content_width) # Header with background self.stdscr.attron(curses.color_pair(4)) header_start_col = start_col # Align left within the content width header_line = f' {header_lines[0]} ' self.stdscr.addstr(y_offset, header_start_col - 2, header_line) header_width = len(header_line) y_offset += 1 self.stdscr.attroff(curses.color_pair(4)) if header == "Job Description": y_offset -= 1 link_text = job[10] if job[10] is not None else "" link_lines = textwrap.wrap(link_text, content_width) for idx, line in enumerate(link_lines): start_on = start_col if idx == 0: start_on += header_width + 1 # Underline the text of the link self.stdscr.addstr(y_offset, start_on, line, curses.A_UNDERLINE) y_offset += 1 y_offset += 1 # Detail text # avoid passing None to wrap() detail_text = detail if detail is not None else "" detail_lines = textwrap.wrap(detail_text, content_width) for line in detail_lines: if y_offset < max_y - 1: # Check to avoid writing beyond the screen detail_start_col = max(start_col, (max_x - len(line)) // 2) # Center detail text self.stdscr.addstr(y_offset, detail_start_col, line) y_offset += 1 y_offset += 1 # Extra space between sections self.stdscr.refresh() # Ensure getch() waits for input by disabling nodelay mode self.stdscr.nodelay(False) while True: # Draw control hints at the bottom center controls = "[← ] Prev [→ ] Next [q] Back [a] Apply" self.stdscr.attron(curses.color_pair(7)) self.stdscr.addstr(max_y - 2, max(0, (max_x - len(controls)) // 2), controls) self.stdscr.attroff(curses.color_pair(7)) self.stdscr.refresh() ch = self.stdscr.getch() if ch == ord('q'): # Clear search when exiting detail view to avoid confusing menu display self.search_term = "" return # Quit the detail view elif ch == curses.KEY_LEFT: job_index = (job_index - 1) % self.total_entries # Move to the previous job or wrap around break # Break the inner loop to refresh the job detail view with the new index elif ch == curses.KEY_RIGHT: job_index = (job_index + 1) % self.total_entries # Move to the next job or wrap around break # Break the inner loop to refresh the job detail view with the new index elif ch == ord('a'): # Apply directly from detail view job_id = job[8] # adjust index if needed self.apply_to_listing(job_id) # Show post-apply dialog choice = self.show_post_apply_dialog() if choice == 'a': # Clear search when navigating to applications self.search_term = "" # Go to applications view apps = ApplicationsDisplay(self.stdscr, self.db_path) apps.draw_board() return # If 'q', just return to detail view break def show_post_apply_dialog(self): """ Display a centered dialog offering [q] Keep browsing or [a] Go to applications. Returns 'q' or 'a'. """ max_y, max_x = self.stdscr.getmaxyx() text = "[q] Keep browsing [a] Go to applications?" width = len(text) + 4 height = 3 start_y = (max_y - height) // 2 start_x = (max_x - width) // 2 win = curses.newwin(height, width, start_y, start_x) win.box() win.attron(curses.color_pair(7)) win.addstr(1, 2, text) win.attroff(curses.color_pair(7)) win.refresh() # Immediately listen for q or a (no Enter required) while True: ch = win.getch() if ch in (ord('q'), ord('a')): break # Clear dialog and refresh underlying screen win.clear() self.stdscr.touchwin() self.stdscr.refresh() return chr(ch) ================================================ FILE: src/display_table.py ================================================ # display_table.py import sqlite3 import curses import textwrap def fetch_data(db_path): try: conn = sqlite3.connect(db_path) cur = conn.cursor() cur.execute("SELECT original_text, external_id FROM job_listings LIMIT 5") data = cur.fetchall() conn.close() return data except (sqlite3.OperationalError, sqlite3.DatabaseError): return None def draw_table(stdscr, db_path): curses.init_pair(3, curses.COLOR_WHITE, curses.COLOR_BLUE) # Highlight color data = fetch_data(db_path) max_y, max_x = stdscr.getmaxyx() max_table_width = min(120, max_x - 4) # Adjusted for padding and separators text_col_width = 78 # Adjusted for spacing between cells source_col_width = 18 # Adjusted for spacing if not data: stdscr.addstr(0, 0, "No data found or database is missing.") stdscr.refresh() stdscr.getch() return highlighted_row_index = 0 offset = 0 while True: stdscr.clear() row_num = 2 # Starting row for data for idx, (original_text, source) in enumerate(data[offset:]): wrapped_text = textwrap.wrap(original_text[:80], width=text_col_width) wrapped_source = textwrap.wrap(source, width=source_col_width) row_height = max(len(wrapped_text), len(wrapped_source)) for i in range(row_height): text_line = wrapped_text[i] if i < len(wrapped_text) else "" source_line = wrapped_source[i] if i < len(wrapped_source) else "" # Construct the line with spacing between cells line = f"{text_line.ljust(text_col_width)} | {source_line.ljust(source_col_width)}" if idx + offset == highlighted_row_index: stdscr.attron(curses.color_pair(3)) stdscr.addstr(row_num, 1, line) # Adjusted to start from column 1 for padding stdscr.attroff(curses.color_pair(3)) else: stdscr.addstr(row_num, 1, line) row_num += 1 # Draw a horizontal separator line after each row stdscr.addstr(row_num, 1, '-' * (text_col_width + source_col_width + 3)) # '+3' for cell spacing and separator row_num += 1 # Increment row_num to account for the separator line if row_num >= max_y - 1: break stdscr.refresh() # Key handling for scrolling and quitting key = stdscr.getch() if key == curses.KEY_DOWN and highlighted_row_index < len(data) - 1: highlighted_row_index += 1 if row_num >= max_y - 1 and offset < len(data) - (max_y - 2): offset += 1 # Scroll down elif key == curses.KEY_UP and highlighted_row_index > 0: highlighted_row_index -= 1 if highlighted_row_index < offset: offset -= 1 # Scroll up elif key == ord('q'): break # Quit the table view ================================================ FILE: src/gpt_processor.py ================================================ import asyncio import os import json from openai import AsyncOpenAI from dotenv import load_dotenv class GPTProcessor: def __init__(self, db_manager, api_key): # Load environment variables load_dotenv() self.db_manager = db_manager self.client = AsyncOpenAI(api_key=api_key) self.log_file = 'gpt_processor.log' # Log file path self.listings_per_batch = os.getenv('COMMANDJOBS_LISTINGS_PER_BATCH') if self.listings_per_batch is None: raise ValueError(f"COMMANDJOBS_LISTINGS_PER_BATCH is not set; exiting.") def log(self, message): """Append a message to the log file.""" with open(self.log_file, 'a') as f: f.write(f"{message}\n") async def process_job_listings_with_gpt(self, resume_path, update_ui_callback): update_ui_callback(f"Getting job listings") resume = self.read_resume_from_file(resume_path) job_listings = self.db_manager.fetch_job_listings(self.listings_per_batch) update_ui_callback(f"Processing {len(job_listings)} listings with AI. Please wait...") self.log(f"Creating tasks for {len(job_listings)} job listings") tasks = [self.process_single_listing(job_id, job_text, job_html, resume, update_ui_callback) for job_id, job_text, job_html in job_listings] self.log(f"About to 'gather' {len(tasks)} tasks") # Letting the exceptions bubble up to MenuApp await asyncio.gather(*tasks) async def process_single_listing(self, job_id, job_text, job_html, resume, update_ui_callback): prompt = self.generate_prompt(job_text, job_html, resume) self.log(f"Prompt: {prompt}") # Log the prompt if not prompt: # Check if prompt is None or empty raise ValueError("Prompt is None or empty, skipping GPT request.") answer_dict = {} # Letting bubble up the potential exceptions from # the two lines below, up to process_job_listings_with_gpt answer = await self.get_gpt_response(prompt) self.db_manager.save_gpt_interaction(job_id, prompt, answer) # Attempt to load the JSON string into a Python dictionary try: answer_dict = json.loads(answer) # Show a little preview of the processed jobs update_ui_callback(f"Processed {answer_dict['company_name']} / {answer_dict['small_summary'][:50]}") except json.JSONDecodeError: self.log(f"Invalid JSON format: {answer}") self.log(f"Processed job_id: {job_id}") def read_resume_from_file(self, file_path): try: with open(file_path, 'r') as file: return file.read() except FileNotFoundError: return "Resume file not found." def generate_prompt(self, job_text, job_html, resume): # Similar to the original prompt creation logic # Ensure to return the formatted prompt string # output_format = """{ # "small_summary": "Wine and Open Source developers for C-language systems programming", # "company_name": "CodeWeavers", # "available_positions": [ # { # "position": "Wine and General Open Source Developers", # "link": "https://www.codeweavers.com/about/jobs" # } # ], # "tech_stack_description": "C-language systems programming", # "use_rails": "No", # "use_python": "No", # "remote_positions": "Yes", # "hiring_in_us": "Yes", # "how_to_apply": "Apply through our website, here is the link: https://www.codeweavers.com/about/jobs", # "back_ground_with_priority": null, # "fit_for_resume": "No", # "fit_justification": "The position is for Wine and Open Source developers, neither of which the resume has experience with. The job is remote in the US" # }""" output_format_str = os.getenv('COMMANDJOBS_OUTPUT_FORMAT') self.log(f"output_format_str: {output_format_str}") # Convert the escaped newlines back to actual newline characters output_format = output_format_str.encode().decode('unicode_escape') # self.log(f"output_format: {output_format}") roles = os.getenv('COMMANDJOBS_ROLE') job_requirement_exclusions=os.getenv('COMMANDJOBS_EXCLUSIONS') # self.log(f"job_requirement_exclusions: {job_requirement_exclusions}") ideal_job_questions_template = os.getenv('COMMANDJOBS_IDEAL_JOB_QUESTIONS') prompt_template = os.getenv('COMMANDJOBS_PROMPT') # Perform the interpolation ideal_job_questions = ideal_job_questions_template.format(job_requirement_exclusions=job_requirement_exclusions) prompt = prompt_template.format(job_html=job_html, resume=resume, roles=roles, ideal_job_questions=ideal_job_questions, output_format=output_format) return prompt async def get_gpt_response(self, prompt): response = await self.client.chat.completions.create( messages=[{"role": "user", "content": prompt}], model=os.getenv('OPENAI_GPT_MODEL'), ) self.log(f"response.choices: {response.choices}") return response.choices[0].message.content ================================================ FILE: src/menu.py ================================================ import curses import os import time from job_scraper.hacker_news.scraper import HNScraper from display_table import draw_table from database_manager import DatabaseManager from display_matching_table import MatchingTableDisplay from display_applications import ApplicationsDisplay from display_all_jobs import AllJobsDisplay from gpt_processor import GPTProcessor import asyncio import sqlite3 import logging import threading from queue import Queue from dotenv import load_dotenv from job_scraper.workday.scraper import WorkdayScraper from job_scraper.waas.work_startup_scraper import WorkStartupScraper DB_PATH='job_listings.db' class MenuApp: def __init__(self, stdscr, logger): # Load environment variables load_dotenv() required_values = ( "OPENAI_API_KEY", "OPENAI_GPT_MODEL", "BASE_RESUME_PATH", "HN_START_URL", "COMMANDJOBS_LISTINGS_PER_BATCH", ) for required_value in required_values: if not os.getenv(required_value): error_message = f''' {required_value} env variable is not set; Please check the documentation at https://github.com/nicobrenner/commandjobs?tab=readme-ov-file#configuration ''' raise ValueError(error_message) self.scraping_done_event = threading.Event() # Event to signal scraping completion self.logger = logger self.stdscr = stdscr self.setup_ncurses() self.db_path = DB_PATH self.db_manager = DatabaseManager(self.db_path) # Specify the path self.gpt_processor = GPTProcessor(self.db_manager, os.getenv('OPENAI_API_KEY')) self.resume_path = os.getenv('BASE_RESUME_PATH') self.table_display = MatchingTableDisplay(self.stdscr, self.db_path) self.all_jobs_display = AllJobsDisplay(self.stdscr, self.db_path) self.total_ai_job_recommendations = self.table_display.fetch_total_entries() self.update_processed_listings_count() self.total_listings = self.get_total_listings() env_limit = 0 if os.getenv('COMMANDJOBS_LISTINGS_PER_BATCH') is None else os.getenv('COMMANDJOBS_LISTINGS_PER_BATCH') self.listings_per_request = max(int(env_limit), 10) resume_menu = "📄 Create resume (just paste it here once)" find_best_matches_menu = "🧠 Find best matches with AI (Create your resume first)" resume_str = self.read_resume_from_file() if len(resume_str) > 0: resume_menu = "📄 Edit resume" find_best_matches_menu = f"🧠 Find best matches for resume with AI (will check {self.listings_per_request} listings at a time)" total_processed = f'{self.processed_listings_count} processed with AI so far' db_menu_item = f"💾 Navigate jobs in local db ({self.total_listings} listings, {total_processed})" ai_recommendations_menu = "😅 No job matches for your resume yet" if self.total_ai_job_recommendations > 0: ai_recommendations_menu = f"✅ {self.total_ai_job_recommendations} recommended listings, out of {total_processed}" # Fetch applied-listings count applied_count = self.db_manager.fetch_applied_listings_count() applications_menu = f"📋 Applications ({applied_count})" self.menu_items = [ applications_menu, # 1 <-- moved up ai_recommendations_menu, # 2 <-- moved up find_best_matches_menu, # 3 <-- moved up "🕸 Scrape \"Ask HN: Who's hiring?\"", # 4 "🕸 Scrape \"Work at a Startup jobs\"", # 5 "🕸 Scrape \"Workday\"", # 6 resume_menu, # 0 db_menu_item # 7 <-- moved down ] self.current_row = 0 self.display_splash_screen() self.run() def update_processed_listings_count(self): self.processed_listings_count = self.db_manager.fetch_processed_listings_count() async def process_with_gpt(self): exit_message = 'Processing completed successfully' try: self.logger.debug('Calling: self.gpt_processor.process_job_listings_with_gpt') await self.gpt_processor.process_job_listings_with_gpt(self.resume_path, update_ui_callback=self.update_status_bar) except Exception as e: self.logger.exception("Failed to process listings with GPT: %s", str(e)) exit_message = f'Failed to process listings with GPT: {str(e)}' finally: new_count = self.table_display.fetch_total_entries() if new_count > self.total_ai_job_recommendations: count_diff = new_count - self.total_ai_job_recommendations exit_message = f'Processing completed successfully. {count_diff} new matches found ({new_count} total)' else: exit_message = f'Processing completed successfully. No new matches found ({new_count} total)' return exit_message def read_resume_from_file(self): try: with open(self.resume_path, 'r') as file: return file.read() except FileNotFoundError: return '' def setup_ncurses(self): curses.curs_set(0) # Turn off cursor visibility self.stdscr.keypad(True) # Enable keypad mode curses.start_color() curses.init_pair(1, curses.COLOR_BLACK, curses.COLOR_CYAN) curses.init_pair(2, curses.COLOR_BLACK, curses.COLOR_WHITE) curses.init_pair(3, curses.COLOR_WHITE, curses.COLOR_BLUE) # Highlight color curses.init_pair(4, curses.COLOR_BLACK, curses.COLOR_WHITE) # Highlight headers color curses.init_pair(5, curses.COLOR_WHITE, curses.COLOR_MAGENTA) # Highlight headers color curses.init_pair(6, curses.COLOR_RED, curses.COLOR_BLACK) # Highlight headers color curses.init_pair(7, curses.COLOR_GREEN, curses.COLOR_BLACK) curses.init_pair(8, curses.COLOR_YELLOW, curses.COLOR_BLACK) curses.init_pair(9, curses.COLOR_BLUE, curses.COLOR_BLACK) curses.init_pair(10, curses.COLOR_MAGENTA, curses.COLOR_BLACK) curses.init_pair(11, curses.COLOR_RED, curses.COLOR_BLACK) def display_splash_screen(self): splash_text = [ " ██████╗ ██████╗ ███╗ ███╗███╗ ███╗ █████╗ ███╗ ██╗██████╗ ", "██╔════╝██╔═══██╗████╗ ████║████╗ ████║██╔══██╗████╗ ██║██╔══██╗", "██║ ██║ ██║██╔████╔██║██╔████╔██║███████║██╔██╗ ██║██║ ██║", "██║ ██║ ██║██║╚██╔╝██║██║╚██╔╝██║██╔══██║██║╚██╗██║██║ ██║", "╚██████╗╚██████╔╝██║ ╚═╝ ██║██║ ╚═╝ ██║██║ ██║██║ ╚████║██████╔╝", "╚═════╝ ╚═════╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═══╝╚═════╝ ", " ", " ██╗ ██████╗ ██████╗ ███████╗ ", " ██║██╔═══██╗██╔══██╗██╔════╝ ", " ██║██║ ██║██████╔╝███████╗ ", " ██ ██║██║ ██║██╔══██╗╚════██║ ", " ╚█████╔╝╚██████╔╝██████╔╝███████║ ", " ╚════╝ ╚═════╝ ╚═════╝ ╚══════╝ ", ] self.stdscr.clear() max_y, max_x = self.stdscr.getmaxyx() # Repeat base animation 3 times for i in range(0, 3): # Loop through color pairs 7 to 11 # defined inside setup_ncurses() for color in range(7, 12): self.stdscr.attron(curses.color_pair(color)) for i, line in enumerate(splash_text): # Calculate the starting position for each line to be centered start_x = max(0, (max_x - len(line)) // 2) self.stdscr.addstr(i + (max_y - len(splash_text)) // 2, start_x, line) self.stdscr.refresh() # 100ms per color curses.napms(100) self.stdscr.attroff(curses.color_pair(color)) self.stdscr.clear() self.stdscr.refresh() def draw_title(self, title="Command Jobs"): max_y, max_x = self.stdscr.getmaxyx() title_x = max(0, (max_x - len(title)) // 2) self.stdscr.attron(curses.A_BOLD) self.stdscr.addstr(0, title_x, title) self.stdscr.attroff(curses.A_BOLD) self.stdscr.addstr(1, 0, "-" * max_x) def draw_menu(self): # Draw title and menu items self.draw_title() h, w = self.stdscr.getmaxyx() for idx, item in enumerate(self.menu_items): x = w // 2 - len(item) // 2 y = h // 2 - len(self.menu_items) // 2 + idx if idx == self.current_row: self.stdscr.attron(curses.color_pair(1)) self.stdscr.addstr(y, x, item) self.stdscr.attroff(curses.color_pair(1)) else: self.stdscr.addstr(y, x, item) # --- Centered controls hint line --- controls = "[↑↓] Select [Enter] Go into [q] Quit to terminal" # place it two rows up from bottom hint_y = h - 2 hint_x = max(0, (w - len(controls)) // 2) self.stdscr.attron(curses.color_pair(7)) self.stdscr.addstr(hint_y, hint_x, controls[:w - hint_x - 1]) self.stdscr.attroff(curses.color_pair(7)) self.stdscr.refresh() def run(self): while True: self.draw_menu() key = self.stdscr.getch() self.handle_keypress(key) def handle_keypress(self, key): if key == curses.KEY_UP: self.current_row = max(0, self.current_row - 1) elif key == curses.KEY_DOWN: self.current_row = min(len(self.menu_items) - 1, self.current_row + 1) elif key in [curses.KEY_ENTER, 10, 13]: self.execute_menu_action() elif key == ord('q'): exit() def update_menu_items(self): # Update the total and processed listings count self.total_listings = self.get_total_listings() self.total_ai_job_recommendations = self.table_display.fetch_total_entries() self.update_processed_listings_count() # Update the resume option resume_menu = "📄 Create resume (just paste it here once)" find_best_matches_menu = "🧠 Find best matches with AI (Create your resume first)" resume_str = self.read_resume_from_file() if len(resume_str) > 0: resume_menu = "📄 Edit resume" find_best_matches_menu = f"🧠 Find best matches for resume with AI (will check {self.listings_per_request} listings at a time)" # Update menu items with the new counts total_processed = f'{self.processed_listings_count} processed with AI so far' db_menu_item = f"💾 Navigate jobs in local db ({self.total_listings} listings, {total_processed})" ai_recommendations_menu = "😅 No job matches for your resume yet" if self.total_ai_job_recommendations > 0: ai_recommendations_menu = f"✅ {self.total_ai_job_recommendations} recommended listings, out of {total_processed}" # Update the Applications counter applied_count = self.db_manager.fetch_applied_listings_count() applications_menu = f"📋 Applications ({applied_count})" # Update the relevant menu items # ----------------------------------------------- # refresh the *same* slots used in self.menu_items # 0 📋 Applications # 1 ✅ Recommended # 2 🧠 Find best matches # 3 🕸 Scrape HN ← leave untouched! # 4 🕸 Scrape W@S # 5 🕸 Scrape Workday # 6 📄 Resume ← update this one # 7 💾 Navigate DB # ----------------------------------------------- self.menu_items[0] = applications_menu self.menu_items[1] = ai_recommendations_menu self.menu_items[2] = find_best_matches_menu self.menu_items[6] = resume_menu # ← was 3 self.menu_items[7] = db_menu_item # Redraw the menu to reflect the updated items self.draw_menu() # Menu options, the number map to the self.menu_items array # eg. first option (0): self.menu_items[0] = resume_menu # = "Create or replace base resume" def execute_menu_action(self): exit_message = '' if self.current_row == 0: # 📋 Applications self.app_display = ApplicationsDisplay(self.stdscr, self.db_path) self.app_display.draw_board() elif self.current_row == 1: # ✅ Recommended listings self.table_display.draw_table() elif self.current_row == 2: # 🧠 Find best matches exit_message = asyncio.run(self.process_with_gpt()) elif self.current_row == 3: # 🕸 Scrape “Ask HN” self.start_scraping_with_status_updates() elif self.current_row == 4: # 🕸 Scrape “Work at a Startup” self.start_scraping_WaaS_with_status_updates() elif self.current_row == 5: # 🕸 Scrape “Workday” self.start_scraping_workday_with_status_updates() elif self.current_row == 6: # 📄 Resume exit_message = self.manage_resume(self.stdscr) elif self.current_row == 7: # 💾 Navigate DB self.all_jobs_display.draw_table() # redraw status / menu after the action self.stdscr.clear() self.update_menu_items() if exit_message != '': self.update_status_bar(exit_message) def display_text_with_scrolling(self, header, lines): curses.noecho() max_y, max_x = self.stdscr.getmaxyx() offset = 0 # How much we've scrolled resume_updated = False new_lines = '' while True: self.stdscr.clear() self.draw_title() # Call draw_title as a method of the class # Draw the sticky header below the title self.stdscr.attron(curses.color_pair(2)) # Apply color pair for white background self.stdscr.addstr(2, 0, header + " " * (max_x - len(header))) # Extend background to full width self.stdscr.attroff(curses.color_pair(2)) # Turn off color pair for i, line in enumerate(lines[offset:offset+max_y-5]): self.stdscr.addstr(i+3, 0, line.strip()) key = self.stdscr.getch() if key in [ord('q'), ord('Q')]: break elif key == curses.KEY_DOWN: if offset < len(lines) - max_y + 2: offset += 1 elif key == curses.KEY_UP: if offset > 0: offset -= 1 elif key in [ord('r'), ord('R')]: new_lines = self.capture_text_with_scrolling() if len(new_lines) > 0: resume_updated = new_lines != lines break return resume_updated def get_total_listings(self): """Return the total number of job listings in the database.""" conn = sqlite3.connect(self.db_path) conn.execute("PRAGMA journal_mode=WAL;") cur = conn.cursor() cur.execute("SELECT COUNT(*) FROM job_listings") total = cur.fetchone()[0] conn.close() return total def manage_resume(self, stdscr): curses.echo() resume_path = os.getenv('BASE_RESUME_PATH') resume_updated = False exit_message = 'Resume not updated' if os.path.exists(resume_path): with open(resume_path, 'r') as file: lines = file.readlines() header = "Base Resume (Press 'q' to go back, 'r' to replace):" # Use a separator for clarity resume_updated = self.display_text_with_scrolling(header, lines) else: resume_updated = self.capture_text_with_scrolling() if resume_updated: exit_message = f"Resume saved to {self.resume_path}" return exit_message def update_status_bar(self, text): max_y, max_x = self.stdscr.getmaxyx() # Ensure the status text will not overflow the screen width status_text = text[:max_x - 3] try: # Clear the previous status bar content self.stdscr.move(max_y - 1, 0) self.stdscr.clrtoeol() # Write the new status bar content self.stdscr.addstr(max_y - 1, 0, status_text, curses.color_pair(2)) self.stdscr.refresh() except curses.error: pass # Ignore the error or handle it as needed def start_scraping_with_status_updates(self): # Create a queue to receive the result from the scraping thread result_queue = Queue() # Pass self.update_status_bar as the update function to HNScraper self.scraper = HNScraper(self.db_path) # Initialize the scraper start_url = os.getenv('HN_START_URL') # Starting URL scraping_thread = threading.Thread(target=self.scraper.scrape_hn_jobs, args=( start_url, self.stdscr, self.update_status_bar, self.scraping_done_event, result_queue)) scraping_thread.start() # Call this method after the scraping is done self.scraping_done_event.wait() # Wait for the event to be set by the scraping thread # Retrieve the result from the queue new_listings_count = result_queue.get() # This will block until the result is available self.update_status_bar(f"Scraping completed {new_listings_count} new listings added") self.scraping_done_event.clear() # Clear the event for the next scraping operation def start_scraping_WaaS_with_status_updates(self): result_queue= Queue() self.scraper = WorkStartupScraper(self.db_path) scraping_thread = threading.Thread(target=self.scraper.scrape_jobs, args=(self.stdscr, self.update_status_bar, self.scraping_done_event, result_queue)) scraping_thread.start() self.scraping_done_event.wait() new_listings_count = result_queue.get() self.update_status_bar(f"Scraping of Waas completed {new_listings_count} new listings added") self.scraping_done_event.clear() time.sleep(3) self.stdscr.clear() def start_scraping_workday_with_status_updates(self): result_queue= Queue() self.scraper = WorkdayScraper(self.db_path, self.update_status_bar, self.scraping_done_event, result_queue) scraping_thread = threading.Thread(target=self.scraper.scrape) scraping_thread.start() self.scraping_done_event.wait() new_listings_count = result_queue.get() self.update_status_bar(f"Scraping of Workday completed: {new_listings_count} new listings added") self.scraping_done_event.clear() time.sleep(3) self.stdscr.clear() # Despite the name of the method, this currently # is not handling scrolling 😅 # It directs the user to paste text into the terminal # When Esc is pressed, captures the input and returns it def capture_text_with_scrolling(self): directions = "Paste your resume text, then Press the 'Esc' key to finish and save" curses.curs_set(1) # Show cursor self.stdscr.keypad(True) # Enable keypad mode curses.noecho() # Don't echo keypresses curses.raw() # Raw mode - get all inputs self.stdscr.clear() # Clear the screen self.stdscr.scrollok(True) # Enable scrolling in the window text = [] y, x = 0, 0 # Initial position max_y, max_x = self.stdscr.getmaxyx() # This loop "listens" for keyboard input while True: self.stdscr.addstr(0, 0, directions, curses.A_REVERSE) try: char = self.stdscr.get_wch() # Get character or key press except AttributeError: # To be able to handle utf8, we need ncurses to have # the stdscr.get_wch() method available self.stdscr.addstr(0, 0, "Error, app needs stdscr.get_wch() method", curses.A_REVERSE) return '' if char == '\x1b': # Escape key pressed break elif char == '\n': # Handle newline text.append('\n') y += 1 x = 0 if y >= max_y - 1: self.stdscr.scroll(1) y -= 1 elif isinstance(char, str): # Regular character input if x >= max_x - 1: # Move to next line if at the end y += 1 x = 0 if y >= max_y - 1: self.stdscr.scroll(1) y -= 1 text.append(char) try: self.stdscr.addstr(y, x, char) except curses.error: pass # Ignore errors potentially caused by edge cases in window size x += 1 self.stdscr.refresh() input_lines = ''.join(text) if text != []: with open(self.resume_path, 'w') as file: file.writelines(input_lines) curses.curs_set(0) # hide cursor again return input_lines # Ensure logging is configured to write to a file or standard output logging.basicConfig(filename='application.log', level=logging.DEBUG, format='%(asctime)s %(levelname)s %(name)s %(message)s') logger = logging.getLogger(__name__) def main(stdscr): global logger app = MenuApp(stdscr, logger) app.run() # Ensuring app.run is called to start the application loop if __name__ == "__main__": curses.wrapper(main) ================================================ FILE: src/migrations/000_create_initial_tables.py ================================================ # src/migrations/000_create_initial_tables.py import sqlite3 import os # two levels up from this file's folder, then job_listings.db: DB_PATH = os.path.abspath( os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, 'job_listings.db') ) def table_exists(cursor, table_name): cursor.execute( "SELECT name FROM sqlite_master " "WHERE type='table' AND name=?", (table_name,) ) return cursor.fetchone() is not None def main(): conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() # 1) job_listings if not table_exists(cursor, 'job_listings'): print("Creating table job_listings…") cursor.execute(''' CREATE TABLE job_listings ( id INTEGER PRIMARY KEY AUTOINCREMENT, original_text TEXT, original_html TEXT, source TEXT, external_id TEXT UNIQUE ) ''') # 2) gpt_interactions if not table_exists(cursor, 'gpt_interactions'): print("Creating table gpt_interactions…") cursor.execute(''' CREATE TABLE gpt_interactions ( id INTEGER PRIMARY KEY, job_id INTEGER, prompt TEXT, answer TEXT ) ''') conn.commit() conn.close() print("000_create_initial_tables.py completed.") if __name__ == "__main__": main() ================================================ FILE: src/migrations/001_add_discarded_applied.py ================================================ # src/migrations/001_add_discarded_applied.py import sqlite3 DB_PATH = 'job_listings.db' # <-- adjust if you use a different path def column_exists(cursor, table_name, column_name): cursor.execute(f"PRAGMA table_info({table_name})") return any(col[1] == column_name for col in cursor.fetchall()) def main(): conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() if not column_exists(cursor, 'job_listings', 'discarded'): print("Adding 'discarded' column...") cursor.execute("ALTER TABLE job_listings ADD COLUMN discarded INTEGER DEFAULT 0") if not column_exists(cursor, 'job_listings', 'applied'): print("Adding 'applied' column...") cursor.execute("ALTER TABLE job_listings ADD COLUMN applied INTEGER DEFAULT 0") conn.commit() conn.close() print("Migration completed.") if __name__ == "__main__": main() ================================================ FILE: src/migrations/002_create_application_notes.py ================================================ # src/migrations/002_create_application_notes.py import sqlite3 DB = 'job_listings.db' def column_exists(cur, table, column): cur.execute(f"PRAGMA table_info({table})") return any(r[1] == column for r in cur.fetchall()) def main(): conn = sqlite3.connect(DB) cur = conn.cursor() # Ensure applied column exists if not column_exists(cur, 'job_listings', 'applied'): cur.execute("ALTER TABLE job_listings ADD COLUMN applied INTEGER DEFAULT 0") # Create notes table cur.execute(''' CREATE TABLE IF NOT EXISTS application_notes ( id INTEGER PRIMARY KEY AUTOINCREMENT, job_id INTEGER NOT NULL, note TEXT NOT NULL, created_at DATETIME DEFAULT CURRENT_TIMESTAMP ) ''') conn.commit() conn.close() print("✔️ Migration 002 complete") if __name__ == "__main__": main() ================================================ FILE: src/migrations/003_add_applied_date.py ================================================ # src/migrations/003_add_applied_date.py import sqlite3 DB = 'job_listings.db' def column_exists(cur, table, column): cur.execute(f"PRAGMA table_info({table})") return any(r[1] == column for r in cur.fetchall()) def main(): conn = sqlite3.connect(DB) cur = conn.cursor() if not column_exists(cur, 'job_listings', 'applied_date'): print("Adding applied_date column…") # store date in ISO YYYY-MM-DD cur.execute("ALTER TABLE job_listings ADD COLUMN applied_date TEXT") conn.commit() conn.close() print("✔️ Migration 003 complete") if __name__ == "__main__": main() ================================================ FILE: src/migrations/004_migrate_applications_table.py ================================================ # src/migrations/004_migrate_applications_table.py import sqlite3 import os import sys DB = 'job_listings.db' def table_exists(cur, name): cur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=?", (name,)) return cur.fetchone() is not None def column_list(cur, table): cur.execute(f"PRAGMA table_info({table})") return [row[1] for row in cur.fetchall()] def main(): if not os.path.exists(DB): print(f"Error: database file not found at {DB}", file=sys.stderr) sys.exit(1) conn = sqlite3.connect(DB) cur = conn.cursor() # If we've already migrated (i.e. new schema is present), skip if table_exists(cur, 'applications') and 'application_id' in column_list(cur, 'application_notes'): print("✔️ Migration 004 already applied, skipping.") return try: print("Running Migration 004…") conn.executescript(""" PRAGMA foreign_keys = OFF; CREATE TABLE IF NOT EXISTS applications ( id INTEGER PRIMARY KEY AUTOINCREMENT, job_id INTEGER NOT NULL, status TEXT NOT NULL DEFAULT 'Open', created_at TEXT NOT NULL DEFAULT (datetime('now')), updated_at TEXT NOT NULL DEFAULT (datetime('now')), FOREIGN KEY(job_id) REFERENCES job_listings(id) ); INSERT OR IGNORE INTO applications (job_id, status, created_at, updated_at) SELECT id, 'Open', applied_date, applied_date FROM job_listings WHERE applied = 1 AND applied_date IS NOT NULL; ALTER TABLE application_notes RENAME TO _old_notes; CREATE TABLE IF NOT EXISTS application_notes ( id INTEGER PRIMARY KEY AUTOINCREMENT, application_id INTEGER NOT NULL, note TEXT NOT NULL, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, FOREIGN KEY(application_id) REFERENCES applications(id) ); INSERT INTO application_notes (application_id, note, created_at) SELECT a.id, n.note, n.created_at FROM _old_notes AS n JOIN applications AS a ON n.job_id = a.job_id; DROP TABLE _old_notes; PRAGMA foreign_keys = ON; """) conn.commit() print("✅ Migration 004 complete!") except Exception as e: conn.rollback() print("❌ Migration 004 failed:", e, file=sys.stderr) sys.exit(1) finally: conn.close() if __name__ == "__main__": main() ================================================ FILE: src/migrations/005_migrate_old_notes.py ================================================ # src/migrations/005_replace_notes_table.py import sqlite3 import sys import os DB = 'job_listings.db' def table_exists(cur, name): cur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=?", (name,)) return cur.fetchone() is not None def main(): if not os.path.exists(DB): print(f"Error: database file not found at {DB}", file=sys.stderr) sys.exit(1) conn = sqlite3.connect(DB) cur = conn.cursor() try: # only run if _old_notes exists if table_exists(cur, '_old_notes'): # drop the empty new notes table if table_exists(cur, 'application_notes'): print("Dropping empty application_notes…") cur.execute("DROP TABLE application_notes") # rename the old one into place print("Renaming _old_notes → application_notes…") cur.execute("ALTER TABLE _old_notes RENAME TO application_notes") # make sure the schema is what we expect (you can add more PRAGMAs here) conn.commit() print("✅ Migration 005 complete") else: print("⚠️ _old_notes not found, skipping migration 005") except Exception as e: conn.rollback() print("❌ Migration 005 failed:", e, file=sys.stderr) sys.exit(1) finally: conn.close() if __name__ == "__main__": main() ================================================ FILE: src/migrations/006_unique_applications_job_id.py ================================================ # src/migrations/006_unique_applications_job_id.py import sqlite3 import sys import os DB_PATH = 'job_listings.db' def main(db_path): if not os.path.exists(db_path): print(f"Error: database file not found at {db_path}", file=sys.stderr) sys.exit(1) conn = sqlite3.connect(db_path) try: cur = conn.cursor() cur.executescript(""" PRAGMA foreign_keys = OFF; BEGIN; /* 1) Create a fresh table with the exact same schema as `applications`, except named `applications_new`. We explicitly include the `id` column so we can re-insert with the same primary keys. */ CREATE TABLE IF NOT EXISTS applications_new ( id INTEGER PRIMARY KEY, -- we'll preserve the old PK job_id INTEGER NOT NULL UNIQUE, status TEXT NOT NULL DEFAULT 'Open', created_at TEXT NOT NULL, updated_at TEXT NOT NULL, FOREIGN KEY(job_id) REFERENCES job_listings(id) ); /* 2) Copy in exactly one row per job_id, picking the *earliest* created_at. By selecting MIN(id) per job_id, we also pick its original PK. */ INSERT OR IGNORE INTO applications_new (id, job_id, status, created_at, updated_at) SELECT id, job_id, status, created_at, updated_at FROM applications WHERE id IN ( SELECT MIN(id) -- pick the very first row inserted for each job_id FROM applications GROUP BY job_id ); /* 3) Drop the old table and swap in the new one */ DROP TABLE applications; ALTER TABLE applications_new RENAME TO applications; COMMIT; PRAGMA foreign_keys = ON; """) conn.commit() print("✔️ Migration 006 complete: duplicates removed, original IDs preserved") except Exception as e: conn.rollback() print("❌ Migration 006 failed:", e, file=sys.stderr) sys.exit(1) finally: conn.close() if __name__ == "__main__": main(DB_PATH) ================================================ FILE: src/migrations/007_add_scraped_at_timestamp.py ================================================ # src/migrations/007_add_scraped_at_timestamp.py import sqlite3 import sys import os DB_PATH = 'job_listings.db' def main(db_path): if not os.path.exists(db_path): print(f"Error: database file not found at {db_path}", file=sys.stderr) sys.exit(1) conn = sqlite3.connect(db_path) try: cur = conn.cursor() # Check if the column already exists cur.execute("PRAGMA table_info(job_listings)") columns = [column[1] for column in cur.fetchall()] if 'scraped_at' not in columns: # Add the scraped_at column cur.execute("ALTER TABLE job_listings ADD COLUMN scraped_at TEXT") # Set a default timestamp for existing entries (Jan 1 2025) default_timestamp = "2025-01-01T00:00:00" cur.execute("UPDATE job_listings SET scraped_at = ? WHERE scraped_at IS NULL", (default_timestamp,)) conn.commit() print("✔️ Migration 007 complete: added scraped_at timestamp column") else: print("✔️ Migration 007 already applied, skipping.") except Exception as e: conn.rollback() print("❌ Migration 007 failed:", e, file=sys.stderr) sys.exit(1) finally: conn.close() if __name__ == "__main__": main(DB_PATH) ================================================ FILE: src/test_menu.py ================================================ import os import unittest from unittest.mock import patch, MagicMock from menu import MenuApp DB_PATH='test_db.db' class TestManageResume(unittest.TestCase): @patch('menu.curses') @patch('menu.os.getenv') def test_manage_resume(self, mock_getenv, mock_curses): # Mock environment variables mock_getenv.side_effect = lambda x: {'OPENAI_API_KEY': 'test_key', 'BASE_RESUME_PATH': 'temp_base_resume.txt', 'HN_START_URL': 'test_url', 'COMMANDJOBS_LISTINGS_PER_BATCH': '10', 'OPENAI_GPT_MODEL': 'gpt-3.5'}.get(x, None) # Mock stdscr object mock_stdscr = MagicMock() mock_curses.initscr.return_value = mock_stdscr mock_stdscr.getmaxyx.return_value = (100, 40) # Example values for a terminal size # Use config/base_resume.sample as the test resume test_resume_text = '' with open('config/base_resume.sample', 'r') as file: test_resume_text = file.read() # This is testing when the resume file doesn't exist # Remove test resume file, to make sure it doesn't exist temp_test_resume_path = os.getenv('BASE_RESUME_PATH') if os.path.exists(temp_test_resume_path): os.remove(temp_test_resume_path) # Mock user input sequence for getch and get_wch # And then paste the resume text + Esc ('\x1b'), to save the resume mock_stdscr.get_wch.side_effect = list(test_resume_text) + ['\x1b'] # Initialize Menu with mocked stdscr and logger logger = MagicMock() with patch.object(MenuApp, 'run', return_value=None): menu = MenuApp(mock_stdscr, logger) # Simulate calling capture_text_with_scrolling exit_message = menu.manage_resume(mock_stdscr) # Verify we got a success message self.assertEqual(exit_message, f'Resume saved to {temp_test_resume_path}') # Verify the text was saved to base_resume.txt with open(temp_test_resume_path, 'r') as file: saved_text = file.read() self.assertEqual(saved_text, test_resume_text) # Remove temp test resume file if os.path.exists(temp_test_resume_path): os.remove(temp_test_resume_path) temp_test_db_path = DB_PATH if os.path.exists(temp_test_db_path): os.remove(temp_test_db_path) if __name__ == '__main__': unittest.main() ================================================ FILE: src/truncate_tables.py ================================================ import sqlite3 # Replace 'job_listings.db' with the correct path to your database file db_path = 'job_listings.db' def truncate_tables(database_path): # Connect to the SQLite database conn = sqlite3.connect(database_path) cursor = conn.cursor() # SQL commands to truncate tables truncate_gpt_interactions = "DELETE FROM gpt_interactions;" # truncate_job_listings = "DELETE FROM job_listings;" try: # Execute the SQL commands cursor.execute(truncate_gpt_interactions) # cursor.execute(truncate_job_listings) # Commit the changes conn.commit() print("Tables truncated successfully.") except sqlite3.Error as e: print(f"An error occurred: {e}") finally: # Close the connection conn.close() # Call the function to truncate tables truncate_tables(db_path) ================================================ FILE: tests/__init__.py ================================================ ================================================ FILE: tests/test_workday_scraper.py ================================================ import pytest from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.chrome.service import Service from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from webdriver_manager.chrome import ChromeDriverManager from selenium.webdriver.chrome.options import Options from selenium.common.exceptions import TimeoutException from job_scraper.scraper_selectors.workday_selectors import WorkDaySelectors from job_scraper.utils import get_workday_company_urls @pytest.fixture(scope="module") def selenium_driver(): chrome_options = Options() chrome_options.add_argument("--headless") chrome_options.add_argument("--no-sandbox") chrome_options.add_argument("--disable-dev-shm-usage") chrome_options.add_argument("--disable-gpu") driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options) yield driver driver.quit() @pytest.mark.parametrize( "company_name, url", list(get_workday_company_urls().items()) ) def test_job_listing_xpath_present(selenium_driver, company_name, url): selenium_driver.get(url) wait = WebDriverWait(selenium_driver, 10) try: wait.until(EC.presence_of_element_located((By.XPATH, WorkDaySelectors.JOB_LISTING_XPATH))) except TimeoutException: pytest.fail(f"FAIL: JOB_LISTING_XPATH not found for {company_name}")