Repository: gleitz/howdoi
Branch: master
Commit: 033dd3ced067
Files: 72
Total size: 227.9 KB

Directory structure:
gitextract_bgds9lt_/

├── .flake8
├── .flake8rc
├── .gitattributes
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug-report.md
│   │   ├── documentation.md
│   │   ├── feature.md
│   │   ├── help-and-asking-questions.md
│   │   └── miscellaneous.md
│   ├── PULL_REQUEST_TEMPLATE.md
│   └── workflows/
│       ├── docs.yml
│       ├── early-warning.yml
│       ├── node.js.yml
│       ├── python-non-master.yml
│       └── python.yml
├── .gitignore
├── .mypy.ini
├── .pre-commit-config.yaml
├── .pylintrc
├── CHANGES.txt
├── LICENSE.txt
├── MANIFEST.in
├── README.md
├── docs/
│   ├── contributing.md
│   ├── contributing_docs.md
│   ├── contributing_to_howdoi.md
│   ├── development_env.md
│   ├── extension_dev.md
│   ├── howdoi_advanced_usage.md
│   ├── index.md
│   ├── introduction.md
│   ├── troubleshooting.md
│   ├── usage.md
│   └── windows-contributing.md
├── extension/
│   ├── code-editor-integration/
│   │   ├── .eslintignore
│   │   ├── .eslintrc.json
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── package.json
│   │   ├── src/
│   │   │   ├── create_attributes.ts
│   │   │   ├── find_attributes.ts
│   │   │   ├── plugin.ts
│   │   │   ├── plugin_interfaces.ts
│   │   │   ├── remove_regexes.ts
│   │   │   └── test/
│   │   │       └── plugin.test.ts
│   │   └── tsconfig.json
│   ├── vscode-howdoi/
│   │   ├── .eslintrc
│   │   ├── .gitignore
│   │   ├── .vscodeignore
│   │   ├── CHANGELOG.md
│   │   ├── LICENSE.txt
│   │   ├── README.md
│   │   ├── package.json
│   │   ├── src/
│   │   │   ├── extension.ts
│   │   │   └── test/
│   │   │       ├── runTest.ts
│   │   │       └── suite/
│   │   │           ├── extension.test.ts
│   │   │           └── index.ts
│   │   └── tsconfig.json
│   └── vscode-pkg/
│       ├── README.md
│       └── howdoi-0.0.1.vsix
├── fastentrypoints.py
├── howdoi/
│   ├── __init__.py
│   ├── __main__.py
│   ├── errors.py
│   └── howdoi.py
├── mkdocs.yml
├── notebooks/
│   └── language_labelling.ipynb
├── requirements/
│   ├── common.txt
│   ├── dev.txt
│   └── prod.txt
├── requirements.txt
├── setup.py
└── test_howdoi.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .flake8
================================================
[flake8]
max-line-length = 119
ignore = E9,F63,F7,F82,E402


================================================
FILE: .flake8rc
================================================
[flake8]
max-line-length = 119
ignore = E9,F63,F7,F82,E402


================================================
FILE: .gitattributes
================================================
cache_html/* linguist-vendored
*.html linguist-language=Python
*.ipynb linguist-documentation


================================================
FILE: .github/ISSUE_TEMPLATE/bug-report.md
================================================
---
name: Bug Report
about: Report a bug encountered with howdoi
labels: bug
---

#### What happened:

#### What you expected to happen:

#### Output with `--explain`
Please provide the output of running the same command with the `--explain` flag, like so:

```
$ howdoi YOUR_COMMAND --explain
```

================================================
FILE: .github/ISSUE_TEMPLATE/documentation.md
================================================
---
name: Documentation
about: If your issue is related to creating new or fixing the existing documentation.
title: ''
labels: ''
assignees: joeyouss

---

### Description 
(Describe in detail what you want to work on)

### Additional Comments (if any)
### Have you read the [Contributing Guidelines on Pull Requests](https://github.com/gleitz/howdoi/blob/master/docs/contributing_to_howdoi.md)?

(Write your answer here.)


================================================
FILE: .github/ISSUE_TEMPLATE/feature.md
================================================
---
name: Feature
about: If your issue suggests/proposes new features which improve howdoi
title: ''
labels: ''
assignees: ''

---

### Description 
(Describe in detail what you want to work on)

### Additional Comments (if any)
### Have you read the Contributing Guidelines on Pull Requests on mkdocs?

(Write your answer here.)


================================================
FILE: .github/ISSUE_TEMPLATE/help-and-asking-questions.md
================================================
---
name: Help and asking questions
about: If you are stuck and want to ask the howdoi maintainers for help
title: ''
labels: ''
assignees: ''

---

### Description 
(Describe in detail what you are working on)

### Help wanted for
(Describe what help you need and what are the problems you are facing currently)

### Additional Comments (if any)
### Have you read the Contributing Guidelines on Pull Requests on mkdocs?

(Write your answer here.)


================================================
FILE: .github/ISSUE_TEMPLATE/miscellaneous.md
================================================
---
name: Miscellaneous
about: If your issue does not fit in other listed categories.
title: ''
labels: ''
assignees: ''

---

### Description 
(Describe in detail what your issue is related to)

### Additional Comments (if any)
### Have you read the Contributing Guidelines on Pull Requests on mkdocs?

(Write your answer here.)


================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
## Description:

- Tasks solved

- Links to issues solved

## Pull Request type:

- Bug fixes
- New feature
- Improvement
- Refactoring
- Documentation update
- Security fix

## How to test:

Please provide detailed instructions for testing your changes locally, including expected response/behavior.

## Pull Request checklist:

- [ ] Read the [contributing_to_howdoi.md](https://github.com/gleitz/howdoi/blob/master/docs/contributing_to_howdoi.md)
- [ ] Attach screenshots of expected behavior.
- [ ] The changes pass tests locally (`nose2`).
- [ ] There are no linting errors (`python setup.py lint`).
- [ ] The changes don't break existing features.
- [ ] Check that there are no confidential files like `.env` included.
- [ ] Request review from the maintainers.
- [ ] For bug fixes or changes to directory structure, make sure docs are updated.

## Known bugs (if any):

If there are bugs in your current changes you can still open the PR and mention the bugs you found. Propose further changes that can help fix bugs in your current changes.


================================================
FILE: .github/workflows/docs.yml
================================================
# Builds the MkDocs documentation and deploys it to GitHub Pages on pushes to master or docs

name: MkDocs CI

on:
  push:
    branches: [ master, docs ]

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2

    - name: Set up Python
      uses: actions/setup-python@v2
      with:
        python-version: '3.11'

    - name: Install Pip
      run:  python -m pip install --upgrade pip

    - name: Install dependencies
      run: pip install mkdocs && pip install mkdocs-material markdown-include

    - name: Build docs
      run:  python -m mkdocs build

    - name: Deploy MkDocs
      uses: mhausenblas/mkdocs-deploy-gh-pages@1.17
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        EXTRA_PACKAGES: "gcc libxml2-dev libxslt-dev musl-dev"


================================================
FILE: .github/workflows/early-warning.yml
================================================
name: Early Warning Notifier

on:
  push:
    branches: master
  release:
    types: [published]
  schedule:
    - cron: "0 0 * * *"

jobs:
  sanity-check:
    runs-on: ubuntu-latest
    name: Sanity Check
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-python@v2
        with:
          python-version: '3.11'

      - name: Setup environment
        run: pip install -r requirements.txt

      - name: Perform check
        run: python -m howdoi --sanity-check --explain

      - name: Notify if check does not pass
        if: ${{ failure() }}
        uses: dawidd6/action-send-mail@v2
        with:
          # mail server settings
          server_address: ${{ secrets.EMAIL_SERVER_ADDRESS }}
          server_port: ${{ secrets.EMAIL_SERVER_PORT }}
          # user credentials
          username: ${{ secrets.EMAIL_USERNAME }}
          password: ${{ secrets.EMAIL_PASSWORD }}
          # email subject
          subject: ${{ github.job }} job of ${{ github.repository }} has ${{ job.status }}
          # email body as text
          body: ${{ github.job }} job in workflow ${{ github.workflow }} of ${{ github.repository }} has status ${{ job.status }}
          # comma-separated string, send email to
          to: gleitz@mit.edu
          # from email name
          from: Howdoi Early Warning System


================================================
FILE: .github/workflows/node.js.yml
================================================
# This workflow will do a clean install of node dependencies, build the source code and run tests across different versions of node
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-nodejs-with-github-actions

name: Node.js CI

on:
  push:
    branches: [ master, vscode-ext]
    paths: [ 'extension/**']
  pull_request:
    branches: [ master ]
    paths: [ 'extension/**']
  
jobs:
  
    job_1:
      name: Plugin

      runs-on: macos-latest

      strategy:
        matrix:
          node-version: [10.x, 12.x, 14.x]
      
      steps:
        - uses: actions/checkout@v2
        - name: Use Node.js ${{ matrix.node-version }}
          uses: actions/setup-node@v1
          with:
            node-version: ${{ matrix.node-version }}
        - run: |
            npm ci
            npm run build --if-present
            npm run lint
            npm test
          working-directory: ./extension/code-editor-integration
    
    job_2:
      name: Vscode Ext

      runs-on: macos-latest

      strategy:
        matrix:
          node-version: [10.x, 12.x, 14.x]

      steps:
        - uses: actions/checkout@v2
        - name: Use Node.js ${{ matrix.node-version }}
          uses: actions/setup-node@v1
          with:
            node-version: ${{ matrix.node-version }}
        - run: |
            npm ci
            npm run build --if-present
            npm run precompile
            npm run lint
            npm test
          working-directory: ./extension/vscode-howdoi


================================================
FILE: .github/workflows/python-non-master.yml
================================================
name: Python CI (branches)

on:
  push:
    branches:
      - '*'
      - '!master'
  pull_request:
    branches:
      - 'master'

jobs:
  build:

    if: github.ref != 'refs/heads/master'
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [3.7, 3.8, 3.9, '3.10', '3.11']

    steps:
    - uses: actions/checkout@v2
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        if [ -f requirements/dev.txt ]; then pip install -r requirements/dev.txt; fi
    - name: Lint with flake8
      run: |
        flake8 . --count --show-source --statistics
    - name: Lint with pylint
      run: |
        pylint howdoi *.py --rcfile=.pylintrc
    - name: Test with nose2
      run: |
        nose2


================================================
FILE: .github/workflows/python.yml
================================================
name: Python CI

on:
  push:
    branches:
      - master

jobs:
  build:

    if: github.ref == 'refs/heads/master'
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [3.7, 3.8, 3.9, '3.10', '3.11']

    steps:
    - uses: actions/checkout@v2
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        if [ -f requirements/dev.txt ]; then pip install -r requirements/dev.txt; fi
    - name: Lint with flake8
      run: |
        flake8 . --count --show-source --statistics
    - name: Lint with pylint
      run: |
        pylint howdoi *.py --rcfile=.pylintrc
    - name: Test with nose2
      run: |
        nose2


================================================
FILE: .gitignore
================================================

# Created by https://www.gitignore.io/api/python,osx,windows,linux,pycharm

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
.idea/
# C extensions
*.so

# Distribution / packaging
.Python
env/
site/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask instance folder
instance/

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version


### OSX ###
.DS_Store
.AppleDouble
.LSOverride

# Icon must end with two \r
Icon


# Thumbnails
._*

# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns

# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk


### Windows ###
# Windows image file caches
Thumbs.db
ehthumbs.db

# Folder config file
Desktop.ini

# Recycle Bin used on file shares
$RECYCLE.BIN/

# Windows Installer files
*.cab
*.msi
*.msm
*.msp

# Windows shortcuts
*.lnk


### Linux ###
*~

# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*

# KDE directory preferences
.directory

# Linux trash folder which might appear on any partition or disk
.Trash-*


### PyCharm ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff:
.idea/workspace.xml
.idea/tasks.xml
.idea/dictionaries
.idea/vcs.xml
.idea/jsLibraryMappings.xml
.vscode

# Sensitive or high-churn files:
.idea/dataSources.ids
.idea/dataSources.xml
.idea/sqlDataSources.xml
.idea/dynamic.xml
.idea/uiDesigner.xml

# Gradle:
.idea/gradle.xml
.idea/libraries

# Mongo Explorer plugin:
.idea/mongoSettings.xml

## File-based project format:
*.iws

## Plugin-specific files:

# IntelliJ
/out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

### Miscellany ###
.ropeproject
README.html
.idea
HOW_TO_RELEASE.txt
.mypy_cache/

================================================
FILE: .mypy.ini
================================================
[mypy]
ignore_missing_imports = True

================================================
FILE: .pre-commit-config.yaml
================================================
repos:
  - repo: https://github.com/pycqa/flake8
    rev: 3.9.2
    hooks:
      - id: flake8
        args:
          - "--config=.flake8rc"

  - repo: https://github.com/PyCQA/pylint/
    rev: v2.15.10
    hooks:
      - id: pylint
        args:
          - "howdoi *.py --rcfile=.pylintrc"


================================================
FILE: .pylintrc
================================================
[MASTER]

# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loaded into the active Python interpreter and may
# run arbitrary code.
extension-pkg-whitelist=

# Add files or directories to the blacklist. They should be base names, not
# paths.
ignore=CVS

# Add files or directories matching the regex patterns to the blacklist. The
# regex matches against base names, not paths.
ignore-patterns=

# Python code to execute, usually for sys.path manipulation such as
# pygtk.require().
#init-hook=

# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
# number of processors available to use.
jobs=1

# Control the amount of potential inferred values when inferring a single
# object. This can help the performance when dealing with large functions or
# complex, nested conditions.
limit-inference-results=100

# List of plugins (as comma separated values of python modules names) to load,
# usually to register additional checkers.
load-plugins=

# Pickle collected data for later comparisons.
persistent=yes

# Specify a configuration file.
#rcfile=

# When enabled, pylint would attempt to guess common misconfiguration and emit
# user-friendly hints instead of false-positive error messages.
suggestion-mode=yes

# Allow loading of arbitrary C extensions. Extensions are imported into the
# active Python interpreter and may run arbitrary code.
unsafe-load-any-extension=no


[MESSAGES CONTROL]

# Only show warnings with the listed confidence levels. Leave empty to show
# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
confidence=

# Disable the message, report, category or checker with the given id(s). You
# can either give multiple identifiers separated by comma (,) or put this
# option multiple times (only on the command line, not in the configuration
# file where it should appear only once). You can also use "--disable=all" to
# disable everything first and then reenable specific checks. For example, if
# you want to run only the similarities checker, you can use "--disable=all
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W".
disable=C0413,C0111,C0115,C0116

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifiers separated by comma (,) or put this option
# multiple times (only on the command line, not in the configuration file where
# it should appear only once). See also the "--disable" option for examples.
enable=c-extension-no-member


[REPORTS]

# Python expression which should return a score less than or equal to 10 (10
# is the highest score). You have access to the variables 'error', 'warning',
# 'refactor', 'convention' and 'statement', which respectively contain the
# number of messages in each category and the total number of statements
# analyzed. This is used by the global evaluation report (RP0004).
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)

# Template used to display messages. This is a python new-style format string
# used to format the message information. See doc for all details.
#msg-template=

# Set the output format. Available formats are text, parseable, colorized, json
# and msvs (visual studio). You can also give a reporter class, e.g.
# mypackage.mymodule.MyReporterClass.
output-format=text

# Tells whether to display a full report or only the messages.
reports=no

# Activate the evaluation score.
score=yes


[REFACTORING]

# Maximum number of nested blocks for function / method body
max-nested-blocks=5

# Complete name of functions that never return. When checking for
# inconsistent-return-statements if a never returning function is called then
# it will be considered as an explicit return statement and no message will be
# printed.
never-returning-functions=sys.exit


[LOGGING]

# Logging modules to check that the string format arguments are in logging
# function parameter format.
logging-modules=logging


[SPELLING]

# Limits count of emitted suggestions for spelling mistakes.
max-spelling-suggestions=4

# Spelling dictionary name. Available dictionaries: none. To make it work,
# install the python-enchant package.
spelling-dict=

# List of comma separated words that should not be checked.
spelling-ignore-words=

# A path to a file that contains private dictionary; one word per line.
spelling-private-dict-file=

# Tells whether to store unknown words to indicated private dictionary in
# --spelling-private-dict-file option instead of raising a message.
spelling-store-unknown-words=no


[MISCELLANEOUS]

# List of note tags to take into consideration, separated by a comma.
notes=FIXME,
      XXX,
      TODO


[TYPECHECK]

# List of decorators that produce context managers, such as
# contextlib.contextmanager. Add to this list to register other decorators that
# produce valid context managers.
contextmanager-decorators=contextlib.contextmanager

# List of members which are set dynamically and missed by pylint inference
# system, and so shouldn't trigger E1101 when accessed. Python regular
# expressions are accepted.
generated-members=

# Tells whether missing members accessed in mixin class should be ignored. A
# mixin class is detected if its name ends with "mixin" (case insensitive).
ignore-mixin-members=yes

# Tells whether to warn about missing members when the owner of the attribute
# is inferred to be None.
ignore-none=yes

# This flag controls whether pylint should warn about no-member and similar
# checks whenever an opaque object is returned when inferring. The inference
# can return multiple potential results while evaluating a Python object, but
# some branches might not be evaluated, which results in partial inference. In
# that case, it might be useful to still emit no-member and other checks for
# the rest of the inferred objects.
ignore-on-opaque-inference=yes

# List of class names for which member attributes should not be checked (useful
# for classes with dynamically set attributes). This supports the use of
# qualified names.
ignored-classes=optparse.Values,thread._local,_thread._local

# List of module names for which member attributes should not be checked
# (useful for modules/projects where namespaces are manipulated during runtime
# and thus existing member attributes cannot be deduced by static analysis. It
# supports qualified module names, as well as Unix pattern matching.
ignored-modules=

# Show a hint with possible names when a member name was not found. The aspect
# of finding the hint is based on edit distance.
missing-member-hint=yes

# The minimum edit distance a name should have in order to be considered a
# similar match for a missing member name.
missing-member-hint-distance=1

# The total number of similar names that should be taken into consideration when
# showing a hint for a missing member.
missing-member-max-choices=1


[VARIABLES]

# List of additional names supposed to be defined in builtins. Remember that
# you should avoid defining new builtins when possible.
additional-builtins=

# Tells whether unused global variables should be treated as a violation.
allow-global-unused-variables=yes

# List of strings which can identify a callback function by name. A callback
# name must start or end with one of those strings.
callbacks=cb_,
          _cb

# A regular expression matching the name of dummy variables (i.e. expected to
# not be used).
dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_

# Argument names that match this expression will be ignored. Default to name
# with leading underscore.
ignored-argument-names=_.*|^ignored_|^unused_

# Tells whether we should check for unused imports in __init__ files.
init-import=no

# List of qualified module names which can have objects that can redefine
# builtins.
redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io


[FORMAT]

# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
expected-line-ending-format=

# Regexp for a line that is allowed to be longer than the limit.
ignore-long-lines=^\s*(# )?<?https?://\S+>?$

# Number of spaces of indent required inside a hanging or continued line.
indent-after-paren=4

# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
# tab).
indent-string='    '

# Maximum number of characters on a single line.
max-line-length=119

# Maximum number of lines in a module.
max-module-lines=1000

# Allow the body of a class to be on the same line as the declaration if body
# contains single statement.
single-line-class-stmt=no

# Allow the body of an if to be on the same line as the test if there is no
# else.
single-line-if-stmt=no


[SIMILARITIES]

# Ignore comments when computing similarities.
ignore-comments=yes

# Ignore docstrings when computing similarities.
ignore-docstrings=yes

# Ignore imports when computing similarities.
ignore-imports=no

# Minimum number of lines for a similarity.
min-similarity-lines=4


[BASIC]

# Naming style matching correct argument names.
argument-naming-style=snake_case

# Regular expression matching correct argument names. Overrides argument-
# naming-style.
#argument-rgx=

# Naming style matching correct attribute names.
attr-naming-style=snake_case

# Regular expression matching correct attribute names. Overrides attr-naming-
# style.
#attr-rgx=

# Bad variable names which should always be refused, separated by a comma.
bad-names=foo,
          bar,
          baz,
          toto,
          tutu,
          tata

# Naming style matching correct class attribute names.
class-attribute-naming-style=any

# Regular expression matching correct class attribute names. Overrides class-
# attribute-naming-style.
#class-attribute-rgx=

# Naming style matching correct class names.
class-naming-style=PascalCase

# Regular expression matching correct class names. Overrides class-naming-
# style.
#class-rgx=

# Naming style matching correct constant names.
const-naming-style=UPPER_CASE

# Regular expression matching correct constant names. Overrides const-naming-
# style.
#const-rgx=

# Minimum line length for functions/classes that require docstrings, shorter
# ones are exempt.
docstring-min-length=-1

# Naming style matching correct function names.
function-naming-style=snake_case

# Regular expression matching correct function names. Overrides function-
# naming-style.
#function-rgx=

# Good variable names which should always be accepted, separated by a comma.
good-names=i,
           j,
           k,
           ex,
           Run,
           _,
           f

# Include a hint for the correct naming format with invalid-name.
include-naming-hint=no

# Naming style matching correct inline iteration names.
inlinevar-naming-style=any

# Regular expression matching correct inline iteration names. Overrides
# inlinevar-naming-style.
#inlinevar-rgx=

# Naming style matching correct method names.
method-naming-style=snake_case

# Regular expression matching correct method names. Overrides method-naming-
# style.
#method-rgx=

# Naming style matching correct module names.
module-naming-style=snake_case

# Regular expression matching correct module names. Overrides module-naming-
# style.
#module-rgx=

# Colon-delimited sets of names that determine each other's naming style when
# the name regexes allow several styles.
name-group=

# Regular expression which should only match function or class names that do
# not require a docstring.
no-docstring-rgx=^_

# List of decorators that produce properties, such as abc.abstractproperty. Add
# to this list to register other decorators that produce valid properties.
# These decorators are taken in consideration only for invalid-name.
property-classes=abc.abstractproperty

# Naming style matching correct variable names.
variable-naming-style=snake_case

# Regular expression matching correct variable names. Overrides variable-
# naming-style.
#variable-rgx=


[IMPORTS]

# Allow wildcard imports from modules that define __all__.
allow-wildcard-with-all=no

# Analyse import fallback blocks. This can be used to support both Python 2 and
# 3 compatible code, which means that the block might have code that exists
# only in one or another interpreter, leading to false positives when analysed.
analyse-fallback-blocks=no

# Deprecated modules which should not be used, separated by a comma.
deprecated-modules=optparse,tkinter.tix

# Create a graph of external dependencies in the given file (report RP0402 must
# not be disabled).
ext-import-graph=

# Create a graph of every (i.e. internal and external) dependencies in the
# given file (report RP0402 must not be disabled).
import-graph=

# Create a graph of internal dependencies in the given file (report RP0402 must
# not be disabled).
int-import-graph=

# Force import order to recognize a module as part of the standard
# compatibility libraries.
known-standard-library=

# Force import order to recognize a module as part of a third party library.
known-third-party=enchant


[CLASSES]

# List of method names used to declare (i.e. assign) instance attributes.
defining-attr-methods=__init__,
                      __new__,
                      setUp

# List of member names, which should be excluded from the protected access
# warning.
exclude-protected=_asdict,
                  _fields,
                  _replace,
                  _source,
                  _make

# List of valid names for the first argument in a class method.
valid-classmethod-first-arg=cls

# List of valid names for the first argument in a metaclass class method.
valid-metaclass-classmethod-first-arg=cls


[DESIGN]

# Maximum number of arguments for function / method.
max-args=5

# Maximum number of attributes for a class (see R0902).
max-attributes=7

# Maximum number of boolean expressions in an if statement.
max-bool-expr=5

# Maximum number of branch for function / method body.
max-branches=12

# Maximum number of locals for function / method body.
max-locals=15

# Maximum number of parents for a class (see R0901).
max-parents=7

# Maximum number of public methods for a class (see R0904).
max-public-methods=20

# Maximum number of return / yield for function / method body.
max-returns=6

# Maximum number of statements in function / method body.
max-statements=50

# Minimum number of public methods for a class (see R0903).
min-public-methods=2


[EXCEPTIONS]

# Exceptions that will emit a warning when being caught. Defaults to
# "Exception".
overgeneral-exceptions=Exception


================================================
FILE: CHANGES.txt
================================================
2.0.20
------
-  Update dependency versions
-  Add support for Python 3.10

2.0.19
------
-  Fix typo

2.0.18
------
-  Fixed issue with howdoi cache where cache misses would be printed to the console

2.0.17
------
-  New documentation and mkdocs
-  Fixed issue with how howdoi chooses the proper search engine (command line flags now override environment variables)
-  Added a search engine fallback if one of the search engines fails
-  Fixed issue with howdoi cache

2.0.16
------
-  Fix GDPR issue for those using howdoi in countries outside the US
-  Better support for using `HOWDOI_URL`

2.0.15
------
-  Add explainability with `-x` or `--explain` options
-  Better error checking for when search engines block queries
-  Using improved DuckDuckGo endpoint
-  Answer pages now fetched in parallel for speed improvement

2.0.14
------
-  Fix a number of bugs by switching from parsing Google links to looking for URLs instead

2.0.13
------
-  More permanent fix for extracting Google links

2.0.12
------
-  Hotfix for Google link formatting

2.0.11
------
-  Hotfix for Google link formatting

2.0.10
------
-  Hotfix for new Google classnames
-  Separate requirements.txt files for prod and dev

2.0.9
------
-  Cleaner command line options that also include environment variables
-  README updates

2.0.8
------
-  Fix issue for answers that have no code in the answer but code in the comments
-  Add range checks for -n and -p flags
-  Moved from Travis to Github Actions
-  Dropped Python 2.7 support

2.0.7
------
-  Update for new Google CSS style

2.0.6
------
-  Fix issue where `-a` would not return a proper response due to updated CSS on StackOverflow

2.0.5
------
-  New logo and colors!

2.0.4
------
-  Cachelib rollback to support Python 2.7
-  Better error message when Google is being blocked (for example in China)

2.0.3
------
-  Bring back Python 2.7 support (for now)

2.0.2
------
-  Fixed keep support for stashing and viewing answers

2.0.1
------
-  Added JSON output with the -j flag (great for consuming howdoi results for use in other apps)
-  Added stashing ability for saving useful answer for later (based on https://github.com/OrkoHunter/keep)
-  Added caching for tests to prevent being rate limited by Google while developing
-  Added easier method for calling howdoi when imported (howdoi.howdoi)

1.2.1
------
-  Fix dependency issue

1.2.0
------
-  Massive speed improvements of startup, answer fetching, and caching
-  Command line flags for alternate search engines
-  Remove duplicate answers

1.1.14
------
-  Links displayed with markdown syntax
-  Improved performance and caching (again)

1.1.13
------
-  Improved performance and caching
-  More friendly answer display
-  Added support for Python 3.6
-  Removed support for Python 2.6

1.1.12
------
-  Add additional search engine support

1.1.11
------
-  Fix issue with UTF-8 encoding

1.1.10
------
-  Include the link in output when asking for >1 answer
-  Compatibility with linuxbrew

1.1.9
------
-  Fix issue with upload to PyPI

1.1.8
------
-  Fix colorization when HOWDOI_COLORIZE env variable is enabled
-  Fix certificate validation when SSL disabled

1.1.7
------
-  Add Localization support with HOWDOI_LOCALIZATION env variable (Currently only pt-br and en)

1.1.6
------
-  Updates for Python3
-  Updates for caching

1.1.5
------
-  Updates for Python3
-  Fix issues with cache
-  Allow disabling SSL when accessing Google

1.1.4
------
-  Added caching

1.1.3
------
-  Added fix to handle change in Google search page HTML
-  Updated Travis CI tests

1.1.2
------
-  Compatibility fixes for Python3.2
-  Travis CI tests now being run for Python 2.6, 2.7, 3.2, and 3.3

1.1.1
------
-  Added message when question has no answer

1.1
------
-  Added multiple answers with -n/--num-answers flag
-  Added colorized output with -c/--color flag
-  Added answer link to the bottom of questions with -a/--all flag
-  Unit tests now managed through Travis CI

1.0
------
-  Added support for Python3
-  Switched to the requests library instead of urllib2
-  Project status changed to Production/Stable
-  Added troubleshooting steps to the README

0.2
------
-  Added sane flags
-  Now using ``/usr/bin/env python`` instead of ``/usr/bin/python``
-  Updated README for brew installation instructions

0.1.2
------
-  Added Windows executable
-  Updated README for pip installation instructions

0.1.1
------
-  Added to PyPI

0.1
------
-  We're doing it live!


================================================
FILE: LICENSE.txt
================================================
Copyright (C) 2012 Benjamin Gleitzman (gleitz@mit.edu)

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


================================================
FILE: MANIFEST.in
================================================
include LICENSE.txt
include README.md
include CHANGES.txt
include fastentrypoints.py
include requirements.txt
include test_howdoi.py
exclude howdoi.rb
exclude .*rc
prune page_cache
prune notebooks


================================================
FILE: README.md
================================================
<p align="center">
    <a href="https://pypi.python.org/pypi/howdoi">
        <img src="https://www.dropbox.com/s/dk13iy2uoufdwr7/HowDoIcolor512.png?raw=1" alt="Sherlock, your neighborhood command-line sloth sleuth" />
    </a>
</p>
<h1 align="center">howdoi</h1>
<h2 align="center">Instant coding answers via the command line</h2>
<p align="center"><strong>⚡ Never open your browser to look for help again ⚡</strong></p>

<p align="center">
    <a href="https://github.com/gleitz/howdoi/actions?query=workflow%3A%22Python+CI%22"><img src="https://img.shields.io/github/actions/workflow/status/gleitz/howdoi/python.yml?style=plastic&color=78dce8" alt="build status"></a>
    <a href="https://pepy.tech/project/howdoi"><img src="https://img.shields.io/pypi/dm/howdoi?style=plastic&color=ab9df2&maxAge=86400&label=downloads&query=%24.total_downloads&url=https%3A%2F%2Fapi.pepy.tech%2Fapi%2Fprojects%2Fhowdoi" alt="downloads"></a>
    <a href="https://pypi.python.org/pypi/howdoi"><img src="https://img.shields.io/pypi/pyversions/howdoi.svg?style=plastic&color=ff6188" alt="Python versions"></a>
</p>

------------------------------------------------------------------------

## Introduction to howdoi

Are you a hack programmer? Do you find yourself constantly Googling for
how to do basic programming tasks?

Suppose you want to know how to format a date in bash. Why open your
browser and read through blogs (risking major distraction) when you can
simply stay in the console and ask howdoi:

    $ howdoi format date bash
    > DATE=`date +%Y-%m-%d`

howdoi will answer all sorts of queries:

    $ howdoi print stack trace python
    > import traceback
    >
    > try:
    >     1/0
    > except:
    >     print '>>> traceback <<<'
    >     traceback.print_exc()
    >     print '>>> end of traceback <<<'
    > traceback.print_exc()

    $ howdoi convert mp4 to animated gif
    > video=/path/to/video.avi
    > outdir=/path/to/output.gif
    > mplayer "$video" \
    >         -ao null \
    >         -ss "00:01:00" \  # starting point
    >         -endpos 10 \ # duration in second
    >         -vo gif89a:fps=13:output=$outdir \
    >         -vf scale=240:180

    $ howdoi create tar archive
    > tar -cf backup.tar --exclude "www/subf3" www

[![image](http://imgs.xkcd.com/comics/tar.png)](https://xkcd.com/1168/)

## Installation

    pip install howdoi

or

    brew install howdoi

## Usage

### New to howdoi?

    howdoi howdoi

### RTFM

-   [Introduction and
    installation](http://gleitz.github.io/howdoi/introduction/)
-   [Usage](http://gleitz.github.io/howdoi/usage/)
-   [Contributing to
    howdoi](http://gleitz.github.io/howdoi/contributing_to_howdoi/)
-   [Advanced
    usage](http://gleitz.github.io/howdoi/howdoi_advanced_usage/)
-   [Troubleshooting](http://gleitz.github.io/howdoi/troubleshooting/)

### Commands

    usage: howdoi [-h] [-p POS] [-n NUM] [-a] [-l] [-c] [-x] [-C] [-j] [-v] [-e [ENGINE]]
    [--save] [--view] [--remove] [--empty] [QUERY ...]

    instant coding answers via the command line

    positional arguments:
      QUERY                 the question to answer

    optional arguments:
      -h, --help            show this help message and exit
      -p POS, --pos POS     select answer in specified position (default: 1)
      -n NUM, --num NUM     number of answers to return (default: 1)
      -a, --all             display the full text of the answer
      -l, --link            display only the answer link
      -c, --color           enable colorized output
      -x, --explain         explain how answer was chosen
      -C, --clear-cache     clear the cache
      -j, --json            return answers in raw json format
      -v, --version         display the current version of howdoi
      -e [ENGINE], --engine [ENGINE]
                            search engine for this query (google, bing, duckduckgo)
      --save, --stash       stash a howdoi answer
      --view                view your stash
      --remove              remove an entry in your stash
      --empty               empty your stash

    environment variable examples:
      HOWDOI_COLORIZE=1
      HOWDOI_DISABLE_CACHE=1
      HOWDOI_DISABLE_SSL=1
      HOWDOI_SEARCH_ENGINE=google
      HOWDOI_URL=serverfault.com

Using the howdoi stashing feature (for more advanced features view the
[keep documentation](https://github.com/OrkoHunter/keep)).

    stashing: howdoi --save QUERY
    viewing:  howdoi --view
    removing: howdoi --remove (will be prompted which answer to delete)
    emptying: howdoi --empty (empties entire stash, will be prompted to confirm)

As a shortcut, if you commonly use the same parameters each time and
don't want to type them, add something similar to your .bash_profile
(or otherwise). This example gives you 5 colored results each time.

    alias h='function hdi(){ howdoi $* -c -n 5; }; hdi'

And then to run it from the command line simply type:

    $ h format date bash

You can also search other [StackExchange
properties](https://stackexchange.com/sites#traffic) for answers:

    HOWDOI_URL=cooking.stackexchange.com howdoi make pesto

or as an alias:

    alias hcook='function hcook(){ HOWDOI_URL=cooking.stackexchange.com howdoi $* ; }; hcook'
    hcook make pesto

Other useful aliases:

    alias hless='function hdi(){ howdoi $* -c | less --raw-control-chars --quit-if-one-screen --no-init; }; hdi'

## Integrations

-   Slack integration available through
    [slack-howdoi](https://github.com/ellisonleao/slack-howdoi)
-   Telegram integration available through
    [howdoi-telegram](https://github.com/aahnik/howdoi-telegram)
-   Discord integration available through
    [discord-howdoi](https://github.com/MLH-Fellowship/0.5.1-howDoIDiscord)
-   Emacs integration available through
    [emacs-howdoi](https://blog.gleitzman.com/post/700738401851277312/howdoi-use-howdoi-in-emacs)
-   VSCode integration available on the
    [marketplace](https://marketplace.visualstudio.com/items?itemName=howdoi-org.howdoi)
-   Alfred integration available through
    [alfred-howdoi](https://github.com/gleitz/alfred-howdoi)

## Contributors

-   Benjamin Gleitzman ([\@gleitz](http://twitter.com/gleitz))
-   Yanlam Ko ([\@YKo20010](https://github.com/YKo20010))
-   Diana Arreola ([\@diarreola](https://github.com/diarreola))
-   Eyitayo Ogunbiyi ([\@tayoogunbiyi](https://github.com/tayoogunbiyi))
-   Chris Nguyen ([\@chrisngyn](https://github.com/chrisngyn))
-   Shageldi Ovezov ([\@ovezovs](https://github.com/ovezovs))
-   Mwiza Simbeye
    ([\@mwizasimbeye11](https://github.com/mwizasimbeye11))
-   Shantanu Verma ([\@SaurusXI](https://github.com/SaurusXI))
-   Sheza Munir ([\@ShezaMunir](https://github.com/ShezaMunir))
-   Jyoti Bisht ([\@joeyouss](https://github.com/joeyouss))
-   And [more!](https://github.com/gleitz/howdoi/graphs/contributors)

## How to contribute

We welcome contributions that make howdoi better and improve the
existing functionalities of the project. We have created a separate
[guide to contributing to
howdoi](http://gleitz.github.io/howdoi/contributing_to_howdoi/) that explains
how to get up and running with your first pull request.

## Notes

-   Works with Python 3.7 and newer. Unfortunately Python 2.7 support
    has been discontinued :(
-   Special thanks to Rich Jones
    ([\@miserlou](https://github.com/miserlou)) for the idea
-   More thanks to [Ben Bronstein](https://benbronstein.com/) for the
    logo


================================================
FILE: docs/contributing.md
================================================
## General guidelines

Be sure to go through these items before creating a new issue:

1. Check the [existing issues](https://github.com/gleitz/howdoi/issues) to see if anyone is already working on, or has already worked on, your issue or a similar one.

2. If there are no current or past issues similar to yours, be sure to give a **complete description** when creating it.

3. Wait for feedback on the issue before starting to work.

!!! tip
    Include instructions on how to reproduce the bug you found or specific use cases of a requested feature.

!!! Note
    Follow Github's [guide to collaborating efficiently](https://lab.github.com/githubtraining/introduction-to-github).


## Setting up development environment

Clone the git repository
```bash
$ git clone https://github.com/gleitz/howdoi.git
```

Setup and activate a virtual environment
```bash
$ python3 -m venv .venv
$ source .venv/bin/activate
```

Install packages
```bash
$ pip install -r requirements.txt
```

## Running howdoi

Run on the command-line
```bash
python -m howdoi QUERY
```

!!! note
    If you try running `python howdoi/howdoi.py` (without `-m`) you might get `ValueError: Attempted relative import in non-package`.

If you want to use howdoi from within a python script, just pass your query to `howdoi.howdoi()`

```python
from howdoi import howdoi

query = "for loop python"
output = howdoi.howdoi(query)
```

Or parse it yourself and pass the arguments to `howdoi.howdoi()`
```python
from howdoi import howdoi

query = "for loop python"
parser = howdoi.get_parser()
args = vars(parser.parse_args(query.split(' ')))

output = howdoi.howdoi(args)
```

!!! attention
    Parsing queries yourself is the older way to pass in queries and may be deprecated in the future. Prefer the first example.


## Submitting Pull Requests
Before PRs are accepted they must pass all [Travis tests](https://travis-ci.org/gleitz/howdoi) and not have any `flake8` or `pylint` warnings or errors.

#### Testing
Howdoi uses python's [`unittest`](https://docs.python.org/3/library/unittest.html) library for unit testing. Run the unit tests locally

```bash
$ python -m test_howdoi
```

It's also possible to run only specific tests

```bash
$ python -m unittest test_howdoi.TestClass.test_method
```

Make sure all tests pass before submitting a PR.

!!! tip
    Remember to run the tests while inside the virtual environment (run `source .venv/bin/activate` to activate it).

#### Linting
Run linting locally with [`flake8`](https://flake8.pycqa.org/en/latest/)
```bash
$ flake8
```
Or [`pylint`](https://www.pylint.org/)
```bash
$ pylint *
```

!!! tip
    Howdoi uses vanilla configuration files for both linters (`.flake8rc` and `.pylintrc` in the root directory), but with a max line length of 119 characters.


## Documentation

To get started building the docs first download `mkdocs`

```bash
$ pip install mkdocs-material markdown-include
```

#### Commands

* `python -m mkdocs new [dir-name]` - Create a new project.
* `python -m mkdocs serve` - Start the live-reloading docs server.
* `python -m mkdocs build` - Build the documentation site.
* `python -m mkdocs help` - Print this help message.


#### Project layout

    mkdocs.yml    # The configuration file.
    docs/
        index.md  # The documentation homepage.
        ...       # Other markdown pages, images and other files.


#### Here are some example alerts you can use
These are from the [Admonition](https://python-markdown.github.io/extensions/admonition/) extension

!!! attention
    attention alert

!!! caution
    caution alert

!!! warning
    warning alert

!!! danger
    danger alert

!!! error
    error alert

!!! hint
    hint alert

!!! important
    important alert

!!! tip
    tip alert

!!! note
    note alert

!!! Custom alert
    Custom alert

Alternatively you can use the `!!! type "Custom Title"` format to get the correct type emoji and use any title you want like so:

!!! tip "Tip type alert but with a custom title"
    they're good aren't they

#### Include source code in 1 line of code

To import code we can use this syntax inside of a code block with the language label:  "{\!path/to/file\!}".

Here's `../howdoi/__init__.py`

```Python
{!../howdoi/__init__.py!}
```

#### Here is a choice tab
Proper syntax highlighted code blocks in these don't work the way you'd think and I don't know how to get them to work normally without some extension

=== "Python"
    To do x in python use this code:

    ```python
    def main():
        print("Hello world")
    if __name__ == "__main__":
        main()
    ```

=== "Golang"
    To do x in golang use this code:

    ```go
    package main
    import "fmt"
    func main() {
        fmt.Println("Hello world")
    }
    ```


You can include the contents of a file
```Python
{!../howdoi/__init__.py!}
```


================================================
FILE: docs/contributing_docs.md
================================================
If you would like to improve the existing documentation, you can do so by using `mkdocs`. Howdoi uses mkdocs to render its documentation. Steps to contribute to docs:

- Every step from [Contributing to howdoi](http://gleitz.github.io/howdoi/contributing_to_howdoi/) remains the same with additional requirements of installing and building mkdocs.
- First, install mkdocs by running the following command:
  ```
  pip install mkdocs
  ```
- You can learn about mkdocs usage from [mkdocs documentation](https://www.mkdocs.org/user-guide/).
- You can propose your documentation by [creating a new issue](https://github.com/gleitz/howdoi/issues/new/choose).
- Once approved in the issue, you can create a PR with modifications to the mkdocs markdown.
- Next, create a new branch and go to the folder `howdoi/docs/` and add a .md file.
- Go to `mkdocs.yml` and add the name of your added .md file in `nav`
- To see the changes in your local server, go to your terminal and in this directory run :

```
   $ mkdocs build
   $ mkdocs serve
```

- Once done, make a PR for the same and wait for it to be reviewed.


================================================
FILE: docs/contributing_to_howdoi.md
================================================
For beginners, navigating the codebase and finding your way around the documentation can be difficult. This page will help you understand everything about contributing to howdoi and the best practices in open source as well.
You can either contribute code to Howdoi (explained on this page) or contribute documentation (explained on next page)

#### Setting up the development environment

Follow the page [Setting up the development environment](http://gleitz.github.io/howdoi/development_env/) for setting up the development environment for Howdoi.

#### Finding your first issue

- Go to issues in the [howdoi repo](https://github.com/gleitz/howdoi).
- Find the issues which you might be interested to work on. Or, you can also come up with your own ideas of improving howdoi.
- After finding the issue you are interested in: if the issue is an existing one, comment on the issue and ask for it to be assigned to you. Or, if the issue is new and unlisted, create a new issue, fill in all the information requested by the issue template provided by howdoi, and ask for it to be assigned to you.

- After receiving confirmation, start working on the issue and whenever and wherever help is needed, comment on the issue itself describing your query in detail.
- A good guide on how to collaborate efficiently can be found [here](https://lab.github.com/githubtraining/introduction-to-github){:target="\_blank"}.

#### Making a Pull request (PR)

- After you have worked on the issue and fixed it, we need to merge it from your forked repository into the howdoi repository. This is done by making a PR.
- You can search
  ```
  howdoi create a pull request on Github
  ```
  in your command line and follow the steps written in it.
- Each PR made should pass all the tests and should not have any flake8 or pylint errors. Github runs tests on each PR, but before that, you should run `python setup.py lint` which will run pylint and flake8.

- Once your commit passes all the tests, make a PR and wait for it to be reviewed and merged.

#### Asking for help

At times, help is needed while solving the issue. We recommend the following step for asking for help when you get stuck:

1. Read the howdoi docs and the howdoi Github repository to see if your question has already been answered.
2. Comment on the issue you are working describing in detail what problems you are facing.
3. Make sure to write your query in detail and, if it is a bug, include steps to reproduce it.
4. If you are not working on any issue and have a question to be answered, open a new issue on Github and wait for a reply on it.


================================================
FILE: docs/development_env.md
================================================
- Clone the howdoi repository:

  ```
  $ git clone https://github.com/gleitz/howdoi.git
  ```

- To see how to set up the development environment for Windows, see [here](http://gleitz.github.io/howdoi/windows-contributing/).

- Set up and activate a virtual environment:

  ```
  $ python3 -m venv .venv
  $ source .venv/bin/activate
  ```

- Install all the required packages:

  ```
  $ pip install -r requirements/dev.txt
  ```

- Running from command line :

  ```
  $ python -m howdoi QUERY
  ```

- From a Python script, you can call howdoi with `howdoi.howdoi` and pass your query in the argument.

```
from howdoi import howdoi
query = "for loop python"
output = howdoi.howdoi(query)
```

Or, parse it yourself and then pass the arguments:

```
from howdoi import howdoi
query = "for loop python"
parser = howdoi.get_parser()
args = vars(parser.parse_args(query.split(' ')))
output = howdoi.howdoi(args)
```

#### Notes

- Parsing queries yourself is the older way to pass in queries and may be deprecated in the future. Please use the first example.
- If you try running `python howdoi/howdoi.py` (without -m) you might get `ValueError: Attempted relative import in non-package`.


================================================
FILE: docs/extension_dev.md
================================================
## Extension development

You can integrate howdoi as a code editor extension. For this, you need to edit the files within the [extension/code-editor-integration](https://github.com/gleitz/howdoi/tree/master/extension/code-editor-integration) folder.
To improve the Visual Studio Code howdoi extension, edit the files within the [extension/vscode-howdoi](https://github.com/gleitz/howdoi/tree/master/extension/vscode-howdoi) folder and republish the extension.

#### How to integrate howdoi as a code editor extension?

1. The Code Editor Integration plug-in runs when you call the
   `runHowdoi` function, which takes in a user’s query (type = string). The parameter is encapsulated by a single line comment and is formatted as: `// howdoi query`

   The `runHowdoi` function returns an Object which looks like:

```
{
   question: string
   answer: string[]
   link: string[]
}
```

2. What are these object values?

- question : it contains the user’s query encapsulated by a single line comment
- answer : contains the three possible answers to the user’s query.
- link : link contains the three possible links to the answer encapsulated by a single line comment

3. To start with development, you need to first install howdoi on your machine. Steps to do that lie here.

#### Development

Next, install all important packages by running `npm install`

- To compile the script, run `npm run compile`
- To run the script , run `npm start`
- To compile and run the script, run `npm run build`
- To run the testing script, run `npm test`
- Now, to utilize the plug-in to create a howdoi extension, you need to do the following

#### Integration

To use the plug-in to create a howdoi extension, follow these steps:

- Copy the `code-editor-integration` folder into your workspace and remove the `node_modules` folder. You can do this by adding the following scripts to your `package.json` file and running them. But first, you will need to install ncp and rimraf:

```
"copy": "ncp ../code-editor-integration/ src/code-editor-integration/"
"clean": "rimraf ./src/code-editor-integration/node_modules"
```

- Import the `plugin.ts` file into your main file.
- Call the `runHowdoi` function.

#### Visual Code Extension development

To begin the development for Visual Studio Code extension, install all the necessary packages:

`npm install`

Then precompile the extension :

`npm run precompile`

To run and test extension, utilize the Visual Studio Code’s debugging tools.


================================================
FILE: docs/howdoi_advanced_usage.md
================================================
- **Howdoi stashing feature** - We agree that sometimes you need to save search results for later, and running the same query again and again
  won’t be that feasible. Hence, Howdoi has a stashing feature which allows you to save your query, view the query, delete the saved
  results and even empty the entire stash! (see keep documentation for more information on stashing). Here is how you can do this:

  - **stashing: howdoi --save QUERY**
  - **viewing:  howdoi --view**
  - **removing: howdoi --remove (will be prompted which answer to delete)**
  - **emptying: howdoi --empty (empties entire stash, will be prompted to confirm)**

- **Shortcuts for your parameters** - You might run the same parameters many times and again, typing them isn’t always the best option. You can use shortcuts for your parameters by using something like:

  ```
  $ alias h='function hdi(){ howdoi $* -c -n 5; }; hdi'
  ```

  And then, in your command line, replace your parameters with your alias, i.e. h:

  ```
  $ h format date bash
  ```

- **Other uses and aliases** - You can also search other StackExchange properties for answers.

  Example:

  ```
  $ HOWDOI_URL=cooking.stackexchange.com
  $ howdoi make pesto
  ```

  Or use an alias for the same :

  ```
  $ alias hcook='function hcook(){ HOWDOI_URL=cooking.stackexchange.com howdoi $* ; }; hcook'
  $ hcook make pesto
  ```

- **Setting up environment variables** - Howdoi uses some environment variables which can be configured by the user as per his/her choice.
  The following are the environment variables and their usage :

  - HOWDOI_COLORIZE=1 - Colorizes the output produced.
  - HOWDOI_DISABLE_CACHE=1 - Disables the Caching functionality.
    Howdoi uses a cache for faster access to previous questions. The
    cache is stored in ~/.cache/howdoi.
  - HOWDOI_DISABLE_SSL=1 - Disables the SSL certificate.
  - HOWDOI_SEARCH_ENGINE=google - Changes the search engine to your
    preference (default: google, also supported: bing, duckduckgo).
    The -e flag will switch the underlying engine for a single query.
  - HOWDOI_URL=serverfault.com - Changes the source url for answers
    (default: stackoverflow.com, also supported: serverfault.com,
    pt.stackoverflow.com, full list).


================================================
FILE: docs/index.md
================================================
# howdoi
Never open your browser to look for help again.

Create tar archive:
```bash
$ howdoi create tar archive
> tar -cf backup.tar --exclude "www/subf3" www
```

Format a date in bash:
```bash
$ howdoi format date bash
> DATE=`date +%Y-%m-%d`
```
Print stack trace in Python:
``` bash
$ howdoi print stack trace python
> import traceback
>
> try:
>     1/0
> except:
>     print '>>> traceback <<<'
>     traceback.print_exc()
>     print '>>> end of traceback <<<'
> traceback.print_exc()
```

Convert MP4 to GIF:
```bash
$ howdoi convert mp4 to animated gif
> video=/path/to/video.avi
> outdir=/path/to/output.gif
> mplayer "$video" \
>         -ao null \
>         -ss "00:01:00" \  # starting point
>         -endpos 10 \ # duration in second
>         -vo gif89a:fps=13:output=$outdir \
>         -vf scale=240:180
```


================================================
FILE: docs/introduction.md
================================================
Howdoi is an open source command line tool that gives answers to your questions right on the command line. Howdoi can be used by anyone and everyone who finds themselves Googling for answers to their basic programming questions.

You wonder:

```
howdoi print hello in python
```

We answer:

```
print("hello")
```

### Installation

You'll need:

- `pip`
- Python 3.7 or above. Python 2.7 support is discontinued.

To install howdoi:

`pip install howdoi`

Or

`pip install git+https://github.com/gleitz/howdoi.git#egg=howdoi`

> Don't have pip installed yet? [Follow this simple tutorial to get started](https://pip.pypa.io/en/stable/installing/)

If you want to use [setuptools](https://pypi.org/project/setuptools/) to install howdoi you can do so like this:

`python setup.py install`


================================================
FILE: docs/troubleshooting.md
================================================
You might get the following error when installing with Homebrew:

```bash
==> python setup.py install

http://peak.telecommunity.com/EasyInstall.html

Please make the appropriate changes for your system and try again.
```

Fix the error by executing the following command:
```bash
sudo chmod -R go+w /Library/Python/2.7/site-packages/
```

An official lxml for python 3.3+ for windows has not yet been released. You may get an error while installing.
Try and install an [unofficial binary for lxml](http://www.lfd.uci.edu/~gohlke/pythonlibs/#lxml).


================================================
FILE: docs/usage.md
================================================
Let's start at the beginning.

```
$ howdoi howdoi
```

For further information about flags and environment variables:

```
$ howdoi -h
```

#### Howdoi Flags and Arguments

```
usage: howdoi [-h] [-p POS] [-n NUM] [-a] [-l] [-c] [-x] [-C] [-j] [-v] [-e [ENGINE]] [--save] [--view] [--remove] [--empty] [QUERY ...]

instant coding answers via the command line

positional arguments:
  QUERY                 the question to answer

optional arguments:
  -h, --help            show this help message and exit
  -p POS, --pos POS     select answer in specified position (default: 1)
  -n NUM, --num NUM     number of answers to return (default: 1)
  -a, --all             display the full text of the answer
  -l, --link            display only the answer link
  -c, --color           enable colorized output
  -x, --explain         explain how answer was chosen
  -C, --clear-cache     clear the cache
  -j, --json            return answers in raw json format
  -v, --version         display the current version of howdoi
  -e [ENGINE], --engine [ENGINE]
                        search engine for this query (google, bing, duckduckgo)
  --save, --stash       stash a howdoi answer
  --view                view your stash
  --remove              remove an entry in your stash
  --empty               empty your stash

environment variable examples:
  HOWDOI_COLORIZE=1
  HOWDOI_DISABLE_CACHE=1
  HOWDOI_DISABLE_SSL=1
  HOWDOI_SEARCH_ENGINE=google
  HOWDOI_URL=serverfault.com
```


================================================
FILE: docs/windows-contributing.md
================================================
## Setting up a Windows Development environment 

 
Howdoi can be used on Windows, macOS, Linux, etc., but the development environment can only be set up on Linux or macOS. To set up the development environment on Windows, follow these steps: 

### Install WSL

WSL is Windows Subsystem for Linux. It offers Linux interoperability in Windows and allows users to set up a virtual environment. 

If you do not have WSL set up, follow the steps in this [tutorial](https://www.windowscentral.com/install-windows-subsystem-linux-windows-10) 

### Setting up the virtual environment

After setting up WSL, go to the directory where you want to set up Howdoi. Open the Command Prompt here and enter the following to enter the WSL environment. 

``` bash
bash 
``` 

Now you are using the Linux-compatible kernel on Windows and can set up a virtual environment. 

### Clone the git repository

```bash
$ git clone https://github.com/gleitz/howdoi.git 
``` 

Setup and activate a virtual environment: 

``` bash
$ python3 -m venv .venv 
$ source .venv/bin/activate 
``` 

Make sure you have pip installed, if not, enter: 

``` bash
$ sudo apt install python3-pip 
``` 

Go to the project directory and install requirements: 

``` bash
$ cd howdoi 
$ pip install -r requirements.txt 
``` 

> if howdoi does not process queries after this command, make sure your PATH variables for Python and Pip are set correctly in Windows environment variables.  

### Test run

Check to see if everything has been set-up correctly: 

```  bash
pip install howdoi 
howdoi  print hello world
``` 

 
================================================
FILE: extension/code-editor-integration/.eslintignore
================================================
plugin.js
node_modules/

================================================
FILE: extension/code-editor-integration/.eslintrc.json
================================================
{
  "root": true,
  "parser": "@typescript-eslint/parser",
  "plugins": [
    "@typescript-eslint"
  ],
  "extends": [
    "eslint:recommended",
    "plugin:@typescript-eslint/eslint-recommended",
    "plugin:@typescript-eslint/recommended"
  ],
  "rules": {
    "max-len": [
      "error",
      {
        "code": 120
      }
    ],
    "indent": [
      "error",
      2,
      {
        "SwitchCase": 1,
        "flatTernaryExpressions": false,
        "ignoreComments": false
      }
    ],
    "@typescript-eslint/naming-convention": "warn",
    "@typescript-eslint/semi": ["error", "never"],
    "@typescript-eslint/member-delimiter-style": ["error", {
      "multiline": {
        "delimiter": "none",    // 'none' or 'semi' or 'comma'
        "requireLast": true
      },
      "singleline": {
        "delimiter": "semi",    // 'semi' or 'comma'
        "requireLast": false
      }
    }],
    "comma-spacing": ["error", { "before": false, "after": true }],
    "curly": "warn",
    "eqeqeq": "warn",
    "no-throw-literal": "warn",
    "quotes": ["error", "single"],
    "semi": "off"
  }    
}

================================================
FILE: extension/code-editor-integration/.gitignore
================================================
out/
node_modules/


================================================
FILE: extension/code-editor-integration/README.md
================================================
# Code Editor Integration Development
![Node.js CI](https://img.shields.io/github/workflow/status/gleitz/howdoi/Node.js%20CI?color=78dce8&label=Node.js%20CI&style=plastic)

Simplifies the process of integrating howdoi as a code editor extension.

## Description

The Code Editor Integration plug-in is run by calling the `runHowdoi` function, which takes in a user's query of type string. The parameter is encapsulated by a single line comment and is formatted as follows:

    // howdoi query

`runHowdoi` returns an Object with the structure:

    {
        question: string
        answer: string[]
        link: string[] 
    }

The Object values:
* question contains the user's query encapsulated by a single line comment
* answer contains the three possible answers to the user's query 
* link contains the three possible links to the answer encapsulated by a single line comment


## Installation

First, install howdoi on your machine.

Then, install all necessary packages:

    npm install

## Development

To compile the script:

    npm run compile

To run the script:

    npm start

To compile and run the script:

    npm run build

To run the testing script:

    npm test

## Integration

To utilize this plug-in to create a howdoi extension for a code editor: 

1. Copy the `code-editor-integration` folder into your workspace and remove the `node_modules` folder by adding the script
    
        "copy": "ncp ../code-editor-integration/ src/code-editor-integration/"
        "clean": "rimraf ./src/code-editor-integration/node_modules"
  
    into your `package.json` file and running it.
    First, you will need to install [ncp](https://www.npmjs.com/package/ncp) and [rimraf](https://www.npmjs.com/package/rimraf).

2. Import the `plugin.ts` file into your main file.
    
3. Call the `runHowdoi` function.

Refer to `vscode-howdoi` for an example.

## Usage

usage: 
    
    // howdoi query [-n NUM_ANSWERS]

positional arguments:

      QUERY                 the question to answer

optional arguments:

      -n NUM_ANSWERS        NUM_ANSWERS
                            number of answers to return
                            (default: 3)


================================================
FILE: extension/code-editor-integration/package.json
================================================
{
  "name": "code-editor-integration",
  "version": "1.0.0",
  "description": "",
  "main": "plugin.js",
  "scripts": {
    "prepublish": "npm run compile",
    "compile": "tsc -p ./",
    "lint": "eslint . --ext .ts",
    "watch": "tsc -watch -p ./",
    "pretest": "npm run compile && npm run lint",
    "start": "node ./out/plugin.js",
    "build": "npm run compile && npm run lint && npm run start",
    "test": "cross-env TS_NODE_FILES=true mocha --exit --require ts-node/register --colors src/test/**/*.ts"
  },
  "keywords": [],
  "author": "",
  "license": "ISC",
  "dependencies": {},
  "devDependencies": {
    "@types/chai": "^4.2.11",
    "@types/mocha": "^7.0.2",
    "@types/node": "^14.0.14",
    "@typescript-eslint/eslint-plugin": "^3.9.0",
    "@typescript-eslint/parser": "^3.4.0",
    "chai": "^4.2.0",
    "cross-env": "^5.2.0",
    "eslint": "^7.3.1",
    "mocha": "^10.1.0",
    "nyc": "^14.1.1",
    "ts-node": "^8.3.0",
    "typescript": "^3.5.3"
  }
}


================================================
FILE: extension/code-editor-integration/src/create_attributes.ts
================================================
'use strict'
import {HowdoiObj, JSONObj, CommentChars} from './plugin_interfaces'

export function createComment(command: string, commentChar: CommentChars): string {
  /* Wraps `command` in the given comment markers and returns the commented string.
    The front marker and a space are always prepended. A space and the end marker are
    appended only when the front marker is non-empty and the end marker is not ''. */
  const {frontComment, endComment} = commentChar
  const opened = `${frontComment} ${command}`
  const needsClosing = Boolean(frontComment) && endComment !== ''
  return needsClosing ? `${opened} ${endComment}` : opened
}

export function createHowdoiObj(parsedJson: JSONObj[], userCommand: string, commentChar: CommentChars): HowdoiObj {
  /* Builds the HowdoiObj for a query from the parsed howdoi JSON entries.
    `question` is the user's command as given. Each entry contributes its trimmed answer
    and its trimmed link wrapped in the user's comment markers. Processing stops at the
    first entry whose trimmed answer is exactly `end=''`. */
  const stopMarker = 'end=\'\''
  const answers: string[] = []
  const links: string[] = []

  for (const entry of parsedJson) {
    const trimmedAnswer = entry.answer.trim()
    if (trimmedAnswer === stopMarker) {
      break
    }
    answers.push(trimmedAnswer)
    links.push(createComment(entry.link.trim(), commentChar))
  }

  return {question: userCommand, answer: answers, link: links}
}


================================================
FILE: extension/code-editor-integration/src/find_attributes.ts
================================================
'use strict'
import {CommentChars} from './plugin_interfaces'

export function findCommentChar(userCommand: string): CommentChars {
  /* Detects the comment markers surrounding a user command.
    Returns a CommentChars object holding the run of comment characters found at the very
    start of the string and the run found at the very end ('' when there is none).
    Throws an Error when the string does not start with a comment character. */
  const frontCommentRegex =  /^[!@#<>/;%*(+=._-]+/
  const endCommentRegex = /[!@#<>/%*+=._-]+$/

  const leadingMatch = frontCommentRegex.exec(userCommand)
  if (leadingMatch === null) {
    throw Error('Invalid line comment. Please use single line comment for howdoi.')
  }

  // A closing marker is optional: languages such as JS or Python only have a front marker.
  const trailingMatch = endCommentRegex.exec(userCommand)
  return {
    frontComment: leadingMatch[0],
    endComment: trailingMatch === null ? '' : trailingMatch[0]
  }
}

export function findNumFlagVal(userCommand: string): number {
  /* Finds the value of the `-n` (number of answers) flag within userCommand.
    Returns 3 when the flag is absent. Otherwise everything after the flag is converted
    with Number() and returned.
    Throws a RangeError when the text after the flag is not a number or evaluates to 0
    (which includes a bare `-n` with no value). */
  const numFlag =  '-n'
  const defaultNumFlag = 3
  // The flag only counts when it starts a token (start of string or after whitespace).
  // A plain substring search would mistake hyphenated words such as "non-null" for the flag.
  const flagMatch = /(?:^|\s)-n/.exec(userCommand)

  if (flagMatch === null) {
    return defaultNumFlag
  }

  // flagMatch[0] may include the leading whitespace, so measure from the end of the match.
  const valueStart = flagMatch.index + flagMatch[0].length
  const userNumFlag = Number(userCommand.slice(valueStart).trim())
  if (isNaN(userNumFlag) || (userNumFlag === 0)) {
    throw new RangeError('Invalid num flag value')
  }
  // numFlag is kept for readers: it documents which flag the pattern above looks for.
  void numFlag
  return userNumFlag
}


================================================
FILE: extension/code-editor-integration/src/plugin.ts
================================================
'use strict'
import * as cp from 'child_process'
import {once} from 'events'
import {HOWDOI_PREFIX, HowdoiObj, JSONObj, CommentChars} from './plugin_interfaces'
import * as removeRegex from './remove_regexes'
import * as findAttr from './find_attributes'
import * as createAttr from './create_attributes'

export async function retrieveHowdoiOutput(command: string, numFlagVal: number): Promise<JSONObj[]> {
  /* Spawns the howdoi executable in a child process with the query, the `-n<numFlagVal>` flag
  and `-j`, waits for the process to close and returns its stdout parsed as a JSONObj[].
  Returns a single placeholder entry with empty fields when the process wrote nothing to stdout.
  Throws an Error when stdout is not valid JSON; rejects if the child process emits 'error'
  while it is being awaited. */
  const numFlag: string = '-n' + String(numFlagVal)
  // Named `child` so the Node.js global `process` is not shadowed.
  const child = cp.spawn(HOWDOI_PREFIX, [command, numFlag, '-j'])
  let rawOutput = ''

  // stdout may arrive in several chunks, so collect everything and parse once the stream is done.
  // setEncoding makes the stream decode UTF-8 itself, so multi-byte characters split across
  // chunks are reassembled correctly.
  child.stdout.setEncoding('utf8')
  child.stdout.on('data', (chunk: string) => {
    rawOutput += chunk
  })

  // Readable streams emit 'data'; there is no 'dataErr' event.
  child.stderr.on('data', (dataErr: Buffer) => {
    console.log(`stderr: ${dataErr}`)
  })

  child.on('error', (error: Error) => {
    console.log(`error: ${error.message}`)
  })

  child.on('close', (code: number) => {
    console.log(`child process exited with code ${code}`)
  })

  // Wait for the child process to exit and its stdio streams to close
  await once(child, 'close')

  if (rawOutput.trim() === '') {
    return [{ answer: '', link: '', position: ''}]
  }
  try {
    return JSON.parse(rawOutput)
  } catch(e) {
    throw Error('Invalid json object or no json object returned')
  }
}

export async function runHowdoi(userCommand: string): Promise<HowdoiObj> {
  /* Turns a commented `howdoi` line into a HowdoiObj.
  Steps, in order: detect the comment markers, strip them, strip the `howdoi` prefix, read and
  strip the -n flag, run howdoi, then format the answers.
  Throws ReferenceError (bad comment), SyntaxError (missing prefix), RangeError (bad -n value)
  or Error (howdoi output could not be retrieved). */

  // Runs a synchronous step and replaces whatever it throws with the caller-facing error.
  const attempt = <T>(step: () => T, makeError: () => Error): T => {
    try {
      return step()
    } catch (e) {
      throw makeError()
    }
  }

  // the query must be enclosed by a single line comment
  const commentChar: CommentChars = attempt(
    () => findAttr.findCommentChar(userCommand),
    () => new ReferenceError('Invalid line comment. Please use single line comment for howdoi.')
  )
  const uncommented: string = removeRegex.removeCommentChar(userCommand, commentChar)

  // the howdoi prefix must be present; it is removed here
  const prefixedQuery: string = attempt(
    () => removeRegex.removeHowdoiPrefix(uncommented),
    () => new SyntaxError('Place "howdoi" in front of query')
  )

  // read the -n flag value first, then drop the flag from the query
  const answerCount: number = attempt(
    () => findAttr.findNumFlagVal(prefixedQuery),
    () => new RangeError('Invalid num flag value')
  )
  const bareQuery = removeRegex.removeNumFlag(prefixedQuery)

  // the awaited call needs its own try/catch so the rejection is caught here
  let rawAnswers: JSONObj[]
  try {
    rawAnswers = await retrieveHowdoiOutput(bareQuery, answerCount)
  } catch (e) {
    throw new Error('Invalid json object or no json object returned')
  }

  const formatted = createAttr.createHowdoiObj(rawAnswers, userCommand, commentChar)
  return removeRegex.removeInlineRegex(formatted)
}


================================================
FILE: extension/code-editor-integration/src/plugin_interfaces.ts
================================================
// Keyword that must start every query; also used as the name of the executable spawned to answer it.
export const HOWDOI_PREFIX = 'howdoi'

// Result returned to the editor extension for one howdoi query.
export interface HowdoiObj {
  // the user's command as it was passed in
  question: string
  // trimmed answer texts, one per howdoi result
  answer: string[]
  // link for each answer, wrapped in the user's comment markers
  link: string[]  
}

// One entry of the JSON array parsed from the output of `howdoi ... -j`.
export interface JSONObj {
  // answer text
  answer: string
  // link associated with the answer
  link: string
  // presumably the position of the answer in the result list; not read by the plug-in code shown here
  position: string
}

// Comment markers that enclose a howdoi query in the user's source file.
export interface CommentChars {
  // marker at the start of the line, e.g. '//' or '/*'
  frontComment: string
  // marker at the end of the line, e.g. '*/'; '' when there is no closing marker
  endComment: string
}


================================================
FILE: extension/code-editor-integration/src/remove_regexes.ts
================================================
'use strict'
import {HOWDOI_PREFIX, HowdoiObj, CommentChars} from './plugin_interfaces'

export function removeCommentChar(userCommand: string, commentChar: CommentChars): string {
  /* Removes the comment markers described by commentChar from userCommand and returns the
    trimmed remainder. The first occurrence of the front marker is removed. When an end marker
    is present, its LAST occurrence is removed: the closing marker sits at the end of the line,
    and the same characters may legitimately appear inside the query (e.g. `# howdoi C# strings #`). */
  const frontCommentChar: string = commentChar.frontComment
  const endCommentChar: string = commentChar.endComment

  const withoutFront = userCommand.replace(frontCommentChar, '')
  if (endCommentChar === '' || !userCommand.includes(endCommentChar)) {
    return withoutFront.trim()
  }

  const endIndex = withoutFront.lastIndexOf(endCommentChar)
  if (endIndex === -1) {
    // the only occurrence was part of the front marker that has just been removed
    return withoutFront.trim()
  }
  const beforeEnd = withoutFront.slice(0, endIndex)
  const afterEnd = withoutFront.slice(endIndex + endCommentChar.length)
  return (beforeEnd + afterEnd).trim()
}

export function removeHowdoiPrefix(command: string): string {
  /* Strips the leading `howdoi` keyword from the command and returns the trimmed remainder.
    Throws an Error when the command, ignoring surrounding whitespace, does not start with it. */
  const trimmedCommand = command.trim()
  if (trimmedCommand.startsWith(HOWDOI_PREFIX)) {
    return trimmedCommand.slice(HOWDOI_PREFIX.length).trim()
  }
  throw Error('Place "howdoi" in front of query')
}

export function removeNumFlag(userCommand: string): string {
  /* Removes the `-n` flag and everything after it from userCommand and returns the trimmed
    remainder. Returns userCommand unchanged when the flag is absent. */
  // The flag only counts when it starts a token (start of string or after whitespace).
  // A plain substring search would cut hyphenated queries such as "non-null assertion" at "non".
  const flagMatch = /(?:^|\s)-n/.exec(userCommand)

  if (flagMatch === null) {
    return userCommand
  }
  return userCommand.slice(0, flagMatch.index).trim()
}

export function removeInlineRegex(howdoiResultObj: HowdoiObj): HowdoiObj {
  /* Removes the `>>>` (arrow) and `...` (dot) markers from every answer so inline code is
  displayed more cleanly. An answer is trimmed only when a marker was actually removed from it.
  The answer array is modified in place and the same HowdoiObj is returned. */

  // Literal three-character sequences. The previous patterns were character classes
  // ([>-]{3} and [.-]{3} in effect), which also deleted unrelated text such as `---` or `-->`.
  const markerRegexes: RegExp[] = [/>>>/g, /\.\.\./g]

  for (let i = 0; i < howdoiResultObj.answer.length; i++) {
    for (const markerRegex of markerRegexes) {
      const current = howdoiResultObj.answer[i]
      const cleaned = current.replace(markerRegex, '')
      if (cleaned !== current) {
        howdoiResultObj.answer[i] = cleaned.trim()
      }
    }
  }
  return howdoiResultObj
}


================================================
FILE: extension/code-editor-integration/src/test/plugin.test.ts
================================================
import { assert, expect} from 'chai'
import {suite, test} from 'mocha'
import {CommentChars} from '../plugin_interfaces'
import * as removeRegex from '../remove_regexes'
import * as findAttr from '../find_attributes'
import * as createAttr from '../create_attributes'

// Mocha (tdd) suite covering the comment/flag helpers in find_attributes, remove_regexes and create_attributes.
suite('Plugin Tests', function () {
  //  //: JS, TS, C/ C++/ C#, Java, GO, Rust, Scala, Swift, J#, Dlang single line comment
  const commentChar1: CommentChars = {frontComment: '//', endComment: ''}
  // #: Python, Ruby, powershell, Julia, R, prolog, Crystal, Dockerfile, Diff single line comment
  const commentChar2: CommentChars = { frontComment: '#', endComment: '' }
  // --: SQL, Haskell single line comment
  const commentChar3: CommentChars = { frontComment: '--', endComment: '' }
  // %: LaTex single line comment
  const commentChar4: CommentChars = { frontComment: '%', endComment: '' }
  // ;: clojure single line comment
  const commentChar5: CommentChars = { frontComment: ';', endComment: '' }
  // /* */: C++, CSS single line comment
  const commentChar6: CommentChars = { frontComment: '/*', endComment: '*/' }
  // <!-- -->: HTML, PHP, Markdown, Vue single line comment
  const commentChar7: CommentChars = { frontComment: '<!--', endComment: '-->' }
  /* eslint-disable prefer-const*/
  // Every comment style above; the loops below run each case against all of them.
  const commentCharArr: CommentChars[] = [commentChar1, commentChar2, commentChar3, commentChar4, commentChar5, 
    commentChar6, commentChar7]

  // Global function used for #findNumFlagVal and #removeNumFlag
  // Returns a random integer in the range [0, max).
  function getRandomInt(max: number): number {
    return Math.floor(Math.random() * Math.floor(max))
  }

  suite('Find comment regex in string -> #findCommentChar', function () {
    test('String w/o comment regex', function () {
      // error example
      const err = 'Invalid line comment. Please use single line comment for howdoi.' 
      expect(function(){
        findAttr.findCommentChar('howdoi query')
      }).to.throw(err)
    })
    test('Comment regex test (w/ space): //, #, --, %, ;, /* */, <!-- -->', function () {
      for (let commentChar of commentCharArr) {
        let commentedQuery = commentChar.frontComment + ' howdoi query ' + commentChar.endComment
        assert.deepEqual(findAttr.findCommentChar(commentedQuery), commentChar)
      }   
    })
    test('Comment regex test (w/o space): //, #, --, %, ;, /* */, <!-- -->', function () {
      for (let commentChar of commentCharArr) {
        let commentedQuery = commentChar.frontComment + 'howdoi query' + commentChar.endComment
        assert.deepEqual(findAttr.findCommentChar(commentedQuery), commentChar)
      }   
    })
  })

  suite('Find the Num Flag value from the user command -> #findNumFlagVal', function () {
    test('error examples', function () {
      // A missing or non-numeric value after -n is expected to throw.
      const err = 'Invalid num flag value' 
      expect(function(){
        findAttr.findNumFlagVal('query -n')
      }).to.throw(err)
      expect(function(){
        findAttr.findNumFlagVal('query -nzl')
      }).to.throw(err)
    })
    test('testing default num 3', function () {
      // Both the attached (-n3) and the space-separated (-n 3) forms are checked.
      assert.equal(findAttr.findNumFlagVal('query -n3'), 3)
      assert.equal(findAttr.findNumFlagVal('query -n 3'), 3)
    })
    test('testing non-default numbers', function () {
      const maxNum = 25
      for (let i = 0; i < maxNum; i++) {
        const randomNum = getRandomInt(maxNum)
        const query1 = 'query -n' + String(randomNum)
        const query2 = 'query -n ' + String(randomNum)
        // A value of 0 is expected to be rejected; only the attached form is checked in that case.
        if (randomNum === 0) {
          const err = 'Invalid num flag value'
          expect(function(){
            findAttr.findNumFlagVal(query1)
          }).to.throw(err)
        }
        else {
          assert.equal(findAttr.findNumFlagVal(query1), randomNum)
          assert.equal(findAttr.findNumFlagVal(query2), randomNum)
        }
      }
    })
  })

  suite('Removal of comment character from user command -> #removeCommentChar', function () {
    test('Removal of front and front/back comment char (w/ space): //, #, --, %, ;, /* */, <!-- -->', function () {
      for (let commentChar of commentCharArr) {
        let commentedQuery = commentChar.frontComment + ' howdoi query ' + commentChar.endComment
        assert.equal(removeRegex.removeCommentChar(commentedQuery, commentChar), 'howdoi query')
      } 
    })
    test('Removal of front and front/back comment char (w/o space): //, #, --, %, ;, /* */, <!-- -->', function () {
      for (let commentChar of commentCharArr) {
        let commentedQuery = commentChar.frontComment + 'howdoi query' + commentChar.endComment
        assert.equal(removeRegex.removeCommentChar(commentedQuery, commentChar), 'howdoi query')
      } 
    })
  })

  suite('Removal of howdoi prefix test -> #removeHowdoiPrefix', function () {
    test('Normal Query', function () {
      // Normal Query
      assert.equal(removeRegex.removeHowdoiPrefix('howdoi query'), 'query')
    })
    test('Query with whitespace', function () {
      // Query with whitespace
      assert.equal(removeRegex.removeHowdoiPrefix(' howdoi query '), 'query')
    })
    test('Query without howdoi prefix', function () {
      // Query without howdoi prefix
      const err = 'Place "howdoi" in front of query' 
      expect(function(){
        removeRegex.removeHowdoiPrefix('query')
      }).to.throw(err)
    })
    test('Query without howdoi prefix and whitespace', function () {
      // Query without howdoi prefix and whitespace
      const err = 'Place "howdoi" in front of query'
      expect(function(){
        removeRegex.removeHowdoiPrefix(' query ')
      }).to.throw(err)
    })
  })

  suite('Remove the Num Flag and value from the user command -> #removeNumFlag', function () {
    test('testing default num 3', function () {
      assert.equal(removeRegex.removeNumFlag('query -n3'), 'query')
      assert.equal(removeRegex.removeNumFlag('query -n 3'), 'query')
    })
    test('testing non-default numbers', function () {
      // The flag and its value are expected to be stripped regardless of the number used.
      const maxNum = 25
      for (let i = 0; i < maxNum; i++) {
        const randomNum = getRandomInt(maxNum)
        const query1 = 'query -n' + String(randomNum)
        const query2 = 'query -n ' + String(randomNum)
        assert.equal(removeRegex.removeNumFlag(query1), 'query')
        assert.equal(removeRegex.removeNumFlag(query2), 'query')
      }
    })
  })

  suite('Create comment character to a string -> #createComment', function () {
    test('Creare comment with front & front/back char to string: //, #, --, %, ;, /* */, <!-- -->', function () {
      let testString = ''
      for (let commentChar of commentCharArr) {
        // Styles with a closing delimiter expect it appended after the query; the others only get the front marker.
        if (commentChar.frontComment && (commentChar.endComment !== '')) {
          testString = commentChar.frontComment + ' howdoi query ' + commentChar.endComment
        }
        else {
          testString = commentChar.frontComment + ' howdoi query'
        }
        assert.equal(createAttr.createComment('howdoi query', commentChar), testString)
      }   
    })
  })
})


================================================
FILE: extension/code-editor-integration/tsconfig.json
================================================
{
  "compilerOptions": {
    "target": "es2018",                 
    "module": "commonjs", 
    "outDir": "out",                    
    "lib": ["es2018", "dom"],                             
    "sourceMap": true,                     
    "rootDir": "src",
    "strict": true,                           
    "esModuleInterop": true,                  
    "skipLibCheck": true,                    
    "forceConsistentCasingInFileNames": true  
  }
}


================================================
FILE: extension/vscode-howdoi/.eslintrc
================================================
{
  "root": true,
  "parser": "@typescript-eslint/parser",
  "parserOptions": {
    "ecmaVersion": 6,
    "sourceType": "module"
  },
  "plugins": [
    "@typescript-eslint"
  ],
  "rules": {
    "max-len": [
      "error",
      {
        "code": 120
      }
    ],
    "indent": [
      "error",
      2,
      {
        "SwitchCase": 1,
        "flatTernaryExpressions": false,
        "ignoreComments": false
      }
    ],
    "@typescript-eslint/class-name-casing": "warn",
    "@typescript-eslint/semi": ["error", "never"],
    "@typescript-eslint/member-delimiter-style": ["error", {
      "multiline": {
        "delimiter": "none",   
        "requireLast": true
      },
      "singleline": {
        "delimiter": "semi",    
        "requireLast": false
      }
    }],
    "comma-spacing": ["error", { "before": false, "after": true }],
    "curly": "warn",
    "eqeqeq": "warn",
    "no-throw-literal": "warn",
    "quotes": ["error", "single"],
    "semi": "off"
  }
}


================================================
FILE: extension/vscode-howdoi/.gitignore
================================================
node_modules/
src/code-editor-integration/
out/
.vscode/**
.vscode-test/**

================================================
FILE: extension/vscode-howdoi/.vscodeignore
================================================
.vscode/**
.vscode-test/**
out/test/**
src/**
.gitignore
**/tsconfig.json
**/.eslintrc.json
**/*.map
**/*.ts


================================================
FILE: extension/vscode-howdoi/CHANGELOG.md
================================================
# Change Log
- Initial release

================================================
FILE: extension/vscode-howdoi/LICENSE.txt
================================================
MIT License

Copyright (c) 2020 Diana Arreola

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: extension/vscode-howdoi/README.md
================================================
# Howdoi

Are you a hack programmer? Do you find yourself constantly Googling for how to do basic programming tasks?

Suppose you want to know how to format a date in bash. Why open your browser and read through blogs (risking major distraction) when you can simply stay in the code editor and ask [howdoi](https://github.com/gleitz/howdoi#howdoi):

<!-- howdoi format date bash gif-->
![howdoi format bash date](https://i.imgur.com/hcYpryn.gif)

howdoi will answer all sorts of queries:
<!-- howdoi print stack trace python gif-->
![howdoi print stack trace python](https://i.imgur.com/8D5wiM4.gif)

## Installation

Install howdoi on your machine before using the howdoi extension. To do so, refer to the [howdoi installation steps](https://github.com/gleitz/howdoi#installation).

## Getting Started

The howdoi extension allows users to ask howdoi within their own code editor. 

For usage within the code editor:

1. Write your question in the code editor as a single-line comment that starts with `howdoi`.

    ![up close howdoi print stack trace python](https://i.imgur.com/NLYeTNs.gif)

2. Highlight the text from the first step.

    ![highlighted howdoi print stack trace python](https://i.imgur.com/58x3QbQ.gif)

3. Open the command palette using:

        cmd/ctrl + shift + P
    or
    
        View > Command Palette

4. Run `howdoi` from the Command Palette and choose one of the answers in the drop-down (three are shown by default).

    ![highlighted howdoi print stack trace python](https://i.imgur.com/92c0xVM.gif)

## Usage

usage: 
    
    // howdoi query [-n NUM_ANSWERS]

positional arguments:

      QUERY                 the question to answer

optional arguments:

      -n NUM_ANSWERS        NUM_ANSWERS
                            number of answers to return
                            (default: 3)

example:

  ![highlighted howdoi print stack trace python](https://i.imgur.com/9WGSL2y.gif)    


## Release Notes


### 1.0.0

Initial release of the howdoi VS Code Extension.


-----------------------------------------------------------------------------------------------------------

**Enjoy!**


================================================
FILE: extension/vscode-howdoi/package.json
================================================
{
  "name": "howdoi",
  "displayName": "howdoi",
  "publisher": "howdoi-org",
  "description": "Receive instant coding answers via a code editor.",
  "version": "0.0.1",
  "engines": {
    "vscode": "^1.46.0"
  },
  "categories": [
    "Other",
    "Programming Languages"
  ],
  "author": {
    "name": "diana arreola"
  },
  "icon": "img/howdoi-logo.png",
  "galleryBanner": {
    "color": "#ab9df2",
    "theme": "dark"
  },
  "activationEvents": [
    "onCommand:howdoi.extension"
  ],
  "main": "./out/extension.js",
  "contributes": {
    "commands": [
      {
        "command": "howdoi.extension",
        "title": "howdoi"
      }
    ]
  },
  "scripts": {
    "copy": "ncp ../code-editor-integration/ src/code-editor-integration/",
    "clean": "rimraf ./src/code-editor-integration/node_modules",
    "precompile": "npm run copy && npm run clean",
    "vscode:prepublish": "npm run compile",
    "compile": "tsc -p ./",
    "lint": "eslint src --ext ts",
    "watch": "tsc -watch -p ./",
    "pretest": "npm run compile && npm run lint",
    "test": "node ./out/test/runTest.js"
  },
  "devDependencies": {
    "@types/glob": "^7.1.1",
    "@types/mocha": "^7.0.2",
    "@types/node": "^13.11.0",
    "@types/vscode": "^1.46.0",
    "@typescript-eslint/eslint-plugin": "^2.34.0",
    "@typescript-eslint/parser": "^2.30.0",
    "chai": "^4.2.0",
    "eslint": "^6.8.0",
    "glob": "^7.1.6",
    "mocha": "^10.1.0",
    "typescript": "^3.8.3",
    "vscode-test": "^1.3.0"
  },
  "dependencies": {
    "@types/chai": "^4.2.12",
    "ncp": "^2.0.0",
    "rimraf": "^3.0.2"
  },
  "badges": [
    {
      "url": "https://img.shields.io/github/workflow/status/gleitz/howdoi/Python%20CI?style=plastic&color=78dce8" ,
      "href": "https://github.com/gleitz/howdoi/actions?query=workflow%3A%22Python+CI%22",
      "description": "Github Actions Python CI"

    },
    {
      "url": "https://img.shields.io/github/workflow/status/gleitz/howdoi/Node.js%20CI?color=78dce8&label=Node.js%20CI&style=plastic" ,
      "href": "https://github.com/gleitz/howdoi/actions?query=workflow%3A%22Node.js+CI%22",
      "description": "Github Actions Node.js CI"

    }
  ],
  "license": "SEE LICENSE IN LICENSE.txt",
  "bugs": {
    "url": "https://github.com/gleitz/howdoi/issues"
  },
  "repository": {
    "type": "git",
    "url": "https://github.com/gleitz/howdoi.git"
  },
  "homepage": "https://github.com/gleitz/howdoi/blob/master/extension/vscode-howdoi/README.md"
}


================================================
FILE: extension/vscode-howdoi/src/extension.ts
================================================
import * as vscode from 'vscode'
import * as plugin from './code-editor-integration/src/plugin'

export function activate(context: vscode.ExtensionContext) {
  // Registers the `howdoi.extension` command. The command runs howdoi on the current editor
  // selection and hands the result to the quick pick; failures are reported as information messages.
  const runOnSelection = async () => {
    const editor = vscode.window.activeTextEditor
    if (!editor) {
      vscode.window.showInformationMessage('create a file to enable howdoi')
      return
    }

    const userCommand: string = editor.document.getText(editor.selection).trim()

    let howdoiResultObj
    try {
      howdoiResultObj = await plugin.runHowdoi(userCommand)
    } catch (e) {
      // The specific error classes are tested before the generic Error they derive from.
      let message: string
      if (e instanceof ReferenceError) {
        message = 'Invalid line comment. Please use single line comment for howdoi.'
      } else if (e instanceof SyntaxError) {
        message = 'Place "howdoi" in front of query'
      } else if (e instanceof RangeError) {
        message = 'Invalid num flag value'
      } else if (e instanceof Error) {
        message = 'Could not find response for query'
      } else {
        message = 'Error. Try again'
      }
      vscode.window.showInformationMessage(message)
      return e
    }

    quickPicker(editor, howdoiResultObj, userCommand)
  }

  const disposable = vscode.commands.registerCommand('howdoi.extension', runOnSelection)
  context.subscriptions.push(disposable)
}

function quickPicker(editor: any, howdoiResultObj: any, userCommand: string): void {
  // Shows the howdoi answers in a quick pick. Choosing one replaces the editor selection with
  // the original command, the answer's link and the answer text, each on its own line.
  const quickPick = vscode.window.createQuickPick()

  // Pair each answer with the link at the same position. Looking the answer up with indexOf
  // gave duplicate answers the link of the first occurrence and rescanned the array per item.
  quickPick.items = howdoiResultObj.answer.map((answer: string, index: number) => (
    {label: answer, link: howdoiResultObj.link[index]}))

  quickPick.onDidChangeSelection(([item]: any) => {
    if (item) {
      editor.edit((edit: any) => {
        edit.replace(editor.selection, userCommand + '\n' + item.link + '\n' + item.label)
      })
      quickPick.dispose()
    }
  })
  // Release the quick pick when it is dismissed without a selection.
  quickPick.onDidHide(() => quickPick.dispose())
  quickPick.show()
}

// Exported no-op. NOTE(review): presumably the hook VS Code calls when the extension is deactivated — confirm.
export function deactivate() {}


================================================
FILE: extension/vscode-howdoi/src/test/runTest.ts
================================================
import * as path from 'path'

import { runTests } from 'vscode-test'

// Resolves the extension and test-runner paths, runs the integration tests through `runTests`,
// and exits with status 1 if anything fails.
async function main() {
  try {
    // The folder containing the Extension Manifest package.json
    // Passed to `--extensionDevelopmentPath`
    const extensionDevelopmentPath = path.resolve(__dirname, '../../')

    // The path to test runner
    // Passed to --extensionTestsPath
    const extensionTestsPath = path.resolve(__dirname, './suite/index')

    // Download VS Code, unzip it and run the integration test
    await runTests({ extensionDevelopmentPath, extensionTestsPath })
  } catch (err) {
    // Log the underlying error as well; previously it was discarded, hiding the cause of the failure.
    console.error('Failed to run tests', err)
    process.exit(1)
  }
}

main()


================================================
FILE: extension/vscode-howdoi/src/test/suite/extension.test.ts
================================================
import * as vscode from 'vscode'
import { assert, expect} from 'chai'
import * as pluginTests from '../../code-editor-integration/src/test/plugin.test'

// Wrapper suite for the extension tests.
suite('Extension Test Suite', () => {
  vscode.window.showInformationMessage('Start all tests.')
  // NOTE(review): bare reference to the imported plugin test module; presumably it keeps the import
  // from being dropped as unused so the plugin suites are loaded — confirm.
  pluginTests
})


================================================
FILE: extension/vscode-howdoi/src/test/suite/index.ts
================================================
import * as path from 'path'
import * as Mocha from 'mocha'
import * as glob from 'glob'

export function run(): Promise<void> {
  // Collects every `*.test.js` file below the parent directory of this file, adds them to a
  // tdd-style Mocha instance and runs them. The promise rejects on a glob error, on a thrown
  // error, or when at least one test fails.
  const testsRoot = path.resolve(__dirname, '..')
  const mocha = new Mocha({
    ui: 'tdd',
    color: true
  })

  return new Promise((resolve, reject) => {
    glob('**/**.test.js', { cwd: testsRoot }, (globError, testFiles) => {
      if (globError) {
        return reject(globError)
      }

      // Register each discovered file by absolute path
      for (const testFile of testFiles) {
        mocha.addFile(path.resolve(testsRoot, testFile))
      }

      try {
        mocha.run(failureCount => {
          if (failureCount > 0) {
            reject(new Error(`${failureCount} tests failed.`))
            return
          }
          resolve()
        })
      } catch (runError) {
        console.error(runError)
        reject(runError)
      }
    })
  })
}


================================================
FILE: extension/vscode-howdoi/tsconfig.json
================================================
{
  "compilerOptions": {
    "module": "commonjs",
    "target": "es6",
    "outDir": "out",
    "lib": [
      "es6",
      "dom"
    ],
    "sourceMap": true,
    "rootDir": "src",
    "strict": true  
  },
  "exclude": [
    "node_modules",
    ".vscode-test"
  ]
}


================================================
FILE: extension/vscode-pkg/README.md
================================================
# howdoi Packaged Visual Studio Code Extension

Locally install the howdoi Visual Studio Code Extension.

## Installation

- Head over [here](https://github.com/gleitz/howdoi#installation) to install howdoi on your machine.

- Open Visual Studio Code and open the Command Palette and run:

        Shell Command: Install 'code' command in PATH
    ![Image of Shell Command](https://github.com/gleitz/howdoi/tree/master/extension/vscode-pkg/img/code-command.png)
    
    Restart your terminal.
- Within the `extension/vscode-pkg` folder, run:

        code --install-extension howdoi-0.0.1.vsix

- Add the unpackaged files to your VS Code extensions folder (the path of the folder can be found [here](https://code.visualstudio.com/api/working-with-extensions/publishing-extension#your-extension-folder)). If no files were created within the `extension/vscode-pkg` folder, check whether they are already in your VS Code extensions folder.

- Look over the README within the unpackaged files for more info on how to run howdoi within VS Code.


================================================
FILE: fastentrypoints.py
================================================
# flake8: noqa
# pylint: skip-file

# Copyright (c) 2016, Aaron Christianson
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
Monkey patch setuptools to write faster console_scripts with this format:

    import sys
    from mymodule import entry_function
    sys.exit(entry_function())

This is better.

(c) 2016, Aaron Christianson
http://github.com/ninjaaron/fast-entry_points
'''
from setuptools.command import easy_install
import re
# Text of the generated entry script, filled in by get_args() via str.format():
#   {0} module of the entry point      {1} first attribute, used in the import
#   {2} dotted attribute path called   {3} requirement spec of the distribution
#   {4} entry-point group              {5} script name
TEMPLATE = r'''
# -*- coding: utf-8 -*-
# EASY-INSTALL-ENTRY-SCRIPT: '{3}','{4}','{5}'
__requires__ = '{3}'
import re
import sys

from {0} import {1}

if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
    sys.exit({2}())'''.lstrip()


@classmethod
def get_args(cls, dist, header=None):  # noqa: D205,D400
    """
    Generate the write_script() argument tuples for every console_scripts
    and gui_scripts entry point of a distribution.

    Raises ValueError when a script name contains a path separator.
    """
    # pylint: disable=E1101
    script_header = cls.get_header() if header is None else header
    requirement = str(dist.as_requirement())
    for kind in ('console', 'gui'):
        group_name = kind + '_scripts'
        for script_name, entry_point in dist.get_entry_map(group_name).items():
            # ensure_safe_name
            if re.search(r'[\\/]', script_name):
                raise ValueError("Path separators not allowed in script names")
            body = TEMPLATE.format(
                entry_point.module_name,
                entry_point.attrs[0],
                '.'.join(entry_point.attrs),
                requirement,
                group_name,
                script_name,
            )
            # pylint: disable=E1101
            yield from cls._get_script_args(kind, script_name, script_header, body)


# pylint: disable=E1101
easy_install.ScriptWriter.get_args = get_args


def main():
    """Copy this module into each target directory and hook it into the build files.

    Targets come from ``sys.argv[1:]`` and default to the current directory.
    For each target, ``include fastentrypoints.py`` is appended to MANIFEST.in
    and ``import fastentrypoints`` is prepended to setup.py, unless the
    respective text is already present. Missing files are created.
    """
    import os
    import re
    import shutil
    import sys

    source_file = re.sub(r'\.pyc$', '.py', __file__)
    targets = sys.argv[1:] or ['.']

    for target in targets:
        shutil.copy(source_file, target)

        # 'a+' creates the file when missing and appends on write
        with open(os.path.join(target, 'MANIFEST.in'), 'a+') as manifest_file:
            manifest_file.seek(0)
            existing = manifest_file.read()
            if 'include fastentrypoints.py' not in existing:
                separator = '\n' if existing else ''
                manifest_file.write(separator + 'include fastentrypoints.py')

        with open(os.path.join(target, 'setup.py'), 'a+') as setup_file:
            setup_file.seek(0)
            existing = setup_file.read()
            if 'import fastentrypoints' in existing:
                continue
            # Empty the file first so the import line ends up ahead of the old content
            setup_file.seek(0)
            setup_file.truncate()
            setup_file.write('import fastentrypoints\n' + existing)


================================================
FILE: howdoi/__init__.py
================================================
__version__ = '2.0.20'


================================================
FILE: howdoi/__main__.py
================================================
from .howdoi import command_line_runner

command_line_runner()


================================================
FILE: howdoi/errors.py
================================================
# NOTE(review): presumably raised when a Google results page fails validation — confirm against the raising code.
class GoogleValidationError(Exception):
    pass


# NOTE(review): presumably raised when a Bing results page fails validation — confirm against the raising code.
class BingValidationError(Exception):
    pass


# NOTE(review): presumably raised when a DuckDuckGo results page fails validation — confirm against the raising code.
class DDGValidationError(Exception):
    pass


================================================
FILE: howdoi/howdoi.py
================================================
#!/usr/bin/env python

######################################################
#
# howdoi - instant coding answers via the command line
# written by Benjamin Gleitzman (gleitz@mit.edu)
# inspired by Rich Jones (rich@anomos.info)
#
######################################################

import gc
gc.disable()

import argparse
import inspect
import json
import os
import re
import sys
import textwrap

from urllib.request import getproxies
from urllib.parse import quote as url_quote, urlparse, parse_qs

from multiprocessing import Pool

import logging
import appdirs
import requests

from cachelib import FileSystemCache, NullCache

from keep import utils as keep_utils

from pygments.lexers import guess_lexer, get_lexer_by_name
from pygments.util import ClassNotFound
from rich.syntax import Syntax
from rich.console import Console

from pyquery import PyQuery as pq
from requests.exceptions import ConnectionError as RequestsConnectionError
from requests.exceptions import SSLError

from colorama import init
init()

from howdoi import __version__
from howdoi.errors import GoogleValidationError, BingValidationError, DDGValidationError

logging.basicConfig(format='%(levelname)s: %(message)s')
if os.getenv('HOWDOI_DISABLE_SSL'):  # Set http instead of https
    SCHEME = 'http://'
    VERIFY_SSL_CERTIFICATE = False
else:
    SCHEME = 'https://'
    VERIFY_SSL_CERTIFICATE = True

SUPPORTED_SEARCH_ENGINES = ('google', 'bing', 'duckduckgo')

URL = os.getenv('HOWDOI_URL') or 'stackoverflow.com'

# Browser User-Agent strings sent with outgoing requests.
# Adjacent string literals are concatenated without a separator, so a literal
# that continues on the next line must end with its own trailing space.
USER_AGENTS = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0',
               'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100101 Firefox/22.0',
               'Mozilla/5.0 (Windows NT 6.1; rv:11.0) Gecko/20100101 Firefox/11.0',
               ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/536.5 (KHTML, like Gecko) '
                'Chrome/19.0.1084.46 Safari/536.5'),
               ('Mozilla/5.0 (Windows; Windows NT 6.1) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.46 '
                'Safari/536.5'),)
SEARCH_URLS = {
    'bing': SCHEME + 'www.bing.com/search?q=site:{0}%20{1}&hl=en',
    'google': SCHEME + 'www.google.com/search?q=site:{0}%20{1}&hl=en',
    'duckduckgo': SCHEME + 'duckduckgo.com/html?q=site:{0}%20{1}&t=hj&ia=web'
}

BLOCK_INDICATORS = (
    'form id="captcha-form"',
    'This page appears when Google automatically detects requests coming from your computer '
    'network which appear to be in violation of the <a href="//www.google.com/policies/terms/">Terms of Service'
)

BLOCKED_QUESTION_FRAGMENTS = (
    'webcache.googleusercontent.com',
)

STAR_HEADER = '\u2605'
ANSWER_HEADER = '{2}  Answer from {0} {2}\n{1}'
NO_ANSWER_MSG = '< no answer given >'

CACHE_EMPTY_VAL = "NULL"
CACHE_DIR = appdirs.user_cache_dir('howdoi')
CACHE_ENTRY_MAX = 128

HTML_CACHE_PATH = 'page_cache'
SUPPORTED_HELP_QUERIES = ['use howdoi', 'howdoi', 'run howdoi', 'setup howdoi',
                          'do howdoi', 'howdoi howdoi', 'howdoi use howdoi']

NO_RESULTS_MESSAGE = "Sorry, couldn't find any help with that topic"

# variables for text formatting, prepend to string to begin text formatting.
BOLD = '\033[1m'
GREEN = '\033[92m'
RED = '\033[91m'
UNDERLINE = '\033[4m'
END_FORMAT = '\033[0m'  # append to string to end text formatting.

# stash options
STASH_SAVE = 'save'
STASH_VIEW = 'view'
STASH_REMOVE = 'remove'
STASH_EMPTY = 'empty'

BLOCKED_ENGINES = []

if os.getenv('HOWDOI_DISABLE_CACHE'):
    # works like an always empty cache
    cache = NullCache()
else:
    cache = FileSystemCache(CACHE_DIR, CACHE_ENTRY_MAX, default_timeout=0)

howdoi_session = requests.session()


# RuntimeError subclass with no extra behavior.
# NOTE(review): presumably signals that a search engine blocked the request — confirm against the raising code.
class BlockError(RuntimeError):
    pass


class IntRange:
    """Callable validator that converts its argument to an int within optional inclusive bounds.

    A bound set to ``None`` is not enforced. Invalid input raises
    ``argparse.ArgumentTypeError`` with a message describing the accepted range.
    """

    def __init__(self, imin=None, imax=None):
        self.imin = imin
        self.imax = imax

    def __call__(self, arg):
        """Return ``int(arg)`` or raise ``argparse.ArgumentTypeError``."""
        try:
            number = int(arg)
        except ValueError as value_error:
            raise self.exception() from value_error
        if self.imin is not None and number < self.imin:
            raise self.exception()
        if self.imax is not None and number > self.imax:
            raise self.exception()
        return number

    def exception(self):
        """Build the ``argparse.ArgumentTypeError`` matching the configured bounds."""
        has_min = self.imin is not None
        has_max = self.imax is not None
        if has_min and has_max:
            message = f'Must be an integer in the range [{self.imin}, {self.imax}]'
        elif has_min:
            message = f'Must be an integer >= {self.imin}'
        elif has_max:
            message = f'Must be an integer <= {self.imax}'
        else:
            message = 'Must be an integer'
        return argparse.ArgumentTypeError(message)


def _random_int(width):
    bres = os.urandom(width)
    if sys.version < '3':
        ires = int(bres.encode('hex'), 16)
    else:
        ires = int.from_bytes(bres, 'little')

    return ires


def _random_choice(seq):
    """Return an element of ``seq`` at an index derived from one random byte."""
    index = _random_int(1) % len(seq)
    return seq[index]


def get_proxies():
    """Return the proxies from ``getproxies()`` whose key starts with 'http'.

    Values that do not already start with 'http' get an ``http://`` prefix.
    """
    def _with_scheme(address):
        return address if address.startswith('http') else f'http://{address}'

    return {
        name: _with_scheme(address)
        for name, address in getproxies().items()
        if name.startswith('http')
    }


def _get_result(url):
    """Fetch ``url`` through the shared session and return the response text.

    HTTP error statuses raise via ``raise_for_status()``. An SSL error is
    logged with a hint about HOWDOI_DISABLE_SSL and then re-raised.
    """
    try:
        request_options = {
            'headers': {'User-Agent': _random_choice(USER_AGENTS)},
            'proxies': get_proxies(),
            'verify': VERIFY_SSL_CERTIFICATE,
            'cookies': {'CONSENT': 'YES+US.en+20170717-00-0'},
        }
        response = howdoi_session.get(url, **request_options)
        response.raise_for_status()
        return response.text
    except requests.exceptions.SSLError as error:
        logging.error('%sEncountered an SSL Error. Try using HTTP instead of '
                      'HTTPS by setting the environment variable "HOWDOI_DISABLE_SSL".\n%s', RED, END_FORMAT)
        raise error


def _get_from_cache(cache_key):
    """Return the cached value for ``cache_key`` without printing a cache-miss warning.

    The root logger is temporarily set to ``logging.ERROR`` around the lookup and
    is put back to its previous level afterwards, even when the lookup raises.
    """
    root_logger = logging.getLogger()
    # As of cachelib 0.3.0, the library internally logs a warning on a cache miss
    previous_level = root_logger.getEffectiveLevel()
    # Raise the threshold so the warning is not printed
    root_logger.setLevel(logging.ERROR)
    try:
        page = cache.get(cache_key)  # pylint: disable=assignment-from-none
    finally:
        # Restore the log level no matter how the lookup ended
        root_logger.setLevel(previous_level)
    return page


def _add_links_to_text(element):
    """Replace every linked ``<a>`` inside ``element`` with a plain-text rendering.

    An anchor whose text equals its ``href`` becomes just that text; any other
    anchor becomes ``[text](href)``. Anchors without an ``href`` attribute are
    left untouched. The element is modified in place and nothing is returned.
    """
    for hyperlink in element.find('a'):
        href = hyperlink.attrib.get('href')
        if href is None:
            # e.g. a named anchor such as <a name="...">: there is no target to render
            continue
        pquery_object = pq(hyperlink)
        copy = pquery_object.text()
        replacement = copy if copy == href else f'[{copy}]({href})'
        pquery_object.replace_with(replacement)


def get_text(element):
    ''' return the inner text of a pyquery element, with its links rewritten inline '''
    _add_links_to_text(element)
    try:
        inner_text = element.text(squash_space=False)
    except TypeError:
        # presumably a pyquery version whose text() does not accept squash_space
        inner_text = element.text()
    return inner_text


def _extract_links_from_bing(html):
    """Return the ``href`` of every ``a`` under an ``h2`` under ``.b_algo`` in ``html``."""
    html.remove_namespaces()
    title_anchors = html('.b_algo')('h2')('a')
    links = []
    for anchor in title_anchors:
        links.append(anchor.attrib['href'])
    return links


def _clean_google_link(link):
    if '/url?' in link:
        parsed_link = urlparse(link)
        query_params = parse_qs(parsed_link.query)
        url_params = query_params.get('q', []) or query_params.get('url', [])
        if url_params:
            return url_params[0]
    return link


def _extract_links_from_google(query_object):
    """Return every question URL for the configured site found in the page markup.

    The serialized HTML of ``query_object`` is scanned for
    ``http(s)://<URL>/questions/<digits>/<slug>`` and each match is passed
    through ``_clean_google_link``. Duplicates are not removed here.
    """
    # NOTE(review): html() is expected to return None for an empty selection;
    # fall back to '' so findall() always receives a string.
    html = query_object.html() or ''
    # Escape the host so its dots only match literal dots
    link_pattern = re.compile(fr"https?://{re.escape(URL)}/questions/[0-9]*/[a-z0-9-]*")
    return [_clean_google_link(link) for link in link_pattern.findall(html)]


def _extract_links_from_duckduckgo(html):
    """Return the ``uddg`` target of each ``a.result__a`` anchor in ``html``.

    Anchors whose ``href`` has no ``uddg`` query parameter are skipped.
    """
    html.remove_namespaces()
    results = []
    for anchor in html.find('a.result__a'):
        redirect_query = urlparse(anchor.attrib['href']).query
        targets = parse_qs(redirect_query).get('uddg', '')
        if targets:
            results.append(targets[0])
    return results


def _extract_links(html, search_engine):
    """Extract result links from ``html`` with the extractor for ``search_engine``.

    ``'bing'`` and ``'duckduckgo'`` have dedicated extractors; any other value
    uses the Google extractor.
    """
    extractors = {
        'bing': _extract_links_from_bing,
        'duckduckgo': _extract_links_from_duckduckgo,
    }
    extract = extractors.get(search_engine, _extract_links_from_google)
    return extract(html)


def _get_search_url(search_engine):
    """Return the ``SEARCH_URLS`` entry for ``search_engine``, or the Google entry if it has none."""
    fallback_url = SEARCH_URLS['google']
    return SEARCH_URLS.get(search_engine, fallback_url)


def _is_blocked(page):
    """Return True when ``page`` contains any of the ``BLOCK_INDICATORS`` substrings."""
    return any(page.find(indicator) != -1 for indicator in BLOCK_INDICATORS)


def _get_links(query):
    """Search for ``query`` and return the de-duplicated result links, in order.

    The engine is read from the ``HOWDOI_SEARCH_ENGINE`` environment variable
    (default ``google``).

    Raises:
        BlockError: when the request fails with an HTTP error, returns nothing,
            or the returned page contains a block indicator.
    """
    search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'google')
    url_template = _get_search_url(search_engine)
    search_url = url_template.format(URL, url_quote(query))

    logging.info('Searching %s with URL: %s', search_engine, search_url)

    result = None
    try:
        result = _get_result(search_url)
    except requests.HTTPError:
        logging.info('Received HTTPError')

    blocked = not result or _is_blocked(result)
    if blocked:
        logging.error('%sUnable to find an answer because the search engine temporarily blocked the request. '
                      'Attempting to use a different search engine.%s', RED, END_FORMAT)
        raise BlockError('Temporary block by search engine')

    links = _extract_links(pq(result), search_engine)
    if not links:
        logging.info('Search engine %s found no StackOverflow links, returned HTML is:', search_engine)
        logging.info(result)
    # dict keys keep first-seen order, which drops duplicates without reordering
    return list(dict.fromkeys(links))


def get_link_at_pos(links, position):
    """Return the link at 1-based ``position``, or the last link if ``position`` is past the end.

    Returns ``False`` when ``links`` is empty.
    """
    if not links:
        return False

    index = position - 1 if position <= len(links) else -1
    return links[index]


def _format_output(args, code):
    if not args['color']:
        return code
    lexer = None
    # try to find a lexer using the StackOverflow tags
    # or the query arguments
    for keyword in args['query'].split() + args['tags']:
        try:
            lexer = get_lexer_by_name(keyword).name
            break
        except ClassNotFound:
            pass

    # no lexer found above, use the guesser
    if not lexer:
        try:
            lexer = guess_lexer(code).name
        except ClassNotFound:
            return code

    syntax = Syntax(code, lexer, background_color="default", line_numbers=False)
    console = Console(record=True)
    with console.capture() as capture:
        console.print(syntax)
    return capture.get()


def _is_question(link):
    """Tell whether ``link`` looks like a question URL.

    Returns ``False`` when the link contains a blocked fragment; otherwise the
    result of searching for ``questions/<digits>/`` (a match object or ``None``).
    """
    if any(fragment in link for fragment in BLOCKED_QUESTION_FRAGMENTS):
        return False
    return re.search(r'questions/\d+/', link)


def _get_questions(links):
    """Return the entries of ``links`` that ``_is_question`` accepts, in order."""
    return list(filter(_is_question, links))


def _get_answer(args, link):  # pylint: disable=too-many-branches
    """Return the stripped text of the first answer on the question page at ``link``.

    The page is taken from the cache when present; otherwise it is requested
    with ``?answertab=votes`` appended and stored in the cache.

    Which text is returned depends on the page and on ``args['all']``:
      * ``args['all']`` set: every direct child of the answer body, joined by
        newlines, with ``pre``/``code`` children passed through ``_format_output``;
      * a ``pre`` or ``code`` element exists: the first such element, formatted;
      * otherwise: the text of the whole answer body.

    Side effect: ``args['tags']`` is overwritten with the text of the page's
    ``.post-tag`` elements.
    """
    cache_key = _get_cache_key(link)
    page = _get_from_cache(cache_key)
    if not page:
        logging.info('Fetching page: %s', link)
        page = _get_result(link + '?answertab=votes')
        cache.set(cache_key, page)
    else:
        logging.info('Using cached page: %s', link)

    html = pq(page)

    # Prefer the first '.answercell'; fall back to the first '.answer' when that lookup is falsy
    first_answer = html('.answercell').eq(0) or html('.answer').eq(0)

    # Code samples: 'pre' elements if there are any, otherwise 'code' elements
    instructions = first_answer.find('pre') or first_answer.find('code')
    # The tags are read later by _format_output as lexer name candidates
    args['tags'] = [t.text for t in html('.post-tag')]

    # Pick the CSS class of the answer body: '.js-post-body' when the answer contains one.
    if first_answer.find(".js-post-body"):
        answer_body_cls = ".js-post-body"
    else:
        # fall back to the '.post-text' class
        answer_body_cls = ".post-text"

    if not instructions and not args['all']:
        logging.info('No code sample found, returning entire answer')
        text = get_text(first_answer.find(answer_body_cls).eq(0))
    elif args['all']:
        logging.info('Returning entire answer')
        texts = []
        # Walk the direct children of the answer body, skipping those with no text
        for html_tag in first_answer.items(f'{answer_body_cls} > *'):
            current_text = get_text(html_tag)
            if current_text:
                if html_tag[0].tag in ['pre', 'code']:
                    texts.append(_format_output(args, current_text))
                else:
                    texts.append(current_text)
        text = '\n'.join(texts)
    else:
        # Default: only the first code sample
        text = _format_output(args, get_text(instructions.eq(0)))
    # NOTE(review): text can only be None here if get_text()/_format_output() returned None
    # in the first or last branch; the args['all'] branch always produces a string.
    if text is None:
        logging.info('%sAnswer was empty%s', RED, END_FORMAT)
        text = NO_ANSWER_MSG
    text = text.strip()
    return text


def _get_links_with_cache(query):
    """Return the question links for ``query``, using the cache when possible.

    A cached non-empty result is returned directly. A cached ``CACHE_EMPTY_VAL``
    marker (an earlier search that found nothing) triggers a fresh search. The
    fresh result, or ``CACHE_EMPTY_VAL`` when no question links were found, is
    written back to the cache.
    """
    cache_key = _get_cache_key(query)
    res = _get_from_cache(cache_key)
    if res:
        logging.info('Using cached links')
        if res == CACHE_EMPTY_VAL:
            logging.info('No StackOverflow links found in cached search engine results - will make live query')
        else:
            return res

    question_links = _get_questions(_get_links(query))
    # Single write: an empty result is stored as the CACHE_EMPTY_VAL marker
    cache.set(cache_key, question_links or CACHE_EMPTY_VAL)

    return question_links


def build_splitter(splitter_character='=', splitter_length=80):
    """Return a separator: a newline, the character repeated ``splitter_length`` times, then two newlines."""
    rule = splitter_character * splitter_length
    return ''.join(('\n', rule, '\n\n'))


def _get_answers(args):
    """
    @args: command-line arguments
    returns: array of answers and their respective metadata
             False if unable to get answers

    The links for ``args['query']`` are sliced to ``args['num_answers']`` entries
    starting at 1-based ``args['pos']`` and fetched in a process pool. Entries
    without an answer are dropped and the rest are numbered from 1.
    """

    all_links = _get_links_with_cache(args['query'])
    if not all_links:
        return False

    start = args['pos'] - 1
    stop = start + int(args['num_answers'])
    question_links = all_links[start:stop]
    search_engine = os.getenv('HOWDOI_SEARCH_ENGINE', 'google')

    logging.info('Links from %s found on %s: %s', URL, search_engine, len(question_links))
    logging.info('URL: %s', '\n '.join(question_links))
    logging.info('Answers requested: %s, Starting at position: %s', args["num_answers"], args['pos'])

    jobs = [(args, link) for link in question_links]
    with Pool() as pool:
        fetched = pool.starmap(_get_answer_worker, jobs)

    # Keep only entries that carry an answer, numbering them as they are kept
    answers = []
    for candidate in fetched:
        if candidate.get('answer'):
            candidate['position'] = len(answers) + 1
            answers.append(candidate)

    logging.info('Total answers returned: %s', len(answers))

    return answers or False


def _get_answer_worker(args, link):
    """Fetch the answer for ``link`` and wrap it in a result dict.

    Returns a dict with ``answer``, ``link`` and ``position`` keys; ``position``
    is always ``None`` here, and all three are ``None`` when no answer was found.
    The answer gets the ``ANSWER_HEADER`` wrapping when several answers (or the
    full answer) are requested and neither link-only nor JSON output is selected.
    """
    answer = _get_answer(args, link)
    multiple_answers = (int(args['num_answers']) > 1 or args['all'])

    if not answer:
        return {'answer': None, 'link': None, 'position': None}

    if not (args['link'] or args['json_output']) and multiple_answers:
        answer = ANSWER_HEADER.format(link, answer, STAR_HEADER)

    return {'answer': answer + '\n', 'link': link, 'position': None}


def _clear_cache():
    """Clear the module-level cache and return the result of its ``clear()`` call.

    When the current ``cache`` object is falsy it is first replaced by a
    ``FileSystemCache`` on ``CACHE_DIR``.
    """
    global cache  # pylint: disable=global-statement,invalid-name
    if cache:
        return cache.clear()

    cache = FileSystemCache(CACHE_DIR, CACHE_ENTRY_MAX, 0)
    return cache.clear()


def _is_help_query(query):
    """Return True when the lower-cased ``query`` equals one of ``SUPPORTED_HELP_QUERIES``."""
    for help_query in SUPPORTED_HELP_QUERIES:
        if query.lower() == help_query:
            return True
    return False


def _format_answers(args, res):
    if "error" in res:
        return f'ERROR: {RED}{res["error"]}{END_FORMAT}'

    if args["json_output"]:
        return json.dumps(res)

    formatted_answers = []

    for answer in res:
        next_ans = answer["answer"]
        if args["link"]:  # if we only want links
            next_ans = answer["link"]
        formatted_answers.append(next_ans or NO_RESULTS_MESSAGE)

    return build_splitter().join(formatted_answers)


def _get_help_instructions():
    """Return the usage examples shown for a help query, separated by a blank 60-space rule."""
    query = 'print hello world in python'
    templates = (
        'Here are a few popular howdoi commands ',
        '>>> howdoi {} (default query)',
        '>>> howdoi {} -a (read entire answer)',
        '>>> howdoi {} -n [number] (retrieve n number of answers)',
        '>>> howdoi {} -l (display only a link to where the answer is from',
        '>>> howdoi {} -c (Add colors to the output)',
        '>>> howdoi {} -e (Specify the search engine you want to use e.g google,bing)',
    )
    instruction_splitter = build_splitter(' ', 60)
    return instruction_splitter.join(template.format(query) for template in templates)


def _get_cache_key(args):
    """Build a cache key from the calling function's name, ``str(args)`` and the version.

    Including the caller's name lets different callers passing the same
    ``args`` get distinct keys. Because of that, this function must be called
    directly from the function whose name should appear in the key.
    """
    # Read the caller's name straight from the parent frame; this avoids
    # building frame records (with source context) for the entire stack.
    caller_frame = inspect.currentframe().f_back
    calling_func = caller_frame.f_code.co_name
    return calling_func + str(args) + __version__


def format_stash_item(fields, index=-1):
    """Format one stash entry as an underlined bold title followed by its description.

    ``fields`` supplies ``alias`` (title) and ``desc`` (description). When
    ``index`` is not ``-1`` the title is prefixed with the 1-based item number.
    """
    title = fields['alias']
    description = fields['desc']
    if index != -1:
        item_num = index + 1
        return f'{UNDERLINE}{BOLD}$ [{item_num}] {title}{END_FORMAT}\n\n{description}\n'
    return f'{UNDERLINE}{BOLD}$ {title}{END_FORMAT}\n\n{description}\n'


def print_stash(stash_list=None):
    """Print stash entries separated by ``#`` rules.

    With a non-empty ``stash_list`` its entries are printed numbered. Otherwise
    the saved commands are read from storage and printed under a
    ``STASH LIST:`` heading; if there are none, an error is logged and nothing
    is printed.
    """
    if stash_list:
        entries = [format_stash_item(item['fields'], position) for position, item in enumerate(stash_list)]
    else:
        commands = keep_utils.read_commands()
        if commands is None or len(commands.items()) == 0:
            logging.error('%sNo commands found in stash. '
                          'Add a command with "howdoi --%s <query>".%s', RED, STASH_SAVE, END_FORMAT)
            return
        entries = ['\nSTASH LIST:']
        entries.extend(format_stash_item(fields) for _, fields in commands.items())
    print(build_splitter('#').join(entries))


def _get_stash_key(args):
    """Return the stash key for ``args``: the ``str()`` of ``args`` minus the stash flags and tags."""
    ignore_keys = [STASH_SAVE, STASH_VIEW, STASH_REMOVE, STASH_EMPTY, 'tags']  # ignore these for stash key
    stash_args = {key: value for key, value in args.items() if key not in ignore_keys}
    return str(stash_args)


def _stash_remove(cmd_key, title):
    """Remove the stash entry stored under ``cmd_key`` and print whether it was found."""
    commands = keep_utils.read_commands()
    if commands is None or cmd_key not in commands:
        print(f'\n{BOLD}{RED}"{title}" not found in stash{END_FORMAT}\n')
        return

    keep_utils.remove_command(cmd_key)
    print(f'\n{BOLD}{GREEN}"{title}" removed from stash{END_FORMAT}\n')


def _stash_save(cmd_key, title, answer):
    """Save ``answer`` to the stash under ``cmd_key`` and then print the stash.

    If saving fails with ``FileNotFoundError``, ``keep init`` is run and the
    save is attempted once more. The stash is printed whether or not the save
    succeeded.
    """
    try:
        try:
            keep_utils.save_command(cmd_key, answer, title)
        except FileNotFoundError:
            # presumably the keep storage does not exist yet; initialise it and retry
            os.system('keep init')
            keep_utils.save_command(cmd_key, answer, title)
    finally:
        print_stash()


def _parse_cmd(args, res):
    """Format ``res`` and apply any stash action requested in ``args``.

    Returns an empty string after saving to or removing from the stash;
    otherwise returns the formatted answer.
    """
    answer = _format_answers(args, res)
    cmd_key = _get_stash_key(args)
    title = ''.join(args['query'])

    if args[STASH_SAVE]:
        _stash_save(cmd_key, title, answer)
    elif args[STASH_REMOVE]:
        _stash_remove(cmd_key, title)
    else:
        return answer
    return ''


def howdoi(raw_query):
    """Answer a query and return the formatted output string.

    ``raw_query`` is either a command-line style string, which is split on
    single spaces and parsed with ``get_parser()``, or an already parsed
    argument dict (whose ``query`` entry is a list of words).

    The search engine is taken from ``args['search_engine']``, then the
    ``HOWDOI_SEARCH_ENGINE`` environment variable, then ``'google'``, and is
    written back to that environment variable. Results are cached, including a
    "no results" error; connection and SSL errors are reported without being
    cached. When the engine blocks the request, the query is retried
    recursively with the next supported engine that has not been blocked.
    """
    if isinstance(raw_query, str):  # you can pass either a raw or a parsed query
        parser = get_parser()
        args = vars(parser.parse_args(raw_query.split(' ')))
    else:
        args = raw_query

    search_engine = args['search_engine'] or os.getenv('HOWDOI_SEARCH_ENGINE') or 'google'
    # Other functions read the engine from the environment, so publish the choice there
    os.environ['HOWDOI_SEARCH_ENGINE'] = search_engine
    if search_engine not in SUPPORTED_SEARCH_ENGINES:
        supported_search_engines = ', '.join(SUPPORTED_SEARCH_ENGINES)
        message = f'Unsupported engine {search_engine}. The supported engines are: {supported_search_engines}'
        res = {'error': message}
        return _parse_cmd(args, res)

    # From here on args['query'] is a single string with question marks removed
    args['query'] = ' '.join(args['query']).replace('?', '')
    cache_key = _get_cache_key(args)

    if _is_help_query(args['query']):
        return _get_help_instructions() + '\n'

    res = _get_from_cache(cache_key)

    if res:
        logging.info('Using cached response (add -C to clear the cache)')
        return _parse_cmd(args, res)

    logging.info('Fetching answers for query: %s', args["query"])

    try:
        res = _get_answers(args)
        if not res:
            message = NO_RESULTS_MESSAGE
            if not args['explain']:
                message = f'{message} (use --explain to learn why)'
            res = {'error': message}
        # Answers and the "no results" error are both cached
        cache.set(cache_key, res)
    except (RequestsConnectionError, SSLError):
        res = {'error': f'Unable to reach {search_engine}. Do you need to use a proxy?\n'}
    except BlockError:
        # Remember the blocked engine and retry with the first supported engine not yet blocked
        BLOCKED_ENGINES.append(search_engine)
        next_engine = next((engine for engine in SUPPORTED_SEARCH_ENGINES if engine not in BLOCKED_ENGINES), None)
        if next_engine is None:
            res = {'error': 'Unable to get a response from any search engine\n'}
        else:
            args['search_engine'] = next_engine
            # The recursive call joins the query again, so turn it back into a word list
            args['query'] = args['query'].split()
            logging.info('%sRetrying search with %s%s', GREEN, next_engine, END_FORMAT)
            return howdoi(args)
    return _parse_cmd(args, res)


def get_parser():
    """Build and return the ``argparse`` parser for the howdoi command line."""
    epilog = textwrap.dedent('''\
                                     environment variable examples:
                                       HOWDOI_COLORIZE=1
                                       HOWDOI_DISABLE_CACHE=1
                                       HOWDOI_DISABLE_SSL=1
                                       HOWDOI_SEARCH_ENGINE=google
                                       HOWDOI_URL=serverfault.com
                                     ''')
    parser = argparse.ArgumentParser(
        description='instant coding answers via the command line',
        epilog=epilog,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    add = parser.add_argument

    # Positional query words
    add('query', metavar='QUERY', type=str, nargs='*', help='the question to answer')

    # Answer selection
    add('-p', '--pos',
        help='select answer in specified position (default: 1)',
        default=1, type=IntRange(1, 20), metavar='POS')
    add('-n', '--num',
        help='number of answers to return (default: 1)',
        dest='num_answers', default=1, type=IntRange(1, 20), metavar='NUM')
    add('--num-answers', help=argparse.SUPPRESS)

    # Output options
    add('-a', '--all', help='display the full text of the answer', action='store_true')
    add('-l', '--link', help='display only the answer link', action='store_true')
    add('-c', '--color', help='enable colorized output', action='store_true')
    add('-x', '--explain', help='explain how answer was chosen', action='store_true')
    add('-C', '--clear-cache', help='clear the cache', action='store_true')
    add('-j', '--json',
        help='return answers in raw json format',
        dest='json_output', action='store_true')
    add('--json-output', action='store_true', help=argparse.SUPPRESS)
    add('-v', '--version', help='display the current version of howdoi', action='store_true')
    add('-e', '--engine',
        help='search engine for this query (google, bing, duckduckgo)',
        dest='search_engine', nargs="?", metavar='ENGINE')

    # Stash management
    add('--save', '--stash', help='stash a howdoi answer', action='store_true')
    add('--view', help='view your stash', action='store_true')
    add('--remove', help='remove an entry in your stash', action='store_true')
    add('--empty', help='empty your stash', action='store_true')

    # Hidden maintenance flag
    add('--sanity-check', help=argparse.SUPPRESS, action='store_true')
    return parser


def _sanity_check(engine, test_query=None):
    """Run a test query against ``engine`` and raise if the output looks like an error.

    Args:
        engine: name of the search engine to check.
        test_query: query string to run; defaults to ``'format date bash'``.

    Raises:
        GoogleValidationError, BingValidationError or DDGValidationError
        (chosen by ``engine``) when the output contains ``"Sorry"`` or
        ``"Unable to"``.
    """
    parser = get_parser()
    if not test_query:
        test_query = 'format date bash'

    args = vars(parser.parse_args(test_query.split()))
    args['search_engine'] = engine

    result = howdoi(args)
    # Perhaps better to use `-j` and then check for an error message
    # rather than trying to enumerate all the error strings
    # An explicit check is used instead of `assert` so it still runs under `python -O`.
    if "Sorry" not in result and "Unable to" not in result:
        return

    if engine == 'google':
        raise GoogleValidationError
    if engine == 'bing':
        raise BingValidationError
    raise DDGValidationError


def prompt_stash_remove(args, stash_list, view_stash=True):
    """Ask the user which stash entry to remove and remove it.

    The stash is printed first when ``view_stash`` is true. Entering ``0``
    cancels. An out-of-range or non-integer entry logs an error and prompts
    again without re-printing the stash.
    """
    if view_stash:
        print_stash(stash_list)

    last_index = len(stash_list)
    prompt = f'{BOLD}> Select a stash command to remove [1-{last_index}] (0 to cancel): {END_FORMAT}'
    user_input = input(prompt)

    try:
        selection = int(user_input)
        if selection == 0:
            return
        if not 1 <= selection <= last_index:
            logging.error('\n%sInput index is invalid.%s', RED, END_FORMAT)
            prompt_stash_remove(args, stash_list, False)
            return
        entry = stash_list[selection - 1]
        _stash_remove(entry['command'], entry['fields']['alias'])
    except ValueError:
        logging.error('\n%sInvalid input. Must specify index of command.%s', RED, END_FORMAT)
        prompt_stash_remove(args, stash_list, False)


def perform_sanity_check():
    '''Run the sanity check for each checked engine.
    Returns the exit code for the program: 0 when every check passed, -1 when a
    validation error was encountered.
    '''
    global cache  # pylint: disable=global-statement,invalid-name
    # Swap in a null cache so the checks never see cached answers
    cache = NullCache()

    failed = False
    for engine in ['google']:  # 'bing' and 'duckduckgo' throw various block errors
        print(f'Checking {engine}...')
        try:
            _sanity_check(engine)
        except (GoogleValidationError, BingValidationError, DDGValidationError):
            logging.error('%s%s query failed%s', RED, engine, END_FORMAT)
            failed = True

    if failed:
        return -1
    print(f'{GREEN}Ok{END_FORMAT}')
    return 0


def command_line_runner():  # pylint: disable=too-many-return-statements,too-many-branches
    """Entry point for the command line: parse ``sys.argv`` and act on the flags.

    The flags are handled in order and most of them return early: version,
    sanity check (exits the process with the check's exit code), stash view,
    stash empty, and interactive stash removal. Clearing the cache does not
    return, so a query given alongside it is still answered. Without a query
    the help text is printed. Otherwise the query is answered and written to
    standard output.
    """
    parser = get_parser()
    args = vars(parser.parse_args())

    if args['version']:
        print(__version__)
        return

    if args['explain']:
        # --explain surfaces the INFO-level log messages
        logging.getLogger().setLevel(logging.INFO)
        logging.info('Version: %s', __version__)

    if args['sanity_check']:
        sys.exit(
            perform_sanity_check()
        )

    if args['clear_cache']:
        if _clear_cache():
            print(f'{GREEN}Cache cleared successfully{END_FORMAT}')
        else:
            logging.error('%sClearing cache failed%s', RED, END_FORMAT)

    if args[STASH_VIEW]:
        print_stash()
        return

    if args[STASH_EMPTY]:
        os.system('keep init')
        return

    # --remove without a query: let the user pick the entry interactively
    if args[STASH_REMOVE] and len(args['query']) == 0:
        commands = keep_utils.read_commands()
        if commands is None or len(commands.items()) == 0:
            logging.error('%sNo commands found in stash. '
                          'Add a command with "howdoi --%s <query>".%s', RED, STASH_SAVE, END_FORMAT)
            return
        stash_list = [{'command': cmd, 'fields': field} for cmd, field in commands.items()]
        prompt_stash_remove(args, stash_list)
        return

    if not args['query']:
        parser.print_help()
        return

    # The environment variable forces colour on regardless of the -c flag
    if os.getenv('HOWDOI_COLORIZE'):
        args['color'] = True

    howdoi_result = howdoi(args)

    if os.name == 'nt':
        # Windows
        print(howdoi_result)
    else:
        # Characters that cannot be encoded are dropped ('ignore')
        utf8_result = howdoi_result.encode('utf-8', 'ignore')
        # Write UTF-8 to stdout: https://stackoverflow.com/a/3603160
        sys.stdout.buffer.write(utf8_result)

    # close the session to release connection
    howdoi_session.close()


# Run the command-line interface only when this file is executed as a script
if __name__ == '__main__':
    command_line_runner()


================================================
FILE: mkdocs.yml
================================================
site_name: howdoi

theme:
  name: "material"
  palette:
    primary: ""
    accent: "teal"
  icon:
    repo: fontawesome/brands/github

repo_name: gleitz/howdoi
repo_url: https://github.com/gleitz/howdoi
edit_uri: ""

nav:
  - howdoi: index.md
  - Introduction: introduction.md
  - Usage: usage.md
  - Setting up development environment: development_env.md
  - Contributing: contributing_to_howdoi.md
  - Contributing documentation: contributing_docs.md
  - Extension development: extension_dev.md
  - Howdoi advanced usage: howdoi_advanced_usage.md
  - Troubleshooting: troubleshooting.md
  - Development for Windows: windows-contributing.md

markdown_extensions:
  - toc:
      permalink: true
  - markdown.extensions.codehilite:
      guess_lang: false
  - admonition
  - codehilite
  - extra
  - pymdownx.snippets:
      base_path: docs
  - pymdownx.superfences:
      custom_fences:
        - name: mermaid
          class: mermaid
          format: !!python/name:pymdownx.superfences.fence_div_format
  - pymdownx.tabbed

extra:
  social:
    - icon: fontawesome/brands/github
      link: "https://github.com/gleitz/howdoi"


================================================
FILE: notebooks/language_labelling.ipynb
================================================
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Copy of HowDoI-NLP.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "toc_visible": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "b9f3692aa5394af38fbf8701f9d1a2f5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_20cc5addfd6a404abd07e3e7410604bf",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_193cdcb227f4436ab295a1d03c935bc7",
              "IPY_MODEL_e3736ea8f8f1433fa58a2c7cefbc9d43"
            ]
          }
        },
        "20cc5addfd6a404abd07e3e7410604bf": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "193cdcb227f4436ab295a1d03c935bc7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_0214896e38b7475b9810990cd98bcfbe",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 231508,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 231508,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_9d6b675edcf94469a3665d8ebd699cd6"
          }
        },
        "e3736ea8f8f1433fa58a2c7cefbc9d43": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_2a6a7cec7ce9445a854b6378fe5c7d69",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 232k/232k [00:00&lt;00:00, 885kB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_af203886f4db407f92d990d4d421c207"
          }
        },
        "0214896e38b7475b9810990cd98bcfbe": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "9d6b675edcf94469a3665d8ebd699cd6": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "2a6a7cec7ce9445a854b6378fe5c7d69": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "af203886f4db407f92d990d4d421c207": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "89764229f6bb4419927cd6499b252cc1": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_489bcf4ae407408fa0b4f18193277f3d",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_c907be1df1ef42cda9d7a45ce3ae50ff",
              "IPY_MODEL_1f0711de1dde4e12b0ff8f7f11f38e5d"
            ]
          }
        },
        "489bcf4ae407408fa0b4f18193277f3d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "c907be1df1ef42cda9d7a45ce3ae50ff": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_420b462bf1dc498082e1c341e9649e34",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 466062,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 466062,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_e47fef1e664a4a608cd595eb476b84d0"
          }
        },
        "1f0711de1dde4e12b0ff8f7f11f38e5d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_fc1006302b0741b18c5719d6b46b508b",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 466k/466k [00:00&lt;00:00, 1.41MB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_20700e5b87dd4845b9d451a4e3a81794"
          }
        },
        "420b462bf1dc498082e1c341e9649e34": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "e47fef1e664a4a608cd595eb476b84d0": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "fc1006302b0741b18c5719d6b46b508b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "20700e5b87dd4845b9d451a4e3a81794": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "723f45572f9845f7a5a43d028acabd37": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_fd385e56155444c6ba0b70f461b014ef",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_d032b636ac73444ea8e13ec9ee1866d8",
              "IPY_MODEL_f6501a7bc274434eae206e1e1fb2e00a"
            ]
          }
        },
        "fd385e56155444c6ba0b70f461b014ef": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "d032b636ac73444ea8e13ec9ee1866d8": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_7b3318ea2d8e49e186f47c5792cbe30b",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 28,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 28,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_e9a8d590fca842859822b4cad55b1a5f"
          }
        },
        "f6501a7bc274434eae206e1e1fb2e00a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_f9924cd4fc98461c82148ac3a38a845d",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 28.0/28.0 [00:00&lt;00:00, 673B/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_fd9e3dfc77e243ecb7f6257b6700e728"
          }
        },
        "7b3318ea2d8e49e186f47c5792cbe30b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "e9a8d590fca842859822b4cad55b1a5f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "f9924cd4fc98461c82148ac3a38a845d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "fd9e3dfc77e243ecb7f6257b6700e728": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "7a54279586bb4c87a52746f605f7b3e2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_6a7acabfc3f24d3395c0cc7cde68a43a",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_b9c6899ed3dd489d85b201b51a6f35c3",
              "IPY_MODEL_b413ba22912c4149aeea6824ccbc90c1"
            ]
          }
        },
        "6a7acabfc3f24d3395c0cc7cde68a43a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "b9c6899ed3dd489d85b201b51a6f35c3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_6151969c55174665964db6f97bd18884",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 442,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 442,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_f97dbff45ea347f380b3d3e2bf9b60a5"
          }
        },
        "b413ba22912c4149aeea6824ccbc90c1": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_6bc47beb071c4884a4ddb8a7dd669222",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 442/442 [00:00&lt;00:00, 12.0kB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_6acde23812d44176829ce106ea626f09"
          }
        },
        "6151969c55174665964db6f97bd18884": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "f97dbff45ea347f380b3d3e2bf9b60a5": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "6bc47beb071c4884a4ddb8a7dd669222": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "6acde23812d44176829ce106ea626f09": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "6e48979f9d49495da15e278aafd97a8b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_73a609c59da744088c8def134a0acebc",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_3e978968f8814d4e9cb0cce52fe25f53",
              "IPY_MODEL_5b91aa38af194763b33168e24eb80b91"
            ]
          }
        },
        "73a609c59da744088c8def134a0acebc": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "3e978968f8814d4e9cb0cce52fe25f53": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_27cc2164a06a40a0b6e093bd1f642fce",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 267967963,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 267967963,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_1b013fd1c8ce45c992ed4dba0bcf2393"
          }
        },
        "5b91aa38af194763b33168e24eb80b91": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_daedb950ac444ef3aa764b29f3e84052",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 268M/268M [00:05&lt;00:00, 50.3MB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_579a1b0eff7b425a9f22a18506fb7033"
          }
        },
        "27cc2164a06a40a0b6e093bd1f642fce": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "1b013fd1c8ce45c992ed4dba0bcf2393": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "daedb950ac444ef3aa764b29f3e84052": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "579a1b0eff7b425a9f22a18506fb7033": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        }
      }
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "co5T4uL_pXfO"
      },
      "source": [
        "## Fetch data from disk"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Pjc-a5jWpdzy",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "192951ce-5610-4f4b-b7eb-ef9fb6c9ece7"
      },
      "source": [
        "from google.colab import drive\n",
        "drive.mount('/content/drive')"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Mounted at /content/drive\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "VfZhymMbzuSg",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "41cf4baf-003f-4b50-9e65-5be0abce2a5c"
      },
      "source": [
        "!pip install transformers"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Collecting transformers\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/81/91/61d69d58a1af1bd81d9ca9d62c90a6de3ab80d77f27c5df65d9a2c1f5626/transformers-4.5.0-py3-none-any.whl (2.1MB)\n",
            "\u001b[K     |████████████████████████████████| 2.2MB 9.4MB/s \n",
            "\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.41.1)\n",
            "Collecting sacremoses\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/08/cd/342e584ee544d044fb573ae697404ce22ede086c9e87ce5960772084cad0/sacremoses-0.0.44.tar.gz (862kB)\n",
            "\u001b[K     |████████████████████████████████| 870kB 41.6MB/s \n",
            "\u001b[?25hCollecting tokenizers<0.11,>=0.10.1\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/ae/04/5b870f26a858552025a62f1649c20d29d2672c02ff3c3fb4c688ca46467a/tokenizers-0.10.2-cp37-cp37m-manylinux2010_x86_64.whl (3.3MB)\n",
            "\u001b[K     |████████████████████████████████| 3.3MB 42.0MB/s \n",
            "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from transformers) (20.9)\n",
            "Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from transformers) (3.8.1)\n",
            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.19.5)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.0.12)\n",
            "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n",
            "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n",
            "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.0.1)\n",
            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n",
            "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n",
            "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2020.12.5)\n",
            "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->transformers) (2.4.7)\n",
            "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < \"3.8\"->transformers) (3.4.1)\n",
            "Requirement already satisfied: typing-extensions>=3.6.4; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < \"3.8\"->transformers) (3.7.4.3)\n",
            "Building wheels for collected packages: sacremoses\n",
            "  Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for sacremoses: filename=sacremoses-0.0.44-cp37-none-any.whl size=886084 sha256=8188b4a1fe7b93d2ceab6b8c4c244e549efa60bdfd85aed0aa3020e99990acc0\n",
            "  Stored in directory: /root/.cache/pip/wheels/3e/fb/c0/13ab4d63d537658f448366744654323077c4d90069b6512f3c\n",
            "Successfully built sacremoses\n",
            "Installing collected packages: sacremoses, tokenizers, transformers\n",
            "Successfully installed sacremoses-0.0.44 tokenizers-0.10.2 transformers-4.5.0\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "lLppqXoRSjeO"
      },
      "source": [
        "## Data Prep"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "VbcL94ugTLQ7"
      },
      "source": [
        "import pandas as pd\n",
        "from torch.utils.data import Dataset, IterableDataset, DataLoader, get_worker_info\n",
        "from transformers import DistilBertTokenizerFast\n",
        "import math"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 210,
          "referenced_widgets": [
            "b9f3692aa5394af38fbf8701f9d1a2f5",
            "20cc5addfd6a404abd07e3e7410604bf",
            "193cdcb227f4436ab295a1d03c935bc7",
            "e3736ea8f8f1433fa58a2c7cefbc9d43",
            "0214896e38b7475b9810990cd98bcfbe",
            "9d6b675edcf94469a3665d8ebd699cd6",
            "2a6a7cec7ce9445a854b6378fe5c7d69",
            "af203886f4db407f92d990d4d421c207",
            "89764229f6bb4419927cd6499b252cc1",
            "489bcf4ae407408fa0b4f18193277f3d",
            "c907be1df1ef42cda9d7a45ce3ae50ff",
            "1f0711de1dde4e12b0ff8f7f11f38e5d",
            "420b462bf1dc498082e1c341e9649e34",
            "e47fef1e664a4a608cd595eb476b84d0",
            "fc1006302b0741b18c5719d6b46b508b",
            "20700e5b87dd4845b9d451a4e3a81794",
            "723f45572f9845f7a5a43d028acabd37",
            "fd385e56155444c6ba0b70f461b014ef",
            "d032b636ac73444ea8e13ec9ee1866d8",
            "f6501a7bc274434eae206e1e1fb2e00a",
            "7b3318ea2d8e49e186f47c5792cbe30b",
            "e9a8d590fca842859822b4cad55b1a5f",
            "f9924cd4fc98461c82148ac3a38a845d",
            "fd9e3dfc77e243ecb7f6257b6700e728"
          ]
        },
        "id": "nplakpGpWubB",
        "outputId": "e3a3a5eb-d8d7-48e3-d50b-55ae1c801a33"
      },
      "source": [
        "ALL_LANGS = [\"A# .NET\",\"A# (Axiom)\",\"A-0 System\",\"A+\",\"A++\",\"ABAP\",\"ABC\",\"ABC ALGOL\",\"ABLE\",\"ABSET\",\"ABSYS\",\"ACC\",\"Accent\",\"Ace DASL\",\"ACL2\",\"ACT-III\",\"Action!\",\"ActionScript\",\"Ada\",\"Adenine\",\"Agda\",\"Agilent VEE\",\"Agora\",\"AIMMS\",\"Alef\",\"ALF\",\"ALGOL 58\",\"ALGOL 60\",\"ALGOL 68\",\"ALGOL W\",\"Alice\",\"Alma-0\",\"AmbientTalk\",\"Amiga E\",\"AMOS\",\"AMPL\",\"APL\",\"App Inventor for Android's visual block language\",\"AppleScript\",\"Arc\",\"ARexx\",\"Argus\",\"AspectJ\",\"Assembly language\",\"ATS\",\"Ateji PX\",\"AutoHotkey\",\"Autocoder\",\"AutoIt\",\"AutoLISP / Visual LISP\",\"Averest\",\"AWK\",\"Axum\",\"B\",\"Babbage\",\"Bash\",\"BASIC\",\"bc\",\"BCPL\",\"BeanShell\",\"Batch (Windows/Dos)\",\"Bertrand\",\"BETA\",\"Bigwig\",\"Bistro\",\"BitC\",\"BLISS\",\"Blue\",\"Bon\",\"Boo\",\"Boomerang\",\"Bourne shell\",\"bash\",\"ksh\",\"BREW\",\"BPEL\",\"C\",\"C--\",\"C++\",\"C#\",\"C/AL\",\"Caché ObjectScript\",\"C Shell\",\"Caml\",\"Candle\",\"Cayenne\",\"CDuce\",\"Cecil\",\"Cel\",\"Cesil\",\"Ceylon\",\"CFEngine\",\"CFML\",\"Cg\",\"Ch\",\"Chapel\",\"CHAIN\",\"Charity\",\"Charm\",\"Chef\",\"CHILL\",\"CHIP-8\",\"chomski\",\"ChucK\",\"CICS\",\"Cilk\",\"CL\",\"Claire\",\"Clarion\",\"Clean\",\"Clipper\",\"CLIST\",\"Clojure\",\"CLU\",\"CMS-2\",\"COBOL\",\"Cobra\",\"CODE\",\"CoffeeScript\",\"Cola\",\"ColdC\",\"ColdFusion\",\"COMAL\",\"Combined Programming Language\",\"COMIT\",\"Common Intermediate Language\",\"Common Lisp\",\"COMPASS\",\"Component Pascal\",\"Constraint Handling Rules\",\"Converge\",\"Cool\",\"Coq\",\"Coral 66\",\"Corn\",\"CorVision\",\"COWSEL\",\"CPL\",\"csh\",\"CSP\",\"Csound\",\"CUDA\",\"Curl\",\"Curry\",\"Cyclone\",\"Cython\",\"D\",\"DASL\",\"DASL\",\"Dart\",\"DataFlex\",\"Datalog\",\"DATATRIEVE\",\"dBase\",\"dc\",\"DCL\",\"Deesel\",\"Delphi\",\"DinkC\",\"DIBOL\",\"Dog\",\"Draco\",\"DRAKON\",\"Dylan\",\"DYNAMO\",\"E\",\"E#\",\"Ease\",\"Easy PL/I\",\"Easy Programming Language\",\"EASYTRIEVE 
PLUS\",\"ECMAScript\",\"Edinburgh IMP\",\"EGL\",\"Eiffel\",\"ELAN\",\"Elixir\",\"Elm\",\"Emacs Lisp\",\"Emerald\",\"Epigram\",\"EPL\",\"Erlang\",\"es\",\"Escapade\",\"Escher\",\"ESPOL\",\"Esterel\",\"Etoys\",\"Euclid\",\"Euler\",\"Euphoria\",\"EusLisp Robot Programming Language\",\"CMS EXEC\",\"EXEC 2\",\"Executable UML\",\"F\",\"F#\",\"Factor\",\"Falcon\",\"Fancy\",\"Fantom\",\"FAUST\",\"Felix\",\"Ferite\",\"FFP\",\"Fjölnir\",\"FL\",\"Flavors\",\"Flex\",\"FLOW-MATIC\",\"FOCAL\",\"FOCUS\",\"FOIL\",\"FORMAC\",\"@Formula\",\"Forth\",\"Fortran\",\"Fortress\",\"FoxBase\",\"FoxPro\",\"FP\",\"FPr\",\"Franz Lisp\",\"Frege\",\"F-Script\",\"FSProg\",\"G\",\"Google Apps Script\",\"Game Maker Language\",\"GameMonkey Script\",\"GAMS\",\"GAP\",\"G-code\",\"Genie\",\"GDL\",\"Gibiane\",\"GJ\",\"GEORGE\",\"GLSL\",\"GNU E\",\"GM\",\"Go\",\"Go!\",\"GOAL\",\"Gödel\",\"Godiva\",\"GOM (Good Old Mad)\",\"Goo\",\"Gosu\",\"GOTRAN\",\"GPSS\",\"GraphTalk\",\"GRASS\",\"Groovy\",\"Hack (programming language)\",\"HAL/S\",\"Hamilton C shell\",\"Harbour\",\"Hartmann pipelines\",\"Haskell\",\"Haxe\",\"High Level Assembly\",\"HLSL\",\"Hop\",\"Hope\",\"Hugo\",\"Hume\",\"HyperTalk\",\"IBM Basic assembly language\",\"IBM HAScript\",\"IBM Informix-4GL\",\"IBM RPG\",\"ICI\",\"Icon\",\"Id\",\"IDL\",\"Idris\",\"IMP\",\"Inform\",\"Io\",\"Ioke\",\"IPL\",\"IPTSCRAE\",\"ISLISP\",\"ISPF\",\"ISWIM\",\"J\",\"J#\",\"J++\",\"JADE\",\"Jako\",\"JAL\",\"Janus\",\"JASS\",\"Java\",\"JavaScript\",\"JCL\",\"JEAN\",\"Join Java\",\"JOSS\",\"Joule\",\"JOVIAL\",\"Joy\",\"JScript\",\"JScript .NET\",\"JavaFX Script\",\"Julia\",\"Jython\",\"K\",\"Kaleidoscope\",\"Karel\",\"Karel++\",\"KEE\",\"Kixtart\",\"KIF\",\"Kojo\",\"Kotlin\",\"KRC\",\"KRL\",\"KUKA\",\"KRYPTON\",\"ksh\",\"L\",\"L# 
.NET\",\"LabVIEW\",\"Ladder\",\"Lagoona\",\"LANSA\",\"Lasso\",\"LaTeX\",\"Lava\",\"LC-3\",\"Leda\",\"Legoscript\",\"LIL\",\"LilyPond\",\"Limbo\",\"Limnor\",\"LINC\",\"Lingo\",\"Linoleum\",\"LIS\",\"LISA\",\"Lisaac\",\"Lisp\",\"Lite-C\",\"Lithe\",\"Little b\",\"Logo\",\"Logtalk\",\"LPC\",\"LSE\",\"LSL\",\"LiveCode\",\"LiveScript\",\"Lua\",\"Lucid\",\"Lustre\",\"LYaPAS\",\"Lynx\",\"M2001\",\"M4\",\"Machine code\",\"MAD\",\"MAD/I\",\"Magik\",\"Magma\",\"make\",\"Maple\",\"MAPPER\",\"MARK-IV\",\"Mary\",\"MASM Microsoft Assembly x86\",\"Mathematica\",\"MATLAB\",\"Maxima\",\"Macsyma\",\"Max\",\"MaxScript\",\"Maya (MEL)\",\"MDL\",\"Mercury\",\"Mesa\",\"Metacard\",\"Metafont\",\"MetaL\",\"Microcode\",\"MicroScript\",\"MIIS\",\"MillScript\",\"MIMIC\",\"Mirah\",\"Miranda\",\"MIVA Script\",\"ML\",\"Moby\",\"Model 204\",\"Modelica\",\"Modula\",\"Modula-2\",\"Modula-3\",\"Mohol\",\"MOO\",\"Mortran\",\"Mouse\",\"MPD\",\"CIL\",\"MSL\",\"MUMPS\",\"NASM\",\"NATURAL\",\"Napier88\",\"Neko\",\"Nemerle\",\"nesC\",\"NESL\",\"Net.Data\",\"NetLogo\",\"NetRexx\",\"NewLISP\",\"NEWP\",\"Newspeak\",\"NewtonScript\",\"NGL\",\"Nial\",\"Nice\",\"Nickle\",\"Nim\",\"NPL\",\"Not eXactly C\",\"Not Quite C\",\"NSIS\",\"Nu\",\"NWScript\",\"NXT-G\",\"o:XML\",\"Oak\",\"Oberon\",\"Obix\",\"OBJ2\",\"Object Lisp\",\"ObjectLOGO\",\"Object REXX\",\"Object Pascal\",\"Objective-C\",\"Objective-J\",\"Obliq\",\"Obol\",\"OCaml\",\"occam\",\"occam-π\",\"Octave\",\"OmniMark\",\"Onyx\",\"Opa\",\"Opal\",\"OpenCL\",\"OpenEdge ABL\",\"OPL\",\"OPS5\",\"OptimJ\",\"Orc\",\"ORCA/Modula-2\",\"Oriel\",\"Orwell\",\"Oxygene\",\"Oz\",\"P#\",\"ParaSail (programming 
language)\",\"PARI/GP\",\"Pascal\",\"Pawn\",\"PCASTL\",\"PCF\",\"PEARL\",\"PeopleCode\",\"Perl\",\"PDL\",\"PHP\",\"Phrogram\",\"Pico\",\"Picolisp\",\"Pict\",\"Pike\",\"PIKT\",\"PILOT\",\"Pipelines\",\"Pizza\",\"PL-11\",\"PL/0\",\"PL/B\",\"PL/C\",\"PL/I\",\"PL/M\",\"PL/P\",\"PL/SQL\",\"PL360\",\"PLANC\",\"Plankalkül\",\"Planner\",\"PLEX\",\"PLEXIL\",\"Plus\",\"POP-11\",\"PostScript\",\"PortablE\",\"Powerhouse\",\"PowerBuilder\",\"PowerShell\",\"PPL\",\"Processing\",\"Processing.js\",\"Prograph\",\"PROIV\",\"Prolog\",\"PROMAL\",\"Promela\",\"PROSE modeling language\",\"PROTEL\",\"ProvideX\",\"Pro*C\",\"Pure\",\"Python\",\"Q (equational programming language)\",\"Q (programming language from Kx Systems)\",\"Qalb\",\"QtScript\",\"QuakeC\",\"QPL\",\"R\",\"R++\",\"Racket\",\"RAPID\",\"Rapira\",\"Ratfiv\",\"Ratfor\",\"rc\",\"REBOL\",\"Red\",\"Redcode\",\"REFAL\",\"Reia\",\"Revolution\",\"rex\",\"REXX\",\"Rlab\",\"RobotC\",\"ROOP\",\"RPG\",\"RPL\",\"RSL\",\"RTL/2\",\"Ruby\",\"RuneScript\",\"Rust\",\"S\",\"S2\",\"S3\",\"S-Lang\",\"S-PLUS\",\"SA-C\",\"SabreTalk\",\"SAIL\",\"SALSA\",\"SAM76\",\"SAS\",\"SASL\",\"Sather\",\"Sawzall\",\"SBL\",\"Scala\",\"Scheme\",\"Scilab\",\"Scratch\",\"Script.NET\",\"Sed\",\"Seed7\",\"Self\",\"SenseTalk\",\"SequenceL\",\"SETL\",\"Shift Script\",\"SIMPOL\",\"SIGNAL\",\"SiMPLE\",\"SIMSCRIPT\",\"Simula\",\"Simulink\",\"SISAL\",\"SLIP\",\"SMALL\",\"Smalltalk\",\"Small Basic\",\"SML\",\"Snap!\",\"SNOBOL\",\"SPITBOL\",\"Snowball\",\"SOL\",\"Span\",\"SPARK\",\"Speedcode\",\"SPIN\",\"SP/k\",\"SPS\",\"Squeak\",\"Squirrel\",\"SR\",\"S/SL\",\"Stackless Python\",\"Starlogo\",\"Strand\",\"Stata\",\"Stateflow\",\"Subtext\",\"SuperCollider\",\"SuperTalk\",\"Swift (Apple programming language)\",\"Swift (parallel scripting 
language)\",\"SYMPL\",\"SyncCharts\",\"SystemVerilog\",\"T\",\"TACL\",\"TACPOL\",\"TADS\",\"TAL\",\"Tcl\",\"Tea\",\"TECO\",\"TELCOMP\",\"TeX\",\"TEX\",\"TIE\",\"Timber\",\"TMG\",\"Tom\",\"TOM\",\"Topspeed\",\"TPU\",\"Trac\",\"TTM\",\"T-SQL\",\"TTCN\",\"Turing\",\"TUTOR\",\"TXL\",\"TypeScript\",\"Turbo C++\",\"Ubercode\",\"UCSD Pascal\",\"Umple\",\"Unicon\",\"Uniface\",\"UNITY\",\"Unix shell\",\"UnrealScript\",\"Vala\",\"VBA\",\"VBScript\",\"Verilog\",\"VHDL\",\"Visual Basic\",\"Visual Basic .NET\",\"Visual DataFlex\",\"Visual DialogScript\",\"Visual Fortran\",\"Visual FoxPro\",\"Visual J++\",\"Visual J#\",\"Visual Objects\",\"Visual Prolog\",\"VSXu\",\"Vvvv\",\"WATFIV, WATFOR\",\"WebDNA\",\"WebQL\",\"Windows PowerShell\",\"Winbatch\",\"Wolfram\",\"Wyvern\",\"X++\",\"X#\",\"X10\",\"XBL\",\"XC\",\"XMOS architecture\",\"xHarbour\",\"XL\",\"Xojo\",\"XOTcl\",\"XPL\",\"XPL0\",\"XQuery\",\"XSB\",\"XSLT\",\"XPath\",\"Xtend\",\"Yorick\",\"YQL\",\"Z notation\",\"Zeno\",\"ZOPL\",\"ZPL\"]\n",
        "ALL_LANGS = list(map(lambda x: x.lower(), ALL_LANGS))\n",
        "ALL_LANGS_SET = set(ALL_LANGS)\n",
        "print(ALL_LANGS_SET)\n",
        "\n",
        "tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "{'k', 'gamemonkey script', 'prose modeling language', 'lua', 'x++', 'txl', 'joule', 'xotcl', 'opal', 'lite-c', 'algol 58', 'uniface', 'spin', 'icon', 'promela', 'g', 'chapel', 'nickle', 'nesc', 'boomerang', 'xl', 'l', 'averest', 'b', 'dataflex', 'datatrieve', 'smalltalk', 'mary', 'foxpro', 'prograph', 'moby', 'stackless python', 'assembly language', 'c/al', 'amiga e', 'caml', 'beanshell', 'rpg', 'ada', 'postscript', 'f#', 'snap!', 'iptscrae', 'dbase', 'ferite', 'ceylon', 'visual foxpro', 'redcode', 'quakec', 'mortran', 'kif', 'xpath', 'vbscript', 'goo', 'ace dasl', 'cg', 'lynx', 'sbl', 'yql', 'a+', 'csound', 'dibol', 'abc algol', 'elan', 'q (equational programming language)', 'spitbol', 'lansa', 'rapid', 'csp', 'octave', 'george', 'cil', 'bon', 'cfml', 'pipelines', 'karel++', 'charity', 'argus', 'arc', 'alice', 'a++', 'clu', 'elixir', 'simscript', 'lilypond', 'starlogo', 'pizza', 'pl/0', 'self', 'clojure', 'reia', 'masm microsoft assembly x86', 'maxscript', 'latex', 'ppl', 'tmg', 'fp', 'formac', 'squirrel', 'mirah', 'typescript', 'miis', 'able', 'turing', 'synccharts', 'salsa', 'cyclone', 'squeak', 'occam-π', 'turbo c++', 'teco', 't-sql', 'ratfiv', 'not exactly c', 'graphtalk', 'xquery', 'planc', 'powerhouse', 'ibm hascript', 'logo', 'e#', 'bliss', 'ttm', 'bash', 'krl', 'tads', 'c#', 'java', 'ubercode', 'simula', 'simulink', 'ops5', 'grass', 'sml', 'adenine', 'code', 'mapper', 'sp/k', 'alf', 'kaleidoscope', 'robotc', 'macsyma', 'bc', 'joss', 'portable', 'inform', 'lingo', 'pcastl', 'cool', 'drakon', 'high level assembly', 'vsxu', 'rebol', 'objective-j', 'zeno', 'algol 60', 'sed', 'ipl', 'o:xml', 'nxt-g', 'cms-2', 'phrogram', 'jade', 'proiv', 'ici', 'julia', 'leda', 'processing', 'autolisp / visual lisp', 'systemverilog', 'pure', 'swift (apple programming language)', 'fancy', 'strand', 'j#', 'orwell', 'pl/m', 'lithe', 'chill', 'tacpol', 'coldfusion', 'logtalk', 'modelica', 'prolog', 'scratch', 'algol 68', 'jako', 'not quite c', 'mouse', 'watfiv, 
watfor', 'mdl', 'shift script', 'j', 'jscript .net', 'vvvv', 'mohol', 'clipper', 'little b', 'modula', 'mimic', 'f', 'gödel', 'cobol', 'unity', 'nwscript', 'ratfor', 'supercollider', 'red', 'bitc', 'clarion', 'kotlin', 'arexx', 'pari/gp', 'obol', 'a-0 system', 'millscript', 'abc', 'newspeak', 'dcl', 'oriel', 'act-iii', 'godiva', 'limbo', 'joy', 'mathematica', 'delphi', 'idl', 'autohotkey', 'parasail (programming language)', 'magik', 'nim', 'ocaml', 'easy programming language', 'janus', 'goal', 'modula-3', 'coldc', 'gnu e', 'lse', 'oberon', 'genie', 'esterel', 'gotran', 'qpl', 'ateji px', 'hlsl', 'small basic', 'r', 'focal', 'cola', 'euphoria', 'acl2', 'brew', 'linc', 'orc', 'bourne shell', 'winbatch', 'coffeescript', 'stata', 'tal', 'mad/i', 'snowball', 'object rexx', 'visual prolog', 'pl/i', 'cics', 'cobra', 'haskell', 'visual j++', 'comit', 'cuda', 'snobol', 'lyapas', 'planner', 'curry', 'bigwig', 'javafx script', 'pop-11', 'visual dataflex', 'agora', 'egl', 'falcon', 'go', 'topspeed', 'hope', 'visual basic .net', 'hypertalk', 'accent', 'dynamo', 'ffp', 'natural', 'factor', 'ml', 'object lisp', 'tacl', 'caché objectscript', 'p#', 'sasl', 'tcl', 'mercury', 'fl', 'dog', 'simpol', 'amos', 'jass', 'epl', 'fantom', 'seed7', 'gpss', 'unicon', 'apl', 'pearl', 'abset', 'scala', 'neko', 'ladder', 'emacs lisp', 'stateflow', 'compass', 's3', 'aimms', 'lava', 'ucsd pascal', 'axum', 'visual objects', 'autoit', 'chain', 'a# .net', 'cel', 'setl', 'pl/p', 'candle', 'hume', 'awk', 'x10', 'nsis', 'combined programming language', 'max', 'mark-iv', 'gosu', 'jscript', 'sabretalk', 'io', 'sawzall', 'z notation', 'absys', 'easytrieve plus', 'visual basic', 'iswim', 'rc', 'scheme', 'orca/modula-2', 'timber', 'chip-8', 'python', 'clist', 'trac', 'sensetalk', 'revolution', 'sas', 'ngl', 'hartmann pipelines', 'simple', 'oxygene', 'obliq', 'euslisp robot programming language', 'kee', 'sail', 'coq', 'nial', 'sps', 'agilent vee', 'lc-3', 'pcf', 'bistro', 'ease', 'roop', 'lis', 'erlang', 
'optimj', 'pawn', 's', 'join java', 's2', 'c', 'haxe', 'fortran', 'cfengine', 'boo', 'escher', 'executable uml', 'deesel', 'metafont', 'xsb', 'powershell', 'swift (parallel scripting language)', 'tutor', 'cecil', 'zpl', 's-plus', 'onyx', 'm2001', 'miranda', 'fjölnir', 'matlab', 'fsprog', 'objectlogo', 'e', 'pascal', 'pl/sql', 'plexil', 'moo', 'maple', 'yorick', 'abap', 'miva script', 'autocoder', 'hal/s', 'j++', 'imp', 'c++', 'ibm rpg', 'speedcode', 'tpu', 'visual j#', 'modula-2', 'xbl', 'pdl', 'franz lisp', 'gj', 'groovy', 'napier88', 'tex', 'flow-matic', 'xmos architecture', 'claire', 'php', 'lpc', 'lucid', 'gams', 'gom (good old mad)', 'g-code', 'msl', 'cowsel', 'ats', 'xharbour', 'foxbase', 'forth', 'oak', 'ruby', 'plankalkül', 'ampl', 'cms exec', 'labview', 'model 204', 'a# (axiom)', 'hamilton c shell', 'sequencel', 'peoplecode', 'ibm basic assembly language', 'c shell', 'sam76', 'unix shell', 'webdna', 'algol w', 'visual dialogscript', 'mpd', 'beta', 'nice', 'signal', 'dasl', 'tie', 'legoscript', 'limnor', 'ttcn', 's-lang', 'nesl', 'bpel', 'common intermediate language', 'flavors', 'edinburgh imp', 'csh', 'euler', 'm4', 'pico', 'vhdl', 'protel', 'sa-c', 'lustre', 'mad', 'obj2', 'common lisp', 'newtonscript', 'krc', 'etoys', 'opl', 'powerbuilder', 'racket', 'windows powershell', 'converge', 'nasm', 'xc', 'f-script', 'livecode', 'espol', 'sol', 'script.net', 'sisal', 'batch (windows/dos)', 'microcode', 'rex', 'refal', 'rlab', 'lagoona', 'lisaac', 'picolisp', 'providex', 'r++', 'machine code', 'cesil', 'vala', 'es', 'obix', 'xpl0', 'alef', 'cduce', 'newp', 'lisp', 'cayenne', 'constraint handling rules', 'sr', 'subtext', 'cilk', 'dc', 'epigram', 'jal', 'kixtart', 'cpl', 'opa', 'felix', '@formula', 'hugo', 'plex', 'gibiane', 'maya (mel)', 'zopl', 'jython', 's/sl', 'jean', 'qtscript', 'rapira', 'supertalk', 'nu', 'ioke', 'gm', 'clean', 'microscript', 'promal', 'harbour', 'webql', 'scilab', 'rsl', 'xojo', 'applescript', 'foil', 'jovial', 'corvision', 'oz', 'coral 
66', 'eiffel', 'maxima', 'dylan', 'pl/b', 'component pascal', 'npl', 'draco', 'opencl', 'perl', 'krypton', 'pl-11', 'xpl', 'netrexx', 'game maker language', 'umple', 'kojo', 'lsl', 'faust', 'gdl', 'object pascal', 'idris', 'unrealscript', 'action!', \"app inventor for android's visual block language\", 'focus', 'rexx', 'glsl', 'metal', 'sather', 'newlisp', 'islisp', 'flex', 'ambienttalk', 'pro*c', 'kuka', 'charm', 'openedge abl', 'xslt', 'occam', 'telcomp', 'dinkc', 'dart', 'bertrand', 'ksh', 'c--', 'lasso', 'slip', 'ibm informix-4gl', 'chuck', 'd', 'go!', 'fortress', 'l# .net', 'comal', 'q (programming language from kx systems)', 'xtend', 't', 'livescript', 'pict', 'ispf', 'vba', 'alma-0', 'pike', 'curl', 'small', 'rust', 'mesa', 'rtl/2', 'hop', 'javascript', 'lil', 'exec 2', 'pikt', 'frege', 'qalb', 'bcpl', 'chomski', 'aspectj', 'rpl', 'fpr', 'plus', 'gap', 'tea', 'wyvern', 'cl', 'babbage', 'x#', 'ch', 'agda', 'cython', 'pl/c', 'elm', 'basic', 'nemerle', 'lisa', 'netlogo', 'euclid', 'pilot', 'tom', 'actionscript', 'processing.js', 'corn', 'metacard', 'sympl', 'magma', 'visual fortran', 'make', 'span', 'ecmascript', 'jcl', 'emerald', 'google apps script', 'spark', 'net.data', 'objective-c', 'blue', 'omnimark', 'mumps', 'runescript', 'verilog', 'karel', 'id', 'escapade', 'linoleum', 'easy pl/i', 'wolfram', 'datalog', 'chef', 'acc', 'pl360', 'hack (programming language)'}\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "b9f3692aa5394af38fbf8701f9d1a2f5",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "89764229f6bb4419927cd6499b252cc1",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=466062.0, style=ProgressStyle(descripti…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "723f45572f9845f7a5a43d028acabd37",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=28.0, style=ProgressStyle(description_w…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "gl4s2lbNNo_f"
      },
      "source": [
        "!cp ./drive/MyDrive/howdoi_train.csv ./\n",
        "!cp ./drive/MyDrive/howdoi_test.csv ./"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "bPeuuANP_Kj_",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "93c5bd8c-45ed-47ad-fea6-15e6637dbb2b"
      },
      "source": [
        "# do lazy loading with h5py to save memory\n",
        "'''\n",
        "import h5py\n",
        "import numpy as np\n",
        "\n",
        "import subprocess\n",
        "train_path, test_path = \"./howdoi_train.csv\",  \"./howdoi_test.csv\"\n",
        "h5_train_path, h5_test_path = \"./data_tr.h5\", \"./data_ts.h5\"\n",
        "\n",
        "# This is just an arbitrary large number; data of this size (short strings)\n",
        "#   doesn't take much RAM, so we may not need to read it in chunks at all.\n",
        "chunksize = 1000 * 10000\n",
        "\n",
        "# hacky way of counting the lines in the file without loading it into memory\n",
        "lines_train = subprocess.check_output(['wc', '-l', train_path])\n",
        "lines_train = int(lines_train.split()[0])\n",
        "\n",
        "# h5 is a format you can read from without loading up the data in memory\n",
        "#   so it's perfect for huge datasets\n",
        "\n",
        "# NOTE: this will take a minute or so\n",
        "with h5py.File(h5_train_path, 'w') as h5f:\n",
        "    # use num_features if the csv file has no column header\n",
        "    texts = h5f.create_dataset(\"text-train\",\n",
        "                               shape=(lines_train,),\n",
        "                               compression=None,\n",
        "                               dtype=h5py.string_dtype('utf-8'))\n",
        "    labels = h5f.create_dataset(\"label-train\",\n",
        "                               shape=(lines_train,),\n",
        "                               compression=None,\n",
        "                               dtype=\"bool\")\n",
        "\n",
        "    # read the file (lines_train lines) in chunks of chunksize rows\n",
        "    for i in range(1, lines_train, chunksize):  \n",
        "\n",
        "        df = pd.read_csv(\n",
        "          train_path,  \n",
        "          header=None, # we ignore the header by starting the loop from row 1\n",
        "          nrows=chunksize,\n",
        "          skiprows=i\n",
        "        )\n",
        "        \n",
        "        titles = df.values[:, -2]\n",
        "\n",
        "        # you don't have to do this at this step, you could also just store\n",
        "        #   this as a string, like in the original csv\n",
        "        has_tags = [\n",
        "          len(set(str(t).lower().split('|')).intersection(ALL_LANGS_SET)) > 0\n",
        "          for t in df.values[:, -1]\n",
        "        ]\n",
        "        print(has_tags)\n",
        "\n",
        "        items_num = len(titles)\n",
        "\n",
        "        # this fills in the current chunk of the h5 file\n",
        "        texts[i-1:i-1+items_num] = titles\n",
        "        labels[i-1:i-1+items_num] = has_tags\n",
        "\n",
        "# Create test set\n",
        "\n",
        "'''"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "IOPub data rate exceeded.\n",
            "The notebook server will temporarily stop sending output\n",
            "to the client in order to avoid crashing it.\n",
            "To change this limit, set the config variable\n",
            "`--NotebookApp.iopub_data_rate_limit`.\n",
            "\n",
            "Current values:\n",
            "NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)\n",
            "NotebookApp.rate_limit_window=3.0 (secs)\n",
            "\n",
            "IOPub data rate exceeded.\n",
            "The notebook server will temporarily stop sending output\n",
            "to the client in order to avoid crashing it.\n",
            "To change this limit, set the config variable\n",
            "`--NotebookApp.iopub_data_rate_limit`.\n",
            "\n",
            "Current values:\n",
            "NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)\n",
            "NotebookApp.rate_limit_window=3.0 (secs)\n",
            "\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "IslPgrNQig6P"
      },
      "source": [
        "import h5py\n",
        "import numpy as np\n",
        "\n",
        "import subprocess\n",
        "train_path, test_path = \"./howdoi_train.csv\",  \"./howdoi_test.csv\"\n",
        "h5_train_path, h5_test_path = \"./data_tr.h5\", \"./data_ts.h5\"\n",
        "\n",
        "# this is just a random large number, this size of data (short strings)\n",
        "#   doesn't take much RAM, not even sure we have to read it in chunks at all\n",
        "chunksize = 1000 * 10000\n",
        "\n",
        "# hacky way of reading the length of the file without opening it\n",
        "lines_train = subprocess.check_output(['wc', '-l', train_path])\n",
        "lines_train = int(lines_train.split()[0])\n",
        "\n",
        "df_train = pd.read_csv(train_path)\n",
        "df_test = pd.read_csv(test_path)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "iybXPZ2WoFJP"
      },
      "source": [
        "df_train['tags'] = df_train['tags'].map(lambda x:\n",
        "          len(set(str(x).lower().split('|')).intersection(ALL_LANGS_SET)) > 0\n",
        "        )\n",
        "df_test['tags'] = df_test['tags'].map(lambda x:\n",
        "          len(set(str(x).lower().split('|')).intersection(ALL_LANGS_SET)) > 0\n",
        "        )"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "kA2tOyy2pyHa",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "outputId": "6c4a5775-a332-4674-c08e-eddb0b4ee430"
      },
      "source": [
        "df_test.head()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Unnamed: 0</th>\n",
              "      <th>Unnamed: 0.1</th>\n",
              "      <th>title</th>\n",
              "      <th>tags</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>11971400</td>\n",
              "      <td>11971400</td>\n",
              "      <td>Changing colors of shapes in HTML5 canvas</td>\n",
              "      <td>javascript|html|canvas|polygon</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>5433772</td>\n",
              "      <td>5433772</td>\n",
              "      <td>Where to look for DB file after update-database?</td>\n",
              "      <td>c#|.net|entity</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>8996304</td>\n",
              "      <td>8996304</td>\n",
              "      <td>Graddle missing transitive dependency</td>\n",
              "      <td>maven|gradle|transitive-dependency</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>7648213</td>\n",
              "      <td>7648213</td>\n",
              "      <td>laravel link does work but button does not</td>\n",
              "      <td>twitter-bootstrap|laravel</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>14123938</td>\n",
              "      <td>14123938</td>\n",
              "      <td>Elegant haskell case/error handling in sequent...</td>\n",
              "      <td>haskell</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   Unnamed: 0  ...                                tags\n",
              "0    11971400  ...      javascript|html|canvas|polygon\n",
              "1     5433772  ...                      c#|.net|entity\n",
              "2     8996304  ...  maven|gradle|transitive-dependency\n",
              "3     7648213  ...           twitter-bootstrap|laravel\n",
              "4    14123938  ...                             haskell\n",
              "\n",
              "[5 rows x 4 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 9
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "FRqvd1muijVE"
      },
      "source": [
        "import torch"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "qjtjK2tH6hdn"
      },
      "source": [
        "class QueryDataset(Dataset):\n",
        "  def __init__(self, filename, kind):\n",
        "    self.titles = df_train['title']\n",
        "    self.labels = df_train['tags']\n",
        "\n",
        "  def __len__(self):\n",
        "    return self.titles.shape[0]\n",
        "\n",
        "  def __getitem__(self, i):\n",
        "    # now the cool bit - read without loading the whole thing in memory!\n",
        "    title = self.titles[i]\n",
        "    label = self.labels[i].astype('bool')\n",
        "    label = 1 if label else 0\n",
        "    # encoded = tokenizer(title, truncation=True, padding=True)\n",
        "    out = {'title': title, 'label': label}\n",
        "    return out"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "c9OPoW4uivuu"
      },
      "source": [
        "def collate_fn(data):\n",
        "  titles, labels = [v['title'] for v in data], [v['label'] for v in data]\n",
        "  encoded = tokenizer(titles, truncation=True, padding=True)\n",
        "  # for k,v in encoded.items():\n",
        "  #   print(len(v[0]))\n",
        "  out = {k: torch.tensor(v) for k,v in encoded.items()}\n",
        "  out['labels'] = torch.tensor(labels)\n",
        "  return out"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "DxL9pAGSXyJZ",
        "outputId": "4e8fa188-eed7-454f-8e20-914152bfbb27"
      },
      "source": [
        "trainset = QueryDataset(h5_train_path, 'train')\n",
        "trainloader = DataLoader(trainset, batch_size=256, num_workers=2, shuffle=True,\n",
        "                        collate_fn=collate_fn) # This seemingly redundant collate_fn param actually helps avoid a RuntimeError - https://github.com/pytorch/pytorch/issues/42654#issuecomment-706926806\n",
        "for i, y in enumerate(trainloader):\n",
        "  print(y)\n",
        "  break"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "{'input_ids': tensor([[  101, 24357,  3746,  ...,     0,     0,     0],\n",
            "        [  101, 10463,  5164,  ...,     0,     0,     0],\n",
            "        [  101,  2129,  2000,  ...,     0,     0,     0],\n",
            "        ...,\n",
            "        [  101,  9585,  8011,  ...,     0,     0,     0],\n",
            "        [  101,  2054,  2024,  ...,     0,     0,     0],\n",
            "        [  101,  2129,  2064,  ...,     0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],\n",
            "        [1, 1, 1,  ..., 0, 0, 0],\n",
            "        [1, 1, 1,  ..., 0, 0, 0],\n",
            "        ...,\n",
            "        [1, 1, 1,  ..., 0, 0, 0],\n",
            "        [1, 1, 1,  ..., 0, 0, 0],\n",
            "        [1, 1, 1,  ..., 0, 0, 0]]), 'labels': tensor([0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,\n",
            "        0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0,\n",
            "        1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,\n",
            "        1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0,\n",
            "        1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1,\n",
            "        0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1,\n",
            "        1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1,\n",
            "        1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1,\n",
            "        1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,\n",
            "        1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1,\n",
            "        1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1])}\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "2nhCSTDlyC5m"
      },
      "source": [
        "## Model training code"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2z7cHC1xyAZr"
      },
      "source": [
        "from transformers import DistilBertForSequenceClassification, AdamW"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "HmD6NW1FyZjM",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 237,
          "referenced_widgets": [
            "7a54279586bb4c87a52746f605f7b3e2",
            "6a7acabfc3f24d3395c0cc7cde68a43a",
            "b9c6899ed3dd489d85b201b51a6f35c3",
            "b413ba22912c4149aeea6824ccbc90c1",
            "6151969c55174665964db6f97bd18884",
            "f97dbff45ea347f380b3d3e2bf9b60a5",
            "6bc47beb071c4884a4ddb8a7dd669222",
            "6acde23812d44176829ce106ea626f09",
            "6e48979f9d49495da15e278aafd97a8b",
            "73a609c59da744088c8def134a0acebc",
            "3e978968f8814d4e9cb0cce52fe25f53",
            "5b91aa38af194763b33168e24eb80b91",
            "27cc2164a06a40a0b6e093bd1f642fce",
            "1b013fd1c8ce45c992ed4dba0bcf2393",
            "daedb950ac444ef3aa764b29f3e84052",
            "579a1b0eff7b425a9f22a18506fb7033"
          ]
        },
        "outputId": "7e390c52-f065-460c-dd08-8af04ee0af4e"
      },
      "source": [
        "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
        "\n",
        "model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased')\n",
        "model.to(device)\n",
        "model.train()\n",
        "\n",
        "optim = AdamW(model.parameters(), lr=5e-5)\n",
        "\n",
        "for epoch in range(3):\n",
        "  for batch in trainloader:\n",
        "    optim.zero_grad()\n",
        "    input_ids = batch['input_ids'].to(device)\n",
        "    attention_mask = batch['attention_mask'].to(device)\n",
        "    labels = batch['labels'].to(device)\n",
        "    outputs = model(input_ids, attention_mask=attention_mask, labels=labels)\n",
        "    loss = outputs[0]\n",
        "    loss.backward()\n",
        "    optim.step()\n",
        "\n",
        "model.eval()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "7a54279586bb4c87a52746f605f7b3e2",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=442.0, style=ProgressStyle(description_…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "6e48979f9d49495da15e278aafd97a8b",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=267967963.0, style=ProgressStyle(descri…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']\n",
            "- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
            "- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
            "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n",
            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "mr_F0fiyzuy8"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}

================================================
FILE: requirements/common.txt
================================================
# Contains common requirements
Pygments>=2.3.1
argparse==1.4.0
cssselect==1.1.0
lxml>=4.2.5
pyquery==1.4.1
requests==2.31.0
cachelib==0.1.1
appdirs==1.4.4
keep==2.9
rich==12.6.0
colorama==0.4.4


================================================
FILE: requirements/dev.txt
================================================
# Contains development specific requirements and imports common requirements
flake8==5.0.4
nose2==0.12.0
pylint==2.15.10
pre-commit==2.17.0
twine==3.8.0
-r common.txt


================================================
FILE: requirements/prod.txt
================================================
# Contains production specific requirements and imports common requirements
-r common.txt

================================================
FILE: requirements.txt
================================================
# when adding a new dependency, also add to setup.py's `install_requires`
-r requirements/prod.txt

================================================
FILE: setup.py
================================================
#!/usr/bin/env python

import glob
import subprocess
from pathlib import Path
from distutils.cmd import Command  # pylint: disable=deprecated-module
from setuptools import setup, find_packages
# pylint: disable=unused-import
import fastentrypoints  # noqa: F401
# pylint: enable=unused-import
import howdoi


class Lint(Command):
    """A custom command that runs Flake8 and Pylint on the Python sources.

    Each linter is run in turn; a missing linter or a linter reporting
    problems never aborts the command, so the remaining linters still run.
    """
    description = 'run Flake8 on Python source files'
    user_options = []

    def initialize_options(self):
        """Nothing to initialise; the command defines no options."""

    def finalize_options(self):
        """Nothing to finalise; the command defines no options."""

    def run(self):
        """Invoke every configured linter, printing a short status for each."""
        # Argument vectors are built as lists rather than by splitting a
        # formatted string, so a globbed filename that contains whitespace is
        # passed to Pylint as a single argument.
        local_python_files = glob.glob('*.py')
        commands = {
            'Flake8': ['flake8', '--config=.flake8rc', '.'],
            'Pylint': ['pylint', 'howdoi', *local_python_files, '--rcfile=.pylintrc'],
        }

        for linter, command in commands.items():
            try:
                print(f'\nRunning {linter}...')
                subprocess.check_call(command)
                print(f'No lint errors found by {linter}')
            except FileNotFoundError:
                # The linter executable could not be found.
                print(f'{linter} not installed')
            except subprocess.CalledProcessError:
                # Non-zero exit status from the linter: deliberately ignored
                # so that the next linter still gets its turn.
                pass


def read(*names):
    """Return the text of the document found for each base name.

    For every entry of ``names`` the candidates ``<name>.txt`` and
    ``<name>.md`` are probed in that order; the first one that is an existing
    regular file is read as UTF-8.

    Returns:
        dict mapping each base name to the text that was read, or to ``''``
        when neither candidate exists.
    """
    def _load(stem):
        # First matching extension wins; no match yields an empty string.
        for suffix in ('.txt', '.md'):
            candidate = Path(stem + suffix)
            if candidate.is_file():
                return candidate.read_text(encoding='utf-8')
        return ''

    return {name: _load(name) for name in names}


# Long description for the package: the README text followed by the changelog
# under a "News" heading. read() maps a name to '' when it finds no matching
# file, so a missing README or CHANGES simply leaves that section empty.
# pylint: disable=consider-using-f-string
long_description = """
%(README)s

# News

%(CHANGES)s

""" % read('README', 'CHANGES')
# pylint: enable=consider-using-f-string


# Package metadata and install configuration for howdoi.
setup(
    name='howdoi',
    # The version is taken from the package itself (howdoi.__version__).
    version=howdoi.__version__,
    description='Instant coding answers via the command line',
    long_description=long_description,
    long_description_content_type='text/markdown',
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Environment :: Console",
        "Intended Audience :: Developers",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Topic :: Documentation",
    ],
    keywords='howdoi help console command line answer',
    author='Benjamin Gleitzman',
    author_email='gleitz@mit.edu',
    maintainer='Benjamin Gleitzman',
    maintainer_email='gleitz@mit.edu',
    url='https://github.com/gleitz/howdoi',
    license='MIT',
    packages=find_packages(),
    # Installs a `howdoi` console script that calls
    # howdoi.howdoi.command_line_runner.
    entry_points={
        'console_scripts': [
            'howdoi = howdoi.howdoi:command_line_runner',
        ]
    },
    # Runtime dependencies, listed without version pins here.
    install_requires=[
        'Pygments',
        'cssselect',
        'lxml',
        'pyquery',
        'requests',
        'cachelib',
        'appdirs',
        'keep',
        'rich',
        'colorama'
    ],
    # Registers the custom Lint command under the name `lint`.
    cmdclass={
        'lint': Lint
    }
)


================================================
FILE: test_howdoi.py
================================================
#!/usr/bin/env python

"""Tests for Howdoi."""
import gzip
import json
import os
import re
import unittest

from pathlib import Path
from unittest.mock import patch
import requests

from cachelib import NullCache
from pyquery import PyQuery as pq

# pylint: disable=no-name-in-module
from howdoi import howdoi


# pylint: disable=protected-access
original_get_result = howdoi._get_result


def _format_url_to_filename(url, file_ext='html'):
    filename = ''.join(ch for ch in url if ch.isalnum())
    return filename + '.' + file_ext


def _get_result_mock(url):
    """Stand-in for ``howdoi._get_result`` backed by gzipped files on disk.

    The page for ``url`` is looked up under ``howdoi.HTML_CACHE_PATH`` using
    the file name derived from the URL. When that file does not exist the
    page is fetched with the original ``_get_result``, written to the cache
    as gzipped UTF-8, and returned.
    """
    # pylint: disable=protected-access
    cache_name = _format_url_to_filename(url, 'html.gz')
    # pylint: disable=no-member
    cache_file = (Path(howdoi.HTML_CACHE_PATH) / Path(cache_name)).resolve()
    try:
        with gzip.open(cache_file, 'rb') as cached:
            return cached.read().decode('utf-8')
    except FileNotFoundError:
        # Cache miss: fetch with the real implementation and store the result.
        fetched = original_get_result(url)
        with gzip.open(cache_file, 'wb') as fresh:
            fresh.write(bytes(fetched, encoding='utf-8'))
        return fetched


# pylint: disable=protected-access
class HowdoiTestCase(unittest.TestCase):  # pylint: disable=too-many-public-methods

    def setUp(self):
        """Build the query fixtures and route page fetches through the mock."""
        # Query fixtures shared by the tests.
        self.queries = [
            'format date bash',
            'print stack trace python',
            'convert mp4 to animated gif',
            'create tar archive',
            'cat',
        ]
        self.help_queries = howdoi.SUPPORTED_HELP_QUERIES
        self.pt_queries = [
            'abrir arquivo em python',
            'enviar email em django',
            'hello world em c',
        ]
        self.bad_queries = ['moe', 'mel']
        self.query_without_code_or_pre_block = 'Difference between element node and Text Node'

        # ensure no cache is used during testing.
        howdoi.cache = NullCache()

        # Swap howdoi._get_result for a mock that delegates to the on-disk
        # page cache; the patcher is kept so tearDown can stop it.
        patcher = patch.object(howdoi, '_get_result')
        mocked = patcher.start()
        mocked.side_effect = _get_result_mock
        self.patcher_get_result = patcher
        self.mock_get_result = mocked

    def tearDown(self):
        """Undo the patch and clear the environment and engine state tests may set."""
        self.patcher_get_result.stop()
        # Drop the howdoi environment overrides if a test left them behind.
        for env_var in ('HOWDOI_URL', 'HOWDOI_SEARCH_ENGINE'):
            os.environ.pop(env_var, None)
        howdoi.BLOCKED_ENGINES = []

    def _negative_number_query(self):
        """Run the first fixture query with ``-n -1`` appended."""
        howdoi.howdoi(self.queries[0] + ' -n -1')

    def _high_positive_number_query(self):
        """Run the first fixture query with ``-n 21`` appended."""
        howdoi.howdoi(self.queries[0] + ' -n 21')

    def _negative_position_query(self):
        """Run the first fixture query with ``-p -2`` appended."""
        howdoi.howdoi(self.queries[0] + ' -p -2')

    def _high_positive_position_query(self):
        """Run the first fixture query with ``-p 40`` appended."""
        howdoi.howdoi(self.queries[0] + ' -p 40')

    def assertValidResponse(self, res):  # pylint: disable=invalid-name
        """Fail unless ``res`` is non-empty (``len(res) > 0``)."""
        # assertGreater reports the offending length on failure instead of
        # the uninformative "False is not true".
        self.assertGreater(len(res), 0)

    def test_get_link_at_pos(self):
        # Each case is (links, position, expected link). The second case asks
        # for position 2 of a one-element list and still expects that element.
        cases = (
            (['/questions/42/'], 1, '/questions/42/'),
            (['/questions/42/'], 2, '/questions/42/'),
            (['/howdoi', '/questions/42/'], 1, '/howdoi'),
            (['/howdoi', '/questions/42/'], 2, '/questions/42/'),
            (['/questions/42/', '/questions/142/'], 1, '/questions/42/'),
        )
        for links, position, expected in cases:
            self.assertEqual(howdoi.get_link_at_pos(links, position), expected)

    @patch.object(howdoi, '_get_result')
    def test_blockerror(self, mock_get_links):
        # With every page fetch raising HTTPError, howdoi is expected to
        # return its "no search engine responded" error text.
        mock_get_links.side_effect = requests.HTTPError
        expected = "ERROR: \x1b[91mUnable to get a response from any search engine\n\x1b[0m"
        self.assertEqual(howdoi.howdoi(self.queries[0]), expected)

    def test_answers(self):
        # Regular and "bad" fixture queries must all yield a non-empty
        # response.
        for query in self.queries + self.bad_queries:
            self.assertValidResponse(howdoi.howdoi(query))

        # Same check for the Portuguese queries with HOWDOI_URL overridden.
        os.environ['HOWDOI_URL'] = 'pt.stackoverflow.com'
        for query in self.pt_queries:
            self.assertValidResponse(howdoi.howdoi(query))

    def test_answers_bing(self):
        # Same checks as test_answers, with HOWDOI_SEARCH_ENGINE set to bing.
        os.environ['HOWDOI_SEARCH_ENGINE'] = 'bing'
        for query in self.queries + self.bad_queries:
            self.assertValidResponse(howdoi.howdoi(query))

        os.environ['HOWDOI_URL'] = 'pt.stackoverflow.com'
        for query in self.pt_queries:
            self.assertValidResponse(howdoi.howdoi(query))

        # Blank the engine override again once the checks have passed.
        os.environ['HOWDOI_SEARCH_ENGINE'] = ''

    # commenting out duckduckgo test, re-enable when issue #404 (duckduckgo blocking requests) is resolved

    # def test_answers_duckduckgo(self):
    #     os.environ['HOWDOI_SEARCH_ENGINE'] = 'duckduckgo'
    #     for query in self.queries:
    #         self.assertValidResponse(howdoi.howdoi(query))
    #     for query in self.bad_queries:
    #         self.assertValidResponse(howdoi.howdoi(query))

    #     os.environ['HOWDOI_URL'] = 'pt.stackoverflow.com'
    #     for query in self.pt_queries:
    #         self.assertValidResponse(howdoi.howdoi(query))

    #     os.environ['HOWDOI_SEARCH_ENGINE'] = ''

    def test_answer_links_using_l_option(self):
        # With -l the response must start with a question URL.
        # re.match anchors at the beginning of the response; the result is
        # checked with an identity test against None rather than `!=`.
        for query in self.queries:
            response = howdoi.howdoi(query + ' -l')
            self.assertIsNotNone(re.match(r'http.?://.*questions/\d.*', response, re.DOTALL))

    def test_answer_links_using_all_option(self):
        # With -a the response must contain a question URL somewhere; the
        # leading '.*' together with DOTALL lets it appear on any line.
        # The match is checked with an identity test against None.
        for query in self.queries:
            response = howdoi.howdoi(query + ' -a')
            self.assertIsNotNone(re.match(r'.*http.?://.*questions/\d.*', response, re.DOTALL))

    def test_position(self):
        # The answer requested with -p5 must differ from the default answer.
        base_query = self.queries[0]
        default_answer = howdoi.howdoi(base_query)
        fifth_answer = howdoi.howdoi(base_query + ' -p5')
        self.assertNotEqual(default_answer, fifth_answer)

    def test_all_text(self):
        # The -a output must differ from the default output and must contain
        # an "Answer from <url>" attribution.
        query = self.queries[0]
        first_answer = howdoi.howdoi(query)
        second_answer = howdoi.howdoi(query + ' -a')
        self.assertNotEqual(first_answer, second_answer)
        # Identity check against None instead of an equality comparison.
        self.assertIsNotNone(re.match('.*Answer from http.?://.*', second_answer, re.DOTALL))

    def test_json_output(self):
        # The first entry of the -j output must agree with the plain answer,
        # the -l link, and carry position 1.
        base_query = self.queries[0]
        plain_text = howdoi.howdoi(base_query)
        raw_json = howdoi.howdoi(base_query + ' -j')
        link_text = howdoi.howdoi(base_query + ' -l')
        first_entry = json.loads(raw_json)[0]
        expected_fields = (
            ("answer", plain_text),
            ("link", link_text),
            ("position", 1),
        )
        for field, expected in expected_fields:
            self.assertEqual(first_entry[field], expected)

    def test_multiple_answers(self):
        # Output requested with -n3 must differ from the default output.
        base_query = self.queries[0]
        single = howdoi.howdoi(base_query)
        several = howdoi.howdoi(base_query + ' -n3')
        self.assertNotEqual(single, several)

    def test_unicode_answer(self):
        # Each query must produce a truthy (non-empty) response.
        # unittest assertions are used instead of bare `assert` statements,
        # which are removed when Python runs with -O and would leave this
        # test checking nothing.
        self.assertTrue(howdoi.howdoi('make a log scale d3'))
        self.assertTrue(howdoi.howdoi('python unittest -n3'))
        self.assertTrue(howdoi.howdoi('parse html regex -a'))
        self.assertTrue(howdoi.howdoi('delete remote git branch -a'))

    def test_colorize(self):
        # Compare the default output with the output produced under -c.
        query = self.queries[0]
        normal = howdoi.howdoi(query)
        colorized = howdoi.howdoi('-c ' + query)

        # There is currently an issue with Github actions and colorization
        # so do not run checks if we are running in Github
        if "GITHUB_ACTION" not in os.environ:
            # The '[38;' marker must be absent from the plain output and
            # present in the -c output. assertIn/assertNotIn show the
            # offending text on failure, unlike assertTrue(find(...) == -1).
            self.assertNotIn('[38;', normal)
            self.assertIn('[38;', colorized)

    # pylint: disable=line-too-long
    def test_get_text_without_links(self):
        html = '''\n  <p>The halting problem is basically a\n  formal way of asking if you can tell\n  whether or not an arbitrary program\n  will eventually halt.</p>\n  \n  <p>In other words, can you write a\n  program called a halting oracle,\n  HaltingOracle(program, input), which\n  returns true if program(input) would\n  eventually halt, and which returns\n  false if it wouldn't?</p>\n  \n  <p>The answer is: no, you can't.</p>\n'''  # noqa: E501
        paragraph = pq(html)
        expected_output = '''The halting problem is basically a\n  formal way of asking if you can tell\n  whether or not an arbitrary program\n  will eventually halt.\n\n  \n  \nIn other words, can you write a\n  program called a halting oracle,\n  HaltingOracle(program, input), which\n  returns true if program(input) would\n  eventually halt, and which returns\n  false if it wouldn't?\n\n  \n  \nThe answer is: no, you can't.\n\n'''  # noqa: E501
        actual_output = howdoi.get_text(paragraph)
        self.assertEqual(actual_output, expected_output)

    def test_get_text_with_one_link(self):
        html = '<p>It\'s a <a href="http://paulirish.com/2010/the-protocol-relative-url/">protocol-relative URL</a> (typically HTTP or HTTPS). So if I\'m on <code>http://example.org</code> and I link (or include an image, script, etc.) to <code>//example.com/1.png</code>, it goes to <code>http://example.com/1.png</code>. If I\'m on <code>https://example.org</code>, it goes to <code>https://example.com/1.png</code>.</p>'  # noqa: E501
        paragraph = pq(html)
        expected_output = "It's a [protocol-relative URL](http://paulirish.com/2010/the-protocol-relative-url/) (typically HTTP or HTTPS). So if I'm on http://example.org and I link (or include an image, script, etc.) to //example.com/1.png, it goes to http://example.com/1.png. If I'm on https://example.org, it goes to https://example.com/1.png."  # noqa: E501
        actual_output = howdoi.get_text(paragraph)
        self.assertEqual(actual_output, expected_output)

    def test_get_text_with_multiple_links_test_one(self):
        html = 'Here\'s a quote from <a href="http://en.wikipedia.org/wiki/Wikipedia:Manual_of_Style#Links" rel="nofollow noreferrer">wikipedia\'s manual of style</a> section on links (but see also <a href="http://en.wikipedia.org/wiki/Wikipedia:External_links" rel="nofollow noreferrer">their comprehensive page on External Links</a>)'  # noqa: E501
        paragraph = pq(html)
        expected_output = "Here's a quote from [wikipedia's manual of style](http://en.wikipedia.org/wiki/Wikipedia:Manual_of_Style#Links) section on links (but see also [their comprehensive page on External Links](http://en.wikipedia.org/wiki/Wikipedia:External_links))"  # noqa: E501
        actual_output = howdoi.get_text(paragraph)
        self.assertEqual(actual_output, expected_output)

    def test_get_text_with_multiple_links_test_two(self):
        """Three anchors, two sharing one href, each come out as ``[text](href)``."""
        markup = 'For example, if I were to reference <a href="http://www.apple.com/" rel="nofollow noreferrer">apple.com</a> as the subject of a sentence - or to talk about <a href="http://www.apple.com/" rel="nofollow noreferrer">Apple\'s website</a> as the topic of conversation. This being different to perhaps recommendations for reading <a href="https://ux.stackexchange.com/q/14872/6046">our article about Apple\'s website</a>.'  # noqa: E501
        wanted = "For example, if I were to reference [apple.com](http://www.apple.com/) as the subject of a sentence - or to talk about [Apple's website](http://www.apple.com/) as the topic of conversation. This being different to perhaps recommendations for reading [our article about Apple's website](https://ux.stackexchange.com/q/14872/6046)."  # noqa: E501
        rendered = howdoi.get_text(pq(markup))
        self.assertEqual(rendered, wanted)

    def test_get_text_with_link_but_with_copy_duplicating_the_href(self):
        """An anchor whose visible text equals its href comes out as the bare URL."""
        markup = '<a href="https://github.com/jquery/jquery/blob/56136897f241db22560b58c3518578ca1453d5c7/src/manipulation.js#L451" rel="nofollow noreferrer">https://github.com/jquery/jquery/blob/56136897f241db22560b58c3518578ca1453d5c7/src/manipulation.js#L451</a>'  # noqa: E501
        wanted = 'https://github.com/jquery/jquery/blob/56136897f241db22560b58c3518578ca1453d5c7/src/manipulation.js#L451'  # noqa: E501
        rendered = howdoi.get_text(pq(markup))
        self.assertEqual(rendered, wanted)

    def test_get_text_with_a_link_but_copy_is_within_nested_div(self):
        """Link text wrapped in a nested element (``<code>`` here) is still used as the label."""
        markup = 'If the function is from a source file available on the filesystem, then <a href="https://docs.python.org/3/library/inspect.html#inspect.getsource" rel="noreferrer"><code>inspect.getsource(foo)</code></a> might be of help:'  # noqa: E501
        wanted = 'If the function is from a source file available on the filesystem, then [inspect.getsource(foo)](https://docs.python.org/3/library/inspect.html#inspect.getsource) might be of help:'  # noqa: E501
        rendered = howdoi.get_text(pq(markup))
        self.assertEqual(rendered, wanted)
    # pylint: enable=line-too-long

    def test_get_questions(self):
        """Of a tag page, an external redirect and a question URL, only the question URL is kept."""
        question_url = 'https://stackoverflow.com/questions/40108569/how-to-get-the-last-line-of-a-file-using-cat-command'
        candidate_links = [
            'https://stackoverflow.com/questions/tagged/cat',
            'http://rads.stackoverflow.com/amzn/click/B007KAZ166',
            question_url,
        ]
        kept = howdoi._get_questions(candidate_links)
        self.assertSequenceEqual(kept, [question_url])

    def test_help_queries(self):
        """Every help query yields non-empty output containing the expected help snippets."""
        expected_snippets = (
            'few popular howdoi commands',
            'retrieve n number of answers',
            'Specify the search engine you want to use e.g google,bing',
        )

        for help_query in self.help_queries:
            help_text = howdoi.howdoi(help_query)
            self.assertTrue(help_text)
            for snippet in expected_snippets:
                self.assertIn(snippet, help_text)

    def test_missing_pre_or_code_query(self):
        """The ``query_without_code_or_pre_block`` fixture still yields a truthy answer."""
        query = self.query_without_code_or_pre_block
        answer = howdoi.howdoi(query)
        self.assertTrue(answer)

    def test_format_url_to_filename(self):
        """The generated filename is non-empty, ends with the extension and has no '/', '\\\\' or '%'."""
        forbidden_characters = ('/', '\\', '%')
        cache_name = _format_url_to_filename('https://stackoverflow.com/questions/tagged/cat', 'html')

        self.assertTrue(cache_name)
        self.assertTrue(cache_name.endswith('html'))
        for forbidden in forbidden_characters:
            self.assertNotIn(forbidden, cache_name)

    def test_help_queries_are_properly_validated(self):
        """``_is_help_query`` accepts every help query and rejects ordinary ones."""
        for accepted in self.help_queries:
            self.assertTrue(howdoi._is_help_query(accepted))

        # A regular query, a known-bad query and a phrase that merely
        # resembles a help request must all be rejected.
        non_help_queries = [
            self.queries[0],
            self.bad_queries[0],
            'use how do i',
        ]
        for rejected in non_help_queries:
            self.assertFalse(howdoi._is_help_query(rejected))

    def test_negative_and_high_positive_int_values_rejected(self):
        """Each of the four out-of-range query helpers must exit via ``SystemExit``."""
        # Callable form of assertRaises; one line per helper keeps failures
        # attributable to a specific query.
        self.assertRaises(SystemExit, self._negative_number_query)
        self.assertRaises(SystemExit, self._negative_position_query)
        self.assertRaises(SystemExit, self._high_positive_position_query)
        self.assertRaises(SystemExit, self._high_positive_number_query)


class HowdoiTestCaseEnvProxies(unittest.TestCase):
    """Exercise ``howdoi.get_proxies`` with a stubbed ``howdoi.getproxies``."""

    def setUp(self):
        # Keep the real lookup so tearDown can undo the stub a test installs.
        self.temp_get_proxies = howdoi.getproxies

    def tearDown(self):
        # Restore the module attribute replaced by the test.
        howdoi.getproxies = self.temp_get_proxies

    def test_get_proxies1(self):
        """http/https proxy values contain ``http://``; the ftp entry is dropped."""
        def getproxies1():
            # Scheme-less proxy hosts, stand-ins for the environment lookup.
            proxies = {'http': 'wwwproxy.company.com',
                       'https': 'wwwproxy.company.com',
                       'ftp': 'ftpproxy.company.com'}
            return proxies

        howdoi.getproxies = getproxies1
        filtered_proxies = howdoi.get_proxies()
        # assertIn/assertNotIn report the compared values on failure, unlike
        # assertTrue(x in y), and need no pylint suppression.
        self.assertIn('http://', filtered_proxies['http'])
        self.assertIn('http://', filtered_proxies['https'])
        self.assertNotIn('ftp', filtered_proxies)


# Run the test cases when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()