Repository: XinhaoLi74/SmilesPE Branch: master Commit: e5f27dfea077 Files: 80 Total size: 472.9 KB Directory structure: gitextract_99jv75gb/ ├── .github/ │ └── workflows/ │ └── main.yml ├── .gitignore ├── CONTRIBUTING.md ├── Examples/ │ ├── SPE2Vec.ipynb │ └── train_SPE.ipynb ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── SPE_ChEMBL.txt ├── SmilesPE/ │ ├── __init__.py │ ├── _nbdev.py │ ├── learner.py │ ├── pretokenizer.py │ ├── spe2vec.py │ └── tokenizer.py ├── docs/ │ ├── .gitignore │ ├── Gemfile │ ├── _config.yml │ ├── _data/ │ │ ├── alerts.yml │ │ ├── definitions.yml │ │ ├── glossary.yml │ │ ├── sidebars/ │ │ │ └── home_sidebar.yml │ │ ├── tags.yml │ │ ├── terms.yml │ │ └── topnav.yml │ ├── _includes/ │ │ ├── archive.html │ │ ├── callout.html │ │ ├── footer.html │ │ ├── google_analytics.html │ │ ├── head.html │ │ ├── head_print.html │ │ ├── image.html │ │ ├── important.html │ │ ├── initialize_shuffle.html │ │ ├── inline_image.html │ │ ├── links.html │ │ ├── note.html │ │ ├── search_google_custom.html │ │ ├── search_simple_jekyll.html │ │ ├── sidebar.html │ │ ├── tip.html │ │ ├── toc.html │ │ ├── topnav.html │ │ └── warning.html │ ├── _layouts/ │ │ ├── default.html │ │ ├── default_print.html │ │ ├── none.html │ │ ├── page.html │ │ └── page_print.html │ ├── css/ │ │ ├── boxshadowproperties.css │ │ ├── customstyles.css │ │ ├── fonts/ │ │ │ └── FontAwesome.otf │ │ ├── modern-business.css │ │ ├── printstyles.css │ │ ├── syntax.css │ │ ├── theme-blue.css │ │ └── theme-green.css │ ├── feed.xml │ ├── fonts/ │ │ └── FontAwesome.otf │ ├── index.html │ ├── js/ │ │ ├── customscripts.js │ │ ├── jekyll-search.js │ │ └── toc.js │ ├── learner.html │ ├── licenses/ │ │ ├── LICENSE │ │ └── LICENSE-BSD-NAVGOCO.txt │ ├── pretokenizer.html │ ├── sidebar.json │ ├── sitemap.xml │ ├── spe2vec.html │ ├── tokenizer.html │ └── tooltips.json ├── notebooks_build/ │ ├── 00_pretokenizer.ipynb │ ├── 01_learner.ipynb │ ├── 02_tokenizer.ipynb │ ├── 03_spe2vec.ipynb │ └── index.ipynb ├── settings.ini └── setup.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/main.yml ================================================ name: CI on: [push, pull_request] jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - uses: actions/setup-python@v1 with: python-version: '3.6' architecture: 'x64' - name: Install the library run: | pip install nbdev jupyter pip install -e . - name: Read all notebooks run: | nbdev_read_nbs - name: Check if all notebooks are cleaned run: | echo "Check we are starting with clean git checkout" if [ -n "$(git status -uno -s)" ]; then echo "git status is not clean"; false; fi echo "Trying to strip out notebooks" nbdev_clean_nbs echo "Check that strip out was unnecessary" git status -s # display the status to see which nbs need cleaning up if [ -n "$(git status -uno -s)" ]; then echo -e "!!! Detected unstripped out notebooks\n!!!Remember to run nbdev_install_git_hooks"; false; fi - name: Check if there is no diff library/notebooks run: | if [ -n "$(nbdev_diff_nbs)" ]; then echo -e "!!! Detected difference between the notebooks and the library"; false; fi - name: Run tests run: | nbdev_test_nbs ================================================ FILE: .gitignore ================================================ # Large SMILES dataset .smi # folder experiments/ *.bak .gitattributes .last_checked .gitconfig *.bak *.log *~ ~* _tmp* tmp* tags # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # dotenv .env # virtualenv .venv venv/ ENV/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .vscode *.swp # osx generated files .DS_Store .DS_Store? .Trashes ehthumbs.db Thumbs.db .idea # pytest .pytest_cache # tools/trust-doc-nbs docs_src/.last_checked # symlinks to fastai docs_src/fastai tools/fastai # link checker checklink/cookies.txt # .gitconfig is now autogenerated .gitconfig ================================================ FILE: CONTRIBUTING.md ================================================ # How to contribute ## How to get started Before anything else, please install the git hooks that run automatic scripts during each commit and merge to strip the notebooks of superfluous metadata (and avoid merge conflicts). After cloning the repository, run the following command inside it: ``` nbdev_install_git_hooks ``` ## Did you find a bug? * Ensure the bug was not already reported by searching on GitHub under Issues. * If you're unable to find an open issue addressing the problem, open a new one. Be sure to include a title and clear description, as much relevant information as possible, and a code sample or an executable test case demonstrating the expected behavior that is not occurring. * Be sure to add the complete error messages. #### Did you write a patch that fixes a bug? * Open a new GitHub pull request with the patch. * Ensure that your PR includes a test that fails without your patch, and pass with it. * Ensure the PR description clearly describes the problem and solution. Include the relevant issue number if applicable. ## PR submission guidelines * Keep each PR focused. While it's more convenient, do not combine several unrelated fixes together. Create as many branches as needing to keep each PR focused. * Do not mix style changes/fixes with "functional" changes. It's very difficult to review such PRs and it most likely get rejected. * Do not add/remove vertical whitespace. Preserve the original style of the file you edit as much as you can. * Do not turn an already submitted PR into your development playground. If after you submitted PR, you discovered that more work is needed - close the PR, do the required work and then submit a new PR. Otherwise each of your commits requires attention from maintainers of the project. * If, however, you submitted a PR and received a request for changes, you should proceed with commits inside that PR, so that the maintainer can see the incremental fixes and won't need to review the whole PR again. In the exception case where you realize it'll take many many commits to complete the requests, then it's probably best to close the PR, do the work and then submit it again. Use common sense where you'd choose one way over another. ## Do you want to contribute to the documentation? * Docs are automatically created from the notebooks in the nbs folder. ================================================ FILE: Examples/SPE2Vec.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Train a SPE2Vec Model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1. Prepare a large corpus by augmenting SMILES" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from SmilesPE.learner import corpus_augment" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "infile = '../data/clean_chembl25.smi'\n", "outdir = '../data/aug_chembl/'\n", "\n", "corpus_augment(infile, outdir, cycles = 10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2. Train with skip-gram algorithm" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import codecs\n", "from SmilesPE.tokenizer import *\n", "from SmilesPE.spe2vec import *" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Load the pre-trained SPE vocab and set up a `SPE_Tokenizer`." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "spe_vob= codecs.open('../../SPE_ChEMBL.txt')\n", "spe = SPE_Tokenizer(spe_vob)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "indir = '../data/aug_chembl/'\n", "corpus = Corpus(indir, tokenizer=spe, isdir=True, dropout=0.2)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 13h 21min 43s, sys: 43.4 s, total: 13h 22min 26s\n", "Wall time: 11h 53min 20s\n" ] } ], "source": [ "%%time\n", "model = learn_spe2vec(corpus=corpus, n_jobs=4)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3114" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(model.wv.vocab)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "save the model" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "model.save('../results/spe_model.bin')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Usage of a Trained SPE2Vec Model\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from SmilesPE.spe2vec import *" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1. Load a Trained Model\n", "This will return a [genism word2vec model](https://radimrehurek.com/gensim/models/word2vec.html)." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "model = load_spe2vec('../results/spe_model.bin')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3114" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(model.wv.vocab)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Word2Vec(vocab=3114, size=100, alpha=0.025)\n" ] } ], "source": [ "print(reloaded_model)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2. Similarity Search" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Find the *n* most simlair tokens." ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('I)', 0.7128984928131104),\n", " ('[N+](=O)[O-]', 0.7083392143249512),\n", " ('Br', 0.6726223230361938),\n", " ('=[N+]=[N-]', 0.6512905359268188),\n", " ('C3CCCCC3', 0.632835865020752)]" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_model.wv.most_similar(positive = ['I'], topn=5)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('[N+](=O)[O-]', 0.7984017133712769),\n", " ('c3ccc(Br', 0.6949862241744995),\n", " ('I', 0.6726224422454834),\n", " ('C#N', 0.647026538848877),\n", " ('c3ccc([N+](=O)[O-]', 0.6429217457771301)]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_model.wv.most_similar(positive = ['Br'], topn=5)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('F', 0.6898730993270874),\n", " ('[N+]([O-])=O', 0.6884483098983765),\n", " ('C(F)(F)F', 0.675315797328949),\n", " ('O=[N+]([O-])', 0.6138204336166382),\n", " ('c1(Cl)cc(', 0.5590237379074097)]" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_model.wv.most_similar(positive = ['Cl'], topn=5)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('(F)F', 0.7689766883850098),\n", " ('C(F)(F)F', 0.7498051524162292),\n", " ('Cl', 0.6898730993270874),\n", " ('FC(', 0.669197142124176),\n", " ('FC(F)(', 0.6604156494140625)]" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_model.wv.most_similar(positive = ['F'], topn=5)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('Br', 0.7984017133712769),\n", " ('I', 0.708339273929596),\n", " ('C#N', 0.6947668790817261),\n", " ('N(CC)CC', 0.6504974365234375),\n", " ('c3ccc([N+](=O)[O-]', 0.6387115716934204),\n", " ('[N+]([O-])=O', 0.6123145818710327),\n", " ('[N+](=O)[O-])cc2', 0.5939745903015137),\n", " ('NC(=N)N', 0.5908252000808716),\n", " ('[N+]([O-])=O)', 0.5907192230224609),\n", " ('C(=N)N', 0.5786335468292236)]" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_model.wv.most_similar(positive = ['[N+](=O)[O-]'], topn=10)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('cccn', 0.7426245212554932),\n", " ('cc', 0.6368259191513062),\n", " ('ncc', 0.6253657937049866),\n", " ('c4cccc', 0.618654191493988),\n", " ('cn', 0.601131796836853)]" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_model.wv.most_similar(positive = ['cccc'], topn=5)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('c1cc(Br)ccc1', 0.8210796117782593),\n", " ('c1ccc(C(F)(F)F)cc1', 0.8167715072631836),\n", " ('c1ccc(C)cc1', 0.8143419027328491),\n", " ('c1ncccc1', 0.8109526634216309),\n", " ('c1cc(Cl)ccc1', 0.8099758625030518),\n", " ('c1ccc(Br)cc1', 0.8057501316070557),\n", " ('c1cc(OC)ccc1', 0.797452449798584),\n", " ('COc1ccccc1', 0.7923589944839478),\n", " ('c1ccc([N+](=O)[O-])cc1', 0.7881063222885132),\n", " ('c1cc(C)ccc1', 0.7868707180023193)]" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_model.wv.most_similar(positive = ['c1ccccc1'], topn=10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Compare similarity of two tokens" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.6898731\n", "0.387633\n", "0.28637922\n", "0.27690318\n", "0.3910834\n", "0.18097402\n", "0.26682103\n", "0.41482514\n" ] } ], "source": [ "ref = 'F'\n", "for i in ['Cl', 'Br', 'I', 'c', 'C', 'B', '[nH]', 'O']:\n", " print(new_model.wv.similarity(ref, i))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3. Get the embedding of one token" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([-2.02950224e-01, 1.40340686e-01, 2.67382432e-02, 9.74031091e-02,\n", " -1.72517717e-01, 8.01303983e-02, -7.36229196e-02, -9.51444656e-02,\n", " 3.72148484e-01, 1.81130946e-01, 1.74484134e-01, -1.29301652e-01,\n", " -1.03604697e-01, -2.50630349e-01, 4.05520827e-01, -2.43114457e-01,\n", " -2.12318584e-01, 1.80523582e-02, -1.68795168e-01, -9.79098603e-02,\n", " 7.37059772e-01, -1.00865168e-02, 3.67026001e-01, -5.12824394e-02,\n", " -5.35637774e-02, 7.80568644e-03, -4.06715780e-01, 1.34660244e-01,\n", " 7.20059797e-02, -2.08876193e-01, -1.00592591e-01, 9.60482061e-02,\n", " -2.87714094e-01, -3.32500666e-01, -7.18628839e-02, -2.87026674e-01,\n", " -1.61783621e-01, -4.23521474e-02, 1.38721481e-01, 1.18507959e-01,\n", " -1.22052059e-01, 4.58160996e-01, -1.06491186e-01, -8.75373706e-02,\n", " -1.00945957e-01, -9.78194326e-02, -1.70962840e-01, -7.79090226e-02,\n", " 1.30518824e-01, -4.17784601e-01, 1.55221280e-02, -2.45539263e-01,\n", " -2.69203156e-01, -4.91197050e-01, -2.96125770e-01, 4.95558456e-02,\n", " 2.02171758e-01, -2.08486952e-02, 1.11753680e-01, -2.19787493e-01,\n", " 8.67594033e-02, 1.35289162e-01, -2.45470226e-01, 9.63769853e-02,\n", " 3.55482250e-01, -5.12443066e-01, -2.84075797e-01, -1.55221820e-01,\n", " 5.48201621e-01, -4.52058613e-01, 3.87241513e-01, 1.58400357e-01,\n", " -3.09546709e-01, -8.23730826e-02, 8.94564539e-02, 1.70166790e-01,\n", " 3.83394212e-02, 7.79561698e-02, 1.92218885e-01, -2.24415556e-01,\n", " 1.81946293e-01, 1.45321786e-01, -8.67245272e-02, -1.71326876e-01,\n", " 4.72679436e-02, 6.64546043e-02, 4.71588194e-01, 4.90657687e-01,\n", " 9.30584818e-02, -2.62536593e-02, 2.47501358e-01, 3.53196040e-02,\n", " -5.44035360e-02, 2.17035249e-01, 1.04817919e-01, -4.58425283e-02,\n", " -1.58783898e-01, -2.32295177e-04, 8.71988460e-02, 4.19131443e-02],\n", " dtype=float32)" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_model.wv['Cl']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4. `class SPE2Vec()`\n", "`SPE2Vec` takes two input: `spe2vec model path` and `tokenizer`\n", "\n", "three methods:\n", "- tokenize: return the toeknized SMILES as a list\n", "- Generate a vector for a SMILES. The vector is construc in four modes: ['average', 'sum', 'avg_pool', 'sum_pool']. The Unknown token will be skipped.\n", "- spe2vec: Generate a list of vectors (np.array). Each vector is spe vector of each token. The unknown token will be represented by the mean of all token vectors from the model if `skip_unknown` is set to False.\n", " \n", " " ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "import codecs\n", "from SmilesPE.tokenizer import *\n", "spe_vob= codecs.open('../../SPE_ChEMBL.txt')\n", "spe = SPE_Tokenizer(spe_vob)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "spe2vec = SPE2Vec('../results/spe_model.bin', spe)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "from rdkit import Chem\n", "from rdkit.Chem import PandasTools\n", "from rdkit.Chem import Draw\n", "from rdkit.Chem.Draw import IPythonConsole" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "smi = 'CC(=O)NCCC1=CNc2c1cc(OC)cc2CC(=O)NCCc1c[nH]c2ccc(OC)cc12'" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nO3deVyU1f4H8O/MAMouoiOLioJgAmoICAKpKOYCpImopZiVYNd0eIUlvW7+oqvVxavXyKWi9CrqNVOJHNCugBnIKoGymIqIIruA7MvAzJzfHwfnRaiIsz3MzPf98g+cGc75ssyH85zzPOdhEUIAIYSQtNhMF4AQQqoNYxQhhGSCMYoQQjLBGEUIIZlgjCKEkEwwRqG5uXnDhg0NDQ1MF4IQUklaTBfAsK6urqVLl6akpDx69Ojnn39muhyEkOphafJ5oyKRaNWqVbGxsZaWlunp6VZWVkxXhBBSPZp7UE8I2bhxY2xsrKmpaWJiImYoQkg6mhuj27ZtO3z4sJ6eHp/Pt7e3Z7ochJCq0tAYjYyM3LNnj7a2dmxsrIeHB9PlIIRUmCbOjeb9+KPLmjUsFuvHH39cuXIl0+UghFSb5sVoXBwEBu738tJevfq9995juhqEkMrTsBOefv8d3nwTRKItPj6AGYoQkgdNGo0WFMCcOdDUBJs2wcGDTFeDEFITGhOjJSXg5QW1tfDmm3D8OLA1dG0NISR3mhGjVVXg6Qn378OCBZCQADo6TBeEEFIfGhCjTU0wdy7k54ObG1y6BPr6TBeEEFIrGnBsq60NY8aAoyNcuIAZihCSOw0YjQKAQADNzcDlMl0HQqpJJIKwMCAEWlvB3x+WL2e6oKFFTWMUf+oIydGRIyAUQnAwAIC/Pxw7BiYmTNc0hKjpQf2xY+DoCPv2wZEjcOQINDYyXRBCqqywEFxdez+2t4eSEkarGXLUNEbxp46QHDk6Qk5O78d//gk2NoxWM+SoaYziTx0hOVq3DgoKYMsWWL8e1q2Do0fB0REKC5kua6hQ0xilP/W//Q28vcHGBkaOZLoghFSZlhYsWwYcDkRHQ2Ag3L4NN25AXBzTZQ0VarrERGVng7s7WFnBvXvAYjFdDUKqzMUFcnMhIQF8feHCBfD1hRkzIDeX6bKGBDUdjVIzZ8LYsVBWBvn5TJeCkIpbuhQA4Nw5AID588HICPLy4N49ZosaItQ6Rlks8PcHAPjlF6ZLQUjFLVsGAHDuHIhEMGwYLFoEABAfz2xRQ4Raxyg8/hOKMYqQjKZOBTs7ePgQsrIA8J31F+oeo/PmwYgRkJ8PpaVMl4KQinvtNYDHx/W+vqCjA6mpUF/PbFFDgbrHqLY2LF4M8PhnjxCSGh2B0gV6Y2Pw9gaRCM6fZ7aooUDdYxT+OjWOEJKahweYmUFJCdy4AdD7zur87TeGqxoCNCBGfX1h+HBIS4O6OqZLQUiVsdng5wfQOyDtWro0wM5u1NmznZ2dDBfGNA2IUQOD8nXrDrm7//y//zFdCkKqrTkg4Ozs2RvS0gBguIVFpYlJR0dHYmIi03UxTANiFODCjBnB6ekxZ88yXQhCqm24t/c71679JzHxwYMHALBs2TIAOKfxM2YaEaPLli1js9lJSUnt7e1M14KQChs2bNjChQsJITQ6AwICAIDP5wuFQqZLY5JGxOiYMWPc3d07OzsvXrzIdC0Iqba+I1BbW9uXXnqpoaEhPT2d6bqYpBExCgBLly4FPPpASGZ+fn46OjopKSkNDQ3wOFV/0ezz8DUlRpcvXw4AfD6/p6eH6VoQUmHGxsZz5swRCoUXLlyAxwOUuLg4dd7k6Hk0JUYnTZpkb2/f1NR05coVpmtBSLX1HYG6ubmNHTu2rKwsX4M3ANKUGAU8+kBITpYtW8Zisf73v/81NDQUFxc7ODhYWVnV1tYyXRdjOJ999hnTNSiJgYHBoUOHbt26JRAIBALB6NGjhw8fznRRCKkeQ0PDCxcu3L9//5dffrl06dLly5ebm5svXbqUnZ1dVVUlFArNzMw4HA7TZSqPWm/b/FehoaEHDhxgsVgikYg+Ym1t7enp6ezs7OXl5eTkxGZr0NgcIVlkZGRs3bo1KyvLzMxs8uTJeXl5ra2tkmf19fVdXV09PDzc3d1nzZo1atQoBktVAk2J0R07dkREROjo6ISHh7e3t2dlZeXm5goEAskLRo4c6e7u7u7u7uHhMXPmTENDQwarRUNZdzc8fAi1tVVVVbk1NTXV1dUPHz6sqqqqra2tqamxs7P74osvZsyYwXSZCiQSiVavXn327FlLS8u0tLQJEyaIRKJbt27l5uamp6enpaXdvHmzb7CYm5t7eXnRIYubm5u2tjaDxSuCRsTod99997e//Y3D4Zw6dWrFihX0QaFQmJ+fn5aWlpubm5aWdq/PPt4cDmfy5Ml0lOrp6Wlvb8/Ce5Cor4QEeP99uHULWCxYvx7WroWKCnjvPRAKYcUKCAqCzEyorYWaGqiuhtra3p3h5szJS0lxfrK1YcOGiUSiu3fvjh8/XtlfiVIQQkJCQg4dOmRqapqammpvb//ka5qbm3NyciRvrqamJslTBgYG06dPp+8stRmoqn+MxsXFBQYGisXi77//fsOGDc96WXl5eUZGRmZmZlZWVl5eXt/zokaPHk2PTTw8PNzc3HBGVc0kJEBsLEyaBFu3PiVGjYzg+PG/vF5LC7hceOWV0rY2HpfLtbS05HK55ubmZmZmXC73008//emnn7Zt27Zr1y6GviDF2rZt2+7du/X09BITEz09PZ/7ejpeoe+sjIyMvuMVFou1ZcuWr7/+WpH1KoOax+ilS5d8fX0FAsG//vWvjz76CAC6urqem4M9PT0FBQX0b2lqampZWZnkqZMnT77++uuYpOokIQFqaiAtDXbsgG3bYO1a+Ne/wN4eCIHaWnj3Xbh5E8zMYMwYsLAALhe43IFukJibm+vi4mJiYlJeXq6vr6/Er0MZ9u3bFxoaqq2tfe7cucV0J98XVFNTk5OTQw//09PTv/jiiw8++EDudSobUV/Z2dkGBgYAwOPx6CNxcXHjxo0rKCh4oXZKS0tPnDixefPmkSNHAkB8fLwCikWMiY8nP/xAiotJSAhZtYrEx5NvvyWEkJ4esnSpNA26u7sDQHR0tHzrZNzRo8KpU0PYbPaPP/4olwa7u7vb2trk0hSz1HZturi42N/fv62tLSgoKCoqCgCSk5NXr15dXl6enJz8Qk1NnDhxzZo1+/fvDwsLAzzzVE3Z2oKWFjw+ieOFNTQ0REZG3rlzBwBCQ0MB4OuvvyZqdKh37hxs2MApLY0+dOji6tWr5dKmtra2mgzYmc5xhSgvL6cT/K+99lpPTw8h5OrVq3TxfcuWLVI3W1RUBACjRo0SCoXyKxYNFV9/TbS1yY4d0nzuxo0bJb9dPT09Y8eOBYCkpCQ5l8iQy5fJ8OEEgPzjH0yXMiSpYYzW1dW99NJLADB37tzOzk5CSHFxMZfLBYC1a9eKxWJZGp88eTIApKamyqlYNISsWkUAyA8/SPO5RUVFLBbL0NCwqamJEPL5558DgL+/v5xLZEJ+PhkxggCQTZuYLmWoUrcYbWlpcXZ2BoBp06Y1NjYSQsrLy62srOjvNB2ZvhCxWPz++++PGzeOtrZt2zYA2Lp1q/xLR0wbP54AkKIiKT993rx5APDVV18RQurq6nR1dVks1u3bt+VZ4qClp6e3tLTI3k5JCTEzIwDkjTeISCR7e+pJrWJUIBC8+uqrAGBjY1NdXU0IqaurmzJlCgDMmjWrvb1dumbp2+PEiROEELqv4oQJE+RZNxoCKisJADEykj4s6KT5hAkT6JzPO++8AwChoaHyrHIQzp075+3tzeFwOByOvb19UFBQdHR0UVGRFMdhlZVk4kQCQHx8SFeXIopVE+oTo0KhMDAwEAAsLCxKS0sJIe3t7R4eHgAwderUR48eSd3yvn37ACAwMJAQIhKJzM3NAeBFl/vREHf2LAEgr74qfQsikcjGxgYA+Hw+IaSwsJAe5jc3N8utyuf57bff6Nl4EydO7Hex0KhRo/z8/Hbvbr58mQxyeXzWLAJAZs0i0o5ANIWaxKhYLA4ODgYAY2Pj69evE0IEAsHChQv7jkylVlFRwWKxDAwM6ExrSEgIAOyQbiUCDVVbtxIAEhEhUyN79+4FgPnz59P/zpkzBwD27dsne3mDcf369REjRgDApk2bCCHd3d1//PFHVFRUUFDQhAkTAGDUKC4AASAcDrG3J0FBJDqaFBWRZ41Tc3KItzdpaFBO+SpMTWI0PDwcAPT09K5cuUIIEYlE/UamMqLzrQkJCYSQ8+fPA8CMGTNkbxYNHXTkdfGiTI20tLQYGRkBQH5+PiEkNjYWACZNmiRS/LTinTt3xowZAwBvvvnmU7u7d+/e2bO/b9lCnJ2JlhaheUr/mZmRn34i48eTjg7S2Smfk2c1ijrEKD3o1tbWvnDhAiFELBbTAaOxsfG1a9fk0sWOHTsAIDg4mBDS1dVF3ypyCWg0FHR1keHDCYtFZJj76fX+++8DQEhICCFEKBROnDhR8gdYcSoqKuh4c8GCBQKB4Lmvb28nV66QqCgSGEi4XGJtTeLjyfr15PPPMUalofIx2tjYOGbMGDabffLkSfrI9u3b6cg0LS1NXr0UFBQAAJfLpasHK1euhMfnVyM1kJ3dOmxYl729HJoqLi5ms9m6urr19fWEkN27dwPAq7LMuT5PY2Pj9OnTAcDd3V26i4Jqa3sv5XrrLVJW1hujr7xCNm4kISEYo8+n8lcxXb16tbOzc8aMGW+88QZ9ZO7cuSYmJmfOnBnMvgmDNHXqVDs7u4cPH2ZmZgLenlvtZGQcIsRoyZIvZW/K1tZ24cKFnZ2dhw4dAoANGzYYGBgkJiYWFhbK3viTOjo6/Pz88vPzHR0dz58/L91FQVxu7weffAJffNH78ZtvwnffwcGDcipUral8jDo4OLS2tt68ebOzs5M+Mn/+/Lt37y5ZskS+Hb322mvwODqXLFlCb45YTzdNQyouPT29u7vb3t5cLq3xeDwA2L9/f09Pz4gRI4KCggDg22+/lUvjffX09KxYsSI9Pd3a2joxMZHu+SALW1vQ1ZVLaRqG6eGwHLi4uMDjs0wUh94Lz8bGhv6XngYQExOj0E6RctBrN+lmw7ITi8X0bOUzZ84QQm7fvs1isfT09BrkuuZN904GgNGjR9+6dUuOLaMXpfKjUVDWIbaHh4eZmdndu3fplfV443u18eDBg4qKihEjRtjZ2cmlQRaLRRea6E6adnZ2CxYs6Ojo+M9//iOX9qmwsLBTp04ZGRldvHiRXqOMGMN0jsuB0nYMobs+79y5kxBSWVlJhxhSXxyFhohTp04BwJIlS+TYZmtrq4+Pj2TZMyEhAQDGjRvX0dEhl/b/7//+DwB0dXVTUlLk0iCShTqMRh0cHOzs7Orr6zMyMhTaER2B0mv+LCwsZs6c2dHRkZSUpNBOkaLRZcNZs2bJsU0DA4OkpCTJsqebm5u+vr5QKDQyMnJxcQkNDT127Nj9+/ela/ybb77ZuXMnh8M5ceLE7Nmz5VY0khrTOS4fdGf7sLAwhfbS1dVlaGjIYrHKysoIIV9++SUAvP322wrtlBHx8Rp0MvbMmTMBIDk5mf73wYMHa9asoT9iuSgpKaHTBaNGjep391krK6s33nhj3759OTk53d3dg2nt5MmTbDabxWIdPnxYXhUiGalJjCptx5CQkJC33nqrpKSEEPLnn38CgKmpqRQbR0nn0aNH4eHh69evd3FxUejqFoMnYzc1NXV2dg4yU2TX0dGho6PD4XAkV76vW7cOAPT09Hbu3Ekv/5XF1atX6cVF06ZNKy8vb2lpuXLlSmRkpJ+fX7+FdW1tbWdnZx6Pd/r06Zqamqe2lpSUNGzYMADYs2ePjIUhOVKTGJXsGEIvwlMaOsr4/vvvFd1Rd3f3vn37TE1NAUAyolmwYIG8Fpf7YeRk7O7u7ujo6NGjR69YscLOzo5ek6ZQAoFg69atWlpapqamko0XKisrg4KC6L1gbWxsTp8+LXX7Fy9epJuF+/j4PHWDkrt378bExISEhNjb2/cbqJqbmwcGBkZFRV25coVemJSVlUVPC/373/8udUlIEdQkRsnjHUP+ocTtuevr6y0sLOi5Mp6ennw+X8Y9oZ8lKSnJ0dGRvru8vb3/+OOPmJiY0aNH0yEMj8eT+x5Ccr890XPFxcXZ2trSr1EyTAsICLh//75C+iMkNjZ20qRJAECzydjYeO/evZJRcEpKyrRp02gZ8+fPv3Hjxou2f/jwYS0tLQBYv379YAbX9fX1CQkJ27dvnzdvHr2HmISBgYG7uzt9MDg4WEG/Zkhq6hOjSt4xpLW11c3NDQDGjBlDRxz0wO3YsWNyPCC9ceOG5DoCOzu7viOjR48e8Xg8DodDRy7R0dFy2f8iL4/weITP790EPjT0KTH6009Ejucp5ubmzp07t+/X2N3dHRUVRTcu0NXVjYiIkP3gul+PdO8lAJg8efK3335Lz5CnBfz666/0ZSKRKCYmht5Inf65GuRGyGKxOCIigjYYHh4uRYVCobCoqEgyUKVD4/nz5y9dulRpM0ho8NQnRpW5Y0h3d/eiRYsAwNrauqqqqqWlJSoqig5LAcDMzCwiIoLuli+1uro6SUqamJhERkZ2PW3j3GvXrnl5edF+XV1ds7Ozpe6xspK88w5hswkAeXyizlOUlxN9faKtTXg8IuMguLKyMiQkhH6NI0eOjIyM7LutRlVVleTgety4cXKZC+7XY1RUlCSVkpOT7e3t6XfSz8/v3r179PGGhgbJD8LCwiImJmbgwaBAIFi7di0AaGlpyevmoLW1tefOnautrVXalDF6IeoTo+TxjiFRUVEK7UUkEq1atQoAuFxu31tECASCmJgYBwcH+lY0NDTk8Xjl5eUv2n5HR0dkZCT9k6CtrR0SEvLw4cMBXi8Wi0+fPj1u3Dg6bRoUFDTw65/U3k4iI4mREQEg2tokJITU1T3zxQ0NZOPG3rS1sCAnTjxzt8oBe2yPjIyko3g60HvWX53s7Gy6ki71wfXgexxgFJybm0u3AAeAOXPmPGsKvrW1lf59NTAwOH/+vHSlIpWjVjF68uRJOnuo0F7o7XONjIzy8vKefFYsFiclJfn5+dG3nI6OTlBQUNHg7u9DA5HueEbXJQafGm1tbREREXQZ18TEJCoqajAXI4jF5PRpMmFC776Tfn7kzp1BdZeXRzw9ez/L1ZUMfhBMv0Z6dyw67qOnPQzgyYPrF5oLftEen7XEJBKJDh8+TO+NqKWlxePx+h1fV1ZWvvzyy3SOJTc3d/AVIlWnVjHa1NRET16pG2A0JRu6C99grh7Jy8sLCgqiiwwsFsvHx2fgq/6zsrIk450ZM2ZcvnxZivJu3769ePFi2oiTk9PAWwWmpqbOm7fQyKgJgDg7kxe9HEYsJkeP9t7vjMMh27effO6dWjIzMyVnuTs7O7/QFTgvenAt6dHd3Z326OLiMvgen7XE1NjYGB4erqOj4+fn1/f1RUVF9J7e9vb2ilsWQ0OTWsUoebxjyNGjRxXR+MGDBwGAw+HExsYO8lNKS0t5PJ6enp4kH2NiYvqNYsrKyiTDHwsLi+joaBmvauXz+XS3YBaLFRgY+ODBg34vKCkpCQgIoCW99lpETIz093FrayMREcTTs0oy2/jU4u/cuRMYGEi/RktLS6kXxPoeXM+ePXuA89uKi4vpHRCk7nGAJaaCgoK+U/CXLl0yNjYGgFmzZinuTzgastQtRr/55hsAWLZsWd8HfX19Q0JCjhw5cvPmTalPFvnvf/8r9dUjdXV1kZGR9MxWujAVFRXV3t7e2toaERFB70Gmp6cXHh4ul5viEkI6OjokLevr60dERNDVGzqYknuPN278OX/+fMmgLzMzU/IUvWSAzjbQHltbW2XpSywWx8TE0HPa6Vxwv+Tq26O+vr6MPT53FHz69Gn6/QwICJDXJfNItahbjFZWVtK9xyXbgNfW1kIfRkZGPj4+ERERfD5/8LcLTUxM1NHRAdmuHmlvbz9w4IC1tTWtZOTIkXQIw2az169fX1FRIXXLz1JaWkr3AaAHm5s3b6ZjKzab/fbbb1dWVsq3Oz6fTyd2WSxWUFBQeXl5dHQ0nUykeVdVVSWvvuhcMP2hSEbBkhP45d7js0bBUVFR9LR5Ho+nhBsuoaFJ3WKUEELnwuLi4uh/u7u709PT//3vfwcEBFhYWPSNVA6H8/LLL2/atOn48eMDLDtIrh755JNPZC9PJBLx+Xx3d3ctLa0RI0a4ubllZGTI3uwALl26RE/lMTMzAwBvb++nLo7JRXt7+/bt2yWjXfp9njdvnrxuitVPUVGRt7c37WXKlCl0KgMAfHx85H49W99RsJaWVnBwML0TPYfD2b9/v3z7QqpFDWP0n//8JwC8+uqrtbW1Tz5bVlZ28uRJHo/n6ura70beXC53w4YN/V5fWFhIL6qhNymTl0ePHgGAoaGhHNscgEAgiIqKSkpK+uWXXxTd14EDBxwdHV1dXUNDQydOnGhtbb1582aF9sjn88ePH6+vr8/hcPpdpCB3jY2NPB6PrhxyOBxdXd2ff/5Zcd0hlaCGMXrr1q358+fT1YwnL0zuS3Ij78DAQDrKeP311/u+oLS0lE5oLlu2TL5Xj1y/fh0AHB0d5djmEEFvdh0ZGSkWi1NTUwHA09NT0Z2WlJQAgKmpqXJOUC8sLDQxMVHcYiZSLVqgdujlfRs3bszJyamurj5z5syZM2cAwMDAwNXV1cPDY9asWe7u7qampnRPHWdnZ3oq6N27dwUCgaSdurq6xYsXV1dXe3t7nzp1ig5AZBEcHHzp0qX9+/f7+vo+ePAAAOgpMmpGJBIBAIfDoX/JoM9eKopDpxGGDx/e7whDQRwdHceNG9fY2Ojk5KSE7tAQp4YxCgC2tra//fabSCS6detWbm5uenp6WlrazZs3L1++fPnyZfoac3NzLy8vT09PLy8vJycnNpttY2MjaaGlpWXRokW3b992dXU9d+4cXfaVUUlJyb1793R1dQGgvLwc1D1G+32stE4BoKKioru7e+zYsXQNSgk9Ik2mnjFKcTgcBwcHBwcHuoNkc3NzTk5OWlpaenp6RkZGv4Hq9OnTaap6eHjo6en5+/vn5eXZ2trGx8dLdh6RER2B0qs2aYzSj9UMzRc6AmUqRpcsWVJYWFhQUDB16lQF9SgWi0EpA2009KlzjPZjbGzs4+Pj4+MDAD09Pfn5+RkZGVlZWRkZGWVlZenp6XTvZzabbWJi0tDQMG7cuOTkZDpnKjuxWFxRUcFisegOJn0jVc3QfKGJprSs6RejSugXR6NIQoNitC9tbW0XFxcXFxd6S/GampqcnBx6+J+eni4Wi7W0tL766is5HnTT7Xm4XC49qNeEuVFQYtb0y00l9IsxiiTwkAQAwMzMzN/f/7PPPktKSmpqagoICBAKhfv375djF/1yE+dGFdepcvrFGEUSGKP96ejo7N27d8yYMSkpKXFxcfJqtm9uikSiqqoqNpttaWkpr/aHjr4Dw77zpAql/BjFuVEkgb8ET2FoaEjvA/7hhx/2PQVKFn0nQ6urq3t6eszMzBS3jsygTVxu5pw5c/T1AWC6llbmnDkfP976T3H65TWORpEyYYw+3Xvvvefg4FBaWkr3OpFd36V5NV5fAgCnigr3lBTbnh4AsOrsdE9J8aiqUnSnfde1AJeYkHJhjD4dh8PZtWsXAOzcubOhoUH2BvvOjarx+hIAgEgEAEDzpe/Hiu0T50YRYzBGn8nX13fhwoWNjY07duyQvbW+c6NqvL4E8Dg66UhQfWMU50aRBP4SDGT37t0cDufo0aPFxU0yNqUh594DAIjFAI+jk36srCUmnBtFjMAYHcjUqVPDw6MNDQs++miELO0IBIKHDx/q6OjQrerUe26UkYP6fnOjSjhDAGMUSWCMPkdo6LutrVZ8PiQlSd8IvT+opaUlfWPj3KgC+nzKVUw4GkXKgTH6HFwufPwxAMBHH/VmghQ059x7gCERozg3ipQJfwme74MPYMIEyM+Ho0elbGHevHkNDQ3Hjx8HgI6Ojvr6+mHDhtF7XaghJpaY8GJQxCCM0ecbPhy+/BIA4JNPoKVFykZGjhzZb31Jsh2nunn5ZfDxAS4XAMDCAnx8wNZW0X32CzVjY2MTExOcG0XKgTE6KKtXg6cn1NbCnj2yNnX37l1Q4yN6APjiC0hKAldXAAB/f0hKgq1bFd1nv1Crrq5+9OiRXHaJpTo7O5OTk/s+ggf1SEJDd3h6USwW7NkDHh6wZw+8+y5YWb3A5wqFwtu3b+c+lpOTY2FhMX36dIUVyzSRCMLCgBBobQV/f1i+XCl9KmpsSAg5e/ZseHh4ZWVlYWGhnZ2dQrtDqghjdLDc3WHlSjh9Gn79Fd57b6BX9vRAYSHk5sKDB/G//vqPwsLC7u5uybMcDmfRokV79+5VeMVMOXYMHB0hOBgAwN8fvL3BxETRfdKxYXFxcWtrq7y22QaAjIyMsLCw7OxsAHBycmpvb6ePK23LFaQamL4ZlCopKyNZWU95vKeHFBWRmBjC4xFPT6KrSwAIAJkz5w/6TTY3N/fz84uIiODz+Y8ePVJ64cr1wQdEcjvlbdvI1atK6LOtrW3jxo30Wx0TEyMWi2Vs8MGDB0FBQXT+2sLCIjo6WigU0qeys7M9PDz09fU//PBDmQtH6gBjdFDi48n48aSjg3R2klWrSHw8+fZbQgjp6SFLl5J33+3NTfqPzSaTJ5M1a8h339Wnpqa2trYyXb5yHT5Mvv++92M/P9LQoJxur1275uXlRf9uubq6ZmdnS9dOa2trREQEvUeenp5eeHh4c3Mzfaq0tHTlypWSbP3jjz/kVz5SYRijgxIfT9avJ59//vQYPXiQ2NqS1avJnj3k8mXy+E2nqXp6yObNZPNm8tZbRJG3jH+SWCw+ffo0PSOCzWYHBQU9fPhw8J8uEoliYmLobWNYLFZgYOC9e/foUwNkK0IYo4MSH09++IG89RYpK+uN0VdeIRs3kpAQsnQp08Whv2pra4uIiKDL9CYmJlFRUVEJORQAAAQdSURBVJLj8QEkJSVNmzaNDmbd3NwyMjLo4wNkK0IUxuig0BgtLiYhIU8ZjaIh6Pbt24sXL6ax6OTklJaW9qxX3rx5c+HChfSVkyZNio2NlTzVL1vT09OVUjtSMbjU+AJsbUFXl+ki0ODY2dlduHCBz+dPmDDh2rVrr7zyyrp162pqap58ZWNjY2Ji4ogRIyIjI4uKipYvXw4At27d8vf3X7BgQUFBwfjx42NiYjIzMz08PJT+dSBVwHSOI6RYHR0dkmlNfX39iIgIgUDQ7zWHDh2qr6+nH9fX1/N4PC0tLQCg2drV1aX0qpEqwRhFGoGewESHDnZ2dr/++uuTrxEIBFFRUcbGxgCgpaUVEhJSW1ur/FKRysEYRRokOTnZ3t6ehqmfn1/fxSI+n29tbU2f8vHxKSwsZK5MpGJYhBAGphIQYkhPT88333zz6aeftrS06Orqbtu2bcGCBR9//HFaWhoATJkyZc+ePUuWLGG6TKRKMEaRJqqsrPzoo49OnTpFCOFwOCKRyMzMbOfOnW+//TZeKY9eFMYo0lypqamvv/56Z2fn2rVr9+7da2BgwHRFSCVhjCKNNmPGjGvXruXl5Tk5OTFdC1JVeN4o0mi4bSiSHf72II2GO4ci2WGMIo2GMYpkhzGKNBoe1CPZ4W8P0mg4GkWywxhFGg1jFMkOYxRpNIxRJDu8pR3SaKkODhxLy5E4N4pkgDGKNJrV9etQXQ04GkUywD/CSLOJRACAMYpkgTGKNBvGKJIZxijSbBijSGYYo0izicUAALjEhGSAvz1Is+FoFMkMYxRpNoxRJDOMUaTZMEaRzPC8UaTZ3n0XxGKMUSQL3P0eIYRkgqNRpKlEIggLA0KgtRX8/WH5cqYLQqoKR6NIUx05AkIhBAcDAPj7w7FjYGLCdE1IJeESE9I8DQ2QkACFheDq2vuIvT2UlDBaE1JhGKNIk/T0wPffw5QpEBgIZmaQk9P7+J9/go0No5UhFYYH9UhjxMVBeDjcuQMAsGABHDgA+/cDALS2gq8vBAYyWx1SXRijSAPk5cHWrfD77wAAkyfDzp0YmkiO8KAeqbOeykpYvx5cXeH332HUKDh4EIqKMEORfGGMIvXU0dGxa9euKV5ezXFxwOEAjwd37sCmTaCFJ/khOcNfKaRuxGLx8ePHP/nkk8rKSgBI+OCDNZs3g7U103UhtYUxitRKVlZWWFhYZmYmADg7O+/du3f27NlMF4XUHB7UIzVRUlKycuVKDw+PzMxMS0vL6Ojoq1evYoYiJcCVeqQO6urqrKysOjs7DQwMwsPDw8LC9PT0mC4KaQqMUaQmNm3a1NbWtmvXLnNzc6ZrQZoFYxSpCUIIi8ViugqkiTBGEUJIJrjEhBBCMsEYRQghmWCMIoSQTDBGEUJIJv8PAuB1ay1i3dkAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mol = Chem.MolFromSmiles(smi)\n", "mol" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'CC(=O)N CCC1 = CN c2 c1cc(OC) cc2 CC(=O)N CC c1c[nH] c2ccc(OC) cc12'" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "toks = spe2vec.tokenize(smi)\n", "toks" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(100,)" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "smivec = spe2vec.smiles2vec(smi, mode='average')\n", "smivec.shape" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(300,)" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "smivec = spe2vec.smiles2vec(smi, mode='avg_pool')\n", "smivec.shape" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(12, (100,))" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "smivec = spe2vec.spe2vec(smi)\n", "len(smivec), smivec[0].shape" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.7" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: Examples/train_SPE.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import codecs\n", "from SmilesPE.learner import *" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The train data should be large (e.g., 1.7M ChEMBL SMILES). **All SMILES must be RDKit sanitizable**. One example dataset can be download [here](https://github.com/XinhaoLi74/SmilesPE/blob/master/Examples/clean_chembl25.smi)." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of SMILES: 1789388\n" ] } ], "source": [ "file_name = \"./clean_chembl25.smi\"\n", "\n", "with open(file_name, \"r\") as ins:\n", " SMILES = []\n", " for line in ins:\n", " SMILES.append(line.split('\\n')[0])\n", "print('Number of SMILES:', len(SMILES))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Training\n", "\n", "- input: a list of SMILES\n", "- output: the file to save the learned vocabulary.\n", "- num_symbols: maximum total number of SPE symbols, set to 30,000\n", "- min_frequency: the minimum frequency of SPE symbols appears, set to 2,000.\n", "- augmentation: times of SMILES augmentation, set to 1. The final data set is ~2 times larger than the original one.\n", "- verbose: if True, print the merging process\n", "- total_symbols: if True; the maximum total of SPE symbols = num_symbols - number of atom-level token." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Counting SMILES...\n", "1695655 unique Canonical SMILES\n", "Augmenting SMILES...(1 times)\n" ] }, { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "3473557 unique SMILES (Canonical + Augmented)\n", "Gettting Pair Statistics\n" ] }, { "data": { "text/html": [ "\n", "
\n", " \n", " \n", " 100.00% [3473557/3473557 01:15<00:00]\n", "
\n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "Number of unique characters & Reducing number of merge operations by: 115\n", "Unique characters: {'[c-]', '[SeH]', '[N]', '[C@@]', '[Te]', '[OH+]', 'n', '[AsH]', '[B]', 'b', '[S@@]', 'o', ')', '[NH+]', '[SH]', 'O', 'I', '[C@]', '-', '[As+]', '[Cl+2]', '[P+]', '[o+]', '[C]', '[C@H]', '[CH2]', '\\\\', 'P', '[O-]', '[NH-]', '[S@@+]', '[te]', '[s+]', 's', '[B-]', 'B', 'F', '=', '[te+]', '[H]', '[C@@H]', '[Na]', '[Si]', '[CH2-]', '[S@+]', 'C', '[se+]', '[cH-]', '6', 'N', '[IH2]', '[As]', '[Si@]', '[BH3-]', '[Se]', 'Br', '[C+]', '[I+3]', '[b-]', '[P@+]', '[SH2]', '[I+2]', '%11', '[Ag-3]', '[O]', '9', 'c', '[N-]', '[BH-]', '4', '[N@+]', '[SiH]', '[Cl+3]', '#', '(', '[O+]', '[S-]', '[Br+2]', '[nH]', '[N+]', '[n-]', '3', '[Se+]', '[P@@]', '[Zn]', '2', '[NH2+]', '%10', '[SiH2]', '[nH+]', '[Si@@]', '[P@@+]', '/', '1', '[c+]', '[S@]', '[S+]', '[SH+]', '[B@@-]', '8', '[B@-]', '[C-]', '7', '[P@]', '[se]', 'S', '[n+]', '[PH]', '[I+]', '5', 'p', '[BH2-]', '[N@@+]', '[CH]', 'Cl'}\n", "pair 0: c c -> cc (frequency 18211458)\n", "pair 1: C C -> CC (frequency 8249650)\n", "pair 2: O ) -> O) (frequency 6227992)\n", "pair 3: c 1 -> c1 (frequency 5519116)\n", "pair 4: c ( -> c( (frequency 5484200)\n", "pair 5: C ( -> C( (frequency 4992669)\n", "pair 6: = O) -> =O) (frequency 4801376)\n", "pair 7: c 2 -> c2 (frequency 4243337)\n", "pair 8: C ) -> C) (frequency 3892279)\n", "pair 9: cc cc -> cccc (frequency 2282308)\n", "pair 10: c 3 -> c3 (frequency 2240231)\n", "pair 11: C( =O) -> C(=O) (frequency 2157897)\n", "pair 12: cc c( -> ccc( (frequency 1936315)\n", "pair 13: ) cc -> )cc (frequency 1747627)\n", "pair 14: ( C) -> (C) (frequency 1448918)\n", "pair 15: ( =O) -> (=O) (frequency 1291445)\n", "pair 16: 2 ) -> 2) (frequency 1214511)\n", "pair 17: cc ( -> cc( (frequency 1067547)\n", "pair 18: C 1 -> C1 (frequency 999268)\n", "pair 19: F ) -> F) (frequency 949405)\n", "pair 20: N ( -> N( (frequency 909203)\n", "pair 21: CC CC -> CCCC (frequency 900906)\n", "pair 22: c1 ccc( -> c1ccc( (frequency 809039)\n", "pair 23: 3 ) -> 3) (frequency 762167)\n", "pair 24: C O -> CO (frequency 697590)\n", "pair 25: c n -> cn (frequency 690940)\n", "pair 26: c1 cccc -> c1cccc (frequency 660633)\n", "pair 27: c2 cccc -> c2cccc (frequency 650153)\n", "pair 28: c 4 -> c4 (frequency 646272)\n", "pair 29: C N -> CN (frequency 644462)\n", "pair 30: C(=O) N -> C(=O)N (frequency 640036)\n", "pair 31: O C) -> OC) (frequency 636494)\n", "pair 32: )cc 1 -> )cc1 (frequency 611642)\n", "pair 33: c2 ) -> c2) (frequency 587195)\n", "pair 34: C 2 -> C2 (frequency 565738)\n", "pair 35: n 1 -> n1 (frequency 551408)\n", "pair 36: n c( -> nc( (frequency 550884)\n", "pair 37: C( C) -> C(C) (frequency 544755)\n", "pair 38: c2 ccc( -> c2ccc( (frequency 525533)\n", "pair 39: N ) -> N) (frequency 520816)\n", "pair 40: [C@H] ( -> [C@H]( (frequency 502129)\n", "pair 41: [C@@H] ( -> [C@@H]( (frequency 474443)\n", "pair 42: c2 c( -> c2c( (frequency 470792)\n", "pair 43: = C( -> =C( (frequency 449000)\n", "pair 44: CC 1 -> CC1 (frequency 442435)\n", "pair 45: c1 ) -> c1) (frequency 420500)\n", "pair 46: Cl ) -> Cl) (frequency 408273)\n", "pair 47: c3 cccc -> c3cccc (frequency 407073)\n", "pair 48: N C(=O) -> NC(=O) (frequency 406302)\n", "pair 49: cc 1 -> cc1 (frequency 403784)\n", "pair 50: O CC -> OCC (frequency 394836)\n", "pair 51: c1 c( -> c1c( (frequency 393746)\n", "pair 52: c1 cc -> c1cc (frequency 380636)\n", "pair 53: c1 cc( -> c1cc( (frequency 368240)\n", "pair 54: = C -> =C (frequency 366518)\n", "pair 55: S (=O) -> S(=O) (frequency 359497)\n", "pair 56: CC N( -> CCN( (frequency 349959)\n", "pair 57: ( F) -> (F) (frequency 327156)\n", "pair 58: c3 ) -> c3) (frequency 325828)\n", "pair 59: N 1 -> N1 (frequency 324936)\n", "pair 60: n 2 -> n2 (frequency 311276)\n", "pair 61: = O -> =O (frequency 303468)\n", "pair 62: c3 ccc( -> c3ccc( (frequency 279481)\n", "pair 63: S(=O) (=O) -> S(=O)(=O) (frequency 274230)\n", "pair 64: c3 c( -> c3c( (frequency 272769)\n", "pair 65: CC (=O) -> CC(=O) (frequency 269198)\n", "pair 66: CC ( -> CC( (frequency 267950)\n", "pair 67: / C -> /C (frequency 265765)\n", "pair 68: c1cccc c1 -> c1ccccc1 (frequency 259176)\n", "pair 69: C( F) -> C(F) (frequency 255973)\n", "pair 70: F )cc -> F)cc (frequency 254670)\n", "pair 71: c2 cc -> c2cc (frequency 254382)\n", "pair 72: c2 n -> c2n (frequency 252775)\n", "pair 73: CC C -> CCC (frequency 251687)\n", "pair 74: N 2 -> N2 (frequency 244670)\n", "pair 75: 4 ) -> 4) (frequency 241111)\n", "pair 76: c2cccc c2) -> c2ccccc2) (frequency 240630)\n", "pair 77: O C -> OC (frequency 237477)\n", "pair 78: Cl )cc -> Cl)cc (frequency 236493)\n", "pair 79: O =C( -> O=C( (frequency 232953)\n", "pair 80: C(F) (F) -> C(F)(F) (frequency 232034)\n", "pair 81: c1 n -> c1n (frequency 231497)\n", "pair 82: c( =O) -> c(=O) (frequency 230218)\n", "pair 83: c2cccc c2 -> c2ccccc2 (frequency 227001)\n", "pair 84: (C) C) -> (C)C) (frequency 226747)\n", "pair 85: c2 cc( -> c2cc( (frequency 223953)\n", "pair 86: C( N -> C(N (frequency 221780)\n", "pair 87: c1 ( -> c1( (frequency 216532)\n", "pair 88: [C@H] 1 -> [C@H]1 (frequency 209173)\n", "pair 89: =C / -> =C/ (frequency 208304)\n", "pair 90: = N -> =N (frequency 203132)\n", "pair 91: n ( -> n( (frequency 196383)\n", "pair 92: [C@@H] 1 -> [C@@H]1 (frequency 194479)\n", "pair 93: C 3 -> C3 (frequency 184823)\n", "pair 94: CC ) -> CC) (frequency 184604)\n", "pair 95: CC O -> CCO (frequency 176493)\n", "pair 96: CC N -> CCN (frequency 172181)\n", "pair 97: CC 2) -> CC2) (frequency 169682)\n", "pair 98: 1 ) -> 1) (frequency 169274)\n", "pair 99: C # -> C# (frequency 168844)\n", "pair 100: c1 2 -> c12 (frequency 166507)\n", "pair 101: CC 2 -> CC2 (frequency 165845)\n", "pair 102: C N( -> CN( (frequency 165053)\n", "pair 103: c1cccc ( -> c1cccc( (frequency 164139)\n", "pair 104: c( - -> c(- (frequency 162371)\n", "pair 105: CC (C) -> CC(C) (frequency 162316)\n", "pair 106: c3cccc c3 -> c3ccccc3 (frequency 161531)\n", "pair 107: c( C) -> c(C) (frequency 161189)\n", "pair 108: cc 2) -> cc2) (frequency 159959)\n", "pair 109: N C( -> NC( (frequency 159644)\n", "pair 110: cc 2 -> cc2 (frequency 153764)\n", "pair 111: ( O) -> (O) (frequency 151514)\n", "pair 112: c1cccc c1) -> c1ccccc1) (frequency 150535)\n", "pair 113: [C@H] 2 -> [C@H]2 (frequency 150256)\n", "pair 114: C c1ccc( -> Cc1ccc( (frequency 145502)\n", "pair 115: c( OC) -> c(OC) (frequency 142967)\n", "pair 116: C(=O) O) -> C(=O)O) (frequency 141359)\n", "pair 117: c3cccc c3) -> c3ccccc3) (frequency 140996)\n", "pair 118: [C@@H] 2 -> [C@@H]2 (frequency 140864)\n", "pair 119: n 2) -> n2) (frequency 134043)\n", "pair 120: nc( - -> nc(- (frequency 133124)\n", "pair 121: C(C) C) -> C(C)C) (frequency 132429)\n", "pair 122: c4 cccc -> c4cccc (frequency 130758)\n", "pair 123: )cc1 ) -> )cc1) (frequency 129741)\n", "pair 124: O = -> O= (frequency 129726)\n", "pair 125: /C =C/ -> /C=C/ (frequency 129644)\n", "pair 126: c3 cc -> c3cc (frequency 129278)\n", "pair 127: C 2) -> C2) (frequency 127058)\n", "pair 128: CC C( -> CCC( (frequency 125932)\n", "pair 129: c 5 -> c5 (frequency 123649)\n", "pair 130: 3 )cc -> 3)cc (frequency 122865)\n", "pair 131: CO c1ccc( -> COc1ccc( (frequency 122389)\n", "pair 132: CN 1 -> CN1 (frequency 121906)\n", "pair 133: [O-] ) -> [O-]) (frequency 116630)\n", "pair 134: C O) -> CO) (frequency 115981)\n", "pair 135: C(F)(F) F) -> C(F)(F)F) (frequency 114809)\n", "pair 136: cc1 ) -> cc1) (frequency 112181)\n", "pair 137: [C@H]( O) -> [C@H](O) (frequency 110202)\n", "pair 138: n c2 -> nc2 (frequency 110040)\n", "pair 139: c( N -> c(N (frequency 107883)\n", "pair 140: [C@@H]( O) -> [C@@H](O) (frequency 105654)\n", "pair 141: C( O) -> C(O) (frequency 105631)\n", "pair 142: c3 cc( -> c3cc( (frequency 105600)\n", "pair 143: CC(=O) N -> CC(=O)N (frequency 105310)\n", "pair 144: c s -> cs (frequency 102497)\n", "pair 145: CC1 ) -> CC1) (frequency 100785)\n", "pair 146: / C( -> /C( (frequency 100715)\n", "pair 147: CC 3) -> CC3) (frequency 100572)\n", "pair 148: [N+] (=O) -> [N+](=O) (frequency 100462)\n", "pair 149: C S -> CS (frequency 99001)\n", "pair 150: c4cccc c4 -> c4ccccc4 (frequency 98889)\n", "pair 151: c1cc 2 -> c1cc2 (frequency 95532)\n", "pair 152: c3 n -> c3n (frequency 95323)\n", "pair 153: c( O) -> c(O) (frequency 94708)\n", "pair 154: 2 )cc1 -> 2)cc1 (frequency 94391)\n", "pair 155: CC CN -> CCCN (frequency 93841)\n", "pair 156: nc( N -> nc(N (frequency 93379)\n", "pair 157: CC OCC -> CCOCC (frequency 92351)\n", "pair 158: cc 3) -> cc3) (frequency 91102)\n", "pair 159: (C) C -> (C)C (frequency 89321)\n", "pair 160: (=O) =O) -> (=O)=O) (frequency 88764)\n", "pair 161: n 3 -> n3 (frequency 88708)\n", "pair 162: C1 ) -> C1) (frequency 88307)\n", "pair 163: n c1 -> nc1 (frequency 88233)\n", "pair 164: S ) -> S) (frequency 86827)\n", "pair 165: C# N) -> C#N) (frequency 86567)\n", "pair 166: cc c1 -> ccc1 (frequency 86461)\n", "pair 167: / N -> /N (frequency 85299)\n", "pair 168: )cc ( -> )cc( (frequency 84902)\n", "pair 169: C(=O) O -> C(=O)O (frequency 82642)\n", "pair 170: [C@H] 3 -> [C@H]3 (frequency 81079)\n", "pair 171: CCCC CCCC -> CCCCCCCC (frequency 80870)\n", "pair 172: ( N -> (N (frequency 80616)\n", "pair 173: c1 cn -> c1cn (frequency 80579)\n", "pair 174: C 3) -> C3) (frequency 78984)\n", "pair 175: Br ) -> Br) (frequency 78824)\n", "pair 176: )cc c1 -> )ccc1 (frequency 78676)\n", "pair 177: c2 c1 -> c2c1 (frequency 78167)\n", "pair 178: O=C( N -> O=C(N (frequency 78038)\n", "pair 179: cccc 1 -> cccc1 (frequency 77651)\n", "pair 180: [C@@H] 3 -> [C@@H]3 (frequency 75777)\n", "pair 181: c2cccc ( -> c2cccc( (frequency 75146)\n", "pair 182: C(C) (C)C) -> C(C)(C)C) (frequency 74628)\n", "pair 183: c1cc c2c( -> c1ccc2c( (frequency 74249)\n", "pair 184: c4 ccc( -> c4ccc( (frequency 72885)\n", "pair 185: CN 2 -> CN2 (frequency 72838)\n", "pair 186: C = -> C= (frequency 72534)\n", "pair 187: - c2ccc( -> -c2ccc( (frequency 71614)\n", "pair 188: - c2 -> -c2 (frequency 71436)\n", "pair 189: C(C) =O) -> C(C)=O) (frequency 70891)\n", "pair 190: c( Cl) -> c(Cl) (frequency 70721)\n", "pair 191: CO C(=O) -> COC(=O) (frequency 70622)\n", "pair 192: )cc 2) -> )cc2) (frequency 69730)\n", "pair 193: C [C@H]( -> C[C@H]( (frequency 68108)\n", "pair 194: n (C) -> n(C) (frequency 68090)\n", "pair 195: C( N) -> C(N) (frequency 67942)\n", "pair 196: =C \\ -> =C\\ (frequency 67259)\n", "pair 197: n1 ) -> n1) (frequency 67237)\n", "pair 198: C(C) (C) -> C(C)(C) (frequency 67092)\n", "pair 199: cc 3 -> cc3 (frequency 66177)\n", "pair 200: c o -> co (frequency 66036)\n", "pair 201: S ( -> S( (frequency 65989)\n", "pair 202: OCC ) -> OCC) (frequency 65754)\n", "pair 203: CC N1 -> CCN1 (frequency 65698)\n", "pair 204: c [nH] -> c[nH] (frequency 65648)\n", "pair 205: C( = -> C(= (frequency 65368)\n", "pair 206: n cn -> ncn (frequency 65330)\n", "pair 207: C n1 -> Cn1 (frequency 65186)\n", "pair 208: C(F)(F) F)cc -> C(F)(F)F)cc (frequency 65180)\n", "pair 209: O C(=O) -> OC(=O) (frequency 65104)\n", "pair 210: [C@H] (C) -> [C@H](C) (frequency 63957)\n", "pair 211: ( [O-]) -> ([O-]) (frequency 63633)\n", "pair 212: C c2ccc( -> Cc2ccc( (frequency 62927)\n", "pair 213: [C@@H] (C) -> [C@@H](C) (frequency 62506)\n", "pair 214: c4 c( -> c4c( (frequency 62028)\n", "pair 215: OC) c(OC) -> OC)c(OC) (frequency 61970)\n", "pair 216: C(=O)N 1 -> C(=O)N1 (frequency 61470)\n", "pair 217: )cc 2 -> )cc2 (frequency 61140)\n", "pair 218: CN C(=O) -> CNC(=O) (frequency 61100)\n", "pair 219: nc( C) -> nc(C) (frequency 60335)\n", "pair 220: 2) c1 -> 2)c1 (frequency 59312)\n", "pair 221: =O) cc1 -> =O)cc1 (frequency 59055)\n", "pair 222: S(=O)(=O) N -> S(=O)(=O)N (frequency 57859)\n", "pair 223: CC 3 -> CC3 (frequency 57821)\n", "pair 224: C [C@@H]( -> C[C@@H]( (frequency 57434)\n", "pair 225: C(=O) N( -> C(=O)N( (frequency 57100)\n", "pair 226: Cl )cc1 -> Cl)cc1 (frequency 56454)\n", "pair 227: n n -> nn (frequency 56025)\n", "pair 228: C c1 -> Cc1 (frequency 55032)\n", "pair 229: O= C1 -> O=C1 (frequency 55025)\n", "pair 230: C(=O)N 2 -> C(=O)N2 (frequency 54545)\n", "pair 231: S(=O) ( -> S(=O)( (frequency 54462)\n", "pair 232: C c2ccccc2) -> Cc2ccccc2) (frequency 54410)\n", "pair 233: O CO -> OCO (frequency 53760)\n", "pair 234: C 4 -> C4 (frequency 53749)\n", "pair 235: CN (C) -> CN(C) (frequency 53233)\n", "pair 236: C1 =O -> C1=O (frequency 53056)\n", "pair 237: c4 ) -> c4) (frequency 52771)\n", "pair 238: =N / -> =N/ (frequency 52520)\n", "pair 239: C c1cc -> Cc1cc (frequency 52460)\n", "pair 240: c2 cn -> c2cn (frequency 52036)\n", "pair 241: 5 ) -> 5) (frequency 52008)\n", "pair 242: cccc 2 -> cccc2 (frequency 51927)\n", "pair 243: cc( - -> cc(- (frequency 51765)\n", "pair 244: )cc 3) -> )cc3) (frequency 51346)\n", "pair 245: [N+](=O) [O-] -> [N+](=O)[O-] (frequency 51172)\n", "pair 246: C1 ( -> C1( (frequency 51027)\n", "pair 247: c1ccc2c( c1) -> c1ccc2c(c1) (frequency 50993)\n", "pair 248: F )cc1 -> F)cc1 (frequency 50930)\n", "pair 249: C c1cc( -> Cc1cc( (frequency 50630)\n", "pair 250: c1cccc 2 -> c1cccc2 (frequency 50182)\n", "pair 251: c1cc2 c( -> c1cc2c( (frequency 49944)\n", "pair 252: n 3) -> n3) (frequency 49345)\n", "pair 253: CO c1cc( -> COc1cc( (frequency 49240)\n", "pair 254: CC C) -> CCC) (frequency 49232)\n", "pair 255: c2cc 3 -> c2cc3 (frequency 49134)\n", "pair 256: cc (C) -> cc(C) (frequency 48608)\n", "pair 257: C(O) =O) -> C(O)=O) (frequency 48480)\n", "pair 258: C c1ccccc1) -> Cc1ccccc1) (frequency 48439)\n", "pair 259: [C@] 1 -> [C@]1 (frequency 48307)\n", "pair 260: n c3 -> nc3 (frequency 48167)\n", "pair 261: - c2n -> -c2n (frequency 47565)\n", "pair 262: c2ccc( Cl)cc -> c2ccc(Cl)cc (frequency 47002)\n", "pair 263: [N+](=O) [O-]) -> [N+](=O)[O-]) (frequency 46796)\n", "pair 264: O C( -> OC( (frequency 46787)\n", "pair 265: CCN( C(=O) -> CCN(C(=O) (frequency 46707)\n", "pair 266: CC(C) (C) -> CC(C)(C) (frequency 46663)\n", "pair 267: [C@@] 2 -> [C@@]2 (frequency 46650)\n", "pair 268: CC C1 -> CCC1 (frequency 46497)\n", "pair 269: N 3 -> N3 (frequency 46477)\n", "pair 270: =O) c1 -> =O)c1 (frequency 46474)\n", "pair 271: cccc 2) -> cccc2) (frequency 46444)\n", "pair 272: [N+] ([O-]) -> [N+]([O-]) (frequency 45915)\n", "pair 273: = C2 -> =C2 (frequency 44895)\n", "pair 274: C# N -> C#N (frequency 44689)\n", "pair 275: c( F) -> c(F) (frequency 44622)\n", "pair 276: [C@] 2 -> [C@]2 (frequency 44488)\n", "pair 277: n( - -> n(- (frequency 44106)\n", "pair 278: s 1 -> s1 (frequency 44022)\n", "pair 279: O 1 -> O1 (frequency 43717)\n", "pair 280: CC C2 -> CCC2 (frequency 43406)\n", "pair 281: o 1 -> o1 (frequency 43102)\n", "pair 282: C c1ccccc1 -> Cc1ccccc1 (frequency 42896)\n", "pair 283: cc c2 -> ccc2 (frequency 42474)\n", "pair 284: (C) CC -> (C)CC (frequency 42393)\n", "pair 285: c( N) -> c(N) (frequency 42243)\n", "pair 286: [C@@] 1 -> [C@@]1 (frequency 41833)\n", "pair 287: CC( O) -> CC(O) (frequency 41819)\n", "pair 288: CCO C(=O) -> CCOC(=O) (frequency 41669)\n", "pair 289: N C(=O)N -> NC(=O)N (frequency 41603)\n", "pair 290: C c1n -> Cc1n (frequency 41558)\n", "pair 291: c4ccccc4 ) -> c4ccccc4) (frequency 41168)\n", "pair 292: cccc 3) -> cccc3) (frequency 40926)\n", "pair 293: c1 c2c( -> c1c2c( (frequency 40693)\n", "pair 294: cn 1 -> cn1 (frequency 40110)\n", "pair 295: F)cc 2) -> F)cc2) (frequency 40040)\n", "pair 296: C c1c( -> Cc1c( (frequency 40024)\n", "pair 297: CC (C)C) -> CC(C)C) (frequency 39921)\n", "pair 298: nc( N) -> nc(N) (frequency 39584)\n", "pair 299: C [C@H]1 -> C[C@H]1 (frequency 39536)\n", "pair 300: O=C( O) -> O=C(O) (frequency 39483)\n", "pair 301: N =C( -> N=C( (frequency 39228)\n", "pair 302: c3cccc ( -> c3cccc( (frequency 38805)\n", "pair 303: C1 CCCC -> C1CCCC (frequency 38752)\n", "pair 304: = C1 -> =C1 (frequency 38014)\n", "pair 305: C n2 -> Cn2 (frequency 37935)\n", "pair 306: = S) -> =S) (frequency 37869)\n", "pair 307: c2cc c3c( -> c2ccc3c( (frequency 37352)\n", "pair 308: CC( N -> CC(N (frequency 36887)\n", "pair 309: - n2 -> -n2 (frequency 36859)\n", "pair 310: CCN( C -> CCN(C (frequency 36489)\n", "pair 311: c3ccc( Cl)cc -> c3ccc(Cl)cc (frequency 36395)\n", "pair 312: c2 )cc1 -> c2)cc1 (frequency 36098)\n", "pair 313: Cl) c( -> Cl)c( (frequency 36044)\n", "pair 314: c(=O) [nH] -> c(=O)[nH] (frequency 35947)\n", "pair 315: c3 cn -> c3cn (frequency 35870)\n", "pair 316: C(N) =O) -> C(N)=O) (frequency 35695)\n", "pair 317: c3ccc( F)cc -> c3ccc(F)cc (frequency 35482)\n", "pair 318: P (=O) -> P(=O) (frequency 35349)\n", "pair 319: ( C -> (C (frequency 35310)\n", "pair 320: N (C) -> N(C) (frequency 35240)\n", "pair 321: n2 )cc1 -> n2)cc1 (frequency 34920)\n", "pair 322: c1c( C) -> c1c(C) (frequency 34910)\n", "pair 323: CCC N( -> CCCN( (frequency 34876)\n", "pair 324: c2 [nH] -> c2[nH] (frequency 34700)\n", "pair 325: CCCC C2) -> CCCCC2) (frequency 34548)\n", "pair 326: c2c( C) -> c2c(C) (frequency 34181)\n", "pair 327: CC [C@H]( -> CC[C@H]( (frequency 34124)\n", "pair 328: 3)cc 2) -> 3)cc2) (frequency 34034)\n", "pair 329: c2 s -> c2s (frequency 34018)\n", "pair 330: O [C@H]( -> O[C@H]( (frequency 33808)\n", "pair 331: C [C@@H]1 -> C[C@@H]1 (frequency 33703)\n", "pair 332: OC) c1 -> OC)c1 (frequency 33643)\n", "pair 333: C) cc1 -> C)cc1 (frequency 33444)\n", "pair 334: C2 =O) -> C2=O) (frequency 33402)\n", "pair 335: c4 cc -> c4cc (frequency 33377)\n", "pair 336: CC CO -> CCCO (frequency 33150)\n", "pair 337: c2) c1 -> c2)c1 (frequency 32647)\n", "pair 338: N S(=O)(=O) -> NS(=O)(=O) (frequency 32553)\n", "pair 339: Cl)cc ( -> Cl)cc( (frequency 32421)\n", "pair 340: c12 c( -> c12c( (frequency 32332)\n", "pair 341: =N \\ -> =N\\ (frequency 32225)\n", "pair 342: c2 3) -> c23) (frequency 32212)\n", "pair 343: c1 nc( -> c1nc( (frequency 32042)\n", "pair 344: /N =C/ -> /N=C/ (frequency 31998)\n", "pair 345: C) =O) -> C)=O) (frequency 31970)\n", "pair 346: N # -> N# (frequency 31453)\n", "pair 347: cccc c1 -> ccccc1 (frequency 31412)\n", "pair 348: 3 )cc1 -> 3)cc1 (frequency 31313)\n", "pair 349: N2 CCN( -> N2CCN( (frequency 31177)\n", "pair 350: N1 CCN( -> N1CCN( (frequency 30888)\n", "pair 351: CC(=O) O) -> CC(=O)O) (frequency 30723)\n", "pair 352: =C( \\ -> =C(\\ (frequency 30560)\n", "pair 353: OCC (=O) -> OCC(=O) (frequency 30548)\n", "pair 354: c1 =O -> c1=O (frequency 30534)\n", "pair 355: (C) ( -> (C)( (frequency 30349)\n", "pair 356: CC 4) -> CC4) (frequency 30347)\n", "pair 357: CC2 )cc1 -> CC2)cc1 (frequency 30221)\n", "pair 358: CCN (C) -> CCN(C) (frequency 30111)\n", "pair 359: c1 - -> c1- (frequency 30074)\n", "pair 360: n cc -> ncc (frequency 30071)\n", "pair 361: c2ccc( - -> c2ccc(- (frequency 29759)\n", "pair 362: [C@] 3 -> [C@]3 (frequency 29542)\n", "pair 363: S (C) -> S(C) (frequency 29483)\n", "pair 364: O ( -> O( (frequency 29066)\n", "pair 365: C c1cccc( -> Cc1cccc( (frequency 28749)\n", "pair 366: CC NC(=O) -> CCNC(=O) (frequency 28698)\n", "pair 367: N( C(=O) -> N(C(=O) (frequency 28654)\n", "pair 368: c2 1 -> c21 (frequency 28649)\n", "pair 369: c2ccc(Cl)cc 2) -> c2ccc(Cl)cc2) (frequency 28351)\n", "pair 370: c1 s -> c1s (frequency 28300)\n", "pair 371: cc c3 -> ccc3 (frequency 28033)\n", "pair 372: CN 3 -> CN3 (frequency 27961)\n", "pair 373: CCCC ) -> CCCC) (frequency 27927)\n", "pair 374: cc c2) -> ccc2) (frequency 27875)\n", "pair 375: F )cc1) -> F)cc1) (frequency 27581)\n", "pair 376: cn 2 -> cn2 (frequency 27472)\n", "pair 377: CC 4 -> CC4 (frequency 27305)\n", "pair 378: C(=O)N [C@@H]( -> C(=O)N[C@@H]( (frequency 27150)\n", "pair 379: c4 cc( -> c4cc( (frequency 27134)\n", "pair 380: C 4) -> C4) (frequency 26923)\n", "pair 381: Cl )cc1) -> Cl)cc1) (frequency 26889)\n", "pair 382: [N+]([O-]) =O) -> [N+]([O-])=O) (frequency 26862)\n", "pair 383: CC O) -> CCO) (frequency 26859)\n", "pair 384: [C@H] 4 -> [C@H]4 (frequency 26830)\n", "pair 385: c( S -> c(S (frequency 26823)\n", "pair 386: CC S -> CCS (frequency 26755)\n", "pair 387: CC CN1 -> CCCN1 (frequency 26720)\n", "pair 388: Cl) c1 -> Cl)c1 (frequency 26691)\n", "pair 389: c2 o -> c2o (frequency 26678)\n", "pair 390: [C@@H] 4 -> [C@@H]4 (frequency 26666)\n", "pair 391: C(=O) OC) -> C(=O)OC) (frequency 26659)\n", "pair 392: c2 nc( -> c2nc( (frequency 26614)\n", "pair 393: c1ccc( Cl)cc1 -> c1ccc(Cl)cc1 (frequency 26597)\n", "pair 394: CCCC 1 -> CCCC1 (frequency 26530)\n", "pair 395: n n1 -> nn1 (frequency 26488)\n", "pair 396: c2ccc3c( c2) -> c2ccc3c(c2) (frequency 26486)\n", "pair 397: C [C@H]2 -> C[C@H]2 (frequency 26481)\n", "pair 398: n o -> no (frequency 26474)\n", "pair 399: c2ccccc2 1 -> c2ccccc21 (frequency 26403)\n", "pair 400: [C@@] 3 -> [C@@]3 (frequency 26395)\n", "pair 401: C2 ( -> C2( (frequency 26356)\n", "pair 402: c5 cccc -> c5cccc (frequency 26332)\n", "pair 403: CC n1 -> CCn1 (frequency 26320)\n", "pair 404: C) cc2) -> C)cc2) (frequency 26142)\n", "pair 405: CC N2 -> CCN2 (frequency 25965)\n", "pair 406: c1ccc( N -> c1ccc(N (frequency 25935)\n", "pair 407: 3)cc 2 -> 3)cc2 (frequency 25879)\n", "pair 408: N) N -> N)N (frequency 25810)\n", "pair 409: cccc 3 -> cccc3 (frequency 25674)\n", "pair 410: cccc 1) -> cccc1) (frequency 25498)\n", "pair 411: CCCC 2) -> CCCC2) (frequency 25385)\n", "pair 412: c2cc( - -> c2cc(- (frequency 25381)\n", "pair 413: c( C(=O)N -> c(C(=O)N (frequency 25209)\n", "pair 414: OC) =O) -> OC)=O) (frequency 25134)\n", "pair 415: c2cc3 c( -> c2cc3c( (frequency 25126)\n", "pair 416: c3ccc(F)cc 3) -> c3ccc(F)cc3) (frequency 25107)\n", "pair 417: C [C@@H]2 -> C[C@@H]2 (frequency 24925)\n", "pair 418: C2 =O -> C2=O (frequency 24894)\n", "pair 419: /C =C\\ -> /C=C\\ (frequency 24867)\n", "pair 420: n( C -> n(C (frequency 24644)\n", "pair 421: S(C) (=O)=O) -> S(C)(=O)=O) (frequency 24560)\n", "pair 422: cn 2) -> cn2) (frequency 24543)\n", "pair 423: cn c1 -> cnc1 (frequency 24448)\n", "pair 424: Cl) c(Cl) -> Cl)c(Cl) (frequency 24437)\n", "pair 425: c2ccc( OC) -> c2ccc(OC) (frequency 24407)\n", "pair 426: N1 ( -> N1( (frequency 24318)\n", "pair 427: c2cc cn -> c2cccn (frequency 24209)\n", "pair 428: c2 c3c( -> c2c3c( (frequency 24101)\n", "pair 429: cc c1) -> ccc1) (frequency 24068)\n", "pair 430: O) cc1 -> O)cc1 (frequency 24024)\n", "pair 431: ( - -> (- (frequency 23964)\n", "pair 432: c1ccc( - -> c1ccc(- (frequency 23963)\n", "pair 433: c2ccccc2 )cc1 -> c2ccccc2)cc1 (frequency 23787)\n", "pair 434: cn c3 -> cnc3 (frequency 23745)\n", "pair 435: c3ccc(Cl)cc 3) -> c3ccc(Cl)cc3) (frequency 23640)\n", "pair 436: c(C) c1 -> c(C)c1 (frequency 23538)\n", "pair 437: n [nH] -> n[nH] (frequency 23470)\n", "pair 438: )cc 3 -> )cc3 (frequency 23455)\n", "pair 439: OC) cc( -> OC)cc( (frequency 23444)\n", "pair 440: CCOCC 2) -> CCOCC2) (frequency 23440)\n", "pair 441: N c1n -> Nc1n (frequency 23423)\n", "pair 442: c2 =O) -> c2=O) (frequency 23410)\n", "pair 443: = C(C) -> =C(C) (frequency 23234)\n", "pair 444: CO c1cccc( -> COc1cccc( (frequency 23019)\n", "pair 445: O C(C)=O) -> OC(C)=O) (frequency 23006)\n", "pair 446: CC C(N -> CCC(N (frequency 23004)\n", "pair 447: c2ccc( F)cc2) -> c2ccc(F)cc2) (frequency 22996)\n", "pair 448: c2cc n -> c2ccn (frequency 22965)\n", "pair 449: F) c( -> F)c( (frequency 22957)\n", "pair 450: c2 nc(- -> c2nc(- (frequency 22849)\n", "pair 451: P(=O) (O) -> P(=O)(O) (frequency 22787)\n", "pair 452: N( C( -> N(C( (frequency 22778)\n", "pair 453: CO c1ccccc1 -> COc1ccccc1 (frequency 22761)\n", "pair 454: N C -> NC (frequency 22752)\n", "pair 455: CCCC C3) -> CCCCC3) (frequency 22634)\n", "pair 456: CCN( CC) -> CCN(CC) (frequency 22621)\n", "pair 457: n cc1 -> ncc1 (frequency 22619)\n", "pair 458: OCC O -> OCCO (frequency 22520)\n", "pair 459: c2) =O) -> c2)=O) (frequency 22518)\n", "pair 460: s 2) -> s2) (frequency 22407)\n", "pair 461: O [C@@H]( -> O[C@@H]( (frequency 22404)\n", "pair 462: n 4 -> n4 (frequency 22271)\n", "pair 463: C(= S) -> C(=S) (frequency 22216)\n", "pair 464: /C =N/ -> /C=N/ (frequency 22152)\n", "pair 465: CC C(=O)N -> CCC(=O)N (frequency 22082)\n", "pair 466: O) c( -> O)c( (frequency 21944)\n", "pair 467: CCOCC 3) -> CCOCC3) (frequency 21941)\n", "pair 468: N (C)C) -> N(C)C) (frequency 21936)\n", "pair 469: C1 2 -> C12 (frequency 21921)\n", "pair 470: C2 )cc1 -> C2)cc1 (frequency 21806)\n", "pair 471: c2 n1 -> c2n1 (frequency 21802)\n", "pair 472: CCCC 3) -> CCCC3) (frequency 21766)\n", "pair 473: CC = -> CC= (frequency 21746)\n", "pair 474: nc( S -> nc(S (frequency 21458)\n", "pair 475: CN( C(=O) -> CN(C(=O) (frequency 21429)\n", "pair 476: N( C -> N(C (frequency 21378)\n", "pair 477: n2) c1 -> n2)c1 (frequency 21321)\n", "pair 478: c1ccc( OC) -> c1ccc(OC) (frequency 21307)\n", "pair 479: C =C -> C=C (frequency 21160)\n", "pair 480: =O) =O) -> =O)=O) (frequency 20994)\n", "pair 481: [nH] 1 -> [nH]1 (frequency 20829)\n", "pair 482: /C( =C/ -> /C(=C/ (frequency 20780)\n", "pair 483: c3ccc( OC) -> c3ccc(OC) (frequency 20588)\n", "pair 484: CC [C@@H]( -> CC[C@@H]( (frequency 20552)\n", "pair 485: /C( =C\\ -> /C(=C\\ (frequency 20531)\n", "pair 486: cc 4 -> cc4 (frequency 20494)\n", "pair 487: CN C( -> CNC( (frequency 20469)\n", "pair 488: O c1ccc( -> Oc1ccc( (frequency 20385)\n", "pair 489: c1 [nH] -> c1[nH] (frequency 20243)\n", "pair 490: C1 =O) -> C1=O) (frequency 20080)\n", "pair 491: c3ccc( C) -> c3ccc(C) (frequency 20065)\n", "pair 492: c2cc (C) -> c2cc(C) (frequency 20053)\n", "pair 493: CCC 3 -> CCC3 (frequency 19992)\n", "pair 494: c5cccc c5 -> c5ccccc5 (frequency 19949)\n", "pair 495: c2 cc1 -> c2cc1 (frequency 19855)\n", "pair 496: CC2) c1 -> CC2)c1 (frequency 19838)\n", "pair 497: O) c(O) -> O)c(O) (frequency 19807)\n", "pair 498: CCCC 2 -> CCCC2 (frequency 19581)\n", "pair 499: c1cc (C) -> c1cc(C) (frequency 19560)\n", "pair 500: c1ccc( O -> c1ccc(O (frequency 19522)\n", "pair 501: C2 =N -> C2=N (frequency 19430)\n", "pair 502: cc2 )cc1 -> cc2)cc1 (frequency 19242)\n", "pair 503: F)cc 2 -> F)cc2 (frequency 19182)\n", "pair 504: c2n ( -> c2n( (frequency 19129)\n", "pair 505: C1 CCN( -> C1CCN( (frequency 19118)\n", "pair 506: c3cc 4 -> c3cc4 (frequency 18993)\n", "pair 507: c2cccc c12 -> c2ccccc12 (frequency 18990)\n", "pair 508: cc 4) -> cc4) (frequency 18979)\n", "pair 509: N) =O) -> N)=O) (frequency 18962)\n", "pair 510: [C@]1 2 -> [C@]12 (frequency 18898)\n", "pair 511: NC(=O) [C@H]( -> NC(=O)[C@H]( (frequency 18782)\n", "pair 512: c1 (C) -> c1(C) (frequency 18776)\n", "pair 513: C1 CN( -> C1CN( (frequency 18773)\n", "pair 514: C c3ccccc3) -> Cc3ccccc3) (frequency 18756)\n", "pair 515: c1 nc(- -> c1nc(- (frequency 18751)\n", "pair 516: OC)c(OC) c(OC) -> OC)c(OC)c(OC) (frequency 18489)\n", "pair 517: [C@] 4 -> [C@]4 (frequency 18464)\n", "pair 518: Cl)cc 2) -> Cl)cc2) (frequency 18439)\n", "pair 519: N2 C(=O) -> N2C(=O) (frequency 18406)\n", "pair 520: C3 =O) -> C3=O) (frequency 18381)\n", "pair 521: C) c1 -> C)c1 (frequency 18302)\n", "pair 522: c2cccc 3 -> c2cccc3 (frequency 18288)\n", "pair 523: c2c( - -> c2c(- (frequency 18283)\n", "pair 524: N c1ccc( -> Nc1ccc( (frequency 18195)\n", "pair 525: CCCC C1 -> CCCCC1 (frequency 18188)\n", "pair 526: C( O -> C(O (frequency 18122)\n", "pair 527: S(=O) (N -> S(=O)(N (frequency 18039)\n", "pair 528: CC(=O) O -> CC(=O)O (frequency 18004)\n", "pair 529: cc c3) -> ccc3) (frequency 17995)\n", "pair 530: CC C(C) -> CCC(C) (frequency 17984)\n", "pair 531: C S(=O)(=O) -> CS(=O)(=O) (frequency 17979)\n", "pair 532: c1c( O) -> c1c(O) (frequency 17733)\n", "pair 533: c3 [nH] -> c3[nH] (frequency 17651)\n", "pair 534: OCC) =O) -> OCC)=O) (frequency 17594)\n", "pair 535: OC) c( -> OC)c( (frequency 17583)\n", "pair 536: C(=O) OC -> C(=O)OC (frequency 17580)\n", "pair 537: [C@@H]1 O -> [C@@H]1O (frequency 17527)\n", "pair 538: c2n cn -> c2ncn (frequency 17473)\n", "pair 539: c 6 -> c6 (frequency 17473)\n", "pair 540: s 1) -> s1) (frequency 17439)\n", "pair 541: C1 =C( -> C1=C( (frequency 17406)\n", "pair 542: CCCN 2 -> CCCN2 (frequency 17353)\n", "pair 543: C [C@@H](O) -> C[C@@H](O) (frequency 17322)\n", "pair 544: CCC 2) -> CCC2) (frequency 17306)\n", "pair 545: (=O) =O -> (=O)=O (frequency 17298)\n", "pair 546: o c( -> oc( (frequency 17243)\n", "pair 547: ( O -> (O (frequency 17167)\n", "pair 548: N( CC) -> N(CC) (frequency 17163)\n", "pair 549: c(=O) n( -> c(=O)n( (frequency 17135)\n", "pair 550: n n2 -> nn2 (frequency 17115)\n", "pair 551: /C =C -> /C=C (frequency 17101)\n", "pair 552: c2c( =O) -> c2c(=O) (frequency 17094)\n", "pair 553: C [C@H](O) -> C[C@H](O) (frequency 17090)\n", "pair 554: -c2 cn -> -c2cn (frequency 17087)\n", "pair 555: c2c( cccc2) -> c2c(cccc2) (frequency 16982)\n", "pair 556: c3c( C) -> c3c(C) (frequency 16944)\n", "pair 557: c1 O -> c1O (frequency 16935)\n", "pair 558: - c2ccccc2) -> -c2ccccc2) (frequency 16865)\n", "pair 559: - c1ccc( -> -c1ccc( (frequency 16833)\n", "pair 560: NC( =S) -> NC(=S) (frequency 16823)\n", "pair 561: C(C) C -> C(C)C (frequency 16818)\n", "pair 562: CCO CC1 -> CCOCC1 (frequency 16736)\n", "pair 563: =C( / -> =C(/ (frequency 16688)\n", "pair 564: C(=O)N C -> C(=O)NC (frequency 16638)\n", "pair 565: c3c( cccc3) -> c3c(cccc3) (frequency 16609)\n", "pair 566: ( C(F)(F)F) -> (C(F)(F)F) (frequency 16533)\n", "pair 567: [n+] 1 -> [n+]1 (frequency 16532)\n", "pair 568: CC1 (C) -> CC1(C) (frequency 16489)\n", "pair 569: cc( O) -> cc(O) (frequency 16325)\n", "pair 570: - c2cc( -> -c2cc( (frequency 16295)\n", "pair 571: N 4 -> N4 (frequency 16276)\n", "pair 572: O 2) -> O2) (frequency 16229)\n", "pair 573: C( OC)=O) -> C(OC)=O) (frequency 16197)\n", "pair 574: CCCC (=O) -> CCCC(=O) (frequency 16190)\n", "pair 575: - 2 -> -2 (frequency 16189)\n", "pair 576: cn 3) -> cn3) (frequency 16182)\n", "pair 577: c3ccc( - -> c3ccc(- (frequency 16166)\n", "pair 578: = N) -> =N) (frequency 16129)\n", "pair 579: CC # -> CC# (frequency 16104)\n", "pair 580: c2) CC1 -> c2)CC1 (frequency 16084)\n", "pair 581: [C@@H](O) [C@H](O) -> [C@@H](O)[C@H](O) (frequency 16044)\n", "pair 582: C(=O)N [C@H]( -> C(=O)N[C@H]( (frequency 15973)\n", "pair 583: c( NC(=O) -> c(NC(=O) (frequency 15940)\n", "pair 584: c1cc2c( cc1 -> c1cc2c(cc1 (frequency 15880)\n", "pair 585: c2 nc(N -> c2nc(N (frequency 15866)\n", "pair 586: C c1cn -> Cc1cn (frequency 15825)\n", "pair 587: C2 CC2) -> C2CC2) (frequency 15787)\n", "pair 588: O 2 -> O2 (frequency 15782)\n", "pair 589: C(=O)N 3 -> C(=O)N3 (frequency 15759)\n", "pair 590: N1 C(=O) -> N1C(=O) (frequency 15723)\n", "pair 591: [C@@H]( CO) -> [C@@H](CO) (frequency 15690)\n", "pair 592: c1ccc( Cl)cc1) -> c1ccc(Cl)cc1) (frequency 15687)\n", "pair 593: c3 s -> c3s (frequency 15617)\n", "pair 594: /C( =N\\ -> /C(=N\\ (frequency 15586)\n", "pair 595: C =C( -> C=C( (frequency 15574)\n", "pair 596: S(=O)(=O) N2 -> S(=O)(=O)N2 (frequency 15515)\n", "pair 597: CCO c1ccc( -> CCOc1ccc( (frequency 15493)\n", "pair 598: /C =C2 -> /C=C2 (frequency 15419)\n", "pair 599: (F) (F) -> (F)(F) (frequency 15324)\n", "pair 600: [C@H]1 O -> [C@H]1O (frequency 15278)\n", "pair 601: CC [C@H]2 -> CC[C@H]2 (frequency 15252)\n", "pair 602: cs 1 -> cs1 (frequency 15242)\n", "pair 603: c2ccc(OC) cc2) -> c2ccc(OC)cc2) (frequency 15197)\n", "pair 604: c3 o -> c3o (frequency 15118)\n", "pair 605: C1 C( -> C1C( (frequency 15118)\n", "pair 606: n c2) -> nc2) (frequency 15116)\n", "pair 607: c2cc co -> c2ccco (frequency 15081)\n", "pair 608: cs 2) -> cs2) (frequency 15070)\n", "pair 609: c2c( O) -> c2c(O) (frequency 15055)\n", "pair 610: F)cc ( -> F)cc( (frequency 15035)\n", "pair 611: [C@]2 (C) -> [C@]2(C) (frequency 14968)\n", "pair 612: [C@@] 4 -> [C@@]4 (frequency 14952)\n", "pair 613: [C@@]2 (C) -> [C@@]2(C) (frequency 14910)\n", "pair 614: O[C@H]( CO) -> O[C@H](CO) (frequency 14905)\n", "pair 615: c1c( Cl) -> c1c(Cl) (frequency 14896)\n", "pair 616: c2cc( Cl)cc -> c2cc(Cl)cc (frequency 14885)\n", "pair 617: Cl)cc 2 -> Cl)cc2 (frequency 14878)\n", "pair 618: CCCC ( -> CCCC( (frequency 14806)\n", "pair 619: ccc( - -> ccc(- (frequency 14790)\n", "pair 620: C1 N( -> C1N( (frequency 14752)\n", "pair 621: c1 C -> c1C (frequency 14751)\n", "pair 622: c2cccc( Cl) -> c2cccc(Cl) (frequency 14720)\n", "pair 623: Cc1cc (C) -> Cc1cc(C) (frequency 14703)\n", "pair 624: C1 = -> C1= (frequency 14671)\n", "pair 625: cn 1) -> cn1) (frequency 14654)\n", "pair 626: CC(=O) N1 -> CC(=O)N1 (frequency 14640)\n", "pair 627: n1 c( -> n1c( (frequency 14628)\n", "pair 628: c1ccc( F)cc1 -> c1ccc(F)cc1 (frequency 14609)\n", "pair 629: cccc 4) -> cccc4) (frequency 14584)\n", "pair 630: c1 2) -> c12) (frequency 14572)\n", "pair 631: S( N) -> S(N) (frequency 14514)\n", "pair 632: CN1 CCN( -> CN1CCN( (frequency 14490)\n", "pair 633: CCC( O) -> CCC(O) (frequency 14468)\n", "pair 634: C3 CC3) -> C3CC3) (frequency 14452)\n", "pair 635: c3cc c4c( -> c3ccc4c( (frequency 14447)\n", "pair 636: c1c( OC) -> c1c(OC) (frequency 14442)\n", "pair 637: CC C(=O)O) -> CCC(=O)O) (frequency 14438)\n", "pair 638: C(=O) OCC) -> C(=O)OCC) (frequency 14403)\n", "pair 639: Br )cc -> Br)cc (frequency 14403)\n", "pair 640: n cc( -> ncc( (frequency 14361)\n", "pair 641: c1 o -> c1o (frequency 14324)\n", "pair 642: n n2) -> nn2) (frequency 14297)\n", "pair 643: O CCCC -> OCCCC (frequency 14265)\n", "pair 644: - c1ccccc1 -> -c1ccccc1 (frequency 14232)\n", "pair 645: C2 =C( -> C2=C( (frequency 14205)\n", "pair 646: C2 CCCCC2) -> C2CCCCC2) (frequency 14203)\n", "pair 647: C1 (=O) -> C1(=O) (frequency 14188)\n", "pair 648: c2ccc( C)cc2) -> c2ccc(C)cc2) (frequency 14174)\n", "pair 649: c1cc2c( cc1) -> c1cc2c(cc1) (frequency 14151)\n", "pair 650: /C =C(\\ -> /C=C(\\ (frequency 14145)\n", "pair 651: nc2 c1 -> nc2c1 (frequency 14126)\n", "pair 652: c3cc n -> c3ccn (frequency 14110)\n", "pair 653: c2ccc( O) -> c2ccc(O) (frequency 14065)\n", "pair 654: C1 =N -> C1=N (frequency 14044)\n", "pair 655: S 1 -> S1 (frequency 14013)\n", "pair 656: C) cc1) -> C)cc1) (frequency 13997)\n", "pair 657: ( CC) -> (CC) (frequency 13980)\n", "pair 658: C(=O) C( -> C(=O)C( (frequency 13964)\n", "pair 659: CCN 3 -> CCN3 (frequency 13945)\n", "pair 660: c2ccc(Cl)cc 2 -> c2ccc(Cl)cc2 (frequency 13894)\n", "pair 661: cn c2 -> cnc2 (frequency 13837)\n", "pair 662: C c2ccccc2 -> Cc2ccccc2 (frequency 13833)\n", "pair 663: C [C@H]3 -> C[C@H]3 (frequency 13817)\n", "pair 664: n cc2 -> ncc2 (frequency 13811)\n", "pair 665: CS c1n -> CSc1n (frequency 13799)\n", "pair 666: CO C( -> COC( (frequency 13668)\n", "pair 667: c3ccc(OC) cc3) -> c3ccc(OC)cc3) (frequency 13660)\n", "pair 668: c3 c2 -> c3c2 (frequency 13556)\n", "pair 669: c2c( Cl) -> c2c(Cl) (frequency 13545)\n", "pair 670: CCC 3) -> CCC3) (frequency 13538)\n", "pair 671: [C@@] (C)( -> [C@@](C)( (frequency 13531)\n", "pair 672: Br )cc1 -> Br)cc1 (frequency 13467)\n", "pair 673: C(F)(F) F -> C(F)(F)F (frequency 13447)\n", "pair 674: CC c1ccc( -> CCc1ccc( (frequency 13406)\n", "pair 675: [C@@H]( N) -> [C@@H](N) (frequency 13395)\n", "pair 676: C(C) (C)C -> C(C)(C)C (frequency 13368)\n", "pair 677: c(N 3 -> c(N3 (frequency 13261)\n", "pair 678: c(=O) c( -> c(=O)c( (frequency 13212)\n", "pair 679: C2 CCN( -> C2CCN( (frequency 13193)\n", "pair 680: c3cccc c23) -> c3ccccc23) (frequency 13183)\n", "pair 681: C(=O) OCC -> C(=O)OCC (frequency 13177)\n", "pair 682: S(N) (=O)=O) -> S(N)(=O)=O) (frequency 13174)\n", "pair 683: s c1 -> sc1 (frequency 13142)\n", "pair 684: o 2) -> o2) (frequency 13111)\n", "pair 685: C( OCC)=O) -> C(OCC)=O) (frequency 13030)\n", "pair 686: / C(C) -> /C(C) (frequency 13005)\n", "pair 687: c1 nc(N -> c1nc(N (frequency 12979)\n", "pair 688: N C(N -> NC(N (frequency 12971)\n", "pair 689: CN1 C(=O) -> CN1C(=O) (frequency 12884)\n", "pair 690: S (N -> S(N (frequency 12861)\n", "pair 691: CN ) -> CN) (frequency 12841)\n", "pair 692: Cl)cc c1 -> Cl)ccc1 (frequency 12836)\n", "pair 693: C( c2ccccc2) -> C(c2ccccc2) (frequency 12813)\n", "pair 694: CO c1cc -> COc1cc (frequency 12798)\n", "pair 695: c2ccn cc2) -> c2ccncc2) (frequency 12790)\n", "pair 696: OCO 2 -> OCO2 (frequency 12782)\n", "pair 697: OCO 3) -> OCO3) (frequency 12778)\n", "pair 698: c(OC) c1 -> c(OC)c1 (frequency 12769)\n", "pair 699: c( O -> c(O (frequency 12666)\n", "pair 700: c3 4) -> c34) (frequency 12665)\n", "pair 701: c4 n -> c4n (frequency 12646)\n", "pair 702: OC)c1 OC -> OC)c1OC (frequency 12636)\n", "pair 703: 4 )cc -> 4)cc (frequency 12573)\n", "pair 704: I ) -> I) (frequency 12527)\n", "pair 705: c1) =O -> c1)=O (frequency 12525)\n", "pair 706: n 4) -> n4) (frequency 12522)\n", "pair 707: C [C@@H]3 -> C[C@@H]3 (frequency 12442)\n", "pair 708: c4 cn -> c4cn (frequency 12420)\n", "pair 709: c5 c( -> c5c( (frequency 12394)\n", "pair 710: N) N) -> N)N) (frequency 12393)\n", "pair 711: CCCC C1) -> CCCCC1) (frequency 12382)\n", "pair 712: OCC(=O) N -> OCC(=O)N (frequency 12348)\n", "pair 713: - c2ccccc2 -> -c2ccccc2 (frequency 12301)\n", "pair 714: [C@@H]( NC(=O) -> [C@@H](NC(=O) (frequency 12287)\n", "pair 715: N2 CCOCC2) -> N2CCOCC2) (frequency 12283)\n", "pair 716: N1 CCC( -> N1CCC( (frequency 12279)\n", "pair 717: C( CC) -> C(CC) (frequency 12279)\n", "pair 718: CC2) =O) -> CC2)=O) (frequency 12275)\n", "pair 719: n2 c( -> n2c( (frequency 12268)\n", "pair 720: CCO CC1) -> CCOCC1) (frequency 12188)\n", "pair 721: C# C -> C#C (frequency 12168)\n", "pair 722: [nH] c(=O) -> [nH]c(=O) (frequency 12145)\n", "pair 723: C= C1 -> C=C1 (frequency 12121)\n", "pair 724: C(=O)N (C) -> C(=O)N(C) (frequency 12112)\n", "pair 725: CN( C -> CN(C (frequency 12072)\n", "pair 726: O) cc1) -> O)cc1) (frequency 12062)\n", "pair 727: [C@] (C)( -> [C@](C)( (frequency 12048)\n", "pair 728: O) c1 -> O)c1 (frequency 12045)\n", "pair 729: cc2 c( -> cc2c( (frequency 12038)\n", "pair 730: F) c(F) -> F)c(F) (frequency 12016)\n", "pair 731: [C@@]1 2 -> [C@@]12 (frequency 11962)\n", "pair 732: cccc ( -> cccc( (frequency 11953)\n", "pair 733: c2ccc( C) -> c2ccc(C) (frequency 11952)\n", "pair 734: c2 c[nH] -> c2c[nH] (frequency 11893)\n", "pair 735: s 3) -> s3) (frequency 11888)\n", "pair 736: C(=O)N ) -> C(=O)N) (frequency 11881)\n", "pair 737: N( CC -> N(CC (frequency 11874)\n", "pair 738: n c12 -> nc12 (frequency 11805)\n", "pair 739: N CC -> NCC (frequency 11801)\n", "pair 740: c2n cc -> c2ncc (frequency 11798)\n", "pair 741: CCN (C)C) -> CCN(C)C) (frequency 11798)\n", "pair 742: n(C) c(=O) -> n(C)c(=O) (frequency 11771)\n", "pair 743: c3ccccc3 2) -> c3ccccc32) (frequency 11765)\n", "pair 744: c1 =O) -> c1=O) (frequency 11761)\n", "pair 745: c3n cn -> c3ncn (frequency 11711)\n", "pair 746: O C1 -> OC1 (frequency 11711)\n", "pair 747: c3ccc(C) cc3) -> c3ccc(C)cc3) (frequency 11707)\n", "pair 748: P (O) -> P(O) (frequency 11693)\n", "pair 749: ccc( OC) -> ccc(OC) (frequency 11682)\n", "pair 750: /C( =N/ -> /C(=N/ (frequency 11682)\n", "pair 751: c3cc (C) -> c3cc(C) (frequency 11662)\n", "pair 752: c( C#N) -> c(C#N) (frequency 11651)\n", "pair 753: c3ccc( O) -> c3ccc(O) (frequency 11646)\n", "pair 754: o c(- -> oc(- (frequency 11629)\n", "pair 755: C(F)(F) F)cc1 -> C(F)(F)F)cc1 (frequency 11621)\n", "pair 756: C2 =O)cc1 -> C2=O)cc1 (frequency 11595)\n", "pair 757: c1ccc( F)cc1) -> c1ccc(F)cc1) (frequency 11562)\n", "pair 758: ( CC -> (CC (frequency 11535)\n", "pair 759: C(=O)N CC -> C(=O)NCC (frequency 11498)\n", "pair 760: c2ccco 2) -> c2ccco2) (frequency 11408)\n", "pair 761: O c2ccc( -> Oc2ccc( (frequency 11390)\n", "pair 762: C(= N)N -> C(=N)N (frequency 11390)\n", "pair 763: =O) cc2) -> =O)cc2) (frequency 11387)\n", "pair 764: c3cc4 c( -> c3cc4c( (frequency 11342)\n", "pair 765: C(C) ( -> C(C)( (frequency 11290)\n", "pair 766: c2 C) -> c2C) (frequency 11279)\n", "pair 767: CCCCCCCC CCCC -> CCCCCCCCCCCC (frequency 11264)\n", "pair 768: Br )cc2) -> Br)cc2) (frequency 11241)\n", "pair 769: =N /N -> =N/N (frequency 11229)\n", "pair 770: C( c1ccccc1) -> C(c1ccccc1) (frequency 11142)\n", "pair 771: C c2c( -> Cc2c( (frequency 11123)\n", "pair 772: O [C@H]1 -> O[C@H]1 (frequency 11103)\n", "pair 773: [C@]3 (C) -> [C@]3(C) (frequency 11102)\n", "pair 774: 4 )cc3) -> 4)cc3) (frequency 11059)\n", "pair 775: C1 CC -> C1CC (frequency 11057)\n", "pair 776: C =O) -> C=O) (frequency 11047)\n", "pair 777: [N+] (C) -> [N+](C) (frequency 11035)\n", "pair 778: CC [C@@H]1 -> CC[C@@H]1 (frequency 11029)\n", "pair 779: c3 2) -> c32) (frequency 10965)\n", "pair 780: c1( N -> c1(N (frequency 10959)\n", "pair 781: C( N1 -> C(N1 (frequency 10950)\n", "pair 782: c4ccc( F)cc -> c4ccc(F)cc (frequency 10940)\n", "pair 783: c2) n1 -> c2)n1 (frequency 10928)\n", "pair 784: C(=O) ( -> C(=O)( (frequency 10921)\n", "pair 785: C2 = -> C2= (frequency 10918)\n", "pair 786: O [C@@H]1 -> O[C@@H]1 (frequency 10913)\n", "pair 787: c1ccc( NC(=O) -> c1ccc(NC(=O) (frequency 10899)\n", "pair 788: C c2cc( -> Cc2cc( (frequency 10872)\n", "pair 789: c( Br) -> c(Br) (frequency 10870)\n", "pair 790: n c3) -> nc3) (frequency 10855)\n", "pair 791: c3ccc4c( c3) -> c3ccc4c(c3) (frequency 10849)\n", "pair 792: c2n c3c( -> c2nc3c( (frequency 10846)\n", "pair 793: 5 )cc -> 5)cc (frequency 10836)\n", "pair 794: CC [C@H]1 -> CC[C@H]1 (frequency 10818)\n", "pair 795: - c1n -> -c1n (frequency 10792)\n", "pair 796: c2n cc( -> c2ncc( (frequency 10766)\n", "pair 797: c(Cl) c1 -> c(Cl)c1 (frequency 10751)\n", "pair 798: c2cccc n2) -> c2ccccn2) (frequency 10723)\n", "pair 799: c1c( F) -> c1c(F) (frequency 10715)\n", "pair 800: C1CCCC C1 -> C1CCCCC1 (frequency 10700)\n", "pair 801: =O) ccc1 -> =O)ccc1 (frequency 10681)\n", "pair 802: N C(C)=O) -> NC(C)=O) (frequency 10626)\n", "pair 803: c5 ccc( -> c5ccc( (frequency 10605)\n", "pair 804: n1 cn -> n1cn (frequency 10589)\n", "pair 805: [n+] 2 -> [n+]2 (frequency 10582)\n", "pair 806: n c1) -> nc1) (frequency 10533)\n", "pair 807: O 3) -> O3) (frequency 10507)\n", "pair 808: c2cc cs2) -> c2cccs2) (frequency 10489)\n", "pair 809: c1cc( - -> c1cc(- (frequency 10485)\n", "pair 810: [n+] ( -> [n+]( (frequency 10460)\n", "pair 811: c3cc cn -> c3cccn (frequency 10411)\n", "pair 812: C(F)(F)F)cc 2) -> C(F)(F)F)cc2) (frequency 10363)\n", "pair 813: CC1 ( -> CC1( (frequency 10355)\n", "pair 814: [C@]2 ( -> [C@]2( (frequency 10346)\n", "pair 815: c3cc( Cl)cc -> c3cc(Cl)cc (frequency 10335)\n", "pair 816: O= [N+]([O-]) -> O=[N+]([O-]) (frequency 10334)\n", "pair 817: CC(=O) N( -> CC(=O)N( (frequency 10323)\n", "pair 818: c1 c[nH] -> c1c[nH] (frequency 10318)\n", "pair 819: cc c4 -> ccc4 (frequency 10315)\n", "pair 820: c2ccccc2) c1 -> c2ccccc2)c1 (frequency 10308)\n", "pair 821: cs 1) -> cs1) (frequency 10285)\n", "pair 822: c2ccc( O -> c2ccc(O (frequency 10284)\n", "pair 823: CN (C)C) -> CN(C)C) (frequency 10258)\n", "pair 824: [nH] 2) -> [nH]2) (frequency 10246)\n", "pair 825: cc( OC) -> cc(OC) (frequency 10183)\n", "pair 826: c3cc co -> c3ccco (frequency 10126)\n", "pair 827: c3 n( -> c3n( (frequency 10112)\n", "pair 828: n c4 -> nc4 (frequency 10111)\n", "pair 829: C1 CC1 -> C1CC1 (frequency 10106)\n", "pair 830: CC(O) =O) -> CC(O)=O) (frequency 10102)\n", "pair 831: O 1) -> O1) (frequency 10082)\n", "pair 832: c1cc( OC) -> c1cc(OC) (frequency 10077)\n", "pair 833: 3 C) -> 3C) (frequency 10067)\n", "pair 834: nc(N 3 -> nc(N3 (frequency 10016)\n", "pair 835: C( N2 -> C(N2 (frequency 9979)\n", "pair 836: c1cc co -> c1ccco (frequency 9962)\n", "pair 837: 2 )ccc1 -> 2)ccc1 (frequency 9961)\n", "pair 838: /N =C(\\ -> /N=C(\\ (frequency 9961)\n", "pair 839: CCC2 ( -> CCC2( (frequency 9927)\n", "pair 840: n c2c( -> nc2c( (frequency 9921)\n", "pair 841: CC( CC( -> CC(CC( (frequency 9915)\n", "pair 842: CC [C@@H]2 -> CC[C@@H]2 (frequency 9913)\n", "pair 843: c3ccccc3 )cc2) -> c3ccccc3)cc2) (frequency 9885)\n", "pair 844: OC 3 -> OC3 (frequency 9881)\n", "pair 845: [C@] ( -> [C@]( (frequency 9813)\n", "pair 846: CCN( C( -> CCN(C( (frequency 9813)\n", "pair 847: CCCC 1) -> CCCC1) (frequency 9804)\n", "pair 848: C 5 -> C5 (frequency 9792)\n", "pair 849: CC [C@H]3 -> CC[C@H]3 (frequency 9763)\n", "pair 850: c4ccc( Cl)cc -> c4ccc(Cl)cc (frequency 9740)\n", "pair 851: c( C(N -> c(C(N (frequency 9739)\n", "pair 852: c2ccccc2) CC1 -> c2ccccc2)CC1 (frequency 9727)\n", "pair 853: OCC ( -> OCC( (frequency 9716)\n", "pair 854: C(=O) /C=C/ -> C(=O)/C=C/ (frequency 9709)\n", "pair 855: [C@@] ( -> [C@@]( (frequency 9707)\n", "pair 856: 4 CCCC -> 4CCCC (frequency 9704)\n", "pair 857: cs 3) -> cs3) (frequency 9697)\n", "pair 858: n1 cc( -> n1cc( (frequency 9695)\n", "pair 859: N=C( N) -> N=C(N) (frequency 9674)\n", "pair 860: /C=C2 \\ -> /C=C2\\ (frequency 9674)\n", "pair 861: / C1 -> /C1 (frequency 9664)\n", "pair 862: c2c( F) -> c2c(F) (frequency 9614)\n", "pair 863: =C1 \\ -> =C1\\ (frequency 9611)\n", "pair 864: c3c( =O) -> c3c(=O) (frequency 9607)\n", "pair 865: c3 c4c( -> c3c4c( (frequency 9599)\n", "pair 866: )cc c2 -> )ccc2 (frequency 9594)\n", "pair 867: c1c( N -> c1c(N (frequency 9577)\n", "pair 868: Cl) c3) -> Cl)c3) (frequency 9569)\n", "pair 869: nc( O) -> nc(O) (frequency 9546)\n", "pair 870: c2n c3ccccc3 -> c2nc3ccccc3 (frequency 9542)\n", "pair 871: C( C(=O)N -> C(C(=O)N (frequency 9520)\n", "pair 872: C n3 -> Cn3 (frequency 9514)\n", "pair 873: C(F)(F)F) c1 -> C(F)(F)F)c1 (frequency 9486)\n", "pair 874: c3ccn cc3) -> c3ccncc3) (frequency 9485)\n", "pair 875: C(= C) -> C(=C) (frequency 9481)\n", "pair 876: c1cc cnc1 -> c1cccnc1 (frequency 9458)\n", "pair 877: c1cccc2 c1 -> c1cccc2c1 (frequency 9440)\n", "pair 878: [N+](=O)[O-] )cc1 -> [N+](=O)[O-])cc1 (frequency 9408)\n", "pair 879: c1c( - -> c1c(- (frequency 9406)\n", "pair 880: CC(=O) N2 -> CC(=O)N2 (frequency 9404)\n", "pair 881: O) =O) -> O)=O) (frequency 9394)\n", "pair 882: c2ccc( N -> c2ccc(N (frequency 9362)\n", "pair 883: O (C) -> O(C) (frequency 9355)\n", "pair 884: CC) =O) -> CC)=O) (frequency 9350)\n", "pair 885: ccc( C) -> ccc(C) (frequency 9343)\n", "pair 886: c3cccc 4 -> c3cccc4 (frequency 9338)\n", "pair 887: [C@]1 (C) -> [C@]1(C) (frequency 9334)\n", "pair 888: CC= C) -> CC=C) (frequency 9327)\n", "pair 889: CCCC C3 -> CCCCC3 (frequency 9313)\n", "pair 890: n2 cn -> n2cn (frequency 9282)\n", "pair 891: S(=O) (C) -> S(=O)(C) (frequency 9273)\n", "pair 892: C(C) =O -> C(C)=O (frequency 9263)\n", "pair 893: O= S(=O)( -> O=S(=O)( (frequency 9219)\n", "pair 894: c(=O) o -> c(=O)o (frequency 9209)\n", "pair 895: c2 nc(C) -> c2nc(C) (frequency 9173)\n", "pair 896: N [C@@H]( -> N[C@@H]( (frequency 9171)\n", "pair 897: CC c1n -> CCc1n (frequency 9165)\n", "pair 898: CC 3)cc -> CC3)cc (frequency 9146)\n", "pair 899: c2) C1 -> c2)C1 (frequency 9141)\n", "pair 900: O C2 -> OC2 (frequency 9133)\n", "pair 901: = C) -> =C) (frequency 9129)\n", "pair 902: c2cccc (F) -> c2cccc(F) (frequency 9127)\n", "pair 903: CCC 4 -> CCC4 (frequency 9113)\n", "pair 904: n n( -> nn( (frequency 9109)\n", "pair 905: [nH] c( -> [nH]c( (frequency 9100)\n", "pair 906: c2c( c1) -> c2c(c1) (frequency 9090)\n", "pair 907: n1 2 -> n12 (frequency 9088)\n", "pair 908: [C@H]( NC(=O) -> [C@H](NC(=O) (frequency 9080)\n", "pair 909: c1c( =O) -> c1c(=O) (frequency 9054)\n", "pair 910: c1cc( N -> c1cc(N (frequency 9045)\n", "pair 911: c3ccco 3) -> c3ccco3) (frequency 8999)\n", "pair 912: C( C( -> C(C( (frequency 8996)\n", "pair 913: P(=O)(O) O) -> P(=O)(O)O) (frequency 8995)\n", "pair 914: CC(C) ( -> CC(C)( (frequency 8985)\n", "pair 915: c2cc3c( cc2) -> c2cc3c(cc2) (frequency 8979)\n", "pair 916: c3cccc n3) -> c3ccccn3) (frequency 8967)\n", "pair 917: [C@@]1 (C) -> [C@@]1(C) (frequency 8963)\n", "pair 918: C[C@H]( NC(=O) -> C[C@H](NC(=O) (frequency 8954)\n", "pair 919: ccc( F) -> ccc(F) (frequency 8942)\n", "pair 920: CCCC C2 -> CCCCC2 (frequency 8930)\n", "pair 921: CCOCC 4) -> CCOCC4) (frequency 8921)\n", "pair 922: CCN1 C(=O) -> CCN1C(=O) (frequency 8913)\n", "pair 923: CC C1) -> CCC1) (frequency 8912)\n", "pair 924: S C) -> SC) (frequency 8910)\n", "pair 925: c(C) c( -> c(C)c( (frequency 8907)\n", "pair 926: C1CCCC C1) -> C1CCCCC1) (frequency 8896)\n", "pair 927: c5 ) -> c5) (frequency 8881)\n", "pair 928: CO c1c( -> COc1c( (frequency 8871)\n", "pair 929: c2c( OC) -> c2c(OC) (frequency 8869)\n", "pair 930: c1cccc (F) -> c1cccc(F) (frequency 8867)\n", "pair 931: C1 CC( -> C1CC( (frequency 8858)\n", "pair 932: OC(=O) C) -> OC(=O)C) (frequency 8854)\n", "pair 933: [C@@]2 ( -> [C@@]2( (frequency 8834)\n", "pair 934: c4cccc ( -> c4cccc( (frequency 8831)\n", "pair 935: C3 CCCCC3) -> C3CCCCC3) (frequency 8829)\n", "pair 936: n nc1 -> nnc1 (frequency 8827)\n", "pair 937: c1ccc(OC) cc1) -> c1ccc(OC)cc1) (frequency 8816)\n", "pair 938: (O) ( -> (O)( (frequency 8816)\n", "pair 939: cccc 4 -> cccc4 (frequency 8815)\n", "pair 940: c2c( cc( -> c2c(cc( (frequency 8810)\n", "pair 941: s c( -> sc( (frequency 8807)\n", "pair 942: n o1 -> no1 (frequency 8803)\n", "pair 943: CS c2n -> CSc2n (frequency 8797)\n", "pair 944: [C@H]( CO) -> [C@H](CO) (frequency 8786)\n", "pair 945: CO c1cc2 -> COc1cc2 (frequency 8786)\n", "pair 946: c3n cc -> c3ncc (frequency 8764)\n", "pair 947: c2cccc c12) -> c2ccccc12) (frequency 8753)\n", "pair 948: C [C@@H](C) -> C[C@@H](C) (frequency 8744)\n", "pair 949: c(F) c1 -> c(F)c1 (frequency 8740)\n", "pair 950: C /C=C\\ -> C/C=C\\ (frequency 8721)\n", "pair 951: CC S(=O)(=O) -> CCS(=O)(=O) (frequency 8719)\n", "pair 952: CC OC -> CCOC (frequency 8698)\n", "pair 953: CN (C)C -> CN(C)C (frequency 8684)\n", "pair 954: c1cn ( -> c1cn( (frequency 8679)\n", "pair 955: CO C) -> COC) (frequency 8651)\n", "pair 956: = CC(=O) -> =CC(=O) (frequency 8629)\n", "pair 957: [C@@H]1 2 -> [C@@H]12 (frequency 8628)\n", "pair 958: CC2 ( -> CC2( (frequency 8619)\n", "pair 959: (C) c1 -> (C)c1 (frequency 8615)\n", "pair 960: CC [C@]4 -> CC[C@]4 (frequency 8610)\n", "pair 961: nc( NC(=O) -> nc(NC(=O) (frequency 8602)\n", "pair 962: n1 ( -> n1( (frequency 8588)\n", "pair 963: C(=O)N c2ccc( -> C(=O)Nc2ccc( (frequency 8575)\n", "pair 964: c3 =O) -> c3=O) (frequency 8566)\n", "pair 965: CCCN C(=O) -> CCCNC(=O) (frequency 8560)\n", "pair 966: c1 (=O) -> c1(=O) (frequency 8548)\n", "pair 967: Cc1 c[nH] -> Cc1c[nH] (frequency 8548)\n", "pair 968: c1n ( -> c1n( (frequency 8542)\n", "pair 969: c1n cc( -> c1ncc( (frequency 8528)\n", "pair 970: c2cc( OC) -> c2cc(OC) (frequency 8525)\n", "pair 971: cn c1) -> cnc1) (frequency 8513)\n", "pair 972: CCCC N -> CCCCN (frequency 8510)\n", "pair 973: CCN (C)CC -> CCN(C)CC (frequency 8508)\n", "pair 974: c(- c3ccc( -> c(-c3ccc( (frequency 8489)\n", "pair 975: [C@]1 ( -> [C@]1( (frequency 8467)\n", "pair 976: ccc( N -> ccc(N (frequency 8446)\n", "pair 977: o 1) -> o1) (frequency 8442)\n", "pair 978: N1 CC -> N1CC (frequency 8440)\n", "pair 979: c(- c2ccc( -> c(-c2ccc( (frequency 8437)\n", "pair 980: CCCC CC -> CCCCCC (frequency 8430)\n", "pair 981: [C@@H] 5 -> [C@@H]5 (frequency 8398)\n", "pair 982: 6 ) -> 6) (frequency 8388)\n", "pair 983: F)cc (F) -> F)cc(F) (frequency 8365)\n", "pair 984: c1( OC) -> c1(OC) (frequency 8360)\n", "pair 985: c1cc( O) -> c1cc(O) (frequency 8355)\n", "pair 986: C( c1ccc( -> C(c1ccc( (frequency 8348)\n", "pair 987: NC(=S) N -> NC(=S)N (frequency 8337)\n", "pair 988: c3) =O) -> c3)=O) (frequency 8336)\n", "pair 989: [C@@]3 (C) -> [C@@]3(C) (frequency 8326)\n", "pair 990: c( OCC -> c(OCC (frequency 8324)\n", "pair 991: C(=O)N c1ccc( -> C(=O)Nc1ccc( (frequency 8318)\n", "pair 992: OCO 4) -> OCO4) (frequency 8317)\n", "pair 993: O C(C)(C)C) -> OC(C)(C)C) (frequency 8312)\n", "pair 994: F)cc c1 -> F)ccc1 (frequency 8312)\n", "pair 995: c2cccn c2) -> c2cccnc2) (frequency 8290)\n", "pair 996: =C 3 -> =C3 (frequency 8289)\n", "pair 997: O [C@@H]2 -> O[C@@H]2 (frequency 8284)\n", "pair 998: c2 )ccc1 -> c2)ccc1 (frequency 8270)\n", "pair 999: C c3ccccc3 -> Cc3ccccc3 (frequency 8256)\n", "pair 1000: c3ccc(Cl)cc 3 -> c3ccc(Cl)cc3 (frequency 8250)\n", "pair 1001: c1 cs -> c1cs (frequency 8249)\n", "pair 1002: c -2 -> c-2 (frequency 8247)\n", "pair 1003: C(=O)O) cc1 -> C(=O)O)cc1 (frequency 8247)\n", "pair 1004: c2ccccc2 c1 -> c2ccccc2c1 (frequency 8220)\n", "pair 1005: c2 3)cc1 -> c23)cc1 (frequency 8182)\n", "pair 1006: C2) c1 -> C2)c1 (frequency 8174)\n", "pair 1007: OC)cc( OC) -> OC)cc(OC) (frequency 8156)\n", "pair 1008: -c2 nc( -> -c2nc( (frequency 8128)\n", "pair 1009: n2) CC1 -> n2)CC1 (frequency 8116)\n", "pair 1010: c2ccccc2 n1 -> c2ccccc2n1 (frequency 8116)\n", "pair 1011: [C@@]1 ( -> [C@@]1( (frequency 8089)\n", "pair 1012: n cc1) -> ncc1) (frequency 8078)\n", "pair 1013: c3cc( - -> c3cc(- (frequency 8067)\n", "pair 1014: c1ccc(OC) cc1 -> c1ccc(OC)cc1 (frequency 8061)\n", "pair 1015: C3 ( -> C3( (frequency 8027)\n", "pair 1016: 4) CC3) -> 4)CC3) (frequency 8024)\n", "pair 1017: CCCN 3 -> CCCN3 (frequency 8017)\n", "pair 1018: c1) =O) -> c1)=O) (frequency 8010)\n", "pair 1019: Cn1 cc( -> Cn1cc( (frequency 8006)\n", "pair 1020: n n1) -> nn1) (frequency 7984)\n", "pair 1021: c1n cc -> c1ncc (frequency 7978)\n", "pair 1022: F C(F)(F) -> FC(F)(F) (frequency 7977)\n", "pair 1023: C#N )cc -> C#N)cc (frequency 7965)\n", "pair 1024: N c1ccccc1 -> Nc1ccccc1 (frequency 7958)\n", "pair 1025: C1 (C) -> C1(C) (frequency 7903)\n", "pair 1026: P(=O) ( -> P(=O)( (frequency 7899)\n", "pair 1027: c3c( Cl) -> c3c(Cl) (frequency 7893)\n", "pair 1028: [C@H]1 2 -> [C@H]12 (frequency 7889)\n", "pair 1029: C(N) =N -> C(N)=N (frequency 7889)\n", "pair 1030: c(- c3ccccc3) -> c(-c3ccccc3) (frequency 7868)\n", "pair 1031: [nH] 3) -> [nH]3) (frequency 7867)\n", "pair 1032: c2cccc( - -> c2cccc(- (frequency 7866)\n", "pair 1033: c1( - -> c1(- (frequency 7856)\n", "pair 1034: c2ccccc2) =O) -> c2ccccc2)=O) (frequency 7841)\n", "pair 1035: C( NC(=O) -> C(NC(=O) (frequency 7825)\n", "pair 1036: /C=C(\\ C) -> /C=C(\\C) (frequency 7808)\n", "pair 1037: c(=O) n1 -> c(=O)n1 (frequency 7806)\n", "pair 1038: c3cccn c3) -> c3cccnc3) (frequency 7803)\n", "pair 1039: c2cccc( C(F)(F)F) -> c2cccc(C(F)(F)F) (frequency 7803)\n", "pair 1040: C(N C( -> C(NC( (frequency 7778)\n", "pair 1041: O C(C)(C) -> OC(C)(C) (frequency 7774)\n", "pair 1042: C(=O)N C( -> C(=O)NC( (frequency 7758)\n", "pair 1043: C(F)(F)F)cc 3) -> C(F)(F)F)cc3) (frequency 7739)\n", "pair 1044: CC C(=O) -> CCC(=O) (frequency 7728)\n", "pair 1045: CO c2ccc( -> COc2ccc( (frequency 7721)\n", "pair 1046: n(- c2ccccc2) -> n(-c2ccccc2) (frequency 7707)\n", "pair 1047: OC c1ccccc1) -> OCc1ccccc1) (frequency 7686)\n", "pair 1048: C( N( -> C(N( (frequency 7674)\n", "pair 1049: CC [C@@H]3 -> CC[C@@H]3 (frequency 7666)\n", "pair 1050: 3) c1 -> 3)c1 (frequency 7665)\n", "pair 1051: )cc( OC)c1OC -> )cc(OC)c1OC (frequency 7659)\n", "pair 1052: C c2n -> Cc2n (frequency 7651)\n", "pair 1053: [N+] ( -> [N+]( (frequency 7647)\n", "pair 1054: P ( -> P( (frequency 7622)\n", "pair 1055: c3 nc( -> c3nc( (frequency 7594)\n", "pair 1056: CCC n1 -> CCCn1 (frequency 7591)\n", "pair 1057: C(F) F) -> C(F)F) (frequency 7588)\n", "pair 1058: CN2 C(=O) -> CN2C(=O) (frequency 7572)\n", "pair 1059: S(=O)(=O)N 3 -> S(=O)(=O)N3 (frequency 7566)\n", "pair 1060: c(O) c1 -> c(O)c1 (frequency 7535)\n", "pair 1061: s c2 -> sc2 (frequency 7533)\n", "pair 1062: [C@H]( N) -> [C@H](N) (frequency 7528)\n", "pair 1063: 3 c( -> 3c( (frequency 7519)\n", "pair 1064: c1( Cl) -> c1(Cl) (frequency 7517)\n", "pair 1065: [C@H](O) [C@H](O) -> [C@H](O)[C@H](O) (frequency 7517)\n", "pair 1066: =O) ccc( -> =O)ccc( (frequency 7501)\n", "pair 1067: n cc3 -> ncc3 (frequency 7488)\n", "pair 1068: cc cn -> cccn (frequency 7488)\n", "pair 1069: CCCC 3 -> CCCC3 (frequency 7486)\n", "pair 1070: /C=C/ C(=O) -> /C=C/C(=O) (frequency 7484)\n", "pair 1071: c3cc cs3) -> c3cccs3) (frequency 7476)\n", "pair 1072: c2ccccc2 Cl) -> c2ccccc2Cl) (frequency 7472)\n", "pair 1073: c1n c2c( -> c1nc2c( (frequency 7467)\n", "pair 1074: CN2 CCN( -> CN2CCN( (frequency 7452)\n", "pair 1075: c(- c2ccccc2) -> c(-c2ccccc2) (frequency 7451)\n", "pair 1076: CC (F)(F) -> CC(F)(F) (frequency 7440)\n", "pair 1077: C(=S) N -> C(=S)N (frequency 7440)\n", "pair 1078: c3c( F) -> c3c(F) (frequency 7432)\n", "pair 1079: N) ncn -> N)ncn (frequency 7424)\n", "pair 1080: Cl c1ccc( -> Clc1ccc( (frequency 7424)\n", "pair 1081: )cc 2)cc1 -> )cc2)cc1 (frequency 7421)\n", "pair 1082: o c(=O) -> oc(=O) (frequency 7419)\n", "pair 1083: c5ccccc5 ) -> c5ccccc5) (frequency 7411)\n", "pair 1084: O [C@H]2 -> O[C@H]2 (frequency 7385)\n", "pair 1085: c1cccc( Cl)c1 -> c1cccc(Cl)c1 (frequency 7381)\n", "pair 1086: [N+](=O)[O-]) c1 -> [N+](=O)[O-])c1 (frequency 7363)\n", "pair 1087: N2 CCC( -> N2CCC( (frequency 7362)\n", "pair 1088: C1 C2 -> C1C2 (frequency 7347)\n", "pair 1089: CCC [C@H]1 -> CCC[C@H]1 (frequency 7338)\n", "pair 1090: S 2 -> S2 (frequency 7337)\n", "pair 1091: [C@H]1 CC -> [C@H]1CC (frequency 7312)\n", "pair 1092: CN( C( -> CN(C( (frequency 7310)\n", "pair 1093: c3ccc( Br -> c3ccc(Br (frequency 7309)\n", "pair 1094: C(= N)N) -> C(=N)N) (frequency 7306)\n", "pair 1095: cn ( -> cn( (frequency 7303)\n", "pair 1096: Br )cc1) -> Br)cc1) (frequency 7282)\n", "pair 1097: Cl) c1) -> Cl)c1) (frequency 7280)\n", "pair 1098: N1 CCOCC1 -> N1CCOCC1 (frequency 7275)\n", "pair 1099: Cn1 cn -> Cn1cn (frequency 7272)\n", "pair 1100: C(=O) C) -> C(=O)C) (frequency 7266)\n", "pair 1101: C(N [C@H]( -> C(N[C@H]( (frequency 7261)\n", "pair 1102: c1cc( Cl)ccc1 -> c1cc(Cl)ccc1 (frequency 7214)\n", "pair 1103: (O) =O) -> (O)=O) (frequency 7213)\n", "pair 1104: c1ccc( OCC -> c1ccc(OCC (frequency 7205)\n", "pair 1105: nc2 1 -> nc21 (frequency 7199)\n", "pair 1106: CO c1ccc2c(c1) -> COc1ccc2c(c1) (frequency 7189)\n", "pair 1107: c2cc c3ccccc3 -> c2ccc3ccccc3 (frequency 7179)\n", "pair 1108: CN(C) C(=O) -> CN(C)C(=O) (frequency 7166)\n", "pair 1109: C4 CC4) -> C4CC4) (frequency 7161)\n", "pair 1110: CC1) =O -> CC1)=O (frequency 7145)\n", "pair 1111: c3ccc( O -> c3ccc(O (frequency 7124)\n", "pair 1112: c3 C) -> c3C) (frequency 7115)\n", "pair 1113: c1 (F) -> c1(F) (frequency 7104)\n", "pair 1114: O= S( -> O=S( (frequency 7102)\n", "pair 1115: C( CCCC -> C(CCCC (frequency 7087)\n", "pair 1116: CC2 )ccc1 -> CC2)ccc1 (frequency 7081)\n", "pair 1117: [C@@H]( NC( -> [C@@H](NC( (frequency 7078)\n", "pair 1118: c3ccccc3) CC2) -> c3ccccc3)CC2) (frequency 7052)\n", "pair 1119: c2ccc( N3 -> c2ccc(N3 (frequency 7051)\n", "pair 1120: O=C(N c1ccc( -> O=C(Nc1ccc( (frequency 7051)\n", "pair 1121: cc3 c( -> cc3c( (frequency 7046)\n", "pair 1122: C) =O -> C)=O (frequency 7038)\n", "pair 1123: CC1 =C( -> CC1=C( (frequency 7035)\n", "pair 1124: c2 c1) -> c2c1) (frequency 7032)\n", "pair 1125: n3 c( -> n3c( (frequency 7026)\n", "pair 1126: (=O) o -> (=O)o (frequency 7009)\n", "pair 1127: =O) C -> =O)C (frequency 7003)\n", "pair 1128: c2ccc(O) cc2) -> c2ccc(O)cc2) (frequency 6995)\n", "pair 1129: cc( N -> cc(N (frequency 6956)\n", "pair 1130: O=C( /C=C/ -> O=C(/C=C/ (frequency 6946)\n", "pair 1131: C(=O)N O) -> C(=O)NO) (frequency 6941)\n", "pair 1132: c2cc( O) -> c2cc(O) (frequency 6930)\n", "pair 1133: c( NC( -> c(NC( (frequency 6929)\n", "pair 1134: CC c1ccccc1) -> CCc1ccccc1) (frequency 6926)\n", "pair 1135: C 5) -> C5) (frequency 6924)\n", "pair 1136: C[C@H]( N) -> C[C@H](N) (frequency 6899)\n", "pair 1137: [S+] ([O-]) -> [S+]([O-]) (frequency 6897)\n", "pair 1138: c2 =O)cc1 -> c2=O)cc1 (frequency 6895)\n", "pair 1139: =O) CC1 -> =O)CC1 (frequency 6885)\n", "pair 1140: OC) cc2) -> OC)cc2) (frequency 6882)\n", "pair 1141: C= C(C) -> C=C(C) (frequency 6876)\n", "pair 1142: c2 cs -> c2cs (frequency 6870)\n", "pair 1143: /C =C(/ -> /C=C(/ (frequency 6866)\n", "pair 1144: c3 nc(- -> c3nc(- (frequency 6861)\n", "pair 1145: c2cc( F)cc -> c2cc(F)cc (frequency 6858)\n", "pair 1146: c3 )cc2) -> c3)cc2) (frequency 6849)\n", "pair 1147: C2 CCCC2) -> C2CCCC2) (frequency 6842)\n", "pair 1148: o 3) -> o3) (frequency 6838)\n", "pair 1149: C(O) ( -> C(O)( (frequency 6817)\n", "pair 1150: Cc1ccc( NC(=O) -> Cc1ccc(NC(=O) (frequency 6816)\n", "pair 1151: C2 3 -> C23 (frequency 6813)\n", "pair 1152: S(=O)(=O) c1ccc( -> S(=O)(=O)c1ccc( (frequency 6806)\n", "pair 1153: C1 =C(C) -> C1=C(C) (frequency 6806)\n", "pair 1154: C(N [C@@H]( -> C(N[C@@H]( (frequency 6806)\n", "pair 1155: C(F) ( -> C(F)( (frequency 6805)\n", "pair 1156: o 2)cc1 -> o2)cc1 (frequency 6794)\n", "pair 1157: c2ccc3c(c2) OCO3) -> c2ccc3c(c2)OCO3) (frequency 6790)\n", "pair 1158: n2 C) -> n2C) (frequency 6779)\n", "pair 1159: c2cccc(Cl) c2) -> c2cccc(Cl)c2) (frequency 6773)\n", "pair 1160: n1 C -> n1C (frequency 6765)\n", "pair 1161: c2n cccc2 -> c2ncccc2 (frequency 6750)\n", "pair 1162: [C@@H]1 CC -> [C@@H]1CC (frequency 6745)\n", "pair 1163: [nH] 1) -> [nH]1) (frequency 6732)\n", "pair 1164: n2 )ccc1 -> n2)ccc1 (frequency 6728)\n", "pair 1165: c3cc cnc3 -> c3cccnc3 (frequency 6728)\n", "pair 1166: = S -> =S (frequency 6714)\n", "pair 1167: Br )cc( -> Br)cc( (frequency 6707)\n", "pair 1168: c2ccccc2 F) -> c2ccccc2F) (frequency 6704)\n", "pair 1169: c2ccc( F)cc2 -> c2ccc(F)cc2 (frequency 6701)\n", "pair 1170: c2ccc( Br)cc2) -> c2ccc(Br)cc2) (frequency 6701)\n", "pair 1171: - c1c( -> -c1c( (frequency 6692)\n", "pair 1172: Cc2ccc( - -> Cc2ccc(- (frequency 6689)\n", "pair 1173: c2ccc( OCC -> c2ccc(OCC (frequency 6687)\n", "pair 1174: /C =N/N -> /C=N/N (frequency 6673)\n", "pair 1175: C(C)C) =O) -> C(C)C)=O) (frequency 6671)\n", "pair 1176: C(F)(F) F)cc1) -> C(F)(F)F)cc1) (frequency 6668)\n", "pair 1177: P(=O)(O) O -> P(=O)(O)O (frequency 6657)\n", "pair 1178: c1cc ncc1 -> c1ccncc1 (frequency 6635)\n", "pair 1179: c(O) c( -> c(O)c( (frequency 6613)\n", "pair 1180: # N) -> #N) (frequency 6608)\n", "pair 1181: CCN C( -> CCNC( (frequency 6604)\n", "pair 1182: 3)cc ( -> 3)cc( (frequency 6604)\n", "pair 1183: n n3 -> nn3 (frequency 6568)\n", "pair 1184: c - -> c- (frequency 6564)\n", "pair 1185: CC2) C1 -> CC2)C1 (frequency 6550)\n", "pair 1186: c3cc( OC) -> c3cc(OC) (frequency 6537)\n", "pair 1187: Cn1 c(=O) -> Cn1c(=O) (frequency 6529)\n", "pair 1188: CC2) CC1 -> CC2)CC1 (frequency 6524)\n", "pair 1189: C [C@H](C) -> C[C@H](C) (frequency 6519)\n", "pair 1190: C1 CC1) -> C1CC1) (frequency 6511)\n", "pair 1191: OC [C@H]1 -> OC[C@H]1 (frequency 6502)\n", "pair 1192: = [N+] -> =[N+] (frequency 6498)\n", "pair 1193: (C) (O) -> (C)(O) (frequency 6497)\n", "pair 1194: [C@H] 5 -> [C@H]5 (frequency 6494)\n", "pair 1195: 1 C -> 1C (frequency 6492)\n", "pair 1196: C#N )cc1 -> C#N)cc1 (frequency 6476)\n", "pair 1197: c1( O) -> c1(O) (frequency 6474)\n", "pair 1198: c1c2c( ccc1) -> c1c2c(ccc1) (frequency 6472)\n", "pair 1199: [n+] ([O-]) -> [n+]([O-]) (frequency 6469)\n", "pair 1200: [N+](=O)[O-] )cc2) -> [N+](=O)[O-])cc2) (frequency 6464)\n", "pair 1201: nn 3) -> nn3) (frequency 6460)\n", "pair 1202: / c( -> /c( (frequency 6457)\n", "pair 1203: CN 4 -> CN4 (frequency 6453)\n", "pair 1204: c4ccc(F)cc 4) -> c4ccc(F)cc4) (frequency 6437)\n", "pair 1205: F c1ccc( -> Fc1ccc( (frequency 6432)\n", "pair 1206: s 2)cc1 -> s2)cc1 (frequency 6419)\n", "pair 1207: Cc1 nc( -> Cc1nc( (frequency 6396)\n", "pair 1208: c( N2 -> c(N2 (frequency 6395)\n", "pair 1209: C [C@]12 -> C[C@]12 (frequency 6395)\n", "pair 1210: OCC 2 -> OCC2 (frequency 6392)\n", "pair 1211: c1cc c2[nH] -> c1ccc2[nH] (frequency 6385)\n", "pair 1212: O) cc2) -> O)cc2) (frequency 6384)\n", "pair 1213: [C@H]2 O) -> [C@H]2O) (frequency 6380)\n", "pair 1214: c2ccc( NC(=O) -> c2ccc(NC(=O) (frequency 6379)\n", "pair 1215: c1) N -> c1)N (frequency 6379)\n", "pair 1216: c4 [nH] -> c4[nH] (frequency 6377)\n", "pair 1217: =O) cc( -> =O)cc( (frequency 6370)\n", "pair 1218: c1 nc(N) -> c1nc(N) (frequency 6362)\n", "pair 1219: CC OC) -> CCOC) (frequency 6356)\n", "pair 1220: C= C2 -> C=C2 (frequency 6337)\n", "pair 1221: 3)cc 2)cc1 -> 3)cc2)cc1 (frequency 6336)\n", "pair 1222: N(C) C(=O) -> N(C)C(=O) (frequency 6334)\n", "pair 1223: c3c( O) -> c3c(O) (frequency 6331)\n", "pair 1224: c1cccc n1 -> c1ccccn1 (frequency 6325)\n", "pair 1225: (=O) N -> (=O)N (frequency 6323)\n", "pair 1226: nc2 n1 -> nc2n1 (frequency 6315)\n", "pair 1227: c1cc( -c2ccc( -> c1cc(-c2ccc( (frequency 6315)\n", "pair 1228: c1cc2 cccc -> c1cc2cccc (frequency 6312)\n", "pair 1229: C( C -> C(C (frequency 6300)\n", "pair 1230: Br )cc2 -> Br)cc2 (frequency 6288)\n", "pair 1231: c1c( -c2ccc( -> c1c(-c2ccc( (frequency 6279)\n", "pair 1232: C( \\ -> C(\\ (frequency 6272)\n", "pair 1233: O=C1 N -> O=C1N (frequency 6268)\n", "pair 1234: c2cc3c( cc2 -> c2cc3c(cc2 (frequency 6263)\n", "pair 1235: C( Cl) -> C(Cl) (frequency 6263)\n", "pair 1236: c1 nc(C) -> c1nc(C) (frequency 6250)\n", "pair 1237: C2 CCCC -> C2CCCC (frequency 6239)\n", "pair 1238: CC 5) -> CC5) (frequency 6238)\n", "pair 1239: c3ccc(O) cc3) -> c3ccc(O)cc3) (frequency 6237)\n", "pair 1240: c3c( - -> c3c(- (frequency 6237)\n", "pair 1241: c1 N -> c1N (frequency 6236)\n", "pair 1242: c1ccc( -c2ccc( -> c1ccc(-c2ccc( (frequency 6234)\n", "pair 1243: c3c( cc( -> c3c(cc( (frequency 6232)\n", "pair 1244: CC c1ccccc1 -> CCc1ccccc1 (frequency 6215)\n", "pair 1245: /C=N/ NC(=O) -> /C=N/NC(=O) (frequency 6204)\n", "pair 1246: n( -c2ccc( -> n(-c2ccc( (frequency 6196)\n", "pair 1247: c3ccc(F)cc 3 -> c3ccc(F)cc3 (frequency 6193)\n", "pair 1248: ccc( Cl) -> ccc(Cl) (frequency 6192)\n", "pair 1249: CO c1cc2c(cc1 -> COc1cc2c(cc1 (frequency 6176)\n", "pair 1250: C(F)(F)F)cc (C(F)(F)F) -> C(F)(F)F)cc(C(F)(F)F) (frequency 6166)\n", "pair 1251: C1 O -> C1O (frequency 6152)\n", "pair 1252: c2cccc (C) -> c2cccc(C) (frequency 6123)\n", "pair 1253: c1cc( NC(=O) -> c1cc(NC(=O) (frequency 6120)\n", "pair 1254: C c3ccc( -> Cc3ccc( (frequency 6109)\n", "pair 1255: C( C#N) -> C(C#N) (frequency 6085)\n", "pair 1256: C(=O)O C(C)(C)C) -> C(=O)OC(C)(C)C) (frequency 6083)\n", "pair 1257: C(=O) c1ccccc1 -> C(=O)c1ccccc1 (frequency 6080)\n", "pair 1258: CC(C)(C) O -> CC(C)(C)O (frequency 6079)\n", "pair 1259: c3cccc (F) -> c3cccc(F) (frequency 6065)\n", "pair 1260: CCCCCCCC CCCCCCCC -> CCCCCCCCCCCCCCCC (frequency 6064)\n", "pair 1261: c2 F) -> c2F) (frequency 6054)\n", "pair 1262: CC= C(C)C) -> CC=C(C)C) (frequency 6052)\n", "pair 1263: O c1c( -> Oc1c( (frequency 6048)\n", "pair 1264: C( OC) -> C(OC) (frequency 6048)\n", "pair 1265: CC c2ccccc2) -> CCc2ccccc2) (frequency 6044)\n", "pair 1266: c2 )cc( -> c2)cc( (frequency 6029)\n", "pair 1267: =O) cc2 -> =O)cc2 (frequency 6014)\n", "pair 1268: c4ccccc4 3) -> c4ccccc43) (frequency 6007)\n", "pair 1269: Br) c1 -> Br)c1 (frequency 6005)\n", "pair 1270: (F) F -> (F)F (frequency 6004)\n", "pair 1271: C(N) =O -> C(N)=O (frequency 6002)\n", "pair 1272: n1 c2c( -> n1c2c( (frequency 5994)\n", "pair 1273: =C( O) -> =C(O) (frequency 5975)\n", "pair 1274: n nc2 -> nnc2 (frequency 5965)\n", "pair 1275: c4cccc c34) -> c4ccccc34) (frequency 5961)\n", "pair 1276: /C=C /C -> /C=C/C (frequency 5961)\n", "pair 1277: Cc2ccc( F)cc2) -> Cc2ccc(F)cc2) (frequency 5960)\n", "pair 1278: c1cccc n1) -> c1ccccn1) (frequency 5958)\n", "pair 1279: O= c1[nH] -> O=c1[nH] (frequency 5956)\n", "pair 1280: n(- c3ccccc3) -> n(-c3ccccc3) (frequency 5954)\n", "pair 1281: C( C(N -> C(C(N (frequency 5953)\n", "pair 1282: [C@H]3 CC -> [C@H]3CC (frequency 5949)\n", "pair 1283: O[C@H](CO) [C@@H](O) -> O[C@H](CO)[C@@H](O) (frequency 5947)\n", "pair 1284: CCCC C) -> CCCCC) (frequency 5947)\n", "pair 1285: - c2cc -> -c2cc (frequency 5947)\n", "pair 1286: c1ccc( C)cc1) -> c1ccc(C)cc1) (frequency 5944)\n", "pair 1287: ncn 2 -> ncn2 (frequency 5925)\n", "pair 1288: c(=O) n2 -> c(=O)n2 (frequency 5922)\n", "pair 1289: c2cccc( OC) -> c2cccc(OC) (frequency 5916)\n", "pair 1290: nc1 - -> nc1- (frequency 5908)\n", "pair 1291: c2cc3 cccc -> c2cc3cccc (frequency 5908)\n", "pair 1292: C4 CCCC -> C4CCCC (frequency 5907)\n", "pair 1293: =O) cc1) -> =O)cc1) (frequency 5896)\n", "pair 1294: c( C(F)(F)F) -> c(C(F)(F)F) (frequency 5895)\n", "pair 1295: O C(C)C) -> OC(C)C) (frequency 5893)\n", "pair 1296: c3 c2) -> c3c2) (frequency 5891)\n", "pair 1297: o c1 -> oc1 (frequency 5886)\n", "pair 1298: c2ccccc2 1) -> c2ccccc21) (frequency 5877)\n", "pair 1299: c3 )cc -> c3)cc (frequency 5874)\n", "pair 1300: c2 c3ccccc3 -> c2c3ccccc3 (frequency 5869)\n", "pair 1301: C(F)(F)F)cc 2 -> C(F)(F)F)cc2 (frequency 5867)\n", "pair 1302: c2n (C) -> c2n(C) (frequency 5856)\n", "pair 1303: 2) CC1 -> 2)CC1 (frequency 5839)\n", "pair 1304: s c2c1 -> sc2c1 (frequency 5833)\n", "pair 1305: CCC3 ( -> CCC3( (frequency 5833)\n", "pair 1306: c1n cn2 -> c1ncn2 (frequency 5820)\n", "pair 1307: )cc1) =O -> )cc1)=O (frequency 5820)\n", "pair 1308: c4c( cccc4) -> c4c(cccc4) (frequency 5819)\n", "pair 1309: /C(C) =C/ -> /C(C)=C/ (frequency 5816)\n", "pair 1310: COc1ccc( NC(=O) -> COc1ccc(NC(=O) (frequency 5805)\n", "pair 1311: [C@H]1 ( -> [C@H]1( (frequency 5803)\n", "pair 1312: [C@@] 5 -> [C@@]5 (frequency 5802)\n", "pair 1313: Cl)cc( Cl) -> Cl)cc(Cl) (frequency 5789)\n", "pair 1314: = N1 -> =N1 (frequency 5769)\n", "pair 1315: c3 )cc2 -> c3)cc2 (frequency 5766)\n", "pair 1316: O= S(=O)(N -> O=S(=O)(N (frequency 5756)\n", "pair 1317: C1CCCC 1 -> C1CCCC1 (frequency 5755)\n", "pair 1318: c1ccccc1 - -> c1ccccc1- (frequency 5751)\n", "pair 1319: n2 cc( -> n2cc( (frequency 5749)\n", "pair 1320: c1( -c2ccc( -> c1(-c2ccc( (frequency 5748)\n", "pair 1321: S CC(=O)N -> SCC(=O)N (frequency 5747)\n", "pair 1322: [N+](=O)[O-] )cc -> [N+](=O)[O-])cc (frequency 5737)\n", "pair 1323: N# Cc1ccc( -> N#Cc1ccc( (frequency 5728)\n", "pair 1324: c12c( cccc1) -> c12c(cccc1) (frequency 5726)\n", "pair 1325: OC c2ccccc2) -> OCc2ccccc2) (frequency 5716)\n", "pair 1326: C(N 3 -> C(N3 (frequency 5708)\n", "pair 1327: C1 C -> C1C (frequency 5707)\n", "pair 1328: c2 Cl) -> c2Cl) (frequency 5701)\n", "pair 1329: c1ccc( -c2n -> c1ccc(-c2n (frequency 5696)\n", "pair 1330: CCCN( C(=O) -> CCCN(C(=O) (frequency 5687)\n", "pair 1331: c1ccc( -n2 -> c1ccc(-n2 (frequency 5682)\n", "pair 1332: CN2 CCC( -> CN2CCC( (frequency 5682)\n", "pair 1333: c1c( NC(=O) -> c1c(NC(=O) (frequency 5676)\n", "pair 1334: CN C -> CNC (frequency 5646)\n", "pair 1335: N (C)C -> N(C)C (frequency 5643)\n", "pair 1336: c3n c4ccccc4 -> c3nc4ccccc4 (frequency 5621)\n", "pair 1337: c(=O)[nH] c1=O -> c(=O)[nH]c1=O (frequency 5614)\n", "pair 1338: [C@H](O) [C@@H](O) -> [C@H](O)[C@@H](O) (frequency 5602)\n", "pair 1339: =O) C1 -> =O)C1 (frequency 5594)\n", "pair 1340: c3 c[nH] -> c3c[nH] (frequency 5584)\n", "pair 1341: C(=O) C1 -> C(=O)C1 (frequency 5575)\n", "pair 1342: c3cccc( Cl)c3) -> c3cccc(Cl)c3) (frequency 5573)\n", "pair 1343: -c2 o -> -c2o (frequency 5564)\n", "pair 1344: N [C@H]( -> N[C@H]( (frequency 5562)\n", "pair 1345: OC c1ccccc1 -> OCc1ccccc1 (frequency 5561)\n", "pair 1346: O CC1 -> OCC1 (frequency 5559)\n", "pair 1347: n1 c(- -> n1c(- (frequency 5547)\n", "pair 1348: c3cc( F)cc -> c3cc(F)cc (frequency 5536)\n", "pair 1349: -c2 cs -> -c2cs (frequency 5530)\n", "pair 1350: c1ccc( O)cc1 -> c1ccc(O)cc1 (frequency 5524)\n", "pair 1351: O[C@H](CO) [C@@H](O)[C@H](O) -> O[C@H](CO)[C@@H](O)[C@H](O) (frequency 5521)\n", "pair 1352: C#N )cc2) -> C#N)cc2) (frequency 5521)\n", "pair 1353: )cc (C) -> )cc(C) (frequency 5514)\n", "pair 1354: c4ccc(Cl)cc 4) -> c4ccc(Cl)cc4) (frequency 5506)\n", "pair 1355: [C@@]2 3 -> [C@@]23 (frequency 5500)\n", "pair 1356: ccc( O) -> ccc(O) (frequency 5497)\n", "pair 1357: C c1ccc2c(c1) -> Cc1ccc2c(c1) (frequency 5489)\n", "pair 1358: Cc1cc co -> Cc1ccco (frequency 5488)\n", "pair 1359: CC( NC(=O) -> CC(NC(=O) (frequency 5487)\n", "pair 1360: Cc2ccc( Cl)cc2) -> Cc2ccc(Cl)cc2) (frequency 5477)\n", "pair 1361: c2 O) -> c2O) (frequency 5469)\n", "pair 1362: c3n cc( -> c3ncc( (frequency 5467)\n", "pair 1363: C(=O) C2 -> C(=O)C2 (frequency 5462)\n", "pair 1364: c1cc cs1 -> c1cccs1 (frequency 5461)\n", "pair 1365: CCCN (C)C) -> CCCN(C)C) (frequency 5460)\n", "pair 1366: n c3c( -> nc3c( (frequency 5455)\n", "pair 1367: [C@]2 3 -> [C@]23 (frequency 5453)\n", "pair 1368: C1 CO -> C1CO (frequency 5452)\n", "pair 1369: nc1 N -> nc1N (frequency 5450)\n", "pair 1370: C(=O) C -> C(=O)C (frequency 5431)\n", "pair 1371: CCN(C) CC3) -> CCN(C)CC3) (frequency 5429)\n", "pair 1372: c2n cnc3 -> c2ncnc3 (frequency 5420)\n", "pair 1373: CN1 CCC( -> CN1CCC( (frequency 5415)\n", "pair 1374: c2cc( OC)c(OC)c(OC) -> c2cc(OC)c(OC)c(OC) (frequency 5414)\n", "pair 1375: c( C(=O)O) -> c(C(=O)O) (frequency 5410)\n", "pair 1376: c2 nc(N) -> c2nc(N) (frequency 5409)\n", "pair 1377: [n+] (C) -> [n+](C) (frequency 5409)\n", "pair 1378: C3 =N -> C3=N (frequency 5398)\n", "pair 1379: c2cc( NC(=O) -> c2cc(NC(=O) (frequency 5396)\n", "pair 1380: cc2 1 -> cc21 (frequency 5391)\n", "pair 1381: c(C) cc1 -> c(C)cc1 (frequency 5389)\n", "pair 1382: CCN1 CCN( -> CCN1CCN( (frequency 5389)\n", "pair 1383: C2) C1 -> C2)C1 (frequency 5389)\n", "pair 1384: c2cc c1 -> c2ccc1 (frequency 5377)\n", "pair 1385: S(=O)(C) =O) -> S(=O)(C)=O) (frequency 5342)\n", "pair 1386: C= C) -> C=C) (frequency 5341)\n", "pair 1387: C(=O)N1 CCN( -> C(=O)N1CCN( (frequency 5339)\n", "pair 1388: n2 cc -> n2cc (frequency 5335)\n", "pair 1389: [C@@H]1 ( -> [C@@H]1( (frequency 5321)\n", "pair 1390: c3ccccc3 )cc2 -> c3ccccc3)cc2 (frequency 5315)\n", "pair 1391: CN S(=O)(=O) -> CNS(=O)(=O) (frequency 5315)\n", "pair 1392: c4ccc( OC) -> c4ccc(OC) (frequency 5310)\n", "pair 1393: C( CO) -> C(CO) (frequency 5300)\n", "pair 1394: (C) (C) -> (C)(C) (frequency 5300)\n", "pair 1395: C( /C=C/ -> C(/C=C/ (frequency 5299)\n", "pair 1396: cc2) c1 -> cc2)c1 (frequency 5291)\n", "pair 1397: c2 N -> c2N (frequency 5288)\n", "pair 1398: CCCN1 C(=O) -> CCCN1C(=O) (frequency 5286)\n", "pair 1399: c1) C -> c1)C (frequency 5281)\n", "pair 1400: =C1 / -> =C1/ (frequency 5265)\n", "pair 1401: OCC (O) -> OCC(O) (frequency 5262)\n", "pair 1402: )cc1 2 -> )cc12 (frequency 5249)\n", "pair 1403: c1ccc( N2 -> c1ccc(N2 (frequency 5248)\n", "pair 1404: C) c( -> C)c( (frequency 5244)\n", "pair 1405: c2c( N -> c2c(N (frequency 5240)\n", "pair 1406: cc2 c1 -> cc2c1 (frequency 5231)\n", "pair 1407: N c1nc( -> Nc1nc( (frequency 5230)\n", "pair 1408: [C@]3 ( -> [C@]3( (frequency 5228)\n", "pair 1409: [C@@H]3 CC -> [C@@H]3CC (frequency 5224)\n", "pair 1410: n3 cn -> n3cn (frequency 5223)\n", "pair 1411: N=C( N)N -> N=C(N)N (frequency 5221)\n", "pair 1412: o n1 -> on1 (frequency 5218)\n", "pair 1413: C(=O)N (C)C) -> C(=O)N(C)C) (frequency 5209)\n", "pair 1414: cc n1 -> ccn1 (frequency 5203)\n", "pair 1415: [C@@H]( OC(C)=O) -> [C@@H](OC(C)=O) (frequency 5200)\n", "pair 1416: S(=O)(=O) O) -> S(=O)(=O)O) (frequency 5198)\n", "pair 1417: c3 nc(N -> c3nc(N (frequency 5184)\n", "pair 1418: cn 3 -> cn3 (frequency 5181)\n", "pair 1419: c2cc( OC)c(OC) -> c2cc(OC)c(OC) (frequency 5181)\n", "pair 1420: N( CC(=O)N -> N(CC(=O)N (frequency 5164)\n", "pair 1421: C3 CCCC3) -> C3CCCC3) (frequency 5158)\n", "pair 1422: [nH] c(- -> [nH]c(- (frequency 5156)\n", "pair 1423: cc c4) -> ccc4) (frequency 5150)\n", "pair 1424: CC(C) = -> CC(C)= (frequency 5149)\n", "pair 1425: O c1ccccc1 -> Oc1ccccc1 (frequency 5146)\n", "pair 1426: Cc1ccccc1) NC(=O) -> Cc1ccccc1)NC(=O) (frequency 5130)\n", "pair 1427: c2c( c1 -> c2c(c1 (frequency 5126)\n", "pair 1428: [C@@H]2 CC -> [C@@H]2CC (frequency 5123)\n", "pair 1429: c3c( OC) -> c3c(OC) (frequency 5115)\n", "pair 1430: C2 =O)c1 -> C2=O)c1 (frequency 5115)\n", "pair 1431: C1 CCC( -> C1CCC( (frequency 5111)\n", "pair 1432: C( S -> C(S (frequency 5109)\n", "pair 1433: c4ccc( C) -> c4ccc(C) (frequency 5108)\n", "pair 1434: OC) cc1 -> OC)cc1 (frequency 5102)\n", "pair 1435: C c2cn -> Cc2cn (frequency 5100)\n", "pair 1436: n n(C) -> nn(C) (frequency 5099)\n", "pair 1437: CC( N) -> CC(N) (frequency 5095)\n", "pair 1438: c3ccc( N4 -> c3ccc(N4 (frequency 5090)\n", "pair 1439: C(C)C) cc1 -> C(C)C)cc1 (frequency 5084)\n", "pair 1440: c1n cn -> c1ncn (frequency 5080)\n", "pair 1441: CCC O) -> CCCO) (frequency 5069)\n", "pair 1442: c1ccccc1) c1ccccc1 -> c1ccccc1)c1ccccc1 (frequency 5067)\n", "pair 1443: Cl)c(Cl) c1 -> Cl)c(Cl)c1 (frequency 5062)\n", "pair 1444: n1 cc -> n1cc (frequency 5061)\n", "pair 1445: 3) CC1 -> 3)CC1 (frequency 5054)\n", "pair 1446: C( C(=O)O) -> C(C(=O)O) (frequency 5046)\n", "pair 1447: ncn 1 -> ncn1 (frequency 5045)\n", "pair 1448: CN1 CC -> CN1CC (frequency 5044)\n", "pair 1449: c1ccco 1) -> c1ccco1) (frequency 5040)\n", "pair 1450: C12 CC3 -> C12CC3 (frequency 5040)\n", "pair 1451: [C@@H]( N -> [C@@H](N (frequency 5039)\n", "pair 1452: N2 CCCCC2) -> N2CCCCC2) (frequency 5038)\n", "pair 1453: C3 =C( -> C3=C( (frequency 5038)\n", "pair 1454: c3cc( O) -> c3cc(O) (frequency 5037)\n", "pair 1455: [C@H] (C)C -> [C@H](C)C (frequency 5034)\n", "pair 1456: c12 ccccc1 -> c12ccccc1 (frequency 5029)\n", "pair 1457: c3cc( OC)c(OC) -> c3cc(OC)c(OC) (frequency 5028)\n", "pair 1458: 2 )cc( -> 2)cc( (frequency 5009)\n", "pair 1459: C(=O)N2 CCN( -> C(=O)N2CCN( (frequency 5002)\n", "pair 1460: [C@H]( O -> [C@H](O (frequency 4993)\n", "pair 1461: c2cccc3 cccc -> c2cccc3cccc (frequency 4992)\n", "pair 1462: N1 CCCC1 -> N1CCCC1 (frequency 4991)\n", "pair 1463: c(S CC(=O)N -> c(SCC(=O)N (frequency 4990)\n", "pair 1464: CCOCC 2)cc1 -> CCOCC2)cc1 (frequency 4990)\n", "pair 1465: c12 cc( -> c12cc( (frequency 4989)\n", "pair 1466: [C@@H]( O -> [C@@H](O (frequency 4989)\n", "pair 1467: C2) CC1 -> C2)CC1 (frequency 4988)\n", "pair 1468: =O) [nH] -> =O)[nH] (frequency 4986)\n", "pair 1469: c3ccc(Br )cc3) -> c3ccc(Br)cc3) (frequency 4980)\n", "pair 1470: c(=O) n(C) -> c(=O)n(C) (frequency 4977)\n", "pair 1471: C( CO -> C(CO (frequency 4972)\n", "pair 1472: C(= N) -> C(=N) (frequency 4956)\n", "pair 1473: c3ccccc3 Cl) -> c3ccccc3Cl) (frequency 4955)\n", "pair 1474: c( C(=O) -> c(C(=O) (frequency 4951)\n", "pair 1475: cc c2c1 -> ccc2c1 (frequency 4945)\n", "pair 1476: c4cc cn -> c4cccn (frequency 4944)\n", "pair 1477: N c1cc( -> Nc1cc( (frequency 4942)\n", "pair 1478: Cc1 o -> Cc1o (frequency 4940)\n", "pair 1479: OCO 2) -> OCO2) (frequency 4927)\n", "pair 1480: c1ccco 1 -> c1ccco1 (frequency 4922)\n", "pair 1481: Cc1c( C) -> Cc1c(C) (frequency 4911)\n", "pair 1482: c3ccccc3) =O) -> c3ccccc3)=O) (frequency 4904)\n", "pair 1483: CC 3)cc2 -> CC3)cc2 (frequency 4904)\n", "pair 1484: = [N-] -> =[N-] (frequency 4896)\n", "pair 1485: c1cc cnc1) -> c1cccnc1) (frequency 4891)\n", "pair 1486: C(C)(C)C) =O) -> C(C)(C)C)=O) (frequency 4883)\n", "pair 1487: c( CC) -> c(CC) (frequency 4881)\n", "pair 1488: c1cccc(F) c1 -> c1cccc(F)c1 (frequency 4880)\n", "pair 1489: CC 5 -> CC5 (frequency 4880)\n", "pair 1490: 4 CCOCC4) -> 4CCOCC4) (frequency 4879)\n", "pair 1491: CCN(C) CC2) -> CCN(C)CC2) (frequency 4876)\n", "pair 1492: 3)cc c1 -> 3)ccc1 (frequency 4868)\n", "pair 1493: c3 nc(C) -> c3nc(C) (frequency 4864)\n", "pair 1494: [C@@]3 ( -> [C@@]3( (frequency 4864)\n", "pair 1495: c2ccccc2 )cc1) -> c2ccccc2)cc1) (frequency 4860)\n", "pair 1496: [C@@H]( CC -> [C@@H](CC (frequency 4857)\n", "pair 1497: CCC2( CC1) -> CCC2(CC1) (frequency 4857)\n", "pair 1498: C(F)(F)F) c3) -> C(F)(F)F)c3) (frequency 4851)\n", "pair 1499: c1cc cs1) -> c1cccs1) (frequency 4843)\n", "pair 1500: [C@H]( OC(C)=O) -> [C@H](OC(C)=O) (frequency 4843)\n", "pair 1501: c2cc( N -> c2cc(N (frequency 4833)\n", "pair 1502: P(O) (=O) -> P(O)(=O) (frequency 4829)\n", "pair 1503: =C( N) -> =C(N) (frequency 4829)\n", "pair 1504: /N =N/ -> /N=N/ (frequency 4826)\n", "pair 1505: n nc( -> nnc( (frequency 4822)\n", "pair 1506: CO 2 -> CO2 (frequency 4820)\n", "pair 1507: C(=O)N( C -> C(=O)N(C (frequency 4809)\n", "pair 1508: c1ccc( C)cc1 -> c1ccc(C)cc1 (frequency 4802)\n", "pair 1509: CC# N) -> CC#N) (frequency 4800)\n", "pair 1510: O c3ccc( -> Oc3ccc( (frequency 4796)\n", "pair 1511: cccc c12 -> ccccc12 (frequency 4795)\n", "pair 1512: S(=O)(=O) C) -> S(=O)(=O)C) (frequency 4792)\n", "pair 1513: c1( NC( -> c1(NC( (frequency 4786)\n", "pair 1514: CC[C@]4 (C) -> CC[C@]4(C) (frequency 4785)\n", "pair 1515: CCCC N) -> CCCCN) (frequency 4784)\n", "pair 1516: [nH] c1 -> [nH]c1 (frequency 4777)\n", "pair 1517: CCCC NC(=O) -> CCCCNC(=O) (frequency 4774)\n", "pair 1518: ( c2ccccc2) -> (c2ccccc2) (frequency 4773)\n", "pair 1519: nc1 C -> nc1C (frequency 4766)\n", "pair 1520: c1cccc (C)c1 -> c1cccc(C)c1 (frequency 4763)\n", "pair 1521: c1c( N) -> c1c(N) (frequency 4758)\n", "pair 1522: cc c12 -> ccc12 (frequency 4746)\n", "pair 1523: (C)C) =O) -> (C)C)=O) (frequency 4742)\n", "pair 1524: C( CC -> C(CC (frequency 4738)\n", "pair 1525: c4ccccc4 )cc3) -> c4ccccc4)cc3) (frequency 4737)\n", "pair 1526: n2 C -> n2C (frequency 4733)\n", "pair 1527: CCN CC3) -> CCNCC3) (frequency 4733)\n", "pair 1528: c2c( N) -> c2c(N) (frequency 4731)\n", "pair 1529: CCCC O -> CCCCO (frequency 4729)\n", "pair 1530: CCCC C -> CCCCC (frequency 4724)\n", "pair 1531: c3n cccc3 -> c3ncccc3 (frequency 4721)\n", "pair 1532: c1cc( C(=O)N -> c1cc(C(=O)N (frequency 4719)\n", "pair 1533: Cl)cc 3) -> Cl)cc3) (frequency 4718)\n", "pair 1534: s c3 -> sc3 (frequency 4717)\n", "pair 1535: c2cc1 OC -> c2cc1OC (frequency 4716)\n", "pair 1536: c(C) c(C) -> c(C)c(C) (frequency 4708)\n", "pair 1537: /C=C2 / -> /C=C2/ (frequency 4705)\n", "pair 1538: c1cc ncc1) -> c1ccncc1) (frequency 4698)\n", "pair 1539: c2nc(N 3 -> c2nc(N3 (frequency 4692)\n", "pair 1540: c3ccc( OCC -> c3ccc(OCC (frequency 4691)\n", "pair 1541: c2c(Cl) cccc2 -> c2c(Cl)cccc2 (frequency 4689)\n", "pair 1542: 2 )cc1) -> 2)cc1) (frequency 4675)\n", "pair 1543: O C(C) -> OC(C) (frequency 4673)\n", "pair 1544: O= c1 -> O=c1 (frequency 4664)\n", "pair 1545: 4) =O) -> 4)=O) (frequency 4663)\n", "pair 1546: [nH] c2 -> [nH]c2 (frequency 4657)\n", "pair 1547: C(=O)N1 CCC( -> C(=O)N1CCC( (frequency 4653)\n", "pair 1548: [C@] 5 -> [C@]5 (frequency 4649)\n", "pair 1549: [C@@H]( CO -> [C@@H](CO (frequency 4649)\n", "pair 1550: C3 = -> C3= (frequency 4642)\n", "pair 1551: c3ccccc3 2)cc1 -> c3ccccc32)cc1 (frequency 4641)\n", "pair 1552: c4ccccc4) CC3) -> c4ccccc4)CC3) (frequency 4638)\n", "pair 1553: CC( N)=O) -> CC(N)=O) (frequency 4637)\n", "pair 1554: c2cc( Cl)c( -> c2cc(Cl)c( (frequency 4636)\n", "pair 1555: c2cccc (N -> c2cccc(N (frequency 4635)\n", "pair 1556: [C@H]3 O) -> [C@H]3O) (frequency 4634)\n", "pair 1557: c2n c1 -> c2nc1 (frequency 4629)\n", "pair 1558: )cc2 c( -> )cc2c( (frequency 4620)\n", "pair 1559: N =C1 -> N=C1 (frequency 4618)\n", "pair 1560: CC2) n1 -> CC2)n1 (frequency 4612)\n", "pair 1561: c5 cc -> c5cc (frequency 4610)\n", "pair 1562: cc2) =O) -> cc2)=O) (frequency 4604)\n", "pair 1563: c1cccc( NC(=O) -> c1cccc(NC(=O) (frequency 4600)\n", "pair 1564: OCCO 2 -> OCCO2 (frequency 4592)\n", "pair 1565: c1cc( Cl)c( -> c1cc(Cl)c( (frequency 4586)\n", "pair 1566: [C@H]1 CC[C@H]( -> [C@H]1CC[C@H]( (frequency 4579)\n", "pair 1567: n2) C1 -> n2)C1 (frequency 4577)\n", "pair 1568: C /C=C/ -> C/C=C/ (frequency 4574)\n", "pair 1569: c( Cl)cc1 -> c(Cl)cc1 (frequency 4573)\n", "pair 1570: ccc( Cl)c1 -> ccc(Cl)c1 (frequency 4570)\n", "pair 1571: C(C) =C( -> C(C)=C( (frequency 4566)\n", "pair 1572: C /C(=N\\ -> C/C(=N\\ (frequency 4564)\n", "pair 1573: C1) C2 -> C1)C2 (frequency 4561)\n", "pair 1574: c1ccc( Br)cc1 -> c1ccc(Br)cc1 (frequency 4556)\n", "pair 1575: CCC S -> CCCS (frequency 4550)\n", "pair 1576: NC( N) -> NC(N) (frequency 4541)\n", "pair 1577: CN( CC) -> CN(CC) (frequency 4541)\n", "pair 1578: 3)cc c2 -> 3)ccc2 (frequency 4538)\n", "pair 1579: C(N C -> C(NC (frequency 4527)\n", "pair 1580: )cc c3 -> )ccc3 (frequency 4527)\n", "pair 1581: c(OC) cc1 -> c(OC)cc1 (frequency 4522)\n", "pair 1582: c( C -> c(C (frequency 4520)\n", "pair 1583: nc(- c3ccccc3) -> nc(-c3ccccc3) (frequency 4516)\n", "pair 1584: O O -> OO (frequency 4514)\n", "pair 1585: c4ccc( O) -> c4ccc(O) (frequency 4502)\n", "pair 1586: C4 =O) -> C4=O) (frequency 4498)\n", "pair 1587: S) =N -> S)=N (frequency 4493)\n", "pair 1588: cc1) =O -> cc1)=O (frequency 4490)\n", "pair 1589: =[N+] =[N-] -> =[N+]=[N-] (frequency 4484)\n", "pair 1590: C /C(=C\\ -> C/C(=C\\ (frequency 4482)\n", "pair 1591: /N =C(/ -> /N=C(/ (frequency 4482)\n", "pair 1592: N2 CC -> N2CC (frequency 4472)\n", "pair 1593: O c2c( -> Oc2c( (frequency 4467)\n", "pair 1594: CCCN ) -> CCCN) (frequency 4463)\n", "pair 1595: 2) C1 -> 2)C1 (frequency 4463)\n", "pair 1596: Cn1 cc -> Cn1cc (frequency 4442)\n", "pair 1597: c3ccccc3 F) -> c3ccccc3F) (frequency 4434)\n", "pair 1598: c1ccc( C(=O)N -> c1ccc(C(=O)N (frequency 4428)\n", "pair 1599: N =C2 -> N=C2 (frequency 4425)\n", "pair 1600: nc(S CC(=O)N -> nc(SCC(=O)N (frequency 4424)\n", "pair 1601: CCCC N1 -> CCCCN1 (frequency 4423)\n", "pair 1602: C1CCCC 1) -> C1CCCC1) (frequency 4422)\n", "pair 1603: c1c(Cl) cccc1 -> c1c(Cl)cccc1 (frequency 4421)\n", "pair 1604: = C(C)C) -> =C(C)C) (frequency 4415)\n", "pair 1605: n3 C) -> n3C) (frequency 4413)\n", "pair 1606: c1cc(OC) ccc1 -> c1cc(OC)ccc1 (frequency 4412)\n", "pair 1607: C(=O) (N -> C(=O)(N (frequency 4409)\n", "pair 1608: F) c(Cl) -> F)c(Cl) (frequency 4407)\n", "pair 1609: c3cccc c23)cc1 -> c3ccccc23)cc1 (frequency 4406)\n", "pair 1610: c3cccc (C) -> c3cccc(C) (frequency 4406)\n", "pair 1611: c1ccc( OC -> c1ccc(OC (frequency 4405)\n", "pair 1612: n1 - -> n1- (frequency 4403)\n", "pair 1613: c1c(OC) ccc( -> c1c(OC)ccc( (frequency 4399)\n", "pair 1614: Cc1ccc( O)cc1) -> Cc1ccc(O)cc1) (frequency 4394)\n", "pair 1615: c4cc ncc -> c4ccncc (frequency 4384)\n", "pair 1616: cccc c4 -> ccccc4 (frequency 4381)\n", "pair 1617: c( =S) -> c(=S) (frequency 4380)\n", "pair 1618: N(CC) CC) -> N(CC)CC) (frequency 4375)\n", "pair 1619: n n(- -> nn(- (frequency 4373)\n", "pair 1620: [C@@H]( C -> [C@@H](C (frequency 4373)\n", "pair 1621: c1n c2ccccc2 -> c1nc2ccccc2 (frequency 4370)\n", "pair 1622: c2 1) -> c21) (frequency 4363)\n", "pair 1623: c1cccc( N -> c1cccc(N (frequency 4358)\n", "pair 1624: 4 )cc3 -> 4)cc3 (frequency 4358)\n", "pair 1625: c(=O)[nH] c(=O) -> c(=O)[nH]c(=O) (frequency 4353)\n", "pair 1626: c2ccc( Cl)c(Cl) -> c2ccc(Cl)c(Cl) (frequency 4350)\n", "pair 1627: c1c( NC( -> c1c(NC( (frequency 4349)\n", "pair 1628: O[C@H]( CO -> O[C@H](CO (frequency 4349)\n", "pair 1629: n3 cc -> n3cc (frequency 4347)\n", "pair 1630: Cc1ccc( Cl)cc1 -> Cc1ccc(Cl)cc1 (frequency 4347)\n", "pair 1631: =O) =O -> =O)=O (frequency 4347)\n", "pair 1632: c1n cccc1 -> c1ncccc1 (frequency 4346)\n", "pair 1633: ( Cl) -> (Cl) (frequency 4336)\n", "pair 1634: / C2 -> /C2 (frequency 4334)\n", "pair 1635: o c(C) -> oc(C) (frequency 4331)\n", "pair 1636: c1cc(C) ccc1 -> c1cc(C)ccc1 (frequency 4331)\n", "pair 1637: c1( NC(=O) -> c1(NC(=O) (frequency 4328)\n", "pair 1638: CC2 CC2) -> CC2CC2) (frequency 4328)\n", "pair 1639: n 3)cc2) -> n3)cc2) (frequency 4325)\n", "pair 1640: CCO 1 -> CCO1 (frequency 4323)\n", "pair 1641: c2cccc (O) -> c2cccc(O) (frequency 4317)\n", "pair 1642: Cc1 nc(- -> Cc1nc(- (frequency 4306)\n", "pair 1643: c2c(F) cccc2 -> c2c(F)cccc2 (frequency 4304)\n", "pair 1644: c1cc( -c2n -> c1cc(-c2n (frequency 4301)\n", "pair 1645: N2 CCCC -> N2CCCC (frequency 4300)\n", "pair 1646: C( OCC -> C(OCC (frequency 4296)\n", "pair 1647: /C=C 3 -> /C=C3 (frequency 4295)\n", "pair 1648: CC1 2 -> CC12 (frequency 4294)\n", "pair 1649: C(O) =O -> C(O)=O (frequency 4289)\n", "pair 1650: C[C@H]1 CN( -> C[C@H]1CN( (frequency 4284)\n", "pair 1651: = N)N -> =N)N (frequency 4283)\n", "pair 1652: [C@H]( CC -> [C@H](CC (frequency 4282)\n", "pair 1653: C) ccc1 -> C)ccc1 (frequency 4281)\n", "pair 1654: c2 c1cccc2 -> c2c1cccc2 (frequency 4279)\n", "pair 1655: c1cn cc( -> c1cncc( (frequency 4276)\n", "pair 1656: c1c( Cl)cc( -> c1c(Cl)cc( (frequency 4273)\n", "pair 1657: C3 CCN( -> C3CCN( (frequency 4270)\n", "pair 1658: c2cccc(F) c2) -> c2cccc(F)c2) (frequency 4268)\n", "pair 1659: c3ccc4c(c3) OCO4) -> c3ccc4c(c3)OCO4) (frequency 4267)\n", "pair 1660: N C1 -> NC1 (frequency 4267)\n", "pair 1661: CCO c1ccccc1 -> CCOc1ccccc1 (frequency 4262)\n", "pair 1662: N1 CCN(C(=O) -> N1CCN(C(=O) (frequency 4261)\n", "pair 1663: F)cc 3) -> F)cc3) (frequency 4260)\n", "pair 1664: Cc1 cs -> Cc1cs (frequency 4259)\n", "pair 1665: c- 3 -> c-3 (frequency 4256)\n", "pair 1666: n2cn c3c( -> n2cnc3c( (frequency 4255)\n", "pair 1667: N2 CCCC2) -> N2CCCC2) (frequency 4255)\n", "pair 1668: [C@@]4 (C) -> [C@@]4(C) (frequency 4251)\n", "pair 1669: - c2c( -> -c2c( (frequency 4237)\n", "pair 1670: c3cc( OC)c(OC)c(OC) -> c3cc(OC)c(OC)c(OC) (frequency 4234)\n", "pair 1671: c1c(Cl) ccc( -> c1c(Cl)ccc( (frequency 4226)\n", "pair 1672: n( CC) -> n(CC) (frequency 4224)\n", "pair 1673: c1( C(N -> c1(C(N (frequency 4220)\n", "pair 1674: c5 cc( -> c5cc( (frequency 4206)\n", "pair 1675: C( c1cc( -> C(c1cc( (frequency 4206)\n", "pair 1676: [C@@H]( OC(=O) -> [C@@H](OC(=O) (frequency 4205)\n", "pair 1677: [C@@H](O) [C@@H](O) -> [C@@H](O)[C@@H](O) (frequency 4203)\n", "pair 1678: (=O)=O) cc1 -> (=O)=O)cc1 (frequency 4190)\n", "pair 1679: NC( =N) -> NC(=N) (frequency 4182)\n", "pair 1680: c1c( C(=O)N -> c1c(C(=O)N (frequency 4178)\n", "pair 1681: [C@@H] (C)C -> [C@@H](C)C (frequency 4178)\n", "pair 1682: cn c(N -> cnc(N (frequency 4172)\n", "pair 1683: CN CC -> CNCC (frequency 4165)\n", "pair 1684: N2 CCN(C(=O) -> N2CCN(C(=O) (frequency 4163)\n", "pair 1685: N C1=O -> NC1=O (frequency 4160)\n", "pair 1686: [C@@]1 (O) -> [C@@]1(O) (frequency 4158)\n", "pair 1687: c2) ccc( -> c2)ccc( (frequency 4153)\n", "pair 1688: ccc2 1 -> ccc21 (frequency 4150)\n", "pair 1689: S C -> SC (frequency 4150)\n", "pair 1690: NC(=O) C( -> NC(=O)C( (frequency 4150)\n", "pair 1691: CC3) =O) -> CC3)=O) (frequency 4150)\n", "pair 1692: c2cc( C(F)(F)F)cc -> c2cc(C(F)(F)F)cc (frequency 4148)\n", "pair 1693: )cc n1 -> )ccn1 (frequency 4147)\n", "pair 1694: [N+] 1 -> [N+]1 (frequency 4142)\n", "pair 1695: NC(=O) c1ccc( -> NC(=O)c1ccc( (frequency 4132)\n", "pair 1696: CC [C@H](C) -> CC[C@H](C) (frequency 4131)\n", "pair 1697: c4cc 5 -> c4cc5 (frequency 4127)\n", "pair 1698: c3cc4c( cc3 -> c3cc4c(cc3 (frequency 4123)\n", "pair 1699: N1 CCCCC1 -> N1CCCCC1 (frequency 4116)\n", "pair 1700: CO 1 -> CO1 (frequency 4115)\n", "pair 1701: c1cc( F)ccc1 -> c1cc(F)ccc1 (frequency 4113)\n", "pair 1702: CC(=O)N c1ccc( -> CC(=O)Nc1ccc( (frequency 4111)\n", "pair 1703: CC 3)cc2) -> CC3)cc2) (frequency 4109)\n", "pair 1704: N CCCC -> NCCCC (frequency 4108)\n", "pair 1705: CO c1ccc2c( -> COc1ccc2c( (frequency 4108)\n", "pair 1706: -c2 [nH] -> -c2[nH] (frequency 4105)\n", "pair 1707: c2cc(Cl)cc c2 -> c2cc(Cl)ccc2 (frequency 4103)\n", "pair 1708: c2ccc( C(F)(F)F)cc2) -> c2ccc(C(F)(F)F)cc2) (frequency 4088)\n", "pair 1709: COc1cc2c(cc1 OC) -> COc1cc2c(cc1OC) (frequency 4088)\n", "pair 1710: COc1ccc( -c2n -> COc1ccc(-c2n (frequency 4086)\n", "pair 1711: o c2c1 -> oc2c1 (frequency 4085)\n", "pair 1712: =C( N)N) -> =C(N)N) (frequency 4084)\n", "pair 1713: cc2 )ccc1 -> cc2)ccc1 (frequency 4080)\n", "pair 1714: C3) C2 -> C3)C2 (frequency 4079)\n", "pair 1715: C1 =C -> C1=C (frequency 4079)\n", "pair 1716: S(=O)(=O) N( -> S(=O)(=O)N( (frequency 4069)\n", "pair 1717: CO C -> COC (frequency 4069)\n", "pair 1718: CC [C@]3(C) -> CC[C@]3(C) (frequency 4069)\n", "pair 1719: COc1ccc( C2 -> COc1ccc(C2 (frequency 4063)\n", "pair 1720: C( c3ccccc3) -> C(c3ccccc3) (frequency 4057)\n", "pair 1721: c3ccc( N -> c3ccc(N (frequency 4053)\n", "pair 1722: CC1 CC1) -> CC1CC1) (frequency 4045)\n", "pair 1723: )cc( - -> )cc(- (frequency 4043)\n", "pair 1724: n1 c(N -> n1c(N (frequency 4040)\n", "pair 1725: CC(C) (C)C) -> CC(C)(C)C) (frequency 4037)\n", "pair 1726: n 3)cc -> n3)cc (frequency 4034)\n", "pair 1727: c2c( Cl)cc( -> c2c(Cl)cc( (frequency 4032)\n", "pair 1728: c1c(F) cccc1 -> c1c(F)cccc1 (frequency 4029)\n", "pair 1729: N2 C -> N2C (frequency 4028)\n", "pair 1730: [C@H](C) CO) -> [C@H](C)CO) (frequency 4027)\n", "pair 1731: -c2 nc(- -> -c2nc(- (frequency 4025)\n", "pair 1732: [N+](=O)[O-] )cc1) -> [N+](=O)[O-])cc1) (frequency 4014)\n", "pair 1733: /C=C/ C(=O)N -> /C=C/C(=O)N (frequency 4014)\n", "pair 1734: C c1cc2 -> Cc1cc2 (frequency 4013)\n", "pair 1735: CC(C) C -> CC(C)C (frequency 4000)\n", "pair 1736: c1( -c2n -> c1(-c2n (frequency 3994)\n", "pair 1737: CN C(=O)N -> CNC(=O)N (frequency 3992)\n", "pair 1738: C(=O)O 1 -> C(=O)O1 (frequency 3991)\n", "pair 1739: C c1cccc2 -> Cc1cccc2 (frequency 3991)\n", "pair 1740: n n2)cc1 -> nn2)cc1 (frequency 3989)\n", "pair 1741: c2cccc(C(F)(F)F) c2) -> c2cccc(C(F)(F)F)c2) (frequency 3986)\n", "pair 1742: c3cc c4ccccc4 -> c3ccc4ccccc4 (frequency 3985)\n", "pair 1743: F C(F)( -> FC(F)( (frequency 3985)\n", "pair 1744: n2 c(=O) -> n2c(=O) (frequency 3983)\n", "pair 1745: S(=O)(=O) c2ccc( -> S(=O)(=O)c2ccc( (frequency 3981)\n", "pair 1746: Cc1cn ( -> Cc1cn( (frequency 3966)\n", "pair 1747: ccc(F) c1 -> ccc(F)c1 (frequency 3962)\n", "pair 1748: O [C@@H]3 -> O[C@@H]3 (frequency 3960)\n", "pair 1749: c3cccc(F) c3) -> c3cccc(F)c3) (frequency 3957)\n", "pair 1750: CC N) -> CCN) (frequency 3953)\n", "pair 1751: c2o ccc2) -> c2occc2) (frequency 3950)\n", "pair 1752: CCC 4) -> CCC4) (frequency 3950)\n", "pair 1753: C#N )cc3) -> C#N)cc3) (frequency 3947)\n", "pair 1754: Cc1ccc( C(=O)N -> Cc1ccc(C(=O)N (frequency 3945)\n", "pair 1755: OC[C@H]1 O[C@@H]( -> OC[C@H]1O[C@@H]( (frequency 3944)\n", "pair 1756: c( C( -> c(C( (frequency 3939)\n", "pair 1757: n c4) -> nc4) (frequency 3935)\n", "pair 1758: C [n+]1 -> C[n+]1 (frequency 3935)\n", "pair 1759: F)cc 2)cc1 -> F)cc2)cc1 (frequency 3932)\n", "pair 1760: [C@H](O) [C@@H]1O -> [C@H](O)[C@@H]1O (frequency 3931)\n", "pair 1761: ccc( NC(=O) -> ccc(NC(=O) (frequency 3930)\n", "pair 1762: CC c1cc( -> CCc1cc( (frequency 3927)\n", "pair 1763: c3ccc( C(F)(F)F)cc3) -> c3ccc(C(F)(F)F)cc3) (frequency 3921)\n", "pair 1764: -c2 s -> -c2s (frequency 3921)\n", "pair 1765: [C@] (O)( -> [C@](O)( (frequency 3916)\n", "pair 1766: c1n (C) -> c1n(C) (frequency 3915)\n", "pair 1767: CS ) -> CS) (frequency 3910)\n", "pair 1768: c(F) c2) -> c(F)c2) (frequency 3904)\n", "pair 1769: C(O) C(O) -> C(O)C(O) (frequency 3901)\n", "pair 1770: CC1 CCCCC1) -> CC1CCCCC1) (frequency 3900)\n", "pair 1771: n1 c(=O) -> n1c(=O) (frequency 3895)\n", "pair 1772: C) C -> C)C (frequency 3893)\n", "pair 1773: N1 CCOCC1) -> N1CCOCC1) (frequency 3890)\n", "pair 1774: [C@@] (O)( -> [C@@](O)( (frequency 3887)\n", "pair 1775: CC1 =O -> CC1=O (frequency 3886)\n", "pair 1776: c1ccc2c(c1) OCO2 -> c1ccc2c(c1)OCO2 (frequency 3876)\n", "pair 1777: CCN CC2) -> CCNCC2) (frequency 3873)\n", "pair 1778: Cn2 cn -> Cn2cn (frequency 3868)\n", "pair 1779: [C@@]2 (O) -> [C@@]2(O) (frequency 3866)\n", "pair 1780: c3 cs -> c3cs (frequency 3865)\n", "pair 1781: Cn1 c( -> Cn1c( (frequency 3858)\n", "pair 1782: c4 c(C) -> c4c(C) (frequency 3857)\n", "pair 1783: c2cccc 3) -> c2cccc3) (frequency 3849)\n", "pair 1784: c2cc (=O) -> c2cc(=O) (frequency 3841)\n", "pair 1785: c2 C1 -> c2C1 (frequency 3841)\n", "pair 1786: c(=O)c( C(=O)O) -> c(=O)c(C(=O)O) (frequency 3839)\n", "pair 1787: Cc2ccccc2) CC1 -> Cc2ccccc2)CC1 (frequency 3835)\n", "pair 1788: /C1 =C\\ -> /C1=C\\ (frequency 3835)\n", "pair 1789: n c2ccccc12 -> nc2ccccc12 (frequency 3834)\n", "pair 1790: CCCC CC) -> CCCCCC) (frequency 3833)\n", "pair 1791: c1c2c( cc( -> c1c2c(cc( (frequency 3830)\n", "pair 1792: c2c( n1) -> c2c(n1) (frequency 3828)\n", "pair 1793: c1( C( -> c1(C( (frequency 3828)\n", "pair 1794: Cc1ccc( F)cc1) -> Cc1ccc(F)cc1) (frequency 3828)\n", "pair 1795: n2 )cc( -> n2)cc( (frequency 3826)\n", "pair 1796: cccc1 2 -> cccc12 (frequency 3824)\n", "pair 1797: [N+](=O)[O-] )cc2 -> [N+](=O)[O-])cc2 (frequency 3823)\n", "pair 1798: c1c( C(N -> c1c(C(N (frequency 3821)\n", "pair 1799: c2[nH] 1 -> c2[nH]1 (frequency 3819)\n", "pair 1800: C(=O) c2ccccc2 -> C(=O)c2ccccc2 (frequency 3815)\n", "pair 1801: CO C(=O)N -> COC(=O)N (frequency 3814)\n", "pair 1802: CCN( CC -> CCN(CC (frequency 3814)\n", "pair 1803: Cc1ccc( F)cc1 -> Cc1ccc(F)cc1 (frequency 3813)\n", "pair 1804: n nc(- -> nnc(- (frequency 3809)\n", "pair 1805: C1 CCCN( -> C1CCCN( (frequency 3805)\n", "pair 1806: C[C@@H]( N) -> C[C@@H](N) (frequency 3802)\n", "pair 1807: CCN CC1 -> CCNCC1 (frequency 3801)\n", "pair 1808: [C@@H]1 C -> [C@@H]1C (frequency 3798)\n", "pair 1809: C2) =O) -> C2)=O) (frequency 3794)\n", "pair 1810: [C@]1 (O) -> [C@]1(O) (frequency 3791)\n", "pair 1811: /C(=N/ O) -> /C(=N/O) (frequency 3791)\n", "pair 1812: cs 2)cc1 -> cs2)cc1 (frequency 3790)\n", "pair 1813: c3cccc( OC) -> c3cccc(OC) (frequency 3789)\n", "pair 1814: C( =C/ -> C(=C/ (frequency 3787)\n", "pair 1815: C(=O) CC -> C(=O)CC (frequency 3785)\n", "pair 1816: [C@H]( OC(=O) -> [C@H](OC(=O) (frequency 3782)\n", "pair 1817: ccc( NC( -> ccc(NC( (frequency 3778)\n", "pair 1818: c2n nc( -> c2nnc( (frequency 3776)\n", "pair 1819: c1 3) -> c13) (frequency 3775)\n", "pair 1820: cccc 2)c1 -> cccc2)c1 (frequency 3768)\n", "pair 1821: C(F)(F)F)cc ( -> C(F)(F)F)cc( (frequency 3766)\n", "pair 1822: OCCO 3) -> OCCO3) (frequency 3765)\n", "pair 1823: Cc1 s -> Cc1s (frequency 3765)\n", "pair 1824: c1n nc( -> c1nnc( (frequency 3759)\n", "pair 1825: c1cc( Cl)cc -> c1cc(Cl)cc (frequency 3757)\n", "pair 1826: c4 3) -> c43) (frequency 3755)\n", "pair 1827: [C@@]2 1C -> [C@@]21C (frequency 3753)\n", "pair 1828: ncn 3) -> ncn3) (frequency 3751)\n", "pair 1829: c2ccc( [N+](=O)[O-])cc2) -> c2ccc([N+](=O)[O-])cc2) (frequency 3745)\n", "pair 1830: CCN(C) CC1 -> CCN(C)CC1 (frequency 3743)\n", "pair 1831: c2ccc( C(=O)N -> c2ccc(C(=O)N (frequency 3739)\n", "pair 1832: CC1) =O) -> CC1)=O) (frequency 3737)\n", "pair 1833: c1cc( NC( -> c1cc(NC( (frequency 3733)\n", "pair 1834: c1cc( Cl) -> c1cc(Cl) (frequency 3733)\n", "pair 1835: CC2 )cc( -> CC2)cc( (frequency 3733)\n", "pair 1836: C Cl) -> CCl) (frequency 3733)\n", "pair 1837: [nH] c3 -> [nH]c3 (frequency 3728)\n", "pair 1838: c3ccccc3 )cc -> c3ccccc3)cc (frequency 3727)\n", "pair 1839: cc( NC(=O) -> cc(NC(=O) (frequency 3721)\n", "pair 1840: c2cc( C(=O)N -> c2cc(C(=O)N (frequency 3711)\n", "pair 1841: c1ccc( O)cc1) -> c1ccc(O)cc1) (frequency 3711)\n", "pair 1842: [N+](C) (C)C) -> [N+](C)(C)C) (frequency 3711)\n", "pair 1843: C1 CC2 -> C1CC2 (frequency 3710)\n", "pair 1844: C( CN -> C(CN (frequency 3701)\n", "pair 1845: cn 2)cc1 -> cn2)cc1 (frequency 3699)\n", "pair 1846: cc2 C) -> cc2C) (frequency 3699)\n", "pair 1847: CC3 ( -> CC3( (frequency 3697)\n", "pair 1848: Cc1ccc( N -> Cc1ccc(N (frequency 3696)\n", "pair 1849: C1) =O -> C1)=O (frequency 3693)\n", "pair 1850: c1cc( Cl)cc( -> c1cc(Cl)cc( (frequency 3691)\n", "pair 1851: c(=O)[nH] 1 -> c(=O)[nH]1 (frequency 3691)\n", "pair 1852: c( OC -> c(OC (frequency 3690)\n", "pair 1853: o 2)c1 -> o2)c1 (frequency 3688)\n", "pair 1854: c1c( -c2n -> c1c(-c2n (frequency 3685)\n", "pair 1855: COc1ccc( N -> COc1ccc(N (frequency 3681)\n", "pair 1856: /C =C1\\ -> /C=C1\\ (frequency 3678)\n", "pair 1857: c2cccc3cccc c23) -> c2cccc3ccccc23) (frequency 3677)\n", "pair 1858: N c1cccc( -> Nc1cccc( (frequency 3671)\n", "pair 1859: c1c( O -> c1c(O (frequency 3668)\n", "pair 1860: Br )ccc1 -> Br)ccc1 (frequency 3668)\n", "pair 1861: N = -> N= (frequency 3667)\n", "pair 1862: OC)c(OC) c1 -> OC)c(OC)c1 (frequency 3664)\n", "pair 1863: CC(F)(F) F) -> CC(F)(F)F) (frequency 3653)\n", "pair 1864: CC2 CCCCC2) -> CC2CCCCC2) (frequency 3652)\n", "pair 1865: cc( C(=O)N -> cc(C(=O)N (frequency 3650)\n", "pair 1866: c3c(F) cccc3 -> c3c(F)cccc3 (frequency 3646)\n", "pair 1867: CC( C(=O)N -> CC(C(=O)N (frequency 3645)\n", "pair 1868: /N = -> /N= (frequency 3644)\n", "pair 1869: NC(=O) c1ccccc1 -> NC(=O)c1ccccc1 (frequency 3640)\n", "pair 1870: c2c[nH] c3ccccc23) -> c2c[nH]c3ccccc23) (frequency 3638)\n", "pair 1871: c(N C -> c(NC (frequency 3634)\n", "pair 1872: s 2)c1 -> s2)c1 (frequency 3632)\n", "pair 1873: [C@H]1 CC[C@H]2 -> [C@H]1CC[C@H]2 (frequency 3631)\n", "pair 1874: c2 )cc1) -> c2)cc1) (frequency 3630)\n", "pair 1875: N c1cc -> Nc1cc (frequency 3626)\n", "pair 1876: C23 CC4 -> C23CC4 (frequency 3625)\n", "pair 1877: c12 cccc -> c12cccc (frequency 3623)\n", "pair 1878: [N+](C) (C) -> [N+](C)(C) (frequency 3623)\n", "pair 1879: [C@]2 (O) -> [C@]2(O) (frequency 3623)\n", "pair 1880: c3cc(Cl)cc c3 -> c3cc(Cl)ccc3 (frequency 3622)\n", "pair 1881: C(C)(C)C) cc1 -> C(C)(C)C)cc1 (frequency 3615)\n", "pair 1882: cc n2) -> ccn2) (frequency 3610)\n", "pair 1883: c1n nc(- -> c1nnc(- (frequency 3605)\n", "pair 1884: c2cc s -> c2ccs (frequency 3600)\n", "pair 1885: [C@H]1 C -> [C@H]1C (frequency 3600)\n", "pair 1886: N3 CCOCC3) -> N3CCOCC3) (frequency 3600)\n", "pair 1887: OCC N -> OCCN (frequency 3598)\n", "pair 1888: F)cc 3 -> F)cc3 (frequency 3595)\n", "pair 1889: c2ccc( OC -> c2ccc(OC (frequency 3594)\n", "pair 1890: Cc1ccc( -c2ccccc2 -> Cc1ccc(-c2ccccc2 (frequency 3593)\n", "pair 1891: C1 CCN(C(=O) -> C1CCN(C(=O) (frequency 3593)\n", "pair 1892: cc(C) c1 -> cc(C)c1 (frequency 3592)\n", "pair 1893: c2ccccc2) C1 -> c2ccccc2)C1 (frequency 3592)\n", "pair 1894: c3ccc( [N+](=O)[O-] -> c3ccc([N+](=O)[O-] (frequency 3590)\n", "pair 1895: C(C)C) c1 -> C(C)C)c1 (frequency 3587)\n", "pair 1896: Cc1c[nH] c2ccccc12) -> Cc1c[nH]c2ccccc12) (frequency 3582)\n", "pair 1897: c1ccc( Br)cc1) -> c1ccc(Br)cc1) (frequency 3581)\n", "pair 1898: c4ccc( - -> c4ccc(- (frequency 3575)\n", "pair 1899: C3 )cc1 -> C3)cc1 (frequency 3575)\n", "pair 1900: (F) F) -> (F)F) (frequency 3575)\n", "pair 1901: CC3 CC3) -> CC3CC3) (frequency 3571)\n", "pair 1902: nn n2 -> nnn2 (frequency 3566)\n", "pair 1903: Cc1ccc( S(=O)(=O)N -> Cc1ccc(S(=O)(=O)N (frequency 3566)\n", "pair 1904: Cc1ccc( - -> Cc1ccc(- (frequency 3566)\n", "pair 1905: N) c1 -> N)c1 (frequency 3557)\n", "pair 1906: c1( N) -> c1(N) (frequency 3548)\n", "pair 1907: nn n1 -> nnn1 (frequency 3547)\n", "pair 1908: ccc( O -> ccc(O (frequency 3547)\n", "pair 1909: cc 3)cc -> cc3)cc (frequency 3547)\n", "pair 1910: Cc1ccc( Cl)cc1) -> Cc1ccc(Cl)cc1) (frequency 3547)\n", "pair 1911: COc1ccc( -n2 -> COc1ccc(-n2 (frequency 3547)\n", "pair 1912: c1ccc( Cl) -> c1ccc(Cl) (frequency 3543)\n", "pair 1913: c2cc( F) -> c2cc(F) (frequency 3541)\n", "pair 1914: [nH] c2c1 -> [nH]c2c1 (frequency 3540)\n", "pair 1915: CCO C( -> CCOC( (frequency 3536)\n", "pair 1916: n2 c1 -> n2c1 (frequency 3535)\n", "pair 1917: c1ccc( C) -> c1ccc(C) (frequency 3532)\n", "pair 1918: c1ccc( [N+](=O)[O-])cc1 -> c1ccc([N+](=O)[O-])cc1 (frequency 3531)\n", "pair 1919: C1=C( O) -> C1=C(O) (frequency 3529)\n", "pair 1920: C(F)(F)F) c1) -> C(F)(F)F)c1) (frequency 3528)\n", "pair 1921: CC c3ccccc3) -> CCc3ccccc3) (frequency 3524)\n", "pair 1922: Cc1ccc( -n2 -> Cc1ccc(-n2 (frequency 3519)\n", "pair 1923: [C@]4 (C) -> [C@]4(C) (frequency 3518)\n", "pair 1924: c3cc4c( cc3) -> c3cc4c(cc3) (frequency 3516)\n", "pair 1925: [C@@H]2 [C@@H]( -> [C@@H]2[C@@H]( (frequency 3510)\n", "pair 1926: c3n c4c( -> c3nc4c( (frequency 3508)\n", "pair 1927: [C@H]( C -> [C@H](C (frequency 3506)\n", "pair 1928: c2cc c3[nH] -> c2ccc3[nH] (frequency 3505)\n", "pair 1929: n 5 -> n5 (frequency 3500)\n", "pair 1930: NC(=O) CS -> NC(=O)CS (frequency 3499)\n", "pair 1931: nc2 N -> nc2N (frequency 3498)\n", "pair 1932: c(C) c2) -> c(C)c2) (frequency 3498)\n", "pair 1933: S1 (=O) -> S1(=O) (frequency 3496)\n", "pair 1934: CCCC CCC -> CCCCCCC (frequency 3496)\n", "pair 1935: C( =N -> C(=N (frequency 3489)\n", "pair 1936: C1 (C)C -> C1(C)C (frequency 3487)\n", "pair 1937: c1 c2cccc -> c1c2cccc (frequency 3480)\n", "pair 1938: OC)c(OC) c3) -> OC)c(OC)c3) (frequency 3478)\n", "pair 1939: CC(CC( C3) -> CC(CC(C3) (frequency 3478)\n", "pair 1940: c( [N+](=O)[O-]) -> c([N+](=O)[O-]) (frequency 3477)\n", "pair 1941: [C@H]( N -> [C@H](N (frequency 3477)\n", "pair 1942: [C@H]( C(=O)O) -> [C@H](C(=O)O) (frequency 3477)\n", "pair 1943: c2cccc( [N+](=O)[O-]) -> c2cccc([N+](=O)[O-]) (frequency 3476)\n", "pair 1944: ccc1 O -> ccc1O (frequency 3474)\n", "pair 1945: c(C) n1 -> c(C)n1 (frequency 3472)\n", "pair 1946: CCC(O) =O) -> CCC(O)=O) (frequency 3472)\n", "pair 1947: CC C1( -> CCC1( (frequency 3468)\n", "pair 1948: c1cc( O -> c1cc(O (frequency 3464)\n", "pair 1949: COc1ccc( C(=O)N -> COc1ccc(C(=O)N (frequency 3458)\n", "pair 1950: [C@@H]2 O -> [C@@H]2O (frequency 3456)\n", "pair 1951: c4cc c5c( -> c4ccc5c( (frequency 3453)\n", "pair 1952: CCO C1 -> CCOC1 (frequency 3453)\n", "pair 1953: B (O) -> B(O) (frequency 3451)\n", "pair 1954: C2 C( -> C2C( (frequency 3448)\n", "pair 1955: N# Cc1c( -> N#Cc1c( (frequency 3444)\n", "pair 1956: CCO c1cc( -> CCOc1cc( (frequency 3441)\n", "pair 1957: c1ccccc1) =O -> c1ccccc1)=O (frequency 3440)\n", "pair 1958: CC(=O)N [C@@H]( -> CC(=O)N[C@@H]( (frequency 3439)\n", "pair 1959: 5)cc 4) -> 5)cc4) (frequency 3439)\n", "pair 1960: c3n (C) -> c3n(C) (frequency 3438)\n", "pair 1961: NC(=O) CO -> NC(=O)CO (frequency 3438)\n", "pair 1962: c3cccc( Cl) -> c3cccc(Cl) (frequency 3437)\n", "pair 1963: S(=O)( N)=O) -> S(=O)(N)=O) (frequency 3430)\n", "pair 1964: C(=O)O) c1 -> C(=O)O)c1 (frequency 3426)\n", "pair 1965: c2s ccc2) -> c2sccc2) (frequency 3422)\n", "pair 1966: c1c( C( -> c1c(C( (frequency 3421)\n", "pair 1967: c2ccccc2) n1 -> c2ccccc2)n1 (frequency 3418)\n", "pair 1968: N3 CCN( -> N3CCN( (frequency 3417)\n", "pair 1969: c3ccc( C(=O)N -> c3ccc(C(=O)N (frequency 3416)\n", "pair 1970: c1 co -> c1co (frequency 3411)\n", "pair 1971: C(N CC -> C(NCC (frequency 3410)\n", "pair 1972: # N -> #N (frequency 3407)\n", "pair 1973: OCC N( -> OCCN( (frequency 3406)\n", "pair 1974: =O) CC2) -> =O)CC2) (frequency 3405)\n", "pair 1975: n2 c3c( -> n2c3c( (frequency 3403)\n", "pair 1976: c1(Cl) ccc( -> c1(Cl)ccc( (frequency 3401)\n", "pair 1977: N(CC) CC -> N(CC)CC (frequency 3397)\n", "pair 1978: c3 F) -> c3F) (frequency 3396)\n", "pair 1979: COc1ccc( - -> COc1ccc(- (frequency 3396)\n", "pair 1980: CCCC(=O) N -> CCCC(=O)N (frequency 3396)\n", "pair 1981: F)cc 2)c1 -> F)cc2)c1 (frequency 3393)\n", "pair 1982: c4cc co -> c4ccco (frequency 3391)\n", "pair 1983: c2ccc( OC)c(OC) -> c2ccc(OC)c(OC) (frequency 3391)\n", "pair 1984: c1( C(=O)N -> c1(C(=O)N (frequency 3386)\n", "pair 1985: C(C)(C) O) -> C(C)(C)O) (frequency 3386)\n", "pair 1986: Cc1cc( N -> Cc1cc(N (frequency 3385)\n", "pair 1987: c2 =O -> c2=O (frequency 3383)\n", "pair 1988: N1 C -> N1C (frequency 3381)\n", "pair 1989: =C( N -> =C(N (frequency 3379)\n", "pair 1990: c1ccccc1 2 -> c1ccccc12 (frequency 3378)\n", "pair 1991: nc2 cc1 -> nc2cc1 (frequency 3376)\n", "pair 1992: c1s c( -> c1sc( (frequency 3376)\n", "pair 1993: S(C) (=O)=O -> S(C)(=O)=O (frequency 3376)\n", "pair 1994: nc( Cl) -> nc(Cl) (frequency 3370)\n", "pair 1995: cn 4) -> cn4) (frequency 3370)\n", "pair 1996: OCC CO -> OCCCO (frequency 3369)\n", "pair 1997: ncn 2) -> ncn2) (frequency 3368)\n", "pair 1998: CC( O -> CC(O (frequency 3367)\n", "pair 1999: c4 s -> c4s (frequency 3366)\n", "pair 2000: c(O) cc1 -> c(O)cc1 (frequency 3359)\n", "pair 2001: O=C( CSc1n -> O=C(CSc1n (frequency 3358)\n", "pair 2002: CC [C@H](O) -> CC[C@H](O) (frequency 3356)\n", "pair 2003: C(=O)O) cc2) -> C(=O)O)cc2) (frequency 3354)\n", "pair 2004: c4cc( Cl)cc -> c4cc(Cl)cc (frequency 3352)\n", "pair 2005: [nH] 2)cc1 -> [nH]2)cc1 (frequency 3352)\n", "pair 2006: c1ccccc1 Cl) -> c1ccccc1Cl) (frequency 3351)\n", "pair 2007: Cl)c(Cl) c3) -> Cl)c(Cl)c3) (frequency 3351)\n", "pair 2008: c2 =O)c1 -> c2=O)c1 (frequency 3350)\n", "pair 2009: c6 cccc -> c6cccc (frequency 3348)\n", "pair 2010: [C@H]( CC(C)C) -> [C@H](CC(C)C) (frequency 3347)\n", "pair 2011: =O) C) -> =O)C) (frequency 3345)\n", "pair 2012: C(=O)N( CC) -> C(=O)N(CC) (frequency 3341)\n", "pair 2013: CCCC 4) -> CCCC4) (frequency 3340)\n", "pair 2014: c2nc(- c3ccccc3) -> c2nc(-c3ccccc3) (frequency 3335)\n", "pair 2015: C2 C3 -> C2C3 (frequency 3333)\n", "pair 2016: c2cc( Br)cc -> c2cc(Br)cc (frequency 3332)\n", "pair 2017: (- c3ccccc3) -> (-c3ccccc3) (frequency 3329)\n", "pair 2018: c12 ccc( -> c12ccc( (frequency 3328)\n", "pair 2019: CC [C@@H](C) -> CC[C@@H](C) (frequency 3328)\n", "pair 2020: nc2 )cc1 -> nc2)cc1 (frequency 3326)\n", "pair 2021: CC1 = -> CC1= (frequency 3320)\n", "pair 2022: (F)(F) F) -> (F)(F)F) (frequency 3320)\n", "pair 2023: C(F)(F)F)cc c1 -> C(F)(F)F)ccc1 (frequency 3318)\n", "pair 2024: c12c( cc( -> c12c(cc( (frequency 3317)\n", "pair 2025: C(=O) c1c( -> C(=O)c1c( (frequency 3317)\n", "pair 2026: CCC [C@H]2 -> CCC[C@H]2 (frequency 3315)\n", "pair 2027: C(=O)N[C@@H]( Cc1ccccc1) -> C(=O)N[C@@H](Cc1ccccc1) (frequency 3314)\n", "pair 2028: CCC =C(C)C) -> CCC=C(C)C) (frequency 3312)\n", "pair 2029: =[N+]=[N-] ) -> =[N+]=[N-]) (frequency 3301)\n", "pair 2030: =C 4 -> =C4 (frequency 3301)\n", "pair 2031: n1 c(C) -> n1c(C) (frequency 3300)\n", "pair 2032: c1cc( -n2 -> c1cc(-n2 (frequency 3298)\n", "pair 2033: Cn2 cc -> Cn2cc (frequency 3291)\n", "pair 2034: n2) ccc( -> n2)ccc( (frequency 3288)\n", "pair 2035: c2s c( -> c2sc( (frequency 3284)\n", "pair 2036: OCO 4 -> OCO4 (frequency 3280)\n", "pair 2037: C [C@H]4 -> C[C@H]4 (frequency 3279)\n", "pair 2038: c2ccc(Cl)cc2 Cl) -> c2ccc(Cl)cc2Cl) (frequency 3276)\n", "pair 2039: OC c3ccccc3) -> OCc3ccccc3) (frequency 3275)\n", "pair 2040: c2cc( Cl) -> c2cc(Cl) (frequency 3274)\n", "pair 2041: Cl)c(Cl) c1) -> Cl)c(Cl)c1) (frequency 3266)\n", "pair 2042: [C@@]1 (C)CC -> [C@@]1(C)CC (frequency 3265)\n", "pair 2043: /N =C2 -> /N=C2 (frequency 3264)\n", "pair 2044: c(- c2n -> c(-c2n (frequency 3261)\n", "pair 2045: [C@H]( C(C)C) -> [C@H](C(C)C) (frequency 3259)\n", "pair 2046: c1c2c( ccc1 -> c1c2c(ccc1 (frequency 3256)\n", "pair 2047: c2cn ( -> c2cn( (frequency 3252)\n", "pair 2048: n1 (C) -> n1(C) (frequency 3251)\n", "pair 2049: Cl)cc 3 -> Cl)cc3 (frequency 3250)\n", "pair 2050: c2ccccc2 OC) -> c2ccccc2OC) (frequency 3244)\n", "pair 2051: c2c(- c3ccccc3) -> c2c(-c3ccccc3) (frequency 3242)\n", "pair 2052: /C( C#N) -> /C(C#N) (frequency 3237)\n", "pair 2053: no 2) -> no2) (frequency 3236)\n", "pair 2054: S(=O)(=O) c1ccccc1 -> S(=O)(=O)c1ccccc1 (frequency 3236)\n", "pair 2055: nc2 C) -> nc2C) (frequency 3234)\n", "pair 2056: c2s c3c( -> c2sc3c( (frequency 3233)\n", "pair 2057: =C /C -> =C/C (frequency 3231)\n", "pair 2058: c3cc(OC)c(OC)c(OC) c3) -> c3cc(OC)c(OC)c(OC)c3) (frequency 3225)\n", "pair 2059: cc1 - -> cc1- (frequency 3223)\n", "pair 2060: s 2 -> s2 (frequency 3221)\n", "pair 2061: c4ccc(OC) cc4) -> c4ccc(OC)cc4) (frequency 3221)\n", "pair 2062: CC(O) ( -> CC(O)( (frequency 3221)\n", "pair 2063: Cc1 [nH] -> Cc1[nH] (frequency 3220)\n", "pair 2064: c1( O -> c1(O (frequency 3218)\n", "pair 2065: C(=O)N1 CC -> C(=O)N1CC (frequency 3218)\n", "pair 2066: n( CC(=O)N -> n(CC(=O)N (frequency 3216)\n", "pair 2067: n ccc1 -> nccc1 (frequency 3216)\n", "pair 2068: C S(=O)(=O)N -> CS(=O)(=O)N (frequency 3213)\n", "pair 2069: )cc 4) -> )cc4) (frequency 3208)\n", "pair 2070: c4 ncc -> c4ncc (frequency 3205)\n", "pair 2071: [C@] (C)(O) -> [C@](C)(O) (frequency 3204)\n", "pair 2072: C) C) -> C)C) (frequency 3197)\n", "pair 2073: C2) n1 -> C2)n1 (frequency 3194)\n", "pair 2074: Cc2ccc( OC)cc2) -> Cc2ccc(OC)cc2) (frequency 3192)\n", "pair 2075: c2cccn c2 -> c2cccnc2 (frequency 3191)\n", "pair 2076: CCN (C)C -> CCN(C)C (frequency 3185)\n", "pair 2077: c1( -n2 -> c1(-n2 (frequency 3183)\n", "pair 2078: N c1c( -> Nc1c( (frequency 3183)\n", "pair 2079: CCN2 C(=O) -> CCN2C(=O) (frequency 3183)\n", "pair 2080: C [C@@H]4 -> C[C@@H]4 (frequency 3180)\n", "pair 2081: [nH] 2 -> [nH]2 (frequency 3176)\n", "pair 2082: [C@H]( NC( -> [C@H](NC( (frequency 3176)\n", "pair 2083: c1ccc( NC( -> c1ccc(NC( (frequency 3175)\n", "pair 2084: OCC (N -> OCC(N (frequency 3175)\n", "pair 2085: COc1ccc( Cl)cc1 -> COc1ccc(Cl)cc1 (frequency 3175)\n", "pair 2086: CC1 (C)C -> CC1(C)C (frequency 3171)\n", "pair 2087: c4 c3 -> c4c3 (frequency 3169)\n", "pair 2088: CN2 CC -> CN2CC (frequency 3166)\n", "pair 2089: c4cccc n4) -> c4ccccn4) (frequency 3164)\n", "pair 2090: NC(=O) [C@@H]1 -> NC(=O)[C@@H]1 (frequency 3164)\n", "pair 2091: Cc1ccco 1) -> Cc1ccco1) (frequency 3162)\n", "pair 2092: C(=O)N2 CC -> C(=O)N2CC (frequency 3158)\n", "pair 2093: c2c( ccc( -> c2c(ccc( (frequency 3157)\n", "pair 2094: cc 5) -> cc5) (frequency 3155)\n", "pair 2095: cc1 2 -> cc12 (frequency 3154)\n", "pair 2096: c1cc (=O) -> c1cc(=O) (frequency 3154)\n", "pair 2097: NC(=O) C -> NC(=O)C (frequency 3154)\n", "pair 2098: CCn1 cc( -> CCn1cc( (frequency 3151)\n", "pair 2099: c1cccc( Cl)c1) -> c1cccc(Cl)c1) (frequency 3149)\n", "pair 2100: C2 )ccc1 -> C2)ccc1 (frequency 3148)\n", "pair 2101: C2 CCN(C -> C2CCN(C (frequency 3147)\n", "pair 2102: c1cc( OC)c(OC)c(OC) -> c1cc(OC)c(OC)c(OC) (frequency 3141)\n", "pair 2103: 4 )cc2) -> 4)cc2) (frequency 3140)\n", "pair 2104: c1c( -n2 -> c1c(-n2 (frequency 3139)\n", "pair 2105: c3cccc( C(F)(F)F)c3) -> c3cccc(C(F)(F)F)c3) (frequency 3136)\n", "pair 2106: O=C1 c2ccccc2 -> O=C1c2ccccc2 (frequency 3134)\n", "pair 2107: C[C@H]( N -> C[C@H](N (frequency 3134)\n", "pair 2108: ( N) -> (N) (frequency 3131)\n", "pair 2109: CCCC (C) -> CCCC(C) (frequency 3128)\n", "pair 2110: c2ccc( Cl) -> c2ccc(Cl) (frequency 3125)\n", "pair 2111: [N+](=O)[O-] )cc( -> [N+](=O)[O-])cc( (frequency 3125)\n", "pair 2112: CCOCC 2)c1 -> CCOCC2)c1 (frequency 3123)\n", "pair 2113: CC C(C)(C) -> CCC(C)(C) (frequency 3123)\n", "pair 2114: n2 )cc1) -> n2)cc1) (frequency 3120)\n", "pair 2115: Cn2 c(=O) -> Cn2c(=O) (frequency 3120)\n", "pair 2116: c2ncn ( -> c2ncn( (frequency 3116)\n", "pair 2117: CC(C) N -> CC(C)N (frequency 3116)\n", "pair 2118: c(=O) c1 -> c(=O)c1 (frequency 3115)\n", "pair 2119: [C@]3 4 -> [C@]34 (frequency 3115)\n", "pair 2120: C c3c( -> Cc3c( (frequency 3115)\n", "pair 2121: )cc c1) -> )ccc1) (frequency 3115)\n", "pair 2122: C1 (O) -> C1(O) (frequency 3114)\n", "pair 2123: CCC) =O) -> CCC)=O) (frequency 3112)\n", "pair 2124: c2ccccc2 )ccc1 -> c2ccccc2)ccc1 (frequency 3111)\n", "pair 2125: C(=O)N C(=O) -> C(=O)NC(=O) (frequency 3111)\n", "pair 2126: C( OCC) -> C(OCC) (frequency 3110)\n", "pair 2127: C /C( -> C/C( (frequency 3110)\n", "pair 2128: c1cn c( -> c1cnc( (frequency 3108)\n", "pair 2129: [C@@H]1 O) -> [C@@H]1O) (frequency 3106)\n", "pair 2130: CC3 )cc1 -> CC3)cc1 (frequency 3102)\n", "pair 2131: c1nc(- c2ccccc2) -> c1nc(-c2ccccc2) (frequency 3101)\n", "pair 2132: c1cn 2 -> c1cn2 (frequency 3101)\n", "pair 2133: /N =C1\\ -> /N=C1\\ (frequency 3099)\n", "pair 2134: CCC [C@@H]( -> CCC[C@@H]( (frequency 3098)\n", "pair 2135: Nc1n cn -> Nc1ncn (frequency 3094)\n", "pair 2136: C= CC( -> C=CC( (frequency 3088)\n", "pair 2137: c1c(F) ccc( -> c1c(F)ccc( (frequency 3087)\n", "pair 2138: c1ccc( C2 -> c1ccc(C2 (frequency 3085)\n", "pair 2139: [C@@H]( OC) -> [C@@H](OC) (frequency 3083)\n", "pair 2140: Cc1n o -> Cc1no (frequency 3081)\n", "pair 2141: c3) CC2) -> c3)CC2) (frequency 3080)\n", "pair 2142: OC [C@@H]1 -> OC[C@@H]1 (frequency 3080)\n", "pair 2143: Cl)cc2 Cl) -> Cl)cc2Cl) (frequency 3079)\n", "pair 2144: C(N) =N) -> C(N)=N) (frequency 3079)\n", "pair 2145: cc 3)cc2) -> cc3)cc2) (frequency 3078)\n", "pair 2146: [C@H]2 [C@H]( -> [C@H]2[C@H]( (frequency 3076)\n", "pair 2147: cccc 5) -> cccc5) (frequency 3075)\n", "pair 2148: -c2ccc( - -> -c2ccc(- (frequency 3075)\n", "pair 2149: OCC OCC -> OCCOCC (frequency 3074)\n", "pair 2150: C3 =O -> C3=O (frequency 3074)\n", "pair 2151: N2 CCN(C -> N2CCN(C (frequency 3073)\n", "pair 2152: CO c1cccc2 -> COc1cccc2 (frequency 3073)\n", "pair 2153: S) N -> S)N (frequency 3067)\n", "pair 2154: ( CO) -> (CO) (frequency 3064)\n", "pair 2155: c1ccccc1 Cl -> c1ccccc1Cl (frequency 3061)\n", "pair 2156: c1cc c2ccccc2c1 -> c1ccc2ccccc2c1 (frequency 3060)\n", "pair 2157: n cc(- -> ncc(- (frequency 3059)\n", "pair 2158: ccc(OC) cc1 -> ccc(OC)cc1 (frequency 3059)\n", "pair 2159: nc2 - -> nc2- (frequency 3051)\n", "pair 2160: CC( C(=O)O) -> CC(C(=O)O) (frequency 3049)\n", "pair 2161: c2ccc(OC) cc2 -> c2ccc(OC)cc2 (frequency 3048)\n", "pair 2162: c3cc o -> c3cco (frequency 3047)\n", "pair 2163: C(N C)=O) -> C(NC)=O) (frequency 3045)\n", "pair 2164: CCCC CCN -> CCCCCCN (frequency 3044)\n", "pair 2165: CC(C) (O) -> CC(C)(O) (frequency 3044)\n", "pair 2166: c2cc3cccc c3 -> c2cc3ccccc3 (frequency 3043)\n", "pair 2167: =O) o -> =O)o (frequency 3040)\n", "pair 2168: -c2 c3c( -> -c2c3c( (frequency 3038)\n", "pair 2169: (C)C) cc1 -> (C)C)cc1 (frequency 3034)\n", "pair 2170: OCO 3 -> OCO3 (frequency 3030)\n", "pair 2171: Cc1ccc( O -> Cc1ccc(O (frequency 3026)\n", "pair 2172: CC( c2ccccc2) -> CC(c2ccccc2) (frequency 3025)\n", "pair 2173: CC4 )cc3) -> CC4)cc3) (frequency 3022)\n", "pair 2174: n cc2) -> ncc2) (frequency 3021)\n", "pair 2175: CCCCCCCC ) -> CCCCCCCC) (frequency 3019)\n", "pair 2176: c1cc2 cc( -> c1cc2cc( (frequency 3018)\n", "pair 2177: (=O)=O) c1 -> (=O)=O)c1 (frequency 3018)\n", "pair 2178: n2 ccc( -> n2ccc( (frequency 3012)\n", "pair 2179: cc3) =O) -> cc3)=O) (frequency 3007)\n", "pair 2180: c1c( Br) -> c1c(Br) (frequency 3004)\n", "pair 2181: cc2c( c1) -> cc2c(c1) (frequency 3003)\n", "pair 2182: c1ccccc1) =O) -> c1ccccc1)=O) (frequency 3000)\n", "pair 2183: N(CC Cl) -> N(CCCl) (frequency 3000)\n", "pair 2184: N( c2ccccc2) -> N(c2ccccc2) (frequency 3000)\n", "pair 2185: Cc1ccc( -c2n -> Cc1ccc(-c2n (frequency 3000)\n", "pair 2186: 5 CCCC -> 5CCCC (frequency 3000)\n", "pair 2187: O c1cc( -> Oc1cc( (frequency 2999)\n", "pair 2188: 3)cc 2)c1 -> 3)cc2)c1 (frequency 2995)\n", "pair 2189: Nc1n cc( -> Nc1ncc( (frequency 2992)\n", "pair 2190: c2 C)cc1 -> c2C)cc1 (frequency 2989)\n", "pair 2191: N( Cc2ccccc2) -> N(Cc2ccccc2) (frequency 2989)\n", "pair 2192: CC )cc1 -> CC)cc1 (frequency 2989)\n", "pair 2193: c2c( C#N) -> c2c(C#N) (frequency 2988)\n", "pair 2194: CC1 CCN( -> CC1CCN( (frequency 2988)\n", "pair 2195: OC)c(OC) cc1 -> OC)c(OC)cc1 (frequency 2987)\n", "pair 2196: c1cccc( O -> c1cccc(O (frequency 2981)\n", "pair 2197: o n2) -> on2) (frequency 2977)\n", "pair 2198: ncn c(N -> ncnc(N (frequency 2976)\n", "pair 2199: c1cn ccc1 -> c1cnccc1 (frequency 2975)\n", "pair 2200: C1 N(C( -> C1N(C( (frequency 2975)\n", "pair 2201: [C@@] (C)(O) -> [C@@](C)(O) (frequency 2971)\n", "pair 2202: NC(=O) [C@@H]( -> NC(=O)[C@@H]( (frequency 2971)\n", "pair 2203: c3 c4ccccc4 -> c3c4ccccc4 (frequency 2969)\n", "pair 2204: c1c( Cl)c( -> c1c(Cl)c( (frequency 2968)\n", "pair 2205: /C=C/ C( -> /C=C/C( (frequency 2968)\n", "pair 2206: c2ccn cc2 -> c2ccncc2 (frequency 2967)\n", "pair 2207: [C@H]( OC) -> [C@H](OC) (frequency 2967)\n", "pair 2208: n c2ccccc21 -> nc2ccccc21 (frequency 2965)\n", "pair 2209: 5 CCOCC -> 5CCOCC (frequency 2964)\n", "pair 2210: c4 cc(C) -> c4cc(C) (frequency 2963)\n", "pair 2211: Br) c2) -> Br)c2) (frequency 2963)\n", "pair 2212: n c3ccccc3 -> nc3ccccc3 (frequency 2962)\n", "pair 2213: cnc2 c1ncn2 -> cnc2c1ncn2 (frequency 2962)\n", "pair 2214: CCCC N( -> CCCCN( (frequency 2961)\n", "pair 2215: CC) c1 -> CC)c1 (frequency 2961)\n", "pair 2216: cn n2 -> cnn2 (frequency 2960)\n", "pair 2217: =O) c( -> =O)c( (frequency 2959)\n", "pair 2218: c1( C2 -> c1(C2 (frequency 2953)\n", "pair 2219: O C(C)(C)C -> OC(C)(C)C (frequency 2953)\n", "pair 2220: n(C c3ccccc3) -> n(Cc3ccccc3) (frequency 2951)\n", "pair 2221: c(C) c3) -> c(C)c3) (frequency 2950)\n", "pair 2222: nc( C(=O)N -> nc(C(=O)N (frequency 2948)\n", "pair 2223: CC2 )cc1) -> CC2)cc1) (frequency 2948)\n", "pair 2224: c2cc( N3 -> c2cc(N3 (frequency 2944)\n", "pair 2225: c2c(Cl) cccc2) -> c2c(Cl)cccc2) (frequency 2941)\n", "pair 2226: c2ccc( C#N)cc2) -> c2ccc(C#N)cc2) (frequency 2940)\n", "pair 2227: O 4) -> O4) (frequency 2938)\n", "pair 2228: c1cccc 2) -> c1cccc2) (frequency 2937)\n", "pair 2229: CC C(=O)O -> CCC(=O)O (frequency 2937)\n", "pair 2230: c4ccc(C) cc4) -> c4ccc(C)cc4) (frequency 2934)\n", "pair 2231: cn2) CC1 -> cn2)CC1 (frequency 2933)\n", "pair 2232: OCC 3 -> OCC3 (frequency 2932)\n", "pair 2233: (- c2ccccc2) -> (-c2ccccc2) (frequency 2932)\n", "pair 2234: N1 CCCC -> N1CCCC (frequency 2929)\n", "pair 2235: c1cn c(N -> c1cnc(N (frequency 2927)\n", "pair 2236: c4) CC3) -> c4)CC3) (frequency 2921)\n", "pair 2237: c [n+]( -> c[n+]( (frequency 2913)\n", "pair 2238: Cc1ccc( S(=O)(=O) -> Cc1ccc(S(=O)(=O) (frequency 2909)\n", "pair 2239: c4cccc( Cl) -> c4cccc(Cl) (frequency 2908)\n", "pair 2240: c3cc s -> c3ccs (frequency 2907)\n", "pair 2241: CC1 =N -> CC1=N (frequency 2907)\n", "pair 2242: c[nH] 1 -> c[nH]1 (frequency 2906)\n", "pair 2243: c(=O) n(C)c(=O) -> c(=O)n(C)c(=O) (frequency 2906)\n", "pair 2244: cc(- c3ccccc3) -> cc(-c3ccccc3) (frequency 2905)\n", "pair 2245: OCC(=O) O) -> OCC(=O)O) (frequency 2905)\n", "pair 2246: c1(OC) ccc( -> c1(OC)ccc( (frequency 2904)\n", "pair 2247: CC C(C)C) -> CCC(C)C) (frequency 2902)\n", "pair 2248: c3cc( Cl)c( -> c3cc(Cl)c( (frequency 2901)\n", "pair 2249: c1- c1ccccc1 -> c1-c1ccccc1 (frequency 2901)\n", "pair 2250: C#N) c1 -> C#N)c1 (frequency 2899)\n", "pair 2251: c2ccc3ccccc3 c2) -> c2ccc3ccccc3c2) (frequency 2897)\n", "pair 2252: CC(CC( C4) -> CC(CC(C4) (frequency 2893)\n", "pair 2253: C) cc( -> C)cc( (frequency 2892)\n", "pair 2254: c1cccc(F) c1) -> c1cccc(F)c1) (frequency 2891)\n", "pair 2255: c3ccc( C#N)cc3) -> c3ccc(C#N)cc3) (frequency 2889)\n", "pair 2256: CN(C) c1ccc( -> CN(C)c1ccc( (frequency 2887)\n", "pair 2257: ( c1ccccc1) -> (c1ccccc1) (frequency 2887)\n", "pair 2258: c1cc( C(N -> c1cc(C(N (frequency 2885)\n", "pair 2259: c1c( OC)cc( -> c1c(OC)cc( (frequency 2885)\n", "pair 2260: CC1 CCCO -> CC1CCCO (frequency 2885)\n", "pair 2261: CCC [C@@H]1 -> CCC[C@@H]1 (frequency 2884)\n", "pair 2262: nn1 2 -> nn12 (frequency 2882)\n", "pair 2263: Cn2 cc( -> Cn2cc( (frequency 2882)\n", "pair 2264: (=O) O -> (=O)O (frequency 2882)\n", "pair 2265: [C@]12 C -> [C@]12C (frequency 2881)\n", "pair 2266: [C@H]2 O -> [C@H]2O (frequency 2881)\n", "pair 2267: c1cc( C( -> c1cc(C( (frequency 2879)\n", "pair 2268: c(OC) c2) -> c(OC)c2) (frequency 2879)\n", "pair 2269: CCC(N 3 -> CCC(N3 (frequency 2879)\n", "pair 2270: c3ccc(OC) cc3 -> c3ccc(OC)cc3 (frequency 2878)\n", "pair 2271: C2) C3) -> C2)C3) (frequency 2876)\n", "pair 2272: NC(=O) C2 -> NC(=O)C2 (frequency 2872)\n", "pair 2273: c2cn ccc2) -> c2cnccc2) (frequency 2871)\n", "pair 2274: C/C=C\\ C/C=C\\ -> C/C=C\\C/C=C\\ (frequency 2870)\n", "pair 2275: c4 o -> c4o (frequency 2869)\n", "pair 2276: NC( =N)N -> NC(=N)N (frequency 2864)\n", "pair 2277: [C@H]( CC) -> [C@H](CC) (frequency 2863)\n", "pair 2278: c2cc( O -> c2cc(O (frequency 2860)\n", "pair 2279: C(=O) CS -> C(=O)CS (frequency 2860)\n", "pair 2280: CCCC C2)cc1 -> CCCCC2)cc1 (frequency 2857)\n", "pair 2281: O=C( COC(=O) -> O=C(COC(=O) (frequency 2856)\n", "pair 2282: c1ccc( O) -> c1ccc(O) (frequency 2855)\n", "pair 2283: c2c(C) cc(C) -> c2c(C)cc(C) (frequency 2854)\n", "pair 2284: C(=O)N2 CCC( -> C(=O)N2CCC( (frequency 2854)\n", "pair 2285: ncn c3 -> ncnc3 (frequency 2852)\n", "pair 2286: c3ccccc3 OC) -> c3ccccc3OC) (frequency 2850)\n", "pair 2287: [C@@H]4 [C@@]5 -> [C@@H]4[C@@]5 (frequency 2849)\n", "pair 2288: [C@H]2 CC[C@H]( -> [C@H]2CC[C@H]( (frequency 2847)\n", "pair 2289: /N =C\\ -> /N=C\\ (frequency 2847)\n", "pair 2290: [C@@H]2 O) -> [C@@H]2O) (frequency 2844)\n", "pair 2291: c3cccc( - -> c3cccc(- (frequency 2841)\n", "pair 2292: N( C(C)=O) -> N(C(C)=O) (frequency 2840)\n", "pair 2293: c3ccccc3 - -> c3ccccc3- (frequency 2839)\n", "pair 2294: c3cc(OC)c(OC) cc3 -> c3cc(OC)c(OC)cc3 (frequency 2839)\n", "pair 2295: c2ccccc2 c1=O -> c2ccccc2c1=O (frequency 2839)\n", "pair 2296: S C(=S) -> SC(=S) (frequency 2834)\n", "pair 2297: c1ccccc1) N -> c1ccccc1)N (frequency 2830)\n", "pair 2298: CCCN (C) -> CCCN(C) (frequency 2829)\n", "pair 2299: CCS C) -> CCSC) (frequency 2828)\n", "pair 2300: nc(C) c1 -> nc(C)c1 (frequency 2827)\n", "pair 2301: NC(=O) C) -> NC(=O)C) (frequency 2826)\n", "pair 2302: C(=O)N C(=O)N -> C(=O)NC(=O)N (frequency 2826)\n", "pair 2303: c1c(C) ccc( -> c1c(C)ccc( (frequency 2825)\n", "pair 2304: c2n cccc2) -> c2ncccc2) (frequency 2824)\n", "pair 2305: c(N 4 -> c(N4 (frequency 2824)\n", "pair 2306: C(=O)N1 CCC[C@H]1 -> C(=O)N1CCC[C@H]1 (frequency 2821)\n", "pair 2307: CC(=O)N 3 -> CC(=O)N3 (frequency 2819)\n", "pair 2308: CN3 CCN( -> CN3CCN( (frequency 2817)\n", "pair 2309: c2ccc(O) cc2 -> c2ccc(O)cc2 (frequency 2816)\n", "pair 2310: CCC( CC) -> CCC(CC) (frequency 2815)\n", "pair 2311: n2) n1 -> n2)n1 (frequency 2812)\n", "pair 2312: [C@@H](C) O) -> [C@@H](C)O) (frequency 2812)\n", "pair 2313: NC(=O) /C=C/ -> NC(=O)/C=C/ (frequency 2811)\n", "pair 2314: =O) n( -> =O)n( (frequency 2811)\n", "pair 2315: [n+] 3 -> [n+]3 (frequency 2810)\n", "pair 2316: C#N )cc2 -> C#N)cc2 (frequency 2810)\n", "pair 2317: F C( -> FC( (frequency 2808)\n", "pair 2318: c2cc( Cl)cc(Cl) -> c2cc(Cl)cc(Cl) (frequency 2807)\n", "pair 2319: N C1=N -> NC1=N (frequency 2802)\n", "pair 2320: c3c( N -> c3c(N (frequency 2799)\n", "pair 2321: c2c1 =O -> c2c1=O (frequency 2792)\n", "pair 2322: CCC #N) -> CCC#N) (frequency 2791)\n", "pair 2323: c1 O) -> c1O) (frequency 2790)\n", "pair 2324: C c4ccccc4) -> Cc4ccccc4) (frequency 2788)\n", "pair 2325: c2cccc3 c2 -> c2cccc3c2 (frequency 2787)\n", "pair 2326: N( Cc1ccccc1) -> N(Cc1ccccc1) (frequency 2786)\n", "pair 2327: c6cccc c6 -> c6ccccc6 (frequency 2785)\n", "pair 2328: ccc( OC)c1 -> ccc(OC)c1 (frequency 2783)\n", "pair 2329: O C1=O -> OC1=O (frequency 2783)\n", "pair 2330: nc( C(F)(F)F) -> nc(C(F)(F)F) (frequency 2782)\n", "pair 2331: C(=O)N C) -> C(=O)NC) (frequency 2780)\n", "pair 2332: c2 co -> c2co (frequency 2779)\n", "pair 2333: c( F)cc1 -> c(F)cc1 (frequency 2779)\n", "pair 2334: c3o ccc3) -> c3occc3) (frequency 2777)\n", "pair 2335: O 2)cc1 -> O2)cc1 (frequency 2777)\n", "pair 2336: c1cccc2 ccccc12 -> c1cccc2ccccc12 (frequency 2774)\n", "pair 2337: N=C( N -> N=C(N (frequency 2774)\n", "pair 2338: c1ccccc1 O -> c1ccccc1O (frequency 2773)\n", "pair 2339: N1 CCN(C -> N1CCN(C (frequency 2773)\n", "pair 2340: n s -> ns (frequency 2769)\n", "pair 2341: nc2 3) -> nc23) (frequency 2765)\n", "pair 2342: c4 c3) -> c4c3) (frequency 2762)\n", "pair 2343: CO C(C)=O) -> COC(C)=O) (frequency 2759)\n", "pair 2344: CC3 CC( -> CC3CC( (frequency 2759)\n", "pair 2345: c3ccc( Cl) -> c3ccc(Cl) (frequency 2758)\n", "pair 2346: CCCN( CCC) -> CCCN(CCC) (frequency 2758)\n", "pair 2347: c1cc( F)c( -> c1cc(F)c( (frequency 2756)\n", "pair 2348: [C@]3 (C)CC -> [C@]3(C)CC (frequency 2752)\n", "pair 2349: c(C) cc( -> c(C)cc( (frequency 2751)\n", "pair 2350: c1cccc( C(F)(F)F)c1 -> c1cccc(C(F)(F)F)c1 (frequency 2745)\n", "pair 2351: c1ccc( CN -> c1ccc(CN (frequency 2742)\n", "pair 2352: C(=O) /C(=C/ -> C(=O)/C(=C/ (frequency 2742)\n", "pair 2353: c3ccc(O) cc3 -> c3ccc(O)cc3 (frequency 2740)\n", "pair 2354: c1cccc( - -> c1cccc(- (frequency 2739)\n", "pair 2355: cc n3) -> ccn3) (frequency 2737)\n", "pair 2356: - c1cc -> -c1cc (frequency 2736)\n", "pair 2357: c1c( -c2ccccc2) -> c1c(-c2ccccc2) (frequency 2735)\n", "pair 2358: S C1 -> SC1 (frequency 2735)\n", "pair 2359: CO [C@@H]1 -> CO[C@@H]1 (frequency 2735)\n", "pair 2360: c1c(C) cccc1 -> c1c(C)cccc1 (frequency 2734)\n", "pair 2361: O= S1(=O) -> O=S1(=O) (frequency 2734)\n", "pair 2362: c1cccc( -c2n -> c1cccc(-c2n (frequency 2733)\n", "pair 2363: c(C) cc(C) -> c(C)cc(C) (frequency 2732)\n", "pair 2364: cc 5 -> cc5 (frequency 2731)\n", "pair 2365: c( CN -> c(CN (frequency 2731)\n", "pair 2366: c3c(Cl) cccc3 -> c3c(Cl)cccc3 (frequency 2727)\n", "pair 2367: COc1ccc( OC)c( -> COc1ccc(OC)c( (frequency 2727)\n", "pair 2368: C(=O)N [C@@H](C) -> C(=O)N[C@@H](C) (frequency 2725)\n", "pair 2369: c3cc( C(F)(F)F)cc -> c3cc(C(F)(F)F)cc (frequency 2724)\n", "pair 2370: n1) =O -> n1)=O (frequency 2723)\n", "pair 2371: =C( C#N) -> =C(C#N) (frequency 2721)\n", "pair 2372: =O) cccc1 -> =O)cccc1 (frequency 2719)\n", "pair 2373: CO [C@H]1 -> CO[C@H]1 (frequency 2717)\n", "pair 2374: [C@H]( Cc2ccccc2) -> [C@H](Cc2ccccc2) (frequency 2716)\n", "pair 2375: c(=O) n3 -> c(=O)n3 (frequency 2715)\n", "pair 2376: CCCN( C -> CCCN(C (frequency 2714)\n", "pair 2377: CC(C) (C)C -> CC(C)(C)C (frequency 2714)\n", "pair 2378: c2c(OC) cccc2) -> c2c(OC)cccc2) (frequency 2708)\n", "pair 2379: CN(C) S(=O)(=O) -> CN(C)S(=O)(=O) (frequency 2706)\n", "pair 2380: s c(- -> sc(- (frequency 2702)\n", "pair 2381: c1cccc (C) -> c1cccc(C) (frequency 2701)\n", "pair 2382: Cc2ccc( O)cc2) -> Cc2ccc(O)cc2) (frequency 2701)\n", "pair 2383: c1ccc( O)c(O) -> c1ccc(O)c(O) (frequency 2699)\n", "pair 2384: [N+](=O)[O-] )ccc1 -> [N+](=O)[O-])ccc1 (frequency 2699)\n", "pair 2385: c(=O)[nH] c2=O) -> c(=O)[nH]c2=O) (frequency 2692)\n", "pair 2386: [C@@]3 (C)CC -> [C@@]3(C)CC (frequency 2691)\n", "pair 2387: nc3 2) -> nc32) (frequency 2689)\n", "pair 2388: CO CCO -> COCCO (frequency 2689)\n", "pair 2389: c2cc c3n -> c2ccc3n (frequency 2687)\n", "pair 2390: [C@H]( Cc1ccccc1) -> [C@H](Cc1ccccc1) (frequency 2686)\n", "pair 2391: CC [C@@H](O) -> CC[C@@H](O) (frequency 2686)\n", "pair 2392: c3ccc( Cl)c(Cl)c3) -> c3ccc(Cl)c(Cl)c3) (frequency 2685)\n", "pair 2393: c2cc(C) ccc2 -> c2cc(C)ccc2 (frequency 2685)\n", "pair 2394: c2ccccc2 C) -> c2ccccc2C) (frequency 2682)\n", "pair 2395: [C@@]2 1 -> [C@@]21 (frequency 2681)\n", "pair 2396: -c2 nc(N -> -c2nc(N (frequency 2679)\n", "pair 2397: [C@@]3 4 -> [C@@]34 (frequency 2676)\n", "pair 2398: /C =N\\ -> /C=N\\ (frequency 2676)\n", "pair 2399: C =C\\ -> C=C\\ (frequency 2675)\n", "pair 2400: nc( OC) -> nc(OC) (frequency 2670)\n", "pair 2401: CCC( N) -> CCC(N) (frequency 2670)\n", "pair 2402: c2ccc(Cl)cc 2)cc1 -> c2ccc(Cl)cc2)cc1 (frequency 2669)\n", "pair 2403: c1( S -> c1(S (frequency 2669)\n", "pair 2404: [nH] n1 -> [nH]n1 (frequency 2669)\n", "pair 2405: ccccc1 2) -> ccccc12) (frequency 2668)\n", "pair 2406: c1 c-2 -> c1c-2 (frequency 2667)\n", "pair 2407: CN( CC -> CN(CC (frequency 2667)\n", "pair 2408: CO c1cc2c( -> COc1cc2c( (frequency 2664)\n", "pair 2409: c2cc(OC)c(OC)c(OC) c2) -> c2cc(OC)c(OC)c(OC)c2) (frequency 2663)\n", "pair 2410: CCCN2 C(=O) -> CCCN2C(=O) (frequency 2654)\n", "pair 2411: C2 CC -> C2CC (frequency 2653)\n", "pair 2412: C(N O)=O) -> C(NO)=O) (frequency 2653)\n", "pair 2413: C(=O)N [C@H]1 -> C(=O)N[C@H]1 (frequency 2641)\n", "pair 2414: CCCN1 CCN( -> CCCN1CCN( (frequency 2637)\n", "pair 2415: c2cccc( O -> c2cccc(O (frequency 2636)\n", "pair 2416: C2 (C) -> C2(C) (frequency 2636)\n", "pair 2417: CCCO 2) -> CCCO2) (frequency 2633)\n", "pair 2418: cc3 C) -> cc3C) (frequency 2632)\n", "pair 2419: [C@@]2 (C)CC -> [C@@]2(C)CC (frequency 2632)\n", "pair 2420: )cc (O) -> )cc(O) (frequency 2631)\n", "pair 2421: N( O) -> N(O) (frequency 2629)\n", "pair 2422: C( C(=O) -> C(C(=O) (frequency 2629)\n", "pair 2423: c( /C=C/ -> c(/C=C/ (frequency 2628)\n", "pair 2424: c2ccc(Cl)c(Cl) c2) -> c2ccc(Cl)c(Cl)c2) (frequency 2625)\n", "pair 2425: CC [C@H]4 -> CC[C@H]4 (frequency 2625)\n", "pair 2426: N( CCC) -> N(CCC) (frequency 2623)\n", "pair 2427: C(=O)N CCCC -> C(=O)NCCCC (frequency 2622)\n", "pair 2428: c3n cccc3) -> c3ncccc3) (frequency 2619)\n", "pair 2429: n 3)cc2 -> n3)cc2 (frequency 2616)\n", "pair 2430: 4 )cc2 -> 4)cc2 (frequency 2613)\n", "pair 2431: COc1ccc( C(=O) -> COc1ccc(C(=O) (frequency 2611)\n", "pair 2432: cn 2)c1 -> cn2)c1 (frequency 2610)\n", "pair 2433: c4 5) -> c45) (frequency 2605)\n", "pair 2434: CCCN C(=N)N) -> CCCNC(=N)N) (frequency 2603)\n", "pair 2435: CCC [C@H]( -> CCC[C@H]( (frequency 2602)\n", "pair 2436: c3ccc([N+](=O)[O-] )cc3) -> c3ccc([N+](=O)[O-])cc3) (frequency 2601)\n", "pair 2437: nc( N2 -> nc(N2 (frequency 2600)\n", "pair 2438: c2n nc(- -> c2nnc(- (frequency 2596)\n", "pair 2439: CC n2 -> CCn2 (frequency 2596)\n", "pair 2440: c1( N2 -> c1(N2 (frequency 2594)\n", "pair 2441: c4 )cc3) -> c4)cc3) (frequency 2591)\n", "pair 2442: C1= CC(=O) -> C1=CC(=O) (frequency 2591)\n", "pair 2443: S S -> SS (frequency 2588)\n", "pair 2444: C2 1 -> C21 (frequency 2588)\n", "pair 2445: O=C(N /N=C/ -> O=C(N/N=C/ (frequency 2586)\n", "pair 2446: c(- n2 -> c(-n2 (frequency 2581)\n", "pair 2447: nc3 c2 -> nc3c2 (frequency 2579)\n", "pair 2448: C( N1CCN( -> C(N1CCN( (frequency 2578)\n", "pair 2449: n( Cc2ccccc2) -> n(Cc2ccccc2) (frequency 2577)\n", "pair 2450: C(O) =C( -> C(O)=C( (frequency 2577)\n", "pair 2451: N2CCN( C( -> N2CCN(C( (frequency 2575)\n", "pair 2452: c2ccccc2 c1) -> c2ccccc2c1) (frequency 2574)\n", "pair 2453: CC(O) CO -> CC(O)CO (frequency 2573)\n", "pair 2454: c2cc( F)c( -> c2cc(F)c( (frequency 2572)\n", "pair 2455: CC(=O) OC -> CC(=O)OC (frequency 2572)\n", "pair 2456: C(=O)N c1ccccc1 -> C(=O)Nc1ccccc1 (frequency 2572)\n", "pair 2457: c3cc(OC) ccc3 -> c3cc(OC)ccc3 (frequency 2569)\n", "pair 2458: Br) c3) -> Br)c3) (frequency 2565)\n", "pair 2459: 4CCCC C4) -> 4CCCCC4) (frequency 2565)\n", "pair 2460: CCCC (N -> CCCC(N (frequency 2564)\n", "pair 2461: C( C(C)C) -> C(C(C)C) (frequency 2562)\n", "pair 2462: CC C(=O)N1 -> CCC(=O)N1 (frequency 2561)\n", "pair 2463: C(=O)N /N=C/ -> C(=O)N/N=C/ (frequency 2561)\n", "pair 2464: c1ccc(Cl)cc1 Cl -> c1ccc(Cl)cc1Cl (frequency 2560)\n", "pair 2465: c3ccccc3 C) -> c3ccccc3C) (frequency 2559)\n", "pair 2466: c3 c2cccc3) -> c3c2cccc3) (frequency 2559)\n", "pair 2467: c1ccc( Cl)c( -> c1ccc(Cl)c( (frequency 2559)\n", "pair 2468: 4) c3) -> 4)c3) (frequency 2556)\n", "pair 2469: o 2 -> o2 (frequency 2551)\n", "pair 2470: c3cccc (O) -> c3cccc(O) (frequency 2551)\n", "pair 2471: C1 CCN(C -> C1CCN(C (frequency 2550)\n", "pair 2472: c(=O) cc( -> c(=O)cc( (frequency 2548)\n", "pair 2473: [C@@]4 (C)CC -> [C@@]4(C)CC (frequency 2548)\n", "pair 2474: Cc1cc( O) -> Cc1cc(O) (frequency 2547)\n", "pair 2475: [C@H](C) N -> [C@H](C)N (frequency 2544)\n", "pair 2476: Cc1c( Cl) -> Cc1c(Cl) (frequency 2544)\n", "pair 2477: c2ccccc2) CC1) -> c2ccccc2)CC1) (frequency 2541)\n", "pair 2478: c1(F) ccc( -> c1(F)ccc( (frequency 2540)\n", "pair 2479: CN( S(=O)(=O) -> CN(S(=O)(=O) (frequency 2540)\n", "pair 2480: cc1) =O) -> cc1)=O) (frequency 2537)\n", "pair 2481: c1c( cccc1) -> c1c(cccc1) (frequency 2537)\n", "pair 2482: c1c( OCC -> c1c(OCC (frequency 2537)\n", "pair 2483: c3 nc(N) -> c3nc(N) (frequency 2536)\n", "pair 2484: c2c( F)cc( -> c2c(F)cc( (frequency 2536)\n", "pair 2485: c(OC) c3) -> c(OC)c3) (frequency 2535)\n", "pair 2486: c2s ccc2 -> c2sccc2 (frequency 2534)\n", "pair 2487: CC2 =O -> CC2=O (frequency 2534)\n", "pair 2488: n ccc2 -> nccc2 (frequency 2533)\n", "pair 2489: CCC3( CC2) -> CCC3(CC2) (frequency 2533)\n", "pair 2490: Cn2 c( -> Cn2c( (frequency 2530)\n", "pair 2491: CCCC(=O) O) -> CCCC(=O)O) (frequency 2529)\n", "pair 2492: /C(=N\\ O) -> /C(=N\\O) (frequency 2529)\n", "pair 2493: cccc 2)cc1 -> cccc2)cc1 (frequency 2528)\n", "pair 2494: c3c( ccc( -> c3c(ccc( (frequency 2527)\n", "pair 2495: c2c(C) cccc2 -> c2c(C)cccc2 (frequency 2527)\n", "pair 2496: P(O) (O)=O) -> P(O)(O)=O) (frequency 2527)\n", "pair 2497: CN =C( -> CN=C( (frequency 2522)\n", "pair 2498: C( OC -> C(OC (frequency 2521)\n", "pair 2499: N 2) -> N2) (frequency 2520)\n", "pair 2500: cc c5 -> ccc5 (frequency 2519)\n", "pair 2501: cc (=O) -> cc(=O) (frequency 2518)\n", "pair 2502: COc1cc( N -> COc1cc(N (frequency 2518)\n", "pair 2503: c1s ccc1 -> c1sccc1 (frequency 2515)\n", "pair 2504: O=C1 N( -> O=C1N( (frequency 2514)\n", "pair 2505: /C=C3 \\ -> /C=C3\\ (frequency 2514)\n", "pair 2506: C(C)(C) O -> C(C)(C)O (frequency 2512)\n", "pair 2507: CC )cc2) -> CC)cc2) (frequency 2504)\n", "pair 2508: c1c( F)cc( -> c1c(F)cc( (frequency 2503)\n", "pair 2509: C) C(=O) -> C)C(=O) (frequency 2502)\n", "pair 2510: C3 )cc2) -> C3)cc2) (frequency 2500)\n", "pair 2511: C2 (C)C) -> C2(C)C) (frequency 2498)\n", "pair 2512: c2c(=O) n1 -> c2c(=O)n1 (frequency 2496)\n", "pair 2513: CCN S(=O)(=O) -> CCNS(=O)(=O) (frequency 2496)\n", "pair 2514: c3n nn -> c3nnn (frequency 2495)\n", "pair 2515: C(=S) S -> C(=S)S (frequency 2493)\n", "pair 2516: C [C@@]12 -> C[C@@]12 (frequency 2493)\n", "pair 2517: c2ccc(- c3ccccc3)cc2) -> c2ccc(-c3ccccc3)cc2) (frequency 2492)\n", "pair 2518: c2ccc( N(C)C) -> c2ccc(N(C)C) (frequency 2491)\n", "pair 2519: C1 N(C(=O) -> C1N(C(=O) (frequency 2491)\n", "pair 2520: Cc1cc(C) cc( -> Cc1cc(C)cc( (frequency 2489)\n", "pair 2521: c1 oc( -> c1oc( (frequency 2488)\n", "pair 2522: [C@@H](C) CO) -> [C@@H](C)CO) (frequency 2486)\n", "pair 2523: c4 )cc -> c4)cc (frequency 2485)\n", "pair 2524: c( C(N)=O) -> c(C(N)=O) (frequency 2484)\n", "pair 2525: N O -> NO (frequency 2483)\n", "pair 2526: /C=C /C=C/ -> /C=C/C=C/ (frequency 2483)\n", "pair 2527: - n1 -> -n1 (frequency 2483)\n", "pair 2528: c1ccc( C(F)(F)F)cc1 -> c1ccc(C(F)(F)F)cc1 (frequency 2482)\n", "pair 2529: c(N (C)C) -> c(N(C)C) (frequency 2481)\n", "pair 2530: C1 (N -> C1(N (frequency 2480)\n", "pair 2531: C(C) (O) -> C(C)(O) (frequency 2480)\n", "pair 2532: N3 C(=O) -> N3C(=O) (frequency 2479)\n", "pair 2533: C c1cccc(C)c1 -> Cc1cccc(C)c1 (frequency 2479)\n", "pair 2534: c3cc4 ccccc4 -> c3cc4ccccc4 (frequency 2478)\n", "pair 2535: C3 4 -> C34 (frequency 2478)\n", "pair 2536: ( C(=O)O) -> (C(=O)O) (frequency 2478)\n", "pair 2537: [C@H]1 [C@H]( -> [C@H]1[C@H]( (frequency 2475)\n", "pair 2538: [C@H]1 O) -> [C@H]1O) (frequency 2475)\n", "pair 2539: N( S(=O)(=O) -> N(S(=O)(=O) (frequency 2474)\n", "pair 2540: c2cccc(OC) c2) -> c2cccc(OC)c2) (frequency 2473)\n", "pair 2541: O=C1 NC(=O) -> O=C1NC(=O) (frequency 2473)\n", "pair 2542: CC(C) O -> CC(C)O (frequency 2472)\n", "pair 2543: C2=C( O) -> C2=C(O) (frequency 2469)\n", "pair 2544: [C@H]2 CC -> [C@H]2CC (frequency 2467)\n", "pair 2545: C(C)(C)C) cc2) -> C(C)(C)C)cc2) (frequency 2467)\n", "pair 2546: Cc1 c2c( -> Cc1c2c( (frequency 2465)\n", "pair 2547: CCC [C@@H]2 -> CCC[C@@H]2 (frequency 2465)\n", "pair 2548: C2 CCN(C(=O) -> C2CCN(C(=O) (frequency 2465)\n", "pair 2549: c(OC) c1) -> c(OC)c1) (frequency 2462)\n", "pair 2550: c4cc cs -> c4cccs (frequency 2461)\n", "pair 2551: COc1cc( OC) -> COc1cc(OC) (frequency 2460)\n", "pair 2552: NC(=O) c1cc( -> NC(=O)c1cc( (frequency 2459)\n", "pair 2553: c2cccc( NC(=O) -> c2cccc(NC(=O) (frequency 2458)\n", "pair 2554: -c2ccc( F)cc2) -> -c2ccc(F)cc2) (frequency 2458)\n", "pair 2555: S(=O)(=O) N1 -> S(=O)(=O)N1 (frequency 2457)\n", "pair 2556: c2cc(- c3ccccc3) -> c2cc(-c3ccccc3) (frequency 2456)\n", "pair 2557: c( CO -> c(CO (frequency 2456)\n", "pair 2558: c(C) c2 -> c(C)c2 (frequency 2454)\n", "pair 2559: P(O) (O) -> P(O)(O) (frequency 2454)\n", "pair 2560: CCN(CC) C(=O) -> CCN(CC)C(=O) (frequency 2453)\n", "pair 2561: C( C(F)(F)F) -> C(C(F)(F)F) (frequency 2453)\n", "pair 2562: n c2n( -> nc2n( (frequency 2452)\n", "pair 2563: c2ccc(C) cc2 -> c2ccc(C)cc2 (frequency 2452)\n", "pair 2564: n( C(C)C) -> n(C(C)C) (frequency 2451)\n", "pair 2565: CC[C@]4 3C) -> CC[C@]43C) (frequency 2450)\n", "pair 2566: C(=O)N (C)C -> C(=O)N(C)C (frequency 2450)\n", "pair 2567: c4ccco 4) -> c4ccco4) (frequency 2449)\n", "pair 2568: cn (C) -> cn(C) (frequency 2448)\n", "pair 2569: c2n cc(- -> c2ncc(- (frequency 2445)\n", "pair 2570: c1n ccc( -> c1nccc( (frequency 2444)\n", "pair 2571: c3cn ccc3) -> c3cnccc3) (frequency 2443)\n", "pair 2572: c3ccc(Cl)cc3 Cl) -> c3ccc(Cl)cc3Cl) (frequency 2443)\n", "pair 2573: N c1nc(N -> Nc1nc(N (frequency 2442)\n", "pair 2574: no 1) -> no1) (frequency 2440)\n", "pair 2575: S1 (=O)=O -> S1(=O)=O (frequency 2440)\n", "pair 2576: P(O)(=O) O) -> P(O)(=O)O) (frequency 2440)\n", "pair 2577: n n2)c1 -> nn2)c1 (frequency 2438)\n", "pair 2578: c2ccc(F)cc2) CC1 -> c2ccc(F)cc2)CC1 (frequency 2436)\n", "pair 2579: C2 (C)C -> C2(C)C (frequency 2434)\n", "pair 2580: nc(S C -> nc(SC (frequency 2433)\n", "pair 2581: CCN 4 -> CCN4 (frequency 2431)\n", "pair 2582: CCCO 3) -> CCCO3) (frequency 2431)\n", "pair 2583: c 7 -> c7 (frequency 2430)\n", "pair 2584: CC[C@]4(C) [C@H]3CC -> CC[C@]4(C)[C@H]3CC (frequency 2430)\n", "pair 2585: c2ccc( S(=O)(=O)N -> c2ccc(S(=O)(=O)N (frequency 2429)\n", "pair 2586: c2ccc( Br)cc2 -> c2ccc(Br)cc2 (frequency 2428)\n", "pair 2587: [N+] (C)( -> [N+](C)( (frequency 2427)\n", "pair 2588: c4cc5 c( -> c4cc5c( (frequency 2422)\n", "pair 2589: C(=O)N[C@@H]( CC(C)C) -> C(=O)N[C@@H](CC(C)C) (frequency 2417)\n", "pair 2590: c1ccc2c(c1) OCO2) -> c1ccc2c(c1)OCO2) (frequency 2415)\n", "pair 2591: N C(=O)N1 -> NC(=O)N1 (frequency 2414)\n", "pair 2592: n(C) c1 -> n(C)c1 (frequency 2413)\n", "pair 2593: Cc1ccc( C) -> Cc1ccc(C) (frequency 2410)\n", "pair 2594: Cc1 nc(N -> Cc1nc(N (frequency 2409)\n", "pair 2595: c2cc(C) cc(C) -> c2cc(C)cc(C) (frequency 2407)\n", "pair 2596: c1( OC)cc( -> c1(OC)cc( (frequency 2406)\n", "pair 2597: c2cc o -> c2cco (frequency 2405)\n", "pair 2598: C( CCC) -> C(CCC) (frequency 2405)\n", "pair 2599: COC(=O) c1ccc( -> COC(=O)c1ccc( (frequency 2403)\n", "pair 2600: C) C1 -> C)C1 (frequency 2403)\n", "pair 2601: N1 2 -> N12 (frequency 2402)\n", "pair 2602: c3cccc c13) -> c3ccccc13) (frequency 2401)\n", "pair 2603: [C@]3 (O) -> [C@]3(O) (frequency 2399)\n", "pair 2604: c3s ccc3) -> c3sccc3) (frequency 2398)\n", "pair 2605: O= c1cc( -> O=c1cc( (frequency 2398)\n", "pair 2606: NC(=O)[C@H]( CC(C)C) -> NC(=O)[C@H](CC(C)C) (frequency 2398)\n", "pair 2607: C4CCCC C4) -> C4CCCCC4) (frequency 2398)\n", "pair 2608: CCN1 CCC( -> CCN1CCC( (frequency 2396)\n", "pair 2609: C2 CC( -> C2CC( (frequency 2396)\n", "pair 2610: c4cccc 5 -> c4cccc5 (frequency 2393)\n", "pair 2611: c2n (C -> c2n(C (frequency 2393)\n", "pair 2612: NS(=O)(=O) c1ccc( -> NS(=O)(=O)c1ccc( (frequency 2393)\n", "pair 2613: CCCN =C(N)N) -> CCCN=C(N)N) (frequency 2390)\n", "pair 2614: n1 c(N) -> n1c(N) (frequency 2388)\n", "pair 2615: cc4 c( -> cc4c( (frequency 2388)\n", "pair 2616: c2 c3 -> c2c3 (frequency 2386)\n", "pair 2617: N S(=O)( -> NS(=O)( (frequency 2385)\n", "pair 2618: OCCO 4) -> OCCO4) (frequency 2384)\n", "pair 2619: N1( C( -> N1(C( (frequency 2384)\n", "pair 2620: nc(N3 CCOCC3) -> nc(N3CCOCC3) (frequency 2383)\n", "pair 2621: - c1cc( -> -c1cc( (frequency 2383)\n", "pair 2622: c4ccc5c( c4) -> c4ccc5c(c4) (frequency 2381)\n", "pair 2623: N )cc1 -> N)cc1 (frequency 2378)\n", "pair 2624: c1ccccc1 F) -> c1ccccc1F) (frequency 2377)\n", "pair 2625: c4ccncc 4) -> c4ccncc4) (frequency 2376)\n", "pair 2626: N =C -> N=C (frequency 2376)\n", "pair 2627: c3c( c1) -> c3c(c1) (frequency 2375)\n", "pair 2628: /N=C(\\ C) -> /N=C(\\C) (frequency 2375)\n", "pair 2629: c4 n( -> c4n( (frequency 2374)\n", "pair 2630: c3cc(C) ccc3 -> c3cc(C)ccc3 (frequency 2373)\n", "pair 2631: c2o c(- -> c2oc(- (frequency 2371)\n", "pair 2632: [C@@H]( C(=O)O) -> [C@@H](C(=O)O) (frequency 2370)\n", "pair 2633: CCC(O) ( -> CCC(O)( (frequency 2370)\n", "pair 2634: c3 Cl) -> c3Cl) (frequency 2369)\n", "pair 2635: C12CC3 CC(CC(C3) -> C12CC3CC(CC(C3) (frequency 2368)\n", "pair 2636: C c1ccc2c( -> Cc1ccc2c( (frequency 2368)\n", "pair 2637: c(=O) c2c1 -> c(=O)c2c1 (frequency 2367)\n", "pair 2638: c3ccc( C(=O)O) -> c3ccc(C(=O)O) (frequency 2365)\n", "pair 2639: /C1 =C/ -> /C1=C/ (frequency 2365)\n", "pair 2640: C(Cl) (Cl) -> C(Cl)(Cl) (frequency 2363)\n", "pair 2641: c2cc(OC) ccc2 -> c2cc(OC)ccc2 (frequency 2357)\n", "pair 2642: OC [C@@H]2 -> OC[C@@H]2 (frequency 2356)\n", "pair 2643: C( C(O)=O) -> C(C(O)=O) (frequency 2356)\n", "pair 2644: cc(C) cc1 -> cc(C)cc1 (frequency 2354)\n", "pair 2645: c12 n( -> c12n( (frequency 2354)\n", "pair 2646: 3 )cc1) -> 3)cc1) (frequency 2351)\n", "pair 2647: Cn1 c2c( -> Cn1c2c( (frequency 2350)\n", "pair 2648: ccc( Cl)cc1 -> ccc(Cl)cc1 (frequency 2349)\n", "pair 2649: c3c( n2) -> c3c(n2) (frequency 2349)\n", "pair 2650: F) c1 -> F)c1 (frequency 2347)\n", "pair 2651: [C@@]3 (O) -> [C@@]3(O) (frequency 2342)\n", "pair 2652: O=C( CN1 -> O=C(CN1 (frequency 2340)\n", "pair 2653: c1( -c2cc( -> c1(-c2cc( (frequency 2339)\n", "pair 2654: C3 CCC3) -> C3CCC3) (frequency 2339)\n", "pair 2655: c1( Cl)cc( -> c1(Cl)cc( (frequency 2338)\n", "pair 2656: N( C(=O)N -> N(C(=O)N (frequency 2338)\n", "pair 2657: c3cc( Br)cc -> c3cc(Br)cc (frequency 2337)\n", "pair 2658: CCC1 2 -> CCC12 (frequency 2336)\n", "pair 2659: cc n1) -> ccn1) (frequency 2332)\n", "pair 2660: Cc1cc cnc1) -> Cc1cccnc1) (frequency 2332)\n", "pair 2661: c3cccc4 cccc -> c3cccc4cccc (frequency 2330)\n", "pair 2662: [C@H]2 C -> [C@H]2C (frequency 2330)\n", "pair 2663: [C@H]2 [C@@H]( -> [C@H]2[C@@H]( (frequency 2329)\n", "pair 2664: CCC( C(=O)N -> CCC(C(=O)N (frequency 2329)\n", "pair 2665: c3ccccc3) CC2)cc1 -> c3ccccc3)CC2)cc1 (frequency 2328)\n", "pair 2666: CCCC CC3) -> CCCCCC3) (frequency 2328)\n", "pair 2667: n(C)c(=O) n(C) -> n(C)c(=O)n(C) (frequency 2326)\n", "pair 2668: [C@H](O) [C@@H](O)[C@H](O) -> [C@H](O)[C@@H](O)[C@H](O) (frequency 2326)\n", "pair 2669: N(CCCl) CC -> N(CCCl)CC (frequency 2326)\n", "pair 2670: Cc1ccco 1 -> Cc1ccco1 (frequency 2326)\n", "pair 2671: CCN2 CCOCC2) -> CCN2CCOCC2) (frequency 2325)\n", "pair 2672: [C@H]( CO -> [C@H](CO (frequency 2324)\n", "pair 2673: OC( CO) -> OC(CO) (frequency 2324)\n", "pair 2674: c1- 2 -> c1-2 (frequency 2322)\n", "pair 2675: CC(C)(C) N -> CC(C)(C)N (frequency 2322)\n", "pair 2676: =C /C=C/ -> =C/C=C/ (frequency 2322)\n", "pair 2677: C(=O) OCC(=O) -> C(=O)OCC(=O) (frequency 2318)\n", "pair 2678: c4cccc (F) -> c4cccc(F) (frequency 2317)\n", "pair 2679: c1ccc( S(=O)(=O)N -> c1ccc(S(=O)(=O)N (frequency 2317)\n", "pair 2680: 4CCCC 4) -> 4CCCC4) (frequency 2317)\n", "pair 2681: /C=N/ NC( -> /C=N/NC( (frequency 2316)\n", "pair 2682: c1cn (C) -> c1cn(C) (frequency 2312)\n", "pair 2683: c1c(C) cc( -> c1c(C)cc( (frequency 2311)\n", "pair 2684: F) c(- -> F)c(- (frequency 2311)\n", "pair 2685: C(C)(C) [C@@H]5 -> C(C)(C)[C@@H]5 (frequency 2311)\n", "pair 2686: c1ccc( S -> c1ccc(S (frequency 2308)\n", "pair 2687: C1= S -> C1=S (frequency 2308)\n", "pair 2688: Cc1cc cs1) -> Cc1cccs1) (frequency 2305)\n", "pair 2689: c3n [nH] -> c3n[nH] (frequency 2304)\n", "pair 2690: CCN(C) CC1) -> CCN(C)CC1) (frequency 2303)\n", "pair 2691: [nH] 2)c1 -> [nH]2)c1 (frequency 2302)\n", "pair 2692: c(N C(=O)N -> c(NC(=O)N (frequency 2299)\n", "pair 2693: c(Cl) c2) -> c(Cl)c2) (frequency 2299)\n", "pair 2694: =O) cc3) -> =O)cc3) (frequency 2298)\n", "pair 2695: c3cccc (N -> c3cccc(N (frequency 2297)\n", "pair 2696: c(S C) -> c(SC) (frequency 2297)\n", "pair 2697: c3 2)cc1 -> c32)cc1 (frequency 2293)\n", "pair 2698: c(F) c3) -> c(F)c3) (frequency 2293)\n", "pair 2699: c2 - -> c2- (frequency 2291)\n", "pair 2700: c3ncc cn3) -> c3ncccn3) (frequency 2290)\n", "pair 2701: c3cc( N -> c3cc(N (frequency 2290)\n", "pair 2702: -c2ccc( Cl)cc2) -> -c2ccc(Cl)cc2) (frequency 2289)\n", "pair 2703: c4 c(Cl) -> c4c(Cl) (frequency 2287)\n", "pair 2704: c( OCC) -> c(OCC) (frequency 2287)\n", "pair 2705: c3ccc( S(=O)(=O)N -> c3ccc(S(=O)(=O)N (frequency 2285)\n", "pair 2706: nc(- c2ccccc2) -> nc(-c2ccccc2) (frequency 2283)\n", "pair 2707: CCOCC 3)cc2) -> CCOCC3)cc2) (frequency 2283)\n", "pair 2708: c(=O) n(C -> c(=O)n(C (frequency 2282)\n", "pair 2709: C(C)C) cc2) -> C(C)C)cc2) (frequency 2282)\n", "pair 2710: c2n o -> c2no (frequency 2281)\n", "pair 2711: c4 c5c( -> c4c5c( (frequency 2278)\n", "pair 2712: COc1ccccc1 N1CCN( -> COc1ccccc1N1CCN( (frequency 2277)\n", "pair 2713: n 5) -> n5) (frequency 2275)\n", "pair 2714: CC COc1ccc( -> CCCOc1ccc( (frequency 2272)\n", "pair 2715: co 1 -> co1 (frequency 2271)\n", "pair 2716: c1cccc( O)c1 -> c1cccc(O)c1 (frequency 2271)\n", "pair 2717: (N 3 -> (N3 (frequency 2271)\n", "pair 2718: c1cc( OCC -> c1cc(OCC (frequency 2269)\n", "pair 2719: N1 ) -> N1) (frequency 2269)\n", "pair 2720: =O) ( -> =O)( (frequency 2268)\n", "pair 2721: = N2 -> =N2 (frequency 2267)\n", "pair 2722: nc( NC( -> nc(NC( (frequency 2266)\n", "pair 2723: c4ccc( N -> c4ccc(N (frequency 2266)\n", "pair 2724: c1cccc( -n2 -> c1cccc(-n2 (frequency 2266)\n", "pair 2725: OC 4 -> OC4 (frequency 2266)\n", "pair 2726: c3c( N) -> c3c(N) (frequency 2265)\n", "pair 2727: O [C@H](C) -> O[C@H](C) (frequency 2265)\n", "pair 2728: n c2)c1 -> nc2)c1 (frequency 2264)\n", "pair 2729: c4ccc( C(F)(F)F)cc -> c4ccc(C(F)(F)F)cc (frequency 2259)\n", "pair 2730: c2cc( OCC -> c2cc(OCC (frequency 2259)\n", "pair 2731: c2nc( O) -> c2nc(O) (frequency 2257)\n", "pair 2732: Cc1ccc( OC) -> Cc1ccc(OC) (frequency 2256)\n", "pair 2733: N1 (C -> N1(C (frequency 2255)\n", "pair 2734: Cc1cc cnc1 -> Cc1cccnc1 (frequency 2255)\n", "pair 2735: c4cccn c4) -> c4cccnc4) (frequency 2252)\n", "pair 2736: c3 n2) -> c3n2) (frequency 2251)\n", "pair 2737: C( OC(=O) -> C(OC(=O) (frequency 2251)\n", "pair 2738: c5 cn -> c5cn (frequency 2250)\n", "pair 2739: O=C( CS -> O=C(CS (frequency 2250)\n", "pair 2740: C1( c2ccccc2) -> C1(c2ccccc2) (frequency 2250)\n", "pair 2741: C(F)(F)F) c( -> C(F)(F)F)c( (frequency 2249)\n", "pair 2742: [C@H]1 CO -> [C@H]1CO (frequency 2248)\n", "pair 2743: c4 ncn -> c4ncn (frequency 2246)\n", "pair 2744: C(C#N) =C(N) -> C(C#N)=C(N) (frequency 2246)\n", "pair 2745: C1 CN(C(=O) -> C1CN(C(=O) (frequency 2245)\n", "pair 2746: C1 c2c( -> C1c2c( (frequency 2244)\n", "pair 2747: c1( Br) -> c1(Br) (frequency 2241)\n", "pair 2748: n[nH] 1 -> n[nH]1 (frequency 2237)\n", "pair 2749: c3c( c2) -> c3c(c2) (frequency 2233)\n", "pair 2750: c2 )cc -> c2)cc (frequency 2233)\n", "pair 2751: C(=O) C(C) -> C(=O)C(C) (frequency 2232)\n", "pair 2752: c4c( F) -> c4c(F) (frequency 2231)\n", "pair 2753: c1ccc( C( -> c1ccc(C( (frequency 2230)\n", "pair 2754: /C =C1/ -> /C=C1/ (frequency 2230)\n", "pair 2755: c(OC) c(OC) -> c(OC)c(OC) (frequency 2229)\n", "pair 2756: [C@H]( C(=O)N -> [C@H](C(=O)N (frequency 2229)\n", "pair 2757: c1c(O) cc( -> c1c(O)cc( (frequency 2227)\n", "pair 2758: c1ccc( CN2 -> c1ccc(CN2 (frequency 2226)\n", "pair 2759: [C@]2 (C)CC -> [C@]2(C)CC (frequency 2224)\n", "pair 2760: Cc1n n(C) -> Cc1nn(C) (frequency 2224)\n", "pair 2761: CC(=O) OC) -> CC(=O)OC) (frequency 2223)\n", "pair 2762: c1c( C2 -> c1c(C2 (frequency 2222)\n", "pair 2763: C3 CCCCC3 -> C3CCCCC3 (frequency 2222)\n", "pair 2764: c1c( C#N) -> c1c(C#N) (frequency 2220)\n", "pair 2765: c1(C) ccc( -> c1(C)ccc( (frequency 2220)\n", "pair 2766: C2=O) ccc1 -> C2=O)ccc1 (frequency 2220)\n", "pair 2767: C) ( -> C)( (frequency 2218)\n", "pair 2768: [C@@H](O) [C@H]1O -> [C@@H](O)[C@H]1O (frequency 2216)\n", "pair 2769: c2ccccc2 O) -> c2ccccc2O) (frequency 2215)\n", "pair 2770: c2cc nc(N -> c2ccnc(N (frequency 2214)\n", "pair 2771: c2c(F) cccc2) -> c2c(F)cccc2) (frequency 2214)\n", "pair 2772: P(=O) (O -> P(=O)(O (frequency 2214)\n", "pair 2773: )cc( OC)c1 -> )cc(OC)c1 (frequency 2214)\n", "pair 2774: c3cc(O) ccc3 -> c3cc(O)ccc3 (frequency 2213)\n", "pair 2775: C1 N -> C1N (frequency 2212)\n", "pair 2776: Br) c1) -> Br)c1) (frequency 2212)\n", "pair 2777: [C@H]( Cc1ccccc1)NC(=O) -> [C@H](Cc1ccccc1)NC(=O) (frequency 2211)\n", "pair 2778: [nH] c12 -> [nH]c12 (frequency 2209)\n", "pair 2779: 3)cc2 1 -> 3)cc21 (frequency 2209)\n", "pair 2780: cn c12 -> cnc12 (frequency 2208)\n", "pair 2781: c1cc( F)cc -> c1cc(F)cc (frequency 2207)\n", "pair 2782: c( C(C)C) -> c(C(C)C) (frequency 2207)\n", "pair 2783: c4cc( F)cc -> c4cc(F)cc (frequency 2206)\n", "pair 2784: N( S( -> N(S( (frequency 2204)\n", "pair 2785: CCn1 c(=O) -> CCn1c(=O) (frequency 2204)\n", "pair 2786: C(=O)N2 C -> C(=O)N2C (frequency 2201)\n", "pair 2787: C( CS -> C(CS (frequency 2201)\n", "pair 2788: s c1) -> sc1) (frequency 2200)\n", "pair 2789: c2cn (C) -> c2cn(C) (frequency 2200)\n", "pair 2790: c1cc( C2 -> c1cc(C2 (frequency 2199)\n", "pair 2791: c(N) n1 -> c(N)n1 (frequency 2198)\n", "pair 2792: C[C@H]( NC( -> C[C@H](NC( (frequency 2197)\n", "pair 2793: C [S+]([O-]) -> C[S+]([O-]) (frequency 2197)\n", "pair 2794: c2cn n3 -> c2cnn3 (frequency 2195)\n", "pair 2795: CC[C@H]( NC(=O) -> CC[C@H](NC(=O) (frequency 2194)\n", "pair 2796: C#N )cc1) -> C#N)cc1) (frequency 2193)\n", "pair 2797: c2ccc(- c3n -> c2ccc(-c3n (frequency 2192)\n", "pair 2798: P (O -> P(O (frequency 2192)\n", "pair 2799: CCN(CC) CC) -> CCN(CC)CC) (frequency 2191)\n", "pair 2800: c3cccc(OC) c3) -> c3cccc(OC)c3) (frequency 2190)\n", "pair 2801: CCCC CCC) -> CCCCCCC) (frequency 2188)\n", "pair 2802: OCCO 2) -> OCCO2) (frequency 2187)\n", "pair 2803: CC c2c( -> CCc2c( (frequency 2187)\n", "pair 2804: c2) nc1 -> c2)nc1 (frequency 2186)\n", "pair 2805: ccc( C)c1 -> ccc(C)c1 (frequency 2185)\n", "pair 2806: c3 - -> c3- (frequency 2185)\n", "pair 2807: c2ccc(Cl)cc2) CC1 -> c2ccc(Cl)cc2)CC1 (frequency 2183)\n", "pair 2808: nc3 C) -> nc3C) (frequency 2182)\n", "pair 2809: C23CC4 CC(CC(C4) -> C23CC4CC(CC(C4) (frequency 2182)\n", "pair 2810: c3ccc( C(F)(F)F)cc -> c3ccc(C(F)(F)F)cc (frequency 2181)\n", "pair 2811: C[C@@H]( NC(=O) -> C[C@@H](NC(=O) (frequency 2180)\n", "pair 2812: c2cc( S(=O)(=O)N -> c2cc(S(=O)(=O)N (frequency 2179)\n", "pair 2813: cc( Cl) -> cc(Cl) (frequency 2178)\n", "pair 2814: n1 ccc( -> n1ccc( (frequency 2177)\n", "pair 2815: [C@]1 (C)CC -> [C@]1(C)CC (frequency 2175)\n", "pair 2816: ncn c32) -> ncnc32) (frequency 2172)\n", "pair 2817: c4ccc(Cl)cc 4 -> c4ccc(Cl)cc4 (frequency 2171)\n", "pair 2818: c3cccc c3c(=O) -> c3ccccc3c(=O) (frequency 2170)\n", "pair 2819: 3)cc2) CC1 -> 3)cc2)CC1 (frequency 2165)\n", "pair 2820: N S( -> NS( (frequency 2162)\n", "pair 2821: c(C) cc2) -> c(C)cc2) (frequency 2160)\n", "pair 2822: CC [C@@]4 -> CC[C@@]4 (frequency 2160)\n", "pair 2823: ccc1) =O -> ccc1)=O (frequency 2158)\n", "pair 2824: Cc1ccc( S(=O)(=O)N2 -> Cc1ccc(S(=O)(=O)N2 (frequency 2158)\n", "pair 2825: cc nc1 -> ccnc1 (frequency 2155)\n", "pair 2826: CC1 CC1 -> CC1CC1 (frequency 2153)\n", "pair 2827: c1ccc( Cl)c(Cl)c1 -> c1ccc(Cl)c(Cl)c1 (frequency 2152)\n", "pair 2828: c1c( OC)c(OC) -> c1c(OC)c(OC) (frequency 2150)\n", "pair 2829: CC( CO -> CC(CO (frequency 2143)\n", "pair 2830: n( CC -> n(CC (frequency 2142)\n", "pair 2831: c4 n3) -> c4n3) (frequency 2142)\n", "pair 2832: c2ccccc2 C1 -> c2ccccc2C1 (frequency 2142)\n", "pair 2833: nc3 n2 -> nc3n2 (frequency 2141)\n", "pair 2834: c2)cc1 OC -> c2)cc1OC (frequency 2140)\n", "pair 2835: c1( Cl)c( -> c1(Cl)c( (frequency 2139)\n", "pair 2836: CO 2) -> CO2) (frequency 2139)\n", "pair 2837: Cc2ccccc2) c1 -> Cc2ccccc2)c1 (frequency 2138)\n", "pair 2838: C=C 3 -> C=C3 (frequency 2137)\n", "pair 2839: C4 ( -> C4( (frequency 2137)\n", "pair 2840: N [C@H]1 -> N[C@H]1 (frequency 2135)\n", "pair 2841: c1n cc(- -> c1ncc(- (frequency 2133)\n", "pair 2842: [N+]([O-])=O) cc2) -> [N+]([O-])=O)cc2) (frequency 2133)\n", "pair 2843: c4 nn -> c4nn (frequency 2132)\n", "pair 2844: o c12 -> oc12 (frequency 2131)\n", "pair 2845: / C(O) -> /C(O) (frequency 2131)\n", "pair 2846: CN( Cc1ccccc1) -> CN(Cc1ccccc1) (frequency 2130)\n", "pair 2847: C3) =O) -> C3)=O) (frequency 2129)\n", "pair 2848: [N+]([O-]) =O)cc1 -> [N+]([O-])=O)cc1 (frequency 2128)\n", "pair 2849: CCCCCCCC CC -> CCCCCCCCCC (frequency 2128)\n", "pair 2850: -c2 c[nH] -> -c2c[nH] (frequency 2126)\n", "pair 2851: C(=O) c2c( -> C(=O)c2c( (frequency 2125)\n", "pair 2852: c1( CN2 -> c1(CN2 (frequency 2124)\n", "pair 2853: c3 n2 -> c3n2 (frequency 2123)\n", "pair 2854: C1=C(C) N -> C1=C(C)N (frequency 2123)\n", "pair 2855: C#N )cc( -> C#N)cc( (frequency 2122)\n", "pair 2856: C(=O)N [C@@H]1 -> C(=O)N[C@@H]1 (frequency 2121)\n", "pair 2857: c1 C) -> c1C) (frequency 2120)\n", "pair 2858: O=C( N1 -> O=C(N1 (frequency 2120)\n", "pair 2859: N( C(N -> N(C(N (frequency 2120)\n", "pair 2860: COc1cc c2[nH] -> COc1ccc2[nH] (frequency 2120)\n", "pair 2861: c2n ccc( -> c2nccc( (frequency 2119)\n", "pair 2862: O) =O -> O)=O (frequency 2118)\n", "pair 2863: cn cc1 -> cncc1 (frequency 2117)\n", "pair 2864: c3c(F)cccc3 F) -> c3c(F)cccc3F) (frequency 2116)\n", "pair 2865: CC(C)C) =O) -> CC(C)C)=O) (frequency 2116)\n", "pair 2866: [nH] cc2 -> [nH]cc2 (frequency 2115)\n", "pair 2867: /C( =C(\\ -> /C(=C(\\ (frequency 2114)\n", "pair 2868: c(=O) n(- -> c(=O)n(- (frequency 2113)\n", "pair 2869: C[C@@H]( CO) -> C[C@@H](CO) (frequency 2113)\n", "pair 2870: CCN3 CCOCC3) -> CCN3CCOCC3) (frequency 2112)\n", "pair 2871: c1cc( OC)c( -> c1cc(OC)c( (frequency 2111)\n", "pair 2872: N1 (C) -> N1(C) (frequency 2111)\n", "pair 2873: c4ccc(F)cc 4 -> c4ccc(F)cc4 (frequency 2110)\n", "pair 2874: [C@@H]2 C -> [C@@H]2C (frequency 2110)\n", "pair 2875: n cc3) -> ncc3) (frequency 2108)\n", "pair 2876: O=C(N c1cccc( -> O=C(Nc1cccc( (frequency 2108)\n", "pair 2877: cccc1) =O -> cccc1)=O (frequency 2107)\n", "pair 2878: c2ccc( S(=O)(=O)N3 -> c2ccc(S(=O)(=O)N3 (frequency 2107)\n", "pair 2879: N# Cc1cccc( -> N#Cc1cccc( (frequency 2107)\n", "pair 2880: c1c( F)c( -> c1c(F)c( (frequency 2106)\n", "pair 2881: c1( CN -> c1(CN (frequency 2105)\n", "pair 2882: c3ccc(C) cc3 -> c3ccc(C)cc3 (frequency 2102)\n", "pair 2883: c( [N+](=O)[O-])c1 -> c([N+](=O)[O-])c1 (frequency 2102)\n", "pair 2884: c( Cl)cc -> c(Cl)cc (frequency 2102)\n", "pair 2885: c2cccc(C) c2) -> c2cccc(C)c2) (frequency 2101)\n", "pair 2886: c(Br) c1 -> c(Br)c1 (frequency 2101)\n", "pair 2887: CC(=O) N1CCN( -> CC(=O)N1CCN( (frequency 2100)\n", "pair 2888: c1c( C(F)(F)F) -> c1c(C(F)(F)F) (frequency 2098)\n", "pair 2889: [N+] 2 -> [N+]2 (frequency 2098)\n", "pair 2890: CCN( S(=O)(=O) -> CCN(S(=O)(=O) (frequency 2098)\n", "pair 2891: C) cc2 -> C)cc2 (frequency 2096)\n", "pair 2892: /C( =C(/ -> /C(=C(/ (frequency 2095)\n", "pair 2893: c2c(Cl)cccc2 Cl) -> c2c(Cl)cccc2Cl) (frequency 2094)\n", "pair 2894: [C@@]1 3 -> [C@@]13 (frequency 2093)\n", "pair 2895: c(- c4ccccc4) -> c(-c4ccccc4) (frequency 2091)\n", "pair 2896: nc1 S -> nc1S (frequency 2090)\n", "pair 2897: O [C@H]3 -> O[C@H]3 (frequency 2090)\n", "pair 2898: C(=O)O) cc1) -> C(=O)O)cc1) (frequency 2090)\n", "pair 2899: c2c(C) cccc2) -> c2c(C)cccc2) (frequency 2089)\n", "pair 2900: c1c( O)c( -> c1c(O)c( (frequency 2088)\n", "pair 2901: n2) =O) -> n2)=O) (frequency 2086)\n", "pair 2902: OCO 5) -> OCO5) (frequency 2086)\n", "pair 2903: O =C -> O=C (frequency 2086)\n", "pair 2904: C(N (C)C) -> C(N(C)C) (frequency 2086)\n", "pair 2905: c1ccc(- c2ccccc2)cc1 -> c1ccc(-c2ccccc2)cc1 (frequency 2085)\n", "pair 2906: CC2 =O) -> CC2=O) (frequency 2084)\n", "pair 2907: C( CNC(=O) -> C(CNC(=O) (frequency 2084)\n", "pair 2908: c5 [nH] -> c5[nH] (frequency 2082)\n", "pair 2909: CC(=O)N (C) -> CC(=O)N(C) (frequency 2082)\n", "pair 2910: cs 2)c1 -> cs2)c1 (frequency 2081)\n", "pair 2911: c2ncn c(N -> c2ncnc(N (frequency 2080)\n", "pair 2912: c2ccc(Cl)cc 2)c1 -> c2ccc(Cl)cc2)c1 (frequency 2079)\n", "pair 2913: Cc1n c2c( -> Cc1nc2c( (frequency 2079)\n", "pair 2914: CC( CO) -> CC(CO) (frequency 2079)\n", "pair 2915: c(S C -> c(SC (frequency 2077)\n", "pair 2916: c(C) c1) -> c(C)c1) (frequency 2077)\n", "pair 2917: [n+] (C -> [n+](C (frequency 2077)\n", "pair 2918: C) CC1 -> C)CC1 (frequency 2077)\n", "pair 2919: c3cccc(C) c3) -> c3cccc(C)c3) (frequency 2075)\n", "pair 2920: c3cn (C) -> c3cn(C) (frequency 2073)\n", "pair 2921: c1cn n2 -> c1cnn2 (frequency 2073)\n", "pair 2922: [nH] c2) -> [nH]c2) (frequency 2073)\n", "pair 2923: CC [C@]2(C) -> CC[C@]2(C) (frequency 2072)\n", "pair 2924: c1cccc( [N+](=O)[O-])c1 -> c1cccc([N+](=O)[O-])c1 (frequency 2071)\n", "pair 2925: C(N)=O) c(N -> C(N)=O)c(N (frequency 2071)\n", "pair 2926: c2ccc(- n3 -> c2ccc(-n3 (frequency 2069)\n", "pair 2927: [C@@H]4 CC -> [C@@H]4CC (frequency 2068)\n", "pair 2928: C(=O)N S(=O)(=O) -> C(=O)NS(=O)(=O) (frequency 2067)\n", "pair 2929: C(=O)N C2 -> C(=O)NC2 (frequency 2067)\n", "pair 2930: c( F)cc2 -> c(F)cc2 (frequency 2066)\n", "pair 2931: S C( -> SC( (frequency 2066)\n", "pair 2932: c1ccccc1 F -> c1ccccc1F (frequency 2064)\n", "pair 2933: c1cc( Br) -> c1cc(Br) (frequency 2064)\n", "pair 2934: c1cc n -> c1ccn (frequency 2063)\n", "pair 2935: [C@@H]1 [C@@H]( -> [C@@H]1[C@@H]( (frequency 2063)\n", "pair 2936: c(OC) cc( -> c(OC)cc( (frequency 2062)\n", "pair 2937: Cc2ccc( C)cc2) -> Cc2ccc(C)cc2) (frequency 2062)\n", "pair 2938: c3c( Cl)cc( -> c3c(Cl)cc( (frequency 2061)\n", "pair 2939: c1cc c2nc( -> c1ccc2nc( (frequency 2060)\n", "pair 2940: [nH] c1) -> [nH]c1) (frequency 2060)\n", "pair 2941: c2cc( Br) -> c2cc(Br) (frequency 2057)\n", "pair 2942: [C@@H]2 C1 -> [C@@H]2C1 (frequency 2057)\n", "pair 2943: /C(C) =N/ -> /C(C)=N/ (frequency 2057)\n", "pair 2944: c3ccc( NC(=O) -> c3ccc(NC(=O) (frequency 2056)\n", "pair 2945: ncc 4 -> ncc4 (frequency 2055)\n", "pair 2946: P(=O)( OCC) -> P(=O)(OCC) (frequency 2055)\n", "pair 2947: c1n [nH] -> c1n[nH] (frequency 2054)\n", "pair 2948: CCN1 C -> CCN1C (frequency 2054)\n", "pair 2949: ccc( F)cc1 -> ccc(F)cc1 (frequency 2053)\n", "pair 2950: OC)c(OC) c1) -> OC)c(OC)c1) (frequency 2052)\n", "pair 2951: N( S(=O)( -> N(S(=O)( (frequency 2052)\n", "pair 2952: c1cc( O)c(O) -> c1cc(O)c(O) (frequency 2051)\n", "pair 2953: C1 C2( -> C1C2( (frequency 2051)\n", "pair 2954: CC(C) C[C@H](NC(=O) -> CC(C)C[C@H](NC(=O) (frequency 2049)\n", "pair 2955: c1c(O) ccc( -> c1c(O)ccc( (frequency 2047)\n", "pair 2956: (O) =O -> (O)=O (frequency 2046)\n", "pair 2957: S2 (=O)=O -> S2(=O)=O (frequency 2045)\n", "pair 2958: N( C(C)C) -> N(C(C)C) (frequency 2045)\n", "pair 2959: C2 CCC2) -> C2CCC2) (frequency 2045)\n", "pair 2960: ncn c2 -> ncnc2 (frequency 2044)\n", "pair 2961: c3ccccc3 C2=O) -> c3ccccc3C2=O) (frequency 2043)\n", "pair 2962: C4 )cc3) -> C4)cc3) (frequency 2043)\n", "pair 2963: [C@@H]( CC(C)C) -> [C@@H](CC(C)C) (frequency 2039)\n", "pair 2964: OC(C)(C)C) =O) -> OC(C)(C)C)=O) (frequency 2038)\n", "pair 2965: [N+]([O-]) =O -> [N+]([O-])=O (frequency 2037)\n", "pair 2966: C= CC(=O) -> C=CC(=O) (frequency 2036)\n", "pair 2967: C) O -> C)O (frequency 2035)\n", "pair 2968: c3 c1 -> c3c1 (frequency 2032)\n", "pair 2969: c1( -c2ccccc2) -> c1(-c2ccccc2) (frequency 2031)\n", "pair 2970: c3 O) -> c3O) (frequency 2028)\n", "pair 2971: cn c2) -> cnc2) (frequency 2026)\n", "pair 2972: o c2 -> oc2 (frequency 2025)\n", "pair 2973: n4 cn -> n4cn (frequency 2025)\n", "pair 2974: N c1nc(- -> Nc1nc(- (frequency 2024)\n", "pair 2975: CC( NC( -> CC(NC( (frequency 2024)\n", "pair 2976: cn n1 -> cnn1 (frequency 2023)\n", "pair 2977: C(F)(F) C(F)(F) -> C(F)(F)C(F)(F) (frequency 2023)\n", "pair 2978: c23) c1 -> c23)c1 (frequency 2022)\n", "pair 2979: CCCC 2)cc1 -> CCCC2)cc1 (frequency 2022)\n", "pair 2980: c3cc( C(=O)N -> c3cc(C(=O)N (frequency 2021)\n", "pair 2981: CC(=O) NC( -> CC(=O)NC( (frequency 2021)\n", "pair 2982: c1cc( Br)ccc1 -> c1cc(Br)ccc1 (frequency 2020)\n", "pair 2983: C(=O)N1 CCN(C(=O) -> C(=O)N1CCN(C(=O) (frequency 2016)\n", "pair 2984: C(C)=O) cc1 -> C(C)=O)cc1 (frequency 2015)\n", "pair 2985: c2cc3c(cc2) OCO3) -> c2cc3c(cc2)OCO3) (frequency 2014)\n", "pair 2986: nc(S C) -> nc(SC) (frequency 2013)\n", "pair 2987: [C@@H]( CC) -> [C@@H](CC) (frequency 2013)\n", "pair 2988: OC [C@H]2 -> OC[C@H]2 (frequency 2011)\n", "pair 2989: c1ccc( C(N -> c1ccc(C(N (frequency 2010)\n", "pair 2990: C(=O) [C@H]( -> C(=O)[C@H]( (frequency 2009)\n", "pair 2991: C =N -> C=N (frequency 2009)\n", "pair 2992: c23) CC1 -> c23)CC1 (frequency 2007)\n", "pair 2993: [N+]([O-]) =O)c1 -> [N+]([O-])=O)c1 (frequency 2007)\n", "pair 2994: 3)cc c21 -> 3)ccc21 (frequency 2007)\n", "pair 2995: )ccc1 O -> )ccc1O (frequency 2007)\n", "pair 2996: [C@@H]1 (O) -> [C@@H]1(O) (frequency 2004)\n", "pair 2997: /N=C2 \\ -> /N=C2\\ (frequency 2004)\n", "pair 2998: CC( c1ccccc1) -> CC(c1ccccc1) (frequency 2003)\n", "pair 2999: no 2)cc1 -> no2)cc1 (frequency 2000)\n", "pair 3000: n n(C -> nn(C (frequency 2000)\n", "pair 3001: [C@H]1 (O) -> [C@H]1(O) (frequency 2000)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 1h 26min 50s, sys: 17 s, total: 1h 27min 7s\n", "Wall time: 1h 26min 49s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "no pair has frequency >= 2000. Stopping\n" ] } ], "source": [ "%%time\n", "output = codecs.open('../SPE_ChEMBL.txt', 'w')\n", "learn_SPE(SMILES, output, 30000, min_frequency=2000, augmentation=1, verbose=True, total_symbols=True)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.7" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: MANIFEST.in ================================================ include settings.ini include LICENSE include CONTRIBUTING.md include README.md recursive-exclude * __pycache__ ================================================ FILE: Makefile ================================================ SRC = $(wildcard notebooks_build/*.ipynb) all: SmilesPE docs SmilesPE: $(SRC) nbdev_build_lib touch SmilesPE docs_serve: docs cd docs && bundle exec jekyll serve docs: $(SRC) nbdev_build_docs touch docs test: nbdev_test_nbs release: pypi nbdev_bump_version pypi: dist twine upload --repository pypi dist/* dist: clean python setup.py sdist bdist_wheel clean: rm -rf dist ================================================ FILE: README.md ================================================ # SMILES Pair Encoding: A Data-Driven Substructure Tokenization Algorithm for Deep Learning > SMILES Pair Encoding ([JCIM](https://pubs.acs.org/doi/abs/10.1021/acs.jcim.0c01127)) first learns a vocabulary of high frequency SMILES substrings from a large chemical dataset (e.g., ChEMBL) and then tokenizes SMILES based on the learned vocabulary for deep learning models. SMILES Pair Encoding is inspired by [byte-pair-encoding (BPE)](https://www.aclweb.org/anthology/P16-1162/). ![SPE Overview](TOC.PNG) ## How it works A SMILES Pair Encoding (SPE) vocabulary is trained with following steps: - Step 1: Tokenize SMILES from a large dataset (e.g., ChEMBL) at atom-level. - Step 2: Initialize the vocabulary with all unique tokens. - Step 3: Iteratively count the occurs of all token pairs in the tokenized SMILES and merge the most frequent occurring token pair as a new token and add it to the vocabulary. This step will stop when one of the conditions is met: (1) A desired vocabulary size is achieved or (2) No pair of tokens has frequency larger than the frequency threshold. The vocabulary size and frequency threshold are hyperparameters for training SMILES pair encoding. After training the SPE vocabulary, we can then tokenize SMILES based on the trained vocabulary. The SMILES substrings in the trained vocabulary are ordered by their frequency. During the tokenization process, the SMILES is first tokenized at atom-level. SPE will then iteratively check the frequency of each pairs of tokens and merge the pair of tokens that have the highest frequency count in the trained SPE vocabulary until no further merge operation can be conducted. ## Installation ``` pip install SmilesPE ``` ## Usage Instructions ### Basic Tokenizers 1. Atom-level Tokenizer ```python from SmilesPE.pretokenizer import atomwise_tokenizer smi = 'CC[N+](C)(C)Cc1ccccc1Br' toks = atomwise_tokenizer(smi) print(toks) ``` ['C', 'C', '[N+]', '(', 'C', ')', '(', 'C', ')', 'C', 'c', '1', 'c', 'c', 'c', 'c', 'c', '1', 'Br'] 2. K-mer Tokenzier ```python from SmilesPE.pretokenizer import kmer_tokenizer smi = 'CC[N+](C)(C)Cc1ccccc1Br' toks = kmer_tokenizer(smi, ngram=4) print(toks) ``` ['CC[N+](', 'C[N+](C', '[N+](C)', '(C)(', 'C)(C', ')(C)', '(C)C', 'C)Cc', ')Cc1', 'Cc1c', 'c1cc', '1ccc', 'cccc', 'cccc', 'ccc1', 'cc1Br'] The basic tokenizers are also compatible with [SELFIES](https://github.com/aspuru-guzik-group/selfies) and [DeepSMILES](https://github.com/baoilleach/deepsmiles). Package installations are required. Example of SELFIES ```python import selfies smi = 'CC[N+](C)(C)Cc1ccccc1Br' sel = selfies.encoder(smi) print(f'SELFIES string: {sel}') SELFIES string: [C][C][N+][Branch1_2][epsilon][C][Branch1_3][epsilon][C][C][c][c][c][c][c][c][Ring1][Branch1_1][Br] toks = atomwise_tokenizer(sel) print(toks) >>> ['[C]', '[C]', '[N+]', '[Branch1_2]', '[epsilon]', '[C]', '[Branch1_3]', '[epsilon]', '[C]', '[C]', '[c]', '[c]', '[c]', '[c]', '[c]', '[c]', '[Ring1]', '[Branch1_1]', '[Br]'] toks = kmer_tokenizer(sel, ngram=4) print(toks) >>> ['[C][C][N+][Branch1_2]', '[C][N+][Branch1_2][epsilon]', '[N+][Branch1_2][epsilon][C]', '[Branch1_2][epsilon][C][Branch1_3]', '[epsilon][C][Branch1_3][epsilon]', '[C][Branch1_3][epsilon][C]', '[Branch1_3][epsilon][C][C]', '[epsilon][C][C][c]', '[C][C][c][c]', '[C][c][c][c]', '[c][c][c][c]', '[c][c][c][c]', '[c][c][c][c]', '[c][c][c][Ring1]', '[c][c][Ring1][Branch1_1]', '[c][Ring1][Branch1_1][Br]'] ``` Example of DeepSMILES ```python import deepsmiles converter = deepsmiles.Converter(rings=True, branches=True) smi = 'CC[N+](C)(C)Cc1ccccc1Br' deepsmi = converter.encode(smi) print(f'DeepSMILES string: {deepsmi}') >>> DeepSMILES string: CC[N+]C)C)Ccccccc6Br toks = atomwise_tokenizer(deepsmi) print(toks) >>> ['C', 'C', '[N+]', 'C', ')', 'C', ')', 'C', 'c', 'c', 'c', 'c', 'c', 'c', '6', 'Br'] toks = kmer_tokenizer(deepsmi, ngram=4) print(toks) >>> ['CC[N+]C', 'C[N+]C)', '[N+]C)C', 'C)C)', ')C)C', 'C)Cc', ')Ccc', 'Cccc', 'cccc', 'cccc', 'cccc', 'ccc6', 'cc6Br'] ``` ### Use the Pre-trained SmilesPE Tokenizer Dowbload ['SPE_ChEMBL.txt'](https://github.com/XinhaoLi74/SmilesPE/blob/master/SPE_ChEMBL.txt). ```python import codecs from SmilesPE.tokenizer import * spe_vob= codecs.open('../SPE_ChEMBL.txt') spe = SPE_Tokenizer(spe_vob) smi = 'CC[N+](C)(C)Cc1ccccc1Br' spe.tokenize(smi) >>> 'CC [N+](C) (C)C c1ccccc1 Br' ``` ### Pre-trained Models used in the Paper: See the donwload links and the instructions in MolPMoFiT [Github](https://github.com/XinhaoLi74/MolPMoFiT) ### Train a SmilesPE Tokenizer with a Custom Dataset See [train_SPE.ipynb](https://github.com/XinhaoLi74/SmilesPE/blob/master/Examples/train_SPE.ipynb) for an example of training A SPE tokenizer on ChEMBL data. ### Use SPE in Huggingface library Please see this [colab](https://colab.research.google.com/drive/1tsiTpC4i26QNdRzBHFfXIOFVToE54-9b?usp=sharing) for an example. ================================================ FILE: SPE_ChEMBL.txt ================================================ c c C C O ) c 1 c ( C ( = O) c 2 C ) cc cc c 3 C( =O) cc c( ) cc ( C) ( =O) 2 ) cc ( C 1 F ) N ( CC CC c1 ccc( 3 ) C O c n c1 cccc c2 cccc c 4 C N C(=O) N O C) )cc 1 c2 ) C 2 n 1 n c( C( C) c2 ccc( N ) [C@H] ( [C@@H] ( c2 c( = C( CC 1 c1 ) Cl ) c3 cccc N C(=O) cc 1 O CC c1 c( c1 cc c1 cc( = C S (=O) CC N( ( F) c3 ) N 1 n 2 = O c3 ccc( S(=O) (=O) c3 c( CC (=O) CC ( / C c1cccc c1 C( F) F )cc c2 cc c2 n CC C N 2 4 ) c2cccc c2) O C Cl )cc O =C( C(F) (F) c1 n c( =O) c2cccc c2 (C) C) c2 cc( C( N c1 ( [C@H] 1 =C / = N n ( [C@@H] 1 C 3 CC ) CC O CC N CC 2) 1 ) C # c1 2 CC 2 C N( c1cccc ( c( - CC (C) c3cccc c3 c( C) cc 2) N C( cc 2 ( O) c1cccc c1) [C@H] 2 C c1ccc( c( OC) C(=O) O) c3cccc c3) [C@@H] 2 n 2) nc( - C(C) C) c4 cccc )cc1 ) O = /C =C/ c3 cc C 2) CC C( c 5 3 )cc CO c1ccc( CN 1 [O-] ) C O) C(F)(F) F) cc1 ) [C@H]( O) n c2 c( N [C@@H]( O) C( O) c3 cc( CC(=O) N c s CC1 ) / C( CC 3) [N+] (=O) C S c4cccc c4 c1cc 2 c3 n c( O) 2 )cc1 CC CN nc( N CC OCC cc 3) (C) C (=O) =O) n 3 C1 ) n c1 S ) C# N) cc c1 / N )cc ( C(=O) O [C@H] 3 CCCC CCCC ( N c1 cn C 3) Br ) )cc c1 c2 c1 O=C( N cccc 1 [C@@H] 3 c2cccc ( C(C) (C)C) c1cc c2c( c4 ccc( CN 2 C = - c2ccc( - c2 C(C) =O) c( Cl) CO C(=O) )cc 2) C [C@H]( n (C) C( N) =C \ n1 ) C(C) (C) cc 3 c o S ( OCC ) CC N1 c [nH] C( = n cn C n1 C(F)(F) F)cc O C(=O) [C@H] (C) ( [O-]) C c2ccc( [C@@H] (C) c4 c( OC) c(OC) C(=O)N 1 )cc 2 CN C(=O) nc( C) 2) c1 =O) cc1 S(=O)(=O) N CC 3 C [C@@H]( C(=O) N( Cl )cc1 n n C c1 O= C1 C(=O)N 2 S(=O) ( C c2ccccc2) O CO C 4 CN (C) C1 =O c4 ) =N / C c1cc c2 cn 5 ) cccc 2 cc( - )cc 3) [N+](=O) [O-] C1 ( c1ccc2c( c1) F )cc1 C c1cc( c1cccc 2 c1cc2 c( n 3) CO c1cc( CC C) c2cc 3 cc (C) C(O) =O) C c1ccccc1) [C@] 1 n c3 - c2n c2ccc( Cl)cc [N+](=O) [O-]) O C( CCN( C(=O) CC(C) (C) [C@@] 2 CC C1 N 3 =O) c1 cccc 2) [N+] ([O-]) = C2 C# N c( F) [C@] 2 n( - s 1 O 1 CC C2 o 1 C c1ccccc1 cc c2 (C) CC c( N) [C@@] 1 CC( O) CCO C(=O) N C(=O)N C c1n c4ccccc4 ) cccc 3) c1 c2c( cn 1 F)cc 2) C c1c( CC (C)C) nc( N) C [C@H]1 O=C( O) N =C( c3cccc ( C1 CCCC = C1 C n2 = S) c2cc c3c( CC( N - n2 CCN( C c3ccc( Cl)cc c2 )cc1 Cl) c( c(=O) [nH] c3 cn C(N) =O) c3ccc( F)cc P (=O) ( C N (C) n2 )cc1 c1c( C) CCC N( c2 [nH] CCCC C2) c2c( C) CC [C@H]( 3)cc 2) c2 s O [C@H]( C [C@@H]1 OC) c1 C) cc1 C2 =O) c4 cc CC CO c2) c1 N S(=O)(=O) Cl)cc ( c12 c( =N \ c2 3) c1 nc( /N =C/ C) =O) N # cccc c1 3 )cc1 N2 CCN( N1 CCN( CC(=O) O) =C( \ OCC (=O) c1 =O (C) ( CC 4) CC2 )cc1 CCN (C) c1 - n cc c2ccc( - [C@] 3 S (C) O ( C c1cccc( CC NC(=O) N( C(=O) c2 1 c2ccc(Cl)cc 2) c1 s cc c3 CN 3 CCCC ) cc c2) F )cc1) cn 2 CC 4 C(=O)N [C@@H]( c4 cc( C 4) Cl )cc1) [N+]([O-]) =O) CC O) [C@H] 4 c( S CC S CC CN1 Cl) c1 c2 o [C@@H] 4 C(=O) OC) c2 nc( c1ccc( Cl)cc1 CCCC 1 n n1 c2ccc3c( c2) C [C@H]2 n o c2ccccc2 1 [C@@] 3 C2 ( c5 cccc CC n1 C) cc2) CC N2 c1ccc( N 3)cc 2 N) N cccc 3 cccc 1) CCCC 2) c2cc( - c( C(=O)N OC) =O) c2cc3 c( c3ccc(F)cc 3) C [C@@H]2 C2 =O /C =C\ n( C S(C) (=O)=O) cn 2) cn c1 Cl) c(Cl) c2ccc( OC) N1 ( c2cc cn c2 c3c( cc c1) O) cc1 ( - c1ccc( - c2ccccc2 )cc1 cn c3 c3ccc(Cl)cc 3) c(C) c1 n [nH] )cc 3 OC) cc( CCOCC 2) N c1n c2 =O) = C(C) CO c1cccc( O C(C)=O) CC C(N c2ccc( F)cc2) c2cc n F) c( c2 nc(- P(=O) (O) N( C( CO c1ccccc1 N C CCCC C3) CCN( CC) n cc1 OCC O c2) =O) s 2) O [C@@H]( n 4 C(= S) /C =N/ CC C(=O)N O) c( CCOCC 3) N (C)C) C1 2 C2 )cc1 c2 n1 CCCC 3) CC = nc( S CN( C(=O) N( C n2) c1 c1ccc( OC) C =C =O) =O) [nH] 1 /C( =C/ c3ccc( OC) CC [C@@H]( /C( =C\ cc 4 CN C( O c1ccc( c1 [nH] C1 =O) c3ccc( C) c2cc (C) CCC 3 c5cccc c5 c2 cc1 CC2) c1 O) c(O) CCCC 2 c1cc (C) c1ccc( O C2 =N cc2 )cc1 F)cc 2 c2n ( C1 CCN( c3cc 4 c2cccc c12 cc 4) N) =O) [C@]1 2 NC(=O) [C@H]( c1 (C) C1 CN( C c3ccccc3) c1 nc(- OC)c(OC) c(OC) [C@] 4 Cl)cc 2) N2 C(=O) C3 =O) C) c1 c2cccc 3 c2c( - N c1ccc( CCCC C1 C( O S(=O) (N CC(=O) O cc c3) CC C(C) C S(=O)(=O) c1c( O) c3 [nH] OCC) =O) OC) c( C(=O) OC [C@@H]1 O c2n cn c 6 s 1) C1 =C( CCCN 2 C [C@@H](O) CCC 2) (=O) =O o c( ( O N( CC) c(=O) n( n n2 /C =C c2c( =O) C [C@H](O) -c2 cn c2c( cccc2) c3c( C) c1 O - c2ccccc2) - c1ccc( NC( =S) C(C) C CCO CC1 =C( / C(=O)N C c3c( cccc3) ( C(F)(F)F) [n+] 1 CC1 (C) cc( O) - c2cc( N 4 O 2) C( OC)=O) CCCC (=O) - 2 cn 3) c3ccc( - = N) CC # c2) CC1 [C@@H](O) [C@H](O) C(=O)N [C@H]( c( NC(=O) c1cc2c( cc1 c2 nc(N C c1cn C2 CC2) O 2 C(=O)N 3 N1 C(=O) [C@@H]( CO) c1ccc( Cl)cc1) c3 s /C( =N\ C =C( S(=O)(=O) N2 CCO c1ccc( /C =C2 (F) (F) [C@H]1 O CC [C@H]2 cs 1 c2ccc(OC) cc2) c3 o C1 C( n c2) c2cc co cs 2) c2c( O) F)cc ( [C@]2 (C) [C@@] 4 [C@@]2 (C) O[C@H]( CO) c1c( Cl) c2cc( Cl)cc Cl)cc 2 CCCC ( ccc( - C1 N( c1 C c2cccc( Cl) Cc1cc (C) C1 = cn 1) CC(=O) N1 n1 c( c1ccc( F)cc1 cccc 4) c1 2) S( N) CN1 CCN( CCC( O) C3 CC3) c3cc c4c( c1c( OC) CC C(=O)O) C(=O) OCC) Br )cc n cc( c1 o n n2) O CCCC - c1ccccc1 C2 =C( C2 CCCCC2) C1 (=O) c2ccc( C)cc2) c1cc2c( cc1) /C =C(\ nc2 c1 c3cc n c2ccc( O) C1 =N S 1 C) cc1) ( CC) C(=O) C( CCN 3 c2ccc(Cl)cc 2 cn c2 C c2ccccc2 C [C@H]3 n cc2 CS c1n CO C( c3ccc(OC) cc3) c3 c2 c2c( Cl) CCC 3) [C@@] (C)( Br )cc1 C(F)(F) F CC c1ccc( [C@@H]( N) C(C) (C)C c(N 3 c(=O) c( C2 CCN( c3cccc c23) C(=O) OCC S(N) (=O)=O) s c1 o 2) C( OCC)=O) / C(C) c1 nc(N N C(N CN1 C(=O) S (N CN ) Cl)cc c1 C( c2ccccc2) CO c1cc c2ccn cc2) OCO 2 OCO 3) c(OC) c1 c( O c3 4) c4 n OC)c1 OC 4 )cc I ) c1) =O n 4) C [C@@H]3 c4 cn c5 c( N) N) CCCC C1) OCC(=O) N - c2ccccc2 [C@@H]( NC(=O) N2 CCOCC2) N1 CCC( C( CC) CC2) =O) n2 c( CCO CC1) C# C [nH] c(=O) C= C1 C(=O)N (C) CN( C O) cc1) [C@] (C)( O) c1 cc2 c( F) c(F) [C@@]1 2 cccc ( c2ccc( C) c2 c[nH] s 3) C(=O)N ) N( CC n c12 N CC c2n cc CCN (C)C) n(C) c(=O) c3ccccc3 2) c1 =O) c3n cn O C1 c3ccc(C) cc3) P (O) ccc( OC) /C( =N/ c3cc (C) c( C#N) c3ccc( O) o c(- C(F)(F) F)cc1 C2 =O)cc1 c1ccc( F)cc1) ( CC C(=O)N CC c2ccco 2) O c2ccc( C(= N)N =O) cc2) c3cc4 c( C(C) ( c2 C) CCCCCCCC CCCC Br )cc2) =N /N C( c1ccccc1) C c2c( O [C@H]1 [C@]3 (C) 4 )cc3) C1 CC C =O) [N+] (C) CC [C@@H]1 c3 2) c1( N C( N1 c4ccc( F)cc c2) n1 C(=O) ( C2 = O [C@@H]1 c1ccc( NC(=O) C c2cc( c( Br) n c3) c3ccc4c( c3) c2n c3c( 5 )cc CC [C@H]1 - c1n c2n cc( c(Cl) c1 c2cccc n2) c1c( F) C1CCCC C1 =O) ccc1 N C(C)=O) c5 ccc( n1 cn [n+] 2 n c1) O 3) c2cc cs2) c1cc( - [n+] ( c3cc cn C(F)(F)F)cc 2) CC1 ( [C@]2 ( c3cc( Cl)cc O= [N+]([O-]) CC(=O) N( c1 c[nH] cc c4 c2ccccc2) c1 cs 1) c2ccc( O CN (C)C) [nH] 2) cc( OC) c3cc co c3 n( n c4 C1 CC1 CC(O) =O) O 1) c1cc( OC) 3 C) nc(N 3 C( N2 c1cc co 2 )ccc1 /N =C(\ CCC2 ( n c2c( CC( CC( CC [C@@H]2 c3ccccc3 )cc2) OC 3 [C@] ( CCN( C( CCCC 1) C 5 CC [C@H]3 c4ccc( Cl)cc c( C(N c2ccccc2) CC1 OCC ( C(=O) /C=C/ [C@@] ( 4 CCCC cs 3) n1 cc( N=C( N) /C=C2 \ / C1 c2c( F) =C1 \ c3c( =O) c3 c4c( )cc c2 c1c( N Cl) c3) nc( O) c2n c3ccccc3 C( C(=O)N C n3 C(F)(F)F) c1 c3ccn cc3) C(= C) c1cc cnc1 c1cccc2 c1 [N+](=O)[O-] )cc1 c1c( - CC(=O) N2 O) =O) c2ccc( N O (C) CC) =O) ccc( C) c3cccc 4 [C@]1 (C) CC= C) CCCC C3 n2 cn S(=O) (C) C(C) =O O= S(=O)( c(=O) o c2 nc(C) N [C@@H]( CC c1n CC 3)cc c2) C1 O C2 = C) c2cccc (F) CCC 4 n n( [nH] c( c2c( c1) n1 2 [C@H]( NC(=O) c1c( =O) c1cc( N c3ccco 3) C( C( P(=O)(O) O) CC(C) ( c2cc3c( cc2) c3cccc n3) [C@@]1 (C) C[C@H]( NC(=O) ccc( F) CCCC C2 CCOCC 4) CCN1 C(=O) CC C1) S C) c(C) c( C1CCCC C1) c5 ) CO c1c( c2c( OC) c1cccc (F) C1 CC( OC(=O) C) [C@@]2 ( c4cccc ( C3 CCCCC3) n nc1 c1ccc(OC) cc1) (O) ( cccc 4 c2c( cc( s c( n o1 CS c2n [C@H]( CO) CO c1cc2 c3n cc c2cccc c12) C [C@@H](C) c(F) c1 C /C=C\ CC S(=O)(=O) CC OC CN (C)C c1cn ( CO C) = CC(=O) [C@@H]1 2 CC2 ( (C) c1 CC [C@]4 nc( NC(=O) n1 ( C(=O)N c2ccc( c3 =O) CCCN C(=O) c1 (=O) Cc1 c[nH] c1n ( c1n cc( c2cc( OC) cn c1) CCCC N CCN (C)CC c(- c3ccc( [C@]1 ( ccc( N o 1) N1 CC c(- c2ccc( CCCC CC [C@@H] 5 6 ) F)cc (F) c1( OC) c1cc( O) C( c1ccc( NC(=S) N c3) =O) [C@@]3 (C) c( OCC C(=O)N c1ccc( OCO 4) O C(C)(C)C) F)cc c1 c2cccn c2) =C 3 O [C@@H]2 c2 )ccc1 C c3ccccc3 c3ccc(Cl)cc 3 c1 cs c -2 C(=O)O) cc1 c2ccccc2 c1 c2 3)cc1 C2) c1 OC)cc( OC) -c2 nc( n2) CC1 c2ccccc2 n1 [C@@]1 ( n cc1) c3cc( - c1ccc(OC) cc1 C3 ( 4) CC3) CCCN 3 c1) =O) Cn1 cc( n n1) c1n cc F C(F)(F) C#N )cc N c1ccccc1 C1 (C) P(=O) ( c3c( Cl) [C@H]1 2 C(N) =N c(- c3ccccc3) [nH] 3) c2cccc( - c1( - c2ccccc2) =O) C( NC(=O) /C=C(\ C) c(=O) n1 c3cccn c3) c2cccc( C(F)(F)F) C(N C( O C(C)(C) C(=O)N C( C(F)(F)F)cc 3) CC C(=O) CO c2ccc( n(- c2ccccc2) OC c1ccccc1) C( N( CC [C@@H]3 3) c1 )cc( OC)c1OC C c2n [N+] ( P ( c3 nc( CCC n1 C(F) F) CN2 C(=O) S(=O)(=O)N 3 c(O) c1 s c2 [C@H]( N) 3 c( c1( Cl) [C@H](O) [C@H](O) =O) ccc( n cc3 cc cn CCCC 3 /C=C/ C(=O) c3cc cs3) c2ccccc2 Cl) c1n c2c( CN2 CCN( c(- c2ccccc2) CC (F)(F) C(=S) N c3c( F) N) ncn Cl c1ccc( )cc 2)cc1 o c(=O) c5ccccc5 ) O [C@H]2 c1cccc( Cl)c1 [N+](=O)[O-]) c1 N2 CCC( C1 C2 CCC [C@H]1 S 2 [C@H]1 CC CN( C( c3ccc( Br C(= N)N) cn ( Br )cc1) Cl) c1) N1 CCOCC1 Cn1 cn C(=O) C) C(N [C@H]( c1cc( Cl)ccc1 (O) =O) c1ccc( OCC nc2 1 CO c1ccc2c(c1) c2cc c3ccccc3 CN(C) C(=O) C4 CC4) CC1) =O c3ccc( O c3 C) c1 (F) O= S( C( CCCC CC2 )ccc1 [C@@H]( NC( c3ccccc3) CC2) c2ccc( N3 O=C(N c1ccc( cc3 c( C) =O CC1 =C( c2 c1) n3 c( (=O) o =O) C c2ccc(O) cc2) cc( N O=C( /C=C/ C(=O)N O) c2cc( O) c( NC( CC c1ccccc1) C 5) C[C@H]( N) [S+] ([O-]) c2 =O)cc1 =O) CC1 OC) cc2) C= C(C) c2 cs /C =C(/ c3 nc(- c2cc( F)cc c3 )cc2) C2 CCCC2) o 3) C(O) ( Cc1ccc( NC(=O) C2 3 S(=O)(=O) c1ccc( C1 =C(C) C(N [C@@H]( C(F) ( o 2)cc1 c2ccc3c(c2) OCO3) n2 C) c2cccc(Cl) c2) n1 C c2n cccc2 [C@@H]1 CC [nH] 1) n2 )ccc1 c3cc cnc3 = S Br )cc( c2ccccc2 F) c2ccc( F)cc2 c2ccc( Br)cc2) - c1c( Cc2ccc( - c2ccc( OCC /C =N/N C(C)C) =O) C(F)(F) F)cc1) P(=O)(O) O c1cc ncc1 c(O) c( # N) CCN C( 3)cc ( n n3 c - CC2) C1 c3cc( OC) Cn1 c(=O) CC2) CC1 C [C@H](C) C1 CC1) OC [C@H]1 = [N+] (C) (O) [C@H] 5 1 C C#N )cc1 c1( O) c1c2c( ccc1) [n+] ([O-]) [N+](=O)[O-] )cc2) nn 3) / c( CN 4 c4ccc(F)cc 4) F c1ccc( s 2)cc1 Cc1 nc( c( N2 C [C@]12 OCC 2 c1cc c2[nH] O) cc2) [C@H]2 O) c2ccc( NC(=O) c1) N c4 [nH] =O) cc( c1 nc(N) CC OC) C= C2 3)cc 2)cc1 N(C) C(=O) c3c( O) c1cccc n1 (=O) N nc2 n1 c1cc( -c2ccc( c1cc2 cccc C( C Br )cc2 c1c( -c2ccc( C( \ O=C1 N c2cc3c( cc2 C( Cl) c1 nc(C) C2 CCCC CC 5) c3ccc(O) cc3) c3c( - c1 N c1ccc( -c2ccc( c3c( cc( CC c1ccccc1 /C=N/ NC(=O) n( -c2ccc( c3ccc(F)cc 3 ccc( Cl) CO c1cc2c(cc1 C(F)(F)F)cc (C(F)(F)F) C1 O c2cccc (C) c1cc( NC(=O) C c3ccc( C( C#N) C(=O)O C(C)(C)C) C(=O) c1ccccc1 CC(C)(C) O c3cccc (F) CCCCCCCC CCCCCCCC c2 F) CC= C(C)C) O c1c( C( OC) CC c2ccccc2) c2 )cc( =O) cc2 c4ccccc4 3) Br) c1 (F) F C(N) =O n1 c2c( =C( O) n nc2 c4cccc c34) /C=C /C Cc2ccc( F)cc2) c1cccc n1) O= c1[nH] n(- c3ccccc3) C( C(N [C@H]3 CC O[C@H](CO) [C@@H](O) CCCC C) - c2cc c1ccc( C)cc1) ncn 2 c(=O) n2 c2cccc( OC) nc1 - c2cc3 cccc C4 CCCC =O) cc1) c( C(F)(F)F) O C(C)C) c3 c2) o c1 c2ccccc2 1) c3 )cc c2 c3ccccc3 C(F)(F)F)cc 2 c2n (C) 2) CC1 s c2c1 CCC3 ( c1n cn2 )cc1) =O c4c( cccc4) /C(C) =C/ COc1ccc( NC(=O) [C@H]1 ( [C@@] 5 Cl)cc( Cl) = N1 c3 )cc2 O= S(=O)(N C1CCCC 1 c1ccccc1 - n2 cc( c1( -c2ccc( S CC(=O)N [N+](=O)[O-] )cc N# Cc1ccc( c12c( cccc1) OC c2ccccc2) C(N 3 C1 C c2 Cl) c1ccc( -c2n CCCN( C(=O) c1ccc( -n2 CN2 CCC( c1c( NC(=O) CN C N (C)C c3n c4ccccc4 c(=O)[nH] c1=O [C@H](O) [C@@H](O) =O) C1 c3 c[nH] C(=O) C1 c3cccc( Cl)c3) -c2 o N [C@H]( OC c1ccccc1 O CC1 n1 c(- c3cc( F)cc -c2 cs c1ccc( O)cc1 O[C@H](CO) [C@@H](O)[C@H](O) C#N )cc2) )cc (C) c4ccc(Cl)cc 4) [C@@]2 3 ccc( O) C c1ccc2c(c1) Cc1cc co CC( NC(=O) Cc2ccc( Cl)cc2) c2 O) c3n cc( C(=O) C2 c1cc cs1 CCCN (C)C) n c3c( [C@]2 3 C1 CO nc1 N C(=O) C CCN(C) CC3) c2n cnc3 CN1 CCC( c2cc( OC)c(OC)c(OC) c( C(=O)O) c2 nc(N) [n+] (C) C3 =N c2cc( NC(=O) cc2 1 c(C) cc1 CCN1 CCN( C2) C1 c2cc c1 S(=O)(C) =O) C= C) C(=O)N1 CCN( n2 cc [C@@H]1 ( c3ccccc3 )cc2 CN S(=O)(=O) c4ccc( OC) C( CO) (C) (C) C( /C=C/ cc2) c1 c2 N CCCN1 C(=O) c1) C =C1 / OCC (O) )cc1 2 c1ccc( N2 C) c( c2c( N cc2 c1 N c1nc( [C@]3 ( [C@@H]3 CC n3 cn N=C( N)N o n1 C(=O)N (C)C) cc n1 [C@@H]( OC(C)=O) S(=O)(=O) O) c3 nc(N cn 3 c2cc( OC)c(OC) N( CC(=O)N C3 CCCC3) [nH] c(- cc c4) CC(C) = O c1ccccc1 Cc1ccccc1) NC(=O) c2c( c1 [C@@H]2 CC c3c( OC) C2 =O)c1 C1 CCC( C( S c4ccc( C) OC) cc1 C c2cn n n(C) CC( N) c3ccc( N4 C(C)C) cc1 c1n cn CCC O) c1ccccc1) c1ccccc1 Cl)c(Cl) c1 n1 cc 3) CC1 C( C(=O)O) ncn 1 CN1 CC c1ccco 1) C12 CC3 [C@@H]( N N2 CCCCC2) C3 =C( c3cc( O) [C@H] (C)C c12 ccccc1 c3cc( OC)c(OC) 2 )cc( C(=O)N2 CCN( [C@H]( O c2cccc3 cccc N1 CCCC1 c(S CC(=O)N CCOCC 2)cc1 c12 cc( [C@@H]( O C2) CC1 =O) [nH] c3ccc(Br )cc3) c(=O) n(C) C( CO C(= N) c3ccccc3 Cl) c( C(=O) cc c2c1 c4cc cn N c1cc( Cc1 o OCO 2) c1ccco 1 Cc1c( C) c3ccccc3) =O) CC 3)cc2 = [N-] c1cc cnc1) C(C)(C)C) =O) c( CC) c1cccc(F) c1 CC 5 4 CCOCC4) CCN(C) CC2) 3)cc c1 c3 nc(C) [C@@]3 ( c2ccccc2 )cc1) [C@@H]( CC CCC2( CC1) C(F)(F)F) c3) c1cc cs1) [C@H]( OC(C)=O) c2cc( N P(O) (=O) =C( N) /N =N/ n nc( CO 2 C(=O)N( C c1ccc( C)cc1 CC# N) O c3ccc( cccc c12 S(=O)(=O) C) c1( NC( CC[C@]4 (C) CCCC N) [nH] c1 CCCC NC(=O) ( c2ccccc2) nc1 C c1cccc (C)c1 c1c( N) cc c12 (C)C) =O) C( CC c4ccccc4 )cc3) n2 C CCN CC3) c2c( N) CCCC O CCCC C c3n cccc3 c1cc( C(=O)N Cl)cc 3) s c3 c2cc1 OC c(C) c(C) /C=C2 / c1cc ncc1) c2nc(N 3 c3ccc( OCC c2c(Cl) cccc2 2 )cc1) O C(C) O= c1 4) =O) [nH] c2 C(=O)N1 CCC( [C@] 5 [C@@H]( CO C3 = c3ccccc3 2)cc1 c4ccccc4) CC3) CC( N)=O) c2cc( Cl)c( c2cccc (N [C@H]3 O) c2n c1 )cc2 c( N =C1 CC2) n1 c5 cc cc2) =O) c1cccc( NC(=O) OCCO 2 c1cc( Cl)c( [C@H]1 CC[C@H]( n2) C1 C /C=C/ c( Cl)cc1 ccc( Cl)c1 C(C) =C( C /C(=N\ C1) C2 c1ccc( Br)cc1 CCC S NC( N) CN( CC) 3)cc c2 C(N C )cc c3 c(OC) cc1 c( C nc(- c3ccccc3) O O c4ccc( O) C4 =O) S) =N cc1) =O =[N+] =[N-] C /C(=C\ /N =C(/ N2 CC O c2c( CCCN ) 2) C1 Cn1 cc c3ccccc3 F) c1ccc( C(=O)N N =C2 nc(S CC(=O)N CCCC N1 C1CCCC 1) c1c(Cl) cccc1 = C(C)C) n3 C) c1cc(OC) ccc1 C(=O) (N F) c(Cl) c3cccc c23)cc1 c3cccc (C) c1ccc( OC n1 - c1c(OC) ccc( Cc1ccc( O)cc1) c4cc ncc cccc c4 c( =S) N(CC) CC) n n(- [C@@H]( C c1n c2ccccc2 c2 1) c1cccc( N 4 )cc3 c(=O)[nH] c(=O) c2ccc( Cl)c(Cl) c1c( NC( O[C@H]( CO n3 cc Cc1ccc( Cl)cc1 =O) =O c1n cccc1 ( Cl) / C2 o c(C) c1cc(C) ccc1 c1( NC(=O) CC2 CC2) n 3)cc2) CCO 1 c2cccc (O) Cc1 nc(- c2c(F) cccc2 c1cc( -c2n N2 CCCC C( OCC /C=C 3 CC1 2 C(O) =O C[C@H]1 CN( = N)N [C@H]( CC C) ccc1 c2 c1cccc2 c1cn cc( c1c( Cl)cc( C3 CCN( c2cccc(F) c2) c3ccc4c(c3) OCO4) N C1 CCO c1ccccc1 N1 CCN(C(=O) F)cc 3) Cc1 cs c- 3 n2cn c3c( N2 CCCC2) [C@@]4 (C) - c2c( c3cc( OC)c(OC)c(OC) c1c(Cl) ccc( n( CC) c1( C(N c5 cc( C( c1cc( [C@@H]( OC(=O) [C@@H](O) [C@@H](O) (=O)=O) cc1 NC( =N) c1c( C(=O)N [C@@H] (C)C cn c(N CN CC N2 CCN(C(=O) N C1=O [C@@]1 (O) c2) ccc( ccc2 1 S C NC(=O) C( CC3) =O) c2cc( C(F)(F)F)cc )cc n1 [N+] 1 NC(=O) c1ccc( CC [C@H](C) c4cc 5 c3cc4c( cc3 N1 CCCCC1 CO 1 c1cc( F)ccc1 CC(=O)N c1ccc( CC 3)cc2) N CCCC CO c1ccc2c( -c2 [nH] c2cc(Cl)cc c2 c2ccc( C(F)(F)F)cc2) COc1cc2c(cc1 OC) COc1ccc( -c2n o c2c1 =C( N)N) cc2 )ccc1 C3) C2 C1 =C S(=O)(=O) N( CO C CC [C@]3(C) COc1ccc( C2 C( c3ccccc3) c3ccc( N CC1 CC1) )cc( - n1 c(N CC(C) (C)C) n 3)cc c2c( Cl)cc( c1c(F) cccc1 N2 C [C@H](C) CO) -c2 nc(- [N+](=O)[O-] )cc1) /C=C/ C(=O)N C c1cc2 CC(C) C c1( -c2n CN C(=O)N C(=O)O 1 C c1cccc2 n n2)cc1 c2cccc(C(F)(F)F) c2) c3cc c4ccccc4 F C(F)( n2 c(=O) S(=O)(=O) c2ccc( Cc1cn ( ccc(F) c1 O [C@@H]3 c3cccc(F) c3) CC N) c2o ccc2) CCC 4) C#N )cc3) Cc1ccc( C(=O)N OC[C@H]1 O[C@@H]( c( C( n c4) C [n+]1 F)cc 2)cc1 [C@H](O) [C@@H]1O ccc( NC(=O) CC c1cc( c3ccc( C(F)(F)F)cc3) -c2 s [C@] (O)( c1n (C) CS ) c(F) c2) C(O) C(O) CC1 CCCCC1) n1 c(=O) C) C N1 CCOCC1) [C@@] (O)( CC1 =O c1ccc2c(c1) OCO2 CCN CC2) Cn2 cn [C@@]2 (O) c3 cs Cn1 c( c4 c(C) c2cccc 3) c2cc (=O) c2 C1 c(=O)c( C(=O)O) Cc2ccccc2) CC1 /C1 =C\ n c2ccccc12 CCCC CC) c1c2c( cc( c2c( n1) c1( C( Cc1ccc( F)cc1) n2 )cc( cccc1 2 [N+](=O)[O-] )cc2 c1c( C(N c2[nH] 1 C(=O) c2ccccc2 CO C(=O)N CCN( CC Cc1ccc( F)cc1 n nc(- C1 CCCN( C[C@@H]( N) CCN CC1 [C@@H]1 C C2) =O) [C@]1 (O) /C(=N/ O) cs 2)cc1 c3cccc( OC) C( =C/ C(=O) CC [C@H]( OC(=O) ccc( NC( c2n nc( c1 3) cccc 2)c1 C(F)(F)F)cc ( OCCO 3) Cc1 s c1n nc( c1cc( Cl)cc c4 3) [C@@]2 1C ncn 3) c2ccc( [N+](=O)[O-])cc2) CCN(C) CC1 c2ccc( C(=O)N CC1) =O) c1cc( NC( c1cc( Cl) CC2 )cc( C Cl) [nH] c3 c3ccccc3 )cc cc( NC(=O) c2cc( C(=O)N c1ccc( O)cc1) [N+](C) (C)C) C1 CC2 C( CN cn 2)cc1 cc2 C) CC3 ( Cc1ccc( N C1) =O c1cc( Cl)cc( c(=O)[nH] 1 c( OC o 2)c1 c1c( -c2n COc1ccc( N /C =C1\ c2cccc3cccc c23) N c1cccc( c1c( O Br )ccc1 N = OC)c(OC) c1 CC(F)(F) F) CC2 CCCCC2) cc( C(=O)N c3c(F) cccc3 CC( C(=O)N /N = NC(=O) c1ccccc1 c2c[nH] c3ccccc23) c(N C s 2)c1 [C@H]1 CC[C@H]2 c2 )cc1) N c1cc C23 CC4 c12 cccc [N+](C) (C) [C@]2 (O) c3cc(Cl)cc c3 C(C)(C)C) cc1 cc n2) c1n nc(- c2cc s [C@H]1 C N3 CCOCC3) OCC N F)cc 3 c2ccc( OC Cc1ccc( -c2ccccc2 C1 CCN(C(=O) cc(C) c1 c2ccccc2) C1 c3ccc( [N+](=O)[O-] C(C)C) c1 Cc1c[nH] c2ccccc12) c1ccc( Br)cc1) c4ccc( - C3 )cc1 (F) F) CC3 CC3) nn n2 Cc1ccc( S(=O)(=O)N Cc1ccc( - N) c1 c1( N) nn n1 ccc( O cc 3)cc Cc1ccc( Cl)cc1) COc1ccc( -n2 c1ccc( Cl) c2cc( F) [nH] c2c1 CCO C( n2 c1 c1ccc( C) c1ccc( [N+](=O)[O-])cc1 C1=C( O) C(F)(F)F) c1) CC c3ccccc3) Cc1ccc( -n2 [C@]4 (C) c3cc4c( cc3) [C@@H]2 [C@@H]( c3n c4c( [C@H]( C c2cc c3[nH] n 5 NC(=O) CS nc2 N c(C) c2) S1 (=O) CCCC CCC C( =N C1 (C)C c1 c2cccc OC)c(OC) c3) CC(CC( C3) c( [N+](=O)[O-]) [C@H]( N [C@H]( C(=O)O) c2cccc( [N+](=O)[O-]) ccc1 O c(C) n1 CCC(O) =O) CC C1( c1cc( O COc1ccc( C(=O)N [C@@H]2 O c4cc c5c( CCO C1 B (O) C2 C( N# Cc1c( CCO c1cc( c1ccccc1) =O CC(=O)N [C@@H]( 5)cc 4) c3n (C) NC(=O) CO c3cccc( Cl) S(=O)( N)=O) C(=O)O) c1 c2s ccc2) c1c( C( c2ccccc2) n1 N3 CCN( c3ccc( C(=O)N c1 co C(N CC # N OCC N( =O) CC2) n2 c3c( c1(Cl) ccc( N(CC) CC c3 F) COc1ccc( - CCCC(=O) N F)cc 2)c1 c4cc co c2ccc( OC)c(OC) c1( C(=O)N C(C)(C) O) Cc1cc( N c2 =O N1 C =C( N c1ccccc1 2 nc2 cc1 c1s c( S(C) (=O)=O nc( Cl) cn 4) OCC CO ncn 2) CC( O c4 s c(O) cc1 O=C( CSc1n CC [C@H](O) C(=O)O) cc2) c4cc( Cl)cc [nH] 2)cc1 c1ccccc1 Cl) Cl)c(Cl) c3) c2 =O)c1 c6 cccc [C@H]( CC(C)C) =O) C) C(=O)N( CC) CCCC 4) c2nc(- c3ccccc3) C2 C3 c2cc( Br)cc (- c3ccccc3) c12 ccc( CC [C@@H](C) nc2 )cc1 CC1 = (F)(F) F) C(F)(F)F)cc c1 c12c( cc( C(=O) c1c( CCC [C@H]2 C(=O)N[C@@H]( Cc1ccccc1) CCC =C(C)C) =[N+]=[N-] ) =C 4 n1 c(C) c1cc( -n2 Cn2 cc n2) ccc( c2s c( OCO 4 C [C@H]4 c2ccc(Cl)cc2 Cl) OC c3ccccc3) c2cc( Cl) Cl)c(Cl) c1) [C@@]1 (C)CC /N =C2 c(- c2n [C@H]( C(C)C) c1c2c( ccc1 c2cn ( n1 (C) Cl)cc 3 c2ccccc2 OC) c2c(- c3ccccc3) /C( C#N) no 2) S(=O)(=O) c1ccccc1 nc2 C) c2s c3c( =C /C c3cc(OC)c(OC)c(OC) c3) cc1 - s 2 c4ccc(OC) cc4) CC(O) ( Cc1 [nH] c1( O C(=O)N1 CC n( CC(=O)N n ccc1 C S(=O)(=O)N )cc 4) c4 ncc [C@] (C)(O) C) C) C2) n1 Cc2ccc( OC)cc2) c2cccn c2 CCN (C)C c1( -n2 N c1c( CCN2 C(=O) C [C@@H]4 [nH] 2 [C@H]( NC( c1ccc( NC( OCC (N COc1ccc( Cl)cc1 CC1 (C)C c4 c3 CN2 CC c4cccc n4) NC(=O) [C@@H]1 Cc1ccco 1) C(=O)N2 CC c2c( ccc( cc 5) cc1 2 c1cc (=O) NC(=O) C CCn1 cc( c1cccc( Cl)c1) C2 )ccc1 C2 CCN(C c1cc( OC)c(OC)c(OC) 4 )cc2) c1c( -n2 c3cccc( C(F)(F)F)c3) O=C1 c2ccccc2 C[C@H]( N ( N) CCCC (C) c2ccc( Cl) [N+](=O)[O-] )cc( CCOCC 2)c1 CC C(C)(C) n2 )cc1) Cn2 c(=O) c2ncn ( CC(C) N c(=O) c1 [C@]3 4 C c3c( )cc c1) C1 (O) CCC) =O) c2ccccc2 )ccc1 C(=O)N C(=O) C( OCC) C /C( c1cn c( [C@@H]1 O) CC3 )cc1 c1nc(- c2ccccc2) c1cn 2 /N =C1\ CCC [C@@H]( Nc1n cn C= CC( c1c(F) ccc( c1ccc( C2 [C@@H]( OC) Cc1n o c3) CC2) OC [C@@H]1 Cl)cc2 Cl) C(N) =N) cc 3)cc2) [C@H]2 [C@H]( cccc 5) -c2ccc( - OCC OCC C3 =O N2 CCN(C CO c1cccc2 S) N ( CO) c1ccccc1 Cl c1cc c2ccccc2c1 n cc(- ccc(OC) cc1 nc2 - CC( C(=O)O) c2ccc(OC) cc2 c3cc o C(N C)=O) CCCC CCN CC(C) (O) c2cc3cccc c3 =O) o -c2 c3c( (C)C) cc1 OCO 3 Cc1ccc( O CC( c2ccccc2) CC4 )cc3) n cc2) CCCCCCCC ) c1cc2 cc( (=O)=O) c1 n2 ccc( cc3) =O) c1c( Br) cc2c( c1) c1ccccc1) =O) N(CC Cl) N( c2ccccc2) Cc1ccc( -c2n 5 CCCC O c1cc( 3)cc 2)c1 Nc1n cc( c2 C)cc1 N( Cc2ccccc2) CC )cc1 c2c( C#N) CC1 CCN( OC)c(OC) cc1 c1cccc( O o n2) ncn c(N c1cn ccc1 C1 N(C( [C@@] (C)(O) NC(=O) [C@@H]( c3 c4ccccc4 c1c( Cl)c( /C=C/ C( c2ccn cc2 [C@H]( OC) n c2ccccc21 5 CCOCC c4 cc(C) Br) c2) n c3ccccc3 cnc2 c1ncn2 CCCC N( CC) c1 cn n2 =O) c( c1( C2 O C(C)(C)C n(C c3ccccc3) c(C) c3) nc( C(=O)N CC2 )cc1) c2cc( N3 c2c(Cl) cccc2) c2ccc( C#N)cc2) O 4) c1cccc 2) CC C(=O)O c4ccc(C) cc4) cn2) CC1 OCC 3 (- c2ccccc2) N1 CCCC c1cn c(N c4) CC3) c [n+]( Cc1ccc( S(=O)(=O) c4cccc( Cl) c3cc s CC1 =N c[nH] 1 c(=O) n(C)c(=O) cc(- c3ccccc3) OCC(=O) O) c1(OC) ccc( CC C(C)C) c3cc( Cl)c( c1- c1ccccc1 C#N) c1 c2ccc3ccccc3 c2) CC(CC( C4) C) cc( c1cccc(F) c1) c3ccc( C#N)cc3) CN(C) c1ccc( ( c1ccccc1) c1cc( C(N c1c( OC)cc( CC1 CCCO CCC [C@@H]1 nn1 2 Cn2 cc( (=O) O [C@]12 C [C@H]2 O c1cc( C( c(OC) c2) CCC(N 3 c3ccc(OC) cc3 C2) C3) NC(=O) C2 c2cn ccc2) C/C=C\ C/C=C\ c4 o NC( =N)N [C@H]( CC) c2cc( O C(=O) CS CCCC C2)cc1 O=C( COC(=O) c1ccc( O) c2c(C) cc(C) C(=O)N2 CCC( ncn c3 c3ccccc3 OC) [C@@H]4 [C@@]5 [C@H]2 CC[C@H]( /N =C\ [C@@H]2 O) c3cccc( - N( C(C)=O) c3ccccc3 - c3cc(OC)c(OC) cc3 c2ccccc2 c1=O S C(=S) c1ccccc1) N CCCN (C) CCS C) nc(C) c1 NC(=O) C) C(=O)N C(=O)N c1c(C) ccc( c2n cccc2) c(N 4 C(=O)N1 CCC[C@H]1 CC(=O)N 3 CN3 CCN( c2ccc(O) cc2 CCC( CC) n2) n1 [C@@H](C) O) NC(=O) /C=C/ =O) n( [n+] 3 C#N )cc2 F C( c2cc( Cl)cc(Cl) N C1=N c3c( N c2c1 =O CCC #N) c1 O) C c4ccccc4) c2cccc3 c2 N( Cc1ccccc1) c6cccc c6 ccc( OC)c1 O C1=O nc( C(F)(F)F) C(=O)N C) c2 co c( F)cc1 c3o ccc3) O 2)cc1 c1cccc2 ccccc12 N=C( N c1ccccc1 O N1 CCN(C n s nc2 3) c4 c3) CO C(C)=O) CC3 CC( c3ccc( Cl) CCCN( CCC) c1cc( F)c( [C@]3 (C)CC c(C) cc( c1cccc( C(F)(F)F)c1 c1ccc( CN C(=O) /C(=C/ c3ccc(O) cc3 c1cccc( - cc n3) - c1cc c1c( -c2ccccc2) S C1 CO [C@@H]1 c1c(C) cccc1 O= S1(=O) c1cccc( -c2n c(C) cc(C) cc 5 c( CN c3c(Cl) cccc3 COc1ccc( OC)c( C(=O)N [C@@H](C) c3cc( C(F)(F)F)cc n1) =O =C( C#N) =O) cccc1 CO [C@H]1 [C@H]( Cc2ccccc2) c(=O) n3 CCCN( C CC(C) (C)C c2c(OC) cccc2) CN(C) S(=O)(=O) s c(- c1cccc (C) Cc2ccc( O)cc2) c1ccc( O)c(O) [N+](=O)[O-] )ccc1 c(=O)[nH] c2=O) [C@@]3 (C)CC nc3 2) CO CCO c2cc c3n [C@H]( Cc1ccccc1) CC [C@@H](O) c3ccc( Cl)c(Cl)c3) c2cc(C) ccc2 c2ccccc2 C) [C@@]2 1 -c2 nc(N [C@@]3 4 /C =N\ C =C\ nc( OC) CCC( N) c2ccc(Cl)cc 2)cc1 c1( S [nH] n1 ccccc1 2) c1 c-2 CN( CC CO c1cc2c( c2cc(OC)c(OC)c(OC) c2) CCCN2 C(=O) C2 CC C(N O)=O) C(=O)N [C@H]1 CCCN1 CCN( c2cccc( O C2 (C) CCCO 2) cc3 C) [C@@]2 (C)CC )cc (O) N( O) C( C(=O) c( /C=C/ c2ccc(Cl)c(Cl) c2) CC [C@H]4 N( CCC) C(=O)N CCCC c3n cccc3) n 3)cc2 4 )cc2 COc1ccc( C(=O) cn 2)c1 c4 5) CCCN C(=N)N) CCC [C@H]( c3ccc([N+](=O)[O-] )cc3) nc( N2 c2n nc(- CC n2 c1( N2 c4 )cc3) C1= CC(=O) S S C2 1 O=C(N /N=C/ c(- n2 nc3 c2 C( N1CCN( n( Cc2ccccc2) C(O) =C( N2CCN( C( c2ccccc2 c1) CC(O) CO c2cc( F)c( CC(=O) OC C(=O)N c1ccccc1 c3cc(OC) ccc3 Br) c3) 4CCCC C4) CCCC (N C( C(C)C) CC C(=O)N1 C(=O)N /N=C/ c1ccc(Cl)cc1 Cl c3ccccc3 C) c3 c2cccc3) c1ccc( Cl)c( 4) c3) o 2 c3cccc (O) C1 CCN(C c(=O) cc( [C@@]4 (C)CC Cc1cc( O) [C@H](C) N Cc1c( Cl) c2ccccc2) CC1) c1(F) ccc( CN( S(=O)(=O) cc1) =O) c1c( cccc1) c1c( OCC c3 nc(N) c2c( F)cc( c(OC) c3) c2s ccc2 CC2 =O n ccc2 CCC3( CC2) Cn2 c( CCCC(=O) O) /C(=N\ O) cccc 2)cc1 c3c( ccc( c2c(C) cccc2 P(O) (O)=O) CN =C( C( OC N 2) cc c5 cc (=O) COc1cc( N c1s ccc1 O=C1 N( /C=C3 \ C(C)(C) O CC )cc2) c1c( F)cc( C) C(=O) C3 )cc2) C2 (C)C) c2c(=O) n1 CCN S(=O)(=O) c3n nn C(=S) S C [C@@]12 c2ccc(- c3ccccc3)cc2) c2ccc( N(C)C) C1 N(C(=O) Cc1cc(C) cc( c1 oc( [C@@H](C) CO) c4 )cc c( C(N)=O) N O /C=C /C=C/ - n1 c1ccc( C(F)(F)F)cc1 c(N (C)C) C1 (N C(C) (O) N3 C(=O) C c1cccc(C)c1 c3cc4 ccccc4 C3 4 ( C(=O)O) [C@H]1 [C@H]( [C@H]1 O) N( S(=O)(=O) c2cccc(OC) c2) O=C1 NC(=O) CC(C) O C2=C( O) [C@H]2 CC C(C)(C)C) cc2) Cc1 c2c( CCC [C@@H]2 C2 CCN(C(=O) c(OC) c1) c4cc cs COc1cc( OC) NC(=O) c1cc( c2cccc( NC(=O) -c2ccc( F)cc2) S(=O)(=O) N1 c2cc(- c3ccccc3) c( CO c(C) c2 P(O) (O) CCN(CC) C(=O) C( C(F)(F)F) n c2n( c2ccc(C) cc2 n( C(C)C) CC[C@]4 3C) C(=O)N (C)C c4ccco 4) cn (C) c2n cc(- c1n ccc( c3cn ccc3) c3ccc(Cl)cc3 Cl) N c1nc(N no 1) S1 (=O)=O P(O)(=O) O) n n2)c1 c2ccc(F)cc2) CC1 C2 (C)C nc(S C CCN 4 CCCO 3) c 7 CC[C@]4(C) [C@H]3CC c2ccc( S(=O)(=O)N c2ccc( Br)cc2 [N+] (C)( c4cc5 c( C(=O)N[C@@H]( CC(C)C) c1ccc2c(c1) OCO2) N C(=O)N1 n(C) c1 Cc1ccc( C) Cc1 nc(N c2cc(C) cc(C) c1( OC)cc( c2cc o C( CCC) COC(=O) c1ccc( C) C1 N1 2 c3cccc c13) [C@]3 (O) c3s ccc3) O= c1cc( NC(=O)[C@H]( CC(C)C) C4CCCC C4) CCN1 CCC( C2 CC( c4cccc 5 c2n (C NS(=O)(=O) c1ccc( CCCN =C(N)N) n1 c(N) cc4 c( c2 c3 N S(=O)( OCCO 4) N1( C( nc(N3 CCOCC3) - c1cc( c4ccc5c( c4) N )cc1 c1ccccc1 F) c4ccncc 4) N =C c3c( c1) /N=C(\ C) c4 n( c3cc(C) ccc3 c2o c(- [C@@H]( C(=O)O) CCC(O) ( c3 Cl) C12CC3 CC(CC(C3) C c1ccc2c( c(=O) c2c1 c3ccc( C(=O)O) /C1 =C/ C(Cl) (Cl) c2cc(OC) ccc2 OC [C@@H]2 C( C(O)=O) cc(C) cc1 c12 n( 3 )cc1) Cn1 c2c( ccc( Cl)cc1 c3c( n2) F) c1 [C@@]3 (O) O=C( CN1 c1( -c2cc( C3 CCC3) c1( Cl)cc( N( C(=O)N c3cc( Br)cc CCC1 2 cc n1) Cc1cc cnc1) c3cccc4 cccc [C@H]2 C [C@H]2 [C@@H]( CCC( C(=O)N c3ccccc3) CC2)cc1 CCCC CC3) n(C)c(=O) n(C) [C@H](O) [C@@H](O)[C@H](O) N(CCCl) CC Cc1ccco 1 CCN2 CCOCC2) [C@H]( CO OC( CO) c1- 2 CC(C)(C) N =C /C=C/ C(=O) OCC(=O) c4cccc (F) c1ccc( S(=O)(=O)N 4CCCC 4) /C=N/ NC( c1cn (C) c1c(C) cc( F) c(- C(C)(C) [C@@H]5 c1ccc( S C1= S Cc1cc cs1) c3n [nH] CCN(C) CC1) [nH] 2)c1 c(N C(=O)N c(Cl) c2) =O) cc3) c3cccc (N c(S C) c3 2)cc1 c(F) c3) c2 - c3ncc cn3) c3cc( N -c2ccc( Cl)cc2) c4 c(Cl) c( OCC) c3ccc( S(=O)(=O)N nc(- c2ccccc2) CCOCC 3)cc2) c(=O) n(C C(C)C) cc2) c2n o c4 c5c( COc1ccccc1 N1CCN( n 5) CC COc1ccc( co 1 c1cccc( O)c1 (N 3 c1cc( OCC N1 ) =O) ( = N2 nc( NC( c4ccc( N c1cccc( -n2 OC 4 c3c( N) O [C@H](C) n c2)c1 c4ccc( C(F)(F)F)cc c2cc( OCC c2nc( O) Cc1ccc( OC) N1 (C Cc1cc cnc1 c4cccn c4) c3 n2) C( OC(=O) c5 cn O=C( CS C1( c2ccccc2) C(F)(F)F) c( [C@H]1 CO c4 ncn C(C#N) =C(N) C1 CN(C(=O) C1 c2c( c1( Br) n[nH] 1 c3c( c2) c2 )cc C(=O) C(C) c4c( F) c1ccc( C( /C =C1/ c(OC) c(OC) [C@H]( C(=O)N c1c(O) cc( c1ccc( CN2 [C@]2 (C)CC Cc1n n(C) CC(=O) OC) c1c( C2 C3 CCCCC3 c1c( C#N) c1(C) ccc( C2=O) ccc1 C) ( [C@@H](O) [C@H]1O c2ccccc2 O) c2cc nc(N c2c(F) cccc2) P(=O) (O )cc( OC)c1 c3cc(O) ccc3 C1 N Br) c1) [C@H]( Cc1ccccc1)NC(=O) [nH] c12 3)cc2 1 cn c12 c1cc( F)cc c( C(C)C) c4cc( F)cc N( S( CCn1 c(=O) C(=O)N2 C C( CS s c1) c2cn (C) c1cc( C2 c(N) n1 C[C@H]( NC( C [S+]([O-]) c2cn n3 CC[C@H]( NC(=O) C#N )cc1) c2ccc(- c3n P (O CCN(CC) CC) c3cccc(OC) c3) CCCC CCC) OCCO 2) CC c2c( c2) nc1 ccc( C)c1 c3 - c2ccc(Cl)cc2) CC1 nc3 C) C23CC4 CC(CC(C4) c3ccc( C(F)(F)F)cc C[C@@H]( NC(=O) c2cc( S(=O)(=O)N cc( Cl) n1 ccc( [C@]1 (C)CC ncn c32) c4ccc(Cl)cc 4 c3cccc c3c(=O) 3)cc2) CC1 N S( c(C) cc2) CC [C@@]4 ccc1) =O Cc1ccc( S(=O)(=O)N2 cc nc1 CC1 CC1 c1ccc( Cl)c(Cl)c1 c1c( OC)c(OC) CC( CO n( CC c4 n3) c2ccccc2 C1 nc3 n2 c2)cc1 OC c1( Cl)c( CO 2) Cc2ccccc2) c1 C=C 3 C4 ( N [C@H]1 c1n cc(- [N+]([O-])=O) cc2) c4 nn o c12 / C(O) CN( Cc1ccccc1) C3) =O) [N+]([O-]) =O)cc1 CCCCCCCC CC -c2 c[nH] C(=O) c2c( c1( CN2 c3 n2 C1=C(C) N C#N )cc( C(=O)N [C@@H]1 c1 C) O=C( N1 N( C(N COc1cc c2[nH] c2n ccc( O) =O cn cc1 c3c(F)cccc3 F) CC(C)C) =O) [nH] cc2 /C( =C(\ c(=O) n(- C[C@@H]( CO) CCN3 CCOCC3) c1cc( OC)c( N1 (C) c4ccc(F)cc 4 [C@@H]2 C n cc3) O=C(N c1cccc( cccc1) =O c2ccc( S(=O)(=O)N3 N# Cc1cccc( c1c( F)c( c1( CN c3ccc(C) cc3 c( [N+](=O)[O-])c1 c( Cl)cc c2cccc(C) c2) c(Br) c1 CC(=O) N1CCN( c1c( C(F)(F)F) [N+] 2 CCN( S(=O)(=O) C) cc2 /C( =C(/ c2c(Cl)cccc2 Cl) [C@@]1 3 c(- c4ccccc4) nc1 S O [C@H]3 C(=O)O) cc1) c2c(C) cccc2) c1c( O)c( n2) =O) OCO 5) O =C C(N (C)C) c1ccc(- c2ccccc2)cc1 CC2 =O) C( CNC(=O) c5 [nH] CC(=O)N (C) cs 2)c1 c2ncn c(N c2ccc(Cl)cc 2)c1 Cc1n c2c( CC( CO) c(S C c(C) c1) [n+] (C C) CC1 c3cccc(C) c3) c3cn (C) c1cn n2 [nH] c2) CC [C@]2(C) c1cccc( [N+](=O)[O-])c1 C(N)=O) c(N c2ccc(- n3 [C@@H]4 CC C(=O)N S(=O)(=O) C(=O)N C2 c( F)cc2 S C( c1ccccc1 F c1cc( Br) c1cc n [C@@H]1 [C@@H]( c(OC) cc( Cc2ccc( C)cc2) c3c( Cl)cc( c1cc c2nc( [nH] c1) c2cc( Br) [C@@H]2 C1 /C(C) =N/ c3ccc( NC(=O) ncc 4 P(=O)( OCC) c1n [nH] CCN1 C ccc( F)cc1 OC)c(OC) c1) N( S(=O)( c1cc( O)c(O) C1 C2( CC(C) C[C@H](NC(=O) c1c(O) ccc( (O) =O S2 (=O)=O N( C(C)C) C2 CCC2) ncn c2 c3ccccc3 C2=O) C4 )cc3) [C@@H]( CC(C)C) OC(C)(C)C) =O) [N+]([O-]) =O C= CC(=O) C) O c3 c1 c1( -c2ccccc2) c3 O) cn c2) o c2 n4 cn N c1nc(- CC( NC( cn n1 C(F)(F) C(F)(F) c23) c1 CCCC 2)cc1 c3cc( C(=O)N CC(=O) NC( c1cc( Br)ccc1 C(=O)N1 CCN(C(=O) C(C)=O) cc1 c2cc3c(cc2) OCO3) nc(S C) [C@@H]( CC) OC [C@H]2 c1ccc( C(N C(=O) [C@H]( C =N c23) CC1 [N+]([O-]) =O)c1 3)cc c21 )ccc1 O [C@@H]1 (O) /N=C2 \ CC( c1ccccc1) no 2)cc1 n n(C [C@H]1 (O) ================================================ FILE: SmilesPE/__init__.py ================================================ __version__ = "0.0.4" ================================================ FILE: SmilesPE/_nbdev.py ================================================ # AUTOGENERATED BY NBDEV! DO NOT EDIT! __all__ = ["index", "modules", "custom_doc_links", "git_url"] index = {"atomwise_tokenizer": "00_pretokenizer.ipynb", "kmer_tokenizer": "00_pretokenizer.ipynb", "tokens_to_mer": "00_pretokenizer.ipynb", "randomize_smiles": "01_learner.ipynb", "corpus_augment": "01_learner.ipynb", "get_vocabulary": "01_learner.ipynb", "update_pair_statistics": "01_learner.ipynb", "get_pair_statistics": "01_learner.ipynb", "replace_pair": "01_learner.ipynb", "prune_stats": "01_learner.ipynb", "learn_SPE": "01_learner.ipynb", "SPE_Tokenizer": "02_tokenizer.ipynb", "encode": "02_tokenizer.ipynb", "isolate_glossary": "02_tokenizer.ipynb", "Corpus": "03_spe2vec.ipynb", "learn_spe2vec": "03_spe2vec.ipynb", "load_spe2vec": "03_spe2vec.ipynb", "SPE2Vec": "03_spe2vec.ipynb"} modules = ["pretokenizer.py", "learner.py", "tokenizer.py", "spe2vec.py"] doc_url = "https://XinhaoLi74.github.io/SmilesPE/" git_url = "https://github.com/XinhaoLi74/SmilesPE/tree/master/" def custom_doc_links(name): return None ================================================ FILE: SmilesPE/learner.py ================================================ # AUTOGENERATED! DO NOT EDIT! File to edit: notebooks_build/01_learner.ipynb (unless otherwise specified). __all__ = ['randomize_smiles', 'corpus_augment', 'get_vocabulary', 'update_pair_statistics', 'get_pair_statistics', 'replace_pair', 'prune_stats', 'learn_SPE'] # Cell import os import sys import inspect import copy import io import warnings import re from collections import defaultdict, Counter from fastprogress.fastprogress import master_bar, progress_bar from .pretokenizer import * def randomize_smiles(smiles): """ Require `RDKit` library. Generate a new SMILES string for the same molecule. Perform a randomization of a SMILES string must be RDKit sanitizable. """ import random import numpy as np from rdkit import Chem m = Chem.MolFromSmiles(smiles) ans = list(range(m.GetNumAtoms())) np.random.shuffle(ans) nm = Chem.RenumberAtoms(m,ans) return Chem.MolToSmiles(nm, canonical=False, isomericSmiles=True, kekuleSmiles=False) def corpus_augment(infile, outdir, cycles): ''' infile: line separated SMILES file outdir: directory to save the augmented SMILE file. Each round of augmentation will save as a separated file, named as `infile_Ri`. cycles: number of rounds for SMILES augmentation ''' if cycles <= 0: raise ValueError("Invalid option, cycle should be larger than 0") with open(infile, "r") as ins: can_smiles = [] for line in ins: can_smiles.append(line.split('\n')[0]) fname = os.path.basename(infile).split('.')[0] ftype = os.path.basename(infile).split('.')[1] mb = master_bar(range(cycles)) for i in mb: with open(f'{outdir}/{fname}_R{i}.{ftype}', 'a') as outfile: for smi in progress_bar(can_smiles, parent=mb): randomized_smi = randomize_smiles(smi) outfile.write(randomized_smi + '\n') def get_vocabulary(smiles, augmentation=0, exclusive_tokens = False): """Read text and return dictionary that encodes vocabulary """ print('Counting SMILES...') vocab = Counter() for i, smi in enumerate(smiles): vocab[smi] += 1 print(f'{len(vocab)} unique Canonical SMILES') if augmentation>0: print(f'Augmenting SMILES...({augmentation} times)') mb = master_bar(range(augmentation)) for i in mb: for smi in progress_bar(smiles, parent=mb): randomized_smi = randomize_smiles(smi) vocab[randomized_smi] += 1 print(f'{len(vocab)} unique SMILES (Canonical + Augmented)') return dict([(tuple(atomwise_tokenizer(x)) ,y) for (x,y) in vocab.items()]) def update_pair_statistics(pair, changed, stats, indices): """Minimally update the indices and frequency of symbol pairs if we merge a pair of symbols, only pairs that overlap with occurrences of this pair are affected, and need to be updated. """ stats[pair] = 0 indices[pair] = defaultdict(int) first, second = pair new_pair = first+second for j, word, old_word, freq in changed: # find all instances of pair, and update frequency/indices around it i = 0 while True: # find first symbol try: i = old_word.index(first, i) except ValueError: break # if first symbol is followed by second symbol, we've found an occurrence of pair (old_word[i:i+2]) if i < len(old_word)-1 and old_word[i+1] == second: # assuming a symbol sequence "A B C", if "B C" is merged, reduce the frequency of "A B" if i: prev = old_word[i-1:i+1] stats[prev] -= freq indices[prev][j] -= 1 if i < len(old_word)-2: # assuming a symbol sequence "A B C B", if "B C" is merged, reduce the frequency of "C B". # however, skip this if the sequence is A B C B C, because the frequency of "C B" will be reduced by the previous code block if old_word[i+2] != first or i >= len(old_word)-3 or old_word[i+3] != second: nex = old_word[i+1:i+3] stats[nex] -= freq indices[nex][j] -= 1 i += 2 else: i += 1 i = 0 while True: try: # find new pair i = word.index(new_pair, i) except ValueError: break # assuming a symbol sequence "A BC D", if "B C" is merged, increase the frequency of "A BC" if i: prev = word[i-1:i+1] stats[prev] += freq indices[prev][j] += 1 # assuming a symbol sequence "A BC B", if "B C" is merged, increase the frequency of "BC B" # however, if the sequence is A BC BC, skip this step because the count of "BC BC" will be incremented by the previous code block if i < len(word)-1 and word[i+1] != new_pair: nex = word[i:i+2] stats[nex] += freq indices[nex][j] += 1 i += 1 def get_pair_statistics(vocab): """Count frequency of all symbol pairs, and create index""" # data structure of pair frequencies stats = defaultdict(int) #index from pairs to words indices = defaultdict(lambda: defaultdict(int)) for i, (word, freq) in enumerate(progress_bar(vocab)): prev_char = word[0] for char in word[1:]: stats[prev_char, char] += freq indices[prev_char, char][i] += 1 prev_char = char return stats, indices def replace_pair(pair, vocab, indices): """Replace all occurrences of a symbol pair ('A', 'B') with a new symbol 'AB'""" first, second = pair pair_str = ''.join(pair) pair_str = pair_str.replace('\\','\\\\') changes = [] pattern = re.compile(r'(?= {0}. Stopping\n'.format(min_frequency)) break if verbose: sys.stderr.write('pair {0}: {1} {2} -> {1}{2} (frequency {3})\n'.format(i, most_frequent[0], most_frequent[1], stats[most_frequent])) outfile.write('{0} {1}\n'.format(*most_frequent)) changes = replace_pair(most_frequent, sorted_vocab, indices) update_pair_statistics(most_frequent, changes, stats, indices) stats[most_frequent] = 0 if not i % 100: prune_stats(stats, big_stats, threshold) ================================================ FILE: SmilesPE/pretokenizer.py ================================================ # AUTOGENERATED! DO NOT EDIT! File to edit: notebooks_build/00_pretokenizer.ipynb (unless otherwise specified). __all__ = ['atomwise_tokenizer', 'kmer_tokenizer', 'tokens_to_mer'] # Cell def atomwise_tokenizer(smi, exclusive_tokens = None): """ Tokenize a SMILES molecule at atom-level: (1) 'Br' and 'Cl' are two-character tokens (2) Symbols with bracket are considered as tokens exclusive_tokens: A list of specifical symbols with bracket you want to keep. e.g., ['[C@@H]', '[nH]']. Other symbols with bracket will be replaced by '[UNK]'. default is `None`. """ import re pattern = "(\[[^\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\(|\)|\.|=|#|-|\+|\\\\|\/|:|~|@|\?|>|\*|\$|\%[0-9]{2}|[0-9])" regex = re.compile(pattern) tokens = [token for token in regex.findall(smi)] if exclusive_tokens: for i, tok in enumerate(tokens): if tok.startswith('['): if tok not in exclusive_tokens: tokens[i] = '[UNK]' return tokens # Cell def kmer_tokenizer(smiles, ngram=4, stride=1, remove_last = False, exclusive_tokens = None): units = atomwise_tokenizer(smiles, exclusive_tokens = exclusive_tokens) #collect all the atom-wise tokens from the SMILES if ngram == 1: tokens = units else: tokens = [tokens_to_mer(units[i:i+ngram]) for i in range(0, len(units), stride) if len(units[i:i+ngram]) == ngram] if remove_last: if len(tokens[-1]) < ngram: #truncate last whole k-mer if the length of the last k-mers is less than ngram. tokens = tokens[:-1] return tokens def tokens_to_mer(toks): return ''.join(toks) ================================================ FILE: SmilesPE/spe2vec.py ================================================ # AUTOGENERATED! DO NOT EDIT! File to edit: notebooks_build/03_spe2vec.ipynb (unless otherwise specified). __all__ = ['Corpus', 'learn_spe2vec', 'load_spe2vec', 'SPE2Vec'] # Cell import os import gensim class Corpus(object): ''' *filename*: A file that stores SMILES line-by-line. *tokenizer*: SPE tokenizer *dropout*: SPE dropout, default = 0 ''' def __init__(self, infile, tokenizer, isdir=False, dropout=0): self.infile = infile self.tokenizer = tokenizer self.dropout = dropout self.isdir = isdir def __iter__(self): if self.isdir: for fname in os.listdir(self.infile): for smi in open(os.path.join(self.infile, fname)): yield self.tokenizer.tokenize(smi, dropout=self.dropout).split(' ') else: for smi in open(self.infile): yield self.tokenizer.tokenize(smi, dropout=self.dropout).split(' ') def learn_spe2vec(corpus, outfile=None, vector_size=100, window=10, min_count=10, n_jobs = 1, method = 'skip-gram', **kwargs): ''' Train a spe2vec model. *corpus*: an instance of `Class Corpus()` *outfile*: str, name of the spe2vec model file. *vector_size*: dimensions of embedding. *window*: number of tokens considered as context *min_count*: number of occurrences a token should have to be considered in training *n_jobs*: number of cpu cores used for training. *method*: modeling method, choose from ['cbow', 'skip-gram'] More training parameter can be found https://radimrehurek.com/gensim/models/word2vec.html#gensim.models.word2vec.Word2Vec ''' if method.lower() == 'skip-gram': sg = 1 elif method.lower() == 'cbow': sg = 0 else: raise ValueError("Invalid option, choose from ['cbow', 'skip-gram']") model = gensim.models.Word2Vec(corpus, size=vector_size, window=window, min_count=min_count, workers=n_jobs, sg=sg, **kwargs) if outfile: model.save(outfile) return model def load_spe2vec(model_path): return gensim.models.Word2Vec.load(model_path) # Cell import numpy as np class SPE2Vec(object): def __init__(self, model_path, tokenizer): self.model = gensim.models.Word2Vec.load(model_path) self.tokenizer = tokenizer self.token_keys = set(self.model.wv.vocab.keys()) #get the vector for unknown tokens. simply averge the vectors of all known tokens. vectors = [] for word in self.model.wv.vocab: vectors.append(self.model.wv[word]) self.unknown = np.mean(vectors, axis=0) def tokenize(self, smi, dropout=0): ''' tokenize SMILES into substructure tokens. ''' return self.tokenizer.tokenize(smi, dropout) def smiles2vec(self, smi, dropout=0, mode = 'average'): ''' Generate a vector for a SMILES. The vector is construc in four modes: ['average', 'sum', 'avg_pool', 'sum_pool'] `average`: average the embedding of all tokens `sum`: sum the embedding of all tokens `avg_pool`: concatenation of average, max pooling and min pooling `sum_pool`: concatenation of sum, max pooling and min pooling The Unknown token will be skipped ''' if mode not in ['average', 'sum', 'avg_pool', 'sum_pool']: raise ValueError("Invalid option, choose from ['average', 'sum', 'avg_pool', 'sum_pool']") tokens = self.tokenizer.tokenize(smi, dropout).split(' ') if mode == 'average': return np.mean([self.model.wv[tok] for tok in tokens if tok in self.token_keys], axis=0) if mode == 'sum': return np.sum([self.model.wv[tok] for tok in tokens if tok in self.token_keys], axis=0) if mode == 'avg_pool': tok_mean = np.mean([self.model.wv[tok] for tok in tokens if tok in self.token_keys], axis=0) tok_max = np.amax([self.model.wv[tok] for tok in tokens if tok in self.token_keys], axis=0) tok_min = np.amin([self.model.wv[tok] for tok in tokens if tok in self.token_keys], axis=0) tok_concate = np.concatenate((tok_mean, tok_max, tok_min)) return tok_concate if mode == 'sum_pool': tok_sum = np.sum([self.model.wv[tok] for tok in tokens if tok in self.token_keys], axis=0) tok_max = np.amax([self.model.wv[tok] for tok in tokens if tok in self.token_keys], axis=0) tok_min = np.amin([self.model.wv[tok] for tok in tokens if tok in self.token_keys], axis=0) tok_concate = np.concatenate((tok_sum, tok_max, tok_min)) return tok_concate def spe2vec(self, smi, dropout=0, skip_unknown=False): ''' Generate a list of vectors (np.array). Each vector is spe vector of each token. The unknown token will be represented by the mean of all token vectors from the model. ''' token_keys = set(self.model.wv.vocab.keys()) tokens = self.tokenizer.tokenize(smi, dropout).split(' ') if skip_unknown: vec = [self.model.wv[tok] for tok in tokens if tok in self.token_keys] else: vec = [self.model.wv[tok] if tok in self.token_keys else self.unknown for tok in tokens] return vec ================================================ FILE: SmilesPE/tokenizer.py ================================================ # AUTOGENERATED! DO NOT EDIT! File to edit: notebooks_build/02_tokenizer.ipynb (unless otherwise specified). __all__ = ['SPE_Tokenizer', 'encode', 'isolate_glossary'] # Cell import sys import os import inspect import codecs import io import argparse import re import warnings import random sys.path.append('..') from .pretokenizer import * from .learner import * class SPE_Tokenizer(object): """ Tokenize SMILES based on the learned SPE tokens. codes: output file of `learn_SPE()` merges: number of learned SPE tokens you want to use. `-1` means using all of them. `1000` means use the most frequent 1000. exclusive_tokens: argument that passes to `atomwise_tokenizer()` glossaries: argument that passes to `isolate_glossary()` dropout: See [BPE-Dropout: Simple and Effective Subword Regularization](https://arxiv.org/abs/1910.13267). If `dropout` is set to 0, the segmentation is equivalent to the standard BPE; if `dropout` is set to 1, the segmentation splits words into distinct characters. """ def __init__(self, codes, merges=-1, glossaries=None, exclusive_tokens=None): codes.seek(0) offset=1 self.bpe_codes = [tuple(item.strip('\r\n ').split(' ')) for (n, item) in enumerate(codes) if (n < merges or merges == -1)] for i, item in enumerate(self.bpe_codes): if len(item) != 2: sys.stderr.write('Error: invalid line {0} in BPE codes file: {1}\n'.format(i+offset, ' '.join(item))) sys.stderr.write('The line should exist of exactly two subword units, separated by whitespace\n') sys.exit(1) # some hacking to deal with duplicates (only consider first instance) self.bpe_codes = dict([(code,i) for (i,code) in reversed(list(enumerate(self.bpe_codes)))]) self.bpe_codes_reverse = dict([(pair[0] + pair[1], pair) for pair,i in self.bpe_codes.items()]) self.glossaries = glossaries if glossaries else [] self.glossaries_regex = re.compile('^({})$'.format('|'.join(glossaries))) if glossaries else None self.exclusive_tokens = exclusive_tokens self.cache = {} def tokenize(self, smi, dropout=0): segments = [out for segment in self._isolate_glossaries(smi) for out in encode(segment, self.bpe_codes, self.bpe_codes_reverse, self.cache, self.exclusive_tokens, self.glossaries_regex, dropout)] return ' '.join(segments) def _isolate_glossaries(self, word): word_segments = [word] for gloss in self.glossaries: word_segments = [out_segments for segment in word_segments for out_segments in isolate_glossary(segment, gloss)] return word_segments def encode(orig, bpe_codes, bpe_codes_reverse, cache, exclusive_tokens=None, glossaries_regex=None, dropout=0): """Encode word based on list of SPE merge operations, which are applied consecutively. """ if not dropout and orig in cache: return cache[orig] if glossaries_regex and glossaries_regex.match(orig): cache[orig] = (orig,) return (orig,) if len(orig) == 1: return orig word = atomwise_tokenizer(orig, exclusive_tokens=exclusive_tokens) while len(word) > 1: # get list of symbol pairs; optionally apply dropout pairs = [(bpe_codes[pair],i,pair) for (i,pair) in enumerate(zip(word, word[1:])) if (not dropout or random.random() > dropout) and pair in bpe_codes] if not pairs: break #get first merge operation in list of BPE codes bigram = min(pairs)[2] # find start position of all pairs that we want to merge positions = [i for (rank,i,pair) in pairs if pair == bigram] i = 0 new_word = [] bigram = ''.join(bigram) for j in positions: # merges are invalid if they start before current position. This can happen if there are overlapping pairs: (x x x -> xx x) if j < i: continue new_word.extend(word[i:j]) # all symbols before merged pair new_word.append(bigram) # merged pair i = j+2 # continue after merged pair new_word.extend(word[i:]) # add all symbols until end of word word = new_word word = tuple(word) cache[orig] = word return word def isolate_glossary(word, glossary): """ Isolate a glossary present inside a word. Returns a list of subwords. In which all 'glossary' glossaries are isolated. For example, if 'USA' is the glossary and '1934USABUSA' the word, the return value is: ['1934', 'USA', 'B', 'USA'] """ # regex equivalent of (if word == glossary or glossary not in word) if re.match('^'+glossary+'$', word) or not re.search(glossary, word): return [word] else: segments = re.split(r'({})'.format(glossary), word) segments, ending = segments[:-1], segments[-1] segments = list(filter(None, segments)) # Remove empty strings in regex group. return segments + [ending.strip('\r\n ')] if ending != '' else segments ================================================ FILE: docs/.gitignore ================================================ _site/ ================================================ FILE: docs/Gemfile ================================================ source "https://rubygems.org" gem 'github-pages', group: :jekyll_plugins # Added at 2019-11-25 10:11:40 -0800 by jhoward: gem "jekyll", "~> 3.7" ================================================ FILE: docs/_config.yml ================================================ repository: XinhaoLi74/SmilesPE output: web topnav_title: SMILES Pair Encoding site_title: SMILES Pair Encoding company_name: Xinhao Li description: Tokenize SMILES with substructure units # Set to false to disable KaTeX math use_math: true # Add Google analytics id if you have one and want to use it here google_analytics: # See http://nbdev.fast.ai/search for help with adding Search google_search: host: 127.0.0.1 # the preview server used. Leave as is. port: 4000 # the port where the preview is rendered. exclude: - .idea/ - .gitignore - vendor exclude: [vendor] highlighter: rouge markdown: kramdown kramdown: input: GFM auto_ids: true hard_wrap: false syntax_highlighter: rouge collections: tooltips: output: false defaults: - scope: path: "" type: "pages" values: layout: "page" comments: true search: true sidebar: home_sidebar topnav: topnav - scope: path: "" type: "tooltips" values: layout: "page" comments: true search: true tooltip: true sidebars: - home_sidebar permalink: pretty theme: jekyll-theme-cayman baseurl: /SmilesPE/ ================================================ FILE: docs/_data/alerts.yml ================================================ tip: '