Full Code of chao1224/ChatDrug for AI

main fb8470b81686 cached
26 files
921.3 KB
475.7k tokens
159 symbols
1 requests
Download .txt
Showing preview only (949K chars total). Download the full file or copy to clipboard to get everything.
Repository: chao1224/ChatDrug
Branch: main
Commit: fb8470b81686
Files: 26
Total size: 921.3 KB

Directory structure:
gitextract_jfht9tp3/

├── ChatDrug/
│   ├── TAPE_benchmark/
│   │   ├── __init__.py
│   │   ├── datasets.py
│   │   ├── metrics.py
│   │   ├── models.py
│   │   └── trainer.py
│   └── task_and_evaluation/
│       ├── Conversational_LLMs_utils.py
│       ├── __init__.py
│       ├── peptide_editing.py
│       ├── prompt_specification.json
│       ├── protein_editing.py
│       └── small_molecule_editing.py
├── ChatDrug_demo.ipynb
├── README.md
├── data/
│   ├── README.md
│   ├── peptide/
│   │   ├── class1_pseudosequences.csv
│   │   ├── peptide_editing.json
│   │   ├── peptide_editing_threshold.json
│   │   ├── preprocess_step_1_data_extraction.py
│   │   ├── preprocess_step_2_single_prop.py
│   │   ├── preprocess_step_3_multi_prop.py
│   │   └── selected_alleles.txt
│   └── small_molecule/
│       └── small_molecule_editing.txt
├── main_ChatDrug.py
├── main_InContext.py
├── setup.py
└── utils.py

================================================
FILE CONTENTS
================================================

================================================
FILE: ChatDrug/TAPE_benchmark/__init__.py
================================================
from ChatDrug.TAPE_benchmark.datasets import dataset_processor_mapping, output_mode_mapping
from ChatDrug.TAPE_benchmark.models import model_mapping, load_adam_optimizer_and_scheduler
from ChatDrug.TAPE_benchmark.trainer import OntoProteinTrainer
from ChatDrug.TAPE_benchmark.metrics import build_compute_metrics_fn

================================================
FILE: ChatDrug/TAPE_benchmark/datasets.py
================================================
from pathlib import Path
from typing import Union

import pickle as pkl
import lmdb
import numpy as np
import pandas as pd
import re
import torch
from scipy.spatial.distance import squareform, pdist
from typing import Union, List, Tuple, Sequence, Dict, Any, Optional, Collection
from torch.utils.data import Dataset
import os


def dataset_factory(data_file: Union[str, Path], *args, **kwargs) -> Dataset:
    """Instantiate the dataset class that matches ``data_file``.

    The class is picked from the file suffix (``.lmdb``, one of the FASTA
    suffixes, ``.json``) or, failing that, from the path being a directory.
    Extra positional and keyword arguments are forwarded to that class.

    Raises:
        FileNotFoundError: if ``data_file`` does not exist.
        ValueError: if neither the suffix nor the path type is recognised.
    """
    path = Path(data_file)
    if not path.exists():
        raise FileNotFoundError(path)

    suffix = path.suffix
    # NOTE(review): FastaDataset, JSONDataset and NPZDataset are not defined
    # or imported in this module as shown; those branches need them in scope.
    if suffix == '.lmdb':
        dataset_cls = LMDBDataset
    elif suffix in {'.fasta', '.fna', '.ffn', '.faa', '.frn'}:
        dataset_cls = FastaDataset
    elif suffix == '.json':
        dataset_cls = JSONDataset
    elif path.is_dir():
        dataset_cls = NPZDataset
    else:
        raise ValueError(f"Unrecognized datafile type {path.suffix}")

    return dataset_cls(path, *args, **kwargs)


def pad_sequences(sequences: Sequence, constant_value=0, dtype=None) -> np.ndarray:
    """Stack variable-shaped arrays into one batch, padding with a constant.

    Every element is copied into the leading corner of its slot in an output
    whose trailing dimensions are the element-wise maximum over all shapes.

    Args:
        sequences: non-empty sequence of ``np.ndarray`` or ``torch.Tensor``
            objects; the type of the first element selects the output type.
        constant_value: fill value for the padded positions.
        dtype: output dtype; defaults to the dtype of the first element.

    Returns:
        A NumPy array or torch tensor of shape ``[len(sequences), *max_shape]``.

    Raises:
        ValueError: if ``sequences`` is empty.
        TypeError: if the first element is neither an ndarray nor a Tensor.
    """
    if len(sequences) == 0:
        raise ValueError("pad_sequences requires at least one sequence")

    first = sequences[0]
    # Pick the allocator up front so an unsupported type fails with a clear
    # message instead of leaving the output array unbound further down.
    if isinstance(first, np.ndarray):
        make_filled = np.full
    elif isinstance(first, torch.Tensor):
        make_filled = torch.full
    else:
        raise TypeError(
            "pad_sequences expects numpy arrays or torch tensors, "
            f"got {type(first).__name__}")

    batch_size = len(sequences)
    shape = [batch_size] + np.max([seq.shape for seq in sequences], 0).tolist()

    if dtype is None:
        dtype = first.dtype

    array = make_filled(shape, constant_value, dtype=dtype)

    for arr, seq in zip(array, sequences):
        # ``arr`` is a view of one batch row; write ``seq`` into its corner.
        arrslice = tuple(slice(dim) for dim in seq.shape)
        arr[arrslice] = seq

    return array


class LMDBDataset(Dataset):
    """Read-only dataset backed by an LMDB environment of pickled records.

    The environment stores the record count under the key ``b'num_examples'``
    and each record under the ASCII-encoded decimal index.

    Args:
        data_file: path of the LMDB environment (``str`` or ``Path``).
        in_memory: when truthy, records are cached after their first read.
    """

    def __init__(self, data_file, in_memory):
        # lmdb.open wants a str/bytes path, while dataset_factory hands over a
        # pathlib.Path; str() accepts both.
        env = lmdb.open(str(data_file), max_readers=1, readonly=True,
                        lock=False, readahead=False, meminit=False)

        with env.begin(write=False) as txn:
            # NOTE: pickle.loads executes arbitrary code from the payload;
            # only open LMDB files from trusted sources.
            num_examples = pkl.loads(txn.get(b'num_examples'))

        if in_memory:
            cache = [None] * num_examples
            self._cache = cache

        self._env = env
        self._in_memory = in_memory
        self._num_examples = num_examples

    def __len__(self):
        return self._num_examples

    def __getitem__(self, index):
        """Return record ``index``, adding an ``'id'`` key if it has none.

        Raises:
            IndexError: if ``index`` is outside ``[0, len(self))``.
        """
        # Without this check a missing key reaches pkl.loads(None), which
        # raises TypeError and breaks plain iteration over the dataset.
        if not 0 <= index < self._num_examples:
            raise IndexError(index)

        if self._in_memory and self._cache[index] is not None:
            item = self._cache[index]
        else:
            with self._env.begin(write=False) as txn:
                item = pkl.loads(txn.get(str(index).encode()))
                if 'id' not in item:
                    item['id'] = str(index)
                if self._in_memory:
                    self._cache[index] = item
        return item


class DataProcessor:
    """Abstract interface shared by the per-task dataset processors.

    Every method raises :class:`NotImplementedError`; concrete processors
    override the ones they support.
    """

    def get_train_examples(self, data_dir):
        """Return the training split found under ``data_dir``."""
        raise NotImplementedError

    def get_dev_examples(self, data_dir):
        """Return the validation split found under ``data_dir``."""
        raise NotImplementedError

    def get_test_examples(self, data_dir):
        """Return the test split found under ``data_dir``."""
        raise NotImplementedError

    def get_labels(self):
        """Return the list of labels used by the task."""
        raise NotImplementedError


class FluorescenceProgress(DataProcessor):
    """Processor that builds :class:`FluorescenceDataset` splits.

    ``in_memory`` is accepted by every getter but is not used here.
    """

    def __init__(self, tokenizer):
        super().__init__()
        self.tokenizer = tokenizer

    def get_train_examples(self, data_dir, in_memory=True):
        """Return the ``'train'`` split."""
        return FluorescenceDataset(data_dir, split='train', tokenizer=self.tokenizer)

    def get_dev_examples(self, data_dir, in_memory=True):
        """Return the ``'valid'`` split."""
        return FluorescenceDataset(data_dir, split='valid', tokenizer=self.tokenizer)

    def get_test_examples(self, data_dir, data_cat, in_memory=True):
        """Return the split named ``data_cat``, or ``'test'`` when it is None."""
        split = 'test' if data_cat is None else data_cat
        return FluorescenceDataset(data_dir, split=split, tokenizer=self.tokenizer)

    def get_labels(self):
        """Return ``[0]``: a single output."""
        return [0]


class SecondaryStructureProcessor3(DataProcessor):
    """Processor that builds :class:`SecondaryStructureDataset3` splits."""

    def __init__(self, tokenizer):
        super().__init__()
        self.tokenizer = tokenizer

    def get_train_examples(self, data_dir, in_memory=True):
        """Return the ``'train'`` split."""
        return SecondaryStructureDataset3(
            data_dir, split='train', tokenizer=self.tokenizer,
            target='ss3', in_memory=in_memory)

    def get_dev_examples(self, data_dir, in_memory=True):
        """Return the ``'valid'`` split."""
        return SecondaryStructureDataset3(
            data_dir, split='valid', tokenizer=self.tokenizer,
            target='ss3', in_memory=in_memory)

    def get_test_examples(self, data_dir, data_cat, in_memory=True):
        """Return the split named by ``data_cat`` (used verbatim as the split)."""
        return SecondaryStructureDataset3(
            data_dir, split=data_cat, tokenizer=self.tokenizer,
            target='ss3', in_memory=in_memory)

    def get_labels(self):
        """Return the three class ids ``[0, 1, 2]``."""
        return [0, 1, 2]


class SecondaryStructureProcessor8(DataProcessor):
    """Processor that builds :class:`SecondaryStructureDataset8` splits."""

    def __init__(self, tokenizer):
        super().__init__()
        self.tokenizer = tokenizer

    def get_train_examples(self, data_dir, in_memory=True):
        """Return the ``'train'`` split."""
        return SecondaryStructureDataset8(
            data_dir, split='train', tokenizer=self.tokenizer,
            target='ss8', in_memory=in_memory)

    def get_dev_examples(self, data_dir, in_memory=True):
        """Return the ``'valid'`` split."""
        return SecondaryStructureDataset8(
            data_dir, split='valid', tokenizer=self.tokenizer,
            target='ss8', in_memory=in_memory)

    def get_test_examples(self, data_dir, data_cat, in_memory=True):
        """Return the split named by ``data_cat`` (used verbatim as the split)."""
        return SecondaryStructureDataset8(
            data_dir, split=data_cat, tokenizer=self.tokenizer,
            target='ss8', in_memory=in_memory)

    def get_labels(self):
        """Return the eight class ids ``0`` to ``7``."""
        return [*range(8)]


class ContactProgress(DataProcessor):
    """Processor that builds :class:`ProteinnetDataset` splits.

    ``in_memory`` is accepted by every getter but is not used here.
    """

    def __init__(self, tokenizer):
        super().__init__()
        self.tokenizer = tokenizer

    def get_train_examples(self, data_dir, in_memory=True):
        """Return the ``'train'`` split."""
        return ProteinnetDataset(data_dir, split='train', tokenizer=self.tokenizer)

    def get_dev_examples(self, data_dir, in_memory=True):
        """Return the ``'valid'`` split."""
        return ProteinnetDataset(data_dir, split='valid', tokenizer=self.tokenizer)

    def get_test_examples(self, data_dir, data_cat, in_memory=True):
        """Return the split named ``data_cat``, or ``'test'`` when it is None."""
        split = 'test' if data_cat is None else data_cat
        return ProteinnetDataset(data_dir, split=split, tokenizer=self.tokenizer)

    def get_labels(self):
        """Return the two class ids ``[0, 1]``."""
        return [0, 1]


class StabilityProgress(DataProcessor):
    """Processor that builds :class:`StabilityDataset` splits.

    ``in_memory`` is accepted by every getter but is not used here.
    """

    def __init__(self, tokenizer):
        super().__init__()
        self.tokenizer = tokenizer

    def get_train_examples(self, data_dir, in_memory=True):
        """Return the ``'train'`` split."""
        return StabilityDataset(data_dir, split='train', tokenizer=self.tokenizer)

    def get_dev_examples(self, data_dir, in_memory=True):
        """Return the ``'valid'`` split."""
        return StabilityDataset(data_dir, split='valid', tokenizer=self.tokenizer)

    def get_test_examples(self, data_dir, data_cat, in_memory=True):
        """Return the split named ``data_cat``, or ``'test'`` when it is None."""
        split = 'test' if data_cat is None else data_cat
        return StabilityDataset(data_dir, split=split, tokenizer=self.tokenizer)

    def get_labels(self):
        """Return ``[0]``: a single output."""
        return [0]


class RemoteHomologyProgress(DataProcessor):
    """Processor that builds :class:`RemoteHomologyDataset` splits.

    ``in_memory`` is accepted by every getter but is not used here.
    """

    def __init__(self, tokenizer):
        super().__init__()
        self.tokenizer = tokenizer

    def get_train_examples(self, data_dir, in_memory=True):
        """Return the ``'train'`` split."""
        return RemoteHomologyDataset(data_dir, split='train', tokenizer=self.tokenizer)

    def get_dev_examples(self, data_dir, in_memory=True):
        """Return the ``'valid'`` split."""
        return RemoteHomologyDataset(data_dir, split='valid', tokenizer=self.tokenizer)

    def get_test_examples(self, data_dir, data_cat, in_memory=True):
        """Return the split named ``data_cat``, or ``'test'`` when it is None."""
        # NOTE(review): RemoteHomologyDataset does not list 'test' among its
        # accepted splits, so the None fallback raises ValueError there.
        # Confirm which hold-out split should be the default.
        split = 'test' if data_cat is None else data_cat
        return RemoteHomologyDataset(data_dir, split=split, tokenizer=self.tokenizer)

    def get_labels(self):
        """Return the 1195 class ids ``0`` to ``1194``."""
        return [*range(1195)]


class ProteinnetDataset(Dataset):
    """Contact-prediction dataset read from ``proteinnet/proteinnet_{split}.json``.

    Each item yields token ids, the residue count, an all-ones input mask and
    a square contact map whose entries are 1 (pair distance below 8.0),
    0 (otherwise) or -1 (ignored pair).

    Args:
        data_path: directory containing the ``proteinnet`` folder.
        split: one of ``'train'``, ``'train_unfiltered'``, ``'valid'``, ``'test'``.
        tokenizer: callable returning a mapping with an ``'input_ids'`` entry.

    Raises:
        ValueError: if ``split`` is not one of the accepted names.
    """

    def __init__(self,
                 data_path: Union[str, Path],
                 split: str,
                 tokenizer):

        if split not in ('train', 'train_unfiltered', 'valid', 'test'):
            raise ValueError(f"Unrecognized split: {split}. Must be one of "
                             f"['train', 'train_unfiltered', 'valid', 'test']")

        self.tokenizer = tokenizer

        data_path = Path(data_path)
        data_file = f'proteinnet/proteinnet_{split}.json'
        self.data = dataset_factory(data_path / data_file)

    def __len__(self) -> int:
        return len(self.data)

    def __getitem__(self, index: int):
        """Return ``(token_ids, protein_length, input_mask, contact_map)``."""
        item = self.data[index]

        # Residue codes U, Z, O and B are replaced by X before tokenising.
        seq = list(re.sub(r"[UZOB]", "X", item['primary']))
        token_ids = self.tokenizer(seq, is_split_into_words=True)
        token_ids = np.asarray(token_ids['input_ids'], dtype=int)
        protein_length = len(seq)
        input_mask = np.ones_like(token_ids)

        # Force a boolean mask: with 0/1 integers the ``~`` and ``&`` below
        # become integer bit operations and the final assignment turns into
        # fancy indexing, which silently corrupts the contact map.
        valid_mask = np.asarray(item['valid_mask'], dtype=bool)
        contact_map = np.less(squareform(pdist(torch.tensor(item['tertiary']))), 8.0).astype(np.int64)

        yind, xind = np.indices(contact_map.shape)
        # Ignore pairs with an invalid residue and pairs closer than six
        # positions along the sequence.
        invalid_mask = ~(valid_mask[:, None] & valid_mask[None, :])
        invalid_mask |= np.abs(yind - xind) < 6
        contact_map[invalid_mask] = -1

        return token_ids, protein_length, input_mask, contact_map

    def collate_fn(self, batch):
        """Pad a list of items into a batch dictionary of tensors."""
        input_ids, protein_length, input_mask, contact_labels = tuple(zip(*batch))

        input_ids = torch.from_numpy(pad_sequences(input_ids, 0))
        input_mask = torch.from_numpy(pad_sequences(input_mask, 0))
        # Padded label positions use -1, the same value as ignored pairs.
        contact_labels = torch.from_numpy(pad_sequences(contact_labels, -1))
        protein_length = torch.LongTensor(protein_length)  # type: ignore

        return {'input_ids': input_ids,
                'attention_mask': input_mask,
                'labels': contact_labels,
                'protein_length': protein_length}


class FluorescenceDataset(Dataset):
    """Regression dataset read from ``fluorescence/fluorescence_{split}.json``.

    Args:
        file_path: directory containing the ``fluorescence`` folder.
        split: one of ``'train'``, ``'valid'``, ``'test'``.
        tokenizer: callable returning a mapping with an ``'input_ids'`` entry.

    Raises:
        ValueError: if ``split`` is not one of the accepted names.
    """

    def __init__(self, file_path, split, tokenizer):
        self.tokenizer = tokenizer
        self.file_path = file_path

        if split not in ('train', 'valid', 'test'):
            # The closing bracket was missing from this message.
            raise ValueError(f"Unrecognized split: {split}. Must be one of "
                             f"['train', 'valid', 'test']")

        data_file = f'{self.file_path}/fluorescence/fluorescence_{split}.json'
        self.seqs, self.labels = self.get_data(data_file)

    def get_data(self, file):
        """Read ``file`` and return its ``primary`` and ``log_fluorescence`` columns."""
        fp = pd.read_json(file)
        seqs = fp.primary
        labels = fp.log_fluorescence

        return seqs, labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        """Return ``(input_ids, input_mask, label)`` for one sequence."""
        # Residue codes U, Z, O and B are replaced by X before tokenising.
        seq = list(re.sub(r"[UZOB]", "X", self.seqs[index]))

        input_ids = self.tokenizer(seq, is_split_into_words=True, truncation=True, padding="max_length", max_length=239)
        input_ids = np.array(input_ids['input_ids'])
        # NOTE(review): the mask is all ones, so any padding ids added by the
        # tokenizer are not masked out; confirm this is intended.
        input_mask = np.ones_like(input_ids)

        label = self.labels[index]

        return input_ids, input_mask, label

    def collate_fn(self, batch):
        """Pad a list of items into a batch dictionary of tensors."""
        input_ids, input_mask, fluorescence_true_value = tuple(zip(*batch))
        input_ids = torch.from_numpy(pad_sequences(input_ids, 0))
        input_mask = torch.from_numpy(pad_sequences(input_mask, 0))
        fluorescence_true_value = torch.FloatTensor(fluorescence_true_value)  # type: ignore

        return {'input_ids': input_ids,
                'attention_mask': input_mask,
                'labels': fluorescence_true_value}

class StabilityDataset(Dataset):
    """Regression dataset read from ``stability/stability_{split}.json``.

    Args:
        file_path: directory containing the ``stability`` folder.
        split: one of ``'train'``, ``'valid'``, ``'test'``.
        tokenizer: callable returning a mapping with an ``'input_ids'`` entry.

    Raises:
        ValueError: if ``split`` is not one of the accepted names.
    """

    def __init__(self, file_path, split, tokenizer):
        self.file_path = file_path
        self.tokenizer = tokenizer

        if split not in ('train', 'valid', 'test'):
            # The closing bracket was missing from this message.
            raise ValueError(f"Unrecognized split: {split}. Must be one of "
                             f"['train', 'valid', 'test']")

        data_file = f'{self.file_path}/stability/stability_{split}.json'
        self.seqs, self.labels = self.get_data(data_file)

    def get_data(self, path):
        """Read ``path`` and return its ``primary`` and ``stability_score`` columns."""
        read_file = pd.read_json(path)

        seqs = read_file.primary
        labels = read_file.stability_score

        return seqs, labels

    def __getitem__(self, index):
        """Return ``(input_ids, input_mask, label)`` for one sequence."""
        # Residue codes U, Z, O and B are replaced by X before tokenising.
        seq = list(re.sub(r"[UZOB]", "X", self.seqs[index]))

        input_ids = self.tokenizer(seq, is_split_into_words=True, padding="max_length", max_length=50, truncation=True)
        input_ids = np.array(input_ids['input_ids'])
        # NOTE(review): the mask is all ones, so any padding ids added by the
        # tokenizer are not masked out; confirm this is intended.
        input_mask = np.ones_like(input_ids)

        label = self.labels[index]

        return input_ids, input_mask, label

    def __len__(self):
        return len(self.labels)

    def collate_fn(self, batch):
        """Pad a list of items into a batch dictionary of tensors."""
        input_ids, input_mask, stability_true_value = tuple(zip(*batch))
        input_ids = torch.from_numpy(pad_sequences(input_ids, 0))
        input_mask = torch.from_numpy(pad_sequences(input_mask, 0))
        stability_true_value = torch.FloatTensor(stability_true_value)  # type: ignore

        return {'input_ids': input_ids,
                'attention_mask': input_mask,
                'labels': stability_true_value}


class RemoteHomologyDataset(Dataset):
    """Classification dataset read from ``remote_homology/remote_homology_{split}.json``.

    ``split`` must be ``'train'``, ``'valid'`` or one of the three
    ``'test_*_holdout'`` names; anything else raises :class:`ValueError`.
    """

    def __init__(self, file_path, split, tokenizer):
        self.tokenizer = tokenizer
        self.file_path = file_path

        known_splits = ('train', 'valid', 'test_fold_holdout',
                        'test_family_holdout', 'test_superfamily_holdout')
        if split not in known_splits:
            raise ValueError(f"Unrecognized split: {split}. Must be one of "
                             f"['train', 'valid', 'test_fold_holdout', "
                             f"'test_family_holdout', 'test_superfamily_holdout']")

        self.seqs, self.labels = self.get_data(
            f'{self.file_path}/remote_homology/remote_homology_{split}.json')

    def get_data(self, file):
        """Read ``file`` and return its ``primary`` and ``fold_label`` columns."""
        frame = pd.read_json(file)
        return frame.primary, frame.fold_label

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        """Return ``(input_ids, input_mask, label)`` for one sequence."""
        # Residue codes U, Z, O and B are replaced by X before tokenising.
        residues = list(re.sub(r"[UZOB]", "X", self.seqs[index]))
        encoded = self.tokenizer(residues, is_split_into_words=True, truncation=True, padding="max_length", max_length=512)

        input_ids = np.array(encoded['input_ids'])
        return input_ids, np.ones_like(input_ids), self.labels[index]

    def collate_fn(self, batch):
        """Pad a list of items into a batch dictionary of tensors."""
        ids, masks, folds = zip(*batch)
        return {
            'input_ids': torch.from_numpy(pad_sequences(ids, 0)),
            'attention_mask': torch.from_numpy(pad_sequences(masks, 0)),
            'labels': torch.LongTensor(folds),  # type: ignore
        }


class SecondaryStructureDataset3(Dataset):
    """Token-level dataset read from ``secondary_structure/secondary_structure_{split}.lmdb``.

    Args:
        data_path: directory containing the ``secondary_structure`` folder.
        split: split name substituted into the LMDB file name.
        tokenizer: callable returning a mapping with an ``'input_ids'`` entry;
            its ``pad_token_id`` attribute is used when collating.
        in_memory: forwarded to :class:`LMDBDataset`.
        target: key of the per-residue label list in each record.
    """

    def __init__(
            self,
            data_path,
            split,
            tokenizer,
            in_memory,
            target='ss3'
    ):
        self.tokenizer = tokenizer
        data_file = f'secondary_structure/secondary_structure_{split}.lmdb'
        print(data_file)
        self.data = LMDBDataset(data_file=os.path.join(data_path, data_file), in_memory=in_memory)
        self.target = target
        print("in total {} proteins".format(len(self.data)))

        # Label value used for the special-token and padding positions.
        self.ignore_index: int = -100

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index: int):
        """Return ``(token_ids, input_mask, labels)`` for one protein."""
        item = self.data[index]
        # Honour ``target`` instead of a hard-coded key, and truncate local
        # copies so the (possibly cached) record is left untouched.
        primary = item['primary']
        target_labels = item[self.target]
        if len(primary) > 1024:
            primary = primary[:1024]
            target_labels = target_labels[:1024]
        # Offsets are requested from the tokenizer but not used here.
        token_ids = self.tokenizer(list(primary), is_split_into_words=True, return_offsets_mapping=True, truncation=False, padding=True)
        token_ids = np.array(token_ids['input_ids'])
        input_mask = np.ones_like(token_ids)

        # One ignore_index label on each side, for the cls/sep tokens.
        labels = np.asarray(target_labels, np.int64)
        labels = np.pad(labels, (1, 1), 'constant', constant_values=self.ignore_index)

        return token_ids, input_mask, labels

    def collate_fn(self, batch):
        """Pad a list of items into a batch dictionary of tensors."""
        input_ids, input_mask, ss_label = tuple(zip(*batch))
        input_ids = torch.from_numpy(pad_sequences(input_ids, constant_value=self.tokenizer.pad_token_id))
        attention_mask = torch.from_numpy(pad_sequences(input_mask, constant_value=0))
        labels = torch.from_numpy(pad_sequences(ss_label, constant_value=self.ignore_index))

        output = {'input_ids': input_ids,
                  'attention_mask': attention_mask,
                  'labels': labels}

        return output


class SecondaryStructureDataset8(Dataset):
    """Token-level dataset read from ``secondary_structure/secondary_structure_{split}.lmdb``.

    Args:
        data_path: directory containing the ``secondary_structure`` folder.
        split: split name substituted into the LMDB file name.
        tokenizer: callable returning a mapping with an ``'input_ids'`` entry;
            its ``pad_token_id`` attribute is used when collating.
        in_memory: forwarded to :class:`LMDBDataset`.
        target: key of the per-residue label list in each record.
    """

    def __init__(
            self,
            data_path,
            split,
            tokenizer,
            in_memory,
            target='ss8'
    ):
        self.tokenizer = tokenizer
        data_file = f'secondary_structure/secondary_structure_{split}.lmdb'
        self.data = LMDBDataset(data_file=os.path.join(data_path, data_file), in_memory=in_memory)
        self.target = target

        # Label value used for the special-token and padding positions.
        self.ignore_index: int = -100

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index: int):
        """Return ``(token_ids, input_mask, labels)`` for one protein."""
        item = self.data[index]
        # Honour ``target`` instead of a hard-coded key, and truncate local
        # copies so the (possibly cached) record is left untouched.
        primary = item['primary']
        target_labels = item[self.target]
        if len(primary) > 1024:
            primary = primary[:1024]
            target_labels = target_labels[:1024]
        # Offsets are requested from the tokenizer but not used here.
        token_ids = self.tokenizer(list(primary), is_split_into_words=True, return_offsets_mapping=True, truncation=False, padding=True)
        token_ids = np.array(token_ids['input_ids'])
        input_mask = np.ones_like(token_ids)

        # One ignore_index label on each side, for the cls/sep tokens.
        labels = np.asarray(target_labels, np.int64)
        labels = np.pad(labels, (1, 1), 'constant', constant_values=self.ignore_index)

        return token_ids, input_mask, labels

    def collate_fn(self, batch):
        """Pad a list of items into a batch dictionary of tensors."""
        input_ids, input_mask, ss_label = tuple(zip(*batch))

        input_ids = torch.from_numpy(pad_sequences(input_ids, constant_value=self.tokenizer.pad_token_id))
        attention_mask = torch.from_numpy(pad_sequences(input_mask, constant_value=0))
        labels = torch.from_numpy(pad_sequences(ss_label, constant_value=self.ignore_index))

        output = {'input_ids': input_ids,
                  'attention_mask': attention_mask,
                  'labels': labels}

        return output


# Task name -> kind of model output the task uses.
output_mode_mapping = {
    **dict.fromkeys(('ss3', 'ss8', 'contact'), 'token-level-classification'),
    'remote_homology': 'sequence-level-classification',
    **dict.fromkeys(('fluorescence', 'stability'), 'sequence-level-regression'),
}

# Task name -> processor class that builds the task's dataset splits.
dataset_processor_mapping = dict(
    remote_homology=RemoteHomologyProgress,
    fluorescence=FluorescenceProgress,
    stability=StabilityProgress,
    contact=ContactProgress,
    ss3=SecondaryStructureProcessor3,
    ss8=SecondaryStructureProcessor8,
)


if __name__ == "__main__":
    # Smoke test: build every split of every task and print its size.
    from transformers import BertTokenizer

    # ``cache_dir`` was misspelled ``chache_dir``, so the requested cache
    # location was never applied.
    protein_tokenizer = BertTokenizer.from_pretrained(
        "Rostlab/prot_bert",
        do_lower_case=False,
        cache_dir="../../data/temp_pretrained_PotBert",
    )
    data_dir = "../../data/downstream_datasets"

    dataset_name_list = ['ss3', 'ss8', 'contact', 'remote_homology', 'fluorescence', 'stability']

    for dataset_name in dataset_name_list:
        print(dataset_name)
        output_mode = output_mode_mapping[dataset_name]
        processor = dataset_processor_mapping[dataset_name](protein_tokenizer)
        num_labels = len(processor.get_labels())
        print("num labels: {}".format(num_labels))

        train_dataset = processor.get_train_examples(data_dir=data_dir)
        eval_dataset = processor.get_dev_examples(data_dir=data_dir)
        print("train_dataset", len(train_dataset))
        print("eval_dataset", len(eval_dataset))

        # (printed label, split name) pairs of the task's test splits.
        if dataset_name == 'remote_homology':
            test_splits = [
                ('test_fold_dataset', 'test_fold_holdout'),
                ('test_family_dataset', 'test_family_holdout'),
                ('test_superfamily_dataset', 'test_superfamily_holdout'),
            ]
        elif dataset_name == 'ss3' or dataset_name == 'ss8':
            test_splits = [
                ('cb513_dataset', 'cb513'),
                ('ts115_dataset', 'ts115'),
                ('casp12_dataset', 'casp12'),
            ]
        else:
            test_splits = [('test_dataset', 'test')]

        # Load every test split first, then report, as the output order was.
        test_sizes = [
            (label, len(processor.get_test_examples(data_dir=data_dir, data_cat=split)))
            for label, split in test_splits
        ]
        for label, size in test_sizes:
            print(label, size)
        if len(test_sizes) > 1:
            print("test in total", sum(size for _, size in test_sizes))
        print()


================================================
FILE: ChatDrug/TAPE_benchmark/metrics.py
================================================
from typing import Sequence, Callable, Dict

import numpy as np
import scipy
import torch
from seqeval.metrics import accuracy_score
from transformers import EvalPrediction


def accuracy_score_remote(y_true, y_pred):
    """Return top-1 accuracy of per-class scores against true labels.

    Args:
        y_true: iterable of true class ids, one per sample.
        y_pred: array of scores; the predicted class is the argmax along axis 1.

    Returns:
        The fraction of samples whose argmax equals the true label.
    """
    predicted = np.argmax(y_pred, axis=1)

    hits = 0
    for truth, guess in zip(y_true, predicted):
        hits = hits + (truth == guess)

    return hits / len(y_true)


def spearmanr(target: Sequence[float],
              prediction: Sequence[float]) -> float:
    """Return the Spearman rank correlation between two value sequences.

    Args:
        target: reference values.
        prediction: predicted values, aligned with ``target``.

    Returns:
        The ``correlation`` field of ``scipy.stats.spearmanr``.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.stats`` is loaded on every SciPy version.
    from scipy import stats

    target_array = np.asarray(target)
    prediction_array = np.asarray(prediction)
    return stats.spearmanr(target_array, prediction_array).correlation


def compute_accuracy_metrics(task_name, preds, labels):
    """Return ``{"accuracy": ...}`` for the ``remote_homology`` task.

    Raises:
        KeyError: for any other ``task_name``.
    """
    if task_name != 'remote_homology':
        raise KeyError(task_name)

    score = accuracy_score_remote(labels, preds)
    return {"accuracy": score}


def compute_spearmanr_metrics(task_name, preds, labels):
    """Return ``{"spearmanr": ...}`` for the ``fluorescence`` and ``stability`` tasks.

    Raises:
        KeyError: for any other ``task_name``.
    """
    if not (task_name == 'fluorescence' or task_name == 'stability'):
        raise KeyError(task_name)

    correlation = spearmanr(labels, preds)
    return {"spearmanr": correlation}


def simple_accuracy(preds, labels):
    """Return the mean of the element-wise equality of ``preds`` and ``labels``.

    The comparison result must provide ``.float()``, as torch tensors do.
    """
    matches = preds == labels
    return matches.float().mean()


def bt_compute_metrics(task_name, preds, labels):
    """Return ``{'acc': ...}`` for the ``ss3`` and ``ss8`` tasks.

    Asserts that ``preds`` and ``labels`` have the same length.

    Raises:
        KeyError: for any other ``task_name``.
    """
    n_preds, n_labels = len(preds), len(labels)
    assert n_preds == n_labels, f"Predictions and labels have mismatched lengths {n_preds} and {n_labels}"

    # TODO: complement remain tasks' metrics
    if not (task_name == 'ss3' or task_name == 'ss8'):
        raise KeyError(task_name)

    return {'acc': simple_accuracy(preds, labels)}


def build_compute_metrics_fn(task_name: str, output_type: str) -> Callable[[EvalPrediction], Dict]:
    """Build the metrics callback for ``task_name``.

    The returned function looks the metric function up in
    ``compute_metrics_mapping`` each time it is called.

    Args:
        task_name: key into ``compute_metrics_mapping``.
        output_type: ``'token-level-classification'``,
            ``'sequence-level-classification'`` or ``'sequence-level-regression'``.

    Returns:
        A function taking an ``EvalPrediction`` and returning a metrics dict.
        It raises ``Exception`` for any other ``output_type``.
    """
    sequence_level_types = ('sequence-level-classification', 'sequence-level-regression')

    def compute_metrics_fn(p: EvalPrediction):
        if output_type == 'token-level-classification':
            predicted = np.argmax(p.predictions, axis=-1)
            flat_labels = torch.from_numpy(p.label_ids).view(-1)
            flat_preds = torch.from_numpy(predicted).view(-1)

            # Positions labelled -100 are excluded from scoring.
            keep = flat_labels != -100
            metric_fn = compute_metrics_mapping[task_name]
            return metric_fn(task_name, flat_preds[keep], flat_labels[keep])

        if output_type in sequence_level_types:
            # Raw predictions are passed on; the metric function does any argmax.
            metric_fn = compute_metrics_mapping[task_name]
            return metric_fn(task_name, p.predictions, p.label_ids)

        raise Exception("output type not supported.")

    return compute_metrics_fn


# Task name -> metric function taking (task_name, preds, labels).
# 'contact' has no metric function registered (None).
compute_metrics_mapping = dict(
    ss3=bt_compute_metrics,
    ss8=bt_compute_metrics,
    remote_homology=compute_accuracy_metrics,
    fluorescence=compute_spearmanr_metrics,
    stability=compute_spearmanr_metrics,
    contact=None,
)


================================================
FILE: ChatDrug/TAPE_benchmark/models.py
================================================
from torch import nn
from torch.nn import MSELoss, CrossEntropyLoss, BCEWithLogitsLoss
from transformers import BertPreTrainedModel, BertModel, get_linear_schedule_with_warmup

import torch
from transformers.modeling_outputs import SequenceClassifierOutput, TokenClassifierOutput


class PairwiseContactPredictionHead(nn.Module):
    """Two-class head scoring every pair of sequence positions.

    Pair features are the element-wise product and difference of the two
    position embeddings, passed through dropout and a linear layer.

    Args:
        hidden_size: size of the last dimension of the input embeddings.
        ignore_index: target value excluded from the loss and the metric.
    """

    def __init__(self, hidden_size: int, ignore_index=-100):
        super().__init__()
        self.predict = nn.Sequential(
            nn.Dropout(), nn.Linear(2 * hidden_size, 2))
        self._ignore_index = ignore_index

    def forward(self, inputs, sequence_lengths, targets=None):
        """Score all position pairs and optionally compute loss and metric.

        Args:
            inputs: embeddings indexed as ``[batch, position, hidden]``; the
                first and last positions are dropped from the prediction.
            sequence_lengths: per-sample lengths, used for precision@L/5.
            targets: optional pairwise labels matching the trimmed prediction.

        Returns:
            ``(prediction,)`` without targets, otherwise
            ``((loss, metrics), prediction)`` where ``metrics`` holds
            ``'precision_at_l5'``.
        """
        prod = inputs[:, :, None, :] * inputs[:, None, :, :]
        diff = inputs[:, :, None, :] - inputs[:, None, :, :]
        pairwise_features = torch.cat((prod, diff), -1)
        prediction = self.predict(pairwise_features)
        # Average with the transpose so the score of (i, j) equals (j, i).
        prediction = (prediction + prediction.transpose(1, 2)) / 2
        prediction = prediction[:, 1:-1, 1:-1].contiguous()  # remove start/stop tokens
        outputs = (prediction,)

        if targets is not None:
            loss_fct = nn.CrossEntropyLoss(ignore_index=self._ignore_index)
            contact_loss = loss_fct(
                prediction.view(-1, 2), targets.view(-1))
            metrics = {'precision_at_l5':
                       self.compute_precision_at_l5(sequence_lengths, prediction, targets)}
            loss_and_metrics = (contact_loss, metrics)
            outputs = (loss_and_metrics,) + outputs

        return outputs

    def compute_precision_at_l5(self, sequence_lengths, prediction, labels):
        """Return the precision of the top ``length // 5`` scored pairs.

        Only pairs whose label is not ``ignore_index`` and whose second index
        is at least six positions past the first are candidates.
        """
        with torch.no_grad():
            valid_mask = labels != self._ignore_index
            seqpos = torch.arange(valid_mask.size(1), device=sequence_lengths.device)
            x_ind, y_ind = torch.meshgrid(seqpos, seqpos)
            valid_mask &= ((y_ind - x_ind) >= 6).unsqueeze(0)
            # ``F`` was never imported in this module; use torch.softmax.
            probs = torch.softmax(prediction, 3)[:, :, :, 1]
            valid_mask = valid_mask.type_as(probs)
            correct = 0
            total = 0
            for length, prob, label, mask in zip(sequence_lengths, probs, labels, valid_mask):
                masked_prob = (prob * mask).view(-1)
                most_likely = masked_prob.topk(length // 5, sorted=False)
                selected = label.view(-1).gather(0, most_likely.indices)
                correct += selected.sum().float()
                total += selected.numel()
            return correct / total


class BertForOntoProteinContactPrediction(BertPreTrainedModel):
    """BERT encoder followed by a pairwise contact-prediction head.

    The head is built with ``ignore_index=-1``. ``classifier`` and
    ``mean_output`` are stored but not used in ``forward``.
    """

    def __init__(self, config, mean_output):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config, add_pooling_layer=False)

        self.predict = PairwiseContactPredictionHead(config.hidden_size, ignore_index=-1)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.mean_output = mean_output
        self.init_weights()

    def forward(self, input_ids, protein_length, attention_mask=None, labels=None):
        """Run the encoder and the contact head, storing results on the encoder output.

        The returned object is the encoder output with ``'loss'``,
        ``'logits'`` and ``'prediction_score'`` entries added.
        """
        targets = labels
        # NOTE(review): attention_mask is accepted but not passed to the
        # encoder; confirm that is intended.
        outputs = self.bert(input_ids)

        sequence_output = outputs[0]
        output_precition = self.predict(sequence_output, protein_length, targets) + outputs[2:]
        # With labels the head returns ((loss, metrics), prediction), which
        # the indexing below relies on.
        # NOTE(review): with labels=None the head returns only (prediction,),
        # so these indices would no longer pick a loss and metrics.
        outputs['loss'] = output_precition[0][0]
        outputs['logits'] = output_precition[1]
        outputs['prediction_score'] = output_precition[0][1]
        return outputs


class BertForSequenceClassification2(BertPreTrainedModel):
    """BERT sequence classifier/regressor with optional masked mean pooling.

    When ``mean_output`` is ``True`` the sequence representation is the mean
    of the last hidden states over the attended positions; otherwise the
    second encoder output (``outputs[1]``) is used.
    """

    def __init__(self, config, mean_output):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.config = config

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        self.mean_output = mean_output

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=True,
    ):
        """Run the encoder, pool, classify and (optionally) compute the loss.

        Returns a ``SequenceClassifierOutput`` when ``return_dict`` is truthy,
        otherwise a tuple ``(loss?, logits, *extra_encoder_outputs)``.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        if self.mean_output is not True:
            pooled = outputs[1]
        else:
            # Index 0 is the last hidden state for both the tuple and the
            # ModelOutput return forms, so this also works with
            # return_dict=False.
            hidden = outputs[0]
            if attention_mask is None:
                # No mask supplied: treat every position as attended.
                token_mask = torch.ones(hidden.shape[:2], dtype=torch.bool, device=hidden.device)
            else:
                token_mask = attention_mask.bool()
            # Per-example mean over the attended positions only.
            pooled = torch.stack(
                [hidden[i, token_mask[i, :], :].mean(dim=0) for i in range(token_mask.size(0))],
                dim=0,
            )

        pooled = self.dropout(pooled)
        logits = self.classifier(pooled)

        loss = None
        if labels is not None:
            # Infer the problem type once and cache it on the config.
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and labels.dtype in (torch.long, torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


def load_adam_optimizer_and_scheduler(model, args, train_dataset):
    """Create an Adam optimizer and a linear warmup/decay schedule.

    The total number of steps is derived from the dataset size, the batch
    size, the gradient accumulation steps and the number of epochs; the first
    10% of those steps are used for warmup.

    Returns:
        ``(optimizer, scheduler)``.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    batches_per_epoch = len(train_dataset) // args.train_batch_size
    updates_per_epoch = batches_per_epoch // args.gradient_accumulation_steps
    total_steps = updates_per_epoch * args.num_train_epochs

    scheduler = get_linear_schedule_with_warmup(optimizer, int(0.1 * total_steps), total_steps)
    return optimizer, scheduler


class BertForTokenClassification2(BertPreTrainedModel):
    """BERT encoder with a per-token linear classification head."""

    def __init__(self, config, mean_output):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config, add_pooling_layer=False)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.mean_output = mean_output

    def forward(
            self,
            input_ids=None,
            attention_mask=None,
            token_type_ids=None,
            position_ids=None,
            head_mask=None,
            inputs_embeds=None,
            labels=None,
            output_attentions=None,
            output_hidden_states=None,
            return_dict=True,
    ):
        """Classify every token and, when ``labels`` is given, compute the loss.

        Returns a ``TokenClassifierOutput`` when ``return_dict`` is truthy,
        otherwise a tuple ``(loss?, logits, *extra_encoder_outputs)``.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        token_states = self.dropout(outputs[0])
        logits = self.classifier(token_states)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            flat_logits = logits.view(-1, self.num_labels)
            flat_labels = labels.view(-1)
            if attention_mask is not None:
                # Positions whose mask value is not 1 get the loss's
                # ignore index so they do not contribute.
                inactive = attention_mask.view(-1) != 1
                flat_labels = flat_labels.masked_fill(inactive, loss_fct.ignore_index)
            loss = loss_fct(flat_logits, flat_labels)

        if return_dict:
            return TokenClassifierOutput(
                loss=loss,
                logits=logits,
                hidden_states=outputs.hidden_states,
                attentions=outputs.attentions,
            )

        output = (logits,) + outputs[2:]
        return ((loss,) + output) if loss is not None else output


# Lookup table from a task key to the model class used for that task.
# The keys are presumably TAPE benchmark task names -- verify against callers.
model_mapping = {
    'remote_homology': BertForSequenceClassification2,
    'contact': BertForOntoProteinContactPrediction,
    'fluorescence': BertForSequenceClassification2,
    'stability': BertForSequenceClassification2,
    'ss3': BertForTokenClassification2,
    'ss8': BertForTokenClassification2
}


================================================
FILE: ChatDrug/TAPE_benchmark/trainer.py
================================================
import collections
import warnings
from typing import Tuple, Optional, Union, Dict, Any, List

import torch
import torch.nn as nn
from torch.cuda.amp import autocast
from torch.utils.data import IterableDataset, DataLoader
from transformers import Trainer, EvalPrediction, is_torch_tpu_available
from transformers.trainer_pt_utils import find_batch_size, nested_numpify
from transformers.trainer_utils import EvalLoopOutput, denumpify_detensorize, PredictionOutput
import numpy as np


class OntoProteinTrainer(Trainer):
    """``Trainer`` variant whose evaluation reports contact-precision metrics.

    Both evaluation loops average the per-batch ``precision_at_l5``,
    ``precision_at_l2`` and ``precision_at_l`` scores produced by the model
    and expose them as ``accuracy_l5``, ``accuracy_l2`` and ``accuracy_l``.
    Predictions and label ids are not collected.
    """

    def prediction_step(
        self,
        model: nn.Module,
        inputs: Dict[str, Union[torch.Tensor, Any]],
        prediction_loss_only: bool,
        ignore_keys: Optional[List[str]] = None,
    ) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor], Dict[str, Any]]:
        """Run one no-grad forward pass and unpack the contact metrics.

        ``prediction_loss_only`` and ``ignore_keys`` are accepted for
        signature compatibility but do not change what is returned.

        Returns:
            ``(loss, logit, labels, prediction_score)`` where ``loss`` is the
            detached mean loss, ``logit`` is the third model output value,
            ``labels`` is ``inputs['labels']`` and ``prediction_score`` holds
            the three precision entries read from the fourth output value.
        """
        inputs = self._prepare_inputs(inputs)
        with torch.no_grad():
            loss, outputs = self.compute_loss(model, inputs, return_outputs=True)
            loss = loss.mean().detach()
            if isinstance(outputs, dict):
                # Values in insertion order. Presumably (encoder states, loss,
                # contact logits, metric dict) -- TODO confirm against the
                # model's forward().
                logits = tuple(outputs.values())
            else:
                logits = outputs[1:]

        logit = logits[2]
        scores = logits[3]
        prediction_score = {
            'precision_at_l5': scores['precision_at_l5'],
            'precision_at_l2': scores['precision_at_l2'],
            'precision_at_l': scores['precision_at_l'],
        }
        labels = inputs['labels']

        return (loss, logit, labels, prediction_score)

    def prediction_loop(self, dataloader: DataLoader, description: str, prediction_loss_only: Optional[bool] = None):
        """Legacy evaluation loop; returns only the averaged contact metrics."""
        # Imported locally: the name is used below but is not part of the
        # module-level imports.
        from transformers.trainer_pt_utils import DistributedTensorGatherer

        if hasattr(self, "_prediction_loop"):
            warnings.warn(
                "The `_prediction_loop` method is deprecated and won't be called in a future version, define `prediction_loop` in your subclass.",
                FutureWarning,
            )
            return self._prediction_loop(dataloader, description, prediction_loss_only=prediction_loss_only)

        if not isinstance(dataloader.dataset, collections.abc.Sized):
            raise ValueError("dataset must implement __len__")
        prediction_loss_only = (
            prediction_loss_only if prediction_loss_only is not None else self.args.prediction_loss_only
        )

        model = self.model
        # multi-gpu eval
        if self.args.n_gpu > 1:
            model = torch.nn.DataParallel(model)
        # Note: in torch.distributed mode, there's no point in wrapping the model
        # inside a DistributedDataParallel as we'll be under `no_grad` anyways.

        batch_size = dataloader.batch_size
        num_examples = self.num_examples(dataloader)
        print(f"***** Running {description} *****")
        print(f"  Num examples = {num_examples}")
        print(f"  Batch size = {batch_size}")
        losses_host: torch.Tensor = None

        world_size = 1
        if is_torch_tpu_available():
            # NOTE(review): `xm` (torch_xla.core.xla_model) is not imported in
            # this module, so the TPU path raises NameError -- confirm whether
            # TPU support is intended.
            world_size = xm.xrt_world_size()
        elif self.args.local_rank != -1:
            world_size = torch.distributed.get_world_size()
        world_size = max(1, world_size)

        eval_losses_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=batch_size)

        model.eval()

        if is_torch_tpu_available():
            # NOTE(review): `pl` (torch_xla.distributed.parallel_loader) is not
            # imported in this module either.
            dataloader = pl.ParallelLoader(dataloader, [self.args.device]).per_device_loader(self.args.device)

        if self.args.past_index >= 0:
            self._past = None

        self.callback_handler.eval_dataloader = dataloader

        contact_meterics_l5 = []
        contact_meterics_l2 = []
        contact_meterics_l = []
        for step, inputs in enumerate(dataloader):
            loss, _, _, prediction_score = self.prediction_step(model, inputs, prediction_loss_only)
            contact_meterics_l5.append(torch.mean(prediction_score['precision_at_l5']))
            contact_meterics_l2.append(torch.mean(prediction_score['precision_at_l2']))
            contact_meterics_l.append(torch.mean(prediction_score['precision_at_l']))
            if loss is not None:
                losses = loss.repeat(batch_size)
                losses_host = losses if losses_host is None else torch.cat((losses_host, losses), dim=0)

            self.control = self.callback_handler.on_prediction_step(self.args, self.state, self.control)

            # Gather all tensors and put them back on the CPU if we have done enough accumulation steps.
            if self.args.eval_accumulation_steps is not None and (step + 1) % self.args.eval_accumulation_steps == 0:
                eval_losses_gatherer.add_arrays(self._gather_and_numpify(losses_host, "eval_losses"))

                # Set back to None to begin a new accumulation
                losses_host = None

        if self.args.past_index and hasattr(self, "_past"):
            # Clean the state at the end of the evaluation loop
            delattr(self, "_past")

        # Gather all remaining tensors and put them back on the CPU
        eval_losses_gatherer.add_arrays(self._gather_and_numpify(losses_host, "eval_losses"))
        # The gathered losses are finalized but not added to the metrics.
        eval_losses_gatherer.finalize()

        metrics = {}
        metrics["accuracy_l5"] = sum(contact_meterics_l5) / len(contact_meterics_l5)
        metrics["accuracy_l2"] = sum(contact_meterics_l2) / len(contact_meterics_l2)
        metrics["accuracy_l"] = sum(contact_meterics_l) / len(contact_meterics_l)
        metrics = denumpify_detensorize(metrics)

        return PredictionOutput(predictions=None, label_ids=None, metrics=metrics)

    def evaluation_loop(
            self,
            dataloader: DataLoader,
            description: str,
            prediction_loss_only: Optional[bool] = None,
            ignore_keys: Optional[List[str]] = None,
            metric_key_prefix: str = "eval",
    ) -> EvalLoopOutput:
        """Evaluation loop; returns only the averaged contact metrics.

        ``metric_key_prefix`` is accepted for signature compatibility; the
        metric keys are emitted without a prefix.
        """
        prediction_loss_only = (
            prediction_loss_only if prediction_loss_only is not None else self.args.prediction_loss_only
        )

        # if eval is called w/o train init deepspeed here
        if self.args.deepspeed and not self.deepspeed:
            # Imported locally: only needed on the DeepSpeed path and not part
            # of the module-level imports.
            from transformers.deepspeed import deepspeed_init

            # XXX: eval doesn't have `resume_from_checkpoint` arg but we should be able to do eval
            # from the checkpoint eventually
            deepspeed_engine, _, _ = deepspeed_init(self, num_training_steps=0, resume_from_checkpoint=None)
            self.model = deepspeed_engine.module
            self.model_wrapped = deepspeed_engine
            self.deepspeed = deepspeed_engine
            # XXX: we don't need optim/sched for inference, but this needs to be sorted out, since
            # for example the Z3-optimizer is a must for zero3 to work even for inference - what we
            # don't need is the deepspeed basic optimizer which is self.optimizer.optimizer
            deepspeed_engine.optimizer.optimizer = None
            deepspeed_engine.lr_scheduler = None

        model = self._wrap_model(self.model, training=False)

        # if full fp16 is wanted on eval and this ``evaluation`` or ``predict`` isn't called while
        # ``train`` is running, halve it first and then put on device
        if not self.is_in_train and self.args.fp16_full_eval:
            model = model.half().to(self.args.device)

        batch_size = dataloader.batch_size

        print(f"***** Running {description} *****")
        if isinstance(dataloader.dataset, collections.abc.Sized):
            print(f"  Num examples = {self.num_examples(dataloader)}")
        else:
            print("  Num examples: Unknown")
        print(f"  Batch size = {batch_size}")

        model.eval()

        self.callback_handler.eval_dataloader = dataloader
        # Do this before wrapping.
        eval_dataset = dataloader.dataset

        if is_torch_tpu_available():
            # NOTE(review): `pl` (torch_xla.distributed.parallel_loader) is not
            # imported in this module, so the TPU path raises NameError.
            dataloader = pl.ParallelLoader(dataloader, [self.args.device]).per_device_loader(self.args.device)

        if self.args.past_index >= 0:
            self._past = None

        losses_host = None
        all_losses = None

        contact_meterics_l5 = []
        contact_meterics_l2 = []
        contact_meterics_l = []
        # Will be useful when we have an iterable dataset so don't know its length.
        observed_num_examples = 0
        # Main evaluation loop
        for step, inputs in enumerate(dataloader):
            # Update the observed num examples
            observed_batch_size = find_batch_size(inputs)
            if observed_batch_size is not None:
                observed_num_examples += observed_batch_size

            # Prediction step
            loss, _, _, prediction_score = self.prediction_step(
                model, inputs, prediction_loss_only, ignore_keys=ignore_keys
            )
            contact_meterics_l5.append(torch.mean(prediction_score['precision_at_l5']))
            contact_meterics_l2.append(torch.mean(prediction_score['precision_at_l2']))
            contact_meterics_l.append(torch.mean(prediction_score['precision_at_l']))
            # Update containers on host
            if loss is not None:
                losses = self._nested_gather(loss.repeat(batch_size))
                losses_host = losses if losses_host is None else torch.cat((losses_host, losses), dim=0)

            self.control = self.callback_handler.on_prediction_step(self.args, self.state, self.control)

            # Gather all tensors and put them back on the CPU if we have done enough accumulation steps.
            if self.args.eval_accumulation_steps is not None and (step + 1) % self.args.eval_accumulation_steps == 0:
                if losses_host is not None:
                    losses = nested_numpify(losses_host)
                    all_losses = losses if all_losses is None else np.concatenate((all_losses, losses), axis=0)

                # Set back to None to begin a new accumulation
                losses_host = None

        if self.args.past_index and hasattr(self, "_past"):
            # Clean the state at the end of the evaluation loop
            delattr(self, "_past")

        # Gather all remaining tensors and put them back on the CPU
        if losses_host is not None:
            losses = nested_numpify(losses_host)
            all_losses = losses if all_losses is None else np.concatenate((all_losses, losses), axis=0)

        # Number of samples
        if not isinstance(eval_dataset, IterableDataset):
            num_samples = len(eval_dataset)
        else:
            # Imported locally: only needed for iterable datasets and not part
            # of the module-level imports.
            from transformers.trainer_pt_utils import IterableDatasetShard

            # The instance check is weird and does not actually check for the type, but whether the dataset has the
            # right methods. Therefore we need to make sure it also has the attribute.
            if isinstance(eval_dataset, IterableDatasetShard) and hasattr(eval_dataset, "num_examples"):
                num_samples = eval_dataset.num_examples
            else:
                num_samples = observed_num_examples

        # Number of losses has been rounded to a multiple of batch_size and in a distributed training, the number of
        # samplers has been rounded to a multiple of batch_size, so we truncate.
        if all_losses is not None:
            all_losses = all_losses[:num_samples]

        metrics = {}
        metrics["accuracy_l5"] = sum(contact_meterics_l5) / len(contact_meterics_l5)
        metrics["accuracy_l2"] = sum(contact_meterics_l2) / len(contact_meterics_l2)
        metrics["accuracy_l"] = sum(contact_meterics_l) / len(contact_meterics_l)
        # To be JSON-serializable, we need to remove numpy types or zero-d tensors
        metrics = denumpify_detensorize(metrics)

        return EvalLoopOutput(predictions=None, label_ids=None, metrics=metrics, num_samples=num_samples)


================================================
FILE: ChatDrug/task_and_evaluation/Conversational_LLMs_utils.py
================================================
import sys
import openai
import time
import torch
import sys

# NOTE(review): YOUR_API_KEY is not defined in the visible part of this module;
# it looks like a placeholder to be replaced with a real OpenAI API key before
# this module can be imported -- confirm.
openai.api_key = YOUR_API_KEY

def complete(messages, model, tokenizer, conversational_LLM, drug_type, round_index=None):
    """Dispatch a chat completion to the backend named by ``conversational_LLM``.

    Supported backends are ``'chatgpt'``, ``'llama2'`` and ``'galactica'``.
    For galactica the concrete routine is chosen by ``drug_type``
    (``'molecule'``, ``'peptide'`` or ``'protein'``); any other drug type
    yields ``None``.

    Raises:
        NotImplementedError: for any other ``conversational_LLM`` value.
    """
    if conversational_LLM == 'chatgpt':
        return complete_chatgpt(messages)
    if conversational_LLM == 'llama2':
        return complete_llama(messages, model, tokenizer)
    if conversational_LLM != 'galactica':
        print(f'>>Using Vicuna Model')
        raise NotImplementedError

    # Galactica uses a drug-type specific prompt format.
    if drug_type == "molecule":
        galactica_backend = complete_galactica_molecule
    elif drug_type == "peptide":
        galactica_backend = complete_galactica_peptide
    elif drug_type == 'protein':
        galactica_backend = complete_galactica_protein
    else:
        return None
    return galactica_backend(messages, model, tokenizer, round_index)


def complete_chatgpt(messages):
    """Query ``gpt-3.5-turbo`` and return the text of the first choice.

    The request is retried once per second until it succeeds. An
    ``openai.error.InvalidRequestError`` is treated as an over-long prompt:
    the call stops retrying and returns the string ``"prompt too long"``.

    Only ``Exception`` subclasses are retried, so ``KeyboardInterrupt`` and
    ``SystemExit`` propagate and the loop can be interrupted.
    """
    temperature = 0
    while True:
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=messages,
                temperature=temperature,
                frequency_penalty=0.2,
                n=None)
            return response["choices"][0]["message"]['content']
        except openai.error.InvalidRequestError:
            # something is wrong: e.g. prompt too long
            print(f"InvalidRequestError\nPrompt error.\n\n")
            print("prompt too long")
            return "prompt too long"
        except AssertionError as err:
            print("Assert error:", err)
        except Exception as err:
            print("API error:", type(err))
        # Back off briefly before the next attempt.
        time.sleep(1)


def complete_galactica_molecule(
    messages,
    model,
    tokenizer,
    round_index,
):
    """Sample a molecule answer from Galactica using SMILES tags.

    The prompt is built from ``messages[1:]``: in round 0 only the first of
    them is used; otherwise messages at even offsets are followed by an
    opening SMILES tag and those at odd offsets by a closing tag and a blank
    line. The answer is the text between the ``(2 + 3 * round_index)``-th
    opening tag split and the next closing tag of the decoded output.
    """
    start_tag = "[START_I_SMILES]"
    end_tag = "[END_I_SMILES]"
    sampling_options = dict(
        max_new_tokens=100,
        do_sample=True,
        top_p=0.95,
        temperature=1.0,
        use_cache=True,
        top_k=50,
        repetition_penalty=1.0,
        length_penalty=1,
    )
    with torch.no_grad():
        if round_index == 0:
            prompt = messages[1]['content'] + " " + start_tag
        else:
            pieces = []
            for offset, message in enumerate(messages[1:]):
                if offset % 2 == 0:
                    pieces.append(message['content'] + " " + start_tag)
                else:
                    pieces.append(message['content'] + end_tag + "\n\n")
            prompt = "".join(pieces)

        prompt_ids = tokenizer(prompt, return_tensors="pt").input_ids.to('cuda')
        generated = model.generate(prompt_ids, **sampling_options)

        decoded = tokenizer.decode(generated[0])
        answer_segment = decoded.split(start_tag)[2 + round_index * 3].strip()
        return answer_segment.split(end_tag)[0].strip()


def complete_galactica_peptide(
    messages,
    model,
    tokenizer,
    round_index,
):
    """Sample a peptide answer from Galactica using Question/Answer markers.

    In round 0 the prompt is ``messages[1]`` alone; otherwise ``messages[1:]``
    are concatenated, with a blank line after every message at an odd offset.
    The answer is the text after the ``(1 + round_index)``-th ``"Answer:"``
    split of the decoded output, cut at the next ``"Question:"``.
    """
    sampling_options = dict(
        max_new_tokens=100,
        do_sample=True,
        top_p=0.95,
        temperature=1.0,
        use_cache=True,
        top_k=50,
        repetition_penalty=1.0,
        length_penalty=1,
    )
    with torch.no_grad():
        if round_index == 0:
            prompt = messages[1]['content']
        else:
            print(messages)
            pieces = []
            for offset, message in enumerate(messages[1:]):
                if offset % 2 == 0:
                    pieces.append(message['content'])
                else:
                    pieces.append(message['content'] + "\n\n")
            prompt = "".join(pieces)

        prompt_ids = tokenizer(prompt, return_tensors="pt").input_ids.to('cuda')
        generated = model.generate(prompt_ids, **sampling_options)

        decoded = tokenizer.decode(generated[0])
        answer_segment = decoded.split("Answer:")[1 + round_index].strip()
        return answer_segment.split("Question:")[0].strip()


def complete_galactica_protein(
    messages,
    model,
    tokenizer,
    round_index,
):
    """Sample a protein answer from Galactica using amino-acid tags.

    The prompt is built from ``messages[1:]``: in round 0 only the first of
    them is used; otherwise messages at even offsets are followed by an
    opening amino tag and those at odd offsets by a closing tag and a blank
    line. The answer is the text between the ``(2 + 3 * round_index)``-th
    opening tag split and the next closing tag of the decoded output.
    """
    start_tag = "[START_AMINO]"
    end_tag = "[END_AMINO]"
    sampling_options = dict(
        max_new_tokens=100,
        do_sample=True,
        top_p=0.95,
        temperature=1.0,
        use_cache=True,
        top_k=50,
        repetition_penalty=1.0,
        length_penalty=1,
    )
    with torch.no_grad():
        if round_index == 0:
            prompt = messages[1]['content'] + " " + start_tag
        else:
            pieces = []
            for offset, message in enumerate(messages[1:]):
                if offset % 2 == 0:
                    pieces.append(message['content'] + " " + start_tag)
                else:
                    pieces.append(message['content'] + end_tag + "\n\n")
            prompt = "".join(pieces)

        prompt_ids = tokenizer(prompt, return_tensors="pt").input_ids.to('cuda')
        generated = model.generate(prompt_ids, **sampling_options)

        decoded = tokenizer.decode(generated[0])
        answer_segment = decoded.split(start_tag)[2 + round_index * 3].strip()
        return answer_segment.split(end_tag)[0].strip()


# Markers wrapped around each user turn when a dialog is rendered to text.
B_INST, E_INST = "[INST]", "[/INST]"
# Markers wrapped around the system prompt, which is merged into the first turn.
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
# System prompt used when a dialog does not start with its own system message.
DEFAULT_SYSTEM_PROMPT = """\
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""

def format_tokens(dialogs, tokenizer):
    """Render each dialog into a flat list of token ids.

    For every dialog: a default system message is prepended when the dialog
    does not start with one, the system content is merged into the first
    following message between the system markers, and the turns are encoded
    as instruction-wrapped prompt/answer pairs followed by the final prompt.

    The merged dialog must alternate user/assistant roles, starting and
    ending with a user message; this is checked with assertions.

    Returns:
        One list of token ids per input dialog.
    """
    all_prompts = []
    for dialog in dialogs:
        if dialog[0]["role"] != "system":
            dialog = [{"role": "system", "content": DEFAULT_SYSTEM_PROMPT}] + dialog

        # Fold the system message into the message that follows it.
        system_msg, first_msg = dialog[0], dialog[1]
        merged_first = {
            "role": first_msg["role"],
            "content": B_SYS + system_msg["content"] + E_SYS + first_msg["content"],
        }
        dialog = [merged_first] + dialog[2:]

        prompts = dialog[::2]
        answers = dialog[1::2]
        assert all([msg["role"] == "user" for msg in prompts]) and all(
            [msg["role"] == "assistant" for msg in answers]
        ), (
            "model only supports 'system','user' and 'assistant' roles, "
            "starting with user and alternating (u/a/u/a/u...)"
        )

        # Please verify that your tokenizer supports adding "[INST]", "[/INST]"
        # to your inputs. Here, we are adding them manually.
        token_ids = []
        for prompt, answer in zip(prompts, answers):
            token_ids.extend(
                tokenizer.encode(
                    f"{B_INST} {(prompt['content']).strip()} {E_INST} {(answer['content']).strip()} ",
                )
            )

        assert (
            dialog[-1]["role"] == "user"
        ), f"Last message must be from user, got {dialog[-1]['role']}"
        # The closing user turn is encoded without an answer.
        token_ids += tokenizer.encode(
            f"{B_INST} {(dialog[-1]['content']).strip()} {E_INST}",
        )
        all_prompts.append(token_ids)
    return all_prompts


def complete_llama(
    dialogs,
    model,
    tokenizer,
    max_new_tokens =1024, # maximum number of tokens to generate
    seed: int=42, # seed value for reproducibility
    do_sample: bool=True, # sample when True; greedy decoding otherwise
    use_cache: bool=True,  # [optional] reuse past key/value attentions to speed up decoding
    top_p: float=0.95, # [optional] nucleus sampling threshold
    temperature: float=1.0, # [optional] modulates the next-token probabilities
    top_k: int=50, # [optional] number of highest-probability tokens kept for top-k filtering
    repetition_penalty: float=1.0, # repetition penalty; 1.0 means no penalty
    length_penalty: int=1, # [optional] exponential length penalty for beam-based generation
    **kwargs
):
    """Generate a reply for a single dialog and return it as text.

    The dialog is tokenized with ``format_tokens``, generation runs on
    ``cuda:0``, and the reply is whatever follows the last ``"[/INST]"`` in
    the decoded output. Extra keyword arguments go to ``model.generate``.
    """
    # Set the seeds for reproducibility
    torch.cuda.manual_seed(seed)
    prompt_ids = format_tokens([dialogs], tokenizer)[0]

    with torch.no_grad():
        # Add the batch dimension and move the prompt to the GPU.
        input_tensor = torch.tensor(prompt_ids).long().unsqueeze(0).to("cuda:0")
        generated = model.generate(
            input_tensor,
            max_new_tokens=max_new_tokens,
            do_sample=do_sample,
            top_p=top_p,
            temperature=temperature,
            use_cache=use_cache,
            top_k=top_k,
            repetition_penalty=repetition_penalty,
            length_penalty=length_penalty,
            **kwargs
        )

        decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
        return decoded.split("[/INST]")[-1].strip()

================================================
FILE: ChatDrug/task_and_evaluation/__init__.py
================================================
import numpy as np
from .small_molecule_editing import evaluate_molecule, task_specification_dict_molecule, parse_molecule, task2threshold_list
from .peptide_editing import evaluate_peptide, task_specification_dict_peptide, parse_peptide
from .protein_editing import evaluate_pairwise_list_result, load_ProteinDT_model, task_specification_dict_protein, parse_protein
from transformers import BertTokenizerFast

def task_to_drug(task):
    """Map a numeric task id to its drug type.

    Ids below 300 are ``'molecule'``, below 500 ``'peptide'`` and below 600
    ``'protein'``.

    Raises:
        NotImplementedError: for any id that is not below 600.
    """
    id_ranges = ((300, 'molecule'), (500, 'peptide'), (600, 'protein'))
    for upper_bound, drug_type in id_ranges:
        if task < upper_bound:
            return drug_type
    raise NotImplementedError
    

def get_task_specification_dict(task):
    """Return the task-specification table that covers ``task``.

    Ids below 300 use the molecule table, below 500 the peptide table and
    below 600 the protein table.

    Raises:
        NotImplementedError: for any id that is not below 600.
    """
    if not task < 600:
        raise NotImplementedError
    if task < 300:
        return task_specification_dict_molecule
    return task_specification_dict_peptide if task < 500 else task_specification_dict_protein


def parse(task, input_drug, generated_text, addition_drug=None):
    """Parse generated text with the parser that matches the task's id range.

    Ids below 300 use the molecule parser, below 500 the peptide parser and
    below 600 the protein parser; the parser's result is returned unchanged.

    Raises:
        NotImplementedError: for any id that is not below 600.
    """
    if not task < 600:
        raise NotImplementedError
    if task < 300:
        parser = parse_molecule
    elif task < 500:
        parser = parse_peptide
    else:
        parser = parse_protein
    return parser(input_drug, generated_text, addition_drug)


def evaluate(input_drug, generated_drug, task, constraint, threshold_dict):
    """Evaluate one edited drug against the objective of ``task``.

    Args:
        input_drug: the drug before editing.
        generated_drug: the drug proposed by the model.
        task: numeric task id; it selects the evaluation branch
            (``< 300`` molecule, ``< 400`` single-allele peptide,
            ``< 500`` two-allele peptide, otherwise protein).
        constraint: ``'loose'`` selects the first molecule threshold list,
            anything else the second. Only used for molecule tasks.
        threshold_dict: per-allele thresholds, indexed by allele type. Only
            used for peptide tasks.

    Returns:
        The evaluator's answer for this single pair, or ``-1`` when a peptide
        evaluation raises an exception.
    """
    if task < 300:
        threshold_list = task2threshold_list[task][0 if constraint == 'loose' else 1]
        _, _, answer = evaluate_molecule(input_drug, generated_drug, task, threshold_list=threshold_list)
    elif task < 400:
        task_specification_dict = get_task_specification_dict(task)
        _, _, target_allele_type = task_specification_dict[task]
        try:
            _, _, answer = evaluate_peptide(
                [input_drug], [generated_drug], target_allele_type, [threshold_dict[target_allele_type]]
            )
        except Exception:
            # Best effort: a failed peptide evaluation is reported as -1.
            # KeyboardInterrupt / SystemExit are deliberately not swallowed.
            return -1
        answer = answer[0]
    elif task < 500:
        task_specification_dict = get_task_specification_dict(task)
        _, _, target_allele_type1, target_allele_type2 = task_specification_dict[task]
        try:
            _, _, answer1 = evaluate_peptide(
                [input_drug], [generated_drug], target_allele_type1, [threshold_dict[target_allele_type1]]
            )
            _, _, answer2 = evaluate_peptide(
                [input_drug], [generated_drug], target_allele_type2, [threshold_dict[target_allele_type2]]
            )
        except Exception:
            return -1
        # Both allele objectives must be satisfied.
        answer = np.logical_and(answer1, answer2)
        answer = answer[0]
    else:
        device = "cuda"
        cache_dir = "./data/protein_editing/temp_pretrained_ProteinDT"
        input_model_path = "./data/protein_editing/pytorch_model_ss3.bin"
        # NOTE: the model and tokenizer are reloaded on every call.
        model = load_ProteinDT_model(input_model_path, cache_dir, mean_output=True, num_labels=3)
        model = model.to(device)
        # `cache_dir` is the keyword `from_pretrained` recognises; the former
        # misspelling meant the cache directory was not applied.
        tokenizer = BertTokenizerFast.from_pretrained(
            "Rostlab/prot_bert_bfd", cache_dir=cache_dir, do_lower_case=False
        )
        _, _, answer = evaluate_pairwise_list_result(
            model=model,
            tokenizer=tokenizer,
            input_protein_list=[input_drug],
            output_protein_list=[generated_drug],
            task_id=task,
            device=device,
        )
        answer = answer[0]

    return answer


================================================
FILE: ChatDrug/task_and_evaluation/peptide_editing.py
================================================
from collections import defaultdict
import re
import numpy as np
from mhcflurry import Class1PresentationPredictor

# One-letter codes of the 20 amino acids listed by this module, in the module's original order.
AMINO_ACIDS = list("ARNDCQEGHILKMFPSTWYV")


# Prompt used by the single-target tasks (3xx): one target allele.
_SINGLE_TARGET_PEPTIDE_PROMPT = "We want a peptide that binds to TARGET_ALLELE_TYPE. We have a peptide PEPTIDE_SEQUENCE that binds to SOURCE_ALLELE_TYPE, can you help modify it? The output peptide should be similar to input peptide."
# Prompt used by the dual-target tasks (4xx): two target alleles.
_DUAL_TARGET_PEPTIDE_PROMPT = "We want a peptide that binds to TARGET_ALLELE_TYPE_01 and TARGET_ALLELE_TYPE_02. We have a peptide PEPTIDE_SEQUENCE that binds to SOURCE_ALLELE_TYPE, can you help modify it? The output peptide should be similar to input peptide."

# task id -> (source allele, target allele)
_SINGLE_TARGET_PEPTIDE_ALLELES = {
    301: ("HLA-C*16:01", "HLA-B*44:02"),
    302: ("HLA-B*08:01", "HLA-C*03:03"),
    303: ("HLA-C*12:02", "HLA-B*40:01"),
    304: ("HLA-A*11:01", "HLA-B*08:01"),
    305: ("HLA-A*24:02", "HLA-B*08:01"),
    306: ("HLA-C*12:02", "HLA-B*40:02"),
}

# task id -> (source allele, first target allele, second target allele)
_DUAL_TARGET_PEPTIDE_ALLELES = {
    401: ("HLA-A*29:02", "HLA-B*08:01", "HLA-C*15:02"),
    402: ("HLA-A*03:01", "HLA-B*40:02", "HLA-C*14:02"),
    403: ("HLA-C*14:02", "HLA-B*08:01", "HLA-A*11:01"),
}

# Each entry is a list: [prompt template, source allele, target allele(s)...].
task_specification_dict_peptide = {
    **{
        task_id: [_SINGLE_TARGET_PEPTIDE_PROMPT, *alleles]
        for task_id, alleles in _SINGLE_TARGET_PEPTIDE_ALLELES.items()
    },
    **{
        task_id: [_DUAL_TARGET_PEPTIDE_PROMPT, *alleles]
        for task_id, alleles in _DUAL_TARGET_PEPTIDE_ALLELES.items()
    },
}


# Relative path to the pretrained presentation-predictor checkpoint; being relative,
# it is resolved against the current working directory of the process.
model_pretrained_checkpoint = "data/peptide/models_class1_presentation/models"
# NOTE: the predictor is loaded as a side effect of importing this module.
MHC_peptide_predictor = Class1PresentationPredictor.load(model_pretrained_checkpoint)
# Margin by which an edited peptide's score must exceed the input's score in `evaluate_peptide`.
EPS = 1e-10


def parse_peptide(input_peptide, raw_text, retrieval_sequence):
    """Extract the first candidate peptide from a free-text reply.

    Candidates are runs of at least five uppercase letters found in
    ``raw_text``; occurrences equal to ``input_peptide`` or (when given)
    ``retrieval_sequence`` are ignored.

    Returns:
        ``[peptide]`` when the first remaining candidate is shorter than 16
        characters and contains no ``"X"``; ``None`` when the first remaining
        candidate fails that check; ``[]`` when no candidate remains.
    """
    candidates = [
        candidate
        for candidate in re.findall('[A-Z]{5,}', raw_text)
        if candidate != input_peptide
    ]
    if retrieval_sequence is not None:
        candidates = [candidate for candidate in candidates if candidate != retrieval_sequence]

    if not candidates:
        return []

    # Only the first surviving candidate is considered; later ones are never inspected.
    first_candidate = candidates[0]
    if len(first_candidate) < 16 and "X" not in first_candidate:
        return [first_candidate]
    return None


def evaluate_peptide(input_peptide_sequence_list, output_peptide_sequence_list, target_allele_type, threshold_list=[0.75]):
    """Score input and output peptides against one allele and flag successful edits.

    Both peptide lists are scored with the module-level ``MHC_peptide_predictor``
    for ``target_allele_type``. An edit is flagged when the output's
    ``presentation_score`` exceeds the input's by more than ``EPS`` and also
    exceeds ``threshold_list[0]``.

    Returns:
        ``(input_df, output_df, flag)``: the two prediction results and an
        element-wise boolean array (one entry per peptide pair).
    """
    def _predict_scores(peptides):
        # Returns the raw prediction result together with its score column as an array.
        prediction = MHC_peptide_predictor.predict(peptides=peptides, alleles=[target_allele_type], verbose=False)
        return prediction, np.array(prediction["presentation_score"].to_list())

    input_df, input_value = _predict_scores(input_peptide_sequence_list)
    output_df, output_value = _predict_scores(output_peptide_sequence_list)

    improved = output_value > input_value + EPS
    above_threshold = output_value > threshold_list[0]
    flag = np.logical_and(improved, above_threshold)

    return input_df, output_df, flag


def load_allele2protein_sequence(file_path):
    """Load an allele -> protein-sequence mapping from a space-separated text file.

    The first line is treated as a header and skipped. Every other non-blank
    line is split on single spaces; the first field is the allele and the
    second the protein sequence. If an allele appears more than once, the
    first occurrence wins.

    Args:
        file_path: path of the file to read.

    Returns:
        dict mapping allele name to protein sequence.

    Raises:
        IndexError: if a non-blank data line has fewer than two fields.
    """
    allele2protein_sequence = {}
    # The context manager guarantees the handle is closed, even on a parse error.
    with open(file_path, "r") as f:
        next(f, None)  # skip the header line (no-op for an empty file)
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines carry no record; skipping avoids an IndexError below.
                continue
            fields = line.split(" ")
            allele = fields[0]
            protein_sequence = fields[1]
            # Keep the first sequence seen for each allele.
            allele2protein_sequence.setdefault(allele, protein_sequence)
    return allele2protein_sequence


def load_selected_allele_list(file_path):
    """Read a text file and return its lines, stripped of surrounding whitespace.

    Args:
        file_path: path of the file to read (one allele per line).

    Returns:
        list of stripped lines, in file order. Blank lines are kept as empty
        strings.
    """
    # The context manager guarantees the handle is closed once reading is done.
    with open(file_path, "r") as f:
        return [line.strip() for line in f]


def load_raw_allele2peptide(file_path):
    """Group the peptides of a CSV file by allele.

    The CSV must provide ``allele`` and ``peptide`` columns. Rows are read in
    file order, so each allele's peptide list preserves that order.

    Returns:
        ``defaultdict(list)`` mapping allele to its list of peptides.
    """
    import pandas as pd

    table = pd.read_csv(file_path)
    alleles = table["allele"].tolist()
    peptides = table["peptide"].tolist()

    peptides_by_allele = defaultdict(list)
    for row_allele, row_peptide in zip(alleles, peptides):
        peptides_by_allele[row_allele].append(row_peptide)
    return peptides_by_allele


def load_processed_allele2peptide(file_path):
    """Load and return the JSON content of ``file_path``.

    Args:
        file_path: path of the JSON file to read.

    Returns:
        The decoded JSON value, exactly as produced by ``json.load``.
    """
    import json

    # The context manager guarantees the handle is closed, even if decoding fails.
    with open(file_path, "r") as f:
        return json.load(f)


================================================
FILE: ChatDrug/task_and_evaluation/prompt_specification.json
================================================
{"task_id": 101, "PPDS_prompt": "Can you make molecule [input SMILES] more soluble in water? The output molecule should be similar to the input molecule. Give me five molecules in SMILES only and list them using bullet points. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output SMILES] is not correct. We find a sequence [retrieval SMILES] which is correct and similar to the molecule you provided. Can you give me a new molecule?"}
{"task_id": 102, "PPDS_prompt": "Can you make molecule [input SMILES] less soluble in water? The output molecule should be similar to the input molecule. Give me five molecules in SMILES only and list them using bullet points. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output SMILES] is not correct. We find a sequence [retrieval SMILES] which is correct and similar to the molecule you provided. Can you give me a new molecule?"}
{"task_id": 103, "PPDS_prompt": "Can you make molecule [input SMILES] more like a drug? The output molecule should be similar to the input molecule. Give me five molecules in SMILES only and list them using bullet points. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output SMILES] is not correct. We find a sequence [retrieval SMILES] which is correct and similar to the molecule you provided. Can you give me a new molecule?"}
{"task_id": 104, "PPDS_prompt": "Can you make molecule [input SMILES] less like a drug? The output molecule should be similar to the input molecule. Give me five molecules in SMILES only and list them using bullet points. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output SMILES] is not correct. We find a sequence [retrieval SMILES] which is correct and similar to the molecule you provided. Can you give me a new molecule?"}
{"task_id": 105, "PPDS_prompt": "Can you make molecule [input SMILES] higher permeability? The output molecule should be similar to the input molecule. Give me five molecules in SMILES only and list them using bullet points. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output SMILES] is not correct. We find a sequence [retrieval SMILES] which is correct and similar to the molecule you provided. Can you give me a new molecule?"}
{"task_id": 106, "PPDS_prompt": "Can you make molecule [input SMILES] lower permeability? The output molecule should be similar to the input molecule. Give me five molecules in SMILES only and list them using bullet points. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output SMILES] is not correct. We find a sequence [retrieval SMILES] which is correct and similar to the molecule you provided. Can you give me a new molecule?"}
{"task_id": 107, "PPDS_prompt": "Can you make molecule [input SMILES] with more hydrogen bond acceptors? The output molecule should be similar to the input molecule. Give me five molecules in SMILES only and list them using bullet points. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output SMILES] is not correct. We find a sequence [retrieval SMILES] which is correct and similar to the molecule you provided. Can you give me a new molecule?"}
{"task_id": 108, "PPDS_prompt": "Can you make molecule [input SMILES] with more hydrogen bond donors? The output molecule should be similar to the input molecule. Give me five molecules in SMILES only and list them using bullet points. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output SMILES] is not correct. We find a sequence [retrieval SMILES] which is correct and similar to the molecule you provided. Can you give me a new molecule?"}
{"task_id": 201, "PPDS_prompt": "Can you make molecule [input SMILES] more soluble in water and more hydrogen bond acceptors? The output molecule should be similar to the input molecule. Give me five molecules in SMILES only and list them using bullet points. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output SMILES] is not correct. We find a sequence [retrieval SMILES] which is correct and similar to the molecule you provided. Can you give me a new molecule?"}
{"task_id": 202, "PPDS_prompt": "Can you make molecule [input SMILES] less soluble in water and more hydrogen bond acceptors? The output molecule should be similar to the input molecule. Give me five molecules in SMILES only and list them using bullet points. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output SMILES] is not correct. We find a sequence [retrieval SMILES] which is correct and similar to the molecule you provided. Can you give me a new molecule?"}
{"task_id": 203, "PPDS_prompt": "Can you make molecule [input SMILES] more soluble in water and more hydrogen bond donors? The output molecule should be similar to the input molecule. Give me five molecules in SMILES only and list them using bullet points. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output SMILES] is not correct. We find a sequence [retrieval SMILES] which is correct and similar to the molecule you provided. Can you give me a new molecule?"}
{"task_id": 204, "PPDS_prompt": "Can you make molecule [input SMILES] less soluble in water and more hydrogen bond donors? The output molecule should be similar to the input molecule. Give me five molecules in SMILES only and list them using bullet points. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output SMILES] is not correct. We find a sequence [retrieval SMILES] which is correct and similar to the molecule you provided. Can you give me a new molecule?"}
{"task_id": 205, "PPDS_prompt": "Can you make molecule [input SMILES] more soluble in water and higher permeability? The output molecule should be similar to the input molecule. Give me five molecules in SMILES only and list them using bullet points. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output SMILES] is not correct. We find a sequence [retrieval SMILES] which is correct and similar to the molecule you provided. Can you give me a new molecule?"}
{"task_id": 206, "PPDS_prompt": "Can you make molecule [input SMILES] more soluble in water and lower permeability? The output molecule should be similar to the input molecule. Give me five molecules in SMILES only and list them using bullet points. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output SMILES] is not correct. We find a sequence [retrieval SMILES] which is correct and similar to the molecule you provided. Can you give me a new molecule?"}
{"task_id": 301, "PPDS_prompt": "We want a peptide that binds to HLA-B*44:02. We have a peptide [input peptide] that binds to HLA-C*16:01, can you help modify it? The output peptide should be similar to input peptide. Please provide the possible modified peptide sequence only. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output peptide] is not correct. We find a sequence [retrieval peptide] which is correct and similar to the peptide you provided. Can you give me a new peptide?"}
{"task_id": 302, "PPDS_prompt": "We want a peptide that binds to HLA-C*03:03. We have a peptide [input peptide] that binds to HLA-B*08:01, can you help modify it? The output peptide should be similar to input peptide. Please provide the possible modified peptide sequence only. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output peptide] is not correct. We find a sequence [retrieval peptide] which is correct and similar to the peptide you provided. Can you give me a new peptide?"}
{"task_id": 303, "PPDS_prompt": "We want a peptide that binds to HLA-B*40:01. We have a peptide [input peptide] that binds to HLA-C*12:02, can you help modify it? The output peptide should be similar to input peptide. Please provide the possible modified peptide sequence only. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output peptide] is not correct. We find a sequence [retrieval peptide] which is correct and similar to the peptide you provided. Can you give me a new peptide?"}
{"task_id": 304, "PPDS_prompt": "We want a peptide that binds to HLA-B*08:01. We have a peptide [input peptide] that binds to HLA-A*11:01, can you help modify it? The output peptide should be similar to input peptide. Please provide the possible modified peptide sequence only. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output peptide] is not correct. We find a sequence [retrieval peptide] which is correct and similar to the peptide you provided. Can you give me a new peptide?"}
{"task_id": 305, "PPDS_prompt": "We want a peptide that binds to HLA-B*08:01. We have a peptide [input peptide] that binds to HLA-A*24:02, can you help modify it? The output peptide should be similar to input peptide. Please provide the possible modified peptide sequence only. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output peptide] is not correct. We find a sequence [retrieval peptide] which is correct and similar to the peptide you provided. Can you give me a new peptide?"}
{"task_id": 306, "PPDS_prompt": "We want a peptide that binds to HLA-B*40:02. We have a peptide [input peptide] that binds to HLA-C*12:02, can you help modify it? The output peptide should be similar to input peptide. Please provide the possible modified peptide sequence only. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output peptide] is not correct. We find a sequence [retrieval peptide] which is correct and similar to the peptide you provided. Can you give me a new peptide?"}
{"task_id": 401, "PPDS_prompt": "We want a peptide that binds to HLA-B*08:01 and HLA-C*15:02. We have a peptide [input peptide] that binds to HLA-A*29:02, can you help modify it? The output peptide should be similar to input peptide. Please provide the possible modified peptide sequence only. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output peptide] is not correct. We find a sequence [retrieval peptide] which is correct and similar to the peptide you provided. Can you give me a new peptide?"}
{"task_id": 402, "PPDS_prompt": "We want a peptide that binds to HLA-B*40:02 and HLA-C*14:02. We have a peptide [input peptide] that binds to HLA-A*03:01, can you help modify it? The output peptide should be similar to input peptide. Please provide the possible modified peptide sequence only. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output peptide] is not correct. We find a sequence [retrieval peptide] which is correct and similar to the peptide you provided. Can you give me a new peptide?"}
{"task_id": 403, "PPDS_prompt": "We want a peptide that binds to HLA-B*08:01 and HLA-A*11:01. We have a peptide [input peptide] that binds to HLA-C*14:02, can you help modify it? The output peptide should be similar to input peptide. Please provide the possible modified peptide sequence only. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output peptide] is not correct. We find a sequence [retrieval peptide] which is correct and similar to the peptide you provided. Can you give me a new peptide?"}
{"task_id": 501, "PPDS_prompt": "We have a protein [input protein]. Can you update modify it by making more amino acids into the helix structure (secondary structure)? The input and output protein sequences should be similar but different. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output protein] is not correct. We find a sequence [retrieval protein] which is correct and similar to the protein you provided. Can you give me a new protein?"}
{"task_id": 502, "PPDS_prompt": "We have a protein [input protein]. Can you update modify it by making more amino acids into the strand structure (secondary structure)? The input and output protein sequences should be similar but different. No explanation is needed.", "ReDF_prompt": "Your provided sequence [output protein] is not correct. We find a sequence [retrieval protein] which is correct and similar to the protein you provided. Can you give me a new protein?"}


================================================
FILE: ChatDrug/task_and_evaluation/protein_editing.py
================================================
import lmdb
import pickle as pkl
import numpy as np
import torch
from torch.utils.data import Dataset
import torch.nn.functional as F
import re
from transformers import BertTokenizerFast


def load_ProteinDT_model(input_model_path, chache_dir, mean_output, num_labels):
    """Build a token-classification model and overlay weights from a checkpoint.

    The model is instantiated from the ``Rostlab/prot_bert_bfd`` pretrained
    weights (cached under ``chache_dir``), then the state dict stored at
    ``input_model_path`` is loaded non-strictly; missing and unexpected keys
    are printed rather than raised.

    Args:
        input_model_path: path of the checkpoint passed to ``torch.load``.
        chache_dir: directory forwarded as ``cache_dir`` to ``from_pretrained``.
        mean_output: forwarded unchanged to ``from_pretrained``.
        num_labels: forwarded unchanged to ``from_pretrained``.

    Returns:
        The model with the checkpoint weights applied.
    """
    from ChatDrug.TAPE_benchmark.models import BertForTokenClassification2

    pretrained_kwargs = dict(
        cache_dir=chache_dir,
        mean_output=mean_output,
        num_labels=num_labels,
    )
    model = BertForTokenClassification2.from_pretrained("Rostlab/prot_bert_bfd", **pretrained_kwargs)

    # Overlay the task-specific weights; the checkpoint is deserialized on CPU.
    print(f"Loading protein model from {input_model_path}...")
    checkpoint = torch.load(input_model_path, map_location='cpu')
    missing_keys, unexpected_keys = model.load_state_dict(checkpoint, strict=False)
    print(f"missing keys: {missing_keys}")
    print(f"unexpected keys: {unexpected_keys}")

    return model


# load protein model
# NOTE: the model and tokenizer are loaded (and the model moved to `device`) as a
# side effect of importing this module.
device = "cuda"
chache_dir = "data/protein/temp_pretrained_ProteinDT"
input_model_path = "data/protein/pytorch_model_ss3.bin"
protein_model = load_ProteinDT_model(input_model_path, chache_dir, mean_output=True, num_labels=3)
protein_model = protein_model.to(device)
# The keyword must be spelled `cache_dir`: the previous `chache_dir=` keyword was not
# the cache option, so the tokenizer files were not stored in the directory
# used for the model weights.
protein_tokenizer = BertTokenizerFast.from_pretrained("Rostlab/prot_bert_bfd", cache_dir=chache_dir, do_lower_case=False)


# Shared prompt; only the requested secondary-structure type differs between tasks.
_PROTEIN_PROMPT_TEMPLATE = (
    "We have a protein PROTEIN_SEQUENCE_PLACEHOLDER. "
    "Can you update modify it by making more amino acids into the {structure} structure (secondary structure)? "
    "The input and output protein sequences should be similar but different."
)

# task id -> prompt template containing PROTEIN_SEQUENCE_PLACEHOLDER
task_specification_dict_protein = {
    task_id: _PROTEIN_PROMPT_TEMPLATE.format(structure=structure)
    for task_id, structure in ((501, "helix"), (502, "strand"))
}


def parse_protein(input_protein, raw_text, retrieval_sequence):
    """Extract the first candidate protein sequence from a free-text reply.

    Candidates are runs of at least five uppercase letters found in
    ``raw_text``; occurrences equal to ``input_protein`` or (when given)
    ``retrieval_sequence`` are ignored.

    Returns:
        A one-element list holding the first remaining candidate, truncated to
        1024 characters, or ``[]`` when no candidate remains.
    """
    candidates = [
        candidate
        for candidate in re.findall('[A-Z]{5,}', raw_text)
        if candidate != input_protein
    ]
    if retrieval_sequence is not None:
        candidates = [candidate for candidate in candidates if candidate != retrieval_sequence]

    if not candidates:
        return []
    return [candidates[0][:1024]]


def pad_sequences(sequences, constant_value=0, dtype=None) -> np.ndarray:
    """Stack variable-size arrays into one batch, padding with ``constant_value``.

    The result has shape ``[len(sequences)] + per-dimension maximum of the
    element shapes``. Each element is copied into the leading corner of its
    row; the remaining cells keep ``constant_value``.

    Args:
        sequences: non-empty sequence of ``np.ndarray`` or ``torch.Tensor``
            objects. The type of the first element selects the container type.
        constant_value: fill value for padded positions.
        dtype: dtype of the result; defaults to the dtype of the first element.

    Returns:
        A NumPy array for NumPy inputs, or a ``torch.Tensor`` for tensor
        inputs (despite the ``np.ndarray`` annotation).

    Raises:
        TypeError: if the first element is neither an ``np.ndarray`` nor a
            ``torch.Tensor``.
    """
    batch_size = len(sequences)
    shape = [batch_size] + np.max([seq.shape for seq in sequences], 0).tolist()

    first = sequences[0]
    if dtype is None:
        dtype = first.dtype

    if isinstance(first, np.ndarray):
        array = np.full(shape, constant_value, dtype=dtype)
    elif isinstance(first, torch.Tensor):
        array = torch.full(shape, constant_value, dtype=dtype)
    else:
        # Previously this fell through and surfaced as an UnboundLocalError on `array`.
        raise TypeError(
            "pad_sequences expects np.ndarray or torch.Tensor elements, got {}".format(type(first).__name__)
        )

    for arr, seq in zip(array, sequences):
        # Select the leading block of the row that matches the element's own shape.
        arrslice = tuple(slice(dim) for dim in seq.shape)
        arr[arrslice] = seq

    return array


class ProteinSecondaryStructureDataset(Dataset):
    """Secondary-structure dataset read eagerly from an LMDB database.

    The database stores a pickled ``num_examples`` entry plus one pickled
    record per index (keys ``"0"``, ``"1"``, ...). Each record provides
    ``primary`` (the protein sequence), ``ss3`` and ``ss8`` (per-residue
    labels). All records are loaded into memory at construction time.

    Args:
        data_path: path passed to ``lmdb.open``.
        tokenizer: callable tokenizer exposing ``pad_token_id``.
        target: ``"ss3"`` selects the 3-class labels; any other value selects
            the 8-class labels.
    """

    # Sequences (and their labels) longer than this are truncated.
    MAX_SEQUENCE_LENGTH = 1024

    def __init__(self, data_path, tokenizer, target='ss3'):
        self.tokenizer = tokenizer
        self.target = target
        self.ignore_index = -100

        self.protein_sequence_list = []
        self.ss3_labels_list = []
        self.ss8_labels_list = []

        env = lmdb.open(data_path, max_readers=1, readonly=True,
                        lock=False, readahead=False, meminit=False)
        try:
            # A single read transaction serves every record instead of one per item.
            with env.begin(write=False) as txn:
                # NOTE(security): records are unpickled; only open databases from a trusted source.
                num_examples = pkl.loads(txn.get(b'num_examples'))

                for index in range(num_examples):
                    item = pkl.loads(txn.get(str(index).encode()))
                    protein_sequence = item["primary"]
                    ss3_labels = item["ss3"]
                    ss8_labels = item["ss8"]

                    if len(protein_sequence) > self.MAX_SEQUENCE_LENGTH:
                        protein_sequence = protein_sequence[:self.MAX_SEQUENCE_LENGTH]
                        ss3_labels = ss3_labels[:self.MAX_SEQUENCE_LENGTH]
                        ss8_labels = ss8_labels[:self.MAX_SEQUENCE_LENGTH]

                    self.protein_sequence_list.append(protein_sequence)
                    self.ss3_labels_list.append(ss3_labels)
                    self.ss8_labels_list.append(ss8_labels)
        finally:
            # Everything needed is now in memory; release the LMDB handle.
            env.close()

        if self.target == "ss3":
            self.labels_list = self.ss3_labels_list
            self.num_labels = 3
        else:
            self.labels_list = self.ss8_labels_list
            self.num_labels = 8

    def __len__(self):
        """Return the number of loaded examples."""
        return len(self.labels_list)

    def __getitem__(self, index: int):
        """Return ``(token_ids, input_mask, labels)`` NumPy arrays for one example."""
        protein_sequence = self.protein_sequence_list[index]
        labels = self.labels_list[index]

        token_ids = self.tokenizer(list(protein_sequence), is_split_into_words=True, return_offsets_mapping=True, truncation=False, padding=True)
        token_ids = np.array(token_ids['input_ids'])
        input_mask = np.ones_like(token_ids)

        # Pad one position on each side with ignore_index (-100), presumably to
        # line the labels up with the tokenizer's cls/sep tokens.
        labels = np.asarray(labels, np.int64)
        labels = np.pad(labels, (1, 1), 'constant', constant_values=self.ignore_index)

        return token_ids, input_mask, labels

    def collate_fn(self, batch):
        """Pad a list of ``__getitem__`` results into a dict of batched tensors."""
        input_ids, input_mask, ss_label = tuple(zip(*batch))
        input_ids = torch.from_numpy(pad_sequences(input_ids, constant_value=self.tokenizer.pad_token_id))
        attention_mask = torch.from_numpy(pad_sequences(input_mask, constant_value=0))
        labels = torch.from_numpy(pad_sequences(ss_label, constant_value=self.ignore_index))

        output = {'input_ids': input_ids, 'attention_mask': attention_mask, 'labels': labels}
        return output


def tokenize_one_sequence(tokenizer, protein_sequence):
    """Tokenize one protein sequence, one residue per word.

    Returns:
        ``(token_ids, input_mask)``: the ``input_ids`` produced by
        ``tokenizer`` as a NumPy array, and an all-ones array of the same
        shape.
    """
    residues = list(protein_sequence)
    encoding = tokenizer(residues, is_split_into_words=True, return_offsets_mapping=True, truncation=False, padding=True)
    ids = np.array(encoding['input_ids'])
    return ids, np.ones_like(ids)


def tokenize_sequences(tokenizer, sequence_list, labels):
    """Tokenize an (input, output) sequence pair into one padded batch of two rows.

    Args:
        tokenizer: tokenizer forwarded to ``tokenize_one_sequence``; its
            ``pad_token_id`` fills padded token positions.
        sequence_list: two-element sequence ``[input_sequence, output_sequence]``.
        labels: per-residue labels; the same labels are used for both rows.

    Returns:
        ``(token_ids, attention_mask, labels)`` tensors, each with two rows.
    """
    ignore_index = -100

    input_sequence, output_sequence = sequence_list
    encoded_pair = [
        tokenize_one_sequence(tokenizer, sequence)
        for sequence in (input_sequence, output_sequence)
    ]
    id_arrays = [ids for ids, _ in encoded_pair]
    mask_arrays = [mask for _, mask in encoded_pair]

    # One ignore_index slot is added at each end of the label vector.
    padded_labels = np.pad(np.asarray(labels, np.int64), (1, 1), 'constant', constant_values=ignore_index)
    label_arrays = [padded_labels, padded_labels]  # the same labels serve both rows

    token_ids = torch.from_numpy(pad_sequences(id_arrays, constant_value=tokenizer.pad_token_id))
    attention_mask = torch.from_numpy(pad_sequences(mask_arrays, constant_value=0))
    labels = torch.from_numpy(pad_sequences(label_arrays, constant_value=ignore_index))

    return token_ids, attention_mask, labels


@torch.no_grad()
def evaluate_result(input_protein_sequence, output_protein_sequence, labels, task_id, device="cuda"):
    """Compare how many residues of two proteins are predicted as the task's target class.

    Uses the module-level ``protein_tokenizer`` and ``protein_model``.

    Args:
        input_protein_sequence: the original protein sequence.
        output_protein_sequence: the edited protein sequence.
        labels: ground-truth SS-3/SS-8 labels for the input sequence; they are
            passed to ``tokenize_sequences`` but do not influence the result.
        task_id: 501 (target label 0, the helix task) or 502 (target label 1,
            the strand task).
        device: device on which the batch is evaluated.

    Returns:
        ``(input_count, output_count, output_count > input_count)`` as tensors.

    Raises:
        ValueError: if ``task_id`` is neither 501 nor 502.
    """
    # Validate first: an unknown task id used to fail late with an UnboundLocalError.
    target_label = {501: 0, 502: 1}.get(task_id)
    if target_label is None:
        raise ValueError("Unsupported task_id {!r}; expected 501 or 502.".format(task_id))

    sequence_list = [input_protein_sequence, output_protein_sequence]
    # The padded labels are not needed for counting predictions.
    input_ids, attention_mask, _ = tokenize_sequences(protein_tokenizer, sequence_list, labels)

    input_ids = input_ids.to(device)
    attention_mask = attention_mask.to(device)

    output = protein_model(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)
    logits = output.logits  # [2, seq_length, 3]
    predicted_labels = F.softmax(logits, dim=-1)  # [2, seq_length, 3]
    predicted_labels = predicted_labels.argmax(dim=-1)  # [2, seq_length]

    input_predicted_labels, output_predicted_labels = predicted_labels
    input_attention_mask, output_attention_mask = attention_mask
    # The attention mask zeroes out padded positions so they never count.
    input_count = ((input_predicted_labels == target_label) * input_attention_mask).sum()
    output_count = ((output_predicted_labels == target_label) * output_attention_mask).sum()

    return input_count, output_count, output_count > input_count


class ProteinListDataset(Dataset):
    """Dataset over an in-memory list of protein sequences (no labels).

    Args:
        protein_sequence_list: the sequences to serve, in order.
        tokenizer: callable tokenizer exposing ``pad_token_id``.
        task_id: accepted for interface compatibility; not used by this class.
    """

    def __init__(self, protein_sequence_list, tokenizer, task_id):
        self.protein_sequence_list = protein_sequence_list
        self.tokenizer = tokenizer
        self.ignore_index = -100

    def __len__(self):
        """Return the number of sequences."""
        return len(self.protein_sequence_list)

    def __getitem__(self, index: int):
        """Return ``(token_ids, input_mask)`` NumPy arrays for one sequence."""
        residues = list(self.protein_sequence_list[index])
        encoding = self.tokenizer(residues, is_split_into_words=True, return_offsets_mapping=True, truncation=False, padding=True)
        ids = np.array(encoding['input_ids'])
        return ids, np.ones_like(ids)

    def collate_fn(self, batch):
        """Pad a list of ``__getitem__`` results into a dict of batched tensors."""
        id_arrays, mask_arrays = tuple(zip(*batch))
        padded_ids = torch.from_numpy(pad_sequences(id_arrays, constant_value=self.tokenizer.pad_token_id))
        padded_mask = torch.from_numpy(pad_sequences(mask_arrays, constant_value=0))
        return {'input_ids': padded_ids, 'attention_mask': padded_mask}


@torch.no_grad()
def evaluate_pairwise_list_result(input_protein_list, output_protein_list, task_id, device="cuda", model=None, tokenizer=None):
    """Count target-class residues for paired input/output proteins and compare them.

    Args:
        input_protein_list: original protein sequences.
        output_protein_list: edited protein sequences, paired by position with
            ``input_protein_list``.
        task_id: 501 (target label 0, the helix task) or 502 (target label 1,
            the strand task).
        device: device on which batches are evaluated.
        model: optional model to evaluate with; defaults to the module-level
            ``protein_model``. Accepted because existing callers pass it by
            keyword.
        tokenizer: optional tokenizer; defaults to the module-level
            ``protein_tokenizer``.

    Returns:
        ``(input_count_list, output_count_list, output_count_list > input_count_list)``
        as NumPy arrays with one entry per protein.

    Raises:
        ValueError: if ``task_id`` is neither 501 nor 502.
    """
    from torch.utils.data import DataLoader

    # Validate first: an unknown task id used to fail late with an UnboundLocalError.
    target_label = {501: 0, 502: 1}.get(task_id)
    if target_label is None:
        raise ValueError("Unsupported task_id {!r}; expected 501 or 502.".format(task_id))

    if model is None:
        model = protein_model
    if tokenizer is None:
        tokenizer = protein_tokenizer

    batch_size = 16
    input_dataset = ProteinListDataset(input_protein_list, tokenizer=tokenizer, task_id=task_id)
    input_dataloader = DataLoader(input_dataset, batch_size=batch_size, shuffle=False, num_workers=0, collate_fn=input_dataset.collate_fn)

    output_dataset = ProteinListDataset(output_protein_list, tokenizer=tokenizer, task_id=task_id)
    # Each loader uses its own dataset's collate function.
    output_dataloader = DataLoader(output_dataset, batch_size=batch_size, shuffle=False, num_workers=0, collate_fn=output_dataset.collate_fn)

    def get_target_label_count_list(dataloader, target_label):
        # Per-protein number of non-padded positions predicted as `target_label`.
        count_list = []
        for batch in dataloader:
            input_ids, attention_mask = batch["input_ids"], batch["attention_mask"]
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            output = model(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)

            logits = output.logits  # [B, seq_length, 3]
            predicted_labels = F.softmax(logits, dim=-1)  # [B, seq_length, 3]
            predicted_labels = predicted_labels.argmax(dim=-1)  # [B, seq_length]

            temp_count_list = ((predicted_labels == target_label) * attention_mask)
            temp_count_list = temp_count_list.sum(dim=1)  # [B]
            count_list.append(temp_count_list.detach().cpu().numpy())

        count_list = np.concatenate(count_list)
        print("count_list", count_list.shape)
        return count_list

    input_count_list = get_target_label_count_list(input_dataloader, target_label)
    output_count_list = get_target_label_count_list(output_dataloader, target_label)

    return input_count_list, output_count_list, output_count_list > input_count_list


@torch.no_grad()
def evaluate_fast_protein_dict(input_protein_list, task_id, device="cuda"):
    """Count, for each protein, the residues predicted as the task's target class.

    Uses the module-level ``protein_tokenizer`` and ``protein_model``.

    Args:
        input_protein_list: protein sequences to evaluate.
        task_id: 501 (target label 0, the helix task) or 502 (target label 1,
            the strand task).
        device: device on which batches are evaluated.

    Returns:
        NumPy array with one count per protein, in input order.

    Raises:
        ValueError: if ``task_id`` is neither 501 nor 502.
    """
    from torch.utils.data import DataLoader

    # Validate first: an unknown task id used to fail late with an UnboundLocalError.
    target_label = {501: 0, 502: 1}.get(task_id)
    if target_label is None:
        raise ValueError("Unsupported task_id {!r}; expected 501 or 502.".format(task_id))

    batch_size = 128
    input_dataset = ProteinListDataset(input_protein_list, tokenizer=protein_tokenizer, task_id=task_id)
    input_dataloader = DataLoader(input_dataset, batch_size=batch_size, shuffle=False, num_workers=0, collate_fn=input_dataset.collate_fn)

    def get_target_label_count_list(dataloader, target_label):
        # Per-protein number of non-padded positions predicted as `target_label`.
        count_list = []
        for batch in dataloader:
            input_ids, attention_mask = batch["input_ids"], batch["attention_mask"]
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            output = protein_model(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)

            logits = output.logits  # [B, seq_length, 3]
            predicted_labels = F.softmax(logits, dim=-1)  # [B, seq_length, 3]
            predicted_labels = predicted_labels.argmax(dim=-1)  # [B, seq_length]

            temp_count_list = ((predicted_labels == target_label) * attention_mask)
            temp_count_list = temp_count_list.sum(dim=1)  # [B]
            count_list.append(temp_count_list.detach().cpu().numpy())

        count_list = np.concatenate(count_list)
        print("count_list", count_list.shape)
        return count_list

    input_count_list = get_target_label_count_list(input_dataloader, target_label)

    return input_count_list


@torch.no_grad()
def evaluate_fast_protein(input_protein_list, output_protein_list, task_id, dict_sequence, device="cuda"):
    """Compare edited proteins against precomputed target-class counts of their inputs.

    Only the output proteins are run through the module-level
    ``protein_model``; the counts for the input proteins are looked up in
    ``dict_sequence``.

    Args:
        input_protein_list: original protein sequences; each must be a key of
            ``dict_sequence``.
        output_protein_list: edited protein sequences, paired by position with
            ``input_protein_list``.
        task_id: 501 (target label 0, the helix task) or 502 (target label 1,
            the strand task).
        dict_sequence: mapping from input sequence to its precomputed count.
        device: device on which batches are evaluated.

    Returns:
        Element-wise result of ``output_count_list > input_count_list``.

    Raises:
        ValueError: if ``task_id`` is neither 501 nor 502.
        KeyError: if an input sequence is missing from ``dict_sequence``.
    """
    from torch.utils.data import DataLoader

    # Validate first: an unknown task id used to fail late with an UnboundLocalError.
    target_label = {501: 0, 502: 1}.get(task_id)
    if target_label is None:
        raise ValueError("Unsupported task_id {!r}; expected 501 or 502.".format(task_id))

    batch_size = 1
    output_dataset = ProteinListDataset(output_protein_list, tokenizer=protein_tokenizer, task_id=task_id)
    output_dataloader = DataLoader(output_dataset, batch_size=batch_size, shuffle=False, num_workers=0, collate_fn=output_dataset.collate_fn)

    def get_target_label_count_list(dataloader, target_label):
        # Per-protein number of non-padded positions predicted as `target_label`.
        count_list = []
        for batch in dataloader:
            input_ids, attention_mask = batch["input_ids"], batch["attention_mask"]
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            output = protein_model(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)

            logits = output.logits  # [B, seq_length, 3]
            predicted_labels = F.softmax(logits, dim=-1)  # [B, seq_length, 3]
            predicted_labels = predicted_labels.argmax(dim=-1)  # [B, seq_length]

            temp_count_list = ((predicted_labels == target_label) * attention_mask)
            temp_count_list = temp_count_list.sum(dim=1)  # [B]
            count_list.append(temp_count_list.detach().cpu().numpy())

        count_list = np.concatenate(count_list)
        print("count_list", count_list.shape)
        return count_list

    output_count_list = get_target_label_count_list(output_dataloader, target_label)

    # Input counts come from the precomputed lookup instead of a second model pass.
    input_count_list = [dict_sequence[sequence] for sequence in input_protein_list]

    return output_count_list > input_count_list

================================================
FILE: ChatDrug/task_and_evaluation/small_molecule_editing.py
================================================
from rdkit import Chem
from rdkit.Chem import AllChem, Descriptors
import re

# Names of the RDKit descriptors used to score small-molecule edits.
props = ["MolLogP", "qed", "TPSA", "NumHAcceptors", "NumHDonors"]

# (name, function) pairs from RDKit's descriptor registry whose name -- or, for
# underscore-separated names, whose last segment -- is one of `props`.
prop_pred = [
    (descriptor_name, descriptor_func)
    for descriptor_name, descriptor_func in Descriptors.descList
    if descriptor_name.rsplit("_", 1)[-1] in props
]

# Lookup table: descriptor name -> callable that is applied to an RDKit molecule.
prop2func = dict(prop_pred)


# Prompt templates for small-molecule editing, keyed by task id.
# Every template contains the token SMILES_PLACEHOLDER where the input molecule
# goes (presumably substituted by the prompt-construction code -- verify against
# the caller).  The task ids match the branches of `evaluate_molecule`.
task_specification_dict_molecule = {
    # 1xx: single-property tasks.
    101: "Can you make molecule SMILES_PLACEHOLDER more soluble in water? The output molecule should be similar to the input molecule.",
    102: "Can you make molecule SMILES_PLACEHOLDER less soluble in water? The output molecule should be similar to the input molecule.",
    103: "Can you make molecule SMILES_PLACEHOLDER more like a drug? The output molecule should be similar to the input molecule.",
    104: "Can you make molecule SMILES_PLACEHOLDER less like a drug? The output molecule should be similar to the input molecule.",
    105: "Can you make molecule SMILES_PLACEHOLDER higher permeability? The output molecule should be similar to the input molecule.",
    106: "Can you make molecule SMILES_PLACEHOLDER lower permeability? The output molecule should be similar to the input molecule.",
    107: "Can you make molecule SMILES_PLACEHOLDER with more hydrogen bond acceptors? The output molecule should be similar to the input molecule.",
    108: "Can you make molecule SMILES_PLACEHOLDER with more hydrogen bond donors? The output molecule should be similar to the input molecule.",

    # 2xx: two-property tasks; `evaluate_molecule` scores each as the
    # conjunction of two 1xx tasks.
    201: "Can you make molecule SMILES_PLACEHOLDER more soluble in water and more hydrogen bond acceptors? The output molecule should be similar to the input molecule.",
    202: "Can you make molecule SMILES_PLACEHOLDER less soluble in water and more hydrogen bond acceptors? The output molecule should be similar to the input molecule.",
    203: "Can you make molecule SMILES_PLACEHOLDER more soluble in water and more hydrogen bond donors? The output molecule should be similar to the input molecule.",
    204: "Can you make molecule SMILES_PLACEHOLDER less soluble in water and more hydrogen bond donors? The output molecule should be similar to the input molecule.",
    205: "Can you make molecule SMILES_PLACEHOLDER more soluble in water and higher permeability? The output molecule should be similar to the input molecule.",
    206: "Can you make molecule SMILES_PLACEHOLDER more soluble in water and lower permeability? The output molecule should be similar to the input molecule.",
}


# Per-task threshold settings, keyed by task id.
# Each value holds two alternative threshold lists: the first is all zeros (any
# strict improvement counts) and the second requires a margin.
# NOTE(review): presumably these are the "loose" and "strict" constraints
# selected by the caller -- confirm.
# Within a threshold list there is one entry per evaluated property, in the
# order `evaluate_molecule` reads them (threshold_list[0], then threshold_list[1]).
task2threshold_list = {
    # Single-property tasks: one threshold each.
    101: [[0], [0.5]],
    102: [[0], [0.5]],
    103: [[0], [0.1]],
    104: [[0], [0.1]],
    105: [[0], [10]],
    106: [[0], [10]],
    107: [[0], [1]],
    108: [[0], [1]],

    # Two-property tasks: [threshold for first property, threshold for second].
    201: [[0, 0], [0.5, 1]],
    202: [[0, 0], [0.5, 1]],
    203: [[0, 0], [0.5, 1]],
    204: [[0, 0], [0.5, 1]],
    205: [[0, 0], [0.5, 10]],
    206: [[0, 0], [0.5, 10]],
}


# def parse_molecule(input_sequence, raw_text, retrieval_sequence):
#     record=[]
#     for line in raw_text.strip().split("\n"):
#         line = line.strip()
#         # for same molecule return
#         line += " "
#         line = line.replace(input_sequence + " ", " ")
#         if retrieval_sequence != None:
#             line = line.replace(retrieval_sequence + " ", " ")
#         if line.startswith("- "):
#             output_SMILES = line[2:]
#             output_SMILES = output_SMILES.split(" ")[0]
#             record.append(output_SMILES)
#     while '' in record:
#         record.remove('')

#     return record


def parse_molecule(input_sequence, raw_text, retrieval_sequence):
    """Extract the first new SMILES-like token from an LLM reply.

    Scans ``raw_text`` for runs of six or more SMILES characters and returns
    the first run that is neither the input molecule nor (when given) the
    retrieved molecule.

    Args:
        input_sequence: SMILES of the molecule being edited; exact matches are skipped.
        raw_text: Raw text generated by the language model.
        retrieval_sequence: SMILES shown to the model as retrieval feedback, or
            None; exact matches are skipped when not None.

    Returns:
        A one-element list holding the first accepted token, or an empty list
        when nothing qualifies.
    """
    candidates = re.findall(r'[0-9BCOHNSOPrIFlanocs@+\.\-\[\]\(\)\\\/%=#$]{6,}', raw_text)

    for candidate in candidates:
        # The model often echoes the molecules it was given; those are not edits.
        if candidate == input_sequence:
            continue
        if retrieval_sequence is not None and candidate == retrieval_sequence:
            continue
        return [candidate]

    return []


# Single-property tasks: task id -> (descriptor name in `prop2func`,
# True if a successful edit must RAISE the value, False if it must LOWER it).
_SINGLE_PROPERTY_TASKS = {
    101: ("MolLogP", False),
    102: ("MolLogP", True),
    103: ("qed", True),
    104: ("qed", False),
    105: ("TPSA", False),
    106: ("TPSA", True),
    107: ("NumHAcceptors", True),
    108: ("NumHDonors", True),
}

# Two-property tasks: task id -> the pair of single-property tasks that must
# both succeed.  The first uses threshold_list[0], the second threshold_list[1].
_MULTI_PROPERTY_TASKS = {
    201: (101, 107),
    202: (102, 107),
    203: (101, 108),
    204: (102, 108),
    205: (101, 105),
    206: (101, 106),
}


def _evaluate_single_property(input_mol, output_mol, task_id, threshold):
    """Score one single-property task on already-parsed molecules."""
    prop, must_increase = _SINGLE_PROPERTY_TASKS[task_id]
    input_value = prop2func[prop](input_mol)
    output_value = prop2func[prop](output_mol)
    if must_increase:
        satisfied = output_value > input_value + threshold
    else:
        satisfied = output_value + threshold < input_value
    return input_value, output_value, satisfied


def evaluate_molecule(input_SMILES, output_SMILES, task_id, threshold_list=None):
    """Evaluate whether an edited molecule satisfies a small-molecule editing task.

    Args:
        input_SMILES: SMILES of the original molecule.  It is parsed outside any
            error handling, so an invalid input propagates the RDKit error.
        output_SMILES: SMILES of the edited molecule.
        task_id: 101-108 (single property) or 201-206 (two properties).
        threshold_list: Required margin per evaluated property; one entry for
            1xx tasks, two for 2xx tasks.  Defaults to ``[0]``.

    Returns:
        ``(input_value, output_value, result)``.  For 1xx tasks the values are
        the descriptor values and ``result`` is a bool.  For 2xx tasks the
        values are 2-tuples and ``result`` is the conjunction of both checks.
        If ``output_SMILES`` cannot be parsed or kekulized the return value is
        ``(None, None, -1)``; note that ``-1`` is truthy, so callers must test
        for it explicitly before treating ``result`` as a boolean.

    Raises:
        ValueError: If ``task_id`` is not a known small-molecule task.
        IndexError: If ``threshold_list`` has fewer entries than the task needs.
    """
    if threshold_list is None:
        threshold_list = [0]

    input_mol = Chem.MolFromSmiles(input_SMILES)
    Chem.Kekulize(input_mol)

    # The output comes from a language model, so any parsing failure is an
    # expected "invalid molecule" outcome rather than an error.
    try:
        output_mol = Chem.MolFromSmiles(output_SMILES)
        if output_mol is None:
            # print("Invalid output SMILES: {}".format(output_SMILES))
            return None, None, -1
        Chem.Kekulize(output_mol)
    except Exception:
        # print("Invalid output SMILES: {}".format(output_SMILES))
        return None, None, -1

    if task_id in _SINGLE_PROPERTY_TASKS:
        return _evaluate_single_property(input_mol, output_mol, task_id, threshold_list[0])

    if task_id in _MULTI_PROPERTY_TASKS:
        first_task, second_task = _MULTI_PROPERTY_TASKS[task_id]
        input_value_01, output_value_01, result_01 = _evaluate_single_property(
            input_mol, output_mol, first_task, threshold_list[0])
        input_value_02, output_value_02, result_02 = _evaluate_single_property(
            input_mol, output_mol, second_task, threshold_list[1])
        return (input_value_01, input_value_02), (output_value_01, output_value_02), result_01 and result_02

    # Previously an unknown task fell off the end and returned None, which then
    # broke callers unpacking three values.
    raise ValueError("Unsupported small-molecule task_id: {!r}".format(task_id))


================================================
FILE: ChatDrug_demo.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ChatDrug Usage Guide\n",
    "This is a guide for ChatDrug usage."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import Packages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%capture\n",
    "import sys\n",
    "from ChatDrug.task_and_evaluation.Conversational_LLMs_utils import complete\n",
    "from utils import (\n",
    "    construct_PDDS_prompt, load_retrieval_DB, retrieve_and_feedback, load_thredhold\n",
    ")\n",
    "from ChatDrug.task_and_evaluation import get_task_specification_dict, evaluate, parse"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Conversation Function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def demo_conversation(test_drug, task, C, constraint, retrieval_DB, threshold_dict, messages):\n",
    "    for round_index in range(C+1):\n",
    "        generated_text = complete(messages, 'chatgpt')\n",
    "        messages.append({\"role\": \"assistant\", \"content\": generated_text})\n",
    "        \n",
    "        print(\">>Start Conversation \" + str(round_index+1))\n",
    "        print(\"User: \" + messages[2*round_index+1][\"content\"])\n",
    "        print(\"ChatGPT: \" + generated_text)\n",
    "\n",
    "        if round_index < 1:\n",
    "            closest_drug = None\n",
    "        generated_drug_list = parse(task, test_drug, generated_text, closest_drug)\n",
    "\n",
    "        # Check Parsing Results\n",
    "        if generated_drug_list == None:\n",
    "            print(\">>Invalid Drug Generation\")\n",
    "            break\n",
    "        elif len(generated_drug_list) == 0:\n",
    "            print(\">>Drug Generation Error\")\n",
    "            break\n",
    "        else:\n",
    "            generated_drug = generated_drug_list[0]\n",
    "            print(\"Generated Result: \"+str(generated_drug))\n",
    "        \n",
    "        # Check Evaluation Results\n",
    "        answer = evaluate(test_drug, generated_drug, task, constraint, threshold_dict)\n",
    "\n",
    "        if answer == -1:\n",
    "            print(\">>Invalid Drug Generation\")\n",
    "            break\n",
    "\n",
    "        print('Evaluation result: '+str(answer))\n",
    "\n",
    "        if answer:\n",
    "            print(\">>Drug Generation Correct\")\n",
    "            break\n",
    "        else:\n",
    "            if round_index < C:\n",
    "                print('>>Start Retrieval ' + str(round_index+1))\n",
    "                try:\n",
    "                    closest_drug = retrieve_and_feedback(task, retrieval_DB, test_drug, generated_drug, constraint, threshold_dict)\n",
    "                except:\n",
    "                    error = sys.exc_info()\n",
    "                    if error[0] == Exception:\n",
    "                        print('>>Cannot Find Retrieval Result')\n",
    "                        break\n",
    "                    else:\n",
    "                        print('>>Invalid Drug Generation')\n",
    "                        break\n",
    "\n",
    "                print(\"Retrieval Result: \" + closest_drug)\n",
    "\n",
    "                prompt_ReDF = f'Your provided sequence {generated_drug} is not correct. We find a sequence {closest_drug} which is correct and similar to the {test_drug_type} you provided. Can you give me a new {test_drug_type}?'\n",
    "                messages.append({\"role\": \"user\", \"content\": prompt_ReDF})\n",
    "            else:\n",
    "                print(\">>Drug Generation Error\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Parameters\n",
    "The following blocks are ChatDrug parameters for molecule, peptide and protein respectively. Choose the drug type by running corresponding blocks. \n",
    "`test_drug`, `task` and `C` can also be changed by setting your own input drug, task id and conversation round number. Here are several examples:"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Molecule"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_drug_type = 'molecule'\n",
    "test_drug = 'O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O'\n",
    "task = 203\n",
    "C = 2\n",
    "constraint = 'loose'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Peptide"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_drug_type = 'peptide'\n",
    "test_drug = 'AAASHFFNL'\n",
    "task = 301\n",
    "C = 2 \n",
    "constraint = None"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Protein"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_drug_type = 'protein'\n",
    "test_drug = 'GAPVPVDENDEGLQRALQFAMAEYNRASNDKYSSRVVRVISAKRQLVSGIKYILQVEIGRTTCPKSSGDLQSCEFHDEPEMAKYTTCTFVVYSIPWLNQIKLLESKCQ'\n",
    "task = 501\n",
    "C = 2\n",
    "constraint = None"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "task_specification_dict = get_task_specification_dict(task)\n",
    "input_drug_list, retrieval_DB = load_retrieval_DB(task, seed=0)\n",
    "threshold_dict = load_thredhold(test_drug_type)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Start ChatDrug"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      ">>Start ChatDrug\n",
      "System: You are an expert in the field of molecular chemistry.\n",
      ">>Start Conversation 1\n",
      "User: Can you make molecule O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O more soluble in water and more hydrogen bond donors? The output molecule should be similar to the input molecule. Give me five molecules in SMILES only and list them using bullet points. No explanation is needed.\n",
      "ChatGPT: Sure, here are five molecules in SMILES that are similar to the input molecule but have increased solubility in water and more hydrogen bond donors:\n",
      "\n",
      "- O=C(NC[C@H]1CCCO1)c1ccccc1N1CCOCC1=O\n",
      "- O=C(NC[C@H]1CCCO1)c1ccccc1N1CCNCC1=O\n",
      "- O=C(NC[C@H]1CCCO1)c1ccccc1N1CC(O)CC1=O\n",
      "- O=C(NC[C@H]1CCCO1)c1ccccc1N1CCC(O)C1=O\n",
      "- O=C(NC[C@H]1CCCO1)c1ccccc1N1CC(O)(C)C1=O\n",
      "Generated Result: O=C(NC[C@H]1CCCO1)c1ccccc1N1CCOCC1=O\n",
      "Evaluation result: False\n",
      ">>Start Retrieval 1\n",
      "Retrieval Result: O=C(NC[C@@H]1CCCO1)C(=O)Nc1cnc(-c2ccccc2)s1\n",
      ">>Start Conversation 2\n",
      "User: Your provided sequence O=C(NC[C@H]1CCCO1)c1ccccc1N1CCOCC1=O is not correct. We find a sequence O=C(NC[C@@H]1CCCO1)C(=O)Nc1cnc(-c2ccccc2)s1 which is correct and similar to the molecule you provided. Can you give me a new molecule?\n",
      "ChatGPT: I apologize for the mistake. Here is a new molecule that is similar to the input molecule but has increased solubility in water and more hydrogen bond donors:\n",
      "\n",
      "- O=C(NC[C@H]1CCCO1)c1ccccc1N1CC(O)C(O)C1=O\n",
      "Generated Result: O=C(NC[C@H]1CCCO1)c1ccccc1N1CC(O)C(O)C1=O\n",
      "Evaluation result: True\n",
      ">>Drug Generation Correct\n"
     ]
    }
   ],
   "source": [
    "print(\">>Start ChatDrug\")\n",
    "\n",
    "# ChatGPT message\n",
    "messages = [{\"role\": \"system\", \"content\": \"You are an expert in the field of molecular chemistry.\"}]\n",
    "print(\"System: You are an expert in the field of molecular chemistry.\")\n",
    "\n",
    "# PDDS\n",
    "PDDS_prompt = construct_PDDS_prompt(task_specification_dict, test_drug, test_drug_type, task)\n",
    "messages.append({\"role\": \"user\", \"content\": PDDS_prompt})\n",
    "\n",
    "# Conversation\n",
    "demo_conversation(test_drug, task, C, constraint, retrieval_DB, threshold_dict, messages)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ChatDrug-public",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: README.md
================================================
# Conversational Drug Editing Using Retrieval and Domain Feedback

**ICLR 2024**

Authors: Shengchao Liu<sup>+</sup>, Jiongxiao Wang<sup>+</sup>, Yijin Yang, Chengpeng Wang, Ling Liu, Hongyu Guo<sup>\*</sup>, Chaowei Xiao<sup>\*</sup>

<sup>+</sup> Equal contribution<br>
<sup>\*</sup> Equal advising

[[Paper](https://openreview.net/forum?id=yRrPfKyJQ2)]
[[Project Page](https://chao1224.github.io/ChatDrug)]
[[ArXiv](https://arxiv.org/abs/2305.18090)]

<p align="center">
  <img src="figure/pipeline.png" /> 
</p>


ChatDrug is for conversational drug editing, and three types of drugs are considered:
- Small Molecules
- Peptides
- Proteins
<p align="left">
  <img src="figure/final_demo.gif" width="100%" /> 
</p>

## Environment

Set up Anaconda (skip this step if you already have conda):
 ```bash
wget https://repo.continuum.io/archive/Anaconda3-2019.10-Linux-x86_64.sh
bash Anaconda3-2019.10-Linux-x86_64.sh -b
export PATH=$PWD/anaconda3/bin:$PATH
```

Then download the required python packages:
```bash
conda create -n ChatDrug python=3.8
conda activate ChatDrug
pip install rdkit-pypi==2022.9.4
conda install -y numpy networkx scikit-learn
conda install -y -c conda-forge -c pytorch pytorch=1.9.1

pip install tensorflow
pip install mhcflurry
pip install levenshtein

pip install transformers
pip install lmdb
pip install seqeval
pip install openai
pip install fastchat
pip install psutil
pip install accelerate

pip install -e .
```

## Dataset

We provide the dataset at [this link](https://huggingface.co/datasets/chao1224/ChatDrug_data). You can download it manually and move it to the `data` folder, or use the following Python script:
```
from huggingface_hub import snapshot_download

snapshot_download(repo_id="chao1224/ChatDrug_data", repo_type="dataset", local_dir="data", local_dir_use_symlinks=False, ignore_patterns=["README.md"])
```
Please give credit to the original papers. For more details on the dataset, please check the [data folder](./data).

## Evaluation

The evaluation metrics for three editing tasks are below:
| Drug Type | Evaluation |
| -- | -- |
| Small Molecule | RDKit (`conda install -y -c rdkit rdkit`)|
| Peptide | [MHCFlurry](https://github.com/openvax/mhcflurry)|
| Protein | [ProteinDT paper](https://arxiv.org/abs/2302.04611), [checkpoints](https://huggingface.co/chao1224/ProteinCLAP_pretrain_EBM_NCE_downstream_property_prediction) |

For evaluation on peptides and proteins, please read the following instructions:
- For peptides (MHCFlurry), please run the following bash commands:
```
> pip install mhcflurry
> mhcflurry-downloads fetch models_class1_presentation
> mhcflurry-downloads path models_class1_presentation
$PATH
> mv $PATH data/peptide/models_class1_presentation
```
- For proteins (ProteinDT / ProteinCLAP), please run the following python script:
```
from huggingface_hub import hf_hub_download

hf_hub_download(
  repo_id="chao1224/ProteinCLAP_pretrain_EBM_NCE_downstream_property_prediction",
  repo_type="model",
  filename="pytorch_model_ss3.bin",
  cache_dir="data/protein")
```
Please give credit to the original papers. For more details on the evaluation, please check the [data folder](./data).

## Prompt for Drug Editing

All the task prompts are defined in `ChatDrug/task_and_evaluation`. You can also find them at [the Hugging Face link](https://huggingface.co/datasets/chao1224/ChatDrug_prompt).

## Usage

Please provide your OpenAI API Key in `ChatDrug/task_and_evaluation/Conversational_LLMs_utils.py`

To use ChatDrug, please use the following command:
```
python main_ChatDrug.py --task task_id --log_file results/ChatDrug.log --record_file results/ChatDrug.json --C 2
```
Results will be saved in `results/`.

For protein editing tasks, repeatedly evaluating candidates during the retrieval process is time-consuming. We therefore provide a fast version of the conversation setting. Run the following command to use the accelerated ChatDrug for protein editing tasks:
```
python main_ChatDrug.py --task task_id --log_file results/ChatDrug_fast_protein.log --record_file results/ChatDrug_fast_protein.json --C 2 --fast_protein
```

We also provide code for In-Context Learning setting:
```
python main_InContext.py --task task_id --log_file results/InContext.log --record_file results/InContext.json
```


## Cite Us
Feel free to cite this work if you find it useful to you!

```
@inproceedings{liu2024chatdrug,
    title={Conversational Drug Editing Using Retrieval and Domain Feedback},
    author={Shengchao Liu and Jiongxiao Wang and Yijin Yang and Chengpeng Wang and Ling Liu and Hongyu Guo and Chaowei Xiao},
    booktitle={The Twelfth International Conference on Learning Representations},
    year={2024},
    url={https://openreview.net/forum?id=yRrPfKyJQ2}
}
```


================================================
FILE: data/README.md
================================================

# Dataset and Evaluation Preparation for ChatDrug

First, create the `data` folder and change into it:
```
mkdir -p data
cd data
```

And then do the following for dataset and evaluation preparation.

## Small Molecule Editing

- For small molecule editing dataset, please check `small_molecule_editing.txt`. Credit to [MoleculeSTM paper](https://arxiv.org/abs/2212.10789).
- For the retrieval database, please use the ZINC250K dataset from [here](https://github.com/aspuru-guzik-group/chemical_vae/blob/main/models/zinc/250k_rndm_zinc_drugs_clean_3.csv).

## Peptide Editing

- Both the editing and retrieval dataset can be found in [this repo](https://github.com/minrq/pMHC).
- We provide most of the pretrained datasets in `peptide`. You only need to download the `Data_S3.csv` from [this link](https://github.com/minrq/pMHC/blob/main/data/mhcflurry/Data_S3.csv).
- If you want to do the data preprocessing yourself, please refer to the following:
```
cd peptide
python preprocess_step_1_data_extraction.py
python preprocess_step_2_single_prop.py
python preprocess_step_3_multi_prop.py
```

## Protein Editing

- Download dataset from [this google drive](https://drive.google.com/file/d/11szX_dd8NdHfKG5zaMNSDMwP6Vk6EX90/view?usp=share_link).
- Unzip to `protein` folder.
- This includes both the editing and retrieval dataset.
- For evaluation, please download `pytorch_model_ss3.bin` from [this link](https://huggingface.co/chao1224/ProteinCLAP_pretrain_EBM_NCE_downstream_property_prediction). Credit to [ProteinDT](https://arxiv.org/abs/2302.04611).

```
.
├── peptide
│   ├── class1_pseudosequences.csv
│   ├── Data_S3.csv
│   ├── models_class1_presentation
│   │   ├── 10755300.stderr
│   │   .
│   │   .
│   │   .
│   │   └── train_data.csv.bz2
│   ├── peptide_editing.json
│   ├── peptide_editing.json
│   ├── peptide_editing_threshold.json
│   ├── preprocess_step_1_data_extraction.py
│   ├── preprocess_step_2_single_prop.py
│   ├── preprocess_step_3_multi_prop.py
│   └── selected_alleles.txt
├── protein
│   ├── downstream_datasets
│   │   └── secondary_structure
│   │       ├── secondary_structure_casp12.lmdb
│   │       │   ├── data.mdb
│   │       │   └── lock.mdb
│   │       ├── secondary_structure_cb513.lmdb
│   │       │   ├── data.mdb
│   │       │   └── lock.mdb
│   │       ├── secondary_structure_train.lmdb
│   │       │   ├── data.mdb
│   │       │   └── lock.mdb
│   │       ├── secondary_structure_ts115.lmdb
│   │       │   ├── data.mdb
│   │       │   └── lock.mdb
│   │       └── secondary_structure_valid.lmdb
│   │           ├── data.mdb
│   │           └── lock.mdb
│   ├── pytorch_model_ss3.bin
│   └── pytorch_model_ss8.bin
├── README.md
└── small_molecule
    ├── 250k_rndm_zinc_drugs_clean_3.csv
    └── small_molecule_editing.txt
```


================================================
FILE: data/peptide/class1_pseudosequences.csv
================================================
allele pseudosequence
BoLA-100901 YYSMYREISENVYGSNLYLLYRDYTWEYLNYRWY
BoLA-100902 YYSEYREISENVYESNLYLLYRDYTWEYLNYRWY
BoLA-101901 YHTKYREISENVYGSNLYYDYDYYTWAVFNYRGY
BoLA-102001 YHTKYREISENVYGSNLYFLYMDYTWAVFNYRGY
BoLA-102101 YYTKYREISENVYGSNLYFQFRYYTWADFNYEGY
BoLA-102301 YYSEYREISENVYESNLYIAYSDYTWEYLNYRWY
BoLA-102801 YYTKYREISEKLYENTLYLQFRYYTWADFNYEWY
BoLA-102901 YYTRYREISENLYKNTAYITFMYYTWANENYRGY
BoLA-103101 YYTKYDEISENLYKNTLYIAFRDYTWAYLNYTWY
BoLA-103102 YYTKYDEISENLYKDTLYIAFRDYTWAYLNYTWY
BoLA-104201 YHTKYDEISENLYKDTLYIAYRDYTWEYLNYRGY
BoLA-104901 YYAEYREISDTSFVGTLYIEYEYYTWAYLNYEGY
BoLA-106101 YYTIYREISENVYESNLYFRYDFYTWADFNYRWY
BoLA-106701 YYAMYEMDAEDRSLCTLYFQFTFYTWAAFNYTWY
BoLA-107401 YYTKYREISENLYKNTAYLRFSFYTWAAENYRGY
BoLA-1:00901 YYSMYREISENVYGSNLYLLYRDYTWEYLNYRWY
BoLA-1:00902 YYSEYREISENVYESNLYLLYRDYTWEYLNYRWY
BoLA-1:01901 YHTKYREISENVYGSNLYYDYDYYTWAVFNYRGY
BoLA-1:02001 YHTKYREISENVYGSNLYFLYMDYTWAVFNYRGY
BoLA-1:02101 YYTKYREISENVYGSNLYFQFRYYTWADFNYEGY
BoLA-1:02301 YYSEYREISENVYESNLYIAYSDYTWEYLNYRWY
BoLA-1:02801 YYTKYREISEKLYENTLYLQFRYYTWADFNYEWY
BoLA-1:02901 YYTRYREISENLYKNTAYITFMYYTWANENYRGY
BoLA-1:03101 YYTKYDEISENLYKNTLYIAFRDYTWAYLNYTWY
BoLA-1:03102 YYTKYDEISENLYKDTLYIAFRDYTWAYLNYTWY
BoLA-1:04201 YHTKYDEISENLYKDTLYIAYRDYTWEYLNYRGY
BoLA-1:04901 YYAEYREISDTSFVGTLYIEYEYYTWAYLNYEGY
BoLA-1:06101 YYTIYREISENVYESNLYFRYDFYTWADFNYRWY
BoLA-1:06701 YYAMYEMDAEDRSLCTLYFQFTFYTWAAFNYTWY
BoLA-1:07401 YYTKYREISENLYKNTAYLRFSFYTWAAENYRGY
BoLA-200501 YYAEYRNIYDTIFVDTLYIAYWFYTWAAWNYEWY
BoLA-200601 YSAEYRNIYDTTFVYTLYLWSMFYTWANENYEWY
BoLA-200602 YSAEYRNIYDTTFVYTLYLWSMFYTWANENYEWH
BoLA-200801 YLIMYRENSETTFANTAYVEYMDYTWADWNYRWY
BoLA-200802 YLIMYRENSETTFANTAYVEYMDYTWADWNYRGY
BoLA-201201 YYATYRENFDTTFVDTLYIAYRDYTWAEHNYTWY
BoLA-201601 YSAEYRNIYDTTFVYTLYLWSMFYTWANENYEGY
BoLA-201602 YSAEYRNIYDTTFVYTLYLWSMFYTWANENYEWY
BoLA-201801 YYADYRNIYDTIFANTAYFEYMFYTWAEQNYRGY
BoLA-201802 YYADYRNIYDTIFANTAYFEYMFYTWAEQNYRGY
BoLA-202201 YHSEYEQIVDTSFVGTLYLLYEDYTRAALNYTGY
BoLA-202501 YSAEYRNIYDTTFVYALYLWSWFYTWAAENYRGY
BoLA-202601 YYAEYREISETTFVDTLYIEYEYYTWAYLNYRGY
BoLA-202602 YYAEYREISETTFVDTLYIEYEYYTWAYLNYRGY
BoLA-202603 YYAEYREISETTFVDTLYIEYEYYTWAYLNYRGY
BoLA-203001 YYSEYRNIYDTNFVSNLYLWSWFYTWANENYEWY
BoLA-203202 YYATYRENLGATFVDTLYIEYRDYTWAYLNYTWY
BoLA-204301 YSEMYRERAGNTFVNTLYIWYRDYTWAVFNYLGY
BoLA-204401 YYAMYEEKADTTFVDTLYIAYRDYTWAVFNYLGY
BoLA-204402 YYAMYEEKADTTFVDTLYIWYRDYTWAVFNYLGY
BoLA-204501 YYATYRENLDTTFVDTLYIEYRDYTWAEFNYLGY
BoLA-204601 YSEMYRERAGNTFVNTLYIWYRDYTWAEQNYTWY
BoLA-204701 YSEMYQERAGNTFVDTLYLWYMDYTWAEQNYTWY
BoLA-204801 YYSEYEQIVDTSFVGTLYLLYMDYTRAAQNYRGY
BoLA-205401 YYIMYQENSGATFANTLYFWYWFYTWANENYRGY
BoLA-205501 YYAEYREISETTFVDSLYIAYRDYTWAYLNYRGY
BoLA-205601 YYATYQENFDATFANTLYFLSTYYTWEAHNYRGY
BoLA-205701 YYIMYREISETTFVDTLYIEYDFYTWEYLNYRGY
BoLA-206001 YSAEYRNIYDTTFVYTLYLWSWFYTWANGNYEGY
BoLA-206201 YYATYQEIQENTFANTLYIEYRDYTWAYFNYRWY
BoLA-206901 YYSEYEQIVDTSFVNTLYLWYRDYTWEAENYRWY
BoLA-207001 YYATYRENLDATFVNTLYLWYRDYTWAERNYRWY
BoLA-207101 YYATYRENLGATFVDTLYIAYSDYTWAEFNYRGY
BoLA-2:00501 YYAEYRNIYDTIFVDTLYIAYWFYTWAAWNYEWY
BoLA-2:00601 YSAEYRNIYDTTFVYTLYLWSMFYTWANENYEWY
BoLA-2:00602 YSAEYRNIYDTTFVYTLYLWSMFYTWANENYEWH
BoLA-2:00801 YLIMYRENSETTFANTAYVEYMDYTWADWNYRWY
BoLA-2:00802 YLIMYRENSETTFANTAYVEYMDYTWADWNYRGY
BoLA-2:01201 YYATYRENFDTTFVDTLYIAYRDYTWAEHNYTWY
BoLA-2:01601 YSAEYRNIYDTTFVYTLYLWSMFYTWANENYEGY
BoLA-2:01602 YSAEYRNIYDTTFVYTLYLWSMFYTWANENYEWY
BoLA-2:01801 YYADYRNIYDTIFANTAYFEYMFYTWAEQNYRGY
BoLA-2:01802 YYADYRNIYDTIFANTAYFEYMFYTWAEQNYRGY
BoLA-2:02201 YHSEYEQIVDTSFVGTLYLLYEDYTRAALNYTGY
BoLA-2:02501 YSAEYRNIYDTTFVYALYLWSWFYTWAAENYRGY
BoLA-2:02601 YYAEYREISETTFVDTLYIEYEYYTWAYLNYRGY
BoLA-2:02602 YYAEYREISETTFVDTLYIEYEYYTWAYLNYRGY
BoLA-2:02603 YYAEYREISETTFVDTLYIEYEYYTWAYLNYRGY
BoLA-2:03001 YYSEYRNIYDTNFVSNLYLWSWFYTWANENYEWY
BoLA-2:03202 YYATYRENLGATFVDTLYIEYRDYTWAYLNYTWY
BoLA-2:04301 YSEMYRERAGNTFVNTLYIWYRDYTWAVFNYLGY
BoLA-2:04401 YYAMYEEKADTTFVDTLYIAYRDYTWAVFNYLGY
BoLA-2:04402 YYAMYEEKADTTFVDTLYIWYRDYTWAVFNYLGY
BoLA-2:04501 YYATYRENLDTTFVDTLYIEYRDYTWAEFNYLGY
BoLA-2:04601 YSEMYRERAGNTFVNTLYIWYRDYTWAEQNYTWY
BoLA-2:04701 YSEMYQERAGNTFVDTLYLWYMDYTWAEQNYTWY
BoLA-2:04801 YYSEYEQIVDTSFVGTLYLLYMDYTRAAQNYRGY
BoLA-2:05401 YYIMYQENSGATFANTLYFWYWFYTWANENYRGY
BoLA-2:05501 YYAEYREISETTFVDSLYIAYRDYTWAYLNYRGY
BoLA-2:05601 YYATYQENFDATFANTLYFLSTYYTWEAHNYRGY
BoLA-2:05701 YYIMYREISETTFVDTLYIEYDFYTWEYLNYRGY
BoLA-2:06001 YSAEYRNIYDTTFVYTLYLWSWFYTWANGNYEGY
BoLA-2:06201 YYATYQEIQENTFANTLYIEYRDYTWAYFNYRWY
BoLA-2:06901 YYSEYEQIVDTSFVNTLYLWYRDYTWEAENYRWY
BoLA-2:07001 YYATYRENLDATFVNTLYLWYRDYTWAERNYRWY
BoLA-2:07101 YYATYRENLGATFVDTLYIAYSDYTWAEFNYRGY
BoLA-300101 YSEMYRERAGNFFVSNLYLWSMFYSMAEQNYRWY
BoLA-300102 YSSMYRERAGNFFVSNLYLWSMFYSMAEQNYRWY
BoLA-300103 YSIMYRERAGNFFVSNLYLWSMFYSMAEQNYRWY
BoLA-300201 YLEMYQEKAGNFFVSNLYLLSMFYSMAEQNYRWY
BoLA-300401 YSEMYRNNAGNSFVNTLYLWSMYYTWAYQNYEWY
BoLA-300402 YSEMYRNNAGNSFVNTLYLWSMYYTWAYQNYEWY
BoLA-300403 YSEMYRNNAGNSFVNTLYLWSMYYTWAYQNYEWY
BoLA-301001 YYSEYRNIYDTTFVDTLYLEYEYYSVAEFNYRGY
BoLA-301101 YSEMYQEKAGTTFANIAYFWYMYYTWAEQNYTWY
BoLA-301701 YSEMYRERAGNIFVSNLYFWYEYYTWAAQNYRWY
BoLA-301702 YSEMYRERAGNIFVSNLYFWYMYYTWAAQNYRWY
BoLA-301703 YSEMYRERAGNIFVSNLYFWYMYYTWAEQNYRWY
BoLA-302701 YSEMYRNNAGNSFVGTLYLWSMYYTWEYQNYEWH
BoLA-302702 YSEMYRNNAGNSFVGTLYLWSMYYTWEYQNYEWH
BoLA-303501 YYNMYQENAGNTFVGTLYLWSEFYTWAAHNYTWY
BoLA-303601 YYAMYRNNADATFVNTLYFLYEYYTVADHNYRWY
BoLA-303701 YSEMYRNNAGNSFVGTLYLLYMDYSRAVQNYRWY
BoLA-303801 YNEMYRNNAGNDSVGTLYLWYMYYSMAVQNYTWY
BoLA-305001 YSEMYRNNAGNTFGSNLYFLYTYYTWAEWNYTWH
BoLA-305002 YSEMYRNNAGNTFGSNLYFWYMYYTWAEQNYTWH
BoLA-305101 YSEMYRERAGNTFVNTLYIWYRDYTWAAENYTWY
BoLA-305201 YYSMYRENSDTGFVDTLYLLYTYYSVAVQNYRWY
BoLA-305301 YSEMYRNNAGNSFVNTLYLWSMYYTWAYQNYEWY
BoLA-305801 YSEMYRERAGNTFVGTLYLWYMDYSRAVQNYRWY
BoLA-305901 YSEMYRNNAGNSFVGTLYLWSMFYTWEYQNYRWH
BoLA-306501 YSEMYQEKAGTSSVGTLYLAYMFYSMAVQNYEWY
BoLA-306601 YYEMYQEKADTTFVDTLYLLYTYYSMAEFNYTWY
BoLA-306602 YYEMYQEKADTTFVDTLYLLYTFYSMAEFNYTWY
BoLA-306801 YSIVYQNNAGTTFANTLYLLYMYYTWAAHNYEWY
BoLA-307301 YYIIYQEISDTSFVSNLYLWYTYYSMAVQNYEWY
BoLA-3:00101 YSEMYRERAGNFFVSNLYLWSMFYSMAEQNYRWY
BoLA-3:00102 YSSMYRERAGNFFVSNLYLWSMFYSMAEQNYRWY
BoLA-3:00103 YSIMYRERAGNFFVSNLYLWSMFYSMAEQNYRWY
BoLA-3:00201 YLEMYQEKAGNFFVSNLYLLSMFYSMAEQNYRWY
BoLA-3:00401 YSEMYRNNAGNSFVNTLYLWSMYYTWAYQNYEWY
BoLA-3:00402 YSEMYRNNAGNSFVNTLYLWSMYYTWAYQNYEWY
BoLA-3:00403 YSEMYRNNAGNSFVNTLYLWSMYYTWAYQNYEWY
BoLA-3:01001 YYSEYRNIYDTTFVDTLYLEYEYYSVAEFNYRGY
BoLA-3:01101 YSEMYQEKAGTTFANIAYFWYMYYTWAEQNYTWY
BoLA-3:01701 YSEMYRERAGNIFVSNLYFWYEYYTWAAQNYRWY
BoLA-3:01702 YSEMYRERAGNIFVSNLYFWYMYYTWAAQNYRWY
BoLA-3:01703 YSEMYRERAGNIFVSNLYFWYMYYTWAEQNYRWY
BoLA-3:02701 YSEMYRNNAGNSFVGTLYLWSMYYTWEYQNYEWH
BoLA-3:02702 YSEMYRNNAGNSFVGTLYLWSMYYTWEYQNYEWH
BoLA-3:03501 YYNMYQENAGNTFVGTLYLWSEFYTWAAHNYTWY
BoLA-3:03601 YYAMYRNNADATFVNTLYFLYEYYTVADHNYRWY
BoLA-3:03701 YSEMYRNNAGNSFVGTLYLLYMDYSRAVQNYRWY
BoLA-3:03801 YNEMYRNNAGNDSVGTLYLWYMYYSMAVQNYTWY
BoLA-3:05001 YSEMYRNNAGNTFGSNLYFLYTYYTWAEWNYTWH
BoLA-3:05002 YSEMYRNNAGNTFGSNLYFWYMYYTWAEQNYTWH
BoLA-3:05101 YSEMYRERAGNTFVNTLYIWYRDYTWAAENYTWY
BoLA-3:05201 YYSMYRENSDTGFVDTLYLLYTYYSVAVQNYRWY
BoLA-3:05301 YSEMYRNNAGNSFVNTLYLWSMYYTWAYQNYEWY
BoLA-3:05801 YSEMYRERAGNTFVGTLYLWYMDYSRAVQNYRWY
BoLA-3:05901 YSEMYRNNAGNSFVGTLYLWSMFYTWEYQNYRWH
BoLA-3:06501 YSEMYQEKAGTSSVGTLYLAYMFYSMAVQNYEWY
BoLA-3:06601 YYEMYQEKADTTFVDTLYLLYTYYSMAEFNYTWY
BoLA-3:06602 YYEMYQEKADTTFVDTLYLLYTFYSMAEFNYTWY
BoLA-3:06801 YSIVYQNNAGTTFANTLYLLYMYYTWAAHNYEWY
BoLA-3:07301 YYIIYQEISDTSFVSNLYLWYTYYSMAVQNYEWY
BoLA-402401 YSIAYEQIVDTTFANTAYIAYSDYTWEYLNYTWY
BoLA-402402 YSIAYEEIVDTTFANTAYLPYSDYTWTYLNYTWY
BoLA-406301 YYSTYRENFETTFVNTLYILYTFYSRAALNYRGY
BoLA-4:02401 YSIAYEQIVDTTFANTAYIAYSDYTWEYLNYTWY
BoLA-4:02402 YSIAYEEIVDTTFANTAYLPYSDYTWTYLNYTWY
BoLA-4:06301 YYSTYRENFETTFVNTLYILYTFYSRAALNYRGY
BoLA-500301 YLIVYEERADHFFRGALYFEYEFYSWASYNYEWY
BoLA-503901 YYIVYQEKADTFFLGTLYLWCWFYTWANENYEWY
BoLA-506401 YYIVYQEKADHTFANTLYLWHWFYTWANENYEWY
BoLA-507201 YYIVYQEKADHFFLGTLYLWYWFYSWAVQNYTWY
BoLA-5:00301 YLIVYEERADHFFRGALYFEYEFYSWASYNYEWY
BoLA-5:03901 YYIVYQEKADTFFLGTLYLWCWFYTWANENYEWY
BoLA-5:06401 YYIVYQEKADHTFANTLYLWHWFYTWANENYEWY
BoLA-5:07201 YYIVYQEKADHFFLGTLYLWYWFYSWAVQNYTWY
BoLA-601301 YHTTYREISENWYEANLYLEYEYYSMAAFNYTWY
BoLA-601302 YHTTYREISENWYEANLYLLYEYYSMAAFNYTWY
BoLA-601401 YHTKYREISENWYEANLYYRYTFYTWAEFNYRGY
BoLA-601402 YHTKYREISENKYEAILYYRYTFYTWAEFNYRWY
BoLA-601501 YYTKYREISENWYEANLYLLYTFYSMADQNYRGY
BoLA-601502 YYTKYREISENWYEANLYLQFTFYSMADQNYRGY
BoLA-603401 YHTKYREISENVYGSNLYLLYTFYSMADRNYRGY
BoLA-604001 YSEMYEERAGIVFVNTLYLWCWFYSMAAGKYTWY
BoLA-604101 YHTKYREISENWYEATLYLEYEYYSMAAFNYRSY
BoLA-6:01301 YHTTYREISENWYEANLYLEYEYYSMAAFNYTWY
BoLA-6:01302 YHTTYREISENWYEANLYLLYEYYSMAAFNYTWY
BoLA-6:01401 YHTKYREISENWYEANLYYRYTFYTWAEFNYRGY
BoLA-6:01402 YHTKYREISENKYEAILYYRYTFYTWAEFNYRWY
BoLA-6:01501 YYTKYREISENWYEANLYLLYTFYSMADQNYRGY
BoLA-6:01502 YYTKYREISENWYEANLYLQFTFYSMADQNYRGY
BoLA-6:03401 YHTKYREISENVYGSNLYLLYTFYSMADRNYRGY
BoLA-6:04001 YSEMYEERAGIVFVNTLYLWCWFYSMAAGKYTWY
BoLA-6:04101 YHTKYREISENWYEATLYLEYEYYSMAAFNYRSY
BoLA-AW10 YSEMYRERAGNFFVSNLYLWSMFYSMAEQNYRWY
BoLA-D18.4 YYSEYREISENVYESNLYIAYSDYTWEYLNYRWY
BoLA-HD6 YHTTYREISENWYEANLYLEYEYYSMAAFNYTWY
BoLA-JSP.1 YLEMYQEKAGNFFVSNLYLLSMFYSMAEQNYRWY
BoLA-N:00101 YSEMYRERAGNFFVSNLYLWSMFYSMAEQNYRWY
BoLA-N:00102 YSSMYRERAGNFFVSNLYLWSMFYSMAEQNYRWY
BoLA-N:00103 YSIMYRERAGNFFVSNLYLWSMFYSMAEQNYRWY
BoLA-N:00201 YLEMYQEKAGNFFVSNLYLLSMFYSMAEQNYRWY
BoLA-N:00301 YLIVYEERADHFFRGALYFEYEFYSWASYNYEWY
BoLA-N:00401 YSEMYRNNAGNSFVNTLYLWSMYYTWAYQNYEWY
BoLA-N:00402 YSEMYRNNAGNSFVNTLYLWSMYYTWAYQNYEWY
BoLA-N:00501 YYAEYRNIYDTIFVDTLYIAYWFYTWAAWNYEWY
BoLA-N:00601 YSAEYRNIYDTTFVYTLYLWSMFYTWANENYEWY
BoLA-N:00602 YSAEYRNIYDTTFVYTLYLWSMFYTWANENYEWH
BoLA-N:00801 YLIMYRENSETTFANTAYVEYMDYTWADWNYRWY
BoLA-N:00802 YLIMYRENSETTFANTAYVEYMDYTWADWNYRGY
BoLA-N:00901 YYSMYREISENVYGSNLYLLYRDYTWEYLNYRWY
BoLA-N:00902 YYSEYREISENVYESNLYLLYRDYTWEYLNYRWY
BoLA-N:01001 YYSEYRNIYDTTFVDTLYLEYEYYSVAEFNYRGY
BoLA-N:01101 YSEMYQEKAGTTFANIAYFWYMYYTWAEQNYTWY
BoLA-N:01201 YYATYRENFDTTFVDTLYIAYRDYTWAEHNYTWY
BoLA-N:01301 YHTTYREISENWYEANLYLEYEYYSMAAFNYTWY
BoLA-N:01302 YHTTYREISENWYEANLYLLYEYYSMAAFNYTWY
BoLA-N:01401 YHTKYREISENWYEANLYYRYTFYTWAEFNYRGY
BoLA-N:01402 YHTKYREISENKYEAILYYRYTFYTWAEFNYRWY
BoLA-N:01501 YYTKYREISENWYEANLYLLYTFYSMADQNYRGY
BoLA-N:01502 YYTKYREISENWYEANLYLQFTFYSMADQNYRGY
BoLA-N:01601 YSAEYRNIYDTTFVYTLYLWSMFYTWANENYEGY
BoLA-N:01602 YSAEYRNIYDTTFVYTLYLWSMFYTWANENYEWY
BoLA-N:01701 YSEMYRERAGNIFVSNLYFWYEYYTWAAQNYRWY
BoLA-N:01702 YSEMYRERAGNIFVSNLYFWYMYYTWAAQNYRWY
BoLA-N:01801 YYADYRNIYDTIFANTAYFEYMFYTWAEQNYRGY
BoLA-N:01802 YYADYRNIYDTIFANTAYFEYMFYTWAEQNYRGY
BoLA-N:01901 YHTKYREISENVYGSNLYYDYDYYTWAVFNYRGY
BoLA-N:02001 YHTKYREISENVYGSNLYFLYMDYTWAVFNYRGY
BoLA-N:02101 YYTKYREISENVYGSNLYFQFRYYTWADFNYEGY
BoLA-N:02201 YHSEYEQIVDTSFVGTLYLLYEDYTRAALNYTGY
BoLA-N:02301 YYSEYREISENVYESNLYIAYSDYTWEYLNYRWY
BoLA-N:02401 YSIAYEQIVDTTFANTAYIAYSDYTWEYLNYTWY
BoLA-N:02402 YSIAYEEIVDTTFANTAYLPYSDYTWTYLNYTWY
BoLA-N:02501 YSAEYRNIYDTTFVYALYLWSWFYTWAAENYRGY
BoLA-N:02601 YYAEYREISETTFVDTLYIEYEYYTWAYLNYRGY
BoLA-N:02602 YYAEYREISETTFVDTLYIEYEYYTWAYLNYRGY
BoLA-N:02701 YSEMYRNNAGNSFVGTLYLWSMYYTWEYQNYEWH
BoLA-N:02702 YSEMYRNNAGNSFVGTLYLWSMYYTWEYQNYEWH
BoLA-N:02801 YYTKYREISEKLYENTLYLQFRYYTWADFNYEWY
BoLA-N:02901 YYTRYREISENLYKNTAYITFMYYTWANENYRGY
BoLA-N:03001 YYSEYRNIYDTNFVSNLYLWSWFYTWANENYEWY
BoLA-N:03101 YYTKYDEISENLYKNTLYIAFRDYTWAYLNYTWY
BoLA-N:03401 YHTKYREISENVYGSNLYLLYTFYSMADRNYRGY
BoLA-N:03501 YYNMYQENAGNTFVGTLYLWSEFYTWAAHNYTWY
BoLA-N:03601 YYAMYRNNADATFVNTLYFLYEYYTVADHNYRWY
BoLA-N:03701 YSEMYRNNAGNSFVGTLYLLYMDYSRAVQNYRWY
BoLA-N:03801 YNEMYRNNAGNDSVGTLYLWYMYYSMAVQNYTWY
BoLA-N:03901 YYIVYQEKADTFFLGTLYLWCWFYTWANENYEWY
BoLA-N:04001 YSEMYEERAGIVFVNTLYLWCWFYSMAAGKYTWY
BoLA-N:04101 YHTKYREISENWYEATLYLEYEYYSMAAFNYRSY
BoLA-N:04201 YHTKYDEISENLYKDTLYIAYRDYTWEYLNYRGY
BoLA-N:04301 YSEMYRERAGNTFVNTLYIWYRDYTWAVFNYLGY
BoLA-N:04401 YYAMYEEKADTTFVDTLYIAYRDYTWAVFNYLGY
BoLA-N:04501 YYATYRENLDTTFVDTLYIEYRDYTWAEFNYLGY
BoLA-N:04601 YSEMYRERAGNTFVNTLYIWYRDYTWAEQNYTWY
BoLA-N:04701 YSEMYQERAGNTFVDTLYLWYMDYTWAEQNYTWY
BoLA-N:04801 YYSEYEQIVDTSFVGTLYLLYMDYTRAAQNYRGY
BoLA-N:04901 YYAEYREISDTSFVGTLYIEYEYYTWAYLNYEGY
BoLA-N:05001 YSEMYRNNAGNTFGSNLYFLYTYYTWAEWNYTWH
BoLA-N:05101 YSEMYRERAGNTFVNTLYIWYRDYTWAAENYTWY
BoLA-N:05201 YYSMYRENSDTGFVDTLYLLYTYYSVAVQNYRWY
BoLA-N:05301 YSEMYRNNAGNSFVNTLYLWSMYYTWAYQNYEWY
BoLA-N:05401 YYIMYQENSGATFANTLYFWYWFYTWANENYRGY
BoLA-N:05501 YYAEYREISETTFVDSLYIAYRDYTWAYLNYRGY
BoLA-N:05601 YYATYQENFDATFANTLYFLSTYYTWEAHNYRGY
BoLA-T2C YYIIYRNISDTSFVSNLYLLYTYYSMAVQNYEWH
BoLA-T2a YYATYRENFDTTFVDTLYIAYRDYTWAEHNYTWY
BoLA-T2b YHTKYREISENWYEATLYLEYEYYSMAAFNYRSY
BoLA-T2c YYIIYRNISDTSFVSNLYLLYTYYSMAVQNYEWH
BoLA-T5 YYSEYREISENVYESNLYLLYRDYTWEYLNYRWY
BoLA-T7 YLAMYRNNANTTFVNNLYVEHMYYSMAEQNYTWY
BoLA-amani.1 YYATYRENLDATFVNTAYIAYMDYTWEYQNYEWY
BoLA-gb1.7 YSEMYRNNAGNSFVNTLYLWSMYYTWAYQNYEWY
Chi-B0401 YRTYYGQIGLNINENIRRVWFRSYTWEEWNYTWY
Chi-B1201 YRDYYGQIGGNIDENILRVWYYMYTWGYLQYTWY
Chi-B1501 YSDAYSETSRTIDDGTLRVLYSDYTWGYLQYTWY
DLA-8803401 YYAMYGEKVETLYVDTLYITYSDYTRADLNYTWY
DLA-8850101 YYAMYPQTIETTFVDTLYRTYRDYTWAVWNYTWY
DLA-8850801 YYATYGEKVETVYVDTLYITYRDYTWAVWNYTWY
Eqca-100101 YKSMYEETAGHTFGNIAYFWSSFYTWAEHNYRWY
Eqca-1600101 YYTMYRESVGHTFVNTLYLLYFYYTWAAFNYRSY
Eqca-16:00101 YYTMYRESVGHTFVNTLYLLYFYYTWAAFNYRSY
Eqca-1:00101 YKSMYEETAGHTFGNIAYFWSSFYTWAEHNYRWY
Gogo-B0101 YDTMYRETSAQTDENIAYIRFSSYTWAELAYTWY
H-2-Db YESYYREKAGQWFVSNLYLQSLFYTWSAYAYEWY
H-2-Dd YVEYYRERAGNSFVDTAYLWAWFYTWAADAYEWY
H-2-Dq YESYYRIIADNWFVSTAYIRYEFYTWGAYAYEWY
H-2-Kb YVEYYREKAGNSFVDTLYIVSQYYTWAELAYTWY
H-2-Kd YVAFYEQRASDWFVSTAYFRFQFYTWADYAYEWY
H-2-Kk YHSYYRNIAGNIFVNTAYFRYEYYTWADDAYTWY
H-2-Kq YHSYYRNIADNSSVDTLYIRYEVYTWAARAYAWH
H-2-Ld YESYYRIIAGQWFVNTLYLWYEFYTWAAYAYEWY
H-2-Lq YESYYRIIAGQWFVNTLYIRYEYYTWAAYAYEWY
H-2-Qa1 YHIMYREKADMNFVNTLYLWYCEYSSVEQAYPWY
H-2-Qa2 YHSMYREIAGHSFGSTAYLWYLFYTWAIDAYTSY
H2-Db YESYYREKAGQWFVSNLYLQSLFYTWSAYAYEWY
H2-Dd YVEYYRERAGNSFVDTAYLWAWFYTWAADAYEWY
H2-Dq YESYYRIIADNWFVSTAYIRYEFYTWGAYAYEWY
H2-Kb YVEYYREKAGNSFVDTLYIVSQYYTWAELAYTWY
H2-Kd YVAFYEQRASDWFVSTAYFRFQFYTWADYAYEWY
H2-Kk YHSYYRNIAGNIFVNTAYFRYEYYTWADDAYTWY
H2-Kq YHSYYRNIADNSSVDTLYIRYEVYTWAARAYAWH
H2-Ld YESYYRIIAGQWFVNTLYLWYEFYTWAAYAYEWY
H2-Lq YESYYRIIAGQWFVNTLYIRYEYYTWAAYAYEWY
H2-Qa1 YHIMYREKADMNFVNTLYLWYCEYSSVEQAYPWY
H2-Qa2 YHSMYREIAGHSFGSTAYLWYLFYTWAIDAYTSY
HLA-A0101 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A0102 YSAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A0103 YFAMYQENMAHTDANTLYIMYRDYTWVARVYRGY
HLA-A0104 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A0106 YFAMYQENMAHTDANTLYIIYRDYTWVALAYRGY
HLA-A0107 YFAMYQENVAHTDENTLYIIYRDYTWVARVYRGY
HLA-A0108 YFAMYQENMAHTDANTLYIIYRDYTWVARVYWGY
HLA-A0109 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A0110 YFAMYQENMAHTDANTLYIIYRDYTWARRVYRGY
HLA-A0111 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A0112 YFAMYQENMAHTDANTLYIIYRDYTWAVQAYTGY
HLA-A0113 YFAMYQENMAQTDVDTLYIIYRDYTWVARVYRGY
HLA-A0114 YFAMYQENMAHTDANTLYIIYRDYTWVARVYTGY
HLA-A0115 YFAMYQENMAHTDANTLYIIYRDYTWVARVYGGT
HLA-A0117 YFAMYQENMAQTDANTLYIIYRDYTWVARVYRGY
HLA-A0118 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A0119 YFAMYQENMAHTDANTLYIIYRDYTWAVQAYTGY
HLA-A0120 YSAMYQENMAHTDANTLYVRYRDYTWVARVYRGY
HLA-A0121 YFAMYQENMAHTDANTLYIIYRDYTWAVRVYRGY
HLA-A0122 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A0123 YFAMYQENVAHTDANTLYIIYRDYTWVARVYRGY
HLA-A0124 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A0125 YFAMYQENMAHTDANTLYIIYRDYTWVAQVYRGY
HLA-A0126 YFAMYQENMAHTDANTLYIIYRDYTWAARVYRGY
HLA-A01:01 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:02 YSAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:03 YFAMYQENMAHTDANTLYIMYRDYTWVARVYRGY
HLA-A01:04 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:06 YFAMYQENMAHTDANTLYIIYRDYTWVALAYRGY
HLA-A01:07 YFAMYQENVAHTDENTLYIIYRDYTWVARVYRGY
HLA-A01:08 YFAMYQENMAHTDANTLYIIYRDYTWVARVYWGY
HLA-A01:09 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:10 YFAMYQENMAHTDANTLYIIYRDYTWARRVYRGY
HLA-A01:100 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:101 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:102 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:103 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:104 YFAMYQENMAHTHANTLYIIYRDYTWVARVYRGY
HLA-A01:105 YFAMYQENIAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:106 YFAMYQENMAHTDANTLYIIYRDYSWVARVYRGY
HLA-A01:107 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:108 YFAMYQENMAHTNANTLYIIYRDYTWVARVYRGY
HLA-A01:109 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:110 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:111 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:112 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:113 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:114 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:115 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:116 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:117 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:118 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:119 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:12 YFAMYQENMAHTDANTLYIIYRDYTWAVQAYTGY
HLA-A01:120 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:121 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:122 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:124 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:125 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:126 YFAMYQENMAHTDANTLYIIYRDYTWVVRVYRGY
HLA-A01:127 YFAMYQENMAHTDANTLYIIYRDYTWVAQAYRGY
HLA-A01:128 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:129 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:13 YFAMYQENMAQTDVDTLYIIYRDYTWVARVYRGY
HLA-A01:130 YFAMYQENMAHTDANTLYVRCRDYTWVARVYRGY
HLA-A01:131 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:132 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:133 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:134 YFAMYQENMAHTHVNTLYIIYRDYTWVARVYRGY
HLA-A01:135 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:136 YFAMYQENMAHTDANTLYIIYRDYTWAAQAYRGY
HLA-A01:137 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:138 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:139 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:14 YFAMYQENMAHTDANTLYIIYRDYTWVARVYTGY
HLA-A01:140 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:141 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:142 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:143 YTAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:144 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:145 YFAMYQENMAHTDANTLYIIYQDYTWVARVYRGY
HLA-A01:146 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:148 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:149 YFAMYQENMAHTDANTLYIIYRDYTWVARVYGGY
HLA-A01:150 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:151 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:152 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:153 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:154 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:155 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:156 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:157 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:158 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:159 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:161 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:163 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:164 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:165 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:166 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:167 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:168 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:169 YFAMCQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:17 YFAMYQENMAQTDANTLYIIYRDYTWVARVYRGY
HLA-A01:170 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:171 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:172 YFAMYQENMAHTDANTQYIIYRDYTWVARVYRGY
HLA-A01:173 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:174 YFAMYQENMAHTDANTLYIIYRDHTWVARVYRGY
HLA-A01:175 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:176 YFAMYQENVAQTDVDTLYIIYRDYTWVARVYRGY
HLA-A01:177 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:180 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:181 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:182 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:183 YFAMYQENMAHTDANILYIIYRDYTWVARVYRGY
HLA-A01:184 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:185 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:187 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:188 YSAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:189 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:19 YFAMYQENMAHTDANTLYIIYRDYTWAVQAYTGY
HLA-A01:190 YSAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:191 YFAMYQEKVAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:192 YFAMYQENMAHTDANTLYIMYRDYTWAARVYRGY
HLA-A01:193 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:194 YFAMYQENMAQTDVDTLYIIYRDYTWVARVYRGY
HLA-A01:195 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:196 YFAMYQENMAHTDANTLYIIYRDYTWVERVYRGY
HLA-A01:197 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:198 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:199 YFSMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:20 YSAMYQENMAHTDANTLYVRYRDYTWVARVYRGY
HLA-A01:200 YFAMYQENMAHTDANTLYIIYRDYTWAVLAYTWY
HLA-A01:201 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:202 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:203 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:204 YFAMYQENMTHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:205 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:206 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:207 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:209 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:21 YFAMYQENMAHTDANTLYIIYRDYTWAVRVYRGY
HLA-A01:210 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:211 YFAMYQENMAHSDANTLYIIYRDYTWVARVYRGY
HLA-A01:212 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:213 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:214 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:215 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:216 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:217 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:218 YFAMYQENMAHTDANTLYIIYRGYTWVARVYRGY
HLA-A01:219 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:220 HFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:221 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:222 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:223 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:224 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:225 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:226 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:227 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:229 YFAMYQENMAHTHVDTLYIIYRDYTWVARVYRGY
HLA-A01:23 YFAMYQENVAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:230 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:231 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:232 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:233 YFAMYQENMAHTDANTLYIIYRDYTWVARIYRGY
HLA-A01:234 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:235 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:236 YFAMYQENMAHTDANTLYIIYHYYTWVARVYRGY
HLA-A01:237 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:238 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:239 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:24 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:241 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:242 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:243 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:244 YFAMYQENMAHTDANTLYIIYRDYTWAVLAYTWY
HLA-A01:245 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:246 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:249 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:25 YFAMYQENMAHTDANTLYIIYRDYTWVAQVYRGY
HLA-A01:251 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:252 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:253 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:254 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:255 YFAMYQENMAHTDANTLYITYRDYTWVARVYRGY
HLA-A01:256 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:257 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:259 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:26 YFAMYQENMAHTDANTLYIIYRDYTWAARVYRGY
HLA-A01:260 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:261 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:262 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:263 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:264 YFAMYQENMAHTDANTLYIIYRDYTWFARVYRGY
HLA-A01:265 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:266 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:267 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:268 YFAMYQENMAHTDANTLYIIYRDQTWVARVYRGY
HLA-A01:270 YFAMYQENMAHTGANTLYIIYRDYTWVARVYRGY
HLA-A01:271 YFAMYQENMAHTDANTLYIIYWDYTWVARVYRGY
HLA-A01:272 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:273 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:274 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:275 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:276 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:277 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:278 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:279 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:28 YFAMYQENMAHTDVDTLYIIYRDYTWVARVYRGY
HLA-A01:280 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:281 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:282 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:283 YFTMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:284 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRRY
HLA-A01:286 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:288 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:289 YFAMYQENMAHTDENIAYIIYRDYTWVARVYRGY
HLA-A01:29 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:291 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:292 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:294 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:295 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:296 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:297 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:30 YFAMYQENMAHTDANTLYIIYHYYTWVARVYRGY
HLA-A01:32 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:33 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:35 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:36 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:37 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:38 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:39 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:40 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:41 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:42 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:43 YYAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:44 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:45 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:46 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:47 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:48 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:49 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:50 YFAMYQENMAHTDANTLYIIYREYTWVARVYRGY
HLA-A01:51 YFAMYRNNVAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:54 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:55 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:58 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:59 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:60 YFAMYPENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:61 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:62 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:63 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:64 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:65 YFAMYQENMAHTDANTLYIIYRDYTWVARVCRGY
HLA-A01:66 YFAMYQENMAHTDANTLYVRYRDYTWVARVYRGY
HLA-A01:67 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:68 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:69 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:70 YFAMYQENMAHTDANTLYIIYRDYTCVARVYRGY
HLA-A01:71 YFAMYQDNMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:72 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRWY
HLA-A01:73 YFAMYQENMAHTDANTLYLRYRDYTWVARVYRGY
HLA-A01:74 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:75 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:76 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:77 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:78 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:79 YFAMYQENMAHTDANTLYIIYPDYTWVARVYRGY
HLA-A01:80 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:81 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:82 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:83 YFAMYGEKVAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:84 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:85 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:86 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:88 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:89 YFAMYQENMAHTDANTLYLIYRDYTWVARVYRGY
HLA-A01:90 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:91 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:92 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:93 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:94 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:95 YFAMYQENMAHTDENIAYIIYRDYTWVARVYRGY
HLA-A01:96 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:97 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A01:98 YFAMYQENMAHTDANTLYIIYRDYTWVARAYRGY
HLA-A01:99 YFAMYQENMAHTDANTLYIIYRDYTWVARVYRGY
HLA-A0201 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0202 YFAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A0203 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A0204 YFAMYGEKVAHTHVDTLYVMYHYYTWAVLAYTWY
HLA-A0205 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A0206 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0207 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A0208 YYAMYGENVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A0209 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0210 YYAMYGEKVAHTHVDTLYVRFHYYTWAVLAYTWY
HLA-A0211 YFAMYGEKVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A0212 YFAMYGEKVAHTHVDTLYVRYHYYTWAVQAYTWY
HLA-A0213 YFAMYGEKVAHTHVDTLYVRYHYYTWAEQAYTWY
HLA-A0214 YYAMYGEKVAHTHVDTLYLRYHYYTWAVLAYTWY
HLA-A0215 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A0216 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYEWY
HLA-A0217 YFAMYGEKVAHTHVDTLYLMFHYYTWAVLAYTWY
HLA-A0218 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A0219 YFAMYGEKVAHTHVDTLYVRYHYYTWAVQAYTGY
HLA-A0220 YFAMYGENVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0221 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0222 YFAMYGEKVAHTHVDTLYVRYHYYTWAVWAYTWY
HLA-A0224 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0225 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0226 YFAMYGEKVAHTHVDTLYVRYHYYTWAELAYTWY
HLA-A0227 YFAMYGEKVAHTHVDTLYVRYHYYTWAAQAYTWY
HLA-A0228 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0229 YFAMYGEQVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0230 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0231 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0233 YFAMYGEKVAHTHVDTLYVRSHYYTWAVLAYTWY
HLA-A0234 YFAMYGEKVAQTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0235 YFAMYGEKVAQTDVDTLYVRYHYYTWAVLAYTWY
HLA-A0236 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTGY
HLA-A0237 YFAMYGEKVAHTHVDTLYVRYHYYTWAVQAYTGY
HLA-A0238 YFAMYGEKVAHTHVDTLYVRYHYYTWAEQAYRWY
HLA-A0239 YFAMYGEKVAHTHVDTLYVRFHYYTWAVLAYTWY
HLA-A0240 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0241 YYAMYGEKVAHTHVDTLYVRYQYYTWAVLAYTWY
HLA-A0242 YFSMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0243 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0244 YYAMYGEKVAHTHVDTLYVRYHYYTWAVQAYTWY
HLA-A0245 YFAMYQEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0246 YFAMYEEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0247 YFAMYGEKVAHSHVDTLYLRYHYYTWAVWAYTWY
HLA-A0248 YFAMYEEKVAHTDVDTLYVRYHYYTWAVLAYTWY
HLA-A0249 YFAMYGEKVAHTHVDTLYVRYHYYTWAVRAYTWY
HLA-A0250 YFAMYGEKVAHTHVDTLYIRYHYYTWAVWAYTWY
HLA-A0251 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0252 YFAMYGEKVAHTHVDTLYVRYEHYTWAVLAYTWY
HLA-A0254 YYAMYGEKVAHTHVDTLYVRYHYYTWAVQAYTGY
HLA-A0255 YFAMYRNNVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0256 YFAMYQENVAQTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0257 YYAMYGEKVAHTHVDTLYLMYHYYTWAVLAYTWY
HLA-A0258 YFAMYGEKVAHTHVDTLYLRYHYYTWAVLAYTWY
HLA-A0259 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0260 YFAMYGEKVAHTHVDTLYVRYHFYTWAVLAYTWY
HLA-A0261 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0262 YFAMYGENVAQTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0263 YFAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A0264 YFAMYGEKVAHTHVDTLYVRYHSYTWAVLAYTWY
HLA-A0265 YFAMYGEKVAHTHVDTLYIMYQDYTWAVLAYTWY
HLA-A0266 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0267 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0268 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0269 YFAMYGEKVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A0270 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0271 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0272 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0273 YFAMYGEKVAHTHVDTLYIRYHYYTWAVLAYTWY
HLA-A0274 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0275 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0276 YSAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0277 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0278 YYAMYQENVAQTDVDTLYVRYHYYTWAVLAYTWY
HLA-A0279 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0280 YFAMYGEKVAHTHVDTLYVRYQDYTWAVLAYTWY
HLA-A0281 YFAMYGEKVAHTDESIAYVRYHYYTWAVLAYTWY
HLA-A0283 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0284 YYAMYGEKVAHTHVDTLYFRYHYYTWAVLAYTWY
HLA-A0285 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0286 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0287 YFAMYGEKVAHTDENIAYVRYHYYTWAVLAYTWY
HLA-A0289 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0290 YFAMYGEKVAHTDVDTLYVRYHYYTWAVLAYTWY
HLA-A0291 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0292 YFAMYEEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0293 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0295 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0296 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0297 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A0299 YYAMYGEKVAHTHVDTLYVRYHYYTWAELAYTWY
HLA-A02:01 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:02 YFAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:03 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:04 YFAMYGEKVAHTHVDTLYVMYHYYTWAVLAYTWY
HLA-A02:05 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:06 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:07 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:08 YYAMYGENVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:09 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:10 YYAMYGEKVAHTHVDTLYVRFHYYTWAVLAYTWY
HLA-A02:101 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYRWY
HLA-A02:102 YFAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:103 YFAMYQENVAQTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:104 YFAMYGEKVAHTHVDTLYVRYHYYTWAVWAYTWY
HLA-A02:105 YFAMYGEKVAHTHVDTLYVRYEYYTWAVLAYTWY
HLA-A02:106 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:107 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:108 YYAMYGEKVAHTHVDTLYLMFHYYTWAVLAYTWY
HLA-A02:109 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:11 YFAMYGEKVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:110 YFAMYGEKVAHTHVDTLYLMFHYYTWAVLAYTWY
HLA-A02:111 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:112 YFAMYGEKVAHTDENIAYVRCHYYTWAVLAYTWY
HLA-A02:114 YFAMYGEKVAHTHVDTLYVRYRDYTWAVLAYTWY
HLA-A02:115 YFAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:116 YFAMYGEKVAHTHLDTLYVRYHYYTWAVLAYTWY
HLA-A02:117 YFAMYGEKVAHTHVDTLYVRYQDYTWAEWAYTWY
HLA-A02:118 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:119 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:12 YFAMYGEKVAHTHVDTLYVRYHYYTWAVQAYTWY
HLA-A02:120 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:121 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:122 YYAMYGEKVAHTHVDTLYIRYHYYTWAVWAYTWY
HLA-A02:123 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:124 YFAMYGEKVAHTDESIAYVRYHYYTWAVLAYTWY
HLA-A02:126 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:127 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYKWY
HLA-A02:128 YFAMYGENVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:129 YYAMYEEKVAHTDENIAYVRYHYYTWAVLAYTWY
HLA-A02:13 YFAMYGEKVAHTHVDTLYVRYHYYTWAEQAYTWY
HLA-A02:130 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:131 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYEWY
HLA-A02:132 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:133 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:134 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:135 YFAMYGEKVAHTHVDTLYIRYQDYTWAEWAYRWY
HLA-A02:136 YFAMYGEKVAHTDENIAYVRYHYYTWAVWAYTWY
HLA-A02:137 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:138 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:139 YFAMYGEKVTHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:14 YYAMYGEKVAHTHVDTLYLRYHYYTWAVLAYTWY
HLA-A02:140 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:141 YFVMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:142 YYAMYGEKVAHTHVDTLYVRYHYYTWAVQAYTWY
HLA-A02:143 YYAMYREKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:144 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:145 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:146 YFAMYGEKVAHTDANTLYVRYHYYTWAVLAYTWY
HLA-A02:147 YFAMYGEKVAHTHVDTLYVRYDYYTWAVLAYTWY
HLA-A02:148 YFAMYGEKVAHTHVDTLYVRFHYYTWAEWAYTWY
HLA-A02:149 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:150 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:151 YFAMYGEKVAHTHVDTLYVRYDYYTWAVLAYTWY
HLA-A02:152 YFAMYGEKVAHTHVDTLYIMYQDYTWAVLAYTWY
HLA-A02:153 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:154 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYRWY
HLA-A02:155 YFAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:156 YFAMYGEKVAHTHVDTLYIIYHYYTWAVLAYTWY
HLA-A02:157 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:158 YFAMYGEKVAHAHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:159 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:16 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYEWY
HLA-A02:160 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:161 YFAVYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:162 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:163 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:164 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:165 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:166 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:167 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYKWY
HLA-A02:168 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:169 YYAMYQENVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:17 YFAMYGEKVAHTHVDTLYLMFHYYTWAVLAYTWY
HLA-A02:170 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:171 YFAMYGEKVAHTHVDTLYVRYHYYTWAELAYTWY
HLA-A02:172 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:173 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:174 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:175 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:176 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:177 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:178 YYAMYGEKVAHTHVDTLYVRYHSYTWAVLAYTWY
HLA-A02:179 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:18 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:180 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:181 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:182 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:183 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:184 YFAMYGEKVAHTHEDTLYVRYHYYTWAVLAYTWY
HLA-A02:185 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:186 YFAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:187 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:188 YFAMYGEKVAHTHVDTLYVRYDSYTWAVLAYTWY
HLA-A02:189 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:19 YFAMYGEKVAHTHVDTLYVRYHYYTWAVQAYTGY
HLA-A02:190 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:191 YFAMYGEKVAHTHVDTLYVRCHYYTWAVWAYTWY
HLA-A02:192 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:193 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:194 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:195 YFAMYQENVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:196 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:197 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:198 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:199 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:20 YFAMYGENVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:200 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:201 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:202 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:203 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:204 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:205 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:206 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:207 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:208 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:209 YFAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:21 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:210 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:211 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:212 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:213 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:214 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:215 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:216 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:217 YFAMYREKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:218 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:219 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:22 YFAMYGEKVAHTHVDTLYVRYHYYTWAVWAYTWY
HLA-A02:220 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:221 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:224 YFAMYGEKVAHTHVDTLYVGYHYYTWAVLAYTWY
HLA-A02:228 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:229 YYAMYGEKVAHTHVDTLYLRYRYYTWAVWAYTWY
HLA-A02:230 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:231 YFAMYGEKVAHTHVDTLYVRNHYYTWAVLAYTWY
HLA-A02:232 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:233 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTRY
HLA-A02:234 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:235 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:236 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:237 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:238 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:239 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:24 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:240 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:241 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:242 YFAMYGEKVAHTHVDTLYVRFHYYTWAVLAYTWY
HLA-A02:243 YTAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:244 YYAMYGEKVAHTHVDTLYVRFHYYTWAVLAYTWY
HLA-A02:245 YFAMYGEKVAHTHVDTLYIRYHYYTWAVLAYTWY
HLA-A02:246 YFAMYGEKVAHTHVDTLYVRYRDYTWAVLAYTWY
HLA-A02:247 YFAMYGEKVAHTDENTLYVRYHYYTWAVLAYTWY
HLA-A02:248 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:249 YFAMYVEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:25 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:251 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:252 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:253 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:254 YFAMYGEKVAHTHVDTLYVRYNFYTWAVLAYTWY
HLA-A02:255 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTGY
HLA-A02:256 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:257 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:258 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:259 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:26 YFAMYGEKVAHTHVDTLYVRYHYYTWAELAYTWY
HLA-A02:260 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:261 YFAMYGEKVAHTHMDTLYVRCHYYTWAVLAYTWY
HLA-A02:262 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLVYTWY
HLA-A02:263 YFAMYGEKVAHTHVDTLYVRYHYYTWSVLAYTWY
HLA-A02:264 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:265 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:266 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:267 YFAMYGEKVAHTHVDTLYVRYHYYTWAAWAYTWY
HLA-A02:268 YFAMYGEKVAHTHVDTLYVMFHYYTWAVLAYTWY
HLA-A02:269 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:27 YFAMYGEKVAHTHVDTLYVRYHYYTWAAQAYTWY
HLA-A02:270 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:271 YYAMYGEKVAHTHVDTLYLRYHYYTWAVQAYTWY
HLA-A02:272 YFAMYGEKLAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:273 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:274 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:275 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:276 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:277 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:278 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:279 YFAMYGEKVAHTHVDTLYVRYRDYTWAVLAYTWY
HLA-A02:28 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:280 YFAMYGEKVAHTHVDTLYVRYHYYTWAEQAYTWY
HLA-A02:281 YFAMYGEKVAHTHVDILYVRYHYYTWAEWAYTWY
HLA-A02:282 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:283 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:285 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:286 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:287 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:288 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:289 YFAMYGEKVAHTHVDTLYVRYQYYTWAVLAYTWY
HLA-A02:29 YFAMYGEQVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:290 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:291 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:292 YFAMYGEKVSHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:294 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:295 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:296 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:297 YFAMYGEKVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:298 YFAMYGEKVAHIDVDTLYVRYHDYTWAVLAYTWY
HLA-A02:299 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:30 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:300 YYAMYGEKVAHTHVDTLYLMFHYYTWAVLAYTWY
HLA-A02:302 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:303 YFAMYGEKVAHTHVDTLYLMFHYYTWAVLAYTWY
HLA-A02:304 YFAMYGEKVAHTHVDTLYVRYQDYTWAVLAYTWY
HLA-A02:306 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:307 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:308 YFAMYGEKVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:309 YFAMYGEKVAHTHVDTLYVRYQDYTWAVLAYTWY
HLA-A02:31 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:310 YYSMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:311 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:312 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:313 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:315 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:316 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:317 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:318 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:319 YSAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:320 YFAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:322 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:323 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:324 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:325 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:326 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:327 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:328 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:329 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:33 YFAMYGEKVAHTHVDTLYVRSHYYTWAVLAYTWY
HLA-A02:330 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:331 YYAMYGEKVAHTDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:332 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWH
HLA-A02:333 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:334 YFAMYGEKVAHTHVDTLYIMYHYYTWAVLAYTWY
HLA-A02:335 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:336 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:337 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:338 YFAMYGEKVAHTHVDTLYIIYHYYTWAVLAYTWY
HLA-A02:339 YFAMYGEKVAHTHVDTLYVRYDLYTWAVLAYTWY
HLA-A02:34 YFAMYGEKVAQTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:340 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:341 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:342 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:343 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:344 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:345 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:346 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:347 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:348 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:349 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:35 YFAMYGEKVAQTDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:351 YFAMYGEKVARTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:352 CFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:353 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:354 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYAWY
HLA-A02:355 YYAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:357 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:358 YYAMYEEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:359 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:36 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTGY
HLA-A02:360 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:361 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:362 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:363 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:364 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:365 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:367 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:368 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:369 YFAMYEEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:37 YFAMYGEKVAHTHVDTLYVRYHYYTWAVQAYTGY
HLA-A02:370 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:371 YFAMYGEKVAHTHVDTLYVRYHYYIWAVLAYTWY
HLA-A02:372 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:374 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:375 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:376 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYLWY
HLA-A02:377 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:378 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:379 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:38 YFAMYGEKVAHTHVDTLYVRYHYYTWAEQAYRWY
HLA-A02:380 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:381 YFAMYGEKVAHTHVDSLYVRYHYYTWAVLAYTWY
HLA-A02:382 YYAMYGEKVAHTHVDTLYVRYHYYTWAVWAYTWY
HLA-A02:383 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:384 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:385 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYMWY
HLA-A02:386 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:387 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:388 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:389 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:39 YFAMYGEKVAHTHVDTLYVRFHYYTWAVLAYTWY
HLA-A02:390 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:391 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:392 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:393 YFAMYGEKVAHTHVDTLYVRYHYYTWAELAYTWY
HLA-A02:394 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:396 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:397 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:398 YYAMYGEKVAHTHVDTLYVRFHYYTWAVLAYTWY
HLA-A02:399 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:40 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:400 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:401 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:402 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:403 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLTYTWY
HLA-A02:404 YYAMYGEKVAHTHVDTLYVRYHHYTWAVLAYTWY
HLA-A02:405 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:406 YFAMYGEKVAHTHVDTLYVRYHDYTWAVLAYTWY
HLA-A02:407 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:408 YFAMYGEKVAHTHVDTLYVRCHYYTWAALAYTWY
HLA-A02:409 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:41 YYAMYGEKVAHTHVDTLYVRYQYYTWAVLAYTWY
HLA-A02:410 YFAMYAEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:411 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:412 YFAMYGEKVAHTHVDTLYVRYHSYTWAEWAYTWY
HLA-A02:413 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:414 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:415 YYAMYGENVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:416 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:417 YFAMYGEKVAHTHVDTLYVRYHYYTWAVQAYTGY
HLA-A02:418 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:419 YYAMYREKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:42 YFSMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:420 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:421 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:422 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:423 YFAMYGEKVAHTHVDTLYVRYHHYTWAVLAYTWY
HLA-A02:424 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:425 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:426 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:427 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:428 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:429 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:430 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:431 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:432 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTSY
HLA-A02:433 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:434 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:435 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:436 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:437 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYEWY
HLA-A02:438 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:44 YYAMYGEKVAHTHVDTLYVRYHYYTWAVQAYTWY
HLA-A02:441 HFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:442 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:443 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:444 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:445 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:446 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:447 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYRWY
HLA-A02:448 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:449 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:45 YFAMYQEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:450 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:451 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:452 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:453 YYAMYGEKVAHTHVDTLYVRFHYYTWAVLAYTWY
HLA-A02:454 YYAMYGEKVAHTHVDTLYVRYQDYTWAVLAYTWY
HLA-A02:455 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:456 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:457 YFAMYGEKVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:458 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:459 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:46 YFAMYEEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:460 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:461 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:462 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:463 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:464 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:465 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:466 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:467 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:469 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:47 YFAMYGEKVAHSHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:470 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:471 YYAMYGEKVVHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:472 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:473 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:474 YYAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:475 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:477 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:478 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:479 YFAMYGEKVAHSHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:48 YFAMYEEKVAHTDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:480 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:481 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:482 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:483 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:484 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:485 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:486 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:487 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYEWY
HLA-A02:488 YFAMYGEKVAHTHVDTLYVRYHYCTWAVLAYTWY
HLA-A02:489 YYAMYGEKVAHTHVDTLYLRYHYYTWAEWAYTWY
HLA-A02:49 YFAMYGEKVAHTHVDTLYVRYHYYTWAVRAYTWY
HLA-A02:491 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:492 YFAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:493 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:494 YFAMYGEKVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:495 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:496 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:497 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:498 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:499 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:50 YFAMYGEKVAHTHVDTLYIRYHYYTWAVWAYTWY
HLA-A02:502 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:503 YFAMYGEKVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:504 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:505 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:507 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:508 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:509 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:51 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:510 YFAMYGEKVAHTHVDTLYVRYHLYTWAVLAYTWY
HLA-A02:511 YFAMYGEKVAHTHVDTLYVSYHYYTWAVLAYTWY
HLA-A02:512 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:513 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:515 YFAMYGEKVAHTHMDTLYVRYHYYTWAVLAYTWY
HLA-A02:517 YFAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:518 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:519 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:52 YFAMYGEKVAHTHVDTLYVRYEHYTWAVLAYTWY
HLA-A02:520 YFAMYGEKVAHTHVDTLYVRYYYYTWAVLAYTWY
HLA-A02:521 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:522 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:523 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:524 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:526 YFAMYGEKVAHTHVDTLYVKYHYYTWAVLAYTWY
HLA-A02:527 YYAMYGEKVAHTHVDTLYLRYRDYTWAVWAYTWY
HLA-A02:528 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:529 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYMWY
HLA-A02:530 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:531 YFAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:532 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:533 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:534 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:535 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:536 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:537 YFAMYGEKVAHTHVDTLYVRYHYYTWDVLAYTWY
HLA-A02:538 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:539 YFAMYGEKVAHTHVDTLYVRYHYYTLAVLAYTWY
HLA-A02:54 YYAMYGEKVAHTHVDTLYVRYHYYTWAVQAYTGY
HLA-A02:541 YFAMYGEKVAHTHVDTLYVRCHYYTWAELAYTWY
HLA-A02:542 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:543 YFAMYGEKVAHTHVDTLYVRYHYYTWAVQAYRWY
HLA-A02:544 YFAMYGEKVAHTHVDTLYVRCHYYTWAEWAYTWY
HLA-A02:545 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:546 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:547 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLVYTWY
HLA-A02:548 YFAMYGEKVAHTHVDTLYVRHHYYTWAVLAYTWY
HLA-A02:549 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:55 YFAMYRNNVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:550 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:551 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:552 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:553 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:554 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:555 YFAMYGEKVAHTHVDTLYVRYNYYTWAVLAYTWY
HLA-A02:556 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:557 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:558 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:559 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:56 YFAMYQENVAQTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:560 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYEWY
HLA-A02:561 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:562 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:563 YFAMYGEKVAHTHVDTLYVRYHYYAWAVLAYTWY
HLA-A02:564 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:565 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:566 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:567 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:568 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:569 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:57 YYAMYGEKVAHTHVDTLYLMYHYYTWAVLAYTWY
HLA-A02:570 YFTMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:571 YFAMYEEKVAHTDENIAYVRYHYYTWAVLAYTWY
HLA-A02:572 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:573 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:574 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:575 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:576 YFAMYGEKVAHTHVDTLYVRYHYYTWVVLAYTWY
HLA-A02:577 YYAMYGEKVAHTHGDTLYLRYHYYTWAVWAYTWY
HLA-A02:578 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:579 YFAMYGEKVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:58 YFAMYGEKVAHTHVDTLYLRYHYYTWAVLAYTWY
HLA-A02:580 YFAMYGEKVAQTDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:581 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYEWY
HLA-A02:582 YFAMYGEKVAHTHVDTLYVRYRDYTWAVWAYTWY
HLA-A02:583 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:584 YFAMYGEKVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:585 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:586 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:587 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:588 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:589 YFAMYGEKVAHIDVDTLYVRYHYYTWAELAYTWY
HLA-A02:59 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:590 YFAMYGEKVAHTHVDTLYVRYHYYTWAALAYTWY
HLA-A02:591 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:592 YYAMYGEKVAHTHVDTLYVRYHYYTWAELAYTWY
HLA-A02:593 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:594 YFAMYGEKVAHTHVDTLYVRYNFYTWAVLAYTWY
HLA-A02:595 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:596 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:597 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:598 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:599 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:60 YFAMYGEKVAHTHVDTLYVRYHFYTWAVLAYTWY
HLA-A02:600 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:601 YFAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:602 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:603 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:604 YFAMYGEKVAHTHVDTLYVRIHYYTWAVLAYTWY
HLA-A02:606 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:607 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:609 YFAMYGENMAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:61 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:610 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:611 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:612 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:613 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:614 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:615 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:616 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:617 YFAMYGEKVAHTHVDTLYLMFHYYTWAVLAYTWY
HLA-A02:619 YFAMYGEKVAHTHVDTLYVRYHYYTWAELAYTWY
HLA-A02:62 YFAMYGENVAQTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:620 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:621 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:623 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:624 YFAMYGEKVAHTHVDTLCVRYHYYTWAVLAYTWY
HLA-A02:625 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:626 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:627 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:628 YFAMYGEKVAHTHVDTLYVRFHYYTWAVLAYTWY
HLA-A02:629 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:63 YFAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:630 YYAMYGEKVAHTHVDTLYVRFHYYTWAVQAYTWY
HLA-A02:631 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:632 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:633 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:634 YFAMYGENVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:635 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:636 YFAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:637 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:638 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:639 YFAMYGEKVAHTHVDILYVRYHYYTWAVLAYTWY
HLA-A02:64 YFAMYGEKVAHTHVDTLYVRYHSYTWAVLAYTWY
HLA-A02:640 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:641 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:642 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:644 YFAMYRNNVAHTDANTLYVRYHYYTWAVLAYTWY
HLA-A02:645 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:646 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:647 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:648 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:649 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:65 YFAMYGEKVAHTHVDTLYIMYQDYTWAVLAYTWY
HLA-A02:650 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:651 YFAMYGEKVAHTHVDTLYVRYHYYTWAVWAYTWY
HLA-A02:652 YFAMYGEKVAHTHVDTLNVRCHYYTWAVLAYTWY
HLA-A02:653 YFAMYGEKVAHTHVDTLHVRYHYYTWAVLAYTWY
HLA-A02:654 YFAMYGEKVAHTHVDTLYVRYHYYTCAVLAYTWY
HLA-A02:655 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:656 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:657 YFAMYGEKVAHTHVDTLYLMFHYYTWAVLAYTWY
HLA-A02:658 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:659 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:66 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:660 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:661 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:662 YFAMYGEKVAHTHVDTLYVRYRDYTWAAQAYTWY
HLA-A02:663 YFAMYGEKVAYTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:664 YFAMYGEKVAHTHVDTLYVMYHYYTWAVLAYTWY
HLA-A02:665 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:666 YSAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:667 YFAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:668 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:669 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:67 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:670 YYAMYGEKVAHTHVDTLHLRYHYYTWAVWAYTWY
HLA-A02:671 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:673 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:674 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:676 YYAMYGEKVAHTHVDTLYLRYHSYTWAVWAYTWY
HLA-A02:677 YFAMYGEKVDHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:678 YFAMYGEKVAHTHVDTLYVRCHSYTWAVLAYTWY
HLA-A02:679 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:68 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:680 YFAMYGEKVAHTHVDTLYLMFHYYTWAVWAYTWY
HLA-A02:681 YFAMYGEKVAHTHVDTLYVRYRYYTWAVLAYTWY
HLA-A02:682 YFAMYGEKVAHTHVDTLYVRYHYYTWVARAYTWY
HLA-A02:683 YFAMYGEKVAHTHVDTLYVRYHYYTWAVRAYTWY
HLA-A02:684 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:685 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:686 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:687 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:688 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:689 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:69 YFAMYGEKVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:690 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:692 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:693 YFAMYGEKVAHTHVDTLYVRYHYYTWAVFAYEWY
HLA-A02:694 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:695 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:697 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:698 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:699 YFAMYGEKVAHTHVDTLYVRYHYYTWAGLAYTWY
HLA-A02:70 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:700 YFAMYGEKVAHTHVDTLYVRYHYYTWAVQVYTWY
HLA-A02:701 YFAMYGEKVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:702 YFAMYGEKVALTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:703 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:704 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:705 YFAMYGEKVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:706 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:707 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:708 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLANTWY
HLA-A02:709 YFAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:71 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:711 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:712 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:713 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:714 YFAMYGEKVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:716 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:717 YFAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:718 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:719 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:72 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:720 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:721 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:722 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:723 YYAMYGEKVAHTHVDTLYVRYHYYTWAVQAYTGY
HLA-A02:724 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:725 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:726 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:727 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:728 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:729 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:73 YFAMYGEKVAHTHVDTLYIRYHYYTWAVLAYTWY
HLA-A02:730 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:731 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:732 YFAMYGEKVAHTHVYTLYVRYHYYTWAVLAYTWY
HLA-A02:733 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:734 YFAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:735 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:736 YFAMYGEKVAHTHVDTLYVWYHYYTWAVLAYTWY
HLA-A02:737 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:738 YFAMYGEKVVHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:739 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:74 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:740 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:741 YFAMYRNKVAQTDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:742 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:743 YFAMYGEKVAHTHVDTLYVRYNYYTWAVLAYTWY
HLA-A02:744 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:745 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:746 YFAMYWEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:747 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:749 YFAMYGEKVAHTDANTLYVRYHYYTWAVLAYTWY
HLA-A02:75 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:750 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:751 YFAMYGEKVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:752 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:753 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:754 YFAMYGEKVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:755 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:756 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:757 YFAMYGEKVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:758 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:759 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:76 YSAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:761 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:762 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:763 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:764 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:765 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:766 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:767 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:768 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:769 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:77 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:770 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:771 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:772 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:774 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:776 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:777 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:778 YYAMYGEKVAHNHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:779 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:78 YYAMYQENVAQTDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:780 YFAMYGEQVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:781 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:782 YFAMYGEKVAHTHVDTLYVRYHYYTWAELAYTWY
HLA-A02:783 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:784 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:785 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:786 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:787 YFAMYGEKVVHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:79 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:790 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:794 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:795 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:798 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:799 YFAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:80 YFAMYGEKVAHTHVDTLYVRYQDYTWAVLAYTWY
HLA-A02:800 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:801 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:802 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:804 YFAMYGEKVAHTHVDTLYLMFHYYTWAVQAYTGY
HLA-A02:808 YFAMYGEKVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:809 YFAMYGEKVAHTHVDTLYVRYHYYTWAEWAYTWY
HLA-A02:81 YFAMYGEKVAHTDESIAYVRYHYYTWAVLAYTWY
HLA-A02:810 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:811 YFAMYGEKVAHTHVDTLYVRYHYYTWAVFAYTWY
HLA-A02:812 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:813 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:814 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:815 YFAMYRNNVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:816 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:817 YYAMYGEKVAHTHVDTLYLRYHYYTWAVWAYTWY
HLA-A02:818 YYAMYGEKVAHTHVDTLYLRYHYYTWAVLAYTWY
HLA-A02:819 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:820 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:821 YFAMYGEKVAHIDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:822 YFAMYGEKVAHTHVDTLYVRCHYYTWAVLAYTWY
HLA-A02:823 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:824 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:825 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:84 YYAMYGEKVAHTHVDTLYFRYHYYTWAVLAYTWY
HLA-A02:85 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:86 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:87 YFAMYGEKVAHTDENIAYVRYHYYTWAVLAYTWY
HLA-A02:89 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:90 YFAMYGEKVAHTDVDTLYVRYHYYTWAVLAYTWY
HLA-A02:91 YYAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:92 YFAMYEEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:93 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:95 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:96 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:97 YFAMYGEKVAHTHVDTLYVRYHYYTWAVLAYTWY
HLA-A02:99 YYAMYGEKVAHTHVDTLYVRYHYYTWAELAYTWY
HLA-A0301 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A0302 YFAMYQENVAQTDVDTLYIIYRDYTWAVQAYTWY
HLA-A0303 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A0304 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A0305 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A0306 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A0307 YFAMYQENVAQTDVDTLYIIYRDYTWAVLAYTWY
HLA-A0308 YFAMYQENVAHTDVDTLYIIYRDYTWAELAYTWY
HLA-A0309 YFAMYQENVAQTHVDTLYIIYRDYTWAELAYTWY
HLA-A0310 YFAMYQENVAQTDVDTLYIIYRDYTWAVQAYTWY
HLA-A0312 YYAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A0313 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A0314 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A0315 YFAMYQENVAQTDVDTLYIIFRDYTWAELAYTWY
HLA-A0316 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A0317 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A0318 YFAMYQENVAQTDVDTLYIIYRDYTWVARVYRGY
HLA-A0319 YFAMYQENVAQTDVDTLYIIFHYYTWAELAYTWY
HLA-A0320 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A0321 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A0322 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A0323 YFAMYGEKVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A0324 YFAMYRNNVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A0325 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A0326 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A0327 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A0328 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A0329 YFAMYQENVVQTDVDTLYIIYRDYTWAELAYTWY
HLA-A0330 YFAMYEEKVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:01 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:02 YFAMYQENVAQTDVDTLYIIYRDYTWAVQAYTWY
HLA-A03:04 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:05 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:06 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:07 YFAMYQENVAQTDVDTLYIIYRDYTWAVLAYTWY
HLA-A03:08 YFAMYQENVAHTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:09 YFAMYQENVAQTHVDTLYIIYRDYTWAELAYTWY
HLA-A03:10 YFAMYQENVAQTDVDTLYIIYRDYTWAVQAYTWY
HLA-A03:100 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:101 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:102 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:103 YFAMYQENVAQTDVDTLYIIYQDYTWAELAYTWY
HLA-A03:104 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWH
HLA-A03:105 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYRWY
HLA-A03:106 YFAMYQENVAQTDVDTLYIIYRDYTWAVQAYTWY
HLA-A03:107 YFAMYQENMAHTDANTLYIIYRDYTWAELAYTWY
HLA-A03:108 YFAMYQENVAHTHVDTLYIIYRDYTWAELAYTWY
HLA-A03:109 YFAMYQENVAQTDVHTLYIIYRDYTWAELAYTWY
HLA-A03:110 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:111 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:112 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:113 YFAMYQEKVAQTDVDTLYIIYRDYTWAVQAYTWY
HLA-A03:114 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:115 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:116 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:117 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:118 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:119 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:12 YYAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:120 YFAMYQENVAQTDVDTLYIIYRDCTWAELAYTWY
HLA-A03:121 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:122 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTGY
HLA-A03:123 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:124 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:125 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:126 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:127 YFAMYQENVAQTDVDTLYIIYRDYTWAALAYTWY
HLA-A03:128 YFAMYQENVAQTDLDTLYIIYRDYTWAELAYTWY
HLA-A03:13 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:130 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:131 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:132 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:133 YFAMYQENVAQTDVDTLYIIYRDYTWAVLAYTWY
HLA-A03:134 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:135 YFAMYQENVAQTDVDTLYIIYRDYTWAERVYRGY
HLA-A03:136 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:137 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:138 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:139 YFAMYQENVAQTDVDTLYIIYRDYTWAKLAYTWY
HLA-A03:14 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:140 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:141 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:142 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:143 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:144 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:145 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:146 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:147 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:148 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:149 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:15 YFAMYQENVAQTDVDTLYIIFRDYTWAELAYTWY
HLA-A03:150 YFAMYQENVAQTDVDTLYIIYRDYTWAELVYTWY
HLA-A03:151 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:152 YFAMYEEKVAHTDENIAYIIYRDYTWAELAYTWY
HLA-A03:153 YFAMYQENVAQTDVDTLYIIYRDYTWAERVYTWY
HLA-A03:154 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:155 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:156 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:157 YFAMYQEKVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:158 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:159 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:16 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:160 YFAMYQENVAQTDVDTLYIIYRDYTWAVQAYTWY
HLA-A03:163 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:164 YFAMYQENMAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:165 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:166 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:167 YFAMYQENVAQTDVDTLYIIYRDYTWAEQAYTGY
HLA-A03:169 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:17 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:170 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:171 YFAMYQENVAQTDVDTLYIIYRDYTWAVLAYTWY
HLA-A03:172 YFAMYQEKVAHTHVDTLYIIYRDYTWAELAYTWY
HLA-A03:173 YFAMYQENVAQTDEDTLYIIYRDYTWAELAYTWY
HLA-A03:174 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:175 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:176 YFAMYQEKVAHTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:177 YFAMYQENVAQTDVDTLYIRYRDYTWAELAYTWY
HLA-A03:179 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:18 YFAMYQENVAQTDVDTLYIIYRDYTWVARVYRGY
HLA-A03:180 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:181 YFAMYQENVAQTDVDSLYIIYRDYTWAELAYTWY
HLA-A03:182 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:183 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:184 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:185 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:186 YFAMYQENVAQTDVDTLYIIYEHYTWAELAYTWY
HLA-A03:187 YFAMYQENVAQTDVDTLYIIYRDYTWVARVYTWY
HLA-A03:188 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:189 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:19 YFAMYQENVAQTDVDTLYIIFHYYTWAELAYTWY
HLA-A03:190 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:191 YFAMYQENVAQTDVDTLYIIYGDYTWAELAYTWY
HLA-A03:193 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:195 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:196 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:198 YFAMYGEKVAHTHVDTLYIIYRDYTWAVQAYTWY
HLA-A03:199 YFAMYQENVAQSDVDTLYIIYRDYTWAELAYTWY
HLA-A03:20 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:201 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:202 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:203 YFAMYQENVAQSDVDTLYIIYRDYTWAELAYTWY
HLA-A03:204 YFAMYQENVAQTDVDTLYMVYRDYTWAELAYTWY
HLA-A03:205 YTAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:206 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:207 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:208 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYEWY
HLA-A03:209 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:210 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:211 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:212 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:213 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:214 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:215 YFAMYQENVAQTDVDTLYIMYRDYTWAELAYTWY
HLA-A03:216 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:217 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:218 YFAMYQENVAQTDVDTLYIIYRDYTWAVQAYTWY
HLA-A03:219 YFAMYQENVAQTDENIAYIIYRDYTWAELAYTWY
HLA-A03:22 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:220 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:221 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:222 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:223 YFAMYQENVAQTDVDTLYIIYRDYTWAVQAYTWY
HLA-A03:224 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:225 YFAMYQENVAQTDVDTLYIIYRDYTWAERAYTWY
HLA-A03:226 YFAMYQENVAQTDVDTLYIIYPDYTWAELAYTWY
HLA-A03:227 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:228 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:229 YFAMYQENVAQTDVDTLYIIYRDYTWAEQAYTWY
HLA-A03:23 YFAMYGEKVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:230 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:231 YFAMYQENVAQTDVDTLYIIYRDYTWARLAYTWY
HLA-A03:232 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:233 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:235 YFAMYQENVAQTDVDTLYNIYRDYTWAELAYTWY
HLA-A03:236 YFAMYQENVAQTDVDTLYIIYGDYTWAVQAYTWY
HLA-A03:237 YFAMYQENVAQTDVDTLYIIYRDYTWAVQAYTWY
HLA-A03:238 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:239 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:24 YFAMYRNNVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:240 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:241 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:242 YFAMYQENVAQTDVDTLYIIYRDYTWAVQAYTWY
HLA-A03:243 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:244 YFAMYQENVAQTDVDTLYIIYRDYTWAVQAYTWY
HLA-A03:245 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:246 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:247 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:248 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:249 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:25 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:250 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:251 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:252 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:253 YFAMYQENVAQTDVDTLYIIYRDYTWAVQAYTWY
HLA-A03:254 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:255 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:256 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:257 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:258 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:259 YFAMYQENVAQTYVDTLYIIYRDYTWAELAYTWY
HLA-A03:26 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:260 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:261 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:263 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:264 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:265 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:267 YFAMYQENVAQTDVNTLYIIYRDYTWAELAYTWY
HLA-A03:268 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:27 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:270 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:271 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:272 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:273 YFAMYEEKVAHTDENTLYIIYRDYTWAELAYTWY
HLA-A03:274 YFAMYQENVAQTDVDTLYIIYRDYTWAVQAYTWY
HLA-A03:276 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:277 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:278 YFAMYLQNVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:28 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:280 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:281 YFAMYQENVAQTDVDTLYIIYRDYTWAVQAYTWY
HLA-A03:282 YFAMYQENVAQTDVDTLYIIYQDYTWAELAYTWY
HLA-A03:285 YFAMYQENVAQTDVDTLYIIYRDYTWAVQAYTWY
HLA-A03:287 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:288 YFAMYQENVAQTDVDTLYMIYRDYTWAELAYTWY
HLA-A03:289 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:29 YFAMYQENVVQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:290 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:291 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:292 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:293 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:294 YFAMYQENVAQTDVDTLYIIYRDYIWAELAYTWY
HLA-A03:295 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:296 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:298 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:299 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:30 YFAMYEEKVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:300 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:301 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:302 YFAMYQENVAQTDVDTLYIIYRDYTWAELAYTWY
HLA-A03:303 YFAMYEENVAQTDVDTLYIIYRDYTWAELAY
Download .txt
gitextract_jfht9tp3/

├── ChatDrug/
│   ├── TAPE_benchmark/
│   │   ├── __init__.py
│   │   ├── datasets.py
│   │   ├── metrics.py
│   │   ├── models.py
│   │   └── trainer.py
│   └── task_and_evaluation/
│       ├── Conversational_LLMs_utils.py
│       ├── __init__.py
│       ├── peptide_editing.py
│       ├── prompt_specification.json
│       ├── protein_editing.py
│       └── small_molecule_editing.py
├── ChatDrug_demo.ipynb
├── README.md
├── data/
│   ├── README.md
│   ├── peptide/
│   │   ├── class1_pseudosequences.csv
│   │   ├── peptide_editing.json
│   │   ├── peptide_editing_threshold.json
│   │   ├── preprocess_step_1_data_extraction.py
│   │   ├── preprocess_step_2_single_prop.py
│   │   ├── preprocess_step_3_multi_prop.py
│   │   └── selected_alleles.txt
│   └── small_molecule/
│       └── small_molecule_editing.txt
├── main_ChatDrug.py
├── main_InContext.py
├── setup.py
└── utils.py
Download .txt
SYMBOL INDEX (159 symbols across 12 files)

FILE: ChatDrug/TAPE_benchmark/datasets.py
  function dataset_factory (line 16) | def dataset_factory(data_file: Union[str, Path], *args, **kwargs) -> Dat...
  function pad_sequences (line 32) | def pad_sequences(sequences: Sequence, constant_value=0, dtype=None) -> ...
  class LMDBDataset (line 51) | class LMDBDataset(Dataset):
    method __init__ (line 52) | def __init__(self, data_file, in_memory):
    method __len__ (line 67) | def __len__(self):
    method __getitem__ (line 70) | def __getitem__(self, index):
  class DataProcessor (line 83) | class DataProcessor:
    method get_train_examples (line 85) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 89) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 93) | def get_test_examples(self, data_dir):
    method get_labels (line 97) | def get_labels(self):
  class FluorescenceProgress (line 102) | class FluorescenceProgress(DataProcessor):
    method __init__ (line 103) | def __init__(self, tokenizer):
    method get_train_examples (line 107) | def get_train_examples(self, data_dir, in_memory=True):
    method get_dev_examples (line 111) | def get_dev_examples(self, data_dir, in_memory=True):
    method get_test_examples (line 115) | def get_test_examples(self, data_dir, data_cat, in_memory=True):
    method get_labels (line 122) | def get_labels(self):
  class SecondaryStructureProcessor3 (line 126) | class SecondaryStructureProcessor3(DataProcessor):
    method __init__ (line 127) | def __init__(self, tokenizer):
    method get_train_examples (line 131) | def get_train_examples(self, data_dir, in_memory=True):
    method get_dev_examples (line 135) | def get_dev_examples(self, data_dir, in_memory=True):
    method get_test_examples (line 139) | def get_test_examples(self, data_dir, data_cat, in_memory=True):
    method get_labels (line 143) | def get_labels(self):
  class SecondaryStructureProcessor8 (line 147) | class SecondaryStructureProcessor8(DataProcessor):
    method __init__ (line 148) | def __init__(self, tokenizer):
    method get_train_examples (line 152) | def get_train_examples(self, data_dir, in_memory=True):
    method get_dev_examples (line 156) | def get_dev_examples(self, data_dir, in_memory=True):
    method get_test_examples (line 160) | def get_test_examples(self, data_dir, data_cat, in_memory=True):
    method get_labels (line 164) | def get_labels(self):
  class ContactProgress (line 168) | class ContactProgress(DataProcessor):
    method __init__ (line 169) | def __init__(self, tokenizer):
    method get_train_examples (line 173) | def get_train_examples(self, data_dir, in_memory=True):
    method get_dev_examples (line 177) | def get_dev_examples(self, data_dir, in_memory=True):
    method get_test_examples (line 181) | def get_test_examples(self, data_dir, data_cat, in_memory=True):
    method get_labels (line 188) | def get_labels(self):
  class StabilityProgress (line 192) | class StabilityProgress(DataProcessor):
    method __init__ (line 193) | def __init__(self, tokenizer):
    method get_train_examples (line 197) | def get_train_examples(self, data_dir, in_memory=True):
    method get_dev_examples (line 201) | def get_dev_examples(self, data_dir, in_memory=True):
    method get_test_examples (line 205) | def get_test_examples(self, data_dir, data_cat, in_memory=True):
    method get_labels (line 212) | def get_labels(self):
  class RemoteHomologyProgress (line 216) | class RemoteHomologyProgress(DataProcessor):
    method __init__ (line 217) | def __init__(self, tokenizer):
    method get_train_examples (line 221) | def get_train_examples(self, data_dir, in_memory=True):
    method get_dev_examples (line 225) | def get_dev_examples(self, data_dir, in_memory=True):
    method get_test_examples (line 229) | def get_test_examples(self, data_dir, data_cat, in_memory=True):
    method get_labels (line 236) | def get_labels(self):
  class ProteinnetDataset (line 240) | class ProteinnetDataset(Dataset):
    method __init__ (line 242) | def __init__(self,
    method __len__ (line 257) | def __len__(self) -> int:
    method __getitem__ (line 260) | def __getitem__(self, index: int):
    method collate_fn (line 285) | def collate_fn(self, batch):
  class FluorescenceDataset (line 299) | class FluorescenceDataset(Dataset):
    method __init__ (line 300) | def __init__(self, file_path, split, tokenizer):
    method get_data (line 311) | def get_data(self, file):
    method __len__ (line 319) | def __len__(self):
    method __getitem__ (line 322) | def __getitem__(self, index):
    method collate_fn (line 333) | def collate_fn(self, batch):
  class StabilityDataset (line 344) | class StabilityDataset(Dataset):
    method __init__ (line 345) | def __init__(self, file_path, split, tokenizer):
    method get_data (line 356) | def get_data(self, path):
    method __getitem__ (line 364) | def __getitem__(self, index):
    method __len__ (line 375) | def __len__(self):
    method collate_fn (line 378) | def collate_fn(self, batch):
  class RemoteHomologyDataset (line 389) | class RemoteHomologyDataset(Dataset):
    method __init__ (line 390) | def __init__(self, file_path, split, tokenizer):
    method get_data (line 404) | def get_data(self, file):
    method __len__ (line 412) | def __len__(self):
    method __getitem__ (line 415) | def __getitem__(self, index):
    method collate_fn (line 426) | def collate_fn(self, batch):
  class SecondaryStructureDataset3 (line 437) | class SecondaryStructureDataset3(Dataset):
    method __init__ (line 438) | def __init__(
    method __len__ (line 455) | def __len__(self):
    method __getitem__ (line 458) | def __getitem__(self, index: int):
    method collate_fn (line 473) | def collate_fn(self, batch):
  class SecondaryStructureDataset8 (line 486) | class SecondaryStructureDataset8(Dataset):
    method __init__ (line 487) | def __init__(
    method __len__ (line 502) | def __len__(self):
    method __getitem__ (line 505) | def __getitem__(self, index: int):
    method collate_fn (line 520) | def collate_fn(self, batch):

FILE: ChatDrug/TAPE_benchmark/metrics.py
  function accuracy_score_remote (line 10) | def accuracy_score_remote(y_true, y_pred):
  function spearmanr (line 22) | def spearmanr(target: Sequence[float],
  function compute_accuracy_metrics (line 29) | def compute_accuracy_metrics(task_name, preds, labels):
  function compute_spearmanr_metrics (line 38) | def compute_spearmanr_metrics(task_name, preds, labels):
  function simple_accuracy (line 48) | def simple_accuracy(preds, labels):
  function bt_compute_metrics (line 52) | def bt_compute_metrics(task_name, preds, labels):
  function build_compute_metrics_fn (line 62) | def build_compute_metrics_fn(task_name: str, output_type: str) -> Callab...

FILE: ChatDrug/TAPE_benchmark/models.py
  class PairwiseContactPredictionHead (line 9) | class PairwiseContactPredictionHead(nn.Module):
    method __init__ (line 11) | def __init__(self, hidden_size: int, ignore_index=-100):
    method forward (line 17) | def forward(self, inputs, sequence_lengths, targets=None):
    method compute_precision_at_l5 (line 37) | def compute_precision_at_l5(self, sequence_lengths, prediction, labels):
  class BertForOntoProteinContactPrediction (line 56) | class BertForOntoProteinContactPrediction(BertPreTrainedModel):
    method __init__ (line 57) | def __init__(self, config, mean_output):
    method forward (line 68) | def forward(self, input_ids, protein_length, attention_mask=None, labe...
  class BertForSequenceClassification2 (line 83) | class BertForSequenceClassification2(BertPreTrainedModel):
    method __init__ (line 84) | def __init__(self, config, mean_output):
    method forward (line 95) | def forward(
  function load_adam_optimizer_and_scheduler (line 169) | def load_adam_optimizer_and_scheduler(model, args, train_dataset):
  class BertForTokenClassification2 (line 179) | class BertForTokenClassification2(BertPreTrainedModel):
    method __init__ (line 180) | def __init__(self, config, mean_output):
    method forward (line 189) | def forward(

FILE: ChatDrug/TAPE_benchmark/trainer.py
  class OntoProteinTrainer (line 15) | class OntoProteinTrainer(Trainer):
    method prediction_step (line 17) | def prediction_step(
    method prediction_loop (line 58) | def prediction_loop(self, dataloader: DataLoader, description: str, pr...
    method evaluation_loop (line 145) | def evaluation_loop(

FILE: ChatDrug/task_and_evaluation/Conversational_LLMs_utils.py
  function complete (line 9) | def complete(messages, model, tokenizer, conversational_LLM, drug_type, ...
  function complete_chatgpt (line 27) | def complete_chatgpt(messages):
  function complete_galactica_molecule (line 55) | def complete_galactica_molecule(
  function complete_galactica_peptide (line 94) | def complete_galactica_peptide(
  function complete_galactica_protein (line 133) | def complete_galactica_protein(
  function format_tokens (line 179) | def format_tokens(dialogs, tokenizer):
  function complete_llama (line 228) | def complete_llama(

FILE: ChatDrug/task_and_evaluation/__init__.py
  function task_to_drug (line 7) | def task_to_drug(task):
  function get_task_specification_dict (line 18) | def get_task_specification_dict(task):
  function parse (line 29) | def parse(task, input_drug, generated_text, addition_drug=None):
  function evaluate (line 40) | def evaluate(input_drug, generated_drug, task, constraint, threshold_dict):

FILE: ChatDrug/task_and_evaluation/peptide_editing.py
  function parse_peptide (line 56) | def parse_peptide(input_peptide, raw_text, retrieval_sequence):
  function evaluate_peptide (line 77) | def evaluate_peptide(input_peptide_sequence_list, output_peptide_sequenc...
  function load_allele2protein_sequence (line 91) | def load_allele2protein_sequence(file_path):
  function load_selected_allele_list (line 105) | def load_selected_allele_list(file_path):
  function load_raw_allele2peptide (line 113) | def load_raw_allele2peptide(file_path):
  function load_processed_allele2peptide (line 125) | def load_processed_allele2peptide(file_path):

FILE: ChatDrug/task_and_evaluation/protein_editing.py
  function load_ProteinDT_model (line 11) | def load_ProteinDT_model(input_model_path, chache_dir, mean_output, num_...
  function parse_protein (line 46) | def parse_protein(input_protein, raw_text, retrieval_sequence):
  function pad_sequences (line 63) | def pad_sequences(sequences, constant_value=0, dtype=None) -> np.ndarray:
  class ProteinSecondaryStructureDataset (line 82) | class ProteinSecondaryStructureDataset(Dataset):
    method __init__ (line 83) | def __init__(self, data_path, tokenizer, target='ss3'):
    method __len__ (line 124) | def __len__(self):
    method __getitem__ (line 127) | def __getitem__(self, index: int):
    method collate_fn (line 141) | def collate_fn(self, batch):
  function tokenize_one_sequence (line 151) | def tokenize_one_sequence(tokenizer, protein_sequence):
  function tokenize_sequences (line 158) | def tokenize_sequences(tokenizer, sequence_list, labels):
  function evaluate_result (line 179) | def evaluate_result(input_protein_sequence, output_protein_sequence, lab...
  class ProteinListDataset (line 209) | class ProteinListDataset(Dataset):
    method __init__ (line 210) | def __init__(self, protein_sequence_list, tokenizer, task_id):
    method __len__ (line 216) | def __len__(self):
    method __getitem__ (line 219) | def __getitem__(self, index: int):
    method collate_fn (line 228) | def collate_fn(self, batch):
  function evaluate_pairwise_list_result (line 238) | def evaluate_pairwise_list_result(input_protein_list, output_protein_lis...
  function evaluate_fast_protein_dict (line 280) | def evaluate_fast_protein_dict(input_protein_list, task_id, device="cuda"):
  function evaluate_fast_protein (line 318) | def evaluate_fast_protein(input_protein_list, output_protein_list, task_...

FILE: ChatDrug/task_and_evaluation/small_molecule_editing.py
  function parse_molecule (line 70) | def parse_molecule(input_sequence, raw_text, retrieval_sequence):
  function evaluate_molecule (line 87) | def evaluate_molecule(input_SMILES, output_SMILES, task_id, threshold_li...

FILE: main_ChatDrug.py
  function conversation (line 13) | def conversation(messages, model, tokenizer, conversational_LLM, C, roun...
  function ReDF (line 67) | def ReDF(messages, conversational_LLM, round_index, task, drug_type, inp...
  function main (line 102) | def main(args):

FILE: main_InContext.py
  function main (line 9) | def main(args):

FILE: utils.py
  function construct_PDDS_prompt (line 15) | def construct_PDDS_prompt(task_specification_dict, input_drug, drug_type...
  function construct_PDDS_prompt_galactica (line 34) | def construct_PDDS_prompt_galactica(task_specification_dict, input_drug,...
  function construct_prompt_incontext (line 55) | def construct_prompt_incontext(task_specification_dict, input_drug, drug...
  function load_dataset (line 75) | def load_dataset(drug_type, task, task_specification_dict):
  function load_retrieval_DB (line 106) | def load_retrieval_DB(task, seed):
  function load_thredhold (line 176) | def load_thredhold(drug_type):
  function sim_molecule (line 188) | def sim_molecule(smile0, smile1):
  function sim_sequence (line 199) | def sim_sequence(task, SEQ1, SEQ2):
  function retrieve_and_feedback (line 207) | def retrieve_and_feedback(task, DB, input_drug, generated_drug, constrai...
  function retrieve_and_feedback_fast_protein (line 230) | def retrieve_and_feedback_fast_protein(task, sim_DB_dict, test_example_d...
  function generate_retrieval_dict (line 246) | def generate_retrieval_dict(task, input_drug_list, DB, saved_file):
  function fast_protein_dict (line 270) | def fast_protein_dict(task, drug_type, fast_protein, input_drug_list, re...
Condensed preview — 26 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (991K chars).
[
  {
    "path": "ChatDrug/TAPE_benchmark/__init__.py",
    "chars": 315,
    "preview": "from ChatDrug.TAPE_benchmark.datasets import dataset_processor_mapping, output_mode_mapping\nfrom ChatDrug.TAPE_benchmark"
  },
  {
    "path": "ChatDrug/TAPE_benchmark/datasets.py",
    "chars": 22443,
    "preview": "from pathlib import Path\nfrom typing import Union\n\nimport pickle as pkl\nimport lmdb\nimport numpy as np\nimport pandas as "
  },
  {
    "path": "ChatDrug/TAPE_benchmark/metrics.py",
    "chars": 3035,
    "preview": "from typing import Sequence, Callable, Dict\n\nimport numpy as np\nimport scipy\nimport torch\nfrom seqeval.metrics import ac"
  },
  {
    "path": "ChatDrug/TAPE_benchmark/models.py",
    "chars": 9861,
    "preview": "from torch import nn\nfrom torch.nn import MSELoss, CrossEntropyLoss, BCEWithLogitsLoss\nfrom transformers import BertPreT"
  },
  {
    "path": "ChatDrug/TAPE_benchmark/trainer.py",
    "chars": 12752,
    "preview": "import collections\nimport warnings\nfrom typing import Tuple, Optional, Union, Dict, Any, List\n\nimport torch\nimport torch"
  },
  {
    "path": "ChatDrug/task_and_evaluation/Conversational_LLMs_utils.py",
    "chars": 9860,
    "preview": "import sys\nimport openai\nimport time\nimport torch\nimport sys\n\nopenai.api_key = YOUR_API_KEY\n\ndef complete(messages, mode"
  },
  {
    "path": "ChatDrug/task_and_evaluation/__init__.py",
    "chars": 3270,
    "preview": "import numpy as np\nfrom .small_molecule_editing import evaluate_molecule, task_specification_dict_molecule, parse_molecu"
  },
  {
    "path": "ChatDrug/task_and_evaluation/peptide_editing.py",
    "chars": 5519,
    "preview": "from collections import defaultdict\nimport re\nimport numpy as np\nfrom mhcflurry import Class1PresentationPredictor\n\nAMIN"
  },
  {
    "path": "ChatDrug/task_and_evaluation/prompt_specification.json",
    "chars": 12138,
    "preview": "{\"task_id\": 101, \"PPDS_prompt\": \"Can you make molecule [input SMILES] more soluble in water? The output molecule should "
  },
  {
    "path": "ChatDrug/task_and_evaluation/protein_editing.py",
    "chars": 14768,
    "preview": "import lmdb\nimport pickle as pkl\nimport numpy as np\nimport torch\nfrom torch.utils.data import Dataset\nimport torch.nn.fu"
  },
  {
    "path": "ChatDrug/task_and_evaluation/small_molecule_editing.py",
    "chars": 9056,
    "preview": "from rdkit import Chem\nfrom rdkit.Chem import AllChem, Descriptors\nimport re\n\nprops = [\"MolLogP\", \"qed\", \"TPSA\", \"NumHAc"
  },
  {
    "path": "ChatDrug_demo.ipynb",
    "chars": 8881,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# ChatDrug Usage Guide\\n\",\n    \"Thi"
  },
  {
    "path": "README.md",
    "chars": 4719,
    "preview": "# Conversational Drug Editing Using Retrieval and Domain Feedback\n\n**ICLR 2024**\n\nAuthors: Shengchao Liu<sup>+</sup>, Ji"
  },
  {
    "path": "data/README.md",
    "chars": 2774,
    "preview": "\n# Dataset and Evaluation Preparation for ChatDrug\n\nFirst please make and go to the `data` folder:\n```\nmkdir -p data\ncd "
  },
  {
    "path": "data/peptide/class1_pseudosequences.csv",
    "chars": 603964,
    "preview": "allele pseudosequence\nBoLA-100901 YYSMYREISENVYGSNLYLLYRDYTWEYLNYRWY\nBoLA-100902 YYSEYREISENVYESNLYLLYRDYTWEYLNYRWY\nBoLA"
  },
  {
    "path": "data/peptide/peptide_editing.json",
    "chars": 175959,
    "preview": "{\"HLA-A*02:01\": [\"SFDGIIAMM\", \"CIRVFLAAR\", \"RLADALQEL\", \"NLDPAVHEV\", \"MALWMRLLP\", \"AIAIPVTVA\", \"KFMSNGEHV\", \"QLPLESDAV\","
  },
  {
    "path": "data/peptide/peptide_editing_threshold.json",
    "chars": 943,
    "preview": "{\"HLA-A*02:01\": 0.5552409130583961, \"HLA-A*03:01\": 0.4955139727872223, \"HLA-A*11:01\": 0.49967627123450165, \"HLA-A*01:01\""
  },
  {
    "path": "data/peptide/preprocess_step_1_data_extraction.py",
    "chars": 3171,
    "preview": "from ChatDrug.task_and_evaluation.peptide_editing import load_allele2protein_sequence, load_selected_allele_list, load_r"
  },
  {
    "path": "data/peptide/preprocess_step_2_single_prop.py",
    "chars": 1258,
    "preview": "from ChatDrug.task_and_evaluation.peptide_editing import load_allele2protein_sequence, load_selected_allele_list, load_r"
  },
  {
    "path": "data/peptide/preprocess_step_3_multi_prop.py",
    "chars": 1470,
    "preview": "from ChatDrug.task_and_evaluation.peptide_editing import load_allele2protein_sequence, load_selected_allele_list, load_r"
  },
  {
    "path": "data/peptide/selected_alleles.txt",
    "chars": 360,
    "preview": "HLA-A*02:01\nHLA-A*03:01\nHLA-A*11:01\nHLA-A*01:01\nHLA-A*24:02\nHLA-A*68:01\nHLA-A*68:02\nHLA-A*29:02\nHLA-A*02:03\nHLA-A*31:01\n"
  },
  {
    "path": "data/small_molecule/small_molecule_editing.txt",
    "chars": 8417,
    "preview": "O=C(NC[C@H]1CCCO1)c1ccccc1N1CCCC1=O\nCn1ccc(C(=O)Nc2sc3c(c2C#N)CCC3)cc1=O\nCCCCn1nc(C(=O)Nc2ccc(N3CCOCC3)nc2)ccc1=O\nCC[C@@"
  },
  {
    "path": "main_ChatDrug.py",
    "chars": 9997,
    "preview": "import json\nimport argparse\nimport sys\nfrom ChatDrug.task_and_evaluation.Conversational_LLMs_utils import complete\nfrom "
  },
  {
    "path": "main_InContext.py",
    "chars": 4534,
    "preview": "import json\nimport argparse\nimport sys\nfrom ChatDrug.task_and_evaluation.Conversational_LLMs_utils import complete\nfrom "
  },
  {
    "path": "setup.py",
    "chars": 285,
    "preview": "from setuptools import setup, find_packages\n\nsetup(name='ChatDrug',\n      description='Source codes for ChatGPT-powered "
  },
  {
    "path": "utils.py",
    "chars": 13702,
    "preview": "import os\nimport json\nimport numpy as np\nimport pandas as pd\n\nfrom rdkit.Chem import Draw, AllChem\nfrom rdkit import Che"
  }
]

About this extraction

This page contains the full source code of the chao1224/ChatDrug GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 26 files (921.3 KB), approximately 475.7k tokens, and a symbol index with 159 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!