Repository: GT-Vision-Lab/VQA
Branch: master
Commit: a013f0043c1e
Files: 12
Total size: 10.7 MB

Directory structure:
gitextract_za4o5544/
├── .gitignore
├── PythonEvaluationTools/
│   ├── vqaEvalDemo.py
│   └── vqaEvaluation/
│       ├── __init__.py
│       └── vqaEval.py
├── PythonHelperTools/
│   ├── vqaDemo.py
│   └── vqaTools/
│       ├── __init__.py
│       └── vqa.py
├── QuestionTypes/
│   ├── abstract_v002_question_types.txt
│   └── mscoco_question_types.txt
├── README.md
├── Results/
│   └── OpenEnded_mscoco_train2014_fake_results.json
└── license.txt

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
*.pyc
*.swp*

================================================
FILE: PythonEvaluationTools/vqaEvalDemo.py
================================================
# coding: utf-8

import sys
dataDir = '../../VQA'
sys.path.insert(0, '%s/PythonHelperTools/vqaTools' % (dataDir))
from vqa import VQA
from vqaEvaluation.vqaEval import VQAEval
import matplotlib.pyplot as plt
import skimage.io as io
import json
import random
import os

# set up file names and paths
versionType = 'v2_'        # this should be '' when using the VQA v1.0 dataset
taskType    = 'OpenEnded'  # 'OpenEnded' only for v2.0. 'OpenEnded' or 'MultipleChoice' for v1.0
dataType    = 'mscoco'     # 'mscoco' only for v2.0. 'mscoco' for real and 'abstract_v002' for abstract for v1.0
dataSubType = 'train2014'
annFile     = '%s/Annotations/%s%s_%s_annotations.json' % (dataDir, versionType, dataType, dataSubType)
quesFile    = '%s/Questions/%s%s_%s_%s_questions.json' % (dataDir, versionType, taskType, dataType, dataSubType)
imgDir      = '%s/Images/%s/%s/' % (dataDir, dataType, dataSubType)
resultType  = 'fake'
fileTypes   = ['results', 'accuracy', 'evalQA', 'evalQuesType', 'evalAnsType']

# An example result json file has been provided in the './Results' folder.
[resFile, accuracyFile, evalQAFile, evalQuesTypeFile, evalAnsTypeFile] = \
    ['%s/Results/%s%s_%s_%s_%s_%s.json' % (dataDir, versionType, taskType, dataType, dataSubType,
     resultType, fileType) for fileType in fileTypes]

# create vqa object and vqaRes object
vqa = VQA(annFile, quesFile)
vqaRes = vqa.loadRes(resFile, quesFile)

# create vqaEval object by taking vqa and vqaRes
vqaEval = VQAEval(vqa, vqaRes, n=2)  # n is precision of accuracy (number of places after decimal), default is 2

# evaluate results
"""
If you have a list of question ids on which you would like to evaluate your results, pass it
as a list to the function below. By default it uses all the question ids in the annotation file.
"""
vqaEval.evaluate()

# print accuracies
print "\n"
print "Overall Accuracy is: %.02f\n" % (vqaEval.accuracy['overall'])
print "Per Question Type Accuracy is the following:"
for quesType in vqaEval.accuracy['perQuestionType']:
    print "%s : %.02f" % (quesType, vqaEval.accuracy['perQuestionType'][quesType])
print "\n"
print "Per Answer Type Accuracy is the following:"
for ansType in vqaEval.accuracy['perAnswerType']:
    print "%s : %.02f" % (ansType, vqaEval.accuracy['perAnswerType'][ansType])
print "\n"

# demo how to use evalQA to retrieve a low-score result
evals = [quesId for quesId in vqaEval.evalQA if vqaEval.evalQA[quesId] < 35]  # 35 is per-question percentage accuracy
if len(evals) > 0:
    print 'ground truth answers'
    randomEval = random.choice(evals)
    randomAnn = vqa.loadQA(randomEval)
    vqa.showQA(randomAnn)

    print '\n'
    print 'generated answer (accuracy %.02f)' % (vqaEval.evalQA[randomEval])
    ann = vqaRes.loadQA(randomEval)[0]
    print "Answer:   %s\n" % (ann['answer'])

    imgId = randomAnn[0]['image_id']
    imgFilename = 'COCO_' + dataSubType + '_' + str(imgId).zfill(12) + '.jpg'
    if os.path.isfile(imgDir + imgFilename):
        I = io.imread(imgDir + imgFilename)
        plt.imshow(I)
        plt.axis('off')
        plt.show()

# plot accuracy for various question types
plt.bar(range(len(vqaEval.accuracy['perQuestionType'])), vqaEval.accuracy['perQuestionType'].values(), align='center')
plt.xticks(range(len(vqaEval.accuracy['perQuestionType'])), vqaEval.accuracy['perQuestionType'].keys(), rotation=0, fontsize=10)
plt.title('Per Question Type Accuracy', fontsize=10)
plt.xlabel('Question Types', fontsize=10)
plt.ylabel('Accuracy', fontsize=10)
plt.show()

# save evaluation results to the ./Results folder
json.dump(vqaEval.accuracy,     open(accuracyFile,     'w'))
json.dump(vqaEval.evalQA,       open(evalQAFile,       'w'))
json.dump(vqaEval.evalQuesType, open(evalQuesTypeFile, 'w'))
json.dump(vqaEval.evalAnsType,  open(evalAnsTypeFile,  'w'))
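
# --- Optional usage sketch (added for illustration; not part of the original
# demo): evaluate on a subset of question ids. evaluate() accepts a list of
# question ids via its quesIds parameter and defaults to all ids in the
# annotation file; getQuesIds() can build such a list. The question type
# chosen here is just an example.
howManyIds = vqa.getQuesIds(quesTypes='how many')
vqaEval.evaluate(quesIds=howManyIds)
print "Accuracy on 'how many' questions: %.02f" % (vqaEval.accuracy['overall'])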
================================================
FILE: PythonEvaluationTools/vqaEvaluation/__init__.py
================================================
author='aagrawal'

================================================
FILE: PythonEvaluationTools/vqaEvaluation/vqaEval.py
================================================
# coding=utf-8

__author__ = 'aagrawal'

# This code is based on the code written by Tsung-Yi Lin for the MSCOCO Python API available at the following link:
# (https://github.com/tylin/coco-caption/blob/master/pycocoevalcap/eval.py).
import re
import sys


class VQAEval:
    def __init__(self, vqa, vqaRes, n=2):
        self.n = n
        self.accuracy = {}
        self.evalQA = {}
        self.evalQuesType = {}
        self.evalAnsType = {}
        self.vqa = vqa
        self.vqaRes = vqaRes
        self.params = {'question_id': vqa.getQuesIds()}
        self.contractions = {"aint": "ain't", "arent": "aren't", "cant": "can't", "couldve": "could've", "couldnt": "couldn't",
                             "couldn'tve": "couldn't've", "couldnt've": "couldn't've", "didnt": "didn't", "doesnt": "doesn't", "dont": "don't", "hadnt": "hadn't",
                             "hadnt've": "hadn't've", "hadn'tve": "hadn't've", "hasnt": "hasn't", "havent": "haven't", "hed": "he'd", "hed've": "he'd've",
                             "he'dve": "he'd've", "hes": "he's", "howd": "how'd", "howll": "how'll", "hows": "how's", "Id've": "I'd've", "I'dve": "I'd've",
                             "Im": "I'm", "Ive": "I've", "isnt": "isn't", "itd": "it'd", "itd've": "it'd've", "it'dve": "it'd've", "itll": "it'll", "let's": "let's",
                             "maam": "ma'am", "mightnt": "mightn't", "mightnt've": "mightn't've", "mightn'tve": "mightn't've", "mightve": "might've",
                             "mustnt": "mustn't", "mustve": "must've", "neednt": "needn't", "notve": "not've", "oclock": "o'clock", "oughtnt": "oughtn't",
                             "ow's'at": "'ow's'at", "'ows'at": "'ow's'at", "'ow'sat": "'ow's'at", "shant": "shan't", "shed've": "she'd've", "she'dve": "she'd've",
                             "she's": "she's", "shouldve": "should've", "shouldnt": "shouldn't", "shouldnt've": "shouldn't've", "shouldn'tve": "shouldn't've",
                             "somebodyd": "somebody'd", "somebodyd've": "somebody'd've", "somebody'dve": "somebody'd've", "somebodyll": "somebody'll",
                             "somebodys": "somebody's", "someoned": "someone'd", "someoned've": "someone'd've", "someone'dve": "someone'd've",
                             "someonell": "someone'll", "someones": "someone's", "somethingd": "something'd", "somethingd've": "something'd've",
                             "something'dve": "something'd've", "somethingll": "something'll", "thats": "that's", "thered": "there'd", "thered've": "there'd've",
                             "there'dve": "there'd've", "therere": "there're", "theres": "there's", "theyd": "they'd", "theyd've": "they'd've",
                             "they'dve": "they'd've", "theyll": "they'll", "theyre": "they're", "theyve": "they've", "twas": "'twas", "wasnt": "wasn't",
                             "wed've": "we'd've", "we'dve": "we'd've", "weve": "we've", "werent": "weren't", "whatll": "what'll", "whatre": "what're",
                             "whats": "what's", "whatve": "what've", "whens": "when's", "whered": "where'd", "wheres": "where's", "whereve": "where've",
                             "whod": "who'd", "whod've": "who'd've", "who'dve": "who'd've", "wholl": "who'll", "whos": "who's", "whove": "who've", "whyll": "why'll",
                             "whyre": "why're", "whys": "why's", "wont": "won't", "wouldve": "would've", "wouldnt": "wouldn't", "wouldnt've": "wouldn't've",
                             "wouldn'tve": "wouldn't've", "yall": "y'all", "yall'll": "y'all'll", "y'allll": "y'all'll", "yall'd've": "y'all'd've",
                             "y'alld've": "y'all'd've", "y'all'dve": "y'all'd've", "youd": "you'd", "youd've": "you'd've", "you'dve": "you'd've",
                             "youll": "you'll", "youre": "you're", "youve": "you've"}
        self.manualMap = {'none': '0', 'zero': '0', 'one': '1', 'two': '2', 'three': '3', 'four': '4',
                          'five': '5', 'six': '6', 'seven': '7', 'eight': '8', 'nine': '9', 'ten': '10'}
        self.articles = ['a', 'an', 'the']
        # strip periods that are not part of a decimal number
        self.periodStrip = re.compile(r"(?<!\d)(\.)(?!\d)", re.UNICODE)
        self.commaStrip = re.compile(r"(\d)(\,)(\d)")
        self.punct = [';', r"/", '[', ']', '"', '{', '}', '(', ')', '=', '+', '\\', '_', '-', '>', '<', '@', '`', ',', '?', '!']
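    # Normalization note (comment added for clarity; the example answer is
    # made up): when a question has more than one distinct ground-truth
    # answer, evaluate() runs processPunctuation followed by
    # processDigitArticle on every answer before matching, e.g.
    # "Two, young men." -> "2 young men" (punctuation stripped, text
    # lowercased, number words mapped to digits, articles dropped, and known
    # contractions restored).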
    def evaluate(self, quesIds=None):
        if quesIds is None:
            quesIds = [quesId for quesId in self.params['question_id']]
        gts = {}
        res = {}
        for quesId in quesIds:
            gts[quesId] = self.vqa.qa[quesId]
            res[quesId] = self.vqaRes.qa[quesId]

        # =================================================
        # Compute accuracy
        # =================================================
        accQA = []
        accQuesType = {}
        accAnsType = {}
        print "computing accuracy"
        step = 0
        for quesId in quesIds:
            for ansDic in gts[quesId]['answers']:
                ansDic['answer'] = ansDic['answer'].replace('\n', ' ')
                ansDic['answer'] = ansDic['answer'].replace('\t', ' ')
                ansDic['answer'] = ansDic['answer'].strip()
            resAns = res[quesId]['answer']
            resAns = resAns.replace('\n', ' ')
            resAns = resAns.replace('\t', ' ')
            resAns = resAns.strip()
            gtAcc = []
            gtAnswers = [ans['answer'] for ans in gts[quesId]['answers']]
            if len(set(gtAnswers)) > 1:
                for ansDic in gts[quesId]['answers']:
                    ansDic['answer'] = self.processPunctuation(ansDic['answer'])
                    ansDic['answer'] = self.processDigitArticle(ansDic['answer'])
                resAns = self.processPunctuation(resAns)
                resAns = self.processDigitArticle(resAns)
            for gtAnsDatum in gts[quesId]['answers']:
                otherGTAns = [item for item in gts[quesId]['answers'] if item != gtAnsDatum]
                matchingAns = [item for item in otherGTAns if item['answer'] == resAns]
                acc = min(1, float(len(matchingAns)) / 3)
                gtAcc.append(acc)
            quesType = gts[quesId]['question_type']
            ansType = gts[quesId]['answer_type']
            avgGTAcc = float(sum(gtAcc)) / len(gtAcc)
            accQA.append(avgGTAcc)
            if quesType not in accQuesType:
                accQuesType[quesType] = []
            accQuesType[quesType].append(avgGTAcc)
            if ansType not in accAnsType:
                accAnsType[ansType] = []
            accAnsType[ansType].append(avgGTAcc)
            self.setEvalQA(quesId, avgGTAcc)
            self.setEvalQuesType(quesId, quesType, avgGTAcc)
            self.setEvalAnsType(quesId, ansType, avgGTAcc)
            if step % 100 == 0:
                self.updateProgress(step / float(len(quesIds)))
            step = step + 1
        self.setAccuracy(accQA, accQuesType, accAnsType)
        print "Done computing accuracy"

    def processPunctuation(self, inText):
        outText = inText
        for p in self.punct:
            if (p + ' ' in inText or ' ' + p in inText) or (re.search(self.commaStrip, inText) != None):
                outText = outText.replace(p, '')
            else:
                outText = outText.replace(p, ' ')
        outText = self.periodStrip.sub("", outText)
        return outText

    def processDigitArticle(self, inText):
        outText = []
        tempText = inText.lower().split()
        for word in tempText:
            word = self.manualMap.setdefault(word, word)
            if word not in self.articles:
                outText.append(word)
        for wordId, word in enumerate(outText):
            if word in self.contractions:
                outText[wordId] = self.contractions[word]
        outText = ' '.join(outText)
        return outText

    def setAccuracy(self, accQA, accQuesType, accAnsType):
        self.accuracy['overall'] = round(100 * float(sum(accQA)) / len(accQA), self.n)
        self.accuracy['perQuestionType'] = {quesType: round(100 * float(sum(accQuesType[quesType])) / len(accQuesType[quesType]), self.n) for quesType in accQuesType}
        self.accuracy['perAnswerType'] = {ansType: round(100 * float(sum(accAnsType[ansType])) / len(accAnsType[ansType]), self.n) for ansType in accAnsType}

    def setEvalQA(self, quesId, acc):
        self.evalQA[quesId] = round(100 * acc, self.n)

    def setEvalQuesType(self, quesId, quesType, acc):
        if quesType not in self.evalQuesType:
            self.evalQuesType[quesType] = {}
        self.evalQuesType[quesType][quesId] = round(100 * acc, self.n)

    def setEvalAnsType(self, quesId, ansType, acc):
        if ansType not in self.evalAnsType:
            self.evalAnsType[ansType] = {}
        self.evalAnsType[ansType][quesId] = round(100 * acc, self.n)

    def updateProgress(self, progress):
        barLength = 20
        status = ""
        if isinstance(progress, int):
            progress = float(progress)
        if not isinstance(progress, float):
            progress = 0
            status = "error: progress var must be float\r\n"
        if progress < 0:
            progress = 0
            status = "Halt...\r\n"
        if progress >= 1:
            progress = 1
            status = "Done...\r\n"
        block = int(round(barLength * progress))
        text = "\rFinished Percent: [{0}] {1}% {2}".format("#" * block + "-" * (barLength - block), int(progress * 100), status)
        sys.stdout.write(text)
        sys.stdout.flush()
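
# --- Toy illustration of the consensus metric (added for exposition; the
# answers below are made up). evaluate() scores a predicted answer against
# each leave-one-out subset of the 10 human answers as
# min(#matching answers / 3, 1) and averages the 10 scores.
if __name__ == '__main__':
    gtAnswers = ['red'] * 7 + ['dark red'] * 2 + ['maroon']  # 10 hypothetical human answers
    resAns = 'dark red'                                      # hypothetical predicted answer
    accs = []
    for i in range(len(gtAnswers)):
        others = gtAnswers[:i] + gtAnswers[i + 1:]           # leave one human answer out
        matching = [a for a in others if a == resAns]
        accs.append(min(1.0, len(matching) / 3.0))
    print "toy accuracy: %.02f" % (100.0 * sum(accs) / len(accs))  # prints 60.00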
"error: progress var must be float\r\n" if progress < 0: progress = 0 status = "Halt...\r\n" if progress >= 1: progress = 1 status = "Done...\r\n" block = int(round(barLength*progress)) text = "\rFinshed Percent: [{0}] {1}% {2}".format( "#"*block + "-"*(barLength-block), int(progress*100), status) sys.stdout.write(text) sys.stdout.flush() ================================================ FILE: PythonHelperTools/vqaDemo.py ================================================ # coding: utf-8 from vqaTools.vqa import VQA import random import skimage.io as io import matplotlib.pyplot as plt import os dataDir ='../../VQA' versionType ='v2_' # this should be '' when using VQA v2.0 dataset taskType ='OpenEnded' # 'OpenEnded' only for v2.0. 'OpenEnded' or 'MultipleChoice' for v1.0 dataType ='mscoco' # 'mscoco' only for v1.0. 'mscoco' for real and 'abstract_v002' for abstract for v1.0. dataSubType ='train2014' annFile ='%s/Annotations/%s%s_%s_annotations.json'%(dataDir, versionType, dataType, dataSubType) quesFile ='%s/Questions/%s%s_%s_%s_questions.json'%(dataDir, versionType, taskType, dataType, dataSubType) imgDir = '%s/Images/%s/%s/' %(dataDir, dataType, dataSubType) # initialize VQA api for QA annotations vqa=VQA(annFile, quesFile) # load and display QA annotations for given question types """ All possible quesTypes for abstract and mscoco has been provided in respective text files in ../QuestionTypes/ folder. """ annIds = vqa.getQuesIds(quesTypes='how many'); anns = vqa.loadQA(annIds) randomAnn = random.choice(anns) vqa.showQA([randomAnn]) imgId = randomAnn['image_id'] imgFilename = 'COCO_' + dataSubType + '_'+ str(imgId).zfill(12) + '.jpg' if os.path.isfile(imgDir + imgFilename): I = io.imread(imgDir + imgFilename) plt.imshow(I) plt.axis('off') plt.show() # load and display QA annotations for given answer types """ ansTypes can be one of the following yes/no number other """ annIds = vqa.getQuesIds(ansTypes='yes/no'); anns = vqa.loadQA(annIds) randomAnn = random.choice(anns) vqa.showQA([randomAnn]) imgId = randomAnn['image_id'] imgFilename = 'COCO_' + dataSubType + '_'+ str(imgId).zfill(12) + '.jpg' if os.path.isfile(imgDir + imgFilename): I = io.imread(imgDir + imgFilename) plt.imshow(I) plt.axis('off') plt.show() # load and display QA annotations for given images """ Usage: vqa.getImgIds(quesIds=[], quesTypes=[], ansTypes=[]) Above method can be used to retrieve imageIds for given question Ids or given question types or given answer types. """ ids = vqa.getImgIds() annIds = vqa.getQuesIds(imgIds=random.sample(ids,5)); anns = vqa.loadQA(annIds) randomAnn = random.choice(anns) vqa.showQA([randomAnn]) imgId = randomAnn['image_id'] imgFilename = 'COCO_' + dataSubType + '_'+ str(imgId).zfill(12) + '.jpg' if os.path.isfile(imgDir + imgFilename): I = io.imread(imgDir + imgFilename) plt.imshow(I) plt.axis('off') plt.show() ================================================ FILE: PythonHelperTools/vqaTools/__init__.py ================================================ __author__ = 'aagrawal' ================================================ FILE: PythonHelperTools/vqaTools/vqa.py ================================================ __author__ = 'aagrawal' __version__ = '0.9' # Interface for accessing the VQA dataset. # This code is based on the code written by Tsung-Yi Lin for MSCOCO Python API available at the following link: # (https://github.com/pdollar/coco/blob/master/PythonAPI/pycocotools/coco.py). 
================================================
FILE: PythonHelperTools/vqaTools/__init__.py
================================================
__author__ = 'aagrawal'

================================================
FILE: PythonHelperTools/vqaTools/vqa.py
================================================
__author__ = 'aagrawal'
__version__ = '0.9'

# Interface for accessing the VQA dataset.

# This code is based on the code written by Tsung-Yi Lin for the MSCOCO Python API available at the following link:
# (https://github.com/pdollar/coco/blob/master/PythonAPI/pycocotools/coco.py).

# The following functions are defined:
#  VQA        - VQA class that loads the VQA annotation file and prepares data structures.
#  getQuesIds - Get question ids that satisfy given filter conditions.
#  getImgIds  - Get image ids that satisfy given filter conditions.
#  loadQA     - Load questions and answers with the specified question ids.
#  showQA     - Display the specified questions and answers.
#  loadRes    - Load result file and create result object.

# Help on each function can be accessed by: "help(VQA.function)"

import json
import datetime
import copy


class VQA:
    def __init__(self, annotation_file=None, question_file=None):
        """
        Constructor of VQA helper class for reading and visualizing questions and answers.
        :param annotation_file (str): location of VQA annotation file
        :param question_file (str): location of VQA question file
        :return:
        """
        # load dataset
        self.dataset = {}
        self.questions = {}
        self.qa = {}
        self.qqa = {}
        self.imgToQA = {}
        if annotation_file is not None and question_file is not None:
            print 'loading VQA annotations and questions into memory...'
            time_t = datetime.datetime.utcnow()
            dataset = json.load(open(annotation_file, 'r'))
            questions = json.load(open(question_file, 'r'))
            print datetime.datetime.utcnow() - time_t
            self.dataset = dataset
            self.questions = questions
            self.createIndex()

    def createIndex(self):
        # create index
        print 'creating index...'
        imgToQA = {ann['image_id']: [] for ann in self.dataset['annotations']}
        qa = {ann['question_id']: [] for ann in self.dataset['annotations']}
        qqa = {ann['question_id']: [] for ann in self.dataset['annotations']}
        for ann in self.dataset['annotations']:
            imgToQA[ann['image_id']] += [ann]
            qa[ann['question_id']] = ann
        for ques in self.questions['questions']:
            qqa[ques['question_id']] = ques
        print 'index created!'

        # create class members
        self.qa = qa
        self.qqa = qqa
        self.imgToQA = imgToQA

    def info(self):
        """
        Print information about the VQA annotation file.
        :return:
        """
        for key, value in self.dataset['info'].items():
            print '%s: %s' % (key, value)

    def getQuesIds(self, imgIds=[], quesTypes=[], ansTypes=[]):
        """
        Get question ids that satisfy the given filter conditions; a default (empty) value skips that filter.
        :param imgIds    (int array) : get question ids for given imgs
               quesTypes (str array) : get question ids for given question types
               ansTypes  (str array) : get question ids for given answer types
        :return: ids (int array) : integer array of question ids
        """
        imgIds = imgIds if type(imgIds) == list else [imgIds]
        quesTypes = quesTypes if type(quesTypes) == list else [quesTypes]
        ansTypes = ansTypes if type(ansTypes) == list else [ansTypes]

        if len(imgIds) == len(quesTypes) == len(ansTypes) == 0:
            anns = self.dataset['annotations']
        else:
            if not len(imgIds) == 0:
                anns = sum([self.imgToQA[imgId] for imgId in imgIds if imgId in self.imgToQA], [])
            else:
                anns = self.dataset['annotations']
            anns = anns if len(quesTypes) == 0 else [ann for ann in anns if ann['question_type'] in quesTypes]
            anns = anns if len(ansTypes) == 0 else [ann for ann in anns if ann['answer_type'] in ansTypes]
        ids = [ann['question_id'] for ann in anns]
        return ids
    def getImgIds(self, quesIds=[], quesTypes=[], ansTypes=[]):
        """
        Get image ids that satisfy the given filter conditions; a default (empty) value skips that filter.
        :param quesIds   (int array) : get image ids for given question ids
               quesTypes (str array) : get image ids for given question types
               ansTypes  (str array) : get image ids for given answer types
        :return: ids (int array) : integer array of image ids
        """
        quesIds = quesIds if type(quesIds) == list else [quesIds]
        quesTypes = quesTypes if type(quesTypes) == list else [quesTypes]
        ansTypes = ansTypes if type(ansTypes) == list else [ansTypes]

        if len(quesIds) == len(quesTypes) == len(ansTypes) == 0:
            anns = self.dataset['annotations']
        else:
            if not len(quesIds) == 0:
                # self.qa maps each question id to a single annotation dict
                anns = [self.qa[quesId] for quesId in quesIds if quesId in self.qa]
            else:
                anns = self.dataset['annotations']
            anns = anns if len(quesTypes) == 0 else [ann for ann in anns if ann['question_type'] in quesTypes]
            anns = anns if len(ansTypes) == 0 else [ann for ann in anns if ann['answer_type'] in ansTypes]
        ids = [ann['image_id'] for ann in anns]
        return ids

    def loadQA(self, ids=[]):
        """
        Load questions and answers with the specified question ids.
        :param ids (int array) : integer ids specifying question ids
        :return: qa (object array) : loaded qa objects
        """
        if type(ids) == list:
            return [self.qa[id] for id in ids]
        elif type(ids) == int:
            return [self.qa[ids]]

    def showQA(self, anns):
        """
        Display the specified annotations.
        :param anns (array of object): annotations to display
        :return: None
        """
        if len(anns) == 0:
            return 0
        for ann in anns:
            quesId = ann['question_id']
            print "Question: %s" % (self.qqa[quesId]['question'])
            for ans in ann['answers']:
                print "Answer %d: %s" % (ans['answer_id'], ans['answer'])

    def loadRes(self, resFile, quesFile):
        """
        Load result file and return a result object.
        :param resFile (str) : file name of result file
        :param quesFile (str) : file name of question file
        :return: res (obj) : result api object
        """
        res = VQA()
        res.questions = json.load(open(quesFile))
        res.dataset['info'] = copy.deepcopy(self.questions['info'])
        res.dataset['task_type'] = copy.deepcopy(self.questions['task_type'])
        res.dataset['data_type'] = copy.deepcopy(self.questions['data_type'])
        res.dataset['data_subtype'] = copy.deepcopy(self.questions['data_subtype'])
        res.dataset['license'] = copy.deepcopy(self.questions['license'])

        print 'Loading and preparing results...'
        time_t = datetime.datetime.utcnow()
        anns = json.load(open(resFile))
        assert type(anns) == list, 'results is not an array of objects'
        annsQuesIds = [ann['question_id'] for ann in anns]
        assert set(annsQuesIds) == set(self.getQuesIds()), \
            'Results do not correspond to the current VQA set. Either the results do not have predictions for all question ids in the annotation file, or there is at least one question id in the results that does not belong to the question ids in the annotation file.'
        for ann in anns:
            quesId = ann['question_id']
            if res.dataset['task_type'] == 'Multiple Choice':
                assert ann['answer'] in self.qqa[quesId]['multiple_choices'], 'predicted answer is not one of the multiple choices'
            qaAnn = self.qa[quesId]
            ann['image_id'] = qaAnn['image_id']
            ann['question_type'] = qaAnn['question_type']
            ann['answer_type'] = qaAnn['answer_type']
        print 'DONE (t=%0.2fs)' % ((datetime.datetime.utcnow() - time_t).total_seconds())

        res.dataset['annotations'] = anns
        res.createIndex()
        return res
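
# --- Results-format sketch (added for illustration; the paths and the
# constant answer below are hypothetical). loadRes expects the result file to
# be a JSON array with one {'question_id': ..., 'answer': ...} entry for
# every question id in the annotation file, which is what this builds.
if __name__ == '__main__':
    annFile = '../../VQA/Annotations/v2_mscoco_train2014_annotations.json'
    quesFile = '../../VQA/Questions/v2_OpenEnded_mscoco_train2014_questions.json'
    vqa = VQA(annFile, quesFile)
    results = [{'question_id': quesId, 'answer': 'yes'} for quesId in vqa.getQuesIds()]
    json.dump(results, open('fake_results.json', 'w'))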
================================================
FILE: QuestionTypes/abstract_v002_question_types.txt
================================================
how many
what color is the
is the
where is the
what
what is
are the
what is the
is there a
does the
is the woman
is the man
what is on the
is it
is the girl
is the boy
is the dog
are they
who is
what kind of
what color are the
what is in the
what is the man
is there
what is the woman
what are the
what is the boy
are there
what is the girl
is this
how
which
how many people are
is the cat
why is the
are
will the
what type of
what is the dog
do
is she
does
do the
is
is the baby
are there any
is the lady
can
what animal is
where are the
is the sun
what are they
did the
what is the cat
what is the lady
how many clouds are
is that
is the little girl
is he
are these
how many trees are
how many pillows are
the people
why is the young
how many windows are
is this a
what is the little
is the tv
how many animals are
who
how many pictures
how many plants are
how many birds are
what color is
what is the baby
is anyone
what color
how many bushes
is the old man
none of the above

================================================
FILE: QuestionTypes/mscoco_question_types.txt
================================================
how many
is the
what
what color is the
what is the
is this
is this a
what is
are the
what kind of
is there a
what type of
is it
what are the
where is the
is there
does the
what color are the
are these
are there
which
is
what is the man
is the man
are
how
does this
what is on the
what does the
how many people are
what is in the
what is this
do
what are
are they
what time
what sport is
are there any
is he
what color is
why
where are the
what color
who is
what animal is
is the woman
is this an
do you
how many people are in
what room is
has
is this person
what is the woman
can you
why is the
is the person
what is the color of the
what is the person
could
was
is that a
what number is
what is the name
what brand
none of the above

================================================
FILE: README.md
================================================
Python API and Evaluation Code for v2.0 and v1.0 releases of the VQA dataset
===================

## VQA v2.0 release ##
This release consists of
- Real
    - 82,783 MS COCO training images, 40,504 MS COCO validation images and 81,434 MS COCO testing images (images are obtained from the [MS COCO website](http://mscoco.org/dataset/#download))
    - 443,757 questions for training, 214,354 questions for validation and 447,793 questions for testing
    - 4,437,570 answers for training and 2,143,540 answers for validation (10 per question)

There is only one type of task
- Open-ended task

## VQA v1.0 release ##
This release consists of
- Real
    - 82,783 MS COCO training images, 40,504 MS COCO validation images and 81,434 MS COCO testing images (images are obtained from the [MS COCO website](http://mscoco.org/dataset/#download))
    - 248,349 questions for training, 121,512 questions for validation and 244,302 questions for testing (3 per image)
    - 2,483,490 answers for training and 1,215,120 answers for validation (10 per question)
- Abstract
    - 20,000 training images, 10,000 validation images and 20,000 testing images
    - 60,000 questions for training, 30,000 questions for validation and 60,000 questions for testing (3 per image)
    - 600,000 answers for training and 300,000 answers for validation (10 per question)

There are two types of tasks
- Open-ended task
- Multiple-choice task (18 choices per question)

## Requirements ##
- python 2.7
- scikit-image (visit [this page](http://scikit-image.org/docs/dev/install.html) for installation)
- matplotlib (visit [this page](http://matplotlib.org/users/installing.html) for installation)

## Files ##
./Questions
- For v2.0, download the question files from the [VQA download page](http://www.visualqa.org/download.html), extract them and place them in this folder.
- For v1.0, both real and abstract, question files can be found on the [VQA v1 download page](http://www.visualqa.org/vqa_v1_download.html).
- Question files from the Beta v0.9 release (123,287 MSCOCO train and val images, 369,861 questions, 3,698,610 answers) can be found below
    - [training question files](http://visualqa.org/data/mscoco/prev_rel/Beta_v0.9/Questions_Train_mscoco.zip)
    - [validation question files](http://visualqa.org/data/mscoco/prev_rel/Beta_v0.9/Questions_Val_mscoco.zip)
- Question files from the Beta v0.1 release (10k MSCOCO images, 30k questions, 300k answers) can be found [here](http://visualqa.org/data/mscoco/prev_rel/Beta_v0.1/Questions_Train_mscoco.zip).

./Annotations
- For v2.0, download the annotation files from the [VQA download page](http://www.visualqa.org/download.html), extract them and place them in this folder.
- For v1.0, both real and abstract, annotation files can be found on the [VQA v1 download page](http://www.visualqa.org/vqa_v1_download.html).
- Annotation files from the Beta v0.9 release (123,287 MSCOCO train and val images, 369,861 questions, 3,698,610 answers) can be found below
    - [training annotation files](http://visualqa.org/data/mscoco/prev_rel/Beta_v0.9/Annotations_Train_mscoco.zip)
    - [validation annotation files](http://visualqa.org/data/mscoco/prev_rel/Beta_v0.9/Annotations_Val_mscoco.zip)
- Annotation files from the Beta v0.1 release (10k MSCOCO images, 30k questions, 300k answers) can be found [here](http://visualqa.org/data/mscoco/prev_rel/Beta_v0.1/Annotations_Train_mscoco.zip).
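
For reference, the helper and evaluation demos build the expected annotation and question file paths from these folders as sketched below (this mirrors vqaDemo.py and vqaEvalDemo.py; the subtype value is just an example):

```python
dataDir     = '../../VQA'
versionType = 'v2_'         # '' for v1.0
taskType    = 'OpenEnded'
dataType    = 'mscoco'
dataSubType = 'train2014'
annFile  = '%s/Annotations/%s%s_%s_annotations.json' % (dataDir, versionType, dataType, dataSubType)
quesFile = '%s/Questions/%s%s_%s_%s_questions.json' % (dataDir, versionType, taskType, dataType, dataSubType)
```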
./Images
- For real, create a directory named mscoco inside this directory. For each of train, val and test, create directories named train2014, val2014 and test2015 respectively inside the mscoco directory, download the respective images from the [MS COCO website](http://mscoco.org/dataset/#download) and place them in the respective folders.
- For abstract, create a directory named abstract_v002 inside this directory. For each of train, val and test, create directories named train2015, val2015 and test2015 respectively inside the abstract_v002 directory, download the respective images from the [VQA download page](http://www.visualqa.org/download.html) and place them in the respective folders.

./PythonHelperTools
- This directory contains the Python API to read and visualize the VQA dataset
- vqaDemo.py (demo script)
- vqaTools (API to read and visualize data)

./PythonEvaluationTools
- This directory contains the Python evaluation code
- vqaEvalDemo.py (evaluation demo script)
- vqaEvaluation (evaluation code)

./Results
- OpenEnded_mscoco_train2014_fake_results.json (an example of a fake results file for v1.0 to run the demo)
- Visit the [VQA evaluation page](http://visualqa.org/evaluation) for more details.

./QuestionTypes
- This directory contains the following lists of question types for both real and abstract questions (question types are unchanged from v1.0 to v2.0). Within a list, a longer question type takes precedence over a shorter one with the same leading words: if the list contains question types of length n+k and length n that share the same first n words, the question type of length n does not include questions that belong to the question type of length n+k. For example, since the mscoco list contains both "how many" and "how many people are", a question starting with "how many people are" is counted under the latter, not under "how many".
- mscoco_question_types.txt
- abstract_v002_question_types.txt

## References ##
- [VQA: Visual Question Answering](http://visualqa.org/)
- [Microsoft COCO](http://mscoco.org/)

## Developers ##
- Aishwarya Agrawal (Virginia Tech)
- Code for the API is based on the [MSCOCO API code](https://github.com/pdollar/coco).
- The format of the code for evaluation is based on the [MSCOCO evaluation code](https://github.com/tylin/coco-caption).

================================================
FILE: Results/OpenEnded_mscoco_train2014_fake_results.json
================================================
[File too large to display: 10.6 MB]

================================================
FILE: license.txt
================================================
Copyright (c) 2014, Aishwarya Agrawal
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
The views and conclusions contained in the software and documentation are those of the authors and should not be interpreted as representing official policies, either expressed or implied, of the FreeBSD Project.