Repository: dlau/mineye Branch: master Commit: 486e01eb75f3 Files: 10 Total size: 14.8 KB Directory structure: gitextract_aagimesr/ ├── .gitignore ├── README.md ├── img.py ├── package.json ├── pages/ │ ├── home.jsx │ └── home.less ├── server.py ├── static/ │ ├── img/ │ │ └── .gitignore │ └── js/ │ └── .gitignore └── webpack.config.js ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ .DS_Store node_modules bower *.pyc static/img/bank/**/*.jpg .idea bank.db ================================================ FILE: README.md ================================================ # Overview Very simply, this project demonstrates how to match an image to a bank of pre-existing images. It contains a simple front-end and image bank. The python implementation of the image bank can be easily adapted for other applications. The image comparisons use [SURF: Speeded Up Robust Features](http://www.vision.ee.ethz.ch/~surf/eccv06.pdf) which is **scale, orientation, and to some degree affine invariant**. A common problem in managing large numbers of images is detecting *slight* duplicates. Using a library like OpenCV, which is widely available across platforms and languages, is a great way to detect these duplicates. ![scale orientation invariant](http://i.imgur.com/nFASitk.gif) # Animated description ![animation](http://i.cubeupload.com/8nVjdO.gif) # How it works To add an image to the bank: - Compute SURF descriptors for the image - Concatenate the descriptor to a "mega matrix" of pre-existing ones, making note of its position. To look up an image: - Compute SURF descriptors for the image - Perform a knn search in the "mega matrix" for the SURF descriptors found above - For all matches, if the two are within a certain distance threshold, we increment a similarity value with respect to that candidate by 1. 
This creates an arbitrary similarity index. - Return the top results The server is implemented using [flask](http://flask.pocoo.org/) and the front end uses [react](http://facebook.github.io/react/) # Install: ## OSX Need to install `opencv` and `imagemagick` (todo: add links) ```sh pip install sqlite3 pip install numpy pip install flask pip install wand pip install flask npm install ``` # Development: compile front end `webpack` watch for changes on front end `webpack --watch` run server: `python server.py` watch for changes on server: uncomment this line in `server.py` `app.debug = True` **note: this is on by default** # Optimization: - The implementation is poorly optimized; there is only a rudimentary attempt to distribute the "mega matrix" to take advantage of multiple cores. At any sort of scale, you probably want to look into doing some sort of distributed nearest neighbor search. - By default the server persists the bank data in `bank.db`, which is a simple SQLite database with pickled Python objects. This is merely for convenience between server restarts. While it is running, the server keeps everything in local memory. # Related projects: - [isk-daemon](https://github.com/ricardocabral/iskdaemon) # Notes: - Tested with around 200k images without issues. - This has only been tested on OS X Mavericks; it shouldn't have any problems on Linux. It is completely untested on Windows. - [A Sample dataset](http://www.vision.caltech.edu/Image_Datasets/Caltech256/). Untar it and just POST all of the images to the server's `/bank` endpoint, e.g. `find . -name "*.jpg" -exec curl -i -F file=@{} http://localhost:5000/bank \;` (adjust the URL if the server is not running on Flask's default address). # LICENSE **mineye** source code is released under the **MIT License**. The **SURF and SIFT algorithms implemented by OpenCV are patented**. If that is a problem for your use case, you will have to switch out the feature detector for something else (for example FREAK or BRISK). 
================================================ FILE: img.py ================================================ import cv2 import numpy import sqlite3 import pickle from datetime import datetime #max number of images in each matrix, for parallel processing DESC_MAX_LEN = 100000 #sqlite db for persistence BANK_FILENAME = 'bank.db' ''' note the licensing issues with using SURF/SIFT, alternatives are FREAK, BRISK for feature detection ''' def get_surf_des(filename): f = cv2.imread(filename) #hessian threshold 800, 64 not 128 surf = cv2.SURF(800, extended=False) kp, des = surf.detectAndCompute(f, None) return kp, des def get_conn(): return sqlite3.connect('bank.db') class _img: def __init__(self): self.imap = [] self.r = 0 self.descs = [] index_params = dict(algorithm=1,trees=4) self.flann = cv2.FlannBasedMatcher(index_params,dict()) def add_image(self, filename, des=None): if des == None: kv, des = get_surf_des(filename) self.imap.append({ 'index_start' : self.r, 'index_end' : self.r + des.shape[0] - 1, 'file_name' : filename }) self.r += des.shape[0] #it's really slow to do a vstack every time, so just maintain a list and #replicate it as a concatenated numpy ndarray every time. 
an optimization #would be to do a numpy.vstack((self.descs, numpy,array(des))) where self.descs #is a numpy.array self.descs.append(des) def match(self, filename, limit=20): kp, to_match = get_surf_des(filename) img_db = numpy.vstack(numpy.array(self.descs)) #this should be reversed, need to update distance calculation matches = self.flann.knnMatch(img_db, to_match, k=4) sim = dict() for img in self.imap: sim[img['file_name']] = 0 for i in xrange(0, len(matches)): match = matches[i] if match[0].distance < (.6 * match[1].distance): for img in self.imap: if img['index_start'] <= i and img['index_end'] >= i: sim[img['file_name']] += 1 return sim def __len__(self): return len(self.descs) class img: def __init__(self): self.ims = [_img()] self.count = 0 def get_count(self): return self.count def add_image(self, filename, des=None): self.count += 1 self.ims[-1].add_image(filename, des=des) if len(self.ims[-1]) > DESC_MAX_LEN: self.ims.append(_img()) def match(self, filename, limit=20): import multiprocessing.dummy p = multiprocessing.dummy.Pool(10) def f(instance): return instance.match(filename, limit=limit) res = p.map(f, [i for i in self.ims]) sim = dict((k,v) for d in res for (k,v) in d.items()) sorted_sim = sorted(sim.items(), key=lambda x:x[1], reverse=True)[0:limit] sorted_sim = [{'image' : x[0], 'similarity' : x[1]} for x in sorted_sim] sorted_sim = filter(lambda x:x['similarity'] > 5, sorted_sim) return sorted_sim class persisted_img(img): def __init__(self): #optimization, should additionally wrap img once more instead, so it works without persistence img.__init__(self) with get_conn() as conn: c = conn.cursor() c.execute('''CREATE TABLE IF NOT EXISTS descs (filename, des,kp) ''') conn.commit() c.execute( ''' SELECT filename,des FROM descs ''') while True: row = c.fetchone() if not row: break filename = row[0] des = pickle.loads(str(row[1])) print 'img.__init__: loading descriptor for file %s from db' % (filename) if des == None: print 'img.__init__: error 
loading descriptor for %s from db' % (filename) continue self.add_image(filename, des=des) def add_image(self, filename, des=None): if des == None: kv, des = get_surf_des(filename) with get_conn() as conn: c = conn.cursor() data = sqlite3.Binary(pickle.dumps(des, pickle.HIGHEST_PROTOCOL)) c.execute(''' INSERT INTO descs(filename, des) VALUES (?,:data) ''', [filename, data] ) print 'INSERT %s to db' % (filename) conn.commit() img.add_image(self, filename, des=des) ================================================ FILE: package.json ================================================ { "name": "similarity", "version": "0.0.0", "description": "", "main": "webpack.config.js", "dependencies": { "jsx-loader": "~0.9.0", "css-loader": "~0.6.12", "less-loader": "~0.7.2", "less": "~1.7.0", "style-loader": "~0.6.3", "envify": "~1.2.1", "react": "~0.10.0", "superagent": "~0.17.0", "lodash": "~2.4.1", "jquery": "~2.1.0" }, "devDependencies": {}, "scripts": { "test": "echo \"Error: no test specified\" && exit 1" }, "author": "", "license": "ISC" } ================================================ FILE: pages/home.jsx ================================================ /** @jsx React.DOM */ /*NPM includes*/ var _ = require('lodash'); var $ = require('jQuery'); /*React includes*/ var React = require("react/addons"); var cx = React.addons.classSet; /*Styling*/ require('./home.less'); var UploadForm = React.createClass({ handleClick : function(e){ var node = this.getDOMNode(); onComplete = this.props.onComplete; e.preventDefault(); var formData = new FormData(this.getDOMNode()); $.ajax({ type:$(node).attr('method'), url: $(node).attr('action'), data:formData, cache:false, contentType: false, processData: false, success:function(data){ onComplete(data); }, error: function(data){ alert('error uploading!'); } }); }, render : function(){ return (
); } }); var View = React.createClass({ getInitialState : function(){ return { bank : [], count : 0, related : [] }; }, refreshBank : function(){ $.getJSON('/bank', function(bank){ if(!bank || !bank.latest || !bank.count){ return; } this.setState({ bank : bank.latest, count : bank.count }); }.bind(this)); }, relatedImagesChanged : function(images){ if(!images || !images.length){ return; } images = JSON.parse(images); this.setState({ related : images }); }, componentDidMount : function(){ this.refreshBank(); }, render : function(){ var images = _.map(this.state.bank, function(image){ return (
  • ); }); var related = _.map(this.state.related, function(image){ return (
  • similarity : {image.similarity}
  • ); }); return (

    Upload a file to check!

    Results

    Please upload a file to get some matches

      {related}

    Recent Images Uploaded

      {images}

    Total Number of Images: {this.state.count}

    Add a file to the bank!

    ) } }); React.renderComponent( View(), document.getElementById('content') ); ================================================ FILE: pages/home.less ================================================ .thumb-100{ width: 100px; height: 100px; } ul.thumbs{ >li{ display: inline-block; margin-left: 1px; } } ================================================ FILE: server.py ================================================ from os import listdir from os.path import isfile, join import traceback import json import uuid import re import tempfile from flask import Flask, request import wand.image import wand.display import wand.exceptions app = Flask(__name__) #local stuff from img import persisted_img im = persisted_img() BANK_PATH = 'static/img/bank' BANK_THUMB_PATH = join(BANK_PATH,'thumb') print 'USING BANK PATH ' + BANK_PATH print 'USING THUMB PATH ' + BANK_THUMB_PATH def get_images(path): #this isn't very robust, oh well return filter( lambda x : re.search('\.(jpg|jpeg|png)', x.lower()) != None, [join(path, f) for f in listdir(path) if isfile(join(path,f))] ) def get_bank_images(): return get_images(BANK_PATH) def get_thumb_images(): return get_images(BANK_THUMB_PATH) @app.route("/") def index(): return '''
    ''' @app.route('/similar', methods=['POST']) def similar(): if request.method == 'POST': file = request.files['file'] if file: tmpfile = join( tempfile.gettempdir(), file.name ) file.save(tmpfile) #lol shitty try: with wand.image.Image(filename=tmpfile) as img: img.resize(256, 256) img.save(filename=tmpfile) matches = im.match(tmpfile, limit=10) return json.dumps(matches) except: traceback.print_exc() pass return '', 400 @app.route('/bank', methods=['GET', 'POST']) def bank(): if request.method == 'POST': file = request.files['file'] print file if file: tmpfile = join( tempfile.gettempdir(), file.name ) guid = str(uuid.uuid4().get_hex().upper()[0:12]) + '.jpg' dstfile = join( BANK_PATH, guid ) dstfile_thumb = join( BANK_THUMB_PATH, guid ) file.save(tmpfile) try: with wand.image.Image(filename=tmpfile) as img: img.save(filename=dstfile) #will potentially produce some funny results with extremely wide/oblong images img.resize(256, 256) img.save(filename=dstfile_thumb) im.add_image(dstfile_thumb) except wand.exceptions.MissingDelegateError: return 'input is not a valid image', 500 return '', 200 elif request.method == 'GET': limit = 10 try: limit = int(request.args.get('limit', '10')) except ValueError: pass #note, will spit back any non dir files = get_bank_images() return json.dumps({ 'count' : im.get_count(), 'latest' : ['/'+f for f in files[0:limit]] }) return '', 400 if __name__ == "__main__": #todo: toggle debug from config app.debug = True app.run() ================================================ FILE: static/img/.gitignore ================================================ * !.gitignore ================================================ FILE: static/js/.gitignore ================================================ all.js ================================================ FILE: webpack.config.js ================================================ module.exports = { context: __dirname, entry: './pages/home.jsx', output: { path: __dirname + '/static/js', filename: 
'all.js' }, module: { loaders: [ {test: /\.jsx$/, loader: 'jsx-loader'}, {test: /\.less$/, loader: 'style-loader!css-loader!less-loader'}, {test: /bower_components\.*\.js$/, loader: "script-loader"} ] } };