Repository: diogenes/coletivo Branch: master Commit: cec01c4a9939 Files: 28 Total size: 23.6 KB Directory structure: gitextract_tjaaxpp2/ ├── .circleci/ │ └── config.yml ├── .document ├── .gitignore ├── Gemfile ├── LICENSE.txt ├── README.rdoc ├── Rakefile ├── VERSION ├── coletivo.gemspec ├── lib/ │ ├── coletivo/ │ │ ├── models/ │ │ │ ├── person.rb │ │ │ ├── person_rating.rb │ │ │ └── recommendable.rb │ │ ├── rails/ │ │ │ ├── active_record.rb │ │ │ └── engine.rb │ │ └── similarity/ │ │ ├── base_strategy.rb │ │ ├── engine.rb │ │ ├── euclidean_distance_strategy.rb │ │ └── pearson_correlation_strategy.rb │ ├── coletivo.rb │ └── generators/ │ └── coletivo/ │ ├── coletivo_generator.rb │ └── templates/ │ └── person_ratings_migration.rb └── test/ ├── coletivo_test.rb ├── db/ │ └── schema.rb ├── helper.rb ├── models/ │ ├── person_rating_test.rb │ ├── person_test.rb │ └── recommendable_test.rb └── models_helper.rb ================================================ FILE CONTENTS ================================================ ================================================ FILE: .circleci/config.yml ================================================ # Ruby CircleCI 2.0 configuration file # # Check https://circleci.com/docs/2.0/language-ruby/ for more details # version: 2 jobs: build: docker: # specify the version you desire here - image: circleci/ruby:2.4.1-node-browsers # Specify service dependencies here if necessary # CircleCI maintains a library of pre-built images # documented at https://circleci.com/docs/2.0/circleci-images/ # - image: circleci/postgres:9.4 working_directory: ~/repo steps: - checkout # Download and cache dependencies - restore_cache: keys: - v1-dependencies-{{ checksum "Gemfile.lock" }} # fallback to using the latest cache if no exact match is found - v1-dependencies- - run: name: install dependencies command: | bundle install --jobs=4 --retry=3 --path vendor/bundle - save_cache: paths: - ./vendor/bundle key: v1-dependencies-{{ checksum "Gemfile.lock" }} # Database setup - run: bundle exec rake db:create - run: bundle exec rake db:schema:load # run tests! - run: name: run tests command: | mkdir /tmp/test-results TEST_FILES="$(circleci tests glob "spec/**/*_spec.rb" | circleci tests split --split-by=timings)" bundle exec rspec --format progress \ --format RspecJunitFormatter \ --out /tmp/test-results/rspec.xml \ --format progress \ $TEST_FILES # collect reports - store_test_results: path: /tmp/test-results - store_artifacts: path: /tmp/test-results destination: test-results ================================================ FILE: .document ================================================ lib/**/*.rb bin/* - features/**/*.feature LICENSE.txt ================================================ FILE: .gitignore ================================================ Gemfile.lock # rdoc generated rdoc # yard generated doc .yardoc # bundler .bundle # jeweler generated pkg # Have editor/IDE/OS specific files you need to ignore? Consider using a global gitignore: # # * Create a file at ~/.gitignore # * Include files you want ignored # * Run: git config --global core.excludesfile ~/.gitignore # # After doing this, these files will be ignored in all your git projects, # saving you from having to 'pollute' every project you touch with them # # Not sure what to needs to be ignored for particular editors/OSes? Here's some ideas to get you started. (Remember, remove the leading # of the line) # # For MacOS: # #.DS_Store # For TextMate #*.tmproj #tmtags # For emacs: #*~ #\#* #.\#* # For vim: #*.swp # For redcar: #.redcar # For rubinius: #*.rbc ================================================ FILE: Gemfile ================================================ source "http://rubygems.org" gem "rails", ">= 3.0.7" group :development do gem "shoulda", "~> 3.0.0.beta2" gem "bundler", "~> 1.15.4" gem "jeweler", "~> 1.6.2" gem "turn" gem "sqlite3" gem "rdoc" end ================================================ FILE: LICENSE.txt ================================================ Copyright (c) 2011 Diógenes Falcão Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.rdoc ================================================ = coletivo A simple Rails 3 recommendations engine. Coletivo uses {Euclidean Distance}[http://en.wikipedia.org/wiki/Euclidean_distance] or {Pearson's Correlation Coefficient}[http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient] to calculate the similarity between persons and their preferences. == Installation: sudo gem install coletivo rails g coletivo rake db:migrate == Usage: At your Rails model that represents a person (can be an _User_, _Member_, or something like that): class User < ActiveRecord::Base has_own_preferences # ... end So, a person can rate things: current_user = User.create(:name => 'Diogenes') movie = Movie.create(:name => 'The Tourist', :year => 2010) current_user.rate!(movie, 4.5) And after a lot of ratings... *recommendations*: Movie.find_recommendations_for(current_user) # => movies and more movies... By default, the similarity strategy used is Euclidean Distance, but you can change that passing the _strategy_ option: Movie.find_recommendations_for(current_user, :strategy => :pearson) == Contributing to coletivo * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet * Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it * Fork the project * Start a feature/bugfix branch * Commit and push until you are happy with your contribution * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally. * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it. == Copyright Copyright (c) 2011 Diógenes Falcão. See LICENSE.txt for further details. ================================================ FILE: Rakefile ================================================ # encoding: utf-8 require 'rubygems' require 'bundler' begin Bundler.setup(:default, :development) rescue Bundler::BundlerError => e $stderr.puts e.message $stderr.puts "Run `bundle install` to install missing gems" exit e.status_code end require 'rake' require 'jeweler' Jeweler::Tasks.new do |gem| # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options gem.name = "coletivo" gem.homepage = "http://github.com/diogenes/coletivo" gem.license = "MIT" gem.summary = %Q{A simple Rails 3 recommendations engine} gem.description = %Q{A simple Rails 3 recommendations engine} gem.email = "diogenes.araujo@gmail.com" gem.authors = ["Diógenes Falcão"] gem.files = Dir["{lib}/**/*"] # dependencies defined in Gemfile end Jeweler::RubygemsDotOrgTasks.new require 'rake/testtask' Rake::TestTask.new(:test) do |test| test.libs << 'lib' << 'test' test.pattern = 'test/**/*_test.rb' test.verbose = true end task :default => :test require 'rake/rdoctask' Rake::RDocTask.new do |rdoc| version = File.exist?('VERSION') ? File.read('VERSION') : "" rdoc.rdoc_dir = 'rdoc' rdoc.title = "coletivo #{version}" rdoc.rdoc_files.include('README*') rdoc.rdoc_files.include('lib/**/*.rb') end ================================================ FILE: VERSION ================================================ 0.0.3 ================================================ FILE: coletivo.gemspec ================================================ # Generated by jeweler # DO NOT EDIT THIS FILE DIRECTLY # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec' # -*- encoding: utf-8 -*- Gem::Specification.new do |s| s.name = %q{coletivo} s.version = "0.0.3" s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version= s.authors = ["Di\303\263genes Falc\303\243o"] s.date = %q{2011-10-31} s.description = %q{A simple Rails 3 recommendations engine} s.email = %q{diogenes.araujo@gmail.com} s.extra_rdoc_files = [ "LICENSE.txt", "README.rdoc" ] s.files = [ "lib/coletivo.rb", "lib/coletivo/models/person.rb", "lib/coletivo/models/person_rating.rb", "lib/coletivo/models/recommendable.rb", "lib/coletivo/rails/active_record.rb", "lib/coletivo/rails/engine.rb", "lib/coletivo/similarity/base_strategy.rb", "lib/coletivo/similarity/engine.rb", "lib/coletivo/similarity/euclidean_distance_strategy.rb", "lib/coletivo/similarity/pearson_correlation_strategy.rb", "lib/generators/coletivo/coletivo_generator.rb", "lib/generators/coletivo/templates/person_ratings_migration.rb" ] s.homepage = %q{http://github.com/diogenes/coletivo} s.licenses = ["MIT"] s.require_paths = ["lib"] s.rubygems_version = %q{1.3.7} s.summary = %q{A simple Rails 3 recommendations engine} if s.respond_to? :specification_version then current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION s.specification_version = 3 if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then s.add_runtime_dependency(%q, [">= 3.0.7"]) s.add_development_dependency(%q, ["~> 3.0.0.beta2"]) s.add_development_dependency(%q, ["~> 1.0.14"]) s.add_development_dependency(%q, ["~> 1.6.2"]) s.add_development_dependency(%q, [">= 0"]) s.add_development_dependency(%q, [">= 0"]) else s.add_dependency(%q, [">= 3.0.7"]) s.add_dependency(%q, ["~> 3.0.0.beta2"]) s.add_dependency(%q, ["~> 1.0.14"]) s.add_dependency(%q, ["~> 1.6.2"]) s.add_dependency(%q, [">= 0"]) s.add_dependency(%q, [">= 0"]) end else s.add_dependency(%q, [">= 3.0.7"]) s.add_dependency(%q, ["~> 3.0.0.beta2"]) s.add_dependency(%q, ["~> 1.0.14"]) s.add_dependency(%q, ["~> 1.6.2"]) s.add_dependency(%q, [">= 0"]) s.add_dependency(%q, [">= 0"]) end end ================================================ FILE: lib/coletivo/models/person.rb ================================================ module Coletivo module Models module Person def self.included(base) base.extend ClassMethods end module ClassMethods # TODO: has_own_preferences doc. def has_own_preferences(options = {}) self.send :include, InstanceMethods end end # ClassMethods module InstanceMethods def rate!(rateable, weight) Coletivo::Config.ratings_container.create!({ :person => self, :rateable => rateable, :weight => weight }) end end # InstanceMethods end # Person end # Models end ================================================ FILE: lib/coletivo/models/person_rating.rb ================================================ module Coletivo module Models class PersonRating < ActiveRecord::Base belongs_to :person, :polymorphic => true belongs_to :rateable, :polymorphic => true validates :person, :rateable, :weight, :presence => true def self.find_for_recommendation(person, rateable_type) where(:rateable_type => rateable_type.to_s) end end end end ================================================ FILE: lib/coletivo/models/recommendable.rb ================================================ module Coletivo module Models module Recommendable def self.included(base) base.extend ClassMethods base.send :include, InstanceMethods end module ClassMethods def find_recommendations_for(person, options = {}) preferences = options[:preferences] ||= load_preferences_for_recommendation(person) top = predict_highest_ratings(person, preferences, options) ids = top.collect(&:last) where(:id => ids).limit(options[:limit]).all end def map_ratings_to_preferences(ratings) #TODO: (???) Item based mapping. key, subkey = :person_id, :rateable_id preferences = {} ratings.each do |rating| p = preferences[rating.send(key)] ||= {} p[rating.send(subkey)] = rating.weight end preferences end def load_preferences_for_recommendation(person) r = Coletivo::Config.ratings_container\ .find_for_recommendation(person, self) map_ratings_to_preferences(r) end private def predict_highest_ratings(person, people_preferences, options) data = {} people_preferences.each do |other, other_prefs| next if other == person sim = person.similarity_with(other, options) next if sim <= 0 other_prefs.each do |item, weight| unless people_preferences[person.id].keys.include?(item) data[item] ||= {:total_similarity => 0.0, :weighted_mean => 0.0} data[item][:total_similarity] += sim data[item][:weighted_mean] += weight * sim end end end # e.g: [[5.35, "movie_2"], [2.0, "movie_4"]] guessed_rating_and_id = Proc.new do |item, item_data| [item_data[:weighted_mean] / item_data[:total_similarity], item] end # DESC sorting by weighted mean of ratings data.collect(&guessed_rating_and_id).sort_by(&:first).reverse end end module InstanceMethods def similarity_with(other_id, options = {}) p = options[:preferences] || self.class.load_preferences_for_recommendation(self) Coletivo::Similarity::Engine\ .similarity_between(self.id, other_id, p, options) end end end # Recommendable end # Models end ================================================ FILE: lib/coletivo/rails/active_record.rb ================================================ ActiveRecord::Base.send :include, Coletivo::Models::Person ActiveRecord::Base.send :include, Coletivo::Models::Recommendable ================================================ FILE: lib/coletivo/rails/engine.rb ================================================ require 'coletivo' module Coletivo class Engine < Rails::Engine end end ================================================ FILE: lib/coletivo/similarity/base_strategy.rb ================================================ module Coletivo module Similarity class BaseStrategy attr_accessor :preferences def similarity_between(one, other) raise "The #similarity_between was not implemented in #{self.class}" end def train_with(people_preferences) @preferences = people_preferences end protected def shared_items_between(one, other) return [] unless preferences[one] && preferences[other] preferences[one].keys.select { |item| preferences[other].keys.include? item } end end end # Similarity end # Coletivo ================================================ FILE: lib/coletivo/similarity/engine.rb ================================================ module Coletivo module Similarity class Engine def self.similarity_between(one, other, preferences, options = {}) strategy = load_strategy options[:strategy] strategy.train_with(preferences) strategy.similarity_between(one, other) end protected def self.load_strategy(key) if :pearson == key Coletivo::Similarity::PearsonCorrelationStrategy.new else Coletivo::Similarity::EuclideanDistanceStrategy.new end end end # Engine end # Similarity end ================================================ FILE: lib/coletivo/similarity/euclidean_distance_strategy.rb ================================================ module Coletivo module Similarity class EuclideanDistanceStrategy < BaseStrategy def similarity_between(one, other) shared = shared_items_between(one, other) return 0 if shared.empty? sum_of_squares = shared.inject(0.0) { |sum, item| sum + (preferences[one][item] - preferences[other][item]) ** 2 } 1 / (1 + sum_of_squares) end end end end ================================================ FILE: lib/coletivo/similarity/pearson_correlation_strategy.rb ================================================ module Coletivo module Similarity class PearsonCorrelationStrategy < BaseStrategy def similarity_between(one, other) shared = shared_items_between(one, other) prefs_one = preferences[one] prefs_other = preferences[other] return 0 if shared.empty? sum_prefs_one = sum_prefs_other = sum_squares_one = \ sum_squares_other = p_sum = 0.0 shared.each { |item| sum_prefs_one += prefs_one[item] sum_prefs_other += prefs_other[item] sum_squares_one += prefs_one[item] ** 2 sum_squares_other += prefs_other[item] ** 2 p_sum += prefs_one[item] * prefs_other[item] } total_shared = shared.size numerator = p_sum - (sum_prefs_one * sum_prefs_other / total_shared) den_one = sum_squares_one - (sum_prefs_one ** 2) / total_shared den_other = sum_squares_other - (sum_prefs_other ** 2) / total_shared denominator = Math.sqrt(den_one * den_other) denominator == 0 ? 0 : numerator / denominator end end end end ================================================ FILE: lib/coletivo.rb ================================================ require 'rails' require 'active_model' require 'active_record' require 'active_support' module Coletivo module Models autoload :Recommendable, 'coletivo/models/recommendable' autoload :Person, 'coletivo/models/person' autoload :PersonRating, 'coletivo/models/person_rating' end module Similarity NO_SIMILARITY = -1.0..0.49 SIMILAR = 0.5..0.99 IDENTICAL = 1.0 autoload :BaseStrategy, 'coletivo/similarity/base_strategy' autoload :EuclideanDistanceStrategy, 'coletivo/similarity/euclidean_distance_strategy' autoload :PearsonCorrelationStrategy, 'coletivo/similarity/pearson_correlation_strategy' autoload :Engine, 'coletivo/similarity/engine' end module Config mattr_accessor :ratings_container # Defaults self.ratings_container = Coletivo::Models::PersonRating end if defined?(Rails) require 'coletivo/rails/engine' require 'coletivo/rails/active_record' end end ================================================ FILE: lib/generators/coletivo/coletivo_generator.rb ================================================ require 'rails/generators' require 'rails/generators/migration' class ColetivoGenerator < Rails::Generators::Base include Rails::Generators::Migration def self.source_root File.join(File.dirname(__FILE__), 'templates') end # Implement the required interface for Rails::Generators::Migration. def self.next_migration_number(dirname) #:nodoc: if ActiveRecord::Base.timestamped_migrations Time.now.utc.strftime("%Y%m%d%H%M%S") else "%.3d" % (current_migration_number(dirname) + 1) end end def create_migration_file migration_template 'person_ratings_migration.rb', 'db/migrate/create_person_ratings.rb' end end ================================================ FILE: lib/generators/coletivo/templates/person_ratings_migration.rb ================================================ class CreatePersonRatings < ActiveRecord::Migration def self.up create_table :person_ratings do |t| t.integer :person_id t.string :person_type t.integer :rateable_id t.string :rateable_type t.decimal :weight, :precision => 5, :scale => 2 t.timestamps end add_index :person_ratings, :rateable_type, :unique => false end def self.down drop_table :person_ratings end end ================================================ FILE: test/coletivo_test.rb ================================================ require 'helper' class ColetivoTest < Test::Unit::TestCase should "be able to change the ratings container class" do config = Coletivo::Config.dup config.ratings_container = Object assert_equal Object, config.ratings_container end end ================================================ FILE: test/db/schema.rb ================================================ require 'generators/coletivo/templates/person_ratings_migration' ActiveRecord::Schema.define(:version => 1) do CreatePersonRatings.up create_table :users do |t| t.string :name t.string :email t.timestamps end create_table :movies do |t| t.string :name t.timestamps end create_table :actors do |t| t.string :name t.timestamps end end ================================================ FILE: test/helper.rb ================================================ require 'rubygems' require 'bundler' begin Bundler.setup(:default, :development) rescue Bundler::BundlerError => e $stderr.puts e.message $stderr.puts "Run `bundle install` to install missing gems" exit e.status_code end $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) $LOAD_PATH.unshift(File.dirname(__FILE__)) require 'coletivo' require 'test/unit' require 'turn' require 'shoulda' ActiveRecord::Base.establish_connection({ :adapter => 'sqlite3', :database => ':memory:' }) ActiveRecord::Migration.verbose = false require 'db/schema' class Test::Unit::TestCase end ================================================ FILE: test/models/person_rating_test.rb ================================================ require 'helper' require 'models_helper' class PersonRatingTest < Test::Unit::TestCase subject { Coletivo::Models::PersonRating.new } should validate_presence_of(:person) should validate_presence_of(:rateable) should validate_presence_of(:weight) context "#find_for_recommendation" do subject { Coletivo::Models::PersonRating } should "list only ratings of the type to recommend" do user = User.create(:name => 'A Good User') movie = Movie.create(:name => 'The Tourist') actress = Actor.create(:name => 'Angelina Jolie') user.rate!(movie, 5.0) user.rate!(actress, 10.0) # :-) recommendations = subject.find_for_recommendation(user, Movie) assert_equal 1, recommendations.size end end end ================================================ FILE: test/models/person_test.rb ================================================ require 'helper' require 'models_helper' class PersonTest < Test::Unit::TestCase def setup super @person = User.create(:name => 'Uber Geek') end should "be able to rate an object" do movie = Movie.create(:name => 'Lovely Movie') @person.rate!(movie, 1) assert_equal 1, ratings_container.all.size end end ================================================ FILE: test/models/recommendable_test.rb ================================================ require 'helper' require 'models_helper' class RecommendableTest < Test::Unit::TestCase def setup super @person1 = User.create(:name => 'Person 1') @person2 = User.create(:name => 'Person 2') end [:euclidean, :pearson].each do |strategy| context "using #{strategy.to_s.upcase}" do should "matches perfect similarity when preferences are identical" do m1 = Movie.create(:name => 'Movie 1') m2 = Movie.create(:name => 'Movie 2') p = { @person1.id => {m1.id => 2.5, m2.id => 1.0}, @person2.id => {m1.id => 2.5, m2.id => 1.0} } sim = similarity_between(@person1, @person2, p, strategy) assert_equal Coletivo::Similarity::IDENTICAL, sim end should "matches no similarity when preferences are very different" do m1 = Movie.create(:name => 'Movie 1') m2 = Movie.create(:name => 'Movie 2') p = { @person1.id => {m1.id => 1.0, m2.id => 10.0}, @person2.id => {m1.id => 10.0, m2.id => 1.0} } sim = similarity_between(@person1, @person2, p, strategy) assert Coletivo::Similarity::NO_SIMILARITY.include?(sim) end should "matches similarity when preferences are similar" do m1 = Movie.create(:name => 'Movie 1') m2 = Movie.create(:name => 'Movie 2') p = { @person1.id => {m1.id => 3.0, m2.id => 5.0}, @person2.id => {m1.id => 4.0, m2.id => 5.0} } sim = similarity_between(@person1, @person2, p, strategy) assert Coletivo::Similarity::SIMILAR.include?(sim) || Coletivo::Similarity::IDENTICAL == sim end should "recommend items for a person - sorted by better ratings" do person3 = User.create(:name => 'Person 3') m1 = Movie.create(:name => 'Movie 1') m2 = Movie.create(:name => 'Movie 2') m3 = Movie.create(:name => 'Movie 3') m4 = Movie.create(:name => 'Movie 4') p = { @person1.id => {m1.id => 2.0, m3.id => 1.0}, @person2.id => {m1.id => 1.5, m2.id => 4.7, m3.id => 1.5, m4.id => 2.5}, person3.id => {m1.id => 2.5, m2.id => 6.0, m3.id => 0.5, m4.id => 1.5} } recommendations = Movie.find_recommendations_for(@person1, :preferences => p, :strategy => strategy) assert recommendations.index(m2) < recommendations.index(m4) end should "be able to recommend a limited number of items" do person3 = User.create(:name => 'Person 3') m1 = Movie.create(:name => 'Movie 1') m2 = Movie.create(:name => 'Movie 2') m3 = Movie.create(:name => 'Movie 3') m4 = Movie.create(:name => 'Movie 4') p = { @person1.id => {m1.id => 2.0, m3.id => 1.0}, @person2.id => {m1.id => 1.5, m2.id => 4.7, m3.id => 1.5, m4.id => 2.5}, person3.id => {m1.id => 2.5, m2.id => 6.0, m3.id => 0.5, m4.id => 1.5} } assert_equal 1, Movie.find_recommendations_for(@person1, :preferences => p, :strategy => strategy, :limit => 1).size end end end def similarity_between(one, other, preferences, strategy) one.similarity_with(other.id, :preferences => preferences, :strategy => strategy) end end ================================================ FILE: test/models_helper.rb ================================================ require 'coletivo' class User < ActiveRecord::Base has_own_preferences end class Movie < ActiveRecord::Base end class Actor < ActiveRecord::Base end def ratings_container Coletivo::Config.ratings_container end class Test::Unit::TestCase def setup truncate! :person_ratings, :users, :movies, :actors end private def truncate!(*tables) [*tables].each do |t| ActiveRecord::Base.connection.execute("DELETE FROM #{t}") end end end