/*
Non-JavaScript sections of this repository dump, kept verbatim inside a
block comment so that the file as a whole parses as JavaScript.

Repository: scotch-io/node-web-scraper
Branch: master
Commit: 044071bccf27
Files: 4
Total size: 2.0 KB

Directory structure:
gitextract_g72w4jjs/
├── .gitignore
├── README.md
├── package.json
└── server.js

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
lib-cov
*.seed
*.log
*.csv
*.dat
*.out
*.pid
*.gz

pids
logs
results

npm-debug.log
node_modules

================================================
FILE: README.md
================================================
node-web-scraper
================

Simple web scraper to get a movie name, release year and community rating from IMDB. To run this example use the following commands:

``` shell
$ npm install
$ node server.js
```

Then it will start up our node server, navigate to http://localhost:8081/scrape and see what happens.

================================================
FILE: package.json
================================================
{
  "name"         : "node-web-scrape",
  "version"      : "0.0.1",
  "description"  : "Scrape le web.",
  "main"         : "server.js",
  "author"       : "Scotch",
  "repository"   : {
    "type" : "git",
    "url"  : "https://github.com/scotch-io/node-web-scraper"
  },
  "dependencies" : {
    "express" : "latest",
    "request" : "latest",
    "cheerio" : "latest"
  }
}

================================================
FILE: server.js
================================================
*/

var express = require('express');
var fs      = require('fs');
var request = require('request');
var cheerio = require('cheerio');
var app     = express();

// Let's scrape Anchorman 2. Declared with `var` at module scope so the
// handler no longer creates an implicit global named `url`.
var MOVIE_URL = 'http://www.imdb.com/title/tt1229340/';

/**
 * Extract the movie fields from the HTML of the scraped title page.
 *
 * Title and release are read from the children of each `.title_wrapper`
 * element and the rating from the text of each `.ratingValue` element. When
 * a selector matches several elements the last match wins; when it matches
 * none the corresponding field stays an empty string.
 *
 * @param {string} html - Markup returned by the page request.
 * @returns {{title: string, release: string, rating: string}} Scraped fields.
 */
function extractMovie(html) {
  var $ = cheerio.load(html);
  var json = { title: '', release: '', rating: '' };

  // `.each` rather than `.filter`: the callback exists only for its side
  // effects, and no filtered selection is ever used.
  $('.title_wrapper').each(function () {
    var data = $(this);
    json.title = data.children().first().text().trim();
    json.release = data.children().last().children().last().text().trim();
  });

  $('.ratingValue').each(function () {
    json.rating = $(this).text().trim();
  });

  return json;
}

/**
 * GET /scrape - fetch the movie page, extract its fields and write them to
 * `output.json` in the current working directory.
 *
 * Responds with 'Check your console!' as soon as the write has been started
 * (the response does not wait for the write to finish). If the page request
 * itself fails, the error is logged and a 502 is returned instead of trying
 * to serialise data that was never scraped.
 */
app.get('/scrape', function (req, res) {
  request(MOVIE_URL, function (error, response, html) {
    if (error) {
      console.error('Could not fetch ' + MOVIE_URL + ':', error);
      res.status(502).send('Scrape failed - check your console!');
      return;
    }

    var json = extractMovie(html);

    fs.writeFile('output.json', JSON.stringify(json, null, 4), function (err) {
      // Only report success when the write actually succeeded.
      if (err) {
        console.error('Could not write output.json:', err);
        return;
      }
      console.log('File successfully written! - Check your project directory for the output.json file');
    });

    res.send('Check your console!');
  });
});

app.listen('8081');
console.log('Magic happens on port 8081');
exports = module.exports = app;