Repository: mnmldave/scraper
Branch: master
Commit: cb75851385c3
Files: 45
Total size: 1.1 MB
Directory structure:
gitextract_812n7y4k/
├── .gitignore
├── LICENSE.txt
├── README.md
├── Rakefile
├── psd/
│ ├── scraper128.psd
│ ├── scraper32.psd
│ └── scraper48.psd
└── src/
├── background.html
├── chrome_ex_oauth.html
├── chrome_ex_oauth.js
├── chrome_ex_oauthsimple.js
├── css/
│ ├── base.css
│ ├── popup.css
│ └── viewer.css
├── js/
│ ├── background.js
│ ├── bit155/
│ │ ├── attr.js
│ │ ├── csv.js
│ │ └── scraper.js
│ ├── contentscript.js
│ ├── popup.js
│ ├── shared.js
│ └── viewer.js
├── lib/
│ ├── datatables-1.7.4/
│ │ ├── images/
│ │ │ └── Sorting icons.psd
│ │ └── js/
│ │ └── jquery.dataTables.js
│ ├── jquery-ui-1.8.6/
│ │ ├── css/
│ │ │ └── custom-theme/
│ │ │ └── jquery-ui-1.8.6.custom.css
│ │ └── js/
│ │ ├── jquery-1.4.2.js
│ │ ├── jquery-ui-1.8.6.highlight.js
│ │ └── jquery-ui-1.8.6.js
│ ├── jquery.layout-1.2.0.js
│ └── jquery.tablednd_0_5.js
├── license.html
├── manifest.json
├── popup.html
├── test/
│ ├── SpecRunner.html
│ ├── lib/
│ │ └── jasmine-1.0.1/
│ │ ├── MIT.LICENSE
│ │ ├── jasmine-html.js
│ │ ├── jasmine.css
│ │ └── jasmine.js
│ └── spec/
│ ├── bit155/
│ │ ├── attr.spec.js
│ │ ├── csv.spec.js
│ │ └── scraper.spec.js
│ ├── jquery-commonAncestor.spec.js
│ ├── jquery-serializeParams.spec.js
│ └── jquery-xpath.spec.js
└── viewer.html
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
syntax:glob
build
*.pem
.DS_Store
target
Icon?
ehthumbs.db
Thumbs.db
*.crx
*.zip
pkg
================================================
FILE: LICENSE.txt
================================================
Copyright (c) 2010, David Heaton
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of bit155 nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: README.md
================================================
Scraper
=======
A Google Chrome extension for getting data out of web pages and into spreadsheets.
Usage
-----
Highlight a part of the page that is similar to what you want to scrape. Right-click and select the "Scrape selected..." item. The scraper window will appear, showing you the initial results. You can export the table to Google Docs by pressing the "Export to Google Docs..." button, or use the left-hand pane to further refine or customize your scraping.
The "Selector" section lets you change which page elements are scraped. You can specify the query as either a [jQuery selector](http://api.jquery.com/category/selectors/), or in [XPath](http://www.w3schools.com/XPath/xpath_intro.asp).
You may also customize the columns of the table in the "Columns" section. These must be specified in XPath. You can specify names for columns if you would like.
Selecting the "Exclude empty results" filter will prevent any matches that contain no column values from appearing in the table.
After making any customizations, you must press the "Scrape" button to update the table of results.
Download
--------
Download the extension from [http://chrome.google.com/extensions/detail/mbigbapnjcgaffohmbkdlecaccepngjd](http://chrome.google.com/extensions/detail/mbigbapnjcgaffohmbkdlecaccepngjd).
Get the sources from [https://github.com/mnmldave/scraper](https://github.com/mnmldave/scraper).
Building
--------
You don't need to 'build' this extension per se. To test it out, you first
need to navigate to `chrome://extensions` from Google Chrome then expand "Developer Mode". Click the "Load unpacked extension..." button and point it to the `src` directory.
Learn more about plugin development from the [Google Chrome Extensions](http://code.google.com/chrome/extensions/index.html "Google Chrome Extensions - Google Code") page.
A `Rakefile` is included for compiling the Google Chrome extension into a
zip file. It also does javascript and css minification.
License
-------
Scraper is open-sourced under a BSD license which you can find in `LICENSE.txt`.
Credits
-------
Many of the icons used in this extension are from the generous [Yusuke Kamiyamane](http://p.yusukekamiyamane.com/).
-----------------------------------------------------------------------------
Copyright (c) 2010 David Heaton (dave@bit155.com)
================================================
FILE: Rakefile
================================================
# Copyright (c) 2010, David Heaton
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# * Neither the name of bit155 nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
require 'json'
require 'yui/compressor'
require 'closure-compiler'
# Running rake with no task name rebuilds the extension and then repackages it.
task :default => [:rebuild, :repackage]
# Metadata
# --------------------------------------------------------------------------
# The extension manifest is parsed once, up front; its 'name' and 'version'
# entries are used further down to name the package.
manifest = open(File.join('src', 'manifest.json')) do |file|
JSON.load(file)
end
name = manifest['name']
version = manifest['version']
# Building
# --------------------------------------------------------------------------
# Output directory of the build; it is also the name of the file task that
# performs the build.
build_dir = 'build'
desc 'Rebuilds the extension'
# NOTE(review): :build presumably resolves to the `file build_dir` task, whose
# name is 'build' -- confirm against rake's task lookup.
task :rebuild => [:clobber_build, :build]
desc 'Removes build artifacts'
task :clobber_build do
# Best effort: any error raised while removing the directory is ignored.
rmtree build_dir rescue nil
end
desc 'Builds the extension'
# File task named after build_dir: copies the sources into the build directory
# and then rewrites every stylesheet and script there in compressed form.
file build_dir do
# Collect the entries matched by 'src/**' and copy them (recursively) into the
# build directory; a failure to create the directory is ignored.
source_files = Dir.glob(File.join('src', '**'))
mkdir_p build_dir rescue nil
cp_r source_files, build_dir
# compress css
css_compressor = YUI::CssCompressor.new
Dir.glob(File.join(build_dir, '**', '*.css')) do |path|
puts 'Compressing: ' + path
# The open file is handed to the compressor; the compressed text then
# overwrites the same path.
css = File.open(path, 'r') { |file| css_compressor.compress(file) }
File.open(path, 'w') { |file| file.write(css) }
end
# compress javascript
compiler = Closure::Compiler.new
Dir.glob(File.join(build_dir, '**', '*.js')) do |path|
puts 'Compiling: ' + path
begin
js = compiler.compile(File.read(path))
File.open(path, 'w') { |file| file.write(js) }
rescue
# A script that fails to compile is reported and left as copied; the build
# carries on with the remaining files.
print 'Failed: ', $!, "\n"
end
end
end
# Packaging
# --------------------------------------------------------------------------
# The package is named "<name>-<version>" from the manifest and is staged
# under pkg/ before being zipped.
package_name = "#{name}-#{version}"
package_dir = 'pkg'
package_dir_path = File.join(package_dir, package_name)
zip_file = "#{package_name}.zip"
# most of this packaging stuff right from rake/packagetask
desc 'Packages the extension'
task :package => ["#{package_dir}/#{zip_file}"]
# Zips the staged package directory; the command runs from inside pkg/ so the
# archive paths start at the package name.
# NOTE(review): the names are interpolated into the shell command unquoted, so
# a manifest name containing spaces would presumably break it -- confirm.
file "#{package_dir}/#{zip_file}" => package_dir_path do
chdir(package_dir) do
sh %{zip -r #{zip_file} #{package_name}}
end
end
directory package_dir
# Stages the build output into pkg/<name>-<version>, mirroring the directory
# layout of the build directory.
file package_dir_path => [package_dir, build_dir] do
chdir(build_dir) do
Dir.glob('**/*').each do |fn|
# Destination path is made absolute (relative to this Rakefile) because the
# working directory is currently the build directory.
f = File.join(File.dirname(__FILE__), package_dir_path, fn)
fdir = File.dirname(f)
mkdir_p(fdir) if !File.exist?(fdir)
if File.directory?(fn)
mkdir_p(f)
else
# Remove any stale copy first, then place the file with rake's safe_ln
# (presumably a link with a copy fallback -- confirm in the rake docs).
rm_f f
safe_ln(fn, f)
end
end
end
end
desc 'Removes the package artifacts'
task :clobber_package do
# Best effort: any error raised while removing the directory is ignored.
rmtree package_dir rescue nil
end
desc 'Repackages the extension'
task :repackage => [:clobber_package, :package]
desc 'Removes all rake artifacts'
task :clobber => [:clobber_package, :clobber_build]
================================================
FILE: src/background.html
================================================
================================================
FILE: src/chrome_ex_oauth.html
================================================
OAuth Redirect Page
Redirecting...
================================================
FILE: src/chrome_ex_oauth.js
================================================
/**
* Copyright (c) 2010 The Chromium Authors. All rights reserved. Use of this
* source code is governed by a BSD-style license that can be found in the
* LICENSE file.
*/
/**
 * Constructor - no need to invoke directly, call initBackgroundPage instead.
 * @constructor
 * @param {String} url_request_token The OAuth request token URL.
 * @param {String} url_auth_token The OAuth authorize token URL.
 * @param {String} url_access_token The OAuth access token URL.
 * @param {String} consumer_key The OAuth consumer key.
 * @param {String} consumer_secret The OAuth consumer secret.
 * @param {String} oauth_scope The OAuth scope parameter.
 * @param {Object} opt_args Optional arguments. Recognized parameters:
 *     "app_name" {String} Name of the current application. Defaults to
 *         "ChromeExOAuth Library".
 *     "callback_page" {String} If you renamed chrome_ex_oauth.html, the name
 *         this file was renamed to. Defaults to "chrome_ex_oauth.html".
 *     "auth_params" {Object} Additional parameters to append to the
 *         authorize token URL. Own properties are copied, so later changes
 *         to the supplied object do not affect this instance.
 */
function ChromeExOAuth(url_request_token, url_auth_token, url_access_token,
                       consumer_key, consumer_secret, oauth_scope, opt_args) {
  this.url_request_token = url_request_token;
  this.url_auth_token = url_auth_token;
  this.url_access_token = url_access_token;
  this.consumer_key = consumer_key;
  this.consumer_secret = consumer_secret;
  this.oauth_scope = oauth_scope;
  this.app_name = opt_args && opt_args['app_name'] ||
      "ChromeExOAuth Library";
  // Prefixes of the localStorage keys under which the token and its secret
  // are stored (the encoded scope is appended by the accessors).
  this.key_token = "oauth_token";
  this.key_token_secret = "oauth_token_secret";
  this.callback_page = opt_args && opt_args['callback_page'] ||
      "chrome_ex_oauth.html";
  this.auth_params = {};
  if (opt_args && opt_args['auth_params']) {
    var extra_params = opt_args['auth_params'];
    // The loop variable is declared locally: an undeclared `key` would leak
    // an implicit global, and throws a ReferenceError in strict mode.
    for (var key in extra_params) {
      if (extra_params.hasOwnProperty(key)) {
        this.auth_params[key] = extra_params[key];
      }
    }
  }
}
/*******************************************************************************
* PUBLIC API METHODS
* Call these from your background page.
******************************************************************************/
/**
 * Sets up the OAuth helper on the background page. This has to run before
 * any other OAuth call is attempted.
 *
 * The configuration and the helper built from it are published on `window`
 * (chromeExOAuthConfig, chromeExOAuth) together with two state flags
 * (chromeExOAuthRedirectStarted, chromeExOAuthRequestingAccess), and a tab
 * listener is registered that re-opens the callback page in a fresh tab
 * whenever a tab navigates to it.
 * @param {Object} oauth_config Configuration parameters in a JavaScript object.
 *     The following parameters are recognized:
 *         "request_url" {String} OAuth request token URL.
 *         "authorize_url" {String} OAuth authorize token URL.
 *         "access_url" {String} OAuth access token URL.
 *         "consumer_key" {String} OAuth consumer key.
 *         "consumer_secret" {String} OAuth consumer secret.
 *         "scope" {String} OAuth access scope.
 *         "app_name" {String} Application name.
 *         "auth_params" {Object} Additional parameters to pass to the
 *             Authorization token URL.  For an example, 'hd', 'hl', 'btmpl':
 *             http://code.google.com/apis/accounts/docs/OAuth_ref.html#GetAuth
 * @return {ChromeExOAuth} An initialized ChromeExOAuth object.
 */
ChromeExOAuth.initBackgroundPage = function(oauth_config) {
  window.chromeExOAuthConfig = oauth_config;
  window.chromeExOAuth = ChromeExOAuth.fromConfig(oauth_config);
  window.chromeExOAuthRedirectStarted = false;
  window.chromeExOAuthRequestingAccess = false;

  var callback_prefix =
      chrome.extension.getURL(window.chromeExOAuth.callback_page);
  // URLs of the tabs this listener opened itself, keyed by tab id, so that
  // the update event of a re-opened tab does not trigger another re-open.
  var reopened = {};

  function onTabUpdated(tabId, changeInfo, tab) {
    var target = changeInfo.url;
    if (!target) {
      return;
    }
    if (target.substr(0, callback_prefix.length) !== callback_prefix) {
      return;
    }
    if (target == reopened[tabId]) {
      return;
    }
    if (window.chromeExOAuthRequestingAccess != false) {
      return;
    }
    chrome.tabs.create({ 'url' : target }, function(created) {
      reopened[created.id] = created.url;
      chrome.tabs.remove(tabId);
    });
  }

  chrome.tabs.onUpdated.addListener(onTabUpdated);
  return window.chromeExOAuth;
};
/**
 * Authorizes the current user with the configured API. This must be called
 * before sendSignedRequest.
 *
 * When a token is already stored the callback runs immediately. Otherwise
 * the callback is parked on `window.chromeExOAuthOnAuthorize` and the
 * callback page is opened in a new tab to start the OAuth flow.
 * @param {Function} callback A function to call once an access token has
 *     been obtained. This callback will be passed the following arguments:
 *         token {String} The OAuth access token.
 *         secret {String} The OAuth access token secret.
 */
ChromeExOAuth.prototype.authorize = function(callback) {
  if (!this.hasToken()) {
    window.chromeExOAuthOnAuthorize = function(token, secret) {
      callback(token, secret);
    };
    var page_url = chrome.extension.getURL(this.callback_page);
    chrome.tabs.create({ 'url' : page_url });
    return;
  }
  callback(this.getToken(), this.getTokenSecret());
};
/**
 * Removes the token and token secret stored in localStorage for the
 * configured scope. Effectively a "logout" of the configured OAuth API.
 */
ChromeExOAuth.prototype.clearTokens = function() {
  var scope_suffix = encodeURI(this.oauth_scope);
  delete localStorage[this.key_token + scope_suffix];
  delete localStorage[this.key_token_secret + scope_suffix];
};
/**
 * Reports whether a token is stored for this configuration, i.e. whether
 * the current user is "logged in" to the configured OAuth API.
 * @return {Boolean} True if getToken() yields a truthy value.
 */
ChromeExOAuth.prototype.hasToken = function() {
  var stored = this.getToken();
  return Boolean(stored);
};
/**
 * Signs the given URL with the stored tokens and sends an HTTP request to
 * the signed URL.
 * @param {String} url The URL to send the request to. Querystring parameters
 *     should be omitted.
 * @param {Function} callback A function to be called once the request is
 *     completed (readyState 4). It will be passed the following arguments:
 *         responseText {String} The text response.
 *         xhr {XMLHttpRequest} The XMLHttpRequest object which was used to
 *             send the request. Useful if you need to check response status
 *             code, etc.
 * @param {Object} opt_params Additional parameters to configure the request.
 *     The following parameters are accepted:
 *         "method" {String} The HTTP method to use. Defaults to "GET".
 *         "body" {String} A request body to send. Defaults to null.
 *         "parameters" {Object} Query parameters to include in the request.
 *         "headers" {Object} Additional headers to include in the request.
 */
ChromeExOAuth.prototype.sendSignedRequest = function(url, callback,
                                                     opt_params) {
  var options = opt_params || {};
  var http_method = options['method'] || 'GET';
  var payload = options['body'] || null;
  var query = options['parameters'] || {};
  var extra_headers = options['headers'] || {};
  var target = this.signURL(url, http_method, query);

  // Invoked on every ready-state change; only the final state is reported.
  function onStateChange(xhr) {
    if (xhr.readyState != 4) {
      return;
    }
    callback(xhr.responseText, xhr);
  }

  ChromeExOAuth.sendRequest(http_method, target, extra_headers, payload,
                            onStateChange);
};
/**
 * Adds the required OAuth parameters to the given url and returns the
 * result. Useful if you need a signed url but don't want to make an XHR
 * request.
 * @param {String} url The base url of the resource you are querying.
 * @param {String} method The http method to use.
 * @param {Object} opt_params Query parameters to include in the request.
 * @return {String} The base url plus any query params plus any OAuth params.
 * @throws {Error} If no token or no token secret is stored.
 */
ChromeExOAuth.prototype.signURL = function(url, method, opt_params) {
  var token = this.getToken();
  var secret = this.getTokenSecret();
  if (!(token && secret)) {
    throw new Error("No oauth token or token secret");
  }

  var signatures = {
    consumer_key : this.consumer_key,
    shared_secret : this.consumer_secret,
    oauth_secret : secret,
    oauth_token: token
  };
  var request = {
    action : method,
    path : url,
    parameters : opt_params || {},
    signatures: signatures
  };
  return OAuthSimple().sign(request).signed_url;
};
/**
 * Generates the Authorization header based on the oauth parameters.
 * @param {String} url The base url of the resource you are querying.
 * @param {String} method The http method to use.
 * @param {Object} opt_params Query parameters to include in the request.
 * @return {String} An Authorization header containing the oauth_* params.
 * @throws {Error} If no token or no token secret is stored.
 */
ChromeExOAuth.prototype.getAuthorizationHeader = function(url, method,
                                                          opt_params) {
  var token = this.getToken();
  var secret = this.getTokenSecret();
  if (!(token && secret)) {
    throw new Error("No oauth token or token secret");
  }

  var signatures = {
    consumer_key : this.consumer_key,
    shared_secret : this.consumer_secret,
    oauth_secret : secret,
    oauth_token: token
  };
  var request = {
    action: method,
    path : url,
    parameters : opt_params || {},
    signatures: signatures
  };
  return OAuthSimple().getHeaderString(request);
};
/*******************************************************************************
* PRIVATE API METHODS
* Used by the library. There should be no need to call these methods directly.
******************************************************************************/
/**
 * Builds a new ChromeExOAuth object out of a configuration object.
 * @param {Object} oauth_config Configuration parameters in a JavaScript object.
 *     The following parameters are recognized:
 *         "request_url" {String} OAuth request token URL.
 *         "authorize_url" {String} OAuth authorize token URL.
 *         "access_url" {String} OAuth access token URL.
 *         "consumer_key" {String} OAuth consumer key.
 *         "consumer_secret" {String} OAuth consumer secret.
 *         "scope" {String} OAuth access scope.
 *         "app_name" {String} Application name.
 *         "auth_params" {Object} Additional parameters to pass to the
 *             Authorization token URL.  For an example, 'hd', 'hl', 'btmpl':
 *             http://code.google.com/apis/accounts/docs/OAuth_ref.html#GetAuth
 * @return {ChromeExOAuth} An initialized ChromeExOAuth object.
 */
ChromeExOAuth.fromConfig = function(oauth_config) {
  // Only these two optional settings are forwarded to the constructor.
  var optional = {
    'app_name' : oauth_config['app_name'],
    'auth_params' : oauth_config['auth_params']
  };
  var instance = new ChromeExOAuth(oauth_config['request_url'],
                                   oauth_config['authorize_url'],
                                   oauth_config['access_url'],
                                   oauth_config['consumer_key'],
                                   oauth_config['consumer_secret'],
                                   oauth_config['scope'],
                                   optional);
  return instance;
};
/**
 * Entry point for chrome_ex_oauth.html: builds a helper from the
 * configuration stored on the background page and starts (or resumes) the
 * OAuth flow. Once an access token is obtained it is handed to the
 * background page's chromeExOAuthOnAuthorize handler and the currently
 * selected tab is closed.
 */
ChromeExOAuth.initCallbackPage = function() {
  var bg = chrome.extension.getBackgroundPage();
  var helper = ChromeExOAuth.fromConfig(bg.chromeExOAuthConfig);

  function closeTab(tab) {
    chrome.tabs.remove(tab.id);
  }

  function onAuthorized(token, secret) {
    bg.chromeExOAuthOnAuthorize(token, secret);
    bg.chromeExOAuthRedirectStarted = false;
    chrome.tabs.getSelected(null, closeTab);
  }

  bg.chromeExOAuthRedirectStarted = true;
  helper.initOAuthFlow(onAuthorized);
};
/**
 * Convenience wrapper that issues an asynchronous XMLHttpRequest.
 * @param {String} method The HTTP method to use.
 * @param {String} url The URL to send the request to.
 * @param {Object} headers Optional request headers in key/value format.
 * @param {String} body Optional body content.
 * @param {Function} callback Function to call whenever the XMLHttpRequest's
 *     ready state changes. It receives the XMLHttpRequest object followed by
 *     the argument passed to the onreadystatechange handler.
 */
ChromeExOAuth.sendRequest = function(method, url, headers, body, callback) {
  var request = new XMLHttpRequest();
  request.onreadystatechange = function(event) {
    callback(request, event);
  };
  request.open(method, url, true);
  // Iterating a null or undefined `headers` is a no-op, so no separate
  // guard is required.
  for (var name in headers) {
    if (headers.hasOwnProperty(name)) {
      request.setRequestHeader(name, headers[name]);
    }
  }
  request.send(body);
};
/**
 * Decodes a URL-encoded string into key/value pairs.
 *
 * Every "&"-separated segment is examined. Segments that do not consist of
 * exactly one key and one value separated by a single "=" (including empty
 * segments, e.g. from "a=1&&b=2" or a trailing "&") are skipped without
 * affecting the segments that follow them.
 * @param {String} encoded An URL-encoded string.
 * @return {Object} An object representing the decoded key/value pairs found
 *     in the encoded string.
 */
ChromeExOAuth.formDecode = function(encoded) {
  var params = encoded.split("&");
  var decoded = {};
  // Iterate by index: using the segment itself as the loop condition would
  // end the loop at the first empty segment and drop everything after it.
  for (var i = 0; i < params.length; i++) {
    var keyval = params[i].split("=");
    if (keyval.length == 2) {
      var key = ChromeExOAuth.fromRfc3986(keyval[0]);
      var val = ChromeExOAuth.fromRfc3986(keyval[1]);
      decoded[key] = val;
    }
  }
  return decoded;
};
/**
 * Decodes the querystring of the current window into key/value pairs.
 * Everything after the first "?" of window.location.href is treated as the
 * querystring.
 * @return {Object} A object representing any key/value pairs found in the
 *     current window's querystring; empty when the URL contains no "?".
 */
ChromeExOAuth.getQueryStringParams = function() {
  var href = window.location.href;
  var mark = href.indexOf("?");
  if (mark < 0) {
    return {};
  }
  return ChromeExOAuth.formDecode(href.substring(mark + 1));
};
/**
 * Binds a function call to a specific object.  This function will also take
 * a variable number of additional arguments which will be prepended to the
 * arguments passed to the bound function when it is called.
 * @param {Function} func The function to bind.
 * @param {Object} obj The object to bind to the function's "this".
 * @return {Function} A closure that will call the bound function and return
 *     whatever the bound function returns.
 */
ChromeExOAuth.bind = function(func, obj) {
  // Everything after `func` and `obj` is captured as leading arguments.
  var newargs = Array.prototype.slice.call(arguments, 2);
  return function() {
    var combinedargs = newargs.concat(Array.prototype.slice.call(arguments));
    // Propagate the result so the bound function can be used wherever the
    // original's return value matters.
    return func.apply(obj, combinedargs);
  };
};
/**
 * Percent-encodes a value according to the RFC3986 specification:
 * encodeURIComponent plus the five characters it leaves untouched
 * (! * ' ( )).
 * @param {String} val The string to encode.
 * @return {String} The encoded string.
 */
ChromeExOAuth.toRfc3986 = function(val){
  var encoded = encodeURIComponent(val);
  return encoded.replace(/[!*'()]/g, function(ch) {
    // Two-digit upper-case hex escape, e.g. "*" becomes "%2A".
    return "%" + ch.charCodeAt(0).toString(16).toUpperCase();
  });
};
/**
 * Decodes a string that has been encoded according to RFC3986.
 * @param {String} val The string to decode.
 * @return {String} The decoded string.
 */
ChromeExOAuth.fromRfc3986 = function(val){
  // Upper-case escapes of the five characters toRfc3986 adds on top of
  // encodeURIComponent; they are restored first, then the rest is decoded.
  var reserved = {
    "%21": "!",
    "%2A": "*",
    "%27": "'",
    "%28": "(",
    "%29": ")"
  };
  var partial = val.replace(/%(?:21|2A|27|28|29)/g, function(escape) {
    return reserved[escape];
  });
  return decodeURIComponent(partial);
};
/**
 * Appends a key/value parameter to the supplied URL.
 * @param {String} url An URL which may or may not contain querystring values.
 * @param {String} key A key
 * @param {String} value A value
 * @return {String} The URL with URL-encoded versions of the key and value
 *     appended, prefixing them with "&" when the URL already contains a "?"
 *     and with "?" otherwise.
 */
ChromeExOAuth.addURLParam = function(url, key, value) {
  var separator = "?";
  if (url.indexOf('?') >= 0) {
    separator = "&";
  }
  var encoded_key = ChromeExOAuth.toRfc3986(key);
  var encoded_value = ChromeExOAuth.toRfc3986(value);
  return url + separator + encoded_key + "=" + encoded_value;
};
/**
 * Saves an OAuth token in localStorage under a key derived from the
 * configured scope.
 * @param {String} token The token to store.
 */
ChromeExOAuth.prototype.setToken = function(token) {
  var storage_key = this.key_token + encodeURI(this.oauth_scope);
  localStorage[storage_key] = token;
};
/**
 * Looks up the token stored in localStorage for the configured scope.
 * @return {String} The stored token, or undefined when none is stored.
 */
ChromeExOAuth.prototype.getToken = function() {
  var storage_key = this.key_token + encodeURI(this.oauth_scope);
  return localStorage[storage_key];
};
/**
 * Saves an OAuth token secret in localStorage under a key derived from the
 * configured scope.
 * @param {String} secret The secret to store.
 */
ChromeExOAuth.prototype.setTokenSecret = function(secret) {
  var storage_key = this.key_token_secret + encodeURI(this.oauth_scope);
  localStorage[storage_key] = secret;
};
/**
 * Looks up the token secret stored in localStorage for the configured scope.
 * @return {String} The stored secret, or undefined when none is stored.
 */
ChromeExOAuth.prototype.getTokenSecret = function() {
  var storage_key = this.key_token_secret + encodeURI(this.oauth_scope);
  return localStorage[storage_key];
};
/**
 * Drives the OAuth authorization flow for the current page. There are three
 * cases:
 *   - A token is already stored: the callback is called immediately.
 *   - The page URL carries chromeexoauthcallback=true (a redirect has
 *     finished): the oauth_token and oauth_verifier parameters are taken
 *     from the URL and an access token is requested.
 *   - Otherwise: a request token is requested and the page is redirected to
 *     the resulting authorization URL.
 * @param {Function} callback The function to call once the flow has finished.
 *     This callback will be passed the following arguments:
 *         token {String} The OAuth access token.
 *         secret {String} The OAuth access token secret.
 */
ChromeExOAuth.prototype.initOAuthFlow = function(callback) {
  if (this.hasToken()) {
    callback(this.getToken(), this.getTokenSecret());
    return;
  }

  var query = ChromeExOAuth.getQueryStringParams();
  if (query['chromeexoauthcallback'] == 'true') {
    this.getAccessToken(query['oauth_token'], query['oauth_verifier'],
                        callback);
    return;
  }

  function redirectTo(url) {
    window.location.href = url;
  }
  this.getRequestToken(redirectTo, {
    'url_callback_param' : 'chromeexoauthcallback'
  });
};
/**
 * Requests an OAuth request token. The signed request is sent with GET and
 * the response is handled by onRequestToken.
 * @param {Function} callback Function to call once the authorize URL is
 *     calculated. This callback will be passed the following arguments:
 *         url {String} The URL the user must be redirected to in order to
 *             approve the token.
 * @param {Object} opt_args Optional arguments. The following parameters
 *     are accepted:
 *         "url_callback" {String} The URL the OAuth provider will redirect to.
 *             Defaults to the URL of the top-level window.
 *         "url_callback_param" {String} A parameter to include in the callback
 *             URL in order to indicate to this library that a redirect has
 *             taken place. Defaults to "chromeexoauthcallback".
 * @throws {Error} If callback is not a function.
 */
ChromeExOAuth.prototype.getRequestToken = function(callback, opt_args) {
  if (typeof callback !== "function") {
    throw new Error("Specified callback must be a function.");
  }

  var options = opt_args || {};
  var return_url = options['url_callback'];
  if (!return_url) {
    return_url = window && window.top && window.top.location &&
        window.top.location.href;
  }
  var marker = options['url_callback_param'] || "chromeexoauthcallback";
  // The marker parameter lets the callback page recognise the redirect.
  var oauth_callback = ChromeExOAuth.addURLParam(return_url, marker, "true");

  var request = {
    path : this.url_request_token,
    parameters: {
      "xoauth_displayname" : this.app_name,
      "scope" : this.oauth_scope,
      "oauth_callback" : oauth_callback
    },
    signatures: {
      consumer_key : this.consumer_key,
      shared_secret : this.consumer_secret
    }
  };
  var signed = OAuthSimple().sign(request);
  var handler = ChromeExOAuth.bind(this.onRequestToken, this, callback);
  ChromeExOAuth.sendRequest("GET", signed.signed_url, null, null, handler);
};
/**
 * Ready-state handler for the request-token request. Once the request has
 * completed successfully, the request token secret is stored for later use
 * and the authorization URL (including the request token and any configured
 * auth_params) is passed to the supplied callback so the user can be
 * redirected. Calls made before the request completes are ignored.
 * @param {Function} callback Function to call once the authorize URL is
 *     calculated. This callback will be passed the following arguments:
 *         url {String} The URL the user must be redirected to in order to
 *             approve the token.
 * @param {XMLHttpRequest} xhr The XMLHttpRequest object used to fetch the
 *     request token.
 * @throws {Error} If the completed request has a status other than 200.
 */
ChromeExOAuth.prototype.onRequestToken = function(callback, xhr) {
  if (xhr.readyState != 4) {
    return;
  }
  if (xhr.status != 200) {
    throw new Error("Fetching request token failed. Status " + xhr.status);
  }

  var response = ChromeExOAuth.formDecode(xhr.responseText);
  var request_token = response['oauth_token'];
  this.setTokenSecret(response['oauth_token_secret']);

  var authorize_url = ChromeExOAuth.addURLParam(this.url_auth_token,
                                                "oauth_token", request_token);
  for (var name in this.auth_params) {
    if (this.auth_params.hasOwnProperty(name)) {
      authorize_url = ChromeExOAuth.addURLParam(authorize_url, name,
                                                this.auth_params[name]);
    }
  }
  callback(authorize_url);
};
/**
 * Requests an OAuth access token. The background page's
 * chromeExOAuthRequestingAccess flag is raised while the request is pending;
 * if it is already raised, nothing is sent. The response is handled by
 * onAccessToken.
 * @param {String} oauth_token The OAuth request token.
 * @param {String} oauth_verifier The OAuth token verifier.
 * @param {Function} callback The function to call once the token is obtained.
 *     This callback will be passed the following arguments:
 *         token {String} The OAuth access token.
 *         secret {String} The OAuth access token secret.
 * @throws {Error} If callback is not a function.
 */
ChromeExOAuth.prototype.getAccessToken = function(oauth_token, oauth_verifier,
                                                  callback) {
  if (typeof callback !== "function") {
    throw new Error("Specified callback must be a function.");
  }

  var background = chrome.extension.getBackgroundPage();
  if (background.chromeExOAuthRequestingAccess != false) {
    return;
  }
  background.chromeExOAuthRequestingAccess = true;

  var request = {
    path : this.url_access_token,
    parameters: {
      "oauth_token" : oauth_token,
      "oauth_verifier" : oauth_verifier
    },
    signatures: {
      consumer_key : this.consumer_key,
      shared_secret : this.consumer_secret,
      // The secret stored at this point is the request token secret.
      oauth_secret : this.getTokenSecret(this.oauth_scope)
    }
  };
  var signed = OAuthSimple().sign(request);
  var handler = ChromeExOAuth.bind(this.onAccessToken, this, callback);
  ChromeExOAuth.sendRequest("GET", signed.signed_url, null, null, handler);
};
/**
 * Ready-state handler for the access-token request. Once the request has
 * completed, the background page's chromeExOAuthRequestingAccess flag is
 * lowered again. On success the access token and its secret are stored and
 * passed to the supplied callback. Calls made before the request completes
 * are ignored.
 * @param {Function} callback The function to call once the token is obtained.
 *     This callback will be passed the following arguments:
 *         token {String} The OAuth access token.
 *         secret {String} The OAuth access token secret.
 * @param {XMLHttpRequest} xhr The XMLHttpRequest object used to fetch the
 *     access token.
 * @throws {Error} If the completed request has a status other than 200.
 */
ChromeExOAuth.prototype.onAccessToken = function(callback, xhr) {
  if (xhr.readyState != 4) {
    return;
  }

  var background = chrome.extension.getBackgroundPage();
  if (xhr.status != 200) {
    background.chromeExOAuthRequestingAccess = false;
    throw new Error("Fetching access token failed with status " + xhr.status);
  }

  var response = ChromeExOAuth.formDecode(xhr.responseText);
  var access_token = response["oauth_token"];
  var access_secret = response["oauth_token_secret"];
  this.setToken(access_token);
  this.setTokenSecret(access_secret);
  background.chromeExOAuthRequestingAccess = false;
  callback(access_token, access_secret);
};
================================================
FILE: src/chrome_ex_oauthsimple.js
================================================
/* OAuthSimple
* A simpler version of OAuth
*
* author: jr conlin
* mail: src@anticipatr.com
* copyright: unitedHeroes.net
* version: 1.0
* url: http://unitedHeroes.net/OAuthSimple
*
* Copyright (c) 2009, unitedHeroes.net
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the unitedHeroes.net nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY UNITEDHEROES.NET ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL UNITEDHEROES.NET BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
var OAuthSimple;
if (OAuthSimple === undefined)
{
/* Simple OAuth
*
* This class only builds the OAuth elements, it does not do the actual
* transmission or reception of the tokens. It does not validate elements
* of the token. It is for client use only.
*
* api_key is the API key, also known as the OAuth consumer key
* shared_secret is the shared secret (duh).
*
* Both the api_key and shared_secret are generally provided by the site
* offering OAuth services. You need to specify them at object creation
* because nobody uses OAuth without that minimal set of
* signatures.
*
* If you want to use the higher order security that comes from the
* OAuth token (sorry, I don't provide the functions to fetch that because
* sites aren't horribly consistent about how they offer that), you need to
* pass those in either with .setTokensAndSecrets() or as an argument to the
* .sign() or .getHeaderString() functions.
*
* Example:
var oauthObject = OAuthSimple().sign({path:'http://example.com/rest/',
parameters: 'foo=bar&gorp=banana',
signatures:{
api_key:'12345abcd',
shared_secret:'xyz-5309'
}});
document.getElementById('someLink').href=oauthObject.signed_url;
*
* that will sign as a "GET" using "SHA1-MAC" the url. If you need more than
* that, read on, McDuff.
*/
/** OAuthSimple creator
*
* Create an instance of OAuthSimple
*
* @param consumer_key {string} The consumer key (sometimes referred to as the API key). This value is usually supplied by the site you wish to use.
* @param shared_secret {string} The shared secret. This value is also usually provided by the site you wish to use.
*/
OAuthSimple = function (consumer_key,shared_secret)
{
/* if (api_key == undefined)
throw("Missing argument: api_key (oauth_consumer_key) for OAuthSimple. This is usually provided by the hosting site.");
if (shared_secret == undefined)
throw("Missing argument: shared_secret (shared secret) for OAuthSimple. This is usually provided by the hosting site.");
*/ this._secrets={};
this._parameters={};
// General configuration options.
if (consumer_key !== undefined) {
this._secrets['consumer_key'] = consumer_key;
}
if (shared_secret !== undefined) {
this._secrets['shared_secret'] = shared_secret;
}
this._default_signature_method= "HMAC-SHA1";
this._action = "GET";
this._nonce_chars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
/** Forget the parameters and the target path of the current request.
 *
 * Only `_parameters` and `_path` are cleared here.
 *
 * @return {object} this instance, to allow chaining
 */
this.reset = function() {
    this._path = undefined;
    this._parameters = {};
    return this;
};
/** Set the request parameters, either from a hash or from a query string.
 *
 * The given parameters REPLACE any previously stored ones. Afterwards the
 * standard OAuth values that are still missing (nonce, timestamp, signature
 * method, consumer key, token) are filled in through the instance helpers.
 *
 * @param parameters {string,object} List of parameters for the call, this can either be a URI string (e.g. "foo=bar&gorp=banana") or an object/hash
 * @return {object} this instance, to allow chaining
 */
this.setParameters = function (parameters) {
    if (parameters === undefined) {
        parameters = {};
    }
    if (typeof(parameters) == 'string') {
        parameters = this._parseParameterString(parameters);
    }
    this._parameters = parameters;
    if (this._parameters['oauth_nonce'] === undefined) {
        this._getNonce();
    }
    if (this._parameters['oauth_timestamp'] === undefined) {
        this._getTimestamp();
    }
    // The OAuth parameter is called "oauth_signature_method". The previous
    // check looked at "oauth_method", which never exists, so a method supplied
    // by the caller was always overwritten with the default. A supplied string
    // is now passed through setSignatureMethod (which upper-cases it); anything
    // else falls back to the default.
    var signatureMethod = this._parameters['oauth_signature_method'];
    this.setSignatureMethod(typeof signatureMethod === 'string' ? signatureMethod : undefined);
    if (this._parameters['oauth_consumer_key'] === undefined) {
        this._getApiKey();
    }
    if (this._parameters['oauth_token'] === undefined) {
        this._getAccessToken();
    }
    return this;
};
/** Convenience alias for .setParameters
 *
 * @param parameters {string,object} See .setParameters
 * @return {object} whatever .setParameters returns (this instance)
 */
this.setQueryString = function (parameters) {
    var self = this;
    return self.setParameters(parameters);
};
/** Remember the target URL of the request (without the query arguments).
 *
 * @param path {string} the fully qualified URI (excluding query arguments) (e.g "http://example.org/foo")
 * @return {object} this instance, to allow chaining
 * @throws {string} when the path compares equal to the empty string
 */
this.setURL = function (path) {
    var isBlank = (path == '');
    if (isBlank) {
        throw ('No path specified for OAuthSimple.setURL');
    }
    this._path = path;
    return this;
};
/** Convenience alias for .setURL
 *
 * @param path {string} see .setURL
 * @return {object} whatever .setURL returns (this instance)
 */
this.setPath = function(path){
    var self = this;
    return self.setURL(path);
};
/** Choose the HTTP verb used for the request (e.g. GET, POST, DELETE, etc.)
 *
 * The verb is upper-cased before it is stored; when omitted, "GET" is used.
 *
 * @param action {string} HTTP Action word.
 * @return {object} this instance, to allow chaining
 * @throws {string} when the upper-cased verb contains anything but A-Z
 */
this.setAction = function(action) {
    var verb = (action === undefined ? "GET" : action).toUpperCase();
    if (verb.match('[^A-Z]')) {
        throw ('Invalid action specified for OAuthSimple.setAction');
    }
    this._action = verb;
    return this;
};
/** Store the keys/secrets used for signing and check that the required ones exist.
 *
 * Every entry of `signatures` is copied into the stored secrets. The aliases
 * api_key, access_token and access_secret are then mapped onto consumer_key,
 * oauth_token and oauth_secret.
 *
 * @param signatures {object} object/hash of the token/signature pairs {api_key:, shared_secret:, oauth_token: oauth_secret:}
 * @return {object} this instance, to allow chaining
 * @throws {string} when consumer_key or shared_secret is missing, or when an
 *                  oauth_token is present without its oauth_secret
 */
this.setTokensAndSecrets = function(signatures) {
    var secrets = this._secrets;
    var key;
    if (signatures) {
        for (key in signatures) {
            secrets[key] = signatures[key];
        }
    }

    // Aliases: [alternative name, canonical name]
    var aliases = [
        ['api_key', 'consumer_key'],
        ['access_token', 'oauth_token'],
        ['access_secret', 'oauth_secret']
    ];
    for (var a = 0; a < aliases.length; a++) {
        var alias = aliases[a][0];
        if (secrets[alias]) {
            secrets[aliases[a][1]] = secrets[alias];
        }
    }

    // Gauntlet
    if (secrets.consumer_key === undefined) {
        throw('Missing required consumer_key in OAuthSimple.setTokensAndSecrets');
    }
    if (secrets.shared_secret === undefined) {
        throw('Missing required shared_secret in OAuthSimple.setTokensAndSecrets');
    }
    var hasToken = secrets.oauth_token !== undefined;
    if (hasToken && secrets.oauth_secret === undefined) {
        throw('Missing oauth_secret for supplied oauth_token in OAuthSimple.setTokensAndSecrets');
    }
    return this;
};
/** Set the signature method (currently only PLAINTEXT or HMAC-SHA1).
 *
 * The method name is upper-cased and stored as the `oauth_signature_method`
 * parameter. When no method is given, the instance default is used.
 *
 * @param method {string} Method of signing the transaction (only PLAINTEXT and HMAC-SHA1 allowed for now)
 * @return {object} this instance, to allow chaining
 * @throws {string} when the method is neither PLAINTEXT nor HMAC-SHA1
 */
this.setSignatureMethod = function(method) {
    if (method === undefined) {
        method = this._default_signature_method;
    }
    var normalized = method.toUpperCase();
    //TODO: accept things other than PLAINTEXT or HMAC-SHA1
    // String.prototype.match returns null (never undefined) when nothing
    // matches, so the former "=== undefined" check could not fire. The pattern
    // is anchored because _generateSignature compares the stored value for
    // exact equality with these two names.
    if (!/^(PLAINTEXT|HMAC-SHA1)$/.test(normalized)) {
        throw ('Unknown signing method specified for OAuthSimple.setSignatureMethod');
    }
    this._parameters['oauth_signature_method'] = normalized;
    return this;
};
/** Sign the request.
 *
 * note: all arguments are optional, provided you've set them using the
 * other helper functions.
 *
 * @param args {object} hash of arguments for the call
 *                      {action:, path:, parameters:, method:, signatures:}
 *                      all arguments are optional.
 * @return {object} {parameters:, signature:, signed_url:, header:}
 */
this.sign = function (args) {
    if (args === undefined) {
        args = {};
    }
    // Set any given parameters
    if (args['action'] !== undefined) {
        this.setAction(args['action']);
    }
    if (args['path'] !== undefined) {
        this.setPath(args['path']);
    }
    this.setTokensAndSecrets(args['signatures']);
    if (args['parameters'] !== undefined) {
        this.setParameters(args['parameters']);
    }
    // Apply the requested method AFTER setParameters: setParameters replaces
    // the whole parameter hash, so a method set before it would be discarded.
    if (args['method'] !== undefined) {
        this.setSignatureMethod(args['method']);
    }
    // A signature left over from an earlier call must not be part of the
    // parameters that get signed again.
    delete this._parameters['oauth_signature'];
    this._parameters['oauth_signature'] = this._generateSignature();
    return {
        parameters: this._parameters,
        signature: this._oauthEscape(this._parameters['oauth_signature']),
        signed_url: this._path + '?' + this._normalizedParameters(),
        header: this.getHeaderString()
    };
};
/** Return a formatted "header" string
*
* NOTE: This doesn't set the "Authorization: " prefix, which is required.
* I don't set it because various set header functions prefer different
* ways to do that.
*
* @param args {object} see .sign
*/
this.getHeaderString = function(args) {
if (this._parameters['oauth_signature'] === undefined) {
this.sign(args);
}
var result = 'OAuth ';
for (var pName in this._parameters)
{
if (!pName.match(/^oauth/)) {
continue;
}
if ((this._parameters[pName]) instanceof Array)
{
var pLength = this._parameters[pName].length;
for (var j=0;j>16)+(y>>16)+(l>>16);return(m<<16)|(l&0xFFFF);}function _r(n,c){return(n<>>(32-c));}function _c(x,l){x[l>>5]|=0x80<<(24-l%32);x[((l+64>>9)<<4)+15]=l;var w=[80],a=1732584193,b=-271733879,c=-1732584194,d=271733878,e=-1009589776;for(var i=0;i>5]|=(s.charCodeAt(i/8)&m)<<(32-_z-i%32);}return b;}function _h(k,d){var b=_b(k);if(b.length>16){b=_c(b,k.length*_z);}var p=[16],o=[16];for(var i=0;i<16;i++){p[i]=b[i]^0x36363636;o[i]=b[i]^0x5C5C5C5C;}var h=_c(p.concat(_b(d)),512+d.length*_z);return _c(o.concat(h),512+160);}function _n(b){var t="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",s='';for(var i=0;i>2]>>8*(3-i%4))&0xFF)<<16)|(((b[i+1>>2]>>8*(3-(i+1)%4))&0xFF)<<8)|((b[i+2>>2]>>8*(3-(i+2)%4))&0xFF);for(var j=0;j<4;j++){if(i*8+j*6>b.length*32){s+=_p;}else{s+=t.charAt((r>>6*(3-j))&0x3F);}}}return s;}function _x(k,d){return _n(_h(k,d));}return _x(k,d);
}
this._normalizedParameters = function() {
var elements = new Array();
var paramNames = [];
var ra =0;
for (var paramName in this._parameters)
{
if (ra++ > 1000) {
throw('runaway 1');
}
paramNames.unshift(paramName);
}
paramNames = paramNames.sort();
pLen = paramNames.length;
for (var i=0;i 1000) {
throw('runaway 1');
}
elements.push(this._oauthEscape(paramName) + '=' +
this._oauthEscape(sorted[j]));
}
continue;
}
elements.push(this._oauthEscape(paramName) + '=' +
this._oauthEscape(this._parameters[paramName]));
}
return elements.join('&');
};
/** Compute the signature for the current action, path and parameters.
 *
 * The key is the escaped shared secret and the escaped oauth secret joined
 * with '&'. PLAINTEXT returns that key as is; HMAC-SHA1 signs
 * "ACTION&path&normalized-parameters" (each part escaped) with it.
 *
 * @return {string|null} the signature, or null for any other signature method
 */
this._generateSignature = function() {
    var escapedShared = this._oauthEscape(this._secrets.shared_secret);
    var escapedToken = this._oauthEscape(this._secrets.oauth_secret);
    var secretKey = escapedShared + '&' + escapedToken;
    var method = this._parameters['oauth_signature_method'];

    if (method == 'PLAINTEXT') {
        return secretKey;
    }
    if (method != 'HMAC-SHA1') {
        return null;
    }

    var escapedAction = this._oauthEscape(this._action);
    var escapedPath = this._oauthEscape(this._path);
    var escapedParams = this._oauthEscape(this._normalizedParameters());
    var sigString = escapedAction + '&' + escapedPath + '&' + escapedParams;
    return this.b64_hmac_sha1(secretKey, sigString);
};
return this;
};
}
================================================
FILE: src/css/base.css
================================================
/* Reset: strip margins, padding, borders and outlines from the listed elements. */
html, body, div, span, applet, object, iframe,
h1, h2, h3, h4, h5, h6, p, blockquote, pre,
a, abbr, acronym, address, big, cite, code,
del, dfn, em, font, img, ins, kbd, q, s, samp,
small, strike, strong, sub, sup, tt, var,
b, u, i, center,
dl, dt, dd, ol, ul, li,
fieldset, form, label, legend,
table, caption, tbody, tfoot, thead, tr, th, td {
margin: 0;
padding: 0;
border: 0;
outline: 0;
font-size: 100%;
vertical-align: baseline;
background: transparent;
}
/* Base typography for every page that imports this sheet. */
body {
line-height: 1;
font-family: "Lucida Grande",Arial,sans-serif;
font-size: 10px;
color: #333;
}
ol, ul {
list-style: none;
}
blockquote, q {
quotes: none;
}
/* Both declarations are kept on purpose: the second overrides the first where 'none' is understood. */
blockquote:before, blockquote:after,
q:before, q:after {
content: '';
content: none;
}
/* Removes the focus outline from every element. */
:focus {
outline: 0;
}
ins {
text-decoration: none;
}
del {
text-decoration: line-through;
}
table {
border-collapse: collapse;
border-spacing: 0;
}
/* Shared text and link styles. */
p {
margin-bottom: 1em;
line-height: 1.5;
}
a {
color: #4492D7;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
.disabled {
color: #aaa;
}
/* Buttons: links with class "button", <button> and submit inputs share one look.
   The gradient and the focus shadow use WebKit-prefixed syntax only. */
a.button, button, input[type=submit] {
outline: none;
border: 1px solid #aaa;
padding: 7px 10px;
border-radius: 5px;
background: -webkit-gradient(linear, left top, left bottom, from(#fff), to(#ddd));
color: #333;
text-shadow: 0 1px rgba(255,255,255,0.8);
text-decoration: none;
font-size: 11px;
}
/* Pressed state: darker gradient. */
a.button:active, button:active, input[type=submit]:active {
background: -webkit-gradient(linear, left top, left bottom, from(#ddd), to(#eee));
}
/* Focus state: darker border plus a soft shadow (the outline itself is removed above). */
a.button:focus, button:focus, input[type=submit]:focus {
border-color: #999;
-webkit-box-shadow: 0 0 3px rgba(0,0,0,0.3);
}
select {
border: 1px solid #aaa;
padding: 3px;
}
================================================
FILE: src/css/popup.css
================================================
/* Popup stylesheet; builds on the shared reset and base styles. */
@import url('base.css');
/* Fixed popup width. */
html {
width: 300px;
}
body {
margin: 10px;
padding: 0;
}
/* Preset list: scrolls once it grows beyond 300px. */
#presets {
margin: 0;
overflow: auto;
max-height: 300px;
}
#presets li {
list-style: none;
margin: 0;
}
/* Each preset link fills its row; the left padding leaves room for the icon
   placed 10px from the left edge. */
#presets .preset a {
color: #333;
display: block;
text-decoration: none;
padding: 10px;
padding-left: 31px;
background-image: url('../img/application-form.png');
background-repeat: no-repeat;
background-position: 10px;
}
#presets .preset a:focus {
outline: none;
}
#presets .preset:hover {
background-color: #eee;
text-decoration: underline;
}
/* Footer: right-aligned content separated from the list by a rule. */
#footer {
margin-top: 10px;
padding-top: 10px;
text-align: right;
border-top: 1px solid #999;
}
/* Floated to the left side of the footer. */
#scraper {
float: left;
text-decoration: none;
font-weight: bold;
color: #333;
}
================================================
FILE: src/css/viewer.css
================================================
/* Viewer stylesheet; builds on the shared reset and base styles. */
@import url('base.css');
body {
font-family: "Lucida Grande",Arial,sans-serif;
font-size: 10px;
margin: 0;
padding: 0;
}
a:focus {
outline: none;
}
/* Error messages. */
div.error {
line-height: 1.5;
}
/* When the error element also carries the jQuery UI dialog content classes,
   make room on the left for the warning icon. */
div.error.ui-dialog-content.ui-widget-content {
padding-left: 40px;
background-repeat: no-repeat;
background-position: 12px 12px;
background-image: url('../img/exclamation-red.png');
}
/* Presets panel: not displayed by default. */
#presets {
display: none;
padding-top: 10px;
}
#presets-form {
padding-bottom: 15px;
}
#presets-form fieldset {
margin-top: 10px;
padding-top: 5px;
border: none;
border-top: 1px solid #aaa;
}
/* Preset name field: rounded, with an inset shadow (WebKit-prefixed). */
#presets-form-name {
width: 70%;
border: 1px solid #999;
border-radius: 5px;
-webkit-box-shadow: inset 1px 2px 3px rgba(0,0,0,0.2);
padding: 5px 5px;
margin-bottom: 10px;
}
/* List of saved presets. */
#presets-list {
margin: 0;
padding: 0;
overflow: auto;
}
#presets-list li {
list-style: none;
margin: 0;
padding: 10px;
}
#presets-list li a {
color: #333;
}
#presets-list li a:focus {
outline: none;
}
#presets-list li:hover {
background-color: #eee;
}
/* Second rule for the same selector: widens only the right padding set above. */
#presets-list li {
padding-right: 26px;
}
/* Parts of a preset row: handle floated left, load link in the middle,
   remove control absolutely positioned on the right. */
#presets-list li .preset-handle {
cursor: move;
display: block;
float: left;
}
#presets-list li .preset-load {
cursor: pointer;
display: block;
margin-top: 2px;
margin-left: 26px;
margin-right: 26px;
line-height: 1.5;
}
#presets-list li .preset-remove {
display: inline-block;
position: absolute;
right: 16px;
}
/* Options pane: text selection is disabled (WebKit-prefixed property). */
#options {
-webkit-user-select: none;
}
/* Header strip with a bottom-to-top grey gradient. */
#options-header {
min-height: 30px;
border-top: 1px solid #fff !important;
background: -webkit-gradient(linear, left bottom, left top, color-stop(0.1, #ccc), color-stop(0.8, #eee));
padding: 10px;
line-height: 1.2;
}
/* Page meta block: icon in the top-left corner, text pushed right of it. */
#options-meta-page {
background-image: url('../img/scraper32.png');
background-position: top left;
background-repeat: no-repeat;
min-height: 32px;
padding-left: 42px;
}
#options-meta-page a {
text-decoration: none;
color: #444;
text-shadow: 0 1px 0 #fff;
font-weight: bold;
font-size: 120%;
}
#options-center {
background-color: #f4f4f4 !important;
border-bottom: 1px solid #aaa !important;
padding: 10px;
border-top: 1px solid #aaa;
}
/* Fieldsets show only a top rule. */
#options fieldset {
margin: 10px 0;
border: none;
border-top: 1px solid #ccc;
padding: 10px;
}
#options fieldset legend {
font-weight: bold;
color: #333;
padding: 0 5px;
text-shadow: 0 1px rgba(255,255,255,0.9);
}
/* Selector table. */
#options-selector-table {
margin: 0;
padding: 0;
width: 100%;
border-collapse: collapse;
}
#options-selector-table td {
padding: 2px 5px;
}
#options-selector-table select {
height: 25px;
}
#options-selector-table input[type=text] {
border: 1px solid #999;
border-radius: 5px;
-webkit-box-shadow: inset 1px 2px 3px rgba(0,0,0,0.2);
padding: 5px 5px;
}
/* Help link with a small icon to its left. */
#options-language-help a {
background-image: url('../img/question-small-white.png');
background-repeat: no-repeat;
background-position: 0 -2px;
padding-left: 20px;
min-height: 16px;
display: inline-block;
vertical-align: middle;
color: #666;
text-decoration: none;
}
#options-language-help a:hover {
text-decoration: underline;
}
#options-selector {
width: 100%;
}
/* Attributes table in the options pane. */
#options-attributes {
  width: 100%;
  border-collapse: collapse;
  border-spacing: 0;
}
#options-attributes tbody td {
  padding: 4px;
}
#options-attributes thead tr {
  border: 1px solid #ccc;
}
/* Header cells use a bottom-to-top grey gradient (WebKit-prefixed syntax). */
#options-attributes thead th {
  background: -webkit-gradient(linear, left bottom, left top, color-stop(0.1, #ccc), color-stop(0.8, #eee));
  border: none;
  color: #333;
  padding: 4px;
  text-align: left;
  text-shadow: 0 1px rgba(255,255,255,0.9);
}
/* Drag handle cell used for re-ordering rows. */
#options-attributes .dragHandle {
  width: 16px;
  cursor: move;
  background-image: url('../img/handle.png');
  background-position: left center;
  background-repeat: no-repeat;
}
/* Row while it is being dragged. */
#options-attributes tr.tDnD_whileDrag {
  opacity: 0.5;
}
#options-attributes tr.tDnD_whileDrag .dragHandle {
  background-position: left center;
  background-repeat: no-repeat;
}
/* Inputs blend into the cell until focused. The former "padding: 0" was dead:
   it was overridden by the later padding declaration in the same rule. */
#options-attributes input {
  width: 100%;
  border: none;
  border-radius: 0;
  -webkit-box-shadow: none;
  background: transparent;
  padding: 4px 2px;
}
/* The 1px border added on focus is offset by 1px less padding on each side. */
#options-attributes input:focus {
  border: 1px solid #aaa;
  padding: 3px 1px;
  background-color: #fff;
}
#options-attributes img {
  margin-right: 5px;
}
#options-presets-select {
  width: 50%;
}
/* Center area: offset 380px from the left edge. */
#center {
margin-left: 380px;
border-top: 1px solid #aaa;
}
/* Results table. */
#results-table {
border-bottom: 1px solid #aaa;
}
#results-table table {
width: 100%;
border-spacing: 0;
}
#results-table table thead {
border-bottom: 1px solid #aaa;
background: -webkit-gradient(linear, left bottom, left top, color-stop(0.1, #ccc), color-stop(0.8, #eee));
color: #333;
text-shadow: 0 1px rgba(255,255,255,0.9);
}
#results-table table thead tr {
position: relative;
top: 0;
}
/* Header cells show a pointer cursor. */
#results-table table thead th {
padding: 5px;
border-left: 1px solid #fff;
border-right: 1px solid #aaa;
border-bottom: 1px solid #aaa;
cursor: pointer;
}
#results-table table td {
padding: 5px;
}
/* Tool and index columns are kept narrow. */
#results-table td.tools, #results-table td.index {
width: 10px;
}
#results-table table td.tools img {
margin-right: 2px;
cursor: pointer;
}
/* Striping for rows with class "odd", and a hover highlight. */
#results-table table tr.odd {
background-color: #f0f0f0;
}
#results-table table tr:hover td {
background-color: #f5f5ff;
}
/* Export bar: right-aligned content on a grey gradient. */
#export {
border-top: 1px solid #fff !important;
background: -webkit-gradient(linear, left bottom, left top, color-stop(0.1, #ccc), color-stop(0.8, #eee));
padding: 10px;
text-align: right;
height: 30px;
}
/* About panel: not displayed by default; icon on the left, text pushed right of it. */
#about {
display: none;
padding-top: 10px;
background-image: url('../img/scraper48.png');
background-repeat: no-repeat;
background-position: 10px 10px;
padding-left: 70px;
}
#about h1 {
margin: 0;
font-size: 200%;
font-weight: normal;
margin-bottom: 0.2em;
}
#about h2 {
font-size: 100%;
margin-bottom: 2em;
}
#about dl {
margin-top: 2em;
}
#about dl dt {
font-weight: bold;
margin-bottom: 0.5em;
}
#about dl dd {
padding-left: 1em;
margin-bottom: 0.5em;
}
#about a {
text-decoration: underline;
}
/* Pane footer bar; its table left-aligns the first cell and right-aligns the rest. */
.pane-footer {
height: 30px;
border-top: 1px solid #fff !important;
background: -webkit-gradient(linear, left bottom, left top, color-stop(0.1, #ccc), color-stop(0.8, #eee));
padding: 10px;
}
.pane-footer table {
width: 100%;
margin: 0;
padding: 0;
border: 0;
border-collapse: collapse;
}
.pane-footer table td {
margin: 0;
padding: 0;
text-align: right;
}
.pane-footer table td:first-child {
text-align: left;
}
/* Overrides for ui-* classes: square corners, no padding around tabs. */
.ui-corner-all {
border-radius: 0;
}
.ui-tabs {
padding: 0;
}
.ui-state-default a {
display: inline-block;
vertical-align: top;
}
.ui-state-default a img {
margin-right: 5px;
}
/*
* PANES & CONTENT-DIVs
*/
.ui-layout-pane {
background: #FFF;
border: 1px solid #BBB;
overflow: auto;
}
.ui-layout-content {
position: relative;
overflow: auto;
}
/*
* RESIZER-BARS
*/
/* border-width: 0 removes the top/bottom borders; the !important left/right
   declarations above it still apply. */
.ui-layout-resizer { /* all 'resizer-bars' */
background: #eee;
border-right: 1px solid #ddd !important;
border-left: 1px solid #fff !important;
border-width: 0;
}
/* The next two rules are intentionally empty placeholders. */
.ui-layout-resizer-drag { /* REAL resizer while resize in progress */
}
.ui-layout-resizer-hover { /* affects both open and closed states */
}
/* NOTE: It looks best when 'hover' and 'dragging' are set to the same color,
otherwise color shifts while dragging when bar can't keep up with mouse */
.ui-layout-resizer-open-hover , /* hover-color to 'resize' */
.ui-layout-resizer-dragging { /* resizer being 'dragged' */
background: rgba(255,255,255,0.5);
}
.ui-layout-resizer-dragging { /* CLONED resizer being dragged */
border-right: 1px solid #ddd !important;
border-left: 1px solid #fff !important;
}
/* NOTE: Add a 'dragging-limit' color to provide visual feedback when resizer hits min/max size limits */
.ui-layout-resizer-dragging-limit { /* CLONED resizer at min or max size-limit */
background: #E1A4A4; /* red */
}
.ui-layout-resizer-closed-hover { /* hover-color to 'slide open' */
background: #EBD5AA;
}
/* 'filter: alpha(...)' is the legacy IE spelling of the opacity set on the line above it. */
.ui-layout-resizer-sliding { /* resizer when pane is 'slid open' */
opacity: .10; /* show only a slight shadow */
filter: alpha(opacity=10);
}
.ui-layout-resizer-sliding-hover { /* sliding resizer - hover */
opacity: 1.00; /* on-hover, show the resizer-bar normally */
filter: alpha(opacity=100);
}
/* sliding resizer - add 'outside-border' to resizer on-hover
* this sample illustrates how to target specific panes and states */
.ui-layout-resizer-north-sliding-hover { border-bottom-width: 1px; }
.ui-layout-resizer-south-sliding-hover { border-top-width: 1px; }
.ui-layout-resizer-west-sliding-hover { border-right-width: 1px; }
.ui-layout-resizer-east-sliding-hover { border-left-width: 1px; }
/*
* TOGGLER-BUTTONS
*/
.ui-layout-toggler {
border: 1px solid #ddd; /* match pane-border */
background-color: #ddd;
}
.ui-layout-resizer-hover .ui-layout-toggler {
opacity: .60;
filter: alpha(opacity=60);
}
.ui-layout-resizer-hover .ui-layout-toggler-hover { /* need specificity */
background-color: #FC6;
opacity: 1.00;
filter: alpha(opacity=100);
}
.ui-layout-toggler-north ,
.ui-layout-toggler-south {
border-width: 0 1px; /* left/right borders */
}
.ui-layout-toggler-west ,
.ui-layout-toggler-east {
border-width: 1px 0; /* top/bottom borders */
}
/* hide the toggler-button when the pane is 'slid open'
 * (the descendant needs a leading dot: without it the selector looked for an
 * element named <ui-layout-toggler> and never matched) */
.ui-layout-resizer-sliding .ui-layout-toggler {
  display: none;
}
/*
* style the text we put INSIDE the togglers
*/
.ui-layout-toggler .content {
color: #666;
font-size: 12px;
font-weight: bold;
width: 100%;
padding-bottom: 0.35ex; /* to 'vertically center' text inside text-span */
}
/* Sort indicators: an arrow image at the right edge of an element carrying a sorting_* class. */
.sorting_asc {
background: url('../img/control-090-small.png') no-repeat center right;
}
.sorting_desc {
background: url('../img/control-270-small.png') no-repeat center right;
}
/* No indicator for the plain "sorting" class: its only declaration is commented out. */
.sorting {
/* background: url('../images/sort_both.png') no-repeat center right;*/
}
/* The *_disabled variants use the same images as their enabled counterparts. */
.sorting_asc_disabled {
background: url('../img/control-090-small.png') no-repeat center right;
}
.sorting_desc_disabled {
background: url('../img/control-270-small.png') no-repeat center right;
}
================================================
FILE: src/js/background.js
================================================
/*
* background.js
*
* Author: dave@bit155.com
*
* ---------------------------------------------------------------------------
*
* Copyright (c) 2010, David Heaton
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name of bit155 nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// oauth
// Configure the OAuth helper for this background page. The three URLs are
// Google's request-token, authorize and access-token endpoints; the scope
// limits access to the Google Docs feeds.
// NOTE(review): 'anonymous'/'anonymous' looks like the key/secret pair for an
// unregistered application -- confirm against Google's OAuth documentation.
var oauth = ChromeExOAuth.initBackgroundPage({
'request_url': 'https://www.google.com/accounts/OAuthGetRequestToken',
'authorize_url': 'https://www.google.com/accounts/OAuthAuthorizeToken',
'access_url': 'https://www.google.com/accounts/OAuthGetAccessToken',
'consumer_key': 'anonymous',
'consumer_secret': 'anonymous',
'scope': 'https://docs.google.com/feeds/',
'app_name': 'Scraper'
});
/*
 * Dispatches extension requests by their "command":
 *
 *   scraperScrapeTab   - forwards payload.options to the tab given by
 *                        payload.tab as a "scraperScrape" request and relays
 *                        the tab's answer through sendResponse.
 *   scraperSpreadsheet - uploads payload.csv to Google Docs under
 *                        payload.title, opens the created document in a new
 *                        tab and answers with the parsed JSON, or with
 *                        {error: <string>} when something goes wrong.
 *
 * Any other command is ignored.
 */
chrome.extension.onRequest.addListener(function(request, sender, sendResponse) {
  var command = request.command;
  var payload = request.payload;

  if (command === 'scraperScrapeTab') {
    // forward requests for "scraperScrape" to the appropriate tab
    chrome.tabs.sendRequest(parseInt(payload.tab, 10), { command: 'scraperScrape', payload: payload.options }, sendResponse);
  } else if (command === 'scraperSpreadsheet') {
    // export spreadsheet to google docs
    oauth.authorize(function() {
      // remove trailing colons from slug as this will result in error due to
      // http://code.google.com/a/google.com/p/apps-api-issues/issues/detail?id=2136
      var title = payload.title || '';
      var slug = encodeURIComponent(title.replace(/[:]+\s*$/,''));

      // named "upload" so it does not shadow the listener's own `request`
      var upload = {
        'method': 'POST',
        'headers': {
          'GData-Version': '3.0',
          'Content-Type': 'text/csv',
          'Slug': slug
        },
        'parameters': {
          'alt': 'json'
        },
        'body': payload.csv
      };

      var url = 'https://docs.google.com/feeds/default/private/full';

      var callback = function(response, xhr) {
        if (xhr.status === 401) {
          // unauthorized, token probably bad so clear it
          oauth.clearTokens();
          sendResponse({error: 'Google authentication failed. Please try exporting again, and you will be re-authenticated.'});
        } else if (xhr.status >= 200 && xhr.status < 300) {
          // Only 2xx counts as success. The former "status - 200 < 100" test
          // was also true for 0 (request never completed) and for 1xx.
          try {
            var json = JSON.parse(response);

            // open page
            if (json && json.entry && json.entry.link) {
              var links = json.entry.link;
              for (var i = 0; i < links.length; i++) {
                if (links[i].rel === 'alternate' && links[i].type === 'text/html') {
                  chrome.tabs.create({
                    url: links[i].href
                  });
                }
              }
            }

            // forward response to the caller
            sendResponse(json);
          } catch (error) {
            // Send text, like the other error branches do: an Error object
            // does not survive being serialized for the response.
            sendResponse({
              error: String(error)
            });
          }
        } else {
          sendResponse({
            error: 'Received an unexpected response.\n\n' + response
          });
        }
      };

      oauth.sendSignedRequest(url, callback, upload);
    });
  }
});
// Install the two stock presets, unless presets are already stored.
(function() {
  if (bit155.scraper.presets()) {
    return;
  }

  // Builds an XPath preset; both stock presets share the 'empty' filter.
  var xpathPreset = function(name, selector, attributes) {
    return {
      name: name,
      options: {
        language: 'xpath',
        selector: selector,
        attributes: attributes,
        filters: ['empty']
      }
    };
  };

  bit155.scraper.presets([
    xpathPreset('Paragraph Text', '//p', [
      { xpath: '.', name: 'Text' }
    ]),
    xpathPreset('Links', '//a', [
      { xpath: '.', name: 'Link' },
      { xpath: '@href', name: 'URL' }
    ])
  ]);
})();
// Context menu entry "Scrape similar...", shown in every context. A click asks
// the tab for its selection options and opens the viewer with the answer; if
// the tab has not answered after 500ms the user is offered a page reload.
var scrapeSimilarItem = chrome.contextMenus.create({
  title: "Scrape similar...",
  contexts: ['all'],
  onclick: function(info, tab) {
    var answered = false;

    // the tab answered: open the viewer with its response
    var openViewer = function(response) {
      answered = true;
      bit155.scraper.viewer(tab, response);
    };

    // no answer so far: offer to reload the page
    var offerReload = function() {
      if (answered) {
        return;
      }
      if (confirm('You need to reload this page before you can use Scraper. Press ok if you would like to reload it now, or cancel if not.')) {
        chrome.tabs.update(tab.id, {url: "javascript:window.location.reload()"});
      }
    };

    chrome.tabs.sendRequest(tab.id, { command: 'scraperSelectionOptions' }, openViewer);
    setTimeout(offerReload, 500);
  }
});
================================================
FILE: src/js/bit155/attr.js
================================================
/*
* attr.js
*
* Author: dave@bit155.com
*
* ---------------------------------------------------------------------------
*
* Copyright (c) 2010, David Heaton
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name of bit155 nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
var bit155 = bit155 || {};

/**
 * Creates an attribute accessor function.
 *
 * If one argument is passed to the accessor, then the value will be assigned
 * to the attribute.
 *
 * If multiple arguments are passed, then they will be stored in an array and
 * the array will be assigned to the attribute.
 *
 * Otherwise, if no arguments are provided, then the value of the attribute is
 * returned.
 *
 * Objects and arrays are deep-copied (with $.extend) when they are assigned,
 * so later changes to the object that was passed in do not affect the stored
 * value. null and primitive values are stored as they are.
 *
 * @param options {object} (optional) configuration:
 *    initial  {any} the initial value, will NOT be passed through the
 *             filter (null when no options are given)
 *    filter   {function(newValue, oldValue)} (optional) function called
 *             before assigning a new value, which returns a filtered version
 *             of the value; returning undefined keeps the value unchanged
 *    callback {function(newValue, oldValue)} (optional) function called
 *             after assigning a new value
 * @return {function} the accessor; when used as a setter it returns its own
 *         `this` so calls can be chained
 */
bit155.attr = function(options) {
  var _value = options ? options.initial : null;
  var filter = options ? options.filter : false;
  var callback = options ? options.callback : false;

  return function() {
    var newValue, oldValue;

    // getter
    if (arguments.length === 0) {
      return _value;
    }

    // setter: one argument is taken as is, several are collected in an array
    if (arguments.length === 1) {
      newValue = arguments[0];
    } else {
      newValue = Array.prototype.slice.call(arguments);
    }

    // filter value
    oldValue = _value;
    if (filter) {
      var filteredValue = filter.call(this, newValue, oldValue);
      if (filteredValue !== undefined) {
        newValue = filteredValue;
      }
    }

    // copy new value; typeof null is 'object' too, so null has to be excluded
    // explicitly or it would be stored as an empty object
    if (newValue !== null && typeof newValue === 'object') {
      _value = $.extend(true, $.isArray(newValue) ? [] : {}, newValue);
    } else {
      _value = newValue;
    }

    if (callback) {
      callback.call(this, newValue, oldValue);
    }

    return this;
  };
};
================================================
FILE: src/js/bit155/csv.js
================================================
/*
* csv.js
*
* Author: dave@bit155.com
*
* ---------------------------------------------------------------------------
*
* Copyright (c) 2010, David Heaton
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name of bit155 nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
var bit155 = bit155 || {};
bit155.csv = bit155.csv || {};

/**
 * Encodes a single value as a CSV cell.
 *
 * undefined and null become the empty string; any other non-string value is
 * converted with its own toString(). A cell containing a comma, a double
 * quote, or a line break is wrapped in double quotes, with every embedded
 * double quote doubled.
 *
 * @param cell {any} value to encode
 * @return {string} the encoded cell
 */
bit155.csv.cell = function(cell) {
  var text;

  if (cell === undefined || cell === null) {
    return "";
  }

  text = (typeof cell === 'string') ? cell : cell.toString();

  // nothing special inside? the text can be used verbatim
  if (text.search(/[,"\n\r]/) === -1) {
    return text;
  }

  return '"' + text.replace(/"/g, '""') + '"';
};

/**
 * Encodes one CSV row (without a trailing line break).
 *
 * The cells are taken from the single array argument when exactly one
 * argument is passed and it is an array; otherwise every argument is treated
 * as one cell.
 *
 * @param row (any) a single array of values or any number of variable
 *            arguments
 * @return {string} the encoded cells joined with commas
 */
bit155.csv.row = function() {
  var values = arguments,
      cells = [],
      index;

  // a lone array argument is the row itself
  if (arguments.length === 1 && $.isArray(arguments[0])) {
    values = arguments[0];
  }

  for (index = 0; index < values.length; index++) {
    cells.push(bit155.csv.cell(values[index]));
  }

  return cells.join(',');
};

/**
 * Encodes an array of rows as CSV text, ending every row with "\n".
 *
 * @param data {Array} the rows; each entry is handed to bit155.csv.row
 * @return {string} the CSV text, or the empty string when data is not an
 *         array
 */
bit155.csv.csv = function(data) {
  var lines = [],
      index;

  if (!$.isArray(data)) {
    return "";
  }

  for (index = 0; index < data.length; index++) {
    lines.push(bit155.csv.row(data[index]) + '\n');
  }

  return lines.join('');
};
================================================
FILE: src/js/bit155/scraper.js
================================================
/*
* scraper.js
*
* Author: dave@bit155.com
*
* ---------------------------------------------------------------------------
*
* Copyright (c) 2010, David Heaton
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name of bit155 nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
var bit155 = bit155 || {};
bit155.scraper = bit155.scraper || {};

/**
 * Opens a new viewer popup window bound to the specified tab.
 *
 * When no tab is given, the currently selected tab is looked up and this
 * function calls itself again with it. Nothing is opened for tabs showing
 * the Chrome extensions gallery or a chrome:// page; an alert is shown
 * instead.
 *
 * The window size comes from localStorage['viewer.width'] and
 * localStorage['viewer.height'] (defaults 960x400, never smaller than
 * 650x250).
 *
 * @param {Object} tab (optional) the tab object to bind the viewer to
 *                 (defaults to the currently selected tab)
 * @param {Object} options (optional) options to initialize viewer with;
 *                 passed to viewer.html as JSON in the query string
 */
bit155.scraper.viewer = function(tab, options) {
  var url;

  // Reads a stored window dimension. Missing or non-numeric values fall back
  // to the default so that Math.max never sees NaN (which would make the
  // whole dimension NaN).
  function dimension(key, fallback, minimum) {
    var stored = parseInt(localStorage[key], 10);
    return Math.max(minimum, isNaN(stored) ? fallback : stored);
  }

  options = options || {};

  // call this again with selected tab if none specified
  if (!tab) {
    chrome.tabs.getSelected(undefined, function(selected) {
      if (selected) {
        bit155.scraper.viewer(selected, options);
      }
    });
    return;
  }

  // can't work on extensions pages; a tab object without a url is treated as
  // an ordinary page rather than crashing on the lookup
  url = tab.url || '';
  if (url.indexOf("https://chrome.google.com/extensions") === 0 || url.indexOf("chrome://") === 0) {
    alert("Scraper is not permitted to work on the Google Chrome extensions page for security reasons.");
    return;
  }

  // open the viewer, telling it which tab and options to use
  chrome.windows.create({
    url: chrome.extension.getURL('viewer.html')
      + "?tab=" + tab.id
      + "&options=" + encodeURIComponent(JSON.stringify(options)),
    type: 'popup',
    width: dimension('viewer.width', 960, 650),
    height: dimension('viewer.height', 400, 250)
  });
};
/**
 * The saved presets, kept in a bit155.attr accessor and persisted as JSON in
 * localStorage['presets'].
 *
 * The initial value is read from localStorage['presets'], falling back to the
 * older localStorage['viewer.presets'] key (a migration, since presets have a
 * larger scope than just the viewer) and finally to null.
 */
bit155.scraper.presets = bit155.attr({
  initial: JSON.parse(localStorage['presets'] || localStorage['viewer.presets'] || 'null'),

  // rejects anything that is neither falsy nor an array
  // NOTE(review): presumably run by bit155.attr before a new value is
  // stored; confirm against attr.js
  filter: function(presets) {
    var invalid = presets && !$.isArray(presets);
    if (invalid) {
      throw new Error('Preset must be an array.');
    }
    return presets;
  },

  // persists every new value; a falsy value is written as null, which Web
  // Storage keeps as the text "null" and JSON.parse above reads back as null
  callback: function(presets) {
    if (presets) {
      localStorage['presets'] = JSON.stringify(presets);
    } else {
      localStorage['presets'] = null;
    }
  }
});
/**
 * Generates an xpath that is specific, but hopefully not too specific, for
 * a node.
 *
 * Works in two passes against the global document:
 *
 *   1. Numeric predicates ("[3]") are removed from the end of the node's
 *      full xpath, one at a time, until the path matches more than one node
 *      or no predicate is left.
 *   2. Leading segments are then replaced by a descendant search ("//") for
 *      as long as the shortened path still matches the same number of nodes.
 *
 * A path that has no numeric predicate to begin with is returned unchanged.
 *
 * @param {Object} node to generate xpath for
 * @return {string} the generalized xpath
 */
bit155.scraper.xpathForNode = function(node) {
  var xpath = $(node).xpath(),
      lastPredicate = /^(.*)(\[\d+\])([^\[\]]*)$/,
      firstSegment = /^(\/+[^\/]+)(.*)$/,
      parts,
      selection,
      selectionIsCurrent = false,
      candidate,
      trimmed;

  // keep cutting out the last predicate until we match more than one node
  // and consider this our ideal selection
  while ((parts = lastPredicate.exec(xpath))) {
    selection = bit155.scraper.select(document, xpath, 'xpath');
    if (selection.length > 1) {
      selectionIsCurrent = true;
      break;
    }
    xpath = parts[1] + parts[3];
  }

  // no predicate was ever found, so there is nothing to generalize
  if (!selection) {
    return xpath;
  }

  // when the loop ran out of predicates, the last selection was made before
  // the final predicate was removed; refresh it so the trimming below
  // compares against what the current xpath really matches
  if (!selectionIsCurrent) {
    selection = bit155.scraper.select(document, xpath, 'xpath');
  }

  // trim the front of the path until we have smallest xpath that returns
  // same number of elements; stop at the last segment, because trimming it
  // would leave "/" (the document itself)
  while ((parts = firstSegment.exec(xpath)) && parts[2] !== '') {
    candidate = '/' + parts[2];
    trimmed = bit155.scraper.select(document, candidate, 'xpath') || [];
    if (trimmed.length !== selection.length) {
      break;
    }
    xpath = candidate;
  }

  return xpath;
};
/**
 * Generates bit155.scraper.scrape options for the given selection, guessing
 * reasonable defaults from the kind of element that was selected.
 *
 * The element to scrape is the common ancestor of the focus and anchor
 * nodes (or the closest element to the focus node when there is no anchor),
 * adjusted for containers: tables select the focus node's row, definition
 * lists their first term, and ordered/unordered lists their first item.
 *
 * @param {Object} focusNode same semantics as Selection.focusNode
 * @param {Object} anchorNode (optional) same as Selection.anchorNode
 * @param {HTMLDocument} doc the document in which to match. Accepted for
 *        compatibility but currently unused: the xpath is generated by
 *        bit155.scraper.xpathForNode, which takes only the node.
 * @return {Object} options with "language", "selector" and "attributes" (an
 *         array of { xpath, name } objects). When no element could be
 *         determined the language is 'jquery', the selector is empty and
 *         there are no attributes.
 */
bit155.scraper.optionsForSelection = function(focusNode, anchorNode, doc) {
  var options = {
        language: 'jquery',
        selector: '',
        attributes: []
      },
      ancestor,
      tagName,
      node;

  // Guesses the column names for a table row. If the first row of the table
  // contains the same number of TH cells as the row has cells, it is assumed
  // to hold the column names; otherwise the row itself is used. Cells that
  // are not TH, or whose text is blank, are named "Column N".
  function columnNames(row) {
    var columns = row.children().length,
        firstRow = row.closest('table').find('tr').first(),
        headerRow = (firstRow.children('th').length === columns) ? firstRow : row;

    return headerRow.children().map(function(index, cell) {
      var fallback = 'Column ' + (index + 1);
      if (cell.tagName.toLowerCase() !== 'th') {
        return fallback;
      }
      // header text often carries layout whitespace; trim it
      return $.trim($(cell).text()) || fallback;
    }).get();
  }

  // determine common ancestor based on user's current selection
  if (anchorNode) {
    ancestor = $([focusNode, anchorNode]).commonAncestor();
  } else {
    ancestor = $(focusNode).closest('*');
  }

  // tweak ancestor for some types of elements
  // XXX design
  if (ancestor && ancestor.length > 0) {
    tagName = ancestor.get(0).tagName.toLowerCase();
    if (tagName === 'table' || tagName === 'tbody' || tagName === 'thead' || tagName === 'tfoot') {
      // table? select rows instead
      ancestor = $(focusNode).closest('tr');
    } else if (tagName === 'dl') {
      // dl? select terms instead
      ancestor = ancestor.find('dt').first();
    } else if (tagName === 'ul' || tagName === 'ol') {
      // list? select items instead
      ancestor = ancestor.find('li').first();
    }
  }

  // nothing to scrape: return the empty defaults
  if (!ancestor || ancestor.length === 0) {
    return options;
  }

  node = ancestor.get(0);
  tagName = node.tagName.toLowerCase();

  // find first xpath that matches more than one element by removing the
  // index selector from each xpath segment. biggest caveats:
  //
  //   * only selecting elements with same structure
  //   * won't work when selecting an outlier with deeper structure than peers
  //   * ignores semantics
  //
  options.language = 'xpath';
  options.selector = bit155.scraper.xpathForNode(node);

  // use "magical" attributes depending on what custom ancestor is
  if (tagName === 'tr') {
    // create an attribute for each column
    $.each(columnNames(ancestor), function(index, name) {
      options.attributes.push({ xpath: '*[' + (index + 1) + ']', name: name });
    });

    // append a [td] constraint to the selector so that we don't scrape
    // rows containing only headers
    options.selector = options.selector + "[td]";
  } else if (tagName === 'a') {
    options.attributes.push({ xpath: '.', name: 'Link' });
    options.attributes.push({ xpath: '@href', name: 'URL' });
  } else if (tagName === 'img') {
    options.attributes.push({ xpath: '@title', name: 'Title' });
    options.attributes.push({ xpath: '@src', name: 'Source' });
  } else if (tagName === 'dt') {
    options.attributes.push({ xpath: '.', name: 'Term' });
    options.attributes.push({ xpath: './following-sibling::dd', name: 'Definition' });
  } else {
    options.attributes.push({ xpath: '.', name: 'Text' });
  }

  return options;
};
/**
 * Selects nodes using a selector string written in one of the supported
 * languages and returns them wrapped in a jQuery object.
 *
 * @param {node} context what to search. Must satisfy typeof === 'object';
 *        for xpath, a falsy context (null passes that check) falls back to
 *        the global document.
 * @param {string} selector the query string
 * @param {string} language what language ("jquery" or "xpath") the selector
 *        is expressed in
 * @return {Object} jQuery object holding the matches
 * @throws a string when the context or the selector has the wrong type, and
 *         an Error when the language is not supported
 */
bit155.scraper.select = function(context, selector, language) {
  var iterator,
      node,
      matches;

  if (typeof context !== 'object') {
    throw "Context object is required.";
  }
  if (typeof selector !== 'string') {
    throw "Selector string is required.";
  }

  switch (language) {
  case 'jquery':
    return $(context).find(selector);

  case 'xpath':
    // https://developer.mozilla.org/en/XPathResult
    // http://stackoverflow.com/questions/727902/jquery-select-text
    iterator = document.evaluate(selector, context || document, null, XPathResult.ANY_TYPE, null);
    matches = [];
    // iterateNext() hands back a falsy value once the result is exhausted
    while ((node = iterator.iterateNext())) {
      matches.push(node);
    }
    return $(matches);

  default:
    throw new Error('Unsupported selector language: ' + language);
  }
};
/**
 * Scrapes the current document.
 *
 * Every node matched by options.selector (in options.language) becomes one
 * result row; each attribute's xpath is evaluated relative to that node as a
 * string.
 *
 * @param {Object} options scrape options:
 *        - selector / language: handed to bit155.scraper.select
 *        - attributes: (optional) objects carrying an "xpath" to evaluate
 *        - filters: (optional) filter names; 'empty' drops rows whose
 *          values are all blank after trimming
 * @return {Array} one { xpath, values } object per row that survived the
 *         filters
 * @throws Error when an attribute has no xpath
 */
bit155.scraper.scrape = function(options) {
  var attributes = options['attributes'] || [],
      filters = options.filters || [],
      rows = [];

  // true when at least one scraped value of the row is not blank
  function hasContent(row) {
    var k;
    for (k = 0; k < row.values.length; k++) {
      if ($.trim(row.values[k]) !== '') {
        return true;
      }
    }
    return false;
  }

  // make sure xpath in each attribute
  $.each(attributes, function(index, attribute) {
    if (!attribute || !attribute.xpath) {
      throw new Error("XPath is required for each attribute.");
    }
  });

  // collect results
  bit155.scraper.select(document, options.selector, options.language).each(function(index, element) {
    var values = [];

    $.each(attributes, function(k, attribute) {
      var evaluated = document.evaluate(attribute.xpath, element, null, XPathResult.STRING_TYPE, null);
      values.push(evaluated.stringValue);
    });

    rows.push({
      'xpath': $(element).xpath(),
      'values': values
    });
  });

  // apply filters
  $.each(filters, function(index, filter) {
    if (filter === 'empty') {
      rows = rows.filter(hasContent);
    }
  });

  return rows;
};
================================================
FILE: src/js/contentscript.js
================================================
/*
* contentscript.js
*
* Author: dave@bit155.com
*
* ---------------------------------------------------------------------------
*
* Copyright (c) 2010, David Heaton
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name of bit155 nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
(function(){
  // Element the context menu was last opened on. Set by the "contextmenu"
  // listener and cleared once a 'scraperSelectionOptions' request used it.
  var contextNode;

  // listen for context menu
  addEventListener("contextmenu", function(e) {
    contextNode = e.srcElement;
  });

  /**
   * Turns a caught value into text for the response. Error objects have no
   * enumerable properties, so they would not survive the JSON serialization
   * of extension messages; their message is sent instead. Strings (as thrown
   * by bit155.scraper.select) pass through unchanged.
   */
  function errorMessage(error) {
    if (error && typeof error.message === 'string') {
      return error.message;
    }
    return String(error);
  }

  /**
   * Extends the response with scrape options generated from the current
   * selection, or sets response.error when no context node is known.
   */
  function addSelectionOptions(response) {
    var selection,
        focusNode,
        anchorNode;

    // abort if no contextNode as probably being invoked from another
    // frame
    if (!contextNode) {
      response.error = "Frames are not supported at the moment. Please open the frame in a new tab or window and try scraping again.";
      return;
    }

    // determine range of selection
    selection = window.getSelection();
    if (selection.isCollapsed) {
      // nothing selected, so use whatever node is under the cursor
      focusNode = contextNode;
    } else {
      // select focus and anchor nodes from selection
      focusNode = selection.focusNode;
      anchorNode = selection.anchorNode;
    }

    // clear context node
    contextNode = null;

    // extend response with options generated from current selection
    $.extend(response, bit155.scraper.optionsForSelection(focusNode, anchorNode, window.document));
  }

  /**
   * Scrolls to and highlights the elements described by the payload, which
   * may carry a selector + language, an xpath, or a jquery selector. Does
   * nothing when nothing matches.
   */
  function highlight(payload) {
    var elements,
        offset;

    if (payload.selector) {
      elements = bit155.scraper.select(document, payload.selector, payload.language);
    } else if (payload.xpath) {
      elements = bit155.scraper.select(document, payload.xpath, 'xpath');
    } else if (payload.jquery) {
      elements = $(payload.jquery);
    }

    // a jQuery object is truthy even when empty, and offset() has nothing
    // to report for an empty set, so check the length explicitly
    if (elements && elements.length > 0) {
      offset = elements.offset();
      if (offset) {
        window.scrollTo(offset.left, offset.top);
      }
      elements.filter(':visible').effect('highlight', {}, 'slow');
    }
  }

  // listen for requests; every request is answered with a copy of its
  // payload, extended with the command's result or an "error" text
  chrome.extension.onRequest.addListener(function(request, sender, sendResponse) {
    var command = request.command,
        payload = request.payload,
        response = $.extend({}, payload);

    try {
      if (command === 'scraperScrape') {
        // scrape
        response.result = bit155.scraper.scrape(response);
      } else if (command === 'scraperSelectionOptions') {
        // selection options
        addSelectionOptions(response);
      } else if (command === 'scraperHighlight') {
        // highlight
        highlight(payload);
      } else if (command === 'scraperPing') {
        // ping: nothing to do, the response itself is the answer
      } else {
        throw new Error('Unsupported request: ' + JSON.stringify(request));
      }
    } catch (error) {
      console.error(error);
      response.error = errorMessage(error);
    }

    sendResponse(response);
  });
}());
================================================
FILE: src/js/popup.js
================================================
/*
* popup.js
*
* Author: dave@bit155.com
*
* ---------------------------------------------------------------------------
*
* Copyright (c) 2010, David Heaton
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name of bit155 nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
$(function() {
var
presets = bit155.scraper.presets(),
presetList = $('#presets');
$('.viewer').click(function() {
bit155.scraper.viewer();
return false;
});
if (presets.length === 0) {
presetList.append($('
').text("No presets have been defined yet."));
} else {
$.each(presets, function(index, preset) {
presetList.append($('