SYMBOL INDEX (182 symbols across 16 files) FILE: crawlplot.py class CrawlPlot (line 33) | class CrawlPlot: method create_figure (line 88) | def create_figure(self, ratio=1.0): method set_title (line 100) | def set_title(self, ax, title): method apply_ggplot2_style (line 115) | def apply_ggplot2_style(self, ax, show_grid=True, grid_axis='both'): method set_tick_labels_black (line 136) | def set_tick_labels_black(self, ax): method apply_nice_ticks (line 145) | def apply_nice_ticks(self, ax, axis='y', use_scientific=True): method save_figure (line 174) | def save_figure(self, fig, img_path): method hide_tick_marks (line 192) | def hide_tick_marks(self, ax, tick_color='#FFFFFF'): method __init__ (line 204) | def __init__(self): method read_from_stdin_or_file (line 272) | def read_from_stdin_or_file(self): method read_data (line 290) | def read_data(self, stream): method line_plot_with_ggplot (line 306) | def line_plot_with_ggplot( method line_plot_with_rpy2_ggplot2 (line 334) | def line_plot_with_rpy2_ggplot2( method nice_tick_step (line 375) | def nice_tick_step(vmin, vmax, n=5): method center_legend_title (line 399) | def center_legend_title(fig, ax, leg_items, leg_title, x_axes=0.1): method line_plot_with_matplotlib (line 408) | def line_plot_with_matplotlib( method line_plot (line 518) | def line_plot( FILE: crawlstats.py class MonthlyCrawl (line 35) | class MonthlyCrawl: method get_by_name (line 168) | def get_by_name(name): method to_name (line 172) | def to_name(crawl): method to_bit_mask (line 176) | def to_bit_mask(crawl): method date_of (line 180) | def date_of(crawl): method year_of (line 191) | def year_of(crawl): method short_name (line 195) | def short_name(name): method get_latest (line 199) | def get_latest(n): class MonthlyCrawlSet (line 203) | class MonthlyCrawlSet: method __init__ (line 209) | def __init__(self, crawls=0): method add (line 212) | def add(self, crawl): method update (line 215) | def update(self, *others): method clear (line 219) | def clear(self): method discard (line 222) | def discard(self, crawl): method __contains__ (line 225) | def __contains__(self, crawl): method __len__ (line 228) | def __len__(self): method get_bits (line 235) | def get_bits(self): method get_crawls (line 238) | def get_crawls(self): method is_new (line 247) | def is_new(self, crawl): method is_newest (line 263) | def is_newest(self, crawl): class CST (line 271) | class CST(Enum): class MultiCount (line 375) | class MultiCount(defaultdict): method __init__ (line 378) | def __init__(self, size): method incr (line 382) | def incr(self, key, *counts): method compress (line 387) | def compress(size, counts): method get_compressed (line 397) | def get_compressed(self, key): method get_count (line 401) | def get_count(index, value): method sum_values (line 409) | def sum_values(values, compress=True): class CrawlStatsJSONEncoder (line 436) | class CrawlStatsJSONEncoder(json.JSONEncoder): method default (line 438) | def default(self, o): method json_encode_hyperloglog (line 446) | def json_encode_hyperloglog(o): class CrawlStatsJSONDecoder (line 452) | class CrawlStatsJSONDecoder(json.JSONDecoder): method __init__ (line 454) | def __init__(self, *args, **kargs): method dict_to_object (line 458) | def dict_to_object(self, dic): method json_decode_hyperloglog (line 471) | def json_decode_hyperloglog(dic): class HostDomainCount (line 480) | class HostDomainCount: method __init__ (line 487) | def __init__(self): method add (line 491) | def add(self, url, count): method output (line 499) | def output(self, crawl): class SurtDomainCount (line 529) | class SurtDomainCount: method __init__ (line 534) | def __init__(self, surt_domain): method add (line 547) | def add(self, _path, metadata): method unique_urls (line 595) | def unique_urls(self): method output (line 598) | def output(self, crawl, exact_count=True, min_surt_hll_size=50000): class UnhandledTypeError (line 642) | class UnhandledTypeError(Exception): method __init__ (line 643) | def __init__(self, outputType): class InputError (line 647) | class InputError(Exception): method __init__ (line 648) | def __init__(self, message): class CCStatsJob (line 652) | class CCStatsJob(MRJob): method configure_args (line 674) | def configure_args(self): method input_protocol (line 711) | def input_protocol(self): method hadoop_input_format (line 718) | def hadoop_input_format(self): method count_mapper_init (line 726) | def count_mapper_init(self): method count_mapper (line 766) | def count_mapper(self, _, line): method count_mapper_final (line 801) | def count_mapper_final(self): method reducer_init (line 829) | def reducer_init(self): method count_reducer (line 833) | def count_reducer(self, key, values): method stats_mapper_init (line 910) | def stats_mapper_init(self): method stats_mapper (line 913) | def stats_mapper(self, key, value): method stats_mapper_final (line 944) | def stats_mapper_final(self): method stats_reducer (line 948) | def stats_reducer(self, key, values): method reducer_final (line 1007) | def reducer_final(self): method steps (line 1021) | def steps(self): FILE: plot/charset.py class CharsetStats (line 7) | class CharsetStats(TabularStats): method __init__ (line 12) | def __init__(self): method add (line 16) | def add(self, key, val): FILE: plot/crawl_size.py class CrawlSizePlot (line 26) | class CrawlSizePlot(CrawlPlot): method __init__ (line 34) | def __init__(self): method add (line 46) | def add(self, key, val): method add_by_type (line 63) | def add_by_type(self, crawl, item_type, count): method cumulative_size (line 90) | def cumulative_size(self): method transform_data (line 157) | def transform_data(self): method save_data (line 162) | def save_data(self): method duplicate_ratio (line 167) | def duplicate_ratio(self): method plot (line 178) | def plot(self): method plot_with_rpy2_ggplot2 (line 310) | def plot_with_rpy2_ggplot2(self, by_year_by_type, img_path): method plot_with_matplotlib (line 340) | def plot_with_matplotlib(self, by_year_by_type, img_path): method export_csv (line 434) | def export_csv(self, data, csv): method norm_data (line 441) | def norm_data(self, data, row_filter, type_name_norm): method size_plot (line 460) | def size_plot(self, data, row_filter, type_name_norm, FILE: plot/crawler_metrics.py class CrawlerMetrics (line 26) | class CrawlerMetrics(CrawlSizePlot): method __init__ (line 68) | def __init__(self): method add (line 72) | def add(self, key, val): method save_data (line 90) | def save_data(self): method add_percent (line 96) | def add_percent(self): method row2title (line 116) | def row2title(row): method plot (line 124) | def plot(self): method plot_fetch_status_with_rpy2_ggplot2 (line 162) | def plot_fetch_status_with_rpy2_ggplot2(self, data, img_path, ratio): method plot_fetch_status_with_matplotlib (line 183) | def plot_fetch_status_with_matplotlib(self, data, categories, img_path... method plot_fetch_status (line 248) | def plot_fetch_status(self, data, row_filter, img_file, ratio=1.0): method plot_crawldb_status_with_rpy2_ggplot2 (line 271) | def plot_crawldb_status_with_rpy2_ggplot2(self, data, img_path, ratio): method plot_crawldb_status_with_matplotlib (line 291) | def plot_crawldb_status_with_matplotlib(self, data, img_path, ratio): method plot_crawldb_status (line 360) | def plot_crawldb_status(self, data, row_filter, img_file, ratio=1.0): FILE: plot/domain.py class DomainStats (line 9) | class DomainStats(TabularStats): method __init__ (line 14) | def __init__(self, crawl): method add (line 19) | def add(self, key, val): method transform_data (line 37) | def transform_data(self): method save_data (line 46) | def save_data(self, name, dir_name='data/'): method plot (line 50) | def plot(self, name): FILE: plot/histogram.py class CrawlHistogram (line 22) | class CrawlHistogram(CrawlPlot): method __init__ (line 34) | def __init__(self): method add (line 39) | def add(self, key, frequency): method transform_data (line 57) | def transform_data(self): method save_data (line 61) | def save_data(self): method plot_dupl_url (line 65) | def plot_dupl_url(self): method plot_host_domain_tld (line 86) | def plot_host_domain_tld(self): method plot_domain_cumul_with_rpy2_ggplot2 (line 108) | def plot_domain_cumul_with_rpy2_ggplot2(self, data, title, img_path): method plot_domain_cumul (line 125) | def plot_domain_cumul(self, crawl): FILE: plot/language.py class LanguageStats (line 8) | class LanguageStats(TabularStats): method __init__ (line 13) | def __init__(self): method add (line 17) | def add(self, key, val): FILE: plot/mimetype.py class MimeTypeStats (line 8) | class MimeTypeStats(TabularStats): method __init__ (line 22) | def __init__(self): method norm_value (line 26) | def norm_value(self, mimetype): method add (line 35) | def add(self, key, val): FILE: plot/mimetype_detected.py class MimeTypeDetectedStats (line 7) | class MimeTypeDetectedStats(MimeTypeStats): method __init__ (line 9) | def __init__(self): method norm_value (line 13) | def norm_value(self, mimetype): method add (line 16) | def add(self, key, val): FILE: plot/overlap.py class CrawlOverlap (line 20) | class CrawlOverlap(CrawlPlot): method __init__ (line 30) | def __init__(self): method add (line 37) | def add(self, key, val): method fill_overlap_matrix (line 47) | def fill_overlap_matrix(self): method save_overlap_matrix (line 70) | def save_overlap_matrix(self): method plot_similarity_graph (line 78) | def plot_similarity_graph(self, show_edges=False): method plot_similarity_matrix_with_rpy2_ggplot2 (line 100) | def plot_similarity_matrix_with_rpy2_ggplot2(self, data, midpoint, tit... method plot_similarity_matrix_with_matplotlib (line 122) | def plot_similarity_matrix_with_matplotlib(self, data, decimals, title... method plot_similarity_matrix (line 211) | def plot_similarity_matrix(self, item_type, image_file, title): FILE: plot/table.py class TabularStats (line 12) | class TabularStats(CrawlPlot): method __init__ (line 14) | def __init__(self): method norm_value (line 24) | def norm_value(self, typeval): method add_check_type (line 27) | def add_check_type(self, key, val, requ_type_cst): method transform_data (line 49) | def transform_data(self, top_n, min_avg_count, check_pattern=None): method save_data (line 121) | def save_data(self, base_name, dir_name='data/'): method save_data_percentage (line 124) | def save_data_percentage(self, base_name, dir_name='data/', type_name=... method plot (line 137) | def plot(self, crawls, name, column_header, xtra_css_classes=[]): FILE: plot/tld.py class TldStats (line 18) | class TldStats(CrawlPlot): method __init__ (line 20) | def __init__(self): method add (line 27) | def add(self, key, val): method transform_data (line 35) | def transform_data(self): method field_percentage_formatter (line 89) | def field_percentage_formatter(precision=2, nan='-'): method save_data (line 94) | def save_data(self): method percent_agg (line 97) | def percent_agg(self, data, column, index, values, aggregate): method pivot_percentage (line 105) | def pivot_percentage(self, data, column, index, values, aggregate): method plot_groups (line 110) | def plot_groups(self): method plot (line 130) | def plot(self, crawls, latest_crawl): method plot_comparison (line 187) | def plot_comparison(self, crawl, name, topNlimit=None, method='spearma... method plot_comparison_groups (line 232) | def plot_comparison_groups(self): FILE: plot/tld_by_continent.py function tld2continent (line 126) | def tld2continent(tld): function get_data (line 135) | def get_data(f): class TLDByContinentPlot (line 163) | class TLDByContinentPlot(CrawlPlot): method __init__ (line 166) | def __init__(self): method plot (line 169) | def plot(self): method plot_with_rpy2_ggplot2 (line 280) | def plot_with_rpy2_ggplot2(self, data): method plot_with_matplotlib (line 301) | def plot_with_matplotlib(self, data): FILE: tests/test_crawlstat.py function test_monthly_crawl (line 18) | def test_monthly_crawl(): function test_monthly_crawl_set (line 25) | def test_monthly_crawl_set(): function test_crawlstatstype (line 78) | def test_crawlstatstype(): function test_json_hyperloglog (line 83) | def test_json_hyperloglog(): function test_multicount (line 96) | def test_multicount(): FILE: top_level_domain.py class TopLevelDomain (line 6) | class TopLevelDomain: method __init__ (line 24) | def __init__(self, tld): method __str__ (line 43) | def __str__(self): method _read_data (line 58) | def _read_data(): method short_type (line 117) | def short_type(name):