Copy disabled (too large)
Download .txt
Showing preview only (43,393K chars total). Download the full file to get everything.
Repository: molybdenum-99/infoboxer
Branch: master
Commit: 779c688c493d
Files: 213
Total size: 41.3 MB
Directory structure:
gitextract_fx0stib8/
├── .codeclimate.yml
├── .dokaz
├── .github/
│ └── workflows/
│ └── ci.yml
├── .gitignore
├── .rspec
├── .rubocop.yml
├── .rubocop_todo.yml
├── .travis.yml
├── .yardopts
├── CHANGELOG.md
├── CONTRIBUTING.md
├── Gemfile
├── LICENSE.txt
├── Parsing.md
├── README.md
├── Rakefile
├── bin/
│ └── infoboxer
├── infoboxer.gemspec
├── lib/
│ ├── infoboxer/
│ │ ├── core_ext.rb
│ │ ├── definitions/
│ │ │ └── en.wikipedia.org.rb
│ │ ├── media_wiki/
│ │ │ ├── page.rb
│ │ │ └── traits.rb
│ │ ├── media_wiki.rb
│ │ ├── navigation/
│ │ │ ├── lookup.rb
│ │ │ ├── sections.rb
│ │ │ ├── selector.rb
│ │ │ ├── shortcuts.rb
│ │ │ └── wikipath.rb
│ │ ├── navigation.rb
│ │ ├── parser/
│ │ │ ├── context.rb
│ │ │ ├── html.rb
│ │ │ ├── image.rb
│ │ │ ├── inline.rb
│ │ │ ├── paragraphs.rb
│ │ │ ├── table.rb
│ │ │ ├── template.rb
│ │ │ └── util.rb
│ │ ├── parser.rb
│ │ ├── templates/
│ │ │ ├── base.rb
│ │ │ └── set.rb
│ │ ├── templates.rb
│ │ ├── tree/
│ │ │ ├── compound.rb
│ │ │ ├── document.rb
│ │ │ ├── gallery.rb
│ │ │ ├── html.rb
│ │ │ ├── image.rb
│ │ │ ├── inline.rb
│ │ │ ├── linkable.rb
│ │ │ ├── list.rb
│ │ │ ├── math.rb
│ │ │ ├── node.rb
│ │ │ ├── nodes.rb
│ │ │ ├── paragraphs.rb
│ │ │ ├── ref.rb
│ │ │ ├── table.rb
│ │ │ ├── template.rb
│ │ │ ├── text.rb
│ │ │ └── wikilink.rb
│ │ ├── tree.rb
│ │ ├── version.rb
│ │ └── wiki_path.rb
│ └── infoboxer.rb
├── profile/
│ ├── out/
│ │ └── .gitkeep
│ └── pages/
│ ├── argentina.txt
│ ├── canada.wiki
│ ├── ukraine.wiki
│ └── usa.wiki
├── regression/
│ └── pages/
│ ├── 2012_bdo_world_darts_championship.wiki
│ ├── area.wiki
│ ├── articuno.wiki
│ ├── canada.wiki
│ ├── chiang_mai.wiki
│ ├── greece.wiki
│ ├── list_of_countries.wiki
│ ├── progress_wrestling.wiki
│ ├── south_america.wiki
│ ├── south_america_new.wiki
│ ├── ukraine.wiki
│ ├── ukrainian_galician_army.wiki
│ ├── usa.wiki
│ └── wyoming.wiki
└── spec/
├── dokaz_helpers.rb
├── fixtures/
│ ├── argentina.wiki
│ ├── broken_table_caption.txt
│ ├── large_infobox.txt
│ ├── large_table.txt
│ └── vcr_cassettes/
│ ├── Infoboxer/
│ │ └── common_MediaWiki_shortcuts/
│ │ ├── Wikia/
│ │ │ ├── language/
│ │ │ │ ├── 1_1_2_3_1.yml
│ │ │ │ └── api_base_url_to_s/
│ │ │ │ └── 1_1_2_3_2_1.yml
│ │ │ ├── simple/
│ │ │ │ ├── 1_1_2_1_1.yml
│ │ │ │ └── api_base_url_to_s/
│ │ │ │ └── 1_1_2_1_2_1.yml
│ │ │ └── subdomain/
│ │ │ ├── 1_1_2_2_1.yml
│ │ │ └── api_base_url_to_s/
│ │ │ └── 1_1_2_2_2_1.yml
│ │ └── Wikipedia/
│ │ ├── caching/
│ │ │ └── constructs_object_only_once.yml
│ │ ├── default/
│ │ │ ├── 1_1_1_1_1.yml
│ │ │ └── api_base_url_to_s/
│ │ │ └── 1_1_1_1_2_1.yml
│ │ ├── language/
│ │ │ └── api_base_url_to_s/
│ │ │ └── 1_1_1_3_1_1.yml
│ │ └── shortcut/
│ │ └── api_base_url_to_s/
│ │ └── 1_1_1_4_1_1.yml
│ ├── Infoboxer_MediaWiki/
│ │ ├── category/
│ │ │ ├── category_name_transformation/
│ │ │ │ ├── default_namespace/
│ │ │ │ │ └── uri_query_values/
│ │ │ │ │ └── 1_6_3_2_1_1.yml
│ │ │ │ ├── localized_namespace/
│ │ │ │ │ └── uri_query_values/
│ │ │ │ │ └── 1_6_3_3_1_1.yml
│ │ │ │ ├── not_a_namespace/
│ │ │ │ │ └── uri_query_values/
│ │ │ │ │ └── 1_6_3_4_1_1.yml
│ │ │ │ └── when_no_namespace/
│ │ │ │ └── uri_query_values/
│ │ │ │ └── 1_6_3_1_1_1.yml
│ │ │ ├── when_category_exists/
│ │ │ │ ├── 1_6_1_1.yml
│ │ │ │ ├── count/
│ │ │ │ │ └── 1_6_1_2_1.yml
│ │ │ │ └── map_title_/
│ │ │ │ └── 1_6_1_3_1.yml
│ │ │ └── when_category_is_not/
│ │ │ ├── 1_6_2_1.yml
│ │ │ └── 1_6_2_2.yml
│ │ ├── get/
│ │ │ ├── _prop/
│ │ │ │ └── source/
│ │ │ │ └── 1_4_6_1_1.yml
│ │ │ ├── interwiki/
│ │ │ │ └── url/
│ │ │ │ └── 1_4_7_1_1.yml
│ │ │ ├── processor/
│ │ │ │ └── source/
│ │ │ │ └── 1_4_6_1_1.yml
│ │ │ ├── when_invalid_title_requested/
│ │ │ │ ├── as_block/
│ │ │ │ │ └── 1_4_5_1_1.yml
│ │ │ │ └── call/
│ │ │ │ └── 1_4_5_1_1.yml
│ │ │ ├── when_several_pages/
│ │ │ │ └── 1_4_2_1.yml
│ │ │ ├── when_several_pages_including_non-existent/
│ │ │ │ └── count/
│ │ │ │ └── 1_4_4_1_1.yml
│ │ │ ├── when_signle_non-existing_page/
│ │ │ │ └── 1_4_3_1.yml
│ │ │ └── when_single_page/
│ │ │ ├── 1_4_1_1.yml
│ │ │ ├── source/
│ │ │ │ └── 1_4_1_4_1.yml
│ │ │ ├── title/
│ │ │ │ └── 1_4_1_2_1.yml
│ │ │ └── url/
│ │ │ └── 1_4_1_3_1.yml
│ │ ├── get_h/
│ │ │ ├── when_several_pages_including_non-existent/
│ │ │ │ ├── 1_5_1_1.yml
│ │ │ │ ├── _WTF_I_just_read_Make_me_unsee_it_/
│ │ │ │ │ └── 1_5_1_3_1.yml
│ │ │ │ └── keys/
│ │ │ │ └── 1_5_1_2_1.yml
│ │ │ ├── when_several_pages_including_redirected_to_same/
│ │ │ │ ├── 1_5_2_1.yml
│ │ │ │ ├── keys/
│ │ │ │ │ └── 1_5_2_2_1.yml
│ │ │ │ └── values/
│ │ │ │ ├── 1_5_2_3_1.yml
│ │ │ │ └── 1_5_2_4_1.yml
│ │ │ └── with_downcase_titles/
│ │ │ ├── 1_5_3_1.yml
│ │ │ ├── keys/
│ │ │ │ └── 1_5_3_2_1.yml
│ │ │ └── values/
│ │ │ └── 1_5_3_3_1.yml
│ │ ├── inspect/
│ │ │ └── inspect/
│ │ │ └── 1_1_1_1.yml
│ │ ├── prefixsearch/
│ │ │ ├── when_found/
│ │ │ │ ├── 1_8_1_1.yml
│ │ │ │ ├── count/
│ │ │ │ │ └── 1_8_1_2_1.yml
│ │ │ │ └── map_title_/
│ │ │ │ └── 1_8_1_3_1.yml
│ │ │ └── when_not_found/
│ │ │ └── 1_8_2_1.yml
│ │ ├── raw/
│ │ │ ├── several_pages/
│ │ │ │ ├── _50_pages/
│ │ │ │ │ ├── 1_2_2_2_1.yml
│ │ │ │ │ ├── count/
│ │ │ │ │ │ └── 1_2_2_2_2_1.yml
│ │ │ │ │ └── map_title_/
│ │ │ │ │ └── 1_2_2_2_3_1.yml
│ │ │ │ ├── default/
│ │ │ │ │ ├── 1_2_2_1_1.yml
│ │ │ │ │ ├── count/
│ │ │ │ │ │ └── 1_2_2_1_2_1.yml
│ │ │ │ │ └── map_title_/
│ │ │ │ │ └── 1_2_2_1_3_1.yml
│ │ │ │ └── no_pages/
│ │ │ │ └── 1_2_2_3_1.yml
│ │ │ ├── single_page/
│ │ │ │ ├── default/
│ │ │ │ │ ├── 1_2_1_1_1.yml
│ │ │ │ │ ├── 1_2_1_1_3.yml
│ │ │ │ │ ├── _fullurl_/
│ │ │ │ │ │ └── 1_2_1_1_4_1.yml
│ │ │ │ │ └── _title_/
│ │ │ │ │ └── 1_2_1_1_2_1.yml
│ │ │ │ ├── non-existent/
│ │ │ │ │ ├── _missing_/
│ │ │ │ │ │ └── 1_2_1_2_2_1.yml
│ │ │ │ │ └── _title_/
│ │ │ │ │ └── 1_2_1_2_1_1.yml
│ │ │ │ └── redirect/
│ │ │ │ ├── 1_2_1_3_2.yml
│ │ │ │ ├── _fullurl_/
│ │ │ │ │ └── 1_2_1_3_3_1.yml
│ │ │ │ └── _title_/
│ │ │ │ └── 1_2_1_3_1_1.yml
│ │ │ └── user-agent/
│ │ │ ├── default/
│ │ │ │ └── 1_2_3_1_1.yml
│ │ │ ├── globally_set/
│ │ │ │ └── 1_2_3_2_1.yml
│ │ │ └── locally_set/
│ │ │ └── 1_2_3_3_1.yml
│ │ ├── search/
│ │ │ ├── when_found/
│ │ │ │ ├── 1_7_1_1.yml
│ │ │ │ ├── count/
│ │ │ │ │ └── 1_7_1_2_1.yml
│ │ │ │ └── map_title_/
│ │ │ │ └── 1_7_1_3_1.yml
│ │ │ └── when_not_found/
│ │ │ └── 1_7_2_1.yml
│ │ └── traits/
│ │ ├── dynamic_part_-_taken_from_API/
│ │ │ ├── after_page_fetched/
│ │ │ │ ├── category_namespace/
│ │ │ │ │ └── 1_3_2_2_2_1.yml
│ │ │ │ └── file_namespace/
│ │ │ │ └── 1_3_2_2_1_1.yml
│ │ │ └── before_first_page_fetched/
│ │ │ ├── category_namespace/
│ │ │ │ └── 1_3_2_1_2_1.yml
│ │ │ └── file_namespace/
│ │ │ └── 1_3_2_1_1_1.yml
│ │ └── static_part_-_guess_by_domain/
│ │ └── 1_3_1_1.yml
│ ├── Infoboxer_Tree_Node/
│ │ └── Infoboxer_MediaWiki_Page/
│ │ └── 1_5_1.yml
│ ├── Infoboxer_Tree_Nodes/
│ │ ├── 2_1.yml
│ │ ├── 2_2.yml
│ │ └── when_interwiki_link/
│ │ ├── 2_3_1.yml
│ │ └── map_url_/
│ │ └── 2_3_2_1.yml
│ ├── Infoboxer_Tree_Wikilink/
│ │ ├── follow/
│ │ │ ├── 1_2_1.yml
│ │ │ ├── text/
│ │ │ │ └── 1_2_3_1.yml
│ │ │ ├── title/
│ │ │ │ └── 1_2_2_1.yml
│ │ │ └── when_interwiki_link/
│ │ │ ├── 1_2_4_1.yml
│ │ │ └── url/
│ │ │ └── 1_2_4_2_1.yml
│ │ └── url/
│ │ └── 1_1_1.yml
│ ├── en_wikipedia_org/
│ │ └── 1_1.yml
│ ├── follow-chile.yml
│ ├── follow-several.yml
│ ├── follow-source-argentine.yml
│ ├── follow-source-argentine2.yml
│ ├── follow-source-forests.yml
│ ├── follow-template.yml
│ └── other-language_Wikipedia/
│ ├── categories/
│ │ ├── 1_2_1.yml
│ │ └── should_include_existing_category.yml
│ └── files/
│ ├── default_prefix/
│ │ └── 1_1_1_1.yml
│ └── localized_prefix/
│ └── 1_1_2_1.yml
├── infoboxer/
│ ├── en.wikipedia.org/
│ │ ├── calc_templates_spec.rb
│ │ └── simple_templates_spec.rb
│ ├── infoboxer_spec.rb
│ ├── integration/
│ │ ├── all_en_templates_spec.rb
│ │ ├── fr_spec.rb
│ │ └── site_traits_spec.rb
│ ├── media_wiki/
│ │ ├── follow_spec.rb
│ │ └── traits_spec.rb
│ ├── media_wiki_spec.rb
│ ├── navigation/
│ │ ├── lookup/
│ │ │ └── selector_spec.rb
│ │ ├── lookup_spec.rb
│ │ ├── sections_spec.rb
│ │ ├── shortcuts_spec.rb
│ │ └── wikipath_spec.rb
│ ├── parser/
│ │ ├── flow_spec.rb
│ │ ├── image_spec.rb
│ │ ├── inline_spec.rb
│ │ ├── paragraphs_spec.rb
│ │ ├── ref_spec.rb
│ │ ├── table_spec.rb
│ │ └── template_spec.rb
│ ├── templates/
│ │ └── set_spec.rb
│ ├── tree/
│ │ ├── inspect_spec.rb
│ │ ├── nodes_spec.rb
│ │ ├── template_spec.rb
│ │ ├── text_spec.rb
│ │ ├── to_tree_spec.rb
│ │ └── wikilink_spec.rb
│ └── wiki_path_spec.rb
└── spec_helper.rb
================================================
FILE CONTENTS
================================================
================================================
FILE: .codeclimate.yml
================================================
exclude_paths:
- lib/infoboxer/definitions/en.wikipedia.org.rb # I know, ok?
================================================
FILE: .dokaz
================================================
--require ./spec/dokaz_helpers.rb
================================================
FILE: .github/workflows/ci.yml
================================================
name: CI
on:
push:
branches: [ master ]
pull_request:
branches: [ master ]
jobs:
main:
name: >-
${{ matrix.ruby }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
# ruby: [ 2.6, 2.7, 3.0, head ]
ruby: [ 2.6, 2.7, 3.0 ]
steps:
- name: checkout
uses: actions/checkout@v2
- name: set up Ruby
uses: ruby/setup-ruby@v1
with:
ruby-version: ${{ matrix.ruby }}
- name: install dependencies
run: bundle install --jobs 3 --retry 3
- name: spec
run: bundle exec rake spec
- name: rubocop
run: bundle exec rake rubocop
================================================
FILE: .gitignore
================================================
.bundle
vendor
deprecated
TODO.txt
tmp
examples
profile/out/*.html
rubocop
.yardoc
*.gem
.coveralls.yml
coverage
doc
.byebug*
================================================
FILE: .rspec
================================================
--require=./spec/spec_helper.rb
--color
================================================
FILE: .rubocop.yml
================================================
inherit_from: .rubocop_todo.yml
require: rubocop-rspec
AllCops:
Include:
- 'lib/**/*'
Exclude:
- 'bin/*'
- 'vendor/**/*'
- 'examples/**/*'
- 'profile/**/*'
- 'tmp/**/*'
- 'deprecated/**/*'
- 'spec/*_helpers.rb'
- 'Rakefile'
- 'Gemfile'
- 'infoboxer.gemspec'
DisplayCopNames: true
NewCops: enable
SuggestExtensions: false
TargetRubyVersion: 2.6
Style/OptionHash:
Enabled: true
# FIXME
Lint/MissingSuper:
Enabled: false
# My personal style
Layout/SpaceInsideHashLiteralBraces:
EnforcedStyle: no_space
Lint/ConstantDefinitionInBlock:
Exclude:
- 'lib/infoboxer/definitions/en.wikipedia.org.rb'
Naming/MethodParameterName:
Enabled: false
Style/AccessorGrouping:
Enabled: false
Style/ClassAndModuleChildren:
Enabled: false
Style/ClassEqualityComparison:
Enabled: false
Style/CommentAnnotation:
Enabled: false
Style/EmptyElse:
Enabled: false
Style/CaseEquality:
Enabled: false
Style/FormatString:
EnforcedStyle: percent
Style/OptionalBooleanParameter:
Enabled: false
Style/SignalException:
EnforcedStyle: semantic
Style/StringConcatenation:
Enabled: false
Style/MutableConstant:
Enabled: false
Style/ParallelAssignment:
Enabled: false
Style/AndOr:
EnforcedStyle: conditionals
Style/Alias:
EnforcedStyle: prefer_alias_method
Style/EmptyCaseCondition:
Enabled: false
Style/PercentLiteralDelimiters:
PreferredDelimiters:
default: "{}"
"%w": "[]"
Style/TrailingCommaInArguments:
Enabled: false
Style/TrailingCommaInHashLiteral:
Enabled: false
Style/TrailingCommaInArrayLiteral:
Enabled: false
Style/ModuleFunction:
Exclude:
- 'lib/infoboxer.rb'
Style/MultilineBlockChain:
Enabled: false
Style/Documentation:
Enabled: false
Style/AsciiComments:
Enabled: false
Layout/LineLength:
Max: 120
Exclude:
- 'lib/infoboxer/definitions/en.wikipedia.org.rb'
- 'spec/**/*'
Metrics/BlockLength:
Exclude:
- 'lib/infoboxer/definitions/en.wikipedia.org.rb'
- 'spec/**/*'
Metrics/PerceivedComplexity:
Max: 8
Exclude:
- 'spec/**/*'
Style/BlockDelimiters:
Enabled: false
Style/RedundantPercentQ:
Exclude:
- 'spec/**/*'
Style/FormatStringToken:
Enabled: false
# Specs style
RSpec/LeadingSubject:
Enabled: false
RSpec/EmptyExampleGroup:
Enabled: false
RSpec/DescribeClass:
Exclude:
- 'spec/infoboxer/en.wikipedia.org/calc_templates_spec.rb'
- 'spec/infoboxer/en.wikipedia.org/simple_templates_spec.rb'
- 'spec/infoboxer/integration/all_en_templates_spec.rb'
- 'spec/infoboxer/integration/fr_spec.rb'
- 'spec/infoboxer/integration/site_traits_spec.rb'
RSpec/ExampleLength:
Enabled: false
RSpec/NestedGroups:
Enabled: false
RSpec/ContextWording:
Enabled: false
# Next two are useful for preloading large documents
RSpec/BeforeAfterAll:
Enabled: false
RSpec/InstanceVariable:
Enabled: false
# TODO
RSpec/NamedSubject:
Enabled: false
RSpec/ExampleWording:
Enabled: false
RSpec/MultipleExpectations:
Enabled: false
RSpec/DescribeSymbol:
Enabled: false
# Complexity. I dare you to fix it! (Without perf.degradation)
Metrics/AbcSize:
Max: 27
Exclude:
- 'spec/**/*'
# Offense count: 1
Metrics/ClassLength:
Max: 131
# Offense count: 6
Metrics/CyclomaticComplexity:
Max: 14
# Offense count: 26
Metrics/MethodLength:
Max: 30
# Offense count: 3
Metrics/ModuleLength:
Max: 288
Exclude:
- 'spec/**/*'
================================================
FILE: .rubocop_todo.yml
================================================
================================================
FILE: .travis.yml
================================================
# Travis CI configuration: runs the default rake task on each listed MRI Ruby.
cache: bundler
language: ruby
rvm:
  - "2.6"
  - "2.7"
  - "3.0"
  #- jruby-19mode # Due to https://github.com/jruby/jruby/issues/2599
jobs:
  allow_failures:
    # Entries here are matched against job attributes, so the Ruby version
    # must be given under the `rvm` key; a bare "2.7" string matches no job.
    - rvm: "2.7" # bundler version incompatibility, fix later...
install:
  - bundle install --retry=3
script:
  - bundle exec rake
================================================
FILE: .yardopts
================================================
--markup=markdown
--markup-provider=redcarpet
--no-private
================================================
FILE: CHANGELOG.md
================================================
# Infoboxer's change log
## 0.4.0 (2021-05-30)
* A cluster of bugs found in #81 fixed:
* Empty comment (`<!---->`) now processed properly;
* Templates that are implicitly inside tables (put on a separate row) now always create
an implicit `<TableCell>`
* Heading after non-closed table closes the table implicitly instead of being inserted
into the last cell.
* Drop Ruby < 2.6, and support 3.0 instead.
PS: Yeah, year-and-almost-half is much better than 2 years between releases, I guess.. And let's call
it non-patch version then.
## 0.3.3 (2020-02-09)
* Fixed table captions handling (thanks @robfors for reporting)
PS: Funny that this small bugfix release is exactly two years after the previous one :(
## 0.3.2 (2018-02-09)
* Updated MediaWiktory to finally turn on gzip encoding of responses;
* Utility methods to expose some internals (`MediaWiki#api`, `Infoboxer#url_for(:wikipedia)`,
`Page#namespaces`, `Template#named_variables` and so on);
* Fix parsing of lowercase `file:` links in `<gallery>`.
## 0.3.1 (2017-12-04)
* (Experimental) new representation of templates, much more readable;
* More access to querying process and underlying `MediaWiktory::Wikipedia::Query`;
* Finally, `limit` parameter for multi-page queries (category, search, prefixsearch).
## 0.3.1.pre (2017-09-16)
* Introduce interwiki links following (and proper handling of interwikis, in general);
* Add `<gallery>` tag support;
* Introduce `Navigation::Selector#===`;
* Much more `Enumerable`'s methods supported by `Nodes`;
* Lot of small simplifications, cleanups and bugfixes.
TBH, it should be 0.4.0 or more, but it would be a shame to change versions so fast :) So, at least
until it is `-pre`, let it be 0.3.1.
## 0.3.0 (2017-07-23)
* Change logic of navigation through templates; now templates contents aren't hidden from global
lookups. While sometimes leading to less impressive demos, this approach proved itself to be more
useful for production.
* Introduce WikiPath query language as an alternative to series of lookups.
## 0.2.8 (2017-05-11)
* Switch to MediaWiktory 0.1.0 + some subsequent cleanup of internal logic;
* Additional `prop:` param for `MediaWiki#get`, `#get_h`, and `#raw`, allowing to fetch arbitrary
page properties.
## 0.2.7 (2016-09-18)
* Fix `Math` node rendering to text (#68);
* Fix consecutive apostrophes problem (#69);
* Fix math parsing in templates (#70).
## 0.2.6 (2016-06-27)
_0.2.5 was erroneously released without any changes._
* Fix of `<math>` tags parsing (#66)
## 0.2.4 (2016-04-16)
* `MediaWiki#get_h` works correctly with several synonymous pages now;
* `get` and `get_h` work better when some of required titles are downcase;
* Travis compatibility check restored.
## 0.2.3 (2016-03-02)
New and enhanced features:
* more useful templates (quick-n-dirty sometimes, but prettier output for
typical cases);
* Caching of wikiobjects, so for several calls to `Infoboxer.wp` it would
be only one API call for wiki metainformation;
* `MediaWiki#get` now preserves order of pages (page list would be in
the same order as requested titles);
* `MediaWiki#get_h` to receive hash of `title => page object` (useful
to know which titles have no pages, and for better control over
redirects).
Fixes:
* `Image` node equality fixed.
## 0.2.2 (2016-01-03)
Fixes:
* more sophisticated table parsing;
* empty `<nowiki/>` is parsed properly;
* inline unclosed markup inside wikilinks works;
* `MediaWiki::Traits` can now be continued in several places.
## 0.2.1 (2015-12-21)
* `infoboxer` binary properly registered.
## 0.2.0 (2015-12-21)
* MediaWiki backend changed to (our own handcrafted)
[mediawiktory](https://github.com/molybdenum-99/mediawiktory);
* Added page lists fetching like `MediaWiki#category(categoryname)`,
`MediaWiki#search(search_phrase)`;
* `MediaWiki#get` now can fetch any number of pages at once (it was only
50 in previous versions);
* `bin/infoboxer` console added for quick experimenting;
* `Template#to_h` added for quick information extraction;
* many small bugfixes and enhancements.
## 0.1.2.1 (2015-12-04)
* Small bug with newlines in templates fixed.
## 0.1.2 (2015-08-18)
Aaaaand, rrrrrelease it into the wilde!
* `ImageCaption` class added;
* Smallest refactorings;
* More documentation fixes.
## 0.1.1 (2015-08-11)
Basically, preparing for wider release!
* Small refactorings;
* Documentation fixes.
## 0.1.0 (2015-08-07)
Initial (ok, I know it's typically called 0.0.1, but here's the work of
three months, numerous documentation pages and examples and so on... so, let
it be 0.1.0).
================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to Infoboxer
_(Also duplicated in [wiki](https://github.com/molybdenum-99/infoboxer/wiki/Contributing).)_
## Contributing via test cases
If you are sure that Infoboxer parses some page incorrectly, please create an
[issue](https://github.com/molybdenum-99/infoboxer/issues) with a link
to the page (or raw wikitext) and a description of the problem.
## Contributing via localizations and templates describing
Look at [en.wikipedia.org](https://github.com/molybdenum-99/infoboxer/blob/master/lib/infoboxer/definitions/en.wikipedia.org.rb)
template definitions. It can be extended. Also, similar definitions
can/should be created for other language wikipedias and other popular
wikis.
You can do pull requests with your own definitions, or create an
[issue](https://github.com/molybdenum-99/infoboxer/issues) describing
which template definitions should be added to Infoboxer.
## Contributing via code
If you want to fix some bug or implement some feature, please just
follow the standard process for github opensource: fork, fix, push,
make pull request.
Some (scanty) information below.
### Understanding the code
* Infoboxer is split into several modules (which are clearly visible in
the API docs and folder structure).
* Most of the "easy features"
can be added to the [Navigation](http://www.rubydoc.info/gems/infoboxer/Infoboxer/Navigation)
module and its submodules: enhancing the navigational experience and
implementing clever shortcuts (like "converting table to dataframe/list of
hashes", for example).
* Most of the potential bugs may sit in the
[Parser](http://www.rubydoc.info/gems/infoboxer/Infoboxer/Parser) class
and its modules; MediaWiki markup IS tricky, tightly coupled and
ambiguous; there are also some non-implemented features, like `<source>`
tag parsing and template definition pages (which, possibly, are not
a target of Infoboxer anyway).
* Most of underfeatured area is in
[MediaWiki](http://www.rubydoc.info/gems/infoboxer/Infoboxer/MediaWiki)
-- seems reasonable for information extraction purposes to have more
features from MediaWiki API, like "page list generators", search,
"what links here" and similar functionality.
* Most of clarification and documentation is required for
[Templates](http://www.rubydoc.info/gems/infoboxer/Infoboxer/Templates)
module, which is still underloved heart of Infoboxer.
### Parser: quick, not clever
If you want to get your hands on Parser: please remember that
it's hand-crafted and thoroughly optimized. The first thought you may
have is that it needs more OO decomposition, a class for each case; or more
idiomatic Ruby, or ... Trust me, I've tried it all. But when you are
dealing with hundreds of thousands of parsing operations and tens of
thousands of resulting nodes, it turns out even the simplest things like
`Object#tap` have a performance penalty on a large number of calls.
================================================
FILE: Gemfile
================================================
source 'https://rubygems.org'
gemspec
# gem 'mediawiktory', github: 'molybdenum-99/mediawiktory', branch: 'develop'
group :docs do
gem 'dokaz', git: 'https://github.com/zverok/dokaz.git'
gem 'yard', '~> 0.9'
gem 'redcarpet'
#gem 'inch'
end
group :development do
gem 'rake'
gem 'ruby-prof' unless RUBY_PLATFORM.include?('java')
gem 'rubygems-tasks'
gem 'byebug' unless RUBY_PLATFORM.include?('java')
gem 'rubocop', '~> 1.15.0'
gem 'rubocop-rspec', '~> 2.3.0'
end
group :test do
gem 'rspec', '~> 3'
gem 'rspec-its', '~> 1'
gem 'vcr'
gem 'webmock'
gem 'timecop'
gem 'saharspec', '= 0.0.4'
gem 'coveralls', require: false
gem 'yard-junk', '~> 0.0.7'
end
================================================
FILE: LICENSE.txt
================================================
The MIT License (MIT)
Copyright (c) 2014-15 Victor 'Zverok' Shepelev
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
================================================
FILE: Parsing.md
================================================
Parsing Wikipedia is not an easy task. Some tags and formatting signs
can appear only after a newline, some can be anywhere in the text; some formatting
can span several lines, some is force-closed at the end of the line; there can be
tons and tons of markup inside image captions, templates and `<ref>`'s, so...
Here's what I've come up with:
1. Entire page text is split into lines (after replacing of `<!-- -->`
comments -- they go nowhere).
2. First, we are in *paragraph* context. We are looking at next line in
list and guessing what it is: list, heading and so on
3. Then, we are in *inline* context for text of paragraph (unless it is
table, which is different story, and headings, which also different,
and of course preformatted text,... you've got the idea). We scan text
until *any* of inline formatting will be met (or end of line).
4. When met with some formatting, we push current context and scan inside
it. The inline scanning is tricky!
* Simple formatting like `''` (italic) is implicitly closed at the end
of line (it is called "short inline scan" inside Infoboxer's parser)
* Long formatting like templates can span several lines, so we continue
scan through next lines, till template end (it means we are still in
same paragraph!), it's "normal inline scan", or just "inline scan"
* Some __inline__ formatting (like `<ref>`'s) and special formatting,
like table cells, can have other paragraphs inside! (But it's still
"inline" formatting, because when the `<ref>` ends, the same paragraph
is continued -- when Wikipedia shows it, the ref will leave a small
footnote mark in the paragraph, and the contents will be below). We call
such cases "long inline scan".
5. So, the parser tries to do everything in one forward scan, without returning
to previous positions or tricks like "scan all symbols till the end of
template, then parse them as a separate sub-document" (the latter is
the simplest way to parse MediaWiki markup; that's how Infoboxer worked
at first; it was not very fast and not memory-efficient at all).
================================================
FILE: README.md
================================================
# Infoboxer
[Gem Version](http://badge.fury.io/rb/infoboxer)

[Coverage Status](https://coveralls.io/github/molybdenum-99/infoboxer?branch=master)
[Code Climate](https://codeclimate.com/github/molybdenum-99/infoboxer)
[Gitter chat](https://gitter.im/molybdenum-99/infoboxer)
**Infoboxer** is pure-Ruby Wikipedia (and generic MediaWiki) client and
parser, targeting information extraction (hence the name).
It can be useful in tasks like:
* get a plaintext abstract of an article (paragraphs before first heading);
* get structured data variables from page's **infobox**;
* list page's sections and count paragraphs, images and tables in them;
* convert some huge "comparison table" to data;
* and much, much more!
The whole idea is: you can have any Wikipedia page as a parsed tree with
an obvious structure, you can navigate that tree easily, and you have a
bunch of high-level helper methods, so typical information extraction
tasks should be super-easy, one-liners in the best cases.
_(For those already thinking "Why should you do this, we already have
DBPedia?" -- please, read "[Reasons](https://github.com/molybdenum-99/infoboxer/wiki/Reasons)"
page in our wiki.)_
## Showcase
```ruby
Infoboxer.wikipedia.
get('Breaking Bad (season 1)').
sections('Episodes').templates(name: 'Episode table').
fetch('episodes').templates(name: /^Episode list/).
fetch_hashes('EpisodeNumber', 'EpisodeNumber2', 'Title', 'ShortSummary')
# => [{"EpisodeNumber"=>#<Var(EpisodeNumber): 1>, "EpisodeNumber2"=>#<Var(EpisodeNumber2): 1>, "Title"=>#<Var(Title): Pilot>, "ShortSummary"=>#<Var(ShortSummary): Walter White, a 50-year old che...>},
# {"EpisodeNumber"=>#<Var(EpisodeNumber): 2>, "EpisodeNumber2"=>#<Var(EpisodeNumber2): 2>, "Title"=>#<Var(Title): Cat's in the Bag...>, "ShortSummary"=>#<Var(ShortSummary): Walt and Jesse try to dispose o...>},
# ...and so on
```
Do you _feel_ it now?
You also can take a look at [Showcase](https://github.com/molybdenum-99/infoboxer/wiki/Showcase).
## Usage
### Install gem
Install it as usual: `gem 'infoboxer'` in your Gemfile, then `bundle install`.
Or just `[sudo] gem install infoboxer` if you prefer.
### Grab the page
```ruby
# From English Wikipedia
page = Infoboxer.wikipedia.get('Argentina')
# or
page = Infoboxer.wp.get('Argentina')
# From other language Wikipedia:
page = Infoboxer.wikipedia('fr').get('Argentina')
# From any wiki with the same engine:
page = Infoboxer.wiki('http://companywiki.com').get('Our Product')
```
See more examples and options at [Retrieving pages](https://github.com/molybdenum-99/infoboxer/wiki/Retrieving%20pages)
### Play with page
Basically, page is a tree of [Nodes](https://github.com/molybdenum-99/infoboxer/wiki/Nodes), you can think of it as some kind of
[DOM](https://en.wikipedia.org/wiki/Document_Object_Model).
So, you can navigate it:
```ruby
# Simple traversing and inspect
node = page.children.first.children.first
node.to_tree
node.to_text
# Various lookups
page.lookup(:Template, name: /^Infobox/)
```
See [Tree navigation basics](https://github.com/molybdenum-99/infoboxer/wiki/Tree-navigation-basics).
On the top of the basic navigation Infoboxer adds some useful shortcuts
for convenience and brevity, which allows things like this:
```ruby
page.section('Episodes').tables.first
```
See [Navigation shortcuts](https://github.com/molybdenum-99/infoboxer/wiki/Navigation-shortcuts)
To put it all in one piece, also take a look at [Data extraction tips and tricks](https://github.com/molybdenum-99/infoboxer/wiki/Tips-and-tricks).
### infoboxer executable
Just try `infoboxer` command.
Without any options, it starts IRB session with infoboxer required and
included into main namespace.
With `-w` option, it provides a shortcut to MediaWiki instance you want.
Like this:
```
$ infoboxer -w https://en.wikipedia.org/w/api.php
> get('Argentina')
=> #<Page(title: "Argentina", url: "https://en.wikipedia.org/wiki/Argentina"): ....
```
You can also use shortcuts like `infoboxer -w wikipedia` for common
wikis (and, just for fun, `infoboxer -wikipedia` also).
## Advanced topics
* [Reasons](https://github.com/molybdenum-99/infoboxer/wiki/Reasons) for
Infoboxer creation;
* [Parsing quality](https://github.com/molybdenum-99/infoboxer/wiki/Parsing-quality)
(TL;DR: very good, but not ideal);
* [Performance](https://github.com/molybdenum-99/infoboxer/wiki/Performance)
(TL;DR: 0.1-0.4 sec for parsing hugest pages);
* [Localization](https://github.com/molybdenum-99/infoboxer/wiki/Localization)
(TL;DR: For now, you'll need some work to use Infoboxer's
most advanced features with non-English or non-WikiMedia wikis; basic
and mid-level features work always);
* If you plan to use Wikipedia or sister projects data in production,
please consider [Wikipedia terms and conditions](https://github.com/molybdenum-99/infoboxer/wiki/Wikipedia-terms-and-conditions).
## Compatibility
As of Infoboxer 0.4.0, MRI Ruby 2.6 or newer is required (CI runs on 2.6, 2.7
and 3.0). Earlier releases were reported to be compatible with any MRI Ruby
since 2.0.0 (1.9.3 previously, dropped since Infoboxer 0.2.0). In Travis-CI tests,
JRuby was failing due to a bug in old Java 7/Java 8 SSL certificate support
([see here](https://github.com/jruby/jruby/issues/2599)),
and Rubinius was failing 3 specs of 500 for a reason that has not been investigated yet.
Therefore, those Ruby implementations are excluded from the CI config, though
they may still work for you.
## Links
* [Wiki](https://github.com/molybdenum-99/infoboxer/wiki)
* [API Docs](http://www.rubydoc.info/gems/infoboxer)
* [Contributing](https://github.com/molybdenum-99/infoboxer/wiki/Contributing)
* [Roadmap](https://github.com/molybdenum-99/infoboxer/wiki/Roadmap)
## License
[MIT](https://github.com/molybdenum-99/infoboxer/blob/master/LICENSE.txt).
================================================
FILE: Rakefile
================================================
# coding: utf-8
# Rake entry point: loads the gem itself and registers the task sets used
# for packaging, documentation checks, specs and linting.
require 'bundler/setup'
require 'infoboxer'
# rubygems-tasks: presumably the gem build/install/release tasks -- see that
# gem's documentation for the exact list.
require 'rubygems/tasks'
Gem::Tasks.new
# yard-junk: registers the documentation check referenced below as `yard:junk`.
require 'yard-junk/rake'
YardJunk::Rake.define_task
# RSpec task (referenced below as `spec`).
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new
# RuboCop task (referenced below as `rubocop`).
require 'rubocop/rake_task'
RuboCop::RakeTask.new
# Plain `rake` runs the specs, then RuboCop, then the YARD documentation check.
task default: %w[spec rubocop yard:junk]
namespace :dev do
  desc "Run regression check (just parsed/error) on set of large and dirty pages"
  task :regression do
    # Every page is parsed independently: an error on one page is reported
    # (with elapsed time and the first backtrace lines) and the run continues.
    Dir['regression/pages/*.wiki'].each do |f|
      start = Time.now
      text = File.read(f)

      begin
        Infoboxer::Parser.document(text)
        tm = Time.now - start
        puts '%s successfully parsed in %.3f' % [File.basename(f), tm]
      rescue Infoboxer::Parser::ParsingError => e
        tm = Time.now - start
        puts "%s: parsing error after %.3f: %s:\n\t%s" % [File.basename(f), tm, e.message, e.backtrace.first(5).join("\n\t")]
      rescue => e
        tm = Time.now - start
        puts "%s: error %s after %.3f: %s:\n\t%s" % [File.basename(f), e.class, tm, e.message, e.backtrace.first(5).join("\n\t")]
      end
    end
  end

  desc "Run profiling on several pages and dump results to HTML"
  task :profile do
    # ruby-prof is required lazily, so it is needed only when this task runs.
    require 'ruby-prof'

    Dir['profile/pages/*.wiki'].each do |f|
      name = File.basename(f).sub('.wiki', '')
      out = "profile/out/#{name}.html"
      text = File.read(f)

      RubyProf.start
      Infoboxer::Parser.document(text)
      res = RubyProf.stop

      printer = RubyProf::GraphHtmlPrinter.new(res)
      # Block form closes (and therefore flushes) the report file even if
      # printing raises; previously the handle was never closed.
      File.open(out, 'w') { |io| printer.print(io) }

      puts '%s successfully parsed, see res: %s' % [File.basename(f), out]
    end
  end
end
================================================
FILE: bin/infoboxer
================================================
#!/usr/bin/env ruby
require 'rubygems'
require 'infoboxer'
include Infoboxer
require 'optparse'
wiki_url = nil

# Command line: the only option is the default wiki (full API URL or a short project name).
option_parser = OptionParser.new do |opts|
  opts.banner = 'Usage: infoboxer [-w wiki_api_url]'

  opts.on('-w', '--wiki WIKI_API_URL',
          "Make wiki by WIKI_API_URL a default wiki, and use it with just get('Pagename')") do |value|
    wiki_url = value
  end
end
option_parser.parse!

if wiki_url
  if wiki_url =~ /^[a-z]+$/
    # Short name: look it up as is, then with a "w" prefix, among known Wikimedia projects.
    domain = Infoboxer::WIKIMEDIA_PROJECTS[wiki_url.to_sym] ||
             Infoboxer::WIKIMEDIA_PROJECTS[('w' + wiki_url).to_sym]
    fail("Unidentified wiki: #{wiki_url}") unless domain

    wiki_url = "https://en.#{domain}/w/api.php"
  end

  DEFAULT_WIKI = Infoboxer.wiki(wiki_url)
  puts "Default Wiki selected: #{wiki_url}.\nNow you can use `get('Pagename')`, `category('Categoryname')` and so on.\n\n"

  # Top-level shortcuts delegating to the default wiki, to be typed directly in the console.
  %i[raw get get_h category search prefixsearch].each do |shortcut|
    define_method(shortcut) do |*arg|
      DEFAULT_WIKI.send(shortcut, *arg)
    end
  end
end
require 'irb'
ARGV.shift until ARGV.empty?
IRB.start
================================================
FILE: infoboxer.gemspec
================================================
require './lib/infoboxer/version'
Gem::Specification.new do |s|
  s.name = 'infoboxer'
  s.version = Infoboxer::VERSION
  s.authors = ['Victor Shepelev']
  s.email = 'zverok.offline@gmail.com'
  s.homepage = 'https://github.com/molybdenum-99/infoboxer'

  s.summary = 'MediaWiki client and parser, targeting information extraction.'
  s.description = <<-EOF
Infoboxer is library targeting use of Wikipedia (or any other
MediaWiki-based wiki) as a rich powerful data source.
  EOF
  s.licenses = ['MIT']

  # lib/infoboxer/media_wiki/traits.rb uses the safe navigation operator (`&.`),
  # which is a syntax error before Ruby 2.3.
  s.required_ruby_version = '>= 2.3.0'

  # Split on an explicit newline: `$RS` is only defined after `require 'English'`,
  # otherwise it is nil and `split` silently falls back to splitting on any
  # whitespace (breaking file names that contain spaces).
  s.files = `git ls-files`.split("\n").reject do |file|
    # Development-only files are not packaged; dots are escaped so that they
    # match literally.
    file =~ /^(?:
      spec\/.*
      |Gemfile
      |Rakefile
      |\.rspec
      |\.gitignore
      |\.rubocop\.yml
      |\.travis\.yml
    )$/x
  end
  s.require_paths = ["lib"]
  s.bindir = 'bin'
  s.executables << 'infoboxer'

  s.add_dependency 'htmlentities'
  s.add_dependency 'mediawiktory', '= 0.1.3'
  s.add_dependency 'addressable'
  s.add_dependency 'terminal-table'
end
================================================
FILE: lib/infoboxer/core_ext.rb
================================================
# frozen_string_literal: true
# @private
class Object
  # Backport of Object#itself (built in since Ruby 2.2): returns the receiver.
  #
  # Guarded by a feature check instead of comparing RUBY_VERSION strings:
  # string comparison is lexicographic, so it misorders versions with
  # multi-digit components, and the check below never redefines a native method.
  #
  # Unfortunately, not in backports gem still :(
  unless method_defined?(:itself)
    def itself
      self
    end
  end
end
================================================
FILE: lib/infoboxer/definitions/en.wikipedia.org.rb
================================================
# frozen_string_literal: true
# rubocop:disable Layout/EmptyLinesAroundArguments
module Infoboxer
MediaWiki::Traits.for('en.wikipedia.org') do
templates do
# https://en.wikipedia.org/wiki/Category:Wikipedia_character-substitution_templates
# ---------------------------------------------------------------------------------
# Extracted semi-automatically
# TODO: fully automatical extraction
literal(
'&',
';',
'=',
'?',
'—',
'1/2',
'1/3',
'1/4',
'2/3',
'3/4',
)
replace(
'!!' => '||',
'!(' => '[',
'!((' => '[[',
'!-' => '|-',
'!:' => ':',
"'" => " '",
"''" => '″',
"'s" => "'s",
'(' => '{',
'((' => '{{',
'(((' => '{{{',
')' => '}',
')!' => ']',
'))' => '}}',
'))!' => ']]',
')))' => '}}}',
'Asterisk' => '*',
'Colon' => ':',
'Em dash' => '—',
'Gc' => '†',
'Ibeam' => 'I',
'Long dash' => ' ——— ',
'Nbhyph' => '‑',
'Number sign' => '#',
'Shy' => '', # soft hyphen
'Single space' => "' ",
'Space single' => " '",
'Spaced ndash' => ' – ',
'Square bracket close' => ']',
'Square bracket open' => '[',
'Zwsp' => '',
'\\' => ' / ',
'`' => "'",
'·' => ' · ',
'‘' => '‘',
'•' => ' • ',
)
# https://en.wikipedia.org/wiki/Category:Line-handling_templates
# ------------------------------------------------------------------
replace(
'-' => "\n",
'Break' => "\n", # FIXME: in fact, break has optional parameter "how many breaks"
'Crlf' => "\n", # FIXME: in fact, alias for break, should have DSL syntax for it!
'Crlf2' => "\n",
)
show(
'Allow wrap',
'Nowrap',
'j', 'nobr', 'nobreak', # aliases for Nowrap
'nowraplinks',
)
# inflow_template('Normalwraplink') # TODO: tricky
# https://en.wikipedia.org/wiki/Category:List_formatting_and_function_templates
# -----------------------------------------------------------------------------
# NB: it's enough for most cases to have all list-representing templates
# just navigable inside and rendered as space-separated list of entries
show(
'Br separated entries',
'Bulleted list',
'Collapsible list',
'Comma separated entries',
'Hlist',
'Flatlist',
'Flowlist',
'Pagelist',
'Ordered list',
'Plainlist',
'Space separated entries',
'Toolbar',
)
# https://en.wikipedia.org/wiki/Category:Wikipedia_XHTML_tag-replacing_templates
# ------------------------------------------------------------------------------
show(
# Font size
'Small',
'Smaller',
'Midsize',
'Larger',
'Big',
'Large',
'Huge',
# Align
'left',
'Center',
'Right',
# Simple style
'Em',
'Kbd',
'Var',
'Varserif',
'Samp',
'Strikethrough',
'Strong',
'Sub',
'Sup',
'Underline',
# FIXME: should do something wiser
'Pre',
'Pre2',
'Code'
)
# Each template below exposes one of its variables as its shown content.
template 'Abbr' do
  # Content is the first unnamed variable.
  def children
    fetch('1')
  end
end
# TODO: has aliases: {{Define}}, {{Explain}}, {{Tooltip}}

template 'Align' do
  # Content is the second unnamed variable.
  def children
    fetch('2')
  end
end

template 'Dfn' do
  # Content is the first unnamed variable.
  def children
    fetch('1')
  end
end

template 'Resize' do
  # With fewer than two unnamed variables the first one is the content,
  # otherwise the second one is.
  def children
    if unnamed_variables.count < 2
      fetch('1')
    else
      fetch('2')
    end
  end
end

template 'Font' do
  # Content is the `text` variable, or the first unnamed variable when
  # `text` yields nothing.
  def children
    named = fetch('text')
    return fetch('1') if named.empty?

    named
  end
end
# https://en.wikipedia.org/wiki/Category:Text_color_templates
show(
'white', 'silver (color)', 'gray', 'black', 'pink', 'red', 'darkred',
'maroon', 'brown', 'orange (color)', 'gold (color)', 'yellow', 'olive',
'lime', 'green', 'aqua (color)', 'cyan', 'teal', 'blue', 'navy (color)',
'purple', 'fuchsia', 'magenta'
)
# Some most popular templates, without categorical splitting
# https://en.wikipedia.org/wiki/Wikipedia:Database_reports/Templates_transcluded_on_the_most_pages
# ------------------------------------------------------------------------------------------------
# Currently scanned by eyes up to 250-th line, which is used in 130549 articles, according to the
# page - which, though, is dramatically outdated.
# Stub markers: the pattern covers names ending in "-stub".
# NOTE(review): presumably `match:` makes this definition apply to every
# template whose name matches the pattern - confirm against Templates::Set.
template 'Stub', match: /-stub$/ do
# Predicate usable in lookups such as `lookup(:stub?)`.
def stub?
true
end
end
# Infoboxes: the pattern covers names starting with "Infobox" (case-insensitive).
template 'Infobox', match: /^Infobox/i do
# Predicate usable in lookups such as `lookup(:infobox?)`.
def infobox?
true
end
end
template 'Coord' do
  # Notation used by this template call, detected from the position of the
  # latitude hemisphere marker ("N" or "S") among the template's children:
  #
  # * `:decimal`      - `{{Coord|dd|N|dd|E}}`
  # * `:min`          - `{{Coord|dd|mm|N|dd|mm|E}}`
  # * `:sec`          - `{{Coord|dd|mm|ss|N|dd|mm|ss|E}}`
  # * `:decimal_sign` - `{{Coord|dd|dd}}` (signed decimals, no marker found)
  #
  # @return [Symbol]
  def model
    @model ||= begin
      # The whole text must be exactly "N" or "S"; the former /^N|S$/ matched
      # anything starting with "N" or ending with "S" (alternation precedence).
      npos = lookup_children(text: /^[NS]$/).first.index rescue nil # rubocop:disable Style/RescueModifier
      case npos
      when 1
        :decimal
      when 2
        :min
      when 3
        :sec
      else
        :decimal_sign
      end
    end
  end

  # Latitude as text, formatted according to {#model}.
  # @return [String]
  def lat
    case model
    when :decimal
      # Two values (degrees and hemisphere), hence two placeholders.
      '%s°%s' % fetch('1', '2').map(&:text)
    when :decimal_sign
      fetch('1').text
    when :min
      '%s°%s′%s' % fetch('1', '2', '3').map(&:text)
    when :sec
      '%s°%s′%s″%s' % fetch('1', '2', '3', '4').map(&:text)
    end
  end

  # Longitude as text, formatted according to {#model}. Longitude parameters
  # directly follow the latitude ones, so their positions depend on the notation.
  # @return [String]
  def lng
    case model
    when :decimal
      '%s°%s' % fetch('3', '4').map(&:text)
    when :decimal_sign
      fetch('2').text
    when :min
      '%s°%s′%s' % fetch('4', '5', '6').map(&:text)
    when :sec
      '%s°%s′%s″%s' % fetch('5', '6', '7', '8').map(&:text)
    end
  end
end
# Unit conversion template: exposes the source value(s) and units of the
# call; `text` renders the source side only.
template 'Convert' do
# Text of the first positional parameter (the first value).
def value1
fetch('1').text
end
# Texts which, when found in the second positional parameter, mean that the
# call has two values joined by that word/sign.
ALLOW_BETWEEN = ['-;', '–',
'and', '&', 'and(-)', ', and',
'or', ', or',
'to', 'to(-)', 'to about',
'+/-', '±', '+',
'by', 'x', '×', 'x',].freeze
# Joining word/sign between two values, or nil when the second positional
# parameter is not one of ALLOW_BETWEEN.
def between
ALLOW_BETWEEN.include?(fetch('2').text) ? fetch('2').text : nil
end
# Second value (third positional parameter); nil for single-value calls.
def value2
between ? fetch('3').text : nil
end
# Source unit: its position shifts by two when a joining word and a second value are present.
def measure_from
between ? fetch('4').text : fetch('2').text
end
# Target unit, positioned right after the source unit.
def measure_to
between ? fetch('5').text : fetch('3').text
end
# Source-side rendering: values, joining word (if any) and source unit, separated by spaces.
def text
[value1, between, value2, measure_from].compact.join(' ')
end
end
template 'Age' do
  # Start date, built by `fetch_date` from positional parameters 1-3.
  def from
    fetch_date('1', '2', '3')
  end

  # End date, built by `fetch_date` from positional parameters 4-6;
  # today when `fetch_date` returns nothing.
  def to
    fetch_date('4', '5', '6') || Date.today
  end

  # Number of full years between {#from} and {#to}.
  #
  # Computed from calendar fields rather than `days / 365`, which drifts by
  # one on spans containing enough leap days.
  # NOTE(review): assumes `fetch_date` returns Date-like objects responding
  # to #year, #month and #day - confirm against Templates::Base.
  #
  # @return [Integer]
  def value
    start = from
    finish = to
    years = finish.year - start.year
    # The anniversary has not been reached yet in the final year.
    years -= 1 if ([finish.month, finish.day] <=> [start.month, start.day]) < 0
    years
  end

  # @return [String]
  def text
    "#{value} years"
  end
end
template 'Birth date and age' do
  # Date built by `fetch_date` from positional parameters 1-3.
  def date
    fetch_date('1', '2', '3')
  end

  # String form of {#date}.
  def text
    date.to_s
  end
end
# TODO: aliased as bda

template 'Birth date' do
  # Date built by `fetch_date` from positional parameters 1-3.
  def date
    fetch_date('1', '2', '3')
  end

  # String form of {#date}.
  def text
    date.to_s
  end
end
# TODO: aliased as dob

template 'Time ago' do
  # "N days ago" when the first parameter parses as a date; otherwise the
  # parameter text itself.
  def text
    source = fetch('1').text
    begin
      elapsed = (Date.today - Date.parse(source)).to_i
      "#{elapsed} days ago" # not trying complext time_distance_in_words formatting here
    rescue ArgumentError
      source
    end
  end
end
# Shown content is the first positional parameter.
template 'Flagcountry' do # very popular instead of country name
def children
fetch('1')
end
end
# Shown content is the first positional parameter.
template 'Flag' do # very popular instead of country name
def children
fetch('1')
end
end
show 'Plural'
# Shown content is the second positional parameter when more than one
# unnamed parameter is given, the first one otherwise.
template 'URL' do
def children
unnamed_variables.count > 1 ? fetch('2') : fetch('1')
end
end
# Prononciation/lang templates - are frequent in article abstracts
# Doint it dirty, but useful, for now:
# Pattern covers names like "lang-xx"/"lang-xxx"; content is the first positional parameter.
template 'Lang', match: /^lang-(\w{2,3})$/i do
def children
fetch('1')
end
end
# Pattern covers "IPAc-xx", "IPAc1-xx" and "IPAc2-xx"; text is taken from all unnamed variables.
template 'IPAc', match: /^IPAc[12]?-(\w{2,3})$/i do
def text
unnamed_variables.text
end
end
# Pattern covers names like "IPA-xx"; text is the first positional parameter.
template 'IPA', match: /^IPA-(\w{2,3})$/i do
def text
fetch('1').text
end
end
# TODO: extremely popular:
# Str left - https://en.wikipedia.org/wiki/Category:String_manipulation_templates
# Rnd - https://en.wikipedia.org/wiki/Category:Mathematical_function_templates
# TODO: useful categories
# https://en.wikipedia.org/wiki/Category:Date_mathematics_templates
# https://en.wikipedia.org/wiki/Category:Mathematical_function_templates
# https://en.wikipedia.org/wiki/Category:Wikipedia_formatting_and_function_templates
# https://en.wikipedia.org/wiki/Category:Semantic_markup_templates
# https://en.wikipedia.org/wiki/Category:Quotation_templates
# https://en.wikipedia.org/wiki/Category:Typing-aid_templates
# https://en.wikipedia.org/wiki/Category:Inline_spacing_templates
# https://en.wikipedia.org/wiki/Category:Sorting_templates
# https://en.wikipedia.org/wiki/Wikipedia:Database_reports/Templates_transcluded_on_the_most_pages
end
end
end
# rubocop:enable Layout/EmptyLinesAroundArguments
================================================
FILE: lib/infoboxer/media_wiki/page.rb
================================================
# frozen_string_literal: true
module Infoboxer
class MediaWiki
# A descendant of {Tree::Document Document}, representing page,
# received from {MediaWiki} client.
#
# Alongside with document tree structure, knows document's title as
# represented by MediaWiki and human (non-API) URL.
class Page < Tree::Document
  # @param client [MediaWiki] client the page was received through
  # @param children parsed page content, passed to the document constructor
  # @param source raw page data; its 'title' and 'fullurl' become document params
  def initialize(client, children, source)
    @client = client
    @source = source
    super(children, title: source['title'], url: source['fullurl'])
  end

  # Instance of {MediaWiki} which this page was received from
  # @return {MediaWiki}
  attr_reader :client

  # Instance of MediaWiktory::Page class with source data
  # @return {MediaWiktory::Page}
  attr_reader :source

  # @!attribute [r] title
  #   Page title.
  #   @return [String]

  # @!attribute [r] url
  #   Page friendly URL.
  #   @return [String]

  def_readers :title, :url

  # Traits of the wiki this page belongs to (delegated to the client).
  def traits
    client.traits
  end

  # Name of the page's namespace, looked up among the standard namespaces.
  # FIXME: take from siteinfo!
  def namespace
    namespace_id = source.fetch('ns')
    # The list starts at id -2: Media = -2, Special = -1, Main = 0
    Traits::STANDARD_NAMESPACES[namespace_id + 2]
  end

  # Whether the page is in the "Category" namespace.
  def category?
    namespace == 'Category'
  end

  private

  PARAMS_TO_INSPECT = %i[url title].freeze

  # Restricts params shown on inspection to PARAMS_TO_INSPECT.
  def show_params
    visible = params.select { |name, _value| PARAMS_TO_INSPECT.include?(name) }
    super(visible)
  end
end
end
end
================================================
FILE: lib/infoboxer/media_wiki/traits.rb
================================================
# frozen_string_literal: true
module Infoboxer
class MediaWiki
# DSL for defining "traits" for some site.
#
# More docs (and possible refactoring) to follow.
#
# You can look at current
# [English Wikipedia traits](https://github.com/molybdenum-99/infoboxer/blob/master/lib/infoboxer/definitions/en.wikipedia.org.rb)
# definitions in Infoboxer's repo.
class Traits
class << self
  # Define set of templates for current site's traits.
  #
  # See {Templates::Set} for longer (yet insufficient) explanation.
  #
  # Expected to be used inside Traits definition block.
  #
  # Without a block, returns the (lazily created) template set.
  def templates(&definition)
    @templates ||= Templates::Set.new

    return @templates unless definition

    @templates.define(&definition)
  end

  # Binds this traits class to domain +d+.
  #
  # @raise [ArgumentError] if the domain is already bound
  # @private
  def domain(d)
    # NB: explicitly store all domains in base Traits class
    if Traits.domains.key?(d)
      fail(ArgumentError, "Domain binding redefinition: #{Traits.domains[d]}")
    end

    Traits.domains[d] = self
  end

  # Traits instance for +domain+: of the class bound to the domain, or of
  # base Traits when nothing is bound.
  # @private
  def get(domain, site_info = {})
    (Traits.domains[domain] || Traits).new(site_info)
  end

  # Registry of `domain => traits class` bindings.
  # @private
  def domains
    @domains ||= {}
  end

  # Define traits for some domain. Use it like:
  #
  # ```ruby
  # MediaWiki::Traits.for 'ru.wikipedia.org' do
  #   templates do
  #     template '...' do
  #       # some template definition
  #     end
  #   end
  # end
  # ```
  #
  # Again, you can look at current
  # [English Wikipedia traits](https://github.com/molybdenum-99/infoboxer/blob/master/lib/infoboxer/definitions/en.wikipedia.org.rb)
  # for example implementation.
  #
  # When traits for the domain already exist, the block extends them;
  # otherwise a new traits class is created and bound to the domain.
  def for(domain, &block)
    existing = Traits.domains[domain]
    # Branch on the binding itself, not on the block's return value: with
    # `existing&.instance_eval(&block) || ...` a block returning nil/false
    # fell through to creating a second class and raised
    # "Domain binding redefinition".
    return existing.instance_eval(&block) if existing

    Class.new(self, &block).domain(domain)
  end

  # @private
  alias_method :default, :new
end
# @param site_info [Hash] site metadata; an empty hash makes every query
#   below fall back to defaults.
def initialize(site_info = {})
@site_info = site_info
end
# Whether +prefix+ is one of the namespace names known for this site.
def namespace?(prefix)
known_namespaces.include?(prefix)
end
# Whether +prefix+ is a known interwiki prefix for this site.
def interwiki?(prefix)
known_interwikis.key?(prefix)
end
# All names of the "File" namespace (memoized).
# @private
def file_namespace
@file_namespace ||= ns_aliases('File')
end
# All names of the "Category" namespace (memoized).
# @private
def category_namespace
@category_namespace ||= ns_aliases('Category')
end
# Template set defined on the traits class.
# @private
def templates
self.class.templates
end
private
# Namespace names known for the site (memoized): the standard list when no
# site info is available, otherwise local names of all namespaces and aliases.
def known_namespaces
  return @known_namespaces if @known_namespaces

  @known_namespaces =
    if @site_info.empty?
      STANDARD_NAMESPACES
    else
      declared = @site_info['namespaces'].values + @site_info['namespacealiases']
      declared.map { |entry| entry['*'] }
    end
end
# Interwiki map entries indexed by prefix (memoized); empty when no site
# info is available.
def known_interwikis
  return @known_interwikis if @known_interwikis

  @known_interwikis =
    if @site_info.empty?
      {}
    else
      @site_info['interwikimap'].each_with_object({}) do |entry, by_prefix|
        by_prefix[entry['prefix']] = entry
      end
    end
end
# All names under which the namespace with canonical name +base+ is known
# on the site: the canonical name, the local name and declared aliases,
# without nils and duplicates.
#
# @param base [String] canonical namespace name, like 'File'
# @return [Array<String>] always starts with +base+
def ns_aliases(base)
  return [base] if @site_info.empty?

  main = @site_info['namespaces'].values.detect { |n| n['canonical'] == base }
  # The site does not declare this namespace: only the canonical name is known.
  return [base] unless main

  aliases = @site_info['namespacealiases']
            .select { |a| a['id'] == main['id'] }.flat_map { |n| n['*'] }

  # Parentheses matter: clean up the WHOLE list. Previously `.compact.uniq`
  # was applied to the aliases only, so the canonical and local names were
  # duplicated whenever they coincide.
  ([base, main['*']] + aliases).compact.uniq
end
# See https://www.mediawiki.org/wiki/Help:Namespaces#Standard_namespaces
#
# Order matters: MediaWiki::Page#namespace indexes this list with
# `namespace id + 2`, so entries must stay in namespace id order starting
# from Media (-2).
STANDARD_NAMESPACES = [
'Media', # Direct linking to media files.
'Special', # Special (non-editable) pages.
'', # (Main)
'Talk', # Article discussion.
'User', #
'User talk', #
'Project', # Meta-discussions related to the operation and development of the wiki.
'Project talk', #
'File', # Metadata for images, videos, sound files and other media.
'File talk', #
'MediaWiki', # System messages and other important content.
'MediaWiki talk', #
'Template', # Templates: blocks of text or wikicode that are intended to be transcluded.
'Template talk', #
'Help', # Help files, instructions and "how-to" guides.
'Help talk', #
'Category', # Categories: dynamic lists of other pages.
'Category talk', #
].freeze
end
end
end
================================================
FILE: lib/infoboxer/media_wiki.rb
================================================
# frozen_string_literal: true
require 'mediawiktory'
require 'addressable/uri'
require_relative 'media_wiki/traits'
require_relative 'media_wiki/page'
module Infoboxer
# MediaWiki client class.
#
# Usage:
#
# ```ruby
# client = Infoboxer::MediaWiki
# .new('http://en.wikipedia.org/w/api.php', user_agent: 'My Own Project')
# page = client.get('Argentina')
# ```
#
# Consider using shortcuts like {Infoboxer.wiki}, {Infoboxer.wikipedia},
# {Infoboxer.wp} and so on instead of direct instantiation of this class
# (although you can if you want to!)
#
class MediaWiki
# Default Infoboxer User-Agent header.
#
# You can set yours as an option to {Infoboxer.wiki} and its shortcuts,
# or to {#initialize}
UA = "Infoboxer/#{Infoboxer::VERSION} "\
'(https://github.com/molybdenum-99/infoboxer; zverok.offline@gmail.com)'
class << self
# User agent getter/setter.
#
# Default value is {UA}.
#
# You can also use per-instance option, see {#initialize}
#
# @return [String]
attr_accessor :user_agent
end
# @private
attr_reader :api_base_url, :traits
# @return [MediaWiktory::Wikipedia::Client]
attr_reader :api
# Creating new MediaWiki client. {Infoboxer.wiki} provides shortcut
# for it, as well as shortcuts for some well-known wikis, like
# {Infoboxer.wikipedia}.
#
# @param api_base_url [String] URL of `api.php` file in your MediaWiki
# installation. Typically, its `<domain>/w/api.php`, but can vary
# in different wikis.
# @param user_agent [String] (also aliased as `:ua`) Custom User-Agent header.
def initialize(api_base_url, ua: nil, user_agent: ua)
# Parsed form is kept for host extraction (traits lookup, #inspect).
@api_base_url = Addressable::URI.parse(api_base_url)
# `user_agent(...)` with parentheses is the private method resolving the
# effective User-Agent; its argument is the keyword parameter of the same name.
@api = MediaWiktory::Wikipedia::Api.new(api_base_url, user_agent: user_agent(user_agent))
# Must come after @api is set: `siteinfo` performs an API query.
@traits = Traits.get(@api_base_url.host, siteinfo)
end
# Receive "raw" data from Wikipedia (without parsing or wrapping in
# classes).
#
# @param titles [Array<String>] List of page titles to get.
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
# while using it.
#
# @return [Hash{String => Hash}] Hash of `{requested title => raw MediaWiki object}`. Note that
# even missing (does not exist in current Wiki) or invalid (impossible title) still be present
# in response, just will have `"missing"` or `"invalid"` key, just like MediaWiki returns them.
def raw(*titles, &processor)
  # could emerge on "automatically" created page lists, should work
  return {} if titles.empty?

  # One request per batch of 50 titles.
  batches = titles.each_slice(50).map do |batch|
    response = prepare_request(@api.query.titles(*batch), &processor).response

    # If additional props are required, there may be additional pages, even despite each_slice(50)
    response = response.continue while response.continue?

    by_title = response['pages'].values.map { |page| [page['title'], page] }.to_h
    redirected = (response['redirects'] || []).map { |r| [r['from'], by_title[r['to']]] }.to_h

    # This way for 'Einstein' query we'll have {'Albert Einstein' => page, 'Einstein' => same page}
    by_title.merge(redirected)
  end

  batches.inject(:merge)
end
# Receive list of parsed MediaWiki pages for list of titles provided.
# All pages are received with single query to MediaWiki API.
#
# **NB**: if you are requesting more than 50 titles at once
# (MediaWiki limitation for single request), Infoboxer will do as
# many queries as necessary to extract them all (it will be like
# `(titles.count / 50.0).ceil` requests)
#
# @param titles [Array<String>] List of page titles to get.
# @param interwiki [Symbol] Identifier of other wiki, related to current, to fetch pages from.
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
# while using it.
#
# @return [Page, Tree::Nodes<Page>] array of parsed pages. Notes:
# * if you call `get` with only one title, one page will be
# returned instead of an array
# * if some of pages are not in wiki, they will not be returned,
# therefore resulting array can be shorter than titles array;
# you can always check `pages.map(&:title)` to see what you've
# really received; this approach allows you to write absent-minded
# code like this:
#
# ```ruby
# Infoboxer.wp.get('Argentina', 'Chile', 'Something non-existing').
# infobox.fetch('some value')
# ```
# and obtain meaningful results instead of `NoMethodError` or
# `SomethingNotFound`.
#
def get(*titles, interwiki: nil, &processor)
  # Requests for another wiki are handed over to that wiki's own client.
  return interwikis(interwiki).get(*titles, &processor) if interwiki

  found = get_h(*titles, &processor).values.compact
  if titles.count == 1
    # Single title requested: the page itself (or nil), not a list.
    found.first
  else
    Tree::Nodes[*found]
  end
end
# Same as {#get}, but returns hash of `{requested title => page}`.
#
# Useful quirks:
# * when requested page not existing, key will be still present in
# resulting hash (value will be `nil`);
# * when requested page redirects to another, key will still be the
# requested title. For ex., `get_h('Einstein')` will return hash
# with key 'Einstein' and page titled 'Albert Einstein'.
#
# This allows you to be in full control of what pages of large list
# you've received.
#
# @param titles [Array<String>] List of page titles to get.
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
# while using it.
#
# @return [Hash<String, Page>]
#
def get_h(*titles, &processor)
  fetched = raw(*titles, &processor)

  # An invalid title fails the whole request with the reason reported by MediaWiki.
  _, invalid = fetched.detect { |_, page| page['invalid'] }
  fail(invalid['invalidreason']) if invalid

  existing = fetched.reject { |_, page| page.key?('missing') }

  # Keys are the requested titles; the value is nil for pages that were not found.
  titles.map { |title| [title, make_page(existing, title)] }.to_h
end
# Receive list of parsed MediaWiki pages from specified category.
#
# @param title [String] Category title. You can use namespaceless title (like
# `"Countries in South America"`), title with namespace (like
# `"Category:Countries in South America"`) or title with local
# namespace (like `"Catégorie:Argentine"` for French Wikipedia)
# @param limit [Integer, "max"]
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
# while using it.
#
# @return [Tree::Nodes<Page>] array of parsed pages.
#
def category(title, limit: 'max', &processor)
# Make sure the title carries one of the wiki's category namespace names.
title = normalize_category_title(title)
list(@api.query.generator(:categorymembers).title(title), limit, &processor)
end
# Receive list of parsed MediaWiki pages for provided search query.
# See [MediaWiki API docs](https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bsearch)
# for details.
#
# @param query [String] Search query. For old installations, look at
# https://www.mediawiki.org/wiki/Help:Searching
# for search syntax. For new ones (including Wikipedia), see at
# https://www.mediawiki.org/wiki/Help:CirrusSearch.
# @param limit [Integer, "max"]
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
# while using it.
#
# @return [Tree::Nodes<Page>] array of parsed pages.
#
def search(query, limit: 'max', &processor)
# Search results are used as a generator, so content is fetched for every page found.
list(@api.query.generator(:search).search(query), limit, &processor)
end
# Receive list of parsed MediaWiki pages with titles starting with prefix.
# See [MediaWiki API docs](https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bprefixsearch)
# for details.
#
# @param prefix [String] Page title prefix.
# @param limit [Integer, "max"]
# @param processor [Proc] Optional block to preprocess MediaWiktory query. Refer to
# [MediaWiktory::Actions::Query](http://www.rubydoc.info/gems/mediawiktory/MediaWiktory/Wikipedia/Actions/Query)
# for its API. Infoboxer assumes that the block returns new instance of `Query`, so be careful
# while using it.
#
# @return [Tree::Nodes<Page>] array of parsed pages.
#
def prefixsearch(prefix, limit: 'max', &processor)
# Prefix search results are used as a generator, so content is fetched for every page found.
list(@api.query.generator(:prefixsearch).search(prefix), limit, &processor)
end
# @return [String]
def inspect
# Only the class and the API host are shown.
"#<#{self.class}(#{@api_base_url.host})>"
end
private
# Builds a parsed {Page} for +title+ from the raw pages hash; the title is
# matched ignoring case. Returns nil when there is no source for the title.
def make_page(raw_pages, title)
  entry = raw_pages.detect { |page_title, _| page_title.casecmp(title).zero? }
  source = entry && entry.last
  return nil unless source

  content = source['revisions'].first['*']
  Page.new(self, Parser.paragraphs(content, traits), source)
end
# Runs a generator query and returns its pages parsed.
#
# @param query generator query to run
# @param limit [Integer, "max"] with "max", continuations are followed to
#   the end; with a number, until at least that many pages are collected
# @param processor [Proc] optional query preprocessor, see {#prepare_request}
# @return [Tree::Nodes<Page>] empty when the response contains no pages
def list(query, limit, &processor)
  request = prepare_request(query.limit(limit), &processor)
  response = request.response

  # A response may have no 'pages' at all (see the guard below), so the
  # count must not be taken from nil while deciding whether to continue.
  while response.continue? && (limit == 'max' || (response['pages'] || {}).count < limit)
    response = response.continue
  end

  return Tree::Nodes[] if response['pages'].nil?

  pages = response['pages']
          .values.select { |p| p['missing'].nil? }
          .map { |raw| Page.new(self, Parser.paragraphs(raw['revisions'].first['*'], traits), raw) }

  Tree::Nodes[*pages]
end
# Adds everything Infoboxer needs to a query (revision content and
# timestamp, page URL, redirect resolution), then lets the optional block
# adjust the result; the block's return value becomes the request.
def prepare_request(request)
  prepared = request.prop(:revisions, :info).prop(:content, :timestamp, :url).redirects
  return prepared unless block_given?

  yield(prepared)
end
# Returns +title+ with a category namespace: kept as is when it already
# starts with one of the wiki's category namespace names, otherwise the
# first of those names is prepended to the whole title.
def normalize_category_title(title)
  # FIXME: shouldn't it go to MediaWiktory?..
  known = traits.category_namespace
  prefix, rest = title.split(':', 2)

  if title.include?(':') && known.include?(prefix)
    [prefix, rest].join(':')
  else
    [known.first, title].join(':')
  end
end
# Effective User-Agent: per-instance value, then class-level setting, then the default {UA}.
def user_agent(custom)
custom || self.class.user_agent || UA
end
# Namespaces, namespace aliases and interwiki map of the wiki as a Hash;
# requested from the API once and memoized.
def siteinfo
@siteinfo ||= @api.query.meta(:siteinfo).prop(:namespaces, :namespacealiases, :interwikimap).response.to_h
end
# Client for the wiki registered under interwiki +prefix+; clients are
# created lazily and cached per prefix.
#
# @param prefix [String, Symbol] interwiki prefix, like `:de`
# @return [MediaWiki]
# @raise [ArgumentError] when the prefix is not in the interwiki map or its
#   URL does not look like a MediaWiki article URL
def interwikis(prefix)
  @interwikis ||= Hash.new { |h, pre|
    # Use the block's own key (`pre`): the block is created once and
    # memoized, so the method argument it closes over is the prefix of the
    # FIRST call only. `to_s` lets Symbol prefixes match the String
    # prefixes of the interwiki map.
    interwiki = siteinfo['interwikimap'].detect { |iw| iw['prefix'] == pre.to_s } or
      fail ArgumentError, "Undefined interwiki: #{pre}"

    # FIXME: fragile, but what can we do?..
    m = interwiki['url'].match(%r{^(.+)/wiki/\$1$}) or
      fail ArgumentError, "Interwiki #{interwiki} seems not to be a MediaWiki instance"
    h[pre] = self.class.new("#{m[1]}/w/api.php") # TODO: copy useragent
  }

  @interwikis[prefix]
end
end
end
================================================
FILE: lib/infoboxer/navigation/lookup.rb
================================================
# frozen_string_literal: true
require_relative 'selector'
module Infoboxer
module Navigation
# See {Lookup::Node Lookup::Node} for everything!
module Lookup
# `Lookup::Node` module provides methods for navigating through
# page tree in XPath-like manner.
#
# What you need to know about it:
#
# ## Selectors
#
# Each `lookup_*` method (and others similar) receive
# _list of selectors_. Examples of acceptable selectors:
#
# ```ruby
# # 1. Node class:
# document.lookup(Bold) # all Bolds
#
# # 2. Class symbol
# document.lookup(:Bold)
# # same as above, useful if you don't want to include Infoboxer::Tree
# # in all of your code or write things like lookup(Infoboxer::Tree::Bold)
#
# # 3. Getter/pattern:
# document.lookup(text: /something/)
# # finds all nodes where result of getter matches pattern
#
# # Checks against patterns are performed with `===`, so you can
# # use regexps to find by text, or ranges to find by number, like
# document.lookup(:Heading, level: (3..4))
#
# # Nodes where method is not defined are ignored, so you can
# # rewrite above example as just
# document.lookup(level: 3..4)
# # ...and receive meaningful result without any NoMethodError
#
# # 4. Check symbol
# document.lookup(:bold?)
# # finds all nodes for which `:bold?` is defined and returns
# # truthy value;
#
# # 5. Code block
# document.lookup{|node| node.params.has_key?(:class)}
# ```
#
# You also can use any of those method without **any** selector,
# thus receiving ALL parents, ALL children, ALL siblings and so on.
#
# ## Chainable navigation
#
# Each `lookup_*` method returns an instance of {Tree::Nodes} class,
# which behaves like an Array, but also defines similar set of
# `lookup_*` methods, so, you can brainlessly do the things like
#
# ```ruby
# document.
# lookup(:Paragraph){|p| p.text.length > 100}.
# lookup(:Wikilink, text: /^List of/).
# select(&:bold?)
# ```
#
# ## Underscored methods
#
# For all methods of this module you can notice "underscored" version
# (`lookup_children` vs `_lookup_children` and so on). Basically,
# underscored versions accept instance of {Lookup::Selector}, which
# is already preprocessed version of all selectors. It is kinda
# internal thing, though can be useful if you store selectors in
# variables -- it is easier to have and use just one instance of
# Selector, than list of arguments and blocks.
#
module Node
# @!method matches?(*selectors, &block)
# Checks if current node matches selectors.
# @!method lookup(*selectors, &block)
# Selects matching nodes from entire subtree inside current node.
# @!method lookup_children(*selectors, &block)
# Selects nodes only from this node's direct children.
# @!method lookup_parents(*selectors, &block)
# Selects matching nodes of this node's parents chain, up to
# entire {Tree::Document Document}.
# @!method lookup_siblings(*selectors, &block)
# Selects matching nodes from current node's siblings.
# @!method lookup_next_siblings(*selectors, &block)
# Selects matching nodes from current node's siblings, which
# are below current node in parents children list.
# @!method lookup_prev_siblings(*selectors, &block)
# Selects matching nodes from current node's siblings, which
# are above current node in parents children list.
# @!method lookup_prev_sibling(*selectors, &block)
# Selects first matching nodes from current node's siblings, which
# are above current node in parents children list.
# Underscored version of {#matches?}
def _matches?(selector)
# The check itself is performed by the selector's #===.
selector === self
end
# Underscored version of {#lookup}
def _lookup(selector)
# Current node (when it matches) comes first, followed by the matches from
# the subtree under #children; nil placeholder and nesting are removed.
Tree::Nodes[_matches?(selector) ? self : nil, *children._lookup(selector)]
.flatten.compact
end
# Underscored version of {#lookup_children}
def _lookup_children(selector)
# NOTE(review): reads @children directly, so any #children override is
# bypassed here, unlike in #_lookup - confirm this is intended.
@children._find(selector)
end
# Underscored version of {#lookup_parents}
def _lookup_parents(selector)
  # The top of the tree is reached: nothing more to collect.
  return Tree::Nodes[] unless parent

  # The nearest parent is checked first, then the rest of the chain.
  matched = parent._matches?(selector)
  further = parent._lookup_parents(selector)

  matched ? Tree::Nodes[parent, *further] : further
end
# Underscored version of {#lookup_siblings}
def _lookup_siblings(selector)
# Filters the list returned by #siblings.
siblings._find(selector)
end
# Underscored version of {#lookup_prev_siblings}
def _lookup_prev_siblings(selector)
# Filters the list returned by #prev_siblings.
prev_siblings._find(selector)
end
# Underscored version of {#lookup_prev_sibling}
def _lookup_prev_sibling(selector)
# The list is reversed before searching, so the LAST matching entry of
# #prev_siblings is returned (a single node or nil, not a list).
prev_siblings.reverse.detect { |n| selector === n }
end
# Underscored version of {#lookup_next_siblings}
def _lookup_next_siblings(selector)
# Filters the list returned by #next_siblings.
next_siblings._find(selector)
end
# Public lookup methods: each one builds a Selector from its arguments and
# block, and passes it to the underscored method of the same name.
%i[
  matches?
  lookup lookup_children lookup_parents
  lookup_siblings
  lookup_next_siblings lookup_prev_siblings
  lookup_prev_sibling
].each do |public_name|
  internal_name = :"_#{public_name}"

  define_method(public_name) do |*selectors, &block|
    send(internal_name, Selector.new(*selectors, &block))
  end
end
# Checks if node has any parent matching selectors.
def parent?(*selectors, &block)
# True when at least one node of the parents chain matches.
!lookup_parents(*selectors, &block).empty?
end
end
# This module provides implementations for all `lookup_*` methods
# of {Lookup::Node} to be used on lists of nodes. Note that all
# those methods return _flat_ list of results (so, if you have
# found several nodes, and then look for their siblings, you should
# not expect array of arrays -- just one array of nodes).
#
# See {Lookup::Node} for detailed lookups and selectors explanation.
module Nodes
# @!method lookup(*selectors, &block)
# @!method lookup_children(*selectors, &block)
# @!method lookup_parents(*selectors, &block)
# @!method lookup_siblings(*selectors, &block)
# @!method lookup_next_siblings(*selectors, &block)
# @!method lookup_prev_siblings(*selectors, &block)
# @!method _lookup(selector)
# @!method _lookup_children(selector)
# @!method _lookup_parents(selector)
# @!method _lookup_siblings(selector)
# @!method _lookup_next_siblings(selector)
# @!method _lookup_prev_siblings(selector)
# Underscored version of {#find}.
def _find(selector)
select { |n| n._matches?(selector) }
end
# Selects nodes of current list (and only it, no children checks),
# which are matching selectors.
def find(*selectors, &block)
_find(Selector.new(*selectors, &block))
end
%i[
_lookup _lookup_children _lookup_parents
_lookup_siblings _lookup_prev_siblings _lookup_next_siblings
].each do |sym|
define_method(sym) do |*args|
make_nodes(map { |n| n.send(sym, *args) })
end
end
# not delegate, but redefine: Selector should be constructed only once
%i[
lookup lookup_children lookup_parents
lookup_siblings
lookup_next_siblings lookup_prev_siblings
].map { |sym| [sym, :"_#{sym}"] }.each do |sym, underscored|
define_method(sym) do |*args, &block|
send(underscored, Selector.new(*args, &block))
end
end
end
end
end
end
================================================
FILE: lib/infoboxer/navigation/sections.rb
================================================
# frozen_string_literal: true
module Infoboxer
module Navigation
# `Sections` module provides logical view on document structure.
#
# From this module's point of view, each {Tree::Document Document} is a
# {Sections::Container Sections::Container}, which consists of
# {Sections::Container#intro} (before first heading) and a set of
# nested {Sections::Container#sections}.
#
# Each document node, in turn, provides method {Sections::Node#in_sections},
# allowing you to receive list of sections, which contains current
# node.
#
# **NB**: Sections are "virtual" nodes, they are not, in fact, in
# documents tree. So, you can be surprised with:
#
# ```ruby
# document.sections # => list of Section instances
# document.lookup(:Section) # => []
#
# paragraph.in_sections # => list of sections
# paragraph.
# lookup_parents(:Section) # => []
# ```
module Sections
# This module is included in {Tree::Document Document}, allowing
# you to navigate through document's logical sections (and also
# included in each {Sections::Section} instance, allowing to navigate
# recursively).
#
# See also {Sections parent module} docs.
module Container
  # All container's paragraph-level nodes before first heading.
  #
  # @return {Tree::Nodes}
  def intro
    children
      .take_while { |n| !n.is_a?(Tree::Heading) }
      .select { |n| n.is_a?(Tree::BaseParagraph) }
  end

  # List of sections inside current container.
  #
  # Examples of usage:
  #
  # ```ruby
  # document.sections # all top-level sections
  # document.sections('Culture') # only "Culture" section
  # document.sections(/^List of/) # all sections with heading matching pattern
  #
  # document.
  #   sections('Culture'). # long way of receiving nested section
  #   sections('Music') # (Culture / Music)
  #
  # document.
  #   sections('Culture', 'Music') # the same as above
  #
  # document.
  #   sections('Culture' => 'Music') # pretty-looking version for 2 levels of nesting
  # ```
  #
  # @param names heading matchers, one per nesting level; each is compared
  #   with `===` against the heading's `text_`. A single-pair Hash as the
  #   first argument is expanded into two levels.
  # @raise [ArgumentError] when a Hash with other than one pair is passed
  # @return {Tree::Nodes<Section>}
  def sections(*names)
    # The section list is built once per container and memoized.
    @sections ||= make_sections

    if names.first.is_a?(Hash)
      h = names.shift
      h.count == 1 or fail(ArgumentError, "Undefined behavior with #{h}")
      names.unshift(h.keys.first, h.values.first)
    end

    case names.count
    when 0
      @sections
    when 1
      @sections.select { |s| names.first === s.heading.text_ }
    else
      @sections.select { |s| names.first === s.heading.text_ }.sections(*names[1..])
    end
  end

  # Builds one {Section} per given name, looking the heading up among
  # direct children (`text_:` selector). Names without a matching heading
  # are skipped. The section body is the heading's following siblings up
  # to (not including) the next heading whose level is not greater than
  # that heading's level.
  #
  # @return {Tree::Nodes<Section>}
  def subsections(*names)
    sections = names.map { |name|
      heading = lookup_children(:Heading, text_: name).first
      next unless heading

      body = heading.next_siblings
                    .take_while { |n| !n.is_a?(Tree::Heading) || n.level > heading.level }

      Section.new(heading, body)
    }.compact
    Tree::Nodes.new(sections)
  end

  # Children lookup that also understands the virtual `:Section` type:
  # when `:Section` is among the arguments, the remaining selectors (and
  # the block, if given) are applied to {#sections}; otherwise the call is
  # passed to the regular implementation via `super`.
  def lookup_children(*arg, &block)
    if arg.include?(:Section)
      # The block is forwarded explicitly: it is part of the selector.
      sections.find(*(arg - [:Section]), &block)
    else
      super
    end
  end

  private

  # Splits `children` into sections. The level of the first heading found
  # by `headings` defines the section level; everything before the first
  # heading of that level is ignored, every heading of that level starts a
  # new Section and all nodes up to the next such heading are pushed into it.
  def make_sections
    res = Tree::Nodes[]
    return res if headings.empty?

    level = headings.first.level

    children
      .chunk { |n| n.matches?(Tree::Heading, level: level) }
      .drop_while { |is_heading, _nodes| !is_heading }
      .each do |is_heading, nodes|
        if is_heading
          nodes.each do |node|
            res << Section.new(node)
          end
        else
          res.last.push_children(*nodes)
        end
      end

    res
  end
end
# Part of {Sections} navigation, allowing each node to know exact
# list of sections it contained in.
#
# See also {Sections parent module} documentation.
module Node
  # List of sections current node contained in (bottom-to-top:
  # smallest section first).
  #
  # A node without a parent (a root or detached node) is in no section
  # and gets an empty list. Nodes that are not direct children of a
  # {Tree::Document} delegate to their parent. For direct children the
  # result is memoized in `@in_sections`.
  #
  # @return {Tree::Nodes<Section>}
  def in_sections
    # Without this guard the next line would call `nil.in_sections`.
    return Tree::Nodes[] unless parent
    return parent.in_sections unless parent.is_a?(Tree::Document)
    return @in_sections if @in_sections

    # A heading belongs to the section opened by a preceding heading that
    # is exactly one level higher; any other node belongs to the section
    # of the closest preceding heading.
    heading =
      if is_a?(Tree::Heading)
        lookup_prev_sibling(Tree::Heading, level: level - 1)
      else
        lookup_prev_sibling(Tree::Heading)
      end
    unless heading
      @in_sections = Tree::Nodes[]
      return @in_sections
    end

    # Section body: everything after the heading until the next heading
    # of the same or a higher level.
    body = heading.next_siblings
                  .take_while { |n| !n.is_a?(Tree::Heading) || n.level > heading.level }

    section = Section.new(heading, body)
    @in_sections = Tree::Nodes[section, *heading.in_sections]
  end
end
# Part of {Sections} navigation, allowing chains of section search.
#
# See {Sections parent module} documentation.
module Nodes
  # @!method sections(*names)
  # @!method in_sections

  # Both methods are called on every element of the list and the
  # per-element results are handed to `make_nodes`.
  %i[sections in_sections].each do |sym|
    define_method(sym) do |*args|
      make_nodes(map { |n| n.send(sym, *args) })
    end
  end

  # Children lookup that also understands the virtual `:Section` type:
  # when `:Section` is among the arguments, the remaining selectors (and
  # the block, if given) are applied to {#sections}; otherwise the call is
  # passed to the regular implementation via `super`.
  def lookup_children(*arg, &block)
    if arg.include?(:Section)
      # The block is forwarded explicitly: it is part of the selector.
      sections.find(*(arg - [:Section]), &block)
    else
      super
    end
  end
end
# Virtual node, representing logical section of the document.
# Is not, in fact, in the tree.
#
# See {Sections parent module} documentation for details.
class Section < Tree::Compound
  # Section's heading.
  #
  # @return {Tree::Heading}
  attr_reader :heading

  # @param heading heading node that opens the section
  # @param children nodes forming the section body
  def initialize(heading, children = Tree::Nodes[])
    # Deliberately no `super`: we don't want to rewrite children's parent.
    @heading = heading
    @children = Tree::Nodes[*children]
    @params = {level: heading.level, heading: heading.text.strip}
  end

  # Appends nodes to the section body. As in the constructor, the nodes'
  # parent is not rewritten.
  def push_children(*nodes)
    nodes.each { |node| @children << node }
  end

  # A section never reports itself as empty.
  def empty?
    false
  end

  def inspect
    format('#<%s: %d nodes>', descr, children.count)
  end

  include Container
end
end
end
end
================================================
FILE: lib/infoboxer/navigation/selector.rb
================================================
# frozen_string_literal: true
module Infoboxer
module Navigation
module Lookup
# Encapsulates storage of selectors, used in {Lookup::Node node lookup}.
#
# See {Lookup::Node Lookup::Node} for detailed explanation of available selectors.
class Selector
  # @param arg selector parts: classes, capitalized symbols naming a
  #   `Tree` constant, other symbols (predicate names), hashes of
  #   attribute matchers; nested arrays are flattened, nils are dropped
  # @param block optional predicate, stored as one more selector part
  def initialize(*arg, &block)
    @arg = [arg, block].flatten.compact.map do |a|
      # Pairs with nil values are dropped from hashes. `compact` (not
      # `compact!`) is used so the caller's hash is never mutated.
      a.is_a?(Hash) ? a.compact : sym_to_class(a)
    end
  end

  # Normalized selector parts.
  attr_reader :arg

  # Selectors are equal when they are of the same class and hold equal parts.
  def ==(other)
    self.class == other.class && arg == other.arg
  end

  def inspect
    "#<Selector(#{@arg.map(&:to_s).join(', ')})>"
  end

  # @return [Boolean] true when every selector part matches the node
  def ===(other)
    @arg.all? { |a| arg_matches?(a, other) }
  end

  private

  # Replaces a capitalized symbol with the `Tree` constant of that name,
  # when such a constant is defined; everything else is returned as is.
  def sym_to_class(a)
    if a.is_a?(Symbol) && a =~ /^[A-Z][a-zA-Z]+$/ && Tree.const_defined?(a)
      Tree.const_get(a)
    else
      a
    end
  end

  # Matches one selector part against a node:
  # * Proc -- called with the node;
  # * Hash -- every pair must match either the node's method of that name
  #   or the node's `params` entry of that name;
  # * Symbol -- name of a method the node responds to, whose result is used;
  # * anything else -- `check === node`.
  def arg_matches?(check, node)
    case check
    when Proc
      check.call(node)
    when Hash
      check.all? { |attr, value|
        node.respond_to?(attr) && value_matches?(value, node.send(attr)) ||
          node.params.key?(attr) && value_matches?(value, node.params[attr])
      }
    when Symbol
      node.respond_to?(check) && node.send(check)
    else
      check === node
    end
  end

  # String against String is compared case-insensitively; any other
  # combination goes through `===` (so regexps, ranges and classes work).
  def value_matches?(matcher, value)
    if matcher.is_a?(String) && value.is_a?(String)
      matcher.casecmp(value).zero?
    else
      matcher === value
    end
  end
end
end
end
end
================================================
FILE: lib/infoboxer/navigation/shortcuts.rb
================================================
# frozen_string_literal: true
module Infoboxer
module Navigation
# See {Shortcuts::Node Shortcuts::Node} for everything!
module Shortcuts
# `Shortcuts::Node` module provides some convenience methods for
# most used lookups. It's not a rocket science (as you can see
# from methods code), yet should make your code cleaner and
# more readable.
#
# **NB**: as usual, {Tree::Nodes} class have synonyms for all of
# those methods, so you can call them fearlessly on any results of
# node lookup.
#
module Node
  # Returns all wikilinks inside current node.
  #
  # @param namespace from which namespace links do you want. It's
  #   `''` (main namespace only) by default, if you really want all
  #   wikilinks on the page, including categories, interwikies and
  #   stuff, use `wikilinks(nil)`
  # @return {Tree::Nodes}
  def wikilinks(namespace = '')
    lookup(Tree::Wikilink, namespace: namespace)
  end

  # Returns all headings inside current node.
  #
  # @param level headings level to return.
  # @return {Tree::Nodes}
  def headings(level = nil)
    lookup(Tree::Heading, level: level)
  end

  # Returns all paragraph-level nodes (list items, plain paragraphs,
  # headings and so on) inside current node.
  #
  # @param selectors node selectors, as described at {Lookup::Node}
  # @return {Tree::Nodes}
  def paragraphs(*selectors, &block)
    lookup(Tree::BaseParagraph, *selectors, &block)
  end

  # Returns all external links inside current node.
  #
  # @param selectors node selectors, as described at {Lookup::Node}
  # @return {Tree::Nodes}
  def external_links(*selectors, &block)
    lookup(Tree::ExternalLink, *selectors, &block)
  end

  # Returns all images (media) inside current node.
  #
  # @param selectors node selectors, as described at {Lookup::Node}
  # @return {Tree::Nodes}
  def images(*selectors, &block)
    lookup(Tree::Image, *selectors, &block)
  end

  # Returns all templates inside current node.
  #
  # @param selectors node selectors, as described at {Lookup::Node}
  # @return {Tree::Nodes}
  def templates(*selectors, &block)
    lookup(Tree::Template, *selectors, &block)
  end

  # Returns all tables inside current node.
  #
  # @param selectors node selectors, as described at {Lookup::Node}
  # @return {Tree::Nodes}
  def tables(*selectors, &block)
    lookup(Tree::Table, *selectors, &block)
  end

  # Returns all lists (ordered/unordered/definition) inside current node.
  #
  # @param selectors node selectors, as described at {Lookup::Node}
  # @return {Tree::Nodes}
  def lists(*selectors, &block)
    lookup(Tree::List, *selectors, &block)
  end

  # Returns true, if current node is **inside** bold.
  def bold?
    parent?(Tree::Bold)
  end

  # Returns true, if current node is **inside** italic.
  def italic?
    parent?(Tree::Italic)
  end

  # Returns true, if current node is **inside** heading.
  #
  # @param level optional concrete level to check
  def heading?(level = nil)
    parent?(Tree::Heading, level: level)
  end

  # Returns all infoboxes inside current node.
  #
  # Definition of what considered to be infobox depends on templates
  # set used when parsing the page.
  #
  # @param selectors node selectors, as described at {Lookup::Node}
  # @return {Tree::Nodes}
  def infoboxes(*selectors, &block)
    lookup(Tree::Template, :infobox?, *selectors, &block)
  end

  # Returns all wikilinks in "categories namespace".
  #
  # **NB**: depending on your MediaWiki settings, name of categories
  # namespace may vary. When you are using {MediaWiki#get}, Infoboxer
  # tries to handle this transparently (by examining used wiki for
  # category names), yet bad things may happen here.
  #
  # @raise [RuntimeError] when the node is not inside a page or the page
  #   has no site traits
  # @return {Tree::Nodes}
  def categories
    # The alternation is grouped and every name is escaped, so the whole
    # namespace has to equal one of the names. An ungrouped `^A|B$` would
    # also accept anything starting with A or ending with B.
    names = Regexp.union(ensure_traits.category_namespace)
    lookup(Tree::Wikilink, namespace: /\A(?:#{names.source})\z/)
  end

  # As users accustomed to have only one infobox on a page
  def infobox
    infoboxes.first
  end

  private

  # Site traits of the page this node belongs to.
  def ensure_traits
    ensure_page.traits or fail('No site traits found')
  end

  # The node itself when it is a page, otherwise its first parent page.
  def ensure_page
    (is_a?(MediaWiki::Page) ? self : lookup_parents(MediaWiki::Page).first) or
      fail('Node is not inside Page, maybe parsed from text?')
  end
end
# Companion module of {Shortcuts::Node Shortcuts::Node}, defining
# all the same methods for {Tree::Nodes} so you can use them
# uniformly on single node or list. See {Shortcuts::Node there} for
# details.
module Nodes
  # @!method wikilinks(namespace = '')
  # @!method headings(level = nil)
  # @!method paragraphs(*selectors, &block)
  # @!method external_links(*selectors, &block)
  # @!method images(*selectors, &block)
  # @!method templates(*selectors, &block)
  # @!method tables(*selectors, &block)
  # @!method lists(*selectors, &block)
  # @!method infoboxes(*selectors, &block)
  # @!method categories

  # Every shortcut is called on each element of the list and the
  # per-element results are handed to `make_nodes`. The block is forwarded
  # too: the node-level shortcuts accept one as part of the selector.
  %i[wikilinks headings paragraphs external_links images
     templates tables lists infoboxes infobox categories]
    .each do |m|
      define_method(m) do |*args, &block|
        make_nodes(map { |n| n.send(m, *args, &block) })
      end
    end
end
end
end
end
================================================
FILE: lib/infoboxer/navigation/wikipath.rb
================================================
# frozen_string_literal: true
require_relative '../wiki_path'
module Infoboxer
module Navigation
module Wikipath
  # Search nodes inside current by XPath alike query language.
  #
  # This feature is experimental, but should work for most of the useful cases.
  #
  # Examples of WikiPath:
  #
  # ```
  # /paragraph # direct child of current node, being paragraph
  # //paragraph # any node in current node's subtree, being paragraph
  # //template[name=Infobox] # template node in subtree, with name attribute equal to Infobox
  # //template[name="Infobox country"] # optional quotes are allowed
  # //template[name=/^Infobox/] # regexes are supported
  # //wikilink[italic] # node predicates are supported (the same as `lookup(:Wikilink, :italic?)`
  # //*[italic] # type wildcards are supported
  # //template[name=/^Infobox/]/var[name=birthday] # series of lookups work
  # ```
  #
  # The string is parsed by `Infoboxer::WikiPath.parse` and the parsed
  # path is then called with the receiver.
  #
  # @param string [String] WikiPath to lookup
  # @return [Nodes]
  def wikipath(string)
    path = Infoboxer::WikiPath.parse(string)
    path.call(self)
  end
end
end
end
================================================
FILE: lib/infoboxer/navigation.rb
================================================
# frozen_string_literal: true
module Infoboxer
# Navigation is one of the things Infoboxer is proud about. It tries
# to be logical, unobtrusive and compact.
#
# There's several levels of navigation:
# * simple tree navigation;
# * navigational shortcuts;
# * logical structure navigation (sections).
#
# ## Simple tree navigation
#
# It's somewhat similar to XPath/CSS selectors you'll use to navigate
# through HTML DOM. It is represented (and documented) in {Lookup::Node}
# module. To show you the taste of it:
#
# ```ruby
# document.
# lookup(:Wikilink, text: /Chile/).
# lookup_parents(:Table){|t| t.params[:class] == 'wikitable'}.
# lookup_children(size: 3)
# ```
#
# ## Navigational shortcuts
#
# There is nothing too complicated, just pretty shortcuts over `lookup_*`
# methods, so, you can write just
#
# ```ruby
# document.paragraphs.last.wikilinks('Category')
# ```
# ...instead of
# ```ruby
# document.lookup(:Paragraph).last.lookup(:Wikilink, namespace: 'Category')
# ```
# ...and so on.
#
# Look into {Shortcuts::Node} documentation for list of shortcuts.
#
# ## Wikipath
#
# WikiPath is XPath-alike query language you can use to navigate the tree:
#
# ```ruby
# document.wikipath('//paragraph//wikilink[namespace=Category]')
# ```
#
# It can look more or less verbose than pure-ruby navigation, but the big advantage of WikiPath
# is it is pure data: you can store some paths in YAML file, for example.
#
# Look at {Wikipath#wikipath #wikipath} method docs for full reference.
#
# ## Logical structure navigation
#
# MediaWiki page structure is flat, like HTML's (there's just sequence
# of headings and paragraphs). Though, for most tasks of information
# extraction it is useful to think of page as a structure of nested
# sections. {Sections} module provides such ability. It treats document
# as an {Sections::Container#intro intro} and set of subsequent
# {Sections::Section section}s of same level, which, in turn, contain
# their own intro and sections. Also, each node has
# {Sections::Node#in_sections #in_sections} method, returning all sections
# in which it is nested.
#
# The code with sections can feel like this:
#
# ```ruby
# page.sections('Culture' => 'Music').tables
# # or like this
# page.wikilinks.select{|link| link.in_sections.first.heading.text.include?('Culture')}
# ```
#
# See {Sections::Container} for downwards section navigation, and
# {Sections::Node} for upwards.
#
module Navigation
# Load every navigation feature file from the `navigation/` directory.
%w[lookup shortcuts sections wikipath].each do |nav|
require_relative "navigation/#{nav}"
end
# Single nodes get the node-level side of every navigation feature.
class Tree::Node
include Navigation::Lookup::Node
include Navigation::Shortcuts::Node
include Navigation::Sections::Node
include Navigation::Wikipath
end
# Node lists get the list-level side of the same features.
# NB: include order matters here: Sections::Nodes#lookup_children calls
# `super`, so Lookup::Nodes has to be included before it.
class Tree::Nodes
include Navigation::Lookup::Nodes
include Navigation::Shortcuts::Nodes
include Navigation::Sections::Nodes
include Navigation::Wikipath
end
# Documents additionally act as section containers (intro + sections).
class Tree::Document
include Navigation::Sections::Container
end
# Optional helpers for building selectors by hand.
module Helpers
# Shortcut for constructing a Lookup::Selector from the given
# arguments and block.
def W(*arg, &block) # rubocop:disable Naming/MethodName
Lookup::Selector.new(*arg, &block)
end
end
end
end
================================================
FILE: lib/infoboxer/parser/context.rb
================================================
# frozen_string_literal: true
require 'strscan'
module Infoboxer
class Parser
# Line-oriented parsing state: the source split into lines, the index of
# the current line and a StringScanner over that line.
class Context # rubocop:disable Metrics/ClassLength
  # Index of the current line.
  attr_reader :lineno
  # Site traits passed to the constructor (or the default ones).
  attr_reader :traits

  # @param text [String] source text; `<!-- ... -->` comments are removed
  #   before it is split into lines
  # @param traits site traits; `MediaWiki::Traits.default` when omitted
  def initialize(text, traits = nil)
    @lines = text
             .gsub(/<!--.*?-->/m, '') # FIXME: will also kill comments inside <nowiki> tag
             # "\r\n" has to be tried first: splitting on /[\r\n]/ alone
             # produces a phantom empty line for every CRLF line break.
             .split(/\r\n|[\r\n]/)
    @lineno = -1
    @traits = traits || MediaWiki::Traits.default
    @scanner = StringScanner.new('')
    next!
  end

  # Lines after the current one; nil once we moved past the last line.
  attr_reader :next_lines

  # Scanner position inside the current line (0 when there is no line).
  def colno
    @scanner&.pos || 0
  end

  # Last matched string, memoized until the next scanning operation.
  def matched
    @matched ||= @scanner&.matched
  end

  # check which works only once
  def eat_matched?(str)
    return false unless matched == str

    @matched = 'DUMMY'
    true
  end

  # Unscanned rest of the current line, memoized until the next scanning
  # operation; nil when there is no current line.
  def rest
    @rest ||= @scanner&.rest
  end

  alias_method :current, :rest

  # lines navigation

  # Moves to the next line.
  def next!
    shift(+1)
  end

  # Moves to the previous line.
  def prev!
    shift(-1)
  end

  def eof?
    !next_lines || # we are after the file end
      next_lines.empty? && eol?
  end

  def inspect
    "#<Context(line #{lineno} of #{@lines.count}: #{current})>"
  end

  # scanning
  # Each of the operations below resets the memoized #matched and #rest.

  def scan(re)
    res = @scanner.scan(re)
    @matched = nil
    @rest = nil
    res
  end

  def check(re)
    res = @scanner.check(re)
    @matched = nil
    @rest = nil
    res
  end

  def skip(re)
    res = @scanner.skip(re)
    @matched = nil
    @rest = nil
    res
  end

  # Scans the current line up to `re`. Unless `leave_pattern` is set, the
  # matched pattern is cut out of the returned string.
  #
  # @raise [ParsingError] when called after the end of input
  def scan_until(re, leave_pattern = false)
    guard_eof!

    res = _scan_until(re)
    res[matched] = '' if res && !leave_pattern
    res
  end

  # Sets an additional regexp that marks the end of inline content.
  def push_eol_sign(re)
    @inline_eol_sign = re
  end

  # Clears the regexp set by #push_eol_sign.
  def pop_eol_sign
    @inline_eol_sign = nil
  end

  attr_reader :inline_eol_sign

  # True at the end of line, or when the rest of the line starts with
  # `</ref>`, `}}` or the pushed EOL sign -- unless the found terminator
  # matches `exclude`.
  def inline_eol?(exclude = nil)
    # not using StringScanner#check, as it will change #matched value
    eol? ||
      (
        (current =~ %r[^(</ref>|}})] || @inline_eol_sign && current =~ @inline_eol_sign) &&
        (!exclude || Regexp.last_match(1) !~ exclude)
      ) # FIXME: ugly, but no idea of prettier solution
  end

  # Like #scan_until, but continues onto following lines (joining them
  # with "\n") until `re` is found.
  #
  # @raise [ParsingError] when input ends before `re` is found
  def scan_continued_until(re, leave_pattern = false)
    res = +''

    loop do
      chunk = _scan_until(re)
      case matched
      when re
        res << chunk
        break
      when nil
        res << rest << "\n"
        next!
        eof? && fail!("Unfinished scan: #{re} not found")
      end
    end

    res[/#{re}\Z/] = '' unless leave_pattern
    res
  end

  # state inspection

  def matched_inline?(re)
    if re.nil?
      matched.empty? && eol?
    elsif re.inspect.start_with?('/^') # was it REALLY at the beginning of the line?..
      @scanner.pos == matched.length && matched =~ re
    else
      matched =~ re
    end
  end

  def matched?(re)
    re && matched =~ re
  end

  def eol?
    !current || current.empty?
  end

  # basic services

  # @raise [ParsingError] always; the message includes line number and
  #   the rest of the current line
  def fail!(text)
    fail(ParsingError, "#{text} at line #{@lineno}:\n\t#{current}")
  end

  # Moves the scanner back by the size of the memoized match.
  def unscan_matched!
    return unless @matched

    @scanner.pos -= @matched.size
    @rest = nil
  end

  private

  # we do hard use of #matched and #rest, its wiser to memoize them
  def _scan_until(re)
    res = @scanner.scan_until(re)
    @matched = nil
    @rest = nil
    res
  end

  def guard_eof!
    @scanner or fail!('End of input reached')
  end

  # Moves the current line by `amount` and re-targets the scanner.
  def shift(amount)
    @lineno += amount
    current = @lines[lineno]
    @next_lines = @lines[(lineno + 1)..]
    if current
      # The scanner is dropped once we run past the last line; re-create
      # it when stepping back onto a real line (e.g. via #prev!).
      @scanner ||= StringScanner.new('')
      @scanner.string = current
      @rest = current
    else
      @scanner = nil
      @rest = nil
    end
    @matched = nil
  end
end
end
end
================================================
FILE: lib/infoboxer/parser/html.rb
================================================
# frozen_string_literal: true
module Infoboxer
class Parser
# Parsing of HTML-like tags met in wikitext. All methods work on the
# current position of `@context`.
module HTML
  include Tree

  # Dispatches on what follows the current position: closing tag, `<br>`,
  # auto-closing tag or opening tag.
  #
  # @return parsed node(s), or nil when it is not an HTML tag at all
  def html
    if @context.check(%r{/[a-z]+>})
      html_closing_tag
    elsif @context.check(/br\s*>/)
      html_br
    elsif @context.check(%r{[a-z]+[^/>]*/>})
      html_auto_closing_tag
    elsif @context.check(%r{[a-z]+[^>/]*>})
      html_opening_tag
    end
  end

  # `/name>`
  def html_closing_tag
    @context.skip(%r{/})
    name = @context.scan(/[a-z]+/)
    @context.skip(/>/)
    HTMLClosingTag.new(name)
  end

  # `br>` (with optional spaces before `>`)
  def html_br
    @context.skip(/br\s*>/)
    HTMLTag.new('br', {})
  end

  # `name attrs/>`
  def html_auto_closing_tag
    name = @context.scan(/[a-z]+/)
    raw_attrs = @context.scan(%r{[^/>]*})
    @context.skip(%r{/>})
    HTMLTag.new(name, parse_params(raw_attrs))
  end

  # `name attrs>` followed by inline content. When the matching closing
  # tag was found, the result is one HTMLTag with the content inside;
  # otherwise an HTMLOpeningTag followed by the content nodes.
  def html_opening_tag
    name = @context.scan(/[a-z]+/)
    raw_attrs = @context.scan(/[^>]+/)
    @context.skip(/>/)

    closing = %r{</#{name}>}
    inner = short_inline(closing)
    return HTMLTag.new(name, parse_params(raw_attrs), inner) if @context.matched =~ closing

    [
      HTMLOpeningTag.new(name, parse_params(raw_attrs)),
      *inner
    ]
  end
end
end
end
================================================
FILE: lib/infoboxer/parser/image.rb
================================================
# frozen_string_literal: true
module Infoboxer
class Parser
# Parsing of image (file) links.
module Image
  include Tree

  # Parses an image link starting at the file namespace prefix.
  #
  # @return [Tree::Image]
  def image
    unless @context.skip(re.file_namespace)
      @context.fail!("Something went wrong: it's not image?")
    end

    path = @context.scan_until(/\||\]\]/)
    attrs =
      if @context.matched == '|'
        image_attrs
      else
        {}
      end
    Tree::Image.new(path, **attrs)
  end

  # Reads `|`-separated attributes up to the closing `]]` and merges them
  # into one hash; nil and empty values are dropped.
  def image_attrs
    parts = []

    loop {
      parts << long_inline(/\||\]\]/)
      break if @context.matched == ']]'
    }

    parts
      .map { |part| image_attr(part) }
      .inject { |merged, attr| merged.merge(attr) }
      .reject { |_key, value| value.nil? || value.empty? }
  end

  # Converts one attribute (a list of nodes) into a single-purpose hash.
  # Anything that is not one plain Text node, or is not recognized, is
  # treated as the caption.
  def image_attr(nodes)
    plain = nodes.count == 1 && nodes.first.is_a?(Text)
    # it's caption, and can have inline markup!
    return {caption: ImageCaption.new(nodes)} unless plain

    token = nodes.first.text
    case token
    when /^(thumb)(?:nail)?$/, /^(frame)(?:d)?$/
      {type: $~[1]}
    when 'frameless'
      {type: token}
    when 'border'
      {border: token}
    when /^(baseline|middle|sub|super|text-top|text-bottom|top|bottom)$/
      {alignment: token}
    when /^(\d*)(?:x(\d+))?px$/
      {width: $~[1], height: $~[2]}
    when /^link=(.*)$/i
      {link: $~[1]}
    when /^alt=(.*)$/i
      {alt: $~[1]}
    else # text-only caption
      {caption: ImageCaption.new(nodes)}
    end
  end
end
end
end
================================================
FILE: lib/infoboxer/parser/inline.rb
================================================
# frozen_string_literal: true
module Infoboxer
class Parser
# Parsing of inline-level markup: formatting, links, templates,
# references and friends.
module Inline
  include Tree

  # Parses inline content, continuing onto following lines (a "\n" is
  # inserted for each line break), until `until_pattern` is matched.
  #
  # @param until_pattern [Regexp, nil] terminator; with nil, parsing stops
  #   at the end of input
  # @raise [ParsingError] when a terminator was given but input ended
  # @return [Nodes]
  def inline(until_pattern = nil)
    start = @context.lineno
    nodes = Nodes[]
    guarded_loop do
      chunk = @context.scan_until(re.inline_until_cache[until_pattern])
      nodes << chunk

      break if @context.matched_inline?(until_pattern)

      nodes << inline_formatting(@context.matched) unless @context.matched.empty?

      if @context.eof?
        break unless until_pattern

        @context.fail!("#{until_pattern.source} not found, starting from #{start}")
      end

      if @context.eol?
        nodes << "\n"
        @context.next!
      end
    end

    nodes
  end

  # Parses inline content that stops at `until_pattern` or at the inline
  # end of line (see Context#inline_eol?).
  #
  # @return [Nodes]
  def short_inline(until_pattern = nil)
    nodes = Nodes[]
    guarded_loop do
      # FIXME: quick and ugly: picks the regexp cache that corresponds to
      # the EOL sign currently pushed into the context
      chunk =
        if @context.inline_eol_sign == /^\]/ # rubocop:disable Style/CaseLikeIf
          @context.scan_until(re.short_inline_until_cache_brackets[until_pattern])
        elsif @context.inline_eol_sign == /^\]\]/
          @context.scan_until(re.short_inline_until_cache_brackets2[until_pattern])
        else
          @context.scan_until(re.short_inline_until_cache[until_pattern])
        end
      nodes << chunk

      break if @context.matched_inline?(until_pattern)

      nodes << inline_formatting(@context.matched)

      break if @context.inline_eol?(until_pattern)
    end

    nodes
  end

  # Parses inline content of the current line; when the line ends before
  # `until_pattern` is met, the following lines are parsed as paragraphs
  # (up to the same pattern) and appended.
  #
  # @raise [ParsingError] when a terminator was given but input ended
  # @return [Nodes]
  def long_inline(until_pattern = nil)
    nodes = Nodes[]
    guarded_loop do
      chunk = @context.scan_until(re.inline_until_cache[until_pattern])
      nodes << chunk

      break if @context.matched?(until_pattern)

      nodes << inline_formatting(@context.matched) unless @context.matched.empty?

      if @context.eof?
        break unless until_pattern

        @context.fail!("#{until_pattern.source} not found")
      end

      if @context.eol?
        @context.next!
        paragraphs(until_pattern).each do |p|
          nodes << p
        end
        break
      end
    end

    nodes
  end

  private

  # Turns the markup token that was just matched into a node by
  # dispatching to the corresponding parsing method. Unknown tokens are
  # returned as is.
  def inline_formatting(match) # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/AbcSize
    case match
    when "'''''"
      BoldItalic.new(short_inline(/'''''/))
    when "'''"
      Bold.new(short_inline(/'''/))
    when "''"
      Italic.new(short_inline(/''/))
    when '[['
      if @context.check(re.file_namespace)
        image
      else
        wikilink
      end
    when /\[(.+)/
      external_link(Regexp.last_match(1))
    when '{{'
      template
    when /<nowiki([^>]*)>/
      nowiki(Regexp.last_match(1))
    when %r{<ref([^>]*)/>}
      reference(Regexp.last_match(1), true)
    when /<ref([^>]*)>/
      reference(Regexp.last_match(1))
    when /<math>/
      math
    when /<gallery([^>]*)>/
      gallery(Regexp.last_match(1))
    when '<'
      html || Text.new(match) # it was not HTML, just accidental <
    else
      match # FIXME: TEMP
    end
  end

  # http://en.wikipedia.org/wiki/Help:Link#Wikilinks
  # [[abc]]
  # [[a|b]]
  #
  # The part before the first `:` is checked against the site traits: a
  # known namespace is kept in the link and stored as `namespace:`, a
  # known interwiki prefix is cut off and stored as `interwiki:`.
  def wikilink
    link = @context.scan_continued_until(/\||\]\]/)
    if @context.matched == '|'
      @context.push_eol_sign(/^\]\]/)
      caption = inline(/\]\]/)
      @context.pop_eol_sign
    end
    name, namespace = link.split(':', 2).reverse
    lnk, params =
      if @context.traits.namespace?(namespace)
        [link, {namespace: namespace}]
      elsif @context.traits.interwiki?(namespace)
        [name, {interwiki: namespace}]
      else
        [link, {}]
      end

    Wikilink.new(lnk, caption, **params)
  end

  # http://en.wikipedia.org/wiki/Help:Link#External_links
  # [http://www.example.org]
  # [http://www.example.org link name]
  #
  # @param protocol beginning of the link, already consumed by the caller
  def external_link(protocol)
    link = @context.scan_continued_until(/\s+|\]/)
    if @context.matched =~ /\s+/
      @context.push_eol_sign(/^\]/)
      caption = short_inline(/\]/)
      @context.pop_eol_sign
    end
    ExternalLink.new(protocol + link, caption)
  end

  # @param param_str attributes of the `<ref>` tag
  # @param closed true for the self-closed form, which has no content
  def reference(param_str, closed = false)
    children = closed ? Nodes[] : long_inline(%r{</ref>})
    Ref.new(children, **parse_params(param_str))
  end

  # Everything up to `</math>` is taken verbatim.
  def math
    Math.new(@context.scan_continued_until(%r{</math>}))
  end

  # @param tag_rest what was found between `<nowiki` and `>`; a trailing
  #   `/` means the tag is self-closed and produces empty text
  def nowiki(tag_rest)
    if tag_rest.end_with?('/')
      Text.new('')
    else
      Text.new(@context.scan_continued_until(%r{</nowiki>}))
    end
  end

  # Parses gallery content: one image per line, optionally followed by
  # `|`-separated attributes, until `</gallery>`.
  #
  # @param tag_rest attributes of the `<gallery>` tag
  def gallery(tag_rest)
    params = parse_params(tag_rest)
    images = []
    guarded_loop do
      @context.next! if @context.eol?
      path = @context.scan_until(%r{</gallery>|\||$})
      attrs = @context.matched == '|' ? gallery_image_attrs : {}
      unless path.empty?
        # FIXME: what if path NOT matches the namespace?
        images << Tree::Image.new(path.sub(/^#{re.file_namespace.source}/i, ''), **attrs)
      end
      break if @context.matched == '</gallery>'
    end
    Gallery.new(images, **params)
  end

  # Attributes of one gallery image: read until the end of line or
  # `</gallery>`, then merged into one hash without nil/empty values.
  def gallery_image_attrs
    nodes = []

    guarded_loop do
      nodes << short_inline(%r{\||</gallery>})
      break if @context.eol? || @context.matched?(%r{</gallery>})
    end

    nodes.map(&method(:image_attr))
         .inject(&:merge)
         .reject { |_k, v| v.nil? || v.empty? }
  end
end
require_relative 'image'
require_relative 'html'
require_relative 'template'
include Infoboxer::Parser::Image
include Infoboxer::Parser::HTML
include Infoboxer::Parser::Template
end
end
================================================
FILE: lib/infoboxer/parser/paragraphs.rb
================================================
# frozen_string_literal: true
module Infoboxer
class Parser
# Parsing of paragraph-level markup: one paragraph-level node per line.
module Paragraphs
  include Tree

  # Parses line after line until the end of input or, when `until_pattern`
  # is given, until the context reports that pattern as matched.
  #
  # @return [Nodes]
  def paragraphs(until_pattern = nil)
    collected = Nodes[]

    while !@context.eof?
      collected << paragraph(until_pattern)

      finished = until_pattern && @context.matched?(until_pattern)
      break if finished

      @context.next!
    end

    collected
  end

  private

  # Chooses the node type by the look of the current line.
  def paragraph(until_pattern)
    line = @context.current

    case line
    when /^(?<level>={2,})\s*(?<text>.+?)\s*\k<level>$/
      heading($~[:text], $~[:level])
    when /^\s*{\|/
      table
    when /^[*\#:;]./
      list(until_pattern)
    when /^-{4,}/
      HR.new
    when /^\s*$/
      # will, when merged, close previous paragraph or add spaces to <pre>
      EmptyParagraph.new(@context.current)
    when /^ (?!\s*{{)/ # Lookahead, because spaces before template are ignored
      pre(until_pattern)
    else
      Paragraph.new(short_inline(until_pattern))
    end
  end

  # @param text heading text, parsed as inline markup
  # @param level the run of `=` characters; its length is the heading level
  def heading(text, level)
    content = Parser.inline(text)
    Heading.new(content, level.length)
  end

  # http://en.wikipedia.org/wiki/Help:List
  def list(until_pattern)
    marker = @context.scan(/^([*\#:;]+)\s*/).strip
    levels = marker.chars.to_a
    List.construct(levels, short_inline(until_pattern))
  end

  # FIXME: in fact, there's some formatting, that should work inside pre
  def pre(until_pattern)
    @context.skip(/^ /)

    str =
      if until_pattern
        @context.scan_until(/(#{until_pattern}|$)/)
      else
        @context.current
      end

    Pre.new(Nodes[Text.new(str)])
  end

  require_relative 'table'
  include Parser::Table
end
end
end
================================================
FILE: lib/infoboxer/parser/table.rb
================================================
# frozen_string_literal: true
module Infoboxer
class Parser
# Parsing of wikitext tables, line by line.
#
# http://en.wikipedia.org/wiki/Help:Table
module Table
include Tree
# Parses a whole table, starting at the `{|` line.
#
# Lines are consumed one by one through #table_next_line until it asks
# to stop. Rows left without children are removed from the result.
#
# @return [Tree::Table]
def table
@context.current =~ /^\s*{\|/ or
@context.fail!('Something went wrong: trying to parse not a table')
log 'Starting to parse table'
prms = table_params
log "Table params found #{prms}"
table = Tree::Table.new(Nodes[], **prms)
@context.next!
guarded_loop do
table_next_line(table) or break
log 'Next table row'
@context.next!
end
# FIXME: not the most elegant way, huh?
table.children.reject! { |r| r.children.empty? }
table
end
# Skips the `{|` marker and parses the rest of the line as params.
def table_params
@context.skip(/\s*{\|/)
parse_params(@context.rest)
end
# Handles one line inside the table, dispatching on its beginning.
#
# @return truthy when table parsing should continue with the next line,
#   false when the table is finished
def table_next_line(table)
case @context.current
when /^\s*\|}(.*)$/ # table end
@context.scan(/^\s*\|}/)
return false
when /^\s*!/ # heading (th) in a row
table_cells(table, TableHeading)
when /^\s*\|\+/ # caption
table_caption(table)
when /^\s*\|-(.*)$/ # row start
table_row(table, Regexp.last_match(1))
when /^\s*\|/ # cell in row
table_cells(table)
when /^\s*{{/ # template can be at row level
table_template(table)
when nil
return false
when /^(?<level>={2,})\s*(?<text>.+?)\s*\k<level>$/ # heading implicitly closes the table
# step back, so the heading line is not swallowed by the table
@context.prev!
return false
else
return table_cell_cont(table)
end
true # should continue parsing
end
# Starts a new row; `param_str` is what follows `|-` on the line.
def table_row(table, param_str)
log 'Table row found'
table.push_children(TableRow.new(Nodes[], **parse_params(param_str)))
end
# Parses the table caption (`|+` line), with optional params before `|`.
def table_caption(table)
log 'Table caption found'
@context.skip(/^\s*\|\+\s*/)
params = if @context.check(/[^|{\[]+\|([^|]|$)/)
parse_params(@context.scan_until(/\|/))
else
{}
end
# caption content lasts until a line starting with `|`, `!` or `{|`
children = inline(/^\s*([|!]|{\|)/)
if @context.matched
@context.unscan_matched!
@context.prev! # compensate next! which will be done in table()
end
table.push_children(TableCaption.new(children.strip, **params))
end
# Parses all cells of the current line into the last row of the table
# (a row is created when the table does not end with one).
#
# @param cell_class class of the cells to create
def table_cells(table, cell_class = TableCell)
log 'Table cells found'
table.push_children(TableRow.new) unless table.children.last.is_a?(TableRow)
row = table.children.last
@context.skip(/\s*[!|]\s*/)
guarded_loop do
params = if @context.check(/[^|{\[]+\|([^|]|$)/)
parse_params(@context.scan_until(/\|/))
else
{}
end
# cells on the same line are separated by `||` or `!!`
content = short_inline(/(\|\||!!)/)
row.push_children(cell_class.new(content, **params))
break if @context.eol?
end
end
# Handles a line starting with a template inside the table.
def table_template(table)
contents = paragraph(/^\s*([|!]|{\|)/).to_templates?
# Note: in fact, without full template parsing, we CAN'T know what level to insert it:
# Template can be something like <tr><td>Foo</td></tr>
# But for consistency, we insert all templates inside the <td>, forcing this <td>
# to exist.
table.push_children(TableRow.new) unless table.children.last.is_a?(TableRow)
row = table.children.last
row.push_children(TableCell.new) unless row.children.last.is_a?(BaseCell)
cell = row.children.last
cell.push_children(*contents)
end
# Good news, everyone! Table can be IMPLICITLY closed when it's
# not "cell" context.
#
# Unless it's empty row, which is just skipped.
#
# The line is appended, as a paragraph, to the last cell of the last row
# or to the caption, whichever is the last child of the table.
#
# @return true when the table continues, false when it was closed
def table_cell_cont(table)
container = case (last = table.children.last)
when TableRow
last.children.last
when TableCaption
last
else
nil
end
unless container
# return "table not continued" unless row is empty
return true if @context.current.empty?
# step back, so the line is parsed again outside of the table
@context.prev!
return false
end
container.push_children(paragraph(/^\s*([|!]|{\|)/))
table.push_children(container) unless container.parent
true
end
end
end
end
================================================
FILE: lib/infoboxer/parser/template.rb
================================================
# frozen_string_literal: true
module Infoboxer
class Parser
module Template
include Tree
# Parses one template, starting at its name.
#
# NB: here we do not distinguish templates like `{{Infobox|variable}}`
# from "magic words" like `{{formatnum:123}}`; all of them are just called
# "templates". This behaviour will change in the future, I presume.
# More about magic words: https://www.mediawiki.org/wiki/Help:Magic_words
#
# @return instance of the class that the site traits' template set returns
#   for this name
def template
  name = @context.scan_continued_until(/\||:|}}/)
  @context.fail!('Template name not found') unless name

  log "Parsing template #{name}"
  name.strip!

  vars =
    if @context.eat_matched?('}}')
      Nodes[] # closed right after the name: no variables
    else
      template_vars
    end

  template_class = @context.traits.templates.find(name)
  template_class.new(name, vars)
end
# Parses template variables up to the closing `}}`.
#
# Variables written as `name = value` keep their name; all others are
# numbered in order of appearance, starting from 1.
#
# @return [Nodes] list of Var nodes
def template_vars
  log 'Parsing template variables'
  next_number = 1
  vars = Nodes[]

  guarded_loop do
    @context.next! while @context.eol?

    if @context.check(/\s*([^=}|<]+)\s*=\s*/)
      name = @context.scan(/\s*([^=]+)/).strip
      @context.skip(/\s*=\s*/)
    else
      name = next_number
      next_number += 1
    end
    log "Variable #{name} found"

    value = sanitize_value(long_inline(/\||}}/))

    # an unnamed variable without value was just an empty line
    blank_line = value.empty? && name.is_a?(Numeric)
    vars << Var.new(name.to_s, value) unless blank_line
    log 'Variable value found'

    break if @context.eat_matched?('}}')

    if @context.eof?
      @context.fail!("Unexpected break of template variables: #{vars}")
    end
  end

  vars
end
# Drops the last node of a variable value when it is a Pre or Text node
# whose text passes the blank check below.
#
# NOTE(review): /^\s*$/ is line-anchored, so besides all-whitespace text it
# also matches text that merely contains an empty line -- confirm that this
# is intended (the original carries a "FIXME: dirty!" remark here).
#
# @param nodes [Nodes] parsed value; modified in place
# @return [Nodes] the same +nodes+ object
def sanitize_value(nodes)
  tail = nodes.last
  droppable = (tail.is_a?(Pre) || tail.is_a?(Text)) && tail.text =~ /^\s*$/
  nodes.pop if droppable
  nodes
end
end
end
end
================================================
FILE: lib/infoboxer/parser/util.rb
================================================
# frozen_string_literal: true
module Infoboxer
class Parser
module Util
# Reader for @re. Parser#initialize fills @re with an OpenStruct built from
# the hash returned by #make_regexps.
attr_reader :re
# Matches the opening token of every inline construct the parser treats
# specially: quote runs, links, templates, external links and tags.
# Added to every pattern produced by the caches in #make_regexps.
FORMATTING = %r((
'''''|'''|'' | # bold, italic, bold italic
\[\[ | # link
{{ | # template
\[[a-z]+:// | # external link
<nowiki[^>]*> | # nowiki
<ref[^>]*> | # reference
<gallery[^>]*>| # gallery
<math> | # math
< # HTML tag
))x
# Zero-width look-ahead: succeeds right before a closing </ref> or a closing
# `}}`, without consuming them. Used by short_inline_until_cache.
INLINE_EOL = %r[(?= # if we have ahead... (not scanned, just checked
</ref> | # <ref> closed
}}
)]x
# Same as INLINE_EOL, but also succeeds before a single (not doubled) `]`.
# Used by short_inline_until_cache_brackets.
INLINE_EOL_BRACK = %r[(?= # if we have ahead... (not scanned, just checked
</ref> | # <ref> closed
}} | # or template closed
(?<!\])\](?!\]) # or ext.link closed,
# the madness with look-ahead/behind means
# "match single bracket but not double"
)]x
# FIXME: ok, NOW it's officially ridiculous
# Same as INLINE_EOL, but also succeeds before a doubled `]]`.
# Used by short_inline_until_cache_brackets2.
INLINE_EOL_BRACK2 = %r[(?= # if we have ahead... (not scanned, just checked
</ref> | # <ref> closed
}} | # or template closed
\]\] # or int.link closed
)]x
# Builds the table of regexps used while parsing.
#
# The `*_cache` entries are hashes that lazily build (and remember) a union
# of the requested "until" pattern, an optional inline-end look-ahead,
# FORMATTING and end of line. A nil pattern is simply left out of the union.
#
# @return [Hash{Symbol => Regexp, Hash}]
def make_regexps
  # Makes one lazy cache; +extras+ are put between the requested pattern
  # and FORMATTING.
  until_cache = lambda do |*extras|
    Hash.new do |cache, pattern|
      cache[pattern] = Regexp.union(*[pattern, *extras, FORMATTING, /$/].compact.uniq)
    end
  end

  {
    file_namespace: /(#{@context.traits.file_namespace.join('|')}):/,
    formatting: FORMATTING,
    inline_until_cache: until_cache.call,
    short_inline_until_cache: until_cache.call(INLINE_EOL),
    short_inline_until_cache_brackets: until_cache.call(INLINE_EOL_BRACK),
    short_inline_until_cache_brackets2: until_cache.call(INLINE_EOL_BRACK2)
  }
end
# Parses an HTML-like attribute string (`class="wikitable" border=1 nowrap`)
# into a hash.
#
# * `name="value"` and `name='value'` give the text between the quotes; an
#   unclosed quote takes everything up to the end of the line;
# * `name=value` gives the run of non-whitespace characters after `=`
#   (the separating whitespace is NOT part of the value);
# * a bare `name` is stored with the name itself as its value.
#
# @param str [String, nil] raw attribute text
# @return [Hash{Symbol => String}] empty hash when +str+ is nil
def parse_params(str)
  return {} unless str

  scanner = StringScanner.new(str)
  params = {}
  loop do
    scanner.skip(/\s*/)
    name = scanner.scan(/[^ \t=]+/) or break
    scanner.skip(/\s*/)

    if scanner.peek(1) == '='
      scanner.skip(/=\s*/)
      quote = scanner.scan(/['"]/)
      value =
        if quote
          # read up to the closing quote (or end of line) and drop the quote
          scanner.scan_until(/#{quote}|$/).sub(quote, '')
        else
          # only the value itself; the previous scan_until(/\s|$/) also
          # swallowed the whitespace character that ended the value
          scanner.scan(/\S*/)
        end
      params[name.to_sym] = value
    else
      params[name.to_sym] = name
    end
  end
  params
end
# Endless loop with protection against getting stuck: after every run of
# the block the context position (line and column) is compared with the
# position before the run, and `@context.fail!` is called when it has not
# changed.
#
# The block is expected to leave the loop with `break`.
def guarded_loop
  loop do
    before = [@context.lineno, @context.colno]
    yield
    after = [@context.lineno, @context.colno]

    @context.fail!("Infinite loop on position #{after.last}") if after == before
  end
end
end
end
end
================================================
FILE: lib/infoboxer/parser.rb
================================================
# frozen_string_literal: true
require 'ostruct'
require 'logger'
module Infoboxer
# Wikitext parser. The class-level methods below are the entry points: each
# one wraps the text into a Context and runs one of the parsing methods.
class Parser
  # Parser-specific error type; nothing in this class raises it directly.
  class ParsingError < RuntimeError
  end

  class << self
    # Parses text with the #inline method.
    #
    # @param text [String] wikitext
    # @param traits [nil, Hash, MediaWiki::Traits] see {coerce_traits}
    def inline(text, traits = nil)
      new(context(text, traits)).inline
    end

    # Parses text with the #paragraphs method.
    #
    # @param text [String] wikitext
    # @param traits [nil, Hash, MediaWiki::Traits] see {coerce_traits}
    def paragraphs(text, traits = nil)
      new(context(text, traits)).paragraphs
    end

    # First element of what {paragraphs} returns for the same arguments.
    def paragraph(text, traits = nil)
      paragraphs(text, traits).first
    end

    # Parses text into paragraphs and wraps them into a document.
    #
    # @return [Tree::Document]
    def document(text, traits = nil)
      Tree::Document.new(paragraphs(text, traits))
    end

    # Parses text with the #long_inline method.
    def fragment(text, traits = nil)
      new(context(text, traits)).long_inline
    end

    private

    # Builds the parsing context for the text.
    def context(text, traits)
      Context.new(text, coerce_traits(traits))
    end

    # Turns the +traits+ argument into MediaWiki::Traits: nil gives the
    # default traits, a Hash is passed to the Traits constructor and ready
    # traits are returned as is.
    #
    # @raise [ArgumentError] for any other argument
    def coerce_traits(traits)
      case traits
      when nil then MediaWiki::Traits.default
      when Hash then MediaWiki::Traits.new(traits)
      when MediaWiki::Traits then traits
      else
        raise ArgumentError, "Can't coerce site traits from #{traits.inspect}"
      end
    end
  end

  include Tree

  # @param context parsing context, as built by the class-level methods
  def initialize(context)
    @context = context
    @re = OpenStruct.new(make_regexps)
    # Only FATAL messages pass, so #log calls are silent.
    @logger = Logger.new($stdout)
    @logger.level = Logger::FATAL
  end

  require_relative 'parser/inline'
  include Parser::Inline

  require_relative 'parser/paragraphs'
  include Parser::Paragraphs

  private

  require_relative 'parser/util'
  include Parser::Util

  # Logs the message together with the current context position and line.
  def log(msg)
    # Block form: #log is called on every parsing step, and with the block
    # the message is interpolated only when the logger level lets INFO
    # messages through, instead of being built and thrown away each time.
    @logger.info { "#{msg} | #{@context.lineno}:#{@context.colno}: #{@context.current}" }
  end
end
end
require_relative 'parser/context'
================================================
FILE: lib/infoboxer/templates/base.rb
================================================
# frozen_string_literal: true
module Infoboxer
module Templates
# Base class of all template definitions created through {Set}; it is also
# what {Set#find} returns when no definition matches.
class Base < Tree::Template
  include Tree

  class << self
    # Name and options the definition was created with (assigned by
    # Set#setup_class; both are nil for Base itself).
    attr_accessor :template_name, :template_options

    def inspect
      return super unless template_name

      "Infoboxer::Templates::#{clean_name}"
    end

    # Short class label: `Template[<name>]` for named definitions,
    # plain `Template` otherwise.
    def clean_name
      return 'Template' unless template_name

      "Template[#{template_name}]"
    end
  end

  # Any template node can be equal to this one, whatever its class; the
  # comparison itself is done by +_eq+.
  def ==(other)
    other.is_a?(Tree::Template) && _eq(other)
  end

  protected

  # Uses the definition's label when the definition name is exactly the
  # name of this template, the inherited implementation otherwise.
  def clean_class
    self.class.template_name == name ? self.class.clean_name : super
  end
end
# Template that renders as the text of all its unnamed variables, joined
# with a space. Also allows in-template navigation.
#
# Used for {Set} definitions.
class Show < Base
  def text
    variable_texts = unnamed_variables.map(&:text)
    variable_texts.join(children_separator)
  end

  private

  def children_separator
    ' '
  end
end
# Template that renders as a fixed replacement text.
#
# Used for {Set} definitions.
class Replace < Base
  # Replacement text; descendants have to define it.
  #
  # @raise [NotImplementedError] always, in this base implementation
  def replace
    raise NotImplementedError, 'Descendants should define :replace'
  end

  # The template's text is its replacement.
  def text
    replace
  end
end
# Template that renders as its own name.
#
# Used for {Set} definitions.
class Literal < Base
  alias text name
end
end
end
================================================
FILE: lib/infoboxer/templates/set.rb
================================================
# frozen_string_literal: true
module Infoboxer
module Templates
# Base class for defining a set of templates used for some site/domain.
#
# Currently it can only be plugged in via {MediaWiki::Traits.templates}.
#
# A template set provides a DSL for creating template definitions, from
# the simplest ones to very complicated.
#
# You can look at the implementation of the English Wikipedia
# [common templates set](https://github.com/molybdenum-99/infoboxer/blob/master/lib/infoboxer/definitions/en.wikipedia.org.rb)
# in Infoboxer's repo.
#
class Set
  # @yield optional block with definitions, evaluated as in {#define}
  def initialize(&definitions)
    @templates = []
    define(&definitions) if definitions
  end

  # Returns the class defined for the template name, or {Base} when there
  # is none. The name is downcased before matching, and the most recently
  # added definition wins.
  #
  # @private
  def find(name)
    wanted = name.downcase
    _, template = @templates.detect { |matcher, _| matcher === wanted }
    template || Base
  end

  # Evaluates the block in the context of the set, so the DSL methods below
  # can be called without a receiver.
  #
  # @private
  def define(&definitions)
    instance_eval(&definitions)
  end

  # Removes all definitions.
  #
  # @private
  def clear
    @templates.clear
  end

  # Most common form of template definition.
  #
  # Can be used like:
  #
  # ```ruby
  # template 'Age' do
  #   def from
  #     fetch_date('1', '2', '3')
  #   end
  #
  #   def to
  #     fetch_date('4', '5', '6') || Date.today
  #   end
  #
  #   def value
  #     (to - from).to_i / 365 # FIXME: obviously
  #   end
  #
  #   def text
  #     "#{value} years"
  #   end
  # end
  # ```
  #
  # @param name Definition name.
  # @param options Definition options.
  #   Currently recognized options are:
  #   * `:match` -- regexp or string which matches the template name to
  #     add this definition to (if not provided, the `name` param is used
  #     to match relevant templates). Template names are downcased before
  #     matching, so strings are compared case-insensitively and a regexp
  #     should be lower-case or case-insensitive;
  #   * `:base` -- name of template definition to use as a base class;
  #     for example you can do things like:
  #
  #     ```ruby
  #     # ...inside template set definition...
  #     template 'Infobox', match: /^infobox/i do
  #       # implementation
  #     end
  #
  #     template 'Infobox cheese', base: 'Infobox' do
  #     end
  #     ```
  #
  # Expected to be used inside Set definition block.
  def template(name, options = {}, &definition)
    setup_class(name, Base, options, &definition)
  end

  # Define list of "replacements": templates whose text should be replaced
  # with an arbitrary value.
  #
  # Example:
  #
  # ```ruby
  # # ...inside template set definition...
  # replace(
  #   '!!' => '||',
  #   '!(' => '['
  # )
  # ```
  # Now, all templates with name `!!` will render as `||` when you
  # call their (or their parents') {Tree::Node#text}.
  #
  # Accepts either one hash of `name => replacement` pairs or exactly two
  # strings (name and replacement).
  #
  # Expected to be used inside Set definition block.
  #
  # @raise [ArgumentError] for any other arguments
  def replace(*replacements)
    if replacements.count == 2 && replacements.all? { |r| r.is_a?(String) }
      name, what = *replacements
      setup_class(name, Replace) do
        define_method(:replace) { what }
      end
    elsif replacements.count == 1 && replacements.first.is_a?(Hash)
      replacements.first.each { |nm, rep| replace(nm, rep) }
    else
      fail(ArgumentError, "Can't call :replace with #{replacements.join(', ')}")
    end
  end

  # Define list of "show children" templates. Those ones, when rendered
  # as text, just provide the join of their children's text
  # (space-separated).
  #
  # Example:
  #
  # ```ruby
  # #...in template set definition...
  # show 'Small'
  # ```
  # Now, a wikitext paragraph looking like...
  #
  # ```
  # This is {{small|text}} in template
  # ```
  # ...before this template definition was rendered like
  # `"This is in template"` (template contents omitted), and after
  # this definition it will render like `"This is text in template"`
  # (template contents rendered as is).
  #
  # Expected to be used inside Set definition block.
  def show(*names)
    names.each { |name| setup_class(name, Show) }
  end

  # Define list of "literally rendered templates". It means that, when
  # rendering text, the template is replaced with just its name.
  #
  # Explanation: in MediaWiki there are contexts (deep inside other
  # templates and tables) where you can't just type something like `","`
  # and not have it interpreted. So wikis often define wrapper templates
  # looking like `{{,}}` -- and while rendering texts, such templates can
  # be replaced with their names.
  #
  # Expected to be used inside Set definition block.
  def literal(*names)
    names.each { |name| setup_class(name, Literal) }
  end

  # Creates a definition class and registers it in front of the existing
  # ones.
  #
  # @param name [String] definition name
  # @param base_class [Class] base class, unless `options[:base]` is given
  # @param options [Hash] see {#template}
  # @return [Class] the new definition class
  #
  # @private
  def setup_class(name, base_class, options = {}, &definition)
    match = options.fetch(:match, name.downcase)
    # #find compares matchers with the downcased template name, so a String
    # matcher containing capitals could never match anything. Normalize it
    # the same way as the default matcher.
    match = match.downcase if match.is_a?(String)

    base = options.fetch(:base, base_class)
    base = find(base) if base.is_a?(String)

    Class.new(base, &definition).tap do |cls|
      cls.template_name = name
      cls.template_options = options
      @templates.unshift [match, cls]
    end
  end
end
end
end
================================================
FILE: lib/infoboxer/templates.rb
================================================
# frozen_string_literal: true
module Infoboxer
  # This module covers advanced usage of MediaWiki templates.
  #
  # It is seriously advised to read the [Wikipedia docs](https://en.wikipedia.org/wiki/Help:Template)
  # or at least look through them (and keep them open while reading further).
  #
  # If you just have a page with templates and want some variable value
  # (like "page about country - infobox - total population"), you should
  # be totally happy with {Tree::Template} and its features.
  #
  # What this module does is, basically, two things:
  # * allows you to define for arbitrary templates how they are converted
  #   to text; by default, templates are totally excluded from text, which
  #   is not the most reasonable behavior for many formatting templates;
  # * allows you to define additional functionality for arbitrary
  #   templates; many of them contain pretty complicated logic (see, for
  #   example, [Template:Convert](https://en.wikipedia.org/wiki/Template:Convert)),
  #   and it seems reasonable to extend instances of such a template.
  #
  # Infoboxer allows you to define a {Templates::Set} of template-specific
  # classes for some site/domain.
  # There is an already defined set of the most commonly used templates of
  # en.wikipedia.org (so most of English Wikipedia texts will be rendered
  # correctly, and some advanced functionality is provided as well).
  # You can take a look at
  # [lib/infoboxer/definitions/en.wikipedia.org.rb](https://github.com/molybdenum-99/infoboxer/blob/master/lib/infoboxer/definitions/en.wikipedia.org.rb)
  # to get a feeling of it (and also see a couple of TODOs and FIXMEs and
  # other considerations).
  #
  # From Infoboxer's point of view, templates are the most complex part
  # of Wikipedia, and we are currently trying hard to do the most
  # reasonable things about them.
  #
  # Future versions also should:
  # * define more of the common English Wikipedia templates;
  # * define templates for other popular wikis;
  # * allow adding template definitions on the fly, while loading some
  #   page.
  #
  module Templates
    require_relative 'templates/base'
    require_relative 'templates/set'
  end
end
================================================
FILE: lib/infoboxer/tree/compound.rb
================================================
# frozen_string_literal: true
module Infoboxer
module Tree
# Base class for all nodes with children.
class Compound < Node
  # @param children [Nodes, Array] initial children; each of them gets
  #   this node as its parent
  # @param params node params, passed to {Node#initialize}
  def initialize(children = Nodes.new, **params)
    super(**params)
    @children = Nodes[*children]
    @children.each { |child| child.parent = self }
  end

  # List of children
  #
  # @return {Nodes}
  attr_reader :children

  # Index of provided node in children list
  #
  # The node is looked up by identity, not by `==`: nodes compare equal by
  # content, so with `==` all equal siblings (say, two table cells with the
  # same text) would report the index of the first of them.
  #
  # @return [Integer] or `nil` if not a child
  def index_of(child)
    children.index { |candidate| candidate.equal?(child) }
  end

  # Makes this node the parent of all given nodes and appends them to the
  # children list (through {Nodes#<<}).
  #
  # @private
  # Internal, used by {Parser}
  def push_children(*nodes)
    nodes.each { |node| node.parent = self }
    nodes.each { |node| @children << node }
  end

  # See {Node#text}
  def text
    child_texts = children.map(&:text)
    child_texts.join(children_separator)
  end

  # See {Node#to_tree}
  def to_tree(level = 0)
    only_text = children.count == 1 && children.first.is_a?(Text)
    if only_text
      # a single text child is shown on the same line as the node itself
      "#{indent(level)}#{children.first.text} <#{descr}>\n"
    else
      "#{indent(level)}<#{descr}>\n" + children.map { |c| c.to_tree(level + 1) }.join
    end
  end

  # Kinda "private" methods, used by Parser only -------------------

  # @private
  # Internal, used by {Parser}
  def can_merge?(_other)
    false
  end

  # Marks the node as closed.
  #
  # @private
  # Internal, used by {Parser}
  def closed!
    @closed = true
  end

  # Whether the node was marked as closed (nil when it never was).
  #
  # @private
  # Internal, used by {Parser}
  def closed?
    @closed
  end

  # A compound node is empty when it has no children.
  #
  # @private
  # Internal, used by {Parser}
  def empty?
    children.empty?
  end

  protected

  # String put between children's texts in {#text}.
  def children_separator
    ''
  end

  private

  # Compound nodes are equal when their children are.
  def _eq(other)
    children == other.children
  end
end
end
end
================================================
FILE: lib/infoboxer/tree/document.rb
================================================
# frozen_string_literal: true
module Infoboxer
  module Tree
    # Represents the entire document.
    #
    # Besides the standard compound node functionality, it is a
    # {Navigation::Sections::Container}.
    class Document < Compound; end
  end
end
================================================
FILE: lib/infoboxer/tree/gallery.rb
================================================
# frozen_string_literal: true
module Infoboxer
  module Tree
    # Represents a gallery of images (contents of the `<gallery>` special
    # tag).
    #
    # See [Wikipedia Tutorial](https://en.wikipedia.org/wiki/Help:Gallery_tag)
    # for explanation of attributes.
    class Gallery < Compound; end
  end
end
================================================
FILE: lib/infoboxer/tree/html.rb
================================================
# frozen_string_literal: true
module Infoboxer
module Tree
# Behavior shared by HTML tag nodes: the text of a block-level tag gets a
# line feed appended.
module HTMLTagCommons
  BLOCK_TAGS = %w[div p br].freeze # FIXME: are some other used in WP?

  def text
    line_feed = BLOCK_TAGS.include?(tag) ? "\n" : ''
    super + line_feed
  end
end
# Represents an HTML tag surrounding some contents.
class HTMLTag < Compound
  # @param tag [String] tag name
  # @param attrs [Hash] tag attributes, stored as node params
  # @param children [Nodes] tag contents
  def initialize(tag, attrs, children = Nodes.new)
    super(children, **attrs)
    @tag = tag
  end

  # Tag name
  attr_reader :tag

  # Tag attributes (same as node params)
  alias attrs params

  include HTMLTagCommons

  # Never empty: even an empty tag, for ex., <br>, should not be dropped!
  #
  # @private
  # Internal, used by {Parser}.
  def empty?
    false
  end

  private

  def descr
    "#{clean_class}:#{tag}(#{show_params})"
  end
end
# Represents an orphan opening HTML tag.
#
# NB: Infoboxer does not try to parse the entire structure of HTML-heavy
# MediaWiki articles. So, if you have `<div>` at line 150 and the closing
# `</div>` at line 875, there would be an orphan `HTMLOpeningTag` and
# {HTMLClosingTag}. It is not always convenient, but reasonable enough.
#
class HTMLOpeningTag < Node
  # @param tag [String] tag name
  # @param attrs [Hash] tag attributes, stored as node params
  def initialize(tag, attrs)
    super(**attrs)
    @tag = tag
  end

  # Tag name
  attr_reader :tag

  # Tag attributes (same as node params)
  alias attrs params

  include HTMLTagCommons

  private

  def descr
    "#{clean_class}:#{tag}(#{show_params})"
  end
end
# Represents an orphan closing HTML tag. See {HTMLOpeningTag} for
# explanation.
class HTMLClosingTag < Node
  # NOTE(review): Node#initialize is not called here, so +params+ stays
  # nil for this node -- confirm that this is intended.
  #
  # @param tag [String] tag name
  def initialize(tag)
    @tag = tag
  end

  # Tag name
  attr_reader :tag

  # Short description used in inspect/tree output.
  def descr
    [clean_class, tag].join(':')
  end
end
end
end
================================================
FILE: lib/infoboxer/tree/image.rb
================================================
# frozen_string_literal: true
module Infoboxer
module Tree
# Represents an image (or other media file).
#
# See [Wikipedia Tutorial](https://en.wikipedia.org/wiki/Wikipedia:Extended_image_syntax)
# for explanation of attributes.
class Image < Node
  # @param path image path, stored as the `:path` param
  # @param caption caption node, if any
  # @param params other image params
  def initialize(path, caption: nil, **params)
    @caption = caption
    super(path: path, **params)
  end

  # Image caption. Can have (sometimes many) other nodes inside.
  #
  # @return [Nodes]
  attr_reader :caption

  # @!attribute [r] path
  # @!attribute [r] type
  # @!attribute [r] location
  # @!attribute [r] alignment
  # @!attribute [r] link
  # @!attribute [r] alt
  def_readers :path, :type, :location, :alignment, :link, :alt

  # Whether the `:border` param is present and not blank.
  def border?
    border = params[:border].to_s
    !border.empty?
  end

  # `:width` param as an integer (0 when it is absent).
  def width
    params[:width].to_i
  end

  # `:height` param as an integer (0 when it is absent).
  def height
    params[:height].to_i
  end

  # See {Node#to_tree}; a non-empty caption is printed below the image.
  def to_tree(level = 0)
    tree = super(level)
    return tree unless caption && !caption.empty?

    caption_tree = caption.children.map { |c| c.to_tree(level + 2) }.join
    tree + indent(level + 1) + "caption:\n" + caption_tree
  end

  private

  # Images are equal when their paths are.
  def _eq(other)
    path == other.path
  end
end
# Represents the caption of an image.
class ImageCaption < Compound; end
end
end
================================================
FILE: lib/infoboxer/tree/inline.rb
================================================
# frozen_string_literal: true
module Infoboxer
  module Tree
    # Represents italic text.
    class Italic < Compound; end

    # Represents bold text.
    class Bold < Compound; end

    # Represents bold italic text (and no, it's not a combination of
    # bold+italic, from Wikipedia's markup point of view).
    class BoldItalic < Compound; end

    # Base class for internal/external links.
    class Link < Compound
      # @param link [String] link target, stored as the `:link` param
      # @param label [Nodes, nil] link contents; without it the link text
      #   itself becomes the only child
      # @param attr other params
      def initialize(link, label = nil, **attr)
        contents = label || Nodes.new([Text.new(link)])
        super(contents, link: link, **attr)
      end

      # @!attribute [r] link
      def_readers :link
    end

    # External link. Has other nodes as contents, and, err, a link (url).
    class ExternalLink < Link
      # @!attribute [r] url
      #   synonym for `#link`
      alias url link
    end
  end
end
require_relative 'wikilink'
================================================
FILE: lib/infoboxer/tree/linkable.rb
================================================
# frozen_string_literal: true
module Infoboxer
  module Tree
    # Module included into everything that can be treated as a link to
    # some MediaWiki page, whatever its behavior. Namely, {Wikilink} and
    # {Template}.
    module Linkable
      # Fetches the wiki page this link points to and returns it parsed
      # (or nil, if the page is not found).
      #
      # About template "following" see also {Template#follow} docs.
      #
      # @return {MediaWiki::Page}
      #
      # **See also**:
      # * {Tree::Nodes#follow} for extracting multiple links at once;
      # * {MediaWiki#get} for basic information on page extraction.
      def follow
        client.get(link, interwiki: interwiki)
      end

      # Human-readable page URL
      #
      # @return [String]
      def url
        # FIXME: fragile as hell.
        # The last path segment of the current page's URL is replaced with
        # the link, spaces turned into underscores.
        target = link.tr(' ', '_')
        page.url.sub(%r{[^/]+$}, target)
      end

      protected

      # redefined in {Wikilink}
      def interwiki
        nil
      end

      # Page this node belongs to.
      #
      # @raise [RuntimeError] when the node has no page among its parents
      def page
        found = lookup_parents(MediaWiki::Page).first
        raise 'Not in a page from real source' unless found

        found
      end

      # Client of the page this node belongs to.
      #
      # @raise [RuntimeError] when the page has no client
      def client
        found = page.client
        raise 'MediaWiki client not set' unless found

        found
      end
    end
  end
end
================================================
FILE: lib/infoboxer/tree/list.rb
================================================
# frozen_string_literal: true
module Infoboxer
module Tree
# Represents an item of an ordered or unordered list.
class ListItem < BaseParagraph
  # Another item of the same class can be merged in when it starts with a
  # nested list.
  #
  # @private
  # Internal, used by {Parser}
  def can_merge?(other)
    return false unless other.class == self.class

    other.children.first.is_a?(List)
  end

  # Takes over the children of +other+: its first child is merged into our
  # last child when that is possible, the rest is appended.
  #
  # @private
  # Internal, used by {Parser}
  def merge!(other)
    incoming = other.children.dup
    if children.last&.can_merge?(incoming.first)
      children.last.merge!(incoming.shift)
    end
    push_children(*incoming)
  end

  # Item text: marker, contents and a line feed; a nested list at the end
  # of the item goes on its own lines.
  def text
    body =
      if children.last.is_a?(List)
        children[0..-2].map(&:text).join + "\n" + children.last.text
      else
        children.map(&:text).join + "\n"
      end
    make_marker + body
  end

  private

  # Marker is chosen by the parent list; an item without parent gets `* `.
  def make_marker
    return '* ' unless parent

    parent.make_marker(self)
  end
end
# "Imaginary" node, grouping {ListItem}s of the same level and type.
#
# Base for concrete {OrderedList}, {UnorderedList} and {DefinitionList}.
#
# NB: Nested lists are represented by structures like:
#
# ```
# <OrderedList>
#   <ListItem>
#   <ListItem>
#     <Text>
#     <UnorderedList>
#       <ListItem>
#       <ListItem>
# ...and so on
# ```
class List < Compound
  # Nesting depth: number of lists among this list's parents.
  def list_level
    lookup_parents(List).count
  end

  # Indentation put before item markers at this nesting depth.
  def list_text_indent
    ' ' * list_level
  end

  # List text, with trailing line feeds normalized: two after a top-level
  # list, one after a nested list.
  def text
    ending = list_level.zero? ? "\n\n" : "\n"
    super.sub(/\n+\Z/, ending)
  end
end
# Represents an unordered list (list with markers).
class UnorderedList < List
  # Every item gets the same `* ` marker after the list indentation.
  def make_marker(_item)
    "#{list_text_indent}* "
  end
end
# Represents an ordered list (list with numbers).
class OrderedList < List
  # The marker is the one-based position of the item, followed by a dot.
  def make_marker(item)
    number = item.index + 1
    list_text_indent + "#{number}. "
  end
end
# Represents a definition list (`term: definition` structure),
# consisting of {DTerm}s and {DDefinition}s.
#
# NB: In fact, at least in English Wikipedia, orphan "definition terms"
# are used as low-level headers, especially in lists of links/references.
class DefinitionList < List
  # Terms get only the list indentation, definitions are indented
  # further. For any other item nil is returned.
  def make_marker(item)
    case item
    when DTerm then list_text_indent
    when DDefinition then list_text_indent + ' '
    end
  end
end
# Term in a {DefinitionList}
class DTerm < ListItem
  # Same as the item text, with a colon put before the first line feed.
  def text
    item_text = super
    item_text.sub("\n", ":\n")
  end
end
# Definition of a term in a {DefinitionList}
class DDefinition < ListItem; end
# Parser-facing part of {List}: merging of adjacent lists and construction
# of (nested) lists from list markers.
class List < Compound
  include Mergeable

  # Takes over the children of +other+: its first child is merged into our
  # last child when that is possible, the rest is appended.
  #
  # @private
  # Internal, used by {Parser}
  def merge!(other)
    incoming = other.children.dup
    ours = children.last
    theirs = incoming.first
    ours.merge!(incoming.shift) if ours && theirs && ours.can_merge?(theirs)
    push_children(*incoming)
  end

  # Builds a list for the given markers: one nesting level per marker,
  # with +nodes+ as contents of the innermost item.
  #
  # NB: +marker+ is consumed (emptied) in the process.
  #
  # @param marker [Array<String>] marker characters, outermost first
  # @param nodes contents of the innermost list item
  # @raise [RuntimeError] on unknown marker character
  #
  # @private
  # Internal, used by {Parser}
  def self.construct(marker, nodes)
    m = marker.shift
    list_class = LISTS[m]
    fail("Something went wrong: undefined list marker type #{m}") unless list_class

    item_class = ITEMS[m]
    contents = marker.empty? ? nodes : construct(marker, nodes)
    list_class.new(item_class.new(contents))
  end

  # List class for every marker character.
  #
  # @private
  LISTS = {
    ';' => DefinitionList,
    ':' => DefinitionList,
    '*' => UnorderedList,
    '#' => OrderedList
  }.freeze

  # List item class for every marker character.
  #
  # @private
  ITEMS = {
    ';' => DTerm,
    ':' => DDefinition,
    '*' => ListItem,
    '#' => ListItem
  }.freeze
end
end
end
================================================
FILE: lib/infoboxer/tree/math.rb
================================================
# frozen_string_literal: true
module Infoboxer
  module Tree
    # Represents a node with a math formula marked up with TeX.
    #
    # See also: https://en.wikipedia.org/wiki/Help:Displaying_a_formula
    class Math < Text; end
  end
end
================================================
FILE: lib/infoboxer/tree/node.rb
================================================
# frozen_string_literal: true
require 'htmlentities'
module Infoboxer
module Tree
# This is the base class for all parse tree nodes.
#
# Basically, you'll never create instances of this class or its
# descendants by yourself: you will receive them from the tree and use
# them for navigation.
#
class Node
  def initialize(**params)
    @params = params
  end

  # Hash of node "params".
  #
  # The notion of params is roughly the same as tag attributes in HTML.
  # It is relevant for complex nodes like images, tables, raw HTML tags
  # and so on.
  #
  # The most relevant params are typically exposed by the node as instance
  # methods (like {Heading#level}).
  #
  # @return [Hash]
  attr_reader :params

  # Node's parent in tree
  # @return {Node}
  attr_accessor :parent

  # Nodes are equal when they are of the same class and the class-specific
  # comparison (+_eq+) agrees.
  def ==(other)
    return false unless self.class == other.class

    _eq(other)
  end

  # Position in parent's children array (zero-based); 0 without parent.
  def index
    return 0 unless parent

    parent.index_of(self)
  end

  # Whether the node is at position 0.
  def first?
    index.zero?
  end

  # List of all sibling nodes (children of same parent)
  def siblings
    return Nodes[] unless parent

    parent.children - [self]
  end

  # List of siblings before this one
  def prev_siblings
    siblings.select { |sibling| sibling.index < index }
  end

  # List of siblings after this one
  def next_siblings
    siblings.select { |sibling| sibling.index > index }
  end

  # Node children list
  def children
    Nodes[] # redefined in descendants
  end

  # @private
  # Used only during tree construction in {Parser}.
  def can_merge?(_other)
    false
  end

  # @private
  # Whether node is empty (definition of "empty" varies for different
  # kinds of nodes). Used mainly in {Parser}.
  def empty?
    false
  end

  # Textual representation of this node and its children, ready for
  # pretty-printing. Use it like this:
  #
  # ```ruby
  # puts page.lookup(:Paragraph).first.to_tree
  # # Prints something like
  # # <Paragraph>
  # #   This <Italic>
  # #   is <Text>
  # #   <Wikilink(link: "Argentina")>
  # #   pretty <Italic>
  # #   complicated <Text>
  # ```
  #
  # Useful for understanding the page structure, and Infoboxer's
  # representation of this structure.
  def to_tree(level = 0)
    "#{indent(level)}<#{descr}>\n"
  end

  # Short description of the node, with (shortened) text when there is any.
  def inspect
    if text.empty?
      "#<#{descr}>"
    else
      "#<#{descr}: #{shorten_text}>"
    end
  end

  # Node text representation. It is defined for all nodes so that the
  # entire `Document#text` produces a readable text-only representation
  # of the Wiki page. Therefore, the rules are these:
  # * inline-formatting nodes (text, bold, italics) just return the
  #   text;
  # * paragraph-level nodes (headings, paragraphs, lists) add `"\n\n"`
  #   after text;
  # * list items add a marker before text;
  # * nodes not belonging to the "main" text flow (references, templates)
  #   produce empty text.
  #
  # If you want just the text of some heading or list item (without
  # "formatting" quirks), you can use the {Node#text_} method.
  #
  def text
    '' # redefined in descendants
  end

  # "Clean" version of node text: without trailing linefeeds, list
  # markers and other things added for formatting.
  #
  def text_
    text.strip
  end

  # See {Node#text_}
  def to_s
    # a plain alias would not work once #text is redefined in subclasses
    text_
  end

  private

  MAX_CHARS = 30

  # Text for #inspect: only the first line of multi-line text, cut with
  # `...` when it is longer than MAX_CHARS.
  def shorten_text
    first_line = text_.sub(/^([^\n]+)\n.+$/m, '\1...')
    return first_line unless first_line.length > MAX_CHARS

    first_line[0..MAX_CHARS] + '...'
  end

  # Class name without namespaces.
  def clean_class
    self.class.name.sub(/^.*::/, '')
  end

  # Node description: class name, plus params when there are any.
  def descr
    return clean_class.to_s if !params || params.empty?

    "#{clean_class}(#{show_params})"
  end

  # `key: value` list of params (nil values are skipped).
  def show_params(prms = nil)
    shown = (prms || params).compact
    shown.map { |key, value| "#{key}: #{value.inspect}" }.join(', ')
  end

  def indent(level)
    ' ' * level
  end

  # Class-specific part of equality check; redefined in descendants.
  def _eq(_other)
    false
  end

  # Decodes HTML entities in the string.
  def decode(str)
    Node.coder.decode(str)
  end

  class << self
    # Internal: descendants DSL. Defines readers that return the params
    # with the given keys.
    def def_readers(*keys)
      keys.each do |key|
        define_method(key) { params[key] }
      end
    end

    # Internal: HTML entities decoder (created on first use).
    def coder
      @coder ||= HTMLEntities.new
    end
  end
end
end
end
================================================
FILE: lib/infoboxer/tree/nodes.rb
================================================
# frozen_string_literal: true
module Infoboxer
module Tree
# List of nodes, which tries to be useful both as array, and as proxy
# to its contents.
#
# Many of Infoboxer's methods (especially {Navigation}'s) return
# `Nodes`, and in most cases you don't have to think about it. Same
# approach can be seen in jQuery or Nokogiri. You just do things
# like those:
#
# ```ruby
# document.sections. # => Nodes returned,
# select{|section| # you can treat them as array, but also...
# section.text.length > 1000 #
# }. #
# lookup(:Wikilink, text: /Chile/). # ...use Infoboxer's methods
# follow. # ...even to receive lists of other pages
# infoboxes. # ...and use methods on them
# fetch('leader_name1'). # ...including those which only some node types support
# map(&:text) # ...and still have full-functioning Array
# ```
#
class Nodes < Array
# @!method select(&block)
# Just like Array#select, but returns Nodes
# @!method reject(&block)
# Just like Array#reject, but returns Nodes
# @!method sort_by(&block)
# Just like Array#sort_by, but returns Nodes
# @!method flatten
# Just like Array#flatten, but returns Nodes
# @!method compact
# Just like Array#compact, but returns Nodes
# @!method grep(pattern)
# Just like Array#grep, but returns Nodes
# @!method grep_v(pattern)
# Just like Array#grep_v, but returns Nodes
# @!method -(other)
# Just like Array#-, but returns Nodes
# @!method +(other)
# Just like Array#+, but returns Nodes
# NB: Since Ruby 3.0, we need to redefine all Enumerable methods (otherwise they return Array).
# TODO: Check those lacking overrides!
%i[
select reject sort_by flatten compact grep grep_v - +
take_while drop_while
].each do |sym|
define_method(sym) do |*args, &block|
Nodes[*super(*args, &block)]
end
end
alias_method :filter, :select
# Just like Array#first, but returns Nodes, if provided with `n` of elements.
def first(n = nil)
if n.nil?
super()
else
Nodes[*super(n)]
end
end
# Just like Array#last, but returns Nodes, if provided with `n` of elements.
def last(n = nil)
if n.nil?
super()
else
Nodes[*super(n)]
end
end
# Just like Array#map, but returns Nodes, **if** all map results are Node
def map
res = super
if res.all? { |n| n.is_a?(Node) || n.is_a?(Nodes) }
Nodes[*res]
else
res
end
end
# Just like Array#flat_map, but returns Nodes, **if** all map results are Node
def flat_map
res = super
if res.all? { |n| n.is_a?(Node) || n.is_a?(Nodes) }
Nodes[*res]
else
res
end
end
# Just like Array#group, but returns hash with `{<grouping variable> => Nodes}`
def group_by
super.transform_values { |group| Nodes[*group] }
end
# @!method prev_siblings
# Previous siblings (flat list) of all nodes inside.
# @!method next_siblings
# Next siblings (flat list) of all nodes inside.
# @!method siblings
# Siblings (flat list) of all nodes inside.
# @!method fetch
# Fetches by name(s) variables for all templates inside.
#
# See {Tree::Template#fetch} for explanation.
%i[
prev_siblings next_siblings siblings
fetch
].each do |sym|
define_method(sym) do |*args|
make_nodes(map { |n| n.send(sym, *args) })
end
end
# By list of variable names, fetches hashes of `{name => value}`
# from all templates inside.
#
# See {Tree::Template#fetch_hash} for explanation.
#
# @return [Array<Hash>]
def fetch_hashes(*args)
map { |t| t.fetch_hash(*args) }
end
# Just join of all {Node#to_tree Node#to_tree} strings inside.
def to_tree
map(&:to_tree).join("\n")
end
def inspect
'[' +
case
when count > MAX_CHILDREN
self[0...MAX_CHILDREN].map(&:inspect).join(', ') +
", ...#{count - MAX_CHILDREN} more nodes"
else
map(&:inspect).join(', ')
end + ']'
end
# Just join of all {Node#text Node#text}s inside.
def text
map(&:text).join
end
alias_method :to_s, :text
def unwrap
map { |n| n.respond_to?(:unwrap) ? n.unwrap : n }
end
# Fetches pages by ALL wikilinks inside in ONE query to MediaWiki
# API.
#
# **NB**: for now, if there's more then 50 wikilinks (limitation for
# one request to API), Infoboxer **will not** try to do next page.
# It will be fixed in next releases.
#
# @return [Nodes<MediaWiki::Page>] It is still `Nodes`, so you
# still can process them uniformely.
def follow
links = grep(Linkable)
return Nodes[] if links.empty?
page = first.lookup_parents(MediaWiki::Page).first or
fail('Not in a page from real source')
page.client or fail('MediaWiki client not set')
pages = links.group_by(&:interwiki)
.flat_map { |iw, ls| page.client.get(*ls.map(&:link), interwiki: iw) }
pages.count == 1 ? pages.first : Nodes[*pages]
end
# @private
# Internal, used by {Parser}
def <<(node) # rubocop:disable Metrics/PerceivedComplexity
  # Arrays are appended element by element, so every element goes through
  # the merging logic below.
  return node.each { |n| self << n } if node.is_a?(Array)

  # If the current last node accepts the newcomer, merge instead of appending.
  return last.merge!(node) if last&.can_merge?(node)

  # nil and empty nodes/strings are silently dropped.
  return if !node || node.empty?

  node = Text.new(node) if node.is_a?(String)
  super # bare super passes the (possibly re-assigned) `node`
end
# @private
# Internal, used by {Parser}
def strip
  res = dup

  # Drop trailing Text nodes that consist ONLY of whitespace.
  # \A/\z anchor to the whole string. The previous ^/$ are line anchors in
  # Ruby, so a text with a blank line inside ("foo\n\nbar") matched and was
  # thrown away together with its content.
  res.pop while res.last.is_a?(Text) && res.last.raw_text.match?(/\A\s*\z/)

  # Right-strip the remaining last Text. \z again: with $ the FIRST line
  # ending in whitespace was stripped ("a  \nb  " became "a\nb  "), not the
  # end of the text.
  # NB: `dup` is shallow, so this edits the Text node shared with the receiver.
  res.last.raw_text.sub!(/\s+\z/, '') if res.last.is_a?(Text)

  res
end
# @private
# Internal, used by {Parser}
def flow_templates
# Currently a no-op: returns the receiver unchanged. The commented-out line
# below is a considered alternative (replace template-only paragraphs with
# their templates), kept for reference.
# TODO: will it be better?..
# make_nodes(map { |n| n.is_a?(Paragraph) ? n.to_templates? : n })
self
end
private
# @private For inspect shortening
MAX_CHILDREN = 5
def make_nodes(arr)
  # Nested arrays (e.g. one Nodes list per element) are flattened into one list.
  flat = arr.flatten
  Nodes[*flat]
end
end
end
end
================================================
FILE: lib/infoboxer/tree/paragraphs.rb
================================================
# frozen_string_literal: true
module Infoboxer
module Tree
# Base class for all "paragraph-level" nodes: {Paragraph}, {ListItem},
# {Heading}. It should be convenient to use it in {Navigation::Lookup::Node#_lookup Node#lookup}
# and similar methods like this:
#
# ```ruby
# page.lookup(:BaseParagraph) # => flat list of paragraph-levels
# ```
class BaseParagraph < Compound
  # Text of the contents with surrounding whitespace removed, always
  # terminated by an empty line.
  def text
    "#{super.strip}\n\n"
  end
end
# @private
class EmptyParagraph < Node
  # Source text this placeholder was created from.
  attr_reader :text

  def initialize(text)
    @text = text
  end

  # Always reports itself as empty: it should never be left in nodes flow.
  def empty?
    true
  end
end
# @private
module Mergeable
  # A node can absorb another one only while it is not closed and both are
  # of exactly the same class.
  def can_merge?(other)
    return false if closed?

    self.class == other.class
  end

  # Absorbs `other`: an EmptyParagraph just closes this node; anything else
  # contributes a splitter plus all of its children, and passes on its
  # closed state.
  def merge!(other)
    return @closed = true if other.is_a?(EmptyParagraph)

    [splitter, *other.children].each do |child|
      child.parent = self
      @children << child
    end
    @closed = other.closed?
  end
end
# @private
class MergeableParagraph < BaseParagraph
  include Mergeable

  # Unlike Mergeable#can_merge?, also accepts an EmptyParagraph.
  def can_merge?(other)
    return false if closed?

    other.is_a?(EmptyParagraph) || self.class == other.class
  end
end
# Represents plain text paragraph.
class Paragraph < MergeableParagraph
  # @private
  # Internal, used by {Parser} for merging: merged paragraphs are joined
  # with a single space.
  def splitter
    Text.new(' ')
  end

  # @private
  # Internal, used by {Parser}.
  # True when every child is either a template or whitespace-only text.
  def templates_only?
    children.all? do |child|
      next true if child.is_a?(Template)

      child.is_a?(Text) && child.raw_text.strip.empty?
    end
  end

  # @private
  # Internal, used by {Parser}
  def to_templates
    children.grep(Template)
  end

  # @private
  # Internal, used by {Parser}.
  # Templates of a template-only paragraph, the paragraph itself otherwise.
  def to_templates?
    return self unless templates_only?

    to_templates
  end
end
# Represents horizontal rule splitter. Rarely seen in modern wikis.
class HR < Node
# Defines nothing of its own; all behaviour is inherited from Node.
end
# Represents heading.
#
# NB: min heading level in MediaWiki is 2, Heading level 1 (page
# title) is not seen in page flow.
class Heading < BaseParagraph
# @param children heading contents, passed to the superclass as is
# @param level heading level, passed to the superclass as the `level:` keyword
def initialize(children, level)
super(children, level: level)
end
# @!attribute [r] level
# @return [Integer] lesser numbers is more important heading
def_readers :level
end
# Represents preformatted text chunk.
#
# Paragraph-level thing, can contain many lines of text.
class Pre < MergeableParagraph
  # @private
  # Internal, used by {Parser}.
  # An EmptyParagraph with non-empty text is appended as a new line (minus
  # one leading space) to the last child's raw text; everything else is
  # handled by the inherited merging.
  def merge!(other)
    return super unless other.is_a?(EmptyParagraph) && !other.text.empty?

    @children.last.raw_text << "\n" << other.text.sub(/^ /, '')
  end

  # @private
  # Internal, used by {Parser} for merging: merged chunks are joined with
  # a newline.
  def splitter
    Text.new("\n")
  end
end
end
end
================================================
FILE: lib/infoboxer/tree/ref.rb
================================================
# frozen_string_literal: true
module Infoboxer
module Tree
# Represents footnote.
#
# Is not rendered in text flow, so, wikitext like
#
# ```
# ...pushed it back into underdevelopment,<ref>...tons of footnote text...</ref> though it
# nevertheless...
# ```
# when parsed and {Node#text} called, will return text like:
#
# ```
# ...pushed it back into underdevelopment, though it nevertheless...
# ```
#
# ...which most times is most reasonable thing to do.
class Ref < Compound
# @!attribute [r] name
# Reader declared through the project's `def_readers` macro.
def_readers :name
# @private
# Internal, used by {Parser}
# Always false, whatever the contents are.
def empty?
# even empty tag should not be dropped!
false
end
# Always an empty string: footnote contents never appear in the text flow.
def text
# because we want "clean" text,
# without references & footnotes messed up in it
''
end
end
end
end
================================================
FILE: lib/infoboxer/tree/table.rb
================================================
# frozen_string_literal: true
require 'terminal-table'
module Infoboxer
module Tree
# Represents table. Tables are complicated!
class Table < Compound
  # Internal, used by {Parser}
  def empty?
    false
  end

  # All table rows.
  def rows
    children.grep(TableRow)
  end

  # Table caption, if exists.
  def caption
    children.grep(TableCaption).first
  end

  # For now, returns first table row, if it consists only of
  # {TableHeading}s.
  #
  # FIXME: it can easily be several table heading rows
  def heading_row
    first_row = rows.first
    first_row if first_row&.children&.all? { |cell| cell.is_a?(TableHeading) }
  end

  # For now, returns all table rows except {#heading_row}
  def body_rows
    heading_row ? rows[1..] : rows
  end

  # Plain-text rendering of the table through Terminal::Table, followed by
  # an empty line.
  def text
    rendered = Terminal::Table.new
    rendered.title = caption.text.sub(/\n+\Z/, '') if caption
    rendered.headings = heading_row.children.map(&:text_) if heading_row
    rendered.rows = body_rows.map { |row| row.children.map(&:text_) }
    "#{rendered}\n\n"
  end
end
# Represents one table row.
class TableRow < Compound
# Row cells are simply the node's children.
alias_method :cells, :children
# Always false, even for a row without cells.
def empty?
false
end
end
# Represents any table cell, either {TableCell cell} or
# {TableHeading heading}.
#
# Can be used for lookups (same way as {BaseParagraph}).
class BaseCell < Compound
# Always false, even for a cell without contents.
def empty?
false
end
end
# Represents ordinary table cell (`td` in HTML).
class TableCell < BaseCell
# No behaviour of its own; exists as a distinct type of BaseCell.
end
# Represents table heading cell (`th` in HTML).
class TableHeading < BaseCell
# No behaviour of its own; exists as a distinct type of BaseCell.
end
# Represents table caption.
class TableCaption < Compound
# No behaviour of its own; everything is inherited from Compound.
end
end
end
================================================
FILE: lib/infoboxer/tree/template.rb
================================================
# frozen_string_literal: true
require_relative 'linkable'
module Infoboxer
module Tree
# Template variable.
#
# It's basically the thing with name and ANY nodes inside, can be
# seen only as a direct child of {Template}.
class Var < Compound
  # Variable name; numeric strings denote unnamed (positional) variables.
  attr_reader :name

  def initialize(name, children = Nodes[])
    super(children)
    @name = name
  end

  # Internal, used by {Parser}
  # Means even children-less Var should not be removed from parser tree.
  def empty?
    false
  end

  # False when the name consists of digits only, true otherwise.
  def named?
    !(name =~ /^\d+$/)
  end

  protected

  def descr
    klass = clean_class
    "#{klass}(#{name})"
  end

  def _eq(other)
    return false unless other.name == name

    other.children == children
  end
end
# Represents MediaWiki **template**.
#
# [**Template**](https://en.wikipedia.org/wiki/Wikipedia:Templates)
# is basically a thing with name, some variables and their
# values. When pages are displayed in browser, templates are rendered in
# something different by wiki engine; yet, when extracting information
# with Infoboxer, you are working with original templates.
#
# It requires some mastering and understanding, yet allows you to do
# very powerful things. There are many kinds of them, from pure
# formatting-related (which are typically not more than small bells
# and whistles for page outlook, and should be rendered as a text)
# to very information-heavy ones, like
# [**infoboxes**](https://en.wikipedia.org/wiki/Help:Infobox), from
# which Infoboxer borrows its name!
#
# Basically, for information extraction from template you'll list
# its {#variables}, and then use {#fetch} method
# (and its variants: {#fetch_hash}/{#fetch_date}) to extract their
# values.
#
# ### On variables naming
#
# MediaWiki templates can contain _named_ and _unnamed_ variables.
# Example:
#
# ```
# {{birth date and age|1953|2|19|df=y}}
# ```
#
# This is template with name "birth date and age", three unnamed
# variables with values "1953", "2" and "19", and one named variable
# with name "df" and value "y".
#
# For consistency, Infoboxer treats unnamed variables _exactly_ the
# same way MediaWiki does: they are considered to have numeric names,
# which _start from 1_ and are _stored as strings_. So, for
# template shown above, the following is correct:
#
# ```ruby
# template.fetch('1').text == '1953'
# template.fetch('2').text == '2'
# template.fetch('3').text == '19'
# template.fetch('df').text == 'y'
# ```
#
# Note also, that _named variables with simple text values_ are
# duplicated as a template node {Node#params}, so, the following is
# correct also:
#
# ```ruby
# template.params['df'] == 'y'
# template.params.has_key?('1') == false
# ```
#
# For more advanced topics, like subclassing templates by names and
# converting them to inline text, please read {Templates} module's
# documentation.
class Template < Compound
# Template name, designating its contents structure.
#
# See also {Linkable#url #url}, which you can navigate to read template's
# definition (and, in Wikipedia and many other projects, its
# documentation).
#
# @return [String]
attr_reader :name
# Template variables list.
#
# See {Var} class to understand what you can do with them.
#
# @return [Nodes<Var>]
# attr_reader :variables
alias_method :variables, :children
def initialize(name, variables = Nodes[])
  # Variables holding a single plain-text child are also exposed as node params.
  simple_params = extract_params(variables)
  super(variables, **simple_params)
  @name = name
end
def text
  # Only unnamed variables take part in the text; without them the
  # template renders as an empty string.
  joined = unnamed_variables.map(&:text).join('|')
  return '' if joined.empty?

  "{#{name}:#{joined}}"
end
def unwrap
  contents = unnamed_variables.flat_map { |var| var.children }
  contents.unwrap
end
# See {Node#to_tree}
def to_tree(level = 0)
  header = ' ' * level + "<#{descr}>\n"
  nested = variables.map { |var| var.to_tree(level + 1) }
  header + nested.join
end
# Represents entire template as hash of `String => String`,
# where keys are variable names and values are text representation
# of variables contents.
#
# @return [Hash{String => String}]
def to_h
  # For repeated names the last variable wins, as with Array#to_h.
  variables.each_with_object({}) do |var, hash|
    hash[var.name] = var.text
  end
end
# Returns list of template variables with numeric names (which
# are treated as "unnamed" variables by MediaWiki templates, see
# {Template class docs} for explanation).
#
# @return [Nodes<Var>]
def unnamed_variables
  variables.reject { |var| var.named? }
end
def named_variables
  variables.select { |var| var.named? }
end
# Fetches template variable(s) by name(s) or patterns.
#
# Usage:
#
# ```ruby
# argentina.infobox.fetch('leader_title_1') # => one Var node
# argentina.infobox.fetch('leader_title_1',
# 'leader_name_1') # => two Var nodes
# argentina.infobox.fetch(/leader_title_\d+/) # => several Var nodes
# ```
#
# @return [Nodes<Var>]
def fetch(*patterns)
  # One lookup per pattern; the per-pattern results are merged into one
  # flat list.
  found = patterns.map { |pattern| variables.find(name: pattern) }
  Nodes[*found.flatten]
end
# Fetches hash `{name => variable}`, by same patterns as {#fetch}.
#
# @return [Hash<String => Var>]
def fetch_hash(*patterns)
  fetch(*patterns).each_with_object({}) do |var, res|
    res[var.name] = var
  end
end
# Fetches date by list of variable names containing date components.
#
# _(Experimental, subject to change or enhancement.)_
#
# Explanation: if you have template like
# ```
# {{birth date and age|1953|2|19|df=y}}
# ```
# ...there is a short way to obtain date from it:
# ```ruby
# template.fetch_date('1', '2', '3') # => Date.new(1953,2,19)
# ```
#
# @return [Date]
def fetch_date(*patterns)
  components = fetch(*patterns)
  # Drop trailing nils, if any.
  components.pop until components.empty? || !components.last.nil?
  return nil if components.empty?

  # Every fetched variable is converted String -> Integer and handed to
  # Date.new in the order the patterns were given.
  Date.new(*components.map { |v| v.to_s.to_i })
end
include Linkable
# @!method follow
# Extracts template source and returns it parsed (or nil,
# if template not found).
#
# **NB**: Infoboxer does NO variable substitution or other template
# evaluation actions. Moreover, it will almost certainly NOT parse
# template definitions correctly. You should use this method ONLY
# for "transclusion" templates (parts of content, which are
# included into other pages "as is").
#
# Look for example at [this page's](https://en.wikipedia.org/wiki/Tropical_and_subtropical_coniferous_forests)
# [source](https://en.wikipedia.org/w/index.php?title=Tropical_and_subtropical_coniferous_forests&action=edit):
# each subtable about some region is just a transclusion of
# template. This can be processed like:
#
# ```ruby
# Infoboxer.wp.get('Tropical and subtropical coniferous forests').
# templates(name: /forests$/).
# follow.tables #.and_so_on
# ```
#
# @return {MediaWiki::Page}
#
# **See also** {Linkable#follow} for general notes on the following links.
# Wikilink name of this template's source.
def link
# The template name prefixed with the "Template:" namespace.
# FIXME: super-naive for now, doesn't thinks about subpages and stuff.
"Template:#{name}"
end
# @private
# Internal, used by {Parser}.
def empty?
# Always false, even for a template without variables.
false
end
protected
def _eq(other)
  # Equal when both the name and the variables are equal.
  return false unless other.name == name

  other.variables == variables
end
def clean_class
# Label of the form "Template[<name>]".
"Template[#{name}]"
end
def extract_params(vars)
  # Keeps only variables whose sole child is a Text node; the result maps
  # the symbolized variable name to that text's raw (undecoded) contents.
  vars.each_with_object({}) do |var, params|
    kids = var.children
    next unless kids.count == 1 && kids.first.is_a?(Text)

    params[var.name.to_sym] = kids.first.raw_text
  end
end
def inspect_variables(depth)
# Renders at most the first two variables, followed by ', ...' when there
# are more than two.
# NOTE(review): `variables` is an alias of `children` (a list of Var
# nodes), yet the block destructures each element as a `name, var` pair,
# as if iterating a Hash. Unless Var converts to an array, `var` would be
# nil here. This looks like a leftover from a Hash-based implementation;
# confirm whether the method is still called and whether `inspect` accepts
# a depth argument.
variables.to_a[0..1].map { |name, var| "#{name}: #{var.inspect(depth + 1)}" }.join(', ') +
(variables.count > 2 ? ', ...' : '')
end
end
end
end
================================================
FILE: lib/infoboxer/tree/text.rb
================================================
# frozen_string_literal: true
module Infoboxer
module Tree
# Represents plain text node.
#
# Think of it like this: if you have paragraph
# ```
# Some paragraph with ''italic'' and [wikilink].
# ```
# ...then it will be parsed as a sequence of `[Text`, {Italic}, `Text`,
# {Wikilink}, `Text]`.
#
class Text < Node
  # Text fragment without decoding of HTML entities.
  #
  # NB: mutating the returned string in place (`<<`, `sub!`, ...) is not
  # noticed by the memoization in {#text}. Use {#raw_text=} or {#merge!}
  # when the decoded text must stay in sync.
  attr_reader :raw_text

  def initialize(text, **params)
    super(**params)
    @raw_text = +text # unary plus: make sure we hold an unfrozen string
  end

  # Replaces the raw text and drops the decoded text memoized by {#text},
  # which would otherwise keep describing the previous contents.
  def raw_text=(value)
    @text = nil
    @raw_text = value
  end

  # See {Node#text}
  #
  # The decoded value is memoized until the raw text is replaced or merged.
  def text
    @text ||= decode(@raw_text)
  end

  # See {Node#to_tree}
  def to_tree(level = 0)
    "#{indent(level)}#{text} <#{descr}>\n"
  end

  # @private
  # Internal, used by {Parser}
  def can_merge?(other)
    other.is_a?(String) || other.is_a?(Text)
  end

  # @private
  # Internal, used by {Parser}
  #
  # Appends a String or another Text's raw text to this node and returns
  # the resulting raw text; fails for anything else, leaving the node
  # untouched.
  def merge!(other)
    addition =
      case other
      when String then other
      when Text then other.raw_text
      else
        fail("Not mergeable into text: #{other.inspect}")
      end
    # The memoized decoded text does not include the appended part.
    @text = nil
    @raw_text << addition
  end

  # @private
  # Internal, used by {Parser}
  def empty?
    raw_text.empty?
  end

  private

  def _eq(other)
    text == other.text
  end
end
end
end
================================================
FILE: lib/infoboxer/tree/wikilink.rb
================================================
# frozen_string_literal: true
require_relative 'linkable'
module Infoboxer
module Tree
# Internal MediaWiki link class.
#
# See [Wikipedia docs](https://en.wikipedia.org/wiki/Help:Link#Wikilinks)
# for extensive explanation of Wikilink concept.
#
# Note, that Wikilink is {Linkable}, so you can {Linkable#follow #follow}
# it to obtain linked pages.
class Wikilink < Link
  def initialize(link, label = nil, namespace: nil, interwiki: nil)
    super(link, label, namespace: namespace, interwiki: interwiki)
    @namespace = namespace || ''
    @interwiki = interwiki
    parse_name!
  end

  # "Clean" wikilink name, for ex., `Cities` for `[[Category:Cities]]`
  attr_reader :name

  # Interwiki identifier. For example, `[[wikt:Argentina]]`
  # will have `"Argentina"` as its {#name} and `"wikt"` (wiktionary) as an
  # interwiki. TODO: how to use it.
  #
  # See [Wikipedia docs](https://en.wikipedia.org/wiki/Help:Interwiki_linking) for details.
  attr_reader :interwiki

  # Wikilink namespace, `Category` for `[[Category:Cities]]`, empty
  # string (not `nil`!) for just `[[Cities]]`
  attr_reader :namespace

  # Anchor part of hyperlink, like `History` for `[[Argentina#History]]`
  attr_reader :anchor

  # Topic part of link name.
  #
  # There's so-called ["Pipe trick"](https://en.wikipedia.org/wiki/Help:Pipe_trick)
  # in wikilink markup, which defines that `[[Phoenix, Arizona]]` link
  # has main part ("Phoenix") and refinement part ("Arizona"). So,
  # we are splitting it here in `topic` and {#refinement}.
  # The same way, `[[Pipe (programming)]]` has `topic == 'Pipe'` and
  # `refinement == 'programming'`
  attr_reader :topic

  # Refinement part of link name.
  #
  # See {#topic} for explanation.
  attr_reader :refinement

  include Linkable

  private

  # Splits the link into {#name} (namespace prefix removed) and {#anchor}.
  def parse_name!
    # The namespace is literal text, not a pattern: escape it so characters
    # like "+", "(" or "." cannot change (or break) the regexp, and anchor
    # at the very start of the string, not at any line start.
    @name = namespace.empty? ? link : link.sub(/\A#{Regexp.escape(namespace)}:/, '')
    @name, @anchor = @name.split('#', 2)
    @anchor ||= ''
    parse_topic!
  end

  # Splits {#name} into {#topic} and {#refinement}: "Topic (refinement)" or
  # "Topic, refinement". Without either form the whole name is the topic.
  #
  # @see http://en.wikipedia.org/wiki/Help:Pipe_trick
  def parse_topic!
    @topic, @refinement =
      case @name
      when /^(.+\S)\s*\((.+)\)$/, /^(.+?),\s*(.+)$/
        [Regexp.last_match(1), Regexp.last_match(2)]
      else
        [@name, '']
      end

    # A single, empty Text child gets the topic as its text.
    return unless children.count == 1 &&
                  children.first.is_a?(Text) && children.first.raw_text.empty?

    children.first.raw_text = @topic
  end
end
end
end
================================================
FILE: lib/infoboxer/tree.rb
================================================
# frozen_string_literal: true
module Infoboxer
# Infoboxer provides you with tree structure of the Wikipedia page,
# which you can introspect and navigate with ease. This tree structure
# tries to be simple, close to Wikipedia source and logical.
#
# You can always inspect entire page tree yourself:
#
# ```ruby
# page = Infoboxer.wp.get('Argentina')
# puts page.to_tree
# ```
#
# ## Inspecting and understanding single node
#
# Each tree node is descendant of {Tree::Node}, so you should look
# at this class to understand what you can do.
#
# Alongside with basic methods, defined in Node class, some useful
# utility methods are defined in subclasses.
#
# Here's full list of subclasses, representing real nodes, with their
# respective roles:
#
# * inline markup: {Text}, {Bold}, {Italic}, {BoldItalic}, {Wikilink},
# {ExternalLink}, {Image};
# * embedded HTML: {HTMLTag}, {HTMLOpeningTag}, {HTMLClosingTag};
# * paragraph-level nodes: {Heading}, {Paragraph}, {Pre}, {HR};
# * lists: {OrderedList}, {UnorderedList}, {DefinitionList}, {ListItem},
# {DTerm}, {DDefinition};
# * tables: {Table}, {TableCaption}, {TableRow}, {TableHeading}, {TableCell};
# * special elements: {Template}, {Ref}.
#
# ## Tree navigation
#
# {Tree::Node} class has a standard list of methods for traversing tree
# upwards, downwards and sideways: `children`, `parent`, `siblings`,
# `index`. Read through class documentation for their detailed
# descriptions.
#
# {Navigation} module contains more advanced navigational functionality,
# like XPath-like selectors, friendly shortcuts, breakup of document
# into logical "sections" and so on.
#
# Most of navigational and other Node's methods return {Nodes} type,
# which is an `Array` descendant with additional functionality.
#
# ## Complex data extraction
#
# Most of uniform, machine-extractable data in Wikipedia is stored in
# templates and tables. There's entire {Templates} module, which is
# documented explaining what you can do about Wikipedia templates, how
# to understand them and use information. Also, you can look at {Table}
# class, which for now is not that powerful, yet allows you to extract
# some columns and rows.
#
# Also, consider that WIKIpedia is made of WIKIlinks, and {Wikilink#follow}
# (as well as {Nodes#follow} for multiple links at once) is your good friend.
#
module Tree
require_relative 'tree/node'
require_relative 'tree/nodes'
%w[text compound inline
image gallery html paragraphs list template table ref math
document].each do |type|
require_relative "tree/#{type}"
end
end
end
================================================
FILE: lib/infoboxer/version.rb
================================================
# frozen_string_literal: true
module Infoboxer
# Numeric components of the gem version.
MAJOR = 0
MINOR = 4
PATCH = 0
# Pre-release component; nil means there is none (dropped by #compact below).
PRE = nil
# Dot-joined version string built from the non-nil components above.
VERSION = [MAJOR, MINOR, PATCH, PRE].compact.join('.')
end
================================================
FILE: lib/infoboxer/wiki_path.rb
================================================
# frozen_string_literal: true
module Infoboxer
  # @private
  # Parses selector strings like `//template[name=/^Infobox/]/var[empty]`
  # into a list of steps and applies them to a node one after another.
  class WikiPath
    # Raised for any malformed path.
    ParseError = Class.new(ArgumentError)

    class << self
      # Parses the path into an array of step hashes, without wrapping them
      # into a WikiPath instance.
      #
      # @param string [String]
      # @return [Array<Hash>]
      # @raise [ParseError] on malformed path (including empty string)
      def _parse(string)
        scanner = StringScanner.new(string)
        res = []
        loop do
          res << scan_step(scanner)
          break if scanner.eos?
        end
        res
      end

      # @param string [String]
      # @return [WikiPath]
      # @raise [ParseError] on malformed path
      def parse(string)
        new(_parse(string))
      end

      private

      # Scans one step: "/" or "//", optional type, then any number of
      # `[attr]` (predicate) and `[attr=value]` (attribute) groups.
      def scan_step(scanner) # rubocop:disable Metrics/PerceivedComplexity
        op = scanner.scan(%r{//?}) or unexpected(scanner, '/')
        type = scanner.scan(/[A-Za-z_]*/)
        attrs = {}
        while scanner.scan(/\[/)
          attr = scanner.scan(/[-a-z_0-9]+/) or unexpected(scanner, 'attribute name')
          if scanner.scan(/\]/)
            # `[attr]` without a value is a predicate: stored as :attr?
            (attrs[:predicates] ||= []) << "#{attr}?".to_sym
            next
          end
          scanner.scan(/\s*=\s*/) or unexpected(scanner, '= or ]')
          value = scanner.scan(/[^\]]+/) # TODO: probably, should do a proper [] counting?..
          scanner.scan(/\]/) or unexpected(scanner, ']')
          attrs[attr.to_sym] = process_value(value)
        end
        res = op == '//' ? {op: :lookup} : {}
        res[:type] = process_type(type) unless type.empty?
        res.merge(attrs) # TODO: raise if empty selector
      end

      # Quoted values are unquoted, /.../ values become Regexp, everything
      # else is returned as is.
      def process_value(value)
        case value
        when /^'(.*)'$/, /^"(.*)"$/
          Regexp.last_match(1)
        when %r{^/(.+)/$}
          Regexp.new(Regexp.last_match(1))
        else
          value
        end
      end

      # Converts snake_case type from the path into a node class name
      # symbol (`table_row` => `:TableRow`).
      #
      # @raise [ParseError] when there is no such node type
      def process_type(type)
        const_name = type.gsub(/(?:^|_)([a-z])/, &:upcase).tr('_', '')
        # A type made of underscores only collapses into '', which is not
        # a constant name at all: Module#const_defined? would raise
        # NameError for it, escaping callers that rescue ParseError.
        if const_name.empty? || !valid_type?(const_name.to_sym)
          fail(ParseError, "Unrecognized node type: #{type}")
        end

        const_name.to_sym
      end

      def valid_type?(t)
        t == :Section || Infoboxer::Tree.const_defined?(t)
      end

      def unexpected(scanner, expected)
        place = scanner.eos? ? 'end of pattern' : scanner.rest.inspect
        fail ParseError, "Unexpected #{place}, expecting #{expected}"
      end
    end

    # @param path [Array<Hash>] steps, as produced by {WikiPath._parse}
    def initialize(path)
      @path = path
    end

    # Applies all steps in order, feeding each step's result to the next one.
    def call(node)
      @path.inject(node) { |res, step| apply_step(res, step) }
    end

    private

    # Turns one step hash into a method call on the node: `:op` (or
    # :lookup_children by default) with type, predicates and the remaining
    # attributes hash as arguments.
    def apply_step(node, step)
      # TODO: "compile" the op/args sequences at WikiPath initialization
      step = step.dup
      op = step.delete(:op) || :lookup_children
      args = []
      if (t = step.delete(:type))
        args << t
      end
      if (pred = step.delete(:predicates))
        args.concat(pred)
      end
      args << step unless step.empty?
      node.send(op, *args)
    end
  end
end
================================================
FILE: lib/infoboxer.rb
================================================
# frozen_string_literal: true
# Main client module for entire infoboxer functionality. If you're lucky,
# there's no other classes/modules you need to instantiate or call
# directly. You just do:
#
# ```ruby
# Infoboxer.wp.get('List of radio telescopes')
# # or
# Infoboxer.wikiquote.get('Vonnegut')
# ```
# ...and have fully navigable Wiki information.
#
# Please read [wiki](http://github.com/molybdenum-99/infoboxer/wiki)
# for extensive [showcases](https://github.com/molybdenum-99/infoboxer/wiki/Showcase)
# and usage recommendations.
#
# Here's main components list, which also can serve as a TOC for
# Infoboxer's functionality (we suggest to read their docs in this order):
#
# * {Tree} -- nodes, of which Wikipedia AST is consisting; you'll be
# interested in basic {Tree::Node} functionality, as well as node
# classes list (which is useful for navigation);
# * {Navigation} -- how to navigate the tree you have, basic way
# (children, parents, siblings) and hi-level shortcuts way (like
# all unnumbered list items in second level-3 section);
# * {Templates} -- the most advanced data extraction from wikipedia definitely
# needs your understanding of this (rather complicated) topic.
#
# You also may be interested in (though may be never need to use them directly):
#
# * {MediaWiki} client class;
# * {Parser} -- which, you know, parses.
#
# **NB** `Infoboxer` module can also be included in other classes, like
# this:
#
# ```ruby
# class MyDataGrabber
# include Infoboxer
#
# def initialize
# wikipedia.get('Argentina')
# end
# end
# ```
#
module Infoboxer
# @private
WIKIA_API_URL = 'http://%s.wikia.com/api.php'
WIKIMEDIA_PROJECTS = {
wikipedia: 'wikipedia.org',
wikivoyage: 'wikivoyage.org',
wikiquote: 'wikiquote.org',
wiktionary: 'wiktionary.org',
wikibooks: 'wikibooks.org',
wikinews: 'wikinews.org',
wikiversity: 'wikiversity.org',
wikisource: 'wikisource.org'
}.freeze
WIKIMEDIA_COMMONS = {
commons: 'commons.wikimedia.org',
species: 'species.wikimedia.org',
}.freeze
def wikis
  # Lazily created registry of clients, keyed by API URL (see #wiki).
  @wikis || (@wikis = {})
end
# Includeable version of {Infoboxer.wiki}
def wiki(api_url, **options)
  # Clients are cached per URL only: options of later calls for an already
  # known URL are not applied.
  cached = wikis[api_url]
  return cached if cached

  wikis[api_url] = MediaWiki.new(api_url, **options)
end
class << self # rubocop:disable Lint/EmptyClass -- that's for YARD!
# @!method wiki(api_url, options = {})
# Default method for creating MediaWiki API client.
#
# @param api_url should be URL of api.php for your MediaWiki
# @param options list of options.
# The only recognized option for now, though, is
# * `:user_agent` (also aliased as `:ua`) -- custom User-Agent header.
# @return [MediaWiki] an instance of API client, which you can
# further use like this:
#
# ```ruby
# Infoboxer.wiki('some_url').get('Some page title')
# ```
# @!method wikipedia(lang = 'en', options = {})
# Shortcut for creating Wikipedia client.
#
# @param lang two-character code for language version
# @param options (see #wiki for list of options)
# @return [MediaWiki]
# @!method commons(options = {})
# Shortcut for creating [WikiMedia Commons](https://commons.wikimedia.org/) client.
#
# @param options (see #wiki for list of options)
# @return [MediaWiki]
# @!method wikibooks(lang = 'en', options = {})
# Shortcut for creating [Wikibooks](https://en.wikibooks.org/) client.
# See {wikipedia} for params explanation.
# @return [MediaWiki]
# @!method wikiquote(lang = 'en', options = {})
# Shortcut for creating [Wikiquote](https://en.wikiquote.org/) client.
# See {wikipedia} for params explanation.
# @return [MediaWiki]
# @!method wikiversity(lang = 'en', options = {})
# Shortcut for creating [Wikiversity](https://en.wikiversity.org/) client.
# See {wikipedia} for params explanation.
# @return [MediaWiki]
# @!method wikisource(lang = 'en', options = {})
# Shortcut for creating [Wikisource](https://en.wikisource.org/) client.
# See {wikipedia} for params explanation.
# @return [MediaWiki]
# @!method wikivoyage(lang = 'en', options = {})
# Shortcut for creating [Wikivoyage](http://wikivoyage.org) client.
# See {wikipedia} for params explanation.
# @return [MediaWiki]
# @!method wikinews(lang = 'en', options = {})
# Shortcut for creating [Wikinews](https://en.wikinews.org/) client.
# See {wikipedia} for params explanation.
# @return [MediaWiki]
# @!method species(options = {})
# Shortcut for creating [Wikispecies](https://species.wikimedia.org/) client.
#
# @param options (see #wiki for list of options)
# @return [MediaWiki]
# @!method wiktionary(lang = 'en', options = {})
# Shortcut for creating [Wiktionary](https://en.wiktionary.org/) client.
# See {wikipedia} for params explanation.
# @return [MediaWiki]
# @!method wikia(*domains)
# Performs request to wikia.com wikis.
#
# @overload wikia(*domains)
# @param *domains list of domains to merge, like this:
#
# ```ruby
# Infoboxer.wikia('tardis') # looks at tardis.wikia.com
# Infoboxer.wikia('tardis', 'ru') # looks in Russian version, ru.tardis.wikia.com
# ```
# If you are surprised by "reversing" list of subdomains, think of
# it as of chain of refinements (looking in "tardis" wiki, its "ru"
# version, specifically).
#
# @overload wikia(*domains, options)
# @param *domains same as above
# @param options just last of params, if it is hash
# (see {wiki} for list of options)
#
# @return [MediaWiki]
end
WIKIMEDIA_PROJECTS.each_pair do |project, host|
  define_method(project) do |lang = 'en', **options|
    # A Hash in the first position is treated as options, with the language
    # falling back to 'en'.
    if lang.is_a?(Hash)
      options = lang
      lang = 'en'
    end
    wiki("https://#{lang}.#{host}/w/api.php", **options)
  end
end
alias_method :wp, :wikipedia
WIKIMEDIA_COMMONS.each_pair do |project, host|
  # These projects have no per-language subdomain, so only options are accepted.
  define_method(project) do |**options|
    wiki("https://#{host}/w/api.php", **options)
  end
end
# Returns URL of API entry-point for a well-known Wiki-project (wikipedia, wikivoyage etc.)
# by project's name.
#
# @param symbol [Symbol] One of {WIKIMEDIA_PROJECTS} or {WIKIMEDIA_COMMONS} keys.
# @param lang [String, Symbol] Language of the project, if applicable.
# @return [String]
def url_for(symbol, lang = 'en')
  # Per-language projects first, then the language-less ones; nil when the
  # symbol is in neither list.
  project_domain = WIKIMEDIA_PROJECTS[symbol]
  return "https://#{lang}.#{project_domain}/w/api.php" if project_domain

  commons_domain = WIKIMEDIA_COMMONS[symbol]
  "https://#{commons_domain}/w/api.php" if commons_domain
end
# @!method wikipedia(lang = 'en', options = {})
# Includeable version of {Infoboxer.wikipedia}
# @!method commons(options = {})
# Includeable version of {Infoboxer.commons}
# @!method wikibooks(lang = 'en', options = {})
# Includeable version of {Infoboxer.wikibooks}
# @!method wikiquote(lang = 'en', options = {})
# Includeable version of {Infoboxer.wikiquote}
# @!method wikiversity(lang = 'en', options = {})
# Includeable version of {Infoboxer.wikiversity}
# @!method wikisource(lang = 'en', options = {})
# Includeable version of {Infoboxer.wikisource}
# @!method wikivoyage(lang = 'en', options = {})
# Includeable version of {Infoboxer.wikivoyage}
# @!method wikinews(lang = 'en', options = {})
# Includeable version of {Infoboxer.wikinews}
# @!method species(options = {})
# Includeable version of {Infoboxer.species}
# @!method wiktionary(lang = 'en', options = {})
# Includeable version of {Infoboxer.wiktionary}
# Includeable version of {Infoboxer.wikia}
def wikia(*domains)
  # A trailing Hash is taken as client options, not a domain.
  options = {}
  options = domains.pop if domains.last.is_a?(Hash)

  # Domains are given from general to specific, the host name needs the
  # opposite order.
  subdomain = domains.reverse.join('.')
  wiki(format(WIKIA_API_URL, subdomain), **options)
end
# Sets user agent string globally. Default user agent is
# {MediaWiki::UA}.
#
# User agent can also be overridden with an option to {wiki} method (and
# its shortcuts like {wikipedia}), or by using {MediaWiki#initialize}
# explicitly.
#
def self.user_agent=(ua)
MediaWiki.user_agent = ua
end
extend self
end
require_relative 'infoboxer/version'
require_relative 'infoboxer/core_ext'
require_relative 'infoboxer/tree'
require_relative 'infoboxer/parser'
require_relative 'infoboxer/navigation'
require_relative 'infoboxer/templates'
require_relative 'infoboxer/media_wiki'
require_relative 'infoboxer/definitions/en.wikipedia.org'
================================================
FILE: profile/out/.gitkeep
================================================
================================================
FILE: profile/pages/argentina.txt
================================================
{{other uses}}
{{pp-semi|small=yes}}
{{Use dmy dates|date=July 2014}}
{{Infobox country
|conventional_long_name = Argentine Republic{{efn-ua|name=altnames|Article 35 of the [[Argentine Constitution]] gives equal recognition to the names "United Provinces of the River Plate", "Argentine Republic" and "Argentine Confederation" and authorizes the use of "Argentine Nation" in the making and enactment of laws.{{sfn|Constitution of Argentina|loc=art. 35}}}}
|native_name = {{native name|es|República Argentina}}
|common_name = Argentina
|image_flag = Flag of Argentina.svg
|image_coat = Coat of arms of Argentina.svg
|national_motto = {{unbulleted list
|list_style=line-height:125%;
| {{native phrase|es|"[[En unión y libertad]]"|nolink=yes|paren=off}}
| {{small|("In Unity and Freedom")}}
}}
|national_anthem = {{unbulleted list
|item1_style=line-height:125%;
|item2_style=line-height:125%;
|item3_style=margin-top:4px;
| {{native phrase|es|[[Argentine National Anthem|Himno Nacional Argentino]]|nolink=yes|paren=off}}
| {{small|("Argentine National Anthem")}}
| <center>[[File:Himno Nacional Argentino instrumental.ogg]]</center>
}}
|other_symbol = [[File:Sol de Mayo-Bandera de Argentina.svg|90x90px|alt=Sol de Mayo]]
|other_symbol_type = {{native name|es|[[Sol de Mayo]]{{sfnm|1a1=Crow|1y=1992|1p=457|1ps=: "In the meantime, while the crowd assembled in the plaza continued to shout its demands at the cabildo, the sun suddenly broke through the overhanging clouds and clothed the scene in brilliant light. The people looked upward with one accord and took it as a favorable omen for their cause. This was the origin of the ″sun of May″ which has appeared in the center of the Argentine flag and on the Argentine coat of arms ever since."|2a1=Kopka|2y=2011|2p=5|2ps=: "The sun's features are those of [[Inti]], the [[Inca]]n sun god. The sun commemorates the appearance of the sun through cloudy skies on May 25, 1810, during the first mass demonstration in favor of independence."}}|nolink=yes|paren=off}}<br>{{small|(Sun of May)}}
|image_map = Argentina orthographic.svg
|map_width = 220px
|map_caption = {{resize|110%|Mainland Argentina shown in dark green, with [[#Foreign relations|territorial claims]] shown in light green}}
|capital = [[Buenos Aires]]
|latd=34 |latm=36 |latNS=S |longd=58|longm=23 |longEW=W
|largest_city = capital
|official_languages = [[Spanish language|Spanish]]{{ref label|note-lang|a|}}
|ethnic_groups = {{unbulleted list
|
| 97% [[Argentines of European descent|European]]
| 3% [[Mestizo]], [[Indigenous peoples in Argentina|Amerindian]] and [[Asian Argentine|Asian]]
}}
|
|demonym = {{unbulleted list
|[[Argentine people|Argentine]]
|[[Argentine people|Argentinian]]
|{{nowrap|[[Argentine people|Argentinean]] {{small|(uncommon)}}}}
}}
|government_type = [[Federal republic|Federal]] [[Presidential system|presidential]] [[constitutional republic]]
|leader_title1 = [[President of Argentina|President]]
|leader_name1 = [[Cristina Fernández de Kirchner]]
|leader_title2 = [[Vice President of Argentina|Vice President]]
|leader_name2 = [[Amado Boudou]]
|leader_title3 = [[Supreme Court of Argentina|Supreme Court President]]
|leader_name3 = [[Ricardo Lorenzetti]]
|legislature = [[Argentine National Congress|Congress]]
|upper_house = [[Argentine Senate|Senate]]
|lower_house = [[Argentine Chamber of Deputies|Chamber of Deputies]]
|sovereignty_type = [[Argentine War of Independence|Independence]]
|sovereignty_note = from [[Spanish Empire|Spain]]
|established_event1 = [[May Revolution]]
|established_date1 = 25 May 1810
|established_event2 = [[Argentine Declaration of Independence|Declared]]
|established_date2 = 9 July 1816
|established_event3 = {{nowrap|[[Argentine Constitution|Constitution]]}}
|established_date3 = 1 May 1853
|established_event4 = {{nowrap|[[Diplomatic recognition|Recognized]]}}
|established_date4 = 29 April 1857
|area_rank = 8th
|area_magnitude = 1_E12
|area_km2 = 2780400
|area_footnote = {{efn-ua|name=excl_area|Area does not include territorial claims in [[Argentine Antarctica#Argentine claim|Antarctica]] (965,597 km{{smallsup|2}}, including the [[South Orkney Islands]]), the [[Falkland Islands]] (11,410 km{{smallsup|2}}), the [[South Georgia Island|South Georgia]] (3,560 km{{smallsup|2}}) and the [[South Sandwich Islands]] (307 km{{smallsup|2}}).<ref name=totalpop>{{cite web|url=http://www.indec.mecon.ar/nuevaweb/cuadros/2/f020202.xls|format=XLS|title=Población por sexo e índice de masculinidad. Superficie censada y densidad, según provincia. Total del país. Año 2010|work=Censo Nacional de Población, Hogares y Viviendas 2010|publisher=INDEC – Instituto Nacional de Estadística y Censos|place=Buenos Aires|year=2010|language=Spanish|archiveurl=http://web.archive.org/web/20140608011356/http://www.indec.mecon.ar/nuevaweb/cuadros/2/f020202.xls|archivedate=8 June 2014|deadurl=no}}</ref>}}
|percent_water = 1.57
|population_estimate = 42,669,500<ref name=proypop>{{cite web|url=http://www.indec.mecon.ar/nuevaweb/cuadros/2/c1_proyecciones_nac_2010_2040.xls|format=XLS|title=Cuadro 1. Población estimada al 1 de julio de cada año calendario por sexo. Total del país. Años 2010–2040|work=Censo Nacional de Población, Hogares y Viviendas 2010|publisher=INDEC – Instituto Nacional de Estadística y Censos|place=Buenos Aires|year=2010|language=Spanish|archiveurl=http://web.archive.org/web/20140608011629/http://www.indec.mecon.ar/nuevaweb/cuadros/2/c1_proyecciones_nac_2010_2040.xls|archivedate=8 June 2014|deadurl=no}}</ref>
|population_estimate_rank =
|population_estimate_year = 2014
|population_census = 40,117,096<ref name=totalpop/>
|population_census_year = 2010
|population_census_rank = 32nd
|population_density_km2 = 14.4
|population_density_rank = 212th
|pop_den_footnote = <ref name=totalpop/>
|GDP_PPP = $953.029 billion<ref name=imf2>{{cite web|url=http://www.imf.org/external/pubs/ft/weo/2015/01/weodata/weorept.aspx?sy=2013&ey=2020&scsm=1&ssd=1&sort=country&ds=.&br=1&c=213&s=NGDPD%2CNGDPDPC%2CPPPGDP%2CPPPPC&grp=0&a=&pr.x=68&pr.y=6|title=Argentina|work= World Economic Outlook Database |date=April 2015 |publisher=International Monetary Fund |accessdate=30 April 2015}}</ref>
|GDP_PPP_rank = 25th
|GDP_PPP_year = 2015
|GDP_PPP_per_capita = $22,459<ref name=imf2/>
|GDP_PPP_per_capita_rank = 57th
|GDP_nominal = $563.138 billion<ref name=imf2/>
|GDP_nominal_rank = 21st
|GDP_nominal_year = 2015
|GDP_nominal_per_capita = $13,271<ref name=imf2/>
|GDP_nominal_per_capita_rank = 54th
|Gini_year = 2011 <!-- use the year to which the data refers, not the publication year-->
|Gini_change = decrease <!--increase/decrease/steady-->
|Gini = 43.6 <!--number only-->
|Gini_ref = <ref name=gini>{{cite web |url=http://data.worldbank.org/indicator/SI.POV.GINI |title= GINI index (World Bank estimate) |publisher= World Bank |accessdate= 19 February 2015}}</ref>
|Gini_rank =
|HDI_year = 2013 <!-- use the year to which the data refers, not the publication year-->
|HDI_change = increase <!--increase/decrease/steady-->
|HDI = 0.808 <!--number only-->
|HDI_ref = <ref name=hdi>{{cite web|url=http://hdr.undp.org/sites/default/files/hdr14-summary-en.pdf|title=Human Development Report 2014 – Summary|format=PDF|publisher=United Nations Development Programme|place=New York, NY, USA|year=2014|pages=15, 16|archiveurl=http://web.archive.org/web/20140727205555/http://hdr.undp.org/sites/default/files/hdr14-summary-en.pdf|archivedate=27 July 2014|deadurl=no}}</ref>
|HDI_rank = 49th
|currency = [[Argentine peso|Peso]] ([[Dollar sign|$]])
|currency_code = ARS
|time_zone = [[Time in Argentina|ART]]
|utc_offset = −3
|date_format = dd.mm.yyyy ([[Common Era|CE]])
|drives_on = right{{ref label|note-train|b|}}
|calling_code = [[+54]]
|cctld = [[.ar]]
|footnote_a = {{note|note-lang}}''[[De facto]]'' at all government levels.{{efn-ua|name=es|Though not declared official ''[[de jure]]'', the Spanish language is the only one used in the wording of laws, decrees, resolutions, official documents and public acts.}} In addition, some provinces have official ''[[de jure]]'' languages:
:{{,}}[[Guaraní language|Guaraní]] in [[Corrientes Province]].<ref name=gn>{{cite Argentine law|jur=CN|l=5598|date=22 de octubre de 2004}}</ref>
:{{,}}[[Kom language (South America)|Kom]], [[Moqoit language|Moqoit]] and [[Wichi language|Wichi]], in [[Chaco Province]].<ref name=kom>{{cite Argentine law|jur=CC|l=6604|bo=9092|date=28 de julio de 2010}}</ref>
|footnote_b = {{note|note-train}}Trains ride on left.
}}
'''Argentina''' {{IPAc-en|audio=en-us-Argentina.ogg|ˌ|ɑr|dʒ|ən|ˈ|t|iː|n|ə}}, officially the '''Argentine Republic'''{{efn-ua|name=altnames}} ({{lang-es|link=no|República Argentina}} {{IPA-es|reˈpuβlika aɾxenˈtina|}}) is a [[federal republic]] located in southeastern [[South America]]. Sharing the [[Southern Cone]] with its smaller neighbour [[Chile]], it is bordered by [[Bolivia]] and [[Paraguay]] to the north; [[Brazil]] to the northeast; [[Uruguay]] and the [[South Atlantic Ocean]] to the east; Chile to the west and the [[Drake Passage]] to the south.
With a mainland area of {{convert|2780400|km2|mi2|abbr=on}},{{efn-ua|name=excl_area}} Argentina is the [[List of countries and outlying territories by total area|eighth-largest country]] in the world, the second-largest in [[Latin America]], and the largest [[Hispanophone|Spanish-speaking]] one.
Argentina claims sovereignty over [[Argentine Antarctica|part of Antarctica]], the [[Falkland Islands sovereignty dispute|Falkland Islands]] ({{lang-es|link=no|Islas Malvinas}}), [[South Georgia and South Sandwich Islands sovereignty dispute|South Georgia and the South Sandwich Islands]].
The earliest recorded human presence in the area modern-day Argentina dates back to the [[Paleolithic]] period.{{sfn|Abad de Santillán|1971|p=17}} The country has its roots in [[Spanish empire|Spanish]] colonization of the region beginning in 1512.{{sfn|Crow|1992|p=128}} Argentina rose as the successor state of the [[Viceroyalty of the Río de la Plata]],{{sfnm|1a1=Levene|1y=1948|1p=11|1ps=: "[After the Viceroyalty became] a new period that commenced with the revolution of 1810, whose plan consisted in declaring the independence of a nation, thus turning the legal bond of vassalage into one of citizenship as a component of sovereignty and,in addition, organizing the democratic republic."|2a1=Sánchez Viamonte|2y=1948|2pp=196–197|2ps=: "The Argentine nation was a unity in colonial times, during the Viceroyalty, and remained so after the revolution of May 1810. [...] The provinces never acted as independent sovereign states, but as entities created within the nation and as integral parts of it, incidentally affected by internal conflicts."|3a1=Vanossi|3y=1964|3p=11|3ps=: "[The Argentine nationality is a] unique national entity, successor to the Viceroyalty, which, after undergoing a long period of anarchy and disorganization, adopted a decentralized form in 1853–1860 under the Constitution."}} a Spanish overseas colony founded in 1776.
The [[Argentine Declaration of Independence|declaration]] and [[Argentine War of Independence|fight for independence]] (1810–1818) was followed by an [[Argentine Civil Wars|extended civil war]] that lasted until 1861, which culminated in the country's reorganization as a [[federation]] of [[Provinces of Argentina|provinces]] with [[Buenos Aires]] as its capital city. From then on—while [[Immigration in Argentina|massive waves of European immigration]] radically reshaped its cultural and demographic outlook—Argentina enjoyed an historically almost-unparalleled increase in prosperity: by the early 20th century it had already ranked as the seventh wealthiest{{sfn|Bolt|Van Zanden|2013}} developed nation{{sfn|Díaz Alejandro|1970|p=1}} in the world.
After 1930 Argentina descended into political instability and suffered periodic economic crises that pushed it back into underdevelopment,<ref name=developed>{{cite news|url=http://www.economist.com/node/2704457|title=Becoming a serious country|publisher=The Economist|place=London|date=3 June 2004|quote=Argentina is thus not a "developing country". Uniquely, it achieved development and then lost it again.|archiveurl=http://web.archive.org/web/20140320033128/http://www.economist.com/node/2704457|archivedate=20 March 2014|deadurl=no}}</ref> though it nevertheless remained among the fifteen richest countries until the mid-20th century.{{sfn|Bolt|Van Zanden|2013}} Argentina retains its historic status as a [[middle power]]{{sfnm|1a1=Wood|1y=1988|1p=18|2a1=Solomon|2y=1997|2p=3}} in international affairs, and is a prominent [[regional power]] in the Southern Cone and Latin America.{{sfnm|1a1=Huntington|1y=2000|1p=6|2a1=Nierop|2y=2001|2p=61|2ps=: "Secondary regional powers in Huntington's view{{sfn|Huntington|2000|p=6}} include Great Britain, Ukraine, Japan, South Korea, Pakistan, Saudi Arabia and Argentina."|3a1=Lake|3y=2009|3p=55|3ps=: "The US has created a foundation upon which the regional powers, especially Argentina and Brazil, can develop their own rules for further managing regional relations."|4a1=Papadopoulos|4y=2010|4p=283|4ps=: "The driving force behind the adoption of the MERCOSUR agreement was similar to that of the establishment of the EU: the hope of limiting the possibilities of traditional military hostility between the major regional powers, Brazil and Argentina."|5a1=Malamud|5y=2011|5p=9|5ps=: "Though not a surprise, the position of Argentina, Brazil's main regional partner, as the staunchest opponent of its main international ambition [to win a permanent seat on the UN Security Council] dealt a heavy blow to Brazil's image as a regional leader."|6a1=Boughton|6y=2012|6p=101|6ps=: "When the U.S. 
Treasury organized the next round of finance meetings, it included several non-APEC members, including all the European members of the G7, the Latin American powers Argentina and Brazil, and such other emerging markets as India, Poland, and South Africa."}}{{sfnm|1a1=Morris|1y=1988|1p=63|1ps=: "Argentina has been the leading military and economic power in the Southern Cone in the Twentieth Century."|2a1=Adler|2a2=Greve|2y=2009|2p=78|2ps=: "The southern cone of South America, including Argentina and Brazil, the two regional powers, has recently become a pluralistic security community."|3a1=Ruiz-Dana|3a2=Goldschag|3a3=Claro|3a4=Blanco|3y=2009|3p=18|3ps=: "[...] notably by linking the Southern Cone's rival regional powers, Brazil and Argentina."}} Argentina has the third-largest economy in Latin America and is a member of the [[Group of 15|G-15]] and [[G-20]] major economies. It is also a founding member of the [[United Nations]], [[World Bank Group|World Bank]], [[World Trade Organization|WTO]], [[Mercosur]], [[Union of South American Nations|UNASUR]], [[Community of Latin American and Caribbean States|CELAC]] and [[Organization of Ibero-American States|OEI]].
Because of its stability, market size and growing high-tech sector,<ref name=legatum>{{cite web|url=http://www.prosperity.com/country.aspx?id=AR|title=The 2010 Legatum Prosperity Index|publisher=[[Legatum|Legatum Institute]]|place=London|year=2010|quote=[The country has a] foundation for future growth due to its market size, levels of foreign direct investment, and percentage of high-tech exports as share of total manufactured goods ... Argentina's economy appears stable, but confidence in financial institutions remains low.|archiveurl=http://web.archive.org/web/20111026023022/http://www.prosperity.com/country.aspx?id=AR|archivedate=26 October 2011|deadurl=yes}}</ref> Argentina is classified as a [[emerging economy|middle emerging economy]] with a [[List of countries by Human Development Index#Americas|"very high"]] rating on the [[Human Development Index]].<ref name=hdi/>
==Name and etymology==
The name "Argentina" is derived from [[Latin]] ''argentum'' ("silver", ''plata'' in [[Spanish language|Spanish]]), a noun associated with the [[Sierra de la Plata|silver mountains legend]], widespread among the first European explorers of the [[La Plata Basin]].{{sfnm|1a1=Rock|1y=1987|1pp=6, 8|2a1=Edwards|2y=2008|2p=7}}
The first written use of the name can be traced to ''[[La Argentina (poem)|La Argentina]]'',{{efn-ua|The poem's full name is ''La Argentina y conquista del Río de la Plata, con otros acaecimientos de los reinos del Perú, Tucumán y estado del Brasil''.}} a 1602 poem by [[Martín del Barco Centenera]] describing the region and the foundation of Buenos Aires.{{sfn|Traba|1985|pp=15, 71}}
Although "Argentina" was already in common usage by the 18th century, the country was formally named "Viceroyalty of the Río de la Plata" by the Spanish Empire, and "United Provinces of the Río de la Plata" after independence.
The [[Argentine Constitution of 1826|1826 constitution]] included the first use of the name "Argentine Republic" in legal documents.{{sfn|Constitution of Argentina|loc=1826, art. 1}}
The name "Argentine Confederation" was also commonly used and was formalized in the [[Argentine Constitution of 1853]].{{sfn|Constitution of Argentina|loc=1853, Preamble}}
In 1860 a presidential decree settled the country's name as "Argentine Republic",{{sfn|Rosenblat|1964|p=78}} and that year's constitutional amendment ruled all the names since 1810 as legally valid.{{sfn|Constitution of Argentina|loc=1860 amd., art. 35}}{{efn-ua|Also stated in article 35 of all subsequent amendments: 1866, 1898, 1949, 1957, 1972 and 1994 (current)}}
In the [[English language]] the country was traditionally called "the Argentine", mimicking the typical Spanish usage ''la Argentina''.<ref>{{cite web|url=http://oxforddictionaries.com/definition/english/Argentina|title=Definition of Argentina in Oxford Dictionaries (British & World English)|publisher=Oxford Dictionaries|place=Oxford, UK|date=6 May 2013|archiveurl=http://web.archive.org/web/20140305011413/http://www.oxforddictionaries.com/definition/english/Argentina|archivedate=5 March 2014|deadurl=no}}</ref> This fell out of fashion during the mid-to-late 20th century, and now the country is simply referred to as "Argentina".
In the [[Spanish language]] "Argentina" is [[Grammatical gender|feminine]] ("''La [República] Argentina''"), taking the feminine [[article (grammar)|article]] "La" as the initial syllable of "Argentina" is [[Stress (linguistics)|unstressed]].<ref>[http://www.studyspanish.com/lessons/defart2.htm "The Definite Article: Part II"], Study Spanish</ref>
==History==
{{main|History of Argentina}}
===Pre-Columbian era===
{{main|Indigenous peoples in Argentina}}
[[File:SantaCruz-CuevaManos-P2210651b.jpg|thumb|200px|The [[Cueva de las Manos|Cave of the Hands]] in [[Santa Cruz province, Argentina|Santa Cruz province]], with indigenous artwork dating from 13,000–9,000 years ago|alt=Stencilled hands on the cave's wall]]
The earliest traces of human life in the area now known as Argentina are dated from the [[Paleolithic]] period, with further traces in the [[Mesolithic]] and [[Neolithic]].{{sfn|Abad de Santillán|1971|p=17}}
Until the period of European colonization, Argentina was relatively sparsely populated by a wide number of diverse cultures with different social organizations,{{sfn|Edwards|2008|p=12}} which can be divided into three main groups:{{sfn|Abad de Santillán|1971|pp=18–19}}
*Basic hunters and food gatherers without development of [[pottery]], like the [[Selknam]] and [[Yaghan]] in the extreme south.
*Advanced hunters and food gatherers like the [[Puelche]], [[Querandí]] and Serranos in the center-east; and the [[Tehuelche people|Tehuelche]] in the south—all of them conquered by the [[Mapuche]] spreading from [[Chile]]{{sfn|Edwards|2008|p=13}}—and the [[Kom people (South America)|Kom]] and [[Wichi]] in the north.
*Farmers with pottery, like the [[Charrúa]], [[Minuane]] and [[Guaraní people|Guaraní]] in the northeast, with [[slash and burn agriculture|slash and burn]] semisedentary existence;{{sfn|Edwards|2008|p=12}} the advanced [[Diaguita]] sedentary [[trade|trading culture]] in the northwest, which was conquered by the [[Inca Empire]] around 1480; the [[Toconoté]] and [[Comechingones|Hênîa and Kâmîare]] in the country's center, and the [[Huarpe]] in the center-west, a culture that raised [[llama]] cattle and was strongly influenced by the Incas.{{sfn|Edwards|2008|p=12}}
===Colonial era===
{{main|Colonial Argentina}}
{{see also|Spanish colonization of the Americas}}
[[File:La Reconquista de Buenos Aires.jpg|thumb|left|200px|The surrender of Beresford to [[Santiago de Liniers]] during the [[British invasions of the Río de la Plata]]]]
Europeans first arrived in the region with the 1502 voyage of [[Amerigo Vespucci (explorer)|Amerigo Vespucci]]. The Spanish navigators [[Juan Díaz de Solís]] and [[Sebastian Cabot (
gitextract_fx0stib8/
├── .codeclimate.yml
├── .dokaz
├── .github/
│ └── workflows/
│ └── ci.yml
├── .gitignore
├── .rspec
├── .rubocop.yml
├── .rubocop_todo.yml
├── .travis.yml
├── .yardopts
├── CHANGELOG.md
├── CONTRIBUTING.md
├── Gemfile
├── LICENSE.txt
├── Parsing.md
├── README.md
├── Rakefile
├── bin/
│ └── infoboxer
├── infoboxer.gemspec
├── lib/
│ ├── infoboxer/
│ │ ├── core_ext.rb
│ │ ├── definitions/
│ │ │ └── en.wikipedia.org.rb
│ │ ├── media_wiki/
│ │ │ ├── page.rb
│ │ │ └── traits.rb
│ │ ├── media_wiki.rb
│ │ ├── navigation/
│ │ │ ├── lookup.rb
│ │ │ ├── sections.rb
│ │ │ ├── selector.rb
│ │ │ ├── shortcuts.rb
│ │ │ └── wikipath.rb
│ │ ├── navigation.rb
│ │ ├── parser/
│ │ │ ├── context.rb
│ │ │ ├── html.rb
│ │ │ ├── image.rb
│ │ │ ├── inline.rb
│ │ │ ├── paragraphs.rb
│ │ │ ├── table.rb
│ │ │ ├── template.rb
│ │ │ └── util.rb
│ │ ├── parser.rb
│ │ ├── templates/
│ │ │ ├── base.rb
│ │ │ └── set.rb
│ │ ├── templates.rb
│ │ ├── tree/
│ │ │ ├── compound.rb
│ │ │ ├── document.rb
│ │ │ ├── gallery.rb
│ │ │ ├── html.rb
│ │ │ ├── image.rb
│ │ │ ├── inline.rb
│ │ │ ├── linkable.rb
│ │ │ ├── list.rb
│ │ │ ├── math.rb
│ │ │ ├── node.rb
│ │ │ ├── nodes.rb
│ │ │ ├── paragraphs.rb
│ │ │ ├── ref.rb
│ │ │ ├── table.rb
│ │ │ ├── template.rb
│ │ │ ├── text.rb
│ │ │ └── wikilink.rb
│ │ ├── tree.rb
│ │ ├── version.rb
│ │ └── wiki_path.rb
│ └── infoboxer.rb
├── profile/
│ ├── out/
│ │ └── .gitkeep
│ └── pages/
│ ├── argentina.txt
│ ├── canada.wiki
│ ├── ukraine.wiki
│ └── usa.wiki
├── regression/
│ └── pages/
│ ├── 2012_bdo_world_darts_championship.wiki
│ ├── area.wiki
│ ├── articuno.wiki
│ ├── canada.wiki
│ ├── chiang_mai.wiki
│ ├── greece.wiki
│ ├── list_of_countries.wiki
│ ├── progress_wrestling.wiki
│ ├── south_america.wiki
│ ├── south_america_new.wiki
│ ├── ukraine.wiki
│ ├── ukrainian_galician_army.wiki
│ ├── usa.wiki
│ └── wyoming.wiki
└── spec/
├── dokaz_helpers.rb
├── fixtures/
│ ├── argentina.wiki
│ ├── broken_table_caption.txt
│ ├── large_infobox.txt
│ ├── large_table.txt
│ └── vcr_cassettes/
│ ├── Infoboxer/
│ │ └── common_MediaWiki_shortcuts/
│ │ ├── Wikia/
│ │ │ ├── language/
│ │ │ │ ├── 1_1_2_3_1.yml
│ │ │ │ └── api_base_url_to_s/
│ │ │ │ └── 1_1_2_3_2_1.yml
│ │ │ ├── simple/
│ │ │ │ ├── 1_1_2_1_1.yml
│ │ │ │ └── api_base_url_to_s/
│ │ │ │ └── 1_1_2_1_2_1.yml
│ │ │ └── subdomain/
│ │ │ ├── 1_1_2_2_1.yml
│ │ │ └── api_base_url_to_s/
│ │ │ └── 1_1_2_2_2_1.yml
│ │ └── Wikipedia/
│ │ ├── caching/
│ │ │ └── constructs_object_only_once.yml
│ │ ├── default/
│ │ │ ├── 1_1_1_1_1.yml
│ │ │ └── api_base_url_to_s/
│ │ │ └── 1_1_1_1_2_1.yml
│ │ ├── language/
│ │ │ └── api_base_url_to_s/
│ │ │ └── 1_1_1_3_1_1.yml
│ │ └── shortcut/
│ │ └── api_base_url_to_s/
│ │ └── 1_1_1_4_1_1.yml
│ ├── Infoboxer_MediaWiki/
│ │ ├── category/
│ │ │ ├── category_name_transformation/
│ │ │ │ ├── default_namespace/
│ │ │ │ │ └── uri_query_values/
│ │ │ │ │ └── 1_6_3_2_1_1.yml
│ │ │ │ ├── localized_namespace/
│ │ │ │ │ └── uri_query_values/
│ │ │ │ │ └── 1_6_3_3_1_1.yml
│ │ │ │ ├── not_a_namespace/
│ │ │ │ │ └── uri_query_values/
│ │ │ │ │ └── 1_6_3_4_1_1.yml
│ │ │ │ └── when_no_namespace/
│ │ │ │ └── uri_query_values/
│ │ │ │ └── 1_6_3_1_1_1.yml
│ │ │ ├── when_category_exists/
│ │ │ │ ├── 1_6_1_1.yml
│ │ │ │ ├── count/
│ │ │ │ │ └── 1_6_1_2_1.yml
│ │ │ │ └── map_title_/
│ │ │ │ └── 1_6_1_3_1.yml
│ │ │ └── when_category_is_not/
│ │ │ ├── 1_6_2_1.yml
│ │ │ └── 1_6_2_2.yml
│ │ ├── get/
│ │ │ ├── _prop/
│ │ │ │ └── source/
│ │ │ │ └── 1_4_6_1_1.yml
│ │ │ ├── interwiki/
│ │ │ │ └── url/
│ │ │ │ └── 1_4_7_1_1.yml
│ │ │ ├── processor/
│ │ │ │ └── source/
│ │ │ │ └── 1_4_6_1_1.yml
│ │ │ ├── when_invalid_title_requested/
│ │ │ │ ├── as_block/
│ │ │ │ │ └── 1_4_5_1_1.yml
│ │ │ │ └── call/
│ │ │ │ └── 1_4_5_1_1.yml
│ │ │ ├── when_several_pages/
│ │ │ │ └── 1_4_2_1.yml
│ │ │ ├── when_several_pages_including_non-existent/
│ │ │ │ └── count/
│ │ │ │ └── 1_4_4_1_1.yml
│ │ │ ├── when_signle_non-existing_page/
│ │ │ │ └── 1_4_3_1.yml
│ │ │ └── when_single_page/
│ │ │ ├── 1_4_1_1.yml
│ │ │ ├── source/
│ │ │ │ └── 1_4_1_4_1.yml
│ │ │ ├── title/
│ │ │ │ └── 1_4_1_2_1.yml
│ │ │ └── url/
│ │ │ └── 1_4_1_3_1.yml
│ │ ├── get_h/
│ │ │ ├── when_several_pages_including_non-existent/
│ │ │ │ ├── 1_5_1_1.yml
│ │ │ │ ├── _WTF_I_just_read_Make_me_unsee_it_/
│ │ │ │ │ └── 1_5_1_3_1.yml
│ │ │ │ └── keys/
│ │ │ │ └── 1_5_1_2_1.yml
│ │ │ ├── when_several_pages_including_redirected_to_same/
│ │ │ │ ├── 1_5_2_1.yml
│ │ │ │ ├── keys/
│ │ │ │ │ └── 1_5_2_2_1.yml
│ │ │ │ └── values/
│ │ │ │ ├── 1_5_2_3_1.yml
│ │ │ │ └── 1_5_2_4_1.yml
│ │ │ └── with_downcase_titles/
│ │ │ ├── 1_5_3_1.yml
│ │ │ ├── keys/
│ │ │ │ └── 1_5_3_2_1.yml
│ │ │ └── values/
│ │ │ └── 1_5_3_3_1.yml
│ │ ├── inspect/
│ │ │ └── inspect/
│ │ │ └── 1_1_1_1.yml
│ │ ├── prefixsearch/
│ │ │ ├── when_found/
│ │ │ │ ├── 1_8_1_1.yml
│ │ │ │ ├── count/
│ │ │ │ │ └── 1_8_1_2_1.yml
│ │ │ │ └── map_title_/
│ │ │ │ └── 1_8_1_3_1.yml
│ │ │ └── when_not_found/
│ │ │ └── 1_8_2_1.yml
│ │ ├── raw/
│ │ │ ├── several_pages/
│ │ │ │ ├── _50_pages/
│ │ │ │ │ ├── 1_2_2_2_1.yml
│ │ │ │ │ ├── count/
│ │ │ │ │ │ └── 1_2_2_2_2_1.yml
│ │ │ │ │ └── map_title_/
│ │ │ │ │ └── 1_2_2_2_3_1.yml
│ │ │ │ ├── default/
│ │ │ │ │ ├── 1_2_2_1_1.yml
│ │ │ │ │ ├── count/
│ │ │ │ │ │ └── 1_2_2_1_2_1.yml
│ │ │ │ │ └── map_title_/
│ │ │ │ │ └── 1_2_2_1_3_1.yml
│ │ │ │ └── no_pages/
│ │ │ │ └── 1_2_2_3_1.yml
│ │ │ ├── single_page/
│ │ │ │ ├── default/
│ │ │ │ │ ├── 1_2_1_1_1.yml
│ │ │ │ │ ├── 1_2_1_1_3.yml
│ │ │ │ │ ├── _fullurl_/
│ │ │ │ │ │ └── 1_2_1_1_4_1.yml
│ │ │ │ │ └── _title_/
│ │ │ │ │ └── 1_2_1_1_2_1.yml
│ │ │ │ ├── non-existent/
│ │ │ │ │ ├── _missing_/
│ │ │ │ │ │ └── 1_2_1_2_2_1.yml
│ │ │ │ │ └── _title_/
│ │ │ │ │ └── 1_2_1_2_1_1.yml
│ │ │ │ └── redirect/
│ │ │ │ ├── 1_2_1_3_2.yml
│ │ │ │ ├── _fullurl_/
│ │ │ │ │ └── 1_2_1_3_3_1.yml
│ │ │ │ └── _title_/
│ │ │ │ └── 1_2_1_3_1_1.yml
│ │ │ └── user-agent/
│ │ │ ├── default/
│ │ │ │ └── 1_2_3_1_1.yml
│ │ │ ├── globally_set/
│ │ │ │ └── 1_2_3_2_1.yml
│ │ │ └── locally_set/
│ │ │ └── 1_2_3_3_1.yml
│ │ ├── search/
│ │ │ ├── when_found/
│ │ │ │ ├── 1_7_1_1.yml
│ │ │ │ ├── count/
│ │ │ │ │ └── 1_7_1_2_1.yml
│ │ │ │ └── map_title_/
│ │ │ │ └── 1_7_1_3_1.yml
│ │ │ └── when_not_found/
│ │ │ └── 1_7_2_1.yml
│ │ └── traits/
│ │ ├── dynamic_part_-_taken_from_API/
│ │ │ ├── after_page_fetched/
│ │ │ │ ├── category_namespace/
│ │ │ │ │ └── 1_3_2_2_2_1.yml
│ │ │ │ └── file_namespace/
│ │ │ │ └── 1_3_2_2_1_1.yml
│ │ │ └── before_first_page_fetched/
│ │ │ ├── category_namespace/
│ │ │ │ └── 1_3_2_1_2_1.yml
│ │ │ └── file_namespace/
│ │ │ └── 1_3_2_1_1_1.yml
│ │ └── static_part_-_guess_by_domain/
│ │ └── 1_3_1_1.yml
│ ├── Infoboxer_Tree_Node/
│ │ └── Infoboxer_MediaWiki_Page/
│ │ └── 1_5_1.yml
│ ├── Infoboxer_Tree_Nodes/
│ │ ├── 2_1.yml
│ │ ├── 2_2.yml
│ │ └── when_interwiki_link/
│ │ ├── 2_3_1.yml
│ │ └── map_url_/
│ │ └── 2_3_2_1.yml
│ ├── Infoboxer_Tree_Wikilink/
│ │ ├── follow/
│ │ │ ├── 1_2_1.yml
│ │ │ ├── text/
│ │ │ │ └── 1_2_3_1.yml
│ │ │ ├── title/
│ │ │ │ └── 1_2_2_1.yml
│ │ │ └── when_interwiki_link/
│ │ │ ├── 1_2_4_1.yml
│ │ │ └── url/
│ │ │ └── 1_2_4_2_1.yml
│ │ └── url/
│ │ └── 1_1_1.yml
│ ├── en_wikipedia_org/
│ │ └── 1_1.yml
│ ├── follow-chile.yml
│ ├── follow-several.yml
│ ├── follow-source-argentine.yml
│ ├── follow-source-argentine2.yml
│ ├── follow-source-forests.yml
│ ├── follow-template.yml
│ └── other-language_Wikipedia/
│ ├── categories/
│ │ ├── 1_2_1.yml
│ │ └── should_include_existing_category.yml
│ └── files/
│ ├── default_prefix/
│ │ └── 1_1_1_1.yml
│ └── localized_prefix/
│ └── 1_1_2_1.yml
├── infoboxer/
│ ├── en.wikipedia.org/
│ │ ├── calc_templates_spec.rb
│ │ └── simple_templates_spec.rb
│ ├── infoboxer_spec.rb
│ ├── integration/
│ │ ├── all_en_templates_spec.rb
│ │ ├── fr_spec.rb
│ │ └── site_traits_spec.rb
│ ├── media_wiki/
│ │ ├── follow_spec.rb
│ │ └── traits_spec.rb
│ ├── media_wiki_spec.rb
│ ├── navigation/
│ │ ├── lookup/
│ │ │ └── selector_spec.rb
│ │ ├── lookup_spec.rb
│ │ ├── sections_spec.rb
│ │ ├── shortcuts_spec.rb
│ │ └── wikipath_spec.rb
│ ├── parser/
│ │ ├── flow_spec.rb
│ │ ├── image_spec.rb
│ │ ├── inline_spec.rb
│ │ ├── paragraphs_spec.rb
│ │ ├── ref_spec.rb
│ │ ├── table_spec.rb
│ │ └── template_spec.rb
│ ├── templates/
│ │ └── set_spec.rb
│ ├── tree/
│ │ ├── inspect_spec.rb
│ │ ├── nodes_spec.rb
│ │ ├── template_spec.rb
│ │ ├── text_spec.rb
│ │ ├── to_tree_spec.rb
│ │ └── wikilink_spec.rb
│ └── wiki_path_spec.rb
└── spec_helper.rb
SYMBOL INDEX (565 symbols across 73 files)
FILE: lib/infoboxer.rb
type Infoboxer (line 48) | module Infoboxer
function wikis (line 68) | def wikis
function wiki (line 73) | def wiki(api_url, **options)
function url_for (line 190) | def url_for(symbol, lang = 'en')
function wikia (line 229) | def wikia(*domains)
function user_agent= (line 241) | def self.user_agent=(ua)
FILE: lib/infoboxer/core_ext.rb
class Object (line 4) | class Object
method itself (line 7) | def itself
FILE: lib/infoboxer/definitions/en.wikipedia.org.rb
type Infoboxer (line 4) | module Infoboxer
function children (line 135) | def children
function children (line 142) | def children
function children (line 148) | def children
function children (line 154) | def children
function children (line 160) | def children
function stub? (line 181) | def stub?
function infobox? (line 187) | def infobox?
function model (line 193) | def model
function lat (line 209) | def lat
function lng (line 222) | def lng
function value1 (line 235) | def value1
function between (line 246) | def between
function value2 (line 250) | def value2
function measure_from (line 254) | def measure_from
function measure_to (line 258) | def measure_to
function text (line 262) | def text
function from (line 268) | def from
function to (line 272) | def to
function value (line 276) | def value
function text (line 280) | def text
function date (line 286) | def date
function text (line 290) | def text
function date (line 297) | def date
function text (line 301) | def text
function text (line 308) | def text
function children (line 320) | def children
function children (line 326) | def children
function children (line 334) | def children
function children (line 342) | def children
function text (line 348) | def text
function text (line 354) | def text
FILE: lib/infoboxer/media_wiki.rb
type Infoboxer (line 9) | module Infoboxer
class MediaWiki (line 24) | class MediaWiki
method initialize (line 57) | def initialize(api_base_url, ua: nil, user_agent: ua)
method raw (line 75) | def raw(*titles, &processor)
method get (line 130) | def get(*titles, interwiki: nil, &processor)
method get_h (line 157) | def get_h(*titles, &processor)
method category (line 178) | def category(title, limit: 'max', &processor)
method search (line 200) | def search(query, limit: 'max', &processor)
method prefixsearch (line 217) | def prefixsearch(prefix, limit: 'max', &processor)
method inspect (line 222) | def inspect
method make_page (line 228) | def make_page(raw_pages, title)
method list (line 234) | def list(query, limit, &processor)
method prepare_request (line 249) | def prepare_request(request)
method normalize_category_title (line 254) | def normalize_category_title(title)
method user_agent (line 263) | def user_agent(custom)
method siteinfo (line 267) | def siteinfo
method interwikis (line 271) | def interwikis(prefix)
FILE: lib/infoboxer/media_wiki/page.rb
type Infoboxer (line 3) | module Infoboxer
class MediaWiki (line 4) | class MediaWiki
class Page (line 10) | class Page < Tree::Document
method initialize (line 11) | def initialize(client, children, source)
method traits (line 34) | def traits
method namespace (line 39) | def namespace
method category? (line 43) | def category?
method show_params (line 51) | def show_params
FILE: lib/infoboxer/media_wiki/traits.rb
type Infoboxer (line 3) | module Infoboxer
class MediaWiki (line 4) | class MediaWiki
class Traits (line 12) | class Traits
method templates (line 19) | def templates(&definition)
method domain (line 28) | def domain(d)
method get (line 37) | def get(domain, site_info = {})
method domains (line 42) | def domains
method for (line 61) | def for(domain, &block)
method initialize (line 70) | def initialize(site_info = {})
method namespace? (line 74) | def namespace?(prefix)
method interwiki? (line 78) | def interwiki?(prefix)
method file_namespace (line 83) | def file_namespace
method category_namespace (line 88) | def category_namespace
method templates (line 93) | def templates
method known_namespaces (line 99) | def known_namespaces
method known_interwikis (line 108) | def known_interwikis
method ns_aliases (line 117) | def ns_aliases(base)
FILE: lib/infoboxer/navigation.rb
type Infoboxer (line 3) | module Infoboxer
type Navigation (line 77) | module Navigation
class Tree::Node (line 82) | class Tree::Node
class Tree::Nodes (line 89) | class Tree::Nodes
class Tree::Document (line 96) | class Tree::Document
type Helpers (line 100) | module Helpers
function W (line 101) | def W(*arg, &block) # rubocop:disable Naming/MethodName
FILE: lib/infoboxer/navigation/lookup.rb
type Infoboxer (line 5) | module Infoboxer
type Navigation (line 6) | module Navigation
type Lookup (line 8) | module Lookup
type Node (line 76) | module Node
function _matches? (line 106) | def _matches?(selector)
function _lookup (line 111) | def _lookup(selector)
function _lookup_children (line 117) | def _lookup_children(selector)
function _lookup_parents (line 122) | def _lookup_parents(selector)
function _lookup_siblings (line 134) | def _lookup_siblings(selector)
function _lookup_prev_siblings (line 139) | def _lookup_prev_siblings(selector)
function _lookup_prev_sibling (line 144) | def _lookup_prev_sibling(selector)
function _lookup_next_siblings (line 149) | def _lookup_next_siblings(selector)
function parent? (line 168) | def parent?(*selectors, &block)
type Nodes (line 180) | module Nodes
function _find (line 196) | def _find(selector)
function find (line 202) | def find(*selectors, &block)
FILE: lib/infoboxer/navigation/sections.rb
type Infoboxer (line 3) | module Infoboxer
type Navigation (line 4) | module Navigation
type Sections (line 27) | module Sections
type Container (line 34) | module Container
function intro (line 38) | def intro
function sections (line 65) | def sections(*names)
function subsections (line 84) | def subsections(*names)
function lookup_children (line 97) | def lookup_children(*arg)
function make_sections (line 107) | def make_sections
type Node (line 134) | module Node
function in_sections (line 139) | def in_sections
type Nodes (line 165) | module Nodes
function lookup_children (line 175) | def lookup_children(*arg)
class Section (line 188) | class Section < Tree::Compound
method initialize (line 189) | def initialize(heading, children = Tree::Nodes[])
method push_children (line 202) | def push_children(*nodes)
method empty? (line 208) | def empty?
method inspect (line 212) | def inspect
FILE: lib/infoboxer/navigation/selector.rb
type Infoboxer (line 3) | module Infoboxer
type Navigation (line 4) | module Navigation
type Lookup (line 5) | module Lookup
class Selector (line 9) | class Selector
method initialize (line 10) | def initialize(*arg, &block)
method == (line 19) | def ==(other)
method inspect (line 23) | def inspect
method === (line 27) | def ===(other)
method sym_to_class (line 33) | def sym_to_class(a)
method arg_matches? (line 41) | def arg_matches?(check, node)
method value_matches? (line 57) | def value_matches?(matcher, value)
FILE: lib/infoboxer/navigation/shortcuts.rb
type Infoboxer (line 3) | module Infoboxer
type Navigation (line 4) | module Navigation
type Shortcuts (line 6) | module Shortcuts
type Node (line 16) | module Node
function wikilinks (line 24) | def wikilinks(namespace = '')
function headings (line 32) | def headings(level = nil)
function paragraphs (line 41) | def paragraphs(*selectors, &block)
function external_links (line 49) | def external_links(*selectors, &block)
function images (line 57) | def images(*selectors, &block)
function templates (line 65) | def templates(*selectors, &block)
function tables (line 73) | def tables(*selectors, &block)
function lists (line 81) | def lists(*selectors, &block)
function bold? (line 86) | def bold?
function italic? (line 91) | def italic?
function heading? (line 98) | def heading?(level = nil)
function infoboxes (line 109) | def infoboxes(*selectors, &block)
function categories (line 121) | def categories
function infobox (line 126) | def infobox
function ensure_traits (line 132) | def ensure_traits
function ensure_page (line 136) | def ensure_page
type Nodes (line 146) | module Nodes
FILE: lib/infoboxer/navigation/wikipath.rb
type Infoboxer (line 5) | module Infoboxer
type Navigation (line 6) | module Navigation
type Wikipath (line 7) | module Wikipath
function wikipath (line 27) | def wikipath(string)
FILE: lib/infoboxer/parser.rb
type Infoboxer (line 6) | module Infoboxer
class Parser (line 7) | class Parser
class ParsingError (line 8) | class ParsingError < RuntimeError
method inline (line 12) | def inline(text, traits = nil)
method paragraphs (line 16) | def paragraphs(text, traits = nil)
method paragraph (line 20) | def paragraph(text, traits = nil)
method document (line 24) | def document(text, traits = nil)
method fragment (line 28) | def fragment(text, traits = nil)
method context (line 34) | def context(text, traits)
method coerce_traits (line 38) | def coerce_traits(traits)
method initialize (line 54) | def initialize(context)
method log (line 71) | def log(msg)
FILE: lib/infoboxer/parser/context.rb
type Infoboxer (line 5) | module Infoboxer
class Parser (line 6) | class Parser
class Context (line 7) | class Context # rubocop:disable Metrics/ClassLength
method initialize (line 11) | def initialize(text, traits = nil)
method colno (line 23) | def colno
method matched (line 27) | def matched
method eat_matched? (line 32) | def eat_matched?(str)
method rest (line 39) | def rest
method next! (line 46) | def next!
method prev! (line 50) | def prev!
method eof? (line 54) | def eof?
method inspect (line 59) | def inspect
method scan (line 64) | def scan(re)
method check (line 71) | def check(re)
method skip (line 78) | def skip(re)
method scan_until (line 85) | def scan_until(re, leave_pattern = false)
method push_eol_sign (line 93) | def push_eol_sign(re)
method pop_eol_sign (line 97) | def pop_eol_sign
method inline_eol? (line 103) | def inline_eol?(exclude = nil)
method scan_continued_until (line 112) | def scan_continued_until(re, leave_pattern = false)
method matched_inline? (line 133) | def matched_inline?(re)
method matched? (line 143) | def matched?(re)
method eol? (line 147) | def eol?
method fail! (line 152) | def fail!(text)
method unscan_matched! (line 156) | def unscan_matched!
method _scan_until (line 166) | def _scan_until(re)
method guard_eof! (line 173) | def guard_eof!
method shift (line 177) | def shift(amount)
FILE: lib/infoboxer/parser/html.rb
type Infoboxer (line 3) | module Infoboxer
class Parser (line 4) | class Parser
type HTML (line 5) | module HTML
function html (line 8) | def html
function html_closing_tag (line 24) | def html_closing_tag
function html_br (line 31) | def html_br
function html_auto_closing_tag (line 36) | def html_auto_closing_tag
function html_opening_tag (line 43) | def html_opening_tag
FILE: lib/infoboxer/parser/image.rb
type Infoboxer (line 3) | module Infoboxer
class Parser (line 4) | class Parser
type Image (line 5) | module Image
function image (line 8) | def image
function image_attrs (line 17) | def image_attrs
function image_attr (line 30) | def image_attr(nodes)
FILE: lib/infoboxer/parser/inline.rb
type Infoboxer (line 3) | module Infoboxer
class Parser (line 4) | class Parser
type Inline (line 5) | module Inline
function inline (line 8) | def inline(until_pattern = nil)
function short_inline (line 34) | def short_inline(until_pattern = nil)
function long_inline (line 58) | def long_inline(until_pattern = nil)
function inline_formatting (line 88) | def inline_formatting(match) # rubocop:disable Metrics/MethodLengt...
function wikilink (line 126) | def wikilink
function external_link (line 150) | def external_link(protocol)
function reference (line 160) | def reference(param_str, closed = false)
function math (line 165) | def math
function nowiki (line 169) | def nowiki(tag_rest)
function gallery (line 177) | def gallery(tag_rest)
function gallery_image_attrs (line 193) | def gallery_image_attrs
FILE: lib/infoboxer/parser/paragraphs.rb
type Infoboxer (line 3) | module Infoboxer
class Parser (line 4) | class Parser
type Paragraphs (line 5) | module Paragraphs
function paragraphs (line 8) | def paragraphs(until_pattern = nil)
function paragraph (line 22) | def paragraph(until_pattern)
function heading (line 42) | def heading(text, level)
function list (line 47) | def list(until_pattern)
function pre (line 53) | def pre(until_pattern)
FILE: lib/infoboxer/parser/table.rb
type Infoboxer (line 3) | module Infoboxer
class Parser (line 4) | class Parser
type Table (line 6) | module Table
function table (line 9) | def table
function table_params (line 33) | def table_params
function table_next_line (line 38) | def table_next_line(table)
function table_row (line 66) | def table_row(table, param_str)
function table_caption (line 71) | def table_caption(table)
function table_cells (line 89) | def table_cells(table, cell_class = TableCell)
function table_template (line 107) | def table_template(table)
function table_cell_cont (line 127) | def table_cell_cont(table)
FILE: lib/infoboxer/parser/template.rb
type Infoboxer (line 3) | module Infoboxer
class Parser (line 4) | class Parser
type Template (line 5) | module Template
function template (line 13) | def template
function template_vars (line 24) | def template_vars
function sanitize_value (line 55) | def sanitize_value(nodes)
FILE: lib/infoboxer/parser/util.rb
type Infoboxer (line 3) | module Infoboxer
class Parser (line 4) | class Parser
type Util (line 5) | module Util
function make_regexps (line 40) | def make_regexps
function parse_params (line 59) | def parse_params(str)
function guarded_loop (line 80) | def guarded_loop
FILE: lib/infoboxer/templates.rb
type Infoboxer (line 3) | module Infoboxer
type Templates (line 42) | module Templates
FILE: lib/infoboxer/templates/base.rb
type Infoboxer (line 3) | module Infoboxer
type Templates (line 4) | module Templates
class Base (line 5) | class Base < Tree::Template
method inspect (line 11) | def inspect
method clean_name (line 15) | def clean_name
method == (line 20) | def ==(other)
method clean_class (line 26) | def clean_class
class Show (line 39) | class Show < Base
method text (line 40) | def text
method children_separator (line 46) | def children_separator
class Replace (line 54) | class Replace < Base
method replace (line 55) | def replace
method text (line 59) | def text
class Literal (line 67) | class Literal < Base
FILE: lib/infoboxer/templates/set.rb
type Infoboxer (line 3) | module Infoboxer
type Templates (line 4) | module Templates
class Set (line 16) | class Set
method initialize (line 17) | def initialize(&definitions)
method find (line 23) | def find(name)
method define (line 29) | def define(&definitions)
method clear (line 34) | def clear
method template (line 82) | def template(name, options = {}, &definition)
method replace (line 102) | def replace(*replacements)
method show (line 140) | def show(*names)
method literal (line 157) | def literal(*names)
method setup_class (line 164) | def setup_class(name, base_class, options = {}, &definition)
FILE: lib/infoboxer/tree.rb
type Infoboxer (line 3) | module Infoboxer
type Tree (line 61) | module Tree
FILE: lib/infoboxer/tree/compound.rb
type Infoboxer (line 3) | module Infoboxer
type Tree (line 4) | module Tree
class Compound (line 6) | class Compound < Node
method initialize (line 7) | def initialize(children = Nodes.new, **params)
method index_of (line 21) | def index_of(child)
method push_children (line 27) | def push_children(*nodes)
method text (line 34) | def text
method to_tree (line 39) | def to_tree(level = 0)
method can_merge? (line 51) | def can_merge?(_other)
method closed! (line 57) | def closed!
method closed? (line 63) | def closed?
method empty? (line 69) | def empty?
method children_separator (line 75) | def children_separator
method _eq (line 81) | def _eq(other)
FILE: lib/infoboxer/tree/document.rb
type Infoboxer (line 3) | module Infoboxer
type Tree (line 4) | module Tree
class Document (line 9) | class Document < Compound
FILE: lib/infoboxer/tree/gallery.rb
type Infoboxer (line 3) | module Infoboxer
type Tree (line 4) | module Tree
class Gallery (line 9) | class Gallery < Compound
FILE: lib/infoboxer/tree/html.rb
type Infoboxer (line 3) | module Infoboxer
type Tree (line 4) | module Tree
type HTMLTagCommons (line 5) | module HTMLTagCommons
function text (line 8) | def text
class HTMLTag (line 14) | class HTMLTag < Compound
method initialize (line 15) | def initialize(tag, attrs, children = Nodes.new)
method empty? (line 27) | def empty?
method descr (line 34) | def descr
class HTMLOpeningTag (line 46) | class HTMLOpeningTag < Node
method initialize (line 47) | def initialize(tag, attrs)
method descr (line 59) | def descr
class HTMLClosingTag (line 66) | class HTMLClosingTag < Node
method initialize (line 67) | def initialize(tag)
method descr (line 73) | def descr
FILE: lib/infoboxer/tree/image.rb
type Infoboxer (line 3) | module Infoboxer
type Tree (line 4) | module Tree
class Image (line 9) | class Image < Node
method initialize (line 10) | def initialize(path, caption: nil, **params)
method border? (line 31) | def border?
method width (line 35) | def width
method height (line 39) | def height
method to_tree (line 43) | def to_tree(level = 0)
method _eq (line 55) | def _eq(other)
class ImageCaption (line 61) | class ImageCaption < Compound
FILE: lib/infoboxer/tree/inline.rb
type Infoboxer (line 3) | module Infoboxer
type Tree (line 4) | module Tree
class Italic (line 6) | class Italic < Compound
class Bold (line 10) | class Bold < Compound
class BoldItalic (line 15) | class BoldItalic < Compound
class Link (line 19) | class Link < Compound
method initialize (line 20) | def initialize(link, label = nil, **attr)
class ExternalLink (line 30) | class ExternalLink < Link
FILE: lib/infoboxer/tree/linkable.rb
type Infoboxer (line 3) | module Infoboxer
type Tree (line 4) | module Tree
type Linkable (line 8) | module Linkable
function follow (line 19) | def follow
function url (line 26) | def url
function interwiki (line 34) | def interwiki; end
function page (line 36) | def page
function client (line 40) | def client
FILE: lib/infoboxer/tree/list.rb
type Infoboxer (line 3) | module Infoboxer
type Tree (line 4) | module Tree
class ListItem (line 6) | class ListItem < BaseParagraph
method can_merge? (line 9) | def can_merge?(other)
method merge! (line 16) | def merge!(other)
method text (line 23) | def text
method make_marker (line 34) | def make_marker
class List (line 55) | class List < Compound
method list_level (line 56) | def list_level
method list_text_indent (line 60) | def list_text_indent
method text (line 64) | def text
method merge! (line 119) | def merge!(other)
method construct (line 132) | def self.construct(marker, nodes)
class UnorderedList (line 74) | class UnorderedList < List
method make_marker (line 75) | def make_marker(_item)
class OrderedList (line 81) | class OrderedList < List
method make_marker (line 82) | def make_marker(item)
class DefinitionList (line 92) | class DefinitionList < List
method make_marker (line 93) | def make_marker(item)
class DTerm (line 104) | class DTerm < ListItem
method text (line 105) | def text
class DDefinition (line 111) | class DDefinition < ListItem
class List (line 114) | class List < Compound
method list_level (line 56) | def list_level
method list_text_indent (line 60) | def list_text_indent
method text (line 64) | def text
method merge! (line 119) | def merge!(other)
method construct (line 132) | def self.construct(marker, nodes)
FILE: lib/infoboxer/tree/math.rb
type Infoboxer (line 3) | module Infoboxer
type Tree (line 4) | module Tree
class Math (line 8) | class Math < Text
FILE: lib/infoboxer/tree/node.rb
type Infoboxer (line 5) | module Infoboxer
type Tree (line 6) | module Tree
class Node (line 13) | class Node
method initialize (line 14) | def initialize(**params)
method == (line 34) | def ==(other)
method index (line 39) | def index
method first? (line 43) | def first?
method siblings (line 48) | def siblings
method prev_siblings (line 53) | def prev_siblings
method next_siblings (line 58) | def next_siblings
method children (line 63) | def children
method can_merge? (line 69) | def can_merge?(_other)
method empty? (line 76) | def empty?
method to_tree (line 96) | def to_tree(level = 0)
method inspect (line 100) | def inspect
method text (line 118) | def text
method text_ (line 125) | def text_
method to_s (line 130) | def to_s
method shorten_text (line 139) | def shorten_text
method clean_class (line 144) | def clean_class
method descr (line 148) | def descr
method show_params (line 156) | def show_params(prms = nil)
method indent (line 160) | def indent(level)
method _eq (line 164) | def _eq(_other)
method decode (line 168) | def decode(str)
method def_readers (line 174) | def def_readers(*keys)
method coder (line 181) | def coder
FILE: lib/infoboxer/tree/nodes.rb
type Infoboxer (line 3) | module Infoboxer
type Tree (line 4) | module Tree
class Nodes (line 25) | class Nodes < Array
method first (line 68) | def first(n = nil)
method last (line 77) | def last(n = nil)
method map (line 86) | def map
method flat_map (line 96) | def flat_map
method group_by (line 106) | def group_by
method fetch_hashes (line 139) | def fetch_hashes(*args)
method to_tree (line 144) | def to_tree
method inspect (line 148) | def inspect
method text (line 160) | def text
method unwrap (line 166) | def unwrap
method follow (line 179) | def follow
method << (line 193) | def <<(node) # rubocop:disable Metrics/PerceivedComplexity
method strip (line 208) | def strip
method flow_templates (line 217) | def flow_templates
method make_nodes (line 228) | def make_nodes(arr)
FILE: lib/infoboxer/tree/paragraphs.rb
type Infoboxer (line 3) | module Infoboxer
type Tree (line 4) | module Tree
class BaseParagraph (line 12) | class BaseParagraph < Compound
method text (line 13) | def text
class EmptyParagraph (line 19) | class EmptyParagraph < Node
method initialize (line 20) | def initialize(text)
method empty? (line 25) | def empty?
type Mergeable (line 33) | module Mergeable
function can_merge? (line 34) | def can_merge?(other)
function merge! (line 38) | def merge!(other)
class MergeableParagraph (line 52) | class MergeableParagraph < BaseParagraph
method can_merge? (line 55) | def can_merge?(other)
class Paragraph (line 62) | class Paragraph < MergeableParagraph
method splitter (line 65) | def splitter
method templates_only? (line 71) | def templates_only?
method to_templates (line 77) | def to_templates
method to_templates? (line 83) | def to_templates?
class HR (line 89) | class HR < Node
class Heading (line 96) | class Heading < BaseParagraph
method initialize (line 97) | def initialize(children, level)
class Pre (line 109) | class Pre < MergeableParagraph
method merge! (line 112) | def merge!(other)
method splitter (line 122) | def splitter
FILE: lib/infoboxer/tree/ref.rb
type Infoboxer (line 3) | module Infoboxer
type Tree (line 4) | module Tree
class Ref (line 20) | class Ref < Compound
method empty? (line 26) | def empty?
method text (line 31) | def text
FILE: lib/infoboxer/tree/table.rb
type Infoboxer (line 5) | module Infoboxer
type Tree (line 6) | module Tree
class Table (line 8) | class Table < Compound
method empty? (line 10) | def empty?
method rows (line 15) | def rows
method caption (line 20) | def caption
method heading_row (line 28) | def heading_row
method body_rows (line 33) | def body_rows
method text (line 41) | def text
class TableRow (line 51) | class TableRow < Compound
method empty? (line 54) | def empty?
class BaseCell (line 63) | class BaseCell < Compound
method empty? (line 64) | def empty?
class TableCell (line 70) | class TableCell < BaseCell
class TableHeading (line 74) | class TableHeading < BaseCell
class TableCaption (line 78) | class TableCaption < Compound
FILE: lib/infoboxer/tree/template.rb
type Infoboxer (line 5) | module Infoboxer
type Tree (line 6) | module Tree
class Var (line 11) | class Var < Compound
method initialize (line 14) | def initialize(name, children = Nodes[])
method empty? (line 21) | def empty?
method named? (line 25) | def named?
method descr (line 31) | def descr
method _eq (line 35) | def _eq(other)
class Template (line 98) | class Template < Compound
method initialize (line 116) | def initialize(name, variables = Nodes[])
method text (line 121) | def text
method unwrap (line 126) | def unwrap
method to_tree (line 131) | def to_tree(level = 0)
method to_h (line 141) | def to_h
method unnamed_variables (line 150) | def unnamed_variables
method named_variables (line 154) | def named_variables
method fetch (line 170) | def fetch(*patterns)
method fetch_hash (line 177) | def fetch_hash(*patterns)
method fetch_date (line 195) | def fetch_date(*patterns)
method link (line 234) | def link
method empty? (line 241) | def empty?
method _eq (line 247) | def _eq(other)
method clean_class (line 251) | def clean_class
method extract_params (line 255) | def extract_params(vars)
method inspect_variables (line 261) | def inspect_variables(depth)
FILE: lib/infoboxer/tree/text.rb
type Infoboxer (line 3) | module Infoboxer
type Tree (line 4) | module Tree
class Text (line 14) | class Text < Node
method initialize (line 18) | def initialize(text, **params)
method text (line 24) | def text
method to_tree (line 29) | def to_tree(level = 0)
method can_merge? (line 35) | def can_merge?(other)
method merge! (line 41) | def merge!(other)
method empty? (line 53) | def empty?
method _eq (line 59) | def _eq(other)
FILE: lib/infoboxer/tree/wikilink.rb
type Infoboxer (line 5) | module Infoboxer
type Tree (line 6) | module Tree
class Wikilink (line 14) | class Wikilink < Link
method initialize (line 15) | def initialize(link, label = nil, namespace: nil, interwiki: nil)
method parse_name! (line 58) | def parse_name!
method parse_topic! (line 67) | def parse_topic!
FILE: lib/infoboxer/version.rb
type Infoboxer (line 3) | module Infoboxer
FILE: lib/infoboxer/wiki_path.rb
type Infoboxer (line 3) | module Infoboxer
class WikiPath (line 5) | class WikiPath
method _parse (line 9) | def _parse(string)
method parse (line 19) | def parse(string)
method scan_step (line 25) | def scan_step(scanner) # rubocop:disable Metrics/PerceivedComplexity
method process_value (line 45) | def process_value(value)
method process_type (line 56) | def process_type(type)
method valid_type? (line 61) | def valid_type?(t)
method unexpected (line 65) | def unexpected(scanner, expected)
method initialize (line 71) | def initialize(path)
method call (line 75) | def call(node)
method apply_step (line 81) | def apply_step(node, step)
FILE: spec/infoboxer/en.wikipedia.org/calc_templates_spec.rb
type Infoboxer (line 1) | module Infoboxer
FILE: spec/infoboxer/en.wikipedia.org/simple_templates_spec.rb
type Infoboxer (line 1) | module Infoboxer
function parse (line 5) | def parse(wikitext)
function w2t (line 9) | def w2t(wikitext)
FILE: spec/infoboxer/integration/all_en_templates_spec.rb
type Infoboxer (line 1) | module Infoboxer
FILE: spec/infoboxer/integration/fr_spec.rb
type Infoboxer (line 1) | module Infoboxer
FILE: spec/infoboxer/integration/site_traits_spec.rb
type Infoboxer (line 1) | module Infoboxer
FILE: spec/infoboxer/media_wiki/follow_spec.rb
type Infoboxer (line 1) | module Infoboxer
FILE: spec/infoboxer/media_wiki/traits_spec.rb
type Infoboxer (line 1) | module Infoboxer
function text (line 23) | def text
FILE: spec/infoboxer/media_wiki_spec.rb
type Infoboxer (line 1) | module Infoboxer
FILE: spec/infoboxer/navigation/lookup/selector_spec.rb
type Infoboxer (line 1) | module Infoboxer
FILE: spec/infoboxer/navigation/lookup_spec.rb
type Infoboxer (line 1) | module Infoboxer
function first_list_item? (line 63) | def first_list_item?
FILE: spec/infoboxer/navigation/sections_spec.rb
type Infoboxer (line 1) | module Infoboxer
FILE: spec/infoboxer/navigation/shortcuts_spec.rb
type Infoboxer (line 1) | module Infoboxer
FILE: spec/infoboxer/navigation/wikipath_spec.rb
type Infoboxer (line 1) | module Infoboxer
FILE: spec/infoboxer/parser/flow_spec.rb
type Infoboxer (line 3) | module Infoboxer
FILE: spec/infoboxer/parser/image_spec.rb
type Infoboxer (line 3) | module Infoboxer
FILE: spec/infoboxer/parser/inline_spec.rb
type Infoboxer (line 3) | module Infoboxer
FILE: spec/infoboxer/parser/paragraphs_spec.rb
type Infoboxer (line 3) | module Infoboxer
FILE: spec/infoboxer/parser/ref_spec.rb
type Infoboxer (line 3) | module Infoboxer
FILE: spec/infoboxer/parser/table_spec.rb
type Infoboxer (line 3) | module Infoboxer
FILE: spec/infoboxer/parser/template_spec.rb
type Infoboxer (line 3) | module Infoboxer
FILE: spec/infoboxer/templates/set_spec.rb
type Infoboxer (line 1) | module Infoboxer
function city_names (line 7) | def city_names
function infobox? (line 13) | def infobox?
function children (line 21) | def children
function text (line 25) | def text
FILE: spec/infoboxer/tree/inspect_spec.rb
type Infoboxer (line 1) | module Infoboxer
type Tree (line 2) | module Tree
FILE: spec/infoboxer/tree/nodes_spec.rb
type Infoboxer (line 1) | module Infoboxer
type Tree (line 2) | module Tree
FILE: spec/infoboxer/tree/template_spec.rb
type Infoboxer (line 1) | module Infoboxer
type Tree (line 2) | module Tree
FILE: spec/infoboxer/tree/text_spec.rb
type Infoboxer (line 1) | module Infoboxer
type Tree (line 2) | module Tree
FILE: spec/infoboxer/tree/to_tree_spec.rb
type Infoboxer (line 1) | module Infoboxer
type Tree (line 2) | module Tree
FILE: spec/infoboxer/tree/wikilink_spec.rb
type Infoboxer (line 1) | module Infoboxer
type Tree (line 2) | module Tree
FILE: spec/infoboxer/wiki_path_spec.rb
type Infoboxer (line 3) | module Infoboxer
FILE: spec/spec_helper.rb
function unindent (line 21) | def unindent(text)
type WebMock (line 32) | module WebMock
class Util::HashCounter (line 33) | class Util::HashCounter
method ordered_keys (line 34) | def ordered_keys
function requests (line 39) | def self.requests
function last_request (line 43) | def self.last_request
Copy disabled (too large)
Download .json
Condensed preview — 213 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (45,375K chars).
[
{
"path": ".codeclimate.yml",
"chars": 79,
"preview": "exclude_paths:\n - lib/infoboxer/definitions/en.wikipedia.org.rb # I know, ok?\n"
},
{
"path": ".dokaz",
"chars": 34,
"preview": "--require ./spec/dokaz_helpers.rb\n"
},
{
"path": ".github/workflows/ci.yml",
"chars": 676,
"preview": "name: CI\n\non:\n push:\n branches: [ master ]\n pull_request:\n branches: [ master ]\n\njobs:\n main:\n name: >-\n "
},
{
"path": ".gitignore",
"chars": 126,
"preview": ".bundle\nvendor\ndeprecated\nTODO.txt\ntmp\nexamples\nprofile/out/*.html\nrubocop\n.yardoc\n*.gem\n.coveralls.yml\ncoverage\ndoc\n.by"
},
{
"path": ".rspec",
"chars": 40,
"preview": "--require=./spec/spec_helper.rb\n--color\n"
},
{
"path": ".rubocop.yml",
"chars": 3441,
"preview": "inherit_from: .rubocop_todo.yml\nrequire: rubocop-rspec\n\nAllCops:\n Include:\n - 'lib/**/*'\n Exclude:\n - 'bin/*'\n "
},
{
"path": ".rubocop_todo.yml",
"chars": 1,
"preview": "\n"
},
{
"path": ".travis.yml",
"chars": 286,
"preview": "cache: bundler\nlanguage: ruby\nrvm:\n - \"2.6\"\n - \"2.7\"\n - \"3.0\"\n #- jruby-19mode # Due to https://github.com/jruby/jru"
},
{
"path": ".yardopts",
"chars": 59,
"preview": "--markup=markdown\n--markup-provider=redcarpet\n--no-private\n"
},
{
"path": "CHANGELOG.md",
"chars": 4612,
"preview": "# Infoboxer's change log\n\n## 0.4.0 (2021-05-30)\n\n* A cluster of bugs found in #81 fixed:\n * Empty comment (`<!---->`) n"
},
{
"path": "CONTRIBUTING.md",
"chars": 2872,
"preview": "# Contributing to Infoboxer\n\n_(Also duplicated in [wiki](https://github.com/molybdenum-99/infoboxer/wiki/Contributing).)"
},
{
"path": "Gemfile",
"chars": 691,
"preview": "source 'https://rubygems.org'\n\ngemspec\n\n# gem 'mediawiktory', github: 'molybdenum-99/mediawiktory', branch: 'develop'\n\ng"
},
{
"path": "LICENSE.txt",
"chars": 1094,
"preview": "The MIT License (MIT)\n\nCopyright (c) 2014-15 Victor 'Zverok' Shepelev\n\nPermission is hereby granted, free of charge, to "
},
{
"path": "Parsing.md",
"chars": 2075,
"preview": "Parsing Wikipedia is not an easy tasks. Some tags and formattings signs\ncan be only after newline, some can be everywher"
},
{
"path": "README.md",
"chars": 6105,
"preview": "# Infoboxer\n\n[](http://badge.fury.io/rb/infoboxer)\n![Build Status]"
},
{
"path": "Rakefile",
"chars": 1628,
"preview": "# coding: utf-8\nrequire 'bundler/setup'\nrequire 'infoboxer'\nrequire 'rubygems/tasks'\nGem::Tasks.new\n\nrequire 'yard-junk/"
},
{
"path": "bin/infoboxer",
"chars": 1163,
"preview": "#!/usr/bin/env ruby\nrequire 'rubygems'\nrequire 'infoboxer'\n\ninclude Infoboxer\n\nrequire 'optparse'\n\nwiki_url = nil\n\nOptio"
},
{
"path": "infoboxer.gemspec",
"chars": 988,
"preview": "require './lib/infoboxer/version'\n\nGem::Specification.new do |s|\n s.name = 'infoboxer'\n s.version = Infoboxer::VE"
},
{
"path": "lib/infoboxer/core_ext.rb",
"chars": 176,
"preview": "# frozen_string_literal: true\n\n# @private\nclass Object\n # Unfortunately, not in backports gem still :(\n if RUBY_VERSIO"
},
{
"path": "lib/infoboxer/definitions/en.wikipedia.org.rb",
"chars": 9897,
"preview": "# frozen_string_literal: true\n\n# rubocop:disable Layout/EmptyLinesAroundArguments\nmodule Infoboxer\n MediaWiki::Traits.f"
},
{
"path": "lib/infoboxer/media_wiki/page.rb",
"chars": 1415,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n class MediaWiki\n # A descendant of {Tree::Document Document}, repre"
},
{
"path": "lib/infoboxer/media_wiki/traits.rb",
"chars": 4513,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n class MediaWiki\n # DSL for defining \"traits\" for some site.\n #\n "
},
{
"path": "lib/infoboxer/media_wiki.rb",
"chars": 11885,
"preview": "# frozen_string_literal: true\n\nrequire 'mediawiktory'\nrequire 'addressable/uri'\n\nrequire_relative 'media_wiki/traits'\nre"
},
{
"path": "lib/infoboxer/navigation/lookup.rb",
"chars": 8167,
"preview": "# frozen_string_literal: true\n\nrequire_relative 'selector'\n\nmodule Infoboxer\n module Navigation\n # See {Lookup::Node"
},
{
"path": "lib/infoboxer/navigation/sections.rb",
"chars": 6742,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n module Navigation\n # `Sections` module provides logical view on doc"
},
{
"path": "lib/infoboxer/navigation/selector.rb",
"chars": 1728,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n module Navigation\n module Lookup\n # Incapsulates storage of se"
},
{
"path": "lib/infoboxer/navigation/shortcuts.rb",
"chars": 5899,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n module Navigation\n # See {Shortcuts::Node Shortcuts::Node} for ever"
},
{
"path": "lib/infoboxer/navigation/wikipath.rb",
"chars": 1145,
"preview": "# frozen_string_literal: true\n\nrequire_relative '../wiki_path'\n\nmodule Infoboxer\n module Navigation\n module Wikipath"
},
{
"path": "lib/infoboxer/navigation.rb",
"chars": 3307,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n # Navigation is one of the things Infoboxer is proud about. It tries\n "
},
{
"path": "lib/infoboxer/parser/context.rb",
"chars": 4098,
"preview": "# frozen_string_literal: true\n\nrequire 'strscan'\n\nmodule Infoboxer\n class Parser\n class Context # rubocop:disable Me"
},
{
"path": "lib/infoboxer/parser/html.rb",
"chars": 1384,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n class Parser\n module HTML\n include Tree\n\n def html\n "
},
{
"path": "lib/infoboxer/parser/image.rb",
"chars": 1537,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n class Parser\n module Image\n include Tree\n\n def image\n "
},
{
"path": "lib/infoboxer/parser/inline.rb",
"chars": 6153,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n class Parser\n module Inline\n include Tree\n\n def inline(un"
},
{
"path": "lib/infoboxer/parser/paragraphs.rb",
"chars": 1732,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n class Parser\n module Paragraphs\n include Tree\n\n def parag"
},
{
"path": "lib/infoboxer/parser/table.rb",
"chars": 4618,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n class Parser\n # http://en.wikipedia.org/wiki/Help:Table\n module "
},
{
"path": "lib/infoboxer/parser/template.rb",
"chars": 1767,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n class Parser\n module Template\n include Tree\n\n # NB: here "
},
{
"path": "lib/infoboxer/parser/util.rb",
"chars": 2948,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n class Parser\n module Util\n attr_reader :re\n\n FORMATTING ="
},
{
"path": "lib/infoboxer/parser.rb",
"chars": 1648,
"preview": "# frozen_string_literal: true\n\nrequire 'ostruct'\nrequire 'logger'\n\nmodule Infoboxer\n class Parser\n class ParsingErro"
},
{
"path": "lib/infoboxer/templates/base.rb",
"chars": 1452,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n module Templates\n class Base < Tree::Template\n include Tree\n\n "
},
{
"path": "lib/infoboxer/templates/set.rb",
"chars": 5401,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n module Templates\n # Base class for defining set of templates, used "
},
{
"path": "lib/infoboxer/templates.rb",
"chars": 2167,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n # This module covers advanced MediaWiki templates usage.\n #\n # It is"
},
{
"path": "lib/infoboxer/tree/compound.rb",
"chars": 1814,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n module Tree\n # Base class for all nodes with children.\n class Co"
},
{
"path": "lib/infoboxer/tree/document.rb",
"chars": 254,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n module Tree\n # Represents entire document.\n #\n # Alongside wi"
},
{
"path": "lib/infoboxer/tree/gallery.rb",
"chars": 305,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n module Tree\n # Represents gallery of images (contents of `<gallery>"
},
{
"path": "lib/infoboxer/tree/html.rb",
"chars": 1725,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n module Tree\n module HTMLTagCommons\n BLOCK_TAGS = %w[div p br]."
},
{
"path": "lib/infoboxer/tree/image.rb",
"chars": 1414,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n module Tree\n # Represents image (or other media file).\n #\n # "
},
{
"path": "lib/infoboxer/tree/inline.rb",
"chars": 860,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n module Tree\n # Represents italic text.\n class Italic < Compound\n"
},
{
"path": "lib/infoboxer/tree/linkable.rb",
"chars": 1167,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n module Tree\n # Module included into everything, that can be treated"
},
{
"path": "lib/infoboxer/tree/list.rb",
"chars": 3799,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n module Tree\n # Represents item of ordered or unordered list.\n cl"
},
{
"path": "lib/infoboxer/tree/math.rb",
"chars": 235,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n module Tree\n # Represents node of math formulae marked with TeX\n "
},
{
"path": "lib/infoboxer/tree/node.rb",
"chars": 4717,
"preview": "# frozen_string_literal: true\n\nrequire 'htmlentities'\n\nmodule Infoboxer\n module Tree\n # This is the base class for a"
},
{
"path": "lib/infoboxer/tree/nodes.rb",
"chars": 6796,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n module Tree\n # List of nodes, which tries to be useful both as arra"
},
{
"path": "lib/infoboxer/tree/paragraphs.rb",
"chars": 2990,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n module Tree\n # Base class for all \"paragraph-level\" nodes: {Paragra"
},
{
"path": "lib/infoboxer/tree/ref.rb",
"chars": 892,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n module Tree\n # Represents footnote.\n #\n # Is not rendered in "
},
{
"path": "lib/infoboxer/tree/table.rb",
"chars": 1886,
"preview": "# frozen_string_literal: true\n\nrequire 'terminal-table'\n\nmodule Infoboxer\n module Tree\n # Represents table. Tables a"
},
{
"path": "lib/infoboxer/tree/template.rb",
"chars": 8321,
"preview": "# frozen_string_literal: true\n\nrequire_relative 'linkable'\n\nmodule Infoboxer\n module Tree\n # Template variable.\n "
},
{
"path": "lib/infoboxer/tree/text.rb",
"chars": 1371,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n module Tree\n # Represents plain text node.\n #\n # Think of it "
},
{
"path": "lib/infoboxer/tree/wikilink.rb",
"chars": 2633,
"preview": "# frozen_string_literal: true\n\nrequire_relative 'linkable'\n\nmodule Infoboxer\n module Tree\n # Internal MediaWiki link"
},
{
"path": "lib/infoboxer/tree.rb",
"chars": 2707,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n # Infoboxer provides you with tree structure of the Wikipedia page,\n "
},
{
"path": "lib/infoboxer/version.rb",
"chars": 157,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n MAJOR = 0\n MINOR = 4\n PATCH = 0\n PRE = nil\n VERSION = [MAJOR, MINO"
},
{
"path": "lib/infoboxer/wiki_path.rb",
"chars": 2622,
"preview": "# frozen_string_literal: true\n\nmodule Infoboxer\n # @private\n class WikiPath\n ParseError = Class.new(ArgumentError)\n"
},
{
"path": "lib/infoboxer.rb",
"chars": 8435,
"preview": "# frozen_string_literal: true\n\n# Main client module for entire infoboxer functionality. If you're lucky,\n# there's no ot"
},
{
"path": "profile/out/.gitkeep",
"chars": 0,
"preview": ""
},
{
"path": "profile/pages/argentina.txt",
"chars": 181079,
"preview": "{{other uses}}\n\n{{pp-semi|small=yes}}\n{{Use dmy dates|date=July 2014}}\n{{Infobox country\n|conventional_long_name = Argen"
},
{
"path": "profile/pages/canada.wiki",
"chars": 148804,
"preview": "{{for||Canada (disambiguation)}}\n{{Use Canadian English|date=November 2014}}\n{{Use mdy dates|date=December 2014}}\n{{pp-s"
},
{
"path": "profile/pages/ukraine.wiki",
"chars": 250156,
"preview": "{{about|the country}}\n{{pp-pc1}}\n{{Use dmy dates|date=February 2015}}\n{{pp-pc1}}{{pp-move-indef}}\n{{Infobox country\n|con"
},
{
"path": "profile/pages/usa.wiki",
"chars": 322968,
"preview": "{{for||US (disambiguation)|USA (disambiguation)|United States (disambiguation)}}\n{{good article}}\n{{pp-semi-indef|small="
},
{
"path": "regression/pages/2012_bdo_world_darts_championship.wiki",
"chars": 29087,
"preview": "{{Use dmy dates|date=December 2014}}\n{{Use British English|date=December 2014}}\n{{Infobox individual darts tournament\n|t"
},
{
"path": "regression/pages/area.wiki",
"chars": 41520,
"preview": "{{About|the geometric quantity}}\n\n[[File:Area.svg|right|thumb|alt=Three shapes on a square grid|The combined area of the"
},
{
"path": "regression/pages/articuno.wiki",
"chars": 5202,
"preview": "{{PokémoncardInfobox |\ncardname=Articuno |\njname=フリーザー |\njtrans=Freezer |\nimage=ArticunoFossil17.jpg |\ncaption={{TCG|Fos"
},
{
"path": "regression/pages/canada.wiki",
"chars": 148804,
"preview": "{{for||Canada (disambiguation)}}\n{{Use Canadian English|date=November 2014}}\n{{Use mdy dates|date=December 2014}}\n{{pp-s"
},
{
"path": "regression/pages/chiang_mai.wiki",
"chars": 205346,
"preview": "{{pagebanner|Chiangmai_banner.jpg|dotm=yes|caption=Wat Phra That Doi Suthep}}\n{{mapframe|18.786915|98.986632|zoom=14|hei"
},
{
"path": "regression/pages/greece.wiki",
"chars": 231637,
"preview": "{{redirect|Hellas|other uses of \"Hellas\" and \"Greece\"|Hellas (disambiguation)|and|Greece (disambiguation)}}\n{{pp-semi|sm"
},
{
"path": "regression/pages/list_of_countries.wiki",
"chars": 112225,
"preview": "{{Redirect2|List of nations|List of countries|other country lists|Lists of countries and territories|non-sovereign depen"
},
{
"path": "regression/pages/progress_wrestling.wiki",
"chars": 52087,
"preview": "{{short description|British professional wrestling promotion}}\n{{Use dmy dates|date=January 2015}}\n{{Use British English"
},
{
"path": "regression/pages/south_america.wiki",
"chars": 64104,
"preview": "{{refimprove|date=June 2015}}\n{{Redirect|Southern America|the United States region|Southern United States|the botanical "
},
{
"path": "regression/pages/south_america_new.wiki",
"chars": 64963,
"preview": "{{Redirect|Southern America|the region of the United States|Southern United States|the botanical continent defined in th"
},
{
"path": "regression/pages/ukraine.wiki",
"chars": 250156,
"preview": "{{about|the country}}\n{{pp-pc1}}\n{{Use dmy dates|date=February 2015}}\n{{pp-pc1}}{{pp-move-indef}}\n{{Infobox country\n|con"
},
{
"path": "regression/pages/ukrainian_galician_army.wiki",
"chars": 7286,
"preview": "{{Armies of Ukraine}}\n\n'''Ukrainian Galician Army''' ({{lang-uk| Українська Галицька Армія, ''Ukrayins’ka Halyts’ka Armi"
},
{
"path": "regression/pages/usa.wiki",
"chars": 322968,
"preview": "{{for||US (disambiguation)|USA (disambiguation)|United States (disambiguation)}}\n{{good article}}\n{{pp-semi-indef|small="
},
{
"path": "regression/pages/wyoming.wiki",
"chars": 89262,
"preview": "{{About|the U.S. state}}\n{{pp-move-indef}}\n{{lead too short|date=July 2016}}\n{{Use mdy dates|date=March 2015}}\n{{Infobox"
},
{
"path": "spec/dokaz_helpers.rb",
"chars": 63,
"preview": "require 'bundler/setup'\nrequire 'infoboxer'\n\ninclude Infoboxer\n"
},
{
"path": "spec/fixtures/argentina.wiki",
"chars": 181079,
"preview": "{{other uses}}\n\n{{pp-semi|small=yes}}\n{{Use dmy dates|date=July 2014}}\n{{Infobox country\n|conventional_long_name = Argen"
},
{
"path": "spec/fixtures/broken_table_caption.txt",
"chars": 201,
"preview": "{| class=\"wikitable floatright\"\n |+''Amatya'' (major officials) in ancient Hindu kingdom<br />per ''Arthashastra''{{sfn|"
},
{
"path": "spec/fixtures/large_infobox.txt",
"chars": 8437,
"preview": "{{Infobox country\n|conventional_long_name = Argentine Republic{{efn-ua|name=altnames|Article 35 of the [[Argentine Const"
},
{
"path": "spec/fixtures/large_table.txt",
"chars": 11982,
"preview": "{| style=\"text-align: center;\" class=\"wikitable sortable\"\n|-\n! style=\"width:16em\" |\n! Maintainer\n! First public release "
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer/common_MediaWiki_shortcuts/Wikia/language/1_1_2_3_1.yml",
"chars": 56685,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: http://ru.tardis.wikia.com/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer/common_MediaWiki_shortcuts/Wikia/language/api_base_url_to_s/1_1_2_3_2_1.yml",
"chars": 56685,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: http://ru.tardis.wikia.com/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer/common_MediaWiki_shortcuts/Wikia/simple/1_1_2_1_1.yml",
"chars": 56093,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: http://tardis.wikia.com/api.php?action=query&format=json&meta"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer/common_MediaWiki_shortcuts/Wikia/simple/api_base_url_to_s/1_1_2_1_2_1.yml",
"chars": 56092,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: http://tardis.wikia.com/api.php?action=query&format=json&meta"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer/common_MediaWiki_shortcuts/Wikia/subdomain/1_1_2_2_1.yml",
"chars": 56685,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: http://ru.tardis.wikia.com/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer/common_MediaWiki_shortcuts/Wikia/subdomain/api_base_url_to_s/1_1_2_2_2_1.yml",
"chars": 56684,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: http://ru.tardis.wikia.com/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer/common_MediaWiki_shortcuts/Wikipedia/caching/constructs_object_only_once.yml",
"chars": 70107,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer/common_MediaWiki_shortcuts/Wikipedia/default/1_1_1_1_1.yml",
"chars": 70106,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer/common_MediaWiki_shortcuts/Wikipedia/default/api_base_url_to_s/1_1_1_1_2_1.yml",
"chars": 70107,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer/common_MediaWiki_shortcuts/Wikipedia/language/api_base_url_to_s/1_1_1_3_1_1.yml",
"chars": 70479,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://fr.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer/common_MediaWiki_shortcuts/Wikipedia/shortcut/api_base_url_to_s/1_1_1_4_1_1.yml",
"chars": 70480,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://fr.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/category/category_name_transformation/default_namespace/uri_query_values/1_6_3_2_1_1.yml",
"chars": 414861,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/category/category_name_transformation/localized_namespace/uri_query_values/1_6_3_3_1_1.yml",
"chars": 133849,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://es.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/category/category_name_transformation/not_a_namespace/uri_query_values/1_6_3_4_1_1.yml",
"chars": 72311,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/category/category_name_transformation/when_no_namespace/uri_query_values/1_6_3_1_1_1.yml",
"chars": 414861,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/category/when_category_exists/1_6_1_1.yml",
"chars": 414862,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/category/when_category_exists/count/1_6_1_2_1.yml",
"chars": 414861,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/category/when_category_exists/map_title_/1_6_1_3_1.yml",
"chars": 414861,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/category/when_category_is_not/1_6_2_1.yml",
"chars": 72309,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/category/when_category_is_not/1_6_2_2.yml",
"chars": 72309,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get/_prop/source/1_4_6_1_1.yml",
"chars": 302951,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get/interwiki/url/1_4_7_1_1.yml",
"chars": 170140,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get/processor/source/1_4_6_1_1.yml",
"chars": 304828,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get/when_invalid_title_requested/as_block/1_4_5_1_1.yml",
"chars": 72405,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get/when_invalid_title_requested/call/1_4_5_1_1.yml",
"chars": 72359,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get/when_several_pages/1_4_2_1.yml",
"chars": 625903,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get/when_several_pages_including_non-existent/count/1_4_4_1_1.yml",
"chars": 626394,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get/when_signle_non-existing_page/1_4_3_1.yml",
"chars": 72771,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get/when_single_page/1_4_1_1.yml",
"chars": 302884,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get/when_single_page/source/1_4_1_4_1.yml",
"chars": 302884,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get/when_single_page/title/1_4_1_2_1.yml",
"chars": 302884,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get/when_single_page/url/1_4_1_3_1.yml",
"chars": 302884,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get_h/when_several_pages_including_non-existent/1_5_1_1.yml",
"chars": 626394,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get_h/when_several_pages_including_non-existent/_WTF_I_just_read_Make_me_unsee_it_/1_5_1_3_1.yml",
"chars": 626394,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get_h/when_several_pages_including_non-existent/keys/1_5_1_2_1.yml",
"chars": 626394,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get_h/when_several_pages_including_redirected_to_same/1_5_2_1.yml",
"chars": 238578,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get_h/when_several_pages_including_redirected_to_same/keys/1_5_2_2_1.yml",
"chars": 238578,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get_h/when_several_pages_including_redirected_to_same/values/1_5_2_3_1.yml",
"chars": 238578,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get_h/when_several_pages_including_redirected_to_same/values/1_5_2_4_1.yml",
"chars": 238578,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get_h/with_downcase_titles/1_5_3_1.yml",
"chars": 238497,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get_h/with_downcase_titles/keys/1_5_3_2_1.yml",
"chars": 238497,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/get_h/with_downcase_titles/values/1_5_3_3_1.yml",
"chars": 238497,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/inspect/inspect/1_1_1_1.yml",
"chars": 70106,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/prefixsearch/when_found/1_8_1_1.yml",
"chars": 1287484,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/prefixsearch/when_found/count/1_8_1_2_1.yml",
"chars": 1287483,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/prefixsearch/when_found/map_title_/1_8_1_3_1.yml",
"chars": 1287483,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/prefixsearch/when_not_found/1_8_2_1.yml",
"chars": 72280,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/raw/several_pages/_50_pages/1_2_2_2_1.yml",
"chars": 4985910,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/raw/several_pages/_50_pages/count/1_2_2_2_2_1.yml",
"chars": 4985909,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/raw/several_pages/_50_pages/map_title_/1_2_2_2_3_1.yml",
"chars": 4985909,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/raw/several_pages/default/1_2_2_1_1.yml",
"chars": 626054,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/raw/several_pages/default/count/1_2_2_1_2_1.yml",
"chars": 626054,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/raw/several_pages/default/map_title_/1_2_2_1_3_1.yml",
"chars": 626055,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/raw/several_pages/no_pages/1_2_2_3_1.yml",
"chars": 70106,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/raw/single_page/default/1_2_1_1_1.yml",
"chars": 303035,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/raw/single_page/default/1_2_1_1_3.yml",
"chars": 303036,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/raw/single_page/default/_fullurl_/1_2_1_1_4_1.yml",
"chars": 303036,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/raw/single_page/default/_title_/1_2_1_1_2_1.yml",
"chars": 303036,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/raw/single_page/non-existent/_missing_/1_2_1_2_2_1.yml",
"chars": 72922,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/raw/single_page/non-existent/_title_/1_2_1_2_1_1.yml",
"chars": 72922,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/raw/single_page/redirect/1_2_1_3_2.yml",
"chars": 233593,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/raw/single_page/redirect/_fullurl_/1_2_1_3_3_1.yml",
"chars": 233593,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/raw/single_page/redirect/_title_/1_2_1_3_1_1.yml",
"chars": 233593,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/raw/user-agent/default/1_2_3_1_1.yml",
"chars": 303036,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/raw/user-agent/globally_set/1_2_3_2_1.yml",
"chars": 302884,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/raw/user-agent/locally_set/1_2_3_3_1.yml",
"chars": 302892,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/search/when_found/1_7_1_1.yml",
"chars": 89719,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/search/when_found/count/1_7_1_2_1.yml",
"chars": 89719,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/search/when_found/map_title_/1_7_1_3_1.yml",
"chars": 89715,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/search/when_not_found/1_7_2_1.yml",
"chars": 72304,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/traits/dynamic_part_-_taken_from_API/after_page_fetched/category_namespace/1_3_2_2_2_1.yml",
"chars": 472897,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://fr.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/traits/dynamic_part_-_taken_from_API/after_page_fetched/file_namespace/1_3_2_2_1_1.yml",
"chars": 472896,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://fr.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/traits/dynamic_part_-_taken_from_API/before_first_page_fetched/category_namespace/1_3_2_1_2_1.yml",
"chars": 70479,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://fr.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/traits/dynamic_part_-_taken_from_API/before_first_page_fetched/file_namespace/1_3_2_1_1_1.yml",
"chars": 70479,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://fr.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_MediaWiki/traits/static_part_-_guess_by_domain/1_3_1_1.yml",
"chars": 70030,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_Tree_Node/Infoboxer_MediaWiki_Page/1_5_1.yml",
"chars": 232978,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&i"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_Tree_Nodes/2_1.yml",
"chars": 288900,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&i"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_Tree_Nodes/2_2.yml",
"chars": 288901,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&i"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_Tree_Nodes/when_interwiki_link/2_3_1.yml",
"chars": 342364,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_Tree_Nodes/when_interwiki_link/map_url_/2_3_2_1.yml",
"chars": 200435,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&i"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_Tree_Wikilink/follow/1_2_1.yml",
"chars": 406270,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&i"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_Tree_Wikilink/follow/text/1_2_3_1.yml",
"chars": 406270,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&i"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_Tree_Wikilink/follow/title/1_2_2_1.yml",
"chars": 406270,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&i"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_Tree_Wikilink/follow/when_interwiki_link/1_2_4_1.yml",
"chars": 166584,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&i"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_Tree_Wikilink/follow/when_interwiki_link/url/1_2_4_2_1.yml",
"chars": 94716,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&i"
},
{
"path": "spec/fixtures/vcr_cassettes/Infoboxer_Tree_Wikilink/url/1_1_1.yml",
"chars": 303451,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/en_wikipedia_org/1_1.yml",
"chars": 173664,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&i"
},
{
"path": "spec/fixtures/vcr_cassettes/follow-chile.yml",
"chars": 173664,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&i"
},
{
"path": "spec/fixtures/vcr_cassettes/follow-several.yml",
"chars": 55555,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&i"
},
{
"path": "spec/fixtures/vcr_cassettes/follow-source-argentine.yml",
"chars": 232978,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&i"
},
{
"path": "spec/fixtures/vcr_cassettes/follow-source-argentine2.yml",
"chars": 232978,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&i"
},
{
"path": "spec/fixtures/vcr_cassettes/follow-source-forests.yml",
"chars": 4638,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&i"
},
{
"path": "spec/fixtures/vcr_cassettes/follow-template.yml",
"chars": 3805,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://en.wikipedia.org/w/api.php?action=query&format=json&i"
},
{
"path": "spec/fixtures/vcr_cassettes/other-language_Wikipedia/categories/1_2_1.yml",
"chars": 198368,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://fr.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/other-language_Wikipedia/categories/should_include_existing_category.yml",
"chars": 198368,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://fr.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/other-language_Wikipedia/files/default_prefix/1_1_1_1.yml",
"chars": 198368,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://fr.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/fixtures/vcr_cassettes/other-language_Wikipedia/files/localized_prefix/1_1_2_1.yml",
"chars": 198368,
"preview": "---\nhttp_interactions:\n- request:\n method: get\n uri: https://fr.wikipedia.org/w/api.php?action=query&format=json&m"
},
{
"path": "spec/infoboxer/en.wikipedia.org/calc_templates_spec.rb",
"chars": 1842,
"preview": "module Infoboxer\n describe 'calculated templates' do\n let(:traits) { MediaWiki::Traits.get('en.wikipedia.org') }\n "
},
{
"path": "spec/infoboxer/en.wikipedia.org/simple_templates_spec.rb",
"chars": 893,
"preview": "module Infoboxer\n describe 'simple templates definitions' do\n let(:traits) { MediaWiki::Traits.get('en.wikipedia.org"
},
{
"path": "spec/infoboxer/infoboxer_spec.rb",
"chars": 1708,
"preview": "describe Infoboxer do\n describe 'common MediaWiki shortcuts', :vcr do\n context 'Wikipedia' do\n describe 'defaul"
},
{
"path": "spec/infoboxer/integration/all_en_templates_spec.rb",
"chars": 274,
"preview": "module Infoboxer\n describe 'en.wikipedia.org', :vcr do\n let(:page) { Infoboxer.wp.get('Chile') }\n\n subject {\n "
},
{
"path": "spec/infoboxer/integration/fr_spec.rb",
"chars": 933,
"preview": "module Infoboxer\n describe 'other-language Wikipedia', vcr: true do\n let(:client) { MediaWiki.new('https://fr.wikipe"
},
{
"path": "spec/infoboxer/integration/site_traits_spec.rb",
"chars": 2470,
"preview": "module Infoboxer\n describe 'Integration of MediaWiki::Traits into data' do\n before do\n MediaWiki::Traits.templa"
},
{
"path": "spec/infoboxer/media_wiki/follow_spec.rb",
"chars": 2472,
"preview": "module Infoboxer\n describe Tree::Wikilink, :vcr do\n let(:source) { Infoboxer.wp.get('Argentina') }\n let(:link) { "
},
{
"path": "spec/infoboxer/media_wiki/traits_spec.rb",
"chars": 2873,
"preview": "module Infoboxer\n describe MediaWiki::Traits do\n before do\n described_class.templates.clear\n described_cla"
},
{
"path": "spec/infoboxer/media_wiki_spec.rb",
"chars": 9470,
"preview": "module Infoboxer\n describe MediaWiki, :vcr do\n let(:client) { MediaWiki.new('https://en.wikipedia.org/w/api.php') }\n"
},
{
"path": "spec/infoboxer/navigation/lookup/selector_spec.rb",
"chars": 1714,
"preview": "module Infoboxer\n describe Navigation::Lookup::Selector do\n context 'when class' do\n subject { described_class."
},
{
"path": "spec/infoboxer/navigation/lookup_spec.rb",
"chars": 5302,
"preview": "module Infoboxer\n describe Navigation::Lookup do\n let(:document) {\n Parser.document(%{\n Test in first ''pa"
},
{
"path": "spec/infoboxer/navigation/sections_spec.rb",
"chars": 4174,
"preview": "module Infoboxer\n describe Navigation::Sections do\n # Document is immutable and is created ~0.3 sec each time.\n #"
},
{
"path": "spec/infoboxer/navigation/shortcuts_spec.rb",
"chars": 2621,
"preview": "module Infoboxer\n describe Navigation::Shortcuts do\n # Document is immutable and is created ~0.3 sec each time.\n "
},
{
"path": "spec/infoboxer/navigation/wikipath_spec.rb",
"chars": 1375,
"preview": "module Infoboxer\n describe Navigation::Wikipath do\n include Saharspec::Util\n\n let(:document) {\n Parser.docum"
},
{
"path": "spec/infoboxer/parser/flow_spec.rb",
"chars": 4547,
"preview": "require 'infoboxer/parser'\n\nmodule Infoboxer\n describe Parser, 'parse flow' do\n let(:ctx) { Parser::Context.new(sour"
},
{
"path": "spec/infoboxer/parser/image_spec.rb",
"chars": 3377,
"preview": "require 'infoboxer/parser'\n\nmodule Infoboxer\n describe Parser, 'images and media' do\n let(:ctx) { Parser::Context.ne"
},
{
"path": "spec/infoboxer/parser/inline_spec.rb",
"chars": 11239,
"preview": "require 'infoboxer/parser'\n\nmodule Infoboxer\n describe Parser, 'inline markup' do\n let(:ctx) { Parser::Context.new(s"
}
]
// ... and 13 more files (download for full content)
About this extraction
This page contains the full source code of the molybdenum-99/infoboxer GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 213 files (41.3 MB), approximately 10.8M tokens, and a symbol index with 565 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.