Repository: theodi/csvlint.rb Branch: main Commit: a770a9448ebb Files: 94 Total size: 331.6 KB Directory structure: gitextract_4wguoljj/ ├── .coveralls.yml ├── .gitattributes ├── .github/ │ ├── ISSUE_TEMPLATE.md │ ├── PULL_REQUEST_TEMPLATE.md │ ├── dependabot.yml │ └── workflows/ │ └── push.yml ├── .gitignore ├── .pre-commit-hooks.yaml ├── .ruby-version ├── .standard_todo.yml ├── Appraisals ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Dockerfile ├── Gemfile ├── LICENSE.md ├── README.md ├── Rakefile ├── bin/ │ ├── create_schema │ └── csvlint ├── csvlint.gemspec ├── docker_notes_for_windows.txt ├── features/ │ ├── check_format.feature │ ├── cli.feature │ ├── csv_options.feature │ ├── csvupload.feature │ ├── csvw_schema_validation.feature │ ├── fixtures/ │ │ ├── cr-line-endings.csv │ │ ├── crlf-line-endings.csv │ │ ├── inconsistent-line-endings-unquoted.csv │ │ ├── inconsistent-line-endings.csv │ │ ├── invalid-byte-sequence.csv │ │ ├── invalid_many_rows.csv │ │ ├── lf-line-endings.csv │ │ ├── spreadsheet.xls │ │ ├── spreadsheet.xlsx │ │ ├── title-row.csv │ │ ├── valid.csv │ │ ├── valid_many_rows.csv │ │ ├── w3.org/ │ │ │ └── .well-known/ │ │ │ └── csvm │ │ ├── white space in filename.csv │ │ └── windows-line-endings.csv │ ├── information.feature │ ├── parse_csv.feature │ ├── schema_validation.feature │ ├── sources.feature │ ├── step_definitions/ │ │ ├── cli_steps.rb │ │ ├── csv_options_steps.rb │ │ ├── information_steps.rb │ │ ├── parse_csv_steps.rb │ │ ├── schema_validation_steps.rb │ │ ├── sources_steps.rb │ │ ├── validation_errors_steps.rb │ │ ├── validation_info_steps.rb │ │ └── validation_warnings_steps.rb │ ├── support/ │ │ ├── aruba.rb │ │ ├── earl_formatter.rb │ │ ├── env.rb │ │ ├── load_tests.rb │ │ └── webmock.rb │ ├── validation_errors.feature │ ├── validation_info.feature │ └── validation_warnings.feature ├── gemfiles/ │ ├── activesupport_5.2.gemfile │ ├── activesupport_6.0.gemfile │ ├── activesupport_6.1.gemfile │ ├── 
activesupport_7.0.gemfile │ ├── activesupport_7.1.gemfile │ └── activesupport_7.2.gemfile ├── lib/ │ ├── csvlint/ │ │ ├── cli.rb │ │ ├── csvw/ │ │ │ ├── column.rb │ │ │ ├── date_format.rb │ │ │ ├── metadata_error.rb │ │ │ ├── number_format.rb │ │ │ ├── property_checker.rb │ │ │ ├── table.rb │ │ │ └── table_group.rb │ │ ├── error_collector.rb │ │ ├── error_message.rb │ │ ├── field.rb │ │ ├── schema.rb │ │ ├── validate.rb │ │ └── version.rb │ └── csvlint.rb └── spec/ ├── csvw/ │ ├── column_spec.rb │ ├── date_format_spec.rb │ ├── number_format_spec.rb │ ├── table_group_spec.rb │ └── table_spec.rb ├── field_spec.rb ├── schema_spec.rb ├── spec_helper.rb └── validator_spec.rb ================================================ FILE CONTENTS ================================================ ================================================ FILE: .coveralls.yml ================================================ service_name: travis-ci ================================================ FILE: .gitattributes ================================================ # Don't mess with my CSV files *.csv binary ================================================ FILE: .github/ISSUE_TEMPLATE.md ================================================ > Please provide a general summary of the issue in the Issue Title above > fill out the headings below as applicable to the issue you are reporting, > deleting as appropriate but offering us as much detail as you can to help us resolve the issue ### Expected Behaviour > What should happen? ### Desired Behaviour (for improvement suggestions only) > if relevant include images or hyperlinks to other resources that clarify the enhancement you're seeking ### Current Behaviour (for problems) > What currently happens that isn't expected behaviour? ### Steps to Reproduce (for problems) > Provide a link to a live example, or an unambiguous set of steps to reproduce this bug. Include code to reproduce, if relevant 1. 2. 3. 4. 
### Your Environment > Include as many relevant details about the environment you experienced the bug in - this will help us resolve the bug more expediently * Environment name and version (e.g. Chrome 39, node.js 5.4): * Operating System and version (desktop or mobile): ================================================ FILE: .github/PULL_REQUEST_TEMPLATE.md ================================================ This PR fixes # Changes proposed in this pull request: - - - ================================================ FILE: .github/dependabot.yml ================================================ version: 2 updates: - package-ecosystem: bundler directory: "/" schedule: interval: daily open-pull-requests-limit: 10 - package-ecosystem: github-actions directory: "/" schedule: interval: weekly ================================================ FILE: .github/workflows/push.yml ================================================ name: CI on: push: branches: [ main ] pull_request: branches: [ main ] jobs: appraisal: name: Ruby ${{ matrix.ruby-version }} / Rails ${{ matrix.activesupport-version }} runs-on: ubuntu-latest strategy: matrix: ruby-version: ['2.5', '2.6', '2.7', '3.0', '3.1', '3.2', '3.3', '3.4', '4.0'] activesupport-version: - activesupport_5.2 - activesupport_6.0 - activesupport_6.1 - activesupport_7.0 - activesupport_7.1 - activesupport_7.2 exclude: - ruby-version: '2.5' activesupport-version: activesupport_7.0 - ruby-version: '2.6' activesupport-version: activesupport_7.0 - ruby-version: '2.5' activesupport-version: activesupport_7.1 - ruby-version: '2.6' activesupport-version: activesupport_7.1 - ruby-version: '2.5' activesupport-version: activesupport_7.2 - ruby-version: '2.6' activesupport-version: activesupport_7.2 - ruby-version: '2.7' activesupport-version: activesupport_7.2 - ruby-version: '3.0' activesupport-version: activesupport_7.2 fail-fast: false env: BUNDLE_GEMFILE: gemfiles/${{ matrix.activesupport-version }}.gemfile steps: - uses: actions/checkout@v4 - 
uses: ruby/setup-ruby@v1 with: bundler-cache: true ruby-version: ${{ matrix.ruby-version }} - name: Install dependencies run: bundle install - name: Run the tests run: bundle exec rake lint: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: ruby/setup-ruby@v1 with: bundler-cache: true ruby-version: "4.0" - name: Install dependencies run: bundle install - name: Run the linter run: bundle exec standardrb ================================================ FILE: .gitignore ================================================ *.gem *.rbc .bundle .config .yardoc Gemfile.lock InstalledFiles _yardoc coverage doc/ lib/bundler/man pkg rdoc spec/reports test/tmp test/version_tmp tmp coverage/ /.rspec .idea .DS_Store features/csvw_validation_tests.feature features/fixtures/csvw bin/run-csvw-tests csvlint-earl.ttl .byebug_history gemfiles/*.lock ================================================ FILE: .pre-commit-hooks.yaml ================================================ - id: csvlint name: csvlint entry: csvlint language: ruby files: \.csv$ ================================================ FILE: .ruby-version ================================================ 4.0.1 ================================================ FILE: .standard_todo.yml ================================================ # Auto generated files with errors to ignore. # Remove from this list as you refactor files. 
--- ignore: - features/support/load_tests.rb: - Security/Open - lib/csvlint/csvw/column.rb: - Style/TernaryParentheses - lib/csvlint/csvw/date_format.rb: - Lint/MixedRegexpCaptureTypes - lib/csvlint/csvw/number_format.rb: - Style/SlicingWithRange - Style/IdenticalConditionalBranches - lib/csvlint/csvw/property_checker.rb: - Performance/InefficientHashSearch - Naming/VariableName - Style/SlicingWithRange - Security/Open - Lint/BooleanSymbol - lib/csvlint/csvw/table_group.rb: - Style/OptionalArguments - lib/csvlint/field.rb: - Naming/VariableName - lib/csvlint/schema.rb: - Security/Open - Style/SlicingWithRange - lib/csvlint/validate.rb: - Performance/Count - Lint/BooleanSymbol - Naming/VariableName - Security/Open - Lint/NonLocalExitFromIterator - lib/csvlint/schema.rb: - Lint/UselessRescue - lib/csvlint/validate.rb: - Lint/UselessRescue - lib/csvlint/cli.rb: - Style/SafeNavigation ================================================ FILE: Appraisals ================================================ # After adding a new entry, run `bundle exec appraisal install` # Then add a matching entry to `.github/workflows/push.yml` appraise "activesupport_5.2" do gem "activesupport", "~> 5.2.0" end appraise "activesupport_6.0" do gem "activesupport", "~> 6.0.0" end appraise "activesupport_6.1" do gem "activesupport", "~> 6.1.0" end appraise "activesupport_7.0" do gem "activesupport", "~> 7.0.0" end appraise "activesupport_7.1" do gem "activesupport", "~> 7.1.0" end appraise "activesupport_7.2" do gem "activesupport", "~> 7.2.0" end ================================================ FILE: CHANGELOG.md ================================================ # Change Log ## [v1.2.0](https://github.com/data-liberation-front/csvlint.rb/tree/v1.2.0) (2023-02-27) [Full Changelog](https://github.com/data-liberation-front/csvlint.rb/compare/v1.1.0...v1.2.0) **Closed issues:** - Pre-commit integration [\#275](https://github.com/Data-Liberation-Front/csvlint.rb/issues/275) **Merged pull requests:** - Pre commit hook 
[\#276](https://github.com/Data-Liberation-Front/csvlint.rb/pull/276) ([jrottenberg](https://github.com/jrottenberg)) ## [v1.1.0](https://github.com/data-liberation-front/csvlint.rb/tree/v1.1.0) (2022-12-28) [Full Changelog](https://github.com/data-liberation-front/csvlint.rb/compare/v1.0.0...v1.1.0) **Closed issues:** - Requires ruby \< 3.2 [\#272](https://github.com/Data-Liberation-Front/csvlint.rb/issues/272) - Release a new version [\#244](https://github.com/Data-Liberation-Front/csvlint.rb/issues/244) **Merged pull requests:** - bump version to 1.1.0 [\#274](https://github.com/Data-Liberation-Front/csvlint.rb/pull/274) ([Floppy](https://github.com/Floppy)) - Add support for Ruby 3.2 [\#273](https://github.com/Data-Liberation-Front/csvlint.rb/pull/273) ([Floppy](https://github.com/Floppy)) - fix lint error [\#271](https://github.com/Data-Liberation-Front/csvlint.rb/pull/271) ([youpy](https://github.com/youpy)) - optimize validation with regular expression [\#270](https://github.com/Data-Liberation-Front/csvlint.rb/pull/270) ([youpy](https://github.com/youpy)) - Bump actions/checkout from 2 to 3 [\#269](https://github.com/Data-Liberation-Front/csvlint.rb/pull/269) ([dependabot[bot]](https://github.com/apps/dependabot)) - Add GitHub Actions to Dependabot [\#267](https://github.com/Data-Liberation-Front/csvlint.rb/pull/267) ([petergoldstein](https://github.com/petergoldstein)) - Lint with standardrb [\#266](https://github.com/Data-Liberation-Front/csvlint.rb/pull/266) ([Floppy](https://github.com/Floppy)) - Add Dockerfile and notes for usage on MS Windows. [\#243](https://github.com/Data-Liberation-Front/csvlint.rb/pull/243) ([jespertp-systematic](https://github.com/jespertp-systematic)) ## [v1.0.0](https://github.com/Data-Liberation-Front/csvlint.rb/tree/v1.0.0) (2022-07-13) [Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.4.0...v1.0.0) Support Ruby 3.x, and DROPPED support for Ruby 2.4 - that's why the major version bump. 
That and this has been around long enough that it really shouldn't be on a zero version any more :) ## What's Changed - Don't patch CSV#init_converters for ruby 2.5 compatibility by @rbmrclo in - correct typos in README by @erikj in - add info about your PATH by @ftrotter in - Remove tests on deprecated ruby versions < 2.3 by @Floppy in - Drop mime-types gem dependency by @ohbarye in - remove specific version of net-http-persistent in gemspec by @kotaro0522 in - Replace colorize with rainbow to make licensing consistent. by @cobbr2 in - Update rdf requirement from < 2.0 to < 4.0 by @dependabot-preview in - Test on Ruby 2.5 and 2.6 by @Domon in - Fix load_from_json deprecation warnings. by @jezhiggins in - Fix csvw tests by @Floppy in - Test on Ruby 2.6 and 2.7 by @Floppy in - Create Dependabot config file by @dependabot-preview in - Include active_support/object to ensure this works in ruby 2.6 by @mseverini in - add CI workflow for github actions by @Floppy in - Enable and fix tests for Ruby 2.5 by @Floppy in - Support Ruby 2.6 by @Floppy in - Ruby 2.7 support by @Floppy in - Drop support for Ruby 2.4 by @Floppy in - Ruby 3.0 by @Floppy in ## New Contributors - @rbmrclo made their first contribution in - @erikj made their first contribution in - @ftrotter made their first contribution in - @ohbarye made their first contribution in - @kotaro0522 made their first contribution in - @cobbr2 made their first contribution in - @dependabot-preview made their first contribution in - @Domon made their first contribution in - @mseverini made their first contribution in ## [0.4.0](https://github.com/theodi/csvlint.rb/tree/0.4.0) (2017-xx-xx) [Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.3.3...0.4.0) - Support for Ruby 2.4 - Ruby 2.4 improves detections of unclosed quotes - Support Rails ~> 5.0 - Added `--werror` flag to command line, to treat warnings as errors - Deprecated `Schema#load_from_json` and replaced with `Schema#load_from_uri`. 
Method will be removed in 1.0.0. - Added `Schema#load_from_string` to load from a string instead of reading a URI **Closed issues:** - CLI doesn't handle filenames with spaces [\#182](https://github.com/theodi/csvlint.rb/issues/182) ## [0.3.3](https://github.com/theodi/csvlint.rb/tree/0.3.3) (2016-11-10) [Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.3.2...0.3.3) **Closed issues:** - testing issue alerts, sorry [\#186](https://github.com/theodi/csvlint.rb/issues/186) **Merged pull requests:** - Add row + col to foreign key & duplicate key errors [\#188](https://github.com/theodi/csvlint.rb/pull/188) ([nickzoic](https://github.com/nickzoic)) - Trap-and-bin this [\#185](https://github.com/theodi/csvlint.rb/pull/185) ([pikesley](https://github.com/pikesley)) - csvw: common property names can be URLs [\#181](https://github.com/theodi/csvlint.rb/pull/181) ([JeniT](https://github.com/JeniT)) - force UTF-8 if encoding is ASCII-8BIT [\#180](https://github.com/theodi/csvlint.rb/pull/180) ([JeniT](https://github.com/JeniT)) ## [0.3.2](https://github.com/theodi/csvlint.rb/tree/0.3.2) (2016-05-24) [Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.3.1...0.3.2) **Merged pull requests:** - Add schema errors to cli json [\#184](https://github.com/theodi/csvlint.rb/pull/184) ([pezholio](https://github.com/pezholio)) ## [0.3.1](https://github.com/theodi/csvlint.rb/tree/0.3.1) (2016-05-23) [Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.3.0...0.3.1) **Closed issues:** - Error installing on Windows because of \*escape\_utils\* dependency [\#175](https://github.com/theodi/csvlint.rb/issues/175) **Merged pull requests:** - Add CLI option to output JSON [\#183](https://github.com/theodi/csvlint.rb/pull/183) ([pezholio](https://github.com/pezholio)) ## [0.3.0](https://github.com/theodi/csvlint.rb/tree/0.3.0) (2016-01-12) [Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.2.6...0.3.0) **Merged pull requests:** - still increment 
current\_line after invalid\_encoding error [\#174](https://github.com/theodi/csvlint.rb/pull/174) ([wjordan213](https://github.com/wjordan213)) - Support for CSV on the Web transformations [\#173](https://github.com/theodi/csvlint.rb/pull/173) ([JeniT](https://github.com/JeniT)) ## [0.2.6](https://github.com/theodi/csvlint.rb/tree/0.2.6) (2015-11-16) [Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.2.5...0.2.6) ## [0.2.5](https://github.com/theodi/csvlint.rb/tree/0.2.5) (2015-11-16) [Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.2.4...0.2.5) **Merged pull requests:** - Use STDIN instead of ARGF [\#169](https://github.com/theodi/csvlint.rb/pull/169) ([pezholio](https://github.com/pezholio)) ## [0.2.4](https://github.com/theodi/csvlint.rb/tree/0.2.4) (2015-10-20) [Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.2.3...0.2.4) **Merged pull requests:** - Fixes for CLI [\#164](https://github.com/theodi/csvlint.rb/pull/164) ([pezholio](https://github.com/pezholio)) ## [0.2.3](https://github.com/theodi/csvlint.rb/tree/0.2.3) (2015-10-20) [Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.2.2...0.2.3) **Closed issues:** - Include field name with error [\#161](https://github.com/theodi/csvlint.rb/issues/161) - Refactor the binary [\#150](https://github.com/theodi/csvlint.rb/issues/150) **Merged pull requests:** - Refactor CLI [\#163](https://github.com/theodi/csvlint.rb/pull/163) ([pezholio](https://github.com/pezholio)) - Update schema file example to clarify type [\#162](https://github.com/theodi/csvlint.rb/pull/162) ([wachunga](https://github.com/wachunga)) ## [0.2.2](https://github.com/theodi/csvlint.rb/tree/0.2.2) (2015-10-09) [Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.2.1...0.2.2) **Closed issues:** - Eliminate some date and time formats \(for speed\) [\#105](https://github.com/theodi/csvlint.rb/issues/105) **Merged pull requests:** - Check characters in validate\_line method 
[\#160](https://github.com/theodi/csvlint.rb/pull/160) ([pezholio](https://github.com/pezholio)) - Further optimisations [\#159](https://github.com/theodi/csvlint.rb/pull/159) ([pezholio](https://github.com/pezholio)) - More optimizations after \#157 [\#158](https://github.com/theodi/csvlint.rb/pull/158) ([jpmckinney](https://github.com/jpmckinney)) - Memoize the result of CSV\#encode\_re [\#157](https://github.com/theodi/csvlint.rb/pull/157) ([jpmckinney](https://github.com/jpmckinney)) - Don't pass leading string to parse\_line [\#155](https://github.com/theodi/csvlint.rb/pull/155) ([pezholio](https://github.com/pezholio)) ## [0.2.1](https://github.com/theodi/csvlint.rb/tree/0.2.1) (2015-10-07) [Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.2.0...0.2.1) **Implemented enhancements:** - Get total rows number about the CSV file that was validated [\#143](https://github.com/theodi/csvlint.rb/issues/143) **Closed issues:** - Optimization: Stream CSV [\#122](https://github.com/theodi/csvlint.rb/issues/122) **Merged pull requests:** - Add `row\_count` method [\#153](https://github.com/theodi/csvlint.rb/pull/153) ([pezholio](https://github.com/pezholio)) - Streaming validation [\#146](https://github.com/theodi/csvlint.rb/pull/146) ([pezholio](https://github.com/pezholio)) ## [0.2.0](https://github.com/theodi/csvlint.rb/tree/0.2.0) (2015-10-05) [Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.1.4...0.2.0) **Closed issues:** - CSV on the web support [\#141](https://github.com/theodi/csvlint.rb/issues/141) **Merged pull requests:** - Recover from `ArgumentError`s when attempting to locate a schema and detect bad schema when JSON is malformed [\#152](https://github.com/theodi/csvlint.rb/pull/152) ([pezholio](https://github.com/pezholio)) - Catch errors if link headers are don't have particular values [\#151](https://github.com/theodi/csvlint.rb/pull/151) ([pezholio](https://github.com/pezholio)) - Rescue excel warning 
[\#149](https://github.com/theodi/csvlint.rb/pull/149) ([quadrophobiac](https://github.com/quadrophobiac)) - CSVW-based validation! [\#142](https://github.com/theodi/csvlint.rb/pull/142) ([JeniT](https://github.com/JeniT)) ## [0.1.4](https://github.com/theodi/csvlint.rb/tree/0.1.4) (2015-08-06) [Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.1.3...0.1.4) **Merged pull requests:** - change made to the constraint parameter in order that it is more cons… [\#140](https://github.com/theodi/csvlint.rb/pull/140) ([quadrophobiac](https://github.com/quadrophobiac)) ## [0.1.3](https://github.com/theodi/csvlint.rb/tree/0.1.3) (2015-07-24) [Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.1.2...0.1.3) **Merged pull requests:** - Error reporting schema expanded test suite [\#138](https://github.com/theodi/csvlint.rb/pull/138) ([quadrophobiac](https://github.com/quadrophobiac)) - Validate header size improvement [\#137](https://github.com/theodi/csvlint.rb/pull/137) ([adamc00](https://github.com/adamc00)) - Invalid schema [\#132](https://github.com/theodi/csvlint.rb/pull/132) ([bcouston](https://github.com/bcouston)) ## [0.1.2](https://github.com/theodi/csvlint.rb/tree/0.1.2) (2015-07-15) [Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.1.1...0.1.2) **Closed issues:** - When an encoding error is thrown the line content is put into the column field in the error object [\#131](https://github.com/theodi/csvlint.rb/issues/131) **Merged pull requests:** - Catch invalid URIs [\#133](https://github.com/theodi/csvlint.rb/pull/133) ([pezholio](https://github.com/pezholio)) - Emit a warning when the CSV header does not match the supplied schema [\#127](https://github.com/theodi/csvlint.rb/pull/127) ([adamc00](https://github.com/adamc00)) ## [0.1.1](https://github.com/theodi/csvlint.rb/tree/0.1.1) (2015-07-13) [Full Changelog](https://github.com/theodi/csvlint.rb/compare/0.1.0...0.1.1) **Closed issues:** - Add Command Line Support 
[\#128](https://github.com/theodi/csvlint.rb/issues/128) - BUG: Incorrect inconsistent\_values error on numeric columns [\#106](https://github.com/theodi/csvlint.rb/issues/106) **Merged pull requests:** - Fixes line content incorrectly being put into the row column field when there is an encoding error. [\#130](https://github.com/theodi/csvlint.rb/pull/130) ([glacier](https://github.com/glacier)) - Add command line help [\#129](https://github.com/theodi/csvlint.rb/pull/129) ([pezholio](https://github.com/pezholio)) - Remove stray q character. [\#125](https://github.com/theodi/csvlint.rb/pull/125) ([adamc00](https://github.com/adamc00)) - csvlint utility can take arguments to specify a schema and pp errors [\#124](https://github.com/theodi/csvlint.rb/pull/124) ([adamc00](https://github.com/adamc00)) - Fixed warning - use expect\( \) rather than .should [\#123](https://github.com/theodi/csvlint.rb/pull/123) ([jezhiggins](https://github.com/jezhiggins)) - Fixed spelling mistake [\#121](https://github.com/theodi/csvlint.rb/pull/121) ([jezhiggins](https://github.com/jezhiggins)) - Avoid using \#blank? 
if unnecessary [\#120](https://github.com/theodi/csvlint.rb/pull/120) ([jpmckinney](https://github.com/jpmckinney)) - eliminate some date and time formats, related \#105 [\#119](https://github.com/theodi/csvlint.rb/pull/119) ([jpmckinney](https://github.com/jpmckinney)) - Match another CSV error about line endings [\#118](https://github.com/theodi/csvlint.rb/pull/118) ([jpmckinney](https://github.com/jpmckinney)) - fixed typo mistake in README [\#117](https://github.com/theodi/csvlint.rb/pull/117) ([railsfactory-kumaresan](https://github.com/railsfactory-kumaresan)) - Integrate @jpmickinney's build\_formats improvements [\#112](https://github.com/theodi/csvlint.rb/pull/112) ([Floppy](https://github.com/Floppy)) - make limit\_lines into a non-dialect option [\#110](https://github.com/theodi/csvlint.rb/pull/110) ([Floppy](https://github.com/Floppy)) - fix coveralls stats [\#109](https://github.com/theodi/csvlint.rb/pull/109) ([Floppy](https://github.com/Floppy)) - Limit lines [\#101](https://github.com/theodi/csvlint.rb/pull/101) ([Hoedic](https://github.com/Hoedic)) ## [0.1.0](https://github.com/theodi/csvlint.rb/tree/0.1.0) (2014-11-27) **Implemented enhancements:** - Blank values shouldn't count as inconsistencies [\#90](https://github.com/theodi/csvlint.rb/issues/90) - Make sure we don't check schema column count and ragged row count together [\#66](https://github.com/theodi/csvlint.rb/issues/66) - Include the failed constraints in error message when doing field validation [\#64](https://github.com/theodi/csvlint.rb/issues/64) - Include the column value in error message when field validation fails [\#63](https://github.com/theodi/csvlint.rb/issues/63) - Expose optional JSON table schema fields [\#55](https://github.com/theodi/csvlint.rb/issues/55) - Ensure header rows are properly handled and validated [\#48](https://github.com/theodi/csvlint.rb/issues/48) - Support zipped CSV? 
[\#30](https://github.com/theodi/csvlint.rb/issues/30) - Improve feedback on inconsistent values [\#29](https://github.com/theodi/csvlint.rb/issues/29) - Reported error positions are not massively useful [\#15](https://github.com/theodi/csvlint.rb/issues/15) **Fixed bugs:** - undefined method `\[\]' for nil:NilClass from fetch\_error [\#71](https://github.com/theodi/csvlint.rb/issues/71) - Inconsistent column bases [\#69](https://github.com/theodi/csvlint.rb/issues/69) - Improve error handling in Schema loading [\#42](https://github.com/theodi/csvlint.rb/issues/42) - Recover from some line ending problems [\#41](https://github.com/theodi/csvlint.rb/issues/41) - Inconsistent values due to number format differences [\#32](https://github.com/theodi/csvlint.rb/issues/32) - New lines in quoted fields are valid [\#31](https://github.com/theodi/csvlint.rb/issues/31) - Wrongly reporting incorrect file extension [\#23](https://github.com/theodi/csvlint.rb/issues/23) - Incorrect extension reported when URL has query options at the end [\#14](https://github.com/theodi/csvlint.rb/issues/14) **Closed issues:** - Get gem continuously deploying [\#93](https://github.com/theodi/csvlint.rb/issues/93) - Publish on rubygems.org [\#92](https://github.com/theodi/csvlint.rb/issues/92) - Duplicate column names [\#87](https://github.com/theodi/csvlint.rb/issues/87) - Return code is always 0 \(except when it isn't\) [\#85](https://github.com/theodi/csvlint.rb/issues/85) - Can't pipe data to csvlint [\#84](https://github.com/theodi/csvlint.rb/issues/84) - They have some validator running if someone wants to inspect it for "inspiration" [\#27](https://github.com/theodi/csvlint.rb/issues/27) - Allow CSV parsing options to be configured as a parameter [\#6](https://github.com/theodi/csvlint.rb/issues/6) - Use explicit CSV parsing options [\#5](https://github.com/theodi/csvlint.rb/issues/5) - Improving encoding detection [\#2](https://github.com/theodi/csvlint.rb/issues/2) **Merged pull 
requests:** - Speed up \#build\_formats \(changes its API\) [\#103](https://github.com/theodi/csvlint.rb/pull/103) ([jpmckinney](https://github.com/jpmckinney)) - Continuously deploy gem [\#102](https://github.com/theodi/csvlint.rb/pull/102) ([pezholio](https://github.com/pezholio)) - Make csvlint way faster [\#99](https://github.com/theodi/csvlint.rb/pull/99) ([jpmckinney](https://github.com/jpmckinney)) - Update README.md [\#98](https://github.com/theodi/csvlint.rb/pull/98) ([rmalecky](https://github.com/rmalecky)) - Undeclared header error [\#95](https://github.com/theodi/csvlint.rb/pull/95) ([Floppy](https://github.com/Floppy)) - Blank values shouldn't count as inconsistencies [\#91](https://github.com/theodi/csvlint.rb/pull/91) ([pezholio](https://github.com/pezholio)) - Use `reject` instead of `delete\_if` [\#89](https://github.com/theodi/csvlint.rb/pull/89) ([pezholio](https://github.com/pezholio)) - Raise a warning if a title row is found [\#88](https://github.com/theodi/csvlint.rb/pull/88) ([pezholio](https://github.com/pezholio)) - Improve executable [\#86](https://github.com/theodi/csvlint.rb/pull/86) ([pezholio](https://github.com/pezholio)) - Feature undeclared header [\#83](https://github.com/theodi/csvlint.rb/pull/83) ([ldodds](https://github.com/ldodds)) - Support xsd:integer [\#82](https://github.com/theodi/csvlint.rb/pull/82) ([ldodds](https://github.com/ldodds)) - Downgrade header errors [\#81](https://github.com/theodi/csvlint.rb/pull/81) ([ldodds](https://github.com/ldodds)) - Go home, pry [\#78](https://github.com/theodi/csvlint.rb/pull/78) ([pikesley](https://github.com/pikesley)) - Use type validations to check consistency [\#77](https://github.com/theodi/csvlint.rb/pull/77) ([pezholio](https://github.com/pezholio)) - Add data accessor [\#76](https://github.com/theodi/csvlint.rb/pull/76) ([Floppy](https://github.com/Floppy)) - Add failed constraints to schema errors [\#75](https://github.com/theodi/csvlint.rb/pull/75) 
([ldodds](https://github.com/ldodds)) - Only perform ragged row check if there's no schema [\#74](https://github.com/theodi/csvlint.rb/pull/74) ([ldodds](https://github.com/ldodds)) - Handle tempfiles [\#73](https://github.com/theodi/csvlint.rb/pull/73) ([pezholio](https://github.com/pezholio)) - Catch errors if regex doesn't match [\#72](https://github.com/theodi/csvlint.rb/pull/72) ([pezholio](https://github.com/pezholio)) - Inconsistent column base [\#70](https://github.com/theodi/csvlint.rb/pull/70) ([ldodds](https://github.com/ldodds)) - include column name in :header\_name message [\#68](https://github.com/theodi/csvlint.rb/pull/68) ([Floppy](https://github.com/Floppy)) - Record default dialect [\#67](https://github.com/theodi/csvlint.rb/pull/67) ([pezholio](https://github.com/pezholio)) - Schema validation message improvements [\#65](https://github.com/theodi/csvlint.rb/pull/65) ([Floppy](https://github.com/Floppy)) - Fix ignore empty fields [\#62](https://github.com/theodi/csvlint.rb/pull/62) ([ldodds](https://github.com/ldodds)) - Create stub schema from existing CSV file [\#61](https://github.com/theodi/csvlint.rb/pull/61) ([ldodds](https://github.com/ldodds)) - Validate dates [\#59](https://github.com/theodi/csvlint.rb/pull/59) ([ldodds](https://github.com/ldodds)) - add schema access from validator [\#58](https://github.com/theodi/csvlint.rb/pull/58) ([Floppy](https://github.com/Floppy)) - Allow schema and fields to have title and description [\#57](https://github.com/theodi/csvlint.rb/pull/57) ([ldodds](https://github.com/ldodds)) - Feature min max ranges [\#56](https://github.com/theodi/csvlint.rb/pull/56) ([ldodds](https://github.com/ldodds)) - Check header without schema [\#54](https://github.com/theodi/csvlint.rb/pull/54) ([ldodds](https://github.com/ldodds)) - Validate types [\#53](https://github.com/theodi/csvlint.rb/pull/53) ([pikesley](https://github.com/pikesley)) - Added open\_uri\_redirections to allow HTTP/HTTPS transfers 
[\#52](https://github.com/theodi/csvlint.rb/pull/52) ([ldodds](https://github.com/ldodds)) - Added docs on CSV options and header error/warning messages [\#51](https://github.com/theodi/csvlint.rb/pull/51) ([ldodds](https://github.com/ldodds)) - Feature header validation [\#50](https://github.com/theodi/csvlint.rb/pull/50) ([ldodds](https://github.com/ldodds)) - Handle unique columns [\#49](https://github.com/theodi/csvlint.rb/pull/49) ([pikesley](https://github.com/pikesley)) - Validate all the fields [\#47](https://github.com/theodi/csvlint.rb/pull/47) ([ldodds](https://github.com/ldodds)) - Tolerate incomplete schemas [\#46](https://github.com/theodi/csvlint.rb/pull/46) ([ldodds](https://github.com/ldodds)) - Add accessor for line breaks [\#45](https://github.com/theodi/csvlint.rb/pull/45) ([Floppy](https://github.com/Floppy)) - update README for info messages and new error types [\#44](https://github.com/theodi/csvlint.rb/pull/44) ([Floppy](https://github.com/Floppy)) - Info messages for line breaks [\#43](https://github.com/theodi/csvlint.rb/pull/43) ([Floppy](https://github.com/Floppy)) - Add category to messages [\#40](https://github.com/theodi/csvlint.rb/pull/40) ([ldodds](https://github.com/ldodds)) - Badges [\#39](https://github.com/theodi/csvlint.rb/pull/39) ([pikesley](https://github.com/pikesley)) - Generic field validation using JSON Table Schema [\#38](https://github.com/theodi/csvlint.rb/pull/38) ([ldodds](https://github.com/ldodds)) - Feature validate strings and files [\#37](https://github.com/theodi/csvlint.rb/pull/37) ([ldodds](https://github.com/ldodds)) - Support reporting of column number in errors [\#36](https://github.com/theodi/csvlint.rb/pull/36) ([ldodds](https://github.com/ldodds)) - Fix up casing of keys in CSV DDF options [\#35](https://github.com/theodi/csvlint.rb/pull/35) ([ldodds](https://github.com/ldodds)) - Add errors for incorrect newlines [\#34](https://github.com/theodi/csvlint.rb/pull/34) 
([pezholio](https://github.com/pezholio)) - Change from parsing CSV line by line to using CSV.new and trapping errors [\#33](https://github.com/theodi/csvlint.rb/pull/33) ([ldodds](https://github.com/ldodds)) - Improved the README, tweaked LICENSE [\#28](https://github.com/theodi/csvlint.rb/pull/28) ([ldodds](https://github.com/ldodds)) - Handle 404s [\#26](https://github.com/theodi/csvlint.rb/pull/26) ([pezholio](https://github.com/pezholio)) - Create more fine-grained errors and warnings for content type issues [\#25](https://github.com/theodi/csvlint.rb/pull/25) ([ldodds](https://github.com/ldodds)) - Report trailing empty rows as an error. Previously threw exception [\#24](https://github.com/theodi/csvlint.rb/pull/24) ([ldodds](https://github.com/ldodds)) - Simplify the guessing of column types [\#22](https://github.com/theodi/csvlint.rb/pull/22) ([ldodds](https://github.com/ldodds)) - Class-ify error messages [\#21](https://github.com/theodi/csvlint.rb/pull/21) ([pezholio](https://github.com/pezholio)) - Error extracts [\#20](https://github.com/theodi/csvlint.rb/pull/20) ([Floppy](https://github.com/Floppy)) - Return headers [\#19](https://github.com/theodi/csvlint.rb/pull/19) ([pezholio](https://github.com/pezholio)) - Return a warning if no character set specified [\#18](https://github.com/theodi/csvlint.rb/pull/18) ([pezholio](https://github.com/pezholio)) - Ignore query params [\#17](https://github.com/theodi/csvlint.rb/pull/17) ([Floppy](https://github.com/Floppy)) - Add invalid\_encoding error for invalid byte sequences [\#16](https://github.com/theodi/csvlint.rb/pull/16) ([ldodds](https://github.com/ldodds)) - Check inconsistent values [\#13](https://github.com/theodi/csvlint.rb/pull/13) ([pezholio](https://github.com/pezholio)) - Add CSV dialect options [\#11](https://github.com/theodi/csvlint.rb/pull/11) ([pezholio](https://github.com/pezholio)) - Return warning if extension doesn't match content type 
[\#10](https://github.com/theodi/csvlint.rb/pull/10) ([pezholio](https://github.com/pezholio)) - Return warnings for file extension [\#8](https://github.com/theodi/csvlint.rb/pull/8) ([pezholio](https://github.com/pezholio)) - Detect blank rows [\#7](https://github.com/theodi/csvlint.rb/pull/7) ([pezholio](https://github.com/pezholio)) - Detect bad content type [\#3](https://github.com/theodi/csvlint.rb/pull/3) ([pezholio](https://github.com/pezholio)) - Return information about CSV [\#1](https://github.com/theodi/csvlint.rb/pull/1) ([pezholio](https://github.com/pezholio)) \* *This Change Log was automatically generated by [github_changelog_generator](https://github.com/skywinder/Github-Changelog-Generator)* ================================================ FILE: CODE_OF_CONDUCT.md ================================================ ## Code of Conduct ### Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
### Our Standards Examples of behavior that contributes to creating a positive environment include: * Using welcoming and inclusive language * Being respectful of differing viewpoints and experiences * Gracefully accepting constructive criticism * Focusing on what is best for the community * Showing empathy towards other community members Examples of unacceptable behavior by participants include: * The use of sexualized language or imagery and unwelcome sexual attention or advances * Trolling, insulting/derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or electronic address, without explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ### Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. ### Scope This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 
### Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at [labs@theodi.org](mailto:labs@theodi.org). All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. ### Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] [homepage]: http://contributor-covenant.org [version]: http://contributor-covenant.org/version/1/4/ ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing to CSVlint.rb The CSVlint library is open source, and contributions are gratefully accepted! Details on how to contribute are below. By participating in this project, you agree to abide by our [Code of Conduct](https://github.com/theodi/csvlint.rb/blob/CODE_OF_CONDUCT.md). Before you start coding, please reach out to us either on our [gitter channel](https://gitter.im/theodi/toolbox) or by tagging a repository administrator on the issue ticket you are interested in contributing towards to indicate your interest in helping. If this is your first time contributing to the ODI’s codebase you will need to [create a fork of this repository](https://help.github.com/articles/fork-a-repo/). 
Consult our [Getting Started Guide](https://github.com/theodi/toolbox/wiki/Developers-Guide:-Getting-Started) (if necessary) and then follow the [readme instructions](https://github.com/theodi/csvlint.rb/blob/master/README.md#development) to get your Development environment running locally. Ensure that the [tests](https://github.com/theodi/csvlint.rb/blob/master/README.md#tests) pass before working on your contribution. ## Code Review Process All contributions to the codebase - whether fork or pull request - will be reviewed per the below criteria. To increase your chances of your push being accepted please be aware of the following: - Write [well formed commit messages](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html) - Follow our [style guide recommendations](https://github.com/theodi/toolbox/blob/README.md#code-style-guide) - Write tests for all changes (additions or refactors of existing code). - Of the github integrations we use, the following will be utilised to appraise your contribution. In order of priority these are: - Travis ensures that all tests (existing and additions) pass - Travis/Coveralls ensures that overall test coverage for lines of code meets a certain threshold. If this metric dips below what it previously was for the repository you’re pushing to then your PR will be rejected - Gemnasium ensures dependencies are up to date - Once your PR is published and passes the above checks a repository administrator will review your contribution. Where appropriate comments may be provided and amendments suggested before your PR is merged into Master. - Once your PR is accepted you will be granted push access to the repository you have contributed to! Congratulations on joining our community, you’ll no longer need to work from forks. If you make a contribution to another repository in the Toolbox you will be expected to repeat this process. Read more about that [here](https://github.com/theodi/toolbox/blob/master/README.md#push-access).
## Code Style Guide We follow the same code style conventions as detailed in Github’s [Ruby Style Guide](https://github.com/github/rubocop-github/blob/master/STYLEGUIDE.md) ================================================ FILE: Dockerfile ================================================ FROM ruby:2.5.8-buster # throw errors if Gemfile has been modified since Gemfile.lock RUN bundle config --global frozen 1 WORKDIR /usr/src/app ENV LANG C.UTF-8 COPY ./lib/csvlint/version.rb ./lib/csvlint/ COPY csvlint.gemspec Gemfile Gemfile.lock ./ RUN bundle install COPY ./ ./ CMD ["./bin/csvlint"] ================================================ FILE: Gemfile ================================================ source "https://rubygems.org" # Specify your gem's dependencies in csvlint.rb.gemspec gemspec ================================================ FILE: LICENSE.md ================================================ ##Copyright (c) 2014 The Open Data Institute #MIT License Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: README.md ================================================ [![Build Status](https://img.shields.io/github/workflow/status/Data-Liberation-Front/csvlint.rb/CI/main)](https://travis-ci.org/theodi/csvlint.rb) [![Dependency Status](https://img.shields.io/librariesio/github/Data-Liberation-Front/csvlint.rb)](https://libraries.io/github/Data-Liberation-Front/csvlint.rb) [![Coverage Status](http://img.shields.io/coveralls/Data-Liberation-Front/csvlint.rb.svg)](https://coveralls.io/r/Data-Liberation-Front/csvlint.rb) [![License](http://img.shields.io/:license-mit-blue.svg)](http://theodi.mit-license.org) [![Badges](http://img.shields.io/:badges-5/5-ff6799.svg)](https://github.com/pikesley/badger) # CSV Lint A ruby gem to support validating CSV files to check their syntax and contents. You can either use this gem within your own Ruby code, or as a standalone command line application. ## Summary of features * Validation that checks the structural formatting of a CSV file * Validation of a delimiter-separated values (dsv) file accessible via URL, File, or an IO-style object (e.g. StringIO) * Validation against [CSV dialects](http://dataprotocols.org/csv-dialect/) * Validation against multiple schema standards; [JSON Table Schema](https://github.com/theodi/csvlint.rb/blob/master/README.md#json-table-schema-support) and [CSV on the Web](https://github.com/theodi/csvlint.rb/blob/master/README.md#csv-on-the-web-validation-support) ## Development `ruby version 4.0` ### Tests The codebase includes both rspec and cucumber tests, which can be run together using: $ rake or separately: $ rake spec $ rake features When the cucumber tests are first run, a script will create tests based on the latest version of the [CSV on the Web test suite](http://w3c.github.io/csvw/tests/), including creating a local cache of the test files. This requires an internet connection and some patience.
Following that download, the tests will run locally; there's also a batch script: $ bin/run-csvw-tests which will run the tests from the command line. If you need to refresh the CSV on the Web tests: $ rm bin/run-csvw-tests $ rm features/csvw_validation_tests.feature $ rm -r features/fixtures/csvw and then run the cucumber tests again or: $ ruby features/support/load_tests.rb ## Installation Add this line to your application's Gemfile: gem 'csvlint' And then execute: $ bundle Or install it yourself as: $ gem install csvlint ## Usage You can either use this gem within your own Ruby code, or as a standalone command line application ## On the command line After installing the gem, you can validate a CSV on the command line like so: csvlint myfile.csv You may need to add the gem executable directory to your path, by adding '/usr/local/lib/ruby/gems/2.6.0/bin' or whatever your version is, to your .bash_profile PATH entry, [like so](https://stackoverflow.com/questions/2392293/ruby-gems-returns-command-not-found). You will then see the validation result, together with any warnings or errors e.g. ``` myfile.csv is INVALID 1. blank_rows. Row: 3 1. title_row. 2. inconsistent_values. Column: 14 ``` You can also optionally pass a schema file like so: csvlint myfile.csv --schema=schema.json ## Via pre-commit Add to your .pre-commit-config.yaml file : ``` repos: # `pre-commit autoupdate` to get latest available tags - repo: https://github.com/Data-Liberation-Front/csvlint.rb rev: v1.2.0 hooks: - id: csvlint ``` `pre-commit install` to enable it on your repository. To force a manual run of [pre-commit](https://pre-commit.com/) use the command : ``` pre-commit run -a ``` ## In your own Ruby code Currently the gem supports retrieving a CSV accessible from a URL, File, or an IO-style object (e.g.
StringIO) require 'csvlint' validator = Csvlint::Validator.new( "http://example.org/data.csv" ) validator = Csvlint::Validator.new( File.new("/path/to/my/data.csv" )) validator = Csvlint::Validator.new( StringIO.new( my_data_in_a_string ) ) When validating from a URL the range of errors and warnings is wider as the library will also check HTTP headers for best practices #invoke the validation validator.validate #check validation status validator.valid? #access array of errors, each is a Csvlint::ErrorMessage object validator.errors #access array of warnings validator.warnings #access array of information messages validator.info_messages #get some information about the CSV file that was validated validator.encoding validator.content_type validator.extension validator.row_count #retrieve HTTP headers from request validator.headers ## Controlling CSV Parsing The validator supports configuration of the [CSV Dialect](http://dataprotocols.org/csv-dialect/) used in a data file. This is specified by passing a dialect hash to the constructor: dialect = { "header" => true, "delimiter" => "," } validator = Csvlint::Validator.new( "http://example.org/data.csv", dialect ) The options should be a Hash that conforms to the [CSV Dialect](http://dataprotocols.org/csv-dialect/) JSON structure. While these options configure the parser to correctly process the file, the validator will still raise errors or warnings for CSV structure that it considers to be invalid, e.g. a missing header or different delimiters. Note that the parser will also check for a `header` parameter on the `Content-Type` header returned when fetching a remote CSV file. As specified in [RFC 4180](http://www.ietf.org/rfc/rfc4180.txt) the values for this can be `present` or `absent`, e.g: Content-Type: text/csv; header=present ## Error Reporting The validator provides feedback on a validation result using instances of `Csvlint::ErrorMessage`. Messages are divided into errors, warnings and information messages.
A validation attempt is successful if there are no errors. Messages provide context including: * `category` has a symbol that indicates the category of error/warning: `:structure` (well-formedness issues), `:schema` (schema validation), `:context` (publishing metadata, e.g. content type) * `type` has a symbol that indicates the type of error or warning being reported * `row` holds the line number of the problem * `column` holds the column number of the issue * `content` holds the contents of the row that generated the error or warning ## Errors The following types of error can be reported: * `:wrong_content_type` -- content type is not `text/csv` * `:ragged_rows` -- row has a different number of columns (than the first row in the file) * `:blank_rows` -- completely empty row, e.g. blank line or a line where all column values are empty * `:invalid_encoding` -- encoding error when parsing row, e.g. because of invalid characters * `:not_found` -- HTTP 404 error when retrieving the data * `:stray_quote` -- missing or stray quote * `:unclosed_quote` -- unclosed quoted field * `:whitespace` -- a quoted column has leading or trailing whitespace * `:line_breaks` -- line breaks were inconsistent or incorrectly specified ## Warnings The following types of warning can be reported: * `:no_encoding` -- the `Content-Type` header returned in the HTTP request does not have a `charset` parameter * `:encoding` -- the character set is not UTF-8 * `:no_content_type` -- file is being served without a `Content-Type` header * `:excel` -- no `Content-Type` header and the file extension is `.xls` * `:check_options` -- CSV file appears to contain only a single column * `:inconsistent_values` -- inconsistent values in the same column.
Reported if <90% of values seem to have the same data type (either numeric or alphanumeric including punctuation) * `:empty_column_name` -- a column in the CSV header has an empty name * `:duplicate_column_name` -- a column in the CSV header has a duplicate name * `:title_row` -- if there appears to be a title field in the first row of the CSV ## Information Messages There are also information messages available: * `:nonrfc_line_breaks` -- uses non-CRLF line breaks, so doesn't conform to RFC4180. * `:assumed_header` -- the validator has assumed that a header is present ## Schema Validation The library supports validating data against a schema. A schema configuration can be provided as a Hash or parsed from JSON. The structure currently follows JSON Table Schema with some extensions and rudimentary [CSV on the Web Metadata](http://www.w3.org/TR/tabular-metadata/). An example JSON Table Schema schema file is: { "fields": [ { "name": "id", "constraints": { "required": true, "type": "http://www.w3.org/TR/xmlschema-2/#integer" } }, { "name": "price", "constraints": { "required": true, "minLength": 1 } }, { "name": "postcode", "constraints": { "required": true, "pattern": "[A-Z]{1,2}[0-9][0-9A-Z]?
?[0-9][A-Z]{2}" } } ] } An equivalent CSV on the Web Metadata file is: { "@context": "http://www.w3.org/ns/csvw", "url": "http://example.com/example1.csv", "tableSchema": { "columns": [ { "name": "id", "required": true, "datatype": { "base": "integer" } }, { "name": "price", "required": true, "datatype": { "base": "string", "minLength": 1 } }, { "name": "postcode", "required": true } ] } } Parsing and validating with a schema (of either kind): schema = Csvlint::Schema.load_from_json(uri) validator = Csvlint::Validator.new( "http://example.org/data.csv", nil, schema ) ### CSV on the Web Validation Support This gem passes all the validation tests in the [official CSV on the Web test suite](http://w3c.github.io/csvw/tests/) (though there might still be errors or parts of the [CSV on the Web standard](http://www.w3.org/TR/tabular-metadata/) that aren't tested by that test suite). ### JSON Table Schema Support Supported constraints: * `required` -- there must be a value for this field in every row * `unique` -- the values in every row should be unique * `minLength` -- minimum number of characters in the value * `maxLength` -- maximum number of characters in the value * `pattern` -- values must match the provided regular expression * `type` -- specifies an XML Schema data type. 
Values of the column must be a valid value for that type * `minimum` -- specify a minimum range for values, the value will be parsed as specified by `type` * `maximum` -- specify a maximum range for values, the value will be parsed as specified by `type` * `datePattern` -- specify a `strftime` compatible date pattern to be used when parsing date values and min/max constraints Supported data types (this is still a work in progress): * String -- `http://www.w3.org/2001/XMLSchema#string` (effectively a no-op) * Integer -- `http://www.w3.org/2001/XMLSchema#integer` or `http://www.w3.org/2001/XMLSchema#int` * Float -- `http://www.w3.org/2001/XMLSchema#float` * Double -- `http://www.w3.org/2001/XMLSchema#double` * URI -- `http://www.w3.org/2001/XMLSchema#anyURI` * Boolean -- `http://www.w3.org/2001/XMLSchema#boolean` * Non Positive Integer -- `http://www.w3.org/2001/XMLSchema#nonPositiveInteger` * Positive Integer -- `http://www.w3.org/2001/XMLSchema#positiveInteger` * Non Negative Integer -- `http://www.w3.org/2001/XMLSchema#nonNegativeInteger` * Negative Integer -- `http://www.w3.org/2001/XMLSchema#negativeInteger` * Date -- `http://www.w3.org/2001/XMLSchema#date` * Date Time -- `http://www.w3.org/2001/XMLSchema#dateTime` * Year -- `http://www.w3.org/2001/XMLSchema#gYear` * Year Month -- `http://www.w3.org/2001/XMLSchema#gYearMonth` * Time -- `http://www.w3.org/2001/XMLSchema#time` Use of an unknown data type will result in the column failing to validate. 
Schema validation provides some additional types of error and warning messages: * `:missing_value` (error) -- a column marked as `required` in the schema has no value * `:min_length` (error) -- a column with a `minLength` constraint has a value that is too short * `:max_length` (error) -- a column with a `maxLength` constraint has a value that is too long * `:pattern` (error) -- a column with a `pattern` constraint has a value that doesn't match the regular expression * `:malformed_header` (warning) -- the header in the CSV doesn't match the schema * `:missing_column` (warning) -- a row in the CSV file is missing a column that is specified in the schema. This is a warning only, as it may be legitimate * `:extra_column` (warning) -- a row in the CSV file has an extra column. * `:unique` (error) -- a column with a `unique` constraint contains non-unique values * `:below_minimum` (error) -- a column with a `minimum` constraint contains a value that is below the minimum * `:above_maximum` (error) -- a column with a `maximum` constraint contains a value that is above the maximum ### Other validation options You can also provide an optional options hash as the fourth argument to Validator#new. Supported options are: * :limit_lines -- only check this number of lines of the CSV file. Good for a quick check on huge files. ``` options = { limit_lines: 100 } validator = Csvlint::Validator.new( "http://example.org/data.csv", nil, nil, options ) ``` * :lambda -- Pass a block of code to be called when each line is validated; this will give you access to the `Validator` object. For example, this will print the current line number for every line validated: ``` options = { lambda: ->(validator) { puts validator.current_line } } validator = Csvlint::Validator.new( "http://example.org/data.csv", nil, nil, options ) => 1 2 3 4 .....
``` ================================================ FILE: Rakefile ================================================ require "bundler/gem_tasks" $:.unshift File.join(File.dirname(__FILE__), "lib") require "rubygems" require "cucumber" require "cucumber/rake/task" require "coveralls/rake/task" require "rspec/core/rake_task" RSpec::Core::RakeTask.new(:spec) Coveralls::RakeTask.new Cucumber::Rake::Task.new(:features) do |t| t.cucumber_opts = "features --format pretty" end task default: [:spec, :features, "coveralls:push"] ================================================ FILE: bin/create_schema ================================================ #!/usr/bin/env ruby $:.unshift File.join( File.dirname(__FILE__), "..", "lib") require 'csvlint' begin puts ARGV[0] csv = CSV.new( URI.open(ARGV[0]) ) headers = csv.shift name = File.basename( ARGV[0] ) schema = { "title" => name, "description" => "Auto generated schema for #{name}", "fields" => [] } headers.each do |name| schema["fields"] << { "name" => name, "title" => "", "description" => "", "constraints" => {} } end $stdout.puts JSON.pretty_generate(schema) rescue => e puts e puts e.backtrace puts "Unable to parse CSV file" end ================================================ FILE: bin/csvlint ================================================ #!/usr/bin/env ruby $:.unshift File.join( File.dirname(__FILE__), "..", "lib") require 'csvlint/cli' if ARGV == ["help"] Csvlint::Cli.start(["help"]) else Csvlint::Cli.start(ARGV.unshift("validate")) end ================================================ FILE: csvlint.gemspec ================================================ lib = File.expand_path("../lib", __FILE__) $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) require "csvlint/version" Gem::Specification.new do |spec| spec.name = "csvlint" spec.version = Csvlint::VERSION spec.authors = ["pezholio"] spec.email = ["pezholio@gmail.com"] spec.description = "CSV Validator" spec.summary = "CSV Validator" spec.homepage = 
"https://github.com/theodi/csvlint.rb" spec.license = "MIT" spec.files = `git ls-files`.split($/) spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } spec.require_paths = ["lib"] spec.required_ruby_version = [">= 2.5", "< 4.1"] spec.add_dependency "csv" spec.add_dependency "rainbow" spec.add_dependency "open_uri_redirections" spec.add_dependency "activesupport" spec.add_dependency "addressable" spec.add_dependency "typhoeus" spec.add_dependency "escape_utils" spec.add_dependency "uri_template" spec.add_dependency "thor" spec.add_dependency "rack" spec.add_dependency "net-http-persistent" spec.add_dependency "mutex_m" # For Ruby 3.4+ spec.add_development_dependency "bundler", ">= 1.3" spec.add_development_dependency "rake" spec.add_development_dependency "cucumber" spec.add_development_dependency "simplecov" spec.add_development_dependency "simplecov-rcov" spec.add_development_dependency "spork" spec.add_development_dependency "webmock" spec.add_development_dependency "rspec" spec.add_development_dependency "rspec-pride" spec.add_development_dependency "rspec-expectations" spec.add_development_dependency "coveralls_reborn" spec.add_development_dependency "byebug" spec.add_development_dependency "github_changelog_generator" spec.add_development_dependency "aruba" spec.add_development_dependency "rdf", "< 4.0" spec.add_development_dependency "rdf-turtle" spec.add_development_dependency "standardrb" spec.add_development_dependency "appraisal" spec.add_development_dependency "benchmark" end ================================================ FILE: docker_notes_for_windows.txt ================================================ # Note that these commands are specific for a docker environment on MS Windows. # to generate Gemfile.lock file docker run --rm -v %CD%:/usr/src/app -w /usr/src/app ruby:2.5 bundle install # to build docker image from source (the ending dot is significant) docker build -t csvlint . 
# to run tests docker run -it --rm csvlint rake # to run csvlint command line with a CSV file. # cd to the directory with the CSV file then docker run -it --rm -v %CD%:/tmp csvlint ./bin/csvlint --dump-errors /tmp/file-to-lint.csv # to enter the linux container docker run -it --rm -v %CD%:/tmp csvlint bash # to enter the ruby REPL docker run -it --rm -v %CD%:/tmp csvlint irb ================================================ FILE: features/check_format.feature ================================================ Feature: Check inconsistent formatting Scenario: Inconsistent formatting for integers Given I have a CSV with the following content: """ "1","2","3" "Foo","5","6" "3","2","1" "3","2","1" """ And it is stored at the url "http://example.com/example1.csv" And I ask if there are warnings Then there should be 1 warnings And that warning should have the type "inconsistent_values" And that warning should have the column "1" Scenario: Inconsistent formatting for alpha fields Given I have a CSV with the following content: """ "Foo","Bar","Baz" "Biz","1","Baff" "Boff","Giff","Goff" "Boff","Giff","Goff" """ And it is stored at the url "http://example.com/example1.csv" And I ask if there are warnings Then there should be 1 warnings And that warning should have the type "inconsistent_values" And that warning should have the column "2" Scenario: Inconsistent formatting for alphanumeric fields Given I have a CSV with the following content: """ "Foo 123","Bar","Baz" "1","Bar","Baff" "Boff 432423","Giff","Goff" "Boff444","Giff","Goff" """ And it is stored at the url "http://example.com/example1.csv" And I ask if there are warnings Then there should be 1 warnings And that warning should have the type "inconsistent_values" And that warning should have the column "1" ================================================ FILE: features/cli.feature ================================================ Feature: CSVlint CLI Scenario: Valid CSV from url Given I have a CSV with the following 
content: """ "Foo","Bar","Baz" "1","2","3" "3","2","1" """ And it is stored at the url "http://example.com/example1.csv" When I run `csvlint http://example.com/example1.csv` Then the output should contain "http://example.com/example1.csv is VALID" Scenario: Valid CSV from file When I run `csvlint ../../features/fixtures/valid.csv` Then the output should contain "valid.csv is VALID" # This is a hacky way of saying to run `cat features/fixtures/valid.csv | csvlint` Scenario: Valid CSV from pipe Given I have stubbed stdin to contain "features/fixtures/valid.csv" When I run `csvlint` Then the output should contain "CSV is VALID" Scenario: URL that 404s Given there is no file at the url "http://example.com/example1.csv" And there is no file at the url "http://example.com/.well-known/csvm" And there is no file at the url "http://example.com/example1.csv-metadata.json" And there is no file at the url "http://example.com/csv-metadata.json" When I run `csvlint http://example.com/example1.csv` Then the output should contain "http://example.com/example1.csv is INVALID" And the output should contain "not_found" Scenario: File doesn't exist When I run `csvlint ../../features/fixtures/non-existent-file.csv` Then the output should contain "non-existent-file.csv not found" Scenario: No file or URL specified Given I have stubbed stdin to contain nothing When I run `csvlint` Then the output should contain "No CSV data to validate" Scenario: No file or URL specified, but schema specified Given I have stubbed stdin to contain nothing And I have a schema with the following content: """ { "fields": [ { "name": "Name", "constraints": { "required": true } }, { "name": "Id", "constraints": { "required": true, "minLength": 1 } }, { "name": "Email", "constraints": { "required": true } } ] } """ And the schema is stored at the url "http://example.com/schema.json" When I run `csvlint --schema http://example.com/schema.json` Then the output should contain "No CSV data to validate" Scenario: 
Invalid CSV from url Given I have a CSV with the following content: """ "Foo", "Bar" , "Baz" """ And it is stored at the url "http://example.com/example1.csv" When I run `csvlint http://example.com/example1.csv` Then the output should contain "http://example.com/example1.csv is INVALID" And the output should contain "whitespace" Scenario: Invalid CSV from url with JSON Given I have a CSV with the following content: """ "Foo", "Bar" , "Baz" """ And it is stored at the url "http://example.com/example1.csv" When I run `csvlint http://example.com/example1.csv --json` Then the output should contain JSON And the JSON should have a state of "invalid" And the JSON should have 1 error And that error should have the "type" "whitespace" And that error should have the "category" "structure" And that error should have the "row" "1" Scenario: Specify schema Given I have a CSV with the following content: """ "Bob","1234","bob@example.org" "Alice","5","alice@example.com" """ And it is stored at the url "http://example.com/example1.csv" And I have a schema with the following content: """ { "fields": [ { "name": "Name", "constraints": { "required": true } }, { "name": "Id", "constraints": { "required": true, "minLength": 1 } }, { "name": "Email", "constraints": { "required": true } } ] } """ And the schema is stored at the url "http://example.com/schema.json" When I run `csvlint http://example.com/example1.csv --schema http://example.com/schema.json` Then the output should contain "http://example.com/example1.csv is VALID" Scenario: Schema errors Given I have a CSV with the following content: """ "Bob","1234","bob@example.org" "Alice","5","alice@example.com" """ And it is stored at the url "http://example.com/example1.csv" And I have a schema with the following content: """ { "fields": [ { "name": "Name", "constraints": { "required": true } }, { "name": "Id", "constraints": { "required": true, "minLength": 3 } }, { "name": "Email", "constraints": { "required": true } } ] } """ And 
the schema is stored at the url "http://example.com/schema.json" When I run `csvlint http://example.com/example1.csv --schema http://example.com/schema.json` Then the output should contain "http://example.com/example1.csv is INVALID" And the output should contain "1. Id: min_length. Row: 2,2. 5" And the output should contain "1. malformed_header. Row: 1. Bob,1234,bob@example.org" Scenario: Schema errors with JSON Given I have a CSV with the following content: """ "Bob","1234","bob@example.org" "Alice","5","alice@example.com" """ And it is stored at the url "http://example.com/example1.csv" And I have a schema with the following content: """ { "fields": [ { "name": "Name", "constraints": { "required": true } }, { "name": "Id", "constraints": { "required": true, "minLength": 3 } }, { "name": "Email", "constraints": { "required": true } } ] } """ And the schema is stored at the url "http://example.com/schema.json" When I run `csvlint http://example.com/example1.csv --schema http://example.com/schema.json --json` Then the output should contain JSON And the JSON should have a state of "invalid" And the JSON should have 1 error And error 1 should have the "type" "min_length" And error 1 should have the "header" "Id" And error 1 should have the constraint "min_length" "3" Scenario: Invalid schema Given I have a CSV with the following content: """ "Bob","1234","bob@example.org" "Alice","5","alice@example.com" """ And it is stored at the url "http://example.com/example1.csv" And I have a schema with the following content: """ NO JSON HERE SON """ And the schema is stored at the url "http://example.com/schema.json" Then nothing should be outputted to STDERR When I run `csvlint http://example.com/example1.csv --schema http://example.com/schema.json` And the output should contain "invalid metadata: malformed JSON" Scenario: Schema that 404s Given I have a CSV with the following content: """ "Bob","1234","bob@example.org" "Alice","5","alice@example.com" """ And it is stored at 
the url "http://example.com/example1.csv" And there is no file at the url "http://example.com/schema404.json" When I run `csvlint http://example.com/example1.csv --schema http://example.com/schema404.json` Then the output should contain "http://example.com/schema404.json not found" Scenario: Schema that doesn't exist Given I have a CSV with the following content: """ "Bob","1234","bob@example.org" "Alice","5","alice@example.com" """ And it is stored at the url "http://example.com/example1.csv" When I run `csvlint http://example.com/example1.csv --schema /fake/file/path.json` Then the output should contain "/fake/file/path.json not found" Scenario: Valid CSVw schema Given I have a CSV with the following content: """ "Bob","1234","bob@example.org" "Alice","5","alice@example.com" """ And it is stored at the url "http://example.com/example1.csv" And I have metadata with the following content: """ { "@context": "http://www.w3.org/ns/csvw", "url": "http://example.com/example1.csv", "dialect": { "header": false }, "tableSchema": { "columns": [ { "name": "Name", "required": true }, { "name": "Id", "required": true, "datatype": { "base": "string", "minLength": 1 } }, { "name": "Email", "required": true } ] } } """ And the schema is stored at the url "http://example.com/schema.json" When I run `csvlint http://example.com/example1.csv --schema http://example.com/schema.json` Then the output should contain "http://example.com/example1.csv is VALID" Scenario: CSVw schema with invalid CSV Given I have a CSV with the following content: """ "Bob","1234","bob@example.org" "Alice","5","alice@example.com" """ And it is stored at the url "http://example.com/example1.csv" And I have metadata with the following content: """ { "@context": "http://www.w3.org/ns/csvw", "url": "http://example.com/example1.csv", "dialect": { "header": false }, "tableSchema": { "columns": [ { "name": "Name", "required": true }, { "name": "Id", "required": true, "datatype": { "base": "string", "minLength": 3 } 
}, { "name": "Email", "required": true } ] } } """ And the schema is stored at the url "http://example.com/schema.json" When I run `csvlint http://example.com/example1.csv --schema http://example.com/schema.json` Then the output should contain "http://example.com/example1.csv is INVALID" And the output should contain "1. min_length. Row: 2,2. 5" Scenario: CSVw table Schema Given I have stubbed stdin to contain nothing And I have a metadata file called "csvw/countries.json" And the metadata is stored at the url "http://w3c.github.io/csvw/tests/countries.json" And I have a file called "csvw/countries.csv" at the url "http://w3c.github.io/csvw/tests/countries.csv" And I have a file called "csvw/country_slice.csv" at the url "http://w3c.github.io/csvw/tests/country_slice.csv" When I run `csvlint --schema http://w3c.github.io/csvw/tests/countries.json` Then the output should contain "http://w3c.github.io/csvw/tests/countries.csv is VALID" And the output should contain "http://w3c.github.io/csvw/tests/country_slice.csv is VALID" ================================================ FILE: features/csv_options.feature ================================================ Feature: CSV options Scenario: Successfully parse a valid CSV Given I have a CSV with the following content: """ 'Foo';'Bar';'Baz' '1';'2';'3' '3';'2';'1' """ And I set the delimiter to ";" And I set quotechar to "'" And it is stored at the url "http://example.com/example1.csv" When I ask if the CSV is valid Then I should get the value of true Scenario: Warn if options seem to return invalid data Given I have a CSV with the following content: """ 'Foo';'Bar';'Baz' '1';'2';'3' '3';'2';'1' """ And I set the delimiter to "," And I set quotechar to """ And it is stored at the url "http://example.com/example1.csv" And I ask if there are warnings Then there should be 1 warnings And that warning should have the type "check_options" Scenario: Use esoteric line endings Given I have a CSV file called "windows-line-endings.csv" 
And it is stored at the url "http://example.com/example1.csv" When I ask if the CSV is valid Then I should get the value of true ================================================ FILE: features/csvupload.feature ================================================ Feature: Collect all the tests that should trigger dialect check related errors Scenario: Title rows, I wish to trigger a :title_row type message Given I have a CSV file called "title-row.csv" And it is stored at the url "http://example.com/example1.csv" And I ask if there are warnings Then there should be 1 warnings And that warning should have the type "title_row" # :nonrfc_line_breaks Scenario: LF line endings in file give an info message of type :nonrfc_line_breaks Given I have a CSV file called "lf-line-endings.csv" And it is stored at the url "http://example.com/example1.csv" And I set header to "true" And I ask if there are info messages Then there should be 1 info message And one of the messages should have the type "nonrfc_line_breaks" Scenario: CRLF line endings in file produces no info messages of type :nonrfc_line_breaks Given I have a CSV file called "crlf-line-endings.csv" And it is stored at the url "http://example.com/example1.csv" And I set header to "true" And I ask if there are info messages Then there should be 0 info messages # :line_breaks Scenario: Incorrect line endings specified in settings Given I have a CSV file called "lf-line-endings.csv" And I set the line endings to carriage return And it is stored at the url "http://example.com/example1.csv" And I ask if there are errors Then there should be 1 error And that error should have the type "line_breaks" Scenario: inconsistent line endings in file cause an error Given I have a CSV file called "inconsistent-line-endings.csv" And it is stored at the url "http://example.com/example1.csv" And I ask if there are errors Then there should be 1 error And that error should have the type "line_breaks" Scenario: inconsistent line endings with 
unquoted fields in file cause an error Given I have a CSV file called "inconsistent-line-endings-unquoted.csv" And it is stored at the url "http://example.com/example1.csv" And I ask if there are errors Then there should be 1 error And that error should have the type "line_breaks" #:unclosed_quote Scenario: CSV with incorrect quoting Given I have a CSV with the following content: """ "col1","col2","col3" "Foo","Bar","Baz """ And it is stored at the url "http://example.com/example1.csv" When I ask if there are errors Then there should be 1 error And that error should have the type "unclosed_quote" And that error should have the row "2" And that error should have the content ""Foo","Bar","Baz" # :invalid_encoding Scenario: Report invalid Encoding Given I have a CSV file called "invalid-byte-sequence.csv" And I set an encoding header of "UTF-8" And it is stored at the url "http://example.com/example1.csv" When I ask if there are errors Then there should be 1 error And that error should have the type "invalid_encoding" Scenario: Report invalid file #should this throw an excel error? 
Given I have a CSV file called "spreadsheet.xls" And it is stored at the url "http://example.com/example1.csv" When I ask if there are errors Then there should be 1 error And that error should have the type "invalid_encoding" # :blank_rows Scenario: Successfully report a CSV with blank rows Given I have a CSV with the following content: """ "col1","col2","col3" "Foo","Bar","Baz" "","", "Baz","Bar","Foo" """ And it is stored at the url "http://example.com/example1.csv" When I ask if there are errors Then there should be 1 error And that error should have the type "blank_rows" And that error should have the row "3" And that error should have the content """,""," Scenario: Successfully report a CSV with multiple trailing empty rows Given I have a CSV with the following content: """ "col1","col2","col3" "Foo","Bar","Baz" "Foo","Bar","Baz" """ And it is stored at the url "http://example.com/example1.csv" When I ask if there are errors Then there should be 1 error And that error should have the type "blank_rows" And that error should have the row "4" Scenario: Successfully report a CSV with an empty row Given I have a CSV with the following content: """ "col1","col2","col3" "Foo","Bar","Baz" "Foo","Bar","Baz" """ And it is stored at the url "http://example.com/example1.csv" When I ask if there are errors Then there should be 1 error And that error should have the type "blank_rows" And that error should have the row "3" #:check_options Scenario: Warn if options seem to return invalid data Given I have a CSV with the following content: """ 'Foo';'Bar';'Baz' '1';'2';'3' '3';'2';'1' """ And I set the delimiter to "," And I set quotechar to """ And it is stored at the url "http://example.com/example1.csv" And I ask if there are warnings Then there should be 1 warnings And that warning should have the type "check_options" ================================================ FILE: features/csvw_schema_validation.feature ================================================ Feature: CSVW 
Schema Validation Scenario: Valid CSV Given I have a CSV with the following content: """ "Bob","1234","bob@example.org" "Alice","5","alice@example.com" """ And it is stored at the url "http://example.com/example1.csv" And I have metadata with the following content: """ { "@context": "http://www.w3.org/ns/csvw", "url": "http://example.com/example1.csv", "dialect": { "header": false }, "tableSchema": { "columns": [ { "name": "Name", "required": true }, { "name": "Id", "required": true, "datatype": { "base": "string", "minLength": 1 } }, { "name": "Email", "required": true } ] } } """ When I ask if there are errors Then there should be 0 error Scenario: Schema invalid CSV Given I have a CSV with the following content: """ "Bob","1234","bob@example.org" "Alice","5","alice@example.com" """ And it is stored at the url "http://example.com/example1.csv" And I have metadata with the following content: """ { "@context": "http://www.w3.org/ns/csvw", "url": "http://example.com/example1.csv", "dialect": { "header": false }, "tableSchema": { "columns": [ { "name": "Name", "required": true }, { "name": "Id", "required": true, "datatype": { "base": "string", "minLength": 3 } }, { "name": "Email", "required": true } ] } } """ When I ask if there are errors Then there should be 1 error Scenario: CSV with incorrect header Given I have a CSV with the following content: """ "name","id","contact" "Bob","1234","bob@example.org" "Alice","5","alice@example.com" """ And it is stored at the url "http://example.com/example1.csv" And I have metadata with the following content: """ { "@context": "http://www.w3.org/ns/csvw", "url": "http://example.com/example1.csv", "tableSchema": { "columns": [ { "titles": "name", "required": true }, { "titles": "id", "required": true, "datatype": { "base": "string", "minLength": 1 } }, { "titles": "email", "required": true } ] } } """ When I ask if there are errors Then there should be 1 error Scenario: Schema with valid regex Given I have a CSV with the 
following content: """ "firstname","id","email" "Bob","1234","bob@example.org" "Alice","5","alice@example.com" """ And it is stored at the url "http://example.com/example1.csv" And I have metadata with the following content: """ { "@context": "http://www.w3.org/ns/csvw", "url": "http://example.com/example1.csv", "tableSchema": { "columns": [ { "titles": "firstname", "required": true, "datatype": { "base": "string", "format": "^[A-Za-z0-9_]*$" } }, { "titles": "id", "required": true, "datatype": { "base": "string", "minLength": 1 } }, { "titles": "email", "required": true } ] } } """ When I ask if there are warnings Then there should be 0 warnings Scenario: Schema with invalid regex Given I have a CSV with the following content: """ "firstname","id","email" "Bob","1234","bob@example.org" "Alice","5","alice@example.com" """ And it is stored at the url "http://example.com/example1.csv" And I have metadata with the following content: """ { "@context": "http://www.w3.org/ns/csvw", "url": "http://example.com/example1.csv", "tableSchema": { "columns": [ { "titles": "firstname", "required": true, "datatype": { "base": "string", "format": "((" } }, { "titles": "id", "required": true, "datatype": { "base": "string", "minLength": 1 } }, { "titles": "email", "required": true } ] } } """ When I ask if there are warnings Then there should be 1 warnings And that warning should have the type "invalid_regex" ================================================ FILE: features/fixtures/cr-line-endings.csv ================================================ "Foo","Bar","Baz" "Biff","Baff","Boff" "Qux","Teaspoon","Doge" ================================================ FILE: features/fixtures/crlf-line-endings.csv ================================================ "Foo","Bsr","Baz" "Biff","Baff","Boff" "Qux","Teaspoon","Doge" ================================================ FILE: features/fixtures/inconsistent-line-endings-unquoted.csv ================================================ Foo,Bsr,Baz 
Biff,Baff,Boff Qux,Teaspoon,Doge ================================================ FILE: features/fixtures/inconsistent-line-endings.csv ================================================ "Foo","Bsr","Baz" "Biff","Baff","Boff" "Qux","Teaspoon","Doge" ================================================ FILE: features/fixtures/invalid-byte-sequence.csv ================================================ "Data","Dependencia Origem","Histrico","Data do Balancete","Nmero do documento","Valor", "10/31/2012","","Saldo Anterior","","0","100.00", "11/01/2012","0000-9","Transferncia on line - 01/11 4885 256620-6 XXXXXXXXXXXXX","","224885000256620","100.00", "11/01/2012","","Depsito COMPE - 033 0502 27588602104 XXXXXXXXXXXXXX","","101150","100.00", "11/01/2012","","Proventos","","496774","1000.00", "11/01/2012","","Benefcio","","496775","100.00", "11/01/2012","0000-0","Compra com Carto - 01/11 09:45 XXXXXXXXXXX","","135102","-1.00", "11/01/2012","0000-0","Compra com Carto - 01/11 09:48 XXXXXXXXXXX","","235338","-10.00", "11/01/2012","0000-0","Compra com Carto - 01/11 12:35 XXXXXXXX","","345329","-10.00", "11/01/2012","0000-0","Compra com Carto - 01/11 23:57 XXXXXXXXXXXXXXXX","","686249","-10.00", "11/01/2012","0000-0","Saque com carto - 01/11 13:17 XXXXXXXXXXXXXXXX","","11317296267021","-10.00", "11/01/2012","","Pagto conta telefone - VIVO DF","","110101","-100.00", "11/01/2012","","Cobrana de I.O.F.","","391100701","-1.00", "11/05/2012","0000-0","Compra com Carto - 02/11 16:57 XXXXXXXXXXXX","","161057","-10.00", "11/05/2012","0000-0","Compra com Carto - 03/11 18:57 XXXXXXXXXXXXXXX","","168279","-10.00", "11/05/2012","0000-0","Compra com Carto - 05/11 12:32 XXXXXXXXXXXXXXXXX","","245166","-10.00", "11/05/2012","0000-0","Compra com Carto - 02/11 17:18 XXXXXXXXXXXXX","","262318","-1.00", "11/05/2012","0000-0","Compra com Carto - 02/11 22:46 XXXXXXXXXXX","","382002","-100.00", "11/05/2012","0000-0","Compra com Carto - 02/11 23:19 XXXXXXXXXXX","","683985","-1.00", 
"11/05/2012","0000-0","Compra com Carto - 03/11 01:19 XXXXXXXXXXXXXXXX","","704772","-10.00", "11/05/2012","0000-0","Compra com Carto - 03/11 11:08 XXXXXXXX","","840112","-1.00", "11/05/2012","0000-0","Saque com carto - 05/11 19:24 XXXXXXXXXXXXXXXXX","","51924256267021","-10.00", "11/05/2012","0000-0","Transferncia on line - 05/11 4885 256620-6 XXXXXXXXXXXXX","","224885000256620","-100.00", "11/05/2012","","Pagamento de Ttulo - XXXXXXXXXXXXXXXXXXX","","110501","-100.00", ================================================ FILE: features/fixtures/invalid_many_rows.csv ================================================ "Foo","Bar","Baz" "1","2","3" "3","2","1" "1","2","3" " "3","two","1" "1","2","3" "3","2","1" "3","2","1" "3","2","1" "","","" "3","2","1" ================================================ FILE: features/fixtures/lf-line-endings.csv ================================================ "Foo","Bsr","Baz" "Biff","Baff","Boff" "Qux","Teaspoon","Doge" ================================================ FILE: features/fixtures/title-row.csv ================================================ "This is a title row",, "Foo","Bsr","Baz" "Biff","Baff","Boff" "Qux","Teaspoon","Doge" ================================================ FILE: features/fixtures/valid.csv ================================================ "Foo","Bar","Baz" "1","2","3" "3","2","1" ================================================ FILE: features/fixtures/valid_many_rows.csv ================================================ "Foo","Bar","Baz" "1","2","3" "3","2","1" "1","2","3" "3","2","1" "1","2","3" "3","2","1" ================================================ FILE: features/fixtures/w3.org/.well-known/csvm ================================================ {+url}-metadata.json csv-metadata.json {+url}.json csvm.json ================================================ FILE: features/fixtures/white space in filename.csv ================================================ "Foo","Bar","Baz" "1","2","3" "3","2","1" 
================================================ FILE: features/fixtures/windows-line-endings.csv ================================================ a,b,c d,e,f ================================================ FILE: features/information.feature ================================================ Feature: Return information Background: Given I have a CSV with the following content: """ "abc","2","3" """ And it is encoded as "utf-8" And the content type is "text/csv" And it is stored at the url "http://example.com/example1.csv?query=true" Scenario: Return encoding Then the "encoding" should be "UTF-8" Scenario: Return content type Then the "content_type" should be "text/csv; charset=utf-8" Scenario: Return extension Then the "extension" should be ".csv" Scenario: Return meta Then the metadata content type should be "text/csv; charset=utf-8" ================================================ FILE: features/parse_csv.feature ================================================ Feature: Parse CSV Scenario: Successfully parse a valid CSV Given I have a CSV with the following content: """ "Foo","Bar","Baz" "1","2","3" "3","2","1" """ And it is stored at the url "http://example.com/example1.csv" When I ask if the CSV is valid Then I should get the value of true Scenario: Successfully parse a CSV with newlines in quoted fields Given I have a CSV with the following content: """ "a","b","c" "d","e","this is valid" "a","b","c" """ And it is stored at the url "http://example.com/example1.csv" When I ask if the CSV is valid Then I should get the value of true Scenario: Successfully parse a CSV with multiple newlines in quoted fields Given I have a CSV with the following content: """ "a","b","c" "d","this is valid","as is this too" """ And it is stored at the url "http://example.com/example1.csv" When I ask if the CSV is valid Then I should get the value of true Scenario: Successfully report an invalid CSV Given I have a CSV with the following content: """ "Foo", "Bar" , "Baz """ And it is 
stored at the url "http://example.com/example1.csv" When I ask if the CSV is valid Then I should get the value of false Scenario: Successfully report a CSV with incorrect quoting Given I have a CSV with the following content: """ "Foo","Bar","Baz """ And it is stored at the url "http://example.com/example1.csv" When I ask if the CSV is valid Then I should get the value of false Scenario: Successfully report a CSV with incorrect whitespace Given I have a CSV with the following content: """ "Foo","Bar", "Baz" """ And it is stored at the url "http://example.com/example1.csv" When I ask if the CSV is valid Then I should get the value of false Scenario: Successfully report a CSV with ragged rows Given I have a CSV with the following content: """ "col1","col2","col2" "1","2","3" "4","5" """ And it is stored at the url "http://example.com/example1.csv" When I ask if the CSV is valid Then I should get the value of false Scenario: Don't class blank values as inconsistencies Given I have a CSV with the following content: """ "col1","col2","col3" "1","2","3" "4","5","6" "","7","8" "9","10","11" "","12","13" "","14","15" "16","17","18" """ And it is stored at the url "http://example.com/example1.csv" When I ask if there are warnings Then there should be 0 warnings ================================================ FILE: features/schema_validation.feature ================================================ Feature: Schema Validation Scenario: Valid CSV Given I have a CSV with the following content: """ "Bob","1234","bob@example.org" "Alice","5","alice@example.com" """ And it is stored at the url "http://example.com/example1.csv" And I have a schema with the following content: """ { "fields": [ { "name": "Name", "constraints": { "required": true } }, { "name": "Id", "constraints": { "required": true, "minLength": 1 } }, { "name": "Email", "constraints": { "required": true } } ] } """ When I ask if there are errors Then there should be 0 error Scenario: Schema invalid CSV Given I have 
a CSV with the following content: """ "Bob","1234","bob@example.org" "Alice","5","alice@example.com" """ And it is stored at the url "http://example.com/example1.csv" And I have a schema with the following content: """ { "fields": [ { "name": "Name", "constraints": { "required": true } }, { "name": "Id", "constraints": { "required": true, "minLength": 3 } }, { "name": "Email", "constraints": { "required": true } } ] } """ When I ask if there are errors Then there should be 1 error Scenario: CSV with incorrect header Given I have a CSV with the following content: """ "name","id","contact" "Bob","1234","bob@example.org" "Alice","5","alice@example.com" """ And it is stored at the url "http://example.com/example1.csv" And I have a schema with the following content: """ { "fields": [ { "name": "name", "constraints": { "required": true } }, { "name": "id", "constraints": { "required": true, "minLength": 3 } }, { "name": "email", "constraints": { "required": true } } ] } """ When I ask if there are warnings Then there should be 1 warnings Scenario: Schema with valid regex Given I have a CSV with the following content: """ "firstname","id","email" "Bob","1234","bob@example.org" "Alice","5","alice@example.com" """ And it is stored at the url "http://example.com/example1.csv" And I have a schema with the following content: """ { "fields": [ { "name": "Name", "constraints": { "required": true, "pattern": "^[A-Za-z0-9_]*$" } }, { "name": "Id", "constraints": { "required": true, "minLength": 1 } }, { "name": "Email", "constraints": { "required": true } } ] } """ When I ask if there are errors Then there should be 0 error Scenario: Schema with invalid regex Given I have a CSV with the following content: """ "firstname","id","email" "Bob","1234","bob@example.org" "Alice","5","alice@example.com" """ And it is stored at the url "http://example.com/example1.csv" And I have a schema with the following content: """ { "fields": [ { "name": "Name", "constraints": { "required": true, 
"pattern": "((" } }, { "name": "Id", "constraints": { "required": true, "minLength": 1 } }, { "name": "Email", "constraints": { "required": true } } ] } """ When I ask if there are errors Then there should be 1 error And that error should have the type "invalid_regex" ================================================ FILE: features/sources.feature ================================================ Feature: Parse CSV from Different Sources Scenario: Successfully parse a valid CSV from a StringIO Given I have a CSV with the following content: """ "Foo","Bar","Baz" "1","2","3" "3","2","1" """ And it is parsed as a StringIO When I ask if the CSV is valid Then I should get the value of true Scenario: Successfully parse a valid CSV from a File Given I parse a file called "valid.csv" When I ask if the CSV is valid Then I should get the value of true ================================================ FILE: features/step_definitions/cli_steps.rb ================================================
# Makes $stdin.read return the contents of +file+, standing in for
# `cat file | csvlint`.
#
# The pattern spells "stdin" without a leading "$": inside a regex literal a
# bare "$" is an end-of-line anchor, so "$stdin" could never match the step
# text "I have stubbed stdin to contain ..." used by the feature files.
Given(/^I have stubbed stdin to contain "(.*?)"$/) do |file|
  expect($stdin).to receive(:read).and_return(File.read(file))
end

# Makes $stdin.read return nil, i.e. nothing was piped in.
Given(/^I have stubbed stdin to contain nothing$/) do
  expect($stdin).to receive(:read).and_return(nil)
end

# Sets an expectation that $stderr.puts is never called.
Then(/^nothing should be outputted to STDERR$/) do
  expect($stderr).to_not receive(:puts)
end

# Parses whatever all_stdout returns as JSON and keeps it in @json for the
# JSON steps below.
Then(/^the output should contain JSON$/) do
  @json = JSON.parse(all_stdout)
  expect(@json["validation"]).to be_present
end

# Compares the "state" field of the parsed validation result.
Then(/^the JSON should have a state of "(.*?)"$/) do |state|
  expect(@json["validation"]["state"]).to eq(state)
end

# Checks the number of errors and stores the index of the last one in @index,
# which the "that error should have ..." step reads.
Then(/^the JSON should have (\d+) errors?$/) do |count|
  @index = count.to_i - 1
  expect(@json["validation"]["errors"].count).to eq(count.to_i)
end

# Compares field +k+ (as a string) of the error at @index with +v+.
Then(/^that error should have the "(.*?)" "(.*?)"$/) do |k, v|
  expect(@json["validation"]["errors"][@index][k].to_s).to eq(v)
end

# Same comparison for an explicitly numbered error; numbers in the feature
# text are 1-based, hence the "- 1".
Then(/^error (\d+) should have the "(.*?)" "(.*?)"$/) do |index, k, v|
  expect(@json["validation"]["errors"][index.to_i - 1][k].to_s).to eq(v)
end

Then(/^error (\d+) 
should have the constraint "(.*?)" "(.*?)"$/) do |index, k, v| expect(@json["validation"]["errors"][index.to_i - 1]["constraints"][k].to_s).to eq(v) end ================================================ FILE: features/step_definitions/csv_options_steps.rb ================================================ Given(/^I set the delimiter to "(.*?)"$/) do |delimiter| @csv_options ||= default_csv_options @csv_options["delimiter"] = delimiter end Given(/^I set quotechar to "(.*?)"$/) do |doublequote| @csv_options ||= default_csv_options @csv_options["quoteChar"] = doublequote end Given(/^I set the line endings to linefeed$/) do @csv_options ||= default_csv_options @csv_options["lineTerminator"] = "\n" end Given(/^I set the line endings to carriage return$/) do @csv_options ||= default_csv_options @csv_options["lineTerminator"] = "\r" end Given(/^I set header to "(.*?)"$/) do |boolean| @csv_options ||= default_csv_options @csv_options["header"] = boolean == "true" end ================================================ FILE: features/step_definitions/information_steps.rb ================================================ Given(/^the content type is "(.*?)"$/) do |arg1| @content_type = "text/csv" end Then(/^the "(.*?)" should be "(.*?)"$/) do |type, encoding| validator = Csvlint::Validator.new(@url, default_csv_options) expect(validator.send(type.to_sym)).to eq(encoding) end Then(/^the metadata content type should be "(.*?)"$/) do |content_type| validator = Csvlint::Validator.new(@url, default_csv_options) expect(validator.headers["content-type"]).to eq(content_type) end ================================================ FILE: features/step_definitions/parse_csv_steps.rb ================================================ Given(/^I have a CSV with the following content:$/) do |string| @csv = string.to_s end Given(/^it has a Link header holding "(.*?)"$/) do |link| @link = "#{link}; type=\"application/csvm+json\"" end Given(/^it is stored at the url "(.*?)"$/) do |url| @url = url 
content_type = @content_type || "text/csv" charset = @encoding || "UTF-8" headers = {"Content-Type" => "#{content_type}; charset=#{charset}"} headers["Link"] = @link if @link stub_request(:get, url).to_return(status: 200, body: @csv, headers: headers) stub_request(:get, URI.join(url, "/.well-known/csvm")).to_return(status: 404) stub_request(:get, url + "-metadata.json").to_return(status: 404) stub_request(:get, URI.join(url, "csv-metadata.json")).to_return(status: 404) end Given(/^it is stored at the url "(.*?)" with no character set$/) do |url| @url = url content_type = @content_type || "text/csv" stub_request(:get, url).to_return(status: 200, body: @csv, headers: {"Content-Type" => content_type.to_s}) stub_request(:get, URI.join(url, "/.well-known/csvm")).to_return(status: 404) stub_request(:get, url + "-metadata.json").to_return(status: 404) stub_request(:get, URI.join(url, "csv-metadata.json")).to_return(status: 404) end When(/^I ask if the CSV is valid$/) do @csv_options ||= default_csv_options @validator = Csvlint::Validator.new(@url, @csv_options) @valid = @validator.valid? 
end Then(/^I should get the value of true$/) do expect(@valid).to be(true) end Then(/^I should get the value of false$/) do expect(@valid).to be(false) end ================================================ FILE: features/step_definitions/schema_validation_steps.rb ================================================ Given(/^I have a schema with the following content:$/) do |json| @schema_type = :json_table @schema_json = json end Given(/^I have metadata with the following content:$/) do |json| @schema_type = :csvw_metadata @schema_json = json end Given(/^I have a metadata file called "([^"]*)"$/) do |filename| @schema_type = :csvw_metadata @schema_json = File.read(File.join(File.dirname(__FILE__), "..", "fixtures", filename)) end Given(/^the (schema|metadata) is stored at the url "(.*?)"$/) do |schema_type, schema_url| @schema_url = schema_url stub_request(:get, @schema_url).to_return(status: 200, body: @schema_json.to_str) end Given(/^there is a file at "(.*?)" with the content:$/) do |url, content| stub_request(:get, url).to_return(status: 200, body: content.to_str) end Given(/^I have a file called "(.*?)" at the url "(.*?)"$/) do |filename, url| content = File.read(File.join(File.dirname(__FILE__), "..", "fixtures", filename)) content_type = /.csv$/.match?(filename) ? 
"text/csv" : "application/csvm+json" stub_request(:get, url).to_return(status: 200, body: content, headers: {"Content-Type" => "#{content_type}; charset=UTF-8"}) end Given(/^there is no file at the url "(.*?)"$/) do |url| stub_request(:get, url).to_return(status: 404) end ================================================ FILE: features/step_definitions/sources_steps.rb ================================================ Given(/^it is parsed as a StringIO$/) do @url = StringIO.new(@csv) end Given(/^I parse a file called "(.*?)"$/) do |filename| @url = File.new(File.join(File.dirname(__FILE__), "..", "fixtures", filename)) end ================================================ FILE: features/step_definitions/validation_errors_steps.rb ================================================ When(/^I ask if there are errors$/) do @csv_options ||= default_csv_options if @schema_json @schema = if @schema_type == :json_table Csvlint::Schema.from_json_table(@schema_url || "http://example.org ", JSON.parse(@schema_json)) else Csvlint::Schema.from_csvw_metadata(@schema_url || "http://example.org ", JSON.parse(@schema_json)) end end @validator = Csvlint::Validator.new(@url, @csv_options, @schema) @errors = @validator.errors end When(/^I carry out CSVW validation$/) do @csv_options ||= default_csv_options begin if @schema_json json = JSON.parse(@schema_json) @schema = if @schema_type == :json_table Csvlint::Schema.from_json_table(@schema_url || "http://example.org ", json) else Csvlint::Schema.from_csvw_metadata(@schema_url || "http://example.org ", json) end end if @url.nil? 
@errors = [] @warnings = [] @schema.tables.keys.each do |table_url| validator = Csvlint::Validator.new(table_url, @csv_options, @schema) @errors += validator.errors @warnings += validator.warnings end else validator = Csvlint::Validator.new(@url, @csv_options, @schema) @errors = validator.errors @warnings = validator.warnings end rescue JSON::ParserError => e @errors = [e] rescue Csvlint::Csvw::MetadataError => e @errors = [e] end end Then(/^there should be errors$/) do # this test is only used for CSVW testing; :invalid_encoding & :line_breaks mask lack of real errors @errors.delete_if { |e| e.instance_of?(Csvlint::ErrorMessage) && [:invalid_encoding, :line_breaks].include?(e.type) } expect(@errors.count).to be > 0 end Then(/^there should not be errors$/) do expect(@errors.count).to eq(0) end Then(/^there should be (\d+) error$/) do |count| expect(@errors.count).to eq(count.to_i) end Then(/^that error should have the type "(.*?)"$/) do |type| expect(@errors.first.type).to eq(type.to_sym) end Then(/^that error should have the row "(.*?)"$/) do |row| expect(@errors.first.row).to eq(row.to_i) end Then(/^that error should have the column "(.*?)"$/) do |column| expect(@errors.first.column).to eq(column.to_i) end Then(/^that error should have the content "(.*)"$/) do |content| expect(@errors.first.content.chomp).to eq(content.chomp) end Then(/^that error should have no content$/) do expect(@errors.first.content).to eq(nil) end Given(/^I have a CSV that doesn't exist$/) do @url = "http//www.example.com/fake-csv.csv" stub_request(:get, @url).to_return(status: 404) end Then(/^there should be no "(.*?)" errors$/) do |type| @errors.each { |error| error.type.should_not == type.to_sym } end ================================================ FILE: features/step_definitions/validation_info_steps.rb ================================================ Given(/^I ask if there are info messages$/) do @csv_options ||= default_csv_options if @schema_json @schema = if @schema_type == 
:json_table Csvlint::Schema.from_json_table(@schema_url || "http://example.org ", JSON.parse(@schema_json)) else Csvlint::Schema.from_csvw_metadata(@schema_url || "http://example.org ", JSON.parse(@schema_json)) end end @validator = Csvlint::Validator.new(@url, @csv_options, @schema) @info_messages = @validator.info_messages end Then(/^there should be (\d+) info messages?$/) do |num| expect(@info_messages.count).to eq(num.to_i) end Then(/^one of the messages should have the type "(.*?)"$/) do |msg_type| expect(@info_messages.find { |x| x.type == msg_type.to_sym }).to be_present end ================================================ FILE: features/step_definitions/validation_warnings_steps.rb ================================================ Given(/^it is encoded as "(.*?)"$/) do |encoding| @csv = @csv.encode(encoding) @encoding = encoding end Given(/^I set an encoding header of "(.*?)"$/) do |encoding| @encoding = encoding end Given(/^I do not set an encoding header$/) do @encoding = nil end Given(/^I have a CSV file called "(.*?)"$/) do |filename| @csv = File.read(File.join(File.dirname(__FILE__), "..", "fixtures", filename)) end When(/^I ask if there are warnings$/) do @csv_options ||= default_csv_options if @schema_json @schema = if @schema_type == :json_table Csvlint::Schema.from_json_table(@schema_url || "http://example.org ", JSON.parse(@schema_json)) else Csvlint::Schema.from_csvw_metadata(@schema_url || "http://example.org ", JSON.parse(@schema_json)) end end @validator = Csvlint::Validator.new(@url, @csv_options, @schema) @warnings = @validator.warnings end Then(/^there should be warnings$/) do expect(@warnings.count).to be > 0 end Then(/^there should not be warnings$/) do # this test is only used for CSVW testing, and :inconsistent_values warnings don't count in CSVW @warnings.delete_if { |w| [:inconsistent_values, :check_options].include?(w.type) } expect(@warnings.count).to eq(0) end Then(/^there should be (\d+) warnings$/) do |count| 
expect(@warnings.count).to eq(count.to_i) end Given(/^the content type is set to "(.*?)"$/) do |type| @content_type = type end Then(/^that warning should have the row "(.*?)"$/) do |row| expect(@warnings.first.row).to eq(row.to_i) end Then(/^that warning should have the column "(.*?)"$/) do |column| expect(@warnings.first.column).to eq(column.to_i) end Then(/^that warning should have the type "(.*?)"$/) do |type| expect(@warnings.first.type).to eq(type.to_sym) end ================================================ FILE: features/support/aruba.rb ================================================ require "aruba" require "aruba/cucumber" require "csvlint/cli" module Csvlint class CliRunner # Allow everything fun to be injected from the outside while defaulting to normal implementations. def initialize(argv, stdin = $stdin, stdout = $stdout, stderr = $stderr, kernel = Kernel) @argv, @stdin, @stdout, @stderr, @kernel = argv, stdin, stdout, stderr, kernel end def execute! exit_code = begin # Thor accesses these streams directly rather than letting them be injected, so we replace them... $stderr = @stderr $stdin = @stdin $stdout = @stdout # Run our normal Thor app the way we know and love. Csvlint::Cli.start(@argv.dup.unshift("validate")) # Thor::Base#start does not have a return value, assume success if no exception is raised. 0 rescue => e # The ruby interpreter would pipe this to STDERR and exit 1 in the case of an unhandled exception b = e.backtrace @stderr.puts("#{b.shift}: #{e.message} (#{e.class})") @stderr.puts(b.map { |s| "\tfrom #{s}" }.join("\n")) 1 rescue SystemExit => e e.status ensure # TODO: reset your app here, free up resources, etc. # Examples: # MyApp.logger.flush # MyApp.logger.close # MyApp.logger = nil # # MyApp.reset_singleton_instance_variables # ...then we put the streams back. $stderr = STDERR $stdin = STDIN $stdout = STDOUT end # Proxy our exit code back to the injected kernel. 
@kernel.exit(exit_code) end end end Aruba.configure do |config| config.command_launcher = :in_process config.main_class = Csvlint::CliRunner end ================================================ FILE: features/support/earl_formatter.rb ================================================ require "rdf" require "rdf/turtle" class EarlFormatter def initialize(step_mother, io, options) output = RDF::Resource.new("") @graph = RDF::Graph.new @graph << [CSVLINT, RDF.type, RDF::DOAP.Project] @graph << [CSVLINT, RDF.type, EARL.TestSubject] @graph << [CSVLINT, RDF.type, EARL.Software] @graph << [CSVLINT, RDF::DOAP.name, "csvlint"] @graph << [CSVLINT, RDF::DC.title, "csvlint"] @graph << [CSVLINT, RDF::DOAP.description, "CSV validator"] @graph << [CSVLINT, RDF::DOAP.homepage, RDF::Resource.new("https://github.com/theodi/csvlint.rb")] @graph << [CSVLINT, RDF::DOAP.license, RDF::Resource.new("https://raw.githubusercontent.com/theodi/csvlint.rb/master/LICENSE.md")] @graph << [CSVLINT, RDF::DOAP["programming-language"], "Ruby"] @graph << [CSVLINT, RDF::DOAP.implements, RDF::Resource.new("http://www.w3.org/TR/tabular-data-model/")] @graph << [CSVLINT, RDF::DOAP.implements, RDF::Resource.new("http://www.w3.org/TR/tabular-metadata/")] @graph << [CSVLINT, RDF::DOAP.developer, ODI] @graph << [CSVLINT, RDF::DOAP.maintainer, ODI] @graph << [CSVLINT, RDF::DOAP.documenter, ODI] @graph << [CSVLINT, RDF::FOAF.maker, ODI] @graph << [CSVLINT, RDF::DC.creator, ODI] @graph << [output, RDF::FOAF["primaryTopic"], CSVLINT] @graph << [output, RDF::DC.issued, DateTime.now] @graph << [output, RDF::FOAF.maker, ODI] @graph << [ODI, RDF.type, RDF::FOAF.Organization] @graph << [ODI, RDF.type, EARL.Assertor] @graph << [ODI, RDF::FOAF.name, "Open Data Institute"] @graph << [ODI, RDF::FOAF.homepage, "https://theodi.org/"] end def scenario_name(keyword, name, file_colon_line, source_indent) @test = RDF::Resource.new("http://www.w3.org/2013/csvw/tests/#{name.split(" ")[0]}") end def after_steps(steps) passed = true 
steps.each do |s| passed = false unless s.status == :passed end a = RDF::Node.new @graph << [a, RDF.type, EARL.Assertion] @graph << [a, EARL.assertedBy, ODI] @graph << [a, EARL.subject, CSVLINT] @graph << [a, EARL.test, @test] @graph << [a, EARL.mode, EARL.automatic] r = RDF::Node.new @graph << [a, EARL.result, r] @graph << [r, RDF.type, EARL.TestResult] @graph << [r, EARL.outcome, passed ? EARL.passed : EARL.failed] @graph << [r, RDF::DC.date, DateTime.now] end def after_features(features) RDF::Writer.for(:ttl).open("csvlint-earl.ttl", {prefixes: {"earl" => EARL}, standard_prefixes: true, canonicalize: true, literal_shorthand: true}) do |writer| writer << @graph end end private EARL = RDF::Vocabulary.new("http://www.w3.org/ns/earl#") ODI = RDF::Resource.new("https://theodi.org/") CSVLINT = RDF::Resource.new("https://github.com/theodi/csvlint.rb") end ================================================ FILE: features/support/env.rb ================================================ require "coveralls" Coveralls.wear_merged!("test_frameworks") $:.unshift File.join(File.dirname(__FILE__), "..", "..", "lib") require "rspec/expectations" require "cucumber/rspec/doubles" require "csvlint" require "byebug" require "spork" Spork.each_run do require "csvlint" end class CustomWorld def default_csv_options {} end end World do CustomWorld.new end ================================================ FILE: features/support/load_tests.rb ================================================ require "json" require "open-uri" require "uri" BASE_URI = "https://w3c.github.io/csvw/tests/" BASE_PATH = File.join(File.dirname(__FILE__), "..", "fixtures", "csvw") FEATURE_BASE_PATH = File.join(File.dirname(__FILE__), "..") VALIDATION_FEATURE_FILE_PATH = File.join(FEATURE_BASE_PATH, "csvw_validation_tests.feature") SCRIPT_FILE_PATH = File.join(File.dirname(__FILE__), "..", "..", "bin", "run-csvw-tests") Dir.mkdir(BASE_PATH) unless Dir.exist?(BASE_PATH) def cache_file(filename) file = 
File.join(BASE_PATH, filename) uri = URI.join(BASE_URI, filename) unless File.exist?(file) if filename.include? "/" levels = filename.split("/")[0..-2] (0..levels.length).each do |i| dir = File.join(BASE_PATH, levels[0..i].join("/")) Dir.mkdir(dir) unless Dir.exist?(dir) end end warn("storing #{file} locally") File.open(file, "wb") do |f| f.puts URI.open(uri, "rb").read end end uri end unless File.exist? SCRIPT_FILE_PATH File.open(SCRIPT_FILE_PATH, "w") do |file| File.chmod(0o755, SCRIPT_FILE_PATH) manifest = JSON.parse(URI.open("#{BASE_URI}manifest-validation.jsonld").read) manifest["entries"].each do |entry| type = "valid" case entry["type"] when "csvt:WarningValidationTest" type = "warnings" when "csvt:NegativeValidationTest" type = "errors" end file.puts "echo \"#{entry["id"].split("#")[-1]}: #{entry["name"].tr("`", "'")}\"" file.puts "echo \"#{type}: #{entry["comment"].gsub("\"", "\\\"").tr("`", "'")}\"" if entry["action"].end_with?(".json") file.puts "csvlint --schema=features/fixtures/csvw/#{entry["action"]}" elsif entry["option"] && entry["option"]["metadata"] file.puts "csvlint features/fixtures/csvw/#{entry["action"]} --schema=features/fixtures/csvw/#{entry["option"]["metadata"]}" else file.puts "csvlint features/fixtures/csvw/#{entry["action"]}" end file.puts "echo" end end end unless File.exist? 
VALIDATION_FEATURE_FILE_PATH File.open(VALIDATION_FEATURE_FILE_PATH, "w") do |file| file.puts "# Auto-generated file based on standard validation CSVW tests from #{BASE_URI}manifest-validation.jsonld" file.puts "" manifest = JSON.parse(URI.open("#{BASE_URI}manifest-validation.jsonld").read) file.puts "Feature: #{manifest["label"]}" file.puts "" manifest["entries"].each do |entry| action_uri = cache_file(entry["action"]) metadata = nil provided_files = [] missing_files = [] file.puts "\t# #{entry["id"]}" file.puts "\t# #{entry["comment"]}" file.puts "\tScenario: #{entry["id"]} #{entry["name"].gsub("<", "less than")}" if entry["action"].end_with?(".json") file.puts "\t\tGiven I have a metadata file called \"csvw/#{entry["action"]}\"" file.puts "\t\tAnd the metadata is stored at the url \"#{action_uri}\"" else file.puts "\t\tGiven I have a CSV file called \"csvw/#{entry["action"]}\"" file.puts "\t\tAnd it has a Link header holding \"#{entry["httpLink"]}\"" if entry["httpLink"] file.puts "\t\tAnd it is stored at the url \"#{action_uri}\"" if entry["option"] && entry["option"]["metadata"] # no need to store the file here, as it will be listed in the 'implicit' list, which all get stored metadata = URI.join(BASE_URI, entry["option"]["metadata"]) file.puts "\t\tAnd I have a metadata file called \"csvw/#{entry["option"]["metadata"]}\"" file.puts "\t\tAnd the metadata is stored at the url \"#{metadata}\"" end provided_files << action_uri.to_s if entry["name"].include?("/.well-known/csvm") file.puts "\t\tAnd I have a file called \"w3.org/.well-known/csvm\" at the url \"https://www.w3.org/.well-known/csvm\"" missing_files << "#{action_uri}.json" missing_files << URI.join(action_uri, "csvm.json").to_s else missing_files << URI.join(action_uri, "/.well-known/csvm").to_s end missing_files << "#{action_uri}-metadata.json" missing_files << URI.join(action_uri, "csv-metadata.json").to_s end entry["implicit"]&.each do |implicit| implicit_uri = cache_file(implicit) provided_files << 
implicit_uri.to_s unless implicit_uri == metadata file.puts "\t\tAnd I have a file called \"csvw/#{implicit}\" at the url \"#{implicit_uri}\"" end end missing_files.each do |uri| file.puts "\t\tAnd there is no file at the url \"#{uri}\"" unless provided_files.include? uri end file.puts "\t\tWhen I carry out CSVW validation" if entry["type"] == "csvt:WarningValidationTest" file.puts "\t\tThen there should not be errors" file.puts "\t\tAnd there should be warnings" elsif entry["type"] == "csvt:NegativeValidationTest" file.puts "\t\tThen there should be errors" else file.puts "\t\tThen there should not be errors" file.puts "\t\tAnd there should not be warnings" end file.puts "\t" end end end ================================================ FILE: features/support/webmock.rb ================================================ require "webmock/cucumber" WebMock.disable_net_connect!(allow: %r{csvw/tests}) ================================================ FILE: features/validation_errors.feature ================================================ Feature: Get validation errors Scenario: CSV with ragged rows Given I have a CSV with the following content: """ "col1","col2","col3" "1","2","3" "4","5" """ And it is stored at the url "http://example.com/example1.csv" When I ask if there are errors Then there should be 1 error And that error should have the type "ragged_rows" And that error should have the row "3" And that error should have the content ""4","5"" Scenario: CSV with incorrect quoting Given I have a CSV with the following content: """ "col1","col2","col3" "Foo","Bar","Baz """ And it is stored at the url "http://example.com/example1.csv" When I ask if there are errors Then there should be 1 error And that error should have the type "unclosed_quote" And that error should have the row "2" And that error should have the content ""Foo","Bar","Baz" Scenario: Successfully report a CSV with incorrect whitespace Given I have a CSV with the following content: """ 
"col1","col2","col3" "Foo","Bar", "Baz" """ And it is stored at the url "http://example.com/example1.csv" When I ask if there are errors Then there should be 1 error And that error should have the type "whitespace" And that error should have the row "2" And that error should have the content ""Foo","Bar", "Baz"" Scenario: Successfully report a CSV with blank rows Given I have a CSV with the following content: """ "col1","col2","col3" "Foo","Bar","Baz" "","", "Baz","Bar","Foo" """ And it is stored at the url "http://example.com/example1.csv" When I ask if there are errors Then there should be 1 error And that error should have the type "blank_rows" And that error should have the row "3" And that error should have the content """,""," Scenario: Successfully report a CSV with multiple trailing empty rows Given I have a CSV with the following content: """ "col1","col2","col3" "Foo","Bar","Baz" "Foo","Bar","Baz" """ And it is stored at the url "http://example.com/example1.csv" When I ask if there are errors Then there should be 1 error And that error should have the type "blank_rows" And that error should have the row "4" Scenario: Successfully report a CSV with an empty row Given I have a CSV with the following content: """ "col1","col2","col3" "Foo","Bar","Baz" "Foo","Bar","Baz" """ And it is stored at the url "http://example.com/example1.csv" When I ask if there are errors Then there should be 1 error And that error should have the type "blank_rows" And that error should have the row "3" Scenario: Report invalid Encoding Given I have a CSV file called "invalid-byte-sequence.csv" And I set an encoding header of "UTF-8" And it is stored at the url "http://example.com/example1.csv" When I ask if there are errors Then there should be 1 error And that error should have the type "invalid_encoding" Scenario: Correctly handle different encodings Given I have a CSV file called "invalid-byte-sequence.csv" And I set an encoding header of "ISO-8859-1" And it is stored at the url 
"http://example.com/example1.csv" When I ask if there are errors Then there should be no "content_encoding" errors Scenario: Report invalid file Given I have a CSV file called "spreadsheet.xls" And it is stored at the url "http://example.com/example1.csv" When I ask if there are errors Then there should be 1 error And that error should have the type "invalid_encoding" Scenario: Incorrect extension Given I have a CSV with the following content: """ "abc","2","3" """ And the content type is set to "application/excel" And it is stored at the url "http://example.com/example1.csv" And I ask if there are errors Then there should be 1 error And that error should have the type "wrong_content_type" Scenario: Handles urls that 404 Given I have a CSV that doesn't exist When I ask if there are errors Then there should be 1 error And that error should have the type "not_found" Scenario: Incorrect line endings specified in settings Given I have a CSV file called "cr-line-endings.csv" And I set the line endings to linefeed And it is stored at the url "http://example.com/example1.csv" And I ask if there are errors Then there should be 1 error And that error should have the type "line_breaks" Scenario: inconsistent line endings in file cause an error Given I have a CSV file called "inconsistent-line-endings.csv" And it is stored at the url "http://example.com/example1.csv" And I ask if there are errors Then there should be 1 error And that error should have the type "line_breaks" Scenario: inconsistent line endings with unquoted fields in file cause an error Given I have a CSV file called "inconsistent-line-endings-unquoted.csv" And it is stored at the url "http://example.com/example1.csv" And I ask if there are errors Then there should be 1 error And that error should have the type "line_breaks" ================================================ FILE: features/validation_info.feature ================================================ Feature: Get validation information messages 
Scenario: LF line endings in file give an info message Given I have a CSV file called "lf-line-endings.csv" And it is stored at the url "http://example.com/example1.csv" And I set header to "true" And I ask if there are info messages Then there should be 1 info messages And one of the messages should have the type "nonrfc_line_breaks" Scenario: CRLF line endings in file produces no info messages Given I have a CSV file called "crlf-line-endings.csv" And it is stored at the url "http://example.com/example1.csv" And I set header to "true" And I ask if there are info messages Then there should be 0 info messages ================================================ FILE: features/validation_warnings.feature ================================================ Feature: Validation warnings Scenario: UTF-8 Encoding Given I have a CSV with the following content: """ "col1","col2","col3" "abc","2","3" """ And it is encoded as "utf-8" And it is stored at the url "http://example.com/example1.csv" When I ask if there are warnings Then there should be 0 warnings Scenario: ISO-8859-1 Encoding Given I have a CSV with the following content: """ "col1","col2","col3" "1","2","3" """ And it is encoded as "iso-8859-1" And it is stored at the url "http://example.com/example1.csv" When I ask if there are warnings Then there should be 1 warnings Scenario: Correct content type Given I have a CSV with the following content: """ "col1","col2","col3" "abc","2","3" """ And the content type is set to "text/csv" And it is stored at the url "http://example.com/example1.csv" And I ask if there are warnings Then there should be 0 warnings Scenario: No extension Given I have a CSV with the following content: """ "col1","col2","col3" "abc","2","3" """ And the content type is set to "text/csv" And it is stored at the url "http://example.com/example1" And I ask if there are warnings Then there should be 0 warnings Scenario: Allow query params after extension Given I have a CSV with the following content: """ 
"col1","col2","col3" "abc","2","3" """ And the content type is set to "text/csv" And it is stored at the url "http://example.com/example1.csv?query=param" And I ask if there are warnings Then there should be 0 warnings Scenario: User doesn't supply encoding Given I have a CSV with the following content: """ "col1","col2","col3" "abc","2","3" """ And it is stored at the url "http://example.com/example1.csv" with no character set When I ask if there are warnings Then there should be 1 warnings And that warning should have the type "no_encoding" Scenario: Title rows Given I have a CSV file called "title-row.csv" And it is stored at the url "http://example.com/example1.csv" And I ask if there are warnings Then there should be 1 warnings And that warning should have the type "title_row" Scenario: catch excel warnings Given I parse a file called "spreadsheet.xls" And I ask if there are warnings Then there should be 1 warnings And that warning should have the type "excel" Scenario: catch excel warnings Given I parse a file called "spreadsheet.xlsx" And I ask if there are warnings Then there should be 1 warnings And that warning should have the type "excel" ================================================ FILE: gemfiles/activesupport_5.2.gemfile ================================================ # This file was generated by Appraisal source "https://rubygems.org" gem "activesupport", "~> 5.2.0" gemspec path: "../" ================================================ FILE: gemfiles/activesupport_6.0.gemfile ================================================ # This file was generated by Appraisal source "https://rubygems.org" gem "activesupport", "~> 6.0.0" gemspec path: "../" ================================================ FILE: gemfiles/activesupport_6.1.gemfile ================================================ # This file was generated by Appraisal source "https://rubygems.org" gem "activesupport", "~> 6.1.0" gemspec path: "../" ================================================ 
FILE: gemfiles/activesupport_7.0.gemfile ================================================ # This file was generated by Appraisal source "https://rubygems.org" gem "activesupport", "~> 7.0.0" gemspec path: "../" ================================================ FILE: gemfiles/activesupport_7.1.gemfile ================================================ # This file was generated by Appraisal source "https://rubygems.org" gem "activesupport", "~> 7.1.0" gemspec path: "../" ================================================ FILE: gemfiles/activesupport_7.2.gemfile ================================================ # This file was generated by Appraisal source "https://rubygems.org" gem "activesupport", "~> 7.2.0" gemspec path: "../" ================================================ FILE: lib/csvlint/cli.rb ================================================ require "csvlint" require "rainbow" require "active_support/json" require "json" require "thor" require "active_support/inflector" module Csvlint class Cli < Thor desc "myfile.csv OR csvlint http://example.com/myfile.csv", "Supports validating CSV files to check their syntax and contents" option :dump_errors, desc: "Pretty print error and warning objects.", type: :boolean, aliases: :d option :schema, banner: "FILENAME OR URL", desc: "Schema file", aliases: :s option :json, desc: "Output errors as JSON", type: :boolean, aliases: :j option :werror, desc: "Make all warnings into errors", type: :boolean, aliases: :w def validate(source = nil) source = read_source(source) @schema = get_schema(options[:schema]) if options[:schema] fetch_schema_tables(@schema, options) if source.nil? Rainbow.enabled = $stdout.tty? valid = validate_csv(source, @schema, options[:dump_errors], options[:json], options[:werror]) exit 1 unless valid end def help self.class.command_help(shell, :validate) end default_task :validate private def read_source(source) if source.nil? # If no source is present, try reading from stdin if !$stdin.tty? 
source = begin StringIO.new($stdin.read) rescue nil end return_error "No CSV data to validate" if !options[:schema] && source.nil? end else # If the source isn't a URL, it's a file unless /^http(s)?/.match?(source) begin source = File.new(source) rescue Errno::ENOENT return_error "#{source} not found" end end end source end def get_schema(schema) begin schema = Csvlint::Schema.load_from_uri(schema, false) rescue Csvlint::Csvw::MetadataError => e return_error "invalid metadata: #{e.message}#{" at " + e.path if e.path}" rescue OpenURI::HTTPError, Errno::ENOENT return_error "#{options[:schema]} not found" end if schema.instance_of?(Csvlint::Schema) && schema.description == "malformed" return_error "invalid metadata: malformed JSON" end schema end def fetch_schema_tables(schema, options) valid = true unless schema.instance_of? Csvlint::Csvw::TableGroup return_error "No CSV data to validate." end schema.tables.keys.each do |source| unless /^http(s)?/.match?(source) begin source = source.sub("file:", "") source = File.new(source) rescue Errno::ENOENT return_error "#{source} not found" end end valid &= validate_csv(source, schema, options[:dump_errors], nil, options[:werror]) end exit 1 unless valid end def print_error(index, error, dump, color) location = "" location += error.row.to_s if error.row location += "#{error.row ? "," : ""}#{error.column}" if error.column if error.row || error.column location = "#{error.row ? "Row" : "Column"}: #{location}" end output_string = "#{index + 1}. " if error.column && @schema&.instance_of?(Csvlint::Schema) unless @schema.fields[error.column - 1].nil? output_string += "#{@schema.fields[error.column - 1].name}: " end end output_string += error.type.to_s output_string += ". #{location}" unless location.empty? output_string += ". 
#{error.content}" if error.content puts Rainbow(output_string).color(color) if dump pp error end end def print_errors(errors, dump) if errors.size > 0 errors.each_with_index { |error, i| print_error(i, error, dump, :red) } end end def return_error(message) puts Rainbow(message).red exit 1 end def validate_csv(source, schema, dump, json, werror) @error_count = 0 validator = if json === true Csvlint::Validator.new(source, {}, schema) else Csvlint::Validator.new(source, {}, schema, {lambda: report_lines}) end csv = if source.instance_of?(String) source elsif source.instance_of?(File) source.path else "CSV" end if json === true json = { validation: { state: validator.valid? ? "valid" : "invalid", errors: validator.errors.map { |v| hashify(v) }, warnings: validator.warnings.map { |v| hashify(v) }, info: validator.info_messages.map { |v| hashify(v) } } }.to_json print json else puts "\r\n#{csv} is #{validator.valid? ? Rainbow("VALID").green : Rainbow("INVALID").red}" print_errors(validator.errors, dump) print_errors(validator.warnings, dump) end return false if werror && validator.warnings.size > 0 validator.valid? end def hashify(error) h = { type: error.type, category: error.category, row: error.row, col: error.column } if error.column && @schema&.instance_of?(Csvlint::Schema) && !@schema.fields[error.column - 1].nil? 
field = @schema.fields[error.column - 1] h[:header] = field.name h[:constraints] = field.constraints.map { |k, v| [k.underscore, v] }.to_h end h end def report_lines lambda do |row| new_errors = row.errors.count if new_errors > @error_count print Rainbow("!").red else print Rainbow(".").green end @error_count = new_errors end end end end ================================================ FILE: lib/csvlint/csvw/column.rb ================================================ module Csvlint module Csvw class Column include Csvlint::ErrorCollector attr_reader :id, :about_url, :datatype, :default, :lang, :name, :null, :number, :ordered, :property_url, :required, :separator, :source_number, :suppress_output, :text_direction, :default_name, :titles, :value_url, :virtual, :annotations def initialize(number, name, id: nil, about_url: nil, datatype: {"@id" => "http://www.w3.org/2001/XMLSchema#string"}, default: "", lang: "und", null: [""], ordered: false, property_url: nil, required: false, separator: nil, source_number: nil, suppress_output: false, text_direction: :inherit, default_name: nil, titles: {}, value_url: nil, virtual: false, annotations: [], warnings: []) @number = number @name = name @id = id @about_url = about_url @datatype = datatype @default = default @lang = lang @null = null @ordered = ordered @property_url = property_url @required = required @separator = separator @source_number = source_number || number @suppress_output = suppress_output @text_direction = text_direction @default_name = default_name @titles = titles @value_url = value_url @virtual = virtual @annotations = annotations reset @warnings += warnings end def self.from_json(number, column_desc, base_url = nil, lang = "und", inherited_properties = {}) annotations = {} warnings = [] column_properties = {} inherited_properties = inherited_properties.clone column_desc.each do |property, value| if property == "@type" raise Csvlint::Csvw::MetadataError.new("columns[#{number}].@type"), "@type of column is not 
'Column'" if value != "Column" else v, warning, type = Csvw::PropertyChecker.check_property(property, value, base_url, lang) warnings += Array(warning).map { |w| Csvlint::ErrorMessage.new(w, :metadata, nil, nil, "#{property}: #{value}", nil) } unless warning.nil? || warning.empty? if type == :annotation annotations[property] = v elsif type == :common || type == :column column_properties[property] = v elsif type == :inherited inherited_properties[property] = v else warnings << Csvlint::ErrorMessage.new(:invalid_property, :metadata, nil, nil, "column: #{property}", nil) end end end new(number, column_properties["name"], id: column_properties["@id"], datatype: inherited_properties["datatype"] || {"@id" => "http://www.w3.org/2001/XMLSchema#string"}, lang: inherited_properties["lang"] || "und", null: inherited_properties["null"] || [""], default: inherited_properties["default"] || "", about_url: inherited_properties["aboutUrl"], property_url: inherited_properties["propertyUrl"], value_url: inherited_properties["valueUrl"], required: inherited_properties["required"] || false, separator: inherited_properties["separator"], ordered: inherited_properties["ordered"] || false, default_name: column_properties["titles"] && column_properties["titles"][lang] ? column_properties["titles"][lang][0] : nil, titles: column_properties["titles"], suppress_output: column_properties["suppressOutput"] || false, virtual: column_properties["virtual"] || false, annotations: annotations, warnings: warnings) end def validate_header(header, strict) reset if strict || @titles valid_headers = @titles ? @titles.map { |l, v| v if Column.languages_match(l, lang) }.flatten : [] unless valid_headers.include? header if strict build_errors(:invalid_header, :schema, 1, @number, header, @titles) else build_warnings(:invalid_header, :schema, 1, @number, header, @titles) end end end valid? end def validate(string_value, row = nil) reset string_value ||= @default if null.include? 
class << self
  # Builds a parser lambda for a date/time datatype.
  #
  # type    - datatype URI used to build the default DateFormat
  # warning - symbol returned when a value cannot be parsed
  #
  # The lambda takes (value, format) and returns [parsed, nil] on success
  # or [nil, warning] on failure. When format is nil a default DateFormat
  # for +type+ is used; it is created lazily on first use and then reused
  # instead of being rebuilt for every cell.
  def create_date_parser(type, warning)
    default_format = nil
    lambda { |value, format|
      if format.nil?
        default_format ||= Csvlint::Csvw::DateFormat.new(nil, type)
        format = default_format
      end
      parsed = format.parse(value)
      parsed.nil? ? [nil, warning] : [parsed, nil]
    }
  end

  # Builds a parser lambda that accepts a value only when the WHOLE value
  # (ignoring surrounding whitespace) matches +regexp+.
  #
  # The supplied expression is wrapped in \A...\z so that a string which
  # merely contains a valid lexical form somewhere inside it (for example
  # "abcP1Yxyz" for a duration) is rejected.
  #
  # The lambda takes (value, format) and returns [value, nil] on success or
  # [nil, warning] on failure (including a nil value). format is unused.
  def create_regexp_based_parser(regexp, warning)
    whole_value = Regexp.new("\\A\\s*(?:#{regexp.source})\\s*\\z", regexp.options)
    lambda { |value, format|
      value&.match?(whole_value) ? [value, nil] : [nil, warning]
    }
  end

  # True when two language tags are compatible: they are equal, either one
  # is "und", or one is a prefix of the other followed by "-" (so "en"
  # matches "en-GB").
  #
  # Uses a literal prefix comparison so that characters in a tag are never
  # interpreted as regular-expression syntax.
  def languages_match(l1, l2)
    return true if l1 == l2 || l1 == "und" || l2 == "und"

    first = l1.to_s
    second = l2.to_s
    first.start_with?("#{second}-") || second.start_with?("#{first}-")
  end
end
# Checks the length, minLength and maxLength facets of the datatype.
#
# value - the parsed cell value (must respond to length)
# row   - row number used when reporting errors
#
# The measured length is value.length, except that base64Binary values are
# measured as decoded bytes (padding stripped, * 3 / 4) and hexBinary
# values as length / 2. The datatype is recognised through either its
# "@id" or its "base".
#
# Returns true when every declared facet is satisfied, false otherwise;
# one error is recorded per violated facet.
def validate_length(value, row)
  return true unless datatype["length"] || datatype["minLength"] || datatype["maxLength"]

  type_uris = [datatype["@id"], datatype["base"]]
  length =
    if type_uris.include?("http://www.w3.org/2001/XMLSchema#hexBinary")
      value.length / 2
    elsif type_uris.include?("http://www.w3.org/2001/XMLSchema#base64Binary")
      value.gsub(/==?$/, "").length * 3 / 4
    else
      value.length
    end

  valid = true
  if datatype["minLength"] && length < datatype["minLength"]
    build_errors(:min_length, :schema, row, number, value, {"minLength" => datatype["minLength"]})
    valid = false
  end
  if datatype["maxLength"] && length > datatype["maxLength"]
    build_errors(:max_length, :schema, row, number, value, {"maxLength" => datatype["maxLength"]})
    valid = false
  end
  if datatype["length"] && length != datatype["length"]
    build_errors(:length, :schema, row, number, value, {"length" => datatype["length"]})
    valid = false
  end
  valid
end

# Checks the value against the datatype's "format", when one is declared.
#
# value - the parsed cell value
# row   - row number used when reporting errors
#
# The validator is looked up by the datatype's "base" and falls back to its
# "@id", the same key that validate uses to pick a parser, so a datatype
# described only by "@id" no longer fails with a call on nil. When no
# validator is registered for the datatype there is nothing to check and
# the value is accepted.
#
# Returns true when the value is acceptable, false after recording a
# :format error.
def validate_format(value, row)
  format = datatype["format"]
  return true unless format

  validator = DATATYPE_FORMAT_VALIDATION[datatype["base"] || datatype["@id"]]
  return true if validator.nil? || validator.call(value, format)

  build_errors(:format, :schema, row, number, value, {"format" => format})
  false
end
# Format validator for datatypes whose "format" is matched as a regular
# expression against the value. Returns the result of =~ (truthy on a match).
REGEXP_VALIDATION = lambda { |value, format| value =~ format }

# Format validator for datatypes that need no check beyond parsing; always
# returns true.
NO_ADDITIONAL_VALIDATION = lambda { |value, format| true }

# Maps a datatype URI to the lambda that validate_format calls with
# (value, format).
DATATYPE_FORMAT_VALIDATION = {
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" => REGEXP_VALIDATION,
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML" => REGEXP_VALIDATION,
  "http://www.w3.org/ns/csvw#JSON" => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#anyAtomicType" => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#anyURI" => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#base64Binary" => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#boolean" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#date" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#dateTime" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#decimal" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#integer" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#long" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#int" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#short" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#byte" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#positiveInteger" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#unsignedLong" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#unsignedInt" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#unsignedShort" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#unsignedByte" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#negativeInteger" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#double" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#duration" => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#dayTimeDuration" => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#yearMonthDuration" => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#float" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#gDay" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#gMonth" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#gMonthDay" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#gYear" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#gYearMonth" => NO_ADDITIONAL_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#hexBinary" => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#QName" => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#string" => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#normalizedString" => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#token" => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#language" => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#Name" => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#NMTOKEN" => REGEXP_VALIDATION,
  "http://www.w3.org/2001/XMLSchema#time" => NO_ADDITIONAL_VALIDATION
}

# Parser that strips surrounding whitespace and never reports a warning.
# Every parser below returns a two-element [value, warning] pair; warning is
# nil on success.
TRIM_VALUE = lambda { |value, format| [value.strip, nil] }

# Parser that returns the value untouched and never reports a warning.
ALL_VALUES_VALID = lambda { |value, format| [value, nil] }

# Parses a number with the supplied format object, or with a default
# NumberFormat ("." as decimal separator) when format is nil. The optional
# third argument is forwarded to that default NumberFormat as its integer
# flag. Returns [number, nil], or [nil, :invalid_number] when format.parse
# returns nil.
NUMERIC_PARSER = lambda { |value, format, integer = false|
  format = Csvlint::Csvw::NumberFormat.new(nil, nil, ".", integer) if format.nil?
  v = format.parse(value)
  return nil, :invalid_number if v.nil?
  [v, nil]
}

# Maps a datatype URI to the lambda that validate calls with
# (string_value, format) to turn a cell into a typed value.
DATATYPE_PARSER = {
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" => TRIM_VALUE,
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML" => TRIM_VALUE,
  "http://www.w3.org/ns/csvw#JSON" => TRIM_VALUE,
  "http://www.w3.org/2001/XMLSchema#anyAtomicType" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#anyURI" => TRIM_VALUE,
  "http://www.w3.org/2001/XMLSchema#base64Binary" => TRIM_VALUE,
  # Without a format only "true"/"1" and "false"/"0" are accepted; with a
  # format, format[0] is the true literal and format[1] the false literal.
  "http://www.w3.org/2001/XMLSchema#boolean" => lambda { |value, format|
    if format.nil?
      return true, nil if ["true", "1"].include? value
      return false, nil if ["false", "0"].include? value
    else
      return true, nil if value == format[0]
      return false, nil if value == format[1]
    end
    [value, :invalid_boolean]
  },
  "http://www.w3.org/2001/XMLSchema#date" => create_date_parser("http://www.w3.org/2001/XMLSchema#date", :invalid_date),
  "http://www.w3.org/2001/XMLSchema#dateTime" => create_date_parser("http://www.w3.org/2001/XMLSchema#dateTime", :invalid_date_time),
  "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => create_date_parser("http://www.w3.org/2001/XMLSchema#dateTimeStamp", :invalid_date_time_stamp),
  # Rejects exponent notation and the special values NaN/INF/-INF before
  # delegating to the numeric parser.
  "http://www.w3.org/2001/XMLSchema#decimal" => lambda { |value, format|
    return nil, :invalid_decimal if /(E|e|^(NaN|INF|-INF)$)/.match?(value)
    NUMERIC_PARSER.call(value, format)
  },
  # Numeric parse with the integer flag set; the result must be an Integer.
  "http://www.w3.org/2001/XMLSchema#integer" => lambda { |value, format|
    v, w = NUMERIC_PARSER.call(value, format, true)
    return v, :invalid_integer unless w.nil?
    return nil, :invalid_integer unless v.is_a? Integer
    [v, w]
  },
  # The bounded integer types below all follow one shape: parse with the
  # parent type's parser, replace any warning with the type's own symbol,
  # then apply the range check for the type.
  "http://www.w3.org/2001/XMLSchema#long" => lambda { |value, format|
    v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
    return v, :invalid_long unless w.nil?
    return nil, :invalid_long unless v <= 9223372036854775807 && v >= -9223372036854775808
    [v, w]
  },
  "http://www.w3.org/2001/XMLSchema#int" => lambda { |value, format|
    v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
    return v, :invalid_int unless w.nil?
    return nil, :invalid_int unless v <= 2147483647 && v >= -2147483648
    [v, w]
  },
  "http://www.w3.org/2001/XMLSchema#short" => lambda { |value, format|
    v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
    return v, :invalid_short unless w.nil?
    return nil, :invalid_short unless v <= 32767 && v >= -32768
    [v, w]
  },
  "http://www.w3.org/2001/XMLSchema#byte" => lambda { |value, format|
    v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
    return v, :invalid_byte unless w.nil?
    return nil, :invalid_byte unless v <= 127 && v >= -128
    [v, w]
  },
  "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => lambda { |value, format|
    v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
    return v, :invalid_nonNegativeInteger unless w.nil?
    return nil, :invalid_nonNegativeInteger unless v >= 0
    [v, w]
  },
  "http://www.w3.org/2001/XMLSchema#positiveInteger" => lambda { |value, format|
    v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
    return v, :invalid_positiveInteger unless w.nil?
    return nil, :invalid_positiveInteger unless v > 0
    [v, w]
  },
  # The unsigned types build on nonNegativeInteger and only add an upper bound.
  "http://www.w3.org/2001/XMLSchema#unsignedLong" => lambda { |value, format|
    v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#nonNegativeInteger"].call(value, format)
    return v, :invalid_unsignedLong unless w.nil?
    return nil, :invalid_unsignedLong unless v <= 18446744073709551615
    [v, w]
  },
  "http://www.w3.org/2001/XMLSchema#unsignedInt" => lambda { |value, format|
    v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#nonNegativeInteger"].call(value, format)
    return v, :invalid_unsignedInt unless w.nil?
    return nil, :invalid_unsignedInt unless v <= 4294967295
    [v, w]
  },
  "http://www.w3.org/2001/XMLSchema#unsignedShort" => lambda { |value, format|
    v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#nonNegativeInteger"].call(value, format)
    return v, :invalid_unsignedShort unless w.nil?
    return nil, :invalid_unsignedShort unless v <= 65535
    [v, w]
  },
  "http://www.w3.org/2001/XMLSchema#unsignedByte" => lambda { |value, format|
    v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#nonNegativeInteger"].call(value, format)
    return v, :invalid_unsignedByte unless w.nil?
    return nil, :invalid_unsignedByte unless v <= 255
    [v, w]
  },
  "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" => lambda { |value, format|
    v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
    return v, :invalid_nonPositiveInteger unless w.nil?
    return nil, :invalid_nonPositiveInteger unless v <= 0
    [v, w]
  },
  "http://www.w3.org/2001/XMLSchema#negativeInteger" => lambda { |value, format|
    v, w = DATATYPE_PARSER["http://www.w3.org/2001/XMLSchema#integer"].call(value, format)
    return v, :invalid_negativeInteger unless w.nil?
    return nil, :invalid_negativeInteger unless v < 0
    [v, w]
  },
  "http://www.w3.org/2001/XMLSchema#double" => NUMERIC_PARSER,
  # The regular expressions below are taken from the XML Schema datatypes spec.
  # NOTE(review): yearMonthDuration reports :invalid_duration while
  # dayTimeDuration has its own symbol - confirm this is intended.
  "http://www.w3.org/2001/XMLSchema#duration" => create_regexp_based_parser(/-?P((([0-9]+Y([0-9]+M)?([0-9]+D)?|([0-9]+M)([0-9]+D)?|([0-9]+D))(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S))))/, :invalid_duration),
  "http://www.w3.org/2001/XMLSchema#dayTimeDuration" => create_regexp_based_parser(/-?P(([0-9]+D(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S))))/, :invalid_dayTimeDuration),
  "http://www.w3.org/2001/XMLSchema#yearMonthDuration" => create_regexp_based_parser(/-?P([0-9]+Y([0-9]+M)?|([0-9]+M))/, :invalid_duration),
  "http://www.w3.org/2001/XMLSchema#float" => NUMERIC_PARSER,
  "http://www.w3.org/2001/XMLSchema#gDay" => create_date_parser("http://www.w3.org/2001/XMLSchema#gDay", :invalid_gDay),
  "http://www.w3.org/2001/XMLSchema#gMonth" => create_date_parser("http://www.w3.org/2001/XMLSchema#gMonth", :invalid_gMonth),
  "http://www.w3.org/2001/XMLSchema#gMonthDay" => create_date_parser("http://www.w3.org/2001/XMLSchema#gMonthDay", :invalid_gMonthDay),
  "http://www.w3.org/2001/XMLSchema#gYear" => create_date_parser("http://www.w3.org/2001/XMLSchema#gYear", :invalid_gYear),
  "http://www.w3.org/2001/XMLSchema#gYearMonth" => create_date_parser("http://www.w3.org/2001/XMLSchema#gYearMonth", :invalid_gYearMonth),
  "http://www.w3.org/2001/XMLSchema#hexBinary" => TRIM_VALUE,
  "http://www.w3.org/2001/XMLSchema#QName" => TRIM_VALUE,
  "http://www.w3.org/2001/XMLSchema#string" => ALL_VALUES_VALID,
  "http://www.w3.org/2001/XMLSchema#normalizedString" => TRIM_VALUE,
  "http://www.w3.org/2001/XMLSchema#token" => TRIM_VALUE,
  "http://www.w3.org/2001/XMLSchema#language" => TRIM_VALUE,
  "http://www.w3.org/2001/XMLSchema#Name" => TRIM_VALUE,
  "http://www.w3.org/2001/XMLSchema#NMTOKEN" => TRIM_VALUE,
  "http://www.w3.org/2001/XMLSchema#time" => create_date_parser("http://www.w3.org/2001/XMLSchema#time", :invalid_time)
}
# Parses a date/time string with this format's regular expression.
#
# value - the string to parse
#
# Returns nil when the value does not match the expression or when the
# matched fields do not form a real date/time (for example day 30 in
# February). Otherwise returns a Hash holding:
#   * one integer entry per captured field (keyed by the group name as a
#     symbol), except :second which is a Float and :timezone which is a
#     String normalised to "+hh:mm" / "-hh:mm" ("Z" becomes "+00:00");
#   * :dateTime - a Date (date type) or DateTime (all other types);
#   * :string   - the canonical lexical form built from the captured fields.
#
# Invalid field combinations are reported as nil for every type, not only
# for date and dateTime, so callers never see an ArgumentError from
# Date/DateTime construction.
def parse(value)
  match = @regexp.match(value)
  return nil if match.nil?

  parsed = {}
  match.names.each do |field|
    captured = match[field]
    next if captured.nil?

    case field
    when "timezone"
      tz = captured
      tz = "+00:00" if tz == "Z"
      # "+hh" -> "+hh:00"
      tz += ":00" if tz.length == 3
      # "+hhmm" -> "+hh:mm"
      tz = "#{tz[0..2]}:#{tz[3..4]}" unless tz.include?(":")
      parsed[:timezone] = tz
    when "second"
      parsed[:second] = captured.to_f
    else
      parsed[field.to_sym] = captured.to_i
    end
  end

  begin
    parsed[:dateTime] =
      case @type
      when "http://www.w3.org/2001/XMLSchema#date"
        Date.new(match["year"].to_i, match["month"].to_i, match["day"].to_i)
      when "http://www.w3.org/2001/XMLSchema#dateTime"
        seconds = match.names.include?("second") ? match["second"].to_f : 0
        offset = (match.names.include?("timezone") && match["timezone"]) ? match["timezone"] : ""
        DateTime.new(match["year"].to_i, match["month"].to_i, match["day"].to_i, match["hour"].to_i, match["minute"].to_i, seconds, offset)
      else
        # Partial types (gYear, gMonthDay, time, ...): missing fields take
        # neutral defaults so a DateTime can still be built for comparisons.
        DateTime.new(parsed[:year] || 0, parsed[:month] || 1, parsed[:day] || 1, parsed[:hour] || 0, parsed[:minute] || 0, parsed[:second] || 0, parsed[:timezone] || "+00:00")
      end
  rescue ArgumentError
    # The regexp accepted the text but the fields are not a real date/time.
    return nil
  end

  tz_suffix = parsed[:timezone] ? parsed[:timezone].sub("+00:00", "Z") : ""
  lexical =
    if parsed[:year]
      if !parsed[:month]
        # gYear
        format("%04d", parsed[:year])
      elsif !parsed[:day]
        # gYearMonth
        format("%04d-%02d", parsed[:year], parsed[:month])
      elsif !parsed[:hour]
        # date
        format("%04d-%02d-%02d", parsed[:year], parsed[:month], parsed[:day])
      else
        # dateTime
        format("%04d-%02d-%02dT%02d:%02d:%02g", parsed[:year], parsed[:month], parsed[:day], parsed[:hour], parsed[:minute] || 0, parsed[:second] || 0)
      end
    elsif parsed[:month]
      if parsed[:day]
        # gMonthDay
        format("--%02d-%02d", parsed[:month], parsed[:day])
      else
        # gMonth
        format("--%02d", parsed[:month])
      end
    elsif parsed[:day]
      # gDay
      format("---%02d", parsed[:day])
    else
      # time
      "#{format("%02d", parsed[:hour])}:#{format("%02d", parsed[:minute])}:#{format("%02g", parsed[:second] || 0)}"
    end
  parsed[:string] = lexical + tz_suffix

  parsed
end
=> Regexp.new("^#{FIELDS["d"]}.#{FIELDS["M"]}.#{FIELDS["yyyy"]}$"), "MM.dd.yyyy" => Regexp.new("^#{FIELDS["MM"]}.#{FIELDS["dd"]}.#{FIELDS["yyyy"]}$"), "M.d.yyyy" => Regexp.new("^#{FIELDS["M"]}.#{FIELDS["d"]}.#{FIELDS["yyyy"]}$") } TIME_PATTERN_REGEXP = { "HH:mm:ss" => Regexp.new("^#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?#{FIELDS["ss"]})$"), "HHmmss" => Regexp.new("^#{FIELDS["HH"]}#{FIELDS["mm"]}(?#{FIELDS["ss"]})$"), "HH:mm" => Regexp.new("^#{FIELDS["HH"]}:#{FIELDS["mm"]}$"), "HHmm" => Regexp.new("^#{FIELDS["HH"]}#{FIELDS["mm"]}$") } DATE_TIME_PATTERN_REGEXP = { "yyyy-MM-ddTHH:mm:ss" => Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}T#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?#{FIELDS["ss"]})$"), "yyyy-MM-ddTHH:mm" => Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}T#{FIELDS["HH"]}:#{FIELDS["mm"]}$") } DEFAULT_REGEXP = { "http://www.w3.org/2001/XMLSchema#date" => Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}#{FIELDS["XXX"]}?$"), "http://www.w3.org/2001/XMLSchema#dateTime" => Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}T#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?#{FIELDS["ss"]}(.[0-9]+)?)#{FIELDS["XXX"]}?$"), "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}T#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?#{FIELDS["ss"]}(.[0-9]+)?)#{FIELDS["XXX"]}$"), "http://www.w3.org/2001/XMLSchema#gDay" => Regexp.new("^---#{FIELDS["dd"]}#{FIELDS["XXX"]}?$"), "http://www.w3.org/2001/XMLSchema#gMonth" => Regexp.new("^--#{FIELDS["MM"]}#{FIELDS["XXX"]}?$"), "http://www.w3.org/2001/XMLSchema#gMonthDay" => Regexp.new("^--#{FIELDS["MM"]}-#{FIELDS["dd"]}#{FIELDS["XXX"]}?$"), "http://www.w3.org/2001/XMLSchema#gYear" => Regexp.new("^#{FIELDS["yyyy"]}#{FIELDS["XXX"]}?$"), "http://www.w3.org/2001/XMLSchema#gYearMonth" => Regexp.new("^#{FIELDS["yyyy"]}-#{FIELDS["MM"]}#{FIELDS["XXX"]}?$"), "http://www.w3.org/2001/XMLSchema#time" => 
module Csvlint
  module Csvw
    # Error raised for invalid CSVW metadata. It optionally carries the
    # path of the offending item inside the metadata document.
    class MetadataError < StandardError
      # path - location of the problem within the metadata (may be nil)
      def initialize(path = nil)
        @path = path
      end

      # Returns the path given at construction time, or nil.
      def path
        @path
      end
    end
  end
end
@prefix = match["prefix"] @numeric_part = match["numeric_part"] @suffix = match["suffix"] parts = @numeric_part.split("E") mantissa_part = parts[0] exponent_part = parts[1] || "" mantissa_parts = mantissa_part.split(@decimal_separator) # raise Csvw::NumberFormatError, "more than two decimal separators in number format" if parts.length > 2 integer_part = mantissa_parts[0] fractional_part = mantissa_parts[1] || "" if ["+", "-"].include?(integer_part[0]) numeric_part_regexp = "\\#{integer_part[0]}" integer_part = integer_part[1..-1] else numeric_part_regexp = "[-+]?" end min_integer_digits = integer_part.gsub(@grouping_separator, "").delete("#").length min_fraction_digits = fractional_part.gsub(@grouping_separator, "").delete("#").length max_fraction_digits = fractional_part.gsub(@grouping_separator, "").length min_exponent_digits = exponent_part.delete("#").length max_exponent_digits = exponent_part.length integer_parts = integer_part.split(@grouping_separator)[1..-1] @primary_grouping_size = begin integer_parts[-1].length rescue 0 end @secondary_grouping_size = begin integer_parts[-2].length rescue @primary_grouping_size end fractional_parts = fractional_part.split(@grouping_separator)[0..-2] @fractional_grouping_size = begin fractional_parts[0].length rescue 0 end if @primary_grouping_size == 0 integer_regexp = "[0-9]*[0-9]{#{min_integer_digits}}" else leading_regexp = "([0-9]{0,#{@secondary_grouping_size - 1}}#{Regexp.escape(@grouping_separator)})?" secondary_groups = "([0-9]{#{@secondary_grouping_size}}#{Regexp.escape(@grouping_separator)})*" if min_integer_digits > @primary_grouping_size remaining_req_digits = min_integer_digits - @primary_grouping_size req_secondary_groups = (remaining_req_digits / @secondary_grouping_size > 0) ? 
"([0-9]{#{@secondary_grouping_size}}#{Regexp.escape(@grouping_separator)}){#{remaining_req_digits / @secondary_grouping_size}}" : "" if remaining_req_digits % @secondary_grouping_size > 0 final_req_digits = "[0-9]{#{@secondary_grouping_size - (remaining_req_digits % @secondary_grouping_size)}}" final_opt_digits = "[0-9]{0,#{@secondary_grouping_size - (remaining_req_digits % @secondary_grouping_size)}}" integer_regexp = "((#{leading_regexp}#{secondary_groups}#{final_req_digits})|#{final_opt_digits})[0-9]{#{remaining_req_digits % @secondary_grouping_size}}#{Regexp.escape(@grouping_separator)}#{req_secondary_groups}[0-9]{#{@primary_grouping_size}}" else integer_regexp = "(#{leading_regexp}#{secondary_groups})?#{req_secondary_groups}[0-9]{#{@primary_grouping_size}}" end else final_req_digits = (@primary_grouping_size > min_integer_digits) ? "[0-9]{#{@primary_grouping_size - min_integer_digits}}" : "" final_opt_digits = (@primary_grouping_size > min_integer_digits) ? "[0-9]{0,#{@primary_grouping_size - min_integer_digits}}" : "" integer_regexp = "((#{leading_regexp}#{secondary_groups}#{final_req_digits})|#{final_opt_digits})[0-9]{#{min_integer_digits}}" end end numeric_part_regexp += integer_regexp if max_fraction_digits > 0 if @fractional_grouping_size == 0 fractional_regexp = "" fractional_regexp += "[0-9]{#{min_fraction_digits}}" if min_fraction_digits > 0 fractional_regexp += "[0-9]{0,#{max_fraction_digits - min_fraction_digits}}" unless min_fraction_digits == max_fraction_digits fractional_regexp = "#{Regexp.escape(@decimal_separator)}#{fractional_regexp}" fractional_regexp = "(#{fractional_regexp})?" 
if min_fraction_digits == 0 numeric_part_regexp += fractional_regexp else fractional_regexp = "" if min_fraction_digits > 0 if min_fraction_digits >= @fractional_grouping_size # first group of required digits - something like "[0-9]{3}" fractional_regexp += "[0-9]{#{@fractional_grouping_size}}" # additional groups of required digits - something like "(,[0-9]{3}){1}" fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{#{@fractional_grouping_size}}){#{min_fraction_digits / @fractional_grouping_size - 1}}" if min_fraction_digits / @fractional_grouping_size > 1 fractional_regexp += Regexp.escape(@grouping_separator).to_s if min_fraction_digits % @fractional_grouping_size > 0 end # additional required digits - something like ",[0-9]{1}" fractional_regexp += "[0-9]{#{min_fraction_digits % @fractional_grouping_size}}" if min_fraction_digits % @fractional_grouping_size > 0 opt_fractional_digits = max_fraction_digits - min_fraction_digits if opt_fractional_digits > 0 fractional_regexp += "(" if min_fraction_digits % @fractional_grouping_size > 0 # optional fractional digits to complete the group fractional_regexp += "[0-9]{0,#{[opt_fractional_digits, @fractional_grouping_size - (min_fraction_digits % @fractional_grouping_size)].min}}" fractional_regexp += "|" fractional_regexp += "[0-9]{#{[opt_fractional_digits, @fractional_grouping_size - (min_fraction_digits % @fractional_grouping_size)].min}}" else fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{1,#{@fractional_grouping_size}})?" 
fractional_regexp += "|" fractional_regexp += "#{Regexp.escape(@grouping_separator)}[0-9]{#{@fractional_grouping_size}}" end remaining_opt_fractional_digits = opt_fractional_digits - (@fractional_grouping_size - (min_fraction_digits % @fractional_grouping_size)) if remaining_opt_fractional_digits > 0 if remaining_opt_fractional_digits % @fractional_grouping_size > 0 # optional fraction digits in groups fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{#{@fractional_grouping_size}}){0,#{remaining_opt_fractional_digits / @fractional_grouping_size}}" if remaining_opt_fractional_digits > @fractional_grouping_size # remaining optional fraction digits fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{1,#{remaining_opt_fractional_digits % @fractional_grouping_size}})?" else # optional fraction digits in groups fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{#{@fractional_grouping_size}}){0,#{(remaining_opt_fractional_digits / @fractional_grouping_size) - 1}}" if remaining_opt_fractional_digits > @fractional_grouping_size # remaining optional fraction digits fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{1,#{@fractional_grouping_size}})?" end # optional fraction digits in groups fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{#{@fractional_grouping_size}}){0,#{(remaining_opt_fractional_digits / @fractional_grouping_size) - 1}}" if remaining_opt_fractional_digits > @fractional_grouping_size # remaining optional fraction digits fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{1,#{remaining_opt_fractional_digits % @fractional_grouping_size}})?" 
if remaining_opt_fractional_digits % @fractional_grouping_size > 0 end fractional_regexp += ")" end elsif max_fraction_digits % @fractional_grouping_size > 0 # optional fractional digits in groups fractional_regexp += "([0-9]{#{@fractional_grouping_size}}#{Regexp.escape(@grouping_separator)}){0,#{max_fraction_digits / @fractional_grouping_size}}" # remaining optional fraction digits fractional_regexp += "(#{Regexp.escape(@grouping_separator)}[0-9]{1,#{max_fraction_digits % @fractional_grouping_size}})?" if max_fraction_digits % @fractional_grouping_size > 0 else fractional_regexp += "([0-9]{#{@fractional_grouping_size}}#{Regexp.escape(@grouping_separator)}){0,#{(max_fraction_digits / @fractional_grouping_size) - 1}}" if max_fraction_digits > @fractional_grouping_size fractional_regexp += "[0-9]{1,#{@fractional_grouping_size}}" end fractional_regexp = "#{Regexp.escape(@decimal_separator)}#{fractional_regexp}" fractional_regexp = "(#{fractional_regexp})?" if min_fraction_digits == 0 numeric_part_regexp += fractional_regexp end end if max_exponent_digits > 0 numeric_part_regexp += "E" numeric_part_regexp += "[0-9]{0,#{max_exponent_digits - min_exponent_digits}}" unless max_exponent_digits == min_exponent_digits numeric_part_regexp += "[0-9]{#{min_exponent_digits}}" unless min_exponent_digits == 0 end @regexp = Regexp.new("^(?#{Regexp.escape(@prefix)})(?#{numeric_part_regexp})(?#{suffix})$") end end def match(value) value&.match?(@regexp) ? true : false end def parse(value) if @pattern.nil? return nil if !@grouping_separator.nil? && value =~ Regexp.new("((^#{Regexp.escape(@grouping_separator)})|#{Regexp.escape(@grouping_separator)}{2})") value.gsub!(@grouping_separator, "") unless @grouping_separator.nil? value.gsub!(@decimal_separator, ".") unless @decimal_separator.nil? 
if value&.match?(@regexp) case value when "NaN" Float::NAN when "INF" Float::INFINITY when "-INF" -Float::INFINITY else case value[-1] when "%" value.to_f / 100 when "‰" value.to_f / 1000 else if @integer.nil? value.include?(".") ? value.to_f : value.to_i else @integer ? value.to_i : value.to_f end end end end else match = @regexp.match(value) return nil if match.nil? number = match["numeric_part"] number.gsub!(@grouping_separator, "") unless @grouping_separator.nil? number.gsub!(@decimal_separator, ".") unless @decimal_separator.nil? number = @integer ? number.to_i : number.to_f number = number.to_f / 100 if match["prefix"].include?("%") || match["suffix"].include?("%") number = number.to_f / 1000 if match["prefix"].include?("‰") || match["suffix"].include?("‰") number end end private INTEGER_REGEXP = /^[-+]?[0-9]+[%‰]?$/ end class NumberFormatError < StandardError end end end ================================================ FILE: lib/csvlint/csvw/property_checker.rb ================================================ module Csvlint module Csvw class PropertyChecker class << self def check_property(property, value, base_url, lang) if PROPERTIES.include? property PROPERTIES[property].call(value, base_url, lang) elsif property =~ /^([a-z]+):/ && NAMESPACES.include?(property.split(":")[0]) value, warnings = check_common_property_value(value, base_url, lang) [value, warnings, :annotation] else # property name must be an absolute URI begin return value, :invalid_property, nil if URI(property).scheme.nil? 
value, warnings = check_common_property_value(value, base_url, lang) [value, warnings, :annotation] rescue [value, :invalid_property, nil] end end end private def check_common_property_value(value, base_url, lang) case value when Hash value = value.clone warnings = [] value.each do |p, v| case p when "@context" raise Csvlint::Csvw::MetadataError.new(p), "common property has @context property" when "@list" raise Csvlint::Csvw::MetadataError.new(p), "common property has @list property" when "@set" raise Csvlint::Csvw::MetadataError.new(p), "common property has @set property" when "@type" if value["@value"] && BUILT_IN_DATATYPES.include?(v) elsif !value["@value"] && BUILT_IN_TYPES.include?(v) elsif ((v.is_a? String) && (v =~ /^([a-z]+):/)) && NAMESPACES.include?(v.split(":")[0]) else # must be an absolute URI begin raise Csvlint::Csvw::MetadataError.new, "common property has invalid @type (#{v})" if URI(v).scheme.nil? rescue raise Csvlint::Csvw::MetadataError.new, "common property has invalid @type (#{v})" end end when "@id" unless base_url.nil? begin v = URI.join(base_url, v) rescue raise Csvlint::Csvw::MetadataError.new, "common property has invalid @id (#{v})" end end when "@value" raise Csvlint::Csvw::MetadataError.new, "common property with @value has both @language and @type" if value["@type"] && value["@language"] raise Csvlint::Csvw::MetadataError.new, "common property with @value has properties other than @language or @type" unless value.except("@type").except("@language").except("@value").empty? when "@language" raise Csvlint::Csvw::MetadataError.new, "common property with @language lacks a @value" unless value["@value"] raise Csvlint::Csvw::MetadataError.new, "common property has invalid @language (#{v})" if !((v.is_a? String) && (v =~ BCP47_LANGUAGE_REGEXP)) || !v.nil? 
else
                # any other key: "@"-keywords are rejected, everything else is
                # validated recursively as a nested common property value
                if p[0] == "@"
                  raise Csvlint::Csvw::MetadataError.new, "common property has property other than @id, @type, @value or @language beginning with @ (#{p})"
                else
                  v, w = check_common_property_value(v, base_url, lang)
                  warnings += Array(w)
                end
              end
              # store the (possibly rewritten) value back on the copy
              value[p] = v
            end
            [value, warnings]
          when String
            # plain strings are tagged with the language unless it is "und"
            if lang == "und"
              [value, nil]
            else
              [{"@value" => value, "@language" => lang}, nil]
            end
          when Array
            # validate each element, collecting all warnings
            values = []
            warnings = []
            value.each do |v|
              v, w = check_common_property_value(v, base_url, lang)
              warnings += Array(w)
              values << v
            end
            [values, warnings]
          else
            [value, nil]
          end
        end

        # Normalises one range facet (+property+) of the datatype hash +value+.
        #
        # For date/time datatypes the facet is parsed with DateFormat and
        # replaced in place by the parsed value; an unparseable facet is
        # removed and reported as a warning. Numeric datatypes are left as
        # they are. Any other datatype raises MetadataError.
        #
        # Returns an array of warnings (empty when there is nothing to report).
        def convert_value_facet(value, property, datatype)
          if value[property]
            if DATE_FORMAT_DATATYPES.include?(datatype)
              format = Csvlint::Csvw::DateFormat.new(nil, datatype)
              v = format.parse(value[property])
              if v.nil?
                value.delete(property)
                # NOTE(review): the symbol name itself starts with ":" here —
                # confirm that consumers expect this spelling
                return [:":invalid_#{property}"]
              else
                value[property] = v
                return []
              end
            elsif NUMERIC_FORMAT_DATATYPES.include?(datatype)
              return []
            else
              raise Csvlint::Csvw::MetadataError.new("datatype.#{property}"), "#{property} is only allowed for numeric, date/time and duration types"
            end
          end
          []
        end

        # Builds a validator that accepts Arrays; anything else yields
        # false with :invalid_value.
        def array_property(type)
          lambda { |value, base_url, lang|
            return value, nil, type if value.instance_of? Array
            [false, :invalid_value, type]
          }
        end

        # Builds a validator that accepts exactly true or false; anything
        # else yields false with :invalid_value.
        def boolean_property(type)
          lambda { |value, base_url, lang|
            return value, nil, type if value == true || value == false
            [false, :invalid_value, type]
          }
        end

        # Builds a validator that accepts Strings; anything else yields ""
        # with :invalid_value.
        def string_property(type)
          lambda { |value, base_url, lang|
            return value, nil, type if value.instance_of? String
            ["", :invalid_value, type]
          }
        end

        # Builds a validator that wraps String values in a URITemplate;
        # anything else yields an empty template with :invalid_value.
        def uri_template_property(type)
          lambda { |value, base_url, lang|
            return URITemplate.new(value), nil, type if value.instance_of?
String
            [URITemplate.new(""), :invalid_value, type]
          }
        end

        # Builds a validator that accepts non-negative Integers; anything
        # else yields nil with :invalid_value.
        def numeric_property(type)
          lambda { |value, base_url, lang|
            return value, nil, type if value.is_a?(Integer) && value >= 0
            [nil, :invalid_value, type]
          }
        end

        # Builds a validator for link properties. String values are resolved
        # against base_url (when one is given) and returned as a URI; other
        # values yield base_url with :invalid_value. Values starting with
        # "_:" raise MetadataError.
        def link_property(type)
          lambda { |value, base_url, lang|
            raise Csvlint::Csvw::MetadataError.new, "URL #{value} starts with _:" if /^_:/.match?(value.to_s)
            return (base_url.nil? ? URI(value) : URI.join(base_url, value)), nil, type if value.instance_of? String
            [base_url, :invalid_value, type]
          }
        end

        # Builds a validator that accepts values matching BCP47_REGEXP;
        # anything else yields nil with :invalid_value.
        def language_property(type)
          lambda { |value, base_url, lang|
            return value, nil, type if BCP47_REGEXP.match?(value)
            [nil, :invalid_value, type]
          }
        end

        # Builds a validator for natural-language properties. The result is
        # a hash from language code to an array of strings:
        # - a String becomes {lang => [value]}
        # - an Array keeps its String entries under lang
        # - a Hash keeps the String entries of each key matching BCP47_REGEXP
        # Non-string entries and invalid language keys are dropped with a
        # warning.
        def natural_language_property(type)
          lambda { |value, base_url, lang|
            warnings = []
            if value.instance_of? String
              [{lang => [value]}, nil, type]
            elsif value.instance_of? Array
              valid_titles = []
              value.each do |title|
                if title.instance_of? String
                  valid_titles << title
                else
                  warnings << :invalid_value
                end
              end
              [{lang => valid_titles}, warnings, type]
            elsif value.instance_of? Hash
              # work on a copy so the caller's hash is left untouched
              value = value.clone
              value.each do |l, v|
                if BCP47_REGEXP.match?(l)
                  valid_titles = []
                  Array(v).each do |title|
                    if title.instance_of? String
                      valid_titles << title
                    else
                      warnings << :invalid_value
                    end
                  end
                  value[l] = valid_titles
                else
                  value.delete(l)
                  warnings << :invalid_language
                end
              end
              # nothing usable survived the filtering above
              warnings << :invalid_value if value.empty?
              [value, warnings, type]
            else
              [{}, :invalid_value, type]
            end
          }
        end

        # Builds a validator that wraps the value in an Array; it never
        # produces a warning.
        def column_reference_property(type)
          lambda { |value, base_url, lang|
            [Array(value), nil, type]
          }
        end
      end

      # Maps each known property name to its validator. Every validator is
      # called with (value, base_url, lang) and returns a
      # [value, warnings, type] triple.
      PROPERTIES = {
        # context properties
        "@language" => language_property(:context),
        "@base" => link_property(:context),
        # common properties
        "@id" => link_property(:common),
        "notes" => lambda { |value, base_url, lang|
          return false, :invalid_value, :common unless value.instance_of?
Array values = [] warnings = [] value.each do |v| v, w = check_common_property_value(v, base_url, lang) values << v warnings += w end [values, warnings, :common] }, "suppressOutput" => boolean_property(:common), "dialect" => lambda { |value, base_url, lang| if value.instance_of? Hash value = value.clone warnings = [] value.each do |p, v| if p == "@id" raise Csvlint::Csvw::MetadataError.new("dialect.@id"), "@id starts with _:" if /^_:/.match?(v) elsif p == "@type" raise Csvlint::Csvw::MetadataError.new("dialect.@type"), "@type of dialect is not 'Dialect'" if v != "Dialect" else v, warning, type = check_property(p, v, base_url, lang) if type == :dialect && (warning.nil? || warning.empty?) value[p] = v else value.delete(p) warnings << :invalid_property unless type == :dialect warnings += Array(warning) end end end [value, warnings, :common] else [{}, :invalid_value, :common] end }, # inherited properties "null" => lambda { |value, base_url, lang| case value when String [[value], nil, :inherited] when Array values = [] warnings = [] value.each do |v| if v.instance_of? String values << v else warnings << :invalid_value end end [values, warnings, :inherited] else [[""], :invalid_value, :inherited] end }, "default" => string_property(:inherited), "separator" => lambda { |value, base_url, lang| return value, nil, :inherited if value.instance_of?(String) || value.nil? [nil, :invalid_value, :inherited] }, "lang" => language_property(:inherited), "datatype" => lambda { |value, base_url, lang| value = value.clone warnings = [] if value.instance_of? Hash if value["@id"] raise Csvlint::Csvw::MetadataError.new("datatype.@id"), "datatype @id must not be the id of a built-in datatype (#{value["@id"]})" if BUILT_IN_DATATYPES.value?(value["@id"]) _, w, _ = PROPERTIES["@id"].call(value["@id"], base_url, lang) unless w.nil? warnings << w value.delete("@id") end end if value["base"] if BUILT_IN_DATATYPES.include? 
value["base"]
                # replace the short name with the datatype's full identifier
                value["base"] = BUILT_IN_DATATYPES[value["base"]]
              else
                # unknown base: fall back to string and report it
                value["base"] = BUILT_IN_DATATYPES["string"]
                warnings << :invalid_datatype_base
              end
            else
              # no base given: default to string
              value["base"] = BUILT_IN_DATATYPES["string"]
            end
          elsif BUILT_IN_DATATYPES.include? value
            value = {"@id" => BUILT_IN_DATATYPES[value]}
          else
            value = {"@id" => BUILT_IN_DATATYPES["string"]}
            warnings << :invalid_value
          end

          # length facets are only accepted on string and binary based types
          unless STRING_DATATYPES.include?(value["base"]) || BINARY_DATATYPES.include?(value["base"])
            raise Csvlint::Csvw::MetadataError.new("datatype.length"), "datatypes based on #{value["base"]} cannot have a length facet" if value["length"]
            raise Csvlint::Csvw::MetadataError.new("datatype.minLength"), "datatypes based on #{value["base"]} cannot have a minLength facet" if value["minLength"]
            raise Csvlint::Csvw::MetadataError.new("datatype.maxLength"), "datatypes based on #{value["base"]} cannot have a maxLength facet" if value["maxLength"]
          end

          # "minimum" / "maximum" are stored under minInclusive / maxInclusive
          if value["minimum"]
            value["minInclusive"] = value["minimum"]
            value.delete("minimum")
          end
          if value["maximum"]
            value["maxInclusive"] = value["maximum"]
            value.delete("maximum")
          end

          # normalise each range facet, collecting any warnings
          warnings += convert_value_facet(value, "minInclusive", value["base"])
          warnings += convert_value_facet(value, "minExclusive", value["base"])
          warnings += convert_value_facet(value, "maxInclusive", value["base"])
          warnings += convert_value_facet(value, "maxExclusive", value["base"])

          # facets stored as hashes are compared by their :dateTime entry
          minInclusive = value["minInclusive"].is_a?(Hash) ? value["minInclusive"][:dateTime] : value["minInclusive"]
          maxInclusive = value["maxInclusive"].is_a?(Hash) ? value["maxInclusive"][:dateTime] : value["maxInclusive"]
          minExclusive = value["minExclusive"].is_a?(Hash) ? value["minExclusive"][:dateTime] : value["minExclusive"]
          maxExclusive = value["maxExclusive"].is_a?(Hash) ?
value["maxExclusive"][:dateTime] : value["maxExclusive"]

          # an inclusive and an exclusive bound on the same side are mutually
          # exclusive
          raise Csvlint::Csvw::MetadataError.new(""), "datatype cannot specify both minimum/minInclusive (#{minInclusive}) and minExclusive (#{minExclusive}" if minInclusive && minExclusive
          raise Csvlint::Csvw::MetadataError.new(""), "datatype cannot specify both maximum/maxInclusive (#{maxInclusive}) and maxExclusive (#{maxExclusive}" if maxInclusive && maxExclusive

          # the lower bound must not exceed the upper bound
          raise Csvlint::Csvw::MetadataError.new(""), "datatype minInclusive (#{minInclusive}) cannot be more than maxInclusive (#{maxInclusive}" if minInclusive && maxInclusive && minInclusive > maxInclusive
          raise Csvlint::Csvw::MetadataError.new(""), "datatype minInclusive (#{minInclusive}) cannot be more than or equal to maxExclusive (#{maxExclusive}" if minInclusive && maxExclusive && minInclusive >= maxExclusive
          # NOTE(review): in the next two checks the message wording ("more
          # than or equal to" / "more than") does not match the operator used
          # (> / >=) — confirm which is intended against the CSVW spec
          raise Csvlint::Csvw::MetadataError.new(""), "datatype minExclusive (#{minExclusive}) cannot be more than or equal to maxExclusive (#{maxExclusive}" if minExclusive && maxExclusive && minExclusive > maxExclusive
          raise Csvlint::Csvw::MetadataError.new(""), "datatype minExclusive (#{minExclusive}) cannot be more than maxInclusive (#{maxInclusive}" if minExclusive && maxInclusive && minExclusive >= maxInclusive

          # length facets must be mutually consistent
          raise Csvlint::Csvw::MetadataError.new(""), "datatype length (#{value["length"]}) cannot be less than minLength (#{value["minLength"]}" if value["length"] && value["minLength"] && value["length"] < value["minLength"]
          raise Csvlint::Csvw::MetadataError.new(""), "datatype length (#{value["length"]}) cannot be more than maxLength (#{value["maxLength"]}" if value["length"] && value["maxLength"] && value["length"] > value["maxLength"]
          raise Csvlint::Csvw::MetadataError.new(""), "datatype minLength (#{value["minLength"]}) cannot be more than maxLength (#{value["maxLength"]}" if value["minLength"] && value["maxLength"] && value["minLength"] > value["maxLength"]

          # the meaning of "format" depends on the base datatype
          if value["format"]
            if REGEXP_FORMAT_DATATYPES.include?(value["base"])
              begin
                value["format"] =
Regexp.new(value["format"])
              rescue RegexpError
                # an uncompilable pattern is dropped and reported
                value.delete("format")
                warnings << :invalid_regex
              end
            elsif NUMERIC_FORMAT_DATATYPES.include?(value["base"])
              # a bare string is shorthand for {"pattern" => string}
              value["format"] = {"pattern" => value["format"]} if value["format"].instance_of? String
              begin
                value["format"] = Csvlint::Csvw::NumberFormat.new(value["format"]["pattern"], value["format"]["groupChar"], value["format"]["decimalChar"] || ".", INTEGER_FORMAT_DATATYPES.include?(value["base"]))
              rescue Csvlint::Csvw::NumberFormatError
                # invalid pattern: keep the separators but drop the pattern
                value["format"] = Csvlint::Csvw::NumberFormat.new(nil, value["format"]["groupChar"], value["format"]["decimalChar"] || ".", INTEGER_FORMAT_DATATYPES.include?(value["base"]))
                warnings << :invalid_number_format
              end
            elsif value["base"] == "http://www.w3.org/2001/XMLSchema#boolean"
              # boolean formats are written "a|b" and must have exactly two parts
              if value["format"].instance_of? String
                value["format"] = value["format"].split("|")
                unless value["format"].length == 2
                  value.delete("format")
                  warnings << :invalid_boolean_format
                end
              else
                value.delete("format")
                warnings << :invalid_boolean_format
              end
            elsif DATE_FORMAT_DATATYPES.include?(value["base"])
              if value["format"].instance_of? String
                begin
                  value["format"] = Csvlint::Csvw::DateFormat.new(value["format"])
                # NOTE(review): the sibling rescue above uses the
                # Csvlint::Csvw:: namespace — confirm this constant exists
                rescue Csvlint::CsvDateFormatError
                  value.delete("format")
                  warnings << :invalid_date_format
                end
              else
                value.delete("format")
                warnings << :invalid_date_format
              end
            end
          end
          [value, warnings, :inherited]
        },
        "required" => boolean_property(:inherited),
        "ordered" => boolean_property(:inherited),
        "aboutUrl" => uri_template_property(:inherited),
        "propertyUrl" => uri_template_property(:inherited),
        "valueUrl" => uri_template_property(:inherited),
        # accepts ltr, rtl, auto or inherit; anything else falls back to
        # :inherit with a warning
        "textDirection" => lambda { |value, base_url, lang|
          value = value.to_sym
          return value, nil, :inherited if [:ltr, :rtl, :auto, :inherit].include?
value [:inherit, :invalid_value, :inherited] }, # column level properties "virtual" => boolean_property(:column), "titles" => natural_language_property(:column), "name" => lambda { |value, base_url, lang| return value, nil, :column if value.instance_of?(String) && value =~ NAME_REGEXP [nil, :invalid_value, :column] }, # table level properties "transformations" => lambda { |value, base_url, lang| transformations = [] warnings = [] if value.instance_of? Array value.each_with_index do |transformation, i| if transformation.instance_of? Hash transformation = transformation.clone transformation.each do |p, v| if p == "@id" raise Csvlint::Csvw::MetadataError.new("transformations[#{i}].@id"), "@id starts with _:" if /^_:/.match?(v) elsif p == "@type" raise Csvlint::Csvw::MetadataError.new("transformations[#{i}].@type"), "@type of transformation is not 'Template'" if v != "Template" elsif p == "url" elsif p == "titles" else _, warning, type = check_property(p, v, base_url, lang) if type != :transformation && !(warning.nil? || warning.empty?) value.delete(p) warnings << :invalid_property unless type == :transformation warnings += Array(warning) end end end transformations << transformation else warnings << :invalid_transformation end end else warnings << :invalid_value end [transformations, warnings, :table] }, "tableDirection" => lambda { |value, base_url, lang| value = value.to_sym return value, nil, :table if [:ltr, :rtl, :auto].include? value [:auto, :invalid_value, :table] }, "tableSchema" => lambda { |value, base_url, lang| schema_base_url = base_url schema_lang = lang if value.instance_of? String schema_url = URI.join(base_url, value).to_s schema_base_url = schema_url schema_ref = schema_url.start_with?("file:") ? File.new(schema_url[5..-1]) : schema_url schema = JSON.parse(URI.open(schema_ref).read) schema["@id"] = schema["@id"] ? 
URI.join(schema_url, schema["@id"]).to_s : schema_url
            if schema["@context"]
              # an array @context may carry @base / @language overrides in its
              # second element
              if schema["@context"].instance_of?(Array) && schema["@context"].length > 1
                schema_base_url = schema["@context"][1]["@base"] ? URI.join(schema_base_url, schema["@context"][1]["@base"]).to_s : schema_base_url
                schema_lang = schema["@context"][1]["@language"] || schema_lang
              end
              schema.delete("@context")
            end
          elsif value.instance_of? Hash
            # inline schema: work on a copy
            schema = value.clone
          else
            return {}, :invalid_value, :table
          end
          warnings = []
          schema.each do |p, v|
            if p == "@id"
              raise Csvlint::Csvw::MetadataError.new("tableSchema.@id"), "@id starts with _:" if /^_:/.match?(v)
            elsif p == "@type"
              raise Csvlint::Csvw::MetadataError.new("tableSchema.@type"), "@type of schema is not 'Schema'" if v != "Schema"
            else
              v, warning, type = check_property(p, v, schema_base_url, schema_lang)
              # keep only schema / inherited properties that checked cleanly
              if (type == :schema || type == :inherited) && (warning.nil? || warning.empty?)
                schema[p] = v
              else
                schema.delete(p)
                warnings << :invalid_property unless type == :schema || type == :inherited
                warnings += Array(warning)
              end
            end
          end
          [schema, warnings, :table]
        },
        "url" => link_property(:table),
        # dialect properties
        "commentPrefix" => string_property(:dialect),
        "delimiter" => string_property(:dialect),
        "doubleQuote" => boolean_property(:dialect),
        # an encoding must be one of VALID_ENCODINGS
        "encoding" => lambda { |value, base_url, lang|
          return value, nil, :dialect if VALID_ENCODINGS.include?
value
          [nil, :invalid_value, :dialect]
        },
        "header" => boolean_property(:dialect),
        "headerRowCount" => numeric_property(:dialect),
        "lineTerminators" => array_property(:dialect),
        "quoteChar" => string_property(:dialect),
        "skipBlankRows" => boolean_property(:dialect),
        "skipColumns" => numeric_property(:dialect),
        "skipInitialSpace" => boolean_property(:dialect),
        "skipRows" => numeric_property(:dialect),
        # "trim" accepts booleans or the strings "true", "false", "start" and
        # "end", normalised to symbols; anything else yields true with a
        # warning
        "trim" => lambda { |value, base_url, lang|
          value = :true if value == true || value == "true"
          value = :false if value == false || value == "false"
          value = :start if value == "start"
          value = :end if value == "end"
          return value, nil, :dialect if [:true, :false, :start, :end].include? value
          [true, :invalid_value, :dialect]
        },
        # schema properties
        # columns are passed through unchanged here
        "columns" => lambda { |value, base_url, lang| [value, nil, :schema] },
        "primaryKey" => column_reference_property(:schema),
        # Validates a list of foreign key definitions. Each hash entry is
        # copied and its properties checked; non-hash entries are skipped
        # with a warning.
        "foreignKeys" => lambda { |value, base_url, lang|
          foreign_keys = []
          warnings = []
          if value.instance_of? Array
            value.each_with_index do |foreign_key, i|
              if foreign_key.instance_of? Hash
                foreign_key = foreign_key.clone
                foreign_key.each do |p, v|
                  v, warning, type = check_property(p, v, base_url, lang)
                  if type == :foreign_key && (warning.nil? || warning.empty?)
foreign_key[p] = v
                  elsif /:/.match?(p)
                    # property names containing ":" are rejected outright
                    raise Csvlint::Csvw::MetadataError.new("foreignKey.#{p}"), "foreignKey includes a prefixed (common) property"
                  else
                    foreign_key.delete(p)
                    warnings << :invalid_property unless type == :foreign_key
                    warnings += Array(warning)
                  end
                end
                foreign_keys << foreign_key
              else
                warnings << :invalid_foreign_key
              end
            end
          else
            warnings << :invalid_value
          end
          [foreign_keys, warnings, :schema]
        },
        "rowTitles" => column_reference_property(:schema),
        # transformation properties
        # these are passed through unchanged
        "targetFormat" => lambda { |value, base_url, lang| [value, nil, :transformation] },
        "scriptFormat" => lambda { |value, base_url, lang| [value, nil, :transformation] },
        "source" => lambda { |value, base_url, lang| [value, nil, :transformation] },
        # foreignKey properties
        "columnReference" => column_reference_property(:foreign_key),
        # Validates a foreign key reference object. Only resource,
        # schemaReference and columnReference are kept; a non-hash value
        # raises MetadataError.
        "reference" => lambda { |value, base_url, lang|
          if value.instance_of? Hash
            value = value.clone
            warnings = []
            value.each do |p, v|
              if ["resource", "schemaReference", "columnReference"].include? p
                v, warning, _ = check_property(p, v, base_url, lang)
                if warning.nil? || warning.empty?
value[p] = v
                else
                  value.delete(p)
                  warnings += Array(warning)
                end
              elsif /:/.match?(p)
                # property names containing ":" are rejected outright
                raise Csvlint::Csvw::MetadataError.new("foreignKey.reference.#{p}"), "foreignKey reference includes a prefixed (common) property"
              else
                value.delete(p)
                warnings << :invalid_property
              end
            end
            # columnReference is mandatory, and exactly one of resource /
            # schemaReference must be present
            raise Csvlint::Csvw::MetadataError.new("foreignKey.reference.columnReference"), "foreignKey reference columnReference is missing" unless value["columnReference"]
            raise Csvlint::Csvw::MetadataError.new("foreignKey.reference"), "foreignKey reference does not have either resource or schemaReference" unless value["resource"] || value["schemaReference"]
            raise Csvlint::Csvw::MetadataError.new("foreignKey.reference"), "foreignKey reference has both resource and schemaReference" if value["resource"] && value["schemaReference"]
            [value, warnings, :foreign_key]
          else
            raise Csvlint::Csvw::MetadataError.new("foreignKey.reference"), "foreignKey reference is not an object"
          end
        },
        # foreignKey reference properties
        "resource" => lambda { |value, base_url, lang| [value, nil, :foreign_key_reference] },
        # schemaReference is resolved against base_url and returned as a string
        "schemaReference" => lambda { |value, base_url, lang| [URI.join(base_url, value).to_s, nil, :foreign_key_reference] }
      }

      # Prefixes accepted on common (annotation) property names and on @type
      # values, mapped to the namespace URI each prefix stands for.
      NAMESPACES = {
        "dcat" => "http://www.w3.org/ns/dcat#",
        "qb" => "http://purl.org/linked-data/cube#",
        "grddl" => "http://www.w3.org/2003/g/data-view#",
        "ma" => "http://www.w3.org/ns/ma-ont#",
        "org" => "http://www.w3.org/ns/org#",
        "owl" => "http://www.w3.org/2002/07/owl#",
        "prov" => "http://www.w3.org/ns/prov#",
        "rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
        "rdfa" => "http://www.w3.org/ns/rdfa#",
        "rdfs" => "http://www.w3.org/2000/01/rdf-schema#",
        "rif" => "http://www.w3.org/2007/rif#",
        "rr" => "http://www.w3.org/ns/r2rml#",
        "sd" => "http://www.w3.org/ns/sparql-service-description#",
        "skos" => "http://www.w3.org/2004/02/skos/core#",
        "skosxl" => "http://www.w3.org/2008/05/skos-xl#",
        "wdr" => "http://www.w3.org/2007/05/powder#",
        "void" => "http://rdfs.org/ns/void#",
        "wdrs" => "http://www.w3.org/2007/05/powder-s#",
        "xhv" => "http://www.w3.org/1999/xhtml/vocab#",
        "xml" => "http://www.w3.org/XML/1998/namespace",
        "xsd" => "http://www.w3.org/2001/XMLSchema#",
        "csvw" => "http://www.w3.org/ns/csvw#",
        "cnt" => "http://www.w3.org/2008/content",
        "earl" => "http://www.w3.org/ns/earl#",
        "ht" => "http://www.w3.org/2006/http#",
        "oa" => "http://www.w3.org/ns/oa#",
        "ptr" => "http://www.w3.org/2009/pointers#",
        "cc" => "http://creativecommons.org/ns#",
        "ctag" => "http://commontag.org/ns#",
        "dc" => "http://purl.org/dc/terms/",
        "dcterms" => "http://purl.org/dc/terms/",
        "dc11" => "http://purl.org/dc/elements/1.1/",
        "foaf" => "http://xmlns.com/foaf/0.1/",
        "gr" => "http://purl.org/goodrelations/v1#",
        "ical" => "http://www.w3.org/2002/12/cal/icaltzd#",
        "og" => "http://ogp.me/ns#",
        "rev" => "http://purl.org/stuff/rev#",
        "sioc" => "http://rdfs.org/sioc/ns#",
        "v" => "http://rdf.data-vocabulary.org/#",
        "vcard" => "http://www.w3.org/2006/vcard/ns#",
        "schema" => "http://schema.org/"
      }

      # String fragments from which the BCP47 language-tag regexps are built.
      # NOTE(review): several fragments open with a bare "(?" — this looks
      # like a group name ("(?<name>") was lost; confirm against the
      # upstream file before relying on these patterns.
      BCP47_REGULAR_REGEXP = "(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)"
      BCP47_IRREGULAR_REGEXP = "(en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)"
      BCP47_GRANDFATHERED_REGEXP = "(?" + BCP47_IRREGULAR_REGEXP + "|" + BCP47_REGULAR_REGEXP + ")"
      BCP47_PRIVATE_USE_REGEXP = "(?x(-[A-Za-z0-9]{1,8})+)"
      BCP47_SINGLETON_REGEXP = "[0-9A-WY-Za-wy-z]"
      BCP47_EXTENSION_REGEXP = "(?" + BCP47_SINGLETON_REGEXP + "(-[A-Za-z0-9]{2,8})+)"
      BCP47_VARIANT_REGEXP = "(?[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3})"
      BCP47_REGION_REGEXP = "(?[A-Za-z]{2}|[0-9]{3})"
      BCP47_SCRIPT_REGEXP = "(?