Repository: tensorflow/moonlight Branch: master Commit: d80279a3bf5d Files: 162 Total size: 852.8 KB Directory structure: gitextract_080bjy0d/ ├── AUTHORS ├── CONTRIBUTING.md ├── CONTRIBUTORS ├── LICENSE ├── README.md ├── WORKSPACE ├── docs/ │ ├── concepts.md │ └── engine.md ├── moonlight/ │ ├── BUILD │ ├── conversions/ │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── musicxml.py │ │ ├── musicxml_test.py │ │ └── notesequence.py │ ├── data/ │ │ ├── README.md │ │ └── glyphs_nn_model_20180808/ │ │ ├── BUILD │ │ ├── saved_model.pbtxt │ │ └── variables/ │ │ ├── variables.data-00000-of-00001 │ │ └── variables.index │ ├── engine.py │ ├── evaluation/ │ │ ├── BUILD │ │ ├── evaluator.py │ │ ├── evaluator_endtoend_test.py │ │ ├── musicxml.py │ │ └── musicxml_test.py │ ├── glyphs/ │ │ ├── BUILD │ │ ├── base.py │ │ ├── convolutional.py │ │ ├── convolutional_test.py │ │ ├── corpus.py │ │ ├── geometry.py │ │ ├── glyph_types.py │ │ ├── knn.py │ │ ├── knn_model.py │ │ ├── knn_test.py │ │ ├── neural.py │ │ ├── neural_test.py │ │ ├── note_dots.py │ │ ├── repeated.py │ │ ├── saved_classifier.py │ │ ├── saved_classifier_fn.py │ │ ├── saved_classifier_test.py │ │ └── testing.py │ ├── image.py │ ├── models/ │ │ ├── base/ │ │ │ ├── BUILD │ │ │ ├── batches.py │ │ │ ├── batches_test.py │ │ │ ├── glyph_patches.py │ │ │ ├── glyph_patches_test.py │ │ │ ├── hyperparameters.py │ │ │ ├── hyperparameters_test.py │ │ │ ├── label_weights.py │ │ │ └── label_weights_test.py │ │ └── glyphs_dnn/ │ │ ├── BUILD │ │ ├── model.py │ │ └── train.py │ ├── music/ │ │ ├── BUILD │ │ └── constants.py │ ├── omr.py │ ├── omr_endtoend_test.py │ ├── omr_regression_test.py │ ├── page_processors.py │ ├── pipeline/ │ │ ├── BUILD │ │ └── pipeline_flags.py │ ├── protobuf/ │ │ ├── BUILD │ │ ├── groundtruth.proto │ │ └── musicscore.proto │ ├── score/ │ │ ├── BUILD │ │ ├── elements/ │ │ │ ├── BUILD │ │ │ ├── clef.py │ │ │ ├── clef_test.py │ │ │ ├── key_signature.py │ │ │ └── key_signature_test.py │ │ ├── measures.py │ │ ├── 
reader.py │ │ ├── reader_test.py │ │ └── state/ │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── measure.py │ │ └── staff.py │ ├── score_processors.py │ ├── scripts/ │ │ └── imslp_pdfs_to_pngs.sh │ ├── staves/ │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── base.py │ │ ├── detectors_test.py │ │ ├── filter.py │ │ ├── hough.py │ │ ├── projection.py │ │ ├── removal.py │ │ ├── removal_test.py │ │ ├── staff_processor.py │ │ ├── staff_processor_test.py │ │ ├── staffline_distance.py │ │ ├── staffline_distance_test.py │ │ ├── staffline_extractor.py │ │ ├── staffline_extractor_test.py │ │ └── testing.py │ ├── structure/ │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── barlines.py │ │ ├── barlines_test.py │ │ ├── beam_processor.py │ │ ├── beams.py │ │ ├── components.py │ │ ├── components_test.py │ │ ├── section_barlines.py │ │ ├── stems.py │ │ ├── stems_test.py │ │ ├── structure_test.py │ │ ├── verticals.py │ │ └── verticals_test.py │ ├── testdata/ │ │ ├── BUILD │ │ ├── IMSLP00747-000.LICENSE.md │ │ ├── IMSLP00747.golden.LICENSE.md │ │ ├── IMSLP00747.golden.xml │ │ ├── README.md │ │ ├── TWO_MEASURE_SAMPLE.LICENSE.md │ │ └── TWO_MEASURE_SAMPLE.xml │ ├── tools/ │ │ ├── BUILD │ │ ├── export_kmeans_centroids.py │ │ ├── export_kmeans_centroids_test.py │ │ └── gen_structure_test_case.py │ ├── training/ │ │ ├── clustering/ │ │ │ ├── BUILD │ │ │ ├── kmeans_labeler.py │ │ │ ├── kmeans_labeler_request_handler.py │ │ │ ├── kmeans_labeler_request_handler_test.py │ │ │ ├── kmeans_labeler_template.html │ │ │ ├── staffline_patches_dofn.py │ │ │ ├── staffline_patches_dofn_test.py │ │ │ ├── staffline_patches_kmeans_pipeline.py │ │ │ └── staffline_patches_kmeans_pipeline_test.py │ │ └── generation/ │ │ ├── BUILD │ │ ├── generation.py │ │ ├── generation_test.py │ │ ├── image_noise.py │ │ ├── vexflow_generator.js │ │ └── vexflow_generator_pipeline.py │ ├── util/ │ │ ├── BUILD │ │ ├── functional_ops.py │ │ ├── functional_ops_test.py │ │ ├── memoize.py │ │ ├── more_iter_tools.py │ │ ├── more_iter_tools_test.py │ │ 
├── patches.py │ │ ├── patches_test.py │ │ ├── run_length.py │ │ ├── run_length_test.py │ │ ├── segments.py │ │ └── segments_test.py │ └── vision/ │ ├── BUILD │ ├── hough.py │ ├── hough_test.py │ ├── images.py │ ├── images_test.py │ ├── morphology.py │ └── morphology_test.py ├── requirements.txt ├── sandbox/ │ └── README.md ├── six.BUILD └── tools/ ├── bazel_0.20.0-linux-x86_64.deb.sha256 └── travis_tests.sh ================================================ FILE CONTENTS ================================================ ================================================ FILE: AUTHORS ================================================ # This is the list of Moonlight OMR authors for copyright purposes. # # Copyright for contributions made under Google's Corporate CLA belong to the # contributor's organization. Contributions made under the Individual CLA belong # to the author. Individual contributors are recognized separately in the # "CONTRIBUTORS" file. Google Inc. Nuno Jesus ================================================ FILE: CONTRIBUTING.md ================================================ # How to Contribute We'd love to accept your patches and contributions to this project. There are just a few small guidelines you need to follow. ## Contributor License Agreement Contributions to this project must be accompanied by a Contributor License Agreement. You (or your employer) retain the copyright to your contribution; this simply gives us permission to use and redistribute your contributions as part of the project. Head over to <https://cla.developers.google.com/> to see your current agreements on file or to sign a new one. You generally only need to submit a CLA once, so if you've already submitted one (even if it was for a different project), you probably don't need to do it again. ## Code reviews All submissions, including submissions by project members, require review. We use GitHub pull requests for this purpose. 
Consult [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more information on using pull requests. ## Community Guidelines This project follows [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/). ================================================ FILE: CONTRIBUTORS ================================================ # This is the list of individual contributors to the Moonlight OMR project. # # Some contributions belong to the organization of the contributor. Copyright is # tracked separately, in the "AUTHORS" file. # # We will make an effort to recognize all contributors here. However, the source # of truth for contributors is the commit author in source control history. Dan Ringwalt Larry Li Nuno Jesus ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. 
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." 
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # Moonlight Optical Music Recognition (OMR) [![Build Status](https://travis-ci.org/tensorflow/moonlight.svg?branch=master)](https://travis-ci.org/tensorflow/moonlight) An experimental [optical music recognition](https://en.wikipedia.org/wiki/Optical_music_recognition) engine. Moonlight reads PNG image(s) containing sheet music and outputs [MusicXML](https://www.musicxml.com/) or a [NoteSequence message](https://github.com/tensorflow/magenta/blob/master/magenta/protobuf/music.proto). MusicXML is a standard sheet music interchange format, and `NoteSequence` is used by [Magenta](http://magenta.tensorflow.org) for training generative music models. Moonlight is not an officially supported Google product. ### Command-Line Usage git clone https://github.com/tensorflow/moonlight cd moonlight # You may want to run this inside a virtualenv. pip install -r requirements.txt # Build the OMR command-line tool. bazel build moonlight:omr # Prints a Score message. bazel-bin/moonlight/omr moonlight/testdata/IMSLP00747-000.png # Scans several pages and prints a NoteSequence message. bazel-bin/moonlight/omr --output_type=NoteSequence IMSLP00001-*.png # Writes MusicXML to ~/mozart.xml. 
bazel-bin/moonlight/omr --output_type=MusicXML --output=$HOME/mozart.xml \ corpus/56/IMSLP56442-*.png The `omr` CLI will print a [`Score`](moonlight/protobuf/musicscore.proto) message by default, or [MusicXML](https://www.musicxml.com/) or a `NoteSequence` message if specified. Moonlight is intended to be run in bulk, and will not offer a full UI for correcting the score. The main entry point will be an Apache Beam pipeline that processes an entire corpus of images. There is no release yet, and Moonlight is not ready for end users. To run interactively or import the module, you can use the [sandbox directory](sandbox/README.md). Moonlight will be used offline for digitizing a scanned corpus (it can be installed on all Cloud Compute platforms, and OS compatibility is not a priority). ### Dependencies * Linux - Note: Our Google dep versions are fragile, and updating them or updating other OS may break directory structure in fragile ways. * [Protobuf 3.6.1](https://pypi.org/project/protobuf/3.6.1/) * [Bazel 0.20.0](https://github.com/bazelbuild/bazel/releases/tag/0.20.0). We encountered some errors using Bazel 0.21.0 to build Protobuf 3.6.1, which is the latest Protobuf release at the time of writing. * Python version supported by TensorFlow (Python 3.5-3.7) * Python dependencies specified in the [requirements](requirements.txt). 
### Resources [Forum](https://groups.google.com/forum/#!forum/moonlight-omr) ================================================ FILE: WORKSPACE ================================================ http_archive( name = "com_google_protobuf", sha256 = "40f009cb0c190816a52fc21d45c26558ee7d63c3bd511b326bd85739b2fd99a6", strip_prefix = "protobuf-3.6.1", url = "https://github.com/google/protobuf/releases/download/v3.6.1/protobuf-python-3.6.1.tar.gz", ) new_http_archive( name = "six_archive", build_file = "six.BUILD", sha256 = "105f8d68616f8248e24bf0e9372ef04d3cc10104f1980f54d57b2ce73a5ad56a", strip_prefix = "six-1.10.0", url = "https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz#md5=34eed507548117b2ab523ab14b2f8b55", ) bind( name = "six", actual = "@six_archive//:six", ) new_http_archive( name = "magenta", strip_prefix = "magenta-48a199085e303eeae7c36068f050696209b856bb/magenta", url = "https://github.com/tensorflow/magenta/archive/48a199085e303eeae7c36068f050696209b856bb.tar.gz", sha256 = "931fb7b57714d667db618b0c31db1444e44baab17865ad66b76fd24d9e20ad6d", build_file_content = "", ) new_http_archive( name = "omr_regression_test_data", strip_prefix = "omr_regression_test_data_20180516", url = "https://github.com/tensorflow/moonlight/releases/download/v2018.05.16-data/omr_regression_test_data_20180516.tar.gz", sha256 = "b47577ee6b359c2cbbcdb8064c6bd463692c728c55ec5c0ab78139165ba8f35a", build_file_content = """ package(default_visibility = ["//visibility:public"]) filegroup( name = "omr_regression_test_data", srcs = glob(["**/*.png"]), ) """, ) ================================================ FILE: docs/concepts.md ================================================ # Moonlight OMR Concepts ## Diagram ## Glossary | Term | Definition | | ------------ | ------------------------------------------------------------- | | barline | A vertical line spanning one or more staves, which is not a | : : stem. : | beam | A thick line connecting multiple stems horizontally. 
Each | : : beam halves the duration of a filled notehead connected to : : : the stem, which would otherwise be a quarter note. : | notehead | An ellipse representing a single note. May be filled (quarter | : : or lesser value) or empty (half or whole note). : | ledger line | An extra line above or below the 5 staff lines. | | staff | The object which notes and other glyphs are placed on. | | staff system | One or more staves joined by at least one barline. | | staves | Plural of staff. | | stem | A vertical line attached to a notehead. All noteheads except | : : for whole notes should have a stem. : ## Staves Staves have 5 parallel, horizontal lines, and are parameterized by the center (3rd) line, and the staffline distance (vertical distance between consecutive staff lines). The staffline distance is constant for reasonable quality printed music scores, and this representation avoids redundancy and makes it possible to find the coordinates of each staff line. ## Staff Positions Glyphs are vertically centered either on staff lines (or ledger lines), or on the space halfway between lines. We refer to each of these y coordinates as a *position*. **Note**: We still refer to positions as *stafflines* in many places which is counter-intuitive, since positions are also on the space between lines, and can be a potential ledger line which is empty in a particular image. We are in the process of renaming stafflines in this context to staff positions. * Staff position 0 is the third staff line (staff center line), which is also the y-coordinate that is outputted by staff detection. * Staff positions are half the staffline distance apart. Always calculate the relative y position by tf.floordiv(staffline_distance * y_position, 2) instead of dividing by 2 first. * In treble clef, staff position 0 is B4, -6 is C4, -1 is A4, and +1 is C5. * In bass clef, staff position 0 is D3, +3 is G3, and +6 is C4. 
## Glyphs Glyphs are defined in [musicscore.proto](../moonlight/protobuf/musicscore.proto). Each glyph has an x coordinate on the original image, and a y position (staff position). The staff position determines the pitch of the glyph, if applicable. If the glyph is especially large (e.g. clefs) or is not centered on a particular vertical position, we choose a *canonical* staff position (e.g. the G line for treble clef, aka G clef). ### Glyph centers * Every glyph needs a canonical center coordinate. The classifier will detect the glyph if the window (e.g. convolutional filter) is centered on this point. * Noteheads are centered on the middle of the ellipse, which should be exactly on a staffline or halfway between stafflines. * Accidentals should be centered on their center of mass. For flats, sharps, and naturals, this is the center of the empty space inside them. For double sharps, this is the middle of the crosshairs. * Treble clef ("G clef") is centered on the intersection of the thin vertical line with the G staffline (staff position -2). * Bass clef ("F clef") is centered on the F staffline (staff position +2). The x coordinate is halfway between the filled circle on the left and the vertical segment on the right. * All rests should normally be centered at staff position 0, unless shifted from their usual position. ================================================ FILE: docs/engine.md ================================================ ## OMR Engine Detailed Reference The OMR engine converts a PNG image to a Magenta [NoteSequence message](https://github.com/magenta/note-seq/blob/master/note_seq/protobuf/music.proto), which is interoperable with MIDI and MusicXML. OMR uses TensorFlow for glyph (symbol) classification, as well as all other compute-intensive steps. Final processing is done in pure Python. The entry point is [OMREngine.run](../moonlight/engine.py). 
Glyph classification is configurable by the `glyph_classifier_fn` argument, and other subtasks of image recognition are part of the [Structure](../moonlight/structure/__init__.py). ### Diagram ### API The entry point for running OMR is [`OMREngine.run()`](../moonlight/engine.py). It takes in a list of PNG filenames and outputs a `Score` or `NoteSequence` message. The `Score` can further be converted to [MusicXML](../moonlight/conversions/musicxml.py). ### TensorFlow Graph For maximum parallelism, all processing is run in the same TensorFlow graph. The graph is run by `OMREngine._get_page()`. This also evaluates the `Structure` in the same graph. `Structure` is a wrapper for detectors which extract information from the image, including [staves](../moonlight/staves/hough.py), [vertical lines](../moonlight/structure/verticals.py), and [note beams](../moonlight/structure/beams.py). #### Structure [Structure](../moonlight/structure/__init__.py) holds the structural elements that need to be evaluated for OMR, but does not do any symbol recognition. The structure encompasses staves, beams, and vertical lines, and may contain more elements in the future (e.g. full connected component analysis) which can be used to detect more elements (e.g. note dots). Structure detection is currently simple computer vision rather than ML, but it can easily be swapped out with a different TensorFlow model. ##### Staffline Distance Estimation We estimate the [staffline distance(s)](../moonlight/staves/staffline_distance.py) of the entire image. There may be staves with multiple different sizes for different parts on a single page, but there should be just a few possible staffline distance values. ##### Staff Detection Concrete subclasses of [BaseStaffDetector](../moonlight/staves/base.py) take in the image and produce: * `staves`: Tensor of shape `(num_staves, num_points, 2)`. Coordinates of the staff center line (third line on the staff). 
* `staffline_distance`: Vector of the estimated staffline distance (distance between consecutive staff lines) for each staff. * `staffline_thickness`: Scalar thickness of staff lines. Assumed to be the same for all staves. * `staves_interpolated_y`: Tensor of shape `(num_staves, width)`. For each staff and column of the image, outputs the interpolated y position of the staff center line. ##### Staff Removal [StaffRemover](../moonlight/staves/removal.py) takes in the image and staves, and outputs `remove_staves` which is the image with the staff lines erased. This is useful so that [glyphs](concepts.md) look the same whether they are centered on a staff line or the space between lines. It is also used within the structure, by beam detection. ##### Beam Detection [Beams](../moonlight/structure/beams.py) are currently detected from connected components on an [eroded](https://en.wikipedia.org/wiki/Mathematical_morphology#Binary_morphology) staves-removed image. These are attached to notes by a `BeamProcessor`. ##### Vertical Line Detection [ColumnBasedVerticals](../moonlight/structure/verticals.py) detects all vertical lines in the image. These will later be used as either stems or barlines. #### Glyph Classification: 1-D Convolutional Model We also run a glyph classifier as part of the TensorFlow graph, which outputs predictions. ##### Staffline Extraction Glyphs are considered to lie on a black staff line, or halfway between staff lines. For OMR, extracted stafflines are slices of the image that are either centered on a staff line, or halfway between staff lines. The line that the extracted staffline lies on may just be referred to as a staffline, or a y position of the staff. [StafflineExtractor](../moonlight/staves/staffline_extractor.py) extracts these vertical slices of the image, and scales their height to a constant value (currently, 18 pixels tall). `StaffRemover` is used so that all extracted stafflines should look similar. 
##### Glyph Classification [Glyphs](concepts.md) are classified on small, horizontal slices (currently, 15 pixels wide) of the extracted staffline, a 1D convolutional model. A [GlyphClassifier](../moonlight/glyphs/base.py) outputs a Tensor `staffline_predictions` of shape `(num_staves, num_stafflines, width)`. The values are for the `Glyph.Type` enum. Value 0 (UNKNOWN_TYPE) is not used; value 1 (NONE) corresponds to no glyph. ### Post-Processing #### Page Construction OMR processing operates on [Page protos](../moonlight/protobuf/musicscore.proto). The Page is first constructed by `BaseGlyphClassifier.get_page`, which populates the glyphs on each staff. Staff location information is then added by `StaffProcessor`. Single `Glyph`s are created from consecutive runs in `staffline_predictions` that are classified as the same glyph type. Additional processors modify the `Page` in place, usually adding information from the `Structure`. Each page is run through [`page_processors.process()`](../moonlight/page_processors.py), and then the score (containing all pages) is run through [`score_processors.process()`](../moonlight/score_processors.py). #### Stem Detection [Stems](../moonlight/structure/stems.py) finds stem candidates from the vertical lines, and adds a `Stem` to notehead `Glyph`s if the closest stem is close enough to the expected position. The `ScoreReader` considers multiple noteheads with identical `Stem`s as a single chord. Stems will also be used as a negative signal to avoid detecting barlines in the same area. #### Beam Processing Beams from the `Structure` that are close enough to a stem are added to one or more notes by [BeamProcessor](../moonlight/structure/beam_processor.py). #### Barlines [Barlines](../moonlight/structure/barlines.py) are detected from the verticals if they have not already been used as a stem. #### Score Reading The [ScoreReader](../moonlight/score/reader.py) is the only score processor. 
It can potentially use state that lasts across multiple pages, such as the current time in the score, which needs to persist for the entire score. Staves are scanned from left to right for glyphs. The `ScoreReader` manages a hierarchy of state, from the global `ScoreState` to the `MeasureState`, holding local state such as accidentals. Based on the preceding `Glyph`s, each notehead `Glyph` gets assigned a [`Note`](https://github.com/magenta/note-seq/blob/d7153cdb26758a69c2fa022782c5817970de7066/note_seq/protobuf/music.proto#L104) field holding its musical value. Afterwards, the `Score` can be converted to a `NoteSequence` (just pulling out all of the `Note`s) or [MusicXML](../moonlight/conversions/musicxml.py). ================================================ FILE: moonlight/BUILD ================================================ # Description: # Optical music recognition using TensorFlow. package( default_visibility = ["//moonlight:__subpackages__"], ) licenses(["notice"]) # Apache 2.0 # The OMR engine. Entry point for running OMR. py_library( name = "engine", srcs = ["engine.py"], srcs_version = "PY2AND3", deps = [ ":image", ":page_processors", ":score_processors", "//moonlight/conversions", "//moonlight/glyphs:saved_classifier_fn", "//moonlight/protobuf:protobuf_py_pb2", "//moonlight/staves:base", "//moonlight/structure", "//moonlight/structure:beams", "//moonlight/structure:components", "//moonlight/structure:verticals", # numpy dep # six dep # tensorflow dep ], ) # The omr CLI for running locally on a single score. 
py_binary( name = "omr", srcs = ["omr.py"], srcs_version = "PY2AND3", deps = [ ":engine", # disable_tf2 "@com_google_protobuf//:protobuf_python", # absl dep "//moonlight/conversions", "//moonlight/glyphs:saved_classifier_fn", # tensorflow dep ], ) py_test( name = "omr_endtoend_test", size = "large", srcs = ["omr_endtoend_test.py"], data = ["//moonlight/testdata:images"], shard_count = 4, srcs_version = "PY2AND3", deps = [ ":engine", # disable_tf2 # pillow dep # absl/testing dep # librosa dep # lxml dep "//moonlight/conversions", "@magenta//protobuf:music_py_pb2", # numpy dep # tensorflow.python.platform dep ], ) py_test( name = "omr_regression_test", size = "large", srcs = ["omr_regression_test.py"], args = ["--corpus_dir=../omr_regression_test_data"], data = ["@omr_regression_test_data"], shard_count = 4, srcs_version = "PY2AND3", deps = [ ":engine", # disable_tf2 # absl/testing dep "//moonlight/protobuf:protobuf_py_pb2", "//moonlight/score:reader", ], ) py_library( name = "image", srcs = ["image.py"], srcs_version = "PY2AND3", deps = [], # tensorflow dep ) py_library( name = "page_processors", srcs = ["page_processors.py"], srcs_version = "PY2AND3", deps = [ "//moonlight/glyphs:glyph_types", "//moonlight/glyphs:note_dots", "//moonlight/glyphs:repeated", "//moonlight/staves:staff_processor", "//moonlight/structure:barlines", "//moonlight/structure:beam_processor", "//moonlight/structure:section_barlines", "//moonlight/structure:stems", ], ) py_library( name = "score_processors", srcs = ["score_processors.py"], srcs_version = "PY2AND3", deps = ["//moonlight/score:reader"], ) ================================================ FILE: moonlight/conversions/BUILD ================================================ # Description: # Format conversions for OMR. 
package( default_visibility = ["//moonlight:__subpackages__"], ) licenses(["notice"]) # Apache 2.0 py_library( name = "conversions", srcs = ["__init__.py"], deps = [ ":musicxml", ":notesequence", ], ) py_library( name = "musicxml", srcs = ["musicxml.py"], deps = [ # librosa dep # lxml dep "//moonlight/protobuf:protobuf_py_pb2", "//moonlight/score:measures", ], ) py_test( name = "musicxml_test", srcs = ["musicxml_test.py"], deps = [ ":musicxml", # absl/testing dep "//moonlight/protobuf:protobuf_py_pb2", "@magenta//protobuf:music_py_pb2", ], ) py_library( name = "notesequence", srcs = ["notesequence.py"], deps = [ "//moonlight/protobuf:protobuf_py_pb2", "@magenta//protobuf:music_py_pb2", ], ) ================================================ FILE: moonlight/conversions/__init__.py ================================================ # Copyright 2018 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """OMR format conversions.""" # TODO(ringw): Score to MusicXML, preserving staves, etc. 
from __future__ import absolute_import from __future__ import division from __future__ import print_function from moonlight.conversions.musicxml import score_to_musicxml from moonlight.conversions.notesequence import page_to_notesequence from moonlight.conversions.notesequence import score_to_notesequence ================================================ FILE: moonlight/conversions/musicxml.py ================================================ # Copyright 2018 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Score to MusicXML conversion.""" # TODO(ringw): Key signature # TODO(ringw): Chords # TODO(ringw): Stems--MusicXML supports "up" or "down". # TODO(ringw): Accurate layout of pages, staves, and measures. # TODO(ringw): Barline types. from __future__ import absolute_import from __future__ import division from __future__ import print_function import copy import re import librosa from lxml import etree from moonlight.protobuf import musicscore_pb2 from moonlight.score import measures from six import moves DOCTYPE = ('\n') MUSICXML_VERSION = '3.0' # Number of divisions (duration units) per quarter note. DIVISIONS = 1024 # TODO(ringw): Detect the actual time signature. TIME_SIGNATURE = etree.Element('time', symbol='common') etree.SubElement(TIME_SIGNATURE, 'beats').text = '4' etree.SubElement(TIME_SIGNATURE, 'beat-type').text = '4' # Note types. HALF = 'half' WHOLE = 'whole' # Indexed by the number of beams on a filled note. Each beam halves the duration # of the note. 
FILLED = [ 'quarter', 'eighth', '16th', '32nd', '64th', '128th', '256th', '512th', '1024th' ] # Maps the ASCII accidental names to the actual pitch alteration. ACCIDENTAL_TO_ALTER = {'': 0, '#': 1, 'b': -1} def score_to_musicxml(score): """Converts a `tensorflow.moonlight.Score` to MusicXML. Args: score: The OMR score. Returns: XML text. """ musicxml = MusicXMLScore(score) measure_num = 0 previous_note_start_time = 0 previous_note_end_time = 0 for page in score.page: for system in page.system: system_measures = measures.Measures(system) for system_measure_num in moves.xrange(system_measures.size()): for staff_num, staff in enumerate(system.staff): # Produce the measure, even if there are no glyphs. measure = musicxml.get_measure(staff_num, measure_num) for glyph in staff.glyph: if system_measures.get_measure(glyph) == system_measure_num: clef = _glyph_to_clef(glyph) if clef is not None: attributes = _get_attributes(measure) if attributes.find('clef') is not None: attributes.remove(attributes.find('clef')) attributes.append(clef) note = _glyph_to_note(glyph) if note is not None: if (glyph.note.start_time == previous_note_start_time and glyph.note.end_time == previous_note_end_time): position = note.index(note.find('pitch')) chord = etree.Element('chord') note.insert(position, chord) previous_note_start_time = glyph.note.start_time previous_note_end_time = glyph.note.end_time measure.append(note) measure_num += 1 # Add and