[
  {
    "path": ".gitattributes",
    "content": "*.dat     binary"
  },
  {
    "path": ".gitignore",
    "content": "## Ignore Visual Studio temporary files, build results, and\n## files generated by popular Visual Studio add-ons.\n\n# User-specific files\n*.suo\n*.user\n*.userosscache\n*.sln.docstates\naws-lambda*.json\n\n# User-specific files (MonoDevelop/Xamarin Studio)\n*.userprefs\n\n# Build results\n[Dd]ebug/\n[Dd]ebugPublic/\n[Rr]elease/\n[Rr]eleases/\nx64/\nx86/\nbld/\n[Bb]in/\n[Oo]bj/\n[Ll]og/\n\n# Visual Studio 2015 cache/options directory\n.vs/\n# Uncomment if you have tasks that create the project's static files in wwwroot\n#wwwroot/\n\n# MSTest test Results\n[Tt]est[Rr]esult*/\n[Bb]uild[Ll]og.*\n\n# NUNIT\n*.VisualState.xml\nTestResult.xml\n\n# Build Results of an ATL Project\n[Dd]ebugPS/\n[Rr]eleasePS/\ndlldata.c\n\n# DNX\nproject.lock.json\nproject.fragment.lock.json\nartifacts/\n\n*_i.c\n*_p.c\n*_i.h\n*.ilk\n*.meta\n*.obj\n*.pch\n*.pdb\n*.pgc\n*.pgd\n*.rsp\n*.sbr\n*.tlb\n*.tli\n*.tlh\n*.tmp\n*.tmp_proj\n*.log\n*.vspscc\n*.vssscc\n.builds\n*.pidb\n*.svclog\n*.scc\n\n# Chutzpah Test files\n_Chutzpah*\n\n# Visual C++ cache files\nipch/\n*.aps\n*.ncb\n*.opendb\n*.opensdf\n*.sdf\n*.cachefile\n*.VC.db\n*.VC.VC.opendb\n\n# Visual Studio profiler\n*.psess\n*.vsp\n*.vspx\n*.sap\n\n# TFS 2012 Local Workspace\n$tf/\n\n# Guidance Automation Toolkit\n*.gpState\n\n# ReSharper is a .NET coding add-in\n_ReSharper*/\n*.[Rr]e[Ss]harper\n*.DotSettings.user\n\n# JustCode is a .NET coding add-in\n.JustCode\n\n# TeamCity is a build add-in\n_TeamCity*\n\n# DotCover is a Code Coverage Tool\n*.dotCover\n\n# NCrunch\n_NCrunch_*\n.*crunch*.local.xml\nnCrunchTemp_*\n\n# MightyMoose\n*.mm.*\nAutoTest.Net/\n\n# Web workbench (sass)\n.sass-cache/\n\n# Installshield output folder\n[Ee]xpress/\n\n# DocProject is a documentation generator add-in\nDocProject/buildhelp/\nDocProject/Help/*.HxT\nDocProject/Help/*.HxC\nDocProject/Help/*.hhc\nDocProject/Help/*.hhk\nDocProject/Help/*.hhp\nDocProject/Help/Html2\nDocProject/Help/html\n\n# Click-Once directory\npublish/\n\n# Publish Web 
Output\n*.[Pp]ublish.xml\n*.azurePubxml\n# TODO: Comment the next line if you want to checkin your web deploy settings\n# but database connection strings (with potential passwords) will be unencrypted\n#*.pubxml\n*.publishproj\n\n# Microsoft Azure Web App publish settings. Comment the next line if you want to\n# checkin your Azure Web App publish settings, but sensitive information contained\n# in these scripts will be unencrypted\nPublishScripts/\n\n# NuGet Packages\n*.nupkg\n# The packages folder can be ignored because of Package Restore\n**/packages/*\n# except build/, which is used as an MSBuild target.\n!**/packages/build/\n# Uncomment if necessary however generally it will be regenerated when needed\n#!**/packages/repositories.config\n# NuGet v3's project.json files produces more ignoreable files\n*.nuget.props\n*.nuget.targets\n\n# Microsoft Azure Build Output\ncsx/\n*.build.csdef\n\n# Microsoft Azure Emulator\necf/\nrcf/\n\n# Windows Store app package directories and files\nAppPackages/\nBundleArtifacts/\nPackage.StoreAssociation.xml\n_pkginfo.txt\n\n# Visual Studio cache files\n# files ending in .cache can be ignored\n*.[Cc]ache\n# but keep track of directories ending in .cache\n!*.[Cc]ache/\n\n# Others\nClientBin/\n~$*\n*~\n*.dbmdl\n*.dbproj.schemaview\n*.jfm\n*.pfx\n*.publishsettings\nnode_modules/\norleans.codegen.cs\n\n# Since there are multiple workflows, uncomment next line to ignore bower_components\n# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)\n#bower_components/\n\n# RIA/Silverlight projects\nGenerated_Code/\n\n# Backup & report files from converting an old project file\n# to a newer Visual Studio version. 
Backup files are not needed,\n# because we have git ;-)\n_UpgradeReport_Files/\nBackup*/\nUpgradeLog*.XML\nUpgradeLog*.htm\n\n# SQL Server files\n*.mdf\n*.ldf\n\n# Business Intelligence projects\n*.rdl.data\n*.bim.layout\n*.bim_*.settings\n\n# Microsoft Fakes\nFakesAssemblies/\n\n# GhostDoc plugin setting file\n*.GhostDoc.xml\n\n# Node.js Tools for Visual Studio\n.ntvs_analysis.dat\n\n# Visual Studio 6 build log\n*.plg\n\n# Visual Studio 6 workspace options file\n*.opt\n\n# Visual Studio LightSwitch build output\n**/*.HTMLClient/GeneratedArtifacts\n**/*.DesktopClient/GeneratedArtifacts\n**/*.DesktopClient/ModelManifest.xml\n**/*.Server/GeneratedArtifacts\n**/*.Server/ModelManifest.xml\n_Pvt_Extensions\n\n# Paket dependency manager\n.paket/paket.exe\npaket-files/\n\n# FAKE - F# Make\n.fake/\n\n# JetBrains Rider\n.idea/\n*.sln.iml\n\n# CodeRush\n.cr/\n\n# Python Tools for Visual Studio (PTVS)\n__pycache__/\n*.pyc\n"
  },
  {
    "path": "AnnotationLambda/AnnotationLambda.cs",
    "content": "using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.IO.Compression;\r\nusing System.Security.Cryptography;\r\nusing System.Text;\r\nusing Amazon.Lambda.Core;\r\nusing Cloud;\r\nusing Cloud.Messages.Annotation;\r\nusing Cloud.Notifications;\r\nusing Cloud.Utilities;\r\nusing CommandLine.Utilities;\r\nusing Compression.FileHandling;\r\nusing ErrorHandling;\r\nusing Genome;\r\nusing IO;\r\nusing Nirvana;\r\nusing Vcf;\r\nusing Tabix;\r\nusing VariantAnnotation;\r\nusing VariantAnnotation.SA;\r\n\r\n[assembly: LambdaSerializer(typeof(Amazon.Lambda.Serialization.Json.JsonSerializer))]\r\n\r\nnamespace AnnotationLambda\r\n{\r\n    // ReSharper disable once UnusedMember.Global\r\n    // ReSharper disable once ClassNeverInstantiated.Global\r\n    public sealed class AnnotationLambda\r\n    {\r\n        // ReSharper disable once UnusedMember.Global\r\n        public AnnotationResult Run(AnnotationConfig config, ILambdaContext context)\r\n        {\r\n            var result = new AnnotationResult { id = config.id };\r\n            string snsTopicArn = null;\r\n            var runLog = new StringBuilder();\r\n\r\n            try\r\n            {\r\n                LogUtilities.UpdateLogger(context.Logger, runLog);\r\n                LogUtilities.LogLambdaInfo(context, CommandLineUtilities.InformationalVersion);\r\n                LogUtilities.LogObject(\"Config\", config);\r\n                LogUtilities.Log(new[] { LambdaUrlHelper.UrlBaseEnvironmentVariableName, LambdaUtilities.SnsTopicKey });\r\n\r\n                LambdaUtilities.GarbageCollect();\r\n                LambdaUtilities.DeleteTempOutput();\r\n\r\n                snsTopicArn = LambdaUtilities.GetEnvironmentVariable(LambdaUtilities.SnsTopicKey);\r\n\r\n                string vcfUrl       = config.vcfUrl;\r\n                int    variantCount = 0;\r\n                using (var annotationResources = GetAnnotationResources(config))\r\n                {\r\n          
          if (annotationResources.InputStartVirtualPosition == -1) return GetSuccessOutput(result);\r\n\r\n                    long fileOffset = VirtualPosition.From(annotationResources.InputStartVirtualPosition).FileOffset;\r\n\r\n                    using (var preloadVcfStream = PersistentStreamUtils.GetReadStream(vcfUrl, fileOffset))\r\n                    {\r\n                        var annotationRange = config.annotationRange?.ToGenomicRange(annotationResources.SequenceProvider.RefNameToChromosome);\r\n                        annotationResources.GetVariantPositions(new BlockGZipStream(preloadVcfStream, CompressionMode.Decompress), annotationRange);\r\n                    }\r\n\r\n                    Logger.WriteLine(\"Scan for positions to preload complete.\");\r\n                    \r\n                    using (var aes = new AesCryptoServiceProvider())\r\n                    {\r\n                        FileMetadata jsonMetadata, jasixMetadata;\r\n                        string jsonPath = Path.GetTempPath() + LambdaUrlHelper.JsonSuffix;\r\n                        string jasixPath = jsonPath + LambdaUrlHelper.JsonIndexSuffix;\r\n\r\n                        using (var inputVcfStream = new BlockGZipStream(PersistentStreamUtils.GetReadStream(vcfUrl, fileOffset), CompressionMode.Decompress))\r\n                        using (var headerStream = config.annotationRange == null ? 
null : new BlockGZipStream(PersistentStreamUtils.GetReadStream(vcfUrl), CompressionMode.Decompress))\r\n                        //\r\n                        using (var jsonFileStream = FileUtilities.GetCreateStream(jsonPath))\r\n                        using (var jsonCryptoStream = new CryptoStream(jsonFileStream, aes.CreateEncryptor(), CryptoStreamMode.Write))\r\n                        using (var jsonMd5Stream = new MD5Stream(jsonCryptoStream))\r\n                        //\r\n                        using (var jasixFileStream = FileUtilities.GetCreateStream(jasixPath))\r\n                        using (var jasixCryptoStream = new CryptoStream(jasixFileStream, aes.CreateEncryptor(), CryptoStreamMode.Write))\r\n                        using (var jasixMd5Stream = new MD5Stream(jasixCryptoStream))\r\n                        {\r\n                            IVcfFilter vcfFilter = config.annotationRange == null\r\n                                ? new NullVcfFilter()\r\n                                : new VcfFilter(config.annotationRange.ToGenomicRange(annotationResources.SequenceProvider.RefNameToChromosome));\r\n\r\n                            using (var jsonCompressStream = new BlockGZipStream(jsonMd5Stream, CompressionMode.Compress))\r\n                            {\r\n                                variantCount = StreamAnnotation.Annotate(headerStream, inputVcfStream, jsonCompressStream, \r\n                                    jasixMd5Stream, annotationResources, vcfFilter, true, false,\r\n                                    config.desiredVcfInfo == null? null: new HashSet<string>(config.desiredVcfInfo),\r\n                                    config.desiredVcfSampleInfo == null? 
null: new HashSet<string>(config.desiredVcfSampleInfo)).variantCount;\r\n                            }\r\n\r\n                            Logger.WriteLine(\"Annotation done.\");\r\n\r\n                            jsonMetadata  = jsonMd5Stream.GetFileMetadata();\r\n                            jasixMetadata = jasixMd5Stream.GetFileMetadata();\r\n                        }\r\n\r\n                        result.filePath = S3Utilities.GetKey(config.outputDir.path, config.outputPrefix + LambdaUrlHelper.JsonSuffix);\r\n                        string jasixKey = result.filePath + LambdaUrlHelper.JsonIndexSuffix;\r\n\r\n                        var s3Client = config.outputDir.GetS3Client(context.RemainingTime);\r\n                        s3Client.DecryptUpload(config.outputDir.bucketName, jasixKey, jasixPath, aes, jasixMetadata);\r\n                        s3Client.DecryptUpload(config.outputDir.bucketName, result.filePath, jsonPath, aes, jsonMetadata);\r\n\r\n                        Logger.WriteLine(\"Nirvana result files uploaded.\");\r\n                    }\r\n                }\r\n\r\n                LambdaUtilities.DeleteTempOutput();\r\n                if (string.IsNullOrEmpty(result.filePath)) throw new FileNotFoundException();\r\n\r\n                result.variantCount = variantCount;\r\n                return GetSuccessOutput(result);\r\n            }\r\n            catch (Exception exception)\r\n            {\r\n                LambdaUtilities.DeleteTempOutput();\r\n                return HandleException(runLog, result, exception, snsTopicArn);\r\n            }\r\n        }\r\n\r\n        private static AnnotationResult GetSuccessOutput(AnnotationResult result)\r\n        {\r\n            result.status = LambdaUtilities.SuccessMessage;\r\n            LogUtilities.LogObject(\"Result\", result);\r\n            return result;\r\n        }\r\n\r\n        private static AnnotationResult HandleException(StringBuilder runLog, AnnotationResult result, Exception e, string 
snsTopicArn)\r\n        {\r\n            Logger.Log(e);\r\n\r\n            result.status = e.Message;\r\n            result.errorCategory = ExceptionUtilities.ExceptionToErrorCategory(e);\r\n            Logger.WriteLine($\"Error Category: {result.errorCategory}\");\r\n\r\n            if (result.errorCategory != ErrorCategory.UserError)\r\n            {\r\n                string snsMessage = SNS.CreateMessage(runLog.ToString(), result.status, e.StackTrace);\r\n                SNS.SendMessage(snsTopicArn, snsMessage);\r\n            }\r\n\r\n            LogUtilities.LogObject(\"Result\", result);\r\n            return result;\r\n        }\r\n\r\n        internal static long GetTabixVirtualPosition(AnnotationRange annotationRange, Stream stream, Dictionary<string, Chromosome> refNameToChromosome)\r\n        {\r\n            // process the entire file if no range specified\r\n            if (annotationRange == null) return 0;\r\n\r\n            var tabixIndex = Reader.GetTabixIndex(stream, refNameToChromosome);\r\n            return tabixIndex.GetOffset(annotationRange.Start.Chromosome, annotationRange.Start.Position);\r\n        }\r\n\r\n        private static AnnotationResources GetAnnotationResources(AnnotationConfig annotationConfig)\r\n        {\r\n            var genomeAssembly      = GenomeAssemblyHelper.Convert(annotationConfig.genomeAssembly);\r\n            string cachePathPrefix  = LambdaUrlHelper.GetCacheFolder().UrlCombine(genomeAssembly.ToString()).UrlCombine(LambdaUrlHelper.DefaultCacheSource);\r\n            string nirvanaS3Ref     = LambdaUrlHelper.GetRefUrl(genomeAssembly);\r\n            // SaVersion will be provided as an environment variable. Defaults to \"latest\"\r\n            string saVersion = Environment.GetEnvironmentVariable(\"SaVersion\");\r\n            string saManifestUrl    = LambdaUtilities.GetManifestUrl(string.IsNullOrEmpty(saVersion)? 
\"latest\": saVersion, genomeAssembly, SaCommon.SchemaVersion);\r\n            var metrics = new PerformanceMetrics();\r\n\r\n            var annotationResources = new AnnotationResources(nirvanaS3Ref, cachePathPrefix,\r\n                saManifestUrl == null? null: new List<string> {saManifestUrl}, \r\n                annotationConfig.customAnnotations, \r\n                annotationConfig.customStrUrl, \r\n                false, \r\n                false, \r\n                metrics);\r\n\r\n            using (var tabixStream = PersistentStreamUtils.GetReadStream(annotationConfig.tabixUrl))\r\n            {\r\n                annotationResources.InputStartVirtualPosition = GetTabixVirtualPosition(annotationConfig.annotationRange, tabixStream, annotationResources.SequenceProvider.RefNameToChromosome);\r\n            }\r\n\r\n            Logger.WriteLine($\"Tabix position :{annotationResources.InputStartVirtualPosition}\");\r\n\r\n            return annotationResources;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "AnnotationLambda/AnnotationLambda.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\r\n  <PropertyGroup>\r\n    <TargetFramework>net6.0</TargetFramework>\r\n    <GenerateRuntimeConfigurationFiles>true</GenerateRuntimeConfigurationFiles>\r\n    <AWSProjectType>Lambda</AWSProjectType>\r\n    <OutputPath>bin\\$(Configuration)</OutputPath>\r\n\r\n  </PropertyGroup>\r\n  <ItemGroup>\r\n    <PackageReference Include=\"Amazon.Lambda.Core\" Version=\"2.1.0\" />\r\n    <PackageReference Include=\"Amazon.Lambda.Serialization.Json\" Version=\"2.0.0\" />\r\n    <PackageReference Include=\"AWSSDK.Lambda\" Version=\"3.7.9.3\" />\r\n    <PackageReference Include=\"AWSSDK.S3\" Version=\"3.7.8.3\" />\r\n    <PackageReference Include=\"AWSSDK.SimpleNotificationService\" Version=\"3.7.3.31\" />\r\n  </ItemGroup>\r\n  <ItemGroup>\r\n    <ProjectReference Include=\"..\\Cloud\\Cloud.csproj\" />\r\n    <ProjectReference Include=\"..\\Compression\\Compression.csproj\" />\r\n    <ProjectReference Include=\"..\\ErrorHandling\\ErrorHandling.csproj\" />\r\n    <ProjectReference Include=\"..\\Genome\\Genome.csproj\" />\r\n    <ProjectReference Include=\"..\\Nirvana\\Nirvana.csproj\" />\r\n    <ProjectReference Include=\"..\\Tabix\\Tabix.csproj\" />\r\n    <ProjectReference Include=\"..\\Vcf\\Vcf.csproj\" />\r\n  </ItemGroup>\r\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\r\n</Project>\r\n"
  },
  {
    "path": "AnnotationLambda/AssemblyInfo.cs",
    "content": "﻿using System.Runtime.CompilerServices;\r\n\r\n[assembly: InternalsVisibleTo(\"UnitTests\")]"
  },
  {
    "path": "AnnotationLambda/S3Utilities.cs",
    "content": "﻿namespace AnnotationLambda\r\n{\r\n    public static class S3Utilities\r\n    {\r\n        public static string GetKey(string outputDir, string filename)\r\n        {\r\n            outputDir = outputDir?.Trim('/');\r\n            if (string.IsNullOrEmpty(outputDir)) return filename;\r\n            return outputDir + '/' + filename;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CODE_OF_CONDUCT.md",
    "content": "# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nIn the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.\n\n## Our Standards\n\nExamples of behavior that contributes to creating a positive environment include:\n\n* Using welcoming and inclusive language\n* Being respectful of differing viewpoints and experiences\n* Gracefully accepting constructive criticism\n* Focusing on what is best for the community\n* Showing empathy towards other community members\n\nExamples of unacceptable behavior by participants include:\n\n* The use of sexualized language or imagery and unwelcome sexual attention or advances\n* Trolling, insulting/derogatory comments, and personal or political attacks\n* Public or private harassment\n* Publishing others' private information, such as a physical or electronic address, without explicit permission\n* Other conduct which could reasonably be considered inappropriate in a professional setting\n\n## Our Responsibilities\n\nProject maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.\n\nProject maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.\n\n## Scope\n\nThis Code of Conduct applies both within project spaces and in public spaces when an individual is representing 
the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.\n\n## Enforcement\n\nInstances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at mstromberg@illumina.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.\n\nProject maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.\n\n## Attribution\n\nThis Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]\n\n[homepage]: http://contributor-covenant.org\n[version]: http://contributor-covenant.org/version/1/4/\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "This guide provides:\n* protocols for contributing new features or bug fixes\n* high-level information about our development process\n\nInformation is added as pertinent questions/discussions come up in the contributor community,\nso this guide is not intended to provide complete coverage of the above topics.\n\n# Table of Contents\n* [Scrum (agile development) ](#scrum-agile-development)\n* [Developer environment](#developer-environment)\n* [Coding conventions](#coding-conventions)\n* [Branching model](#branching-model)\n* [Unit testing](#unit-testing)\n* [Continuous integration](#continuous-integration)\n* [Portability](#portability)\n\n# Scrum (agile development)\n\nThe development team uses Scrum agile development methodology. Our sprints are two weeks long and consistent of the four key ceremonies:\n\n* sprint planning\n* daily stand-ups\n* sprint retrospective\n* sprint review\n\nFor external developers interested in contributing to the project, we would be happy to invite you to these ceremonies. Please contact any of the team members and we'll make the necessary arrangements.\n\n# Developer environment\n\n## IDE\n\nThe development team is using Microsoft Visual Studio 2015 to develop Nirvana. Developers could in theory choose to use other C# IDEs such as [MonoDevelop](http://www.monodevelop.com/), [SharpDevelop](https://sourceforge.net/projects/sharpdevelop/), or [Project Rider](https://www.jetbrains.com/rider/). However, we have not evaluated those IDEs at the moment.\n\n## Extensions\n\n<p align=\"center\">\n  <img src=\"https://www.jetbrains.com/resharper/img/screenshots/code-analysis.png\" width=\"600\" />\n</p>\n\nJetBrains makes an incredible Visual Studio extension called [ReSharper](https://www.jetbrains.com/resharper/). No other tool comes as close to helping developers produce clean C# code while offering powerful functionality to make refactoring a breeze. 
For our internal development team, we require the use of ReSharper.\n\n# Coding conventions\n\nWe use the same coding conventions (naming, layout, and commenting conventions) as is used in Microsoft's [C# Coding Conventions Guide](https://msdn.microsoft.com/en-us/library/ff926074.aspx). The only exception to this is the variable naming scheme that ReSharper suggests (i.e. private class variables should begin with an underscore).\n\nHere's a small example class that demonstrates most of these conventions:\n\n```C#\nusing System;\nusing System.Collections.Generic;\n\nnamespace Demo\n{\n    public class Fibonacci\n    {\n        #region members\n\n        private readonly List<int> _fibonacciSeries;\n        public readonly string Description;\n\n        #endregion\n\n        /// <summary>\n        /// constructor\n        /// </summary>\n        public Fibonacci(string description, int numValues)\n        {\n            Description = description;\n            _fibonacciSeries = new List<int>(numValues);\n            Calculate(numValues);\n        }\n\n        /// <summary>\n        /// iteratively calculates the first n values of the Fibonacci series\n        /// </summary>\n        private void Calculate(int numValues)\n        {\n            int a = 1, b = 1;\n\n            _fibonacciSeries.Add(a);\n            _fibonacciSeries.Add(b);\n\n            for (int i = 2; i < numValues; i++)\n            {\n                int sum = a + b;\n                _fibonacciSeries.Add(sum);\n                a = b;\n                b = sum;\n            }\n        }\n\n        /// <summary>\n        /// displays all the calculated values of our fibonacci series\n        /// </summary>\n        public void Display()\n        {\n            Console.WriteLine($\"{Description}:\");\n            foreach(var value in _fibonacciSeries) Console.Write($\"{value} \");\n            Console.WriteLine();\n        }\n\n        /// <summary>\n        /// displays the nth calculated value of our 
fibonacci series\n        /// </summary>\n        public void Display(int index)\n        {\n            if ((index < 1) || (index > _fibonacciSeries.Count))\n            {\n                throw new ArgumentOutOfRangeException(nameof(index));\n            }\n\n            Console.WriteLine($\"{Description}: {_fibonacciSeries[index - 1]}\");\n        }\n    }\n}\n```\n\n# Branching model\n\n<img src=\"https://github.com/Illumina/Nirvana/wiki/images/GitFlow.png\" width=\"400\" align=\"right\" />\n\nThe development team uses [GitFlow](http://nvie.com/posts/a-successful-git-branching-model/) to organize all of our branches.\n\n## Feature branches\n\nIn essence, all of our day-to-day work is on the **develop branch**. When work begins on a new **story** or **bug fix**, we will create a feature branch from the develop branch. When work on the feature branch has been completed, a **pull request is required** before it can be merged back to the develop branch. \n\nWhen the feature has finished development, we typically go through the following steps:\n\n1. pull the latest develop branch\n1. merge the develop branch to the feature branch\n1. ensure that all unit tests pass\n1. ensure that all regression and integration tests pass (internal developers only)\n1. create a pull request \n1. once approved, merge the feature branch to the develop branch\n\nInternal developers will also check the status of the Jenkins integration and regression tests before merging a feature branch back.\n\n### Naming\n\nOur feature branch names obey the following convention:\n\n```\nfeatures/short_description_1234\nbugfixes/short_description_1234\n```\n\nAll feature branches are prefixed by either **features/** or **bugfixes/**. This naming scheme is exploited by our continuous integration framework. 
The number 1234 is used as a convenience to hold our JIRA ID (external developers are not required to add a numerical identifier).\n\n## Builds and releases\n\nWhen we're ready to issue a new build, the develop branch is merged to the **master branch** and an **annotated tag** is added to the master branch.\n\n```\ngit tag -a v1.4.3 -m \"Nirvana 1.4.3\"\ngit push origin v1.4.3\n```\n\n## Release and hotfix branches\n\nOur team typically creates releases and hotfix branches for internal projects. As such, they will only be visible on our internal GitHub Enterprise server.\n\n# Unit testing\n\n<img src=\"https://github.com/Illumina/Nirvana/wiki/images/UnitTesting.png\" width=\"400\" align=\"right\" />\n\nOur team strives to have high unit test code coverage of all Nirvana code. Currently, the code coverage of the Illumina.VariantAnnotation library is around **82%** and we aspire to increase that to 90% or greater within the next few months.\n\nWe prefer using a [TDD methodology](https://en.wikipedia.org/wiki/Test-driven_development), but we are not forcing developers to use it at this time. TDD has had a measured effect on improving our code quality.\n\nAny time our continuous integration pipeline shows an annotation that deviates from the baseline, we create a unit test to demonstrate the correct behavior and to ensure that future regressions do not occur.\n\n<br clear=all>\n\n# Continuous integration\n\nAt Illumina, we have developed an extensive testing framework on top of the [Jenkins continuous integration framework](https://jenkins.io/). During our daily stand-ups, we check the status of every field in every variant for a few dozen data sets against the baseline. This translates to 100's of millions of variants (or billions of annotation fields) being checked on a daily basis.\n\nUnfortunately, our Jenkins servers sit behind our corporate firewall at the moment; but here's a snapshot of the information provided by our CI framework. 
We run a full set of smoke tests on every git commit on the develop branch. Developers can trigger both smoke and regression tests on any of the branches:\n\n<table>\n<tr>\n<td valign=\"top\"><a href=\"https://github.com/Illumina/Nirvana/wiki/images/CI_AllCurrentBranches.png\"><img src=\"https://github.com/Illumina/Nirvana/wiki/images/CI_AllCurrentBranches.png\" /></a></td>\n<td valign=\"top\"><a href=\"https://github.com/Illumina/Nirvana/wiki/images/SmokeTests.png\"><img src=\"https://github.com/Illumina/Nirvana/wiki/images/SmokeTests.png\" /></a></td>\n</tr>\n</table>\n\nFor each smoke or regression test, our testing framework provides a wealth of information for each input VCF file:\n\n<table>\n<tr>\n<td valign=\"top\"><a href=\"https://github.com/Illumina/Nirvana/wiki/images/NA12877_GlobalAccuracy.png\"><img src=\"https://github.com/Illumina/Nirvana/wiki/images/NA12877_GlobalAccuracy.png\" height=\"500\" /></a></td>\n<td valign=\"top\"><a href=\"https://github.com/Illumina/Nirvana/wiki/images/NA12877_GlobalStatistics.png\"><img src=\"https://github.com/Illumina/Nirvana/wiki/images/NA12877_GlobalStatistics.png\" height=\"500\" /></a></td>\n</tr>\n</table>\n\nIn some cases, deviations from our baseline are found. When this happens, we add it as a bug in our JIRA project and prioritize it accordingly in our backlog until it's ready to be committed for a sprint:\n\n<p align=\"center\"><a href=\"https://github.com/Illumina/Nirvana/wiki/images/NA12877_TranscriptDeviations.png\"><img src=\"https://github.com/Illumina/Nirvana/wiki/images/NA12877_TranscriptDeviations.png\" /></a></p>\n\n# Portability\n\nWhile development is mainly performed in a Windows environment, Nirvana is expected to run on multiple platforms (Windows and Linux) reliably. We test Nirvana on a daily basis on both platforms."
  },
  {
    "path": "CacheUtils/AssemblyInfo.cs",
    "content": "﻿using System.Runtime.CompilerServices;\n\n[assembly: InternalsVisibleTo(\"UnitTests\")]"
  },
  {
    "path": "CacheUtils/BuildCache.sh",
    "content": "#!/bin/sh\r\n\r\n# =============\r\n# configuration\r\n# =============\r\n\r\nDOTNET=dotnet\r\nRELEASE_DIR=/d/Projects/NirvanaCacheUtils/bin/Release/netcoreapp2.0\r\nCACHE_UTILS=$RELEASE_DIR/CacheUtils.dll\r\nVEP_VERSION=90\r\nCACHE_VERSION=25\r\n\r\nDATA_ROOT=/e/Data/Nirvana\r\nINTERMEDIATE_CACHE_DIR=$DATA_ROOT/IntermediateCache/$VEP_VERSION\r\nCACHE_DIR=$DATA_ROOT/Cache/$CACHE_VERSION\r\nREFERENCE_DIR=$DATA_ROOT/References/5\r\n\r\nENSEMBL37_TRANSCRIPT_PATH=$INTERMEDIATE_CACHE_DIR/Ensembl${VEP_VERSION}_GRCh37.transcripts.gz\r\nENSEMBL38_TRANSCRIPT_PATH=$INTERMEDIATE_CACHE_DIR/Ensembl${VEP_VERSION}_GRCh38.transcripts.gz\r\nREFSEQ37_TRANSCRIPT_PATH=$INTERMEDIATE_CACHE_DIR/RefSeq${VEP_VERSION}_GRCh37.transcripts.gz\r\nREFSEQ38_TRANSCRIPT_PATH=$INTERMEDIATE_CACHE_DIR/RefSeq${VEP_VERSION}_GRCh38.transcripts.gz\r\n\r\nENSEMBL37_CACHE_PATH=$CACHE_DIR/GRCh37/Ensembl${VEP_VERSION}.transcripts.ndb\r\nENSEMBL38_CACHE_PATH=$CACHE_DIR/GRCh38/Ensembl${VEP_VERSION}.transcripts.ndb\r\nREFSEQ37_CACHE_PATH=$CACHE_DIR/GRCh37/RefSeq${VEP_VERSION}.transcripts.ndb\r\nREFSEQ38_CACHE_PATH=$CACHE_DIR/GRCh38/RefSeq${VEP_VERSION}.transcripts.ndb\r\n\r\nENSEMBL38_TRANSCRIPT_PATH=$INTERMEDIATE_CACHE_DIR/Ensembl${VEP_VERSION}_GRCh38.transcripts.gz\r\nREFSEQ37_TRANSCRIPT_PATH=$INTERMEDIATE_CACHE_DIR/RefSeq${VEP_VERSION}_GRCh37.transcripts.gz\r\nREFSEQ38_TRANSCRIPT_PATH=$INTERMEDIATE_CACHE_DIR/RefSeq${VEP_VERSION}_GRCh38.transcripts.gz\r\n\r\n\r\nENSEMBL37_URL=\"ftp://ftp.ensembl.org/pub/release-${VEP_VERSION}/variation/VEP/homo_sapiens_vep_${VEP_VERSION}_GRCh37.tar.gz\"\r\nENSEMBL38_URL=\"ftp://ftp.ensembl.org/pub/release-${VEP_VERSION}/variation/VEP/homo_sapiens_vep_${VEP_VERSION}_GRCh38.tar.gz\"\r\nREFSEQ37_URL=\"ftp://ftp.ensembl.org/pub/release-${VEP_VERSION}/variation/VEP/homo_sapiens_refseq_vep_${VEP_VERSION}_GRCh37.tar.gz\"\r\nREFSEQ38_URL=\"ftp://ftp.ensembl.org/pub/release-${VEP_VERSION}/variation/VEP/homo_sapiens_refseq_vep_${VEP_VERSION}_GRCh38.tar.gz\"\r\n\r\n# 
=========\r\n# functions\r\n# =========\r\n\r\nCreateCache() {\r\n\r\n\tGA=$1\r\n\tTS=$2\r\n\r\n\t$DOTNET $CACHE_UTILS create -i $INTERMEDIATE_CACHE_DIR/${TS}${VEP_VERSION}_${GA} -r $REFERENCE_DIR/Homo_sapiens.${GA}.Nirvana.dat -o $CACHE_DIR/${GA}/${TS}${VEP_VERSION}\r\n\r\n\tif [ ! $? -eq 0 ]; then\r\n\t\techo \"ERROR: Unable to generate the cache successfully (Genome assembly: ${GA}, transcript source: ${TS})\"\r\n\t\texit 1\r\n\tfi\r\n}\r\n\r\nexport -f CreateCache\r\n\r\n# =============\r\n# main workflow\r\n# =============\r\n\r\n# download all the required files for building the cache\r\n$DOTNET $CACHE_UTILS download\r\n\r\n# create the intermediate cache files for each configuration\r\n# if [ ! -f ENSEMBL37_TRANSCRIPT_PATH ]\r\n# then\r\n\t# echo \"Not implemented yet.\"\r\n\t# exit 1\r\n# fi\r\n\r\n# if [ ! -f ENSEMBL38_TRANSCRIPT_PATH ]\r\n# then\r\n\t# echo \"Not implemented yet.\"\r\n\t# exit 1\r\n# fi\r\n\r\n# if [ ! -f REFSEQ37_TRANSCRIPT_PATH ]\r\n# then\r\n\t# echo \"Not implemented yet.\"\r\n\t# exit 1\r\n# fi\r\n\r\n# if [ ! -f REFSEQ38_TRANSCRIPT_PATH ]\r\n# then\r\n\t# echo \"Not implemented yet.\"\r\n\t# exit 1\r\n# fi\r\n\r\n# create the universal gene archive\r\n$DOTNET $CACHE_UTILS gene -r $REFERENCE_DIR -i $INTERMEDIATE_CACHE_DIR\r\n\r\n# create the actual cache files\r\nCACHE_LIST=\"\"\r\n\r\nif [ ! -f ENSEMBL37_CACHE_PATH ]\r\nthen\r\n\tCACHE_LIST=\"$CACHE_LIST GRCh37 Ensembl\"\r\nfi\r\n\r\nif [ ! -f ENSEMBL38_CACHE_PATH ]\r\nthen\r\n\tCACHE_LIST=\"$CACHE_LIST GRCh38 Ensembl\"\r\nfi\r\n\r\nif [ ! -f REFSEQ37_CACHE_PATH ]\r\nthen\r\n\tCACHE_LIST=\"$CACHE_LIST GRCh37 RefSeq\"\r\nfi\r\n\r\nif [ ! -f REFSEQ38_CACHE_PATH ]\r\nthen\r\n\tCACHE_LIST=\"$CACHE_LIST GRCh38 RefSeq\"\r\nfi\r\n\r\nif [ ! -z \"$CACHE_LIST\" ]\r\nthen\r\n\techo \"- creating cache files in parallel... \"\r\n\techo $CACHE_LIST | xargs -n 2 -P 8 bash -c 'CreateCache \"$@\"' -- \r\n\techo \"finished.\"\r\nfi\r\n"
  },
  {
    "path": "CacheUtils/CacheUtils.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing CacheUtils.Commands.CombineCacheDirectories;\r\nusing CacheUtils.Commands.CreateCache;\r\nusing CacheUtils.Commands.Download;\r\nusing CacheUtils.Commands.ExtractTranscripts;\r\nusing CacheUtils.Commands.GFF;\r\nusing CacheUtils.Commands.Header;\r\nusing CacheUtils.Commands.ParseVepCacheDirectory;\r\nusing CacheUtils.Commands.RegulatoryGFF;\r\nusing CacheUtils.Commands.UniversalGeneArchive;\r\nusing CommandLine.Builders;\r\nusing VariantAnnotation.Interface;\r\n\r\nnamespace CacheUtils\r\n{\r\n    internal static class CacheUtilsMain\r\n    {\r\n        private static int Main(string[] args)\r\n        {\r\n            var ops = new Dictionary<string, TopLevelOption>\r\n            {\r\n                [\"combine\"]  = new TopLevelOption(\"combine cache directories\", CombineCacheDirectoriesMain.Run),\r\n                [\"create\"]   = new TopLevelOption(\"create Nirvana cache files\", CreateNirvanaDatabaseMain.Run),\r\n                [\"download\"] = new TopLevelOption(\"downloads required files\", DownloadMain.Run),\r\n                [\"extract\"]  = new TopLevelOption(\"extracts transcripts\", ExtractTranscriptsMain.Run),\r\n                [\"gene\"]     = new TopLevelOption(\"updates the universal gene archive\", UniversalGeneArchiveMain.Run),\r\n                [\"gff\"]      = new TopLevelOption(\"export transcripts to GFF\", CreateGffMain.Run),\r\n                [\"header\"]   = new TopLevelOption(\"displays the header information\", HeaderMain.Run),\r\n                [\"parse\"]    = new TopLevelOption(\"parses the VEP cache files\", ParseVepCacheDirectoryMain.Run),\r\n                [\"rgff\"]     = new TopLevelOption(\"export regulatory regions to GFF\", CreateRegulatoryGffMain.Run)\r\n            };\r\n\r\n            var exitCode = new TopLevelAppBuilder(args, ops)\r\n                .Parse()\r\n                .ShowBanner(Constants.Authors)\r\n                .ShowHelpMenu(\"Utilities 
focused on querying the cache directory\")\r\n                .ShowErrors()\r\n                .Execute();\r\n\r\n            return (int)exitCode;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "CacheUtils/CacheUtils.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\r\n  <PropertyGroup>\r\n    <OutputType>Exe</OutputType>\r\n    <TargetFramework>net6.0</TargetFramework>\r\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\r\n    \r\n  </PropertyGroup>\r\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\r\n  <ItemGroup>\r\n    <Content Include=\"CacheUtils.dll.gene.json\">\r\n      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>\r\n    </Content>\r\n  </ItemGroup>\r\n  <ItemGroup>\r\n    <PackageReference Include=\"Microsoft.Extensions.Configuration\" Version=\"6.0.0\" />\r\n    <PackageReference Include=\"Microsoft.Extensions.Configuration.Binder\" Version=\"6.0.0\" />\r\n    <PackageReference Include=\"Microsoft.Extensions.Configuration.Json\" Version=\"6.0.0\" />\r\n  </ItemGroup>\r\n  <ItemGroup>\r\n    <ProjectReference Include=\"..\\CommandLine\\CommandLine.csproj\" />\r\n    <ProjectReference Include=\"..\\Compression\\Compression.csproj\" />\r\n    <ProjectReference Include=\"..\\ReferenceSequence\\ReferenceSequence.csproj\" />\r\n    <ProjectReference Include=\"..\\VariantAnnotation.Interface\\VariantAnnotation.Interface.csproj\" />\r\n    <ProjectReference Include=\"..\\VariantAnnotation\\VariantAnnotation.csproj\" />\r\n  </ItemGroup>\r\n</Project>"
  },
  {
    "path": "CacheUtils/CacheUtils.dll.gene.json",
    "content": "{  \r\n   \"GRCh37\":{  \r\n      \"ReferencePath\":\"Homo_sapiens.GRCh37.Nirvana.dat\",\r\n      \"EnsemblCachePath\":\"Ensembl_GRCh37.transcripts.gz\",\r\n      \"RefSeqCachePath\":\"RefSeq_GRCh37.transcripts.gz\"\r\n   },\r\n  \"GRCh38\": {\r\n    \"ReferencePath\": \"Homo_sapiens.GRCh38.Nirvana.dat\",\r\n    \"EnsemblCachePath\": \"Ensembl_GRCh38.transcripts.gz\",\r\n    \"RefSeqCachePath\": \"RefSeq_GRCh38.transcripts.gz\"\r\n  }\r\n}"
  },
  {
    "path": "CacheUtils/Commands/CombineCacheDirectories/CombineCacheDirectoriesMain.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.IO.Compression;\r\nusing System.Linq;\r\nusing CacheUtils.PredictionCache;\r\nusing CacheUtils.TranscriptCache;\r\nusing CommandLine.Builders;\r\nusing CommandLine.NDesk.Options;\r\nusing Compression.Algorithms;\r\nusing Compression.FileHandling;\r\nusing ErrorHandling;\r\nusing Genome;\r\nusing Intervals;\r\nusing IO;\r\nusing ReferenceSequence.Utilities;\r\nusing VariantAnnotation.Caches;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.IO.Caches;\r\nusing VariantAnnotation.Providers;\r\n\r\nnamespace CacheUtils.Commands.CombineCacheDirectories\r\n{\r\n    public static class CombineCacheDirectoriesMain\r\n    {\r\n        private static string _inputPrefix;\r\n        private static string _inputPrefix2;\r\n        private static string _outputPrefix;\r\n        private static string _refSequencePath;\r\n\r\n        private static ExitCodes ProgramExecution()\r\n        {\r\n            var sequenceData = SequenceHelper.GetDictionaries(_refSequencePath);\r\n\r\n            var caches = LoadTranscriptCaches(CacheConstants.TranscriptPath(_inputPrefix),\r\n                CacheConstants.TranscriptPath(_inputPrefix2), sequenceData.refIndexToChromosome);\r\n\r\n            if (caches.Cache.TranscriptIntervalArrays.Length != caches.Cache2.TranscriptIntervalArrays.Length)\r\n                throw new InvalidDataException($\"Expected the number of reference sequences in cache 1 ({caches.Cache.TranscriptIntervalArrays.Length}) and cache 2 ({caches.Cache2.TranscriptIntervalArrays.Length}) to be the same.\");\r\n\r\n            int numRefSeqs                = caches.Cache.TranscriptIntervalArrays.Length;\r\n            var combinedIntervalArrays    = new IntervalArray<ITranscript>[numRefSeqs];\r\n            var siftPredictionsPerRef     = new Prediction[numRefSeqs][];\r\n            var 
polyphenPredictionsPerRef = new Prediction[numRefSeqs][];\r\n\r\n            PredictionHeader siftHeader;\r\n            PredictionHeader polyphenHeader;\r\n\r\n            using (var siftReader       = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.SiftPath(_inputPrefix)), PredictionCacheReader.SiftDescriptions))\r\n            using (var siftReader2      = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.SiftPath(_inputPrefix2)), PredictionCacheReader.SiftDescriptions))\r\n            using (var polyphenReader   = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.PolyPhenPath(_inputPrefix)), PredictionCacheReader.PolyphenDescriptions))\r\n            using (var polyphenReader2  = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.PolyPhenPath(_inputPrefix2)), PredictionCacheReader.PolyphenDescriptions))\r\n            {\r\n                siftHeader     = siftReader.Header;\r\n                polyphenHeader = polyphenReader.Header;\r\n\r\n                for (ushort refIndex = 0; refIndex < numRefSeqs; refIndex++)\r\n                {\r\n                    var chromosome = sequenceData.refIndexToChromosome[refIndex];\r\n\r\n                    Console.ForegroundColor = ConsoleColor.Yellow;\r\n                    Logger.WriteLine($\"\\n{chromosome.UcscName}:\");\r\n                    Console.ResetColor();\r\n\r\n                    var sift = CombinePredictions(chromosome, \"SIFT\", siftReader, siftReader2);\r\n                    siftPredictionsPerRef[refIndex] = sift.Predictions;\r\n\r\n                    var polyphen = CombinePredictions(chromosome, \"PolyPhen\", polyphenReader, polyphenReader2);\r\n                    polyphenPredictionsPerRef[refIndex] = polyphen.Predictions;\r\n\r\n                    var transcriptIntervalArray  = caches.Cache.TranscriptIntervalArrays[refIndex];\r\n                    var transcriptIntervalArray2 = 
caches.Cache2.TranscriptIntervalArrays[refIndex];\r\n\r\n                    combinedIntervalArrays[refIndex] = CombineTranscripts(transcriptIntervalArray,\r\n                        transcriptIntervalArray2, sift.Offset, polyphen.Offset);\r\n                }\r\n            }\r\n\r\n            Logger.WriteLine(\"\");\r\n            WritePredictions(\"SIFT\", CacheConstants.SiftPath(_outputPrefix), siftHeader, siftPredictionsPerRef);\r\n            WritePredictions(\"PolyPhen\", CacheConstants.PolyPhenPath(_outputPrefix), polyphenHeader, polyphenPredictionsPerRef);\r\n            WriteTranscripts(CloneHeader(caches.Cache.Header), combinedIntervalArrays,\r\n                caches.Cache.RegulatoryRegionIntervalArrays);\r\n\r\n            return ExitCodes.Success;\r\n        }\r\n\r\n        private static void WriteTranscripts(CacheHeader header,\r\n            IntervalArray<ITranscript>[] transcriptIntervalArrays,\r\n            IntervalArray<IRegulatoryRegion>[] regulatoryRegionIntervalArrays)\r\n        {\r\n            var staging = TranscriptCacheStaging.GetStaging(header, transcriptIntervalArrays, regulatoryRegionIntervalArrays);\r\n\r\n            Logger.Write(\"- writing transcripts... \");\r\n            staging.Write(FileUtilities.GetCreateStream(CacheConstants.TranscriptPath(_outputPrefix)));\r\n            Logger.WriteLine(\"finished.\");\r\n        }\r\n\r\n        private static void WritePredictions(string description, string filePath,\r\n            PredictionHeader header, Prediction[][] predictionsPerRef)\r\n        {\r\n            Logger.Write($\"- writing {description} predictions... 
\");\r\n\r\n            using (var stream = new BlockStream(new Zstandard(), FileUtilities.GetCreateStream(filePath), CompressionMode.Compress))\r\n            using (var writer = new PredictionCacheWriter(stream, CloneHeader(header)))\r\n            {\r\n                writer.Write(header.LookupTable, predictionsPerRef);\r\n            }\r\n\r\n            Logger.WriteLine(\"finished.\");\r\n        }\r\n\r\n        private static IntervalArray<ITranscript> CombineTranscripts(IntervalArray<ITranscript> intervalArray,\r\n            IntervalArray<ITranscript> intervalArray2, int siftOffset, int polyphenOffset)\r\n        {\r\n            Logger.Write(\"- combine transcripts... \");\r\n\r\n            int numCombinedTranscripts = GetNumCombinedTranscripts(intervalArray, intervalArray2);\r\n            var combinedIntervals      = new Interval<ITranscript>[numCombinedTranscripts];\r\n\r\n            var combinedIndex = 0;\r\n            CopyItems(intervalArray?.Array,  combinedIntervals, ref combinedIndex, interval => interval);\r\n            CopyItems(intervalArray2?.Array, combinedIntervals, ref combinedIndex, interval => GetUpdatedTranscript(interval, siftOffset, polyphenOffset));\r\n\r\n            Logger.WriteLine(\"finished.\");\r\n\r\n            return new IntervalArray<ITranscript>(combinedIntervals.OrderBy(x => x.Begin).ThenBy(x => x.End).ToArray());\r\n        }\r\n\r\n        private static int GetNumCombinedTranscripts<T>(IntervalArray<T> intervalArray,\r\n            IntervalArray<T> intervalArray2)\r\n        {\r\n            int numIntervals  = intervalArray?.Array.Length ?? 0;\r\n            int numIntervals2 = intervalArray2?.Array.Length ?? 
0;\r\n            return numIntervals + numIntervals2;\r\n        }\r\n\r\n        // ReSharper disable SuggestBaseTypeForParameter\r\n        private static void CopyItems<T>(T[] src, T[] dest, ref int destIndex, Func<T, T> updateFunc)\r\n        // ReSharper restore SuggestBaseTypeForParameter\r\n        {\r\n            if (src == null) return;\r\n            foreach (var item in src) dest[destIndex++] = updateFunc(item);\r\n        }\r\n\r\n        private static Interval<ITranscript> GetUpdatedTranscript(Interval<ITranscript> interval, int siftOffset,\r\n            int polyphenOffset)\r\n        {\r\n            var transcript = interval.Value;\r\n            if (transcript.SiftIndex == -1 && transcript.PolyPhenIndex == -1) return interval;\r\n\r\n            int newSiftIndex     = transcript.SiftIndex     == -1 ? -1 : transcript.SiftIndex + siftOffset;\r\n            int newPolyphenIndex = transcript.PolyPhenIndex == -1 ? -1 : transcript.PolyPhenIndex + polyphenOffset;\r\n\r\n            var updatedTranscript = transcript.UpdatePredictions(newSiftIndex, newPolyphenIndex);\r\n            return new Interval<ITranscript>(transcript.Start, transcript.End, updatedTranscript);\r\n        }\r\n\r\n        private static VariantAnnotation.IO.Caches.Header CloneBaseHeader(VariantAnnotation.IO.Caches.Header header) =>\r\n            new VariantAnnotation.IO.Caches.Header(CacheConstants.Identifier, header.SchemaVersion, header.DataVersion,\r\n                Source.BothRefSeqAndEnsembl, DateTime.Now.Ticks, header.Assembly);\r\n\r\n        private static PredictionHeader CloneHeader(PredictionHeader header) =>\r\n            new PredictionHeader(CloneBaseHeader(header), header.Custom, header.LookupTable);\r\n\r\n        private static CacheHeader CloneHeader(CacheHeader header) =>\r\n            new CacheHeader(CloneBaseHeader(header), header.Custom);\r\n\r\n        private static (Prediction[] Predictions, int Offset) CombinePredictions(Chromosome chromosome,\r\n      
      string description, PredictionCacheReader reader, PredictionCacheReader reader2)\r\n        {\r\n            Logger.Write($\"- load {description} predictions... \");\r\n            var predictions  = reader.GetPredictions(chromosome.Index);\r\n            var predictions2 = reader2.GetPredictions(chromosome.Index);\r\n            Logger.WriteLine(\"finished.\");\r\n\r\n            var combinedPredictions = CombinePredictions(description, predictions, predictions2);\r\n            return (combinedPredictions, predictions.Length);\r\n        }\r\n\r\n        private static Prediction[] CombinePredictions(string description, Prediction[] predictions,\r\n            Prediction[] predictions2)\r\n        {\r\n            Logger.Write($\"- combine {description} predictions... \");\r\n\r\n            int numCombinedPredictions = predictions.Length + predictions2.Length;\r\n            var combinedPredictions    = new Prediction[numCombinedPredictions];\r\n\r\n            var combinedIndex = 0;\r\n            CopyItems(predictions, combinedPredictions, ref combinedIndex, x => x);\r\n            CopyItems(predictions2, combinedPredictions, ref combinedIndex, x => x);\r\n\r\n            Logger.WriteLine(\"finished.\");\r\n\r\n            return combinedPredictions;\r\n        }\r\n\r\n        private static (TranscriptCacheData Cache, TranscriptCacheData Cache2) LoadTranscriptCaches(\r\n            string transcriptPath, string transcriptPath2, Dictionary<ushort, Chromosome> refIndexToChromosome)\r\n        {\r\n            TranscriptCacheData cache;\r\n            TranscriptCacheData cache2;\r\n\r\n            Logger.Write(\"- loading transcript caches... 
\");\r\n\r\n            using (var transcriptReader  = new TranscriptCacheReader(FileUtilities.GetReadStream(transcriptPath)))\r\n            using (var transcriptReader2 = new TranscriptCacheReader(FileUtilities.GetReadStream(transcriptPath2)))\r\n            {\r\n                cache  = transcriptReader.Read(refIndexToChromosome);\r\n                cache2 = transcriptReader2.Read(refIndexToChromosome);\r\n            }\r\n\r\n            Logger.WriteLine(\"finished.\");\r\n            return (cache, cache2);\r\n        }\r\n\r\n        public static ExitCodes Run(string command, string[] args)\r\n        {\r\n            var ops = new OptionSet\r\n            {\r\n                {\r\n                    \"in|1=\",\r\n                    \"input cache {prefix}\",\r\n                    v => _inputPrefix = v\r\n                },\r\n                {\r\n                    \"in2|2=\",\r\n                    \"input cache 2 {prefix}\",\r\n                    v => _inputPrefix2 = v\r\n                },\r\n                {\r\n                    \"out|o=\",\r\n                    \"output cache {prefix}\",\r\n                    v => _outputPrefix = v\r\n                },\r\n                {\r\n                    \"ref|r=\",\r\n                    \"input reference {path}\",\r\n                    v => _refSequencePath = v\r\n                }\r\n            };\r\n\r\n            string commandLineExample = $\"{command} --in <cache prefix> --in2 <cache prefix> --out <cache prefix> --ref <reference path>\";\r\n\r\n            return new ConsoleAppBuilder(args, ops)\r\n                .UseVersionProvider(new VersionProvider())\r\n                .Parse()\r\n                .CheckInputFilenameExists(_refSequencePath, \"reference sequence\", \"--ref\")\r\n                .HasRequiredParameter(_inputPrefix, \"input cache\", \"--in\")\r\n                .HasRequiredParameter(_inputPrefix2, \"input cache 2\", \"--in2\")\r\n                
.HasRequiredParameter(_outputPrefix, \"output cache\", \"--out\")\r\n                .SkipBanner()\r\n                .ShowHelpMenu(\"Combines two cache sets into one cache.\", commandLineExample)\r\n                .ShowErrors()\r\n                .Execute(ProgramExecution);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Commands/CreateCache/CreateNirvanaDatabaseMain.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Linq;\r\nusing CacheUtils.Commands.Download;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing CacheUtils.Genes.IO;\r\nusing CacheUtils.Genes.Utilities;\r\nusing CacheUtils.IntermediateIO;\r\nusing CacheUtils.PredictionCache;\r\nusing CacheUtils.TranscriptCache;\r\nusing CommandLine.Builders;\r\nusing CommandLine.NDesk.Options;\r\nusing Compression.Utilities;\r\nusing ErrorHandling;\r\nusing Genome;\r\nusing Intervals;\r\nusing IO;\r\nusing ReferenceSequence.Utilities;\r\nusing VariantAnnotation.Providers;\r\n\r\nnamespace CacheUtils.Commands.CreateCache\r\n{\r\n    public static class CreateNirvanaDatabaseMain\r\n    {\r\n        private static string _inputPrefix;\r\n        private static string _inputReferencePath;\r\n\r\n        private static string _outputCacheFilePrefix;\r\n\r\n        private static ExitCodes ProgramExecution()\r\n        {\r\n            string transcriptPath = _inputPrefix + \".transcripts.gz\";\r\n            string siftPath       = _inputPrefix + \".sift.gz\";\r\n            string polyphenPath   = _inputPrefix + \".polyphen.gz\";\r\n            string regulatoryPath = _inputPrefix + \".regulatory.gz\";\r\n\r\n            (var refIndexToChromosome, var refNameToChromosome, int numRefSeqs) = SequenceHelper.GetDictionaries(_inputReferencePath);\r\n\r\n            using (var transcriptReader = new MutableTranscriptReader(GZipUtilities.GetAppropriateReadStream(transcriptPath), refIndexToChromosome))\r\n            using (var regulatoryReader = new RegulatoryRegionReader(GZipUtilities.GetAppropriateReadStream(regulatoryPath), refIndexToChromosome))\r\n            using (var siftReader       = new PredictionReader(GZipUtilities.GetAppropriateReadStream(siftPath), refIndexToChromosome, IntermediateIoCommon.FileType.Sift))\r\n            using (var polyphenReader   = new 
PredictionReader(GZipUtilities.GetAppropriateReadStream(polyphenPath), refIndexToChromosome, IntermediateIoCommon.FileType.Polyphen))\r\n            using (var geneReader       = new UgaGeneReader(GZipUtilities.GetAppropriateReadStream(ExternalFiles.UniversalGeneFilePath), refNameToChromosome))\r\n            {\r\n                var genomeAssembly   = transcriptReader.Header.Assembly;\r\n                var source           = transcriptReader.Header.Source;\r\n                long vepReleaseTicks = transcriptReader.Header.VepReleaseTicks;\r\n                ushort vepVersion    = transcriptReader.Header.VepVersion;\r\n\r\n                Logger.Write(\"- loading universal gene archive file... \");\r\n                var genes      = geneReader.GetGenes();\r\n                var geneForest = CreateGeneForest(genes, numRefSeqs, genomeAssembly);\r\n                Logger.WriteLine($\"{genes.Length:N0} loaded.\");\r\n\r\n                Logger.Write(\"- loading regulatory region file... \");\r\n                var regulatoryRegions = regulatoryReader.GetRegulatoryRegions();\r\n                Logger.WriteLine($\"{regulatoryRegions.Length:N0} loaded.\");\r\n\r\n                Logger.Write(\"- loading transcript file... \");\r\n                var transcripts = transcriptReader.GetTranscripts();\r\n                var transcriptsByRefIndex = transcripts.GetMultiValueDict(x => x.Chromosome.Index);\r\n                Logger.WriteLine($\"{transcripts.Length:N0} loaded.\");\r\n\r\n                MarkCanonicalTranscripts(transcripts);\r\n\r\n                var predictionBuilder = new PredictionCacheBuilder(genomeAssembly);\r\n                var predictionCaches  = predictionBuilder.CreatePredictionCaches(transcriptsByRefIndex, siftReader, polyphenReader, numRefSeqs);\r\n\r\n                Logger.Write(\"- writing SIFT prediction cache... 
\");\r\n                predictionCaches.Sift.Write(FileUtilities.GetCreateStream(CacheConstants.SiftPath(_outputCacheFilePrefix)));\r\n                Logger.WriteLine(\"finished.\");\r\n\r\n                Logger.Write(\"- writing PolyPhen prediction cache... \");\r\n                predictionCaches.PolyPhen.Write(FileUtilities.GetCreateStream(CacheConstants.PolyPhenPath(_outputCacheFilePrefix)));\r\n                Logger.WriteLine(\"finished.\");\r\n\r\n                var transcriptBuilder = new TranscriptCacheBuilder(genomeAssembly, source, vepReleaseTicks, vepVersion);\r\n                var transcriptStaging = transcriptBuilder.CreateTranscriptCache(transcripts, regulatoryRegions, geneForest, numRefSeqs);\r\n\r\n                Logger.Write(\"- writing transcript cache... \");\r\n                transcriptStaging.Write(FileUtilities.GetCreateStream(CacheConstants.TranscriptPath(_outputCacheFilePrefix)));\r\n                Logger.WriteLine(\"finished.\");\r\n            }\r\n\r\n            return ExitCodes.Success;\r\n        }\r\n\r\n        private static IIntervalForest<UgaGene> CreateGeneForest(IEnumerable<UgaGene> genes, int numRefSeqs, GenomeAssembly genomeAssembly)\r\n        {\r\n            bool useGrch37    = genomeAssembly == GenomeAssembly.GRCh37;\r\n            var intervalLists = new List<Interval<UgaGene>>[numRefSeqs];\r\n\r\n            for (var i = 0; i < numRefSeqs; i++) intervalLists[i] = new List<Interval<UgaGene>>();\r\n\r\n            foreach (var gene in genes)\r\n            {\r\n                var coords = useGrch37 ? 
gene.GRCh37 : gene.GRCh38;\r\n                if (coords.Start == -1 && coords.End == -1) continue;\r\n                intervalLists[gene.Chromosome.Index].Add(new Interval<UgaGene>(coords.Start, coords.End, gene));\r\n            }\r\n\r\n            var refIntervalArrays = new IntervalArray<UgaGene>[numRefSeqs];\r\n            for (var i = 0; i < numRefSeqs; i++)\r\n            {\r\n                refIntervalArrays[i] = new IntervalArray<UgaGene>(intervalLists[i].OrderBy(x => x.Begin).ThenBy(x => x.End).ToArray());\r\n            }\r\n\r\n            return new IntervalForest<UgaGene>(refIntervalArrays);\r\n        }\r\n\r\n        private static void MarkCanonicalTranscripts(MutableTranscript[] transcripts)\r\n        {\r\n            var ccdsIdToEnsemblId = CcdsReader.GetCcdsIdToEnsemblId(ExternalFiles.CcdsFile.FilePath);\r\n            var lrgTranscriptIds  = LrgReader.GetTranscriptIds(ExternalFiles.LrgFile.FilePath, ccdsIdToEnsemblId);\r\n\r\n            Logger.Write(\"- marking canonical transcripts... 
\");\r\n            var canonical = new CanonicalTranscriptMarker(lrgTranscriptIds);\r\n            int numCanonicalTranscripts = canonical.MarkTranscripts(transcripts);\r\n            Logger.WriteLine($\"{numCanonicalTranscripts:N0} marked.\");\r\n        }\r\n\r\n        public static ExitCodes Run(string command, string[] args)\r\n        {\r\n            var ops = new OptionSet\r\n            {\r\n                {\r\n                    \"in|i=\",\r\n                    \"input filename {prefix}\",\r\n                    v => _inputPrefix = v\r\n                },\r\n                {\r\n                    \"out|o=\",\r\n                    \"output cache file {prefix}\",\r\n                    v => _outputCacheFilePrefix = v\r\n                },\r\n                {\r\n                    \"ref|r=\",\r\n                    \"input reference {filename}\",\r\n                    v => _inputReferencePath = v\r\n                }\r\n            };\r\n\r\n            string commandLineExample = $\"{command} --in <prefix> --out <prefix> --ref <path>\";\r\n\r\n            return new ConsoleAppBuilder(args, ops)\r\n                .UseVersionProvider(new VersionProvider())\r\n                .Parse()\r\n                .HasRequiredParameter(_inputPrefix, \"intermediate cache\", \"--in\")\r\n                .CheckInputFilenameExists(_inputReferencePath, \"compressed reference\", \"--ref\")\r\n                .HasRequiredParameter(_outputCacheFilePrefix, \"Nirvana\", \"--out\")\r\n                .SkipBanner()\r\n                .ShowHelpMenu(\"Converts *deserialized* VEP cache files to Nirvana cache format.\", commandLineExample)\r\n                .ShowErrors()\r\n                .Execute(ProgramExecution);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Commands/Download/DownloadMain.cs",
    "content": "﻿using ErrorHandling;\r\n\r\nnamespace CacheUtils.Commands.Download\r\n{\r\n    public static class DownloadMain\r\n    {\r\n        private static ExitCodes ProgramExecution()\r\n        {\r\n            ExternalFiles.Download();\r\n            return ExitCodes.Success;\r\n        }\r\n\r\n        public static ExitCodes Run(string command, string[] args)\r\n        {\r\n            return ProgramExecution();\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Commands/Download/ExternalFiles.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing CacheUtils.Genbank;\r\nusing CacheUtils.IntermediateIO;\r\nusing CacheUtils.Utilities;\r\nusing Compression.Utilities;\r\nusing Genome;\r\nusing IO;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.Commands.Download\r\n{\r\n    public static class ExternalFiles\r\n    {\r\n        public static readonly RemoteFile CcdsFile     = new RemoteFile(\"CCDS file (2016-09-08)\",   \"ftp://ftp.ncbi.nlm.nih.gov/pub/CCDS/current_human/CCDS2Sequence.20160908.txt\", false);\r\n        public static readonly RemoteFile LrgFile      = new RemoteFile(\"latest LRG file\",          \"http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt\");\r\n        public static readonly RemoteFile HgncFile     = new RemoteFile(\"latest HGNC gene symbols\", \"ftp://ftp.ebi.ac.uk/pub/databases/genenames/new/tsv/hgnc_complete_set.txt\");\r\n        public static readonly RemoteFile GeneInfoFile = new RemoteFile(\"latest gene_info\",         \"ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz\");\r\n\r\n        private static readonly RemoteFile AssemblyFile37        = new RemoteFile(\"assembly report (GRCh37.p13)\",    \"ftp://ftp.ncbi.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.25_GRCh37.p13/GCF_000001405.25_GRCh37.p13_assembly_report.txt\", false);\r\n        public static readonly RemoteFile EnsemblGtfFile37      = new RemoteFile(\"Ensembl 75 GTF (GRCh37)\",         \"ftp://ftp.ensembl.org/pub/release-75/gtf/homo_sapiens/Homo_sapiens.GRCh37.75.gtf.gz\", false);\r\n        public static readonly RemoteFile RefSeqGenomeGffFile37 = new RemoteFile(\"RefSeq genomic GFF (GRCh37.p13)\", \"ftp://ftp.ncbi.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.25_GRCh37.p13/GCF_000001405.25_GRCh37.p13_genomic.gff.gz\", 
false);\r\n        public static readonly RemoteFile RefSeqGffFile37       = new RemoteFile(\"RefSeq GFF3 (GRCh37.p13)\",        \"ftp://ftp.ncbi.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/GFF/ref_GRCh37.p13_top_level.gff3.gz\", false);\r\n\r\n        private static readonly RemoteFile AssemblyFile38        = new RemoteFile(\"assembly report (GRCh38.p11)\",    \"ftp://ftp.ncbi.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.37_GRCh38.p11/GCF_000001405.37_GRCh38.p11_assembly_report.txt\", false);\r\n        public static readonly RemoteFile EnsemblGtfFile38      = new RemoteFile(\"Ensembl 90 GTF (GRCh38)\",         \"ftp://ftp.ensembl.org/pub/release-90/gtf/homo_sapiens/Homo_sapiens.GRCh38.90.gtf.gz\", false);\r\n        public static readonly RemoteFile RefSeqGenomeGffFile38 = new RemoteFile(\"RefSeq genomic GFF (GRCh38.p11)\", \"ftp://ftp.ncbi.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.37_GRCh38.p11/GCF_000001405.37_GRCh38.p11_genomic.gff.gz\", false);\r\n        public static readonly RemoteFile RefSeqGffFile38       = new RemoteFile(\"RefSeq GFF3 (GRCh38.p7)\",         \"ftp://ftp.ncbi.nih.gov/genomes/H_sapiens/GFF/ref_GRCh38.p7_top_level.gff3.gz\", false);\r\n\r\n        public static readonly string GenbankFilePath = Path.Combine(Path.GetTempPath(), RemoteFile.GetFilename(\"Genbank.tsv.gz\", false));\r\n\r\n        public static readonly string UniversalGeneFilePath = Path.Combine(Path.GetTempPath(), RemoteFile.GetFilename(\"UGA.tsv.gz\", false));\r\n\r\n        public static void Download()\r\n        {\r\n            var fileList = new List<RemoteFile>\r\n            {\r\n                CcdsFile,\r\n                LrgFile,\r\n                HgncFile,\r\n                GeneInfoFile,\r\n                AssemblyFile37,\r\n                AssemblyFile38,\r\n                EnsemblGtfFile37,\r\n                EnsemblGtfFile38,\r\n                
RefSeqGenomeGffFile37,\r\n                RefSeqGenomeGffFile38,\r\n                RefSeqGffFile37,\r\n                RefSeqGffFile38\r\n            };\r\n\r\n            var genbankFiles = GetGenbankFiles(fileList);\r\n\r\n            fileList.Execute(\"downloads\", file => file.Download());\r\n\r\n            if (genbankFiles == null) return;\r\n\r\n            genbankFiles.Execute(\"file parsing\", file => file.Parse());\r\n            var genbankEntries = GetIdToGenbankEntryDict(genbankFiles);\r\n            WriteDictionary(genbankEntries);\r\n        }\r\n\r\n        private static IEnumerable<GenbankEntry> GetIdToGenbankEntryDict(IEnumerable<GenbankFile> files) =>\r\n            files.SelectMany(file => file.GenbankDict.Values).OrderBy(x => x.TranscriptId).ToList();\r\n\r\n        private static List<GenbankFile> GetGenbankFiles(ICollection<RemoteFile> fileList)\r\n        {\r\n            var genbankFileInfo = new FileInfo(GenbankFilePath);\r\n            if (genbankFileInfo.Exists && GetElapsedDays(genbankFileInfo.CreationTime) < 30.0) return null;\r\n\r\n            int numGenbankFiles = GetNumGenbankFiles();\r\n            var genbankFiles    = new List<GenbankFile>(numGenbankFiles);\r\n\r\n            for (var i = 0; i < numGenbankFiles; i++)\r\n            {\r\n                var genbankFile = new GenbankFile(i + 1);\r\n                fileList.Add(genbankFile.RemoteFile);\r\n                genbankFiles.Add(genbankFile);\r\n            }\r\n\r\n            return genbankFiles;\r\n        }\r\n\r\n        public static double GetElapsedDays(DateTime creationTime) => DateTime.Now.Subtract(creationTime).TotalDays;\r\n\r\n        private static int GetNumGenbankFiles()\r\n        {\r\n            var fileList = new RemoteFile(\"RefSeq filelist\", \"ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/mRNA_Prot/human.files.installed\");\r\n            fileList.Download();\r\n\r\n            var maxNum = 0;\r\n\r\n            using (var reader = 
FileUtilities.GetStreamReader(FileUtilities.GetReadStream(fileList.FilePath)))\r\n            {\r\n                while (true)\r\n                {\r\n                    string line = reader.ReadLine();\r\n                    if (line == null) break;\r\n\r\n                    string filename = line.OptimizedSplit('\\t')[1];\r\n                    if (!filename.EndsWith(\".rna.gbff.gz\")) continue;\r\n\r\n                    int num = int.Parse(filename.Substring(6, filename.Length - 18));\r\n                    if (num > maxNum) maxNum = num;\r\n                }\r\n            }\r\n\r\n            return maxNum;\r\n        }\r\n\r\n        private static void WriteDictionary(IEnumerable<GenbankEntry> entries)\r\n        {\r\n            var header = new IntermediateIoHeader(0, 0, Source.None, GenomeAssembly.Unknown, 0);\r\n\r\n            Logger.Write($\"- writing Genbank file ({Path.GetFileName(GenbankFilePath)})... \");\r\n            using (var writer = new GenbankWriter(GZipUtilities.GetStreamWriter(GenbankFilePath), header))\r\n            {\r\n                foreach (var entry in entries) writer.Write(entry);\r\n            }\r\n            Logger.WriteLine(\"finished.\");\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Commands/Download/GenbankFile.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.Genbank;\r\nusing CacheUtils.Utilities;\r\nusing Compression.Utilities;\r\nusing IO;\r\n\r\nnamespace CacheUtils.Commands.Download\r\n{\r\n    public sealed class GenbankFile\r\n    {\r\n        public readonly RemoteFile RemoteFile;\r\n        public readonly Dictionary<string, GenbankEntry> GenbankDict;\r\n\r\n        public GenbankFile(int num)\r\n        {\r\n            RemoteFile  = new RemoteFile($\"RefSeq Genbank {num} gbff\", $\"ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/mRNA_Prot/human.{num}.rna.gbff.gz\", false);\r\n            GenbankDict = new Dictionary<string, GenbankEntry>();\r\n        }\r\n\r\n        public void Parse()\r\n        {\r\n            Logger.WriteLine($\"- parsing {Path.GetFileName(RemoteFile.FilePath)}\");\r\n\r\n            using (var reader = new GenbankReader(GZipUtilities.GetAppropriateStreamReader(RemoteFile.FilePath)))\r\n            {\r\n                while (true)\r\n                {\r\n                    var entry = reader.GetGenbankEntry();\r\n                    if (entry == null) break;\r\n                    GenbankDict[entry.TranscriptId] = entry;\r\n                }\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Commands/ExtractTranscripts/ExtractTranscriptsMain.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.MiniCache;\r\nusing CacheUtils.PredictionCache;\r\nusing CacheUtils.TranscriptCache;\r\nusing CommandLine.Builders;\r\nusing CommandLine.NDesk.Options;\r\nusing ErrorHandling;\r\nusing Genome;\r\nusing Intervals;\r\nusing IO;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Caches;\r\nusing VariantAnnotation.IO.Caches;\r\nusing VariantAnnotation.Providers;\r\n\r\nnamespace CacheUtils.Commands.ExtractTranscripts\r\n{\r\n    public static class ExtractTranscriptsMain\r\n    {\r\n        private static string _inputPrefix;\r\n        private static string _inputReferencePath;\r\n        private static string _outputDirectory;\r\n\r\n        private static string _referenceName;\r\n\r\n        private static int _referencePosition    = -1;\r\n        private static int _referenceEndPosition = -1;\r\n\r\n        private static ExitCodes ProgramExecution()\r\n        {\r\n            var bundle     = DataBundle.GetDataBundle(_inputReferencePath, _inputPrefix);\r\n            int numRefSeqs = bundle.SequenceReader.NumRefSeqs;\r\n            var chromosome = ReferenceNameUtilities.GetChromosome(bundle.SequenceReader.RefNameToChromosome, _referenceName);\r\n            bundle.Load(chromosome);\r\n\r\n            string outputStub = GetOutputStub(chromosome, bundle.Source);\r\n            var interval      = new ChromosomeInterval(chromosome, _referencePosition, _referenceEndPosition);\r\n            var transcripts   = GetTranscripts(bundle, interval);\r\n\r\n            var sift     = GetPredictionStaging(\"SIFT\", transcripts, chromosome, bundle.SiftPredictions, bundle.SiftReader, x => x.SiftIndex, numRefSeqs);\r\n            var polyphen = GetPredictionStaging(\"PolyPhen\", transcripts, chromosome, bundle.PolyPhenPredictions, bundle.PolyPhenReader, x => x.PolyPhenIndex, 
numRefSeqs);\r\n\r\n            var regulatoryRegionIntervalArrays = GetRegulatoryRegionIntervalArrays(bundle.TranscriptCache, interval, numRefSeqs);\r\n            var transcriptIntervalArrays = PredictionUtilities.UpdateTranscripts(transcripts, bundle.SiftPredictions,\r\n                sift.Predictions, bundle.PolyPhenPredictions, polyphen.Predictions, numRefSeqs);\r\n\r\n            var transcriptStaging = GetTranscriptStaging(bundle.TranscriptCacheData.Header, transcriptIntervalArrays, regulatoryRegionIntervalArrays);\r\n\r\n            WriteCache(FileUtilities.GetCreateStream(CacheConstants.TranscriptPath(outputStub)), transcriptStaging, \"transcript\");\r\n            WriteCache(FileUtilities.GetCreateStream(CacheConstants.SiftPath(outputStub)), sift.Staging, \"SIFT\");\r\n            WriteCache(FileUtilities.GetCreateStream(CacheConstants.PolyPhenPath(outputStub)), polyphen.Staging, \"PolyPhen\");\r\n\r\n            return ExitCodes.Success;\r\n        }\r\n\r\n        private static TranscriptCacheStaging GetTranscriptStaging(CacheHeader header,\r\n            IntervalArray<ITranscript>[] transcriptIntervalArrays,\r\n            IntervalArray<IRegulatoryRegion>[] regulatoryRegionIntervalArrays) =>\r\n            TranscriptCacheStaging.GetStaging(header, transcriptIntervalArrays, regulatoryRegionIntervalArrays);\r\n\r\n\r\n        private static void WriteCache(Stream stream, IStaging staging, string description)\r\n        {\r\n            Logger.Write($\"- writing {description} cache... \");\r\n            staging.Write(stream);\r\n            Logger.WriteLine(\"finished.\");\r\n        }\r\n\r\n        private static string GetOutputStub(Chromosome chromosome, Source source) => Path.Combine(_outputDirectory,\r\n            $\"{chromosome.UcscName}_{_referencePosition}_{_referenceEndPosition}_{GetSource(source)}\");\r\n\r\n        private static string GetSource(Source source) =>\r\n            source != Source.BothRefSeqAndEnsembl ? 
source.ToString() : \"Both\";\r\n\r\n        private static (PredictionCacheStaging Staging, Prediction[] Predictions) GetPredictionStaging(\r\n            string description, IEnumerable<ITranscript> transcripts, Chromosome chromosome, IReadOnlyList<Prediction> oldPredictions,\r\n            PredictionCacheReader reader, Func<ITranscript, int> indexFunc, int numRefSeqs)\r\n        {\r\n            Logger.Write($\"- retrieving {description} predictions... \");\r\n\r\n            var indexSet          = GetUniqueIndices(transcripts, indexFunc);\r\n            var predictionsPerRef = GetPredictions(indexSet, chromosome, numRefSeqs, oldPredictions);\r\n            var staging           = new PredictionCacheStaging(reader.Header, predictionsPerRef);\r\n\r\n            Logger.WriteLine($\"found {indexSet.Count} predictions.\");\r\n            return (staging, predictionsPerRef[chromosome.Index]);\r\n        }\r\n\r\n        private static Prediction[][] GetPredictions(ICollection<int> indexSet, Chromosome chromosome, int numRefSeqs,\r\n            IReadOnlyList<Prediction> oldPredictions)\r\n        {\r\n            var refPredictions = new Prediction[indexSet.Count];\r\n\r\n            var predIdx = 0;\r\n            foreach (int index in indexSet) refPredictions[predIdx++] = oldPredictions[index];\r\n\r\n            var predictions = new Prediction[numRefSeqs][];\r\n            predictions[chromosome.Index] = refPredictions;\r\n            return predictions;\r\n        }\r\n\r\n        private static HashSet<int> GetUniqueIndices(IEnumerable<ITranscript> transcripts, Func<ITranscript, int> indexFunc)\r\n        {\r\n            var indexSet = new HashSet<int>();\r\n            foreach (var transcript in transcripts)\r\n            {\r\n                int index = indexFunc(transcript);\r\n                if (index == -1) continue;\r\n                indexSet.Add(index);\r\n            }\r\n            return indexSet;\r\n        }\r\n\r\n        private static 
IntervalArray<IRegulatoryRegion>[] GetRegulatoryRegionIntervalArrays(\r\n            ITranscriptCache cache, ChromosomeInterval interval, int numRefSeqs)\r\n        {\r\n            Logger.Write(\"- retrieving regulatory regions... \");\r\n            var regulatoryIntervalForest = cache.RegulatoryIntervalForest;\r\n            var regulatoryRegions =\r\n                regulatoryIntervalForest.GetAllOverlappingValues(interval.Chromosome.Index, interval.Start,\r\n                    interval.End);\r\n            Logger.WriteLine($\"found {regulatoryRegions.Length} regulatory regions.\");\r\n            return regulatoryRegions.ToIntervalArrays(numRefSeqs);\r\n        }\r\n\r\n        private static List<ITranscript> GetTranscripts(DataBundle bundle, ChromosomeInterval interval)\r\n        {\r\n            Logger.Write(\"- retrieving transcripts... \");\r\n            var transcripts = TranscriptCacheUtilities.GetTranscripts(bundle, interval);\r\n            Logger.WriteLine($\"found {transcripts.Count} transcripts.\");\r\n\r\n            if (transcripts.Count == 0) throw new InvalidDataException(\"Expected at least one transcript, but found none.\");\r\n            return transcripts;\r\n        }\r\n\r\n        public static ExitCodes Run(string command, string[] args)\r\n        {\r\n            var ops = new OptionSet\r\n            {\r\n                {\r\n                    \"in|i=\",\r\n                    \"input cache {prefix}\",\r\n                    v => _inputPrefix = v\r\n                },\r\n                {\r\n                    \"name|n=\",\r\n                    \"reference {name}\",\r\n                    v => _referenceName = v\r\n                },\r\n                {\r\n                    \"out|o=\",\r\n                    \"output {directory}\",\r\n                    v => _outputDirectory = v\r\n                },\r\n                {\r\n                    \"pos|p=\",\r\n                    \"reference {position}\",\r\n                
    (int v) => _referencePosition = v\r\n                },\r\n                {\r\n                    \"endpos=\",\r\n                    \"reference end {position}\",\r\n                    (int v) => _referenceEndPosition = v\r\n                },\r\n                {\r\n                    \"ref|r=\",\r\n                    \"input reference {filename}\",\r\n                    v => _inputReferencePath = v\r\n                }\r\n            };\r\n\r\n            string commandLineExample = $\"{command} --in <prefix> --out <dir> -r <ref path> --name <name> -p <pos> --endpos <pos>\\n\";\r\n\r\n            return new ConsoleAppBuilder(args, ops)\r\n                .UseVersionProvider(new VersionProvider())\r\n                .Parse()\r\n                .HasRequiredParameter(_inputPrefix, \"Nirvana cache\", \"--in\")\r\n                .CheckInputFilenameExists(_inputReferencePath, \"compressed reference sequence\", \"--ref\")\r\n                .CheckDirectoryExists(_outputDirectory, \"output cache\", \"--out\")\r\n                .SkipBanner()\r\n                .ShowHelpMenu(\"Extracts transcripts from Nirvana cache files.\", commandLineExample)\r\n                .ShowErrors()\r\n                .Execute(ProgramExecution);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Commands/GFF/CreateGffMain.cs",
    "content": "﻿using System.Collections.Generic;\nusing CacheUtils.Commands.ParseVepCacheDirectory;\nusing CacheUtils.GFF;\nusing CacheUtils.Helpers;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing Genome;\nusing IO;\nusing ReferenceSequence.Utilities;\nusing VariantAnnotation.Caches;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.Providers;\n\nnamespace CacheUtils.Commands.GFF\n{\n    public static class CreateGffMain\n    {\n        private static string _compressedReferencePath;\n        private static string _inputPrefix;\n        private static string _outputFileName;\n        private static string _transcriptSource;\n\n        private static ExitCodes ProgramExecution()\n        {\n            Source transcriptSource = ParseVepCacheDirectoryMain.GetSource(_transcriptSource);\n            string cachePath        = CacheConstants.TranscriptPath(_inputPrefix);\n\n            Dictionary<ushort, Chromosome> refIndexToChromosome =\n                SequenceHelper.GetDictionaries(_compressedReferencePath).refIndexToChromosome;\n            \n            TranscriptCacheData     cache            = TranscriptCacheHelper.GetCache(cachePath, refIndexToChromosome);\n            Dictionary<IGene, int> geneToInternalId = InternalGenes.CreateDictionary(cache.Genes);\n\n            using (var writer = new GffWriter(GZipUtilities.GetStreamWriter(_outputFileName)))\n            {\n                var creator = new GffCreator(writer, geneToInternalId, transcriptSource);\n                creator.Create(cache.TranscriptIntervalArrays);\n            }\n\n            return ExitCodes.Success;\n        }\n\n        public static ExitCodes Run(string command, string[] args)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"in|i=\",\n                    \"input cache {prefix}\",\n                    v => _inputPrefix = v\n    
            },\n                {\n                    \"out|o=\",\n                    \"output {file name}\",\n                    v => _outputFileName = v\n                },\n                {\n                    \"source|s=\",\n                    \"transcript {source}\",\n                    v => _transcriptSource = v\n                },\n                {\n                    \"ref|r=\",\n                    \"reference {file}\",\n                    v => _compressedReferencePath = v\n                }\n            };\n\n            string commandLineExample = $\"{command} --in <cache prefix> --out <GFF path>\";\n\n            return new ConsoleAppBuilder(args, ops)\n                .UseVersionProvider(new VersionProvider())\n                .Parse()\n                .HasRequiredParameter(_inputPrefix, \"input cache prefix\", \"--in\")\n                .CheckOutputFilenameSuffix(_outputFileName, \".gz\", \"GFF\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Outputs exon coordinates for all transcripts in a database.\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n        }\n    }\n}\n"
  },
  {
    "path": "CacheUtils/Commands/GFF/InternalGenes.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing CacheUtils.TranscriptCache.Comparers;\r\nusing ErrorHandling.Exceptions;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.Commands.GFF\r\n{\r\n    public static class InternalGenes\r\n    {\r\n        public static Dictionary<IGene, int> CreateDictionary(IGene[] genes)\r\n        {\r\n            var geneComparer     = new GeneComparer();\r\n            var geneToInternalId = new Dictionary<IGene, int>(geneComparer);\r\n\r\n            for (var geneIndex = 0; geneIndex < genes.Length; geneIndex++)\r\n            {\r\n                var gene = genes[geneIndex];\r\n\r\n                if (geneToInternalId.TryGetValue(gene, out int oldGeneIndex))\r\n                {\r\n                    throw new UserErrorException($\"Found a duplicate gene in the dictionary: {genes[geneIndex]} ({geneIndex} vs {oldGeneIndex})\");\r\n                }\r\n\r\n                geneToInternalId[gene] = geneIndex;\r\n            }\r\n\r\n            return geneToInternalId;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Commands/Header/HeaderMain.cs",
    "content": "﻿using System;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing ErrorHandling;\nusing ErrorHandling.Exceptions;\nusing IO;\nusing VariantAnnotation.IO.Caches;\nusing VariantAnnotation.Providers;\n\nnamespace CacheUtils.Commands.Header\n{\n    public static class HeaderMain\n    {\n        private static string _inputPrefix;\n\n        private static ExitCodes ProgramExecution()\n        {\n            string cachePath = CacheConstants.TranscriptPath(_inputPrefix);\n            var header       = GetHeaderInformation(cachePath);\n\n            Console.WriteLine($\"Versions: Schema: {header.Schema}, Data: {header.Data}, VEP: {header.Vep}\");\n            return ExitCodes.Success;\n        }\n\n        private static (ushort Schema, ushort Data, ushort Vep) GetHeaderInformation(string cachePath)\n        {\n            CacheHeader header;\n            using (var stream = FileUtilities.GetReadStream(cachePath))\n            {\n                header = CacheHeader.Read(stream);\n            }\n\n            if (header == null) throw new InvalidFileFormatException($\"Could not parse the header information correctly for {cachePath}\");\n\n            return (header.SchemaVersion, header.DataVersion, header.Custom.VepVersion);\n        }\n\n        public static ExitCodes Run(string command, string[] args)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"in|i=\",\n                    \"input cache {prefix}\",\n                    v => _inputPrefix = v\n                }\n            };\n\n            return new ConsoleAppBuilder(args, ops)\n                .UseVersionProvider(new VersionProvider())\n                .Parse()\n                .HasRequiredParameter(_inputPrefix, \"input cache prefix\", \"--in\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Displays the cache header information.\", $\"{command} --in <cache prefix>\")\n                .ShowErrors()\n  
              .Execute(ProgramExecution);\n        }\n    }\n}\n"
  },
  {
    "path": "CacheUtils/Commands/ParseVepCacheDirectory/ParseVepCacheDirectoryMain.cs",
    "content": "﻿using CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing CacheUtils.IntermediateIO;\r\nusing CommandLine.Builders;\r\nusing CommandLine.NDesk.Options;\r\nusing Compression.Utilities;\r\nusing ErrorHandling;\r\nusing System;\r\nusing System.Collections.Generic;\r\nusing CacheUtils.Commands.Download;\r\nusing CacheUtils.Genbank;\r\nusing Genome;\r\nusing IO;\r\nusing ReferenceSequence.IO;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Providers;\r\n\r\nnamespace CacheUtils.Commands.ParseVepCacheDirectory\r\n{\r\n    public static class ParseVepCacheDirectoryMain\r\n    {\r\n        private static string _inputVepDirectory;\r\n        private static string _inputReferencePath;\r\n\r\n        private static string _outputStub;\r\n\r\n        private static string _vepReleaseDate;\r\n        private static string _genomeAssembly;\r\n        private static string _transcriptSource;\r\n        private static ushort _vepVersion;\r\n\r\n        private static ExitCodes ProgramExecution()\r\n        {\r\n            var transcriptSource = GetSource(_transcriptSource);\r\n            var sequenceReader   = new CompressedSequenceReader(FileUtilities.GetReadStream(_inputReferencePath));\r\n            var vepRootDirectory = new VepRootDirectory(sequenceReader.RefNameToChromosome);\r\n            var refIndexToVepDir = vepRootDirectory.GetRefIndexToVepDir(_inputVepDirectory);\r\n\r\n            var  genomeAssembly  = GenomeAssemblyHelper.Convert(_genomeAssembly);\r\n            long vepReleaseTicks = DateTime.Parse(_vepReleaseDate).Ticks;\r\n            var  idToGenbank     = GetIdToGenbank(genomeAssembly, transcriptSource);\r\n\r\n            // =========================\r\n            // create the pre-cache file\r\n            // =========================\r\n\r\n            // process each VEP directory\r\n            int numRefSeqs = sequenceReader.NumRefSeqs;            \r\n            var header     = new 
IntermediateIoHeader(_vepVersion, vepReleaseTicks, transcriptSource, genomeAssembly, numRefSeqs);\r\n\r\n            string siftPath       = _outputStub + \".sift.gz\";\r\n            string polyphenPath   = _outputStub + \".polyphen.gz\";\r\n            string transcriptPath = _outputStub + \".transcripts.gz\";\r\n            string regulatoryPath = _outputStub + \".regulatory.gz\";\r\n\r\n            using (var mergeLogger            = new TranscriptMergerLogger(FileUtilities.GetCreateStream(_outputStub + \".merge_transcripts.log\")))\r\n            using (var siftWriter             = new PredictionWriter(GZipUtilities.GetStreamWriter(siftPath), header, IntermediateIoCommon.FileType.Sift))\r\n            using (var polyphenWriter         = new PredictionWriter(GZipUtilities.GetStreamWriter(polyphenPath), header, IntermediateIoCommon.FileType.Polyphen))\r\n            using (var transcriptWriter       = new MutableTranscriptWriter(GZipUtilities.GetStreamWriter(transcriptPath), header))\r\n            using (var regulatoryRegionWriter = new RegulatoryRegionWriter(GZipUtilities.GetStreamWriter(regulatoryPath), header))\r\n            {\r\n                var converter           = new VepCacheParser(transcriptSource);\r\n                var emptyPredictionDict = new Dictionary<string, List<int>>();\r\n\r\n                for (ushort refIndex = 0; refIndex < numRefSeqs; refIndex++)\r\n                {\r\n                    var chromosome = sequenceReader.RefIndexToChromosome[refIndex];\r\n\r\n                    if (!refIndexToVepDir.TryGetValue(refIndex, out string vepSubDir))\r\n                    {\r\n                        siftWriter.Write(chromosome, emptyPredictionDict);\r\n                        polyphenWriter.Write(chromosome, emptyPredictionDict);\r\n                        continue;\r\n                    }\r\n\r\n                    Console.WriteLine(\"Parsing reference sequence [{0}]:\", chromosome.UcscName);\r\n\r\n                    var rawData     
            = converter.ParseDumpDirectory(chromosome, vepSubDir);\r\n                    var mergedTranscripts       = TranscriptMerger.Merge(mergeLogger, rawData.Transcripts, idToGenbank);\r\n                    var mergedRegulatoryRegions = RegulatoryRegionMerger.Merge(rawData.RegulatoryRegions);\r\n\r\n                    int numRawTranscripts    = rawData.Transcripts.Count;\r\n                    int numMergedTranscripts = mergedTranscripts.Count;\r\n                    Console.WriteLine($\"- # merged transcripts: {numMergedTranscripts}, # total transcripts: {numRawTranscripts}\");\r\n\r\n                    WriteTranscripts(transcriptWriter, mergedTranscripts);\r\n                    WriteRegulatoryRegions(regulatoryRegionWriter, mergedRegulatoryRegions);\r\n                    WritePredictions(siftWriter, mergedTranscripts, x => x.SiftData, chromosome);\r\n                    WritePredictions(polyphenWriter, mergedTranscripts, x => x.PolyphenData, chromosome);\r\n                }\r\n            }\r\n\r\n            Console.WriteLine(\"\\n{0} directories processed.\", refIndexToVepDir.Count);\r\n\r\n            return ExitCodes.Success;\r\n        }\r\n\r\n        private static Dictionary<string, GenbankEntry> GetIdToGenbank(GenomeAssembly assembly, Source source)\r\n        {\r\n            if (assembly != GenomeAssembly.GRCh37 || source != Source.RefSeq) return null;\r\n\r\n            Logger.Write(\"- loading the intermediate Genbank file... 
\");\r\n\r\n            Dictionary<string, GenbankEntry> genbankDict;\r\n            using (var reader = new IntermediateIO.GenbankReader(GZipUtilities.GetAppropriateReadStream(ExternalFiles.GenbankFilePath)))\r\n            {\r\n                genbankDict = reader.GetIdToGenbank();\r\n            }\r\n\r\n            Logger.WriteLine($\"{genbankDict.Count} entries loaded.\");\r\n            return genbankDict;\r\n        }\r\n\r\n        private static void WriteRegulatoryRegions(RegulatoryRegionWriter writer, IEnumerable<IRegulatoryRegion> regulatoryRegions)\r\n        {\r\n            foreach (var regulatoryRegion in regulatoryRegions) writer.Write(regulatoryRegion);\r\n        }\r\n\r\n        private static void WriteTranscripts(MutableTranscriptWriter writer, IEnumerable<MutableTranscript> transcripts)\r\n        {\r\n            foreach (var transcript in transcripts) writer.Write(transcript);\r\n        }\r\n\r\n        private static void WritePredictions(PredictionWriter writer, IReadOnlyList<MutableTranscript> transcripts,\r\n            Func<MutableTranscript, string> predictionFunc, Chromosome chromosome)\r\n        {\r\n            var predictionDict = new Dictionary<string, List<int>>(StringComparer.Ordinal);\r\n\r\n            for (var transcriptIndex = 0; transcriptIndex < transcripts.Count; transcriptIndex++)\r\n            {\r\n                var transcript        = transcripts[transcriptIndex];\r\n                string predictionData = predictionFunc(transcript);\r\n                if (predictionData == null) continue;\r\n\r\n                if (predictionDict.TryGetValue(predictionData, out var transcriptIdList)) transcriptIdList.Add(transcriptIndex);\r\n                else predictionDict[predictionData] = new List<int> { transcriptIndex };\r\n            }\r\n\r\n            writer.Write(chromosome, predictionDict);\r\n        }\r\n\r\n        public static Source GetSource(string source)\r\n        {\r\n            source = 
source.ToLower();\r\n            if (source.StartsWith(\"ensembl\")) return Source.Ensembl;\r\n            if (source.StartsWith(\"refseq\")) return Source.RefSeq;\r\n            return source.StartsWith(\"both\") ? Source.BothRefSeqAndEnsembl : Source.None;\r\n        }\r\n\r\n        public static ExitCodes Run(string command, string[] args)\r\n        {\r\n            var ops = new OptionSet\r\n            {\r\n                {\r\n                    \"date=\",\r\n                    \"VEP release {date}\",\r\n                    v => _vepReleaseDate = v\r\n                },\r\n                {\r\n                    \"source|s=\",\r\n                    \"transcript {source}\",\r\n                    v => _transcriptSource = v\r\n                },\r\n                {\r\n                    \"ga=\",\r\n                    \"genome assembly {version}\",\r\n                    v => _genomeAssembly = v\r\n                },\r\n                {\r\n                    \"in|i=\",\r\n                    \"input VEP {directory}\",\r\n                    v => _inputVepDirectory = v\r\n                },\r\n                {\r\n                    \"out|o=\",\r\n                    \"output filename {stub}\",\r\n                    v => _outputStub = v\r\n                },\r\n                {\r\n                    \"ref|r=\",\r\n                    \"input reference {filename}\",\r\n                    v => _inputReferencePath = v\r\n                },\r\n                {\r\n                    \"vep=\",\r\n                    \"VEP {version}\",\r\n                    (ushort v) => _vepVersion = v\r\n                }\r\n            };\r\n\r\n            string commandLineExample = $\"{command} --in <VEP directory> --out <Nirvana pre-cache file> --vep <VEP version>\";\r\n\r\n            return new ConsoleAppBuilder(args, ops)\r\n                .UseVersionProvider(new VersionProvider())\r\n                .Parse()\r\n                
.CheckDirectoryExists(_inputVepDirectory, \"VEP\", \"--in\")\r\n                .CheckInputFilenameExists(_inputReferencePath, \"compressed reference sequence\", \"--ref\")\r\n                .HasRequiredParameter(_outputStub, \"output stub\", \"--out\")\r\n                .HasRequiredParameter(_vepVersion, \"VEP version\", \"--vep\")\r\n                .HasRequiredParameter(_genomeAssembly, \"genome assembly\", \"--ga\")\r\n                .HasRequiredDate(_vepReleaseDate, \"VEP release date\", \"--date\")\r\n                .HasRequiredParameter(_transcriptSource, \"transcript source\", \"--source\")\r\n                .SkipBanner()\r\n                .ShowHelpMenu(\"Converts *deserialized* VEP cache files to a Nirvana pre-cache file.\", commandLineExample)\r\n                .ShowErrors()\r\n                .Execute(ProgramExecution);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Commands/ParseVepCacheDirectory/RegulatoryRegionMerger.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing CacheUtils.TranscriptCache.Comparers;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.Commands.ParseVepCacheDirectory\r\n{\r\n    public static class RegulatoryRegionMerger\r\n    {\r\n        public static IEnumerable<IRegulatoryRegion> Merge(IEnumerable<IRegulatoryRegion> regulatoryRegions)\r\n        {\r\n            var regulatoryDict = new Dictionary<string, IRegulatoryRegion>();\r\n            var comparer       = new RegulatoryRegionComparer();\r\n\r\n            foreach (var currentRegion in regulatoryRegions)\r\n            {\r\n                if (currentRegion.Id.IsEmpty()) throw new InvalidOperationException(\"Found a regulatory region without an ID.\");\r\n\r\n                string regulatoryKey = $\"{currentRegion.Id}.{currentRegion.Start}.{currentRegion.End}\";\r\n\r\n                if (regulatoryDict.TryGetValue(regulatoryKey, out var previousRegion))\r\n                {\r\n                    MergeRegulatoryRegion(previousRegion, currentRegion, comparer);\r\n                }\r\n                else\r\n                {\r\n                    regulatoryDict[regulatoryKey] = currentRegion;\r\n                }\r\n            }\r\n\r\n            return regulatoryDict.Values.OrderBy(x => x.Chromosome.Index).ThenBy(x => x.Start).ThenBy(x => x.End)\r\n                .ToList();\r\n        }\r\n\r\n        private static void MergeRegulatoryRegion(IRegulatoryRegion previous, IRegulatoryRegion current,\r\n            RegulatoryRegionComparer comparer)\r\n        {\r\n            if (comparer.Equals(previous, current)) return;\r\n            throw new InvalidDataException(\"Found different regulatory regions\");\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Commands/ParseVepCacheDirectory/TranscriptFilter.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing CacheUtils.DataDumperImport.DataStructures;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing CacheUtils.DataDumperImport.Utilities;\r\nusing CacheUtils.Genbank;\r\nusing CacheUtils.Genes.Utilities;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.Commands.ParseVepCacheDirectory\r\n{\r\n    public static class TranscriptFilter\r\n    {\r\n        private static readonly MutableTranscriptComparer Comparer = new MutableTranscriptComparer();\r\n\r\n        private static void Log(this TranscriptMergerLogger logger, string transcriptId, string description) =>\r\n            logger.WriteLine($\"{transcriptId}\\t{description}\");\r\n\r\n        public static List<MutableTranscript> PickSpecificTranscript(\r\n            this List<MutableTranscript> transcripts, TranscriptMergerLogger logger, string transcriptId)\r\n        {\r\n            if (transcripts.Count == 1) return transcripts;\r\n\r\n            List<MutableTranscript> filteredTranscripts;\r\n            string logMessage;\r\n\r\n            switch (transcriptId)\r\n            {\r\n                case \"NM_001005786\":\r\n                    filteredTranscripts = transcripts.Where(transcript => transcript.CdnaMaps[9].Start == 25419007).ToList();\r\n                    logMessage = $\"Filtered on exon 9 start: {transcriptId}\";\r\n                    break;\r\n                case \"NM_001278597\":\r\n                case \"NM_001278596\":\r\n                    filteredTranscripts = transcripts.Where(transcript => transcript.CdnaMaps.Length == 26).ToList();\r\n                    logMessage = $\"Filtered on exon count (26): {transcriptId}\";\r\n                    break;\r\n                case \"NM_016152\":\r\n                    filteredTranscripts = transcripts.Where(transcript => transcript.Exons[0].Phase == 0).ToList();\r\n         
           logMessage = $\"Filtered on exon phase (0): {transcriptId}\";\r\n                    break;\r\n                default:\r\n                    return transcripts;\r\n            }\r\n\r\n            if (filteredTranscripts.Count == 0) return transcripts;\r\n            logger.Log(transcriptId, logMessage);\r\n\r\n            return filteredTranscripts.Unique();\r\n        }\r\n\r\n        public static List<MutableTranscript> InvestigateInconsistentCdnaMaps(this List<MutableTranscript> transcripts,\r\n            TranscriptMergerLogger logger, string transcriptId)\r\n        {\r\n            var index = 0;\r\n            foreach (var transcript in transcripts)\r\n            {\r\n                string onReverseStrand = transcript.Gene.OnReverseStrand ? \"R\" : \"F\";\r\n\r\n                if (transcript.Exons.Length != transcript.CdnaMaps.Length)\r\n                {\r\n                    logger.Log(transcriptId, $\"Found different exon & cDNA maps counts ({transcript.Exons.Length} vs {transcript.CdnaMaps.Length}) (index: {index}, {onReverseStrand})\");\r\n                }\r\n\r\n                if (transcript.Exons.Length == transcript.CdnaMaps.Length &&\r\n                    DiffExonsAndCdnaMaps(transcript.Exons, transcript.CdnaMaps))\r\n                {\r\n                    logger.Log(transcriptId, $\"Found different start/end coordinates between exons & cDNA maps. 
(index: {index}, {onReverseStrand})\");\r\n                }\r\n\r\n                index++;\r\n            }\r\n\r\n            return transcripts;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if any exon differs in start or end coordinate from the cDNA map at the same index.\r\n        /// Both lists are expected to have the same length (the caller only compares equally sized lists).\r\n        /// </summary>\r\n        private static bool DiffExonsAndCdnaMaps(IReadOnlyList<MutableExon> exons,\r\n            IReadOnlyList<MutableTranscriptRegion> cdnaMaps)\r\n        {\r\n            int numExons = exons.Count;\r\n\r\n            for (var i = 0; i < numExons; i++)\r\n            {\r\n                var exon    = exons[i];\r\n                var cdnaMap = cdnaMaps[i];\r\n\r\n                // a single mismatching coordinate is enough to report a difference\r\n                if (exon.Start != cdnaMap.Start || exon.End != cdnaMap.End) return true;\r\n            }\r\n\r\n            return false;\r\n        }\r\n\r\n        public static List<MutableTranscript> ChooseEditedTranscripts(\r\n            this List<MutableTranscript> transcripts, TranscriptMergerLogger logger)\r\n        {\r\n            if (transcripts.Count == 1) return transcripts;\r\n\r\n            var filteredTranscripts = transcripts.Where(transcript => transcript.RnaEdits != null || transcript.BamEditStatus == \"ok\").ToList();\r\n            if (filteredTranscripts.Count == 0) return transcripts;\r\n\r\n            logger.Log(transcripts[0].Id, \"Filtered transcripts without RNA edits or BAM edit status\");\r\n            return filteredTranscripts.Unique();\r\n        }\r\n\r\n        public static List<MutableTranscript> RemoveFailedTranscripts(\r\n            this List<MutableTranscript> transcripts, TranscriptMergerLogger logger)\r\n        {\r\n            if (transcripts.Count == 1) return transcripts;\r\n\r\n            var filteredTranscripts = transcripts.Where(transcript => transcript.BamEditStatus != \"failed\").ToList();\r\n            if (filteredTranscripts.Count == 0) return transcripts;\r\n\r\n            logger.Log(transcripts[0].Id, \"Filtered transcripts with failed BAM status.\");\r\n            return filteredTranscripts.Unique();\r\n        }\r\n\r\n        public static List<MutableTranscript> 
RemoveTranscriptsWithLowestVersion(\r\n            this List<MutableTranscript> transcripts, TranscriptMergerLogger logger)\r\n        {\r\n            if (transcripts.Count == 1) return transcripts;\r\n\r\n            var versionToTranscript = transcripts.GetMultiValueDict(x => x.Version);\r\n            if (versionToTranscript.Count == 1) return transcripts;\r\n\r\n            byte maxVersion = versionToTranscript.Keys.Max();\r\n            transcripts.RemoveAll(x => x.Version != maxVersion);\r\n\r\n            logger.Log(transcripts[0].Id, \"Filtered transcripts with lower versions\");\r\n            return transcripts.Unique();\r\n        }\r\n\r\n        public static List<MutableTranscript> Unique(this IEnumerable<MutableTranscript> transcripts)\r\n        {\r\n            var set = new HashSet<MutableTranscript>(Comparer);\r\n            foreach (var transcript in transcripts) set.Add(transcript);\r\n            return set.ToList();\r\n        }\r\n\r\n        public static List<MutableTranscript> FixCodingRegionCdnaStart(this List<MutableTranscript> transcripts,\r\n            TranscriptMergerLogger logger, IReadOnlyDictionary<string, GenbankEntry> idToGenbankEntry, string transcriptId)\r\n        {\r\n            if (transcripts.Count == 1 || idToGenbankEntry == null || !idToGenbankEntry.TryGetValue(transcriptId, out var genbankEntry)) return transcripts;\r\n\r\n            var cdnaStartToTranscript = transcripts.GetMultiValueDict(x => x.CodingRegion.CdnaStart);\r\n            if (cdnaStartToTranscript.Count == 1) return transcripts;\r\n\r\n            if (!cdnaStartToTranscript.TryGetValue(genbankEntry.CodingRegion.Start, out var filteredTranscripts))\r\n                return transcripts;\r\n\r\n            logger.Log(transcripts[0].Id, \"Filtered transcripts by coding region cDNA start\");\r\n            return filteredTranscripts.Unique();\r\n        }\r\n\r\n        public static List<MutableTranscript> FixCodingRegionCdnaEnd(this 
List<MutableTranscript> transcripts,\r\n            TranscriptMergerLogger logger, IReadOnlyDictionary<string, GenbankEntry> idToGenbankEntry, string transcriptId)\r\n        {\r\n            if (transcripts.Count == 1 || idToGenbankEntry == null || !idToGenbankEntry.TryGetValue(transcriptId, out var genbankEntry)) return transcripts;\r\n\r\n            var cdnaEndToTranscript = transcripts.GetMultiValueDict(x => x.CodingRegion.CdnaEnd);\r\n            if (cdnaEndToTranscript.Count == 1) return transcripts;\r\n\r\n            if (!cdnaEndToTranscript.TryGetValue(genbankEntry.CodingRegion.End, out var filteredTranscripts))\r\n                return transcripts;\r\n\r\n            logger.Log(transcripts[0].Id, \"Filtered transcripts by coding region cDNA end\");\r\n            return filteredTranscripts.Unique();\r\n        }\r\n\r\n        public static List<MutableTranscript> FixGeneSymbolSource(this List<MutableTranscript> transcripts,\r\n            TranscriptMergerLogger logger)\r\n        {\r\n            if (transcripts.Count == 1) return transcripts;\r\n\r\n            var symbolSources = transcripts.GetSet(x => x.Gene.SymbolSource);\r\n            if (symbolSources.Count == 1) return transcripts;\r\n\r\n            if (symbolSources.Contains(GeneSymbolSource.Unknown)) symbolSources.Remove(GeneSymbolSource.Unknown);\r\n            if (symbolSources.Count != 1) throw new NotImplementedException(\"Cannot handle multiple gene symbol sources at this time\");\r\n\r\n            var targetSymbolSource = symbolSources.First();\r\n            foreach (var transcript in transcripts) transcript.Gene.SymbolSource = targetSymbolSource;\r\n            logger.Log(transcripts[0].Id, \"Normalized gene symbol source\");\r\n            return transcripts.Unique();\r\n        }\r\n\r\n        public static List<MutableTranscript> FixBioType(this List<MutableTranscript> transcripts, TranscriptMergerLogger logger)\r\n        {\r\n            if (transcripts.Count == 1) return 
transcripts;\r\n\r\n            var biotypes = transcripts.GetSet(x => x.BioType);\r\n            if (biotypes.Count != 2) return transcripts;\r\n\r\n            var biotype = GetDesiredBioType(biotypes);\r\n            if (biotype == BioType.other) return transcripts;\r\n\r\n            foreach (var transcript in transcripts) transcript.BioType = biotype;\r\n            logger.Log(transcripts[0].Id, \"Normalized biotype\");\r\n            return transcripts.Unique();\r\n        }\r\n\r\n        private static readonly BioType[] MiscRnaBioTypes =\r\n        {\r\n            BioType.antisense_RNA,\r\n            BioType.miRNA,\r\n            BioType.pseudogene,\r\n            BioType.lncRNA,\r\n            BioType.protein_coding,\r\n            BioType.rRNA,\r\n            BioType.SRP_RNA,\r\n            BioType.vaultRNA,\r\n            BioType.Y_RNA\r\n        };\r\n\r\n        private static readonly BioType[] LncRnaBioTypes =\r\n        {\r\n            BioType.antisense_RNA,\r\n            BioType.pseudogene\r\n        };\r\n\r\n        private static BioType GetDesiredBioType(ICollection<BioType> biotypes)\r\n        {\r\n            if (biotypes.Contains(BioType.misc_RNA))\r\n            {\r\n                foreach (var biotype in MiscRnaBioTypes)\r\n                    if (biotypes.Contains(biotype))\r\n                        return biotype;\r\n            }\r\n\r\n            if (biotypes.Contains(BioType.lncRNA) && LncRnaBioTypes.Any(biotypes.Contains)) return BioType.lncRNA;\r\n\r\n            if (biotypes.Contains(BioType.mRNA) && biotypes.Contains(BioType.protein_coding))\r\n                return BioType.protein_coding;\r\n\r\n            return BioType.other;\r\n        }\r\n\r\n        public static List<MutableTranscript> FixGeneId(this List<MutableTranscript> transcripts, TranscriptMergerLogger logger,\r\n            Dictionary<string, GenbankEntry> idToGenbankEntry, string transcriptId)\r\n        {\r\n            if (transcripts.Count == 1 || 
idToGenbankEntry == null || !idToGenbankEntry.TryGetValue(transcriptId, out var genbankEntry)) return transcripts;\r\n\r\n            var geneIds = transcripts.GetSet(x => x.Gene.GeneId);\r\n            if (geneIds.Count == 1) return transcripts;\r\n\r\n            if (!geneIds.Contains(genbankEntry.GeneId)) throw new InvalidDataException($\"Could not find the Genbank gene ID ({genbankEntry.GeneId}) within the transcripts.\");\r\n\r\n            foreach (var transcript in transcripts) transcript.Gene.GeneId = genbankEntry.GeneId;\r\n            logger.Log(transcripts[0].Id, \"Normalized gene ID\");\r\n            return transcripts.Unique();\r\n        }\r\n\r\n        public static List<MutableTranscript> UnsupervisedFixGeneId(this List<MutableTranscript> transcripts,\r\n            TranscriptMergerLogger logger)\r\n        {\r\n            if (transcripts.Count == 1) return transcripts;\r\n\r\n            var geneIds = transcripts.GetSet(x => x.Gene.GeneId).ToList();\r\n            if (geneIds.Count == 1) return transcripts;\r\n\r\n            string geneId = geneIds[0];\r\n            foreach (var transcript in transcripts) transcript.Gene.GeneId = geneId;\r\n            logger.Log(transcripts[0].Id, \"Normalized gene ID (unsupervised)\");\r\n            return transcripts.Unique();\r\n        }\r\n\r\n        public static List<MutableTranscript> FixGeneSymbols(this List<MutableTranscript> transcripts, TranscriptMergerLogger logger,\r\n            Dictionary<string, GenbankEntry> idToGenbankEntry, string transcriptId)\r\n        {\r\n            if (transcripts.Count == 1) return transcripts;\r\n\r\n            var symbols = transcripts.GetSet(x => x.Gene.Symbol);\r\n            if (symbols.Count == 1) return transcripts;\r\n            if (symbols.Contains(null)) symbols.Remove(null);\r\n\r\n            if (idToGenbankEntry == null || !idToGenbankEntry.TryGetValue(transcriptId, out var genbankEntry))\r\n                return 
transcripts.UnsupervisedFixGeneSymbols(logger, symbols.ToList());\r\n\r\n            if (!symbols.Contains(genbankEntry.Symbol)) return transcripts.UnsupervisedFixGeneSymbols(logger, symbols.ToList());\r\n\r\n            foreach (var transcript in transcripts) transcript.Gene.Symbol = genbankEntry.Symbol;\r\n            logger.Log(transcripts[0].Id, \"Normalized gene symbol\");\r\n            return transcripts.Unique();\r\n        }\r\n\r\n        public static List<MutableTranscript> FixCanonical(this List<MutableTranscript> transcripts, TranscriptMergerLogger logger)\r\n        {\r\n            if (transcripts.Count == 1) return transcripts;\r\n\r\n            var canonicals = transcripts.GetSet(x => x.IsCanonical);\r\n            if (canonicals.Count == 1) return transcripts;\r\n\r\n            foreach (var transcript in transcripts) transcript.IsCanonical = false;\r\n            logger.Log(transcripts[0].Id, \"Normalized canonical flag\");\r\n            return transcripts.Unique();\r\n        }\r\n\r\n        public static List<MutableTranscript> FixHgncId(this List<MutableTranscript> transcripts, TranscriptMergerLogger logger)\r\n        {\r\n            if (transcripts.Count == 1) return transcripts;\r\n\r\n            var hgncIds = transcripts.GetSet(x => x.Gene.HgncId);\r\n            if (hgncIds.Count == 1) return transcripts;\r\n\r\n            if (hgncIds.Contains(-1)) hgncIds.Remove(-1);\r\n            int hgncId = hgncIds.First();\r\n\r\n            foreach (var transcript in transcripts) transcript.Gene.HgncId = hgncId;\r\n            logger.Log(transcripts[0].Id, \"Normalized HGNC ID\");\r\n            return transcripts.Unique();\r\n        }\r\n\r\n        public static List<MutableTranscript> FixGeneStart(this List<MutableTranscript> transcripts, TranscriptMergerLogger logger)\r\n        {\r\n            if (transcripts.Count == 1) return transcripts;\r\n\r\n            var geneStarts = transcripts.GetSet(x => x.Gene.Start);\r\n            if 
(geneStarts.Count == 1) return transcripts;\r\n\r\n            var transcriptStarts = transcripts.GetSet(x => x.Start).ToArray();\r\n            if (transcriptStarts.Length > 1) return transcripts;\r\n\r\n            int closestStart = GetClosest(geneStarts, transcriptStarts[0]);\r\n            foreach (var transcript in transcripts) transcript.Gene.Start = closestStart;\r\n            logger.Log(transcripts[0].Id, \"Normalized gene start\");\r\n            return transcripts.Unique();\r\n        }\r\n\r\n        public static List<MutableTranscript> FixGeneEnd(this List<MutableTranscript> transcripts, TranscriptMergerLogger logger)\r\n        {\r\n            if (transcripts.Count == 1) return transcripts;\r\n\r\n            var geneEnds = transcripts.GetSet(x => x.Gene.End);\r\n            if (geneEnds.Count == 1) return transcripts;\r\n\r\n            var transcriptEnds = transcripts.GetSet(x => x.End).ToArray();\r\n            if (transcriptEnds.Length > 1) return transcripts;\r\n\r\n            int closestEnd = GetClosest(geneEnds, transcriptEnds[0]);\r\n            foreach (var transcript in transcripts) transcript.Gene.End = closestEnd;\r\n            logger.Log(transcripts[0].Id, \"Normalized gene end\");\r\n            return transcripts.Unique();\r\n        }\r\n\r\n        private static List<MutableTranscript> UnsupervisedFixGeneSymbols(this IReadOnlyList<MutableTranscript> transcripts,\r\n            TranscriptMergerLogger logger, List<string> symbols)\r\n        {\r\n            var nonLocGeneSymbols = symbols.FindAll(x => !string.IsNullOrEmpty(x) && !x.StartsWith(\"LOC\"));\r\n            string symbol = nonLocGeneSymbols.Count > 0 ? 
nonLocGeneSymbols[0] : symbols[0];\r\n\r\n            foreach (var transcript in transcripts) transcript.Gene.Symbol = symbol;\r\n            logger.Log(transcripts[0].Id, \"Normalized gene symbol (unsupervised)\");\r\n            return transcripts.Unique();\r\n        }\r\n\r\n        private static int GetClosest(IEnumerable<int> values, int targetValue)\r\n        {\r\n            int bestDelta = int.MaxValue;\r\n            int bestValue = -1;\r\n\r\n            foreach (int value in values)\r\n            {\r\n                int delta = Math.Abs(value - targetValue);\r\n                if (delta >= bestDelta) continue;\r\n\r\n                bestDelta = delta;\r\n                bestValue = value;\r\n            }\r\n\r\n            return bestValue;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Commands/ParseVepCacheDirectory/TranscriptIdFilter.cs",
    "content": "﻿using System.IO;\r\nusing System.Linq;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.Commands.ParseVepCacheDirectory\r\n{\r\n    /// <summary>\r\n    /// keeps only the transcripts whose ID starts with one of the prefixes accepted\r\n    /// for the selected transcript source\r\n    /// </summary>\r\n    public sealed class TranscriptIdFilter\r\n    {\r\n        private readonly string[] _whitelist;\r\n\r\n        /// <summary>\r\n        /// chooses the accepted ID prefixes for the given transcript source\r\n        /// </summary>\r\n        /// <exception cref=\"InvalidDataException\">thrown for any source other than Ensembl or RefSeq</exception>\r\n        public TranscriptIdFilter(Source source)\r\n        {\r\n            // ReSharper disable once SwitchStatementMissingSomeCases\r\n            switch (source)\r\n            {\r\n                case Source.Ensembl:\r\n                    _whitelist = new[] { \"ENSE0\", \"ENSG0\", \"ENSP0\", \"ENST0\" };\r\n                    break;\r\n                case Source.RefSeq:\r\n                    _whitelist = new[] { \"NG_\", \"NM_\", \"NP_\", \"NR_\", \"XM_\", \"XP_\", \"XR_\", \"YP_\" };\r\n                    break;\r\n                default:\r\n                    throw new InvalidDataException($\"Unhandled import mode found: {source}\");\r\n            }\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if the transcript ID starts with a whitelisted prefix and does not contain the text dupl\r\n        /// </summary>\r\n        public bool Pass(MutableTranscript transcript)\r\n        {\r\n            string id = transcript.Id;\r\n\r\n            // the prefixes are plain ASCII accession prefixes, so compare ordinally instead of\r\n            // relying on the culture-sensitive default of StartsWith(string)\r\n            return _whitelist.Any(prefix => id.StartsWith(prefix, System.StringComparison.Ordinal)) &&\r\n                   !id.Contains(\"dupl\");\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Commands/ParseVepCacheDirectory/TranscriptMerger.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.Linq;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing CacheUtils.Genbank;\r\nusing CacheUtils.Genes.Utilities;\r\n\r\nnamespace CacheUtils.Commands.ParseVepCacheDirectory\r\n{\r\n    public static class TranscriptMerger\r\n    {\r\n        /// <summary>\r\n        /// groups the transcripts that share the same ID, start and end, merges every group\r\n        /// down to a single transcript and returns the merged transcripts ordered by start\r\n        /// and then by end.\r\n        /// </summary>\r\n        public static List<MutableTranscript> Merge(TranscriptMergerLogger logger, IEnumerable<MutableTranscript> transcripts,\r\n            Dictionary<string, GenbankEntry> idToGenbankEntry)\r\n        {\r\n            var keyToTranscripts = transcripts.GetMultiValueDict(x => x.Id + \"|\" + x.Start + \"|\" + x.End);\r\n\r\n            return keyToTranscripts\r\n                .Select(kvp => Merge(logger, kvp.Value, idToGenbankEntry))\r\n                .ToList()\r\n                .OrderBy(x => x.Start)\r\n                .ThenBy(x => x.End)\r\n                .ToList();\r\n        }\r\n\r\n        /// <summary>\r\n        /// runs the filter pipeline on one group of transcripts and returns the single survivor\r\n        /// </summary>\r\n        /// <exception cref=\"NotImplementedException\">thrown when the filters leave anything other than one transcript</exception>\r\n        private static MutableTranscript Merge(TranscriptMergerLogger logger, IReadOnlyList<MutableTranscript> transcripts,\r\n            Dictionary<string, GenbankEntry> idToGenbankEntry)\r\n        {\r\n            string transcriptId = transcripts[0].Id;\r\n\r\n            // the cDNA map consistency check is logged for every group, even for singletons\r\n            var candidates = transcripts\r\n                .Unique()\r\n                .InvestigateInconsistentCdnaMaps(logger, transcriptId);\r\n\r\n            if (transcripts.Count == 1) return transcripts[0];\r\n\r\n            // the order of the filters matters: each step receives the output of the previous one\r\n            candidates = candidates\r\n                .RemoveFailedTranscripts(logger)\r\n                .ChooseEditedTranscripts(logger)\r\n                .RemoveTranscriptsWithLowestVersion(logger)\r\n                .FixCodingRegionCdnaStart(logger, idToGenbankEntry, transcriptId)\r\n                .FixCodingRegionCdnaEnd(logger, idToGenbankEntry, transcriptId)\r\n                .FixGeneSymbolSource(logger)\r\n                .FixBioType(logger)\r\n                .FixGeneId(logger, idToGenbankEntry, transcriptId)\r\n                .FixCanonical(logger)\r\n                .FixHgncId(logger)\r\n                .FixGeneStart(logger)\r\n                .FixGeneEnd(logger)\r\n                .FixGeneSymbols(logger, idToGenbankEntry, transcriptId)\r\n                .UnsupervisedFixGeneId(logger)\r\n                .PickSpecificTranscript(logger, transcriptId);\r\n\r\n            if (candidates.Count != 1)\r\n            {\r\n                throw new NotImplementedException($\"Could not merge down to one transcript: {candidates.Count} transcripts ({transcriptId})\");\r\n            }\r\n\r\n            return candidates[0];\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Commands/ParseVepCacheDirectory/TranscriptMergerLogger.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\n\r\nnamespace CacheUtils.Commands.ParseVepCacheDirectory\r\n{\r\n    /// <summary>\r\n    /// thin wrapper around a StreamWriter that receives the transcript merger log messages\r\n    /// </summary>\r\n    public sealed class TranscriptMergerLogger : IDisposable\r\n    {\r\n        private readonly StreamWriter _writer;\r\n\r\n        /// <summary>\r\n        /// wraps the supplied stream in a StreamWriter\r\n        /// </summary>\r\n        public TranscriptMergerLogger(Stream stream)\r\n        {\r\n            _writer = new StreamWriter(stream);\r\n        }\r\n\r\n        /// <summary>writes a line terminator</summary>\r\n        public void WriteLine()\r\n        {\r\n            _writer.WriteLine();\r\n        }\r\n\r\n        /// <summary>writes the string followed by a line terminator</summary>\r\n        public void WriteLine(string s)\r\n        {\r\n            _writer.WriteLine(s);\r\n        }\r\n\r\n        /// <summary>writes the string without a line terminator</summary>\r\n        public void Write(string s)\r\n        {\r\n            _writer.Write(s);\r\n        }\r\n\r\n        /// <summary>intentionally empty</summary>\r\n        public void SetBold()\r\n        {\r\n            // not used\r\n        }\r\n\r\n        /// <summary>intentionally empty</summary>\r\n        public void ResetColor()\r\n        {\r\n            // not used\r\n        }\r\n\r\n        /// <summary>disposes the underlying StreamWriter</summary>\r\n        public void Dispose()\r\n        {\r\n            _writer.Dispose();\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Commands/ParseVepCacheDirectory/VepCacheParser.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing CacheUtils.DataDumperImport.DataStructures.Import;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing CacheUtils.DataDumperImport.Import;\r\nusing CacheUtils.DataDumperImport.IO;\r\nusing Compression.Utilities;\r\nusing Genome;\r\nusing IO;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.Commands.ParseVepCacheDirectory\r\n{\r\n    /// <summary>\r\n    /// reads the regulatory region and transcript data dumper files found in one directory\r\n    /// </summary>\r\n    public sealed class VepCacheParser\r\n    {\r\n        private readonly Source _source;\r\n        private readonly TranscriptIdFilter _filter;\r\n\r\n        public VepCacheParser(Source source)\r\n        {\r\n            _source = source;\r\n            _filter = new TranscriptIdFilter(source);\r\n        }\r\n\r\n        /// <summary>\r\n        /// parses all regulatory region files first and then all transcript files found in dirPath\r\n        /// </summary>\r\n        public (List<IRegulatoryRegion> RegulatoryRegions, List<MutableTranscript> Transcripts) ParseDumpDirectory(\r\n            Chromosome chromosome, string dirPath)\r\n        {\r\n            List<IRegulatoryRegion> regions          = ParseRegulatoryFiles(chromosome, dirPath);\r\n            List<MutableTranscript> parsedTranscripts = ParseTranscriptFiles(chromosome, dirPath);\r\n            return (regions, parsedTranscripts);\r\n        }\r\n\r\n        /// <summary>\r\n        /// parses every regulatory region dump file in dirPath, in the order returned by GetSortedFiles\r\n        /// </summary>\r\n        private static List<IRegulatoryRegion> ParseRegulatoryFiles(Chromosome chromosome, string dirPath)\r\n        {\r\n            var regions   = new List<IRegulatoryRegion>();\r\n            var dumpPaths = FileUtilities.GetFileNamesInDir(dirPath, \"*_reg_regulatory_regions_data_dumper.txt.gz\")\r\n                .ToArray();\r\n\r\n            foreach (string dumpPath in VepRootDirectory.GetSortedFiles(dumpPaths))\r\n                ParseRegulatoryDumpFile(chromosome, dumpPath, regions);\r\n\r\n            return regions;\r\n        }\r\n\r\n        /// <summary>\r\n        /// parses every transcript dump file in dirPath, in the order returned by GetSortedFiles\r\n        /// </summary>\r\n        private List<MutableTranscript> ParseTranscriptFiles(Chromosome chromosome, string dirPath)\r\n        {\r\n            var parsedTranscripts = new List<MutableTranscript>();\r\n            var dumpPaths         = FileUtilities.GetFileNamesInDir(dirPath, \"*_transcripts_data_dumper.txt.gz\").ToArray();\r\n\r\n            foreach (string dumpPath in VepRootDirectory.GetSortedFiles(dumpPaths))\r\n                ParseTranscriptDumpFile(chromosome, dumpPath, parsedTranscripts);\r\n\r\n            return parsedTranscripts;\r\n        }\r\n\r\n        /// <summary>\r\n        /// adds the regulatory regions of one dump file to the supplied collection\r\n        /// </summary>\r\n        /// <exception cref=\"InvalidDataException\">thrown when a feature group other than MotifFeature or RegulatoryFeature is found</exception>\r\n        private static void ParseRegulatoryDumpFile(Chromosome chromosome, string filePath,\r\n            ICollection<IRegulatoryRegion> regulatoryRegions)\r\n        {\r\n            Console.WriteLine(\"- processing {0}\", Path.GetFileName(filePath));\r\n\r\n            using (var reader = new DataDumperReader(GZipUtilities.GetAppropriateReadStream(filePath)))\r\n            {\r\n                foreach (var rootChild in reader.GetRootNode().Value.Values)\r\n                {\r\n                    // anything that is not an object key/value node is skipped\r\n                    if (rootChild is ObjectKeyValueNode featureGroups)\r\n                    {\r\n                        foreach (var featureGroup in featureGroups.Value.Values)\r\n                        {\r\n                            switch (featureGroup.Key)\r\n                            {\r\n                                case \"MotifFeature\":\r\n                                    // not used\r\n                                    break;\r\n                                case \"RegulatoryFeature\":\r\n                                    ParseRegulatoryRegions(chromosome, featureGroup, regulatoryRegions);\r\n                                    break;\r\n                                default:\r\n                                    throw new InvalidDataException(\"Found an unexpected feature group (\" + featureGroup.Key + \") in the regulatory regions file.\");\r\n                            }\r\n                        }\r\n                    }\r\n                }\r\n            }\r\n        }\r\n\r\n        /// <summary>\r\n        /// adds the transcripts of one dump file that pass the ID filter to the supplied collection\r\n        /// </summary>\r\n        /// <exception cref=\"InvalidOperationException\">thrown when a list entry is not a Bio::EnsEMBL::Transcript object value node</exception>\r\n        private void ParseTranscriptDumpFile(Chromosome chromosome, string filePath,\r\n            ICollection<MutableTranscript> transcripts)\r\n        {\r\n            Console.WriteLine(\"- processing {0}\", Path.GetFileName(filePath));\r\n\r\n            using (var reader = new DataDumperReader(GZipUtilities.GetAppropriateReadStream(filePath)))\r\n            {\r\n                foreach (var rootChild in reader.GetRootNode().Value.Values)\r\n                {\r\n                    // anything that is not a list object key/value node is skipped\r\n                    if (rootChild is ListObjectKeyValueNode transcriptList)\r\n                    {\r\n                        foreach (var entry in transcriptList.Values)\r\n                        {\r\n                            if (!(entry is ObjectValueNode transcriptNode))\r\n                            {\r\n                                throw new InvalidOperationException(\"Expected a transcript object value node, but the current node is not an object value.\");\r\n                            }\r\n\r\n                            if (transcriptNode.Type != \"Bio::EnsEMBL::Transcript\")\r\n                            {\r\n                                throw new InvalidOperationException($\"Expected a transcript node, but the current data type is: [{transcriptNode.Type}]\");\r\n                            }\r\n\r\n                            var transcript = ImportTranscript.Parse(transcriptNode, chromosome, _source);\r\n                            if (_filter.Pass(transcript)) transcripts.Add(transcript);\r\n                        }\r\n                    }\r\n                }\r\n            }\r\n        }\r\n\r\n        /// <summary>\r\n        /// parses every regulatory feature in the feature group and adds it to the supplied collection.\r\n        /// Nothing is added when the feature group is not a list object key/value node.\r\n        /// </summary>\r\n        /// <exception cref=\"InvalidOperationException\">thrown when a list entry is not a Bio::EnsEMBL::Funcgen::RegulatoryFeature object value node</exception>\r\n        private static void ParseRegulatoryRegions(Chromosome chromosome, IImportNode featureGroupNode,\r\n            ICollection<IRegulatoryRegion> regulatoryRegions)\r\n        {\r\n            if (!(featureGroupNode is ListObjectKeyValueNode featureList)) return;\r\n\r\n            foreach (var entry in featureList.Values)\r\n            {\r\n                if (!(entry is ObjectValueNode featureNode))\r\n                {\r\n                    throw new InvalidOperationException(\"Expected a regulatory region object value node, but the current node is not an object value.\");\r\n                }\r\n\r\n                if (featureNode.Type != \"Bio::EnsEMBL::Funcgen::RegulatoryFeature\")\r\n                {\r\n                    throw new InvalidOperationException($\"Expected a regulatory region node, but the current data type is: [{featureNode.Type}]\");\r\n                }\r\n\r\n                regulatoryRegions.Add(ImportRegulatoryFeature.Parse(featureNode, chromosome));\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Commands/ParseVepCacheDirectory/VepRootDirectory.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing Genome;\r\n\r\nnamespace CacheUtils.Commands.ParseVepCacheDirectory\r\n{\r\n    /// <summary>\r\n    /// helper methods for locating the per-reference directories and ordering the dump files\r\n    /// </summary>\r\n    public sealed class VepRootDirectory\r\n    {\r\n        private readonly Dictionary<string, Chromosome> _refNameToChromosome;\r\n\r\n        public VepRootDirectory(Dictionary<string, Chromosome> refNameToChromosome) =>\r\n            _refNameToChromosome = refNameToChromosome;\r\n\r\n        /// <summary>\r\n        /// maps the chromosome index to the sub-directory of dirPath that is named after that chromosome.\r\n        /// Sub-directories whose name resolves to an empty chromosome are skipped.\r\n        /// </summary>\r\n        public Dictionary<ushort, string> GetRefIndexToVepDir(string dirPath)\r\n        {\r\n            var refIndexToDir = new Dictionary<ushort, string>();\r\n\r\n            foreach (string subDir in Directory.GetDirectories(dirPath))\r\n            {\r\n                string refName = Path.GetFileName(subDir);\r\n                var chromosome = ReferenceNameUtilities.GetChromosome(_refNameToChromosome, refName);\r\n\r\n                if (!chromosome.IsEmpty()) refIndexToDir[chromosome.Index] = subDir;\r\n            }\r\n\r\n            return refIndexToDir;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns the file paths ordered by the integer that precedes the first hyphen in the file name.\r\n        /// If two file names share the same leading integer, only the later path is kept.\r\n        /// </summary>\r\n        /// <exception cref=\"InvalidDataException\">thrown when a file name does not contain a hyphen</exception>\r\n        public static IEnumerable<string> GetSortedFiles(IEnumerable<string> filePaths)\r\n        {\r\n            var positionToPath = new SortedDictionary<int, string>();\r\n\r\n            foreach (string path in filePaths)\r\n            {\r\n                string name = Path.GetFileName(path);\r\n                if (name == null) continue;\r\n\r\n                int hyphenIndex = name.IndexOf(\"-\", StringComparison.Ordinal);\r\n                if (hyphenIndex < 0) throw new InvalidDataException($\"Could not find the hyphen in: [{name}]\");\r\n\r\n                string leadingNumber = name.Substring(0, hyphenIndex);\r\n                positionToPath[int.Parse(leadingNumber)] = path;\r\n            }\r\n\r\n            return positionToPath.Values.ToArray();\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Commands/RegulatoryGFF/CreateRegulatoryGffMain.cs",
    "content": "﻿using System;\nusing System.IO;\nusing CacheUtils.Helpers;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing ReferenceSequence.Utilities;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.Providers;\n\nnamespace CacheUtils.Commands.RegulatoryGFF\n{\n    /// <summary>\n    /// command that writes the regulatory regions stored in a transcript cache as GFF entries\n    /// </summary>\n    public static class CreateRegulatoryGffMain\n    {\n        private static string _referencePath;\n        private static string _inputPrefix;\n        private static string _outputFileName;\n\n        /// <summary>\n        /// loads the cache and writes one GFF line per regulatory region to the output file\n        /// </summary>\n        private static ExitCodes ProgramExecution()\n        {\n            using (var writer = GZipUtilities.GetStreamWriter(_outputFileName))\n            {\n                string cachePath    = CacheConstants.TranscriptPath(_inputPrefix);\n                var    sequenceData = SequenceHelper.GetDictionaries(_referencePath);\n\n                // load the cache\n                Console.Write(\"- reading {0}... \", Path.GetFileName(cachePath));\n                var cache = TranscriptCacheHelper.GetCache(cachePath, sequenceData.refIndexToChromosome);\n                Console.WriteLine(\"found {0:N0} reference sequences. \", cache.RegulatoryRegionIntervalArrays.Length);\n\n                Console.Write(\"- writing GFF entries... \");\n                foreach (var intervalArray in cache.RegulatoryRegionIntervalArrays)\n                {\n                    if (intervalArray == null) continue;\n                    foreach (var interval in intervalArray.Array) WriteRegulatoryFeature(writer, interval.Value);\n                }\n                Console.WriteLine(\"finished.\");\n            }\n\n            return ExitCodes.Success;\n        }\n\n        /// <summary>\n        /// parses the command line, validates the parameters and runs the command\n        /// </summary>\n        public static ExitCodes Run(string command, string[] args)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"in|i=\",\n                    \"input cache {prefix}\",\n                    v => _inputPrefix = v\n                },\n                {\n                    \"out|o=\",\n                    \"output {file name}\",\n                    v => _outputFileName = v\n                },\n                {\n                    \"ref|r=\",\n                    \"reference {file}\",\n                    v => _referencePath = v\n                }\n            };\n\n            string commandLineExample = $\"{command} --in <cache prefix> --ref <reference path> --out <GFF path>\";\n\n            // the reference is used unconditionally by ProgramExecution, so it has to be\n            // validated up front just like the input cache prefix\n            return new ConsoleAppBuilder(args, ops)\n                .UseVersionProvider(new VersionProvider())\n                .Parse()\n                .HasRequiredParameter(_inputPrefix, \"input cache prefix\", \"--in\")\n                .HasRequiredParameter(_referencePath, \"reference file\", \"--ref\")\n                .CheckOutputFilenameSuffix(_outputFileName, \".gz\", \"GFF\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Outputs regulatory regions in a database.\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n        }\n\n        /// <summary>\n        /// writes one tab-delimited GFF line (including the attributes) for the regulatory region\n        /// </summary>\n        private static void WriteRegulatoryFeature(TextWriter writer, IRegulatoryRegion regulatoryRegion)\n        {\n            writer.Write($\"{regulatoryRegion.Chromosome.UcscName}\\t.\\tregulatory feature\\t{regulatoryRegion.Start}\\t{regulatoryRegion.End}\\t.\\t.\\t.\\t\");\n            WriteGeneralAttributes(writer, regulatoryRegion);\n            writer.WriteLine();\n        }\n\n        /// <summary>\n        /// writes the ID attribute (only when the ID is not empty) followed by the type attribute\n        /// </summary>\n        private static void WriteGeneralAttributes(TextWriter writer, IRegulatoryRegion regulatoryRegion)\n        {\n            if (!regulatoryRegion.Id.IsEmpty()) writer.Write($\"regulatory_feature_id \\\"{regulatoryRegion.Id}\\\"; \");\n            writer.Write($\"regulatory_feature_type \\\"{regulatoryRegion.Type}\\\"; \");\n        }\n    }\n}\n"
  },
  {
    "path": "CacheUtils/Commands/UniversalGeneArchive/FilePaths.cs",
    "content": "﻿namespace CacheUtils.Commands.UniversalGeneArchive\r\n{\r\n    // ReSharper disable UnusedAutoPropertyAccessor.Global\r\n    public sealed class FilePaths\r\n    {        \r\n        public AssemblySpecificPaths GRCh37 { get; set; }\r\n        public AssemblySpecificPaths GRCh38 { get; set; }\r\n\r\n        // ReSharper disable once ClassNeverInstantiated.Global\r\n        public class AssemblySpecificPaths\r\n        {\r\n            public string ReferencePath { get; set; }\r\n            public string EnsemblCachePath { get; set; }\r\n            public string RefSeqCachePath { get; set; }\r\n        }\r\n    }\r\n    // ReSharper restore UnusedAutoPropertyAccessor.Global\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Commands/UniversalGeneArchive/UniversalGeneArchiveMain.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.IO.Compression;\r\nusing CacheUtils.Commands.Download;\r\nusing CacheUtils.Genes;\r\nusing CacheUtils.Genes.DataStores;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing CacheUtils.Genes.IO;\r\nusing CommandLine.Builders;\r\nusing CommandLine.NDesk.Options;\r\nusing CommandLine.Utilities;\r\nusing Compression.FileHandling;\r\nusing ErrorHandling;\r\nusing IO;\r\nusing Microsoft.Extensions.Configuration;\r\nusing ReferenceSequence.Utilities;\r\nusing VariantAnnotation.Providers;\r\n\r\nnamespace CacheUtils.Commands.UniversalGeneArchive\r\n{\r\n    public static class UniversalGeneArchiveMain\r\n    {\r\n        private static string _referencesPath;\r\n        private static string _intermediateCachePath;\r\n\r\n        private static ExitCodes ProgramExecution()\r\n        {\r\n            if (UniversalGeneArchiveCurrent())\r\n            {\r\n                Logger.WriteLine(\"- universal gene archive is already up-to-date.\");\r\n                return ExitCodes.Success;\r\n            }\r\n\r\n            const string jsonPath = \"CacheUtils.dll.gene.json\";\r\n            var filePaths = GetFilePaths(jsonPath);\r\n            \r\n            var ds = LoadDataStores(filePaths);\r\n\r\n            var grch37GenesByRef = ds.Assembly37.UpdateHgncIds(ds.Hgnc).MergeByHgnc(true);            \r\n            var grch38GenesByRef = ds.Assembly38.UpdateHgncIds(ds.Hgnc).MergeByHgnc(false);\r\n\r\n            var universalGenes = CombineGenomeAssemblies(grch37GenesByRef, grch38GenesByRef).UpdateGeneSymbols(\r\n                ds.Hgnc.HgncIdToSymbol, ds.GeneInfoData.EntrezGeneIdToSymbol,\r\n                ds.Assembly38.EnsemblGtf.EnsemblIdToSymbol, ds.Assembly37.RefSeqGff.EntrezGeneIdToSymbol);\r\n\r\n            WriteGenes(universalGenes);\r\n            \r\n            return ExitCodes.Success;\r\n        }\r\n\r\n        private static bool UniversalGeneArchiveCurrent()\r\n        
{\r\n            var fileInfo = new FileInfo(ExternalFiles.UniversalGeneFilePath);\r\n            return fileInfo.Exists && ExternalFiles.GetElapsedDays(fileInfo.CreationTime) < 1.0;\r\n        }\r\n\r\n        private static (GeneInfoData GeneInfoData, AssemblyDataStore Assembly37, AssemblyDataStore Assembly38, Hgnc Hgnc)\r\n            LoadDataStores(FilePaths filePaths)\r\n        {\r\n            Logger.Write(\"- loading datastores... \");\r\n            var loadBenchmark = new Benchmark();\r\n\r\n            var (_, refNameToChromosome, _) = SequenceHelper.GetDictionaries(filePaths.GRCh38.ReferencePath);\r\n\r\n            var geneInfoData = GeneInfoData.Create(ExternalFiles.GeneInfoFile.FilePath);\r\n            var dataStore37  = AssemblyDataStore.Create(\"GRCh37\", filePaths.GRCh37, refNameToChromosome, true);\r\n            var dataStore38  = AssemblyDataStore.Create(\"GRCh38\", filePaths.GRCh38, refNameToChromosome, false);\r\n            var hgnc         = Hgnc.Create(ExternalFiles.HgncFile.FilePath, refNameToChromosome);\r\n\r\n            Logger.WriteLine($\"{Benchmark.ToHumanReadable(loadBenchmark.GetElapsedTime())}\");\r\n\r\n            return (geneInfoData, dataStore37, dataStore38, hgnc);\r\n        }\r\n\r\n        private static UgaGene[] CombineGenomeAssemblies(Dictionary<ushort, List<UgaGene>> genesByRef37, Dictionary<ushort, List<UgaGene>> genesByRef38)\r\n        {\r\n            Logger.WriteLine(\"\\n*** Global ***\");\r\n            Logger.Write(\"- combining genes from GRCh37 and GRCh38... 
\");\r\n            var combinedGenes = UgaAssemblyCombiner.Combine(genesByRef37, genesByRef38);\r\n            Logger.WriteLine($\"{combinedGenes.Length} genes.\");\r\n\r\n            return combinedGenes;\r\n        }\r\n\r\n        private static UgaGene[] UpdateGeneSymbols(this UgaGene[] genes, Dictionary<int, string> hgncIdToSymbol, Dictionary<string, string> entrezGeneIdToSymbol,\r\n            Dictionary<string, string> ensemblIdToSymbol, Dictionary<string, string> refseqGeneIdToSymbol)\r\n        {\r\n            var updater = new GeneSymbolUpdater(hgncIdToSymbol, entrezGeneIdToSymbol, ensemblIdToSymbol, refseqGeneIdToSymbol);\r\n            updater.Update(genes);\r\n            return genes;\r\n        }\r\n\r\n        private static void WriteGenes(UgaGene[] genes)\r\n        {\r\n            Logger.Write($\"- writing genes to {Path.GetFileName(ExternalFiles.UniversalGeneFilePath)}... \");\r\n\r\n            using (var stream = new BlockGZipStream(FileUtilities.GetCreateStream(ExternalFiles.UniversalGeneFilePath), CompressionMode.Compress))\r\n            using (var writer = new UgaGeneWriter(stream))\r\n            {\r\n                writer.Write(genes);\r\n            }\r\n\r\n            Logger.WriteLine(\"finished\");\r\n        }\r\n\r\n        private static FilePaths GetFilePaths(string jsonPath)\r\n        {\r\n            var builder = new ConfigurationBuilder();\r\n            builder.AddJsonFile(jsonPath);\r\n\r\n            var configuration = builder.Build();\r\n\r\n            var filePaths = new FilePaths();\r\n            configuration.Bind(filePaths);\r\n\r\n            UpdatePaths(filePaths.GRCh37);\r\n            UpdatePaths(filePaths.GRCh38);\r\n\r\n            CheckPaths(filePaths.GRCh37);\r\n            CheckPaths(filePaths.GRCh38);\r\n\r\n            return filePaths;\r\n        }\r\n\r\n        private static void UpdatePaths(FilePaths.AssemblySpecificPaths paths)\r\n        {\r\n            paths.EnsemblCachePath = 
Path.Combine(_intermediateCachePath, paths.EnsemblCachePath);\r\n            paths.RefSeqCachePath  = Path.Combine(_intermediateCachePath, paths.RefSeqCachePath);\r\n            paths.ReferencePath    = Path.Combine(_referencesPath, paths.ReferencePath);\r\n        }\r\n\r\n        private static void CheckPath(string filePath, string description)\r\n        {\r\n            if (string.IsNullOrEmpty(filePath)) throw new InvalidDataException($\"No value was found for the {description} key.\");\r\n            if (!File.Exists(filePath)) throw new FileNotFoundException($\"Unable to find the following file: {filePath}\");\r\n        }\r\n\r\n        private static void CheckPaths(FilePaths.AssemblySpecificPaths paths)\r\n        {\r\n            CheckPath(paths.EnsemblCachePath, \"Ensembl intermediate cache\");\r\n            CheckPath(paths.RefSeqCachePath,  \"RefSeq intermediate cache\");\r\n            CheckPath(paths.ReferencePath,    \"reference\");\r\n        }\r\n\r\n        public static ExitCodes Run(string command, string[] args)\r\n        {\r\n            var ops = new OptionSet\r\n            {\r\n                {\r\n                    \"icache|i=\",\r\n                    \"intermediate cache {dir}\",\r\n                    v => _intermediateCachePath = v\r\n                },\r\n                {\r\n                    \"ref|r=\",\r\n                    \"reference {dir}\",\r\n                    v => _referencesPath = v\r\n                }\r\n            };\r\n\r\n            string commandLineExample = $\"{command} -i <intermediate cache dir> -r <reference dir>\";\r\n\r\n            return new ConsoleAppBuilder(args, ops)\r\n                .UseVersionProvider(new VersionProvider())\r\n                .Parse()\r\n                .CheckDirectoryExists(_intermediateCachePath, \"intermediate cache\", \"--icache\")\r\n                .CheckDirectoryExists(_referencesPath, \"reference\", \"--ref\")\r\n                .SkipBanner()\r\n                
.ShowHelpMenu(\"Creates the universal gene archive\", commandLineExample)\r\n                .ShowErrors()\r\n                .Execute(ProgramExecution);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/DataStructures/GenomeSymbolSource.cs",
    "content": "﻿namespace CacheUtils.DataDumperImport.DataStructures\r\n{\r\n    public enum GeneSymbolSource : byte\r\n    {\r\n        // ReSharper disable InconsistentNaming\r\n        Unknown,\r\n        CloneBasedEnsemblGene,\r\n        CloneBasedVegaGene,\r\n        EntrezGene,\r\n        HGNC,\r\n        LRG,\r\n        NCBI,\r\n        miRBase,\r\n        RFAM,\r\n        UniProtGeneName\r\n        // ReSharper restore InconsistentNaming\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/DataStructures/Import/IImportNode.cs",
    "content": "﻿namespace CacheUtils.DataDumperImport.DataStructures.Import\r\n{\r\n    public interface IImportNode\r\n    {\r\n        string Key { get; }\r\n    }\r\n\r\n    public interface IListMember : IImportNode { }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/DataStructures/Import/ImportNodeExtensions.cs",
    "content": "﻿using System.IO;\n\nnamespace CacheUtils.DataDumperImport.DataStructures.Import\n{\n    public static class ImportNodeExtensions\n    {\n        public static int GetInt32(this IImportNode node)\n        {\n            string s = GetString(node);\n            if (s == null) return -1;\n\n            if (!int.TryParse(s, out int ret))\n            {\n                throw new InvalidDataException($\"Unable to convert the string ({s}) to an integer.\");\n            }\n\n            return ret;\n        }\n\n        public static bool GetBool(this IImportNode node)\n        {\n            int num = GetInt32(node);\n            return num == 1;\n        }\n\n        public static string GetString(this IImportNode node)\n        {\n            if (!(node is StringKeyValueNode stringKeyValue))\n            {\n                throw new InvalidDataException($\"Unable to convert the AbstractData type to a StringKeyValue type: [{node.Key}]\");\n            }\n\n            string s = stringKeyValue.Value;\n            if (s == \"\" || s == \"-\") s = null;\n            return s;\n        }\n\n        public static bool IsUndefined(this IImportNode node)\n        {\n            if (!(node is StringKeyValueNode stringKeyValue)) return false;\n            return stringKeyValue.Value == null;\n        }\n    }\n}\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/DataStructures/Import/ListObjectKeyValueNode.cs",
    "content": "﻿using System.Collections.Generic;\r\n\r\nnamespace CacheUtils.DataDumperImport.DataStructures.Import\r\n{\r\n    public sealed class ListObjectKeyValueNode : IImportNode\r\n    {\r\n        public string Key { get; }\r\n        public List<IListMember> Values { get; } = new List<IListMember>();\r\n\r\n        public ListObjectKeyValueNode(string key) => Key = key;\r\n        public void Add(IListMember node) => Values.Add(node);\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/DataStructures/Import/ObjectKeyValueNode.cs",
    "content": "﻿namespace CacheUtils.DataDumperImport.DataStructures.Import\r\n{\r\n    public sealed class ObjectKeyValueNode : IImportNode\r\n    {\r\n        public string Key { get; }\r\n        public ObjectValueNode Value { get; }\r\n\r\n        public ObjectKeyValueNode(string key, ObjectValueNode value)\r\n        {\r\n            Key   = key;\r\n            Value = value;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/DataStructures/Import/ObjectValueNode.cs",
    "content": "﻿using System.Collections.Generic;\r\n\r\nnamespace CacheUtils.DataDumperImport.DataStructures.Import\r\n{\r\n    public sealed class ObjectValueNode : IListMember\r\n    {\r\n        public string Type { get; }\r\n        public string Key { get; }\r\n        public List<IImportNode> Values { get; }\r\n\r\n        internal ObjectValueNode(string type, List<IImportNode> values)\r\n        {\r\n            Key    = null;\r\n            Type   = type;\r\n            Values = values;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/DataStructures/Import/StringKeyValueNode.cs",
    "content": "﻿namespace CacheUtils.DataDumperImport.DataStructures.Import\r\n{\r\n    public sealed class StringKeyValueNode : IImportNode\r\n    {\r\n        public string Key { get; }\r\n        public string Value { get; }\r\n\r\n        public StringKeyValueNode(string key, string value)\r\n        {\r\n            Key   = key;\r\n            Value = value;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/DataStructures/Import/StringValueNode.cs",
    "content": "﻿namespace CacheUtils.DataDumperImport.DataStructures.Import\r\n{\r\n    public sealed class StringValueNode : IListMember\r\n    {\r\n        public string Key { get; }\r\n        public StringValueNode(string key) => Key = key;\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/DataStructures/Mutable/MutableExon.cs",
    "content": "﻿using System;\r\nusing Genome;\r\nusing Intervals;\r\n\r\nnamespace CacheUtils.DataDumperImport.DataStructures.Mutable\r\n{\r\n    public sealed class MutableExon : IEquatable<MutableExon>, IInterval\r\n    {\r\n        private readonly Chromosome _chromosome;\r\n        public readonly int Phase;\r\n        public int Start { get; }\r\n        public int End { get; }\r\n\r\n        public MutableExon(Chromosome chromosome, int start, int end, int phase)\r\n        {\r\n            _chromosome = chromosome;\r\n            Start       = start;\r\n            End         = end;\r\n            Phase       = phase;\r\n        }\r\n\r\n        public bool Equals(MutableExon other)\r\n        {\r\n            if (ReferenceEquals(null, other)) return false;\r\n            if (ReferenceEquals(this, other)) return true;\r\n            return _chromosome.Index == other._chromosome.Index && Start == other.Start && End == other.End &&\r\n                   Phase == other.Phase;\r\n        }\r\n\r\n        public override int GetHashCode()\r\n        {\r\n            unchecked\r\n            {\r\n                int hashCode = _chromosome.Index.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ Start;\r\n                hashCode = (hashCode * 397) ^ End;\r\n                hashCode = (hashCode * 397) ^ Phase.GetHashCode();\r\n                return hashCode;\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/DataStructures/Mutable/MutableGene.cs",
    "content": "﻿using System;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing Genome;\r\nusing Intervals;\r\n\r\nnamespace CacheUtils.DataDumperImport.DataStructures.Mutable\r\n{\r\n    public sealed class MutableGene : IEquatable<MutableGene>, IFlatGene<MutableGene>\r\n    {\r\n        public Chromosome Chromosome { get; set; }\r\n        public int Start { get; set; }\r\n        public int End { get; set; }\r\n        public bool OnReverseStrand { get; }\r\n        public string GeneId { get; set; }\r\n        public string Symbol { get; set; }\r\n        public int HgncId { get; set; }\r\n        public GeneSymbolSource SymbolSource { get; set; }\r\n\r\n        public MutableGene(Chromosome chromosome, int start, int end, bool onReverseStrand, string symbol,\r\n            GeneSymbolSource symbolSource, string geneId, int hgncId)\r\n        {\r\n            Chromosome      = chromosome;\r\n            Start           = start;\r\n            End             = end;\r\n            OnReverseStrand = onReverseStrand;\r\n            Symbol          = symbol;\r\n            SymbolSource    = symbolSource;\r\n            GeneId          = geneId;\r\n            HgncId          = hgncId;\r\n        }\r\n\r\n        public override string ToString()\r\n        {\r\n            string strand = OnReverseStrand ? 
\"R\" : \"F\";\r\n            return $\"{GeneId}: {Chromosome.UcscName} {Start}-{End} {strand} symbol: {Symbol} ({SymbolSource}), HGNC ID: {HgncId}\";\r\n        }\r\n\r\n        public bool Equals(MutableGene other)\r\n        {\r\n            if (ReferenceEquals(null, other)) return false;\r\n            if (ReferenceEquals(this, other)) return true;\r\n\r\n            return Chromosome.Index == other.Chromosome.Index &&\r\n                   Start            == other.Start            &&\r\n                   End              == other.End              &&\r\n                   OnReverseStrand  == other.OnReverseStrand  &&\r\n                   Symbol           == other.Symbol           &&\r\n                   GeneId           == other.GeneId;\r\n        }\r\n\r\n        public override int GetHashCode()\r\n        {\r\n            unchecked\r\n            {\r\n                // ReSharper disable NonReadonlyMemberInGetHashCode\r\n                int hashCode = Chromosome.Index.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ Start;\r\n                hashCode = (hashCode * 397) ^ End;\r\n                hashCode = (hashCode * 397) ^ OnReverseStrand.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ Symbol.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ GeneId.GetHashCode();\r\n                // ReSharper restore NonReadonlyMemberInGetHashCode\r\n                return hashCode;\r\n            }\r\n        }\r\n\r\n        public MutableGene Clone() => new MutableGene(Chromosome, Start, End, OnReverseStrand, Symbol, SymbolSource,\r\n            GeneId, HgncId);\r\n\r\n        public UgaGene ToUgaGene(bool isGrch37)\r\n        {\r\n            (string ensemblGeneId, string entrezGeneId) = GeneId.StartsWith(\"ENSG\") ? (GeneId, null as string) : (null as string, GeneId);\r\n\r\n            IInterval interval = new Interval(Start, End);\r\n            (IInterval grch37, IInterval grch38) = isGrch37 ? 
(interval, null as IInterval) : (null as IInterval, interval);\r\n\r\n            return new UgaGene(Chromosome, grch37, grch38, OnReverseStrand, entrezGeneId, ensemblGeneId, Symbol,\r\n                HgncId);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/DataStructures/Mutable/MutableTranscript.cs",
    "content": "﻿using System;\r\nusing Genome;\r\nusing Intervals;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.DataDumperImport.DataStructures.Mutable\r\n{\r\n    public sealed class MutableTranscript : IEquatable<MutableTranscript>\r\n    {\r\n        public readonly Chromosome Chromosome;\r\n        public readonly int Start;\r\n        public readonly int End;\r\n        public readonly string Id;\r\n        public readonly byte Version;\r\n        public readonly string CcdsId;\r\n        public readonly string RefSeqId;\r\n        public readonly Source Source;\r\n        public readonly MutableGene Gene;\r\n        public readonly IInterval[] MicroRnas;\r\n        public readonly bool CdsStartNotFound;\r\n        public readonly bool CdsEndNotFound;\r\n        public readonly int[] SelenocysteinePositions;\r\n        public readonly int StartExonPhase;\r\n        public readonly IRnaEdit[] RnaEdits;\r\n        \r\n        public readonly string ProteinId;\r\n        public readonly byte ProteinVersion;\r\n        public readonly string PeptideSequence;\r\n        public readonly MutableExon[] Exons;\r\n        public readonly int TotalExonLength;\r\n        public readonly IInterval[] Introns;\r\n        public readonly string TranslateableSequence;\r\n        public readonly MutableTranscriptRegion[] CdnaMaps;\r\n        public readonly string BamEditStatus;\r\n\r\n        // mutable\r\n        public BioType BioType;\r\n        public bool IsCanonical;\r\n        public Gene UpdatedGene;\r\n\r\n        public int CdsLength;\r\n        public ITranscriptRegion[] TranscriptRegions;\r\n        public byte NewStartExonPhase;\r\n        public ICodingRegion CodingRegion;\r\n\r\n        public readonly string SiftData;\r\n        public readonly string PolyphenData;\r\n        public int SiftIndex     = -1;\r\n        public int PolyPhenIndex = -1;\r\n\r\n        public 
MutableTranscript(Chromosome chromosome, int start, int end, string id, byte version, string ccdsId,\r\n            string refSeqId, BioType bioType, bool isCanonical, ICodingRegion codingRegion, string proteinId,\r\n            byte proteinVersion, string peptideSequence, Source source, MutableGene gene, MutableExon[] exons,\r\n            int startExonPhase, int totalExonLength, IInterval[] introns, MutableTranscriptRegion[] cdnaMaps,\r\n            string siftData, string polyphenData, string translateableSequence, IInterval[] microRnas,\r\n            bool cdsStartNotFound, bool cdsEndNotFound, int[] selenocysteinePositions, IRnaEdit[] rnaEdits,\r\n            string bamEditStatus)\r\n        {\r\n            Chromosome              = chromosome;\r\n            Start                   = start;\r\n            End                     = end;\r\n            Id                      = id;\r\n            Version                 = version;\r\n            CcdsId                  = ccdsId;\r\n            RefSeqId                = refSeqId;\r\n            BioType                 = bioType;\r\n            IsCanonical             = isCanonical;\r\n            CodingRegion            = codingRegion;\r\n            ProteinId               = proteinId;\r\n            ProteinVersion          = proteinVersion;\r\n            PeptideSequence         = peptideSequence;\r\n            Source                  = source;\r\n            Gene                    = gene;\r\n            Exons                   = exons;\r\n            StartExonPhase          = startExonPhase;\r\n            TotalExonLength         = totalExonLength;\r\n            Introns                 = introns;\r\n            CdnaMaps                = cdnaMaps;\r\n            SiftData                = siftData;\r\n            PolyphenData            = polyphenData;\r\n            TranslateableSequence   = translateableSequence;\r\n            MicroRnas               = microRnas;\r\n            CdsStartNotFound        = 
cdsStartNotFound;\r\n            CdsEndNotFound          = cdsEndNotFound;\r\n            SelenocysteinePositions = selenocysteinePositions;\r\n            RnaEdits                = rnaEdits;\r\n            BamEditStatus           = bamEditStatus;\r\n        }\r\n\r\n        public bool Equals(MutableTranscript other)\r\n        {\r\n            if (ReferenceEquals(null, other)) return false;\r\n            if (ReferenceEquals(this, other)) return true;\r\n\r\n            return Chromosome.Index == other.Chromosome.Index &&\r\n                   Start            == other.Start            &&\r\n                   End              == other.End              &&\r\n                   Id               == other.Id               &&\r\n                   Version          == other.Version          &&\r\n                   BioType          == other.BioType          &&\r\n                   Source           == other.Source;\r\n        }\r\n\r\n        public override int GetHashCode()\r\n        {\r\n            unchecked\r\n            {\r\n                // ReSharper disable NonReadonlyMemberInGetHashCode\r\n                int hashCode = Chromosome.Index.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ Start;\r\n                hashCode = (hashCode * 397) ^ End;\r\n                hashCode = (hashCode * 397) ^ Id.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ Version.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ (int) BioType;\r\n                hashCode = (hashCode * 397) ^ (int) Source;\r\n                return hashCode;\r\n                // ReSharper restore NonReadonlyMemberInGetHashCode\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/DataStructures/Mutable/MutableTranscriptRegion.cs",
    "content": "﻿using IO;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.DataDumperImport.DataStructures.Mutable\r\n{\r\n    public sealed class MutableTranscriptRegion : ITranscriptRegion\r\n    {\r\n        public int Start { get; }\r\n        public int End { get; }\r\n        public TranscriptRegionType Type { get; set; }\r\n        public ushort Id { get; set; }\r\n        public int CdnaStart { get; set; }\r\n        public int CdnaEnd { get; set; }\r\n\r\n        public MutableTranscriptRegion(TranscriptRegionType type, ushort id, int start, int end, int cdnaStart = -1,\r\n            int cdnaEnd = -1)\r\n        {\r\n            Type      = type;\r\n            Id        = id;\r\n            Start     = start;\r\n            End       = end;\r\n            CdnaStart = cdnaStart;\r\n            CdnaEnd   = cdnaEnd;\r\n        }\r\n\r\n        public void Write(IExtendedBinaryWriter writer) => throw new System.NotImplementedException();\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/FauxRegex/RegexDecisionTree.cs",
    "content": "﻿using System;\r\nusing System.Linq;\r\nusing CacheUtils.DataDumperImport.IO;\r\n\r\nnamespace CacheUtils.DataDumperImport.FauxRegex\r\n{\r\n    internal static class RegexDecisionTree\r\n    {\r\n        internal static (EntryType Type, string Key, string Value) GetEntryType(string s)\r\n        {\r\n            s = s.Trim().TrimEnd(',');\r\n\r\n            int fatArrowPos = s.IndexOf(\"=>\", StringComparison.Ordinal);\r\n            return fatArrowPos != -1\r\n                ? GetEntryTypeFatArrow(s, fatArrowPos)\r\n                : GetEntryTypeNoArrow(s);\r\n        }\r\n\r\n        private static (EntryType Type, string Key, string Value) GetEntryTypeNoArrow(string s)\r\n        {\r\n            int varPos = s.IndexOf(\"$VAR\", StringComparison.Ordinal);\r\n            return varPos != -1 ? GetEntryTypeVar(s) : GetEntryTypeNoVar(s);\r\n        }\r\n\r\n        private static (EntryType Type, string Key, string Value) GetEntryTypeNoVar(string s)\r\n        {\r\n            s = s.TrimEnd(';');\r\n\r\n            // ReSharper disable once ConvertIfStatementToSwitchStatement\r\n            if (s == \"}\") return (EntryType.EndBraces, null, null);\r\n            if (s == \"bless( {\") return (EntryType.OpenBraces, null, null);\r\n\r\n            int endBracePos = s.IndexOf(\"}, 'Bio::\", StringComparison.Ordinal);\r\n            if (endBracePos != -1) return GetEntryTypeDataPos(s, endBracePos + 4);\r\n\r\n            s = s.Trim('\\'');\r\n            if (OnlyDigits(s)) return (EntryType.DigitKey, s, null);\r\n\r\n            throw new NotImplementedException($\"Unable to match the non-$VAR regexes: [{s}]\");\r\n        }\r\n\r\n        private static (EntryType Type, string Key, string Value) GetEntryTypeDataPos(string s,\r\n            int afterFirstQuote)\r\n        {\r\n            return (EntryType.EndBracesWithDataType, GetForwardString(s, afterFirstQuote), null);\r\n        }\r\n\r\n        private static (EntryType Type, string Key, string 
Value) GetEntryTypeVar(string s)\r\n        {\r\n            if (!s.EndsWith(\" = {\")) throw new NotImplementedException($\"Unable to match the $VAR regexes: [{s}]\");\r\n\r\n            int spacePos = s.IndexOf(' ');\r\n            return (EntryType.RootObjectKeyValue, s.Substring(0, spacePos), null);\r\n        }\r\n\r\n        private static (EntryType, string Key, string Value) GetEntryTypeFatArrow(string s, int fatArrowPos)\r\n        {\r\n            string key = GetKey(s, fatArrowPos - 2);\r\n\r\n            int firstPosAfterFatArrow = fatArrowPos + 3;\r\n            if (s[firstPosAfterFatArrow] == '\\'') return GetEntryTypeStringKeyValue(s, firstPosAfterFatArrow + 1, key);\r\n            if (s[s.Length - 1] == '{') return (EntryType.ObjectKeyValue, key, null);\r\n\r\n            string afterFatArrow = s.Substring(firstPosAfterFatArrow);\r\n\r\n            // ReSharper disable once ConvertIfStatementToSwitchStatement\r\n            if (afterFatArrow == \"undef\") return (EntryType.UndefKeyValue, key, null);\r\n            if (afterFatArrow == \"{}\") return (EntryType.EmptyValueKeyValue, key, null);\r\n            if (afterFatArrow == \"[]\") return (EntryType.EmptyListKeyValue, key, null);\r\n            if (afterFatArrow.StartsWith(\"$VAR\")) return (EntryType.ReferenceStringKeyValue, key, afterFatArrow);\r\n\r\n            if (s[firstPosAfterFatArrow] == '[') return (EntryType.ListObjectKeyValue, key, null);\r\n            if (OnlyDigits(afterFatArrow)) return (EntryType.DigitKeyValue, key, afterFatArrow);\r\n\r\n            throw new NotImplementedException();\r\n        }\r\n\r\n        private static (EntryType, string Key, string Value) GetEntryTypeStringKeyValue(string s, int afterFirstQuote, string key)\r\n        {\r\n            int secondQuotePos = s.IndexOf('\\'', afterFirstQuote);\r\n\r\n            return secondQuotePos == -1\r\n                ? 
(EntryType.MultiLineKeyValue, key, s.Substring(afterFirstQuote))\r\n                : (EntryType.StringKeyValue, key, s.Substring(afterFirstQuote,\r\n                    secondQuotePos - afterFirstQuote));\r\n        }\r\n\r\n        private static string GetKey(string s, int secondQuotePos)\r\n        {\r\n            int afterFirstQuote = s.LastIndexOf('\\'', secondQuotePos - 1) + 1;\r\n            return s.Substring(afterFirstQuote, secondQuotePos - afterFirstQuote);\r\n        }\r\n\r\n        private static string GetForwardString(string s, int afterFirstQuote)\r\n        {\r\n            int secondQuotePos = s.IndexOf('\\'', afterFirstQuote);\r\n            string result = s.Substring(afterFirstQuote, secondQuotePos - afterFirstQuote);\r\n            return result;\r\n        }\r\n\r\n        internal static bool OnlyDigits(string s) => s.All(c => char.IsDigit(c) || c == '-');\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/IO/DataDumperReader.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Text;\r\nusing CacheUtils.DataDumperImport.DataStructures.Import;\r\nusing CacheUtils.DataDumperImport.FauxRegex;\r\nusing IO;\r\nusing OptimizedCore;\r\n\r\nnamespace CacheUtils.DataDumperImport.IO\r\n{\r\n    public sealed class DataDumperReader : IDisposable\r\n    {\r\n        private readonly StreamReader _reader;\r\n        private readonly StringBuilder _sb = new StringBuilder();\r\n\r\n        public DataDumperReader(Stream stream) => _reader = FileUtilities.GetStreamReader(stream);\r\n\r\n        private string GetNextLine() => _reader.ReadLine();\r\n\r\n        public ObjectKeyValueNode GetRootNode()\r\n        {\r\n            string line = GetNextLine();\r\n            if (line == null) throw new InvalidDataException(\"Expected a root object node, but no data was found.\");\r\n\r\n            var results = RegexDecisionTree.GetEntryType(line);\r\n            if (results.Type != EntryType.RootObjectKeyValue) throw new InvalidDataException($\"Expected a root object node, but found a {results.Type} node.\");\r\n\r\n            return new ObjectKeyValueNode(results.Key, GetObjectValue());\r\n        }\r\n\r\n        private static StringValueNode GetDigitKey(string key) => new StringValueNode(key);\r\n\r\n        private ListObjectKeyValueNode GetListObjectKeyValue(string key)\r\n        {\r\n            var listObjectKeyValue = new ListObjectKeyValueNode(key);\r\n\r\n            while (true)\r\n            {\r\n                string line = GetNextLine().Trim().TrimEnd(',');\r\n                if (line == \"]\") break;\r\n\r\n                var results = RegexDecisionTree.GetEntryType(line);\r\n\r\n                // ReSharper disable once SwitchStatementMissingSomeCases\r\n                switch (results.Type)\r\n                {\r\n                    case EntryType.OpenBraces:\r\n                        listObjectKeyValue.Add(GetObjectValue());\r\n  
                      break;\r\n                    case EntryType.DigitKey:\r\n                        listObjectKeyValue.Add(GetDigitKey(line));\r\n                        break;\r\n                    default:\r\n                        throw new InvalidDataException($\"Unhandled entry type encountered: {results.Type}\");\r\n                }\r\n            }\r\n\r\n            return listObjectKeyValue;\r\n        }\r\n\r\n        private StringKeyValueNode GetMultiLineKeyValue(string key, string value)\r\n        {\r\n            _sb.Clear();\r\n            _sb.Append(value);\r\n\r\n            while (true)\r\n            {\r\n                string line = GetNextLine().Trim();\r\n                if (line.OptimizedStartsWith('\\'')) break;\r\n                _sb.Append(' ');\r\n                _sb.Append(line);\r\n            }\r\n\r\n            return new StringKeyValueNode(key, _sb.ToString());\r\n        }\r\n\r\n        private ObjectValueNode GetObjectValue()\r\n        {\r\n            var type = \"(unknown)\";\r\n            var nodes   = new List<IImportNode>();\r\n\r\n            while (true)\r\n            {\r\n                string line = GetNextLine();\r\n                var results = RegexDecisionTree.GetEntryType(line);\r\n\r\n                if (results.Type == EntryType.EndBraces || results.Type == EntryType.EndBracesWithDataType)\r\n                {\r\n                    if (results.Type == EntryType.EndBracesWithDataType) type = results.Key;\r\n                    break;\r\n                }\r\n\r\n                // ReSharper disable once SwitchStatementMissingSomeCases\r\n                switch (results.Type)\r\n                {\r\n                    case EntryType.ObjectKeyValue:\r\n                        nodes.Add(new ObjectKeyValueNode(results.Key, GetObjectValue()));\r\n                        break;\r\n                    case EntryType.ListObjectKeyValue:\r\n                        
nodes.Add(GetListObjectKeyValue(results.Key));\r\n                        break;\r\n                    case EntryType.DigitKeyValue:\r\n                    case EntryType.StringKeyValue:\r\n                    case EntryType.ReferenceStringKeyValue:\r\n                        nodes.Add(new StringKeyValueNode(results.Key, results.Value));\r\n                        break;\r\n                    case EntryType.UndefKeyValue:\r\n                    case EntryType.EmptyListKeyValue:\r\n                    case EntryType.EmptyValueKeyValue:\r\n                        nodes.Add(new StringKeyValueNode(results.Key, null));\r\n                        break;\r\n                    case EntryType.MultiLineKeyValue:\r\n                        nodes.Add(GetMultiLineKeyValue(results.Key, results.Value));\r\n                        break;\r\n                    default:\r\n                        throw new InvalidDataException($\"Unhandled entry type encountered in GetObjectValue: {results.Type}: [{line}]\");\r\n                }\r\n            }\r\n\r\n            return new ObjectValueNode(type, nodes);\r\n        }\r\n\r\n        public void Dispose() => _reader.Dispose();\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/IO/EntryType.cs",
    "content": "﻿namespace CacheUtils.DataDumperImport.IO\r\n{\r\n    internal enum EntryType\r\n    {\r\n        DigitKeyValue,\r\n        DigitKey,\r\n        EmptyListKeyValue,\r\n        EmptyValueKeyValue,\r\n        EndBraces,\r\n        EndBracesWithDataType,\r\n        ListObjectKeyValue,\r\n        MultiLineKeyValue,\r\n        ObjectKeyValue,\r\n        OpenBraces,\r\n        ReferenceStringKeyValue,\r\n        RootObjectKeyValue,\r\n        StringKeyValue,\r\n        UndefKeyValue\r\n    }\r\n}"
  },
  {
    "path": "CacheUtils/DataDumperImport/Import/Attribute.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Text.RegularExpressions;\r\nusing CacheUtils.DataDumperImport.DataStructures.Import;\r\nusing CacheUtils.DataDumperImport.Utilities;\r\nusing Intervals;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.DataDumperImport.Import\r\n{\r\n    internal static class Attribute\r\n    {\r\n        private static readonly HashSet<string> KnownKeys;\r\n        private static readonly Regex RangeRegex;\r\n\r\n        static Attribute()\r\n        {\r\n            KnownKeys = new HashSet<string>\r\n            {\r\n                ImportKeys.Name,\r\n                ImportKeys.Description,\r\n                ImportKeys.Code,\r\n                ImportKeys.Value\r\n            };\r\n\r\n            RangeRegex = new Regex(\"(\\\\d+)-(\\\\d+)\", RegexOptions.Compiled);\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns an array of miRNAs given a list of ObjectValues (AbstractData)\r\n        /// </summary>\r\n        public static (IInterval[] MicroRnas, IRnaEdit[] RnaEdits, bool CdsStartNotFound, bool CdsEndNotFound) ParseList(\r\n            IImportNode importNode)\r\n        {\r\n            var listMembers = importNode.GetListMembers();\r\n            if (listMembers == null) throw new InvalidDataException(\"Encountered an attribute node that could not be converted to a member list.\");\r\n\r\n            var microRnaList     = new List<IInterval>();\r\n            var rnaEditList      = new List<IRnaEdit>();\r\n            var cdsStartNotFound = false;\r\n            var cdsEndNotFound   = false;\r\n\r\n            foreach (var node in listMembers)\r\n            {\r\n                if (!(node is ObjectValueNode objectValue))\r\n                    throw new InvalidDataException($\"Could not transform the AbstractData object into an ObjectValue: 
[{node.GetType()}]\");\r\n\r\n                (string key, string value) = ParseKeyValue(objectValue);\r\n                if (key == null) continue;\r\n\r\n                // ReSharper disable once SwitchStatementMissingSomeCases\r\n                switch (key)\r\n                {\r\n                    case \"miRNA\":\r\n                        microRnaList.Add(GetInterval(value));\r\n                        break;\r\n                    case \"_rna_edit\":\r\n                        rnaEditList.Add(GetRnaEdit(value));\r\n                        break;\r\n                    case \"cds_start_NF\":\r\n                        cdsStartNotFound = true;\r\n                        break;\r\n                    case \"cds_end_NF\":\r\n                        cdsEndNotFound = true;\r\n                        break;\r\n                }\r\n            }\r\n\r\n            var microRnas = microRnaList.Count == 0 ? null : microRnaList.ToArray();\r\n            var rnaEdits  = rnaEditList.Count  == 0 ? null : rnaEditList.ToArray();\r\n            return (microRnas, rnaEdits, cdsStartNotFound, cdsEndNotFound);\r\n        }\r\n\r\n        private static IInterval GetInterval(string s)\r\n        {\r\n            var rangeMatch = RangeRegex.Match(s);\r\n            if (!rangeMatch.Success) throw new InvalidDataException($\"Unable to convert the Attribute to a miRNA object. 
The value string failed the regex: {s}\");\r\n\r\n            int start = int.Parse(rangeMatch.Groups[1].Value);\r\n            int end   = int.Parse(rangeMatch.Groups[2].Value);\r\n\r\n            return new Interval(start, end);\r\n        }\r\n\r\n        private static RnaEdit GetRnaEdit(string s)\r\n        {\r\n            var cols = s.OptimizedSplit(' ');\r\n            if (cols.Length != 3) throw new InvalidDataException($\"Expected 3 columns but found {cols.Length} when parsing RNA edit\");\r\n\r\n            int start    = int.Parse(cols[0]);\r\n            int end      = int.Parse(cols[1]);\r\n            string bases = cols[2];\r\n\r\n            return new RnaEdit(start, end, bases);\r\n        }\r\n\r\n        private static (string Key, string Value) ParseKeyValue(ObjectValueNode objectValue)\r\n        {\r\n            string key   = null;\r\n            string value = null;\r\n\r\n            foreach (var node in objectValue.Values)\r\n            {\r\n                // sanity check: make sure every key in this object is one we know about\r\n                if (!KnownKeys.Contains(node.Key))\r\n                {\r\n                    throw new InvalidDataException($\"Encountered an unknown key in the dumper attribute object: {node.Key}\");\r\n                }\r\n\r\n                switch (node.Key)\r\n                {\r\n                    case ImportKeys.Name:\r\n                    case ImportKeys.Description:\r\n                        // not used\r\n                        break;\r\n                    case ImportKeys.Code:\r\n                        key = node.GetString();\r\n                        break;\r\n                    case ImportKeys.Value:\r\n                        value = node.GetString();\r\n                        break;\r\n                    default:\r\n                        throw new InvalidDataException($\"Unknown key found: {node.Key}\");\r\n                }\r\n            }\r\n\r\n            return (key, value);\r\n       
 }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/Import/ImportExon.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.DataDumperImport.DataStructures.Import;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing CacheUtils.DataDumperImport.Utilities;\r\nusing Genome;\r\n\r\nnamespace CacheUtils.DataDumperImport.Import\r\n{\r\n    internal static class ImportExon\r\n    {\r\n        private static readonly HashSet<string> KnownKeys;\r\n\r\n        static ImportExon()\r\n        {\r\n            KnownKeys = new HashSet<string>\r\n            {\r\n                ImportKeys.End,\r\n                ImportKeys.EndPhase,\r\n                ImportKeys.Phase,\r\n                ImportKeys.StableId,\r\n                ImportKeys.Start,\r\n                ImportKeys.Strand\r\n            };\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns a new exon given an ObjectValue\r\n        /// </summary>\r\n        public static MutableExon Parse(ObjectValueNode objectValue, Chromosome currentChromosome)\r\n        {\r\n            int start = -1;\r\n            int end   = -1;\r\n            int phase = int.MinValue;\r\n\r\n            foreach (var node in objectValue.Values)\r\n            {\r\n                // sanity check: make sure we know about the keys are used for\r\n                if (!KnownKeys.Contains(node.Key))\r\n                {\r\n                    throw new InvalidDataException($\"Encountered an unknown key in the dumper mapper object: {node.Key}\");\r\n                }\r\n\r\n                switch (node.Key)\r\n                {\r\n                    case ImportKeys.Strand:\r\n                    case ImportKeys.StableId:\r\n                    case ImportKeys.EndPhase:\r\n                        // not used\r\n                        break;\r\n                    case ImportKeys.End:\r\n                        end = node.GetInt32();\r\n                        break;\r\n                    case ImportKeys.Phase:\r\n                        phase = 
node.GetInt32();\r\n                        break;\r\n                    case ImportKeys.Start:\r\n                        start = node.GetInt32();\r\n                        break;\r\n                    default:\r\n                        throw new InvalidDataException($\"Unknown key found: {node.Key}\");\r\n                }\r\n            }\r\n\r\n            return new MutableExon(currentChromosome, start, end, phase);\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns an array of exons given a list of ObjectValues (AbstractData)\r\n        /// </summary>\r\n        public static MutableExon[] ParseList(IImportNode importNode, Chromosome chromosome)\r\n        {\r\n            var listMembers = importNode.GetListMembers();\r\n            if (listMembers == null) throw new InvalidDataException(\"Encountered an exon node that could not be converted to a member list.\");\r\n\r\n            var exons = new MutableExon[listMembers.Count];\r\n\r\n            for (var exonIndex = 0; exonIndex < listMembers.Count; exonIndex++)\r\n            {\r\n                if (listMembers[exonIndex] is ObjectValueNode objectValue)\r\n                {\r\n                    exons[exonIndex] = Parse(objectValue, chromosome);\r\n                }\r\n                else\r\n                {\r\n                    throw new InvalidDataException($\"Could not transform the AbstractData object into an ObjectValue: [{listMembers[exonIndex].GetType()}]\");\r\n                }\r\n            }\r\n\r\n            return exons;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "CacheUtils/DataDumperImport/Import/ImportGene.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.DataDumperImport.DataStructures.Import;\r\nusing CacheUtils.DataDumperImport.Utilities;\r\n\r\nnamespace CacheUtils.DataDumperImport.Import\r\n{\r\n    internal static class ImportGene\r\n    {\r\n        private static readonly HashSet<string> KnownKeys;\r\n\r\n        static ImportGene()\r\n        {\r\n            KnownKeys = new HashSet<string>\r\n            {\r\n                ImportKeys.End,\r\n                ImportKeys.StableId,\r\n                ImportKeys.Start,\r\n                ImportKeys.Strand\r\n            };\r\n        }\r\n\r\n        public static (int Start, int End, string Id, bool OnReverseStrand) Parse(IImportNode importNode)\r\n        {\r\n            var objectValue = importNode.GetObjectValueNode();\r\n            if (objectValue == null) throw new InvalidDataException(\"Encountered a gene import node that could not be converted to an object value node.\");\r\n\r\n            int start           = -1;\r\n            int end             = -1;\r\n            string stableId     = null;\r\n            var onReverseStrand = false;\r\n\r\n            foreach (var node in objectValue.Values)\r\n            {\r\n                // sanity check: make sure we know about the keys are used for\r\n                if (!KnownKeys.Contains(node.Key))\r\n                {\r\n                    throw new InvalidDataException($\"Encountered an unknown key in the dumper gene object: {node.Key}\");\r\n                }\r\n\r\n                // handle each key\r\n                switch (node.Key)\r\n                {\r\n                    case ImportKeys.End:\r\n                        end = node.GetInt32();\r\n                        break;\r\n                    case ImportKeys.StableId:\r\n                        stableId = node.GetString();\r\n                        break;\r\n                    case ImportKeys.Start:\r\n                        start = 
node.GetInt32();\r\n                        break;\r\n                    case ImportKeys.Strand:\r\n                        onReverseStrand = TranscriptUtilities.GetStrand(node);\r\n                        break;\r\n                    default:\r\n                        throw new InvalidDataException($\"Unknown key found: {node.Key}\");\r\n                }\r\n            }\r\n\r\n            return (start, end, stableId, onReverseStrand);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/Import/ImportIntron.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.DataDumperImport.DataStructures.Import;\r\nusing Intervals;\r\n\r\nnamespace CacheUtils.DataDumperImport.Import\r\n{\r\n    internal static class ImportIntron\r\n    {\r\n        private static readonly HashSet<string> KnownKeys;\r\n\r\n        static ImportIntron()\r\n        {\r\n            KnownKeys = new HashSet<string>\r\n            {\r\n                ImportKeys.Analysis,\r\n                ImportKeys.Adaptor,\r\n                ImportKeys.DbId,\r\n                ImportKeys.End,\r\n                ImportKeys.Next,\r\n                ImportKeys.Prev,\r\n                ImportKeys.SeqName,\r\n                ImportKeys.Slice,\r\n                ImportKeys.Start,\r\n                ImportKeys.Strand\r\n            };\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns a new exon given an ObjectValue\r\n        /// </summary>\r\n        private static IInterval Parse(ObjectValueNode objectValue)\r\n        {\r\n            int start = -1;\r\n            int end   = -1;\r\n\r\n            foreach (var node in objectValue.Values)\r\n            {\r\n                // sanity check: make sure we know about the keys are used for\r\n                if (!KnownKeys.Contains(node.Key))\r\n                {\r\n                    throw new InvalidDataException($\"Encountered an unknown key in the dumper mapper object: {node.Key}\");\r\n                }\r\n\r\n                switch (node.Key)\r\n                {\r\n                    case ImportKeys.Analysis:\r\n                    case ImportKeys.Adaptor:\r\n                    case ImportKeys.DbId:\r\n                    case ImportKeys.Next:\r\n                    case ImportKeys.Prev:\r\n                    case ImportKeys.SeqName:\r\n                    case ImportKeys.Strand:\r\n                    case ImportKeys.Slice:\r\n                        // not used\r\n                        break;\r\n            
        case ImportKeys.End:\r\n                        end = node.GetInt32();\r\n                        break;\r\n                    case ImportKeys.Start:\r\n                        start = node.GetInt32();\r\n                        break;\r\n                    default:\r\n                        throw new InvalidDataException($\"Unknown key found: {node.Key}\");\r\n                }\r\n            }\r\n\r\n            return new Interval(start, end);\r\n        }\r\n\r\n        /// <summary>\r\n        /// parses the relevant data from each intron object\r\n        /// </summary>\r\n        public static IInterval[] ParseList(List<IListMember> members)\r\n        {\r\n            var introns = new IInterval[members.Count];\r\n\r\n            for (var intronIndex = 0; intronIndex < members.Count; intronIndex++)\r\n            {\r\n                if (!(members[intronIndex] is ObjectValueNode objectValue))\r\n                {\r\n                    throw new InvalidDataException($\"Could not transform the AbstractData object into an ObjectValue: [{members[intronIndex].GetType()}]\");\r\n                }\r\n\r\n                introns[intronIndex] = Parse(objectValue);\r\n            }\r\n\r\n            return introns;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/Import/ImportKeys.cs",
    "content": "﻿namespace CacheUtils.DataDumperImport.Import\r\n{\r\n    internal static class ImportKeys\r\n    {\r\n        internal const string Adaptor                     = \"adaptor\";\r\n        internal const string AltSeq                      = \"alt_seq\";\r\n        internal const string Analysis                    = \"analysis\";\r\n        internal const string AnalysisId                  = \"_analysis_id\";\r\n        internal const string Attributes                  = \"attributes\";\r\n        internal const string BamEditStatus               = \"_bam_edit_status\";\r\n        internal const string Biotype                     = \"biotype\";\r\n        internal const string BoundLengths                = \"_bound_lengths\";\r\n        internal const string Ccds                        = \"_ccds\";\r\n        internal const string CdnaCodingEnd               = \"cdna_coding_end\";\r\n        internal const string CdnaCodingStart             = \"cdna_coding_start\";\r\n        internal const string CellTypeCount               = \"cell_type_count\";\r\n        internal const string CellTypes                   = \"cell_types\";\r\n        internal const string Code                        = \"code\";\r\n        internal const string CodingDnaCodingEnd          = \"cdna_coding_end\";\r\n        internal const string CodingDnaCodingStart        = \"cdna_coding_start\";\r\n        internal const string CodingRegionEnd             = \"coding_region_end\";\r\n        internal const string CodingRegionStart           = \"coding_region_start\";\r\n        internal const string CodonTable                  = \"codon_table\";\r\n        internal const string CreatedDate                 = \"created_date\";\r\n        internal const string DbId                        = \"dbID\";\r\n        internal const string Description                 = \"description\";\r\n        internal const string DisplayLabel                = \"display_label\";\r\n        internal const 
string DisplayXref                 = \"display_xref\";\r\n        internal const string End                         = \"end\";\r\n        internal const string EndExon                     = \"end_exon\";\r\n        internal const string EndPhase                    = \"end_phase\";\r\n        internal const string EpigenomeCount              = \"epigenome_count\";\r\n        internal const string ExonCoordinateMapper        = \"exon_coord_mapper\";\r\n        internal const string ExternalDb                  = \"external_db\";\r\n        internal const string ExternalDisplayName         = \"external_display_name\";\r\n        internal const string ExternalName                = \"external_name\";\r\n        internal const string ExternalStatus              = \"external_status\";\r\n        internal const string FeatureType                 = \"feature_type\";\r\n        internal const string FivePrimeUtr                = \"five_prime_utr\";\r\n        internal const string From                        = \"from\";\r\n        internal const string FromCoordSystem             = \"from_cs\";\r\n        internal const string FromName                    = \"from\";\r\n        internal const string Gene                        = \"_gene\";\r\n        internal const string GeneHgnc                    = \"_gene_hgnc\";\r\n        internal const string GeneHgncId                  = \"_gene_hgnc_id\";\r\n        internal const string GenePhenotype               = \"_gene_phenotype\";\r\n        internal const string GeneStableId                = \"_gene_stable_id\";\r\n        internal const string GeneSymbol                  = \"_gene_symbol\";\r\n        internal const string GeneSymbolSource            = \"_gene_symbol_source\";\r\n        internal const string Genomic                     = \"GENOME\";\r\n        internal const string HasEvidence                 = \"has_evidence\";\r\n        internal const string Id                          = \"id\";\r\n        internal const 
string Introns                     = \"introns\";\r\n        internal const string IsCanonical                 = \"is_canonical\";\r\n        internal const string IsMatrixCompressed          = \"matrix_compressed\";\r\n        internal const string IsSorted                    = \"_is_sorted\";\r\n        internal const string Mapper                      = \"mapper\";\r\n        internal const string Matrix                      = \"matrix\";\r\n        internal const string ModifiedDate                = \"modified_date\";\r\n        internal const string Name                        = \"name\";\r\n        internal const string Next                        = \"next\";\r\n        internal const string Ori                         = \"ori\";\r\n        internal const string PairCodingDna               = \"_pair_cdna\";\r\n        internal const string PairCount                   = \"pair_count\";\r\n        internal const string PairGenomic                 = \"_pair_genomic\";\r\n        internal const string Peptide                     = \"peptide\";\r\n        internal const string PeptideLength               = \"peptide_length\";\r\n        internal const string Phase                       = \"phase\";\r\n        internal const string PolyPhen                    = \"polyphen\";\r\n        internal const string PolyPhenHumDiv              = \"polyphen_humdiv\";\r\n        internal const string PolyPhenHumVar              = \"polyphen_humvar\";\r\n        internal const string Prev                        = \"prev\";\r\n        internal const string Projected                   = \"projected\";\r\n        internal const string Protein                     = \"_protein\";\r\n        internal const string ProteinFeatures             = \"protein_features\";\r\n        internal const string ProteinFunctionPredictions  = \"protein_function_predictions\";\r\n        internal const string Refseq                      = \"_refseq\";\r\n        internal const string 
RegulatoryBuildId           = \"regulatory_build_id\";\r\n        internal const string Selenocysteines             = \"selenocysteines\";\r\n        internal const string SeqEdits                    = \"seq_edits\";\r\n        internal const string SeqName                     = \"seqname\";\r\n        internal const string Sequence                    = \"seq\";\r\n        internal const string Set                         = \"set\";\r\n        internal const string Sift                        = \"sift\";\r\n        internal const string Slice                       = \"slice\";\r\n        internal const string SortedExons                 = \"sorted_exons\";\r\n        internal const string Source                      = \"source\";\r\n        internal const string SplicedSequence             = \"spliced_seq\";\r\n        internal const string StableId                    = \"stable_id\";\r\n        internal const string Start                       = \"start\";\r\n        internal const string StartExon                   = \"start_exon\";\r\n        internal const string StartPhase                  = \"start_phase\";\r\n        internal const string Strand                      = \"strand\";\r\n        internal const string SubAnalysis                 = \"sub_analysis\";\r\n        internal const string SwissProt                   = \"_swissprot\";\r\n        internal const string ThreePrimeUtr               = \"three_prime_utr\";\r\n        internal const string To                          = \"to\";\r\n        internal const string ToCoordSystem               = \"to_cs\";\r\n        internal const string ToName                      = \"to\";\r\n        internal const string TransExonArray              = \"_trans_exon_array\";\r\n        internal const string Transcript                  = \"transcript\";\r\n        internal const string TranslateableSeq            = \"translateable_seq\";\r\n        internal const string Translation                 = 
\"translation\";\r\n        internal const string TranslationMd5              = \"translation_md5\";\r\n        internal const string Trembl                      = \"_trembl\";\r\n        internal const string UniParc                     = \"_uniparc\";\r\n        internal const string Value                       = \"value\";\r\n        internal const string VariationEffectFeatureCache = \"_variation_effect_feature_cache\";\r\n        internal const string VepFeatureType              = \"_vep_feature_type\";\r\n        internal const string VepLazyLoaded               = \"_vep_lazy_loaded\";\r\n        internal const string Version                     = \"version\";\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/Import/ImportMapper.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing CacheUtils.DataDumperImport.DataStructures.Import;\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\n\nnamespace CacheUtils.DataDumperImport.Import\n{\n    internal static class ImportMapper\n    {\n        private static readonly HashSet<string> KnownKeys;\n\n        static ImportMapper()\n        {\n            KnownKeys = new HashSet<string>\n            {\n                ImportKeys.FromCoordSystem,\n                ImportKeys.FromName,\n                ImportKeys.IsSorted,\n                ImportKeys.PairCodingDna,\n                ImportKeys.PairCount,\n                ImportKeys.PairGenomic,\n                ImportKeys.ToCoordSystem,\n                ImportKeys.ToName\n            };\n        }\n\n        /// <summary>\n        /// parses the relevant data from each exon coordinate mapper object\n        /// </summary>\n        public static MutableTranscriptRegion[] Parse(ObjectValueNode objectValue)\n        {\n            MutableTranscriptRegion[] cdnaMaps = null;\n\n            foreach (var node in objectValue.Values)\n            {\n                // sanity check: make sure we know about the keys are used for\n                if (!KnownKeys.Contains(node.Key))\n                {\n                    throw new InvalidDataException($\"Encountered an unknown key in the dumper mapper object: {node.Key}\");\n                }\n\n                switch (node.Key)\n                {\n                    case ImportKeys.ToName:\n                    case ImportKeys.PairCount:\n                    case ImportKeys.PairCodingDna:\n                    case ImportKeys.FromCoordSystem:\n                    case ImportKeys.FromName:\n                    case ImportKeys.IsSorted:\n                    case ImportKeys.ToCoordSystem:\n                        // not used\n                        break;\n                    case ImportKeys.PairGenomic:\n                        if (node is 
ObjectKeyValueNode pairGenomicNode)\n                        {\n                            cdnaMaps = ImportPairGenomic.Parse(pairGenomicNode.Value);\n                        }\n                        else if (!node.IsUndefined())\n                        {\n                            throw new InvalidDataException($\"Could not transform the AbstractData object into an ObjectKeyValue: [{node.GetType()}]\");\n                        }\n                        break;\n                    default:\n                        throw new InvalidDataException($\"Unknown key found: {node.Key}\");\n                }\n            }\n\n            return cdnaMaps;\n        }\n    }\n}\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/Import/ImportMapperPair.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.DataDumperImport.DataStructures.Import;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.DataDumperImport.Import\r\n{\r\n    internal static class ImportMapperPair\r\n    {\r\n        private static readonly HashSet<string> KnownKeys;\r\n\r\n        static ImportMapperPair()\r\n        {\r\n            KnownKeys = new HashSet<string>\r\n            {\r\n                ImportKeys.From,\r\n                ImportKeys.Ori,\r\n                ImportKeys.To\r\n            };\r\n        }\r\n\r\n        /// <summary>\r\n        /// parses the relevant data from each mapper pairs object\r\n        /// </summary>\r\n        private static MutableTranscriptRegion Parse(ObjectValueNode objectValue)\r\n        {\r\n            int fromStart = -1;\r\n            int fromEnd   = -1;\r\n            var fromType  = MapperUnitType.Unknown;\r\n            int toStart   = -1;\r\n            int toEnd     = -1;\r\n\r\n            foreach (var node in objectValue.Values)\r\n            {\r\n                // sanity check: make sure we know about the keys are used for\r\n                if (!KnownKeys.Contains(node.Key))\r\n                {\r\n                    throw new InvalidDataException($\"Encountered an unknown key in the mapper pair object: {node.Key}\");\r\n                }\r\n\r\n                switch (node.Key)\r\n                {\r\n                    case ImportKeys.Ori:\r\n                        // not used\r\n                        break;\r\n                    case ImportKeys.From:\r\n                        if (node is ObjectKeyValueNode fromKeyNode)\r\n                        {\r\n                            (fromStart, fromEnd, fromType) = ImportMapperUnit.Parse(fromKeyNode.Value);\r\n                        }\r\n                        else\r\n                        {\r\n 
                           throw new InvalidDataException($\"Could not transform the AbstractData object into an ObjectKeyValue: [{node.GetType()}]\");\r\n                        }\r\n                        break;\r\n                    case ImportKeys.To:\r\n                        if (node is ObjectKeyValueNode toKeyNode)\r\n                        {\r\n                            (toStart, toEnd, _) = ImportMapperUnit.Parse(toKeyNode.Value);\r\n                        }\r\n                        else\r\n                        {\r\n                            throw new InvalidDataException($\"Could not transform the AbstractData object into an ObjectKeyValue: [{node.GetType()}]\");\r\n                        }\r\n                        break;\r\n                    default:\r\n                        throw new InvalidDataException($\"Unknown key found: {node.Key}\");\r\n                }\r\n            }\r\n\r\n            return GetCdnaMap(fromStart, fromEnd, fromType, toStart, toEnd);\r\n        }\r\n\r\n        private static MutableTranscriptRegion GetCdnaMap(int fromStart, int fromEnd, MapperUnitType fromType, int toStart, int toEnd)\r\n        {\r\n            return fromType == MapperUnitType.Genomic\r\n                ? 
new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, fromStart, fromEnd, toStart, toEnd)\r\n                : new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, toStart, toEnd, fromStart, fromEnd);\r\n        }\r\n\r\n        /// <summary>\r\n        /// parses the relevant data from each mapper pairs object\r\n        /// </summary>\r\n        public static MutableTranscriptRegion[] ParseList(List<IListMember> listMembers)\r\n        {\r\n            var cdnaMaps = new List<MutableTranscriptRegion>(listMembers.Count);\r\n\r\n            foreach (var entry in listMembers)\r\n            {\r\n                if (!(entry is ObjectValueNode mapperPairNode))          throw new InvalidDataException($\"Could not transform the AbstractData object into an ObjectValue: [{entry.GetType()}]\");\r\n                if (mapperPairNode.Type != \"Bio::EnsEMBL::Mapper::Pair\") throw new InvalidDataException($\"Expected a mapper pair data type, but found the following data type: [{mapperPairNode.Type}]\");\r\n\r\n                cdnaMaps.Add(Parse(mapperPairNode));\r\n            }\r\n\r\n            return cdnaMaps.ToArray();\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/Import/ImportMapperUnit.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing CacheUtils.DataDumperImport.DataStructures.Import;\nusing CacheUtils.DataDumperImport.Utilities;\n\nnamespace CacheUtils.DataDumperImport.Import\n{\n    internal static class ImportMapperUnit\n    {\n        private static readonly HashSet<string> KnownKeys;\n\n        static ImportMapperUnit()\n        {\n            KnownKeys = new HashSet<string>\n            {\n                ImportKeys.End,\n                ImportKeys.Id,\n                ImportKeys.Start\n            };\n        }\n\n        /// <summary>\n        /// parses the relevant data from each mapper unit object\n        /// </summary>\n        public static (int Start, int End, MapperUnitType Type) Parse(ObjectValueNode objectValue)\n        {\n            int start = -1;\n            int end   = -1;\n            var type  = MapperUnitType.Unknown;\n\n            foreach (var node in objectValue.Values)\n            {\n                // sanity check: make sure we know about the keys are used for\n                if (!KnownKeys.Contains(node.Key))\n                {\n                    throw new InvalidDataException($\"Encountered an unknown key in the mapper unit object: {node.Key}\");\n                }\n\n                switch (node.Key)\n                {\n                    case ImportKeys.Id:\n                        type = TranscriptUtilities.GetMapperUnitType(node);\n                        break;\n                    case ImportKeys.End:\n                        end = node.GetInt32();\n                        break;\n                    case ImportKeys.Start:\n                        start = node.GetInt32();\n                        break;\n                    default:\n                        throw new InvalidDataException($\"Unknown key found: {node.Key}\");\n                }\n            }\n\n            return (start, end, type);\n        }\n    }\n\n    public enum MapperUnitType : byte\n    {\n        
Unknown,\n        CodingDna,\n        Genomic\n    }\n}\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/Import/ImportPairGenomic.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing CacheUtils.DataDumperImport.DataStructures.Import;\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\n\nnamespace CacheUtils.DataDumperImport.Import\n{\n    internal static class ImportPairGenomic\n    {\n        private static readonly HashSet<string> KnownKeys;\n\n        static ImportPairGenomic()\n        {\n            KnownKeys = new HashSet<string>\n            {\n                ImportKeys.Genomic\n            };\n        }\n\n        /// <summary>\n        /// parses the relevant data from each pair genomic object\n        /// </summary>\n        public static MutableTranscriptRegion[] Parse(ObjectValueNode objectValue)\n        {\n            MutableTranscriptRegion[] cdnaMaps = null;\n\n            foreach (var node in objectValue.Values)\n            {\n                // sanity check: make sure we know about the keys are used for\n                if (!KnownKeys.Contains(node.Key))\n                {\n                    throw new InvalidDataException($\"Encountered an unknown key in the pair genomic object: {node.Key}\");\n                }\n\n                switch (node.Key)\n                {\n                    case ImportKeys.Genomic:\n                        if (node is ListObjectKeyValueNode genomicNode)\n                        {\n                            cdnaMaps = ImportMapperPair.ParseList(genomicNode.Values);\n                        }\n                        else if (!node.IsUndefined())\n                        {\n                            throw new InvalidDataException($\"Could not transform the AbstractData object into an ObjectKeyValue: [{node.GetType()}]\");\n                        }\n                        break;\n                    default:\n                        throw new InvalidDataException($\"Unknown key found: {node.Key}\");\n                }\n            }\n\n            return cdnaMaps;\n        }\n    }\n}\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/Import/ImportPrediction.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing CacheUtils.DataDumperImport.DataStructures.Import;\n\nnamespace CacheUtils.DataDumperImport.Import\n{\n    internal static class ImportPrediction\n    {\n        private static readonly HashSet<string> KnownKeys;\n\n        static ImportPrediction()\n        {\n            KnownKeys = new HashSet<string>\n            {\n                ImportKeys.Analysis,\n                ImportKeys.IsMatrixCompressed,\n                ImportKeys.Matrix,\n                ImportKeys.PeptideLength,\n                ImportKeys.SubAnalysis,\n                ImportKeys.TranslationMd5\n            };\n        }\n\n        /// <summary>\n        /// parses the relevant data from each prediction object\n        /// </summary>\n        public static string Parse(ObjectValueNode objectValue)\n        {\n            string predictionData = null;\n\n            foreach (var node in objectValue.Values)\n            {\n                // sanity check: make sure we know about the keys are used for\n                if (!KnownKeys.Contains(node.Key))\n                {\n                    throw new InvalidDataException($\"Encountered an unknown key in the dumper prediction object: {node.Key}\");\n                }\n\n                switch (node.Key)\n                {\n                    case ImportKeys.Analysis:\n                    case ImportKeys.IsMatrixCompressed:\n                    case ImportKeys.PeptideLength:\n                    case ImportKeys.SubAnalysis:\n                    case ImportKeys.TranslationMd5:\n                        break;\n                    case ImportKeys.Matrix:\n                        predictionData = node.GetString();\n                        break;\n                    default:\n                        throw new InvalidDataException($\"Unknown key found: {node.Key}\");\n                }\n            }\n\n            return predictionData;\n        }\n    }\n}\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/Import/ImportProteinFunctionPredictions.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing CacheUtils.DataDumperImport.DataStructures.Import;\nusing CacheUtils.DataDumperImport.Utilities;\n\nnamespace CacheUtils.DataDumperImport.Import\n{\n    internal static class ImportProteinFunctionPredictions\n    {\n        private static readonly HashSet<string> KnownKeys;\n\n        static ImportProteinFunctionPredictions()\n        {\n            KnownKeys = new HashSet<string>\n            {\n                ImportKeys.PolyPhenHumVar,\n                ImportKeys.PolyPhenHumDiv,\n                ImportKeys.PolyPhen,\n                ImportKeys.Sift\n            };\n        }\n\n        public static (string SiftMatrix, string PolyphenMatrix) Parse(ObjectValueNode objectValue)\n        {\n            string siftData     = null;\n            string polyphenData = null;\n\n            foreach (var node in objectValue.Values)\n            {\n                // sanity check: make sure we know about the keys are used for\n                if (!KnownKeys.Contains(node.Key))\n                {\n                    throw new InvalidDataException($\"Encountered an unknown key in the dumper mapper object: {node.Key}\");\n                }\n\n                switch (node.Key)\n                {\n                    case ImportKeys.PolyPhen:\n                    case ImportKeys.PolyPhenHumDiv:\n                        // not used\n                        break;\n                    case ImportKeys.PolyPhenHumVar:\n                        // used by default\n                        polyphenData = node.GetPredictionData();\n                        break;\n                    case ImportKeys.Sift:\n                        siftData = node.GetPredictionData();\n                        break;\n                    default:\n                        throw new InvalidDataException($\"Unknown key found: {node.Key}\");\n                }\n            }\n\n            return (siftData, polyphenData);\n        }\n  
  }\n}\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/Import/ImportRegulatoryFeature.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.DataDumperImport.DataStructures.Import;\r\nusing CacheUtils.Helpers;\r\nusing Genome;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.DataDumperImport.Import\r\n{\r\n    public static class ImportRegulatoryFeature\r\n    {\r\n        private static readonly HashSet<string> KnownKeys;\r\n\r\n        static ImportRegulatoryFeature()\r\n        {\r\n            KnownKeys = new HashSet<string>\r\n            {\r\n                ImportKeys.AnalysisId,\r\n                ImportKeys.BoundLengths,\r\n                ImportKeys.CellTypeCount,\r\n                ImportKeys.CellTypes,\r\n                ImportKeys.DbId,\r\n                ImportKeys.DisplayLabel,\r\n                ImportKeys.End,\r\n                ImportKeys.EpigenomeCount,\r\n                ImportKeys.FeatureType,\r\n                ImportKeys.HasEvidence,\r\n                ImportKeys.Projected,\r\n                ImportKeys.RegulatoryBuildId,\r\n                ImportKeys.Set,\r\n                ImportKeys.StableId,\r\n                ImportKeys.Start,\r\n                ImportKeys.Strand,\r\n                ImportKeys.Slice,\r\n                ImportKeys.VepFeatureType\r\n            };\r\n        }\r\n\r\n        /// <summary>\r\n        /// parses the relevant data from each regulatory element\r\n        /// </summary>\r\n        public static IRegulatoryRegion Parse(ObjectValueNode objectValue, Chromosome chromosome)\r\n        {\r\n            int start       = -1;\r\n            int end         = -1;\r\n            string stableId = null;\r\n            string type     = null;\r\n\r\n            foreach (var node in objectValue.Values)\r\n            {\r\n                // sanity check: make sure we know about the keys are used for\r\n                if 
(!KnownKeys.Contains(node.Key))\r\n                {\r\n                    throw new InvalidDataException($\"Encountered an unknown key in the dumper regulatory element object: {node.Key}\");\r\n                }\r\n\r\n                switch (node.Key)\r\n                {\r\n                    case ImportKeys.AnalysisId:\r\n                    case ImportKeys.BoundLengths:\r\n                    case ImportKeys.CellTypeCount:\r\n                    case ImportKeys.CellTypes:\r\n                    case ImportKeys.DbId:\r\n                    case ImportKeys.DisplayLabel:\r\n                    case ImportKeys.EpigenomeCount:\r\n                    case ImportKeys.HasEvidence:\r\n                    case ImportKeys.Projected:\r\n                    case ImportKeys.RegulatoryBuildId:\r\n                    case ImportKeys.Set:\r\n                    case ImportKeys.Strand:\r\n                    case ImportKeys.Slice:\r\n                    case ImportKeys.VepFeatureType:\r\n                        // not used\r\n                        break;\r\n                    case ImportKeys.FeatureType:\r\n                        type = node.GetString();\r\n                        break;\r\n                    case ImportKeys.End:\r\n                        end = node.GetInt32();\r\n                        break;\r\n                    case ImportKeys.StableId:\r\n                        stableId = node.GetString();\r\n                        break;\r\n                    case ImportKeys.Start:\r\n                        start = node.GetInt32();\r\n                        break;\r\n                    default:\r\n                        throw new InvalidDataException($\"Unknown key found: {node.Key}\");\r\n                }\r\n            }\r\n\r\n            return new RegulatoryRegion(chromosome, start, end, CompactId.Convert(stableId),\r\n                RegulatoryRegionTypeHelper.GetRegulatoryRegionType(type));\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/Import/ImportSeqEdits.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.DataDumperImport.DataStructures.Import;\r\n\r\nnamespace CacheUtils.DataDumperImport.Import\r\n{\r\n    internal static class ImportSeqEdits\r\n    {\r\n        private static readonly HashSet<string> KnownKeys;\r\n\r\n        static ImportSeqEdits()\r\n        {\r\n            KnownKeys = new HashSet<string>\r\n            {\r\n                ImportKeys.AltSeq,\r\n                ImportKeys.Code,\r\n                ImportKeys.Description,\r\n                ImportKeys.End,\r\n                ImportKeys.Name,\r\n                ImportKeys.Start\r\n            };\r\n        }\r\n\r\n        /// <summary>\r\n        /// parses the relevant data from each seqedits object\r\n        /// </summary>\r\n        public static int[] Parse(List<IListMember> members)\r\n        {\r\n            var selenocysteineList = new List<int>();\r\n\r\n            foreach (var seqEditNode in members)\r\n            {\r\n                if (!(seqEditNode is ObjectValueNode seListNode)) continue;\r\n\r\n                string code = null;\r\n                int start   = -1;\r\n\r\n                foreach (var node in seListNode.Values)\r\n                {\r\n                    // sanity check: make sure we know about the keys are used for\r\n                    if (!KnownKeys.Contains(node.Key))\r\n                    {\r\n                        throw new InvalidDataException($\"Encountered an unknown key in the dumper seq_edits object: {node.Key}\");\r\n                    }\r\n\r\n                    switch (node.Key)\r\n                    {\r\n                        case ImportKeys.AltSeq:\r\n                        case ImportKeys.Description:\r\n                        case ImportKeys.End:\r\n                        case ImportKeys.Name:\r\n                            // not used\r\n                            break;\r\n                        case ImportKeys.Code:\r\n                   
         code = node.GetString();\r\n                            break;\r\n                        case ImportKeys.Start:\r\n                            start = node.GetInt32();\r\n                            break;\r\n                        default:\r\n                            throw new InvalidDataException($\"Unknown key found: {node.Key}\");\r\n                    }\r\n                }\r\n\r\n                if (code != null && code == \"_selenocysteine\") selenocysteineList.Add(start);\r\n            }\r\n\r\n            return selenocysteineList.Count == 0 ? null : selenocysteineList.ToArray();\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/Import/ImportTranscript.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing CacheUtils.DataDumperImport.DataStructures;\r\nusing CacheUtils.DataDumperImport.DataStructures.Import;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing CacheUtils.DataDumperImport.Utilities;\r\nusing CacheUtils.Helpers;\r\nusing CacheUtils.Utilities;\r\nusing Genome;\r\nusing Intervals;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing TranscriptUtilities = CacheUtils.DataDumperImport.Utilities.TranscriptUtilities;\r\n\r\nnamespace CacheUtils.DataDumperImport.Import\r\n{\r\n    public static class ImportTranscript\r\n    {\r\n        private static readonly HashSet<string> KnownKeys;\r\n\r\n        static ImportTranscript()\r\n        {\r\n            KnownKeys = new HashSet<string>\r\n            {\r\n                ImportKeys.Attributes,\r\n                ImportKeys.BamEditStatus,\r\n                ImportKeys.Biotype,\r\n                ImportKeys.Ccds,\r\n                ImportKeys.CdnaCodingEnd,\r\n                ImportKeys.CdnaCodingStart,\r\n                ImportKeys.CodingRegionEnd,\r\n                ImportKeys.CodingRegionStart,\r\n                ImportKeys.CreatedDate,\r\n                ImportKeys.DbId,\r\n                ImportKeys.Description,\r\n                ImportKeys.DisplayXref,\r\n                ImportKeys.End,\r\n                ImportKeys.ExternalDb,\r\n                ImportKeys.ExternalDisplayName,\r\n                ImportKeys.ExternalName,\r\n                ImportKeys.ExternalStatus,\r\n                ImportKeys.Gene,\r\n                ImportKeys.GeneHgnc,\r\n                ImportKeys.GeneHgncId,\r\n                ImportKeys.GenePhenotype,\r\n                ImportKeys.GeneStableId,\r\n                ImportKeys.GeneSymbol,\r\n                ImportKeys.GeneSymbolSource,\r\n                ImportKeys.IsCanonical,\r\n                
ImportKeys.ModifiedDate,\r\n                ImportKeys.Protein,\r\n                ImportKeys.Refseq,\r\n                ImportKeys.Slice,\r\n                ImportKeys.Source,\r\n                ImportKeys.StableId,\r\n                ImportKeys.Start,\r\n                ImportKeys.Strand,\r\n                ImportKeys.SwissProt,\r\n                ImportKeys.TransExonArray,\r\n                ImportKeys.Translation,\r\n                ImportKeys.Trembl,\r\n                ImportKeys.UniParc,\r\n                ImportKeys.VariationEffectFeatureCache,\r\n                ImportKeys.VepLazyLoaded,\r\n                ImportKeys.Version\r\n            };\r\n        }\r\n\r\n        /// <summary>\r\n        /// parses the relevant data from each transcript\r\n        /// </summary>\r\n        public static MutableTranscript Parse(ObjectValueNode objectValue, Chromosome chromosome, Source source)\r\n        {\r\n            // IDs\r\n            string transcriptId    = null;\r\n            byte transcriptVersion = 1;\r\n            string proteinId       = null;\r\n            byte proteinVersion    = 0;\r\n            string ccdsId          = null;\r\n            string refSeqId        = null;\r\n            string geneId          = null;\r\n            int hgncId             = -1;\r\n\r\n            // gene\r\n            int geneStart           = -1;\r\n            int geneEnd             = -1;\r\n            var geneOnReverseStrand = false;\r\n            string geneSymbol       = null;\r\n            var geneSymbolSource    = GeneSymbolSource.Unknown;\r\n\r\n            // translation\r\n            int translationStart             = -1;\r\n            int translationEnd               = -1;\r\n            MutableExon translationStartExon = null;\r\n            MutableExon translationEndExon   = null;\r\n\r\n            // predictions\r\n            string siftData     = null;\r\n            string polyphenData = null;\r\n\r\n            var bioType                  
      = BioType.other;\r\n            IInterval[] microRnas              = null;\r\n            MutableTranscriptRegion[] cdnaMaps = null;\r\n            IInterval[] introns                = null;\r\n            string peptideSequence             = null;\r\n            string translateableSequence       = null;\r\n            var isCanonical                    = false;\r\n            int compDnaCodingStart             = -1;\r\n            int compDnaCodingEnd               = -1;\r\n            int start                          = -1;\r\n            int end                            = -1;\r\n            MutableExon[] exons                = null;\r\n            var cdsStartNotFound               = false;\r\n            var cdsEndNotFound                 = false;\r\n            int[] selenocysteinePositions      = null;\r\n            IRnaEdit[] rnaEdits                = null;\r\n            string bamEditStatus               = null;\r\n\r\n            foreach (var node in objectValue.Values)\r\n            {\r\n                // sanity check: make sure we know about the keys are used for\r\n                if (!KnownKeys.Contains(node.Key))\r\n                {\r\n                    throw new InvalidDataException($\"Encountered an unknown key in the dumper transcript object: {node.Key}\");\r\n                }\r\n\r\n                // handle each key\r\n                switch (node.Key)\r\n                {\r\n                    case ImportKeys.CodingRegionEnd:\r\n                    case ImportKeys.CodingRegionStart:\r\n                    case ImportKeys.CreatedDate:\r\n                    case ImportKeys.DbId:\r\n                    case ImportKeys.Description:\r\n                    case ImportKeys.DisplayXref:\r\n                    case ImportKeys.ExternalDb:\r\n                    case ImportKeys.ExternalDisplayName:\r\n                    case ImportKeys.ExternalName:\r\n                    case ImportKeys.ExternalStatus:\r\n                    case 
ImportKeys.GenePhenotype:\r\n                    case ImportKeys.GeneStableId:                    \r\n                    case ImportKeys.ModifiedDate:\r\n                    case ImportKeys.Protein:\r\n                    case ImportKeys.Slice:\r\n                    case ImportKeys.Source:\r\n                    case ImportKeys.Strand:\r\n                    case ImportKeys.SwissProt:\r\n                    case ImportKeys.Trembl:\r\n                    case ImportKeys.UniParc:\r\n                    case ImportKeys.VepLazyLoaded:\r\n                        // not used\r\n                        break;\r\n                    case ImportKeys.BamEditStatus:\r\n                        bamEditStatus = node.GetString();\r\n                        break;\r\n                    case ImportKeys.Attributes:\r\n                        (microRnas, rnaEdits, cdsStartNotFound, cdsEndNotFound) = Attribute.ParseList(node);\r\n                        break;\r\n                    case ImportKeys.Biotype:\r\n                        bioType = TranscriptUtilities.GetBiotype(node);\r\n                        break;\r\n                    case ImportKeys.Ccds:\r\n                        ccdsId = node.GetString();\r\n                        break;\r\n                    case ImportKeys.CdnaCodingEnd:\r\n                        compDnaCodingEnd = node.GetInt32();\r\n                        break;\r\n                    case ImportKeys.CdnaCodingStart:\r\n                        compDnaCodingStart = node.GetInt32();\r\n                        break;\r\n                    case ImportKeys.End:\r\n                        end = node.GetInt32();\r\n                        break;\r\n                    case ImportKeys.GeneHgncId:\r\n                        hgncId = node.GetHgncId();\r\n                        break;\r\n                    case ImportKeys.GeneSymbol:\r\n                    case ImportKeys.GeneHgnc: // older key\r\n                        geneSymbol = node.GetString();\r\n     
                   break;\r\n                    case ImportKeys.GeneSymbolSource:\r\n                        geneSymbolSource = GeneSymbolSourceHelper.GetGeneSymbolSource(node.GetString());\r\n                        break;\r\n                    case ImportKeys.Gene:\r\n                        (geneStart, geneEnd, geneId, geneOnReverseStrand) = ImportGene.Parse(node);\r\n                        break;\r\n                    case ImportKeys.IsCanonical:\r\n                        isCanonical = node.GetBool();\r\n                        break;\r\n                    case ImportKeys.Refseq:\r\n                        refSeqId = node.GetString();\r\n                        break;\r\n                    case ImportKeys.StableId:\r\n                        transcriptId = node.GetString();\r\n                        break;\r\n                    case ImportKeys.Start:\r\n                        start = node.GetInt32();\r\n                        break;\r\n                    case ImportKeys.TransExonArray:\r\n                        exons = ImportExon.ParseList(node, chromosome);\r\n                        break;\r\n                    case ImportKeys.Translation:\r\n                        (translationStart, translationEnd, proteinId, proteinVersion, translationStartExon, translationEndExon) = ImportTranslation.Parse(node, chromosome);\r\n                        break;\r\n                    case ImportKeys.VariationEffectFeatureCache:\r\n                        (cdnaMaps, introns, peptideSequence, translateableSequence, siftData, polyphenData, selenocysteinePositions) = ImportVariantEffectFeatureCache.Parse(node);\r\n                        break;\r\n                    case ImportKeys.Version:\r\n                        transcriptVersion = (byte)node.GetInt32();\r\n                        break;\r\n                    default:\r\n                        throw new InvalidDataException($\"Unknown key found: {node.Key}\");\r\n                }\r\n            }\r\n\r\n   
         var fixedTranscript = AccessionUtilities.GetMaxVersion(transcriptId, transcriptVersion);\r\n            var fixedProtein    = AccessionUtilities.GetMaxVersion(proteinId, proteinVersion);\r\n\r\n            var gene = new MutableGene(chromosome, geneStart, geneEnd, geneOnReverseStrand, geneSymbol,\r\n                geneSymbolSource, geneId, hgncId);\r\n\r\n            var codingRegion = new CodingRegion(GetCodingRegionStart(geneOnReverseStrand, translationStartExon, translationEndExon, translationStart, translationEnd),\r\n                GetCodingRegionEnd(geneOnReverseStrand, translationStartExon, translationEndExon, translationStart, translationEnd), \r\n                compDnaCodingStart, compDnaCodingEnd, 0);\r\n\r\n            int totalExonLength = GetTotalExonLength(exons);\r\n            int startExonPhase  = translationStartExon?.Phase ?? int.MinValue;\r\n\r\n            return new MutableTranscript(chromosome, start, end, fixedTranscript.Id, fixedTranscript.Version, ccdsId,\r\n                refSeqId, bioType, isCanonical, codingRegion, fixedProtein.Id, fixedProtein.Version,\r\n                peptideSequence, source, gene, exons, startExonPhase, totalExonLength, introns, cdnaMaps,\r\n                siftData, polyphenData, translateableSequence, microRnas, cdsStartNotFound, cdsEndNotFound,\r\n                selenocysteinePositions, rnaEdits, bamEditStatus);\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns the start position of the coding region. Returns -1 if no translation was possible.\r\n        /// </summary>\r\n        private static int GetCodingRegionStart(bool onReverseStrand, IInterval startExon, IInterval endExon,\r\n            int translationStart, int translationEnd)\r\n        {\r\n            if (startExon == null || endExon == null) return -1;\r\n            return onReverseStrand\r\n                ? 
endExon.End - translationEnd + 1\r\n                : startExon.Start + translationStart - 1;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns the end position of the coding region. Returns -1 if no translation was possible.\r\n        /// </summary>\r\n        private static int GetCodingRegionEnd(bool onReverseStrand, IInterval startExon, IInterval endExon,\r\n            int translationStart, int translationEnd)\r\n        {\r\n            if (startExon == null || endExon == null) return -1;\r\n            return onReverseStrand\r\n                ? startExon.End - translationStart + 1\r\n                : endExon.Start + translationEnd - 1;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns the sum of the exon lengths\r\n        /// </summary>\r\n        private static int GetTotalExonLength(IEnumerable<MutableExon> exons) => exons.Sum(exon => exon.End - exon.Start + 1);\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/Import/ImportTranscriptMapper.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing CacheUtils.DataDumperImport.DataStructures.Import;\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\n\nnamespace CacheUtils.DataDumperImport.Import\n{\n    internal static class ImportTranscriptMapper\n    {\n        private static readonly HashSet<string> KnownKeys;\n\n        static ImportTranscriptMapper()\n        {\n            KnownKeys = new HashSet<string>\n            {\n                ImportKeys.CodingDnaCodingEnd,\n                ImportKeys.CodingDnaCodingStart,\n                ImportKeys.ExonCoordinateMapper,\n                ImportKeys.StartPhase\n            };\n        }\n\n        /// <summary>\n        /// parses the relevant data from each transcript mapper\n        /// </summary>\n        public static MutableTranscriptRegion[] Parse(ObjectValueNode objectValue)\n        {\n            MutableTranscriptRegion[] cdnaMaps = null;\n\n            foreach (var node in objectValue.Values)\n            {\n                // sanity check: make sure we know about the keys are used for\n                if (!KnownKeys.Contains(node.Key))\n                {\n                    throw new InvalidDataException($\"Encountered an unknown key in the dumper transcript mapper object: {node.Key}\");\n                }\n\n                switch (node.Key)\n                {\n                    case ImportKeys.CodingDnaCodingEnd:\n                    case ImportKeys.CodingDnaCodingStart:\n                    case ImportKeys.StartPhase:\n                        break;\n                    case ImportKeys.ExonCoordinateMapper:\n                        if (node is ObjectKeyValueNode exonCoordMapperNode)\n                        {\n                            cdnaMaps = ImportMapper.Parse(exonCoordMapperNode.Value);\n                        }\n                        else\n                        {\n                            throw new InvalidDataException($\"Could not transform the 
AbstractData object into an ObjectKeyValue: [{node.GetType()}]\");\n                        }\n                        break;\n                    default:\n                        throw new InvalidDataException($\"Unknown key found: {node.Key}\");\n                }\n            }\n\n            return cdnaMaps;\n        }\n    }\n}\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/Import/ImportTranslation.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing CacheUtils.DataDumperImport.DataStructures.Import;\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\nusing CacheUtils.DataDumperImport.Utilities;\nusing Genome;\n\nnamespace CacheUtils.DataDumperImport.Import\n{\n    internal static class ImportTranslation\n    {\n        private static readonly HashSet<string> KnownKeys;\n\n        static ImportTranslation()\n        {\n            KnownKeys = new HashSet<string>\n            {\n                ImportKeys.Adaptor,\n                ImportKeys.DbId,\n                ImportKeys.EndExon,\n                ImportKeys.End,\n                ImportKeys.Sequence,\n                ImportKeys.StableId,\n                ImportKeys.StartExon,\n                ImportKeys.Start,\n                ImportKeys.Transcript,\n                ImportKeys.Version\n            };\n        }\n\n        /// <summary>\n        /// parses the relevant data from each translation object\n        /// </summary>\n        public static (int Start, int End, string ProteinId, byte ProteinVersion, MutableExon startExon, MutableExon\n            endExon) Parse(IImportNode importNode, Chromosome currentChromosome)\n        {\n            var objectValue = importNode.GetObjectValueNode();\n            if (objectValue == null) throw new InvalidDataException(\"Encountered a translation import node that could not be converted to an object value node.\");\n\n            int start             = -1;\n            int end               = -1;\n            string proteinId      = null;\n            byte proteinVersion   = 0;\n            MutableExon startExon = null;\n            MutableExon endExon   = null;\n\n            foreach (var node in objectValue.Values)\n            {\n                // sanity check: make sure we know about the keys are used for\n                if (!KnownKeys.Contains(node.Key))\n                {\n                    throw new 
InvalidDataException($\"Encountered an unknown key in the dumper mapper object: {node.Key}\");\n                }\n\n                ObjectKeyValueNode exonNode;\n\n                switch (node.Key)\n                {\n                    case ImportKeys.Adaptor:\n                    case ImportKeys.Sequence:\n                    case ImportKeys.DbId:\n                    case ImportKeys.Transcript:\n                        // skip this key\n                        break;\n                    case ImportKeys.StartExon:\n                        exonNode = node as ObjectKeyValueNode;\n                        if (exonNode != null) startExon = ImportExon.Parse(exonNode.Value, currentChromosome);\n                        break;\n                    case ImportKeys.EndExon:\n                        exonNode = node as ObjectKeyValueNode;\n                        if (exonNode != null) endExon = ImportExon.Parse(exonNode.Value, currentChromosome);\n                        break;\n                    case ImportKeys.StableId:\n                        proteinId = node.GetString();\n                        break;\n                    case ImportKeys.End:\n                        end = node.GetInt32();\n                        break;\n                    case ImportKeys.Start:\n                        start = node.GetInt32();\n                        break;\n                    case ImportKeys.Version:\n                        proteinVersion = (byte)node.GetInt32();\n                        break;\n                    default:\n                        throw new InvalidDataException($\"Unknown key found: {node.Key}\");\n                }\n            }\n\n            return (start, end, proteinId, proteinVersion, startExon, endExon);\n        }\n    }\n}\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/Import/ImportVariantEffectFeatureCache.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.DataDumperImport.DataStructures.Import;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing CacheUtils.DataDumperImport.Utilities;\r\nusing Intervals;\r\n\r\nnamespace CacheUtils.DataDumperImport.Import\r\n{\r\n    internal static class ImportVariantEffectFeatureCache\r\n    {\r\n        private static readonly HashSet<string> KnownKeys;\r\n\r\n        static ImportVariantEffectFeatureCache()\r\n        {\r\n            KnownKeys = new HashSet<string>\r\n            {\r\n                ImportKeys.CodonTable,\r\n                ImportKeys.FivePrimeUtr,\r\n                ImportKeys.Introns,\r\n                ImportKeys.Mapper,\r\n                ImportKeys.Peptide,\r\n                ImportKeys.ProteinFeatures,\r\n                ImportKeys.ProteinFunctionPredictions,\r\n                ImportKeys.Selenocysteines,\r\n                ImportKeys.SeqEdits,\r\n                ImportKeys.SplicedSequence,\r\n                ImportKeys.SortedExons,\r\n                ImportKeys.ThreePrimeUtr,\r\n                ImportKeys.TranslateableSeq\r\n            };\r\n        }\r\n\r\n        /// <summary>\r\n        /// parses the relevant data from each variant effect feature cache\r\n        /// </summary>\r\n        public static (MutableTranscriptRegion[] CdnaMaps, IInterval[] Introns, string PeptideSequence, string\r\n            TranslateableSequence, string SiftData, string PolyPhenData, int[] SelenocysteinePositions) Parse(IImportNode importNode)\r\n        {\r\n            var objectValue = importNode.GetObjectValueNode();\r\n            if (objectValue == null) throw new InvalidDataException(\"Encountered a variant effect feature cache node that could not be converted to an object value node.\");\r\n\r\n            MutableTranscriptRegion[] cdnaMaps = null;\r\n            IInterval[] introns                = null;\r\n            string peptideSequence            
 = null;\r\n            string translateableSequence       = null;\r\n            string siftData                    = null;\r\n            string polyphenData                = null;\r\n            int[] selenocysteinePositions      = null;\r\n\r\n            foreach (var node in objectValue.Values)\r\n            {\r\n                // sanity check: make sure we know about the keys are used for\r\n                if (!KnownKeys.Contains(node.Key))\r\n                {\r\n                    throw new InvalidDataException($\"Encountered an unknown key in the dumper variant effect feature cache object: {node.Key}\");\r\n                }\r\n\r\n                switch (node.Key)\r\n                {\r\n                    case ImportKeys.CodonTable:\r\n                    case ImportKeys.FivePrimeUtr:\r\n                    case ImportKeys.ProteinFeatures:\r\n                    case ImportKeys.Selenocysteines:                    \r\n                    case ImportKeys.SortedExons:\r\n                    case ImportKeys.SplicedSequence:\r\n                    case ImportKeys.ThreePrimeUtr:\r\n                        // not used\r\n                        break;\r\n                    case ImportKeys.Introns:\r\n                        introns = node.ParseListObjectKeyValueNode(ImportIntron.ParseList);\r\n                        break;\r\n                    case ImportKeys.Mapper:\r\n                        cdnaMaps = node.ParseObjectKeyValueNode(ImportTranscriptMapper.Parse);\r\n                        break;\r\n                    case ImportKeys.Peptide:\r\n                        peptideSequence = node.GetString();\r\n                        break;\r\n                    case ImportKeys.ProteinFunctionPredictions:\r\n                        if (node is ObjectKeyValueNode predictionsNode)\r\n                        {\r\n                            (siftData, polyphenData) = ImportProteinFunctionPredictions.Parse(predictionsNode.Value);\r\n                        
}\r\n                        else\r\n                        {\r\n                            throw new InvalidDataException($\"Could not transform the AbstractData object into an ObjectKeyValue: [{node.GetType()}]\");\r\n                        }\r\n                        break;\r\n                    case ImportKeys.SeqEdits:\r\n                        selenocysteinePositions = node.ParseListObjectKeyValueNode(ImportSeqEdits.Parse);\r\n                        break;\r\n                    case ImportKeys.TranslateableSeq:\r\n                        translateableSequence = node.GetString();\r\n                        break;\r\n                    default:\r\n                        throw new InvalidDataException($\"Unknown key found: {node.Key}\");\r\n                }\r\n            }\r\n\r\n            return (cdnaMaps, introns, peptideSequence, translateableSequence, siftData, polyphenData, selenocysteinePositions);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/Utilities/ImportUtilities.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.DataDumperImport.DataStructures.Import;\r\nusing CacheUtils.DataDumperImport.Import;\r\n\r\nnamespace CacheUtils.DataDumperImport.Utilities\r\n{\r\n    public static class ImportUtilities\r\n    {\r\n        public static string GetPredictionData(this IImportNode node)\r\n        {\r\n            string predictionData = null;\r\n\r\n            if (node is ObjectKeyValueNode predictionNode)\r\n            {\r\n                predictionData = ImportPrediction.Parse(predictionNode.Value);\r\n            }\r\n            else if (!node.IsUndefined())\r\n            {\r\n                throw new InvalidDataException($\"Could not transform the AbstractData object into an ObjectKeyValue: [{node.GetType()}]\");\r\n            }\r\n\r\n            return predictionData;\r\n        }\r\n\r\n        public static T[] ParseObjectKeyValueNode<T>(this IImportNode node, Func<ObjectValueNode, T[]> parseFunc)\r\n        {\r\n            T[] results;\r\n\r\n            if (node is ObjectKeyValueNode keyValueNode)\r\n            {\r\n                results = parseFunc(keyValueNode.Value);\r\n            }\r\n            else\r\n            {\r\n                throw new InvalidDataException($\"Could not transform the AbstractData object into an ObjectKeyValue: [{node.GetType()}]\");\r\n            }\r\n\r\n            return results;\r\n        }\r\n\r\n        public static T[] ParseListObjectKeyValueNode<T>(this IImportNode node, Func<List<IListMember>, T[]> parseFunc)\r\n        {\r\n            T[] results = null;\r\n\r\n            if (node is ListObjectKeyValueNode listObjectKeyValueNode)\r\n            {\r\n                results = parseFunc(listObjectKeyValueNode.Values);\r\n            }\r\n            else if (!node.IsUndefined())\r\n            {\r\n                throw new InvalidDataException($\"Could not transform the AbstractData object into a 
ListObjectKeyValue: [{node.GetType()}]\");\r\n            }\r\n\r\n            return results;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/Utilities/MutableTranscriptComparer.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing Intervals;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.DataDumperImport.Utilities\r\n{\r\n    internal sealed class MutableTranscriptComparer : EqualityComparer<MutableTranscript>\r\n    {\r\n        private static bool GeneEquals(MutableGene x, MutableGene y)\r\n        {\r\n            return x.Chromosome.Index == y.Chromosome.Index &&\r\n                   x.Start            == y.Start            &&\r\n                   x.End              == y.End              &&\r\n                   x.OnReverseStrand  == y.OnReverseStrand  &&\r\n                   x.GeneId           == y.GeneId           &&\r\n                   x.Symbol           == y.Symbol           &&\r\n                   x.HgncId           == y.HgncId           &&\r\n                   x.SymbolSource     == y.SymbolSource;\r\n        }\r\n\r\n        private static bool ExonEquals(MutableExon x, MutableExon y)\r\n        {\r\n            return x.Start == y.Start &&\r\n                   x.End   == y.End   &&\r\n                   x.Phase == y.Phase;\r\n        }\r\n\r\n        private static bool IntervalEquals(IInterval x, IInterval y)\r\n        {\r\n            return x.Start == y.Start &&\r\n                   x.End   == y.End;\r\n        }\r\n\r\n        private static bool TranscriptRegionEquals(ITranscriptRegion x, ITranscriptRegion y)\r\n        {\r\n            return x.Start     == y.Start     &&\r\n                   x.End       == y.End       &&\r\n                   x.CdnaStart == y.CdnaStart &&\r\n                   x.CdnaEnd   == y.CdnaEnd;\r\n        }\r\n\r\n        private static bool CodingRegionEquals(ICodingRegion x, ICodingRegion y)\r\n        {\r\n            return x.Start     == y.Start     &&\r\n                   x.End       == y.End       &&\r\n                   x.CdnaStart == y.CdnaStart &&\r\n    
               x.CdnaEnd   == y.CdnaEnd   && \r\n                   x.Length    == y.Length;\r\n        }\r\n\r\n        // ReSharper disable SuggestBaseTypeForParameter\r\n        private static bool ArrayEquals<T>(T[] x, T[] y, Func<T, T, bool> equals)\r\n        // ReSharper restore SuggestBaseTypeForParameter\r\n        {\r\n            if (x == null && y == null) return true;\r\n            if (x == null || y == null) return false;\r\n            if (x.Length != y.Length)   return false;\r\n            // ReSharper disable once LoopCanBeConvertedToQuery\r\n            for (var i = 0; i < x.Length; i++) if (!equals(x[i], y[i])) return false;\r\n            return true;\r\n        }\r\n\r\n        private static bool IntEquals(int x, int y) => x == y;\r\n\r\n        public override bool Equals(MutableTranscript x, MutableTranscript y)\r\n        {\r\n                return x.Chromosome.Index      == y.Chromosome.Index                                &&\r\n                       x.Start                 == y.Start                                           &&\r\n                       x.End                   == y.End                                             &&\r\n                       x.Id                    == y.Id                                              &&\r\n                       x.Version               == y.Version                                         &&\r\n                       x.CcdsId                == y.CcdsId                                          &&\r\n                       x.RefSeqId              == y.RefSeqId                                        &&\r\n                       x.Source                == y.Source                                          &&\r\n                       x.TotalExonLength       == y.TotalExonLength                                 &&\r\n                       x.TranslateableSequence == y.TranslateableSequence                           &&\r\n                       x.CdsStartNotFound      == y.CdsStartNotFound      
                          &&\r\n                       x.CdsEndNotFound        == y.CdsEndNotFound                                  &&\r\n                       x.StartExonPhase        == y.StartExonPhase                                  &&\r\n                       x.BioType               == y.BioType                                         &&\r\n                       x.IsCanonical           == y.IsCanonical                                     &&\r\n                       x.ProteinId             == y.ProteinId                                       &&\r\n                       x.ProteinVersion        == y.ProteinVersion                                  &&\r\n                       x.PeptideSequence       == y.PeptideSequence                                 &&\r\n                       x.SiftData              == y.SiftData                                        &&\r\n                       x.PolyphenData          == y.PolyphenData                                    &&\r\n                       GeneEquals(x.Gene, y.Gene)                                                   &&\r\n                       ArrayEquals(x.Exons, y.Exons, ExonEquals)                                    &&\r\n                       ArrayEquals(x.Introns, y.Introns, IntervalEquals)                            &&\r\n                       ArrayEquals(x.MicroRnas, y.MicroRnas, IntervalEquals)                        &&\r\n                       ArrayEquals(x.SelenocysteinePositions, y.SelenocysteinePositions, IntEquals) &&\r\n                       ArrayEquals(x.CdnaMaps, y.CdnaMaps, TranscriptRegionEquals)                  &&\r\n                       CodingRegionEquals(x.CodingRegion, y.CodingRegion);\r\n        }\r\n\r\n        public override int GetHashCode(MutableTranscript obj)\r\n        {\r\n            unchecked\r\n            {\r\n                int hashCode = obj.Chromosome.Index.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ obj.Start.GetHashCode();\r\n                
hashCode = (hashCode * 397) ^ obj.End.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ obj.Id.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ obj.Version.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ obj.BioType.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ obj.Source.GetHashCode();\r\n                return hashCode;\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/DataDumperImport/Utilities/TranscriptUtilities.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing CacheUtils.DataDumperImport.DataStructures.Import;\nusing CacheUtils.DataDumperImport.Import;\nusing CacheUtils.Helpers;\nusing VariantAnnotation.Interface.AnnotatedPositions;\n\nnamespace CacheUtils.DataDumperImport.Utilities\n{\n    public static class TranscriptUtilities\n    {\n        private const string CodingDnaMapperUnitTypeKey = \"cdna\";\n        private const string GenomeMapperUnitTypeKey    = \"genome\";\n\n        private static readonly Dictionary<string, MapperUnitType> MapperUnitTypes;\n\n        static TranscriptUtilities()\n        {\n            MapperUnitTypes = new Dictionary<string, MapperUnitType>\n            {\n                [CodingDnaMapperUnitTypeKey] = MapperUnitType.CodingDna,\n                [GenomeMapperUnitTypeKey] = MapperUnitType.Genomic\n            };\n        }\n\n        public static BioType GetBiotype(IImportNode node) => BioTypeHelper.GetBioType(node.GetString());\n\n        public static MapperUnitType GetMapperUnitType(IImportNode node)\n        {\n            string mapperUnitTypeString = node.GetString();\n\n            if (!MapperUnitTypes.TryGetValue(mapperUnitTypeString, out var ret))\n            {\n                throw new InvalidDataException($\"Unable to find the specified mapper unit type ({mapperUnitTypeString}) in the MapperUnitType dictionary.\");\n            }\n\n            return ret;\n        }\n\n        public static ObjectValueNode GetObjectValueNode(this IImportNode node)\n        {\n            if (node is ObjectKeyValueNode objectKeyValueNode) return objectKeyValueNode.Value;\n            return null;\n        }\n\n        public static List<IListMember> GetListMembers(this IImportNode node)\n        {\n            if (node is ListObjectKeyValueNode listObjectKeyValueNode) return listObjectKeyValueNode.Values;\n            return null;\n        }\n\n        public static bool GetStrand(IImportNode node)\n        {\n    
        int strandNum = node.GetInt32();\n\n            // sanity check: make sure the value is either 1 or -1\n            if (strandNum != -1 && strandNum != 1)\n            {\n                throw new InvalidDataException($\"Expected the strand number to be either -1 or 1. Found: {strandNum}.\");\n            }\n\n            return strandNum == -1;\n        }\n\n        public static int GetHgncId(this IImportNode node)\n        {\n            string hgnc = node.GetString();\n            if (hgnc != null && hgnc.StartsWith(\"HGNC:\")) hgnc = hgnc.Substring(5);\n\n            int hgncId = -1;\n            if (hgnc != null) hgncId = int.Parse(hgnc);\n            return hgncId;\n        }\n    }\n}\n"
  },
  {
    "path": "CacheUtils/GFF/GeneralAttributes.cs",
    "content": "﻿namespace CacheUtils.GFF\r\n{\r\n    public sealed class GeneralAttributes : IGeneralAttributes\r\n    {\r\n        public string GeneId { get; }\r\n        public string GeneSymbol { get; }\r\n        public string TranscriptId { get; }\r\n        public string ProteinId { get; }\r\n        public string BioType { get; }\r\n        public bool IsCanonical { get; }\r\n        public int InternalGeneId { get; }\r\n\r\n        public GeneralAttributes(string geneId, string geneSymbol, string transcriptId, string proteinId,\r\n            string bioType, bool isCanonical, int internalGeneId)\r\n        {\r\n            GeneId         = geneId;\r\n            GeneSymbol     = geneSymbol;\r\n            TranscriptId   = transcriptId;\r\n            ProteinId      = proteinId;\r\n            BioType        = bioType;\r\n            IsCanonical    = isCanonical;\r\n            InternalGeneId = internalGeneId;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/GFF/GffCreator.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing Intervals;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.GFF\r\n{\r\n    public sealed class GffCreator\r\n    {\r\n        private readonly Dictionary<IGene, int> _geneToInternalId;\r\n        private readonly Source _source;\r\n        private readonly GffWriter _writer;\r\n        private readonly HashSet<int> _observedGenes;\r\n\r\n        public GffCreator(GffWriter writer, Dictionary<IGene, int> geneToInternalId, Source source)\r\n        {\r\n            _writer           = writer;\r\n            _geneToInternalId = geneToInternalId;\r\n            _source           = source;\r\n            _observedGenes    = new HashSet<int>();\r\n        }\r\n\r\n        public void Create(IEnumerable<IntervalArray<ITranscript>> transcriptIntervalArrays)\r\n        {\r\n            Console.Write(\"- writing GFF entries... \");\r\n            foreach (var transcriptArray in transcriptIntervalArrays)\r\n            {\r\n                if (transcriptArray == null) continue;\r\n                foreach (var interval in transcriptArray.Array) Write(interval.Value);\r\n            }\r\n            Console.WriteLine(\"finished.\");\r\n        }\r\n\r\n        private void Write(ITranscript transcript)\r\n        {\r\n            if (transcript.Source != _source) return;\r\n            \r\n            var requiredFields = GetRequiredFields(transcript);\r\n            var attribs        = GetGeneralAttributes(transcript);\r\n\r\n            WriteGene(transcript.Gene, requiredFields, attribs.GeneId, attribs.InternalGeneId);\r\n            WriteTranscript(transcript, requiredFields, attribs);\r\n\r\n            var exons        = transcript.TranscriptRegions.GetExons();\r\n            var codingRegion = transcript.Translation?.CodingRegion;\r\n\r\n            foreach (var exon in exons) WriteExon(exon, 
requiredFields, attribs, codingRegion);\r\n        }\r\n\r\n        private void WriteTranscript(IInterval interval, IRequiredFields requiredFields, IGeneralAttributes attribs) =>\r\n            _writer.WriteTranscript(interval, requiredFields, attribs);\r\n\r\n        private void WriteGene(IGene gene, IRequiredFields requiredFields, string geneId, int internalGeneId)\r\n        {\r\n            if (_observedGenes.Contains(internalGeneId)) return;\r\n\r\n            _observedGenes.Add(internalGeneId);\r\n            var gffGene = GetGene(gene, geneId);\r\n            _writer.WriteGene(gffGene, requiredFields, internalGeneId);\r\n        }\r\n\r\n        private void WriteExon(ITranscriptRegion exon, IRequiredFields requiredFields, IGeneralAttributes attribs,\r\n            IInterval codingRegion)\r\n        {\r\n            _writer.WriteExonicRegion(exon, requiredFields, attribs, exon.Id, \"exon\");\r\n            WriteCds(codingRegion, exon, requiredFields, attribs);\r\n            WriteUtr(codingRegion, exon, requiredFields, attribs);\r\n        }\r\n\r\n        private void WriteUtr(IInterval codingRegion, ITranscriptRegion exon, IRequiredFields requiredFields,\r\n            IGeneralAttributes attribs)\r\n        {\r\n            if (!GffUtilities.HasUtr(codingRegion, exon)) return;\r\n            if (exon.Start < codingRegion.Start) Write5PrimeUtr(codingRegion, exon, requiredFields, attribs);\r\n            if (exon.End > codingRegion.End) Write3PrimeUtr(codingRegion, exon, requiredFields, attribs);\r\n        }\r\n\r\n        private void Write5PrimeUtr(IInterval codingRegion, ITranscriptRegion exon, IRequiredFields requiredFields,\r\n            IGeneralAttributes attribs)\r\n        {\r\n            int utrEnd = codingRegion.Start - 1;\r\n            if (utrEnd > exon.End) utrEnd = exon.End;\r\n            _writer.WriteExonicRegion(new Interval(exon.Start, utrEnd), requiredFields, attribs, exon.Id, \"UTR\");\r\n        }\r\n\r\n        private void 
Write3PrimeUtr(IInterval codingRegion, ITranscriptRegion exon, IRequiredFields requiredFields,\r\n            IGeneralAttributes attribs)\r\n        {\r\n            int utrStart = codingRegion.End + 1;\r\n            if (utrStart < exon.Start) utrStart = exon.Start;\r\n            _writer.WriteExonicRegion(new Interval(utrStart, exon.End), requiredFields, attribs, exon.Id, \"UTR\");\r\n        }\r\n\r\n        private void WriteCds(IInterval codingRegion, ITranscriptRegion exon, IRequiredFields requiredFields, IGeneralAttributes attribs)\r\n        {\r\n            if (!GffUtilities.HasCds(codingRegion, exon)) return;\r\n            var cds = GffUtilities.GetCdsCoordinates(codingRegion, exon);\r\n            _writer.WriteExonicRegion(cds, requiredFields, attribs, exon.Id, \"CDS\");\r\n        }\r\n\r\n        private static IGffGene GetGene(IGene gene, string id) => new GffGene(gene.Start, gene.End, id,\r\n            gene.EntrezGeneId.WithVersion, gene.EnsemblId.WithVersion, gene.Symbol);\r\n\r\n        private static IRequiredFields GetRequiredFields(ITranscript transcript)\r\n        {\r\n            string source = transcript.Source.ToString();\r\n            return new RequiredFields(transcript.Chromosome.UcscName, source, transcript.Gene.OnReverseStrand);\r\n        }\r\n\r\n        private IGeneralAttributes GetGeneralAttributes(ITranscript transcript)\r\n        {\r\n            string bioType     = AnnotatedTranscript.GetBioType(transcript.BioType);\r\n            int internalGeneId = _geneToInternalId[transcript.Gene];\r\n            string geneId      = transcript.Source == Source.Ensembl\r\n                ? 
transcript.Gene.EnsemblId.WithVersion\r\n                : transcript.Gene.EntrezGeneId.WithVersion;\r\n\r\n            return new GeneralAttributes(geneId, transcript.Gene.Symbol, transcript.Id.WithVersion,\r\n                transcript.Translation?.ProteinId?.WithVersion, bioType, transcript.IsCanonical, internalGeneId);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/GFF/GffGene.cs",
    "content": "﻿namespace CacheUtils.GFF\r\n{\r\n    public sealed class GffGene : IGffGene\r\n    {\r\n        public int Start { get; }\r\n        public int End { get; }\r\n        public string Id { get; }\r\n        public string EntrezGeneId { get; }\r\n        public string EnsemblGeneId { get; }\r\n        public string Symbol { get; }\r\n\r\n        public GffGene(int start, int end, string id, string entrezGeneId, string ensemblGeneId, string symbol)\r\n        {\r\n            Start         = start;\r\n            End           = end;\r\n            Id            = id;\r\n            EntrezGeneId  = entrezGeneId;\r\n            EnsemblGeneId = ensemblGeneId;\r\n            Symbol        = symbol;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/GFF/GffUtilities.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.Linq;\r\nusing CacheUtils.Genes.Utilities;\r\nusing Intervals;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.GFF\r\n{\r\n    public static class GffUtilities\r\n    {\r\n        public static bool HasCds(IInterval codingRegion, IInterval exon)\r\n        {\r\n            if (codingRegion == null || codingRegion.Start == -1 || codingRegion.End == -1) return false;\r\n            return exon.Overlaps(codingRegion);\r\n        }\r\n\r\n        public static IInterval GetCdsCoordinates(IInterval codingRegion, ITranscriptRegion exon)\r\n        {\r\n            int start = exon.Start;\r\n            int end   = exon.End;\r\n\r\n            if (start < codingRegion.Start) start = codingRegion.Start;\r\n            if (end   > codingRegion.End)   end   = codingRegion.End;\r\n\r\n            return new Interval(start, end);\r\n        }\r\n\r\n        public static bool HasUtr(IInterval codingRegion, IInterval exon)\r\n        {\r\n            if (codingRegion == null || codingRegion.Start == -1 || codingRegion.End == -1) return false;\r\n            return exon.Start < codingRegion.Start || exon.End > codingRegion.End;\r\n        }\r\n\r\n        public static IEnumerable<ITranscriptRegion> GetExons(this ITranscriptRegion[] regions) =>\r\n            regions.FilterNonExons().Merge().OrderBy(x => x.Start).ThenBy(x => x.End);\r\n\r\n        private static ITranscriptRegion[] FilterNonExons(this IEnumerable<ITranscriptRegion> regions) =>\r\n            regions.Where(region => region.Type == TranscriptRegionType.Exon).ToArray();\r\n\r\n        private static IEnumerable<ITranscriptRegion> Merge(this IReadOnlyCollection<ITranscriptRegion> exons)\r\n        {\r\n            if (exons.Count == 1) return exons;\r\n\r\n            var mergedExons = new List<ITranscriptRegion>();\r\n            var exonsById   = 
exons.GetMultiValueDict(x => x.Id);\r\n\r\n            foreach (var kvp in exonsById)\r\n            {\r\n                mergedExons.Add(MergeTranscriptRegions(kvp.Key, kvp.Value));\r\n            }\r\n\r\n            return mergedExons;\r\n        }\r\n\r\n        private static ITranscriptRegion MergeTranscriptRegions(ushort exonId, IReadOnlyList<ITranscriptRegion> regions)\r\n        {\r\n            if (regions.Count == 1) return regions[0];\r\n\r\n            int lastIndex = regions.Count - 1;\r\n\r\n            int start     = regions[0].Start;\r\n            int end       = regions[lastIndex].End;\r\n            int cdnaStart = Math.Min(regions[0].CdnaStart, regions[lastIndex].CdnaStart);\r\n            int cdnaEnd   = Math.Max(regions[0].CdnaEnd, regions[lastIndex].CdnaEnd);\r\n\r\n            return new TranscriptRegion(TranscriptRegionType.Exon, exonId, start, end, cdnaStart, cdnaEnd);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/GFF/GffWriter.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing Intervals;\r\n\r\nnamespace CacheUtils.GFF\r\n{\r\n    public sealed class GffWriter : IDisposable\r\n    {\r\n        private readonly StreamWriter _writer;\r\n\r\n        public GffWriter(StreamWriter writer) => _writer = writer;\r\n\r\n        public void Dispose() => _writer.Dispose();\r\n\r\n        private void WriteRequiredFields(IInterval interval, IRequiredFields fields, string feature)\r\n        {\r\n            char strand = fields.OnReverseStrand ? '-' : '+';\r\n            _writer.Write($\"{fields.UcscName}\\t{fields.Source}\\t{feature}\\t{interval.Start}\\t{interval.End}\\t.\\t{strand}\\t.\\t\");\r\n        }\r\n\r\n        private static bool NotEmpty(string s) => !string.IsNullOrEmpty(s);\r\n\r\n        private void WriteGeneralAttributes(IGeneralAttributes attribs)\r\n        {\r\n            if (NotEmpty(attribs.GeneId))       _writer.Write($\"gene_id \\\"{attribs.GeneId}\\\"; \");\r\n            if (NotEmpty(attribs.GeneSymbol))   _writer.Write($\"gene_name \\\"{attribs.GeneSymbol}\\\"; \");\r\n            if (NotEmpty(attribs.TranscriptId)) _writer.Write($\"transcript_id \\\"{attribs.TranscriptId}\\\"; \");\r\n\r\n            _writer.Write($\"transcript_type \\\"{attribs.BioType}\\\"; \");\r\n            if (attribs.IsCanonical) _writer.Write(\"tag \\\"canonical\\\"; \");\r\n\r\n            if (NotEmpty(attribs.ProteinId)) _writer.Write($\"protein_id \\\"{attribs.ProteinId}\\\"; \");\r\n        }\r\n\r\n        public void WriteGene(IGffGene gene, IRequiredFields requiredFields, int internalGeneId)\r\n        {\r\n            WriteRequiredFields(gene, requiredFields, \"gene\");\r\n            if (!string.IsNullOrEmpty(gene.Id)) _writer.Write($\"gene_id \\\"{gene.Id}\\\"; \");\r\n            if (!string.IsNullOrEmpty(gene.EntrezGeneId)) _writer.Write($\"entrez_gene_id \\\"{gene.EntrezGeneId}\\\"; \");\r\n            if (!string.IsNullOrEmpty(gene.EnsemblGeneId)) 
_writer.Write($\"ensembl_gene_id \\\"{gene.EnsemblGeneId}\\\"; \");\r\n            if (!string.IsNullOrEmpty(gene.Symbol)) _writer.Write($\"gene_name \\\"{gene.Symbol}\\\"; \");\r\n            WriteInternalGeneId(internalGeneId);\r\n        }\r\n\r\n        private void WriteInternalGeneId(int geneId) => _writer.WriteLine($\"internal_gene_id \\\"{geneId}\\\"; \");\r\n\r\n        public void WriteTranscript(IInterval interval, IRequiredFields requiredFields, IGeneralAttributes attribs)\r\n        {\r\n            WriteRequiredFields(interval, requiredFields, \"transcript\");\r\n            WriteGeneralAttributes(attribs);\r\n            WriteInternalGeneId(attribs.InternalGeneId);\r\n        }\r\n\r\n        public void WriteExonicRegion(IInterval interval, IRequiredFields requiredFields, IGeneralAttributes attribs,\r\n            ushort exonNumber, string feature)\r\n        {\r\n            WriteRequiredFields(interval, requiredFields, feature);\r\n            WriteGeneralAttributes(attribs);\r\n            _writer.Write($\"exon_number {exonNumber}; \");\r\n            WriteInternalGeneId(attribs.InternalGeneId);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/GFF/IGeneralAttributes.cs",
    "content": "﻿namespace CacheUtils.GFF\r\n{\r\n    public interface IGeneralAttributes\r\n    {\r\n        string GeneId { get; }\r\n        string GeneSymbol { get; }\r\n        string TranscriptId { get; }\r\n        string ProteinId { get; }\r\n        string BioType { get; }\r\n        bool IsCanonical { get; }\r\n        int InternalGeneId { get; }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/GFF/IGffGene.cs",
    "content": "﻿using Intervals;\r\n\r\nnamespace CacheUtils.GFF\r\n{\r\n    public interface IGffGene : IInterval\r\n    {\r\n        string Id { get; }\r\n        string EntrezGeneId { get; }\r\n        string EnsemblGeneId { get; }\r\n        string Symbol { get; }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/GFF/IRequiredFields.cs",
    "content": "﻿namespace CacheUtils.GFF\r\n{\r\n    public interface IRequiredFields\r\n    {\r\n        string UcscName { get; }\r\n        string Source { get; }\r\n        bool OnReverseStrand { get; }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/GFF/RequiredFields.cs",
    "content": "﻿namespace CacheUtils.GFF\r\n{\r\n    public sealed class RequiredFields : IRequiredFields\r\n    {\r\n        public string UcscName { get; }\r\n        public string Source { get; }\r\n        public bool OnReverseStrand { get; }\r\n\r\n        public RequiredFields(string ucscName, string source, bool onReverseStrand)\r\n        {\r\n            UcscName        = ucscName;\r\n            Source          = source;\r\n            OnReverseStrand = onReverseStrand;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genbank/GenbankEntry.cs",
    "content": "﻿using Intervals;\r\n\r\nnamespace CacheUtils.Genbank\r\n{\r\n    public sealed class GenbankEntry\r\n    {\r\n        public readonly string TranscriptId;\r\n        public readonly byte TranscriptVersion;\r\n        public readonly string ProteinId;\r\n        public readonly byte ProteinVersion;\r\n        public readonly string GeneId;\r\n        public readonly string Symbol;\r\n        public readonly IInterval CodingRegion;\r\n        public readonly IInterval[] Exons;\r\n\r\n        public GenbankEntry(string transcriptId, byte transcriptVersion, string proteinId, byte proteinVersion,\r\n            string geneId, string symbol, IInterval codingRegion, IInterval[] exons)\r\n        {\r\n            TranscriptId      = transcriptId;\r\n            TranscriptVersion = transcriptVersion;\r\n            ProteinId         = proteinId;\r\n            ProteinVersion    = proteinVersion;\r\n            GeneId            = geneId;\r\n            Symbol            = symbol;\r\n            CodingRegion      = codingRegion;\r\n            Exons             = exons;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genbank/GenbankReader.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing Intervals;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Utilities;\r\n\r\nnamespace CacheUtils.Genbank\r\n{\r\n    public sealed class GenbankReader : IDisposable\r\n    {\r\n        private readonly StreamReader _reader;\r\n\r\n        // ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/mRNA_Prot/human.*.rna.gbff.gz\r\n\r\n        private const string LocusTag      = \"LOCUS\";\r\n        private const string FeaturesTag   = \"FEATURES\";\r\n        private const string OriginTag     = \"ORIGIN\";\r\n        private const string TerminatorTag = \"//\";\r\n\r\n        private const string GeneFeatureTag = \"gene\";\r\n        private const string CdsFeatureTag  = \"CDS\";\r\n        private const string ExonFeatureTag = \"exon\";\r\n\r\n        private const string ProteinIdTag  = \"/protein_id=\";\r\n        private const string GeneIdTag     = \"/db_xref=\\\"GeneID:\";\r\n        private const string GeneSymbolTag = \"/gene=\";\r\n\r\n        private const int FeatureColumnLength = 21;\r\n\r\n        public GenbankReader(StreamReader reader) => _reader = reader;\r\n\r\n        public GenbankEntry GetGenbankEntry()\r\n        {\r\n            // assert that the record starts with LOCUS\r\n            if (!HasLocus()) return null;\r\n\r\n            (string transcriptId, byte transcriptVersion) = ParseHeader();\r\n            var featureData = ParseFeatures();\r\n            ParseOrigin();\r\n\r\n            var exons = featureData.Exons.Count == 0 ? null : featureData.Exons.ToArray();\r\n\r\n            return transcriptId == null\r\n                ? 
null\r\n                : new GenbankEntry(transcriptId, transcriptVersion, featureData.ProteinId, featureData.ProteinVersion,\r\n                    featureData.GeneId, featureData.GeneSymbol, featureData.CodingRegion, exons);\r\n        }\r\n\r\n        private void ParseOrigin()\r\n        {\r\n            string line;\r\n            do\r\n            {\r\n                line = GetNextLine();\r\n            } while (line != null);\r\n        }\r\n\r\n        private string GetNextLine()\r\n        {\r\n            string line = _reader.ReadLine();\r\n            if (line == null || line.StartsWith(TerminatorTag)) return null;\r\n            return line;\r\n        }\r\n\r\n        private FeatureData ParseFeatures()\r\n        {\r\n            var featureState = FeaturesState.Unknown;\r\n            var featureData = new FeatureData();\r\n\r\n            while (true)\r\n            {\r\n                string line = GetNextLine();\r\n                if (line == null || line.StartsWith(OriginTag)) break;\r\n\r\n                bool isNewState;\r\n                (featureState, isNewState) = GetFeatureState(line, featureState);\r\n                string info = line.Substring(FeatureColumnLength);\r\n\r\n                // ReSharper disable once SwitchStatementMissingSomeCases\r\n                switch (featureState)\r\n                {\r\n                    case FeaturesState.Gene:\r\n                        ParseGeneFeature(info, featureData);\r\n                        break;\r\n                    case FeaturesState.Cds:\r\n                        ParseCdsFeature(isNewState, featureData, info);\r\n                        break;\r\n                    case FeaturesState.Exon:\r\n                        ParseExonFeature(isNewState, featureData, info);\r\n                        break;\r\n                }\r\n            }\r\n\r\n            return featureData;\r\n        }\r\n\r\n        private static void ParseExonFeature(bool isNewState, FeatureData 
featureData, string info)\r\n        {\r\n            if (isNewState) featureData.Exons.Add(GetInterval(info));\r\n        }\r\n\r\n        private static void ParseCdsFeature(bool isNewState, FeatureData featureData, string info)\r\n        {\r\n            if (isNewState) featureData.CodingRegion = GetInterval(info);\r\n            if (info.StartsWith(ProteinIdTag)) ParseProteinId(featureData, info);\r\n        }\r\n\r\n        private static void ParseGeneFeature(string info, FeatureData featureData)\r\n        {\r\n            if (info.StartsWith(GeneIdTag)) featureData.GeneId = ParseGeneId(info);\r\n            if (info.StartsWith(GeneSymbolTag)) featureData.GeneSymbol = ParseGeneSymbol(info);\r\n        }\r\n\r\n        private (string TranscriptId, byte TranscriptVersion) ParseHeader()\r\n        {\r\n            const string versionTag = \"VERSION\";\r\n            string transcriptId     = null;\r\n            byte transcriptVersion  = 0;\r\n\r\n            while (true)\r\n            {\r\n                string line = GetNextLine();\r\n                if (line == null || line.StartsWith(FeaturesTag)) break;\r\n                if (line.StartsWith(versionTag)) (transcriptId, transcriptVersion) = ParseVersion(line);\r\n            }\r\n\r\n            return (transcriptId, transcriptVersion);\r\n        }\r\n\r\n        private static string ParseGeneSymbol(string info) => info.Substring(GeneSymbolTag.Length).Trim('\"');\r\n        private static string ParseGeneId(string info)     => info.Substring(GeneIdTag.Length).Trim('\"');\r\n\r\n        private static void ParseProteinId(FeatureData featureData, string info)\r\n        {\r\n            string rawId = info.Substring(ProteinIdTag.Length).Trim('\"');\r\n            (featureData.ProteinId, featureData.ProteinVersion) = FormatUtilities.SplitVersion(rawId);\r\n        }\r\n\r\n        private static IInterval GetInterval(string info)\r\n        {\r\n            if (info.StartsWith(\"join\")) return 
GetJoinInterval(info);\r\n\r\n            var coordinates = info.Split(\"..\");\r\n            if (coordinates.Length != 2) throw new InvalidDataException(\"Expected two coordinates in the exon feature line.\");\r\n\r\n            int start = int.Parse(coordinates[0].TrimStart('<'));\r\n            int end   = int.Parse(coordinates[1].TrimStart('>'));\r\n            return new Interval(start, end);\r\n        }\r\n\r\n        private static IInterval GetJoinInterval(string info)\r\n        {\r\n            var cols  = info.Substring(5, info.Length - 6).OptimizedSplit(',');\r\n            int start = int.Parse(cols[0].Split(\"..\")[0]);\r\n            int end   = int.Parse(cols[cols.Length - 1].Split(\"..\")[1]);\r\n            return new Interval(start, end);\r\n        }\r\n\r\n        private static (FeaturesState State, bool IsNewState) GetFeatureState(string line, FeaturesState featureState)\r\n        {\r\n            string label = line.Substring(0, FeatureColumnLength).Trim();\r\n            if (string.IsNullOrEmpty(label)) return (featureState, false);\r\n\r\n            if (label.StartsWith(GeneFeatureTag)) return (FeaturesState.Gene, true);\r\n            if (label.StartsWith(ExonFeatureTag)) return (FeaturesState.Exon, true);\r\n            return label.StartsWith(CdsFeatureTag) ? 
(FeaturesState.Cds, true) : (FeaturesState.Unknown, true);\r\n        }\r\n\r\n        private bool HasLocus()\r\n        {\r\n            string line = _reader.ReadLine();\r\n            return line != null && line.StartsWith(LocusTag);\r\n        }\r\n\r\n        private static (string TranscriptId, byte TranscriptVersion) ParseVersion(string line)\r\n        {\r\n            string accession = line.Substring(12).Trim();\r\n            return FormatUtilities.SplitVersion(accession);\r\n        }\r\n\r\n        public void Dispose() => _reader.Dispose();\r\n\r\n        private sealed class FeatureData\r\n        {\r\n            public string ProteinId;\r\n            public byte ProteinVersion;\r\n            public string GeneId;\r\n            public string GeneSymbol;\r\n            public IInterval CodingRegion;\r\n            public readonly List<IInterval> Exons = new List<IInterval>();\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genbank/GenbankState.cs",
    "content": "﻿namespace CacheUtils.Genbank\r\n{\r\n    internal enum FeaturesState : byte\r\n    {\r\n        Unknown,\r\n        Cds,\r\n        Exon,\r\n        Gene\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/Combiners/CombinerUtils.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.Genes.DataStructures;\r\n\r\nnamespace CacheUtils.Genes.Combiners\r\n{\r\n    public static class CombinerUtils\r\n    {\r\n        public static UgaGene Merge(UgaGene gene37, UgaGene gene38)\r\n        {\r\n            string ensemblId    = CombineField(gene37.EnsemblId, gene38.EnsemblId);\r\n            string entrezGeneId = CombineField(gene37.EntrezGeneId, gene38.EntrezGeneId);\r\n            int hgncId          = CombineField(gene37.HgncId, gene38.HgncId);\r\n            return new UgaGene(gene37.Chromosome, gene37.GRCh37, gene38.GRCh38, gene37.OnReverseStrand, entrezGeneId,\r\n                ensemblId, gene37.Symbol, hgncId);\r\n        }\r\n\r\n        private static T CombineField<T>(T grch37, T grch38)\r\n        {\r\n            if (grch37 == null) return grch38;\r\n            if (grch38 == null) return grch37;\r\n            if (!grch37.Equals(grch38)) throw new InvalidDataException($\"Found two different values: {grch37} & {grch38}\");\r\n            return grch37;\r\n        }\r\n\r\n        internal static void RemoveGenes(IEnumerable<UgaGene> genes, ICollection<UgaGene> remainingGenes)\r\n        {\r\n            foreach (var gene in genes) remainingGenes.Remove(gene);\r\n        }\r\n\r\n        internal static void AddOrphans(ICollection<UgaGene> combinedGenes, IEnumerable<UgaGene> genes)\r\n        {\r\n            foreach (var gene in genes) combinedGenes.Add(gene);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/Combiners/HgncIdCombiner.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing CacheUtils.Genes.Utilities;\r\n\r\nnamespace CacheUtils.Genes.Combiners\r\n{\r\n    public sealed class HgncIdCombiner : ICombiner\r\n    {\r\n        public void Combine(List<UgaGene> combinedGenes, HashSet<UgaGene> remainingGenes37,\r\n            HashSet<UgaGene> remainingGenes38)\r\n        {\r\n            var hgncIds       = GetHgncIds(remainingGenes37, remainingGenes38);\r\n            var genesByHgnc37 = remainingGenes37.GetMultiValueDict(x => x.HgncId);\r\n            var genesByHgnc38 = remainingGenes38.GetMultiValueDict(x => x.HgncId);\r\n\r\n            foreach (int hgncId in hgncIds)\r\n            {\r\n                var genes37 = GetGenesByHgncId(genesByHgnc37, hgncId);\r\n                var genes38 = GetGenesByHgncId(genesByHgnc38, hgncId);\r\n\r\n                CombinerUtils.RemoveGenes(genes37, remainingGenes37);\r\n                CombinerUtils.RemoveGenes(genes38, remainingGenes38);\r\n\r\n                // merge if we have one gene on each genome assembly and they're on the same strand\r\n                if (genes37.Count == 1 && genes38.Count == 1)\r\n                {\r\n                    var gene37 = genes37[0];\r\n                    var gene38 = genes38[0];\r\n\r\n                    if (gene37.OnReverseStrand == gene38.OnReverseStrand)\r\n                    {\r\n                        var mergedGene = CombinerUtils.Merge(gene37, gene38);\r\n                        combinedGenes.Add(mergedGene);\r\n                        continue;\r\n                    }\r\n                }\r\n\r\n                // the following situations happen if we have:\r\n                // - one gene from GRCh37 and none from GRCh38 (or vice versa)\r\n                // - there is a mixture of genes forward and reverse strands (13 occurrences)\r\n                CombinerUtils.AddOrphans(combinedGenes, genes37);\r\n                
CombinerUtils.AddOrphans(combinedGenes, genes38);\r\n            }\r\n        }\r\n\r\n        private static List<UgaGene> GetGenesByHgncId(IReadOnlyDictionary<int, List<UgaGene>> genesByHgnc, int hgncId) =>\r\n            genesByHgnc.TryGetValue(hgncId, out var genes) ? genes : UgaAssemblyCombiner.EmptyUgaGenes;\r\n\r\n        private static IEnumerable<int> GetHgncIds(IEnumerable<UgaGene> remainingUga37, IEnumerable<UgaGene> remainingUga38)\r\n        {\r\n            var hgncIds = new HashSet<int>();\r\n            foreach (var gene in remainingUga37) if (gene.HgncId != -1) hgncIds.Add(gene.HgncId);\r\n            foreach (var gene in remainingUga38) if (gene.HgncId != -1) hgncIds.Add(gene.HgncId);\r\n            return hgncIds;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/Combiners/ICombiner.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing CacheUtils.Genes.DataStructures;\r\n\r\nnamespace CacheUtils.Genes.Combiners\r\n{\r\n    public interface ICombiner\r\n    {\r\n        void Combine(List<UgaGene> combinedGenes, HashSet<UgaGene> remainingGenes37, HashSet<UgaGene> remainingGenes38);\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/Combiners/PartitionCombiner.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing CacheUtils.Genes.Utilities;\r\n\r\nnamespace CacheUtils.Genes.Combiners\r\n{\r\n    public sealed class PartitionCombiner : ICombiner\r\n    {\r\n        public void Combine(List<UgaGene> combinedGenes, HashSet<UgaGene> remainingGenes37,\r\n            HashSet<UgaGene> remainingGenes38)\r\n        {\r\n            var grch37 = Partition(remainingGenes37);\r\n            var grch38 = Partition(remainingGenes38);\r\n\r\n            CombineSet(combinedGenes, grch37.Both, grch38.Both, remainingGenes37, remainingGenes38);\r\n            CombineSet(combinedGenes, grch37.EntrezGeneOnly, grch38.EntrezGeneOnly, remainingGenes37, remainingGenes38);\r\n            CombineSet(combinedGenes, grch37.EnsemblOnly, grch38.EnsemblOnly, remainingGenes37, remainingGenes38);\r\n        }\r\n\r\n        private static void CombineSet(ICollection<UgaGene> combinedGenes, IEnumerable<UgaGene> uga37,\r\n            IEnumerable<UgaGene> uga38, ICollection<UgaGene> remainingGenes37, ICollection<UgaGene> remainingGenes38)\r\n        {\r\n            var keyToGene37 = uga37.GetMultiValueDict(GetKey);\r\n            var keyToGene38 = uga38.GetMultiValueDict(GetKey);\r\n            var keys        = GetAllKeys(keyToGene37.Keys, keyToGene38.Keys);\r\n\r\n            foreach (string key in keys)\r\n            {\r\n                var genes37 = GetGenesByKey(keyToGene37, key);\r\n                var genes38 = GetGenesByKey(keyToGene38, key);\r\n\r\n                CombinerUtils.RemoveGenes(genes37, remainingGenes37);\r\n                CombinerUtils.RemoveGenes(genes38, remainingGenes38);\r\n\r\n                // this happens for both Entrez Gene Only & Ensembl Only\r\n                if (genes37.Count == 1 && genes38.Count == 1)\r\n                {\r\n                    var gene37 = genes37[0];\r\n                    var gene38 = genes38[0];\r\n\r\n                    var mergedGene = 
CombinerUtils.Merge(gene37, gene38);\r\n                    combinedGenes.Add(mergedGene);\r\n                    continue;\r\n                }\r\n\r\n                // the following situations happen if we have:\r\n                // - one gene from GRCh37 and none from GRCh38 (or vice versa)\r\n                // - two or more non-overlapping genes on the same assembly (14 occurrences)\r\n                CombinerUtils.AddOrphans(combinedGenes, genes37);\r\n                CombinerUtils.AddOrphans(combinedGenes, genes38);\r\n            }\r\n        }\r\n\r\n        private static List<UgaGene> GetGenesByKey(IReadOnlyDictionary<string, List<UgaGene>> genesByKey, string key) =>\r\n            genesByKey.TryGetValue(key, out var genes) ? genes : UgaAssemblyCombiner.EmptyUgaGenes;\r\n\r\n        private static IEnumerable<string> GetAllKeys(IEnumerable<string> keys37, IEnumerable<string> keys38)\r\n        {\r\n            var keys = new HashSet<string>();\r\n            foreach (string key in keys37) keys.Add(key);\r\n            foreach (string key in keys38) keys.Add(key);\r\n            return keys;\r\n        }\r\n\r\n        private static string GetKey(UgaGene gene) =>\r\n            gene.EnsemblId + '|' + gene.EntrezGeneId + '|' + (gene.OnReverseStrand ? 
\"R\" : \"F\");\r\n\r\n        private static (List<UgaGene> EnsemblOnly, List<UgaGene> Both, List<UgaGene> EntrezGeneOnly) Partition(\r\n            IEnumerable<UgaGene> remainingGenes)\r\n        {\r\n            var ensemblOnly    = new List<UgaGene>();\r\n            var both           = new List<UgaGene>();\r\n            var entrezGeneOnly = new List<UgaGene>();\r\n\r\n            foreach (var gene in remainingGenes)\r\n            {\r\n                if (gene.EntrezGeneId != null && gene.EnsemblId != null) both.Add(gene);\r\n                else if (gene.EntrezGeneId != null) entrezGeneOnly.Add(gene);\r\n                else ensemblOnly.Add(gene);\r\n            }\r\n\r\n            return (ensemblOnly, both, entrezGeneOnly);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/DataStores/AssemblyDataStore.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing CacheUtils.Commands.Download;\r\nusing CacheUtils.Commands.UniversalGeneArchive;\r\nusing Genome;\r\nusing IO;\r\nusing ReferenceSequence.Utilities;\r\n\r\nnamespace CacheUtils.Genes.DataStores\r\n{\r\n    public sealed class AssemblyDataStore\r\n    {\r\n        private readonly string _description;\r\n        public readonly EnsemblGtf EnsemblGtf;\r\n        public readonly RefSeqGff RefSeqGff;\r\n        private readonly GlobalCache _globalCache;\r\n\r\n        private AssemblyDataStore(string description, EnsemblGtf ensemblGtf, RefSeqGff refSeqGff, GlobalCache globalCache)\r\n        {\r\n            _description = description;\r\n            EnsemblGtf   = ensemblGtf;\r\n            RefSeqGff    = refSeqGff;\r\n            _globalCache = globalCache;\r\n        }\r\n\r\n        public static AssemblyDataStore Create(string description, FilePaths.AssemblySpecificPaths paths,\r\n            Dictionary<string, Chromosome> refNameToChromosome, bool useGrch37)\r\n        {\r\n            string ensemblGtfPath      = useGrch37 ? ExternalFiles.EnsemblGtfFile37.FilePath      : ExternalFiles.EnsemblGtfFile38.FilePath;\r\n            string refseqGffPath       = useGrch37 ? ExternalFiles.RefSeqGffFile37.FilePath       : ExternalFiles.RefSeqGffFile38.FilePath;\r\n            string refseqGenomeGffPath = useGrch37 ? 
ExternalFiles.RefSeqGenomeGffFile37.FilePath : ExternalFiles.RefSeqGenomeGffFile38.FilePath;\r\n\r\n            var ensemblGtf = EnsemblGtf.Create(ensemblGtfPath, refNameToChromosome);\r\n            var refSeqGff  = RefSeqGff.Create(refseqGffPath, refseqGenomeGffPath, refNameToChromosome);\r\n\r\n            var (refIndexToChromosome, _, _) = SequenceHelper.GetDictionaries(paths.ReferencePath);\r\n            var globalCache = GlobalCache.Create(paths.RefSeqCachePath, paths.EnsemblCachePath, refIndexToChromosome, refNameToChromosome);\r\n\r\n            return new AssemblyDataStore(description, ensemblGtf, refSeqGff, globalCache);\r\n        }\r\n\r\n        public IUpdateHgncData UpdateHgncIds(Hgnc oldHgnc)\r\n        {\r\n            Logger.WriteLine($\"\\n*** {_description} ***\");\r\n\r\n            var hgnc = oldHgnc.Clone();\r\n\r\n            Logger.Write(\"- removing duplicate gene IDs from HGNC... \");\r\n            (int numEntrezGeneIdsRemoved, int numEnsemblIdsRemoved) = hgnc.RemoveDuplicateEntries();\r\n            Logger.WriteLine($\"{numEntrezGeneIdsRemoved} Entrez Gene, {numEnsemblIdsRemoved} Ensembl.\");\r\n\r\n            Logger.Write(\"- adding coordinates to the HGNC entries... \");\r\n            int numEntriesWithCoordinates = hgnc.AddCoordinates(EnsemblGtf, RefSeqGff);\r\n            Logger.WriteLine($\"{numEntriesWithCoordinates} with coordinates.\");\r\n\r\n            Logger.Write(\"- updating HGNC IDs for RefSeq genes... \");\r\n            int numGenesWithHgncId = hgnc.HgncGenes.Update(_globalCache.RefSeqGenesByRef, x => x.EntrezGeneId).Consolidate();\r\n            Logger.WriteLine($\"{numGenesWithHgncId} genes have HGNC ID.\");\r\n\r\n            Logger.Write(\"- updating HGNC IDs for Ensembl genes... 
\");\r\n            numGenesWithHgncId = hgnc.HgncGenes.Update(_globalCache.EnsemblGenesByRef, x => x.EnsemblId).Consolidate();\r\n            Logger.WriteLine($\"{numGenesWithHgncId} genes have HGNC ID.\");\r\n\r\n            return new UpdateHgncData(_globalCache.EnsemblGenesByRef, _globalCache.RefSeqGenesByRef);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/DataStores/EnsemblGtf.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing CacheUtils.Genes.IO;\r\nusing CacheUtils.Genes.Utilities;\r\nusing Compression.Utilities;\r\nusing Genome;\r\n\r\nnamespace CacheUtils.Genes.DataStores\r\n{\r\n    public sealed class EnsemblGtf\r\n    {\r\n        public readonly Dictionary<string, EnsemblGene> EnsemblIdToGene;\r\n        public readonly Dictionary<string, string> EnsemblIdToSymbol;\r\n\r\n        private EnsemblGtf(Dictionary<string, EnsemblGene> ensemblIdToGene, Dictionary<string, string> ensemblIdToSymbol)\r\n        {\r\n            EnsemblIdToGene   = ensemblIdToGene;\r\n            EnsemblIdToSymbol = ensemblIdToSymbol;\r\n        }\r\n\r\n        public static EnsemblGtf Create(string filePath, Dictionary<string, Chromosome> refNameToChromosome)\r\n        {\r\n            var ensemblGenes      = LoadEnsemblGenes(GZipUtilities.GetAppropriateStreamReader(filePath), refNameToChromosome);\r\n            var ensemblIdToGene   = ensemblGenes.GetSingleValueDict(x => x.GeneId);\r\n            var ensemblIdToSymbol = ensemblGenes.GetKeyValueDict(x => x.GeneId, x => x.Symbol);\r\n            return new EnsemblGtf(ensemblIdToGene, ensemblIdToSymbol);\r\n        }\r\n\r\n        private static EnsemblGene[] LoadEnsemblGenes(StreamReader streamReader,\r\n            Dictionary<string, Chromosome> refNameToChromosome)\r\n        {\r\n            EnsemblGene[] genes;\r\n            using (var reader = new EnsemblGtfReader(streamReader, refNameToChromosome)) genes = reader.GetGenes();\r\n            return genes;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/DataStores/GeneInfoData.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing CacheUtils.Genes.IO;\r\nusing CacheUtils.Genes.Utilities;\r\nusing Compression.Utilities;\r\n\r\nnamespace CacheUtils.Genes.DataStores\r\n{\r\n    public sealed class GeneInfoData\r\n    {\r\n        public readonly Dictionary<string, string> EntrezGeneIdToSymbol;\r\n\r\n        private GeneInfoData(Dictionary<string, string> entrezGeneIdToSymbol)\r\n        {\r\n            EntrezGeneIdToSymbol = entrezGeneIdToSymbol;\r\n        }\r\n\r\n        public static GeneInfoData Create(string filePath)\r\n        {\r\n            var entrezGeneIdToSymbol = LoadGeneInfoGenes(filePath)\r\n                .GetKeyValueDict(x => x.EntrezGeneId, x => x.Symbol);\r\n            return new GeneInfoData(entrezGeneIdToSymbol);\r\n        }\r\n\r\n        private static IEnumerable<GeneInfo> LoadGeneInfoGenes(string filePath)\r\n        {\r\n            GeneInfo[] genes;\r\n            using (var streamReader = GZipUtilities.GetAppropriateStreamReader(filePath))\r\n            using (var reader = new GeneInfoReader(streamReader)) genes = reader.GetGenes();\r\n            return genes;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/DataStores/GlobalCache.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing CacheUtils.Genes.Utilities;\r\nusing CacheUtils.IntermediateIO;\r\nusing Compression.Utilities;\r\nusing Genome;\r\n\r\nnamespace CacheUtils.Genes.DataStores\r\n{\r\n    public sealed class GlobalCache\r\n    {\r\n        public readonly Dictionary<ushort, List<MutableGene>> EnsemblGenesByRef;\r\n        public readonly Dictionary<ushort, List<MutableGene>> RefSeqGenesByRef;\r\n\r\n        private GlobalCache(Dictionary<ushort, List<MutableGene>> ensemblGenesByRef,\r\n            Dictionary<ushort, List<MutableGene>> refSeqGenesByRef)\r\n        {\r\n            EnsemblGenesByRef = ensemblGenesByRef;\r\n            RefSeqGenesByRef  = refSeqGenesByRef;\r\n        }\r\n\r\n        public static GlobalCache Create(string refSeqCachePath, string ensemblCachePath,\r\n            Dictionary<ushort, Chromosome> refIndexToChromosome, Dictionary<string, Chromosome> refNameToChromosome38)\r\n        {\r\n            var ensemblGenesByRef = FlattenGenes(LoadGenes(GZipUtilities.GetAppropriateReadStream(ensemblCachePath), refIndexToChromosome, refNameToChromosome38));\r\n            var refSeqGenesByRef  = FlattenGenes(LoadGenes(GZipUtilities.GetAppropriateReadStream(refSeqCachePath),  refIndexToChromosome, refNameToChromosome38));\r\n\r\n            return new GlobalCache(ensemblGenesByRef, refSeqGenesByRef);\r\n        }\r\n\r\n        private static Dictionary<ushort, List<MutableGene>> FlattenGenes(IEnumerable<MutableGene> genes)\r\n        {\r\n            var genesByRef = genes.GetMultiValueDict(x => x.Chromosome.Index);\r\n            var result     = new Dictionary<ushort, List<MutableGene>>();\r\n\r\n            foreach (var kvp in genesByRef.OrderBy(x => x.Key))\r\n            {\r\n                result[kvp.Key] = kvp.Value.GetMultiValueDict(x => x.GeneId).FlattenGeneList();\r\n            }\r\n\r\n        
    return result;\r\n        }\r\n\r\n        private static IEnumerable<MutableGene> LoadGenes(Stream stream,\r\n            Dictionary<ushort, Chromosome> refIndexToChromosome,\r\n            Dictionary<string, Chromosome> refNameToChromosome38)\r\n        {\r\n            var geneDict = new Dictionary<string, MutableGene>();\r\n\r\n            using (var reader = new MutableTranscriptReader(stream, refIndexToChromosome))\r\n            {\r\n                var transcripts = reader.GetTranscripts();\r\n\r\n                foreach (var transcript in transcripts)\r\n                {\r\n                    var gene   = transcript.Gene;\r\n                    string key = GetGeneKey(gene);\r\n                    if (geneDict.ContainsKey(key)) continue;\r\n\r\n                    gene.Chromosome = refNameToChromosome38[gene.Chromosome.UcscName];\r\n                    geneDict[key] = gene;\r\n                }\r\n            }\r\n\r\n            return geneDict.Values.OrderBy(x => x.Chromosome.Index).ThenBy(x => x.Start).ThenBy(x => x.End);\r\n        }\r\n\r\n        private static string GetGeneKey(MutableGene gene) => gene.GeneId + '|' + gene.Chromosome.UcscName + '|' +\r\n                                                              gene.Start + '|' + gene.End + '|' +\r\n                                                              (gene.OnReverseStrand ? 'R' : 'F');\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/DataStores/Hgnc.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing CacheUtils.Genes.IO;\r\nusing CacheUtils.Genes.Utilities;\r\nusing Genome;\r\nusing IO;\r\n\r\nnamespace CacheUtils.Genes.DataStores\r\n{\r\n    public sealed class Hgnc\r\n    {\r\n        public readonly HgncGene[] HgncGenes;\r\n        public readonly Dictionary<int, string> HgncIdToSymbol;\r\n\r\n        private Hgnc(HgncGene[] hgncGenes, Dictionary<int, string> hgncIdToSymbol)\r\n        {\r\n            HgncGenes      = hgncGenes;\r\n            HgncIdToSymbol = hgncIdToSymbol;\r\n        }\r\n\r\n        public static Hgnc Create(string filePath, Dictionary<string, Chromosome> refNameToChromosome)\r\n        {\r\n            var hgncGenes      = LoadHgncGenes(FileUtilities.GetReadStream(filePath), refNameToChromosome);\r\n            var hgncIdToSymbol = hgncGenes.GetKeyValueDict(x => x.HgncId, x => x.Symbol);\r\n            return new Hgnc(hgncGenes, hgncIdToSymbol);\r\n        }\r\n\r\n        private static HgncGene[] LoadHgncGenes(Stream stream, Dictionary<string, Chromosome> refNameToChromosome)\r\n        {\r\n            HgncGene[] genes;\r\n            using (var reader = new HgncReader(stream, refNameToChromosome)) genes = reader.GetGenes();\r\n            return genes;\r\n        }\r\n\r\n        public int AddCoordinates(EnsemblGtf ensemblGtf, RefSeqGff refSeqGff)\r\n        {\r\n            foreach (var hgncGene in HgncGenes)\r\n            {\r\n                (var refSeqGenes, EnsemblGene ensemblGene, int numMatches) = GetGenes(hgncGene.EntrezGeneId,\r\n                    refSeqGff.EntrezGeneIdToGene, hgncGene.EnsemblId, ensemblGtf.EnsemblIdToGene);\r\n\r\n                switch (numMatches)\r\n                {\r\n                    case 0:\r\n                        break;\r\n\r\n                    case 1:\r\n                        if (ensemblGene == null) 
AddCoordinatesFromGene(hgncGene, refSeqGenes[0]);\r\n                        else AddCoordinatesFromGene(hgncGene, ensemblGene);\r\n                        break;\r\n\r\n                    default:\r\n                        AddCoordinatesFromMultipleGenes(hgncGene, ensemblGene, refSeqGenes);\r\n                        break;\r\n                }\r\n            }\r\n\r\n            return HgncGenes.Count(hgncGene => hgncGene.Start != -1 && hgncGene.End != -1);\r\n        }\r\n\r\n        private static void AddCoordinatesFromMultipleGenes(HgncGene hgncGene, EnsemblGene ensemblGene, IEnumerable<RefSeqGene> refSeqGenes)\r\n        {\r\n            if (ensemblGene == null) return;\r\n\r\n            AddCoordinatesFromGene(hgncGene, ensemblGene);\r\n\r\n            foreach (var refSeqGene in refSeqGenes)\r\n            {\r\n                if (!Intervals.Utilities.Overlaps(hgncGene.Start, hgncGene.End, refSeqGene.Start, refSeqGene.End)) continue;\r\n                AddCoordinatesFromGene(hgncGene, refSeqGene);\r\n            }\r\n        }\r\n\r\n        private static void AddCoordinatesFromGene<T>(HgncGene hgncGene, IFlatGene<T> flatGene) where T : IFlatGene<T>\r\n        {\r\n            hgncGene.Start = hgncGene.Start == -1 ? flatGene.Start : Math.Min(hgncGene.Start, flatGene.Start);\r\n            hgncGene.End   = hgncGene.End   == -1 ? flatGene.End   : Math.Max(hgncGene.End, flatGene.End);\r\n        }\r\n\r\n        private static (List<RefSeqGene> RefSeqGenes, EnsemblGene EnsemblGene, int NumMatches) GetGenes(\r\n            string entrezGeneId, IReadOnlyDictionary<string, List<RefSeqGene>> entrezGeneIdToGene, string ensemblId,\r\n            IReadOnlyDictionary<string, EnsemblGene> ensemblIdToGene)\r\n        {\r\n            var refSeqGenes = GetRefSeqGenes(entrezGeneId, entrezGeneIdToGene);\r\n            var ensemblGene = GetEnsemblGene(ensemblId, ensemblIdToGene);\r\n            int numMatches  = (ensemblGene != null ? 
1 : 0) + refSeqGenes.Count;\r\n            return (refSeqGenes, ensemblGene, numMatches);\r\n        }\r\n\r\n        public Hgnc Clone()\r\n        {\r\n            var newGenes = new HgncGene[HgncGenes.Length];\r\n            for (var i = 0; i < HgncGenes.Length; i++) newGenes[i] = HgncGenes[i].Clone();\r\n            return new Hgnc(newGenes, HgncIdToSymbol);\r\n        }\r\n\r\n        private static EnsemblGene GetEnsemblGene(string ensemblId, IReadOnlyDictionary<string, EnsemblGene> ensemblIdToGene)\r\n        {\r\n            if (string.IsNullOrEmpty(ensemblId)) return null;\r\n            return ensemblIdToGene.TryGetValue(ensemblId, out var ensemblGene) ? ensemblGene : null;\r\n        }\r\n\r\n        private static readonly List<RefSeqGene> EmptyList = new List<RefSeqGene>();\r\n\r\n        private static List<RefSeqGene> GetRefSeqGenes(string entrezGeneId, IReadOnlyDictionary<string, List<RefSeqGene>> entrezGeneIdToGene)\r\n        {\r\n            if (string.IsNullOrEmpty(entrezGeneId)) return EmptyList;\r\n            return entrezGeneIdToGene.TryGetValue(entrezGeneId, out var geneList) ? 
geneList : EmptyList;\r\n        }\r\n\r\n        public (int NumEntrezGeneIdsRemoved, int NumEnsemblIdsRemoved) RemoveDuplicateEntries()\r\n        {\r\n            int numEntrezGeneIdsRemoved = RemoveDuplicatesByTranscriptSource(HgncGenes, x => x.EntrezGeneId, x => x.EntrezGeneId = null);\r\n            int numEnsemblIdsRemoved    = RemoveDuplicatesByTranscriptSource(HgncGenes, x => x.EnsemblId, x => x.EnsemblId = null);\r\n            return (numEntrezGeneIdsRemoved, numEnsemblIdsRemoved);\r\n        }\r\n\r\n        private static int RemoveDuplicatesByTranscriptSource(IEnumerable<HgncGene> newHgncGenes,\r\n            Func<HgncGene, string> idFunc, Action<HgncGene> nullAction)\r\n        {\r\n            var hgncByGeneId = newHgncGenes.GetMultiValueDict(idFunc);\r\n            var numGeneIdsRemoved = 0;\r\n\r\n            foreach (var kvp in hgncByGeneId)\r\n            {\r\n                if (kvp.Value.Count == 1) continue;\r\n                foreach (var hgncGene in kvp.Value) nullAction(hgncGene);\r\n                numGeneIdsRemoved++;\r\n            }\r\n\r\n            return numGeneIdsRemoved;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/DataStores/IUpdateHgncData.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\n\r\nnamespace CacheUtils.Genes.DataStores\r\n{\r\n    public interface IUpdateHgncData\r\n    {\r\n        Dictionary<ushort, List<MutableGene>> EnsemblGenesByRef { get; }\r\n        Dictionary<ushort, List<MutableGene>> RefSeqGenesByRef { get; }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/DataStores/RefSeqGff.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing CacheUtils.Genes.IO;\r\nusing CacheUtils.Genes.Utilities;\r\nusing Compression.Utilities;\r\nusing Genome;\r\n\r\nnamespace CacheUtils.Genes.DataStores\r\n{\r\n    public sealed class RefSeqGff\r\n    {\r\n        public readonly Dictionary<string, List<RefSeqGene>> EntrezGeneIdToGene;\r\n        public readonly Dictionary<string, string> EntrezGeneIdToSymbol;\r\n\r\n        private RefSeqGff(Dictionary<string, List<RefSeqGene>> entrezGeneIdToGene, Dictionary<string, string> entrezGeneIdToSymbol)\r\n        {\r\n            EntrezGeneIdToGene   = entrezGeneIdToGene;\r\n            EntrezGeneIdToSymbol = entrezGeneIdToSymbol;\r\n        }\r\n\r\n        public static RefSeqGff Create(string gcfGffPath, string refGffPath, Dictionary<string, Chromosome> accessionToChromosome)\r\n        {\r\n            var refSeqGenes = LoadRefSeqGffGenes(GZipUtilities.GetAppropriateStreamReader(gcfGffPath),\r\n                GZipUtilities.GetAppropriateStreamReader(refGffPath), accessionToChromosome);\r\n\r\n            var entrezGeneIdToGene = refSeqGenes\r\n                    .GetMultiValueDict(x => x.GeneId)\r\n                    .FlattenGeneList()\r\n                    .GetMultiValueDict(x => x.GeneId);\r\n\r\n            var entrezGeneIdToSymbol = refSeqGenes.GetKeyValueDict(x => x.GeneId, x => x.Symbol);\r\n\r\n            return new RefSeqGff(entrezGeneIdToGene, entrezGeneIdToSymbol);\r\n        }\r\n\r\n        private static List<RefSeqGene> LoadRefSeqGffGenes(StreamReader gcfGffReader, StreamReader refGffReader, Dictionary<string, Chromosome> accessionToChromosome)\r\n        {\r\n            var refSeqGenes = new List<RefSeqGene>();\r\n\r\n            LoadRefSeqGff(gcfGffReader, refSeqGenes, accessionToChromosome);\r\n            LoadRefSeqGff(refGffReader, refSeqGenes, accessionToChromosome);\r\n\r\n            return 
refSeqGenes.OrderBy(x => x.Chromosome.Index).ThenBy(x => x.Start).ThenBy(x => x.End).ToList();\r\n        }\r\n\r\n        private static void LoadRefSeqGff(StreamReader streamReader, List<RefSeqGene> refSeqGenes, Dictionary<string, Chromosome> accessionToChromosome)\r\n        {\r\n            using (var reader = new RefSeqGffReader(streamReader, accessionToChromosome))\r\n            {\r\n                reader.AddGenes(refSeqGenes);\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/DataStores/UpdateHgncData.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\n\r\nnamespace CacheUtils.Genes.DataStores\r\n{\r\n    public sealed class UpdateHgncData : IUpdateHgncData\r\n    {\r\n        public Dictionary<ushort, List<MutableGene>> EnsemblGenesByRef { get; }\r\n        public Dictionary<ushort, List<MutableGene>> RefSeqGenesByRef { get; }\r\n\r\n        public UpdateHgncData(Dictionary<ushort, List<MutableGene>> ensemblGenesByRef,\r\n            Dictionary<ushort, List<MutableGene>> refSeqGenesByRef)\r\n        {\r\n            EnsemblGenesByRef = ensemblGenesByRef;\r\n            RefSeqGenesByRef  = refSeqGenesByRef;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/DataStructures/EnsemblGene.cs",
    "content": "﻿using System;\r\nusing Genome;\r\n\r\nnamespace CacheUtils.Genes.DataStructures\r\n{\r\n    public sealed class EnsemblGene : IFlatGene<EnsemblGene>\r\n    {\r\n        public Chromosome Chromosome { get; }\r\n        public int Start { get; }\r\n        public int End { get; set; }\r\n        public string GeneId { get; }\r\n        public string Symbol { get; }\r\n\r\n        public EnsemblGene(Chromosome chromosome, int start, int end, string geneId, string symbol)\r\n        {\r\n            Chromosome      = chromosome;\r\n            Start           = start;\r\n            End             = end;\r\n            GeneId          = geneId;\r\n            Symbol          = symbol;\r\n        }\r\n\r\n        public EnsemblGene Clone() => throw new NotImplementedException();\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/DataStructures/GeneInfo.cs",
    "content": "﻿namespace CacheUtils.Genes.DataStructures\n{\n    public sealed class GeneInfo\n    {\n        public string Symbol { get; }\n        public string EntrezGeneId { get; }\n\n        public GeneInfo(string symbol, string entrezGeneId)\n        {\n            Symbol       = symbol;\n            EntrezGeneId = entrezGeneId;\n        }\n    }\n}\n"
  },
  {
    "path": "CacheUtils/Genes/DataStructures/HgncGene.cs",
    "content": "﻿\r\nusing Genome;\r\n\r\nnamespace CacheUtils.Genes.DataStructures\r\n{\r\n    public sealed class HgncGene : IChromosomeInterval\r\n    {\r\n        public Chromosome Chromosome { get; }\r\n        public int Start { get; set; }\r\n        public int End { get; set; }\r\n        public string Symbol { get; }\r\n        public string EntrezGeneId { get; set; }\r\n        public string EnsemblId { get; set; }\r\n        public readonly int HgncId;\r\n\r\n        public HgncGene(Chromosome chromosome, int start, int end, string symbol, string entrezGeneId,\r\n            string ensemblId, int hgncId)\r\n        {\r\n            Chromosome   = chromosome;\r\n            Start        = start;\r\n            End          = end;\r\n            Symbol       = symbol;\r\n            EntrezGeneId = entrezGeneId;\r\n            EnsemblId    = ensemblId;\r\n            HgncId       = hgncId;\r\n        }\r\n\r\n        public HgncGene Clone() => new HgncGene(Chromosome, -1, -1, Symbol, EntrezGeneId, EnsemblId, HgncId);\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/DataStructures/IFlatGene.cs",
    "content": "﻿\r\nusing Genome;\r\n\r\nnamespace CacheUtils.Genes.DataStructures\r\n{\r\n    public interface IFlatGene<out T>\r\n    {\r\n        Chromosome Chromosome { get; }\r\n        int Start { get; }\r\n        int End { get; set; }\r\n        T Clone();\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/DataStructures/RefSeqGene.cs",
    "content": "﻿\r\nusing Genome;\r\n\r\nnamespace CacheUtils.Genes.DataStructures\r\n{\r\n    public sealed class RefSeqGene : IFlatGene<RefSeqGene>\r\n    {\r\n        public Chromosome Chromosome { get; }\r\n        public int Start { get; }\r\n        public int End { get; set; }\r\n        private bool OnReverseStrand { get; }\r\n        public string GeneId { get; }\r\n        public string Symbol { get; }\r\n        private int HgncId { get; }\r\n\r\n        public RefSeqGene(Chromosome chromosome, int start, int end, bool onReverseStrand, string entrezGeneId,\r\n            string symbol, int hgncId)\r\n        {\r\n            Chromosome      = chromosome;\r\n            Start           = start;\r\n            End             = end;\r\n            OnReverseStrand = onReverseStrand;\r\n            GeneId          = entrezGeneId;\r\n            Symbol          = symbol;\r\n            HgncId          = hgncId;\r\n        }\r\n\r\n        public RefSeqGene Clone() => new RefSeqGene(Chromosome, Start, End, OnReverseStrand, GeneId, Symbol, HgncId);\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/DataStructures/UgaGene.cs",
    "content": "﻿using Genome;\r\nusing Intervals;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches.DataStructures;\r\n\r\nnamespace CacheUtils.Genes.DataStructures\r\n{\r\n    public sealed class UgaGene\r\n    {\r\n        public readonly Chromosome Chromosome;\r\n        public readonly IInterval GRCh37;\r\n        public readonly IInterval GRCh38;\r\n        public readonly bool OnReverseStrand;\r\n        public readonly int HgncId;\r\n\r\n        public string Symbol { get; set; }\r\n        public string EntrezGeneId { get; }\r\n        public string EnsemblId { get; }\r\n\r\n        public UgaGene(Chromosome chromosome, IInterval grch37, IInterval grch38, bool onReverseStrand,\r\n            string entrezGeneId, string ensemblId, string symbol, int hgncId)\r\n        {\r\n            Chromosome       = chromosome;\r\n            GRCh37           = grch37;\r\n            GRCh38           = grch38;\r\n            EntrezGeneId     = entrezGeneId;\r\n            EnsemblId        = ensemblId;\r\n            Symbol           = symbol;\r\n            OnReverseStrand = onReverseStrand;\r\n            HgncId           = hgncId;\r\n        }\r\n\r\n        public override string ToString()\r\n        {\r\n            string interval37 = GetInterval(GRCh37);\r\n            string interval38 = GetInterval(GRCh38);\r\n            string strand     = OnReverseStrand ? \"R\" : \"F\";\r\n            return $\"{Chromosome.UcscName}\\t{Chromosome.EnsemblName}\\t{Symbol}\\t{interval37}\\t{interval38}\\t{strand}\\t{HgncId}\\t{EnsemblId}\\t{EntrezGeneId}\";\r\n        }\r\n\r\n        private static string GetInterval(IInterval interval) =>\r\n            interval == null ? \"-1\\t-1\" : $\"{interval.Start}\\t{interval.End}\";\r\n\r\n        public Gene ToGene(GenomeAssembly genomeAssembly)\r\n        {\r\n            var interval = genomeAssembly == GenomeAssembly.GRCh37 ? 
GRCh37 : GRCh38;\r\n            return new Gene(Chromosome, interval.Start, interval.End, OnReverseStrand, Symbol, HgncId, CompactId.Convert(EntrezGeneId), CompactId.Convert(EnsemblId));\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/GeneFlattener.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.Linq;\r\nusing CacheUtils.Genes.DataStructures;\r\n\r\nnamespace CacheUtils.Genes\r\n{\r\n    public static class GeneFlattener\r\n    {\r\n        public static List<T> FlattenGeneList<T>(this Dictionary<string, List<T>> genesById) where T : IFlatGene<T>\r\n        {\r\n            var genesList = new List<T>();\r\n\r\n            foreach (var genes in genesById.Values)\r\n            {\r\n                var flatGenes = FlattenWithSameId(genes);\r\n                genesList.AddRange(flatGenes);\r\n            }\r\n\r\n            return genesList.OrderBy(x => x.Chromosome.Index).ThenBy(x => x.Start).ThenBy(x => x.End).ToList();\r\n        }\r\n\r\n        internal static List<T> FlattenWithSameId<T>(List<T> genes) where T : IFlatGene<T>\r\n        {\r\n            if (genes == null || genes.Count == 1) return genes;\r\n\r\n            var flatGenes = new List<T>();\r\n            var seedGene  = genes[0].Clone();\r\n\r\n            foreach (var gene in genes)\r\n            {\r\n                if (Intervals.Utilities.Overlaps(seedGene.Start, seedGene.End, gene.Start, gene.End))\r\n                {\r\n                    seedGene.End = Math.Max(seedGene.End, gene.End);\r\n                    continue;\r\n                }\r\n\r\n                flatGenes.Add(seedGene);\r\n                seedGene = gene.Clone();\r\n            }\r\n\r\n            flatGenes.Add(seedGene);\r\n            return flatGenes;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/GeneMerger.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing CacheUtils.Genes.DataStores;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing CacheUtils.Genes.Utilities;\r\nusing Intervals;\r\nusing IO;\r\n\r\nnamespace CacheUtils.Genes\r\n{\r\n    public static class GeneMerger\r\n    {\r\n        public static Dictionary<ushort, List<UgaGene>> MergeByHgnc(this IUpdateHgncData data, bool isGrch37)\r\n        {\r\n            Logger.Write(\"- merging RefSeq & Ensembl genes... \");\r\n\r\n            var genesByRef       = new Dictionary<ushort, List<MutableGene>>();\r\n            var mergedGenesByRef = new Dictionary<ushort, List<UgaGene>>();\r\n\r\n            AddGenes(data.EnsemblGenesByRef, genesByRef);\r\n            AddGenes(data.RefSeqGenesByRef, genesByRef);\r\n\r\n            var totalOrphanEntries = 0;\r\n            var totalMergedEntries = 0;\r\n\r\n            foreach (var kvp in genesByRef)\r\n            {\r\n                var hgncIdToGenes = kvp.Value.GetMultiValueDict(x => x.HgncId.ToString() + '|' + (x.OnReverseStrand ? 
'R' : 'F'));\r\n                (var mergedGenes, int numOrphanEntries, int numMergedEntries) = GetMergedGenes(hgncIdToGenes, isGrch37);\r\n\r\n                mergedGenesByRef[kvp.Key] = mergedGenes;\r\n\r\n                totalOrphanEntries += numOrphanEntries;\r\n                totalMergedEntries += numMergedEntries;\r\n            }\r\n\r\n            Logger.WriteLine($\"orphans: {totalOrphanEntries}, merged: {totalMergedEntries}\");\r\n\r\n            return mergedGenesByRef;\r\n        }\r\n\r\n        private static void AddGenes(Dictionary<ushort, List<MutableGene>> source,\r\n            Dictionary<ushort, List<MutableGene>> target)\r\n        {\r\n            foreach (var kvp in source)\r\n            {\r\n                if (target.TryGetValue(kvp.Key, out var targetGeneList))\r\n                {\r\n                    targetGeneList.AddRange(kvp.Value);\r\n                }\r\n                else\r\n                {\r\n                    var geneList = new List<MutableGene>();\r\n                    geneList.AddRange(kvp.Value);\r\n                    target[kvp.Key] = geneList;\r\n                }\r\n            }\r\n        }\r\n\r\n        private static (List<UgaGene> MergedGenes, int NumOrphanEntries, int NumMergedEntries) GetMergedGenes(\r\n            Dictionary<string, List<MutableGene>> hgncIdToGenes, bool isGrch37)\r\n        {\r\n            var mergedGenes      = new List<UgaGene>();\r\n            var numOrphanEntries = 0;\r\n            var numMergedEntries = 0;\r\n\r\n            foreach (var kvp in hgncIdToGenes)\r\n            {\r\n                if (kvp.Key.StartsWith(\"-1|\") || kvp.Value.Count == 1)\r\n                {\r\n                    var convertedGenes = ConvertToUgaGenes(kvp.Value, isGrch37);\r\n                    mergedGenes.AddRange(convertedGenes);\r\n                    numOrphanEntries += convertedGenes.Count;\r\n                    continue;\r\n                }\r\n\r\n                if (kvp.Value.Count > 2) 
throw new InvalidDataException(\"Found more than two genes when merging Ensembl and RefSeq genes.\");\r\n                mergedGenes.Add(GetMergedGene(kvp.Value[0], kvp.Value[1], isGrch37));\r\n                numMergedEntries++;\r\n            }\r\n\r\n            return (mergedGenes, numOrphanEntries, numMergedEntries);\r\n        }\r\n\r\n        private static List<UgaGene> ConvertToUgaGenes(IEnumerable<MutableGene> genes, bool isGrch37)\r\n        {\r\n            var ugaGenes = new List<UgaGene>();\r\n\r\n            // ReSharper disable once LoopCanBeConvertedToQuery\r\n            foreach (var gene in genes)\r\n            {\r\n                if (gene.GeneId == null) continue;\r\n                ugaGenes.Add(gene.ToUgaGene(isGrch37));\r\n            }\r\n\r\n            return ugaGenes;\r\n        }\r\n\r\n        private static UgaGene GetMergedGene(MutableGene geneA, MutableGene geneB, bool isGrch37)\r\n        {\r\n            (MutableGene ensemblGene, MutableGene refSeqGene) = geneA.GeneId.StartsWith(\"ENSG\") ? (geneA, geneB) : (geneB, geneA);\r\n\r\n            if (ensemblGene.Chromosome.Index != refSeqGene.Chromosome.Index) throw new InvalidDataException($\"The two genes are on different chromosomes: {geneA.GeneId} & {geneB.GeneId}\");\r\n            if (ensemblGene.OnReverseStrand  != refSeqGene.OnReverseStrand)  throw new InvalidDataException($\"Both genes do not have the same orientation: {geneA.GeneId} & {geneB.GeneId}\");\r\n\r\n            IInterval interval = GetMergedInterval(ensemblGene, refSeqGene);\r\n            (IInterval grch37, IInterval grch38) = isGrch37 ? 
(interval, null as IInterval) : (null as IInterval, interval);\r\n\r\n            return new UgaGene(ensemblGene.Chromosome, grch37, grch38, ensemblGene.OnReverseStrand, refSeqGene.GeneId,\r\n                ensemblGene.GeneId, ensemblGene.Symbol, ensemblGene.HgncId);\r\n        }\r\n\r\n        private static IInterval GetMergedInterval(MutableGene geneA, MutableGene geneB) =>\r\n            new Interval(Math.Min(geneA.Start, geneB.Start), Math.Max(geneA.End, geneB.End));\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/GeneSymbolUpdater.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing IO;\r\n\r\nnamespace CacheUtils.Genes\r\n{\r\n    public sealed class GeneSymbolUpdater\r\n    {\r\n        private int _numUpdatedByHgncId;\r\n        private int _numUpdatedByEntrezGeneId;\r\n        private int _numUpdatedByEnsemblId;\r\n        private int _numUpdatedByRefSeqGff;\r\n\r\n        private readonly Dictionary<int, string> _hgncIdToSymbol;\r\n        private readonly Dictionary<string, string> _entrezGeneIdToSymbol;\r\n        private readonly Dictionary<string, string> _ensemblIdToSymbol;\r\n        private readonly Dictionary<string, string> _refseqGeneIdToSymbol;\r\n\r\n        public GeneSymbolUpdater(Dictionary<int, string> hgncIdToSymbol,\r\n            Dictionary<string, string> entrezGeneIdToSymbol, Dictionary<string, string> ensemblIdToSymbol,\r\n            Dictionary<string, string> refseqGeneIdToSymbol)\r\n        {\r\n            _hgncIdToSymbol       = hgncIdToSymbol;\r\n            _entrezGeneIdToSymbol = entrezGeneIdToSymbol;\r\n            _ensemblIdToSymbol    = ensemblIdToSymbol;\r\n            _refseqGeneIdToSymbol = refseqGeneIdToSymbol;\r\n        }\r\n\r\n        public void Update(UgaGene[] mergedGenes)\r\n        {\r\n            Logger.Write(\"- updating gene symbols... 
\");\r\n            foreach (var gene in mergedGenes) UpdateGeneSymbol(gene);\r\n            Logger.WriteLine($\"{_numUpdatedByHgncId} by HGNC id, {_numUpdatedByEntrezGeneId} by Entrez Gene ID, {_numUpdatedByEnsemblId} by Ensembl ID, {_numUpdatedByRefSeqGff} by RefSeq GFF\");\r\n\r\n            int numGenesMissingSymbol = mergedGenes.Count(gene => string.IsNullOrEmpty(gene.Symbol));\r\n            if (numGenesMissingSymbol > 0) throw new InvalidDataException($\"{numGenesMissingSymbol} genes are missing symbols.\");\r\n        }\r\n\r\n        private void UpdateGeneSymbol(UgaGene gene)\r\n        {\r\n            string originalSymbol = gene.Symbol;\r\n            bool isUpdated = UpdateBySymbolDict(gene, x => x.HgncId, x => x == -1, _hgncIdToSymbol);\r\n\r\n            if (isUpdated)\r\n            {\r\n                if (gene.Symbol != originalSymbol) _numUpdatedByHgncId++;\r\n                return;\r\n            }\r\n\r\n            isUpdated = UpdateBySymbolDict(gene, x => x.EntrezGeneId, string.IsNullOrEmpty, _entrezGeneIdToSymbol);\r\n\r\n            if (isUpdated)\r\n            {\r\n                if (gene.Symbol != originalSymbol) _numUpdatedByEntrezGeneId++;\r\n                return;\r\n            }\r\n\r\n            isUpdated = UpdateBySymbolDict(gene, x => x.EnsemblId, string.IsNullOrEmpty, _ensemblIdToSymbol);\r\n\r\n            if (isUpdated)\r\n            {\r\n                if (gene.Symbol != originalSymbol) _numUpdatedByEnsemblId++;\r\n                return;\r\n            }\r\n\r\n            isUpdated = UpdateBySymbolDict(gene, x => x.EntrezGeneId, string.IsNullOrEmpty, _refseqGeneIdToSymbol);\r\n\r\n            // ReSharper disable once InvertIf\r\n            if (isUpdated && gene.Symbol != originalSymbol) _numUpdatedByRefSeqGff++;\r\n        }\r\n\r\n        private static bool UpdateBySymbolDict<T>(UgaGene gene, Func<UgaGene, T> idFunc, Func<T, bool> isEmpty, IReadOnlyDictionary<T, string> idToSymbol)\r\n        {\r\n            var 
key = idFunc(gene);\r\n            if (isEmpty(key)) return false;\r\n\r\n            if (!idToSymbol.TryGetValue(idFunc(gene), out string symbol)) return false;\r\n            gene.Symbol = symbol;\r\n            return true;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/HgncIdConsolidator.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.Linq;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing CacheUtils.Genes.Utilities;\r\n\r\nnamespace CacheUtils.Genes\r\n{\r\n    public static class HgncIdConsolidator\r\n    {\r\n        public static int Consolidate(this Dictionary<ushort, List<MutableGene>> genesByRef)\r\n        {\r\n            var numHgncIds = 0;\r\n\r\n            foreach (var refKvp in genesByRef.OrderBy(x => x.Key))\r\n            {\r\n                var genesByHgncId = refKvp.Value.Where(gene => gene.HgncId != -1).GetMultiValueDict(x => x.HgncId);\r\n\r\n                foreach (var kvp in genesByHgncId)\r\n                {\r\n                    if (kvp.Value.Count <= 1) continue;\r\n                    CreateAggregateGene(kvp.Value.OrderBy(x => x.Start).ThenBy(x => x.End).ToList());\r\n                }\r\n\r\n                numHgncIds += refKvp.Value.Count(gene => gene.HgncId != -1);\r\n            }\r\n\r\n            return numHgncIds;\r\n        }\r\n\r\n        private static void CreateAggregateGene(IReadOnlyList<MutableGene> genes)\r\n        {\r\n            var seedGene = genes[0];\r\n            for (var i = 1; i < genes.Count; i++)\r\n            {\r\n                genes[i].GeneId = null;\r\n                genes[i].HgncId = -1;\r\n                seedGene.End = Math.Max(seedGene.End, genes[i].End);\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/HgncIdUpdater.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing CacheUtils.Genes.Utilities;\r\n\r\nnamespace CacheUtils.Genes\r\n{\r\n    public static class HgncIdUpdater\r\n    {\r\n        public static Dictionary<ushort, List<MutableGene>> Update(this IEnumerable<HgncGene> hgncGenes,\r\n            Dictionary<ushort, List<MutableGene>> genesByRef, Func<HgncGene, string> idFunc)\r\n        {\r\n            var geneIdToHgncId = hgncGenes.GetSingleValueDict(idFunc);\r\n            foreach (var kvp in genesByRef) ReplaceHgncIds(kvp.Value, geneIdToHgncId);\r\n            return genesByRef;\r\n        }\r\n\r\n        private static void ReplaceHgncIds(IEnumerable<MutableGene> genes, IReadOnlyDictionary<string, HgncGene> geneIdToHgncGene)\r\n        {\r\n            foreach (var gene in genes)\r\n            {\r\n                gene.HgncId = -1;\r\n                if (!geneIdToHgncGene.TryGetValue(gene.GeneId, out var hgncGene)) continue;\r\n                if (!Intervals.Utilities.Overlaps(hgncGene.Start, hgncGene.End, gene.Start, gene.End)) continue;\r\n\r\n                gene.HgncId = hgncGene.HgncId;\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/IO/EnsemblGtfReader.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing OptimizedCore;\r\n\r\nnamespace CacheUtils.Genes.IO\r\n{\r\n    public sealed class EnsemblGtfReader : IDisposable\r\n    {\r\n        private readonly Dictionary<string, Chromosome> _refNameToChromosome;\r\n        private readonly StreamReader _reader;\r\n\r\n        private const int ChromosomeIndex  = 0;\r\n        private const int FeatureTypeIndex = 2;\r\n        private const int StartIndex       = 3;\r\n        private const int EndIndex         = 4;\r\n        private const int InfoIndex        = 8;\r\n\r\n        public EnsemblGtfReader(StreamReader reader, Dictionary<string, Chromosome> refNameToChromosome)\r\n        {\r\n            _refNameToChromosome = refNameToChromosome;\r\n            _reader = reader;\r\n            _reader.ReadLine();\r\n        }\r\n\r\n        public EnsemblGene[] GetGenes()\r\n        {\r\n            var genes = new List<EnsemblGene>();\r\n\r\n            while (true)\r\n            {\r\n                string line = _reader.ReadLine();\r\n                if (line == null) break;\r\n\r\n                if (line.OptimizedStartsWith('#')) continue;\r\n\r\n                var cols = line.OptimizedSplit('\\t');\r\n                if (cols.Length != 9) throw new InvalidDataException($\"Expected 9 columns but found {cols.Length} when parsing the GFF entry.\");\r\n\r\n                string featureType = cols[FeatureTypeIndex];\r\n                if (featureType != \"gene\") continue;\r\n\r\n                AddGene(cols, genes);\r\n            }\r\n\r\n            return genes.ToArray();\r\n        }\r\n\r\n        private void AddGene(string[] cols, ICollection<EnsemblGene> genes)\r\n        {\r\n            var chromosome = RefSeqGffReader.GetChromosome(cols[ChromosomeIndex], _refNameToChromosome);\r\n            if (chromosome == null) 
return;\r\n\r\n            try\r\n            {\r\n                int start    = int.Parse(cols[StartIndex]);\r\n                int end      = int.Parse(cols[EndIndex]);\r\n                var infoCols = cols[InfoIndex].Split(';', StringSplitOptions.RemoveEmptyEntries);\r\n                var info     = GetGffFields(infoCols);\r\n\r\n                var gene = new EnsemblGene(chromosome, start, end, info.EnsemblGeneId, info.Name);\r\n                genes.Add(gene);\r\n            }\r\n            catch (Exception)\r\n            {\r\n                Console.WriteLine();\r\n                Console.WriteLine(\"Offending line: {0}\", string.Join('\\t', cols));\r\n                for (var i = 0; i < cols.Length; i++) Console.WriteLine(\"- col {0}: [{1}]\", i, cols[i]);\r\n                throw;\r\n            }\r\n        }\r\n\r\n        private static (string EnsemblGeneId, string Name) GetGffFields(string[] cols)\r\n        {\r\n            string ensemblId = null;\r\n            string symbol    = null;\r\n\r\n            foreach (string col in cols)\r\n            {\r\n                var kvp      = col.Trim().OptimizedSplit(' ');\r\n                string key   = kvp[0];\r\n                string value = kvp[1].Trim('\\\"');\r\n\r\n                // ReSharper disable once SwitchStatementMissingSomeCases\r\n                switch (key)\r\n                {\r\n                    case \"gene_id\":\r\n                        ensemblId = value;\r\n                        break;\r\n                    case \"gene_name\":\r\n                        symbol = value;\r\n                        break;\r\n                }\r\n            }\r\n\r\n            if (string.IsNullOrEmpty(ensemblId) || string.IsNullOrEmpty(symbol))\r\n            {\r\n                throw new UserErrorException(string.Join('\\t', cols));\r\n            }\r\n\r\n            return (ensemblId, symbol);\r\n        }\r\n\r\n        public void Dispose() => _reader.Dispose();\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/IO/GeneInfoReader.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing OptimizedCore;\r\n\r\nnamespace CacheUtils.Genes.IO\r\n{\r\n    public sealed class GeneInfoReader : IDisposable\r\n    {\r\n        private readonly StreamReader _reader;\r\n\r\n        private int _entrezGeneIndex = -1;\r\n        private int _symbolIndex     = -1;\r\n        private int _dbXrefsIndex    = -1;\r\n\r\n        public GeneInfoReader(StreamReader reader)\r\n        {\r\n            _reader = reader;\r\n            string headerLine = _reader.ReadLine();\r\n            SetColumnIndices(headerLine);\r\n        }\r\n\r\n        private void SetColumnIndices(string line)\r\n        {\r\n            if (line.StartsWith(\"#Format: \"))  line = line.Substring(9);\r\n            if (line.OptimizedStartsWith('#')) line = line.Substring(1);\r\n\r\n            var cols = line.OptimizedSplit('\\t');\r\n            if (cols.Length == 1) cols = line.OptimizedSplit(' ');\r\n\r\n            for (var index = 0; index < cols.Length; index++)\r\n            {\r\n                string header = cols[index];\r\n\r\n                // ReSharper disable once SwitchStatementMissingSomeCases\r\n                switch (header)\r\n                {\r\n                    case \"dbXrefs\":\r\n                        _dbXrefsIndex = index;\r\n                        break;\r\n                    case \"GeneID\":\r\n                        _entrezGeneIndex = index;\r\n                        break;\r\n                    case \"Symbol\":\r\n                        _symbolIndex = index;\r\n                        break;\r\n                }\r\n            }\r\n\r\n            // ReSharper disable once InvertIf\r\n            if (_entrezGeneIndex == -1 || _symbolIndex == -1) {\r\n                Console.WriteLine(\"_dbXrefsIndex:    {0}\", _dbXrefsIndex);\r\n                Console.WriteLine(\"_entrezGeneIndex: {0}\", _entrezGeneIndex);\r\n      
          Console.WriteLine(\"_symbolIndex:     {0}\", _symbolIndex);\r\n\r\n                throw new InvalidDataException(\"Not all of the indices were set.\");\r\n            }\r\n        }\r\n\r\n        /// <summary>\r\n        /// retrieves the next gene. Returns null if there are no more genes available (end of input or the first line not starting with 9606)\r\n        /// </summary>\r\n        private GeneInfo Next()\r\n        {\r\n            string line = _reader.ReadLine();\r\n            if (line == null) return null;\r\n\r\n            if (!line.StartsWith(\"9606\")) return null;\r\n\r\n            var cols = line.OptimizedSplit('\\t');\r\n            if (cols.Length != 16) throw new InvalidDataException($\"Expected 16 columns but found {cols.Length} when parsing the gene entry:\\n[{line}]\");\r\n\r\n            try\r\n            {\r\n                string entrezGeneId = cols[_entrezGeneIndex];\r\n                string symbol       = cols[_symbolIndex];\r\n\r\n                return new GeneInfo(symbol, entrezGeneId);\r\n            }\r\n            catch (Exception)\r\n            {\r\n                Console.WriteLine(\"Offending line: {0}\", line);\r\n                for (var i = 0; i < cols.Length; i++) Console.WriteLine(\"- col {0}: [{1}]\", i, cols[i]);\r\n                throw;\r\n            }\r\n        }\r\n\r\n        public GeneInfo[] GetGenes()\r\n        {\r\n            var list = new List<GeneInfo>();\r\n\r\n            while (true)\r\n            {\r\n                var gene = Next();\r\n                if (gene == null) break;\r\n                list.Add(gene);\r\n            }\r\n\r\n            return list.ToArray();\r\n        }\r\n\r\n        public void Dispose() => _reader.Dispose();\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/IO/HgncReader.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing CacheUtils.Genes.DataStructures;\nusing Genome;\nusing IO;\nusing OptimizedCore;\n\nnamespace CacheUtils.Genes.IO\n{\n    public sealed class HgncReader : IDisposable\n    {\n        private readonly Dictionary<string, Chromosome> _refNameToChromosome;\n        private readonly StreamReader _reader;\n\n        private const int HgncIdIndex    = 0;\n        private const int SymbolIndex    = 1;\n        private const int LocationIndex  = 6;\n        private const int EntrezIdIndex  = 18;\n        private const int EnsemblIdIndex = 19;\n\n        public HgncReader(Stream stream, Dictionary<string, Chromosome> refNameToChromosome)\n        {\n            _refNameToChromosome = refNameToChromosome;\n            _reader = FileUtilities.GetStreamReader(stream);\n            _reader.ReadLine();\n        }\n\n        /// <summary>\n        /// retrieves the next gene. Returns null if there are no more genes available\n        /// </summary>\n        private HgncGene Next()\n        {\n            string line = _reader.ReadLine();\n            if (line == null) return null;\n\n            var cols = line.OptimizedSplit('\\t');\n            if (cols.Length != 49) throw new InvalidDataException($\"Expected 49 columns but found {cols.Length} when parsing the gene entry:[{line}]\");\n\n            try\n            {\n                int hgncId             = int.Parse(cols[HgncIdIndex].Substring(5));\n                string symbol          = cols[SymbolIndex];\n                Chromosome chromosome = GetChromosome(cols[LocationIndex]);\n                string entrezGeneId    = GetId(cols[EntrezIdIndex]);\n                string ensemblId       = GetId(cols[EnsemblIdIndex]);\n\n                return new HgncGene(chromosome, -1, -1, symbol, entrezGeneId, ensemblId, hgncId);\n            }\n            catch (Exception)\n            {\n                Console.WriteLine(\"Offending line: 
{0}\", line);\n                for (var i = 0; i < cols.Length; i++) Console.WriteLine(\"- col {0}: [{1}]\", i, cols[i]);\n                throw;\n            }\n        }\n\n        public HgncGene[] GetGenes()\n        {\n            var list = new List<HgncGene>();\n\n            while (true)\n            {\n                var gene = Next();\n                if (gene == null) break;\n                list.Add(gene);\n            }\n\n            return list.ToArray();\n        }\n\n        private Chromosome GetChromosome(string cytogeneticBand)\n        {\n            int armPos = GetArmPos(cytogeneticBand);\n            if (armPos == -1) return Chromosome.GetEmptyChromosome(cytogeneticBand);\n\n            string chrName = cytogeneticBand.Substring(0, armPos);\n            return ReferenceNameUtilities.GetChromosome(_refNameToChromosome, chrName);\n        }\n\n        private static int GetArmPos(string cytogeneticBand)\n        {\n            int pos = cytogeneticBand.IndexOf('p');\n            if (pos != -1) return pos;\n\n            pos = cytogeneticBand.IndexOf('q');\n            return pos;\n        }\n\n        private static string GetId(string s) => string.IsNullOrEmpty(s) ? null : s;\n\n        public void Dispose() => _reader.Dispose();\n    }\n}\n"
  },
  {
    "path": "CacheUtils/Genes/IO/RefSeqGffReader.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing Genome;\r\nusing OptimizedCore;\r\n\r\nnamespace CacheUtils.Genes.IO\r\n{\r\n    public sealed class RefSeqGffReader : IDisposable\r\n    {\r\n        private readonly Dictionary<string, Chromosome> _accessionIdToChromosome;\r\n        private readonly StreamReader _reader;\r\n\r\n        private const int AccessionIndex   = 0;\r\n        private const int FeatureTypeIndex = 2;\r\n        private const int StartIndex       = 3;\r\n        private const int EndIndex         = 4;\r\n        private const int StrandIndex      = 6;\r\n        private const int InfoIndex        = 8;\r\n\r\n        public RefSeqGffReader(StreamReader reader, Dictionary<string, Chromosome> accessionIdToChromosome)\r\n        {\r\n            _accessionIdToChromosome = accessionIdToChromosome;\r\n            _reader = reader;\r\n            _reader.ReadLine();\r\n        }\r\n\r\n        public void AddGenes(List<RefSeqGene> refSeqGenes)\r\n        {\r\n            while (true)\r\n            {\r\n                string line = _reader.ReadLine();\r\n                if (line == null) break;\r\n\r\n                if (line.OptimizedStartsWith('#')) continue;\r\n\r\n                var cols = line.OptimizedSplit('\\t');\r\n                if (cols.Length != 9) throw new InvalidDataException($\"Expected 9 columns but found {cols.Length} when parsing the GFF entry.\");\r\n\r\n                string featureType = cols[FeatureTypeIndex];\r\n                if (featureType == \"gene\") AddGene(cols, refSeqGenes);\r\n            }\r\n        }\r\n\r\n        private void AddGene(string[] cols, ICollection<RefSeqGene> refSeqGenes)\r\n        {\r\n            var chromosome = GetChromosome(cols[AccessionIndex], _accessionIdToChromosome);\r\n            if (chromosome == null) return;\r\n\r\n            try\r\n            {\r\n                int start            = 
int.Parse(cols[StartIndex]);\r\n                int end              = int.Parse(cols[EndIndex]);\r\n                bool onReverseStrand = cols[StrandIndex] == \"-\";\r\n                var infoCols         = cols[InfoIndex].OptimizedSplit(';');\r\n                var info             = GetGffFields(infoCols);\r\n\r\n                var gene = new RefSeqGene(chromosome, start, end, onReverseStrand, info.EntrezGeneId, info.Name, info.HgncId);\r\n                refSeqGenes.Add(gene);\r\n            }\r\n            catch (Exception)\r\n            {\r\n                Console.WriteLine();\r\n                Console.WriteLine(\"Offending line: {0}\", string.Join('\\t', cols));\r\n                for (var i = 0; i < cols.Length; i++) Console.WriteLine(\"- col {0}: [{1}]\", i, cols[i]);\r\n                throw;\r\n            }\r\n        }\r\n\r\n        internal static Chromosome GetChromosome(string referenceName, Dictionary<string, Chromosome> refNameToChromosome)\r\n        {\r\n            refNameToChromosome.TryGetValue(referenceName, out var chromosome);\r\n            return chromosome;\r\n        }\r\n\r\n        private static (string Name, string EntrezGeneId, int HgncId)\r\n            GetGffFields(IEnumerable<string> cols)\r\n        {\r\n            string entrezGeneId = null;\r\n            string name         = null;\r\n            int hgncId          = -1;\r\n\r\n            foreach (string col in cols)\r\n            {\r\n                (string key, string value) = col.OptimizedKeyValue();\r\n\r\n                // ReSharper disable once SwitchStatementMissingSomeCases\r\n                switch (key)\r\n                {\r\n                    case \"Dbxref\":\r\n                        var ids = value.OptimizedSplit(',');\r\n                        (entrezGeneId, hgncId) = GetIds(ids);\r\n                        break;\r\n                    case \"Name\":\r\n                        name = value;\r\n                        break;\r\n              
  }\r\n            }\r\n\r\n            return (name, entrezGeneId, hgncId);\r\n        }\r\n\r\n        private static (string EntrezGeneId, int HgncId) GetIds(IEnumerable<string> ids)\r\n        {\r\n            string entrezGeneId = null;\r\n            int hgncId          = -1;\r\n\r\n            foreach (string idPair in ids)\r\n            {\r\n                var cols = idPair.OptimizedSplit(':');\r\n\r\n                // ReSharper disable once SwitchStatementMissingSomeCases\r\n                switch (cols[0])\r\n                {\r\n                    case \"HGNC\":\r\n                        int lastIndex = cols.Length - 1;\r\n                        if (cols[lastIndex] != \"HGNC\") hgncId = int.Parse(cols[lastIndex]);\r\n                        break;\r\n                    case \"GeneID\":\r\n                        entrezGeneId = cols[1];\r\n                        break;\r\n                }\r\n            }\r\n\r\n            return (entrezGeneId, hgncId);\r\n        }\r\n\r\n        public void Dispose() => _reader.Dispose();\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/IO/UgaGeneReader.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing Genome;\r\nusing Intervals;\r\nusing IO;\r\nusing OptimizedCore;\r\n\r\nnamespace CacheUtils.Genes.IO\r\n{\r\n    public sealed class UgaGeneReader : IDisposable\r\n    {\r\n        private readonly Dictionary<string, Chromosome> _refNameToChromosome;\r\n        private readonly StreamReader _reader;\r\n\r\n        public UgaGeneReader(Stream stream, Dictionary<string, Chromosome> refNameToChromosome, bool leaveOpen = false)\r\n        {\r\n            _refNameToChromosome = refNameToChromosome;\r\n            _reader = FileUtilities.GetStreamReader(stream, leaveOpen);\r\n            _reader.ReadLine();\r\n        }\r\n\r\n        public void Dispose() => _reader.Dispose();\r\n\r\n        public UgaGene[] GetGenes()\r\n        {\r\n            var genes = new List<UgaGene>();\r\n\r\n            while (true)\r\n            {\r\n                var gene = GetNextGene();\r\n                if (gene == null) break;\r\n                genes.Add(gene);\r\n            }\r\n\r\n            return genes.ToArray();\r\n        }\r\n\r\n        private UgaGene GetNextGene()\r\n        {\r\n            string line = _reader.ReadLine();\r\n            if (line == null) return null;\r\n\r\n            var cols = line.OptimizedSplit('\\t');\r\n            if (cols.Length != 11) throw new InvalidDataException($\"Expected 11 columns, but found {cols.Length} columns.\");\r\n\r\n            string ucscRefName   = cols[0];\r\n            var chromosome       = _refNameToChromosome == null ? 
Chromosome.GetEmptyChromosome(ucscRefName) : ReferenceNameUtilities.GetChromosome(_refNameToChromosome, ucscRefName);\r\n            string symbol        = cols[2];\r\n            int start37          = int.Parse(cols[3]);\r\n            int end37            = int.Parse(cols[4]);\r\n            int start38          = int.Parse(cols[5]);\r\n            int end38            = int.Parse(cols[6]);\r\n            bool onReverseStrand = cols[7] == \"R\";\r\n            int hgncId           = int.Parse(cols[8]);\r\n            string ensemblId     = cols[9];\r\n            string entrezGeneId  = cols[10];\r\n\r\n            var grch37 = new Interval(start37, end37);\r\n            var grch38 = new Interval(start38, end38);\r\n\r\n            return new UgaGene(chromosome, grch37, grch38, onReverseStrand, entrezGeneId, ensemblId, symbol, hgncId);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/IO/UgaGeneWriter.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.Text;\r\nusing CacheUtils.Genes.DataStructures;\r\n\r\nnamespace CacheUtils.Genes.IO\r\n{\r\n    public sealed class UgaGeneWriter : IDisposable\r\n    {\r\n        private readonly StreamWriter _writer;\r\n\r\n        public UgaGeneWriter(Stream stream, bool leaveOpen = false)\r\n        {\r\n            _writer = new StreamWriter(stream, Encoding.ASCII, 1024, leaveOpen);\r\n        }\r\n\r\n        public void Dispose() => _writer.Dispose();\r\n\r\n        public void Write(UgaGene[] genes)\r\n        {\r\n            _writer.WriteLine(genes.Length);\r\n            foreach (var gene in genes) _writer.WriteLine(gene.ToString());\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/UgaAssemblyCombiner.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing CacheUtils.Genes.Combiners;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing CacheUtils.TranscriptCache.Comparers;\r\nusing Intervals;\r\n\r\nnamespace CacheUtils.Genes\r\n{\r\n    public static class UgaAssemblyCombiner\r\n    {\r\n        internal static readonly List<UgaGene> EmptyUgaGenes = new List<UgaGene>();\r\n\r\n        public static UgaGene[] Combine(Dictionary<ushort, List<UgaGene>> genesByRef37,\r\n            Dictionary<ushort, List<UgaGene>> genesByRef38)\r\n        {\r\n            var referenceIndices = GetReferenceIndices(genesByRef37.Keys, genesByRef38.Keys);\r\n            var combinedGenes    = new List<UgaGene>();\r\n\r\n            var combiners = GetCombiners();\r\n\r\n            foreach (ushort refIndex in referenceIndices.OrderBy(x => x))\r\n            {\r\n                var ugaGenesByRef = CombineByReference(GetUgaGenesByRef(genesByRef37, refIndex),\r\n                    GetUgaGenesByRef(genesByRef38, refIndex), combiners);\r\n                combinedGenes.AddRange(ugaGenesByRef);\r\n            }\r\n\r\n            return combinedGenes.OrderBy(x => x.Chromosome.Index).ThenBy(x => MinCoordinate(x, y => y.Start))\r\n                .ThenBy(x => MinCoordinate(x, y => y.End)).ToArray();\r\n        }\r\n\r\n        private static List<ICombiner> GetCombiners() =>\r\n            new List<ICombiner> {new HgncIdCombiner(), new PartitionCombiner()};\r\n\r\n        private static IEnumerable<ushort> GetReferenceIndices(IEnumerable<ushort> keysA, IEnumerable<ushort> keysB)\r\n        {\r\n            var referenceIndices = new HashSet<ushort>();\r\n            foreach (ushort key in keysA) referenceIndices.Add(key);\r\n            foreach (ushort key in keysB) referenceIndices.Add(key);\r\n            return referenceIndices.OrderBy(x => x);\r\n        }\r\n\r\n        private static IEnumerable<UgaGene> 
CombineByReference(IEnumerable<UgaGene> uga37, IEnumerable<UgaGene> uga38,\r\n            IEnumerable<ICombiner> combiners)\r\n        {\r\n            var combinedGenes = new List<UgaGene>();\r\n\r\n            var remainingUga37 = GetRemainingGenes(uga37);\r\n            var remainingUga38 = GetRemainingGenes(uga38);\r\n\r\n            foreach (var combiner in combiners) combiner.Combine(combinedGenes, remainingUga37, remainingUga38);\r\n\r\n            if (remainingUga37.Count > 0 || remainingUga38.Count > 0)\r\n                throw new InvalidDataException($\"Expected the combiners to handle all genes, but some still remain. GRCh37: {remainingUga37.Count}, GRCh38: {remainingUga38.Count}\");\r\n\r\n            return combinedGenes;\r\n        }\r\n\r\n        private static HashSet<UgaGene> GetRemainingGenes(IEnumerable<UgaGene> genes)\r\n        {\r\n            var comparer = new UgaGeneComparer();\r\n            var geneSet = new HashSet<UgaGene>(comparer);\r\n            foreach (var gene in genes) geneSet.Add(gene);\r\n            return geneSet;\r\n        }\r\n\r\n        private static IEnumerable<UgaGene> GetUgaGenesByRef(IReadOnlyDictionary<ushort, List<UgaGene>> refIndexToUgaGenes,\r\n            ushort refIndex) => refIndexToUgaGenes.TryGetValue(refIndex, out var genes) ? genes : EmptyUgaGenes;\r\n\r\n        private static int MinCoordinate(UgaGene gene, Func<IInterval, int> coordFunc) => coordFunc(gene.GRCh37 ?? gene.GRCh38);\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Genes/Utilities/DictionaryUtilities.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\n\r\nnamespace CacheUtils.Genes.Utilities\r\n{\r\n    public static class DictionaryUtilities\r\n    {\r\n        public static Dictionary<TK, T> GetSingleValueDict<T, TK>(this IEnumerable<T> elements, Func<T, TK> idFunc)\r\n        {\r\n            var dict = new Dictionary<TK, T>();\r\n            foreach (var element in elements)\r\n            {\r\n                var key = idFunc(element);\r\n                if (key == null) continue;\r\n                if (dict.ContainsKey(key)) throw new InvalidDataException($\"Multiple entries for [{key}] already exist in the dictionary.\");\r\n                dict[key] = element;\r\n            }\r\n            return dict;\r\n        }\r\n\r\n        public static Dictionary<TK, List<T>> GetMultiValueDict<T, TK>(this IEnumerable<T> elements, Func<T, TK> idFunc)\r\n        {\r\n            var dict = new Dictionary<TK, List<T>>();\r\n            foreach (var element in elements)\r\n            {\r\n                var key = idFunc(element);\r\n                if (key == null) continue;\r\n                if (dict.TryGetValue(key, out var geneList)) geneList.Add(element);\r\n                else dict[key] = new List<T> { element };\r\n            }\r\n            return dict;\r\n        }\r\n\r\n        public static Dictionary<TK, TV> GetKeyValueDict<T, TK, TV>(this IEnumerable<T> elements, Func<T, TK> keyFunc, Func<T, TV> valueFunc)\r\n        {\r\n            var dict = new Dictionary<TK, TV>();\r\n            foreach (var element in elements)\r\n            {\r\n                var key   = keyFunc(element);\r\n                var value = valueFunc(element);\r\n                if (key == null || value == null) continue;\r\n                dict[key] = value;\r\n            }\r\n            return dict;\r\n        }\r\n\r\n        public static HashSet<TV> GetSet<T,TV>(this IEnumerable<T> elements, Func<T, TV> idFunc)\r\n        {\r\n 
           var set = new HashSet<TV>();\r\n            foreach (var element in elements)\r\n            {\r\n                var key = idFunc(element);\r\n                set.Add(key);\r\n            }\r\n            return set;\r\n        }\r\n\r\n        public static Dictionary<T, int> CreateIndex<T>(this IEnumerable<T> elements)\r\n        {\r\n            var index = new Dictionary<T, int>();\r\n            var currentIndex = 0;\r\n            foreach (var element in elements) index[element] = currentIndex++;\r\n            return index;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Helpers/BioTypeHelper.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.Helpers\r\n{\r\n    public static class BioTypeHelper\r\n    {\r\n        private static readonly Dictionary<string, BioType> StringToBioTypes;\r\n\r\n        static BioTypeHelper()\r\n        {\r\n            StringToBioTypes = new Dictionary<string, BioType>\r\n            {\r\n                [\"aligned_transcript\"]                 = BioType.aligned_transcript,\r\n                [\"ambiguous_orf\"]                      = BioType.ambiguous_orf,\r\n                [\"antisense\"]                          = BioType.antisense,\r\n                [\"antisense_RNA\"]                      = BioType.antisense_RNA,\r\n                [\"bidirectional_promoter_lncRNA\"]      = BioType.bidirectional_promoter_lncRNA,\r\n                [\"guide_RNA\"]                          = BioType.guide_RNA,\r\n                [\"IG_pseudogene\"]                      = BioType.IG_pseudogene,\r\n                [\"IG_C_gene\"]                          = BioType.IG_C_gene,\r\n                [\"IG_C_pseudogene\"]                    = BioType.IG_C_pseudogene,\r\n                [\"IG_D_gene\"]                          = BioType.IG_D_gene,\r\n                [\"IG_J_gene\"]                          = BioType.IG_J_gene,\r\n                [\"IG_J_pseudogene\"]                    = BioType.IG_J_pseudogene,\r\n                [\"IG_V_gene\"]                          = BioType.IG_V_gene,\r\n                [\"IG_V_pseudogene\"]                    = BioType.IG_V_pseudogene,\r\n                [\"lincRNA\"]                            = BioType.lincRNA,\r\n                [\"lncRNA\"]                             = BioType.lncRNA,\r\n                [\"macro_lncRNA\"]                       = BioType.macro_lncRNA,\r\n                [\"mRNA\"]                               = BioType.mRNA,\r\n                [\"miRNA\"]                             
 = BioType.miRNA,\r\n                [\"misc_RNA\"]                           = BioType.misc_RNA,\r\n                [\"Mt_rRNA\"]                            = BioType.Mt_rRNA,\r\n                [\"Mt_tRNA\"]                            = BioType.Mt_tRNA,\r\n                [\"non_coding\"]                         = BioType.non_coding,\r\n                [\"nonsense_mediated_decay\"]            = BioType.nonsense_mediated_decay,\r\n                [\"non_stop_decay\"]                     = BioType.non_stop_decay,\r\n                [\"other\"]                              = BioType.other,\r\n                [\"polymorphic_pseudogene\"]             = BioType.polymorphic_pseudogene,\r\n                [\"processed_pseudogene\"]               = BioType.processed_pseudogene,\r\n                [\"processed_transcript\"]               = BioType.processed_transcript,\r\n                [\"protein_coding\"]                     = BioType.protein_coding,\r\n                [\"pseudogene\"]                         = BioType.pseudogene,\r\n                [\"retained_intron\"]                    = BioType.retained_intron,\r\n                [\"retrotransposed\"]                    = BioType.retrotransposed,\r\n                [\"RNase_MRP_RNA\"]                      = BioType.RNase_MRP_RNA,\r\n                [\"RNase_P_RNA\"]                        = BioType.RNase_P_RNA,\r\n                [\"rRNA\"]                               = BioType.rRNA,\r\n                [\"ribozyme\"]                           = BioType.ribozyme,\r\n                [\"sense_intronic\"]                     = BioType.sense_intronic,\r\n                [\"sense_overlapping\"]                  = BioType.sense_overlapping,\r\n                [\"SRP_RNA\"]                            = BioType.SRP_RNA,\r\n                [\"sRNA\"]                               = BioType.sRNA,\r\n                [\"scRNA\"]                              = BioType.scRNA,\r\n                [\"scaRNA\"]                      
       = BioType.scaRNA,\r\n                [\"snRNA\"]                              = BioType.snRNA,\r\n                [\"snoRNA\"]                             = BioType.snoRNA,\r\n                [\"telomerase_RNA\"]                     = BioType.telomerase_RNA,\r\n                [\"3prime_overlapping_ncrna\"]           = BioType.three_prime_overlapping_ncRNA,\r\n                [\"3prime_overlapping_ncRNA\"]           = BioType.three_prime_overlapping_ncRNA,\r\n                [\"transcribed_processed_pseudogene\"]   = BioType.transcribed_processed_pseudogene,\r\n                [\"translated_unprocessed_pseudogene\"]  = BioType.translated_unprocessed_pseudogene,\r\n                [\"transcribed_unitary_pseudogene\"]     = BioType.transcribed_unitary_pseudogene,\r\n                [\"TEC\"]                                = BioType.TEC,\r\n                [\"tRNA\"]                               = BioType.tRNA,\r\n                [\"translated_processed_pseudogene\"]    = BioType.translated_processed_pseudogene,\r\n                [\"transcribed_unprocessed_pseudogene\"] = BioType.transcribed_unprocessed_pseudogene,\r\n                [\"TR_C_gene\"]                          = BioType.TR_C_gene,\r\n                [\"TR_D_gene\"]                          = BioType.TR_D_gene,\r\n                [\"TR_J_gene\"]                          = BioType.TR_J_gene,\r\n                [\"TR_J_pseudogene\"]                    = BioType.TR_J_pseudogene,\r\n                [\"TR_V_gene\"]                          = BioType.TR_V_gene,\r\n                [\"TR_V_pseudogene\"]                    = BioType.TR_V_pseudogene,\r\n                [\"unitary_pseudogene\"]                 = BioType.unitary_pseudogene,\r\n                [\"unprocessed_pseudogene\"]             = BioType.unprocessed_pseudogene,\r\n                [\"vaultRNA\"]                           = BioType.vaultRNA,\r\n                [\"Y_RNA\"]                              = BioType.Y_RNA\r\n            };\r\n  
      }\r\n\r\n        public static BioType GetBioType(string s)\r\n        {\r\n            if (s == null) throw new ArgumentNullException(nameof(s));\r\n            if (!StringToBioTypes.TryGetValue(s, out var ret)) throw new InvalidOperationException($\"The specified biotype ({s}) was not found in the BioType enum.\");\r\n            return ret;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Helpers/GeneSymbolSourceHelper.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing CacheUtils.DataDumperImport.DataStructures;\r\n\r\nnamespace CacheUtils.Helpers\r\n{\r\n    public static class GeneSymbolSourceHelper\r\n    {\r\n        private static readonly Dictionary<string, GeneSymbolSource> StringToGeneSymbolSources;\r\n\r\n        static GeneSymbolSourceHelper()\r\n        {\r\n            StringToGeneSymbolSources = new Dictionary<string, GeneSymbolSource>\r\n            {\r\n                [\"Clone_based_ensembl_gene\"] = GeneSymbolSource.CloneBasedEnsemblGene,\r\n                [\"Clone_based_vega_gene\"]    = GeneSymbolSource.CloneBasedVegaGene,\r\n                [\"EntrezGene\"]               = GeneSymbolSource.EntrezGene,\r\n                [\"HGNC\"]                     = GeneSymbolSource.HGNC,\r\n                [\"LRG\"]                      = GeneSymbolSource.LRG,\r\n                [\"miRBase\"]                  = GeneSymbolSource.miRBase,\r\n                [\"NCBI\"]                     = GeneSymbolSource.NCBI,\r\n                [\"RFAM\"]                     = GeneSymbolSource.RFAM,\r\n                [\"Uniprot_gn\"]               = GeneSymbolSource.UniProtGeneName\r\n            };\r\n        }\r\n\r\n        public static GeneSymbolSource GetGeneSymbolSource(string s)\r\n        {\r\n            if (s == null) return GeneSymbolSource.Unknown;\r\n            if (!StringToGeneSymbolSources.TryGetValue(s, out var ret)) throw new InvalidOperationException($\"The specified gene symbol source ({s}) was not found in the GeneSymbolSource enum.\");\r\n            return ret;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Helpers/RegulatoryRegionTypeHelper.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing VariantAnnotation.Interface.Caches;\r\n\r\nnamespace CacheUtils.Helpers\r\n{\r\n    public static class RegulatoryRegionTypeHelper\r\n    {\r\n        private static readonly Dictionary<string, RegulatoryRegionType> StringToRegulatoryRegionTypes;\r\n\r\n        static RegulatoryRegionTypeHelper()\r\n        {\r\n            StringToRegulatoryRegionTypes = new Dictionary<string, RegulatoryRegionType>\r\n            {\r\n                [\"CTCF_binding_site\"]        = RegulatoryRegionType.CTCF_binding_site,\r\n                [\"TF_binding_site\"]          = RegulatoryRegionType.TF_binding_site,\r\n                [\"enhancer\"]                 = RegulatoryRegionType.enhancer,\r\n                [\"open_chromatin_region\"]    = RegulatoryRegionType.open_chromatin_region,\r\n                [\"promoter\"]                 = RegulatoryRegionType.promoter,\r\n                [\"promoter_flanking_region\"] = RegulatoryRegionType.promoter_flanking_region,\r\n                [\"mature_protein_region\"]    = RegulatoryRegionType.mature_protein_region\r\n            };\r\n        }\r\n\r\n        public static RegulatoryRegionType GetRegulatoryRegionType(string s)\r\n        {\r\n            if (s == null) throw new ArgumentNullException(nameof(s));\r\n            if (!StringToRegulatoryRegionTypes.TryGetValue(s, out var ret)) throw new InvalidOperationException($\"The specified regulatory region type ({s}) was not found in the RegulatoryRegionType enum.\");\r\n            return ret;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Helpers/TranscriptCacheHelper.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing Genome;\r\nusing IO;\r\nusing VariantAnnotation.Caches;\r\nusing VariantAnnotation.IO.Caches;\r\n\r\nnamespace CacheUtils.Helpers\r\n{\r\n    public static class TranscriptCacheHelper\r\n    {\r\n        public static TranscriptCacheData GetCache(string cachePath,\r\n            Dictionary<ushort, Chromosome> refIndexToChromosome)\r\n        {\r\n            if (!File.Exists(cachePath)) throw new FileNotFoundException($\"Could not find {cachePath}\");\r\n\r\n            TranscriptCacheData cache;\r\n            using (var reader = new TranscriptCacheReader(FileUtilities.GetReadStream(cachePath))) cache = reader.Read(refIndexToChromosome);\r\n            return cache;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/IntermediateIO/CcdsReader.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing IO;\nusing OptimizedCore;\nusing VariantAnnotation.Utilities;\n\nnamespace CacheUtils.IntermediateIO\n{\n    public static class CcdsReader\n    {\n        private const int CcdsIdIndex       = 0;\n        private const int NucleotideIdIndex = 4;\n\n        public static Dictionary<string, List<string>> GetCcdsIdToEnsemblId(string ccdsPath)\n        {\n            var ccdsIdToEnsemblId = new Dictionary<string, List<string>>();\n\n            using (var reader = FileUtilities.GetStreamReader(FileUtilities.GetReadStream(ccdsPath)))\n            {\n                while (true)\n                {\n                    string line = reader.ReadLine();\n                    if (line == null) break;\n                    if (line.OptimizedStartsWith('#')) continue;\n\n                    var cols = line.OptimizedSplit('\\t');\n                    if (cols.Length != 8) throw new InvalidDataException($\"Expected 8 columns, but found {cols.Length}: [{line}]\");\n\n                    string nucleotideId = cols[NucleotideIdIndex];\n                    if (!nucleotideId.StartsWith(\"ENST\")) continue;\n\n                    var ccds    = FormatUtilities.SplitVersion(cols[CcdsIdIndex]);\n                    var ensembl = FormatUtilities.SplitVersion(nucleotideId);\n\n                    if (ccdsIdToEnsemblId.TryGetValue(ccds.Id, out var ensemblList)) ensemblList.Add(ensembl.Id);\n                    else ccdsIdToEnsemblId[ccds.Id] = new List<string> { ensembl.Id };\n                }\n            }\n\n            return ccdsIdToEnsemblId;\n        }\n    }\n}\n"
  },
  {
    "path": "CacheUtils/IntermediateIO/GenbankReader.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.Genbank;\r\nusing Intervals;\r\nusing IO;\r\nusing OptimizedCore;\r\n\r\nnamespace CacheUtils.IntermediateIO\r\n{\r\n    internal sealed class GenbankReader : IDisposable\r\n    {\r\n        private readonly StreamReader _reader;\r\n\r\n        internal GenbankReader(Stream stream)\r\n        {\r\n            _reader = FileUtilities.GetStreamReader(stream);\r\n            IntermediateIoCommon.ReadHeader(_reader, IntermediateIoCommon.FileType.Genbank);\r\n        }\r\n\r\n        public Dictionary<string, GenbankEntry> GetIdToGenbank()\r\n        {\r\n            var genbankDict = new Dictionary<string, GenbankEntry>();\r\n\r\n            while (true)\r\n            {\r\n                var entry = GetNextEntry();\r\n                if (entry == null) break;\r\n                genbankDict[entry.TranscriptId] = entry;\r\n            }\r\n\r\n            return genbankDict;\r\n        }\r\n\r\n        private GenbankEntry GetNextEntry()\r\n        {\r\n            string line = _reader.ReadLine();\r\n            if (line == null) return null;\r\n\r\n            var info  = ReadTranscriptInfo(line);\r\n            var exons = ReadExons(info.NumExons);\r\n\r\n            return new GenbankEntry(info.TranscriptId, info.TranscriptVersion, info.ProteinId, info.ProteinVersion,\r\n                info.GeneId, info.GeneSymbol, info.CodingRegion, exons);\r\n        }\r\n\r\n        private IInterval[] ReadExons(int numExons)\r\n        {\r\n            if (numExons == 0) return null;\r\n\r\n            string line = _reader.ReadLine();\r\n            if (line == null) throw new InvalidOperationException(\"Unexpected null line when parsing exons\");\r\n\r\n            var cols = line.OptimizedSplit('\\t');\r\n            if (cols[0] != \"Exons\") throw new InvalidDataException($\"Expected the first keyword to be Exons, but found something different: {line}\");\r\n\r\n     
       var exons = new IInterval[numExons];\r\n            var colIndex = 1;\r\n\r\n            for (var i = 0; i < numExons; i++)\r\n            {\r\n                int start = int.Parse(cols[colIndex++]);\r\n                int end   = int.Parse(cols[colIndex++]);\r\n                exons[i]  = new Interval(start, end);\r\n            }\r\n\r\n            return exons;\r\n        }\r\n\r\n        private static (string TranscriptId, byte TranscriptVersion, string ProteinId, byte ProteinVersion, string\r\n            GeneId, string GeneSymbol, IInterval CodingRegion, int NumExons) ReadTranscriptInfo(string line)\r\n        {\r\n            var cols = line.OptimizedSplit('\\t');\r\n            if (cols.Length != 9) throw new InvalidDataException($\"Expected 9 columns, but found {cols.Length} columns instead.\");\r\n\r\n            string transcriptId    = cols[0];\r\n            byte transcriptVersion = byte.Parse(cols[1]);\r\n            string proteinId       = cols[2];\r\n            byte proteinVersion    = byte.Parse(cols[3]);\r\n            string geneId          = cols[4];\r\n            string geneSymbol      = cols[5];\r\n            int start              = int.Parse(cols[6]);\r\n            int end                = int.Parse(cols[7]);\r\n            int numExons           = int.Parse(cols[8]);\r\n\r\n            var codingRegion = new Interval(start, end);\r\n            return (transcriptId, transcriptVersion, proteinId, proteinVersion, geneId, geneSymbol, codingRegion,\r\n                numExons);\r\n        }\r\n\r\n        public void Dispose() => _reader.Dispose();\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/IntermediateIO/GenbankWriter.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing CacheUtils.Genbank;\r\n\r\nnamespace CacheUtils.IntermediateIO\r\n{\r\n    internal sealed class GenbankWriter : IDisposable\r\n    {\r\n        private readonly StreamWriter _writer;\r\n\r\n        internal GenbankWriter(StreamWriter writer, IntermediateIoHeader header)\r\n        {\r\n            _writer = writer;\r\n            _writer.NewLine = \"\\n\";\r\n            header.Write(_writer, IntermediateIoCommon.FileType.Genbank);\r\n        }\r\n\r\n        internal void Write(GenbankEntry entry)\r\n        {\r\n            int numExons = entry.Exons?.Length ?? 0;\r\n\r\n            int codingRegionStart = entry.CodingRegion?.Start ?? -1;\r\n            int codingRegionEnd   = entry.CodingRegion?.End   ?? -1;\r\n\r\n            string proteinId    = entry.ProteinId ?? \"\";\r\n            byte proteinVersion = entry.ProteinVersion;\r\n\r\n            _writer.WriteLine($\"{entry.TranscriptId}\\t{entry.TranscriptVersion}\\t{proteinId}\\t{proteinVersion}\\t{entry.GeneId}\\t{entry.Symbol}\\t{codingRegionStart}\\t{codingRegionEnd}\\t{numExons}\");\r\n            if (entry.Exons == null) return;\r\n\r\n            _writer.Write(\"Exons\");\r\n            foreach (var exon in entry.Exons) _writer.Write($\"\\t{exon.Start}\\t{exon.End}\");\r\n            _writer.WriteLine();\r\n        }\r\n\r\n        public void Dispose() => _writer.Dispose();\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/IntermediateIO/IntermediateIoCommon.cs",
    "content": "﻿using System.IO;\n\nnamespace CacheUtils.IntermediateIO\n{\n    public static class IntermediateIoCommon\n    {\n        public const string Header = \"NirvanaIntermediateIo\";\n\n        public enum FileType : byte\n        {\n            Genbank,\n            Polyphen,\n            Regulatory,\n            Sift,\n            Transcript\n        }\n\n        // ReSharper disable once ParameterOnlyUsedForPreconditionCheck.Global\n        public static IntermediateIoHeader ReadHeader(StreamReader reader, FileType expectedType)\n        {\n            (string id, FileType type, IntermediateIoHeader header) = IntermediateIoHeader.Read(reader);\n            if (id != Header || type != expectedType) throw new InvalidDataException(\"Could not verify the header tag or the file type in the header.\");\n            return header;\n        }\n    }\n}\n"
  },
  {
    "path": "CacheUtils/IntermediateIO/IntermediateIoHeader.cs",
    "content": "﻿using System.IO;\r\nusing Genome;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.IntermediateIO\r\n{\r\n    public sealed class IntermediateIoHeader\r\n    {\r\n        public readonly ushort VepVersion;\r\n        public readonly long VepReleaseTicks;\r\n        public readonly Source Source;\r\n        public readonly GenomeAssembly Assembly;\r\n        private readonly int _numRefSeqs;\r\n\r\n        public IntermediateIoHeader(ushort vepVersion, long vepReleaseTicks, Source transcriptSource,\r\n            GenomeAssembly genomeAssembly, int numRefSeqs)\r\n        {\r\n            VepVersion      = vepVersion;\r\n            VepReleaseTicks = vepReleaseTicks;\r\n            Source          = transcriptSource;\r\n            Assembly        = genomeAssembly;\r\n            _numRefSeqs     = numRefSeqs;\r\n        }\r\n\r\n        internal void Write(StreamWriter writer, IntermediateIoCommon.FileType fileType)\r\n        {\r\n            writer.WriteLine($\"{IntermediateIoCommon.Header}\\t{(byte)fileType}\");\r\n            writer.WriteLine($\"{VepVersion}\\t{VepReleaseTicks}\\t{(byte)Source}\\t{(byte)Assembly}\\t{_numRefSeqs}\");\r\n        }\r\n\r\n        internal static (string Id, IntermediateIoCommon.FileType Type, IntermediateIoHeader Header) Read(StreamReader reader)\r\n        {\r\n            var cols  = reader.ReadLine()?.OptimizedSplit('\\t');\r\n            var cols2 = reader.ReadLine()?.OptimizedSplit('\\t');\r\n\r\n            if (cols == null || cols2 == null)\r\n                throw new InvalidDataException(\"Found unexpected null lines when parsing the intermediate I/O file header\");\r\n\r\n            string id = cols[0];\r\n            var type  = (IntermediateIoCommon.FileType)byte.Parse(cols[1]);\r\n\r\n            ushort vepVersion    = ushort.Parse(cols2[0]);\r\n            long vepReleaseTicks = long.Parse(cols2[1]);\r\n            var source           = 
(Source)byte.Parse(cols2[2]);\r\n            var genomeAssembly   = (GenomeAssembly)byte.Parse(cols2[3]);\r\n            int numRefSeqs       = int.Parse(cols2[4]);\r\n\r\n            var header = new IntermediateIoHeader(vepVersion, vepReleaseTicks, source, genomeAssembly, numRefSeqs);\r\n            return (id, type, header);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/IntermediateIO/LrgReader.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing IO;\nusing OptimizedCore;\nusing VariantAnnotation.Utilities;\n\nnamespace CacheUtils.IntermediateIO\n{\n    public static class LrgReader\n    {\n        private const int RefSeqTranscriptIndex  = 4;\n        private const int EnsemblTranscriptIndex = 5;\n        private const int CccdsIndex             = 6;\n\n        public static HashSet<string> GetTranscriptIds(string lrgPath, Dictionary<string, List<string>> ccdsIdToEnsemblId)\n        {\n            var transcriptIds = new HashSet<string>();\n\n            using (var reader = FileUtilities.GetStreamReader(FileUtilities.GetReadStream(lrgPath)))\n            {\n                while (true)\n                {\n                    string line = reader.ReadLine();\n                    if (line == null) break;\n                    if (line.OptimizedStartsWith('#')) continue;\n\n                    var cols = line.OptimizedSplit('\\t');\n                    if (cols.Length != 7) throw new InvalidDataException($\"Expected 7 columns, but found {cols.Length}: [{line}]\");\n\n                    var refSeqTranscript    = FormatUtilities.SplitVersion(Sanitize(cols[RefSeqTranscriptIndex]));\n                    var ccds                = FormatUtilities.SplitVersion(Sanitize(cols[CccdsIndex]));\n                    var ensemblTranscriptIds = GetEnsemblTranscriptIds(ccds.Id, ccdsIdToEnsemblId, Sanitize(cols[EnsemblTranscriptIndex]));\n\n                    if (refSeqTranscript.Id  != null) transcriptIds.Add(refSeqTranscript.Id);\n                    // ReSharper disable once InvertIf\n                    if (ensemblTranscriptIds != null) foreach (string id in ensemblTranscriptIds) transcriptIds.Add(id);\n                }\n            }\n\n            return transcriptIds;\n        }\n\n        private static List<string> GetEnsemblTranscriptIds(string ccdsId,\n            IReadOnlyDictionary<string, List<string>> ccdsIdToEnsemblId, string 
ensemblId)\n        {\n            if (!string.IsNullOrEmpty(ensemblId)) return new List<string> { ensemblId };\n            if (string.IsNullOrEmpty(ccdsId)) return null;\n            return !ccdsIdToEnsemblId.TryGetValue(ccdsId, out var ensemblList) ? null : ensemblList;\n        }\n\n        private static string Sanitize(string s) => s == \"-\" ? null : s;\n    }\n}\n"
  },
  {
    "path": "CacheUtils/IntermediateIO/MutableTranscriptReader.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.DataDumperImport.DataStructures;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing CacheUtils.TranscriptCache;\r\nusing Genome;\r\nusing Intervals;\r\nusing IO;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.IntermediateIO\r\n{\r\n    internal sealed class MutableTranscriptReader : IDisposable\r\n    {\r\n        private readonly Dictionary<ushort, Chromosome> _refIndexToChromosome;\r\n        private readonly StreamReader _reader;\r\n        public readonly IntermediateIoHeader Header;\r\n\r\n        private readonly ISequence _sequence = new NSequence();\r\n\r\n        internal MutableTranscriptReader(Stream stream, Dictionary<ushort, Chromosome> refIndexToChromosome)\r\n        {\r\n            _refIndexToChromosome = refIndexToChromosome;\r\n            _reader = FileUtilities.GetStreamReader(stream);\r\n            Header  = IntermediateIoCommon.ReadHeader(_reader, IntermediateIoCommon.FileType.Transcript);\r\n        }\r\n\r\n        public MutableTranscript[] GetTranscripts()\r\n        {\r\n            var transcripts = new List<MutableTranscript>();\r\n\r\n            while (true)\r\n            {\r\n                var transcript = GetNextTranscript();\r\n                if (transcript == null) break;\r\n                transcripts.Add(transcript);\r\n            }\r\n\r\n            return transcripts.ToArray();\r\n        }\r\n\r\n        private MutableTranscript GetNextTranscript()\r\n        {\r\n            string line = _reader.ReadLine();\r\n            if (line == null) return null;\r\n\r\n            var transcriptInfo  = ReadTranscriptInfo(line);\r\n            var gene            = ReadGene(transcriptInfo.Chromosome);\r\n            var translation    
 = ReadTranslation();\r\n            var exons           = ReadExons(transcriptInfo.Chromosome);\r\n            var introns         = ReadIntervals(\"Introns\");\r\n            var cdnaMaps        = ReadCdnaMaps();\r\n            var mirnas          = ReadIntervals(\"miRNAs\");\r\n            var selenocysteines = ReadSelenocysteines();\r\n            var rnaEdits        = ReadRnaEdits();\r\n\r\n            var transcript = new MutableTranscript(transcriptInfo.Chromosome, transcriptInfo.Start, transcriptInfo.End,\r\n                transcriptInfo.Id, transcriptInfo.Version, transcriptInfo.CcdsId, transcriptInfo.RefSeqId,\r\n                transcriptInfo.BioType, transcriptInfo.IsCanonical, translation.CodingRegion, translation.Id,\r\n                translation.Version, translation.PeptideSeq, transcriptInfo.Source, gene, exons,\r\n                transcriptInfo.StartExonPhase, transcriptInfo.TotalExonLength, introns, cdnaMaps, null, null,\r\n                transcriptInfo.TranslateableSequence, mirnas, transcriptInfo.CdsStartNotFound,\r\n                transcriptInfo.CdsEndNotFound, selenocysteines, rnaEdits, transcriptInfo.BamEditStatus);\r\n\r\n            AddMutableContents(transcript);\r\n\r\n            return transcript;\r\n        }\r\n\r\n        private void AddMutableContents(MutableTranscript mt)\r\n        {\r\n            mt.TranscriptRegions = TranscriptRegionMerger.GetTranscriptRegions(mt.CdnaMaps, mt.Exons, mt.Introns, mt.Gene.OnReverseStrand);\r\n            TranscriptRegionValidater.Validate(mt.Id, mt.CdnaMaps, mt.Exons, mt.Introns, mt.TranscriptRegions);\r\n\r\n            mt.NewStartExonPhase = mt.StartExonPhase < 0 ? 
(byte)0 : (byte)mt.StartExonPhase;\r\n\r\n            if (mt.CodingRegion == null) return;\r\n\r\n            var codingSequence = new CodingSequence(_sequence, mt.CodingRegion, mt.TranscriptRegions,\r\n                mt.Gene.OnReverseStrand, mt.NewStartExonPhase, mt.RnaEdits);\r\n\r\n            mt.CdsLength = codingSequence.GetCodingSequence().Length;\r\n\r\n            mt.CodingRegion = new CodingRegion(mt.CodingRegion.Start, mt.CodingRegion.End,\r\n                mt.CodingRegion.CdnaStart, mt.CodingRegion.CdnaEnd, mt.CdsLength);\r\n        }\r\n\r\n        private int[] ReadSelenocysteines()\r\n        {\r\n            var cols = GetColumns(\"Sec\");\r\n\r\n            int numPositions = int.Parse(cols[1]);\r\n            if (numPositions == 0) return null;\r\n\r\n            var positions = new int[numPositions];\r\n            var colIndex = 2;\r\n\r\n            for (var i = 0; i < numPositions; i++) positions[i] = int.Parse(cols[colIndex++]);\r\n            return positions;\r\n        }\r\n\r\n        private IRnaEdit[] ReadRnaEdits()\r\n        {\r\n            var cols = GetColumns(\"RnaEdits\");\r\n\r\n            int numRnaEdits = int.Parse(cols[1]);\r\n            if (numRnaEdits == 0) return null;\r\n\r\n            var rnaEdits = new IRnaEdit[numRnaEdits];\r\n            var colIndex = 2;\r\n\r\n            for (var i = 0; i < numRnaEdits; i++)\r\n            {\r\n                int start    = int.Parse(cols[colIndex++]);\r\n                int end      = int.Parse(cols[colIndex++]);\r\n                string bases = cols[colIndex++];\r\n                rnaEdits[i] = new RnaEdit(start, end, bases);\r\n            }\r\n\r\n            return rnaEdits;\r\n        }\r\n\r\n        private MutableTranscriptRegion[] ReadCdnaMaps()\r\n        {\r\n            var cols = GetColumns(\"cDNA\");\r\n\r\n            int numCdnaMaps = int.Parse(cols[1]);\r\n            if (numCdnaMaps == 0) return null;\r\n\r\n            var cdnaMaps = new 
MutableTranscriptRegion[numCdnaMaps];\r\n            var colIndex = 2;\r\n\r\n            for (var i = 0; i < numCdnaMaps; i++)\r\n            {\r\n                int start     = int.Parse(cols[colIndex++]);\r\n                int end       = int.Parse(cols[colIndex++]);\r\n                int cdnaStart = int.Parse(cols[colIndex++]);\r\n                int cdnaEnd   = int.Parse(cols[colIndex++]);\r\n                cdnaMaps[i]   = new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, start, end, cdnaStart, cdnaEnd);\r\n            }\r\n\r\n            return cdnaMaps;\r\n        }\r\n\r\n        private IInterval[] ReadIntervals(string description)\r\n        {\r\n            var cols = GetColumns(description);\r\n\r\n            int numIntervals = int.Parse(cols[1]);\r\n            if (numIntervals == 0) return null;\r\n\r\n            var intervals = new IInterval[numIntervals];\r\n            var colIndex = 2;\r\n\r\n            for (var i = 0; i < numIntervals; i++)\r\n            {\r\n                int start    = int.Parse(cols[colIndex++]);\r\n                int end      = int.Parse(cols[colIndex++]);\r\n                intervals[i] = new Interval(start, end);\r\n            }\r\n\r\n            return intervals;\r\n        }\r\n\r\n        private MutableExon[] ReadExons(Chromosome chromosome)\r\n        {\r\n            var cols = GetColumns(\"Exons\");\r\n\r\n            int numExons = int.Parse(cols[1]);\r\n            if (numExons == 0) return null;\r\n\r\n            var exons = new MutableExon[numExons];\r\n            var colIndex = 2;\r\n\r\n            for (var i = 0; i < numExons; i++)\r\n            {\r\n                int start = int.Parse(cols[colIndex++]);\r\n                int end   = int.Parse(cols[colIndex++]);\r\n                var phase = (byte)(int.Parse(cols[colIndex++]) + 1);\r\n                exons[i]  = new MutableExon(chromosome, start, end, phase);\r\n            }\r\n\r\n            return exons;\r\n        }\r\n\r\n    
    private (string Id, byte Version, ICodingRegion CodingRegion, string PeptideSeq) ReadTranslation()\r\n        {\r\n            var cols = GetColumns(\"Translation\");\r\n\r\n            string id         = cols[1];\r\n            byte version      = byte.Parse(cols[2]);\r\n            int start         = int.Parse(cols[3]);\r\n            int end           = int.Parse(cols[4]);\r\n            int cdnaStart     = int.Parse(cols[5]);\r\n            int cdnaEnd       = int.Parse(cols[6]);\r\n            string peptideSeq = cols[7];\r\n\r\n            var codingRegion = start == -1 && end == -1\r\n                ? null\r\n                : new CodingRegion(start, end, cdnaStart, cdnaEnd, 0);\r\n\r\n            return (id, version, codingRegion, peptideSeq);\r\n        }\r\n\r\n        private MutableGene ReadGene(Chromosome chromosome)\r\n        {\r\n            var cols = GetColumns(\"Gene\");\r\n\r\n            string id            = cols[1];\r\n            int start            = int.Parse(cols[4]);\r\n            int end              = int.Parse(cols[5]);\r\n            bool onReverseStrand = cols[6] == \"R\";\r\n            string symbol        = cols[7];\r\n            var symbolSource     = (GeneSymbolSource)int.Parse(cols[8]);\r\n            int hgncId           = int.Parse(cols[9]);\r\n\r\n            return new MutableGene(chromosome, start, end, onReverseStrand, symbol, symbolSource, id, hgncId);\r\n        }\r\n\r\n        private (string Id, byte Version, Chromosome Chromosome, int Start, int End, BioType BioType, bool IsCanonical,\r\n            int TotalExonLength, string CcdsId, string RefSeqId, Source Source, bool CdsStartNotFound, bool\r\n            CdsEndNotFound, string TranslateableSequence, int StartExonPhase, string BamEditStatus) ReadTranscriptInfo(\r\n                string line)\r\n        {\r\n            var cols = GetColumns(\"Transcript\", line);\r\n\r\n            string id             = cols[1];\r\n            byte version          
= byte.Parse(cols[2]);\r\n            ushort referenceIndex = ushort.Parse(cols[4]);\r\n            int start             = int.Parse(cols[5]);\r\n            int end               = int.Parse(cols[6]);\r\n            var biotype           = (BioType)byte.Parse(cols[8]);\r\n            bool isCanonical      = cols[9] == \"Y\";\r\n            int totalExonLength   = int.Parse(cols[10]);\r\n            string ccdsId         = cols[11];\r\n            string refSeqId       = cols[12];\r\n            var source            = (Source)byte.Parse(cols[13]);\r\n            bool cdsStartNotFound = cols[14] == \"Y\";\r\n            bool cdsEndNotFound   = cols[15] == \"Y\";\r\n            int startExonPhase    = int.Parse(cols[16]);\r\n            string bamEditStatus  = cols[17];\r\n\r\n            string translateableSequence = _reader.ReadLine();\r\n            var chromosome = ReferenceNameUtilities.GetChromosome(_refIndexToChromosome, referenceIndex);\r\n\r\n            return (id, version, chromosome, start, end, biotype, isCanonical, totalExonLength, ccdsId, refSeqId, source\r\n                , cdsStartNotFound, cdsEndNotFound, translateableSequence, startExonPhase, bamEditStatus);\r\n        }\r\n\r\n        private string[] GetColumns(string keyword, string line = null)\r\n        {\r\n            if (line == null) line = _reader.ReadLine();\r\n            var cols = line?.OptimizedSplit('\\t');\r\n            if (cols == null) throw new InvalidDataException(\"Found an unexpected null when parsing the columns in the transcript reader.\");\r\n            if (cols[0] != keyword) throw new InvalidDataException($\"Could not find the {keyword} keyword in the transcripts file.\");\r\n            return cols;\r\n        }\r\n\r\n        public void Dispose() => _reader.Dispose();\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/IntermediateIO/MutableTranscriptWriter.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing Intervals;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.IntermediateIO\r\n{\r\n    internal sealed class MutableTranscriptWriter : IDisposable\r\n    {\r\n        private readonly StreamWriter _writer;\r\n\r\n        internal MutableTranscriptWriter(StreamWriter writer, IntermediateIoHeader header)\r\n        {\r\n            _writer         = writer;\r\n            _writer.NewLine = \"\\n\";\r\n            header.Write(_writer, IntermediateIoCommon.FileType.Transcript);\r\n        }\r\n\r\n        internal void Write(MutableTranscript transcript)\r\n        {\r\n            WriteTranscriptInfo(transcript);\r\n            WriteGene(_writer, transcript.Gene);\r\n            WriteTranslation(transcript.CodingRegion, transcript.ProteinId, transcript.ProteinVersion, transcript.PeptideSequence);\r\n            WriteExons(transcript.Exons);\r\n            WriteIntervals(transcript.Introns, \"Introns\");\r\n            WriteCdnaMaps(transcript.CdnaMaps);\r\n            WriteIntervals(transcript.MicroRnas, \"miRNAs\");\r\n            WriteSelenocysteines(transcript.SelenocysteinePositions);\r\n            WriteRnaEdits(transcript.RnaEdits);\r\n        }\r\n\r\n        private void WriteRnaEdits(IReadOnlyCollection<IRnaEdit> rnaEdits)\r\n        {\r\n            if (rnaEdits == null)\r\n            {\r\n                _writer.WriteLine(\"RnaEdits\\t0\");\r\n                return;\r\n            }\r\n\r\n            _writer.Write($\"RnaEdits\\t{rnaEdits.Count}\");\r\n            foreach (var rnaEdit in rnaEdits) _writer.Write($\"\\t{rnaEdit.Start}\\t{rnaEdit.End}\\t{rnaEdit.Bases}\");\r\n            _writer.WriteLine();\r\n        }\r\n\r\n        private void WriteSelenocysteines(IReadOnlyCollection<int> positions)\r\n        {\r\n            if (positions == null)\r\n          
  {\r\n                _writer.WriteLine(\"Sec\\t0\");\r\n                return;\r\n            }\r\n\r\n            _writer.Write($\"Sec\\t{positions.Count}\");\r\n            foreach (int pos in positions) _writer.Write($\"\\t{pos}\");\r\n            _writer.WriteLine();\r\n        }\r\n\r\n        private void WriteCdnaMaps(IReadOnlyCollection<ITranscriptRegion> cdnaMaps)\r\n        {\r\n            _writer.Write($\"cDNA\\t{cdnaMaps.Count}\");\r\n            foreach (var cdnaMap in cdnaMaps) _writer.Write($\"\\t{cdnaMap.Start}\\t{cdnaMap.End}\\t{cdnaMap.CdnaStart}\\t{cdnaMap.CdnaEnd}\");\r\n            _writer.WriteLine();\r\n        }\r\n\r\n        private void WriteIntervals(IReadOnlyCollection<IInterval> intervals, string description)\r\n        {\r\n            if (intervals == null)\r\n            {\r\n                _writer.WriteLine($\"{description}\\t0\");\r\n                return;\r\n            }\r\n\r\n            _writer.Write($\"{description}\\t{intervals.Count}\");\r\n            foreach (var interval in intervals) _writer.Write($\"\\t{interval.Start}\\t{interval.End}\");\r\n            _writer.WriteLine();\r\n        }\r\n\r\n        private void WriteExons(IReadOnlyCollection<MutableExon> exons)\r\n        {\r\n            _writer.Write($\"Exons\\t{exons.Count}\");\r\n            foreach (var exon in exons) _writer.Write($\"\\t{exon.Start}\\t{exon.End}\\t{exon.Phase}\");\r\n            _writer.WriteLine();\r\n        }\r\n\r\n        private void WriteTranslation(ICodingRegion codingRegion, string proteinId, byte proteinVersion, string peptideSequence) =>\r\n            _writer.WriteLine($\"Translation\\t{proteinId}\\t{proteinVersion}\\t{codingRegion.Start}\\t{codingRegion.End}\\t{codingRegion.CdnaStart}\\t{codingRegion.CdnaEnd}\\t{peptideSequence}\");\r\n\r\n        private static void WriteGene(TextWriter writer, MutableGene gene)\r\n        {\r\n            char strand = gene.OnReverseStrand ? 
'R' : 'F';\r\n            writer.WriteLine($\"Gene\\t{gene.GeneId}\\t{gene.Chromosome.UcscName}\\t{gene.Chromosome.Index}\\t{gene.Start}\\t{gene.End}\\t{strand}\\t{gene.Symbol}\\t{(int)gene.SymbolSource}\\t{gene.HgncId}\");\r\n        }\r\n\r\n        private void WriteTranscriptInfo(MutableTranscript transcript)\r\n        {\r\n            _writer.WriteLine($\"Transcript\\t{transcript.Id}\\t{transcript.Version}\\t{transcript.Chromosome.UcscName}\\t{transcript.Chromosome.Index}\\t{transcript.Start}\\t{transcript.End}\\t{transcript.BioType}\\t{(byte)transcript.BioType}\\t{BoolToChar(transcript.IsCanonical)}\\t{transcript.TotalExonLength}\\t{transcript.CcdsId}\\t{transcript.RefSeqId}\\t{(byte)transcript.Source}\\t{BoolToChar(transcript.CdsStartNotFound)}\\t{BoolToChar(transcript.CdsEndNotFound)}\\t{transcript.StartExonPhase}\\t{transcript.BamEditStatus}\");\r\n            _writer.WriteLine(transcript.TranslateableSequence);\r\n        }\r\n\r\n        private static char BoolToChar(bool b) => b ? 'Y' : 'N';\r\n\r\n        public void Dispose() => _writer.Dispose();\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/IntermediateIO/PredictionReader.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing Genome;\r\nusing IO;\r\nusing OptimizedCore;\r\n\r\nnamespace CacheUtils.IntermediateIO\r\n{\r\n    public sealed class PredictionReader : IDisposable\r\n    {\r\n        private readonly Dictionary<ushort, Chromosome> _refIndexToChromosome;\r\n        private readonly StreamReader _reader;\r\n\r\n        public PredictionReader(Stream stream, Dictionary<ushort, Chromosome> refIndexToChromosome,\r\n            IntermediateIoCommon.FileType expectedFileType)\r\n        {\r\n            _refIndexToChromosome = refIndexToChromosome;\r\n            _reader = FileUtilities.GetStreamReader(stream);\r\n            IntermediateIoCommon.ReadHeader(_reader, expectedFileType);\r\n        }\r\n\r\n        public (string[] PredictionData, Dictionary<int, int> TranscriptToPredictionIndex, Chromosome Chromosome)\r\n            GetPredictionData()\r\n        {\r\n            var chromosomeHeader            = GetChromosomeHeader();\r\n            var predictionData              = new string[chromosomeHeader.NumPredictions];\r\n            var transcriptToPredictionIndex = new Dictionary<int, int>(chromosomeHeader.NumPredictions);\r\n\r\n            for (var predictionIndex = 0; predictionIndex < chromosomeHeader.NumPredictions; predictionIndex++)\r\n            {\r\n                var prediction = GetNextPrediction();\r\n                predictionData[predictionIndex] = prediction.PredictionData;\r\n                foreach (int index in prediction.TranscriptIndices)\r\n                    transcriptToPredictionIndex[index] = predictionIndex;\r\n            }\r\n                              \r\n            return (predictionData, transcriptToPredictionIndex, chromosomeHeader.Chromosome);\r\n        }\r\n\r\n        private (Chromosome Chromosome, int NumPredictions) GetChromosomeHeader()\r\n        {\r\n            string line = _reader.ReadLine();\r\n            
var cols = line?.OptimizedSplit('\\t');\r\n            if (cols == null) throw new InvalidDataException(\"Found an unexpected null line when parsing the chromosome header in the prediction reader.\");\r\n            if (cols.Length != 3) throw new InvalidDataException($\"Expected 3 columns in the chromosome header, but found {cols.Length}\");\r\n\r\n            ushort referenceIndex = ushort.Parse(cols[1]);\r\n            var chromosome        = ReferenceNameUtilities.GetChromosome(_refIndexToChromosome, referenceIndex);\r\n            int numPredictions    = int.Parse(cols[2]);\r\n\r\n            return (chromosome, numPredictions);\r\n        }\r\n\r\n        private (List<int> TranscriptIndices, string PredictionData) GetNextPrediction()\r\n        {\r\n            string line = _reader.ReadLine();\r\n            if (line == null) throw new InvalidDataException(\"Found an unexpected empty line while parsing the prediction file.\");\r\n\r\n            var cols = line.OptimizedSplit('\\t');\r\n            if (cols.Length != 2) throw new InvalidDataException($\"Expected 2 columns in the prediction entry, but found {cols.Length}\");\r\n\r\n            var transcriptIndices = GetTranscriptIndices(cols[0]);\r\n            string predictionData = cols[1];\r\n\r\n            return (transcriptIndices, predictionData);\r\n        }\r\n\r\n        private static List<int> GetTranscriptIndices(string s)\r\n        {\r\n            var indexStrings = s.OptimizedSplit(',');\r\n            var indices      = new int[indexStrings.Length];\r\n            for (var i = 0; i < indexStrings.Length; i++) indices[i] = int.Parse(indexStrings[i]);\r\n            return indices.ToList();\r\n        }\r\n\r\n        public void Dispose() => _reader.Dispose();\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/IntermediateIO/PredictionWriter.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing Genome;\r\n\r\nnamespace CacheUtils.IntermediateIO\r\n{\r\n    internal sealed class PredictionWriter : IDisposable\r\n    {\r\n        private readonly StreamWriter _writer;\r\n\r\n        internal PredictionWriter(StreamWriter writer, IntermediateIoHeader header,\r\n            IntermediateIoCommon.FileType fileType)\r\n        {\r\n            _writer = writer;\r\n            _writer.NewLine = \"\\n\";\r\n            header.Write(_writer, fileType);\r\n        }\r\n\r\n        internal void Write(Chromosome chromosome, Dictionary<string, List<int>> predictionDict)\r\n        {\r\n            _writer.WriteLine($\"{chromosome.UcscName}\\t{chromosome.Index}\\t{predictionDict.Count}\");\r\n            foreach (var kvp in predictionDict) WritePrediction(kvp.Value, kvp.Key);\r\n        }\r\n\r\n        private void WritePrediction(IEnumerable<int> transcriptIds, string predictionData)\r\n        {\r\n            string transcriptIdString = string.Join(',', transcriptIds);\r\n            _writer.WriteLine($\"{transcriptIdString}\\t{predictionData}\");\r\n        }\r\n\r\n        public void Dispose() => _writer.Dispose();\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/IntermediateIO/RegulatoryRegionReader.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing Genome;\r\nusing IO;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Caches;\r\n\r\nnamespace CacheUtils.IntermediateIO\r\n{\r\n    internal sealed class RegulatoryRegionReader : IDisposable\r\n    {\r\n        private readonly Dictionary<ushort, Chromosome> _refIndexToChromosome;\r\n        private readonly StreamReader _reader;\r\n\r\n        internal RegulatoryRegionReader(Stream stream, Dictionary<ushort, Chromosome> refIndexToChromosome)\r\n        {\r\n            _refIndexToChromosome = refIndexToChromosome;\r\n            _reader = FileUtilities.GetStreamReader(stream);\r\n            IntermediateIoCommon.ReadHeader(_reader, IntermediateIoCommon.FileType.Regulatory);\r\n        }\r\n\r\n        public IRegulatoryRegion[] GetRegulatoryRegions()\r\n        {\r\n            var regulatoryRegions = new List<IRegulatoryRegion>();\r\n\r\n            while (true)\r\n            {\r\n                var regulatoryRegion = GetNextRegulatoryRegion();\r\n                if (regulatoryRegion == null) break;\r\n                regulatoryRegions.Add(regulatoryRegion);\r\n            }\r\n\r\n            return regulatoryRegions.ToArray();\r\n        }\r\n\r\n        private IRegulatoryRegion GetNextRegulatoryRegion()\r\n        {\r\n            string line = _reader.ReadLine();\r\n            if (line == null) return null;\r\n\r\n            var cols              = line.OptimizedSplit('\\t');\r\n            ushort referenceIndex = ushort.Parse(cols[1]);\r\n            int start             = int.Parse(cols[2]);\r\n            int end               = int.Parse(cols[3]);\r\n            var id                = CompactId.Convert(cols[4]);\r\n            var type              = 
(RegulatoryRegionType)byte.Parse(cols[6]);\r\n\r\n            var chromosome = ReferenceNameUtilities.GetChromosome(_refIndexToChromosome, referenceIndex);\r\n            return new RegulatoryRegion(chromosome, start, end, id, type);\r\n        }\r\n\r\n        public void Dispose() => _reader.Dispose();\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/IntermediateIO/RegulatoryRegionWriter.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.IntermediateIO\r\n{\r\n    internal sealed class RegulatoryRegionWriter : IDisposable\r\n    {\r\n        private readonly StreamWriter _writer;\r\n\r\n        internal RegulatoryRegionWriter(StreamWriter writer, IntermediateIoHeader header)\r\n        {\r\n            _writer = writer;\r\n            _writer.NewLine = \"\\n\";\r\n            header.Write(_writer, IntermediateIoCommon.FileType.Regulatory);\r\n        }\r\n\r\n        internal void Write(IRegulatoryRegion region) => _writer.WriteLine(\r\n            $\"{region.Chromosome.UcscName}\\t{region.Chromosome.Index}\\t{region.Start}\\t{region.End}\\t{region.Id}\\t{region.Type}\\t{(byte) region.Type}\");\r\n\r\n        public void Dispose() => _writer.Dispose();\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/MiniCache/DataBundle.cs",
    "content": "﻿using Genome;\nusing IO;\nusing ReferenceSequence.IO;\nusing VariantAnnotation.Caches.DataStructures;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.IO.Caches;\nusing VC = VariantAnnotation.Caches;\n\nnamespace CacheUtils.MiniCache\n{\n    /// <summary>\n    /// the bundle of cache and reference data objects that correspond to a \n    /// specific genome assembly and transcript data source\n    /// </summary>\n    public sealed class DataBundle\n    {\n        public readonly CompressedSequenceReader SequenceReader;\n        public readonly VC.TranscriptCacheData TranscriptCacheData;\n        public readonly VC.TranscriptCache TranscriptCache;\n\n        public readonly PredictionCacheReader SiftReader;\n        public readonly PredictionCacheReader PolyPhenReader;\n\n        private Chromosome _currentChromosome = Chromosome.GetEmptyChromosome(string.Empty);\n\n        public Prediction[] SiftPredictions;\n        public Prediction[] PolyPhenPredictions;\n        public readonly Source Source;\n\n        private DataBundle(CompressedSequenceReader sequenceReader, PredictionCacheReader siftReader,\n            PredictionCacheReader polyPhenReader, VC.TranscriptCacheData cacheData, VC.TranscriptCache transcriptCache,\n            Source source)\n        {\n            SequenceReader      = sequenceReader;\n            TranscriptCacheData = cacheData;\n            TranscriptCache     = transcriptCache;\n            Source              = source;\n            SiftReader          = siftReader;\n            PolyPhenReader      = polyPhenReader;\n        }\n\n        public void Load(Chromosome chromosome)\n        {\n            if (_currentChromosome.Index == chromosome.Index) return;\n            SequenceReader.GetCompressedSequence(chromosome);\n            SiftPredictions     = SiftReader.GetPredictions(chromosome.Index);\n            PolyPhenPredictions = PolyPhenReader.GetPredictions(chromosome.Index);\n            
_currentChromosome  = chromosome;\n        }\n\n        public static DataBundle GetDataBundle(string referencePath, string cachePrefix)\n        {\n            var sequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(referencePath));\n            var siftReader     = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.SiftPath(cachePrefix)), PredictionCacheReader.SiftDescriptions);\n            var polyPhenReader = new PredictionCacheReader(FileUtilities.GetReadStream(CacheConstants.PolyPhenPath(cachePrefix)), PredictionCacheReader.PolyphenDescriptions);\n\n            VC.TranscriptCacheData cacheData;\n            VC.TranscriptCache cache;\n            Source source;\n\n            using (var transcriptReader = new TranscriptCacheReader(FileUtilities.GetReadStream(CacheConstants.TranscriptPath(cachePrefix))))\n            {\n                cacheData = transcriptReader.Read(sequenceReader.RefIndexToChromosome);\n                cache     = cacheData.GetCache();\n                source    = transcriptReader.Header.Source;\n            }\n\n            return new DataBundle(sequenceReader, siftReader, polyPhenReader, cacheData, cache, source);\n        }\n    }\n}"
  },
  {
    "path": "CacheUtils/MiniCache/IStaging.cs",
    "content": "﻿using System.IO;\r\n\r\nnamespace CacheUtils.MiniCache\r\n{\r\n    public interface IStaging\r\n    {\r\n        void Write(Stream stream);\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/PredictionCache/PredictionCacheBuilder.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing CacheUtils.IntermediateIO;\r\nusing CacheUtils.Utilities;\r\nusing Genome;\r\nusing IO;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.IO.Caches;\r\n\r\nnamespace CacheUtils.PredictionCache\r\n{\r\n    public sealed class PredictionCacheBuilder\r\n    {\r\n        private readonly GenomeAssembly _genomeAssembly;\r\n\r\n        public PredictionCacheBuilder(GenomeAssembly genomeAssembly) => _genomeAssembly = genomeAssembly;\r\n\r\n        public (PredictionCacheStaging Sift, PredictionCacheStaging PolyPhen) CreatePredictionCaches(\r\n            Dictionary<ushort, List<MutableTranscript>> transcriptsByRefIndex, PredictionReader siftReader,\r\n            PredictionReader polyphenReader, int numRefSeqs)\r\n        {\r\n            Logger.Write(\"- converting prediction strings... 
\");\r\n\r\n            var siftRoundedPredictionsPerRef     = new RoundedEntryPrediction[numRefSeqs][];\r\n            var polyPhenRoundedPredictionsPerRef = new RoundedEntryPrediction[numRefSeqs][];\r\n\r\n            for (ushort refIndex = 0; refIndex < numRefSeqs; refIndex++)\r\n            {\r\n                var sift     = siftReader.GetPredictionData();\r\n                var polyphen = polyphenReader.GetPredictionData();\r\n\r\n                if (sift.Chromosome.Index != refIndex || polyphen.Chromosome.Index != refIndex)\r\n                    throw new InvalidDataException(\r\n                        $\"Found mismatch between transcript chromosome index ({refIndex}) and prediction chromosome indices (SIFT: {sift.Chromosome.Index}, PolyPhen: {polyphen.Chromosome.Index}.\");\r\n\r\n                if (!transcriptsByRefIndex.TryGetValue(refIndex, out var refTranscripts)) continue;\r\n\r\n                var (siftPredictions, polyPhenPredictions) = ProcessReference(refTranscripts,\r\n                    sift.TranscriptToPredictionIndex, polyphen.TranscriptToPredictionIndex, sift.PredictionData,\r\n                    polyphen.PredictionData);\r\n\r\n                siftRoundedPredictionsPerRef[refIndex]     = siftPredictions;\r\n                polyPhenRoundedPredictionsPerRef[refIndex] = polyPhenPredictions;\r\n            }\r\n\r\n            Logger.WriteLine(\"finished.\");\r\n\r\n            var siftStaging     = BuildCacheStaging(\"SIFT\", siftRoundedPredictionsPerRef, numRefSeqs);\r\n            var polyPhenStaging = BuildCacheStaging(\"PolyPhen\", polyPhenRoundedPredictionsPerRef, numRefSeqs);\r\n\r\n            return (siftStaging, polyPhenStaging);\r\n        }\r\n\r\n        private PredictionCacheStaging BuildCacheStaging(string description,\r\n            IReadOnlyList<RoundedEntryPrediction[]> roundedPredictionsPerRef, int numReferenceSeqs)\r\n        {\r\n            Logger.Write($\"- calculating {description} LUT... 
\");\r\n            var (lut, roundedEntryToLutIndex) = CreateLut(roundedPredictionsPerRef);\r\n            Logger.WriteLine($\"{lut.Length} entries.\");\r\n\r\n            Logger.Write($\"- converting {description} rounded entries... \");\r\n            var predictionsPerRef = ConvertPredictions(roundedPredictionsPerRef, roundedEntryToLutIndex, lut);\r\n            Logger.WriteLine(\"finished.\");\r\n\r\n            var header = CreateHeader(numReferenceSeqs, lut);\r\n            return new PredictionCacheStaging(header, predictionsPerRef);\r\n        }\r\n\r\n        private PredictionHeader CreateHeader(int numReferenceSeqs, Prediction.Entry[] lut)\r\n        {\r\n            var customHeader = new PredictionCacheCustomHeader(new IndexEntry[numReferenceSeqs]);\r\n            return new PredictionHeader(HeaderUtilities.GetHeader(Source.None, _genomeAssembly), customHeader, lut);\r\n        }\r\n\r\n        private static Prediction[][] ConvertPredictions(IReadOnlyList<RoundedEntryPrediction[]> roundedPredictionsPerRef,\r\n            Dictionary<RoundedEntry, byte> roundedEntryToLutIndex, Prediction.Entry[] lut)\r\n        {\r\n            int numReferenceSeqs  = roundedPredictionsPerRef.Count;\r\n            var predictionsPerRef = new Prediction[numReferenceSeqs][];\r\n\r\n            for (var i = 0; i < numReferenceSeqs; i++)\r\n            {\r\n                predictionsPerRef[i] = ConvertReferencePredictions(roundedPredictionsPerRef[i], roundedEntryToLutIndex, lut);\r\n            }\r\n\r\n            return predictionsPerRef;\r\n        }\r\n\r\n        private static Prediction[] ConvertReferencePredictions(IReadOnlyList<RoundedEntryPrediction> roundedEntryPredictions,\r\n            Dictionary<RoundedEntry, byte> roundedEntryToLutIndex, Prediction.Entry[] lut)\r\n        {\r\n            if (roundedEntryPredictions == null) return null;\r\n\r\n            int numPredictions = roundedEntryPredictions.Count;\r\n            var predictions    = new 
Prediction[numPredictions];\r\n\r\n            for (var i = 0; i < numPredictions; i++)\r\n                predictions[i] = roundedEntryPredictions[i].Convert(roundedEntryToLutIndex, lut);\r\n\r\n            return predictions;\r\n        }\r\n\r\n        private static (Prediction.Entry[] Lut, Dictionary<RoundedEntry, byte> RoundedEntryToLutIndex) CreateLut(\r\n            IEnumerable<RoundedEntryPrediction[]> roundedPredictionsPerRef)\r\n        {\r\n            var scores = new HashSet<RoundedEntry>();\r\n\r\n            foreach (var roundedPredictions in roundedPredictionsPerRef)\r\n            {\r\n                if (roundedPredictions == null) continue;\r\n\r\n                foreach (var roundedPrediction in roundedPredictions)\r\n                {\r\n                    foreach (var roundedEntry in roundedPrediction.Entries)\r\n                    {\r\n                        if (roundedEntry.Score > 1000) continue;\r\n                        scores.Add(roundedEntry);\r\n                    }\r\n                }\r\n            }\r\n\r\n            if (scores.Count > 255) throw new InvalidDataException($\"Unable to create lookup table, too many LUT entries: {scores.Count} (max 255).\");\r\n\r\n            var lut                    = new Prediction.Entry[scores.Count];\r\n            var roundedEntryToLutIndex = new Dictionary<RoundedEntry, byte>();\r\n\r\n            var currentIndex = 0;\r\n            foreach (var entry in scores.OrderBy(x => x.EnumIndex).ThenBy(x => x.Score))\r\n            {\r\n                roundedEntryToLutIndex[entry] = (byte)currentIndex;\r\n                lut[currentIndex++] = new Prediction.Entry(entry.Score / 1000.0, entry.EnumIndex);\r\n            }\r\n\r\n            return (lut, roundedEntryToLutIndex);\r\n        }\r\n\r\n        private static (RoundedEntryPrediction[] Sift, RoundedEntryPrediction[] PolyPhen) ProcessReference(\r\n            IReadOnlyList<MutableTranscript> transcripts, Dictionary<int, int> 
siftTranscriptToPredictionIndex,\r\n            Dictionary<int, int> polyphenTranscriptToPredictionIndex, string[] siftPredictionData,\r\n            string[] polyphenPredictionData)\r\n        {\r\n            AssignPredictionIndices(transcripts, siftTranscriptToPredictionIndex, polyphenTranscriptToPredictionIndex);\r\n\r\n            var siftPredictions     = siftPredictionData.GetRoundedEntryPredictions();\r\n            var polyPhenPredictions = polyphenPredictionData.GetRoundedEntryPredictions();\r\n\r\n            return (siftPredictions, polyPhenPredictions);\r\n        }\r\n\r\n        private static void AssignPredictionIndices(IReadOnlyList<MutableTranscript> transcripts,\r\n            Dictionary<int, int> siftTranscriptToPredictionIndex,\r\n            Dictionary<int, int> polyphenTranscriptToPredictionIndex)\r\n        {\r\n            foreach (var kvp in siftTranscriptToPredictionIndex)     transcripts[kvp.Key].SiftIndex     = kvp.Value;\r\n            foreach (var kvp in polyphenTranscriptToPredictionIndex) transcripts[kvp.Key].PolyPhenIndex = kvp.Value;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/PredictionCache/PredictionCacheStaging.cs",
    "content": "﻿using System.IO;\nusing System.IO.Compression;\nusing CacheUtils.MiniCache;\nusing Compression.Algorithms;\nusing Compression.FileHandling;\nusing VariantAnnotation.Caches.DataStructures;\nusing VariantAnnotation.IO.Caches;\n\nnamespace CacheUtils.PredictionCache\n{\n    public sealed class PredictionCacheStaging : IStaging\n    {\n        private readonly Prediction[][] _predictionsPerRef;\n        private readonly PredictionHeader _header;\n\n        internal PredictionCacheStaging(PredictionHeader header, Prediction[][] predictionsPerRef)\n        {\n            _header            = header;\n            _predictionsPerRef = predictionsPerRef;\n        }\n\n        public void Write(Stream stream)\n        {\n            using (var blockStream = new BlockStream(new Zstandard(), stream, CompressionMode.Compress))\n            using (var writer      = new PredictionCacheWriter(blockStream, _header))\n            {\n                writer.Write(_header.LookupTable, _predictionsPerRef);\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "CacheUtils/PredictionCache/PredictionCacheWriter.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing Compression.FileHandling;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.IO.Caches;\r\n\r\nnamespace CacheUtils.PredictionCache\r\n{\r\n    public sealed class PredictionCacheWriter : IDisposable\r\n    {\r\n        private readonly BinaryWriter _writer;\r\n        private readonly BlockStream _blockStream;\r\n        private readonly PredictionHeader _header;\r\n        private readonly bool _leaveOpen;\r\n\r\n        public PredictionCacheWriter(BlockStream blockStream, PredictionHeader header, bool leaveOpen = false)\r\n        {\r\n            _blockStream = blockStream;\r\n            _writer      = new BinaryWriter(blockStream);\r\n            _header      = header;\r\n            _leaveOpen   = leaveOpen;\r\n        }\r\n\r\n        public void Dispose()\r\n        {\r\n            if (!_leaveOpen) _blockStream.Dispose();\r\n            _writer.Dispose();\r\n        }\r\n\r\n        internal void Write(Prediction.Entry[] lut, Prediction[][] predictionsPerRef)\r\n        {\r\n            _blockStream.WriteHeader(_header.Write);\r\n            WriteLookupTable(_writer, lut);\r\n            _blockStream.Flush();\r\n            WritePredictions(predictionsPerRef);\r\n        }\r\n\r\n        private void WritePredictions(IReadOnlyList<Prediction[]> predictionsPerRef)\r\n        {\r\n            var indexEntries = _header.Custom.Entries;\r\n\r\n            for (var i = 0; i < predictionsPerRef.Count; i++)\r\n            {\r\n\t            var refPredictions = predictionsPerRef[i];\r\n\r\n\t\t\t\tvar position = _blockStream.GetBlockPosition();\r\n                indexEntries[i].FileOffset = position.FileOffset;\r\n                indexEntries[i].Count      = refPredictions?.Length ?? 
0;\r\n\r\n                if (refPredictions != null)\r\n                {\r\n                    foreach (var prediction in refPredictions) prediction.Write(_writer);\r\n                }\r\n\r\n                _blockStream.Flush();\r\n            }\r\n        }\r\n\r\n        private static void WriteLookupTable(BinaryWriter writer, IReadOnlyCollection<Prediction.Entry> lut)\r\n        {\r\n            writer.Write(lut.Count);\r\n            foreach (var entry in lut) entry.Write(writer);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/PredictionCache/PredictionExtensions.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\n\r\nnamespace CacheUtils.PredictionCache\r\n{\r\n    public static class PredictionExtensions\r\n    {\r\n        public static RoundedEntryPrediction[] GetRoundedEntryPredictions(this string[] predictionStrings)\r\n        {\r\n            var predictions = new RoundedEntryPrediction[predictionStrings.Length];\r\n            var currentIndex = 0;\r\n            foreach (string s in predictionStrings) predictions[currentIndex++] = s.GetRoundedEntryPrediction();\r\n            return predictions;\r\n        }\r\n\r\n        private static RoundedEntryPrediction GetRoundedEntryPrediction(this string predictionString)\r\n        {\r\n            // convert the base 64 string representation to our compressed prediction data\r\n            var uncompressedDataWithHeader = Convert.FromBase64String(predictionString);\r\n            const int headerLength = 3;\r\n\r\n            // skip the 'VEP' header\r\n            int newLength = uncompressedDataWithHeader.Length - headerLength;\r\n\r\n            // sanity check: we should have an even number of bytes\r\n            if ((newLength & 1) != 0)\r\n            {\r\n                throw new InvalidDataException($\"Expected an even number of bytes when serializing the protein function prediction matrix: {newLength}\");\r\n            }\r\n\r\n            var data = new ushort[newLength / 2];\r\n            Buffer.BlockCopy(uncompressedDataWithHeader, headerLength, data, 0, newLength);\r\n\r\n            var roundedEntries = new RoundedEntry[data.Length];\r\n            for (var i = 0; i < data.Length; i++) roundedEntries[i] = new RoundedEntry(data[i]);\r\n            return new RoundedEntryPrediction(roundedEntries);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/PredictionCache/PredictionUtilities.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.Genes.Utilities;\r\nusing CacheUtils.TranscriptCache;\r\nusing Intervals;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.PredictionCache\r\n{\r\n    public static class PredictionUtilities\r\n    {\r\n        internal static IntervalArray<ITranscript>[] UpdateTranscripts(IEnumerable<ITranscript> transcripts,\r\n            Prediction[] oldSiftPredictions, IEnumerable<Prediction> siftPredictions,\r\n            Prediction[] oldPolyPhenPredictions, IEnumerable<Prediction> polyPhenPredictions, int numRefSeqs)\r\n        {\r\n            var siftDict       = siftPredictions.CreateIndex();\r\n            var polyphenDict   = polyPhenPredictions.CreateIndex();\r\n            var newTranscripts = new List<ITranscript>();\r\n\r\n            // ReSharper disable once LoopCanBeConvertedToQuery\r\n            foreach (var transcript in transcripts)\r\n            {\r\n                int siftIndex     = GetNewIndex(oldSiftPredictions, transcript.SiftIndex, siftDict);\r\n                int polyphenIndex = GetNewIndex(oldPolyPhenPredictions, transcript.PolyPhenIndex, polyphenDict);\r\n                newTranscripts.Add(transcript.UpdatePredictions(siftIndex, polyphenIndex));\r\n            }\r\n\r\n            return newTranscripts.ToIntervalArrays(numRefSeqs);\r\n        }\r\n\r\n        internal static ITranscript UpdatePredictions(this ITranscript t, int siftIndex, int polyphenIndex)\r\n        {\r\n            return new Transcript(t.Chromosome, t.Start, t.End, t.Id, t.Translation, t.BioType, t.Gene,\r\n                t.TotalExonLength, t.StartExonPhase, t.IsCanonical, t.TranscriptRegions, t.NumExons,\r\n                t.MicroRnas, siftIndex, polyphenIndex, t.Source, t.CdsStartNotFound, t.CdsEndNotFound,\r\n                t.Selenocysteines, t.RnaEdits);\r\n        }\r\n\r\n        private 
static int GetNewIndex(IReadOnlyList<Prediction> oldPredictions, int index,\r\n            IReadOnlyDictionary<Prediction, int> dict)\r\n        {\r\n            if (index == -1) return -1;\r\n            var prediction = oldPredictions[index];\r\n            if (!dict.TryGetValue(prediction, out int newIndex)) throw new InvalidDataException(\"Unable to find the prediction in the dictionary.\");\r\n            return newIndex;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/PredictionCache/RoundedEntry.cs",
    "content": "﻿using System;\r\n\r\nnamespace CacheUtils.PredictionCache\r\n{\r\n    public struct RoundedEntry : IEquatable<RoundedEntry>\r\n    {\r\n        public readonly ushort Score;\r\n        public readonly byte EnumIndex;\r\n\r\n        public RoundedEntry(ushort data)\r\n        {\r\n            Score     = Round((ushort)(data & 0x3ff));\r\n            EnumIndex = (byte)((data & 0xc000) >> 14);\r\n        }\r\n\r\n        private static ushort Round(ushort us) => (ushort)((ushort)Math.Round(us / 5.0) * 5);\r\n\r\n        public bool Equals(RoundedEntry other) => Score == other.Score && EnumIndex == other.EnumIndex;\r\n\r\n        public override int GetHashCode()\r\n        {\r\n            unchecked { return (Score.GetHashCode() * 397) ^ EnumIndex.GetHashCode(); }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/PredictionCache/RoundedEntryPrediction.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing VariantAnnotation.Caches.DataStructures;\r\n\r\nnamespace CacheUtils.PredictionCache\r\n{\r\n    public sealed class RoundedEntryPrediction\r\n    {\r\n        public readonly RoundedEntry[] Entries;\r\n        public RoundedEntryPrediction(RoundedEntry[] entries) => Entries = entries;\r\n\r\n        public Prediction Convert(Dictionary<RoundedEntry, byte> lutDict, Prediction.Entry[] lut)\r\n        {\r\n            int numEntries = Entries.Length;\r\n            var lutIndices = new byte[numEntries];\r\n\r\n            var index = 0;\r\n            foreach (var entry in Entries) lutIndices[index++] = entry.Score > 1000 ? (byte) 255 : lutDict[entry];\r\n            return new Prediction(lutIndices, lut);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/TranscriptCache/CanonicalTranscriptMarker.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing CacheUtils.Utilities;\r\n\r\nnamespace CacheUtils.TranscriptCache\r\n{\r\n    public sealed class CanonicalTranscriptMarker\r\n    {\r\n        private readonly HashSet<string> _lrgTranscriptIds;\r\n\r\n        public CanonicalTranscriptMarker(HashSet<string> lrgTranscriptIds)\r\n        {\r\n            _lrgTranscriptIds = lrgTranscriptIds;\r\n        }\r\n\r\n        public int MarkTranscripts(MutableTranscript[] transcripts)\r\n        {\r\n            var transcriptsByGeneId          = GetTranscriptsByEntrezGeneId(transcripts);\r\n            var canonicalTranscriptsByGeneId = GetCanonicalTranscriptsByGeneId(transcriptsByGeneId);\r\n            return SetCanonicalFlags(canonicalTranscriptsByGeneId, transcripts);\r\n        }\r\n\r\n        private SortedDictionary<int, HashSet<TranscriptMetadata>> GetTranscriptsByEntrezGeneId(IEnumerable<MutableTranscript> transcripts)\r\n        {\r\n            var genes = new SortedDictionary<int, HashSet<TranscriptMetadata>>();\r\n\r\n            foreach (var transcript in transcripts)\r\n            {\r\n                string idWithVersion = transcript.Id + '.' + transcript.Version;\r\n\r\n                int cdsLength        = transcript.CodingRegion?.Length ?? 
0;\r\n                int transcriptLength = transcript.End - transcript.Start + 1;\r\n                bool isLrg           = _lrgTranscriptIds.Contains(transcript.Id);\r\n                int accession        = AccessionUtilities.GetAccessionNumber(transcript.Id);\r\n\r\n                var metadata = new TranscriptMetadata(idWithVersion, accession, transcriptLength, cdsLength, isLrg);\r\n                int geneId   = ConvertGeneIdToInt(transcript.Gene.GeneId);\r\n\r\n                if (genes.TryGetValue(geneId, out var observedMetadata)) observedMetadata.Add(metadata);\r\n                else genes[geneId] = new HashSet<TranscriptMetadata> { metadata };\r\n            }\r\n\r\n            return genes;\r\n        }\r\n\r\n        private static SortedDictionary<int, string> GetCanonicalTranscriptsByGeneId(SortedDictionary<int, HashSet<TranscriptMetadata>> genes)\r\n        {\r\n            // - Order all of the overlapping transcripts by cds length\r\n            // - Pick the longest transcript that has an associated Locus Reference Genome (LRG) sequence\r\n            // - If no LRGs exist for the set of transcripts, pick the longest transcript that is coding\r\n            // - If there is a tie, pick the transcript with the smaller accession id number\r\n            var canonicalTranscripts = new SortedDictionary<int, string>();\r\n\r\n            foreach (var kvp in genes)\r\n            {\r\n                var sortedTranscripts = GetSortedTrustedTranscripts(kvp.Value);\r\n\r\n                // pick the transcript with the smallest accession\r\n                if (sortedTranscripts.Count > 0) canonicalTranscripts[kvp.Key] = sortedTranscripts[0].TranscriptId;\r\n            }\r\n\r\n            return canonicalTranscripts;\r\n        }\r\n\r\n        private static int ConvertGeneIdToInt(string geneId)\r\n        {\r\n            if (string.IsNullOrEmpty(geneId)) throw new InvalidDataException(\"Expected a non-empty Entrez gene ID during canonical 
aggregation.\");\r\n            if (geneId.StartsWith(\"ENSG\")) geneId = geneId.Substring(4);\r\n            if (!int.TryParse(geneId, out int geneIdNumber)) throw new InvalidDataException($\"Unable to convert Entrez gene ID ({geneId}) to an integer.\");\r\n            return geneIdNumber;\r\n        }\r\n\r\n        private static int SetCanonicalFlags(IReadOnlyDictionary<int, string> canonicalTranscriptsByGeneId, IEnumerable<MutableTranscript> transcripts)\r\n        {\r\n            var numCanonicalTranscripts = 0;\r\n\r\n            foreach (var transcript in transcripts)\r\n            {\r\n                int geneId = ConvertGeneIdToInt(transcript.Gene.GeneId);\r\n                transcript.IsCanonical = false;\r\n\r\n                // no canonical transcript\r\n                if (!canonicalTranscriptsByGeneId.TryGetValue(geneId, out string canonicalTranscriptId)) continue;\r\n                string idWithVersion = transcript.Id + '.' + transcript.Version;\r\n                if (idWithVersion != canonicalTranscriptId) continue;\r\n\r\n                // mark the transcript canonical\r\n                transcript.IsCanonical = true;\r\n                numCanonicalTranscripts++;\r\n            }\r\n\r\n            return numCanonicalTranscripts;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns a sorted list of all the transcripts that have an ENST, NM_, or NR_ prefix\r\n        /// </summary>\r\n        private static List<TranscriptMetadata> GetSortedTrustedTranscripts(IEnumerable<TranscriptMetadata> transcripts)\r\n        {\r\n            var selectedTranscripts =\r\n                transcripts.Where(\r\n                    transcript => transcript.TranscriptId.StartsWith(\"ENST\") ||\r\n                    transcript.TranscriptId.StartsWith(\"NM_\") ||\r\n                    transcript.TranscriptId.StartsWith(\"NR_\")).ToList();\r\n\r\n            return selectedTranscripts.OrderByDescending(x => x.IsLrg)\r\n                    
.ThenByDescending(x => x.CdsLength)\r\n                    .ThenByDescending(x => x.TranscriptLength)\r\n                    .ThenBy(x => x.Accession)\r\n                    .ToList();\r\n        }\r\n\r\n        public sealed class TranscriptMetadata : IEquatable<TranscriptMetadata>\r\n        {\r\n            public readonly string TranscriptId;\r\n            public readonly int CdsLength;\r\n            public readonly int TranscriptLength;\r\n            public readonly bool IsLrg;\r\n            public readonly int Accession;\r\n\r\n            public TranscriptMetadata(string transcriptId, int accession, int transcriptLength, int cdsLength, bool isLrg)\r\n            {\r\n                TranscriptId     = transcriptId;\r\n                TranscriptLength = transcriptLength;\r\n                CdsLength        = cdsLength;\r\n                IsLrg            = isLrg;\r\n                Accession        = accession;\r\n            }\r\n\r\n            public bool Equals(TranscriptMetadata other)\r\n            {\r\n                if (ReferenceEquals(null, other)) return false;\r\n                if (ReferenceEquals(this, other)) return true;\r\n                return string.Equals(TranscriptId, other.TranscriptId) && CdsLength == other.CdsLength &&\r\n                       TranscriptLength == other.TranscriptLength && IsLrg == other.IsLrg &&\r\n                       Accession == other.Accession;\r\n            }\r\n\r\n            public override int GetHashCode()\r\n            {\r\n                unchecked\r\n                {\r\n                    int hashCode = TranscriptId != null ? 
TranscriptId.GetHashCode() : 0;\r\n                    hashCode = (hashCode * 397) ^ CdsLength;\r\n                    hashCode = (hashCode * 397) ^ TranscriptLength;\r\n                    hashCode = (hashCode * 397) ^ IsLrg.GetHashCode();\r\n                    hashCode = (hashCode * 397) ^ Accession;\r\n                    return hashCode;\r\n                }\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/TranscriptCache/Comparers/GeneComparer.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.TranscriptCache.Comparers\r\n{\r\n    internal sealed class GeneComparer : EqualityComparer<IGene>\r\n    {\r\n        public override bool Equals(IGene x, IGene y)\r\n        {\r\n            return x.Start                    == y.Start                    &&\r\n                   x.End                      == y.End                      &&\r\n                   x.Chromosome.Index         == y.Chromosome.Index         &&\r\n                   x.OnReverseStrand          == y.OnReverseStrand          &&\r\n                   x.Symbol                   == y.Symbol                   &&\r\n                   x.EntrezGeneId.WithVersion == y.EntrezGeneId.WithVersion &&\r\n                   x.EnsemblId.WithVersion    == y.EnsemblId.WithVersion    &&\r\n                   x.HgncId                   == y.HgncId;\r\n        }\r\n\r\n        public override int GetHashCode(IGene obj)\r\n        {\r\n            string entrezGeneId = obj.EntrezGeneId.WithVersion;\r\n            string ensemblId    = obj.EnsemblId.WithVersion;\r\n\r\n            unchecked\r\n            {\r\n                int hashCode = obj.Start;\r\n                hashCode = (hashCode * 397) ^ obj.End;\r\n                hashCode = (hashCode * 397) ^ obj.Chromosome.Index;\r\n                hashCode = (hashCode * 397) ^ obj.OnReverseStrand.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ obj.Symbol.GetHashCode();\r\n                if (entrezGeneId != null) hashCode = (hashCode * 397) ^ entrezGeneId.GetHashCode();\r\n                if (ensemblId    != null) hashCode = (hashCode * 397) ^ ensemblId.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ obj.HgncId;\r\n                return hashCode;\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/TranscriptCache/Comparers/IntervalComparer.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Intervals;\r\n\r\nnamespace CacheUtils.TranscriptCache.Comparers\r\n{\r\n    internal sealed class IntervalComparer : EqualityComparer<IInterval>\r\n    {\r\n        public override bool Equals(IInterval x, IInterval y) => x.Start == y.Start && x.End == y.End;\r\n\r\n        public override int GetHashCode(IInterval obj)\r\n        {\r\n            unchecked\r\n            {\r\n                return (obj.Start * 397) ^ obj.End;\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/TranscriptCache/Comparers/RegulatoryRegionComparer.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.TranscriptCache.Comparers\r\n{\r\n    internal sealed class RegulatoryRegionComparer : EqualityComparer<IRegulatoryRegion>\r\n    {\r\n        public override bool Equals(IRegulatoryRegion x, IRegulatoryRegion y)\r\n        {\r\n            return x.Start             == y.Start             &&\r\n                   x.End               == y.End               &&\r\n                   x.Chromosome.Index  == y.Chromosome.Index  &&\r\n                   x.Id.WithoutVersion == y.Id.WithoutVersion &&\r\n                   x.Type              == y.Type;\r\n        }\r\n\r\n        public override int GetHashCode(IRegulatoryRegion obj)\r\n        {\r\n            unchecked\r\n            {\r\n                int hashCode = obj.Start;\r\n                hashCode = (hashCode * 397) ^ obj.End;\r\n                hashCode = (hashCode * 397) ^ obj.Chromosome.Index.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ obj.Id.WithoutVersion.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ (int)obj.Type;\r\n                return hashCode;\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/TranscriptCache/Comparers/TranscriptRegionComparer.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.TranscriptCache.Comparers\r\n{\r\n    internal sealed class TranscriptRegionComparer : EqualityComparer<ITranscriptRegion>\r\n    {\r\n        public override bool Equals(ITranscriptRegion x, ITranscriptRegion y)\r\n        {\r\n            if (ReferenceEquals(x, y)) return true;\r\n            return x.Type == y.Type && x.Id == y.Id && x.Start == y.Start && x.End == y.End &&\r\n                   x.CdnaStart == y.CdnaStart && x.CdnaEnd == y.CdnaEnd;\r\n        }\r\n\r\n        public override int GetHashCode(ITranscriptRegion obj)\r\n        {\r\n            unchecked\r\n            {\r\n                var hashCode = (int)obj.Type;\r\n                hashCode = (hashCode * 397) ^ obj.Id.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ obj.Start;\r\n                hashCode = (hashCode * 397) ^ obj.End;\r\n                hashCode = (hashCode * 397) ^ obj.CdnaStart;\r\n                hashCode = (hashCode * 397) ^ obj.CdnaEnd;\r\n                return hashCode;\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/TranscriptCache/Comparers/UgaGeneComparer.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing Intervals;\r\n\r\nnamespace CacheUtils.TranscriptCache.Comparers\r\n{\r\n    public sealed class UgaGeneComparer : EqualityComparer<UgaGene>\r\n    {\r\n        public override bool Equals(UgaGene x, UgaGene y)\r\n        {\r\n            if (ReferenceEquals(null, y)) return false;\r\n            if (ReferenceEquals(x, y)) return true;\r\n            return x.Chromosome.Index == y.Chromosome.Index &&\r\n                Equals(x.GRCh37, y.GRCh37)                  &&\r\n                Equals(x.GRCh38, y.GRCh38)                  &&\r\n                x.OnReverseStrand     == y.OnReverseStrand  &&\r\n                x.HgncId              == y.HgncId           &&\r\n                x.Symbol              == y.Symbol           &&\r\n                x.EntrezGeneId        == y.EntrezGeneId     &&\r\n                x.EnsemblId           == y.EnsemblId;\r\n        }\r\n\r\n        private static bool Equals(IInterval x, IInterval y)\r\n        {\r\n            if (x == null && y == null) return true;\r\n            if (x == null || y == null) return false;\r\n            return x.Start == y.Start && x.End == y.End;\r\n        }\r\n\r\n        private static int GetHashCode(IInterval x)\r\n        {\r\n            unchecked { return (x.Start * 397) ^ x.End; }\r\n        }\r\n\r\n\r\n        public override int GetHashCode(UgaGene obj)\r\n        {\r\n            unchecked\r\n            {\r\n                int hashCode = obj.Chromosome.Index.GetHashCode();\r\n                if (obj.GRCh37 != null) hashCode = (hashCode * 397) ^ GetHashCode(obj.GRCh37);\r\n                if (obj.GRCh38 != null) hashCode = (hashCode * 397) ^ GetHashCode(obj.GRCh38);\r\n                hashCode = (hashCode * 397) ^ obj.OnReverseStrand.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ obj.HgncId;\r\n                if (obj.Symbol != null) hashCode = (hashCode * 397) ^ 
obj.Symbol.GetHashCode();\r\n                if (obj.EntrezGeneId != null) hashCode = (hashCode * 397) ^ obj.EntrezGeneId.GetHashCode();\r\n                if (obj.EnsemblId != null) hashCode = (hashCode * 397) ^ obj.EnsemblId.GetHashCode();\r\n                return hashCode;\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/TranscriptCache/NSequence.cs",
    "content": "﻿\r\nusing Genome;\r\n\r\nnamespace CacheUtils.TranscriptCache\r\n{\r\n    public sealed class NSequence : ISequence\r\n    {\r\n        public int Length { get; } = 1000;\r\n        public string Substring(int offset, int length) => new string('N', length);\r\n        public Band[] CytogeneticBands => null;\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/TranscriptCache/SortExtensions.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Linq;\r\nusing Genome;\r\nusing Intervals;\r\n\r\nnamespace CacheUtils.TranscriptCache\r\n{\r\n    public static class SortExtensions\r\n    {\r\n        public static IOrderedEnumerable<T> Sort<T>(this IEnumerable<T> elements) where T : IChromosomeInterval =>\r\n            elements.OrderBy(x => x.Chromosome.Index).ThenBy(x => x.Start).ThenBy(x => x.End);\r\n\r\n        public static IOrderedEnumerable<T> SortInterval<T>(this IEnumerable<T> elements) where T : IInterval =>\r\n            elements.OrderBy(x => x.Start).ThenBy(x => x.End);\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/TranscriptCache/TranscriptCacheBuilder.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing CacheUtils.Genes.Utilities;\r\nusing CacheUtils.Utilities;\r\nusing Genome;\r\nusing Intervals;\r\nusing IO;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.IO.Caches;\r\n\r\nnamespace CacheUtils.TranscriptCache\r\n{\r\n    public sealed class TranscriptCacheBuilder\r\n    {\r\n        private readonly GenomeAssembly _genomeAssembly;\r\n        private readonly Source _source;\r\n        private readonly long _vepReleaseTicks;\r\n        private readonly ushort _vepVersion;\r\n\r\n        public TranscriptCacheBuilder(GenomeAssembly genomeAssembly, Source source, long vepReleaseTicks,\r\n            ushort vepVersion)\r\n        {\r\n            _genomeAssembly  = genomeAssembly;\r\n            _source          = source;\r\n            _vepReleaseTicks = vepReleaseTicks;\r\n            _vepVersion      = vepVersion;\r\n        }\r\n\r\n        public TranscriptCacheStaging CreateTranscriptCache(MutableTranscript[] mutableTranscripts,\r\n            IEnumerable<IRegulatoryRegion> regulatoryRegions, IIntervalForest<UgaGene> geneForest, int numRefSeqs)\r\n        {\r\n            Logger.Write(\"- assigning UGA genes to transcripts... 
\");\r\n            AssignUgaGenesToTranscripts(mutableTranscripts, geneForest);\r\n            Logger.WriteLine(\"finished.\");\r\n\r\n            var transcriptIntervalArrays       = mutableTranscripts.ToTranscripts().ToIntervalArrays(numRefSeqs);\r\n            var regulatoryRegionIntervalArrays = regulatoryRegions.ToIntervalArrays(numRefSeqs);\r\n\r\n            var customHeader = new TranscriptCacheCustomHeader(_vepVersion, _vepReleaseTicks);\r\n            var header       = new CacheHeader(HeaderUtilities.GetHeader(_source, _genomeAssembly), customHeader);\r\n\r\n            return TranscriptCacheStaging.GetStaging(header, transcriptIntervalArrays, regulatoryRegionIntervalArrays);\r\n        }\r\n\r\n        private void AssignUgaGenesToTranscripts(IEnumerable<MutableTranscript> transcripts, IIntervalForest<UgaGene> geneForest)\r\n        {\r\n            foreach (var transcript in transcripts)\r\n            {\r\n                var originalGene = transcript.Gene;\r\n                var ugaGenes     = geneForest.GetAllOverlappingValues(originalGene.Chromosome.Index, originalGene.Start, originalGene.End);\r\n\r\n                if (ugaGenes == null)\r\n                {\r\n                    string strand = originalGene.OnReverseStrand ? \"R\" : \"F\";\r\n                    throw new InvalidDataException($\"Found a transcript ({transcript.Id}) that does not have an overlapping UGA gene: gene ID: {originalGene.GeneId} {originalGene.Chromosome.UcscName} {originalGene.Start} {originalGene.End} {strand}\");\r\n                }\r\n\r\n                transcript.UpdatedGene = PickGeneById(ugaGenes, originalGene.GeneId).ToGene(_genomeAssembly);\r\n            }\r\n        }\r\n\r\n        private UgaGene PickGeneById(IReadOnlyList<UgaGene> genes, string geneId)\r\n        {\r\n            if (genes.Count == 1) return genes[0];\r\n\r\n            var genesById = genes.GetMultiValueDict(x => _source == Source.Ensembl ? 
x.EnsemblId : x.EntrezGeneId);\r\n            if (!genesById.TryGetValue(geneId, out var idGenes)) throw new InvalidDataException($\"Could not find {geneId} in the UGA genes list.\");\r\n\r\n            if (idGenes.Count == 1) return idGenes[0];\r\n            throw new InvalidDataException($\"Found multiple entries for {geneId} in the UGA genes list.\");\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/TranscriptCache/TranscriptCacheStaging.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing CacheUtils.MiniCache;\r\nusing CacheUtils.TranscriptCache.Comparers;\r\nusing Intervals;\r\nusing VariantAnnotation.Caches;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.IO.Caches;\r\n\r\nnamespace CacheUtils.TranscriptCache\r\n{\r\n    public sealed class TranscriptCacheStaging : IStaging\r\n    {\r\n        private readonly TranscriptCacheData _cacheData;\r\n\r\n        private TranscriptCacheStaging(TranscriptCacheData cacheData)\r\n        {\r\n            _cacheData = cacheData;\r\n        }\r\n\r\n        public void Write(Stream stream)\r\n        {\r\n            using (var writer = new TranscriptCacheWriter(stream, _cacheData.Header)) writer.Write(_cacheData);\r\n        }\r\n\r\n        public static TranscriptCacheStaging GetStaging(CacheHeader header,\r\n            IntervalArray<ITranscript>[] transcriptIntervalArrays,\r\n            IntervalArray<IRegulatoryRegion>[] regulatoryRegionIntervalArrays)\r\n        {\r\n            var uniqueData = GetUniqueData(transcriptIntervalArrays);\r\n\r\n            var cacheData = new TranscriptCacheData(header, uniqueData.Genes, uniqueData.TranscriptRegions, uniqueData.Mirnas,\r\n                uniqueData.PeptideSeqs, transcriptIntervalArrays, regulatoryRegionIntervalArrays);\r\n\r\n            return new TranscriptCacheStaging(cacheData);\r\n        }\r\n\r\n        private static (IGene[] Genes, ITranscriptRegion[] TranscriptRegions, IInterval[] Mirnas, string[] PeptideSeqs) GetUniqueData(\r\n            IEnumerable<IntervalArray<ITranscript>> intervalArrays)\r\n        {\r\n            var intervalComparer         = new IntervalComparer();\r\n            var transcriptRegionComparer = new TranscriptRegionComparer();\r\n            var geneComparer             = new GeneComparer();\r\n\r\n            var geneSet             = new HashSet<IGene>(geneComparer);\r\n            
var transcriptRegionSet = new HashSet<ITranscriptRegion>(transcriptRegionComparer);\r\n            var mirnaSet            = new HashSet<IInterval>(intervalComparer);\r\n            var peptideSet          = new HashSet<string>();\r\n\r\n            foreach (var intervalArray in intervalArrays)\r\n            {\r\n                if (intervalArray == null) continue;\r\n\r\n                foreach (var interval in intervalArray.Array)\r\n                {\r\n                    var transcript = interval.Value;\r\n                    geneSet.Add(transcript.Gene);\r\n                    AddString(peptideSet, transcript.Translation?.PeptideSeq);\r\n                    AddTranscriptRegions(transcriptRegionSet, transcript.TranscriptRegions);\r\n                    AddIntervals(mirnaSet, transcript.MicroRnas);\r\n                }\r\n            }\r\n\r\n            var genes             = GetUniqueGenes(geneSet);\r\n            var transcriptRegions = GetUniqueTranscriptRegions(transcriptRegionSet);\r\n            var mirnas            = GetUniqueIntervals(mirnaSet);\r\n            var peptideSeqs       = GetUniqueStrings(peptideSet);\r\n\r\n            return (genes, transcriptRegions, mirnas, peptideSeqs);\r\n        }\r\n\r\n        private static void AddIntervals(ISet<IInterval> intervalSet, IInterval[] intervals)\r\n        {\r\n            if (intervals == null) return;\r\n            foreach (var interval in intervals) intervalSet.Add(interval);\r\n        }\r\n\r\n        private static void AddTranscriptRegions(ISet<ITranscriptRegion> transcriptRegionSet, ITranscriptRegion[] regions)\r\n        {\r\n            if (regions == null) return;\r\n            foreach (var region in regions) transcriptRegionSet.Add(region);\r\n        }\r\n\r\n        private static void AddString(ISet<string> stringSet, string s)\r\n        {\r\n            if (string.IsNullOrEmpty(s)) return;\r\n            stringSet.Add(s);\r\n        }\r\n\r\n        private static string[] 
GetUniqueStrings(ICollection<string> peptideSet)\r\n        {\r\n            return peptideSet.Count > 0 ? peptideSet.OrderBy(x => x).ToArray() : null;\r\n        }\r\n\r\n        private static IInterval[] GetUniqueIntervals(ICollection<IInterval> mirnaSet)\r\n        {\r\n            return mirnaSet.Count > 0 ? mirnaSet.SortInterval().ToArray() : null;\r\n        }\r\n\r\n        private static ITranscriptRegion[] GetUniqueTranscriptRegions(ICollection<ITranscriptRegion> transcriptRegionSet)\r\n        {\r\n            return transcriptRegionSet.Count > 0 ? transcriptRegionSet.SortInterval().ToArray() : null;\r\n        }\r\n\r\n        private static IGene[] GetUniqueGenes(ICollection<IGene> geneSet)\r\n        {\r\n            return geneSet.Count > 0 ? geneSet.Sort().ToArray() : null;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/TranscriptCache/TranscriptCacheUtilities.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Linq;\r\nusing CacheUtils.Genes.Utilities;\r\nusing CacheUtils.MiniCache;\r\nusing Genome;\r\nusing Intervals;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.TranscriptCache\r\n{\r\n    public static class TranscriptCacheUtilities\r\n    {\r\n        public static List<ITranscript> GetTranscripts(DataBundle bundle, ChromosomeInterval interval)\r\n        {\r\n            ITranscript[] overlappingTranscripts =\r\n                bundle.TranscriptCache.TranscriptIntervalForest.GetAllOverlappingValues(interval.Chromosome.Index,\r\n                    interval.Start, interval.End);\r\n            return overlappingTranscripts?.ToList() ?? new List<ITranscript>();\r\n        }\r\n\r\n        public static IntervalArray<T>[] ToIntervalArrays<T>(this IEnumerable<T> items, int numRefSeqs) where T : IChromosomeInterval\r\n        {\r\n            var                         intervalArrays = new IntervalArray<T>[numRefSeqs];\r\n            Dictionary<ushort, List<T>> itemsByRef     = items.GetMultiValueDict(x => x.Chromosome.Index);\r\n\r\n            foreach (ushort refIndex in itemsByRef.Keys.OrderBy(x => x))\r\n            {\r\n                List<T>       unsortedItems = itemsByRef[refIndex];\r\n                Interval<T>[] intervals     = unsortedItems.OrderBy(x => x.Start).ThenBy(x => x.End).ToIntervals(unsortedItems.Count);\r\n                intervalArrays[refIndex] = new IntervalArray<T>(intervals);\r\n            }\r\n\r\n            return intervalArrays;\r\n        }\r\n\r\n        private static Interval<T>[] ToIntervals<T>(this IEnumerable<T> items, int numItems) where T : IChromosomeInterval\r\n        {\r\n            var intervals = new Interval<T>[numItems];\r\n            var i = 0;\r\n\r\n            foreach (var item in items)\r\n            {\r\n                intervals[i++] = new Interval<T>(item.Start, item.End, item);\r\n            }\r\n\r\n   
         return intervals;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/TranscriptCache/TranscriptCacheWriter.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.IO.Compression;\r\nusing System.Text;\r\nusing CacheUtils.TranscriptCache.Comparers;\r\nusing Compression.Algorithms;\r\nusing Compression.FileHandling;\r\nusing Intervals;\r\nusing IO;\r\nusing VariantAnnotation.Caches;\r\nusing VariantAnnotation.IO.Caches;\r\nusing VariantAnnotation.IO;\r\n\r\n\r\nnamespace CacheUtils.TranscriptCache\r\n{\r\n    public sealed class TranscriptCacheWriter : IDisposable\r\n    {\r\n        private readonly BlockStream _blockStream;\r\n        private readonly ExtendedBinaryWriter _writer;\r\n        private readonly CacheHeader _header;\r\n        private readonly bool _leaveOpen;\r\n\r\n        public TranscriptCacheWriter(Stream stream, CacheHeader header, bool leaveOpen = false)\r\n        {\r\n            _blockStream = new BlockStream(new Zstandard(), stream, CompressionMode.Compress);\r\n            _writer      = new ExtendedBinaryWriter(_blockStream, Encoding.UTF8, leaveOpen);\r\n            _header      = header;\r\n            _leaveOpen   = leaveOpen;\r\n        }\r\n\r\n        public void Dispose()\r\n        {\r\n            if (!_leaveOpen) _blockStream.Dispose();\r\n            _writer.Dispose();\r\n        }\r\n\r\n        /// <summary>\r\n        /// writes the annotations to the current database file\r\n        /// </summary>\r\n        public void Write(TranscriptCacheData cacheData)\r\n        {\r\n            _blockStream.WriteHeader(_header.Write);\r\n\r\n            WriteItems(_writer, cacheData.Genes,             x => x.Write(_writer));\r\n            WriteItems(_writer, cacheData.TranscriptRegions, x => x.Write(_writer));\r\n            WriteItems(_writer, cacheData.Mirnas,            x => x.Write(_writer));\r\n            WriteItems(_writer, cacheData.PeptideSeqs,       x => _writer.WriteOptAscii(x));\r\n\r\n            var geneComparer             = new GeneComparer();\r\n            var 
transcriptRegionComparer = new TranscriptRegionComparer();\r\n            var intervalComparer         = new IntervalComparer();\r\n\r\n            var geneIndices             = CreateIndex(cacheData.Genes, geneComparer);\r\n            var transcriptRegionIndices = CreateIndex(cacheData.TranscriptRegions, transcriptRegionComparer);\r\n            var microRnaIndices         = CreateIndex(cacheData.Mirnas, intervalComparer);\r\n            var peptideIndices          = CreateIndex(cacheData.PeptideSeqs, EqualityComparer<string>.Default);\r\n\r\n            WriteIntervals(_writer, cacheData.RegulatoryRegionIntervalArrays, x => x.Write(_writer));\r\n            WriteIntervals(_writer, cacheData.TranscriptIntervalArrays,       x => x.Write(_writer, geneIndices, transcriptRegionIndices, microRnaIndices, peptideIndices));\r\n        }\r\n\r\n        private static void WriteIntervals<T>(IExtendedBinaryWriter writer, IReadOnlyCollection<IntervalArray<T>> intervalArrays,\r\n            Action<T> writeMethod)\r\n        {\r\n            writer.WriteOpt(intervalArrays.Count);\r\n\r\n            foreach (var intervalArray in intervalArrays)\r\n            {\r\n                if (intervalArray == null)\r\n                {\r\n                    writer.WriteOpt(0);\r\n                    continue;\r\n                }\r\n\r\n                writer.WriteOpt(intervalArray.Array.Length);\r\n                foreach (var interval in intervalArray.Array) writeMethod(interval.Value);\r\n            }\r\n\r\n            writer.Write(CacheConstants.GuardInt);\r\n        }\r\n\r\n        internal static void WriteItems<T>(IExtendedBinaryWriter writer, IReadOnlyCollection<T> items, Action<T> writeMethod)\r\n        {\r\n            if (items == null)\r\n            {\r\n                writer.WriteOpt(0);\r\n            }\r\n            else\r\n            {\r\n                writer.WriteOpt(items.Count);\r\n                foreach (var item in items) writeMethod(item);\r\n            
}\r\n\r\n            writer.Write(CacheConstants.GuardInt);\r\n        }\r\n\r\n        /// <summary>\r\n        /// creates an index out of a array\r\n        /// </summary>\r\n        internal static Dictionary<T, int> CreateIndex<T>(IReadOnlyList<T> array, IEqualityComparer<T> comparer)\r\n        {\r\n            var index = new Dictionary<T, int>(comparer);\r\n            if (array == null) return index;\r\n\r\n            for (var currentIndex = 0; currentIndex < array.Count; currentIndex++)\r\n                index[array[currentIndex]] = currentIndex;\r\n\r\n            return index;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/TranscriptCache/TranscriptConversionExtensions.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Linq;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.TranscriptCache\r\n{\r\n    public static class TranscriptConversionExtensions\r\n    {\r\n        public static IEnumerable<ITranscript> ToTranscripts(this MutableTranscript[] mutableTranscripts)\r\n        {\r\n            var transcripts = new List<ITranscript>(mutableTranscripts.Length);\r\n            transcripts.AddRange(mutableTranscripts.Select(mt => mt.ToTranscript()));\r\n            return transcripts;\r\n        }\r\n\r\n        private static ITranscript ToTranscript(this MutableTranscript mt)\r\n        {\r\n            var translation = mt.CodingRegion == null\r\n                ? null\r\n                : GetTranslation(mt.CodingRegion, mt.CdsLength, CompactId.Convert(mt.ProteinId, mt.ProteinVersion),\r\n                    mt.PeptideSequence);\r\n\r\n            var sortedMicroRnas = mt.MicroRnas?.OrderBy(x => x.Start).ToArray();\r\n\r\n            return new Transcript(mt.Chromosome, mt.Start, mt.End, CompactId.Convert(mt.Id, mt.Version), translation,\r\n                mt.BioType, mt.UpdatedGene, mt.TotalExonLength, mt.NewStartExonPhase, mt.IsCanonical,\r\n                mt.TranscriptRegions, (ushort) mt.Exons.Length, sortedMicroRnas, mt.SiftIndex, mt.PolyPhenIndex,\r\n                mt.Source, mt.CdsStartNotFound, mt.CdsEndNotFound, mt.SelenocysteinePositions, mt.RnaEdits);\r\n        }\r\n\r\n        private static ITranslation GetTranslation(ICodingRegion oldCodingRegion, int cdsLength, CompactId proteinId,\r\n            string peptideSeq)\r\n        {\r\n            var codingRegion = new CodingRegion(oldCodingRegion.Start, oldCodingRegion.End, oldCodingRegion.CdnaStart,\r\n                oldCodingRegion.CdnaEnd, 
cdsLength);\r\n\r\n            return new Translation(codingRegion, proteinId, peptideSeq);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/TranscriptCache/TranscriptRegionMerger.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.Linq;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing Intervals;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.TranscriptCache\r\n{\r\n    public static class TranscriptRegionMerger\r\n    {\r\n        public static ITranscriptRegion[] GetTranscriptRegions(IEnumerable<MutableTranscriptRegion> cdnaMaps, MutableExon[] exons,\r\n            IInterval[] introns, bool onReverseStrand)\r\n        {\r\n            var sortedRegions = cdnaMaps.OrderBy(x => x.Start).ThenBy(x => x.End).ToList();\r\n\r\n            var intronIntervals = introns == null\r\n                ? null\r\n                : CreateIntervals(introns.OrderBy(x => x.Start).ThenBy(x => x.End), introns.Length, onReverseStrand);\r\n\r\n            var exonIntervals = CreateIntervals(exons.OrderBy(x => x.Start).ThenBy(x => x.End), exons.Length,\r\n                onReverseStrand);\r\n            \r\n            return sortedRegions.AddGaps()\r\n                .AddIds(intronIntervals, TranscriptRegionType.Gap, TranscriptRegionType.Intron)\r\n                .AddIds(exonIntervals, TranscriptRegionType.Exon, TranscriptRegionType.Exon)\r\n                .AddIds(exonIntervals, TranscriptRegionType.Gap, TranscriptRegionType.Gap)\r\n                .AddCoords(TranscriptRegionType.Intron, onReverseStrand)\r\n                .AddCoords(TranscriptRegionType.Gap, onReverseStrand)\r\n                .ToInterfaceArray();\r\n        }\r\n\r\n        private static List<MutableTranscriptRegion> AddCoords(this List<MutableTranscriptRegion> regions, TranscriptRegionType targetRegionType, bool onReverseStrand)\r\n        {\r\n            for (var regionIndex = 0; regionIndex < regions.Count; regionIndex++)\r\n            {\r\n                var region = regions[regionIndex];\r\n                if (region.Type != targetRegionType) 
continue;\r\n                var coords = regions.GetExonCoords(regionIndex, onReverseStrand);\r\n                region.CdnaStart = coords.CdnaStart;\r\n                region.CdnaEnd   = coords.CdnaEnd;\r\n            }\r\n\r\n            return regions;\r\n        }\r\n\r\n        private static (int CdnaStart, int CdnaEnd) GetExonCoords(this IReadOnlyList<MutableTranscriptRegion> regions,\r\n            int regionIndex, bool onReverseStrand)\r\n        {\r\n            int cdnaStart = -1;\r\n            int cdnaEnd   = -1;\r\n\r\n            int testIndex = regionIndex;\r\n            while (testIndex >= 0)\r\n            {\r\n                testIndex--;\r\n                var region = regions[testIndex];\r\n                if (region.Type != TranscriptRegionType.Exon) continue;\r\n                if (onReverseStrand) cdnaEnd = region.CdnaStart;\r\n                else cdnaStart = region.CdnaEnd;\r\n                break;\r\n            }\r\n\r\n            testIndex = regionIndex;\r\n            while (testIndex < regions.Count)\r\n            {\r\n                testIndex++;\r\n                var region = regions[testIndex];\r\n                if (region.Type != TranscriptRegionType.Exon) continue;\r\n                if (onReverseStrand) cdnaStart = region.CdnaEnd;\r\n                else cdnaEnd = region.CdnaStart;\r\n                break;\r\n            }\r\n\r\n            return (cdnaStart, cdnaEnd);\r\n        }\r\n\r\n        private static ITranscriptRegion[] ToInterfaceArray(this IReadOnlyList<MutableTranscriptRegion> mutableRegions)\r\n        {\r\n            var regions = new ITranscriptRegion[mutableRegions.Count];\r\n            for (var i = 0; i < mutableRegions.Count; i++)\r\n            {\r\n                var region = mutableRegions[i];\r\n                regions[i] = new TranscriptRegion(region.Type, region.Id, region.Start, region.End, region.CdnaStart,\r\n                    region.CdnaEnd);\r\n            }\r\n            return 
regions;\r\n        }\r\n\r\n        private static IdInterval[] CreateIntervals(IEnumerable<IInterval> intervals, int numIntervals, bool onReverseStrand)\r\n        {\r\n            var idIntervals = new IdInterval[numIntervals];\r\n            ushort id       = onReverseStrand ? (ushort)numIntervals : (ushort)1;\r\n            var index       = 0;\r\n\r\n            foreach (var interval in intervals)\r\n            {\r\n                idIntervals[index] = new IdInterval(interval.Start, interval.End, id);\r\n                if (onReverseStrand) id--;\r\n                else id++;\r\n                index++;\r\n            }\r\n\r\n            return idIntervals.OrderBy(x => x.Start).ThenBy(x => x.End).ToArray();\r\n        }\r\n\r\n        private static List<MutableTranscriptRegion> AddIds(this List<MutableTranscriptRegion> regions,\r\n            IReadOnlyList<IdInterval> intervals, TranscriptRegionType targetRegionType, TranscriptRegionType matchRegionType)\r\n        {\r\n            if (intervals == null) return regions;\r\n\r\n            foreach (var region in regions)\r\n            {\r\n                if (region.Type != targetRegionType) continue;\r\n\r\n                int regionMidPoint = region.Start + (region.End - region.Start >> 1);\r\n\r\n                int index = intervals.BinarySearch(regionMidPoint);\r\n                if (index < 0) continue;\r\n\r\n                var intron  = intervals[index];\r\n                region.Type = matchRegionType;\r\n                region.Id   = intron.Id;\r\n            }\r\n\r\n            return regions;\r\n        }\r\n\r\n        private static int BinarySearch(this IReadOnlyList<IdInterval> intervals, int position)\r\n        {\r\n            var begin = 0;\r\n            int end = intervals.Count - 1;\r\n\r\n            while (begin <= end)\r\n            {\r\n                int index = begin + (end - begin >> 1);\r\n                var interval = intervals[index];\r\n\r\n                if 
(position >= interval.Start && position <= interval.End) return index;\r\n                if (interval.End < position) begin = index + 1;\r\n                else if (position < interval.Start) end = index - 1;\r\n            }\r\n\r\n            return ~begin;\r\n        }\r\n\r\n        private static List<MutableTranscriptRegion> AddGaps(this List<MutableTranscriptRegion> sortedRegions)\r\n        {\r\n            for (var i = 1; i < sortedRegions.Count; i++)\r\n            {\r\n                var prevRegion = sortedRegions[i - 1];\r\n                var region     = sortedRegions[i];\r\n\r\n                int gapLength = CalculateGapLength(prevRegion, region);\r\n                if (gapLength == 0) continue;\r\n\r\n                var gapRegion = new MutableTranscriptRegion(TranscriptRegionType.Gap, 0, prevRegion.End + 1, region.Start - 1);\r\n                sortedRegions.Insert(i, gapRegion);\r\n                i++;\r\n            }\r\n\r\n            return sortedRegions;\r\n        }\r\n\r\n        private static int CalculateGapLength(IInterval prevRegion, IInterval region) => region.Start - prevRegion.End - 1;\r\n\r\n        private sealed class IdInterval : IInterval, IComparable<IdInterval>\r\n        {\r\n            public int Start { get; }\r\n            public int End { get; }\r\n            public readonly ushort Id;\r\n\r\n            public IdInterval(int start, int end, ushort id)\r\n            {\r\n                Start = start;\r\n                End   = end;\r\n                Id    = id;\r\n            }\r\n\r\n            public int CompareTo(IdInterval other)\r\n            {\r\n                if (ReferenceEquals(this, other)) return 0;\r\n                if (ReferenceEquals(null, other)) return 1;\r\n\r\n                int startComparison = Start.CompareTo(other.Start);\r\n                return startComparison != 0 ? startComparison : End.CompareTo(other.End);\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/TranscriptCache/TranscriptRegionValidater.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing Intervals;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace CacheUtils.TranscriptCache\r\n{\r\n    public static class TranscriptRegionValidater\r\n    {\r\n        public static void Validate(string transcriptId, IEnumerable<MutableTranscriptRegion> cdnaMaps,\r\n            IEnumerable<MutableExon> exons, IEnumerable<IInterval> introns, ITranscriptRegion[] regions)\r\n        {\r\n            try\r\n            {\r\n                ValidateRegions(transcriptId, regions);\r\n                if (regions.Length <= 1) return;\r\n                CheckGenomicCoordinateContiguity(transcriptId, regions);\r\n            }\r\n            catch (Exception)\r\n            {\r\n                DumpTranscriptRegions(regions);\r\n                DumpExons(exons);\r\n                DumpIntrons(introns);\r\n                DumpCdnaMaps(cdnaMaps);\r\n                throw;\r\n            }\r\n        }\r\n\r\n        private static void CheckGenomicCoordinateContiguity(string transcriptId, IReadOnlyList<ITranscriptRegion> regions)\r\n        {\r\n            for (var i = 1; i < regions.Count; i++)\r\n            {\r\n                var prevRegion = regions[i - 1];\r\n                var region     = regions[i];\r\n\r\n                int delta = region.Start - prevRegion.End;\r\n                if (delta != 1) throw new InvalidDataException($\"Found non-contiguous genomic coordinates in transcript regions in transcript ({transcriptId}).\");\r\n            }\r\n        }\r\n\r\n        private static void ValidateRegions(string transcriptId, IEnumerable<ITranscriptRegion> regions)\r\n        {\r\n            foreach (var region in regions)\r\n            {\r\n                if (region.Id == 0)       throw new InvalidDataException($\"Expected transcript ({transcriptId}) to have 
regions with non-zero IDs.\");\r\n                if (region.CdnaStart < 1) throw new InvalidDataException($\"Expected transcript ({transcriptId}) to have regions with true cDNA start positions.\");\r\n                if (region.CdnaEnd < 1)   throw new InvalidDataException($\"Expected transcript ({transcriptId}) to have regions with true cDNA end positions.\");\r\n\r\n                if (region.Type != TranscriptRegionType.Exon && region.Type != TranscriptRegionType.Intron &&\r\n                    region.Type != TranscriptRegionType.Gap)\r\n                    throw new InvalidDataException($\"Found unexpected transcript region type ({region.Type}) in transcript ({transcriptId}).\");\r\n            }\r\n        }\r\n\r\n        private static void DumpTranscriptRegions(IEnumerable<ITranscriptRegion> regions)\r\n        {\r\n            Console.WriteLine(\"\\ntranscript regions:\");\r\n            foreach (var region in regions) DumpTranscriptRegion(region);\r\n        }\r\n\r\n        private static void DumpTranscriptRegion(ITranscriptRegion region) => Console.WriteLine($\"{region.Type}\\t{region.Id}\\t{region.Start}\\t{region.End}\\t{region.CdnaStart}\\t{region.CdnaEnd}\");\r\n\r\n        private static void DumpCdnaMaps(IEnumerable<MutableTranscriptRegion> cdnaMaps)\r\n        {\r\n            Console.WriteLine(\"\\ncDNA maps:\");\r\n            foreach (var cdnaMap in cdnaMaps.OrderBy(x => x.Start).ThenBy(x => x.End)) DumpCdnaMap(cdnaMap);\r\n        }\r\n\r\n        private static void DumpCdnaMap(ITranscriptRegion cdnaMap) => Console.WriteLine($\"{cdnaMap.Start}\\t{cdnaMap.End}\\t{cdnaMap.CdnaStart}\\t{cdnaMap.CdnaEnd}\");\r\n\r\n        private static void DumpIntrons(IEnumerable<IInterval> introns)\r\n        {\r\n            Console.WriteLine(\"\\nIntrons:\");\r\n            foreach (var intron in introns.OrderBy(x => x.Start).ThenBy(x => x.End)) DumpIntron(intron);\r\n        }\r\n\r\n        private static void DumpIntron(IInterval intron) => 
Console.WriteLine($\"{intron.Start}\\t{intron.End}\");\r\n\r\n        private static void DumpExons(IEnumerable<MutableExon> exons)\r\n        {\r\n            Console.WriteLine(\"\\nExons:\");\r\n            foreach (var exon in exons.OrderBy(x => x.Start).ThenBy(x => x.End)) DumpExon(exon);\r\n        }\r\n\r\n        private static void DumpExon(IInterval exon) => Console.WriteLine($\"{exon.Start}\\t{exon.End}\");\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Utilities/AccessionUtilities.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing VariantAnnotation.Utilities;\r\n\r\nnamespace CacheUtils.Utilities\r\n{\r\n    internal static class AccessionUtilities\r\n    {\r\n        internal static (string Id, byte Version) GetMaxVersion(string originalId, byte originalVersion)\r\n        {\r\n            (string pureId, byte idVersion) = FormatUtilities.SplitVersion(originalId);\r\n            return (pureId, Math.Max(originalVersion, idVersion));\r\n        }\r\n\r\n        public static int GetAccessionNumber(string s)\r\n        {\r\n            if (string.IsNullOrEmpty(s)) return -1;\r\n            return s.StartsWith(\"ENS\") ? GetEnsemblAccessionNumber(s) : GetRefSeqAccessionNumber(s);\r\n        }\r\n\r\n        private static int GetRefSeqAccessionNumber(string s)\r\n        {\r\n            int firstUnderlinePos = s.IndexOf('_');\r\n            if (firstUnderlinePos == -1) throw new InvalidDataException(\"Expected an underline in the transcript ID, but didn't find any.\");\r\n\r\n            string id = s.Substring(firstUnderlinePos + 1);\r\n            return int.Parse(id);\r\n        }\r\n\r\n        private static int GetEnsemblAccessionNumber(string s)\r\n        {\r\n            string id = s.Substring(4);\r\n            return int.Parse(id);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Utilities/HeaderUtilities.cs",
    "content": "﻿using System;\r\nusing Genome;\r\nusing IO;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.IO.Caches;\r\n\r\nnamespace CacheUtils.Utilities\r\n{\r\n    public static class HeaderUtilities\r\n    {\r\n        public static Header GetHeader(Source source, GenomeAssembly genomeAssembly) => new Header(\r\n            CacheConstants.Identifier, CacheConstants.SchemaVersion, CacheConstants.DataVersion, source,\r\n            DateTime.Now.Ticks, genomeAssembly);\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Utilities/RemoteFile.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.Net;\r\nusing IO;\r\nusing VariantAnnotation.Utilities;\r\n\r\nnamespace CacheUtils.Utilities\r\n{\r\n    public sealed class RemoteFile\r\n    {\r\n        private readonly string _description;\r\n        public readonly string FilePath;\r\n        private readonly string _url;\r\n\r\n        static RemoteFile() => ServicePointManager.DefaultConnectionLimit = int.MaxValue;\r\n\r\n        public RemoteFile(string description, string url, bool addDate = true)\r\n        {\r\n            _description = description;\r\n            _url         = url;\r\n            FilePath     = Path.Combine(Path.GetTempPath(), GetFilename(url, addDate));\r\n        }\r\n\r\n        internal static string GetFilename(string url, bool addDate)\r\n        {\r\n            int lastSlashPos = url.LastIndexOf('/');\r\n            string originalFilename = url.Substring(lastSlashPos + 1);\r\n\r\n            if (!addDate) return originalFilename;\r\n\r\n            string extension    = Path.GetExtension(originalFilename);\r\n            string filenameStub = Path.GetFileNameWithoutExtension(originalFilename);\r\n\r\n            return $\"{filenameStub}_{Date.GetDate(DateTime.Now.Ticks)}{extension}\";\r\n        }\r\n\r\n        public void Download()\r\n        {\r\n            if (File.Exists(FilePath)) return;\r\n\r\n            Logger.WriteLine($\"- downloading the {_description}\");\r\n            while (!SuccessfulDownload())\r\n            {\r\n                Logger.WriteLine($\"- requeueing download of the {_description}\");\r\n            }\r\n        }\r\n\r\n        private bool SuccessfulDownload()\r\n        {\r\n            try\r\n            {\r\n                using (var client = new WebClient())\r\n                {\r\n                    client.Proxy = null;\r\n                    client.DownloadFileTaskAsync(_url, FilePath).Wait();\r\n                }\r\n            }\r\n            catch 
(Exception)\r\n            {\r\n                return false;\r\n            }\r\n\r\n            return true;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CacheUtils/Utilities/TaskExtensions.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.Threading;\r\nusing System.Threading.Tasks;\r\nusing CommandLine.Utilities;\r\nusing IO;\r\n\r\nnamespace CacheUtils.Utilities\r\n{\r\n    public static class TaskExtensions\r\n    {\r\n        public static void Execute<T>(this IReadOnlyList<T> items, string description,\r\n            Action<T> executeAction, int numThreads = 5)\r\n        {\r\n            var bench     = new Benchmark();\r\n            var tasks     = new Task[items.Count];\r\n            var maxThread = new SemaphoreSlim(numThreads);\r\n\r\n            for (var i = 0; i < items.Count; i++)\r\n            {\r\n                maxThread.Wait();\r\n                var item = items[i];\r\n                tasks[i] = Task.Factory.StartNew(() => executeAction(item), TaskCreationOptions.LongRunning)\r\n                    .ContinueWith(task => maxThread.Release());\r\n            }\r\n\r\n            Task.WaitAll(tasks);\r\n            Logger.WriteLine($\"- all {description} finished ({Benchmark.ToHumanReadable(bench.GetElapsedTime())}).\\n\");\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Cloud/AmazonS3ClientWrapper.cs",
    "content": "﻿using System.Threading.Tasks;\r\nusing Amazon.S3;\r\nusing Amazon.S3.Model;\r\nusing IO;\r\n\r\nnamespace Cloud\r\n{\r\n    public sealed class AmazonS3ClientWrapper : IS3Client\r\n    {\r\n        private readonly AmazonS3Client _s3Client;\r\n\r\n        public AmazonS3ClientWrapper(AmazonS3Client s3Client) => _s3Client = s3Client;\r\n\r\n        public Task<GetObjectResponse> GetObjectAsync(GetObjectRequest getRequest) => _s3Client.GetObjectAsync(getRequest);\r\n\r\n        public Task<PutObjectResponse> PutObjectAsync(PutObjectRequest putRequest) => _s3Client.PutObjectAsync(putRequest);\r\n    }\r\n}"
  },
  {
    "path": "Cloud/AssemblyInfo.cs",
    "content": "﻿using System.Runtime.CompilerServices;\r\n\r\n[assembly: InternalsVisibleTo(\"UnitTests\")]"
  },
  {
    "path": "Cloud/Cloud.appsettings.json",
    "content": "{\n    \"DataSource\": {\n        \"BaseUrl\": \"http://nirvana-annotations.s3.us-west-2.amazonaws.com/\",\n        \"CacheDirectory\": \"ab0cf104f39708eabd07b8cb67e149ba-Cache\",\n        \"ReferencesDirectory\": \"d95867deadfe690e40f42068d6b59df8-References\",\n        \"ManifestDirectory\": \"a9f54ea6ac0548696c97a3ee64bc39ec2e71b84b-SaManifest\"\n    }\n}"
  },
  {
    "path": "Cloud/Cloud.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\r\n  <PropertyGroup>\r\n    <TargetFramework>net6.0</TargetFramework>\r\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\r\n    \r\n  </PropertyGroup>\r\n  <ItemGroup>\r\n    <PackageReference Include=\"Amazon.Lambda.Core\" Version=\"2.1.0\" />\r\n    <PackageReference Include=\"Amazon.Lambda.Serialization.Json\" Version=\"2.0.0\" />\r\n    <PackageReference Include=\"AWSSDK.S3\" Version=\"3.7.8.3\" />\r\n    <PackageReference Include=\"AWSSDK.SimpleNotificationService\" Version=\"3.7.3.31\" />\r\n    <PackageReference Include=\"Microsoft.Extensions.Configuration\" Version=\"6.0.0\" />\r\n    <PackageReference Include=\"Microsoft.Extensions.Configuration.Json\" Version=\"6.0.0\" />\r\n    <PackageReference Include=\"Newtonsoft.Json\" Version=\"13.0.1\" />\r\n  </ItemGroup>\r\n  <ItemGroup>\r\n    <ProjectReference Include=\"..\\Genome\\Genome.csproj\" />\r\n    <ProjectReference Include=\"..\\IO\\IO.csproj\" />\r\n    <ProjectReference Include=\"..\\ReferenceSequence\\ReferenceSequence.csproj\" />\r\n  </ItemGroup>\r\n  <ItemGroup>\r\n    <None Update=\"Cloud.appsettings.json\">\r\n      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>\r\n    </None>\r\n  </ItemGroup>\r\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\r\n</Project>\r\n"
  },
  {
    "path": "Cloud/Configuration.cs",
    "content": "using Cloud.Utilities;\n\nnamespace Cloud;\nusing Microsoft.Extensions.Configuration;\n\npublic sealed class Configuration\n{\n    public readonly IConfigurationRoot    Config;\n    public readonly IConfigurationSection DataSources;\n    public          string                CacheDirectory      => DataSources[\"CacheDirectory\"];\n    public          string                ReferencesDirectory => DataSources[\"ReferencesDirectory\"];\n    public          string                ManifestDirectory   => DataSources[\"ManifestDirectory\"];\n    public          string                NirvanaBaseUrl      => DataSources[\"BaseUrl\"];\n    public Configuration()\n    {\n        const string appSettingsFilename = \"Cloud.appsettings.json\";\n\n        Config = new ConfigurationBuilder()\n            .AddJsonFile(appSettingsFilename)\n            .Build();\n\n        DataSources = Config.GetSection(\"DataSource\");\n\n    }\n\n    \n}"
  },
  {
    "path": "Cloud/LambdaUrlHelper.cs",
    "content": "﻿using System;\nusing Cloud.Utilities;\nusing Genome;\nusing IO;\nusing ReferenceSequence;\n\nnamespace Cloud\n{\n    public static class LambdaUrlHelper\n    {\n        public const            ushort        SaSchemaVersion                = 22;\n        public const            string        UrlBaseEnvironmentVariableName = \"NirvanaDataUrlBase\";\n        private static readonly Configuration Config                         = new ();\n\n        public static string S3CacheFolderBase    = Config.CacheDirectory;\n        // public const string S3ManifestFolderBase = \"a9f54ea6ac0548696c97a3ee64bc39ec2e71b84b-SaManifest\";\n        public static readonly string S3CacheFolder =\n            $\"{Config.CacheDirectory}/{CacheConstants.DataVersion}/\";\n\n        private static readonly string S3RefPrefix =\n            $\"{Config.ReferencesDirectory}/{ReferenceSequenceCommon.HeaderVersion}/Homo_sapiens.\";\n\n        \n        private const string UgaFileName        = \"UGA.tsv.gz\";\n        public const  string DefaultCacheSource = \"Both\";\n        public const  string RefSuffix          = \".Nirvana.dat\";\n        public const  string JsonSuffix         = \".json.gz\";\n        public const  string JsonIndexSuffix    = \".jsi\";\n        public const  string SuccessMessage     = \"Success\";\n\n        public static string GetBaseUrl()\n        {\n            var envBaseUrl = Environment.GetEnvironmentVariable(UrlBaseEnvironmentVariableName);\n            \n            return string.IsNullOrEmpty(envBaseUrl) ? 
Config.NirvanaBaseUrl: envBaseUrl;\n        }\n\n        public static string GetManifestBaseUrl() => GetBaseUrl() + Config.ManifestDirectory;\n        \n        public static string GetCacheFolder() => GetBaseUrl()     + S3CacheFolder;\n        public static string GetUgaUrl() => GetCacheFolder() + UgaFileName;\n        public static string GetRefPrefix() => GetBaseUrl()     + S3RefPrefix;\n\n        public static string GetRefUrl(GenomeAssembly genomeAssembly) =>\n            GetRefPrefix() + genomeAssembly + RefSuffix;\n    }\n}"
  },
  {
    "path": "Cloud/Messages/Annotation/AnnotationConfig.cs",
    "content": "﻿using System.Collections.Generic;\r\n\r\nnamespace Cloud.Messages.Annotation\r\n{\r\n    public sealed class AnnotationConfig\r\n    {\r\n        // ReSharper disable InconsistentNaming\r\n        // ReSharper disable NotAccessedField.Global\r\n        public string          id;\r\n        public string          genomeAssembly;\r\n        public string          vcfUrl;\r\n        public string          tabixUrl;\r\n        public S3Path          outputDir;\r\n        public string          outputPrefix;\r\n        public List<SaUrls>    customAnnotations;\r\n        public List<string>    desiredVcfInfo;\r\n        public List<string>    desiredVcfSampleInfo;\r\n        public string          customStrUrl;\r\n        public AnnotationRange annotationRange;\r\n        // ReSharper restore NotAccessedField.Global\r\n        // ReSharper restore InconsistentNaming\r\n    }\r\n}"
  },
  {
    "path": "Cloud/Messages/Annotation/AnnotationPosition.cs",
    "content": "﻿namespace Cloud.Messages.Annotation\r\n{\r\n    public struct AnnotationPosition\r\n    {\r\n        public readonly string Chromosome;\r\n        public readonly int Position;\r\n\r\n        public AnnotationPosition(string chromosome, int position)\r\n        {\r\n            Chromosome = chromosome;\r\n            Position = position;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Cloud/Messages/Annotation/AnnotationRange.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Genome;\r\n\r\nnamespace Cloud.Messages.Annotation\r\n\r\n{\r\n    public sealed class AnnotationRange\r\n    {\r\n        public readonly AnnotationPosition Start;\r\n        public readonly AnnotationPosition? End;\r\n\r\n        public AnnotationRange(AnnotationPosition start, AnnotationPosition? end)\r\n        {\r\n            Start = start;\r\n            End   = end;\r\n        }\r\n\r\n        public GenomicRange ToGenomicRange(Dictionary<string, Chromosome> refNameToChromosome)\r\n        {\r\n            var startGenomicPosition = new GenomicPosition(ReferenceNameUtilities.GetChromosome(refNameToChromosome, Start.Chromosome), Start.Position);\r\n\r\n            GenomicPosition? endGenomicPosition = null;\r\n            if (End != null) endGenomicPosition = new GenomicPosition(ReferenceNameUtilities.GetChromosome(refNameToChromosome, End.Value.Chromosome), End.Value.Position);\r\n\r\n            return new GenomicRange(startGenomicPosition, endGenomicPosition);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Cloud/Messages/Annotation/AnnotationResult.cs",
    "content": "﻿using ErrorHandling;\r\n\r\nnamespace Cloud.Messages.Annotation\r\n{\r\n    // ReSharper disable once ClassNeverInstantiated.Global\r\n    public sealed class AnnotationResult\r\n    {\r\n        // ReSharper disable InconsistentNaming\r\n        // ReSharper disable UnassignedField.Global\r\n        public string id;\r\n        public string status;\r\n        public string filePath;\r\n        public ErrorCategory? errorCategory;\r\n\r\n        public int variantCount;\r\n        // ReSharper restore UnassignedField.Global\r\n        // ReSharper restore InconsistentNaming\r\n    }\r\n}"
  },
  {
    "path": "Cloud/Messages/Custom/CustomConfig.cs",
    "content": "// ReSharper disable InconsistentNaming\r\n\r\nnamespace Cloud.Messages.Custom\r\n{\r\n    public sealed class CustomConfig\r\n    {\r\n        public string    id;\r\n        public string    tsvUrl;\r\n        public S3Path    outputDir;\r\n        public JwtFields jwtFields;\r\n        \r\n        public bool skipGeneIdValidation;\r\n        public bool skipRefBaseValidation;\r\n    }\r\n}"
  },
  {
    "path": "Cloud/Messages/Custom/CustomResult.cs",
    "content": "﻿namespace Cloud.Messages.Custom\n{\n    // ReSharper disable NotAccessedField.Global\n    // ReSharper disable InconsistentNaming\n    public sealed class CustomResult\n    {\n        public string    id;\n        public string    status;\n        public string    genomeAssembly;\n        public FileList  created;\n        public bool      noValidEntries;\n        public JwtFields jwtFields;\n        public int       variantCount;\n    }\n    // ReSharper restore InconsistentNaming\n    // ReSharper restore NotAccessedField.Global\n}"
  },
  {
    "path": "Cloud/Messages/FileList.cs",
    "content": "﻿namespace Cloud.Messages\r\n{\r\n    // ReSharper disable InconsistentNaming\r\n    // ReSharper disable NotAccessedField.Global\r\n    public sealed class FileList\r\n    {\r\n        public string bucketName;\r\n        public string outputDir;\r\n        public string[] files;\r\n    }\r\n    // ReSharper restore NotAccessedField.Global\r\n    // ReSharper restore InconsistentNaming\r\n}"
  },
  {
    "path": "Cloud/Messages/Gene/GeneConfig.cs",
    "content": "﻿using ErrorHandling.Exceptions;\nusing IO;\n\nnamespace Cloud.Messages.Gene\n{\n    public sealed class GeneConfig\n    {\n        // ReSharper disable InconsistentNaming\n        public string id;\n        public string[] geneSymbols;\n        public string[] ngaUrls;\n        // ReSharper restore InconsistentNaming\n\n        public void Validate()\n        {\n            if (string.IsNullOrEmpty(id)) throw new UserErrorException(\"Please provide the id of the job.\");\n            if (geneSymbols == null || geneSymbols.Length == 0)\n                throw new UserErrorException(\"Please provide at lease one gene symbol.\");\n            if (ngaUrls == null) return;\n\n            foreach (string ngaUrl in ngaUrls) HttpUtilities.ValidateUrl(ngaUrl);\n        }\n    }\n}"
  },
  {
    "path": "Cloud/Messages/JwtFields.cs",
    "content": "namespace Cloud.Messages\n{\n    public class JwtFields\n    {\n        // ReSharper disable InconsistentNaming\n        public string authorizedParty;\n        public string subject;\n        public string tenantId;\n\n        // ReSharper restore InconsistentNaming\n    }\n}"
  },
  {
    "path": "Cloud/Messages/Nirvana/NirvanaConfig.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing ErrorHandling.Exceptions;\r\nusing IO;\r\n\r\nnamespace Cloud.Messages.Nirvana\r\n{\r\n    public sealed class NirvanaConfig\r\n    {\r\n        // ReSharper disable InconsistentNaming\r\n        public string id;\r\n        public string genomeAssembly;\r\n        public string vcfUrl;\r\n        public string tabixUrl;\r\n        public S3Path outputDir;\r\n        // ReSharper disable once UnassignedField.Global\r\n        public List<SaUrls> customAnnotations;\r\n        public List<string> desiredVcfInfo;\r\n        public List<string> desiredVcfSampleInfo;\r\n        public string       customStrUrl;\r\n        public JwtFields    jwtFields;\r\n        \r\n        // ReSharper restore InconsistentNaming\r\n\r\n        internal void CheckRequiredFieldsNotNull()\r\n        {\r\n            static string BuildErrorMessage(string message) => message + \" cannot be null.\";\r\n\r\n            if (id                       == null) throw new UserErrorException(BuildErrorMessage(\"id\"));\r\n            if (genomeAssembly           == null) throw new UserErrorException(BuildErrorMessage(\"genomeAssembly\"));\r\n            if (vcfUrl                   == null) throw new UserErrorException(BuildErrorMessage(\"vcfUrl\"));\r\n            if (tabixUrl                 == null) throw new UserErrorException(BuildErrorMessage(\"tabixUrl\"));\r\n            if (outputDir                == null) throw new UserErrorException(BuildErrorMessage(\"outputDir\"));\r\n            if (outputDir.bucketName     == null) throw new UserErrorException(BuildErrorMessage(\"bucketName of outputDir\"));\r\n            if (outputDir.region         == null) throw new UserErrorException(BuildErrorMessage(\"region of outputDir\"));\r\n            if (outputDir.path           == null) throw new UserErrorException(BuildErrorMessage(\"path of outputDir\"));\r\n            if (outputDir.accessKey      == null) throw new 
UserErrorException(BuildErrorMessage(\"accessKey of outputDir\"));\r\n            if (outputDir.secretKey      == null) throw new UserErrorException(BuildErrorMessage(\"secretKey of outputDir\"));\r\n            if (outputDir.sessionToken   == null) throw new UserErrorException(BuildErrorMessage(\"sessionToken of outputDir\"));\r\n            \r\n        }\r\n\r\n        public void Validate()\r\n        {\r\n            CheckRequiredFieldsNotNull();\r\n\r\n            HttpUtilities.ValidateUrl(vcfUrl);\r\n            HttpUtilities.ValidateUrl(tabixUrl);\r\n            outputDir.Validate(true);\r\n\r\n            customAnnotations?.ForEach(x => x.Validate());\r\n\r\n            if (customStrUrl != null) HttpUtilities.ValidateUrl(customStrUrl);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Cloud/Messages/Nirvana/NirvanaResult.cs",
    "content": "﻿namespace Cloud.Messages.Nirvana\r\n{\r\n    public sealed class NirvanaResult\r\n    {\r\n        // ReSharper disable InconsistentNaming\r\n        // ReSharper disable NotAccessedField.Global\r\n        public string    id;\r\n        public string    status;\r\n        public FileList  created;\r\n        public JwtFields jwtFields;\r\n        public int       variantCount;\r\n\r\n        // ReSharper restore NotAccessedField.Global\r\n        // ReSharper restore InconsistentNaming\r\n    }\r\n}\r\n"
  },
  {
    "path": "Cloud/Messages/S3Path.cs",
    "content": "﻿// ReSharper disable InconsistentNaming\r\n\r\nusing System;\r\nusing Amazon;\r\nusing Amazon.S3;\r\nusing Amazon.S3.Model;\r\nusing Cloud.Utilities;\r\nusing ErrorHandling.Exceptions;\r\nusing IO;\r\n\r\nnamespace Cloud.Messages\r\n{\r\n    public sealed class S3Path\r\n    {\r\n        public string bucketName;\r\n        public string region;\r\n        public string path;\r\n        public string accessKey;\r\n        public string secretKey;\r\n        public string sessionToken;\r\n\r\n        public void Validate(bool isDirectory)\r\n        {\r\n            ValidatePathFormat(path, isDirectory);\r\n            path = FormatPath(path);\r\n\r\n            CheckS3Region();\r\n\r\n            var s3Client = GetS3Client(TimeSpan.FromMinutes(5));\r\n            ValidateCredentials(s3Client, isDirectory);\r\n        }\r\n\r\n        private void CheckS3Region()\r\n        {\r\n            if (RegionEndpoint.GetBySystemName(region).DisplayName == \"Unknown\")\r\n                throw new UserErrorException($\"Unknown S3 Region {region}\");\r\n        }\r\n\r\n        private const int MaxRetryCount = 4;\r\n        private void ValidateCredentials(IS3Client s3Client, bool isDirectory)\r\n        {\r\n            int maxRetryCount = MaxRetryCount;\r\n            while (true)\r\n            {\r\n                try\r\n                {\r\n                    if (isDirectory)\r\n                    {\r\n                        var putRequest = new PutObjectRequest\r\n                        {\r\n                            BucketName = bucketName,\r\n                            Key = path\r\n                        };\r\n                        s3Client.PutObjectAsync(putRequest).Wait();\r\n                    }\r\n                    else\r\n                    {\r\n                        var getRequest = new GetObjectRequest\r\n                        {\r\n                            BucketName = bucketName,\r\n                            Key = 
path,\r\n                            ByteRange = new ByteRange(0, 1)\r\n                        };\r\n                        s3Client.GetObjectAsync(getRequest).Wait();\r\n                    }\r\n                    // validation successful. Break and return.\r\n                    break;\r\n                }\r\n                catch (Exception exception)\r\n                {\r\n                    var processedException = AwsExceptionUtilities.TryConvertUserException(exception, this);\r\n                    if (processedException is UserErrorException) throw processedException;\r\n\r\n                    Logger.WriteLine($\"Failed to validate S3 credentials\\n{processedException.Message}\");\r\n                    \r\n                    maxRetryCount--;\r\n                    if (maxRetryCount >= 0) continue;\r\n                    Logger.WriteLine(\"Max retry limit reached for validating S3 credentials.\");\r\n                    throw processedException;\r\n                }\r\n            }\r\n        }\r\n\r\n        internal static void ValidatePathFormat(string path, bool isDirectory)\r\n        {\r\n            if (isDirectory == path.EndsWith('/')) return;\r\n            string errorMessage = isDirectory\r\n                ? $\"Expect a directory, but S3 path {path} doesn't end up with a '/'\"\r\n                : $\"Expect a file, but S3 path {path} ends up with a '/'\";\r\n            throw new UserErrorException(errorMessage);\r\n        }\r\n\r\n        public static string FormatPath(string path) => path.TrimStart('/');\r\n\r\n        public IS3Client GetS3Client(TimeSpan timeOut) => new AmazonS3ClientWrapper(new AmazonS3Client(accessKey, secretKey, sessionToken, new AmazonS3Config { RegionEndpoint = RegionEndpoint.GetBySystemName(region), Timeout = timeOut }));\r\n    }\r\n}"
  },
  {
    "path": "Cloud/Messages/SaUrls.cs",
    "content": "\uFEFF// ReSharper disable InconsistentNaming\r\n\r\nusing System.IO;\r\nusing System.Linq;\r\nusing ErrorHandling.Exceptions;\r\nusing IO;\r\n\r\nnamespace Cloud.Messages\r\n{\r\n    /// <summary>\r\n    /// URLs of a user-supplied custom annotation: an NSA file with its index, an NSI file, or an NGA file.\r\n    /// </summary>\r\n    public sealed class SaUrls\r\n    {\r\n        public string nsaUrl;\r\n        public string idxUrl;\r\n        public string nsiUrl;\r\n        public string ngaUrl;\r\n\r\n        public CustomSaType SaType => GetSaType();\r\n        private CustomSaType _saType;\r\n\r\n        /// <summary>Validates the URL(s) that belong to the detected custom SA type.</summary>\r\n        public void Validate()\r\n        {\r\n            switch (SaType) {\r\n                case CustomSaType.Nsa:\r\n                    HttpUtilities.ValidateUrl(nsaUrl);\r\n                    HttpUtilities.ValidateUrl(idxUrl);\r\n                    break;\r\n                case CustomSaType.Nsi:\r\n                    HttpUtilities.ValidateUrl(nsiUrl);\r\n                    break;\r\n                case CustomSaType.Nga:\r\n                    HttpUtilities.ValidateUrl(ngaUrl);\r\n                    break;\r\n                default:\r\n                    throw new InvalidDataException(\"Unknown custom SA type.\");\r\n            }\r\n        }\r\n\r\n        /// <summary>\r\n        /// Determines which single custom SA type was provided and caches it in _saType.\r\n        /// </summary>\r\n        /// <exception cref=\"UserErrorException\">No type, more than one type, or an NSA file without an index was provided.</exception>\r\n        internal CustomSaType GetSaType()\r\n        {\r\n            if (_saType != default) return _saType;\r\n\r\n            bool[] checkSaTypes = {nsaUrl != null, nsiUrl != null, ngaUrl != null};\r\n            CustomSaType[] providedTypes = checkSaTypes.Select((x, i) => (Provided: x, SaTypeIndex: i + 1)).Where(y => y.Provided)\r\n                .Select(y => (CustomSaType) y.SaTypeIndex).ToArray();\r\n\r\n            if (providedTypes.Length == 0) throw new UserErrorException(\"No custom annotation file provided.\");\r\n            // join the type names explicitly: interpolating the LINQ query itself would print its type name\r\n            if (providedTypes.Length > 1)\r\n                throw new UserErrorException(\r\n                    $\"Multiple types of annotation files found: {string.Join(\", \", providedTypes.Select(x => x.ToString()))}. Please just provide one type of custom annotation file(s)\");\r\n\r\n            if (providedTypes[0] == CustomSaType.Nsa && idxUrl == null)\r\n                throw new UserErrorException($\"Index file is not provided for the NSA file {nsaUrl}.\");\r\n\r\n            _saType = providedTypes[0];\r\n            return _saType;\r\n        }\r\n\r\n        public override string ToString()\r\n        {\r\n            switch (SaType)\r\n            {\r\n                case CustomSaType.Nsa:\r\n                    return $\"{{\\\"nsaUrl\\\":\\\"{nsaUrl}\\\", \\\"idxUrl\\\":\\\"{idxUrl}\\\"}}\";\r\n                case CustomSaType.Nsi:\r\n                    return $\"{{\\\"nsiUrl\\\":\\\"{nsiUrl}\\\"}}\";\r\n                case CustomSaType.Nga:\r\n                    return $\"{{\\\"ngaUrl\\\":\\\"{ngaUrl}\\\"}}\";\r\n                default:\r\n                    throw new InvalidDataException(\"Unknown custom SA type.\");\r\n            }\r\n        }\r\n    }\r\n\r\n    public enum CustomSaType\r\n    {\r\n        Nsa = 1,\r\n        Nsi,\r\n        Nga\r\n    }\r\n}\r\n"
  },
  {
    "path": "Cloud/Messages/Single/SingleConfig.cs",
    "content": "﻿using System.Collections.Generic;\r\n\r\nnamespace Cloud.Messages.Single\r\n{\r\n    public sealed class SingleConfig\r\n    {\r\n        // ReSharper disable InconsistentNaming\r\n        public string id;\r\n        public string genomeAssembly;\r\n        public SingleVariant variant;\r\n        public int vepVersion;\r\n        public string supplementaryAnnotations;\r\n        public List<SaUrls> customAnnotations;\r\n        // ReSharper restore InconsistentNaming\r\n       \r\n    }\r\n}\r\n"
  },
  {
    "path": "Cloud/Messages/Single/SingleVariant.cs",
    "content": "\uFEFFusing System.Collections.Generic;\r\nusing System.Globalization;\r\nusing ErrorHandling.Exceptions;\r\n\r\nnamespace Cloud.Messages.Single\r\n{\r\n    /// <summary>A single variant described by VCF-style fields.</summary>\r\n    public sealed class SingleVariant\r\n    {\r\n        // ReSharper disable InconsistentNaming\r\n        public string chromosome;\r\n        public int? position;\r\n        public string refAllele;\r\n        public string[] altAlleles;\r\n        public double? quality;\r\n        public string[] filters;\r\n        public string infoField;\r\n        public string formatField;\r\n        public string[] sampleFields;\r\n        public string[] sampleNames;\r\n        // ReSharper restore InconsistentNaming\r\n\r\n        private const string VcfMissingValue = \".\";\r\n\r\n        /// <summary>\r\n        /// Checks that chromosome, position, reference allele and alternate alleles are present and that,\r\n        /// when any of the format field, sample fields or sample names is supplied, all three are supplied\r\n        /// with the same number of sample fields and sample names.\r\n        /// </summary>\r\n        /// <exception cref=\"UserErrorException\">A required value is missing or the sample counts differ.</exception>\r\n        public void Validate()\r\n        {\r\n            if (string.IsNullOrEmpty(chromosome)) throw new UserErrorException(\"Please provide the chromosome.\");\r\n            if (position == null) throw new UserErrorException(\"Please provide the position.\");\r\n            if (string.IsNullOrEmpty(refAllele)) throw new UserErrorException(\"Please provide the reference allele.\");\r\n            if (altAlleles == null || altAlleles.Length == 0) throw new UserErrorException(\"Please provide the alternate alleles.\");\r\n\r\n            if (!string.IsNullOrEmpty(formatField) || sampleFields != null || sampleNames != null)\r\n            {\r\n                if (string.IsNullOrEmpty(formatField)) throw new UserErrorException(\"Please provide a format field when supplying sample fields or sample names.\");\r\n\r\n                int numSampleFields = sampleFields?.Length ?? 0;\r\n                if (numSampleFields == 0) throw new UserErrorException(\"Please provide sample fields when supplying sample names and the format field.\");\r\n\r\n                int numSampleNames  = sampleNames?.Length ?? 0;\r\n                if (numSampleNames == 0) throw new UserErrorException(\"Please provide sample names when supplying sample fields and the format field.\");\r\n\r\n                if (sampleFields?.Length != sampleNames?.Length) throw new UserErrorException(\"Please provide the same number of sample fields as sample names.\");\r\n            }\r\n        }\r\n\r\n        /// <summary>\r\n        /// Builds the field array for this variant: chromosome, position, a missing-value placeholder, reference allele,\r\n        /// alternate alleles, quality, filters and info, followed by the format and sample fields when sample fields are set.\r\n        /// </summary>\r\n        public string[] GetVcfFields()\r\n        {\r\n            string altAlleleField = GetStringFromNullableCollection(altAlleles, ',');\r\n            string filterField    = GetStringFromNullableCollection(filters, ';');\r\n\r\n            var vcfFields = new List<string>\r\n            {\r\n                chromosome,\r\n                position.ToString(),\r\n                VcfMissingValue,\r\n                refAllele,\r\n                altAlleleField,\r\n                // invariant culture keeps '.' as the decimal separator regardless of the host locale\r\n                quality?.ToString(CultureInfo.InvariantCulture) ?? VcfMissingValue,\r\n                filterField,\r\n                infoField ?? VcfMissingValue\r\n            };\r\n\r\n            if (sampleFields != null)\r\n            {\r\n                vcfFields.Add(formatField ?? VcfMissingValue);\r\n                vcfFields.AddRange(sampleFields);\r\n            }\r\n\r\n            return vcfFields.ToArray();\r\n        }\r\n\r\n        private static string GetStringFromNullableCollection(string[] values, char separator) =>\r\n            values == null || values.Length == 0 \r\n                ? VcfMissingValue \r\n                : string.Join(separator, values);\r\n    }\r\n}"
  },
  {
    "path": "Cloud/Messages/StrValidation/ValidationConfig.cs",
    "content": "﻿using IO;\n\nnamespace Cloud.Messages.StrValidation\n{\n    public sealed class ValidationConfig\n    {\n        // ReSharper disable InconsistentNaming\n        public string id;\n        public string genomeAssembly;\n        public string customStrUrl;\n        // ReSharper restore InconsistentNaming\n\n        public void Validate() => HttpUtilities.ValidateUrl(customStrUrl);\n    }\n}"
  },
  {
    "path": "Cloud/Messages/StrValidation/ValidationResult.cs",
    "content": "﻿namespace Cloud.Messages.StrValidation\n{\n    public class ValidationResult\n    {\n        // ReSharper disable InconsistentNaming\n        // ReSharper disable NotAccessedField.Global\n        public string   id;\n        public string   status;\n        // ReSharper restore NotAccessedField.Global\n        // ReSharper restore InconsistentNaming\n    }\n}"
  },
  {
    "path": "Cloud/Notifications/SNS.cs",
    "content": "﻿using System;\r\nusing IO;\r\n\r\nnamespace Cloud.Notifications\r\n{\r\n    public static class SNS\r\n    {\r\n        public static void SendMessage(string snsTopicArn, string snsMessage)\r\n        {\r\n            try\r\n            {\r\n                using (var snsClient = new Amazon.SimpleNotificationService.AmazonSimpleNotificationServiceClient())\r\n                {\r\n                    snsClient.PublishAsync(snsTopicArn, snsMessage).Wait();\r\n                }\r\n            }\r\n            catch (Exception e)\r\n            {\r\n                Logger.WriteLine(\"Unable to log to SNS!!\");\r\n                Logger.WriteLine(e.Message);\r\n            }\r\n        }\r\n\r\n        public static string CreateMessage(string message, string status, string stackTrace) => $\"{message}\\n{status}\\nStackTrace: {stackTrace}\";\r\n    }\r\n}"
  },
  {
    "path": "Cloud/RedactionUtilities.cs",
    "content": "﻿using System.Text.RegularExpressions;\r\n\r\nnamespace Cloud\r\n{\r\n    public static class RedactionUtilities\r\n    {\r\n        private static readonly Regex AwsAccessKeyIdRegex = new Regex(\"AWSAccessKeyId=([^&]+)\");\r\n        private static readonly Regex AmzCredentialRegex  = new Regex(\"X-Amz-Credential=([^/]+)\");\r\n        private static readonly Regex AccessKeyRegex      = new Regex(\"\\\"accessKey\\\":\\\"([^\\\"]+)\");\r\n        private static readonly Regex SecretKeyRegex      = new Regex(\"\\\"secretKey\\\":\\\"([^\\\"]+)\");\r\n        private static readonly Regex SessionTokenRegex   = new Regex(\"\\\"sessionToken\\\":\\\"([^\\\"]+)\");\r\n\r\n        public static string Redact(this string s)\r\n        {\r\n            var awsAccessKeyIdMatches = AwsAccessKeyIdRegex.Matches(s);\r\n            var amzCredentialMatches  = AmzCredentialRegex.Matches(s);\r\n            var accessKeyMatches      = AccessKeyRegex.Matches(s);\r\n            var secretKeyMatches      = SecretKeyRegex.Matches(s);\r\n            var sessionTokenMatches   = SessionTokenRegex.Matches(s);\r\n\r\n            char[] charArray = s.ToCharArray();\r\n\r\n            charArray.Mask(awsAccessKeyIdMatches).Mask(amzCredentialMatches).Mask(accessKeyMatches)\r\n                .Mask(secretKeyMatches).Mask(sessionTokenMatches);\r\n\r\n            return new string(charArray);\r\n        }\r\n\r\n        private static char[] Mask(this char[] charArray, MatchCollection matches)\r\n        {\r\n            foreach (Match match in matches)\r\n            {\r\n                var group = match.Groups[1];\r\n                for (var i = 0; i < group.Length; i++)\r\n                {\r\n                    charArray[group.Index + i] = 'X';\r\n                }\r\n            }\r\n\r\n            return charArray;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Cloud/Utilities/AwsExceptionUtilities.cs",
    "content": "﻿using System;\r\nusing Amazon.S3;\r\nusing Cloud.Messages;\r\nusing ErrorHandling.Exceptions;\r\n\r\nnamespace Cloud.Utilities\r\n{\r\n    public static class AwsExceptionUtilities\r\n    {\r\n        public static Exception TryConvertUserException(Exception exception, S3Path s3Path)\r\n        {\r\n            AmazonS3Exception s3Exception;\r\n            while ((s3Exception = exception as AmazonS3Exception) == null)\r\n            {\r\n                if (exception.InnerException == null) return exception;\r\n                exception = exception.InnerException;\r\n            }\r\n            \r\n            string extraInfo;\r\n            switch (s3Exception.ErrorCode)\r\n            {\r\n                case \"ExpiredToken\":\r\n                case \"InvalidToken\":\r\n                    extraInfo = s3Path?.sessionToken;\r\n                    break;\r\n                case \"InvalidAccessKeyId\":\r\n                    extraInfo = s3Path?.accessKey;\r\n                    break;\r\n                case \"SignatureDoesNotMatch\":\r\n                    extraInfo = s3Path?.secretKey;\r\n                    break;\r\n                case \"NoSuchBucket\":\r\n                    extraInfo = s3Path?.bucketName;\r\n                    break;\r\n                case \"AccessDenied\":\r\n                case \"NoSuchKey\":\r\n                    extraInfo = s3Path?.path;\r\n                    break;\r\n                default:\r\n                    return s3Exception;\r\n            }\r\n\r\n            string errorMessage = extraInfo == null\r\n                ? s3Exception.Message\r\n                : $\"{s3Exception.Message} ({extraInfo})\";\r\n\r\n            return new UserErrorException(errorMessage);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Cloud/Utilities/JsonUtilities.cs",
    "content": "﻿using System.IO;\r\nusing System.Text;\r\nusing Amazon.Lambda.Serialization.Json;\r\n\r\nnamespace Cloud.Utilities\r\n{\r\n    public static class JsonUtilities\r\n    {\r\n        private static readonly JsonSerializer JsonSerializer = new JsonSerializer();\r\n\r\n        public static string Stringify(object obj) => Encoding.UTF8.GetString(Serialize(obj).ToArray());\r\n\r\n        public static MemoryStream Serialize(object obj)\r\n        {\r\n            var memoryStream = new MemoryStream();\r\n            JsonSerializer.Serialize(obj, memoryStream);\r\n            memoryStream.Position = 0;\r\n            return memoryStream;\r\n        }\r\n\r\n        public static T Deserialize<T>(MemoryStream memoryStream) => JsonSerializer.Deserialize<T>(memoryStream);\r\n    }\r\n}\r\n"
  },
  {
    "path": "Cloud/Utilities/LambdaUtilities.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing Genome;\r\nusing IO;\r\n\r\nnamespace Cloud.Utilities\r\n{\r\n    public static class LambdaUtilities\r\n    {\r\n        public const string SuccessMessage = \"Success\";\r\n        public const string SnsTopicKey    = \"SnsTopicArn\";\r\n\r\n        public static void GarbageCollect()\r\n        {\r\n            GC.Collect();\r\n            GC.WaitForPendingFinalizers();\r\n        }\r\n\r\n        public static string GetEnvironmentVariable(string key)\r\n        {\r\n            string value = Environment.GetEnvironmentVariable(key);\r\n            if (string.IsNullOrEmpty(value)) throw new InvalidDataException($\"Environment variable {key} is not set.\");\r\n            return value;\r\n        }\r\n\r\n        public static void DeleteTempOutput()\r\n        {\r\n            string[] files = Directory.GetFiles(Path.GetTempPath());\r\n            if (files.Length == 0) return;\r\n            foreach (string tempFile in files) File.Delete(tempFile);\r\n        }\r\n\r\n        public static string GetManifestUrl(string version, GenomeAssembly genomeAssembly, int saSchemaVersion = LambdaUrlHelper.SaSchemaVersion)\r\n        {\r\n            if (string.IsNullOrEmpty(version)) version = \"latest\";\r\n            string s3BaseUrl                           = LambdaUrlHelper.GetManifestBaseUrl() +$\"/{saSchemaVersion}/\";\r\n            switch (version)\r\n            {\r\n                case \"latest\":\r\n                    return $\"{s3BaseUrl}latest_SA_{genomeAssembly}.txt\";\r\n                case \"release\":\r\n                    return $\"{s3BaseUrl}DRAGEN_3.4_{genomeAssembly}.txt\";\r\n                case \"none\":\r\n                    return null;\r\n                default:\r\n                    return $\"{s3BaseUrl}{version}_SA_{genomeAssembly}.txt\";\r\n            }\r\n        }\r\n\r\n        public static string GetCachePathPrefix(GenomeAssembly genomeAssembly)\r\n        {\r\n       
     return LambdaUrlHelper.GetCacheFolder().UrlCombine(genomeAssembly.ToString())\r\n                .UrlCombine(LambdaUrlHelper.DefaultCacheSource);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Cloud/Utilities/LogUtilities.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.Text;\r\nusing Amazon.Lambda.Core;\r\nusing IO;\r\nusing Newtonsoft.Json;\r\n\r\nnamespace Cloud.Utilities\r\n{\r\n    public static class LogUtilities\r\n    {\r\n        public static void LogLambdaInfo(ILambdaContext context, string version) => Logger.WriteLine(\r\n            $\"Lambda version: {version} ARN: {context?.InvokedFunctionArn}\\nLog group: {context?.LogGroupName}\\nLog stream: {context?.LogStreamName}\");\r\n\r\n        public static void LogObject<T>(string title, T config)\r\n        {\r\n            string json;\r\n\r\n            switch (config)\r\n            {\r\n                case string s:\r\n                    json = s;\r\n                    break;\r\n                default:\r\n                    json = JsonConvert.SerializeObject(config);\r\n                    break;\r\n            }\r\n\r\n            Logger.WriteLine($\"{title}:\\n{json.Redact()}\");\r\n        }\r\n\r\n        public static void Log(IEnumerable<string> environmentVariables)\r\n        {\r\n            var sb = new StringBuilder();\r\n\r\n            sb.AppendLine(\"Environment variables:\");\r\n\r\n            foreach (string key in environmentVariables)\r\n            {\r\n                string value = Environment.GetEnvironmentVariable(key) ?? \"null\";\r\n                sb.AppendLine($\"- {key}: {value}\");\r\n            }\r\n\r\n            Logger.WriteLine(sb.ToString());\r\n        }\r\n\r\n        public static void UpdateLogger(ILambdaLogger logger, StringBuilder sb)\r\n        {\r\n            Logger.WriteLine = s =>\r\n            {\r\n                logger.LogLine(s);\r\n                sb?.Append(s + \"\\n\");\r\n            };\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Cloud/Utilities/UploadUtilities.cs",
    "content": "﻿using System;\r\nusing System.Security.Cryptography;\r\nusing System.Threading;\r\nusing Amazon.S3.Model;\r\nusing ErrorHandling.Exceptions;\r\nusing IO;\r\n\r\nnamespace Cloud.Utilities\r\n{\r\n    public static class UploadUtilities\r\n    {\r\n        public static void DecryptUpload(this IS3Client s3Client, string bucketName, string key, string filePath,\r\n            AesCryptoServiceProvider aes, FileMetadata metadata, int retryDelay = 1000)\r\n        {\r\n            while (true)\r\n            {\r\n                if (s3Client.TryDecryptUpload(bucketName, key, filePath, aes, metadata)) return;\r\n\r\n                Thread.Sleep(retryDelay);\r\n\r\n            }\r\n        }\r\n\r\n        internal static bool TryDecryptUpload(this IS3Client s3Client, string bucketName, string key, string filePath, AesCryptoServiceProvider aes, FileMetadata metadata)\r\n        {\r\n            try\r\n            {\r\n                using (var fileStream = FileUtilities.GetReadStream(filePath))\r\n                using (var cryptoStream = new CryptoStream(fileStream, aes.CreateDecryptor(), CryptoStreamMode.Read))\r\n                using (var lengthStream = new LengthStream(cryptoStream, metadata.Length))\r\n                {\r\n                    string md5String = Convert.ToBase64String(metadata.MD5);\r\n\r\n                    var request = new PutObjectRequest\r\n                    {\r\n                        BucketName = bucketName,\r\n                        Key = key,\r\n                        InputStream = lengthStream,\r\n                        MD5Digest = md5String\r\n                    };\r\n\r\n                    s3Client.PutObjectAsync(request).Wait();\r\n                }\r\n\r\n                return true;\r\n            }\r\n\r\n            catch (Exception exception)\r\n            {\r\n                var processedException = AwsExceptionUtilities.TryConvertUserException(exception, null);\r\n\r\n                if 
(processedException is UserErrorException) throw processedException;\r\n\r\n                Logger.WriteLine($\"Exception: {exception.Message}.\");\r\n                return false;\r\n            }\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "CommandLine/AssemblyInfo.cs",
    "content": "﻿using System.Runtime.CompilerServices;\n\n[assembly: InternalsVisibleTo(\"UnitTests\")]"
  },
  {
    "path": "CommandLine/Builders/ConsoleAppBuilder.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing CommandLine.NDesk.Options;\r\nusing CommandLine.Utilities;\r\nusing CommandLine.VersionProviders;\r\nusing ErrorHandling;\r\nusing VariantAnnotation.Interface.Providers;\r\n\r\nnamespace CommandLine.Builders\r\n{\r\n    public sealed class ConsoleAppBuilder : IConsoleAppBuilder\r\n    {\r\n        private readonly IConsoleAppBuilderData _data;\r\n        private readonly string[] _args;\r\n\r\n        public ConsoleAppBuilder(string[] args, OptionSet ops)\r\n        {\r\n            _args = args;\r\n            _data = new ConsoleAppBuilderData\r\n            {\r\n                Ops          = ops,\r\n                HasArguments = _args != null && _args.Length > 0\r\n            };\r\n\r\n            AddAdditionalOptions();\r\n        }\r\n\r\n        private void AddAdditionalOptions()\r\n        {\r\n            _data.Ops.Add(\"help|h\", \"displays the help menu\", v => _data.ShowHelpMenu = v != null);\r\n            _data.Ops.Add(\"version|v\", \"displays the version\", v => _data.ShowVersion = v != null);\r\n        }\r\n\r\n        public IConsoleAppValidator Parse()\r\n        {\r\n            if (!_data.HasArguments)\r\n            {\r\n                _data.ExitCode = ExitCodes.MissingCommandLineOption;\r\n                _data.ShowHelpMenu = true;\r\n                return new ConsoleAppValidator(_data);\r\n            }\r\n\r\n            try\r\n            {\r\n                _data.UnsupportedOps = _data.Ops.Parse(_args);\r\n\r\n                if (_data.UnsupportedOps.Count > 0)\r\n                {\r\n                    _data.AddError($\"Found unknown command-line option(s): {string.Join(\", \", _data.UnsupportedOps)}\",\r\n                        ExitCodes.UnknownCommandLineOption);\r\n                }\r\n            }\r\n            catch (OptionException oe)\r\n            {\r\n                _data.AddError(oe.Message, ExitCodes.UnknownCommandLineOption);\r\n          
  }\r\n\r\n            return new ConsoleAppValidator(_data);\r\n        }\r\n\r\n        public IConsoleAppBuilder UseVersionProvider(IVersionProvider versionProvider)\r\n        {\r\n            _data.VersionProvider = versionProvider;\r\n            return this;\r\n        }\r\n    }\r\n\r\n    public sealed class ConsoleAppValidator : IConsoleAppValidator\r\n    {\r\n        public IConsoleAppBuilderData Data { get; }\r\n        public bool SkipValidation { get; }\r\n\r\n        public ConsoleAppValidator(IConsoleAppBuilderData data)\r\n        {\r\n            Data           = data;\r\n            SkipValidation = !data.HasArguments || data.ShowHelpMenu || data.ShowVersion;\r\n        }\r\n\r\n        public IConsoleAppValidator DisableOutput(bool condition = true)\r\n        {\r\n            if (condition) Data.DisableOutput = true;\r\n            return this;\r\n        }\r\n\r\n        public IConsoleAppBanner ShowBanner(string authors)\r\n        {\r\n            if (Data.ShowVersion) Console.WriteLine($\"{CommandLineUtilities.Title} {CommandLineUtilities.InformationalVersion} {Data.VersionProvider.DataVersion}\");\r\n            else if (!Data.DisableOutput) CommandLineUtilities.DisplayBanner(authors);\r\n            return new ConsoleAppBanner(Data);\r\n        }\r\n\r\n        public IConsoleAppBanner SkipBanner() => new ConsoleAppBanner(Data);\r\n    }\r\n\r\n    public sealed class ConsoleAppBanner : IConsoleAppBanner\r\n    {\r\n        private readonly IConsoleAppBuilderData _data;\r\n\r\n        public ConsoleAppBanner(IConsoleAppBuilderData data) => _data = data;\r\n\r\n        public IConsoleAppHelpMenu ShowHelpMenu(string description, string commandLineExample)\r\n        {\r\n            // ReSharper disable once InvertIf\r\n            if (_data.ShowHelpMenu || _data.Errors.Count > 0)\r\n            {\r\n                Help.Show(_data.Ops, commandLineExample, description);\r\n                
Console.WriteLine($\"\\n{_data.VersionProvider.DataVersion}\\n\");\r\n            }\r\n\r\n            return new ConsoleAppHelpMenu(_data);\r\n        }\r\n    }\r\n\r\n    public sealed class ConsoleAppHelpMenu : IConsoleAppHelpMenu\r\n    {\r\n        private readonly IConsoleAppBuilderData _data;\r\n\r\n        public ConsoleAppHelpMenu(IConsoleAppBuilderData data) => _data = data;\r\n\r\n        public IConsoleAppErrors ShowErrors()\r\n        {\r\n            // ReSharper disable once InvertIf\r\n            if (_data.Errors.Count > 0)\r\n            {\r\n                Console.WriteLine(\"\\nSome problems were encountered when parsing the command line options:\");\r\n                PrintErrors();\r\n                Console.WriteLine(\"\\nFor a complete list of command line options, type \\\"dotnet {0} -h\\\"\", CommandLineUtilities.CommandFileName);\r\n            }\r\n\r\n            return new ConsoleAppErrors(_data);\r\n        }\r\n\r\n        private void PrintErrors()\r\n        {\r\n            foreach (string error in _data.Errors)\r\n            {\r\n                Console.Write(\"- \");\r\n                Console.ForegroundColor = ConsoleColor.Red;\r\n                Console.Write(\"ERROR: \");\r\n                Console.ResetColor();\r\n                Console.WriteLine(error);\r\n            }\r\n        }\r\n    }\r\n\r\n    public sealed class ConsoleAppErrors : IConsoleAppErrors\r\n    {\r\n        private readonly IConsoleAppBuilderData _data;\r\n        private bool Continue => _data.ExitCode == ExitCodes.Success && _data.HasArguments && !_data.ShowVersion && !_data.ShowHelpMenu;\r\n\r\n        public ConsoleAppErrors(IConsoleAppBuilderData data) => _data = data;\r\n\r\n        public ExitCodes Execute(Func<ExitCodes> executeMethod)\r\n        {\r\n            if (!Continue) return _data.ExitCode;\r\n\r\n            var benchmark = new Benchmark();\r\n            ExitCodes exitCode;\r\n\r\n            try\r\n            {\r\n              
  exitCode = executeMethod();\r\n                ShowPerformanceData(benchmark);\r\n            }\r\n            catch (Exception e)\r\n            {\r\n                exitCode = ExitCodeUtilities.ShowException(e);\r\n            }\r\n\r\n            return exitCode;\r\n        }\r\n\r\n        private void ShowPerformanceData(Benchmark benchmark)\r\n        {\r\n            if (_data.DisableOutput) return;\r\n\r\n            long peakMemoryUsageBytes = MemoryUtilities.GetPeakMemoryUsage();\r\n            var wallTimeSpan          = benchmark.GetElapsedTime();\r\n\r\n            Console.WriteLine();\r\n            if (peakMemoryUsageBytes > 0) Console.WriteLine(\"Peak memory usage: {0}\", MemoryUtilities.ToHumanReadable(peakMemoryUsageBytes));\r\n            Console.WriteLine(\"Time: {0}\", Benchmark.ToHumanReadable(wallTimeSpan));\r\n        }\r\n    }\r\n\r\n    public sealed class ConsoleAppBuilderData : IConsoleAppBuilderData\r\n    {\r\n        public OptionSet Ops { get; set; }\r\n        public List<string> UnsupportedOps { get; set; }\r\n        public List<string> Errors { get; } = new List<string>();\r\n        public ExitCodes ExitCode { get; set; } = ExitCodes.Success;\r\n        public bool DisableOutput { get; set; }\r\n        public bool HasArguments { get; set; }\r\n        public IVersionProvider VersionProvider { get; set; } = new DefaultVersionProvider();\r\n        public bool ShowHelpMenu { get; set; }\r\n        public bool ShowVersion { get; set; }\r\n\r\n        public void AddError(string errorMessage, ExitCodes exitCode)\r\n        {\r\n            ExitCode = exitCode;\r\n            Errors.Add(errorMessage);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CommandLine/Builders/IConsoleAppBuilder.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing CommandLine.NDesk.Options;\r\nusing ErrorHandling;\r\nusing VariantAnnotation.Interface.Providers;\r\n\r\nnamespace CommandLine.Builders\r\n{\r\n    /// We are using separate interfaces to enforce ordering in the console application\r\n    /// builder.\r\n\r\n    public interface IConsoleAppBuilder\r\n    {\r\n        // ReSharper disable once UnusedMemberInSuper.Global\r\n        IConsoleAppBuilder UseVersionProvider(IVersionProvider versionProvider);\r\n        IConsoleAppValidator Parse();\r\n    }\r\n\r\n    public interface IConsoleAppValidator\r\n    {\r\n        IConsoleAppValidator DisableOutput(bool condition = true);\r\n        IConsoleAppBanner ShowBanner(string authors);\r\n        IConsoleAppBanner SkipBanner();\r\n        IConsoleAppBuilderData Data { get; }\r\n        bool SkipValidation { get; }\r\n    }\r\n\r\n    public interface IConsoleAppBanner\r\n    {\r\n        IConsoleAppHelpMenu ShowHelpMenu(string description, string commandLineExample);\r\n    }\r\n\r\n    public interface IConsoleAppHelpMenu\r\n    {\r\n        IConsoleAppErrors ShowErrors();\r\n    }\r\n\r\n    public interface IConsoleAppErrors\r\n    {\r\n        ExitCodes Execute(Func<ExitCodes> executeMethod);\r\n    }\r\n\r\n    public interface IConsoleAppBuilderData\r\n    {\r\n        OptionSet Ops { get; }\r\n        List<string> UnsupportedOps { get; set; }\r\n        List<string> Errors { get; }\r\n        ExitCodes ExitCode { get; set; }\r\n        bool DisableOutput { get; set; }\r\n        bool HasArguments { get; }\r\n        IVersionProvider VersionProvider { get; set; }\r\n        bool ShowHelpMenu { get; set; }\r\n        bool ShowVersion { get; set; }\r\n        void AddError(string errorMessage, ExitCodes exitCode);\r\n    }\r\n}\r\n"
  },
  {
    "path": "CommandLine/Builders/ITopLevelAppBuilder.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing ErrorHandling;\r\n\r\nnamespace CommandLine.Builders\r\n{\r\n    public interface ITopLevelAppBuilder\r\n    {\r\n        // ReSharper disable once UnusedMemberInSuper.Global\r\n        ITopLevelAppValidator Parse();\r\n    }\r\n\r\n    public interface ITopLevelAppValidator\r\n    {\r\n        ITopLevelAppBanner ShowBanner(string authors);\r\n        ITopLevelAppBuilderData Data { get; }\r\n    }\r\n\r\n    public interface ITopLevelAppBanner\r\n    {\r\n        ITopLevelAppHelpMenu ShowHelpMenu(string description);\r\n    }\r\n\r\n    public interface ITopLevelAppHelpMenu\r\n    {\r\n        ITopLevelAppErrors ShowErrors();\r\n    }\r\n\r\n    public interface ITopLevelAppErrors\r\n    {\r\n        ExitCodes Execute();\r\n    }\r\n\r\n    public interface ITopLevelAppBuilderData\r\n    {\r\n        string[] Arguments { get; }\r\n        Dictionary<string, TopLevelOption> Ops { get; }\r\n        bool HasArguments { get; }\r\n        string Command { get; }\r\n\r\n        List<string> Errors { get; }\r\n        ExitCodes ExitCode { get; set; }\r\n        \r\n        bool ShowHelpMenu { get; set; }\r\n\r\n        Func<string, string[], ExitCodes> ExecuteMethod { get; set; }\r\n        void AddError(string errorMessage, ExitCodes exitCode);\r\n    }\r\n}\r\n"
  },
  {
    "path": "CommandLine/Builders/TopLevelAppBuilder.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.Linq;\r\nusing CommandLine.Utilities;\r\nusing ErrorHandling;\r\n\r\nnamespace CommandLine.Builders\r\n{\r\n    public sealed class TopLevelAppBuilder : ITopLevelAppBuilder\r\n    {\r\n        private readonly ITopLevelAppBuilderData _data;\r\n\r\n        public TopLevelAppBuilder(string[] args, Dictionary<string, TopLevelOption> ops)\r\n        {\r\n            _data = new TopLevelAppBuilderData(args, ops);\r\n        }\r\n\r\n        public ITopLevelAppValidator Parse()\r\n        {\r\n            if (!_data.HasArguments)\r\n            {\r\n                _data.ExitCode     = ExitCodes.MissingCommandLineOption;\r\n                _data.ShowHelpMenu = true;\r\n                return new TopLevelAppValidator(_data);\r\n            }\r\n\r\n            _data.ExecuteMethod = GetExecuteMethod(_data.Command);\r\n\r\n            return new TopLevelAppValidator(_data);\r\n        }\r\n\r\n        private Func<string, string[], ExitCodes> GetExecuteMethod(string command)\r\n        {\r\n            var lowerDict = new Dictionary<string, TopLevelOption>();\r\n            foreach (var kvp in _data.Ops) lowerDict[kvp.Key.ToLower()] = kvp.Value;\r\n\r\n            if (lowerDict.TryGetValue(command, out var topLevelOption)) return topLevelOption.CommandMethod;\r\n\r\n            _data.AddError($\"An unrecognized command '{_data.Command}' was specified.\", ExitCodes.UnknownCommandLineOption);\r\n            return null;\r\n        }\r\n    }\r\n\r\n    public sealed class TopLevelAppValidator : ITopLevelAppValidator\r\n    {\r\n        public ITopLevelAppBuilderData Data { get; }\r\n\r\n        public TopLevelAppValidator(ITopLevelAppBuilderData data) => Data = data;\r\n\r\n        public ITopLevelAppBanner ShowBanner(string authors)\r\n        {\r\n            CommandLineUtilities.DisplayBanner(authors);\r\n            return new TopLevelAppBanner(Data);\r\n        }\r\n    }\r\n\r\n    
public sealed class TopLevelAppBanner : ITopLevelAppBanner\r\n    {\r\n        private readonly ITopLevelAppBuilderData _data;\r\n\r\n        public TopLevelAppBanner(ITopLevelAppBuilderData data) => _data = data;\r\n\r\n        public ITopLevelAppHelpMenu ShowHelpMenu(string description)\r\n        {\r\n            // ReSharper disable once InvertIf\r\n            if (_data.ShowHelpMenu || _data.Errors.Count > 0)\r\n            {\r\n                Console.WriteLine(description);\r\n                Console.WriteLine();\r\n\r\n                OutputHelper.WriteLabel(\"USAGE: \");\r\n\r\n                Console.WriteLine($\"dotnet {CommandLineUtilities.CommandFileName} <command> [options]\");\r\n                Console.WriteLine();\r\n\r\n                DisplayCommands(_data.Ops);\r\n            }\r\n\r\n            return new TopLevelAppHelpMenu(_data);\r\n        }\r\n\r\n        private static void DisplayCommands(Dictionary<string, TopLevelOption> ops)\r\n        {\r\n            const string label = \"COMMAND: \";\r\n            var filler = new string(' ', label.Length);\r\n\r\n            int commandColumnLen = GetMaxCommandLen(ops.Keys) + 3;\r\n            var useLabel = true;\r\n\r\n            foreach (var op in ops)\r\n            {\r\n                if (useLabel)\r\n                {\r\n                    OutputHelper.WriteLabel(label);\r\n                    useLabel = false;\r\n                }\r\n                else Console.Write(filler);\r\n\r\n                var commandFiller = new string(' ', commandColumnLen - op.Key.Length);\r\n                Console.WriteLine(op.Key + commandFiller + op.Value.Description);\r\n            }\r\n        }\r\n\r\n        private static int GetMaxCommandLen(IEnumerable<string> ops)\r\n        {\r\n            return ops.Select(op => op.Length).Concat(new int[1]).Max();\r\n        }\r\n    }\r\n\r\n    public sealed class TopLevelAppHelpMenu : ITopLevelAppHelpMenu\r\n    {\r\n        private readonly 
ITopLevelAppBuilderData _data;\r\n\r\n        public TopLevelAppHelpMenu(ITopLevelAppBuilderData data) => _data = data;\r\n\r\n        public ITopLevelAppErrors ShowErrors()\r\n        {\r\n            // ReSharper disable once InvertIf\r\n            if (_data.Errors.Count > 0)\r\n            {\r\n                Console.WriteLine(\"\\nSome problems were encountered when parsing the command line options:\");\r\n                PrintErrors();\r\n            }\r\n\r\n            return new TopLevelAppErrors(_data);\r\n        }\r\n\r\n        private void PrintErrors()\r\n        {\r\n            foreach (string error in _data.Errors)\r\n            {\r\n                Console.Write(\"- \");\r\n                Console.ForegroundColor = ConsoleColor.Red;\r\n                Console.Write(\"ERROR: \");\r\n                Console.ResetColor();\r\n                Console.WriteLine(error);\r\n            }\r\n        }\r\n    }\r\n\r\n    public sealed class TopLevelAppErrors : ITopLevelAppErrors\r\n    {\r\n        private readonly ITopLevelAppBuilderData _data;\r\n        private bool Continue => _data.ExitCode == ExitCodes.Success && _data.HasArguments && !_data.ShowHelpMenu;\r\n\r\n        public TopLevelAppErrors(ITopLevelAppBuilderData data) => _data = data;\r\n\r\n        public ExitCodes Execute()\r\n        {\r\n            if (!Continue) return _data.ExitCode;\r\n\r\n            ExitCodes exitCode;\r\n\r\n            try\r\n            {\r\n                var arguments = _data.Arguments.Skip(1).ToArray();\r\n                exitCode = _data.ExecuteMethod(_data.Command, arguments);\r\n            }\r\n            catch (Exception e)\r\n            {\r\n                exitCode = ExitCodeUtilities.ShowException(e);\r\n            }\r\n\r\n            return exitCode;\r\n        }\r\n    }\r\n\r\n    public sealed class TopLevelAppBuilderData : ITopLevelAppBuilderData\r\n    {\r\n        public string[] Arguments { get; }\r\n        public Dictionary<string, 
TopLevelOption> Ops { get; }\r\n        public bool HasArguments => Arguments != null && Arguments.Length > 0;\r\n        public string Command { get; }\r\n\r\n        public List<string> Errors { get; } = new List<string>();\r\n        public ExitCodes ExitCode { get; set; }\r\n        \r\n        public bool ShowHelpMenu { get; set; }\r\n        public Func<string, string[], ExitCodes> ExecuteMethod { get; set; }\r\n                \r\n        public TopLevelAppBuilderData(string[] arguments, Dictionary<string, TopLevelOption> ops)\r\n        {\r\n            Arguments = arguments;\r\n            Ops       = ops;\r\n            Command   = HasArguments ? arguments[0].ToLower() : null;\r\n        }\r\n\r\n        public void AddError(string errorMessage, ExitCodes exitCode)\r\n        {\r\n            ExitCode = exitCode;\r\n            Errors.Add(errorMessage);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CommandLine/Builders/TopLevelOption.cs",
    "content": "﻿using System;\r\nusing ErrorHandling;\r\n\r\nnamespace CommandLine.Builders\r\n{\r\n    public sealed class TopLevelOption\r\n    {\r\n        public readonly string Description;\r\n        public readonly Func<string, string[], ExitCodes> CommandMethod;\r\n\r\n        public TopLevelOption(string description, Func<string, string[], ExitCodes> commandMethod)\r\n        {\r\n            Description   = description;\r\n            CommandMethod = commandMethod;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CommandLine/Builders/ValidationExtensions.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Net;\r\nusing ErrorHandling;\r\n\r\nnamespace CommandLine.Builders\r\n{\r\n    public static class ValidationExtensions\r\n    {\r\n        public static IConsoleAppValidator CheckEachFilenameExists(this IConsoleAppValidator validator,\r\n            IEnumerable<string> filePaths, string description, string commandLineOption, bool isRequired = true)\r\n        {\r\n            foreach (string filePath in filePaths)\r\n            {\r\n                validator.CheckInputFilenameExists(filePath, description, commandLineOption, isRequired);\r\n            }\r\n            return validator;\r\n        }\r\n\r\n        public static IConsoleAppValidator CheckInputFilenameExists(this IConsoleAppValidator validator,\r\n            string filePath, string description, string commandLineOption, bool isRequired = true, string ignoreValue = null)\r\n        {\r\n            if (validator.SkipValidation) return validator;\r\n\r\n            if (string.IsNullOrEmpty(filePath) && isRequired)\r\n            {\r\n                validator.Data.AddError(\r\n                    $\"The {description} file was not specified. 
Please use the {commandLineOption} parameter.\",\r\n                    ExitCodes.MissingCommandLineOption);\r\n            }\r\n            else if (isRequired && (ignoreValue == null || filePath != ignoreValue) && !File.Exists(filePath) && !CheckUrlExist(filePath))\r\n            {\r\n                validator.Data.AddError($\"The {description} file ({filePath}) does not exist.\", ExitCodes.FileNotFound);\r\n            }\r\n\r\n            return validator;\r\n        }\r\n\r\n        private static bool CheckUrlExist(string url)\r\n        {\r\n            try\r\n            {\r\n                var webRequest = WebRequest.Create(url);\r\n                webRequest.GetResponse();\r\n            }\r\n            catch //If exception thrown then couldn't get response from address\r\n            {\r\n                return false;\r\n            }\r\n            return true;\r\n        }      \r\n\r\n        public static IConsoleAppValidator CheckOutputFilenameSuffix(this IConsoleAppValidator validator,\r\n            string filePath, string fileSuffix, string description)\r\n        {\r\n            if (validator.SkipValidation) return validator;\r\n\r\n            if (!filePath.EndsWith(fileSuffix))\r\n            {\r\n                validator.Data.AddError($\"The {description} file ({filePath}) does not end with a {fileSuffix}.\", ExitCodes.BadArguments);\r\n            }\r\n\r\n            return validator;\r\n        }\r\n\r\n        public static IConsoleAppValidator CheckDirectoryExists(this IConsoleAppValidator validator, string dirPath,\r\n            string description, string commandLineOption)\r\n        {\r\n            if (validator.SkipValidation) return validator;\r\n\r\n            if (string.IsNullOrEmpty(dirPath))\r\n            {\r\n                validator.Data.AddError(\r\n                    $\"The {description} directory was not specified. 
Please use the {commandLineOption} parameter.\",\r\n                    ExitCodes.MissingCommandLineOption);\r\n            }\r\n            else if (!Directory.Exists(dirPath))\r\n            {\r\n                validator.Data.AddError($\"The {description} directory ({dirPath}) does not exist.\", ExitCodes.PathNotFound);\r\n            }\r\n\r\n            return validator;\r\n        }\r\n\r\n        public static IConsoleAppValidator HasRequiredParameter<T>(this IConsoleAppValidator validator,\r\n            T parameterValue, string description, string commandLineOption)\r\n        {\r\n            if (validator.SkipValidation) return validator;\r\n\r\n            if (EqualityComparer<T>.Default.Equals(parameterValue, default))\r\n            {\r\n                validator.Data.AddError($\"The {description} was not specified. Please use the {commandLineOption} parameter.\",\r\n                    ExitCodes.MissingCommandLineOption);\r\n            }\r\n\r\n            return validator;\r\n        }\r\n\r\n        public static IConsoleAppValidator HasRequiredDate(this IConsoleAppValidator validator, string date,\r\n            string description, string commandLineOption)\r\n        {\r\n            if (validator.SkipValidation) return validator;\r\n\r\n            validator.HasRequiredParameter(date, description, commandLineOption);\r\n            if (string.IsNullOrEmpty(date)) return validator;\r\n\r\n            if (!DateTime.TryParse(date, out _))\r\n            {\r\n                validator.Data.AddError($\"The {description} was not specified as a date (YYYY-MM-dd). Please use the {commandLineOption} parameter.\", ExitCodes.BadArguments);\r\n            }\r\n\r\n            return validator;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CommandLine/CommandLine.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\r\n  <PropertyGroup>\r\n    <TargetFramework>net6.0</TargetFramework>\r\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\n    \r\n  </PropertyGroup>\r\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\r\n  <ItemGroup>\r\n    <ProjectReference Include=\"..\\ErrorHandling\\ErrorHandling.csproj\" />\r\n    <ProjectReference Include=\"..\\OptimizedCore\\OptimizedCore.csproj\" />\r\n  </ItemGroup>\r\n</Project>"
  },
  {
    "path": "CommandLine/NDesk.Options/Options.cs",
    "content": "//\r\n// Options.cs\r\n//\r\n// Authors:\r\n//  Jonathan Pryor <jpryor@novell.com>\r\n//\r\n// Copyright (C) 2008 Novell (http://www.novell.com)\r\n//\r\n// Permission is hereby granted, free of charge, to any person obtaining\r\n// a copy of this software and associated documentation files (the\r\n// \"Software\"), to deal in the Software without restriction, including\r\n// without limitation the rights to use, copy, modify, merge, publish,\r\n// distribute, sublicense, and/or sell copies of the Software, and to\r\n// permit persons to whom the Software is furnished to do so, subject to\r\n// the following conditions:\r\n// \r\n// The above copyright notice and this permission notice shall be\r\n// included in all copies or substantial portions of the Software.\r\n// \r\n// THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\r\n// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r\n// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r\n// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE\r\n// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\r\n// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION\r\n// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\r\n//\r\n// The LINQ version just changes the implementation of\r\n// OptionSet.Parse(IEnumerable<string>), and confers no semantic changes.\r\n//\r\n// A Getopt::Long-inspired option parsing library for C#.\r\n//\r\n// NDesk.Options.OptionSet is built upon a key/value table, where the\r\n// key is an option format string and the value is a delegate that is \r\n// invoked when the format string is matched.\r\n\r\nusing System;\r\nusing System.Collections.Generic;\r\nusing System.Collections.ObjectModel;\r\nusing System.IO;\r\nusing System.Text;\r\nusing System.Text.RegularExpressions;\r\nusing OptimizedCore;\r\n\r\nnamespace CommandLine.NDesk.Options\r\n{\r\n    public sealed class OptionValueCollection\r\n    {\r\n        private readonly List<string> _values = new List<string>();\r\n        private readonly OptionContext _c;\r\n\r\n        internal OptionValueCollection(OptionContext c)\r\n        {\r\n            _c = c;\r\n        }\r\n\r\n        #region ICollection<T>\r\n        public void Add(string item) { _values.Add(item); }\r\n        public void Clear() { _values.Clear(); }\r\n        public int Count => _values.Count;\r\n\r\n        #endregion\r\n\r\n\r\n\r\n        #region IList<T>\r\n\r\n\r\n        private void AssertValid(int index)\r\n        {\r\n            if (_c.Option == null)\r\n                throw new InvalidOperationException(\"OptionContext.Option is null.\");\r\n            if (index >= _c.Option.MaxValueCount)\r\n                throw new ArgumentOutOfRangeException(nameof(index));\r\n            if (_c.Option.OptionValueType == OptionValueType.Required &&\r\n                index >= _values.Count)\r\n                throw new OptionException($\"Missing required 
value for option '{_c.OptionName}'.\");\r\n        }\r\n\r\n        public string this[int index]\r\n        {\r\n            get\r\n            {\r\n                AssertValid(index);\r\n                return index >= _values.Count ? null : _values[index];\r\n            }\r\n        }\r\n        #endregion\r\n    }\r\n\r\n    public sealed class OptionContext\r\n    {\r\n        public OptionContext()\r\n        {\r\n            OptionValues = new OptionValueCollection(this);\r\n        }\r\n\r\n        public Option Option { get; set; }\r\n\r\n        public string OptionName { get; set; }\r\n\r\n        public int OptionIndex { get; set; }\r\n\r\n        public OptionValueCollection OptionValues { get; }\r\n    }\r\n\r\n    public enum OptionValueType\r\n    {\r\n        None,\r\n        Optional,\r\n        Required\r\n    }\r\n\r\n    public abstract class Option\r\n    {\r\n        protected Option(string prototype, string description, int maxValueCount)\r\n        {\r\n            if (prototype == null)\r\n                throw new ArgumentNullException(nameof(prototype));\r\n            if (prototype.Length == 0)\r\n                throw new ArgumentException(\"Cannot be the empty string.\", nameof(prototype));\r\n            if (maxValueCount < 0)\r\n                throw new ArgumentOutOfRangeException(nameof(maxValueCount));\r\n\r\n            Names = prototype.Split('|');\r\n            Description = description;\r\n            MaxValueCount = maxValueCount;\r\n            OptionValueType = ParsePrototype();\r\n\r\n            if (MaxValueCount == 0 && OptionValueType != OptionValueType.None)\r\n                throw new ArgumentException(\r\n                    \"Cannot provide maxValueCount of 0 for OptionValueType.Required or \" +\r\n                    \"OptionValueType.Optional.\",\r\n                    nameof(maxValueCount));\r\n            if (OptionValueType == OptionValueType.None && maxValueCount > 1)\r\n                throw new 
ArgumentException(\r\n                    $\"Cannot provide maxValueCount of {maxValueCount} for OptionValueType.None.\",\r\n                    nameof(maxValueCount));\r\n            if (Array.IndexOf(Names, \"<>\") >= 0 &&\r\n                (Names.Length == 1 && OptionValueType != OptionValueType.None ||\r\n                 Names.Length > 1 && MaxValueCount > 1))\r\n                throw new ArgumentException(\r\n                    \"The default option handler '<>' cannot require values.\",\r\n                    nameof(prototype));\r\n        }\r\n\r\n        public string Description { get; }\r\n\r\n        public OptionValueType OptionValueType { get; }\r\n\r\n        public int MaxValueCount { get; }\r\n\r\n        protected static T Parse<T>(string value, OptionContext c)\r\n        {\r\n            T t;\r\n            try\r\n            {\r\n                t = (T)Convert.ChangeType(value, typeof(T));\r\n            }\r\n            catch (Exception e)\r\n            {\r\n                throw new OptionException(\r\n                    $\"Could not convert string `{value}' to type {typeof(T).Name} for option `{c.OptionName}'.\", e);\r\n            }\r\n            return t;\r\n        }\r\n\r\n        public string[] Names { get; }\r\n\r\n        internal string[] ValueSeparators { get; private set; }\r\n\r\n        private static readonly char[] NameTerminator = { '=', ':' };\r\n\r\n        private OptionValueType ParsePrototype()\r\n        {\r\n            var type = '\\0';\r\n            var seps = new List<string>();\r\n\r\n            for (var i = 0; i < Names.Length; ++i)\r\n            {\r\n                string name = Names[i];\r\n                if (name.Length == 0) throw new InvalidDataException($\"Empty option names are not supported: {nameof(name)}\");\r\n\r\n                int end = name.IndexOfAny(NameTerminator);\r\n                if (end == -1) continue;\r\n\r\n                Names[i] = name.Substring(0, end);\r\n\r\n                
if (type == '\\0' || type == name[end]) type = name[end];\r\n                else throw new InvalidDataException($\"Conflicting option types: '{type}' vs. '{name[end]}'.\");\r\n\r\n                AddSeparators(name, end, seps);\r\n            }\r\n\r\n            if (type == '\\0') return OptionValueType.None;\r\n\r\n            if (MaxValueCount <= 1 && seps.Count != 0) throw new InvalidDataException($\"Cannot provide key/value separators for Options taking {MaxValueCount} value(s).\");\r\n            if (MaxValueCount <= 1) return GetOptionValueType(type);\r\n\r\n            switch (seps.Count)\r\n            {\r\n                case 0:\r\n                    ValueSeparators = new[] { \":\", \"=\" };\r\n                    break;\r\n                case 1 when seps[0].Length == 0:\r\n                    ValueSeparators = null;\r\n                    break;\r\n                default:\r\n                    ValueSeparators = seps.ToArray();\r\n                    break;\r\n            }\r\n\r\n            return GetOptionValueType(type);\r\n        }\r\n\r\n        private static OptionValueType GetOptionValueType(char type) =>\r\n            type == '=' ? 
OptionValueType.Required : OptionValueType.Optional;\r\n\r\n        private static void AddSeparators(string name, int end, ICollection<string> seps)\r\n        {\r\n            int start = -1;\r\n            for (int i = end + 1; i < name.Length; ++i)\r\n            {\r\n                switch (name[i])\r\n                {\r\n                    case '{':\r\n                        if (start != -1)\r\n                            throw new ArgumentException(\r\n                                $\"Ill-formed name/value separator found in \\\"{name}\\\".\",\r\n                                nameof(name));\r\n                        start = i + 1;\r\n                        break;\r\n                    case '}':\r\n                        if (start == -1)\r\n                            throw new ArgumentException(\r\n                                $\"Ill-formed name/value separator found in \\\"{name}\\\".\",\r\n                                nameof(name));\r\n                        seps.Add(name.Substring(start, i - start));\r\n                        start = -1;\r\n                        break;\r\n                    default:\r\n                        if (start == -1)\r\n                            seps.Add(name[i].ToString());\r\n                        break;\r\n                }\r\n            }\r\n            if (start != -1)\r\n                throw new ArgumentException(\r\n                    $\"Ill-formed name/value separator found in \\\"{name}\\\".\",\r\n                    nameof(name));\r\n        }\r\n\r\n        public void Invoke(OptionContext c)\r\n        {\r\n            OnParseComplete(c);\r\n            c.OptionName = null;\r\n            c.Option = null;\r\n            c.OptionValues.Clear();\r\n        }\r\n\r\n        protected abstract void OnParseComplete(OptionContext c);\r\n    }\r\n\r\n    public sealed class OptionException : Exception\r\n    {\r\n        public OptionException(string message)\r\n            : base(message)\r\n    
    {\r\n        }\r\n\r\n        public OptionException(string message, Exception innerException)\r\n            : base(message, innerException)\r\n        {\r\n        }\r\n    }\r\n\r\n    public sealed class OptionSet : KeyedCollection<string, Option>\r\n    {\r\n        protected override string GetKeyForItem(Option item)\r\n        {\r\n            if (item == null)\r\n                throw new ArgumentNullException(nameof(item));\r\n            if (item.Names != null && item.Names.Length > 0)\r\n                return item.Names[0];\r\n            // This should never happen, as it's invalid for Option to be\r\n            // constructed w/o any names.\r\n            throw new InvalidOperationException(\"Option has no names!\");\r\n        }\r\n\r\n        protected override void InsertItem(int index, Option item)\r\n        {\r\n            base.InsertItem(index, item);\r\n            AddImpl(item);\r\n        }\r\n\r\n        private void AddImpl(Option option)\r\n        {\r\n            if (option == null)\r\n                throw new ArgumentNullException(nameof(option));\r\n            var added = new List<string>(option.Names.Length);\r\n            try\r\n            {\r\n                // KeyedCollection.InsertItem/SetItem handle the 0th name.\r\n                for (var i = 1; i < option.Names.Length; ++i)\r\n                {\r\n                    Dictionary.Add(option.Names[i], option);\r\n                    added.Add(option.Names[i]);\r\n                }\r\n            }\r\n            catch (Exception)\r\n            {\r\n                foreach (string name in added)\r\n                    Dictionary.Remove(name);\r\n                throw;\r\n            }\r\n        }\r\n\r\n        public new void Add(Option option)\r\n        {\r\n            base.Add(option);\r\n        }\r\n\r\n        private sealed class ActionOption : Option\r\n        {\r\n            private readonly Action<OptionValueCollection> _action;\r\n\r\n            
public ActionOption(string prototype, string description, int count, Action<OptionValueCollection> action)\r\n                : base(prototype, description, count)\r\n            {\r\n                _action = action ?? throw new ArgumentNullException(nameof(action));\r\n            }\r\n\r\n            protected override void OnParseComplete(OptionContext c)\r\n            {\r\n                _action(c.OptionValues);\r\n            }\r\n        }\r\n\r\n        public void Add(string prototype, string description, Action<string> action)\r\n        {\r\n            if (action == null)\r\n                throw new ArgumentNullException(nameof(action));\r\n            Option p = new ActionOption(prototype, description, 1,\r\n                delegate (OptionValueCollection v) { action(v[0]); });\r\n            base.Add(p);\r\n        }\r\n\r\n        private sealed class ActionOption<T> : Option\r\n        {\r\n            private readonly Action<T> _action;\r\n\r\n            public ActionOption(string prototype, string description, Action<T> action)\r\n                : base(prototype, description, 1)\r\n            {\r\n                _action = action ?? 
throw new ArgumentNullException(nameof(action));\r\n            }\r\n\r\n            protected override void OnParseComplete(OptionContext c)\r\n            {\r\n                _action(Parse<T>(c.OptionValues[0], c));\r\n            }\r\n        }\r\n\r\n        public void Add<T>(string prototype, string description, Action<T> action)\r\n        {\r\n            Add(new ActionOption<T>(prototype, description, action));\r\n        }\r\n\r\n        private static OptionContext CreateOptionContext()\r\n        {\r\n            return new OptionContext();\r\n        }\r\n\r\n        public List<string> Parse(IEnumerable<string> arguments)\r\n        {\r\n            OptionContext c = CreateOptionContext();\r\n            c.OptionIndex = -1;\r\n            var process = true;\r\n            var unprocessed = new List<string>();\r\n            Option def = Contains(\"<>\") ? this[\"<>\"] : null;\r\n            foreach (string argument in arguments)\r\n            {\r\n                ++c.OptionIndex;\r\n                if (argument == \"--\")\r\n                {\r\n                    process = false;\r\n                    continue;\r\n                }\r\n                if (!process)\r\n                {\r\n                    Unprocessed(unprocessed, def, c, argument);\r\n                    continue;\r\n                }\r\n                if (!Parse(argument, c))\r\n                    Unprocessed(unprocessed, def, c, argument);\r\n            }\r\n            c.Option?.Invoke(c);\r\n            return unprocessed;\r\n        }\r\n\r\n        private static void Unprocessed(ICollection<string> extra, Option def, OptionContext c, string argument)\r\n        {\r\n            if (def == null)\r\n            {\r\n                extra.Add(argument);\r\n                return;\r\n            }\r\n            c.OptionValues.Add(argument);\r\n            c.Option = def;\r\n            c.Option.Invoke(c);\r\n        }\r\n\r\n        private readonly Regex _valueOption = 
new Regex(\r\n            @\"^(?<flag>--|-|/)(?<name>[^:=]+)((?<sep>[:=])(?<value>.*))?$\");\r\n\r\n        private bool GetOptionParts(string argument, out string flag, out string name, out string sep, out string value)\r\n        {\r\n            if (argument == null)\r\n                throw new ArgumentNullException(nameof(argument));\r\n\r\n            flag = name = sep = value = null;\r\n            var m = _valueOption.Match(argument);\r\n            if (!m.Success) return false;\r\n\r\n            flag = m.Groups[\"flag\"].Value;\r\n            name = m.Groups[\"name\"].Value;\r\n\r\n            // ReSharper disable once InvertIf\r\n            if (m.Groups[\"sep\"].Success && m.Groups[\"value\"].Success)\r\n            {\r\n                sep   = m.Groups[\"sep\"].Value;\r\n                value = m.Groups[\"value\"].Value;\r\n            }\r\n\r\n            return true;\r\n        }\r\n\r\n        private bool Parse(string argument, OptionContext c)\r\n        {\r\n            if (c.Option != null)\r\n            {\r\n                ParseValue(argument, c);\r\n                return true;\r\n            }\r\n\r\n            if (!GetOptionParts(argument, out string f, out string n, out string s, out string v))\r\n                return false;\r\n\r\n            if (!Contains(n)) return ParseBool(argument, n, c) || ParseBundledValue(f, n + s + v, c);\r\n\r\n            var p = this[n];\r\n            c.OptionName = f + n;\r\n            c.Option = p;\r\n            // ReSharper disable once SwitchStatementMissingSomeCases\r\n            switch (p.OptionValueType)\r\n            {\r\n                case OptionValueType.None:\r\n                    c.OptionValues.Add(n);\r\n                    c.Option.Invoke(c);\r\n                    break;\r\n                case OptionValueType.Optional:\r\n                case OptionValueType.Required:\r\n                    ParseValue(v, c);\r\n                    break;\r\n            }\r\n            return 
true;\r\n        }\r\n\r\n        private static void ParseValue(string option, OptionContext c)\r\n        {\r\n            if (option != null)\r\n                foreach (string o in c.Option.ValueSeparators != null\r\n                    ? option.Split(c.Option.ValueSeparators, StringSplitOptions.None)\r\n                    : new[] { option })\r\n                {\r\n                    c.OptionValues.Add(o);\r\n                }\r\n            if (c.OptionValues.Count == c.Option.MaxValueCount ||\r\n                c.Option.OptionValueType == OptionValueType.Optional)\r\n                c.Option.Invoke(c);\r\n            else if (c.OptionValues.Count > c.Option.MaxValueCount)\r\n            {\r\n                throw new OptionException($\"Error: Found {c.OptionValues.Count} option values when expecting {c.Option.MaxValueCount}.\");\r\n            }\r\n        }\r\n\r\n        private bool ParseBool(string option, string n, OptionContext c)\r\n        {\r\n            if (n.Length < 1 || n[n.Length - 1] != '+' && n[n.Length - 1] != '-') return false;\r\n\r\n            string rn = n.Substring(0, n.Length - 1);\r\n            if (!Contains(rn)) return false;\r\n\r\n            var p = this[rn];\r\n            string v = n[n.Length - 1] == '+' ? 
option : null;\r\n            c.OptionName = option;\r\n            c.Option = p;\r\n            c.OptionValues.Add(v);\r\n            p.Invoke(c);\r\n            return true;\r\n        }\r\n\r\n        private bool ParseBundledValue(string f, string n, OptionContext c)\r\n        {\r\n            if (f != \"-\")\r\n                return false;\r\n            for (var i = 0; i < n.Length; ++i)\r\n            {\r\n                string opt = f + n[i];\r\n                string rn = n[i].ToString();\r\n                if (!Contains(rn))\r\n                {\r\n                    if (i == 0)\r\n                        return false;\r\n                    throw new OptionException($\"Cannot bundle unregistered option '{opt}'.\");\r\n                }\r\n                var p = this[rn];\r\n                switch (p.OptionValueType)\r\n                {\r\n                    case OptionValueType.None:\r\n                        Invoke(c, opt, n, p);\r\n                        break;\r\n                    case OptionValueType.Optional:\r\n                    case OptionValueType.Required:\r\n                        {\r\n                            string v = n.Substring(i + 1);\r\n                            c.Option = p;\r\n                            c.OptionName = opt;\r\n                            ParseValue(v.Length != 0 ? 
v : null, c);\r\n                            return true;\r\n                        }\r\n                    default:\r\n                        throw new InvalidOperationException(\"Unknown OptionValueType: \" + p.OptionValueType);\r\n                }\r\n            }\r\n            return true;\r\n        }\r\n\r\n        private static void Invoke(OptionContext c, string name, string value, Option option)\r\n        {\r\n            c.OptionName = name;\r\n            c.Option = option;\r\n            c.OptionValues.Add(value);\r\n            option.Invoke(c);\r\n        }\r\n\r\n        private const int OptionWidth = 29;\r\n\r\n        public void WriteOptionDescriptions(TextWriter o)\r\n        {\r\n            foreach (Option p in this)\r\n            {\r\n                var written = 0;\r\n                if (!WriteOptionPrototype(o, p, ref written))\r\n                    continue;\r\n\r\n                if (written < OptionWidth)\r\n                    o.Write(new string(' ', OptionWidth - written));\r\n                else\r\n                {\r\n                    o.WriteLine();\r\n                    o.Write(new string(' ', OptionWidth));\r\n                }\r\n\r\n                var indent = false;\r\n                var prefix = new string(' ', OptionWidth + 2);\r\n                foreach (string line in GetLines(GetDescription(p.Description)))\r\n                {\r\n                    if (indent)\r\n                        o.Write(prefix);\r\n                    o.WriteLine(line);\r\n                    indent = true;\r\n                }\r\n            }\r\n        }\r\n\r\n        private static bool WriteOptionPrototype(TextWriter o, Option p, ref int written)\r\n        {\r\n            var names = p.Names;\r\n\r\n            int i = GetNextOptionIndex(names, 0);\r\n            if (i == names.Length)\r\n                return false;\r\n\r\n            if (names[i].Length == 1)\r\n            {\r\n                Write(o, ref written, \"  
-\");\r\n                Write(o, ref written, names[0]);\r\n            }\r\n            else\r\n            {\r\n                Write(o, ref written, \"      --\");\r\n                Write(o, ref written, names[0]);\r\n            }\r\n\r\n            for (i = GetNextOptionIndex(names, i + 1);\r\n                i < names.Length; i = GetNextOptionIndex(names, i + 1))\r\n            {\r\n                Write(o, ref written, \", \");\r\n                Write(o, ref written, names[i].Length == 1 ? \"-\" : \"--\");\r\n                Write(o, ref written, names[i]);\r\n            }\r\n\r\n            if (p.OptionValueType != OptionValueType.Optional && p.OptionValueType != OptionValueType.Required) return true;\r\n\r\n            Write(o, ref written, \" \");\r\n            if (p.OptionValueType == OptionValueType.Optional)\r\n            {\r\n                Write(o, ref written, \"[\");\r\n            }\r\n            Write(o, ref written, \"<\" + GetArgumentName(0, p.MaxValueCount, p.Description) + '>');\r\n            string sep = p.ValueSeparators != null && p.ValueSeparators.Length > 0\r\n                ? 
p.ValueSeparators[0]\r\n                : \" \";\r\n            for (var c = 1; c < p.MaxValueCount; ++c)\r\n            {\r\n                Write(o, ref written, sep + GetArgumentName(c, p.MaxValueCount, p.Description));\r\n            }\r\n            if (p.OptionValueType == OptionValueType.Optional)\r\n            {\r\n                Write(o, ref written, \"]\");\r\n            }\r\n            return true;\r\n        }\r\n\r\n        private static int GetNextOptionIndex(IReadOnlyList<string> names, int i)\r\n        {\r\n            while (i < names.Count && names[i] == \"<>\")\r\n            {\r\n                ++i;\r\n            }\r\n            return i;\r\n        }\r\n\r\n        private static void Write(TextWriter o, ref int n, string s)\r\n        {\r\n            n += s.Length;\r\n            o.Write(s);\r\n        }\r\n\r\n        private static string GetArgumentName(int index, int maxIndex, string description)\r\n        {\r\n            if (description == null)\r\n                return maxIndex == 1 ? \"VALUE\" : \"VALUE\" + (index + 1);\r\n            var nameStart = maxIndex == 1 ? new[] { \"{0:\", \"{\" } : new[] { \"{\" + index + \":\" };\r\n            foreach (string t in nameStart)\r\n            {\r\n                int start, j = 0;\r\n                do\r\n                {\r\n                    start = description.IndexOf(t, j, StringComparison.Ordinal);\r\n                } while (start >= 0 && j != 0 && description[j++ - 1] == '{');\r\n                if (start == -1)\r\n                    continue;\r\n                int end = description.IndexOf(\"}\", start, StringComparison.Ordinal);\r\n                if (end == -1)\r\n                    continue;\r\n                return description.Substring(start + t.Length, end - start - t.Length);\r\n            }\r\n            return maxIndex == 1 ? 
\"VALUE\" : \"VALUE\" + (index + 1);\r\n        }\r\n\r\n        private static string GetDescription(string description)\r\n        {\r\n            if (description == null) return string.Empty;\r\n\r\n            StringBuilder sb = StringBuilderPool.Get();\r\n            int start        = -1;\r\n\r\n            for (var position = 0; position < description.Length; ++position)\r\n            {\r\n                position = ParseDescription(description, position, sb, ref start);\r\n            }\r\n\r\n            return StringBuilderPool.GetStringAndReturn(sb);\r\n        }\r\n\r\n        private static int ParseDescription(string description, int position, StringBuilder sb, ref int start)\r\n        {\r\n            switch (description[position])\r\n            {\r\n                case '{':\r\n                    if (position == start)\r\n                    {\r\n                        sb.Append('{');\r\n                        start = -1;\r\n                        break;\r\n                    }\r\n                    if (start < 0) start = position + 1;\r\n                    break;\r\n\r\n                case '}':\r\n                    if (start < 0)\r\n                    {\r\n                        if (position + 1 == description.Length || description[position + 1] != '}')\r\n                            throw new InvalidOperationException(\"Invalid option description: \" + description);\r\n                        ++position;\r\n                        sb.Append(\"}\");\r\n                        break;\r\n                    }\r\n                    sb.Append(description.Substring(start, position - start));\r\n                    start = -1;\r\n                    break;\r\n\r\n                case ':':\r\n                    if (start < 0)\r\n                    {\r\n                        sb.Append(description[position]);\r\n                        break;\r\n                    }\r\n                    start = position + 1;\r\n                    
break;\r\n\r\n                default:\r\n                    if (start < 0) sb.Append(description[position]);\r\n                    break;\r\n            }\r\n\r\n            return position;\r\n        }\r\n\r\n        private static IEnumerable<string> GetLines(string description)\r\n        {\r\n            if (string.IsNullOrEmpty(description))\r\n            {\r\n                yield return string.Empty;\r\n                yield break;\r\n            }\r\n\r\n            description = description.Trim();\r\n            int length = 80 - OptionWidth - 1;\r\n            int start = 0, end;\r\n            do\r\n            {\r\n                end = GetLineEnd(start, length, description);\r\n                char c = description[end - 1];\r\n                if (char.IsWhiteSpace(c))\r\n                    --end;\r\n                bool writeContinuation = end != description.Length && !IsEolChar(c);\r\n                string line = description.Substring(start, end - start) +\r\n                              (writeContinuation ? 
\"-\" : \"\");\r\n                yield return line;\r\n                start = end;\r\n                if (char.IsWhiteSpace(c))\r\n                    ++start;\r\n                length = 80 - OptionWidth - 2 - 1;\r\n            } while (end < description.Length);\r\n        }\r\n\r\n        private static bool IsEolChar(char c)\r\n        {\r\n            return !char.IsLetterOrDigit(c);\r\n        }\r\n\r\n        private static int GetLineEnd(int start, int length, string description)\r\n        {\r\n            int end = Math.Min(start + length, description.Length);\r\n            int sep = -1;\r\n            for (int i = start + 1; i < end; ++i)\r\n            {\r\n                if (description[i] == '\\n')\r\n                    return i + 1;\r\n                if (IsEolChar(description[i]))\r\n                    sep = i + 1;\r\n            }\r\n            if (sep == -1 || end == description.Length)\r\n                return end;\r\n            return sep;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CommandLine/Utilities/Benchmark.cs",
    "content": "﻿using System;\n\nnamespace CommandLine.Utilities\n{\n    public sealed class Benchmark\n    {\n        private DateTime _startTime;\n\n        public Benchmark() => Reset();\n\n        public TimeSpan GetElapsedTime()\n        {\n            var stopTime = DateTime.Now;\n            return new TimeSpan(stopTime.Ticks - _startTime.Ticks);\n        }\n\n        public static string ToHumanReadable(TimeSpan span)\n        {\n            return span.Days > 0\n                ? $\"{span.Days}:{span.Hours:D2}:{span.Minutes:D2}:{span.Seconds:D2}.{span.Milliseconds/100:D1}\"\n                : $\"{span.Hours:D2}:{span.Minutes:D2}:{span.Seconds:D2}.{span.Milliseconds/100:D1}\";\n        }\n\n        public static double GetElapsedIterationsPerSecond(TimeSpan span, int numUnits) => numUnits / span.TotalSeconds;\n\n        public void Reset() => _startTime = DateTime.Now;\n    }\n}"
  },
  {
    "path": "CommandLine/Utilities/CommandLineUtilities.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.Reflection;\r\n\r\nnamespace CommandLine.Utilities\r\n{\r\n    public static class CommandLineUtilities\r\n    {\r\n        private static readonly string Copyright;\r\n        public static readonly string Title;\r\n        public static readonly string InformationalVersion;\r\n        public static readonly string Version;\r\n\r\n        static CommandLineUtilities()\r\n        {\r\n            var executingAssembly = Assembly.GetExecutingAssembly();\r\n\r\n            Copyright            = GetCopyright(executingAssembly);\r\n            Version              = GetVersion(executingAssembly);\r\n            InformationalVersion = GetInformationalVersion(executingAssembly);\r\n            \r\n            var entryAssembly = Assembly.GetEntryAssembly();\r\n            \r\n            Title = GetTitle(entryAssembly);\r\n        }\r\n\r\n        private static string GetCopyright(Assembly entryAssembly)\r\n        {\r\n            var attr = GetAssemblyAttributes<AssemblyCopyrightAttribute>(entryAssembly);\r\n            return attr?.Copyright.Replace(\"©\", \"(c)\") ?? 
$\"(c) {DateTime.Now.Year} Illumina, Inc.\";\r\n        }\r\n\r\n        public static string GetVersion(Assembly entryAssembly)\r\n        {\r\n            var attr = GetAssemblyAttributes<AssemblyFileVersionAttribute>(entryAssembly);\r\n            return attr?.Version;\r\n        }\r\n\r\n        private static string GetInformationalVersion(Assembly entryAssembly)\r\n        {\r\n            var attr = GetAssemblyAttributes<AssemblyInformationalVersionAttribute>(entryAssembly);\r\n            return attr?.InformationalVersion;\r\n        }\r\n\r\n        private static string GetTitle(Assembly entryAssembly)\r\n        {\r\n            var attr = GetAssemblyAttributes<AssemblyTitleAttribute>(entryAssembly);\r\n            return attr?.Title;\r\n        }\r\n\r\n        private static T GetAssemblyAttributes<T>(Assembly entryAssembly)\r\n        {\r\n            var attrs = entryAssembly.GetCustomAttributes(typeof(T)) as T[];\r\n            // ReSharper disable once PossibleNullReferenceException\r\n            return attrs.Length == 0 ? 
default : attrs[0];\r\n        }\r\n\r\n        /// <summary>\r\n        /// Displays the command-line banner for this program\r\n        /// </summary>\r\n        public static void DisplayBanner(string author)\r\n        {\r\n            // create the top and bottom lines\r\n            const int lineLength = 75;\r\n            var line = new string('-', lineLength);\r\n\r\n            // create the filler string\r\n            int fillerLength  = lineLength - Title.Length - Copyright.Length;\r\n            int fillerLength2 = lineLength - author.Length - InformationalVersion.Length;\r\n\r\n            if (fillerLength < 1)\r\n            {\r\n                throw new InvalidOperationException(\"Unable to display the program banner, the program name is too long.\");\r\n            }\r\n\r\n            if (fillerLength2 < 1)\r\n            {\r\n                throw new InvalidOperationException(\"Unable to display the program banner, the author name and version string is too long.\");\r\n            }\r\n\r\n            var filler  = new string(' ', fillerLength);\r\n            var filler2 = new string(' ', fillerLength2);\r\n\r\n            // display the actual banner\r\n            Console.WriteLine(line);\r\n            Console.ForegroundColor = ConsoleColor.Magenta;\r\n            Console.Write(Title);\r\n            Console.ResetColor();\r\n            Console.WriteLine(\"{0}{1}\", filler, Copyright);\r\n            Console.WriteLine(\"{0}{1}{2}\", author, filler2, InformationalVersion);\r\n            Console.WriteLine(\"{0}\\n\", line);\r\n        }\r\n\r\n        public static string CommandFileName => Path.GetFileName(Environment.GetCommandLineArgs()[0]);\r\n    }\r\n}\r\n"
  },
  {
    "path": "CommandLine/Utilities/Help.cs",
    "content": "﻿using System;\nusing CommandLine.NDesk.Options;\n\nnamespace CommandLine.Utilities\n{\n    public static class Help\n    {\n        public static void Show(OptionSet ops, string commonOptions, string description)\n        {\n            OutputHelper.WriteLabel(\"USAGE: \");\n            Console.WriteLine(\"dotnet {0} {1}\", OutputHelper.GetExecutableName(), commonOptions);\n            Console.WriteLine(\"{0}\\n\", description);\n\n            OutputHelper.WriteLabel(\"OPTIONS:\");\n            Console.WriteLine();\n            ops.WriteOptionDescriptions(Console.Out);\n        }\n    }\n}\n"
  },
  {
    "path": "CommandLine/Utilities/MemoryUtilities.cs",
    "content": "﻿using System.Diagnostics;\n\nnamespace CommandLine.Utilities\n{\n    public static class MemoryUtilities\n    {\n        // ReSharper disable InconsistentNaming\n        private const long NumBytesInGB = 1073741824;\n        private const long NumBytesInMB = 1048576;\n        private const long NumBytesInKB = 1024;\n        // ReSharper restore InconsistentNaming\n\n        /// <summary>\n        /// shows the peak memory usage for the current process\n        /// </summary>\n        public static long GetPeakMemoryUsage()\n        {\n            return Process.GetCurrentProcess().PeakWorkingSet64;\n        }\n\n        /// <summary>\n        /// converts the number of bytes used to a human readable format\n        /// </summary>\n        public static string ToHumanReadable(long numBytes)\n        {\n            if (numBytes > NumBytesInGB)\n            {\n                double gigaBytes = numBytes / (double)NumBytesInGB;\n                return $\"{gigaBytes:0.000} GB\";\n            }\n\n            if (numBytes > NumBytesInMB)\n            {\n                double megaBytes = numBytes / (double)NumBytesInMB;\n                return $\"{megaBytes:0.0} MB\";\n            }\n\n            // ReSharper disable once InvertIf\n            if (numBytes > NumBytesInKB)\n            {\n                double kiloBytes = numBytes / (double)NumBytesInKB;\n                return $\"{kiloBytes:0.0} KB\";\n            }\n\n            return $\"{numBytes} B\";\n        }\n    }\n}\n"
  },
  {
    "path": "CommandLine/Utilities/OutputHelper.cs",
    "content": "﻿using System;\nusing System.IO;\n\nnamespace CommandLine.Utilities\n{\n    public static class OutputHelper\n    {\n        public static void WriteLabel(string label)\n        {\n            Console.ForegroundColor = ConsoleColor.DarkGreen;\n            Console.Write(label);\n            Console.ResetColor();\n        }\n\n        public static string GetExecutableName()\n        {\n            return Path.GetFileName(Environment.GetCommandLineArgs()[0]);\n        }\n    }\n}\n"
  },
  {
    "path": "CommandLine/VersionProviders/DefaultVersionProvider.cs",
    "content": "﻿using VariantAnnotation.Interface.Providers;\n\nnamespace CommandLine.VersionProviders\n{\n    public sealed class DefaultVersionProvider : IVersionProvider\n    {\n        public string DataVersion { get; } = string.Empty;\n    }\n}\n"
  },
  {
    "path": "CommandLine/VersionProviders/IVersionProvider.cs",
    "content": "﻿namespace VariantAnnotation.Interface.Providers\n{\n\tpublic interface IVersionProvider\n\t{\n\t    string DataVersion { get; }\n\t}\n}"
  },
  {
    "path": "CommonAssemblyInfo.props",
    "content": "﻿<Project xmlns=\"http://schemas.microsoft.com/developer/msbuild/2003\">\n  <PropertyGroup>\n    <Company>Illumina</Company>\n    <Copyright>© 2022 Illumina, Inc.</Copyright>\n    <FileVersion>3.18.1</FileVersion>\n    <AssemblyVersion>3.18.1</AssemblyVersion>\n    <Version>3.18.1</Version>\n    <Authors>Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al</Authors>\n  </PropertyGroup>\n</Project>\n"
  },
  {
    "path": "Compression/Algorithms/ICompressionAlgorithm.cs",
    "content": "﻿namespace Compression.Algorithms\n{\n    public interface ICompressionAlgorithm\n    {        \n        int Compress(byte[] source, int srcLength, byte[] destination, int destLength);\n        int Decompress(byte[] source, int srcLength, byte[] destination, int destLength);\n        int GetDecompressedLength(byte[] source, int srcLength);\n        int GetCompressedBufferBounds(int srcLength);\n    }\n}\n"
  },
  {
    "path": "Compression/Algorithms/Zlib.cs",
    "content": "﻿using System;\r\nusing System.Runtime.InteropServices;\r\nusing Compression.Utilities;\r\n\r\nnamespace Compression.Algorithms\r\n{\r\n    public sealed class Zlib : ICompressionAlgorithm\r\n    {\r\n        private readonly int _compressionLevel;\r\n\r\n        public Zlib(int compressionLevel = 1)\r\n        {\r\n            _compressionLevel = compressionLevel;\r\n            LibraryUtilities.CheckLibrary();\r\n        }\r\n\r\n        public int Compress(byte[] source, int srcLength, byte[] destination, int destLength)\r\n        {\r\n            if (destination == null || GetCompressedBufferBounds(srcLength) > destination.Length)\r\n            {\r\n                throw new InvalidOperationException(\"Zlib: Insufficient memory in destination buffer\");\r\n            }\r\n            \r\n            return SafeNativeMethods.bgzf_compress(destination, destLength, source, srcLength, _compressionLevel);\r\n        }\r\n\r\n        public int Decompress(byte[] source, int srcLength, byte[] destination, int destLength)\r\n        {\r\n            if (destination == null)\r\n            {\r\n                throw new InvalidOperationException(\"Zlib: Insufficient memory in destination buffer\");\r\n            }\r\n\r\n            return SafeNativeMethods.bgzf_decompress(destination, destLength, source, srcLength);\r\n        }\r\n\r\n        public int GetDecompressedLength(byte[] source, int srcLength)\r\n        {\r\n            int pos = srcLength - 4;\r\n            return source[pos + 3] << 24 | source[pos + 2] << 16 | source[pos + 1] << 8 | source[pos];\r\n        }\r\n\r\n        public int GetCompressedBufferBounds(int srcLength) => (int)(srcLength * 1.06 + 28);\r\n\r\n        private static class SafeNativeMethods\r\n        {\r\n            [DllImport(\"BlockCompression\", CallingConvention = CallingConvention.Cdecl)]\r\n            public static extern int bgzf_decompress(byte[] uncompressedBlock, int uncompressedSize, byte[] 
compressedBlock, int compressedSize);\r\n\r\n            [DllImport(\"BlockCompression\", CallingConvention = CallingConvention.Cdecl)]\r\n            public static extern int bgzf_compress(byte[] compressedBlock, int compressedLen, byte[] uncompressedBlock, int uncompressedLen, int compressionLevel);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Compression/Algorithms/Zstandard.cs",
    "content": "﻿using System;\r\nusing System.Runtime.InteropServices;\r\nusing Compression.Utilities;\r\n\r\nnamespace Compression.Algorithms\r\n{\r\n    public sealed class Zstandard : ICompressionAlgorithm\r\n    {\r\n        private readonly int _compressionLevel;\r\n\r\n        public Zstandard(int compressionLevel = 17)\r\n        {\r\n            _compressionLevel = compressionLevel;\r\n            LibraryUtilities.CheckLibrary();\r\n        }\r\n\r\n        public int Compress(byte[] source, int srcLength, byte[] destination, int destLength)\r\n        {\r\n            if (destination == null || GetCompressedBufferBounds(srcLength) > destination.Length)\r\n            {\r\n                throw new InvalidOperationException(\"Zstandard: Insufficient memory in destination buffer\");\r\n            }\r\n\r\n            return (int)SafeNativeMethods.ZSTD_compress(destination, (ulong)destLength, source, (ulong)srcLength, _compressionLevel);\r\n        }\r\n\r\n        public int Decompress(byte[] source, int srcLength, byte[] destination, int destLength)\r\n        {\r\n            if (destination == null)\r\n            {\r\n                throw new InvalidOperationException(\"Zstandard: Insufficient memory in destination buffer\");\r\n            }\r\n\r\n            return (int)SafeNativeMethods.ZSTD_decompress(destination, (ulong)destLength, source, (ulong)srcLength);\r\n        }\r\n\r\n        public int GetDecompressedLength(byte[] source, int srcLength) => (int)SafeNativeMethods.ZSTD_getDecompressedSize(source, srcLength);\r\n\r\n        // empirically derived via polynomial regression with additional padding added\r\n        public int GetCompressedBufferBounds(int srcLength) => srcLength + 32;\r\n\r\n        private static class SafeNativeMethods\r\n        {\r\n            [DllImport(\"BlockCompression\", CallingConvention = CallingConvention.Cdecl)]\r\n            public static extern ulong ZSTD_compress(byte[] destination, ulong destinationLen, 
byte[] source, ulong sourceLen, int compressionLevel);\r\n\r\n            [DllImport(\"BlockCompression\", CallingConvention = CallingConvention.Cdecl)]\r\n            public static extern ulong ZSTD_decompress(byte[] destination, ulong destinationLen, byte[] source, ulong sourceLen);\r\n\r\n            [DllImport(\"BlockCompression\", CallingConvention = CallingConvention.Cdecl)]\r\n            public static extern ulong ZSTD_getDecompressedSize(byte[] source, int sourceLen);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Compression/AssemblyInfo.cs",
    "content": "﻿using System.Runtime.CompilerServices;\n\n[assembly: InternalsVisibleTo(\"UnitTests\")]"
  },
  {
    "path": "Compression/Compression.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\r\n  <PropertyGroup>\r\n    <TargetFramework>net6.0</TargetFramework>\r\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\r\n    \r\n  </PropertyGroup>\r\n  <ItemGroup>\r\n    <ProjectReference Include=\"..\\ErrorHandling\\ErrorHandling.csproj\" />\r\n    <ProjectReference Include=\"..\\IO\\IO.csproj\" />\r\n    <ProjectReference Include=\"..\\OptimizedCore\\OptimizedCore.csproj\" />\r\n  </ItemGroup>\r\n  <ItemGroup>\r\n    <None Include=\"Packages\\BlockCompression\\BlockCompression.dll\" Link=\"BlockCompression.dll\">\r\n      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>\r\n    </None>\r\n    <None Include=\"Packages\\BlockCompression\\libBlockCompression.so\" Link=\"libBlockCompression.so\">\r\n      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>\r\n    </None>\r\n    <None Include=\"Packages\\BlockCompression\\libBlockCompression.dylib\" Link=\"libBlockCompression.dylib\">\r\n      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>\r\n    </None>\r\n  </ItemGroup>\r\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\r\n</Project>\r\n"
  },
  {
    "path": "Compression/DataStructures/Block.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing Compression.Algorithms;\r\nusing Compression.FileHandling;\r\nusing ErrorHandling.Exceptions;\r\n\r\nnamespace Compression.DataStructures\r\n{\r\n    public sealed class Block\r\n    {\r\n        private readonly ICompressionAlgorithm _compressionAlgorithm;\r\n        private readonly BlockHeader _header;\r\n\r\n        private readonly byte[] _compressedBlock;\r\n        private readonly byte[] _uncompressedBlock;\r\n\r\n        public long FileOffset { get; private set; }\r\n        public int Offset { get; internal set; }\r\n\r\n        internal const int DefaultSize = 16777216;\r\n        private readonly int _size;\r\n        private readonly int _compressedBlockSize;\r\n\r\n        public bool IsFull      => Offset == _size;\r\n        public bool HasMoreData => Offset < _header.NumUncompressedBytes;\r\n\r\n        public Block(ICompressionAlgorithm compressionAlgorithm, int size = DefaultSize)\r\n        {\r\n            _compressionAlgorithm = compressionAlgorithm;\r\n            Offset                = 0;\r\n            _size                 = size;\r\n\r\n            _uncompressedBlock   = new byte[_size];\r\n            _compressedBlockSize = compressionAlgorithm.GetCompressedBufferBounds(_size);\r\n            _compressedBlock     = new byte[_compressedBlockSize];\r\n            _header              = new BlockHeader();\r\n        }\r\n\r\n        public int CopyTo(byte[] array, int offset, int count)\r\n        {\r\n            int copyLength = Math.Min(_size - Offset, count);\r\n            if (copyLength == 0) return 0;\r\n\r\n            Buffer.BlockCopy(array, offset, _uncompressedBlock, Offset, copyLength);\r\n            Offset += copyLength;\r\n\r\n            return copyLength;\r\n        }\r\n\r\n        public int CopyFrom(byte[] array, int offset, int count)\r\n        {\r\n            int copyLength = Math.Min(_header.NumUncompressedBytes - Offset, count);\r\n            if 
(copyLength == 0) return 0;\r\n\r\n            Buffer.BlockCopy(_uncompressedBlock, Offset, array, offset, copyLength);\r\n            Offset += copyLength;\r\n\r\n            return copyLength;\r\n        }\r\n\r\n        public void Write(Stream stream)\r\n        {\r\n            _header.NumUncompressedBytes = Offset;\r\n\r\n            _header.NumCompressedBytes = _compressionAlgorithm.Compress(_uncompressedBlock, _header.NumUncompressedBytes,\r\n                _compressedBlock, _compressedBlockSize);\r\n\r\n            if (_header.NumCompressedBytes > _header.NumUncompressedBytes)\r\n            {\r\n                _header.NumCompressedBytes = -1;\r\n                _header.Write(stream);\r\n                stream.Write(_uncompressedBlock, 0, _header.NumUncompressedBytes);\r\n            }\r\n            else\r\n            {\r\n                _header.Write(stream);\r\n                stream.Write(_compressedBlock, 0, _header.NumCompressedBytes);\r\n            }\r\n\r\n            Offset = 0;\r\n        }\r\n\r\n        public void WriteEof(Stream stream)\r\n        {\r\n            _header.NumUncompressedBytes = -1;\r\n            _header.NumCompressedBytes   = -1;\r\n            _header.Write(stream);\r\n        }\r\n\r\n        public int Read(Stream stream)\r\n        {\r\n            FileOffset = stream.Position;\r\n\r\n            _header.Read(stream);\r\n            if (_header.IsEmpty) return -1;\r\n\r\n            int numBytesRead = _header.NumCompressedBytes == -1\r\n                ? 
ReadUncompressedBlock(stream)\r\n                : ReadCompressedBlock(stream);\r\n\r\n            Offset = 0;\r\n\r\n            return BlockHeader.HeaderSize + numBytesRead;\r\n        }\r\n\r\n        private int ReadCompressedBlock(Stream stream)\r\n        {\r\n            int numBytesRead = stream.Read(_compressedBlock, 0, _header.NumCompressedBytes);\r\n            if (numBytesRead != _header.NumCompressedBytes)\r\n            {\r\n                throw new IOException($\"Expected {_header.NumCompressedBytes} bytes from the block, but received only {numBytesRead} bytes.\");\r\n            }\r\n\r\n            int numUncompressedBytes = _compressionAlgorithm.Decompress(_compressedBlock, _header.NumCompressedBytes, _uncompressedBlock, _size);\r\n            if (numUncompressedBytes != _header.NumUncompressedBytes)\r\n            {\r\n                throw new CompressionException($\"Expected {_header.NumUncompressedBytes} bytes after decompression, but found only {numUncompressedBytes} bytes.\");\r\n            }\r\n\r\n            return numBytesRead;\r\n        }\r\n\r\n        private int ReadUncompressedBlock(Stream stream)\r\n        {\r\n            int numBytesRead = stream.Read(_uncompressedBlock, 0, _header.NumUncompressedBytes);\r\n            if (numBytesRead != _header.NumUncompressedBytes)\r\n            {\r\n                throw new IOException($\"Expected {_header.NumUncompressedBytes} bytes from the uncompressed block, but received only {numBytesRead} bytes.\");\r\n            }\r\n\r\n            return numBytesRead;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Compression/FileHandling/BgzBlockReader.cs",
    "content": "using System;\nusing System.IO;\nusing ErrorHandling.Exceptions;\n\nnamespace Compression.FileHandling\n{\n    public sealed class BgzBlockReader:IDisposable\n    {\n        private readonly string _filePath;\n        private readonly Stream _stream;\n        private readonly bool _leaveStreamOpen;\n        \n        public long Position => _stream.Position;\n\n        public BgzBlockReader(Stream stream, bool leaveStreamOpen = false)\n        {\n            _filePath = \"(stream)\";\n            _stream = stream;\n            _leaveStreamOpen = leaveStreamOpen;\n        }\n        \n        //read the next compressed block into provided buffer\n        public int ReadCompressedBlock(byte[] buffer)\n        {\n            if (buffer.Length < BlockGZipStream.BlockGZipFormatCommon.MaxBlockSize)\n                throw new InsufficientMemoryException($\"Pease provide a buffer at least {BlockGZipStream.BlockGZipFormatCommon.MaxBlockSize} bytes in size.\");\n            int headerSize = _stream.Read(buffer, 0, BlockGZipStream.BlockGZipFormatCommon.BlockHeaderLength);\n\n            // handle the case where no data was read\n            if (headerSize == 0) return 0;\n            \n            // check the header\n            if (!BlockGZipStream.HasValidHeader(headerSize, buffer))\n            {\n                throw new CompressionException($\"Found an invalid header when reading the GZip block ({_filePath})\");\n            }\n\n            int blockLength = BitConverter.ToUInt16(buffer, 16) + 1;\n            int expectedDataSize   = blockLength  - BlockGZipStream.BlockGZipFormatCommon.BlockHeaderLength;\n\n            var dataSize = _stream.Read(buffer, BlockGZipStream.BlockGZipFormatCommon.BlockHeaderLength, expectedDataSize);\n\n            // handle unexpected truncation\n            if (expectedDataSize != dataSize)\n            {\n                throw new CompressionException($\"Found unexpected truncation when reading the GZip block 
({_filePath})\");\n            }\n\n            return headerSize+dataSize;\n        }\n\n        public void Dispose()\n        {\n            if (_leaveStreamOpen) return;\n            _stream?.Dispose();\n        }\n    }\n}"
  },
  {
    "path": "Compression/FileHandling/BgzfBlock.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.Text;\r\nusing Compression.Algorithms;\r\nusing ErrorHandling.Exceptions;\r\n\r\nnamespace Compression.FileHandling\r\n{\r\n    public sealed class BgzfBlock\r\n    {\r\n        private const int MaxBlockSize             = 65536;\r\n        private readonly byte[] _compressedBlock   = new byte[MaxBlockSize];\r\n        private readonly byte[] _uncompressedBlock = new byte[MaxBlockSize];\r\n        private readonly Zlib _bgzf                = new Zlib();\r\n\r\n        public string Read(Stream stream)\r\n        {\r\n            int count = stream.Read(_compressedBlock, 0, BlockGZipStream.BlockGZipFormatCommon.BlockHeaderLength);\r\n            if (count == 0) return string.Empty;\r\n\r\n            if (!BlockGZipStream.HasValidHeader(count, _compressedBlock))\r\n                throw new InvalidDataException(\"Found an invalid header when reading the GZip block\");\r\n\r\n            int blockLength = BitConverter.ToUInt16(_compressedBlock, 16) + 1;\r\n            int remaining   = blockLength - BlockGZipStream.BlockGZipFormatCommon.BlockHeaderLength;\r\n\r\n            count = stream.Read(_compressedBlock, BlockGZipStream.BlockGZipFormatCommon.BlockHeaderLength, remaining);\r\n\r\n            if (count != remaining) throw new InvalidDataException(\"Found unexpected truncation when reading the GZip block\");\r\n\r\n            count = _bgzf.Decompress(_compressedBlock, blockLength, _uncompressedBlock, MaxBlockSize);\r\n\r\n            if (count < 0) throw new CompressionException(\"Encountered an error when uncompressing the GZip block\");\r\n            return Encoding.UTF8.GetString(_uncompressedBlock, 0, count);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Compression/FileHandling/BgzipTextReader.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.Reflection;\r\nusing System.Text;\r\n\r\nnamespace Compression.FileHandling\r\n{\r\n    public sealed class BgzipTextReader : IDisposable\r\n    {\r\n        private readonly bool _leaveOpen;\r\n        private readonly StreamReader _reader;\r\n        private readonly FieldInfo _charPosInfo;\r\n        private readonly FieldInfo _charLenInfo;\r\n\r\n        public BgzipTextReader(BlockGZipStream stream, bool leaveOpen = false)\r\n        {\r\n            _leaveOpen = leaveOpen;\r\n            _reader    = new StreamReader(stream, Encoding.UTF8, leaveOpen);\r\n\r\n            Type readerType = _reader.GetType();\r\n            _charPosInfo    = readerType.GetField(\"_charPos\", BindingFlags.NonPublic | BindingFlags.Instance);\r\n            _charLenInfo    = readerType.GetField(\"_charLen\", BindingFlags.NonPublic | BindingFlags.Instance);\r\n        }\r\n\r\n        public long Position\r\n        {\r\n            get\r\n            {\r\n                var bufferPos  = (int)_charPosInfo.GetValue(_reader);\r\n                var bufferSize = (int)_charLenInfo.GetValue(_reader);\r\n                return _reader.BaseStream.Position - bufferSize + bufferPos;\r\n            }\r\n        }\r\n\r\n        public string ReadLine() => _reader.ReadLine();\r\n\r\n        public void Dispose()\r\n        {\r\n            if (!_leaveOpen) _reader.Dispose();\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Compression/FileHandling/BgzipTextWriter.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.Text;\r\nusing OptimizedCore;\r\n\r\nnamespace Compression.FileHandling\r\n{\r\n    public sealed class BgzipTextWriter : StreamWriter, IDisposable\r\n    {\r\n        private readonly BlockGZipStream _stream;\r\n        private readonly byte[] _buffer;\r\n        private int _bufferIndex;\r\n        \r\n        private const int             CharBufferSize = 8 * 1024 * 1024;\r\n        private       char[]          _charBuffer;\r\n        private       byte[]          _byteBuffer;\r\n\r\n        private const int BufferSize = BlockGZipStream.BlockGZipFormatCommon.BlockSize;\r\n\r\n        private static readonly UTF8Encoding Utf8WithoutBom = new UTF8Encoding(false);\r\n\r\n        public long Position => _stream.Position + _bufferIndex;\r\n\r\n        public BgzipTextWriter(BlockGZipStream stream) : base(stream, Utf8WithoutBom, BufferSize, true)\r\n        {\r\n            _buffer = new byte[BufferSize];\r\n            _stream = stream;\r\n\r\n            _charBuffer = ExpandableArray<char>.Get(CharBufferSize);\r\n            _byteBuffer = ExpandableArray<byte>.Get(CharBufferSize * 2);\r\n        }\r\n\r\n        public override void Flush()\r\n        {\r\n            if (_bufferIndex == 0) return;\r\n            _stream.Write(_buffer, 0, _bufferIndex);\r\n            //here we want to close the gzip blockB\r\n            _stream.CloseBlock();\r\n            _bufferIndex = 0;\r\n        }\r\n\r\n        public override void WriteLine() => Write(\"\\n\");\r\n\r\n        public override void WriteLine(string value) => Write(value + \"\\n\");\r\n\r\n        public override void Write(string value)\r\n        {\r\n            if (string.IsNullOrEmpty(value)) return;\r\n            var lineBytes = Encoding.UTF8.GetBytes(value);\r\n\r\n            WriteBytes(lineBytes, lineBytes.Length);\r\n        }\r\n\r\n        public override void Write(StringBuilder sb)\r\n        {\r\n            if (sb == null || 
sb.Length == 0) return;\r\n            \r\n            if (sb.Length > _charBuffer.Length)\r\n            {\r\n                _charBuffer = ExpandableArray<char>.Resize(_charBuffer, sb.Length * 2);\r\n                _byteBuffer = ExpandableArray<byte>.Resize(_byteBuffer, _charBuffer.Length     * 2);\r\n            }\r\n\r\n            sb.CopyTo(0, _charBuffer, 0, sb.Length);\r\n            var length = Encoding.UTF8.GetBytes(_charBuffer, 0, sb.Length, _byteBuffer, 0);\r\n\r\n            WriteBytes(_byteBuffer, length);\r\n        }\r\n\r\n        private void WriteBytes(byte[] lineBytes, int length)\r\n        {\r\n            if (length <= BufferSize - _bufferIndex)\r\n            {\r\n                Array.Copy(lineBytes, 0, _buffer, _bufferIndex, length);\r\n                _bufferIndex += length;\r\n            }\r\n            else\r\n            {\r\n                // fill up the buffer\r\n                Array.Copy(lineBytes, 0, _buffer, _bufferIndex, BufferSize - _bufferIndex);\r\n                int lineIndex = BufferSize                                 - _bufferIndex;\r\n\r\n                // write it out to the stream\r\n                _stream.Write(_buffer, 0, BufferSize);\r\n                _bufferIndex = 0;\r\n\r\n                while (lineIndex + BufferSize <= length)\r\n                {\r\n                    _stream.Write(lineBytes, lineIndex, BufferSize);\r\n                    lineIndex += BufferSize;\r\n                }\r\n\r\n                // the leftover bytes should be saved in buffer\r\n                if (lineIndex >= length) return;\r\n                Array.Copy(lineBytes, lineIndex, _buffer, 0, length - lineIndex);\r\n                _bufferIndex = length - lineIndex;\r\n            }\r\n        }\r\n\r\n        public new void Dispose()\r\n        {\r\n            Flush();\r\n            _stream.Dispose();\r\n            ExpandableArray<char>.Return(_charBuffer);\r\n            ExpandableArray<byte>.Return(_byteBuffer);\r\n     
   }\r\n    }\r\n}"
  },
  {
    "path": "Compression/FileHandling/BlockGZipStream.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.IO.Compression;\r\nusing Compression.Algorithms;\r\nusing ErrorHandling.Exceptions;\r\n\r\nnamespace Compression.FileHandling\r\n{\r\n    // BGZF/GZIP header (specialized from RFC 1952; little endian):\r\n    // +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+\r\n    // | 31|139|  8|  4|              0|  0|255|      6| 66| 67|      2|BLK_LEN|\r\n    // +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+\r\n\r\n    // BGZF/GZIP footer:\r\n    // +---+---+---+---+---+---+---+---+\r\n    // |            CRC|     Source len|\r\n    // +---+---+---+---+---+---+---+---+\r\n\r\n    public sealed class BlockGZipStream : Stream\r\n    {\r\n        private readonly byte[] _compressedBlock;\r\n        private readonly byte[] _uncompressedBlock;\r\n        private int _blockOffset;\r\n        private int _blockLength;\r\n        private long _blockAddress;\r\n\r\n        private readonly bool _isCompressor;\r\n        private readonly bool _leaveStreamOpen;\r\n\r\n        private readonly string _filePath;\r\n        private Stream _stream;\r\n        private readonly Zlib _bgzf;\r\n        private bool _isDisposed;\r\n\r\n        public static class BlockGZipFormatCommon\r\n        {\r\n            public const int BlockSize         = 65280;\r\n            public const int MaxBlockSize      = 65536;\r\n            public const int BlockHeaderLength = 18;\r\n        }\r\n\r\n        #region Stream\r\n\r\n        public override bool CanRead => _stream != null && !_isCompressor && _stream.CanRead;\r\n\r\n        public override bool CanWrite => _stream != null && _isCompressor && _stream.CanWrite;\r\n\r\n        public override bool CanSeek => _stream != null && !_isCompressor && _stream.CanSeek;\r\n\r\n        public override long Length => throw new NotSupportedException();\r\n\r\n        public override long Position\r\n       
 {\r\n            get => (_blockAddress << 16) | ((long)_blockOffset & 0xffff);\r\n            set => SeekVirtualFilePointer((ulong)value);\r\n        }\r\n\r\n        public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();\r\n\r\n        public override void SetLength(long value) => throw new NotSupportedException();\r\n\r\n        public override void Flush() => _stream.Flush();\r\n\r\n        protected override void Dispose(bool disposing)\r\n        {\r\n            if (_isDisposed) return;\r\n\r\n            try\r\n            {\r\n                if (_isCompressor)\r\n                {\r\n                    Flush(_blockOffset);\r\n\r\n                    // write an empty block (as EOF marker)\r\n                    Flush(0);\r\n                }\r\n\r\n                if (!_leaveStreamOpen)\r\n                {\r\n                    _stream.Dispose();\r\n                    _stream = null;\r\n                }\r\n\r\n                _isDisposed = true;\r\n            }\r\n            finally\r\n            {\r\n                base.Dispose(disposing);\r\n            }\r\n        }\r\n\r\n        #endregion\r\n\r\n        public BlockGZipStream(Stream stream, CompressionMode compressionMode, bool leaveStreamOpen = false, int compressionLevel = 5)\r\n        {\r\n            _filePath        = \"(stream)\";\r\n            _leaveStreamOpen = leaveStreamOpen;\r\n            _stream          = stream;\r\n\r\n            // sanity check: make sure the stream exists\r\n            if (stream == null) throw new ArgumentNullException(nameof(stream));\r\n\r\n            // sanity check: make sure we can use the stream for reading or writing\r\n            _isCompressor = compressionMode == CompressionMode.Compress;\r\n            if (_isCompressor  && !_stream.CanWrite) throw new CompressionException(\"A stream lacking write capability was provided to the block GZip compressor.\");\r\n            if (!_isCompressor && 
!_stream.CanRead)  throw new CompressionException(\"A stream lacking read capability was provided to the block GZip decompressor.\");\r\n\r\n            _bgzf              = new Zlib(compressionLevel);\r\n            _uncompressedBlock = new byte[BlockGZipFormatCommon.MaxBlockSize];\r\n            _compressedBlock   = new byte[_bgzf.GetCompressedBufferBounds(BlockGZipFormatCommon.MaxBlockSize)];\r\n        }\r\n\r\n        private void Flush(int uncompressedSize)\r\n        {\r\n            int blockLength = _bgzf.Compress(_uncompressedBlock, uncompressedSize, _compressedBlock, BlockGZipFormatCommon.MaxBlockSize);\r\n            _blockOffset    = 0;\r\n\r\n            _stream.Write(_compressedBlock, 0, blockLength);\r\n\t\t\t_blockAddress = _stream.Position;\t\r\n        }\r\n\r\n        public static bool HasValidHeader(int numHeaderBytes, IReadOnlyList<byte> header)\r\n        {\r\n            if (numHeaderBytes != BlockGZipFormatCommon.BlockHeaderLength) return false;\r\n\r\n            return header[0] == 31      &&\r\n                   header[1] == 139     &&\r\n                   header[2] == 8       &&\r\n                   (header[3] & 4) != 0 &&\r\n                   header[12] == 66     &&\r\n                   header[13] == 67;\r\n        }\r\n\r\n        private void ReadBlock()\r\n        {\r\n            long blockAddress = _stream.CanSeek ? 
_stream.Position : 0;\r\n            int count         = _stream.Read(_compressedBlock, 0, BlockGZipFormatCommon.BlockHeaderLength);\r\n\r\n            // handle the case where no data was read\r\n            if (count == 0)\r\n            {\r\n                _blockLength = 0;\r\n                return;\r\n            }\r\n\r\n            // check the header\r\n            if (!HasValidHeader(count, _compressedBlock))\r\n            {\r\n                throw new CompressionException($\"Found an invalid header when reading the GZip block ({_filePath})\");\r\n            }\r\n\r\n            int blockLength = BitConverter.ToUInt16(_compressedBlock, 16) + 1;\r\n            int remaining   = blockLength - BlockGZipFormatCommon.BlockHeaderLength;\r\n\r\n            count = _stream.Read(_compressedBlock, BlockGZipFormatCommon.BlockHeaderLength, remaining);\r\n\r\n            // handle unexpected truncation\r\n            if (count != remaining)\r\n            {\r\n                throw new CompressionException($\"Found unexpected truncation when reading the GZip block ({_filePath})\");\r\n            }\r\n\r\n            count = _bgzf.Decompress(_compressedBlock, blockLength, _uncompressedBlock, BlockGZipFormatCommon.MaxBlockSize);\r\n\r\n            if (count < 0)\r\n            {\r\n                throw new CompressionException($\"Encountered an error when uncompressing the GZip block ({_filePath})\");\r\n            }\r\n\r\n            // Do not reset offset if this read follows a seek\r\n            if (_blockLength != 0) _blockOffset = 0;\r\n\r\n            _blockAddress = blockAddress;\r\n            _blockLength  = count;\r\n        }\r\n\r\n        public override int Read(byte[] buffer, int offset, int count)\r\n        {\r\n            if (_isCompressor) throw new CompressionException(\"Tried to read data from a compression BlockGZipStream.\");\r\n\r\n            if (count == 0) return 0;\r\n\r\n            var numBytesRead = 0;\r\n            int 
dataOffset   = offset;\r\n\r\n            while (numBytesRead < count)\r\n            {\r\n                int numBytesAvailable = _blockLength - _blockOffset;\r\n\r\n                if (numBytesAvailable <= 0)\r\n                {\r\n                    ReadBlock();\r\n                    numBytesAvailable = _blockLength - _blockOffset;\r\n                    if (numBytesAvailable <= 0) break;\r\n                }\r\n\r\n                int copyLength = Math.Min(count - numBytesRead, numBytesAvailable);\r\n                Buffer.BlockCopy(_uncompressedBlock, _blockOffset, buffer, dataOffset, copyLength);\r\n\r\n                _blockOffset += copyLength;\r\n                dataOffset   += copyLength;\r\n                numBytesRead += copyLength;\r\n            }\r\n\r\n            // ReSharper disable once InvertIf\r\n            if (_blockOffset == _blockLength)\r\n            {\r\n                _blockAddress = _stream.CanSeek ? _stream.Position : 0;\r\n                _blockOffset  = _blockLength = 0;\r\n            }\r\n\r\n            return numBytesRead;\r\n        }\r\n\r\n        public override void Write(byte[] buffer, int offset, int count)\r\n        {\r\n            if (!_isCompressor) throw new CompressionException(\"Tried to write data to a decompression BlockGZipStream.\");\r\n\r\n            var numBytesWritten = 0;\r\n            int dataOffset      = offset;\r\n\r\n            // copy the data to the buffer\r\n            while (numBytesWritten < count)\r\n            {\r\n                int copyLength = Math.Min(BlockGZipFormatCommon.BlockSize - _blockOffset, count - numBytesWritten);\r\n                Buffer.BlockCopy(buffer, dataOffset, _uncompressedBlock, _blockOffset, copyLength);\r\n\r\n                _blockOffset    += copyLength;\r\n                dataOffset      += copyLength;\r\n                numBytesWritten += copyLength;\r\n\r\n                if (_blockOffset == BlockGZipFormatCommon.BlockSize) Flush(_blockOffset);\r\n       
     }\r\n        }\r\n\r\n        public void CloseBlock() => Flush(_blockOffset);\r\n        \r\n        private void SeekVirtualFilePointer(ulong virtualPosition)\r\n        {\r\n            long compressedOffset  = GetCompressedOffset(virtualPosition);\r\n            int uncompressedOffset = GetUncompressedOffset(virtualPosition);\r\n\r\n            // if we're already in the right block, no need to reload buffer.\r\n            if (_blockAddress != compressedOffset)\r\n            {\r\n                _blockAddress = compressedOffset;\r\n                _stream.Position = _blockAddress;\r\n                ReadBlock();\r\n            }\r\n\r\n            _blockOffset = uncompressedOffset;\r\n        }\r\n\r\n        private static long GetCompressedOffset(ulong virtualPosition)\r\n\t\t{\r\n\t\t\tunchecked\r\n\t\t\t{\r\n\t\t\t\treturn (long)((virtualPosition >> 16) & 0xFFFFFFFFFFFFL);\r\n\t\t\t}\r\n\t\t}\r\n\r\n\t\tprivate static int GetUncompressedOffset(ulong virtualPosition)\r\n\t\t{\r\n\t\t\tunchecked\r\n\t\t\t{\r\n\t\t\t\treturn (int)(virtualPosition & 0xffff);\r\n\t\t\t}\r\n\t\t}\r\n\t}\r\n}\r\n"
  },
  {
    "path": "Compression/FileHandling/BlockHeader.cs",
    "content": "﻿using System.IO;\r\nusing ErrorHandling.Exceptions;\r\n\r\nnamespace Compression.FileHandling\r\n{\r\n    public sealed class BlockHeader\r\n    {\r\n        private readonly byte[] _header;\r\n\r\n        public const int HeaderSize = 12;\r\n        private const int HeaderId  = -822411574; // cafeface\r\n\r\n        public int NumUncompressedBytes;\r\n        public int NumCompressedBytes;\r\n\r\n        public bool IsEmpty => NumUncompressedBytes == -1 && NumCompressedBytes == -1;\r\n\r\n        public BlockHeader() => _header = new byte[HeaderSize];\r\n\r\n        private int GetInt(int offset) => _header[offset] | _header[offset + 1] << 8 | _header[offset + 2] << 16 |\r\n                                          _header[offset + 3] << 24;\r\n\r\n        public void Read(Stream stream)\r\n        {\r\n            int numBytesRead = stream.Read(_header, 0, HeaderSize);\r\n\r\n            if (numBytesRead == 0)\r\n            {\r\n                NumUncompressedBytes = -1;\r\n                NumCompressedBytes   = -1;\r\n                return;\r\n            }\r\n\r\n            if (numBytesRead != HeaderSize) throw new IOException($\"Expected {HeaderSize} bytes from the block header, but received only {numBytesRead} bytes.\");\r\n\r\n            int headerId = GetInt(0);\r\n            if (headerId != HeaderId) throw new CompressionException($\"Expected the header ID ({HeaderId}), but found the following: {headerId}\");\r\n\r\n            NumUncompressedBytes = GetInt(4);\r\n            NumCompressedBytes   = GetInt(8);\r\n        }\r\n\r\n        private void SetInt(int value, int offset)\r\n        {\r\n            _header[offset]     = (byte)value;\r\n            _header[offset + 1] = (byte)(value >> 8);\r\n            _header[offset + 2] = (byte)(value >> 16);\r\n            _header[offset + 3] = (byte)(value >> 24);\r\n        }\r\n\r\n        public void Write(Stream stream)\r\n        {\r\n            SetInt(HeaderId, 0);\r\n            
SetInt(NumUncompressedBytes, 4);\r\n            SetInt(NumCompressedBytes, 8);\r\n            stream.Write(_header, 0, HeaderSize);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Compression/FileHandling/BlockStream.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.IO.Compression;\r\nusing System.Text;\r\nusing Compression.Algorithms;\r\nusing Compression.DataStructures;\r\nusing ErrorHandling.Exceptions;\r\n\r\nnamespace Compression.FileHandling\r\n{\r\n    public sealed class BlockStream : Stream\r\n    {\r\n        private readonly bool _isCompressor;\r\n        private readonly bool _leaveStreamOpen;\r\n\r\n        private Stream _stream;\r\n        private BinaryWriter _writer;\r\n        private Action<BinaryWriter> _headerWrite;\r\n\r\n        private readonly Block _block;\r\n        private bool _foundEof;\r\n        private bool _isDisposed;\r\n\r\n        #region Stream\r\n\r\n        public override bool CanRead                              => _stream.CanRead;\r\n        public override bool CanWrite                             => _stream.CanWrite;\r\n        public override bool CanSeek                              => _stream.CanSeek;\r\n        public override long Length                               => throw new NotSupportedException();\r\n        public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();\r\n        public override void SetLength(long value)                => throw new NotSupportedException();\r\n\r\n        public override long Position\r\n        {\r\n            get => _stream.Position;\r\n            set => throw new NotSupportedException();\r\n        }\r\n\r\n        public override void Flush()\r\n        {\r\n            if (_block.Offset > 0) _block.Write(_stream);\r\n        }\r\n\r\n        protected override void Dispose(bool disposing)\r\n        {\r\n            if (_isDisposed) return;\r\n\r\n            try\r\n            {\r\n                if (_isCompressor)\r\n                {\r\n                    Flush();\r\n                    _block.WriteEof(_stream);\r\n\r\n                    // update the header\r\n                    if (_headerWrite != null)\r\n                 
   {\r\n                        _stream.Position = 0;\r\n                        _headerWrite(_writer);\r\n                    }\r\n\r\n                    _writer.Dispose();\r\n                    _writer = null;\r\n                }\r\n\r\n                if (!_leaveStreamOpen)\r\n                {\r\n                    _stream.Dispose();\r\n                    _stream = null;\r\n                }\r\n\r\n                _isDisposed = true;\r\n            }\r\n            finally\r\n            {\r\n                base.Dispose(disposing);\r\n            }\r\n        }\r\n\r\n        #endregion\r\n\r\n        public BlockStream(ICompressionAlgorithm compressionAlgorithm, Stream stream, CompressionMode compressionMode,\r\n            bool leaveStreamOpen = false, int size = 16777216)\r\n        {\r\n            _stream          = stream ?? throw new ArgumentNullException(nameof(stream));\r\n            _isCompressor    = compressionMode == CompressionMode.Compress;\r\n            _leaveStreamOpen = leaveStreamOpen;\r\n            _block           = new Block(compressionAlgorithm, size);\r\n\r\n            // sanity check: make sure we can use the stream for reading or writing\r\n            if (_isCompressor && !_stream.CanWrite) throw new ArgumentException(\"A stream lacking write capability was provided to the block GZip compressor.\");\r\n            if (!_isCompressor && !_stream.CanRead) throw new ArgumentException(\"A stream lacking read capability was provided to the block GZip decompressor.\");\r\n\r\n            if (_isCompressor) _writer = new BinaryWriter(_stream, Encoding.UTF8, true);\r\n        }\r\n\r\n        public void WriteHeader(Action<BinaryWriter> headerWrite)\r\n        {\r\n            _headerWrite = headerWrite;\r\n            _headerWrite(_writer);\r\n        }\r\n\r\n        public override int Read(byte[] buffer, int offset, int count)\r\n        {\r\n            if (_foundEof) return 0;\r\n            if (_isCompressor) throw new 
CompressionException(\"Tried to read data from a compression BlockGZipStream.\");\r\n\r\n            ValidateParameters(buffer, offset, count);\r\n\r\n            var numBytesRead = 0;\r\n            int dataOffset   = offset;\r\n\r\n            while (numBytesRead < count)\r\n            {\r\n                if (!_block.HasMoreData)\r\n                {\r\n                    int numBytes = _block.Read(_stream);\r\n\r\n                    if (numBytes == -1)\r\n                    {\r\n                        _foundEof = true;\r\n                        return numBytesRead;\r\n                    }\r\n                }\r\n\r\n                int copyLength = _block.CopyFrom(buffer, dataOffset, count - numBytesRead);\r\n\r\n                dataOffset   += copyLength;\r\n                numBytesRead += copyLength;\r\n            }\r\n\r\n            return numBytesRead;\r\n        }\r\n\r\n        private void ValidateParameters(byte[] array, int offset, int count)\r\n        {\r\n            if (array == null)                 throw new ArgumentNullException(nameof(array));\r\n            if (offset < 0)                    throw new ArgumentOutOfRangeException(nameof(offset));\r\n            if (count < 0)                     throw new ArgumentOutOfRangeException(nameof(count));\r\n            if (array.Length - offset < count) throw new ArgumentException(\"Invalid Argument Offset Count\");\r\n        }\r\n\r\n        public override void Write(byte[] buffer, int offset, int count)\r\n        {\r\n            if (!_isCompressor) throw new CompressionException(\"Tried to write data to a decompression BlockGZipStream.\");\r\n\r\n            ValidateParameters(buffer, offset, count);\r\n\r\n            var numBytesWritten = 0;\r\n            int dataOffset      = offset;\r\n\r\n            while (numBytesWritten < count)\r\n            {\r\n                int copyLength = _block.CopyTo(buffer, dataOffset, count - numBytesWritten);\r\n                dataOffset      += 
copyLength;\r\n                numBytesWritten += copyLength;\r\n                if (_block.IsFull) _block.Write(_stream);\r\n            }\r\n        }\r\n\r\n        public (long FileOffset, int InternalOffset) GetBlockPosition() => (_stream.Position, _block.Offset);\r\n\r\n        public void SetBlockPosition(long fileOffset, int internalOffset = 0)\r\n        {\r\n            if (fileOffset != _block.FileOffset)\r\n            {\r\n                _stream.Position = fileOffset;\r\n                _block.Read(_stream);\r\n            }\r\n\r\n            _foundEof     = false;\r\n            _block.Offset = internalOffset;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Compression/Utilities/BlockExtensions.cs",
    "content": "﻿using System;\nusing System.Buffers;\nusing System.IO;\nusing Compression.Algorithms;\n\nnamespace Compression.Utilities\n{\n    public static class BlockExtensions\n    {\n        private static readonly Zstandard Zstd = new(21);\n\n        public static byte[] ReadCompressedByteArray(this BinaryReader reader, ArrayPool<byte> bytePool)\n        {\n            int uncompressedSize = reader.ReadInt32();\n            int compressedSize   = reader.ReadInt32();\n\n            byte[] compressedBuffer   = bytePool.Rent(compressedSize);\n            byte[] uncompressedBuffer = bytePool.Rent(uncompressedSize);\n            reader.Read(compressedBuffer, 0, compressedSize);\n\n            Zstd.Decompress(compressedBuffer, compressedSize, uncompressedBuffer, uncompressedBuffer.Length);\n\n            bytePool.Return(compressedBuffer);\n            return uncompressedBuffer;\n        }\n\n        public static void WriteCompressedByteArray(this BinaryWriter writer, byte[] uncompressed, int uncompressedSize)\n        {\n            ArrayPool<byte> bytePool             = ArrayPool<byte>.Shared;\n            int             compressedBufferSize = Zstd.GetCompressedBufferBounds(uncompressedSize);\n            byte[]          compressedBuffer     = bytePool.Rent(compressedBufferSize);\n\n            int compressedSize = Zstd.Compress(uncompressed, uncompressedSize, compressedBuffer, compressedBuffer.Length);\n\n            writer.Write(uncompressedSize);\n            writer.Write(compressedSize);\n            writer.Write(compressedBuffer, 0, compressedSize);\n\n            double percentCompression = compressedSize / (double) uncompressedSize * 100.0;\n            Console.WriteLine($\"uncompressed: {uncompressedSize:N0}, compressed: {compressedSize:N0}, {percentCompression:0.0}%\");\n\n            bytePool.Return(compressedBuffer);\n        }\n    }\n}"
  },
  {
    "path": "Compression/Utilities/GZipUtilities.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.IO.Compression;\r\nusing System.Text;\r\nusing Compression.FileHandling;\r\nusing ErrorHandling.Exceptions;\r\nusing IO;\r\n\r\nnamespace Compression.Utilities\r\n{\r\n    public static class GZipUtilities\r\n    {\r\n        private const int NumHeaderBytes = 18;\r\n\r\n        private enum CompressionAlgorithm\r\n        {\r\n            Uncompressed,\r\n            GZip,\r\n            BlockGZip\r\n        }\r\n\r\n        public static StreamReader GetAppropriateStreamReader(string filePath) => FileUtilities.GetStreamReader(GetAppropriateReadStream(filePath));\r\n        public static StreamWriter GetStreamWriter(string filePath) => new StreamWriter(GetWriteStream(filePath));\r\n        private static Stream GetWriteStream(string filePath) => new BlockGZipStream(FileUtilities.GetCreateStream(filePath), CompressionMode.Compress);\r\n\r\n        private static Stream GetAppropriateStream(Stream stream, CompressionAlgorithm compressionAlgorithm)\r\n        {\r\n            Stream newStream;\r\n\r\n            // ReSharper disable once SwitchStatementMissingSomeCases\r\n            switch (compressionAlgorithm)\r\n            {\r\n                case CompressionAlgorithm.BlockGZip:\r\n                    newStream = new BlockGZipStream(stream, CompressionMode.Decompress);\r\n                    break;\r\n                case CompressionAlgorithm.GZip:\r\n                    newStream = new GZipStream(stream, CompressionMode.Decompress);\r\n                    break;\r\n                default:\r\n                    newStream = stream;\r\n                    break;\r\n            }\r\n\r\n            return newStream;\r\n        }\r\n\r\n        //todo: can have just one method for both file and http streams\r\n        //used in custom annotation lambda\r\n        public static Stream GetAppropriateStream(Stream stream)\r\n        {\r\n            byte[] header = GetHeader(stream);\r\n            
var compressionAlgorithm = IdentifyCompressionAlgorithm(header);\r\n            stream.Position = 0;\r\n            var appropriateStream = GetAppropriateStream(stream, compressionAlgorithm);\r\n            return appropriateStream;\r\n        }\r\n\r\n        public static Stream GetAppropriateReadStream(string filePath)\r\n        {\r\n            CompressionAlgorithm compressionAlgorithm;\r\n\r\n            using (var headerStream = PersistentStreamUtils.GetReadStream(filePath))\r\n            {\r\n                byte[] header        = GetHeader(headerStream);\r\n                compressionAlgorithm = IdentifyCompressionAlgorithm(header);\r\n            }\r\n            \r\n            var fileStream = PersistentStreamUtils.GetReadStream(filePath);\r\n            return GetAppropriateStream(fileStream, compressionAlgorithm);\r\n        }\r\n\r\n        private static byte[] GetHeader(Stream stream)\r\n        {\r\n            byte[] header = null;\r\n\r\n            try\r\n            {\r\n                using (var reader = new ExtendedBinaryReader(stream, Encoding.UTF8, true))\r\n                {\r\n                    header = reader.ReadBytes(NumHeaderBytes);\r\n                }\r\n            }\r\n            catch (Exception e)\r\n            {\r\n                if (e.Message.Contains(\"because it is being used by another process.\"))\r\n                {\r\n                    throw new ProcessLockedFileException(e.Message);\r\n                }\r\n            }\r\n\r\n            return header;\r\n        }\r\n\r\n        // ReSharper disable once SuggestBaseTypeForParameter\r\n        private static CompressionAlgorithm IdentifyCompressionAlgorithm(byte[] header)\r\n        {\r\n            var result = CompressionAlgorithm.Uncompressed;\r\n            if (header == null || header.Length != NumHeaderBytes) return result;\r\n\r\n            // check if this is a gzip file\r\n            if (header[0] != 31 || header[1] != 139 || header[2] != 8) 
return result;\r\n            result = CompressionAlgorithm.GZip;\r\n\r\n            // check if this is a block GZip file\r\n            if ((header[3] & 4) != 0 && header[12] == 66 && header[13] == 67) result = CompressionAlgorithm.BlockGZip;\r\n\r\n            return result;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Compression/Utilities/LibraryUtilities.cs",
    "content": "﻿using System;\nusing System.IO;\nusing System.Runtime.InteropServices;\nusing ErrorHandling.Exceptions;\n\nnamespace Compression.Utilities\n{\n    public static class LibraryUtilities\n    {\n        public static void CheckLibrary()\n        {\n            const int expectedLibraryId = -822411574; // cafeface\n\n            // check to see if we have our compression library\n            try\n            {\n                int observedLibraryId = SafeNativeMethods.get_library_id();\n                if (observedLibraryId != expectedLibraryId) throw new InvalidDataException(\"Received an incorrect library ID when validating the Block Compression library.\");\n            }\n            catch (Exception)\n            {\n                throw new MissingCompressionLibraryException(\"BlockCompression\");\n            }\n        }\n\n        private static class SafeNativeMethods\n        {\n            [DllImport(\"BlockCompression\", CallingConvention = CallingConvention.Cdecl)]\n            public static extern int get_library_id();\n        }\n    }\n}\n"
  },
  {
    "path": "CreateLambdaZips.sh",
    "content": "#!/usr/bin/env bash\n\nLAMBDA_DIRS=(AnnotationLambda CustomAnnotationLambda GeneAnnotationLambda NirvanaLambda SingleAnnotationLambda)\nOUTPUT_DIR=bin/Release/netcoreapp2.1\nARTIFACT_S3_DIR=${ARTIFACT_S3_DIR:=develop}\nS3_PREFIX=s3://nirvana-cloudformation/$ARTIFACT_S3_DIR\n\n# install Amazon.Lambda.Tools if it's not already there\ndotnet tool list -g | grep dotnet-lambda &> /dev/null\n\nif [ $? -ne 0 ]; then\n\tdotnet tool install -g Amazon.Lambda.Tools\nfi\n\n# get the script's directory\nTOP_DIR=\"$( cd \"$( dirname \"${BASH_SOURCE[0]}\" )\" >/dev/null 2>&1 && pwd )\"\n\n# get the version\nVERSION=$(git describe --long | cut -c 2-)\n\n# some fancy formatting\nfunction Header ()\n{\n\techo -e \"\\n\\e[91m\\e[1m${1}\\e[0m\"\n}\n\n# silence pushd and popd\npushd () {\n\tcommand pushd \"$@\" > /dev/null\n}\n\npopd () {\n\tcommand popd \"$@\" > /dev/null\n}\n\n# create the zip files\nfor LAMBDA_DIR in \"${LAMBDA_DIRS[@]}\"\ndo\n\tLAMBDA_PATH=$TOP_DIR/$LAMBDA_DIR\n\tpushd $LAMBDA_PATH\n\t\n\t# create the zip file\n\tdotnet lambda package //p:Version=$VERSION -c Release\n\t\n\t# upload the file to S3\n\tHeader \"Uploading ${LAMBDA_DIR}:\"\n\tZIP_PATH=${LAMBDA_PATH}/${OUTPUT_DIR}/${LAMBDA_DIR}.zip\n\taws s3 cp $ZIP_PATH ${S3_PREFIX}/${LAMBDA_DIR}-${VERSION}.zip\n\t\n\tpopd\ndone\n\nHeader \"All zip files have been uploaded to ${S3_PREFIX}\""
  },
  {
    "path": "CustomAnnotationLambda/CustomAnnotationLambda.cs",
    "content": "using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Text;\r\nusing Amazon.Lambda.Core;\r\nusing Cloud;\r\nusing Cloud.Messages;\r\nusing Cloud.Messages.Custom;\r\nusing Cloud.Notifications;\r\nusing Cloud.Utilities;\r\nusing CommandLine.Utilities;\r\nusing ErrorHandling;\r\nusing ErrorHandling.Exceptions;\r\nusing IO;\r\nusing SAUtils.Custom;\r\n\r\n[assembly: LambdaSerializer(typeof(Amazon.Lambda.Serialization.Json.JsonSerializer))]\r\n\r\nnamespace CustomAnnotationLambda\r\n{\r\n    // ReSharper disable once ClassNeverInstantiated.Global\r\n    public sealed class CustomAnnotationLambda\r\n    {\r\n        \r\n        // ReSharper disable once UnusedMember.Global\r\n        public CustomResult Run(CustomConfig config, ILambdaContext context)\r\n        {\r\n            var result = new CustomResult { id = config.id };\r\n            string snsTopicArn = null;\r\n            var runLog = new StringBuilder();\r\n\r\n            try\r\n            {\r\n                LogUtilities.UpdateLogger(context.Logger, runLog);\r\n                LogUtilities.LogLambdaInfo(context, CommandLineUtilities.InformationalVersion);\r\n                LogUtilities.LogObject(\"Config\", config);\r\n                LogUtilities.Log(new[] { LambdaUrlHelper.UrlBaseEnvironmentVariableName, LambdaUtilities.SnsTopicKey });\r\n\r\n                LambdaUtilities.GarbageCollect();\r\n                LambdaUtilities.DeleteTempOutput();\r\n\r\n                snsTopicArn = LambdaUtilities.GetEnvironmentVariable(LambdaUtilities.SnsTopicKey);\r\n\r\n                config.CheckRequiredFieldsNotNull();\r\n                var s3Client = config.outputDir.GetS3Client(context.RemainingTime);\r\n                config.CheckResourcesExist();\r\n\r\n                LambdaUtilities.DeleteTempOutput();\r\n\r\n                string inputFileName = config.tsvUrl.TrimEndFromFirst(\"?\").TrimStartToLast(\"/\");\r\n                Logger.WriteLine($\"input 
file name is: {inputFileName}\");\r\n\r\n                return IsGeneAnnotationTsv(config.tsvUrl) \r\n                    ? GeneAnnotationCreator.Create(config, inputFileName, result, s3Client) \r\n                    : VariantAnnotationCreator.Create(config, inputFileName, result, s3Client);\r\n            }\r\n            catch (Exception e)\r\n            {\r\n                result.jwtFields    = config.jwtFields;\r\n                result.variantCount = 0;\r\n                return HandleException(runLog, result, e, snsTopicArn);\r\n            }\r\n        }\r\n\r\n        private static bool IsGeneAnnotationTsv(string tsvUrl)\r\n        {\r\n            using (var customTsvStream = (PersistentStream) PersistentStreamUtils.GetReadStream(tsvUrl))\r\n            using (var reader = new StreamReader(customTsvStream))\r\n            {\r\n                reader.ReadLine();\r\n                string secondLine = reader.ReadLine();\r\n                if (secondLine == null) throw new UserErrorException(\"The input TSV file has less than two lines\");\r\n\r\n                return secondLine.StartsWith(\"#geneSymbol\");\r\n            }\r\n        }\r\n\r\n        public static CustomResult GetSuccessResult(CustomConfig customSaConfig, CustomResult result, List<string> outputFiles)\r\n        {\r\n            Logger.WriteLine(\"All files uploaded.\");\r\n\r\n            result.created = new FileList\r\n            {\r\n                bucketName = customSaConfig.outputDir.bucketName,\r\n                outputDir = customSaConfig.outputDir.path,\r\n                files = outputFiles.ToArray()\r\n            };\r\n\r\n            result.status = LambdaUtilities.SuccessMessage;\r\n\r\n            LogUtilities.LogObject(\"Result\", result);\r\n            LambdaUtilities.DeleteTempOutput();\r\n\r\n            return result;\r\n        }\r\n\r\n        private static CustomResult HandleException(StringBuilder runLog, CustomResult result, Exception e, string 
snsTopicArn)\r\n        {\r\n            Logger.Log(e);\r\n\r\n            var errorCategory = ExceptionUtilities.ExceptionToErrorCategory(e);\r\n\r\n            result.status = $\"{errorCategory}: {e.Message}\";\r\n            result.noValidEntries = e.Message.Contains(GeneAnnotationsParser.NoValidEntriesErrorMessage);\r\n\r\n            if (errorCategory != ErrorCategory.UserError)\r\n            {\r\n                string snsMessage = SNS.CreateMessage(runLog.ToString(), result.status, e.StackTrace);\r\n                SNS.SendMessage(snsTopicArn, snsMessage);\r\n            }\r\n\r\n            LogUtilities.LogObject(\"Result\", result);\r\n            LambdaUtilities.DeleteTempOutput();\r\n\r\n            return result;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CustomAnnotationLambda/CustomAnnotationLambda.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\n  <PropertyGroup>\n    <TargetFramework>net6.0</TargetFramework>\n    <GenerateRuntimeConfigurationFiles>true</GenerateRuntimeConfigurationFiles>\n    <AWSProjectType>Lambda</AWSProjectType>\n    <OutputPath>bin\\$(Configuration)</OutputPath>\n    \n  </PropertyGroup>\n  <ItemGroup>\n    <PackageReference Include=\"Amazon.Lambda.Core\" Version=\"2.1.0\" />\n    <PackageReference Include=\"Amazon.Lambda.Serialization.Json\" Version=\"2.0.0\" />\n    <PackageReference Include=\"AWSSDK.SimpleNotificationService\" Version=\"3.7.3.31\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ProjectReference Include=\"..\\Cloud\\Cloud.csproj\" />\n    <ProjectReference Include=\"..\\SAUtils\\SAUtils.csproj\" />\n  </ItemGroup>\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\n</Project>\n"
  },
  {
    "path": "CustomAnnotationLambda/CustomConfigExtensions.cs",
    "content": "﻿using Cloud;\r\nusing Cloud.Messages.Custom;\r\nusing ErrorHandling.Exceptions;\r\nusing IO;\r\n\r\nnamespace CustomAnnotationLambda\r\n{\r\n    public static class CustomConfigExtensions\r\n    {\r\n        public static void CheckRequiredFieldsNotNull(this CustomConfig config)\r\n        {\r\n            static string BuildErrorMessage(string message) => message + \" cannot be null.\";\r\n\r\n            if (config.id                     == null) throw new UserErrorException(BuildErrorMessage(\"id\"));\r\n            if (config.tsvUrl                 == null) throw new UserErrorException(BuildErrorMessage(\"tsvUrl\"));\r\n            if (config.outputDir              == null) throw new UserErrorException(BuildErrorMessage(\"outputDir\"));\r\n            if (config.outputDir.bucketName   == null) throw new UserErrorException(BuildErrorMessage(\"bucketName of outputDir\"));\r\n            if (config.outputDir.path         == null) throw new UserErrorException(BuildErrorMessage(\"path of outputDir\"));\r\n            if (config.outputDir.region       == null) throw new UserErrorException(BuildErrorMessage(\"region of outputDir\"));\r\n            if (config.outputDir.accessKey    == null) throw new UserErrorException(BuildErrorMessage(\"accessKey of outputDir\"));\r\n            if (config.outputDir.secretKey    == null) throw new UserErrorException(BuildErrorMessage(\"secretKey of outputDir\"));\r\n            if (config.outputDir.sessionToken == null) throw new UserErrorException(BuildErrorMessage(\"sessionToken of outputDir\"));\r\n        }\r\n\r\n        public static void CheckResourcesExist(this CustomConfig config)\r\n        {\r\n            HttpUtilities.ValidateUrl(config.tsvUrl);\r\n            HttpUtilities.ValidateUrl(LambdaUrlHelper.GetUgaUrl(), false);\r\n            config.outputDir.Validate(true);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "CustomAnnotationLambda/GeneAnnotationCreator.cs",
    "content": "using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Security.Cryptography;\r\nusing Cloud;\r\nusing Cloud.Messages.Custom;\r\nusing Cloud.Utilities;\r\nusing Compression.Utilities;\r\nusing ErrorHandling.Exceptions;\r\nusing IO;\r\nusing SAUtils.Custom;\r\nusing SAUtils.GeneIdentifiers;\r\nusing VariantAnnotation.SA;\r\n\r\nnamespace CustomAnnotationLambda\r\n{\r\n    public static class GeneAnnotationCreator\r\n    {\r\n        private const string LogFileName = \"unrecognizedGeneIds.txt\";\r\n        public static CustomResult Create(CustomConfig config, string inputFileName, CustomResult result, IS3Client s3Client)\r\n        {\r\n            string inputBaseName   = inputFileName.TrimEndFromFirst(\".tsv\");\r\n            string ngaFileName     = inputBaseName + SaCommon.GeneFileSuffix;\r\n            string localNgaPath    = Path.Combine(Path.GetTempPath(), ngaFileName);\r\n            string localSchemaPath = localNgaPath + SaCommon.JsonSchemaSuffix;\r\n            string localLogPath    = Path.Combine(Path.GetTempPath(), LogFileName);\r\n\r\n            int variantCount = 0;\r\n            \r\n            HttpUtilities.ValidateUrl(LambdaUrlHelper.GetUgaUrl());\r\n            var outputFiles = new List<string>();\r\n            using (var aes = new AesCryptoServiceProvider())\r\n            {\r\n                FileMetadata ngaMetadata, schemaMetadata, logMetaData;\r\n                using (var logStream = FileUtilities.GetCreateStream(localLogPath))\r\n                using (var logCryptoStream = new CryptoStream(logStream, aes.CreateEncryptor(), CryptoStreamMode.Write))\r\n                using (var logMd5Stream = new MD5Stream(logCryptoStream))\r\n                //\r\n                using (var customTsvStream = (PersistentStream)PersistentStreamUtils.GetReadStream(config.tsvUrl))\r\n                using (var parser = GetGeneAnnotationsParserFromCustomTsvStream(customTsvStream))\r\n                //\r\n                
using (var ngaStream = FileUtilities.GetCreateStream(localNgaPath))\r\n                using (var ngaCryptoStream = new CryptoStream(ngaStream, aes.CreateEncryptor(), CryptoStreamMode.Write))\r\n                using (var ngaMd5Stream = new MD5Stream(ngaCryptoStream))\r\n                //\r\n                using (var schemaStream = FileUtilities.GetCreateStream(localSchemaPath))\r\n                using (var schemaCryptoStream = new CryptoStream(schemaStream, aes.CreateEncryptor(), CryptoStreamMode.Write))\r\n                using (var schemaMd5Stream = new MD5Stream(schemaCryptoStream))\r\n                {\r\n                    using (var ngaWriter    = CaUtilities.GetNgaWriter(ngaMd5Stream, parser, inputFileName))\r\n                    using (var schemaWriter = new StreamWriter(schemaMd5Stream))\r\n                    using (var logWriter    = new StreamWriter(logMd5Stream))\r\n                    {\r\n                        variantCount = ngaWriter.Write(parser.GetItems(config.skipGeneIdValidation, logWriter));\r\n                        var unknownGenes = parser.GetUnknownGenes();\r\n                        if (!config.skipGeneIdValidation && unknownGenes.Count > 0)\r\n                        {\r\n                            throw new UserErrorException($\"{GeneAnnotationsParser.UnknownGeneIdsErrorMessage} {string.Join(',', unknownGenes)}\");\r\n                        }\r\n\r\n                        schemaWriter.Write(parser.JsonSchema);\r\n                    }\r\n                    \r\n                    //all the writers have to be disposed before GetFileMetaData is called\r\n                    ngaMetadata = ngaMd5Stream.GetFileMetadata();\r\n                    schemaMetadata = schemaMd5Stream.GetFileMetadata();\r\n                    logMetaData = logMd5Stream.GetFileMetadata();\r\n                }\r\n\r\n                if (config.skipGeneIdValidation)\r\n                {\r\n                    string logS3Key = string.Join('/', 
config.outputDir.path.Trim('/'), LogFileName);\r\n                    Logger.WriteLine(\"uploading log file to \" + logS3Key);\r\n                    s3Client.DecryptUpload(config.outputDir.bucketName, logS3Key, localLogPath, aes, logMetaData);\r\n                }\r\n\r\n                string nsaS3Path = string.Join('/', config.outputDir.path.Trim('/'), ngaFileName);\r\n                string schemaS3Path = nsaS3Path + SaCommon.JsonSchemaSuffix;\r\n\r\n                s3Client.DecryptUpload(config.outputDir.bucketName, nsaS3Path, localNgaPath, aes, ngaMetadata);\r\n                s3Client.DecryptUpload(config.outputDir.bucketName, schemaS3Path, localSchemaPath, aes, schemaMetadata);\r\n\r\n                \r\n                outputFiles.Add(ngaFileName);\r\n                outputFiles.Add(ngaFileName + SaCommon.JsonSchemaSuffix);\r\n\r\n                LambdaUtilities.DeleteTempOutput();\r\n\r\n                result.jwtFields = config.jwtFields;\r\n\r\n                result.variantCount    = variantCount;\r\n                return CustomAnnotationLambda.GetSuccessResult(config, result, outputFiles);\r\n            }\r\n        }\r\n\r\n        private static GeneAnnotationsParser GetGeneAnnotationsParserFromCustomTsvStream(PersistentStream customTsvStream)\r\n        {\r\n            var (entrezGeneIdToSymbol, ensemblGeneIdToSymbol) = GeneUtilities.ParseUniversalGeneArchive(null, LambdaUrlHelper.GetUgaUrl());\r\n            return GeneAnnotationsParser.Create(new StreamReader(GZipUtilities.GetAppropriateStream(customTsvStream)), entrezGeneIdToSymbol, ensemblGeneIdToSymbol);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "CustomAnnotationLambda/VariantAnnotationCreator.cs",
    "content": "using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Security.Cryptography;\r\nusing Cloud;\r\nusing Cloud.Messages.Custom;\r\nusing Cloud.Utilities;\r\nusing Compression.Utilities;\r\nusing Genome;\r\nusing IO;\r\nusing SAUtils.Custom;\r\nusing SAUtils.DataStructures;\r\nusing SAUtils.Schema;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.Providers;\r\nusing VariantAnnotation.SA;\r\n\r\nnamespace CustomAnnotationLambda\r\n{\r\n    public static class VariantAnnotationCreator\r\n    {\r\n        public static CustomResult Create(CustomConfig config, string inputFileName, CustomResult result, IS3Client s3Client)\r\n        {\r\n            string tempPath        = Path.GetTempPath();\r\n            string inputBaseName   = inputFileName.TrimEndFromFirst(\".tsv\");\r\n            string nsaFileName     = inputBaseName + SaCommon.SaFileSuffix;\r\n            string localNsaPath    = Path.Combine(tempPath, nsaFileName);\r\n            string localIndexPath  = localNsaPath + SaCommon.IndexSuffix;\r\n            string localSchemaPath = localNsaPath + SaCommon.JsonSchemaSuffix;\r\n            int    variantCount    = 0;\r\n\r\n            var outputFiles = new List<string>();\r\n            using (var aes = new AesCryptoServiceProvider())\r\n            {\r\n                FileMetadata nsaMetadata, indexMetadata, schemaMetadata;\r\n\r\n                List<CustomInterval> intervals;\r\n                string jsonTag;\r\n                SaJsonSchema intervalJsonSchema;\r\n                DataSourceVersion version;\r\n                GenomeAssembly genomeAssembly;\r\n                int nsaItemsCount;\r\n                ReportFor reportFor;\r\n\r\n                using (var customTsvStream = (PersistentStream) PersistentStreamUtils.GetReadStream(config.tsvUrl))\r\n                using (var parser = GetVariantAnnotationsParserFromCustomTsvStream(customTsvStream))\r\n                    //\r\n                using (var 
nsaStream = FileUtilities.GetCreateStream(localNsaPath))\r\n                using (var nsaCryptoStream = new CryptoStream(nsaStream, aes.CreateEncryptor(), CryptoStreamMode.Write))\r\n                using (var nsaMd5Stream = new MD5Stream(nsaCryptoStream))\r\n                    //\r\n                using (var indexStream = FileUtilities.GetCreateStream(localIndexPath))\r\n                using (var indexCryptoStream = new CryptoStream(indexStream, aes.CreateEncryptor(), CryptoStreamMode.Write))\r\n                using (var indexMd5Stream = new MD5Stream(indexCryptoStream))\r\n                    //\r\n                using (var schemaStream       = FileUtilities.GetCreateStream(localSchemaPath))\r\n                using (var schemaCryptoStream = new CryptoStream(schemaStream, aes.CreateEncryptor(), CryptoStreamMode.Write))\r\n                using (var schemaMd5Stream    = new MD5Stream(schemaCryptoStream))\r\n                {\r\n                    genomeAssembly        = parser.Assembly;\r\n                    result.genomeAssembly = genomeAssembly.ToString();\r\n                    reportFor             = parser.ReportFor;\r\n                    result.jwtFields      = config.jwtFields;\r\n\r\n                    using (var nsaWriter    = CaUtilities.GetNsaWriter(nsaMd5Stream, indexMd5Stream, parser, inputFileName, parser.SequenceProvider, out version, config.skipRefBaseValidation))\r\n                    using (var schemaWriter = new StreamWriter(schemaMd5Stream))\r\n                    {\r\n                        (jsonTag, nsaItemsCount, intervalJsonSchema, intervals) = CaUtilities.WriteSmallVariants(parser, nsaWriter, schemaWriter);\r\n                    }\r\n\r\n                    variantCount += nsaItemsCount;\r\n                    variantCount += intervals?.Count ?? 
0;\r\n\r\n                    nsaMetadata    = nsaMd5Stream.GetFileMetadata();\r\n                    indexMetadata  = indexMd5Stream.GetFileMetadata();\r\n                    schemaMetadata = schemaMd5Stream.GetFileMetadata();\r\n                }\r\n\r\n                result.variantCount = variantCount;\r\n                if (nsaItemsCount > 0)\r\n                {\r\n                    string nsaS3Path    = string.Join('/', config.outputDir.path.Trim('/'), nsaFileName);\r\n                    string indexS3Path  = nsaS3Path + SaCommon.IndexSuffix;\r\n                    string schemaS3Path = nsaS3Path + SaCommon.JsonSchemaSuffix;\r\n\r\n                    s3Client.DecryptUpload(config.outputDir.bucketName, nsaS3Path, localNsaPath, aes, nsaMetadata);\r\n                    s3Client.DecryptUpload(config.outputDir.bucketName, indexS3Path, localIndexPath, aes,\r\n                        indexMetadata);\r\n                    s3Client.DecryptUpload(config.outputDir.bucketName, schemaS3Path, localSchemaPath, aes,\r\n                        schemaMetadata);\r\n\r\n                    outputFiles.Add(nsaFileName);\r\n                    outputFiles.Add(nsaFileName + SaCommon.IndexSuffix);\r\n                    outputFiles.Add(nsaFileName + SaCommon.JsonSchemaSuffix);\r\n                }\r\n\r\n                if (intervals == null) return CustomAnnotationLambda.GetSuccessResult(config, result, outputFiles);\r\n\r\n                FileMetadata nsiMetadata, nsiSchemaMetadata;\r\n                string nsiFileName = inputBaseName + SaCommon.IntervalFileSuffix;\r\n                string localNsiPath = Path.Combine(tempPath, nsiFileName);\r\n                string localNsiSchemaPath = localNsiPath + SaCommon.JsonSchemaSuffix;\r\n                //\r\n                using (var nsiStream = FileUtilities.GetCreateStream(localNsiPath))\r\n                using (var nsiCryptoStream = new CryptoStream(nsiStream, aes.CreateEncryptor(), CryptoStreamMode.Write))\r\n             
   using (var nsiMd5Stream = new MD5Stream(nsiCryptoStream))\r\n                    //\r\n                using (var nsiSchemaSteam = FileUtilities.GetCreateStream(localNsiSchemaPath))\r\n                using (var nsiSchemaCryptoStream =\r\n                    new CryptoStream(nsiSchemaSteam, aes.CreateEncryptor(), CryptoStreamMode.Write))\r\n                using (var nsiSchemaMd5Stream = new MD5Stream(nsiSchemaCryptoStream))\r\n                {\r\n                    using (var nsiWriter = CaUtilities.GetNsiWriter(nsiMd5Stream, version, genomeAssembly, jsonTag, reportFor))\r\n                    using (var schemaWriter = new StreamWriter(nsiSchemaMd5Stream))\r\n                    {\r\n                        nsiWriter.Write(intervals);\r\n                        schemaWriter.Write(intervalJsonSchema);\r\n                    }\r\n\r\n                    nsiMetadata = nsiMd5Stream.GetFileMetadata();\r\n                    nsiSchemaMetadata = nsiSchemaMd5Stream.GetFileMetadata();\r\n                }\r\n\r\n                string nsiS3Path = string.Join('/', config.outputDir.path.Trim('/'), nsiFileName);\r\n                string nsiSchemaS3PathFile = nsiS3Path + SaCommon.JsonSchemaSuffix;\r\n\r\n                s3Client.DecryptUpload(config.outputDir.bucketName, nsiS3Path, localNsiPath, aes, nsiMetadata);\r\n                s3Client.DecryptUpload(config.outputDir.bucketName, nsiSchemaS3PathFile, localNsiSchemaPath, aes,\r\n                    nsiSchemaMetadata);\r\n\r\n                outputFiles.Add(nsiFileName);\r\n                outputFiles.Add(nsiFileName + SaCommon.JsonSchemaSuffix);\r\n            }\r\n\r\n            LambdaUtilities.DeleteTempOutput();\r\n\r\n            return CustomAnnotationLambda.GetSuccessResult(config, result, outputFiles);\r\n        }\r\n\r\n        private static VariantAnnotationsParser GetVariantAnnotationsParserFromCustomTsvStream(PersistentStream customTsvStream)\r\n        {\r\n            var parser = 
VariantAnnotationsParser.Create(new StreamReader(GZipUtilities.GetAppropriateStream(customTsvStream)));\r\n\r\n            parser.SequenceProvider = new ReferenceSequenceProvider(PersistentStreamUtils.GetReadStream(LambdaUrlHelper.GetRefUrl(parser.Assembly)));\r\n\r\n            return parser;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "CustomStrValidationLambda/CustomStrValidationLambda.cs",
    "content": "using System;\nusing System.IO;\nusing Amazon.Lambda.Core;\nusing Cloud;\nusing Cloud.Messages.StrValidation;\nusing Cloud.Notifications;\nusing Cloud.Utilities;\nusing CommandLine.Utilities;\nusing ErrorHandling;\nusing ErrorHandling.Exceptions;\nusing Genome;\nusing IO;\nusing Nirvana;\nusing RepeatExpansions.IO;\nusing VariantAnnotation.Interface.Providers;\n\n[assembly: LambdaSerializer(typeof(Amazon.Lambda.Serialization.Json.JsonSerializer))]\n\nnamespace CustomStrValidationLambda\n{\n    public class CustomStrValidationLambda\n    {\n        public ValidationResult Run(ValidationConfig config, ILambdaContext context)\n        {\n            string snsTopicArn = null;\n\n            try\n            {\n                LogUtilities.UpdateLogger(context.Logger, null);\n                LogUtilities.LogLambdaInfo(context, CommandLineUtilities.InformationalVersion);\n                LogUtilities.LogObject(\"Config\", config);\n                LogUtilities.Log(new[] { LambdaUrlHelper.UrlBaseEnvironmentVariableName, LambdaUtilities.SnsTopicKey });\n                LambdaUtilities.GarbageCollect();\n                snsTopicArn = LambdaUtilities.GetEnvironmentVariable(LambdaUtilities.SnsTopicKey);\n\n                config.Validate();\n                GenomeAssembly genomeAssembly = GenomeAssemblyHelper.Convert(config.genomeAssembly);\n\n                string nirvanaS3Ref = LambdaUrlHelper.GetRefUrl(genomeAssembly);\n                var refProvider = ProviderUtilities.GetSequenceProvider(nirvanaS3Ref);\n\n                using (var stream = PersistentStreamUtils.GetReadStream(config.customStrUrl))\n                    TryLoadStrFile(stream, genomeAssembly, refProvider);\n            }\n            catch (Exception exception)\n            {\n                return HandleException(config.id, exception, snsTopicArn);\n            }\n\n            return GetSuccessOutput(config.id);\n        }\n\n        private static void TryLoadStrFile(Stream stream, 
GenomeAssembly genomeAssembly, ISequenceProvider refProvider)\n        {\n            try\n            {\n                RepeatExpansionReader.Load(stream, genomeAssembly, refProvider.RefNameToChromosome,\n                    refProvider.RefIndexToChromosome.Count);\n            }\n            catch (Exception exception)\n            {\n                throw new UserErrorException(exception.Message);\n            }\n        }\n\n        private static ValidationResult HandleException(string id, Exception exception, string snsTopicArn)\n        {\n            Logger.Log(exception);\n\n            string snsMessage = SNS.CreateMessage(exception.Message, \"exception\", exception.StackTrace);\n            SNS.SendMessage(snsTopicArn, snsMessage);\n\n            ErrorCategory errorCategory = ExceptionUtilities.ExceptionToErrorCategory(exception);\n            var errorMessagePrefix = errorCategory == ErrorCategory.UserError ? \"User error\" : \"Nirvana error\";\n            return new ValidationResult\n            {\n                id = id,\n                status = $\"{errorMessagePrefix}: {exception.Message}\"\n            };\n        }\n\n\n        private static ValidationResult GetSuccessOutput(string id) =>\n            new ValidationResult\n            {\n                id = id,\n                status = LambdaUtilities.SuccessMessage\n            };\n    }\n}"
  },
  {
    "path": "CustomStrValidationLambda/CustomStrValidationLambda.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\n  <PropertyGroup>\n    <TargetFramework>net6.0</TargetFramework>\n    <GenerateRuntimeConfigurationFiles>true</GenerateRuntimeConfigurationFiles>\n    <AWSProjectType>Lambda</AWSProjectType>\n  </PropertyGroup>\n  <ItemGroup>\n    <PackageReference Include=\"Amazon.Lambda.Core\" Version=\"2.1.0\" />\n    <PackageReference Include=\"Amazon.Lambda.Serialization.Json\" Version=\"2.0.0\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ProjectReference Include=\"..\\Cloud\\Cloud.csproj\" />\n    <ProjectReference Include=\"..\\CommandLine\\CommandLine.csproj\" />\n    <ProjectReference Include=\"..\\ErrorHandling\\ErrorHandling.csproj\" />\n    <ProjectReference Include=\"..\\Genome\\Genome.csproj\" />\n    <ProjectReference Include=\"..\\IO\\IO.csproj\" />\n    <ProjectReference Include=\"..\\Nirvana\\Nirvana.csproj\" />\n    <ProjectReference Include=\"..\\RepeatExpansions\\RepeatExpansions.csproj\" />\n    <ProjectReference Include=\"..\\VariantAnnotation.Interface\\VariantAnnotation.Interface.csproj\" />\n  </ItemGroup>\n</Project>"
  },
  {
    "path": "Downloader/AnnotationRepository.cs",
    "content": "using System;\nusing System.Collections.Generic;\nusing System.Threading;\nusing Downloader.Utilities;\n\nnamespace Downloader\n{\n    public static class AnnotationRepository\n    {\n        public static void DownloadMetadata(IClient client, List<RemoteFile> files) =>\n            files.ParallelExecute(client.SetMetadata, Retry, \"finished\", \"download the file metadata\");\n\n        public static void DownloadFiles(IClient client, List<RemoteFile> files) =>\n            files.ParallelExecute(client.DownloadFile, Retry, \"finished\", \"download the file\");\n\n        private static void Retry(RemoteFile file, Func<RemoteFile, bool> clientFunc,\n            CancellationTokenSource tokenSource, string exceptionMessage)\n        {\n            var       numAttempts = 0;\n            const int maxAttempts = 3;\n\n            while (true)\n            {\n                numAttempts++;\n\n                if (numAttempts == maxAttempts)\n                {\n                    Console.WriteLine($\"  - Unable to {exceptionMessage} for {file.Description} after {maxAttempts} attempts.\");\n                    tokenSource.Cancel();\n                    break;\n                }\n\n                bool success = clientFunc(file);\n                if (success) break;\n            }\n        }\n    }\n}"
  },
  {
    "path": "Downloader/AssemblyInfo.cs",
    "content": "﻿using System.Runtime.CompilerServices;\n\n[assembly: InternalsVisibleTo(\"UnitTests\")]"
  },
  {
    "path": "Downloader/Client.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Net;\r\nusing System.Net.Http;\r\nusing System.Net.Http.Headers;\r\nusing Downloader.Utilities;\r\n\r\nnamespace Downloader\r\n{\r\n    public sealed class Client : IClient\r\n    {\r\n        private readonly HttpClient _httpClient;\r\n\r\n        public Client(string hostName)\r\n        {\r\n            var baseUri = new Uri($\"http://{hostName}\");\r\n\r\n            ServicePointManager.DefaultConnectionLimit                           = int.MaxValue;\r\n            ServicePointManager.FindServicePoint(baseUri).ConnectionLeaseTimeout = 60 * 1000;\r\n\r\n            _httpClient = new HttpClient { BaseAddress = baseUri };\r\n            _httpClient.DefaultRequestHeaders.Clear();\r\n            _httpClient.DefaultRequestHeaders.ConnectionClose = false;\r\n            _httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue(\"text/plain\"));\r\n            _httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue(\"application/octet-stream\"));\r\n        }\r\n\r\n        public List<string> DownloadLines(string remotePath)\r\n        {\r\n            var lines = new List<string>();\r\n\r\n            using (var response = _httpClient.GetAsync(remotePath, HttpCompletionOption.ResponseHeadersRead).AsSync())\r\n            {\r\n                var stream = response.Content.ReadAsStreamAsync().AsSync();\r\n                if (!response.IsSuccessStatusCode) return lines;\r\n\r\n                using (var reader = new StreamReader(stream))\r\n                {\r\n                    while (true)\r\n                    {\r\n                        string line = reader.ReadLineAsync().AsSync();\r\n                        if (line == null) break;\r\n\r\n                        lines.Add(line);\r\n                    }\r\n                }\r\n            }\r\n\r\n            return lines;\r\n        }\r\n\r\n        public 
bool SetMetadata(RemoteFile file)\r\n        {\r\n            using (var response = _httpClient.GetAsync(file.RemotePath, HttpCompletionOption.ResponseHeadersRead).AsSync())\r\n            {\r\n                if (response.StatusCode == HttpStatusCode.NotFound)\r\n                {\r\n                    Console.Write(\"  - \");\r\n                    ConsoleEmbellishments.PrintWarning(\"WARNING: \");\r\n                    Console.WriteLine($\"{file.Description} could not be found. Skipping this file.\");\r\n                    file.Missing = true;\r\n                    file.Skipped = true;\r\n                    return true;\r\n                }\r\n\r\n                if (!response.IsSuccessStatusCode) return false;\r\n\r\n                long? contentLength = response.Content.Headers.ContentLength;\r\n                if (contentLength.HasValue) file.FileSize = contentLength.Value;\r\n\r\n                DateTimeOffset? lastModified = response.Content.Headers.LastModified;\r\n                if (lastModified.HasValue) file.LastModified = lastModified.Value;\r\n            }\r\n\r\n            return true;\r\n        }\r\n\r\n        public bool DownloadFile(RemoteFile file)\r\n        {\r\n            using (var response = _httpClient.GetAsync(file.RemotePath, HttpCompletionOption.ResponseHeadersRead).AsSync())\r\n            {\r\n                if (!response.IsSuccessStatusCode) return false;\r\n\r\n                Console.WriteLine($\"  - downloading {file.Description}\");\r\n\r\n                var stream   = response.Content.ReadAsStreamAsync().ConfigureAwait(false).GetAwaiter().GetResult();\r\n                var fileInfo = new FileInfo(file.LocalPath);\r\n                using (var fileStream = fileInfo.OpenWrite()) stream.CopyTo(fileStream);\r\n            }\r\n\r\n            return true;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Downloader/Configuration.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing Cloud;\r\nusing Cloud.Utilities;\r\nusing Genome;\r\nusing Microsoft.Extensions.Configuration;\r\n\r\nnamespace Downloader\r\n{\r\n    public static class Configuration\r\n    {\r\n        public static (string HostName, string CacheDir, string ReferencesDir, string ManifestGRCh37, string ManifestGRCh38) Load(\r\n            string hostName, string manifestPrefix)\r\n        {\r\n            const string appSettingsFilename = \"Downloader.appsettings.json\";\r\n\r\n            IConfigurationRoot config = new ConfigurationBuilder()\r\n                .AddJsonFile(appSettingsFilename)\r\n                .Build();\r\n\r\n            IConfigurationSection dataSource = config.GetSection(\"DataSource\");\r\n\r\n            if (string.IsNullOrEmpty(hostName))\r\n            {\r\n                hostName = dataSource[\"HostName\"];\r\n                if (string.IsNullOrEmpty(hostName))\r\n                    throw new InvalidDataException($\"Could not find the HostName entry in the {appSettingsFilename} file.\");\r\n                // this env variable will over-ride the configuration in cloud\r\n                Environment.SetEnvironmentVariable(LambdaUrlHelper.UrlBaseEnvironmentVariableName, $\"http://{hostName}/\");\r\n            }\r\n\r\n            var    cloudConfiguration = new Cloud.Configuration();\r\n            string cacheDir           = cloudConfiguration.CacheDirectory;\r\n            if (string.IsNullOrEmpty(cacheDir))\r\n                throw new InvalidDataException($\"Could not find the CacheDirectory entry in the Cloud.appsettings.json file.\");\r\n\r\n            string referencesDir = cloudConfiguration.ReferencesDirectory;\r\n            if (string.IsNullOrEmpty(referencesDir))\r\n                throw new InvalidDataException($\"Could not find the ReferencesDirectory entry in the Cloud.appsettings.json file.\");\r\n\r\n            string manifestGRCh37 ;\r\n            string manifestGRCh38 
;\r\n            \r\n            if (string.IsNullOrEmpty(manifestPrefix))\r\n            {\r\n                manifestGRCh37 = LambdaUtilities.GetManifestUrl(dataSource[\"ManifestGRCh37\"], GenomeAssembly.GRCh37);\r\n                if (string.IsNullOrEmpty(manifestGRCh37))\r\n                    throw new InvalidDataException($\"Could not find the ManifestGRCh37 entry in the {appSettingsFilename} file.\");\r\n\r\n                manifestGRCh38 = LambdaUtilities.GetManifestUrl(dataSource[\"ManifestGRCh38\"], GenomeAssembly.GRCh38);\r\n                if (string.IsNullOrEmpty(manifestGRCh38))\r\n                    throw new InvalidDataException($\"Could not find the ManifestGRCh38 entry in the {appSettingsFilename} file.\");\r\n            }\r\n            else\r\n            {\r\n                manifestGRCh37 = LambdaUtilities.GetManifestUrl($\"{manifestPrefix}\", GenomeAssembly.GRCh37);\r\n                manifestGRCh38 = LambdaUtilities.GetManifestUrl($\"{manifestPrefix}\", GenomeAssembly.GRCh38);\r\n            }\r\n\r\n            return (hostName, '/' + cacheDir, '/' + referencesDir, manifestGRCh37, manifestGRCh38);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Downloader/Downloader.appsettings.json",
    "content": "{\r\n  \"DataSource\": {\r\n    \"HostName\": \"annotations.nirvana.illumina.com\",\r\n    \"ManifestGRCh37\": \"latest\",\r\n    \"ManifestGRCh38\": \"latest\"\r\n  }\r\n}"
  },
  {
    "path": "Downloader/Downloader.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\r\n  <PropertyGroup>\r\n    <OutputType>Exe</OutputType>\r\n    <TargetFramework>net6.0</TargetFramework>\r\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\r\n    \r\n  </PropertyGroup>\r\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\r\n  <ItemGroup>\r\n    <PackageReference Include=\"Microsoft.Extensions.Configuration\" Version=\"6.0.0\" />\r\n    <PackageReference Include=\"Microsoft.Extensions.Configuration.FileExtensions\" Version=\"6.0.0\" />\r\n    <PackageReference Include=\"Microsoft.Extensions.Configuration.Json\" Version=\"6.0.0\" />\r\n  </ItemGroup>\r\n  <ItemGroup>\r\n    <ProjectReference Include=\"..\\Cloud\\Cloud.csproj\" />\r\n    <ProjectReference Include=\"..\\CommandLine\\CommandLine.csproj\" />\r\n    <ProjectReference Include=\"..\\ErrorHandling\\ErrorHandling.csproj\" />\r\n    <ProjectReference Include=\"..\\ReferenceSequence\\ReferenceSequence.csproj\" />\r\n    <ProjectReference Include=\"..\\VariantAnnotation.Interface\\VariantAnnotation.Interface.csproj\" />\r\n  </ItemGroup>\r\n  <ItemGroup>\r\n    <None Update=\"Downloader.appsettings.json\">\r\n      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>\r\n    </None>\r\n  </ItemGroup>\r\n</Project>"
  },
  {
    "path": "Downloader/DownloaderMain.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.Linq;\r\nusing CommandLine.Builders;\r\nusing CommandLine.NDesk.Options;\r\nusing CommandLine.Utilities;\r\nusing Downloader.FileExtensions;\r\nusing Downloader.Utilities;\r\nusing ErrorHandling;\r\nusing Genome;\r\nusing VariantAnnotation.Interface;\r\nusing GenomeAssemblyHelper = Downloader.Utilities.GenomeAssemblyHelper;\r\n\r\nnamespace Downloader\r\n{\r\n    public static class DownloaderMain\r\n    {\r\n        private static string _genomeAssembly;\r\n        private static string _outputDirectory;\r\n        private static string _hostName;\r\n        private static string _manifestPrefix;\r\n\r\n        private static ExitCodes ProgramExecution()\r\n        {\r\n            (string hostName, string remoteCacheDir, string remoteReferencesDir, string manifestGRCh37,\r\n                string manifestGRCh38) = Configuration.Load(_hostName, _manifestPrefix);\r\n\r\n            List<GenomeAssembly> genomeAssemblies = GenomeAssemblyHelper.GetGenomeAssemblies(_genomeAssembly);\r\n\r\n            var client = new Client(hostName);\r\n            \r\n            Console.Write(\"- downloading manifest... 
\");\r\n            \r\n            Dictionary<GenomeAssembly, List<string>> remotePathsByGenomeAssembly =\r\n                Manifest.GetRemotePaths(client, genomeAssemblies, manifestGRCh37, manifestGRCh38);\r\n\r\n            (string cacheDir, string referencesDir, string saDir, List<string> outputDirectories) =\r\n                OutputDirectory.Create(_outputDirectory, genomeAssemblies);\r\n\r\n            var fileList = new List<RemoteFile>();\r\n            fileList.AddCacheFiles(genomeAssemblies, remoteCacheDir, cacheDir)\r\n                .AddReferenceFiles(genomeAssemblies, remoteReferencesDir, referencesDir)\r\n                .AddSupplementaryAnnotationFiles(remotePathsByGenomeAssembly, saDir);\r\n\r\n            Console.WriteLine($\"{fileList.Count} files.\\n\");\r\n            \r\n            // get rid of extra files in the output directories\r\n            OutputDirectory.Cleanup(fileList, outputDirectories, referencesDir);\r\n            \r\n            // get length, checksum, and checks existence\r\n            Console.WriteLine(\"- downloading file metadata:\");\r\n            AnnotationRepository.DownloadMetadata(client, fileList);\r\n            \r\n            // remove obsolete files from the output directory\r\n            OutputDirectory.RemoveOldFiles(fileList);\r\n            \r\n            // remove skipped files from our list\r\n            List<RemoteFile> filesToDownload = OutputDirectory.RemoveSkippedFiles(fileList);\r\n            \r\n            // download the latest files\r\n            if (filesToDownload.Count > 0)\r\n            {\r\n                long numBytesToDownload = OutputDirectory.GetNumDownloadBytes(filesToDownload);\r\n                DiskSpaceUtilities.CheckAvailableDiskSpace(_outputDirectory, numBytesToDownload);\r\n                Console.WriteLine($\"- downloading files ({MemoryUtilities.ToHumanReadable(numBytesToDownload)}):\");\r\n                \r\n                
AnnotationRepository.DownloadFiles(client, filesToDownload);\r\n            }\r\n            \r\n            // sanity check\r\n            OutputDirectory.CheckFiles(fileList);\r\n\r\n            bool foundError = fileList.Any(x => !x.Pass);\r\n            return foundError ? ExitCodes.InvalidData : ExitCodes.Success;\r\n        }\r\n\r\n        public static int Main(string[] args)\r\n        {\r\n            var ops = new OptionSet\r\n            {\r\n                {\r\n                    \"ga=\",\r\n                    \"genome assembly {version}\",\r\n                    v => _genomeAssembly = v\r\n                },\r\n                {\r\n                    \"host=\",\r\n                    \"annotation {hostname} (optional)\",\r\n                    v => _hostName = v\r\n                },\r\n                {\r\n                    \"manifest=\",\r\n                    \"manifest {prefix} (optional)\",\r\n                    v => _manifestPrefix = v\r\n                },\r\n                {\r\n                    \"out|o=\",\r\n                    \"top-level output {directory}\",\r\n                    v => _outputDirectory = v\r\n                }\r\n            };\r\n\r\n            ExitCodes exitCode = new ConsoleAppBuilder(args, ops)\r\n                .Parse()\r\n                .HasRequiredParameter(_genomeAssembly, \"genome assembly\", \"--ga\")\r\n                .CheckDirectoryExists(_outputDirectory, \"top-level output directory\", \"--out\")\r\n                .ShowBanner(Constants.Authors)\r\n                .ShowHelpMenu(\"Downloads the Nirvana data files from S3\",\r\n                    \"--ga <genome assembly> --out <output directory>\")\r\n                .ShowErrors()\r\n                .Execute(ProgramExecution);\r\n\r\n            return (int) exitCode;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Downloader/FileExtensions/CacheFileExtensions.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing Genome;\r\nusing IO;\r\n\r\nnamespace Downloader.FileExtensions\r\n{\r\n    public static class CacheFileExtensions\r\n    {\r\n        public static List<RemoteFile> AddCacheFiles(this List<RemoteFile> files,\r\n            IEnumerable<GenomeAssembly> genomeAssemblies, string remoteCacheDirectory, string cacheDirectory)\r\n        {\r\n            foreach (var genomeAssembly in genomeAssemblies)\r\n            {\r\n                files.AddCache(genomeAssembly, remoteCacheDirectory, cacheDirectory, \"transcripts\");\r\n                files.AddCache(genomeAssembly, remoteCacheDirectory, cacheDirectory, \"sift\");\r\n                files.AddCache(genomeAssembly, remoteCacheDirectory, cacheDirectory, \"polyphen\");\r\n            }\r\n\r\n            return files;\r\n        }\r\n\r\n        private static void AddCache(this ICollection<RemoteFile> files, GenomeAssembly genomeAssembly,\r\n            string remoteCacheDirectory, string cacheDirectory, string type)\r\n        {\r\n            string filename    = $\"Both.{type}.ndb\";\r\n            string remotePath  = $\"{remoteCacheDirectory}/{CacheConstants.DataVersion}/{genomeAssembly}/{filename}\";\r\n            string localPath   = Path.Combine(cacheDirectory, genomeAssembly.ToString(), filename);\r\n            string description = $\"{filename} ({genomeAssembly})\";\r\n            files.Add(new RemoteFile(remotePath, localPath, description));\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Downloader/FileExtensions/ReferencesFileExtensions.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing Genome;\r\nusing ReferenceSequence;\r\n\r\nnamespace Downloader.FileExtensions\r\n{\r\n    public static class ReferencesFileExtensions\r\n    {\r\n        public static List<RemoteFile> AddReferenceFiles(this List<RemoteFile> files, IEnumerable<GenomeAssembly> genomeAssemblies,\r\n            string remoteReferencesDirectory, string referencesDirectory)\r\n        {\r\n            // ReSharper disable once LoopCanBeConvertedToQuery\r\n            foreach (GenomeAssembly genomeAssembly in genomeAssemblies)\r\n            {\r\n                string filename   = GetFilename(genomeAssembly);\r\n                var    remotePath = $\"{remoteReferencesDirectory}/{ReferenceSequenceCommon.HeaderVersion}/{filename}\";\r\n                string localPath  = Path.Combine(referencesDirectory, filename);\r\n                files.Add(new RemoteFile(remotePath, localPath, filename));\r\n            }\r\n\r\n            return files;\r\n        }\r\n\r\n        public static string GetFilename(GenomeAssembly genomeAssembly) => $\"Homo_sapiens.{genomeAssembly}.Nirvana.dat\";\r\n    }\r\n}"
  },
  {
    "path": "Downloader/FileExtensions/SupplementaryAnnotationFileExtensions.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing Genome;\r\n\r\nnamespace Downloader.FileExtensions\r\n{\r\n    public static class SupplementaryAnnotationFileExtensions\r\n    {\r\n        private static readonly HashSet<string> NeedsIndexSet = new HashSet<string>();\r\n\r\n        static SupplementaryAnnotationFileExtensions()\r\n        {\r\n            NeedsIndexSet.Add(\".nsa\");\r\n            NeedsIndexSet.Add(\".npd\");\r\n            NeedsIndexSet.Add(\".rma\");\r\n            NeedsIndexSet.Add(\".gsa\");\r\n        }\r\n        \r\n        public static void AddSupplementaryAnnotationFiles(this List<RemoteFile> files,\r\n            Dictionary<GenomeAssembly, List<string>> remotePathsByGenomeAssembly, string saDirectory)\r\n        {\r\n            foreach ((var genomeAssembly, List<string> remotePaths) in remotePathsByGenomeAssembly)\r\n            {\r\n                files.AddDataSources(remotePaths, genomeAssembly, saDirectory);\r\n            }\r\n        }\r\n\r\n        private static void AddDataSources(this ICollection<RemoteFile> files, IEnumerable<string> remotePaths, GenomeAssembly genomeAssembly, string saDirectory)\r\n        {\r\n            foreach (string path in remotePaths)\r\n            {\r\n                files.AddFile(genomeAssembly, saDirectory, path);\r\n                string extension = Path.GetExtension(path);\r\n                if (NeedsIndexSet.Contains(extension)) files.AddFile(genomeAssembly, saDirectory, path + \".idx\");\r\n            }\r\n        }\r\n\r\n        private static void AddFile(this ICollection<RemoteFile> files, GenomeAssembly genomeAssembly, string saDirectory, string path)\r\n        {\r\n            string filename    = Path.GetFileName(path);\r\n            string remotePath  = path;\r\n            string localPath   = Path.Combine(saDirectory, genomeAssembly.ToString(), filename);\r\n            string description = $\"{filename} ({genomeAssembly})\";\r\n            
files.Add(new RemoteFile(remotePath, localPath, description));\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Downloader/IClient.cs",
    "content": "﻿using System.Collections.Generic;\r\n\r\nnamespace Downloader\r\n{\r\n    /// <summary>\r\n    /// IClient should abstract away all network activity for improved testing\r\n    /// </summary>\r\n    public interface IClient\r\n    {\r\n        List<string> DownloadLines(string remotePath);\r\n        bool SetMetadata(RemoteFile file);\r\n        bool DownloadFile(RemoteFile file);\r\n    }\r\n}\r\n"
  },
  {
    "path": "Downloader/Manifest.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing Genome;\r\n\r\nnamespace Downloader\r\n{\r\n    public static class Manifest\r\n    {\r\n        public static Dictionary<GenomeAssembly, List<string>> GetRemotePaths(IClient client,\r\n            IEnumerable<GenomeAssembly> genomeAssemblies, string manifestGRCh37, string manifestGRCh38)\r\n        {\r\n            IEnumerable<(GenomeAssembly GenomeAssembly, string ManifestPath)> genomeAssemblyPaths =\r\n                CreateGenomeAssemblyPaths(manifestGRCh37, manifestGRCh38, genomeAssemblies);\r\n\r\n            var remotePathsByGenomeAssembly = new Dictionary<GenomeAssembly, List<string>>();\r\n\r\n            foreach ((var genomeAssembly, string manifestPath) in genomeAssemblyPaths)\r\n            {\r\n                List<string> remotePaths = client.DownloadLines(manifestPath);\r\n                remotePathsByGenomeAssembly[genomeAssembly] = remotePaths;\r\n            }\r\n\r\n            return remotePathsByGenomeAssembly;\r\n        }\r\n\r\n        internal static IEnumerable<(GenomeAssembly GenomeAssembly, string ManifestPath)> CreateGenomeAssemblyPaths(\r\n            string manifestGRCh37, string manifestGRCh38, IEnumerable<GenomeAssembly> genomeAssemblies)\r\n        {\r\n            var genomeAssemblyPaths = new List<(GenomeAssembly, string)>();\r\n\r\n            foreach (var genomeAssembly in genomeAssemblies)\r\n            {\r\n                // ReSharper disable once SwitchStatementMissingSomeCases\r\n                switch (genomeAssembly)\r\n                {\r\n                    case GenomeAssembly.GRCh37:\r\n                        genomeAssemblyPaths.Add((genomeAssembly, manifestGRCh37));\r\n                        break;\r\n                    case GenomeAssembly.GRCh38:\r\n                        genomeAssemblyPaths.Add((genomeAssembly, manifestGRCh38));\r\n                        break;\r\n                }\r\n            }\r\n\r\n            return 
genomeAssemblyPaths;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Downloader/OutputDirectory.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing Downloader.FileExtensions;\r\nusing Downloader.Utilities;\r\nusing Genome;\r\n\r\nnamespace Downloader\r\n{\r\n    public static class OutputDirectory\r\n    {\r\n        public static (string Cache, string Reference, string SupplementaryAnnotation, List<string> OutputDirectories) Create(string outputDirectory, List<GenomeAssembly> genomeAssemblies)\r\n        {\r\n            string cacheDirectory      = Path.Combine(outputDirectory, \"Cache\");\r\n            string referencesDirectory = Path.Combine(outputDirectory, \"References\");\r\n            string saDirectory         = Path.Combine(outputDirectory, \"SupplementaryAnnotation\");\r\n\r\n            var outputDirectories = new List<string> {referencesDirectory};\r\n\r\n            CreateGenomeAssemblySubdirectories(cacheDirectory, genomeAssemblies, outputDirectories);\r\n            CreateGenomeAssemblySubdirectories(saDirectory,    genomeAssemblies, outputDirectories);\r\n            Directory.CreateDirectory(referencesDirectory);\r\n\r\n            return (cacheDirectory, referencesDirectory, saDirectory, outputDirectories);\r\n        }\r\n\r\n        private static void CreateGenomeAssemblySubdirectories(string topLevelDirectory, IEnumerable<GenomeAssembly> genomeAssemblies, ICollection<string> outputDirectories)\r\n        {\r\n            foreach (var genomeAssembly in genomeAssemblies)\r\n            {\r\n                string directory = Path.Combine(topLevelDirectory, genomeAssembly.ToString());\r\n                outputDirectories.Add(directory);\r\n                Directory.CreateDirectory(directory);\r\n            }\r\n        }\r\n\r\n        public static void Cleanup(IEnumerable<RemoteFile> files, IEnumerable<string> outputDirectories, string referencesDirectory)\r\n        {\r\n            IEnumerable<string> existingFiles  = GetExistingFiles(outputDirectories);\r\n         
   IEnumerable<string> referenceFiles = GetReferenceFiles(referencesDirectory);\r\n            List<string>        desiredFiles   = files.Select(x => x.LocalPath).ToList();\r\n            List<string>        filesToDelete  = existingFiles.Except(desiredFiles).Except(referenceFiles).ToList();\r\n\r\n            if (filesToDelete.Count == 0) return;\r\n\r\n            Console.WriteLine(\"- removing extra files in output directories\");\r\n\r\n            foreach (string file in filesToDelete)\r\n            {\r\n                Console.WriteLine($\"  - deleting extra file: {file}\");\r\n                File.Delete(file);\r\n            }\r\n\r\n            Console.WriteLine();\r\n        }\r\n\r\n        private static IEnumerable<string> GetReferenceFiles(string referencesDirectory) => new List<string>\r\n        {\r\n            Path.Combine(referencesDirectory, ReferencesFileExtensions.GetFilename(GenomeAssembly.GRCh37)),\r\n            Path.Combine(referencesDirectory, ReferencesFileExtensions.GetFilename(GenomeAssembly.GRCh38))\r\n        };\r\n\r\n        private static IEnumerable<string> GetExistingFiles(IEnumerable<string> outputDirectories)\r\n        {\r\n            var existingFiles = new List<string>();\r\n\r\n            foreach (string outputDir in outputDirectories)\r\n            {\r\n                string[] files = Directory.GetFiles(outputDir, \"*\", SearchOption.TopDirectoryOnly);\r\n\r\n                foreach (string localPath in files)\r\n                {\r\n                    if (!localPath.StartsWith(outputDir)) continue;\r\n                    existingFiles.Add(localPath);\r\n                }\r\n            }\r\n\r\n            return existingFiles;\r\n        }\r\n\r\n        public static void RemoveOldFiles(IEnumerable<RemoteFile> files)\r\n        {\r\n            var filesToDelete = new List<RemoteFile>();\r\n\r\n            foreach (var file in files)\r\n            {\r\n                var fileInfo = new 
FileInfo(file.LocalPath);\r\n                if (!fileInfo.Exists || file.Skipped) continue;\r\n\r\n                if (HasDifferentFileSize(fileInfo.Length, file.FileSize) ||\r\n                    HasOlderFile(fileInfo.CreationTimeUtc, file.LastModified))\r\n                {\r\n                    filesToDelete.Add(file);\r\n                    continue;\r\n                }\r\n\r\n                // these files already exist and can be skipped\r\n                file.Skipped = true;\r\n            }\r\n\r\n            if (filesToDelete.Count == 0) return;\r\n\r\n            Console.WriteLine(\"- removing old files:\");\r\n            foreach (var file in filesToDelete)\r\n            {\r\n                Console.WriteLine($\"  - deleting {file.Description}\");\r\n                File.Delete(file.LocalPath);\r\n            }\r\n\r\n            Console.WriteLine();\r\n        }\r\n\r\n        private static bool HasOlderFile(in DateTimeOffset localOffset, DateTimeOffset remoteOffset) =>\r\n            DateTimeOffset.Compare(remoteOffset, localOffset) == 1;\r\n\r\n        private static bool HasDifferentFileSize(long localLength, long remoteLength) => localLength != remoteLength;\r\n\r\n        public static long GetNumDownloadBytes(IEnumerable<RemoteFile> files)\r\n        {\r\n            long numBytes = 0;\r\n            foreach (var file in files) numBytes += file.FileSize;\r\n            return numBytes;\r\n        }\r\n\r\n        public static List<RemoteFile> RemoveSkippedFiles(List<RemoteFile> files)\r\n        {\r\n            var filesToDownload = new List<RemoteFile>(files.Count);\r\n\r\n            foreach (var file in files.OrderBy(x => x.FileSize))\r\n            {\r\n                if (file.Skipped) continue;\r\n                filesToDownload.Add(file);\r\n            }\r\n\r\n            return filesToDownload;\r\n        }\r\n\r\n        public static void CheckFiles(IEnumerable<RemoteFile> files)\r\n        {\r\n            var divider = new 
string('-', 75);\r\n            \r\n            Console.WriteLine(\"Description                                                     Status\");\r\n            Console.WriteLine(divider);\r\n            \r\n            foreach (var file in files.OrderBy(x => x.Description))\r\n            {\r\n                string description = GetPaddedField(file.Description, 58);\r\n                Console.Write($\"{description} \");\r\n                PrintStatus(file);\r\n                Console.WriteLine();\r\n            }\r\n            \r\n            Console.WriteLine(divider);\r\n        }\r\n        \r\n        private static string GetPaddedField(string s, int fieldLength)\r\n        {\r\n            if (s.Length > fieldLength) return s.Substring(0, fieldLength - 3) + \"...\";\r\n            return s.PadRight(fieldLength, ' ');\r\n        }\r\n\r\n        private static void PrintStatus(RemoteFile file)\r\n        {\r\n            if (file.Missing)\r\n            {\r\n                ConsoleEmbellishments.PrintWarning(\"Missing (server)\");\r\n                return;\r\n            }\r\n            \r\n            var fileInfo = new FileInfo(file.LocalPath);\r\n\r\n            if (!fileInfo.Exists)\r\n            {\r\n                ConsoleEmbellishments.PrintError(\"Missing (local)\");\r\n                return;\r\n            }\r\n\r\n            if (fileInfo.Length < file.FileSize)\r\n            {\r\n                ConsoleEmbellishments.PrintError(\"    Truncated\");\r\n                return;\r\n            }\r\n            \r\n            if (fileInfo.Length > file.FileSize)\r\n            {\r\n                ConsoleEmbellishments.PrintError(\"    Too large\");\r\n                return;\r\n            }\r\n            \r\n            ConsoleEmbellishments.PrintSuccess(\"       OK\");\r\n            file.Pass = true;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Downloader/RemoteFile.cs",
    "content": "﻿using System;\r\n\r\nnamespace Downloader\r\n{\r\n    public sealed class RemoteFile\r\n    {\r\n        public readonly string RemotePath;\r\n        public readonly string LocalPath;\r\n        public readonly string Description;\r\n\r\n        public DateTimeOffset LastModified;\r\n        public long           FileSize;\r\n        public bool           Skipped; // skipped from downloading\r\n        public bool           Missing; // missing from the server\r\n        public bool           Pass; // passes the checks after download\r\n\r\n        public RemoteFile(string remotePath, string localPath, string description)\r\n        {\r\n            RemotePath  = remotePath;\r\n            LocalPath   = localPath;\r\n            Description = description;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Downloader/Utilities/ConsoleEmbellishments.cs",
    "content": "using System;\n\nnamespace Downloader.Utilities\n{\n    public static class ConsoleEmbellishments\n    {\n        public static void PrintWarning(string s) => Highlight(s, ConsoleColor.Yellow);\n\n        public static void PrintError(string s) => Highlight(s, ConsoleColor.Red);\n        \n        public static void PrintSuccess(string s) => Highlight(s, ConsoleColor.Green);\n\n        private static void Highlight(string s, ConsoleColor color)\n        {\n            Console.ForegroundColor = color;\n            Console.Write(s);\n            Console.ResetColor();\n        }\n    }\n}"
  },
  {
    "path": "Downloader/Utilities/DiskSpaceUtilities.cs",
    "content": "﻿using System;\nusing System.IO;\nusing CommandLine.Utilities;\n\nnamespace Downloader.Utilities\n{\n    public static class DiskSpaceUtilities\n    {\n        public static void CheckAvailableDiskSpace(string outputDirectory, long numBytesToDownload)\n        {\n            string    absolutePath = GetAbsolutePath(outputDirectory);\n            DriveInfo driveInfo    = GetDriveWithLongestCommonPrefix(absolutePath);\n\n            // skip available disk space checking if we can't figure out which drive is being used \n            if (driveInfo == null) return;\n\n            long numAvailableBytes = driveInfo.AvailableFreeSpace;\n            if (numBytesToDownload <= numAvailableBytes) return;\n\n            string neededSpace    = MemoryUtilities.ToHumanReadable(numBytesToDownload);\n            string availableSpace = MemoryUtilities.ToHumanReadable(numAvailableBytes);\n\n            ConsoleEmbellishments.PrintError(\"Not enough disk space available\");\n            Console.WriteLine($\" in {absolutePath}. 
Need: {neededSpace}, available: {availableSpace}\");\n            Environment.Exit(1);\n        }\n\n        private static string GetAbsolutePath(string directoryPath)\n        {\n            var    directoryInfo = new DirectoryInfo(directoryPath);\n            string absolutePath  = directoryInfo.FullName;\n\n            // the absolute path in Windows doesn't always provide the drive letter in uppercase\n            // this is benign on Linux since the root is always /\n            string root = directoryInfo.Root.ToString().ToUpperInvariant();\n            return root + absolutePath.Substring(root.Length);\n        }\n\n        private static DriveInfo GetDriveWithLongestCommonPrefix(string absolutePath)\n        {\n            DriveInfo[] allDrives = DriveInfo.GetDrives();\n\n            var       maxPrefixLength = 0;\n            DriveInfo maxPrefixDrive  = null;\n\n            foreach (DriveInfo d in allDrives)\n            {\n                // Windows drive letters are always in uppercase\n                if (!d.IsReady || !absolutePath.StartsWith(d.Name) || d.Name.Length <= maxPrefixLength) continue;\n                maxPrefixLength = d.Name.Length;\n                maxPrefixDrive  = d;\n            }\n\n            return maxPrefixDrive;\n        }\n    }\n}"
  },
  {
    "path": "Downloader/Utilities/GenomeAssemblyHelper.cs",
    "content": "﻿using System.Collections.Generic;\nusing ErrorHandling.Exceptions;\nusing Genome;\n\nnamespace Downloader.Utilities\n{\n    public static class GenomeAssemblyHelper\n    {\n        public static List<GenomeAssembly> GetGenomeAssemblies(string genomeAssembly)\n        {\n            genomeAssembly = genomeAssembly.ToLower();\n            var genomeAssemblies = new List<GenomeAssembly>();\n\n            switch (genomeAssembly.ToLower())\n            {\n                case \"grch37\":\n                    genomeAssemblies.Add(GenomeAssembly.GRCh37);\n                    break;\n                case \"grch38\":\n                    genomeAssemblies.Add(GenomeAssembly.GRCh38);\n                    break;\n                case \"both\":\n                    genomeAssemblies.Add(GenomeAssembly.GRCh37);\n                    genomeAssemblies.Add(GenomeAssembly.GRCh38);\n                    break;\n                default:\n                    throw new UserErrorException($\"Found an unknown genome assembly ({genomeAssembly}). Expected: GRCh37, GRCh38, or both\");\n            }\n\n            return genomeAssemblies;\n        }\n    }\n}\n"
  },
  {
    "path": "Downloader/Utilities/ParallelUtilities.cs",
    "content": "using System;\nusing System.Collections.Generic;\nusing System.Threading;\nusing System.Threading.Tasks;\nusing CommandLine.Utilities;\nusing ErrorHandling.Exceptions;\n\nnamespace Downloader.Utilities\n{\n    public static class ParallelUtilities\n    {\n        private const int NumThreads = 5;\n\n        public static void ParallelExecute(this List<RemoteFile> files, Func<RemoteFile, bool> clientFunc,\n            Action<RemoteFile, Func<RemoteFile, bool>, CancellationTokenSource, string> httpAction,\n            string finishedMessage, string exceptionMessage)\n        {\n            var bench     = new Benchmark();\n            var tasks     = new Task[files.Count];\n            var maxThread = new SemaphoreSlim(NumThreads);\n\n            var tokenSource       = new CancellationTokenSource();\n            var cancellationToken = tokenSource.Token;\n\n            try\n            {\n                for (var i = 0; i < files.Count; i++)\n                {\n                    maxThread.Wait(cancellationToken);\n\n                    var file = files[i];\n                    tasks[i] = Task.Factory\n                        .StartNew(() => httpAction(file, clientFunc, tokenSource, exceptionMessage), TaskCreationOptions.LongRunning)\n                        .ContinueWith(task => maxThread.Release(), cancellationToken);\n\n                    if (cancellationToken.IsCancellationRequested) break;\n                }\n\n                Task.WaitAll(tasks);\n                Console.WriteLine($\"  - {finishedMessage} ({Benchmark.ToHumanReadable(bench.GetElapsedTime())}).\\n\");\n            }\n            catch (OperationCanceledException)\n            {\n                throw new UserErrorException($\"Unable to {exceptionMessage}. Please verify network connection.\");\n            }\n        }\n    }\n}"
  },
  {
    "path": "Downloader/Utilities/SyncUtilities.cs",
    "content": "using System.Threading.Tasks;\n\nnamespace Downloader.Utilities\n{\n    public static class SyncUtilities\n    {\n        public static T AsSync<T>(this Task<T> task) => task.ConfigureAwait(false).GetAwaiter().GetResult();\n    }\n}"
  },
  {
    "path": "ErrorHandling/AssemblyInfo.cs",
    "content": "﻿using System.Runtime.CompilerServices;\n\n[assembly: InternalsVisibleTo(\"UnitTests\")]"
  },
  {
    "path": "ErrorHandling/ErrorCategory.cs",
    "content": "﻿namespace ErrorHandling\r\n{\r\n    public enum ErrorCategory\r\n    {\r\n        UserError,\r\n        NirvanaError,\r\n        TimeOutError,\r\n        InvocationThrottledError\r\n    }\r\n}"
  },
  {
    "path": "ErrorHandling/ErrorHandling.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\r\n  <PropertyGroup>\r\n    <TargetFramework>net6.0</TargetFramework>\r\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\r\n    \r\n  </PropertyGroup>\r\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\r\n  <ItemGroup>\r\n    <PackageReference Include=\"AWSSDK.S3\" Version=\"3.7.8.3\" />\r\n  </ItemGroup>\r\n</Project>"
  },
  {
    "path": "ErrorHandling/ExceptionUtilities.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing ErrorHandling.Exceptions;\r\n\r\nnamespace ErrorHandling\r\n{\r\n    public static class ExceptionUtilities\r\n    {\r\n        public const string UserError = \"UserError\";\r\n        \r\n        public static Exception MakeUserError(this Exception e)\r\n        {\r\n            e.Data[UserError] = true;\r\n            return e;\r\n        }\r\n        \r\n        // define which exceptions should not include a full stack trace\r\n        public static readonly HashSet<Type> UserFriendlyExceptions = new HashSet<Type>\r\n        {\r\n            typeof(UserErrorException),\r\n            typeof(FileNotSortedException),\r\n            typeof(UnauthorizedAccessException),\r\n            typeof(InvalidFileFormatException),\r\n            typeof(ProcessLockedFileException),\r\n            typeof(OutOfMemoryException),\r\n            typeof(MissingCompressionLibraryException)\r\n        };\r\n\r\n        public static bool HasException<T>(Exception e)\r\n        {\r\n            if (e == null) return false;\r\n            return e is T || HasException<T>(e.InnerException);\r\n        }\r\n\r\n        public static bool HasErrorMessage(this Exception e, string errorMessage)\r\n        {\r\n            if (e == null) return false;\r\n            return e.Message  == errorMessage|| e.InnerException.HasErrorMessage(errorMessage);\r\n        }\r\n\r\n        public static Exception GetInnermostException(Exception e)\r\n        {\r\n            while (e.InnerException != null) e = e.InnerException;\r\n            return e;\r\n        }\r\n\r\n        public static ErrorCategory ExceptionToErrorCategory(Exception exception) => UserFriendlyExceptions.Contains(exception.GetType()) ? ErrorCategory.UserError : ErrorCategory.NirvanaError;\r\n    }\r\n}"
  },
  {
    "path": "ErrorHandling/Exceptions/CompressionException.cs",
    "content": "﻿using System;\r\n\r\nnamespace ErrorHandling.Exceptions\r\n{\r\n\tpublic sealed class CompressionException : Exception\r\n\t{\r\n\t\t// constructor\r\n\t\tpublic CompressionException(string message) : base(message) { }\r\n\t}\r\n}\r\n"
  },
  {
    "path": "ErrorHandling/Exceptions/DeploymentErrorException.cs",
    "content": "﻿using System;\n\nnamespace ErrorHandling.Exceptions\n{\n    public sealed class DeploymentErrorException : Exception\n    {\n        public DeploymentErrorException(string message) : base(message) { }\n    }\n}"
  },
  {
    "path": "ErrorHandling/Exceptions/FileNotSortedException.cs",
    "content": "﻿using System;\n\nnamespace ErrorHandling.Exceptions\n{\n    public sealed class FileNotSortedException : Exception\n    {\n        // constructor\n        public FileNotSortedException(string message) : base(message) { }\n    }\n}\n"
  },
  {
    "path": "ErrorHandling/Exceptions/InvalidFileFormatException.cs",
    "content": "﻿using System;\n\nnamespace ErrorHandling.Exceptions\n{\n    public sealed class InvalidFileFormatException : Exception\n    {\n        // constructor\n        public InvalidFileFormatException(string message) : base(message) { }\n    }\n}\n"
  },
  {
    "path": "ErrorHandling/Exceptions/MissingCompressionLibraryException.cs",
    "content": "﻿using System;\n\nnamespace ErrorHandling.Exceptions\n{\n    public sealed class MissingCompressionLibraryException : Exception\n    {\n        // constructor\n        public MissingCompressionLibraryException(string missingLibraryFilename) : base(GetErrorMessage(missingLibraryFilename)) { }\n\n        /// <summary>\n        /// returns the error message given the missing compression library filename\n        /// </summary>\n        private static string GetErrorMessage(string missingLibraryFilename)\n        {\n            return $\"Unable to find the block GZip compression library ({missingLibraryFilename})\";\n        }\n    }\n}\n"
  },
  {
    "path": "ErrorHandling/Exceptions/ProcessLockedFileException.cs",
    "content": "﻿using System;\n\nnamespace ErrorHandling.Exceptions\n{\n    public sealed class ProcessLockedFileException : Exception\n    {\n        // constructor\n        public ProcessLockedFileException(string message) : base(message) { }\n    }\n}\n"
  },
  {
    "path": "ErrorHandling/Exceptions/UserErrorException.cs",
    "content": "﻿using System;\n\nnamespace ErrorHandling.Exceptions\n{\n    public sealed class UserErrorException : Exception\n    {\n        // constructor\n        public UserErrorException(string message) : base(message) { }\n    }\n}\n"
  },
  {
    "path": "ErrorHandling/ExitCodeUtilities.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing ErrorHandling.Exceptions;\r\n\r\nnamespace ErrorHandling\r\n{\r\n\tpublic static class ExitCodeUtilities\r\n\t{\r\n\t\tprivate static readonly Dictionary<Type, ExitCodes> ExceptionsToExitCodes;\r\n\t    public const string VcfLine = \"VcfLine\";\r\n\t    public const string Line = \"Line\"; \r\n\r\n\t\t// constructor\r\n\t\tstatic ExitCodeUtilities()\r\n\t\t{\r\n\t\t\t// add the exception to exit code mappings\r\n\t\t\tExceptionsToExitCodes = new Dictionary<Type, ExitCodes>\r\n\t\t\t{\r\n\t\t\t\t{ typeof(ArgumentNullException),              ExitCodes.BadArguments },\r\n\t\t\t\t{ typeof(ArgumentOutOfRangeException),        ExitCodes.BadArguments },\r\n\t\t\t\t{ typeof(Exception),                          ExitCodes.InvalidFunction },\r\n\t\t\t\t{ typeof(FileNotFoundException),              ExitCodes.FileNotFound },\r\n\t\t\t\t{ typeof(FileNotSortedException),             ExitCodes.FileNotSorted },\r\n\t\t\t\t{ typeof(FormatException),                    ExitCodes.BadFormat },\r\n\t\t\t\t{ typeof(InvalidDataException),               ExitCodes.InvalidData },\r\n\t\t\t\t{ typeof(InvalidFileFormatException),         ExitCodes.InvalidFileFormat },\r\n\t\t\t\t{ typeof(InvalidOperationException),          ExitCodes.InvalidFunction },\r\n\t\t\t\t{ typeof(NotImplementedException),            ExitCodes.CallNotImplemented },\r\n\t\t\t\t{ typeof(UserErrorException),                 ExitCodes.UserError },\r\n\t\t\t\t{ typeof(UnauthorizedAccessException),        ExitCodes.AccessDenied },\r\n\t\t\t\t{ typeof(ProcessLockedFileException),         ExitCodes.SharingViolation },\r\n\t\t\t\t{ typeof(OutOfMemoryException),               ExitCodes.OutofMemory },\r\n\t\t\t\t{ typeof(MissingCompressionLibraryException), ExitCodes.MissingCompressionLibrary },\r\n\t\t\t    { typeof(CompressionException),               ExitCodes.Compression }\r\n            };\r\n\t\t}\r\n\r\n\t    internal static 
ExitCodes GetExitCode(Type exceptionType)\r\n\t    {\r\n            if (!ExceptionsToExitCodes.TryGetValue(exceptionType, out ExitCodes exitCode)) exitCode = ExitCodes.InvalidFunction;\r\n            return exitCode;\r\n\t    }\r\n\r\n\t\t/// <summary>\r\n\t\t/// Displays the message of the innermost exception and returns the matching exit code.\r\n\t\t/// For exceptions that are not user friendly, also prints the stack trace and any attached VCF/input line.\r\n\t\t/// </summary>\r\n\t\tpublic static ExitCodes ShowException(Exception e)\r\n\t\t{\r\n\t\t\tConsole.ForegroundColor = ConsoleColor.Red;\r\n\t\t\tConsole.Write(\"\\nERROR: \");\r\n\t\t\tConsole.ResetColor();\r\n\r\n\t\t    e = ExceptionUtilities.GetInnermostException(e);\r\n\r\n            Console.WriteLine(\"{0}\", e.Message);\r\n\r\n\t\t\tvar exceptionType = e.GetType();\r\n\r\n\t\t    // ReSharper disable once InvertIf\r\n\t\t\tif (!ExceptionUtilities.UserFriendlyExceptions.Contains(exceptionType))\r\n\t\t\t{\r\n\t\t\t\t// print the stack trace\r\n\t\t\t\tConsole.ForegroundColor = ConsoleColor.Red;\r\n\t\t\t\tConsole.WriteLine(\"\\nStack trace:\");\r\n\t\t\t\tConsole.ResetColor();\r\n\t\t\t\tConsole.WriteLine(e.StackTrace);\r\n\r\n\t\t\t\t// extract out the vcf line\r\n\t\t\t    // ReSharper disable once InvertIf\r\n\t\t\t\tif (e.Data.Contains(VcfLine))\r\n\t\t\t\t{\r\n\t\t\t\t\tConsole.ForegroundColor = ConsoleColor.Red;\r\n\t\t\t\t\tConsole.WriteLine(\"\\nVCF line:\");\r\n\t\t\t\t\tConsole.ResetColor();\r\n\t\t\t\t\tConsole.WriteLine(e.Data[VcfLine]);\r\n\t\t\t\t}\r\n\t\t\t\t\r\n\t\t\t\tif (e.Data.Contains(Line))\r\n\t\t\t\t{\r\n\t\t\t\t\tConsole.ForegroundColor = ConsoleColor.Red;\r\n\t\t\t\t\tConsole.WriteLine(\"\\nLine:\");\r\n\t\t\t\t\tConsole.ResetColor();\r\n\t\t\t\t\tConsole.WriteLine(e.Data[Line]);\r\n\t\t\t\t}\r\n\t\t\t}\r\n\r\n\t\t    return GetExitCode(exceptionType);\r\n\t\t}\r\n    }\r\n}"
  },
  {
    "path": "ErrorHandling/ExitCodes.cs",
    "content": "﻿namespace ErrorHandling\r\n{\r\n    /// <summary>\r\n    /// Common Windows Error Codes: https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382(v=vs.85).aspx\r\n    /// C:\\Program Files (x86)\\Windows Kits\\8.1\\Include\\shared\\winerror.h\r\n    /// </summary>\r\n    public enum ExitCodes\r\n    {\r\n        // ================\r\n        // Windows-specific\r\n        // ================\r\n\r\n        Success            = 0,\r\n        InvalidFunction    = 1,\r\n        FileNotFound       = 2,\r\n        PathNotFound       = 3,\r\n        AccessDenied       = 5,\r\n        BadFormat          = 11,\r\n        InvalidData        = 13,\r\n        OutofMemory        = 14,\r\n        SharingViolation   = 32,\r\n        CallNotImplemented = 120,\r\n        BadArguments       = 160,\r\n\r\n        // =================\r\n        // Illumina-specific\r\n        // =================\r\n\r\n        // command-line (200 - 209)\r\n        UnknownCommandLineOption = 200,\r\n        MissingCommandLineOption = 201,\r\n\r\n        // general (210 - 219)\r\n        UserError = 210,\r\n\r\n        // file (220 - 229)\r\n        InvalidFileFormat         = 220,\r\n        FileNotSorted             = 221,\r\n        MissingCompressionLibrary = 223,\r\n\r\n        // functionality (240 - 259)\r\n        Compression = 240\r\n    }\r\n}"
  },
  {
    "path": "GeneAnnotationLambda/GeneAnnotationLambda.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing System.Text;\nusing Amazon.Lambda.Core;\nusing Amazon.Lambda.Serialization.Json;\nusing Cloud;\nusing Cloud.Messages.Gene;\nusing Cloud.Notifications;\nusing Cloud.Utilities;\nusing CommandLine.Utilities;\nusing ErrorHandling.Exceptions;\nusing Genome;\nusing IO;\nusing Jasix.DataStructures;\nusing VariantAnnotation.GeneAnnotation;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.IO;\nusing VariantAnnotation.SA;\nusing VariantAnnotation.Utilities;\n\n[assembly: LambdaSerializer(typeof(JsonSerializer))]\n\nnamespace GeneAnnotationLambda\n{\n    // ReSharper disable once UnusedMember.Global\n    // ReSharper disable once ClassNeverInstantiated.Global\n    public class GeneAnnotationLambda\n    {\n        private readonly string _saPathPrefix = LambdaUrlHelper.GetBaseUrl();\n        \n        // ReSharper disable once UnusedMember.Global\n        public Stream Run(GeneConfig config, ILambdaContext context)\n        {\n            string snsTopicArn = null;\n            var runLog = new StringBuilder();\n\n            try\n            {\n                LogUtilities.UpdateLogger(context.Logger, runLog);\n                LogUtilities.LogLambdaInfo(context, CommandLineUtilities.InformationalVersion);\n                LogUtilities.LogObject(\"Config\", config);\n                LogUtilities.Log(new[] { LambdaUrlHelper.UrlBaseEnvironmentVariableName, LambdaUtilities.SnsTopicKey });\n\n                LambdaUtilities.GarbageCollect();\n\n                snsTopicArn = LambdaUtilities.GetEnvironmentVariable(LambdaUtilities.SnsTopicKey);\n\n                config.Validate();\n                // SaVersion will be provided as an environment variable. 
Defaults to \"latest\"\n                string saVersion     = Environment.GetEnvironmentVariable(\"SaVersion\");\n                string saManifestUrl = LambdaUtilities.GetManifestUrl(saVersion, GenomeAssembly.GRCh38, SaCommon.SchemaVersion);\n                string result        = GetGeneAnnotation(config, saManifestUrl, _saPathPrefix);\n                \n                return LambdaResponse.Create(config.id, LambdaUrlHelper.SuccessMessage, result);\n            }\n            catch (Exception e)\n            {\n                return HandleException(config.id, snsTopicArn, e);\n            }\n        }\n\n        private static Stream HandleException(string id, string snsTopicArn, Exception e)\n        {\n            Logger.Log(e);\n\n            string snsMessage = SNS.CreateMessage(e.Message, \"exception\", e.StackTrace);\n            SNS.SendMessage(snsTopicArn, snsMessage);\n\n            return LambdaResponse.Create(id, e.Message, null);\n        }\n\n        public static string GetGeneAnnotation(GeneConfig input, string saManifestFilePath, string saPathPrefix)\n        {\n            var geneAnnotationProvider = new GeneAnnotationProvider(PersistentStreamUtils.GetStreams(\n                                        GetNgaFileList(saManifestFilePath, saPathPrefix, input.ngaUrls).ToList()));\n\n            var sb = new StringBuilder(1024 * 1024);\n            var jsonObject = new JsonObject(sb);\n            \n            sb.Append(JsonObject.OpenBrace);\n            jsonObject.AddStringValue(JasixCommons.HeaderSectionTag, GetHeader(geneAnnotationProvider), false);\n            \n            //not all gene symbols have annotations. 
So, we need to check and only output the ones that are not null\n            var geneAnnotations = input.geneSymbols.Select(geneSymbol => geneAnnotationProvider.Annotate(geneSymbol))\n                                                   .Where(annotation => !string.IsNullOrEmpty(annotation))\n                                                   .ToList();\n\n            jsonObject.AddStringValues(\"genes\", geneAnnotations, false);\n            sb.Append(JsonObject.CloseBrace);\n\n            // AWS lambda response message can not be larger than 6MB\n            if (sb.Length > 6_000_000)\n                throw new UserErrorException(\"Too many genes provided in the request. Please decrease the number of genes and try again later.\");\n            \n            return sb.ToString();\n        }\n\n        private static string GetHeader(IProvider geneAnnotationProvider)\n        {\n            var sb = new StringBuilder();\n            var jsonObject = new JsonObject(sb);\n\n            sb.Append(JsonObject.OpenBrace);\n            jsonObject.AddStringValue(\"annotator\", \"Nirvana \" + CommandLineUtilities.Version);\n            jsonObject.AddStringValue(\"creationTime\", Date.CurrentTimeStamp);\n            jsonObject.AddIntValue(\"schemaVersion\", SaCommon.SchemaVersion);\n            jsonObject.AddObjectValues(\"dataSources\", geneAnnotationProvider.DataSourceVersions);\n            sb.Append(JsonObject.CloseBrace);\n\n            return sb.ToString();\n        }\n\n        public static IEnumerable<string> GetNgaFileList(string saManifestPath, string saPathPrefix, string[] ngaFiles)\n        {\n            using (var reader = new StreamReader(PersistentStreamUtils.GetReadStream(saManifestPath)))\n            {\n                string line;\n                while ((line = reader.ReadLine()) != null)\n                {\n                    string filePath = saPathPrefix + line;\n                    string suffix = filePath.GetFileSuffix(true);\n                    if 
(suffix == SaCommon.GeneFileSuffix) yield return filePath;\n                }\n            }\n\n            if (ngaFiles == null) yield break;\n            \n            foreach (string ngaFile in ngaFiles) yield return ngaFile;\n        }\n    }\n}"
  },
  {
    "path": "GeneAnnotationLambda/GeneAnnotationLambda.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\n  <PropertyGroup>\n    <TargetFramework>net6.0</TargetFramework>\n    <GenerateRuntimeConfigurationFiles>true</GenerateRuntimeConfigurationFiles>\n    <AWSProjectType>Lambda</AWSProjectType>\n    <OutputPath>bin\\$(Configuration)</OutputPath>\n    \n  </PropertyGroup>\n  <ItemGroup>\n    <PackageReference Include=\"Amazon.Lambda.Serialization.Json\" Version=\"2.0.0\" />\n    <PackageReference Include=\"AWSSDK.Core\" Version=\"3.7.8.5\" />\n    <PackageReference Include=\"AWSSDK.Lambda\" Version=\"3.7.9.3\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ProjectReference Include=\"..\\Cloud\\Cloud.csproj\" />\n    <ProjectReference Include=\"..\\ErrorHandling\\ErrorHandling.csproj\" />\n    <ProjectReference Include=\"..\\VariantAnnotation\\VariantAnnotation.csproj\" />\n  </ItemGroup>\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\n</Project>"
  },
  {
    "path": "GeneAnnotationLambda/GeneResult.cs",
    "content": "﻿using System.Data;\nusing System.IO;\nusing System.Text;\nusing Cloud;\nusing Cloud.Utilities;\nusing Newtonsoft.Json;\n\nnamespace GeneAnnotationLambda\n{\n    public static class LambdaResponse\n    {\n        private const string OutputBeforeNirvanaJson = \",\\\"annotation\\\":\";\n        private const string OutputEnd = \"}\";\n\n        public static Stream Create(string id, string status, string nirvanaJson)\n        {\n            string statusJson = JsonConvert.SerializeObject(status);\n            string outputStart = $\"{{\\\"id\\\":\\\"{id}\\\",\\\"status\\\":{statusJson}\";\n            string output;\n\n            if (status == LambdaUrlHelper.SuccessMessage)\n            {\n                if (nirvanaJson == null)\n                    throw new NoNullAllowedException(\"Nirvana annotation cannot be null when the job is successful.\");\n                output = outputStart + OutputBeforeNirvanaJson + nirvanaJson + OutputEnd;\n            }\n            else\n            {\n                output = outputStart + OutputEnd;\n            }\n\n            LogUtilities.LogObject(\"Result\", output);\n\n            var outputStream = new MemoryStream(Encoding.UTF8.GetBytes(output));\n            return outputStream;\n        }\n    }\n}"
  },
  {
    "path": "Genome/Band.cs",
    "content": "﻿namespace Genome\r\n{\r\n\tpublic struct Band\r\n\t{\r\n\t\tpublic readonly int Begin;\r\n\t\tpublic readonly int End;\r\n\t\tpublic readonly string Name;\r\n\r\n\t\tpublic Band(int begin, int end, string name)\r\n\t\t{\r\n\t\t\tBegin = begin;\r\n\t\t\tEnd   = end;\r\n\t\t\tName  = name;\r\n\t\t}\r\n\r\n\t\tpublic int Compare(int position)\r\n\t\t{\r\n\t\t\tif (position < Begin) return 1;\r\n\t\t\treturn position > End ? -1 : 0;\r\n\t\t}\r\n\t}\r\n}"
  },
  {
    "path": "Genome/Chromosome.cs",
    "content": "﻿using System;\r\nusing IO;\r\n\r\nnamespace Genome\r\n{\r\n    \r\n    public sealed class Chromosome : IComparable<Chromosome>\r\n    {\r\n        public string UcscName         { get; }\r\n        public string EnsemblName      { get; }\r\n        public string RefSeqAccession  { get; }\r\n        public string GenBankAccession { get; }\r\n        public int    FlankingLength   { get; private set; }\r\n        public int    Length           { get; }\r\n        public ushort Index            { get; }\r\n\r\n        public const ushort UnknownReferenceIndex = ushort.MaxValue;\r\n        public const int ShortFlankingLength = 100;\r\n\r\n        public static Chromosome GetEmptyChromosome(string name)\r\n        {\r\n             return  new Chromosome(name, name, name, name, 0, ushort.MaxValue)\r\n            {\r\n                FlankingLength = ShortFlankingLength\r\n            };\r\n        }\r\n\r\n        \r\n        public Chromosome(string ucscName, string ensemblName, string refSeqAccession, string genBankAccession,\r\n            int length, ushort index)\r\n        {\r\n            UcscName         = ucscName;\r\n            EnsemblName      = ensemblName;\r\n            RefSeqAccession  = refSeqAccession;\r\n            GenBankAccession = genBankAccession;\r\n            Length           = length;\r\n            Index            = index;\r\n\r\n            // for short references (< 30 kbp), let's use a shorter flanking length\r\n            const int longFlankingLength      = 5_000;\r\n            const int shortReferenceThreshold = 30_000_000;\r\n\r\n            FlankingLength = length < shortReferenceThreshold ? 
ShortFlankingLength : longFlankingLength;\r\n        }\r\n        \r\n        public void Write(ExtendedBinaryWriter writer)\r\n        {\r\n            writer.WriteOptAscii(UcscName);\r\n            writer.WriteOptAscii(EnsemblName);\r\n            writer.WriteOptAscii(RefSeqAccession);\r\n            writer.WriteOptAscii(GenBankAccession);\r\n            writer.WriteOpt(Length);\r\n            writer.WriteOpt(Index);\r\n        }\r\n\r\n        public static Chromosome Read(ExtendedBinaryReader reader)\r\n        {\r\n            string ucscName         = reader.ReadAsciiString();\r\n            string ensemblName      = reader.ReadAsciiString();\r\n            string refseqAccession  = reader.ReadAsciiString();\r\n            string genBankAccession = reader.ReadAsciiString();\r\n            int    length           = reader.ReadOptInt32();\r\n            ushort refIndex         = reader.ReadOptUInt16();\r\n\r\n            return new Chromosome(ucscName, ensemblName, refseqAccession, genBankAccession, length, refIndex);\r\n        }\r\n\r\n        public bool Equals(Chromosome other) => Index == other.Index && Length == other.Length;\r\n\r\n        public int CompareTo(Chromosome other) => Index == other.Index ? Length.CompareTo(other.Length) : Index.CompareTo(other.Index);\r\n\r\n        public override int GetHashCode()\r\n        {\r\n            return UcscName.GetHashCode() ^ Length ^ Index;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Genome/ChromosomeInterval.cs",
    "content": "﻿namespace Genome\r\n{\r\n    public sealed class ChromosomeInterval : IChromosomeInterval\r\n    {\r\n        public Chromosome Chromosome { get; }\r\n        public int Start { get; }\r\n        public int End { get; }\r\n\r\n        public ChromosomeInterval(Chromosome chromosome, int start, int end)\r\n        {\r\n            Chromosome = chromosome;\r\n            Start      = start;\r\n            End        = end;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Genome/ContigInfo.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing ErrorHandling.Exceptions;\r\n\r\nnamespace Genome\r\n{\r\n    public static class ContigInfo\r\n    {\r\n        private static readonly (string, int)[] ChromLengthsGrch37 = {\r\n            (\"1\", 249250621),\r\n            (\"2\", 243199373),\r\n            (\"3\", 198022430),\r\n            (\"4\", 191154276),\r\n            (\"5\", 180915260),\r\n            (\"6\", 171115067),\r\n            (\"7\", 159138663),\r\n            (\"8\", 146364022),\r\n            (\"9\", 141213431),\r\n            (\"10\", 135534747),\r\n            (\"11\", 135006516),\r\n            (\"12\", 133851895),\r\n            (\"13\", 115169878),\r\n            (\"14\", 107349540),\r\n            (\"15\", 102531392),\r\n            (\"16\", 90354753),\r\n            (\"17\", 81195210),\r\n            (\"18\", 78077248),\r\n            (\"19\", 59128983),\r\n            (\"20\", 63025520),\r\n            (\"21\", 48129895),\r\n            (\"22\", 51304566),\r\n            (\"X\", 155270560),\r\n            (\"Y\", 59373566)\r\n        };\r\n\r\n        private static readonly (string, int)[] ChromLengthsGrch38 =\r\n        {\r\n            (\"1\", 248956422),\r\n            (\"2\", 242193529),\r\n            (\"3\", 198295559),\r\n            (\"4\", 190214555),\r\n            (\"5\", 181538259),\r\n            (\"6\", 170805979),\r\n            (\"7\", 159345973),\r\n            (\"8\", 145138636),\r\n            (\"9\", 138394717),\r\n            (\"10\", 133797422),\r\n            (\"11\", 135086622),\r\n            (\"12\", 133275309),\r\n            (\"13\", 114364328),\r\n            (\"14\", 107043718),\r\n            (\"15\", 101991189),\r\n            (\"16\", 90338345),\r\n            (\"17\", 83257441),\r\n            (\"18\", 80373285),\r\n            (\"19\", 58617616),\r\n            (\"20\", 64444167),\r\n            (\"21\", 46709983),\r\n            (\"22\", 50818468),\r\n            (\"X\", 156040895),\r\n   
         (\"Y\", 57227415)\r\n        };\r\n\r\n        private static readonly Dictionary<string, Dictionary<int, GenomeAssembly>> ChromLengthToAssembly = GetChromLengthToAssembly();\r\n\r\n        private static Dictionary<string, Dictionary<int, GenomeAssembly>> GetChromLengthToAssembly()\r\n        {\r\n            var chromLengthToAssembly = new Dictionary<string, Dictionary<int, GenomeAssembly>>();\r\n            foreach ((string chrom, int length) in ChromLengthsGrch37)\r\n            {\r\n                chromLengthToAssembly[chrom] = new Dictionary<int, GenomeAssembly> { { length, GenomeAssembly.GRCh37 } };\r\n            }\r\n            foreach ((string contig, int length) in ChromLengthsGrch38)\r\n            {\r\n                chromLengthToAssembly[contig][length] = GenomeAssembly.GRCh38;\r\n            }\r\n            chromLengthToAssembly[\"MT\"] = new Dictionary<int, GenomeAssembly> { { 16569, GenomeAssembly.rCRS } };\r\n\r\n            return chromLengthToAssembly;\r\n        }\r\n\r\n        public static GenomeAssembly GetGenomeAssembly(Chromosome chromosome, int length)\r\n        {\r\n            if (!ChromLengthToAssembly.TryGetValue(chromosome.EnsemblName, out var lengthToAssembly))\r\n                return GenomeAssembly.Unknown;\r\n\r\n            if (lengthToAssembly.TryGetValue(length, out GenomeAssembly assembly)) return assembly;\r\n\r\n            if (chromosome.EnsemblName == \"MT\") return GenomeAssembly.Unknown;\r\n\r\n            throw new UserErrorException($\"Invalid length provided in VCF header: chromosome {chromosome.EnsemblName}, length {length}\");\r\n\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Genome/CytogeneticBands.cs",
    "content": "﻿namespace Genome\r\n{\r\n    public static class CytogeneticBands\r\n    {\r\n        public static string Find(this Band[] bands, Chromosome chromosome, int start, int end)\r\n        {\r\n            if (chromosome.IsEmpty()) return null;\r\n            string startCytogeneticBand = bands.GetCytogeneticBand(start);\r\n            if (startCytogeneticBand == null) return null;\r\n\r\n            // handle the single coordinate case\r\n            if (start == end) return $\"{chromosome.EnsemblName}{startCytogeneticBand}\";\r\n\r\n            // handle the dual coordinate case\r\n            string endCytogeneticBand = bands.GetCytogeneticBand(end);\r\n            if (endCytogeneticBand == null) return null;\r\n\r\n            return startCytogeneticBand == endCytogeneticBand\r\n                ? $\"{chromosome.EnsemblName}{startCytogeneticBand}\"\r\n                : $\"{chromosome.EnsemblName}{startCytogeneticBand}-{endCytogeneticBand}\";\r\n        }\r\n\r\n        private static string GetCytogeneticBand(this Band[] bands, int pos)\r\n        {\r\n            int index = BinarySearch(bands, pos);\r\n            return index < 0 ? null : bands[index].Name;\r\n        }\r\n\r\n        private static int BinarySearch(Band[] array, int position)\r\n        {\r\n            var begin = 0;\r\n            int end = array.Length - 1;\r\n\r\n            while (begin <= end)\r\n            {\r\n                int index = begin + (end - begin >> 1);\r\n\r\n                int ret = array[index].Compare(position);\r\n                if (ret == 0) return index;\r\n                if (ret < 0) begin = index + 1;\r\n                else end = index - 1;\r\n            }\r\n\r\n            return ~begin;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Genome/Genome.csproj",
    "content": "<Project Sdk=\"Microsoft.NET.Sdk\">\n  <PropertyGroup>\n    <TargetFramework>net6.0</TargetFramework>\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\n    \n  </PropertyGroup>\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\n  <ItemGroup>\n    <ProjectReference Include=\"..\\ErrorHandling\\ErrorHandling.csproj\" />\n    <ProjectReference Include=\"..\\Intervals\\Intervals.csproj\" />\n    <ProjectReference Include=\"..\\IO\\IO.csproj\" />\n  </ItemGroup>\n</Project>\n"
  },
  {
    "path": "Genome/GenomeAssembly.cs",
    "content": "﻿namespace Genome\n{\n\t// ReSharper disable InconsistentNaming\n\tpublic enum GenomeAssembly : byte\n\t{\n\t\tUnknown,\n\t\tGRCh37,\n\t\tGRCh38,\n\t\thg19,\n\t\trCRS, // Revised Cambridge Reference Sequence (rCRS) of the Human Mitochondrial DNA\n\t\tSARSCoV2\n\t}\n\t// ReSharper restore InconsistentNaming\n}"
  },
  {
    "path": "Genome/GenomeAssemblyHelper.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing ErrorHandling.Exceptions;\r\n\r\nnamespace Genome\r\n{\r\n    public static class GenomeAssemblyHelper\r\n    {\r\n        public static readonly HashSet<GenomeAssembly> AutosomeAndAllosomeAssemblies =\r\n            new HashSet<GenomeAssembly> { GenomeAssembly.GRCh37, GenomeAssembly.GRCh38, GenomeAssembly.hg19,GenomeAssembly.SARSCoV2 };\r\n\r\n        public static GenomeAssembly Convert(string genomeAssembly)\r\n        {\r\n            GenomeAssembly ret;\r\n\r\n            switch (string.IsNullOrEmpty(genomeAssembly) ? string.Empty : genomeAssembly.ToLower())\r\n            {\r\n                case \"grch37\":\r\n                    ret = GenomeAssembly.GRCh37;\r\n                    break;\r\n                case \"grch38\":\r\n                    ret = GenomeAssembly.GRCh38;\r\n                    break;\r\n                case \"hg19\":\r\n                    ret = GenomeAssembly.hg19;\r\n                    break;\r\n                case \"rcrs\":\r\n                    ret = GenomeAssembly.rCRS;\r\n                    break;\r\n                case \"sarscov2\":\r\n                    ret = GenomeAssembly.SARSCoV2;\r\n                    break;\r\n                case \"\":\r\n                    ret = GenomeAssembly.Unknown;\r\n                    break;\r\n                default:\r\n                    throw new UserErrorException($\"Unknown genome assembly was specified: {genomeAssembly}\");\r\n            }\r\n\r\n            return ret;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Genome/GenomicPosition.cs",
    "content": "﻿namespace Genome\r\n{\r\n    public struct GenomicPosition\r\n    {\r\n        public readonly Chromosome Chromosome;\r\n        public readonly int Position;\r\n\r\n        public GenomicPosition(Chromosome chromosome, int position)\r\n        {\r\n            Chromosome = chromosome;\r\n            Position = position;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Genome/GenomicRange.cs",
    "content": "﻿namespace Genome\r\n{\r\n    public sealed class GenomicRange\r\n    {\r\n        public GenomicPosition Start { get; }\r\n        public GenomicPosition? End { get; }\r\n\r\n        public GenomicRange(GenomicPosition start, GenomicPosition? end)\r\n        {\r\n            Start = start;\r\n            End = end;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Genome/GenomicRangeChecker.cs",
    "content": "﻿namespace Genome\r\n{\r\n    public sealed class GenomicRangeChecker\r\n    {\r\n        private readonly GenomicRange _genomicRange;\r\n        private bool _reachedLastChromosome;\r\n\r\n        public GenomicRangeChecker(GenomicRange genomicRange)\r\n        {\r\n            _genomicRange = genomicRange;\r\n        }\r\n\r\n        public bool OutOfRange(Chromosome chromosome, int position)\r\n        {\r\n            if (_genomicRange?.End == null) return false;\r\n\r\n            if (!_reachedLastChromosome && chromosome.Equals(_genomicRange.End?.Chromosome)) _reachedLastChromosome = true;\r\n\r\n            return _reachedLastChromosome && (position > _genomicRange.End?.Position || !chromosome.Equals(_genomicRange.End?.Chromosome)) ;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Genome/IChromosomeInterval.cs",
    "content": "﻿using Intervals;\n\nnamespace Genome\n{\n    public interface IChromosomeInterval : IInterval\n    {\n        Chromosome Chromosome { get; }\n    }\n}"
  },
  {
    "path": "Genome/ISequence.cs",
    "content": "﻿namespace Genome\n{\n    public interface ISequence \n    {\n\t    int Length { get; }\n        Band[] CytogeneticBands { get; }\n        string Substring(int offset, int length);\n\t}\n}"
  },
  {
    "path": "Genome/ReferenceNameUtilities.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\n\nnamespace Genome\n{\n    public static class ReferenceNameUtilities\n    {\n        public static Chromosome GetChromosome(Dictionary<string, Chromosome> refNameToChromosome,\n            string referenceName)\n        {\n            if (referenceName == null) return Chromosome.GetEmptyChromosome(string.Empty);\n\n            return !refNameToChromosome.TryGetValue(referenceName, out Chromosome chromosome)\n                ? Chromosome.GetEmptyChromosome(referenceName)\n                : chromosome;\n        }\n\n        public static Chromosome GetChromosome(Dictionary<ushort, Chromosome> refIndexToChromosome, ushort referenceIndex)\n        {\n            if (!refIndexToChromosome.TryGetValue(referenceIndex, out Chromosome chromosome))\n            {\n                throw new InvalidDataException($\"Unable to find the reference index ({referenceIndex}) in the refIndexToChromosome dictionary.\");\n            }\n\n            return chromosome;\n        }\n\n        public static bool IsEmpty(this Chromosome chromosome) => chromosome.Index == ushort.MaxValue;\n    }\n}\n"
  },
  {
    "path": "Genome/SequenceUtilities.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Linq;\r\n\r\nnamespace Genome\r\n{\r\n\tpublic static class SequenceUtilities\r\n\t{\r\n\t\tprivate static readonly char[] ReverseComplementLookupTable;\r\n\t\tprivate static readonly HashSet<char> CanonicalBases;\r\n\r\n\t\tstatic SequenceUtilities()\r\n\t\t{\r\n\t\t\t// initialize the reverse complement code\r\n\t\t\tconst string forwardBases = \"ABCDGHKMRTVYabcdghkmrtvy\";\r\n\t\t\tconst string reverseBases = \"TVGHCDMKYABRTVGHCDMKYABR\";\r\n\t\t\tReverseComplementLookupTable = new char[256];\r\n\r\n\t\t\tfor (var i = 0; i < 256; i++) ReverseComplementLookupTable[i] = 'N';\r\n\t\t\tfor (var i = 0; i < forwardBases.Length; i++)\r\n\t\t\t{\r\n\t\t\t\tReverseComplementLookupTable[forwardBases[i]] = reverseBases[i];\r\n\t\t\t}\r\n\r\n\t\t\tCanonicalBases = new HashSet<char> { 'A', 'C', 'G', 'T', '-' };\r\n\t\t}\r\n\r\n\t\t/// <summary>\r\n\t\t/// returns the reverse complement of the given bases\r\n\t\t/// </summary>\r\n\t\tpublic static string GetReverseComplement(string bases)\r\n\t\t{\r\n\t\t\t// sanity check\r\n\t\t\tif (bases == null) return null;\r\n\r\n\t\t\tint numBases = bases.Length;\r\n\t\t\tvar reverseChars = new char[numBases];\r\n\r\n\t\t\tfor (var i = 0; i < numBases; ++i)\r\n\t\t\t{\r\n\t\t\t\treverseChars[i] = ReverseComplementLookupTable[bases[numBases - i - 1]];\r\n\t\t\t}\r\n\r\n\t\t\treturn new string(reverseChars);\r\n\t\t}\r\n\r\n\t\t/// <summary>\r\n\t\t/// returns true if we have a base other than the 4 standard bases: A, C, G, and T\r\n\t\t/// </summary>\r\n\t\tpublic static bool HasNonCanonicalBase(string bases) => !string.IsNullOrEmpty(bases) && bases.Any(c => !CanonicalBases.Contains(c));\r\n    }\r\n}"
  },
  {
    "path": "IO/BufferedBinaryReader.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.Text;\r\n\r\nnamespace IO\r\n{\r\n    public sealed class BufferedBinaryReader : IBufferedBinaryReader\r\n    {\r\n        private const int BufferSize = 10_485_760;\r\n\r\n        private const int ShortLen  = 2;\r\n        private const int IntLen    = 4;\r\n\r\n        private readonly Stream _stream;\r\n        private readonly byte[] _buffer;\r\n\r\n        private bool _foundEof;\r\n\r\n        private int _bufferLen;\r\n        private int _bufferPos;\r\n\r\n        private readonly bool _leaveOpen;\r\n\r\n        public BufferedBinaryReader(Stream stream, bool leaveOpen = false, int bufferSize = BufferSize)\r\n        {\r\n            if (stream == null)  throw new ArgumentNullException(nameof(stream));\r\n            if (!stream.CanRead) throw new ArgumentException(\"A non-readable stream was supplied.\", nameof(stream));\r\n            if (bufferSize <= 0) throw new ArgumentOutOfRangeException(nameof(bufferSize));\r\n\r\n            _stream    = stream;\r\n            _buffer    = new byte[bufferSize];\r\n            _leaveOpen = leaveOpen;\r\n\r\n            FillBuffer();\r\n        }\r\n\r\n        private void FillBuffer()\r\n        {\r\n            int numRemainingBytes = _bufferLen - _bufferPos;\r\n            if (numRemainingBytes > 0) Buffer.BlockCopy(_buffer, _bufferPos, _buffer, 0, numRemainingBytes);\r\n\r\n            _bufferPos = 0;\r\n            _bufferLen = numRemainingBytes;\r\n\r\n            int numBytesRead = _stream.Read(_buffer, numRemainingBytes, _buffer.Length - numRemainingBytes);\r\n            _bufferLen       = numRemainingBytes + numBytesRead;\r\n\r\n            if (_bufferPos == 0 && _bufferLen == 0) _foundEof = true;\r\n        }\r\n\r\n        public string ReadAsciiString()\r\n        {\r\n            int numBytes = ReadOptInt32();\r\n            return numBytes == 0 ? 
null : Encoding.ASCII.GetString(ReadBytes(numBytes));\r\n        }\r\n\r\n        public bool ReadBoolean()\r\n        {\r\n            if (_bufferPos == _bufferLen) FillBuffer();\r\n            return _buffer[_bufferPos++] != 0;\r\n        }\r\n\r\n        public byte ReadByte()\r\n        {\r\n            if (_bufferPos == _bufferLen) FillBuffer();\r\n            return _buffer[_bufferPos++];\r\n        }\r\n\r\n        public byte[] ReadBytes(int numBytes)\r\n        {\r\n            if (numBytes == 1) return new[] { ReadByte() };\r\n\r\n            var values = new byte[numBytes];\r\n            Read(values, numBytes);\r\n            return values;\r\n        }\r\n\r\n        private void Read(byte[] buffer, int numBytes)\r\n        {\r\n            var offset            = 0;\r\n            int numBytesRemaining = numBytes;\r\n\r\n            while (numBytesRemaining > 0)\r\n            {\r\n                if (_bufferPos == _bufferLen)\r\n                {\r\n                    FillBuffer();\r\n                    if (_foundEof) break;\r\n                }\r\n\r\n                int numBytesAvailable = _bufferLen - _bufferPos;\r\n                int copyLength        = numBytesRemaining < numBytesAvailable ? 
numBytesRemaining : numBytesAvailable;\r\n\r\n                Buffer.BlockCopy(_buffer, _bufferPos, buffer, offset, copyLength);\r\n\r\n                offset            += copyLength;\r\n                _bufferPos        += copyLength;\r\n                numBytesRemaining -= copyLength;\r\n            }\r\n        }\r\n\r\n        public int ReadOptInt32()\r\n        {\r\n            if (_bufferPos > _bufferLen - 5) FillBuffer();\r\n\r\n            var count = 0;\r\n            var shift = 0;\r\n\r\n            while (shift != 35)\r\n            {\r\n                byte b = _buffer[_bufferPos++];\r\n                count |= (b & sbyte.MaxValue) << shift;\r\n                shift += 7;\r\n\r\n                if ((b & 128) == 0) return count;\r\n            }\r\n\r\n            throw new FormatException(\"Unable to read the 7-bit encoded integer\");\r\n        }\r\n\r\n        public ushort ReadOptUInt16()\r\n        {\r\n            if (_bufferPos > _bufferLen - 3) FillBuffer();\r\n\r\n            ushort count = 0;\r\n            var shift    = 0;\r\n\r\n            while (shift != 21)\r\n            {\r\n                byte b = ReadByte();\r\n                count |= (ushort)((b & sbyte.MaxValue) << shift);\r\n                shift += 7;\r\n\r\n                if ((b & 128) == 0) return count;\r\n            }\r\n\r\n            throw new FormatException(\"Unable to read the 7-bit encoded unsigned short\");\r\n        }\r\n\r\n        public unsafe ushort ReadUInt16()\r\n        {\r\n            if (_bufferPos > _bufferLen - ShortLen) FillBuffer();\r\n\r\n            ushort value;\r\n            fixed (byte* pBuffer = &_buffer[_bufferPos])\r\n            {\r\n                value = (ushort)(pBuffer[0] | pBuffer[1] << 8);\r\n                _bufferPos += ShortLen;\r\n            }\r\n\r\n            return value;\r\n        }\r\n\r\n        public unsafe uint ReadUInt32()\r\n        {\r\n            if (_bufferPos > _bufferLen - IntLen) FillBuffer();\r\n\r\n     
       uint value;\r\n            fixed (byte* pBuffer = &_buffer[_bufferPos])\r\n            {\r\n                value = (uint)(pBuffer[0] | pBuffer[1] << 8 | pBuffer[2] << 16 | pBuffer[3] << 24);\r\n                _bufferPos += IntLen;\r\n            }\r\n\r\n            return value;\r\n        }\r\n\r\n        public void Dispose()\r\n        {\r\n            if (!_leaveOpen) _stream?.Dispose();\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "IO/CacheConstants.cs",
    "content": "﻿namespace IO\n{\n    //todo: create cache utils project and move it there\n\tpublic static class CacheConstants\n\t{\n\t\tpublic const uint GuardInt     = 4041327495; // 87c3e1f0\n\t\tpublic const string Identifier = \"NirvanaDB\";\n\n\t\t// increment the schema version when the file structures are updated\n\t\t// N.B. we only need to regenerate unit tests when the schema version is incremented\n\t\t// e.g. adding a new feature like regulatory elements\n\t\tpublic const ushort SchemaVersion = 21;\n\n\t\t// increment the data version when the contents are updated\n\t\t// e.g. a bug is fixed in SIFT parsing or if transcripts are filtered differently\n\t\tpublic const ushort DataVersion = 27;\n\n\t\tpublic static string TranscriptPath(string prefix) => Combine(prefix, \".transcripts.ndb\");\n\t\tpublic static string SiftPath(string prefix)       => Combine(prefix, \".sift.ndb\");\n        public static string PolyPhenPath(string prefix)   => Combine(prefix, \".polyphen.ndb\");\n\t    public static string BasesPath(string prefix)      => Combine(prefix, \".bases\");\n\n        private static string Combine(string prefix, string suffix) => prefix == null ? null : prefix + suffix;\n\t}\n}"
  },
  {
    "path": "IO/ExtendedBinaryReader.cs",
    "content": "﻿using System;\nusing System.IO;\nusing System.Text;\n\nnamespace IO\n{\n\tpublic sealed class ExtendedBinaryReader : BinaryReader\n\t{\n\t\tpublic ExtendedBinaryReader(Stream s) : this(s, new UTF8Encoding()) { }\n\n\t\tpublic ExtendedBinaryReader(Stream input, Encoding encoding, bool leaveOpen = false)\n\t\t\t: base(input, encoding, leaveOpen) {}\n\n\n\t    /// <summary>\n\t    /// returns an unsigned short from the binary reader\n\t    /// </summary>\n\t    public ushort ReadOptUInt16()\n\t    {\n\t        ushort count = 0;\n\t        var shift = 0;\n\n\t        while (shift != 21)\n\t        {\n\t            byte b = ReadByte();\n\t            count |= (ushort)((b & sbyte.MaxValue) << shift);\n\t            shift += 7;\n\n\t            if ((b & 128) == 0) return count;\n\t        }\n\n\t        throw new FormatException(\"Unable to read the 7-bit encoded unsigned short\");\n\t    }\n\n        /// <summary>\n        /// returns an integer from the binary reader\n        /// </summary>\n        public int ReadOptInt32()\n\t\t{\n\t\t\tvar count = 0;\n\t\t\tvar shift = 0;\n\n\t\t\twhile (shift != 35)\n\t\t\t{\n\t\t\t\tbyte b = ReadByte();\n\t\t\t\tcount |= (b & sbyte.MaxValue) << shift;\n\t\t\t\tshift += 7;\n\n\t\t\t\tif ((b & 128) == 0) return count;\n\t\t\t}\n\n\t\t\tthrow new FormatException(\"Unable to read the 7-bit encoded integer\");\n\t\t}\n\n\t\t/// <summary>\n\t\t/// returns a long from the binary reader\n\t\t/// </summary>\n\t\tpublic long ReadOptInt64()\n\t\t{\n\t\t\tlong count = 0;\n\t\t\tvar shift = 0;\n\n\t\t\twhile (shift != 70)\n\t\t\t{\n\t\t\t\tbyte b = ReadByte();\n\t\t\t\tcount |= (long)(b & sbyte.MaxValue) << shift;\n\t\t\t\tshift += 7;\n\n\t\t\t\tif ((b & 128) == 0) return count;\n\t\t\t}\n\n\t\t\tthrow new FormatException(\"Unable to read the 7-bit encoded long\");\n\t\t}\n\n        /// <summary>\n\t\t/// returns an ASCII string from the binary reader\n\t\t/// </summary>\n\t\tpublic string ReadAsciiString()\n\t\t{\n\t\t\tint 
numBytes = ReadOptInt32();\n\n\t\t\t// grab the ASCII characters\n\t\t\t// ReSharper disable once AssignNullToNotNullAttribute\n\t\t\treturn numBytes == 0 ? null : Encoding.ASCII.GetString(ReadBytes(numBytes));\n\t\t}\n        \n\t}\n}"
  },
  {
    "path": "IO/ExtendedBinaryWriter.cs",
    "content": "﻿using System.IO;\nusing System.Text;\n\nnamespace IO\n{\n    public sealed class ExtendedBinaryWriter : BinaryWriter, IExtendedBinaryWriter\n    {\n        public ExtendedBinaryWriter(Stream output) : this(output, new UTF8Encoding(false, true)) { }\n\n        public ExtendedBinaryWriter(Stream output, Encoding encoding, bool leaveOpen = false)\n            : base(output, encoding, leaveOpen)\n        {\n        }\n\n        /// <summary>\n        /// writes an unsigned short to the binary writer\n        /// </summary>\n        public void WriteOpt(ushort value)\n        {\n            ushort num = value;\n\n            while (num >= 128U)\n            {\n                Write((byte)(num | 128U));\n                num >>= 7;\n            }\n\n            Write((byte)num);\n        }\n\n        /// <summary>\n        /// writes an integer to the binary writer\n        /// </summary>\n        public void WriteOpt(int value)\n        {\n            var num = (uint)value;\n\n            while (num >= 128U)\n            {\n                Write((byte)(num | 128U));\n                num >>= 7;\n            }\n\n            Write((byte)num);\n        }\n        \n        public void WriteOpt(uint value)\n        {\n            uint num = value;\n\n            while (num >= 128U)\n            {\n                Write((byte)(num | 128U));\n                num >>= 7;\n            }\n\n            Write((byte)num);\n        }\n\n        /// <summary>\n        /// writes a long to the binary writer\n        /// </summary>\n        public void WriteOpt(long value)\n        {\n            var num = (ulong)value;\n\n            while (num >= 128U)\n            {\n                Write((byte)(num | 128U));\n                num >>= 7;\n            }\n\n            Write((byte)num);\n        }\n\n        /// <summary>\n        /// writes an ASCII string to the binary writer\n        /// </summary>\n        public void WriteOptAscii(string s)\n        {\n            
int numBytes = s?.Length ?? 0;\n            WriteOpt(numBytes);\n\n            // sanity check: handle null strings\n            if (s == null) return;\n\n            // write the ASCII bytes\n            Write(Encoding.ASCII.GetBytes(s));\n        }\n    }\n}"
  },
  {
    "path": "IO/FilePathUtilities.cs",
    "content": "using System;\r\n\r\nnamespace IO\r\n{\r\n    public static class StringExtensions\r\n    {\r\n        public static string TrimStartToLast(this string s, string value, bool includeSeparator = false)\r\n        {\r\n            int extPos = s.LastIndexOf(value, StringComparison.Ordinal);\r\n            if (extPos == -1) return s;\r\n            return includeSeparator ? s.Substring(extPos) : s.Substring(extPos + value.Length);\r\n        }\r\n\r\n        public static string TrimEndFromFirst(this string s, string value, bool includeSeparator = false)\r\n        {\r\n            int extPos = s.IndexOf(value, StringComparison.Ordinal);\r\n            if (extPos == -1) return s;\r\n            return includeSeparator ? s.Substring(0, extPos + value.Length) : s.Substring(0, extPos);\r\n        }\r\n\r\n        public static string GetFileSuffix(this string s, bool includeDot) => HttpUtilities.IsUrl(s)\r\n            ? s.TrimEndFromFirst(\"?\").TrimStartToLast(\".\", includeDot)\r\n            : s.TrimStartToLast(\".\", includeDot);\r\n    }\r\n}"
  },
  {
    "path": "IO/FileUtilities.cs",
    "content": "﻿using System.IO;\r\nusing System.Text;\r\n\r\nnamespace IO\r\n{\r\n    public static class FileUtilities\r\n    {\r\n        private const int StreamReaderBufferSize = 10_485_760;\r\n\r\n        public static FileStream GetReadStream(string path)   => new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);\r\n        public static FileStream GetCreateStream(string path) => new FileStream(path, FileMode.Create);\r\n\r\n        public static StreamReader GetStreamReader(Stream stream, bool leaveOpen = false) =>\r\n            new StreamReader(stream, Encoding.Default, true, StreamReaderBufferSize, leaveOpen);\r\n\r\n        public static string[] GetFileNamesInDir(string directory, string pattern = null)\r\n        {\r\n            if (!Directory.Exists(directory))\r\n            {\r\n                throw new FileNotFoundException($\"{directory} doesn't exist.\");\r\n            }\r\n            return pattern == null ?  Directory.GetFiles(directory) : Directory.GetFiles(directory, pattern);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "IO/HttpUtilities.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.Globalization;\r\nusing System.Net;\r\nusing System.Xml.Linq;\r\nusing System.Linq;\r\nusing System.Threading;\r\nusing ErrorHandling.Exceptions;\r\n\r\nnamespace IO\r\n{\r\n    public static class HttpUtilities\r\n    {\r\n        private static readonly string[] AuthenticationErrorCodes   = { \"InvalidAccessKeyId\", \"SignatureDoesNotMatch\" };\r\n        private static readonly string[] ResourceNotExistErrorCodes = { \"NoSuchKey\", \"NoSuchBucket\" };\r\n        private const           int      MaxRetryCount              = 10;\r\n        \r\n        public static long GetLength(string url)\r\n        {\r\n            var response       = TryGetResponse(url);\r\n            long contentLength = response.ContentLength;\r\n            response.Close();\r\n            return contentLength;\r\n        }\r\n\r\n        // Only throw exceptions when all the three tries failed.\r\n        private static HttpWebResponse TryGetResponse(string url)\r\n        {\r\n            var exceptions = new List<Exception>();\r\n\r\n            for (var retryCounter = 0; retryCounter < MaxRetryCount; retryCounter++)\r\n            {\r\n                try\r\n                {\r\n                    if (retryCounter > 0)\r\n                    {\r\n                        Console.WriteLine($\"Attempt {retryCounter+1} to get response from {url}\");\r\n                        Thread.Sleep(2_000);\r\n                    }\r\n\r\n                    var request = (HttpWebRequest) WebRequest.Create(url);\r\n                    if (retryCounter > 0)\r\n                    {\r\n                        Console.WriteLine($\"Succeeded at attempt#: {retryCounter+1}\");\r\n                    }\r\n\r\n                    return (HttpWebResponse) request.GetResponse();\r\n                }\r\n                catch (Exception e)\r\n                {\r\n                    Logger.WriteLine($\"TryGetResponse exception 
found when connecting to {url}\");\r\n                    Logger.Log(e);\r\n                    exceptions.Add(ProcessHttpRequestWebProtocolErrorException(e, url));\r\n                }\r\n            }\r\n\r\n            throw new AggregateException(exceptions);\r\n        }\r\n\r\n        public static void ValidateUrl(string url, bool isUserProvided = true)\r\n        {\r\n            try\r\n            {\r\n                var response = TryGetResponse(url);\r\n                response.Close();\r\n            }\r\n            catch (Exception)\r\n            {\r\n                if (isUserProvided) throw new UserErrorException($\"Unable to validate the URL for {UrlUtilities.GetFileName(url)}\");\r\n                throw new DeploymentErrorException($\"Deployment issue detected. Unable to validate the URL for {url}.\");\r\n            }\r\n        }\r\n\r\n        public static bool IsWebProtocolErrorException(Exception exception)\r\n        {\r\n            if (exception is not WebException webException) return false;\r\n\r\n            return webException.Status == WebExceptionStatus.ProtocolError;\r\n        }\r\n\r\n        public static Exception ProcessHttpRequestWebProtocolErrorException(Exception exception, string url)\r\n        {\r\n            if (!IsWebProtocolErrorException(exception)) return exception;\r\n\r\n            string urlPath = UrlUtilities.GetPath(url);\r\n\r\n            var webException = (WebException)exception;\r\n            (string errorCode, string errorMessage) = GetWebExceptionMessage(webException);\r\n\r\n            // Expired URL is always a user error\r\n            if (errorMessage == \"Request has expired\") return new UserErrorException($\"The provided URL for {urlPath} has expired.\");\r\n\r\n            // Authentication error is always considered as a user error\r\n            if (AuthenticationErrorCodes.Contains(errorCode)) return new UserErrorException($\"Authentication error while reading from URL for 
{urlPath}.\");\r\n\r\n            // Resource not exist error is always considered as a user error\r\n            if (ResourceNotExistErrorCodes.Contains(errorCode)) return new UserErrorException($\"An invalid URL for {urlPath} was specified.\");\r\n\r\n            // Sometimes it is difficult to figure out whether the error is caused by the user or not.\r\n            // For example, the AccessDenied error code could be triggered by either incorrect credentials provided by the user, or network congestion while reading from S3.\r\n            // Therefore, such errors are treated as general exceptions.\r\n            // And we don't pass through the general error to end user to avoid possible confusion.\r\n            Logger.WriteLine($\"The following error occurred while reading from {url}: {errorMessage}. Exception: {exception.Message}\");\r\n            return new WebException($\"An error occurred while reading from the URL for {urlPath} ({exception.GetType()})\");\r\n        }\r\n\r\n        private static (string Code, string Message) GetWebExceptionMessage(WebException exception)\r\n        {\r\n            using (var stream = exception.Response.GetResponseStream())\r\n            {\r\n                if (stream == null) return (null, null);\r\n\r\n                var xElement = XElement.Load(stream);\r\n                return (xElement.Element(\"Code\")?.Value, xElement.Element(\"Message\")?.Value);\r\n            }\r\n        }\r\n\r\n        public static bool IsUrl(string path) => path.StartsWith(\"http\", true, CultureInfo.InvariantCulture);\r\n    }\r\n}\r\n"
  },
  {
    "path": "IO/IBufferedBinaryReader.cs",
    "content": "﻿using System;\r\n\r\nnamespace IO\r\n{\r\n    public interface IBufferedBinaryReader : IDisposable\r\n    {\r\n        string ReadAsciiString();\r\n        bool ReadBoolean();\r\n        byte ReadByte();\r\n        int ReadOptInt32();\r\n        ushort ReadOptUInt16();\r\n        uint ReadUInt32();\r\n    }\r\n}\r\n"
  },
  {
    "path": "IO/IConnect.cs",
    "content": "﻿using System.IO;\nusing System.Net;\n\nnamespace IO\n{\n    public interface IConnect\n    {\n        (HttpWebResponse Response, Stream Stream) Connect(long position);\n    }\n}\n"
  },
  {
    "path": "IO/IExtendedBinaryWriter.cs",
    "content": "﻿namespace IO\n{\n\tpublic interface IExtendedBinaryWriter\n\t{\n\t\tvoid Write(bool b);\n\t\tvoid Write(byte b);\n        void Write(byte[] buffer);\n        void Write(ushort value);\n        void Write(uint value);\n\t    void WriteOpt(ushort value);\n\t\tvoid WriteOpt(int value);\n\t\tvoid WriteOpt(long value);\n\t\tvoid WriteOptAscii(string s);\n\t}\n}"
  },
  {
    "path": "IO/IO.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\r\n  <PropertyGroup>\r\n    <TargetFramework>net6.0</TargetFramework>\r\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\r\n\r\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\r\n  </PropertyGroup>\r\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\r\n  <ItemGroup>\r\n    <PackageReference Include=\"AWSSDK.Core\" Version=\"3.7.8.5\" />\r\n    <PackageReference Include=\"AWSSDK.S3\" Version=\"3.7.8.3\" />\r\n  </ItemGroup>\r\n  <ItemGroup>\r\n    <ProjectReference Include=\"..\\ErrorHandling\\ErrorHandling.csproj\" />\r\n  </ItemGroup>\r\n</Project>\r\n"
  },
  {
    "path": "IO/IS3Client.cs",
    "content": "﻿using System.Threading.Tasks;\r\nusing Amazon.S3.Model;\r\n\r\nnamespace IO\r\n{\r\n    public interface IS3Client\r\n    {\r\n        Task<GetObjectResponse> GetObjectAsync(GetObjectRequest getRequest);\r\n        Task<PutObjectResponse> PutObjectAsync(PutObjectRequest putRequest);\r\n    }\r\n}"
  },
  {
    "path": "IO/ISerializable.cs",
    "content": "﻿namespace IO\n{\n\tpublic interface ISerializable\n\t{\n\t\tvoid Write(IExtendedBinaryWriter writer);\n\t}\n}"
  },
  {
    "path": "IO/LengthStream.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\n\r\nnamespace IO\r\n{\r\n    /// <inheritdoc />\r\n    /// <summary>\r\n    /// The S3 PutObjectRequest object requires an input stream that supports length and position.\r\n    /// Neither of these are typically available from the CryptoStream\r\n    /// </summary>\r\n    public sealed class LengthStream : Stream\r\n    {\r\n        private readonly Stream _stream;\r\n        private long _position;\r\n\r\n        public LengthStream(Stream stream, long length)\r\n        {\r\n            _stream = stream;\r\n            Length  = length;\r\n        }\r\n\r\n        public override int Read(byte[] buffer, int offset, int count)\r\n        {\r\n            _position += count;\r\n            return _stream.Read(buffer, offset, count);\r\n        }\r\n\r\n        public override long Position\r\n        {\r\n            get => _position;\r\n            set => throw new NotSupportedException();\r\n        }\r\n\r\n        public override void Flush()                                     => throw new NotSupportedException();\r\n        public override long Seek(long offset, SeekOrigin origin)        => throw new NotSupportedException();\r\n        public override void SetLength(long value)                       => throw new NotSupportedException();\r\n        public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException();\r\n        public override bool CanRead  => _stream.CanRead;\r\n        public override bool CanSeek  => _stream.CanSeek;\r\n        public override bool CanWrite => _stream.CanWrite;\r\n        public override long Length { get; }\r\n    }\r\n}\r\n"
  },
  {
    "path": "IO/Logger.cs",
    "content": "﻿using System;\r\nusing System.Text;\r\n\r\nnamespace IO\r\n{\r\n    public static class Logger\r\n    {\r\n        // can be redirected to any logger\r\n        public static Action<string> WriteLine { get; set; }\r\n        public static Action<string> Write     { get; set; }\r\n        \r\n        public const string Url = \"Url\";\r\n\r\n        static Logger()\r\n        {\r\n            WriteLine = Console.WriteLine;\r\n            Write     = Console.Write;\r\n        }\r\n\r\n        public static void SetBold()    => Console.ForegroundColor = ConsoleColor.Yellow;\r\n        public static void ResetColor() => Console.ResetColor();\r\n\r\n        public static void Silence()\r\n        {\r\n            WriteLine = s => { };\r\n            Write     = s => { };\r\n        }\r\n\r\n        public static void Log(Exception e)\r\n        {\r\n            var sb   = new StringBuilder();\r\n            var line = new string('-', 80);\r\n            sb.AppendLine(line);\r\n\r\n            const string vcfLine = \"VcfLine\";\r\n            const string errorLine = \"Line\";\r\n\r\n            while (e != null)\r\n            {\r\n                sb.AppendLine($\"{e.GetType()}: {e.Message}\");\r\n                sb.AppendLine($\"Stack trace: {e.StackTrace}\");\r\n                if (e.Data.Contains(vcfLine)) sb.AppendLine($\"VCF line: {e.Data[vcfLine]}\");\r\n                if (e.Data.Contains(errorLine)) sb.AppendLine($\"Line: {e.Data[errorLine]}\");\r\n                if (e.Data.Contains(Url)) sb.AppendLine($\"URL: {e.Data[Url]}\");\r\n\r\n                sb.AppendLine(errorLine);\r\n                e = e.InnerException;\r\n            }\r\n\r\n            WriteLine(sb.ToString());\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "IO/MD5Stream.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.Security.Cryptography;\r\n\r\nnamespace IO\r\n{\r\n    public sealed class MD5Stream : Stream\r\n    {\r\n        private readonly Stream _stream;\r\n        private readonly IncrementalHash _md5 = IncrementalHash.CreateHash(HashAlgorithmName.MD5);\r\n        private FileMetadata _metadata;\r\n        private long _length;\r\n\r\n        /// <inheritdoc />\r\n        public MD5Stream(Stream stream) => _stream = stream;\r\n\r\n        public override void Write(byte[] buffer, int offset, int count)\r\n        {\r\n            _stream.Write(buffer, offset, count);\r\n            _md5.AppendData(buffer, offset, count);\r\n            _length += count;\r\n        }\r\n\r\n        public FileMetadata GetFileMetadata()\r\n        {\r\n            if (_metadata != null) return _metadata;\r\n            _metadata = new FileMetadata(_md5.GetHashAndReset(), _length);\r\n            return _metadata;\r\n        }\r\n\r\n        public override long Position\r\n        {\r\n            get => _length;\r\n            set => throw new NotSupportedException();\r\n        }\r\n\r\n        public override void Flush() => _stream.Flush();\r\n\r\n        public override int Read(byte[] buffer, int offset, int count) => throw new NotSupportedException();\r\n        public override long Seek(long offset, SeekOrigin origin)      => throw new NotSupportedException();\r\n        public override void SetLength(long value)                     => throw new NotSupportedException();\r\n        public override bool CanRead  => _stream.CanRead;\r\n        public override bool CanSeek  => _stream.CanSeek;\r\n        public override bool CanWrite => _stream.CanWrite;\r\n        public override long Length   => _stream.Length;\r\n    }\r\n\r\n    public sealed class FileMetadata\r\n    {\r\n        public byte[] MD5 { get; }\r\n        public long Length { get; }\r\n\r\n        public FileMetadata(byte[] md5, long length)\r\n        
{\r\n            MD5    = md5;\r\n            Length = length;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "IO/PersistentConnect.cs",
    "content": "﻿using System;\nusing System.IO;\nusing System.Net;\n\nnamespace IO\n{\n    public sealed class PersistentConnect : IConnect\n    {\n        private readonly string _url;\n\n        public PersistentConnect(string url) => _url = url;\n\n        public (HttpWebResponse Response, Stream Stream) Connect(long position)\n        {\n            if (position < 0) throw new ArgumentOutOfRangeException(nameof(position));\n\n            try\n            {\n                var request = WebRequest.CreateHttp(_url);\n                request.AddRange(position);\n                request.Timeout                             = 10_000;\n                request.ReadWriteTimeout                    = 15_000;\n                var response = (HttpWebResponse)request.GetResponse();\n                var stream   = response.GetResponseStream();\n                return (response, stream);\n            }\n            catch (Exception e)\n            {\n                e.Data[Logger.Url] = _url;\n                throw;\n            }\n        }\n    }\n}"
  },
  {
    "path": "IO/PersistentStream.cs",
    "content": "using System;\r\nusing System.IO;\r\nusing System.Net;\r\nusing System.Runtime.CompilerServices;\r\nusing System.Threading;\r\n\r\nnamespace IO\r\n{\r\n    public sealed class PersistentStream : Stream\r\n    {\r\n        private readonly IConnect _connect;\r\n        private HttpWebResponse _response;\r\n        private Stream _stream;\r\n        private long _position;\r\n        \r\n        private const int MaxRetryAttempts     = 5;\r\n        private const int NumRetryMilliseconds = 2_000;\r\n\r\n        public override bool CanRead                                     => _stream.CanRead;\r\n        public override bool CanSeek                                     => _stream.CanSeek;\r\n        public override bool CanWrite                                    => _stream.CanWrite;\r\n        public override long Length                                      => _stream.Length;\r\n        public override void Flush()                                     => _stream.Flush();\r\n        public override long Seek(long offset, SeekOrigin origin)        => _stream.Seek(offset, origin);\r\n        public override void SetLength(long value)                       => _stream.SetLength(value);\r\n        public override void Write(byte[] buffer, int offset, int count) => _stream.Write(buffer, offset, count);\r\n\r\n        public override long Position\r\n        {\r\n            get => _position;\r\n            set\r\n            {\r\n                Disconnect();\r\n                ConnectWithRetries(value);\r\n                _position = value;\r\n            }\r\n        }\r\n\r\n        public PersistentStream(IConnect connect, long position)\r\n        {\r\n            _position = position;\r\n            _connect  = connect;\r\n            ConnectWithRetries(_position);\r\n        }\r\n\r\n        private void ConnectWithRetries(long position)\r\n        {\r\n            if (position < 0) throw new ArgumentOutOfRangeException(nameof(position));\r\n\r\n      
      var keepTrying = true;\r\n            var numRetries = 0;\r\n\r\n            while (keepTrying)\r\n            {\r\n                try\r\n                {\r\n                    (_response, _stream) = _connect.Connect(position);\r\n                    keepTrying = false;\r\n                }\r\n                catch (Exception e)\r\n                {\r\n                    Log(MethodName(), e);\r\n                    if (numRetries == MaxRetryAttempts) throw;\r\n\r\n                    Disconnect();\r\n                    Thread.Sleep(NumRetryMilliseconds);\r\n                    numRetries++;\r\n                }\r\n            }\r\n        }\r\n\r\n        private void Disconnect()\r\n        {\r\n            _response?.Dispose();\r\n            _stream?.Dispose();\r\n        }\r\n        public override int Read(byte[] buffer, int offset, int count)\r\n        {\r\n            var numBytesRead = 0;\r\n            \r\n            while (count > 0)\r\n            {\r\n                int cnt = PersistentRead(buffer, offset, count);\r\n                if (cnt == 0) return numBytesRead;\r\n\r\n                offset       += cnt;\r\n                numBytesRead += cnt;\r\n                _position    += cnt;\r\n                count        -= cnt;\r\n            }\r\n\r\n            return numBytesRead;\r\n        }\r\n\r\n        private int PersistentRead(byte[] buffer, int offset, int count)\r\n        {\r\n            var keepTrying   = true;\r\n            var numRetries   = 0;\r\n            var numBytesRead = 0;\r\n\r\n            while (keepTrying)\r\n            {\r\n                try\r\n                {\r\n                    numBytesRead = _stream.Read(buffer, offset, count); \r\n                    keepTrying = false;\r\n                }\r\n                catch (Exception e)\r\n                {\r\n                    Log(MethodName(), e);\r\n                    if (numRetries == MaxRetryAttempts) throw;\r\n\r\n                    
Disconnect();\r\n                    Thread.Sleep(NumRetryMilliseconds);\r\n                    ConnectWithRetries(_position);\r\n                    numRetries++;                    \r\n                }\r\n            }\r\n\r\n            return numBytesRead;\r\n        }\r\n\r\n        private static void Log(string methodName, Exception e)\r\n        {\r\n            Logger.WriteLine($\"Retrying exception found in {methodName}\");\r\n            Logger.Log(e);\r\n        }\r\n\r\n        private static string MethodName([CallerMemberName] string caller = null) => caller;\r\n\r\n        protected override void Dispose(bool disposing)\r\n        {\r\n            try\r\n            {\r\n                if (disposing) Disconnect();\r\n\r\n                _response = null;\r\n                _stream   = null;\r\n            }\r\n            finally\r\n            {\r\n                base.Dispose(disposing);\r\n            }\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "IO/PersistentStreamUtils.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\n\nnamespace IO\n{\n    public static class PersistentStreamUtils\n    {\n        public static Stream GetReadStream(string urlOrPath, long position = 0)\n        {\n            if (string.IsNullOrEmpty(urlOrPath)) return null;\n\n            if (!HttpUtilities.IsUrl(urlOrPath))\n                return File.Exists(urlOrPath) ? FileUtilities.GetReadStream(urlOrPath) : null;\n\n            return new PersistentStream(new PersistentConnect(urlOrPath), position);\n        }\n\n        public static List<Stream> GetStreams(List<string> locations)\n        {\n            if (locations == null) return null;\n\n            var streams = new List<Stream>(locations.Count);\n            streams.AddRange(locations.Select(urlOrPath => GetReadStream(urlOrPath)));\n            return streams;\n        }\n    }\n}"
  },
  {
    "path": "IO/SpanBufferBinaryReader.cs",
    "content": "﻿using System;\nusing System.Runtime.CompilerServices;\nusing System.Runtime.InteropServices;\nusing System.Text;\n\nnamespace IO\n{\n    public static class SpanBufferBinaryReader\n    {\n        private const int MostSignificantBit = 128;\n        private const int VlqBitShift        = 7;\n\n        public static ushort ReadOptUInt16(ref ReadOnlySpan<byte> byteSpan)\n        {\n            ushort value = 0;\n            var    shift = 0;\n            var    index = 0;\n\n            while (shift != 21)\n            {\n                byte b = byteSpan[index++];\n                value |= (ushort)((b & sbyte.MaxValue) << shift);\n                shift += VlqBitShift;\n\n                // ReSharper disable once InvertIf\n                if ((b & MostSignificantBit) == 0)\n                {\n                    byteSpan = byteSpan.Slice(index);\n                    return value;\n                }\n            }\n\n            throw new FormatException(\"Unable to read the 7-bit encoded unsigned short\");\n        }\n        \n        public static int ReadOptInt32(ref ReadOnlySpan<byte> byteSpan)\n        {\n            var value = 0;\n            var shift = 0;\n            var index = 0;\n\n            while (shift != 35)\n            {\n                byte b = byteSpan[index++];\n                value |= (b & sbyte.MaxValue) << shift;\n                shift += VlqBitShift;\n\n                // ReSharper disable once InvertIf\n                if ((b & MostSignificantBit) == 0)\n                {\n                    byteSpan = byteSpan.Slice(index);\n                    return value;\n                }\n            }\n\n            throw new FormatException(\"Unable to read the 7-bit encoded integer\");\n        }\n        \n        public static uint ReadOptUInt32(ref ReadOnlySpan<byte> byteSpan)\n        {\n            uint value = 0;\n            var  shift = 0;\n            var  index = 0;\n\n            while (shift != 35)\n            {\n  
              byte b = byteSpan[index++];\n                value |= (uint)((b & sbyte.MaxValue) << shift);\n                shift += VlqBitShift;\n\n                // ReSharper disable once InvertIf\n                if ((b & MostSignificantBit) == 0)\n                {\n                    byteSpan = byteSpan.Slice(index);\n                    return value;\n                }\n            }\n\n            throw new FormatException(\"Unable to read the 7-bit encoded unsigned integer\");\n        }\n\n        public static long ReadOptInt64(ref ReadOnlySpan<byte> byteSpan)\n        {\n            long value = 0;\n            var  shift = 0;\n            var  index = 0;\n\n            while (shift != 70)\n            {\n                byte b = byteSpan[index++];\n                value |= (long) (b & sbyte.MaxValue) << shift;\n                shift += VlqBitShift;\n\n                // ReSharper disable once InvertIf\n                if ((b & MostSignificantBit) == 0)\n                {\n                    byteSpan = byteSpan.Slice(index);\n                    return value;\n                }\n            }\n\n            throw new FormatException(\"Unable to read the 7-bit encoded long\");\n        }\n\n        public static ulong ReadOptUInt64(ref ReadOnlySpan<byte> byteSpan)\n        {\n            ulong value = 0;\n            var   shift = 0;\n            var   index = 0;\n\n            while (shift != 70)\n            {\n                byte b = byteSpan[index++];\n                value |= (ulong) (b & sbyte.MaxValue) << shift;\n                shift += VlqBitShift;\n\n                // ReSharper disable once InvertIf\n                if ((b & MostSignificantBit) == 0)\n                {\n                    byteSpan = byteSpan.Slice(index);\n                    return value;\n                }\n            }\n\n            throw new FormatException(\"Unable to read the 7-bit encoded ulong\");\n        }\n\n        public static string ReadUtf8String(ref 
ReadOnlySpan<byte> byteSpan)\n        {\n            int numBytes = ReadOptInt32(ref byteSpan);\n            if (numBytes == 0) return string.Empty;\n\n            string value = Encoding.UTF8.GetString(byteSpan[..numBytes]);\n            byteSpan = byteSpan.Slice(numBytes);\n            \n            return value;\n        }\n        \n        public static string ReadAsciiString(ref ReadOnlySpan<byte> byteSpan)\n        {\n            int numBytes = ReadOptInt32(ref byteSpan);\n            if (numBytes == 0) return string.Empty;\n\n            string value = Encoding.ASCII.GetString(byteSpan[..numBytes]);\n            byteSpan = byteSpan.Slice(numBytes);\n            \n            return value;\n        }\n\n        [MethodImpl(MethodImplOptions.AggressiveInlining)]\n        public static void SkipString(ref ReadOnlySpan<byte> byteSpan)\n        {\n            int numBytes = ReadOptInt32(ref byteSpan);\n            if (numBytes == 0) return;\n            byteSpan = byteSpan.Slice(numBytes);\n        }\n\n        [MethodImpl(MethodImplOptions.AggressiveInlining)]\n        public static byte ReadByte(ref ReadOnlySpan<byte> byteSpan)\n        {\n            byte value = byteSpan[0];\n            byteSpan = byteSpan.Slice(1);\n            return value;\n        }\n\n        [MethodImpl(MethodImplOptions.AggressiveInlining)]\n        public static ReadOnlySpan<byte> ReadBytes(ref ReadOnlySpan<byte> byteSpan, int numBytes)\n        {\n            ReadOnlySpan<byte> value = byteSpan[..numBytes];\n            byteSpan = byteSpan.Slice(numBytes);\n            return value;\n        }\n\n        [MethodImpl(MethodImplOptions.AggressiveInlining)]\n        public static int ReadInt32(ref ReadOnlySpan<byte> byteSpan)\n        {\n            var value = MemoryMarshal.Read<int>(byteSpan);\n            byteSpan = byteSpan.Slice(4);\n            return value;\n        }\n\n        [MethodImpl(MethodImplOptions.AggressiveInlining)]\n        public static ulong ReadUInt64(ref 
ReadOnlySpan<byte> byteSpan)\n        {\n            var value = MemoryMarshal.Read<ulong>(byteSpan);\n            byteSpan = byteSpan.Slice(8);\n            return value;\n        }\n\n        [MethodImpl(MethodImplOptions.AggressiveInlining)]\n        public static ushort ReadUInt16(ref ReadOnlySpan<byte> byteSpan)\n        {\n            var value = MemoryMarshal.Read<ushort>(byteSpan);\n            byteSpan = byteSpan.Slice(2);\n            return value;\n        }\n    }\n}"
  },
  {
    "path": "IO/UrlUtilities.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\n\r\nnamespace IO\r\n{\r\n    public static class UrlUtilities\r\n    {\r\n        private const char UriSeparator = '/';\r\n\r\n        public static string GetPath(string url)     => new Uri(url).LocalPath.TrimStart(UriSeparator);\r\n        public static string GetFileName(string url) => Path.GetFileName(GetPath(url));\r\n\r\n        public static string UrlCombine(this string prefix, string suffix) =>\r\n            prefix.TrimEnd(UriSeparator) + UriSeparator + suffix.TrimStart(UriSeparator);\r\n    }\r\n}\r\n"
  },
  {
    "path": "IO/v2/FileType.cs",
    "content": "﻿namespace IO.v2\n{\n    public enum FileType : ushort\n    {\n        // reference\n        Reference = 1,\n\n        // cache\n        GeneSymbol        = 1000,\n        Gene              = 1100,\n        EnsemblTranscript = 1200,\n        RefseqTranscript  = 1300,\n        SIFT              = 1400,\n        PolyPhen          = 1500,\n\n        // supplementary annotation\n        SaVariants     = 2000,\n        SaIntervals    = 3000,\n        SaGenes        = 4000,\n        FusionCatcher  = 4100,\n        GeneFusionJson = 4101,\n        PhyloP         = 5000,\n        GsaWriter      = 6000,\n        GsaIndex       = 6500,\n    }\n}"
  },
  {
    "path": "IO/v2/Header.cs",
    "content": "﻿using System.IO;\n\nnamespace IO.v2\n{\n    /// <summary>\n    /// Common header for all our Nirvana file formats \n    /// </summary>\n    public sealed record Header(FileType FileType, ushort FileFormatVersion)\n    {\n        // see http://www.libpng.org/pub/png/spec/1.2/PNG-Rationale.html#R.PNG-file-signature\n\n        // decimal            137  78  73  82  13  10   26  10\n        // hexadecimal         89  4E  49  52  0d  0a   1a  0a\n        // ASCII C notation  \\211   N   I   R  \\r  \\n \\032  \\n\n        private const ulong NirvanaSignature = 727905342105144969;\n        public const  uint  NirvanaFooter    = 4283582798; // N I R 0xFF\n\n        public static Header Read(BinaryReader reader)\n        {\n            ulong  signature         = reader.ReadUInt64();\n            var    fileType          = (FileType) reader.ReadUInt16();\n            ushort fileFormatVersion = reader.ReadUInt16();\n\n            if (signature != NirvanaSignature)\n                throw new InvalidDataException(\"Invalid Nirvana file signature. Is this the correct file?\");\n\n            return new Header(fileType, fileFormatVersion);\n        }\n\n        public void Write(BinaryWriter writer)\n        {\n            writer.Write(NirvanaSignature);\n            writer.Write((ushort) FileType);\n            writer.Write(FileFormatVersion);\n        }\n    }\n}"
  },
  {
    "path": "Intervals/Extensions.cs",
    "content": "﻿namespace Intervals\n{\n    public static class Extensions\n    {\n        /// <summary>\n        /// interval 2 is overlapped with interval 1 +/- flanking length\n        /// </summary>\n        public static bool Overlaps(this IInterval interval1, IInterval interval2, int flankingLength = 0) =>\n            Utilities.Overlaps(interval1.Start - flankingLength, interval1.End + flankingLength,\n                interval2.Start, interval2.End);\n\n        public static bool Overlaps(this IInterval interval, int start, int end) => Utilities.Overlaps(\n            interval.Start, interval.End, start, end);\n\n        public static bool Contains(this IInterval interval1, IInterval interval2) => Utilities.Contains(\n            interval1.Start, interval1.End, interval2.Start, interval2.End);\n\n        public static Interval Intersects(this IInterval interval1, IInterval interval2)\n        {\n            (int start, int end) = Utilities.Intersects(interval1.Start, interval1.End, interval2.Start, interval2.End);\n            return new Interval(start, end);\n        }\n    }\n}"
  },
  {
    "path": "Intervals/IInterval.cs",
    "content": "﻿namespace Intervals\n{\n    public interface IInterval\n    {\n        int Start { get; }\n        int End { get; }\n    }\n}\n"
  },
  {
    "path": "Intervals/IIntervalForest.cs",
    "content": "﻿namespace Intervals\n{\n\tpublic interface IIntervalForest<out T>\n\t{\n\t\tbool OverlapsAny(ushort refIndex, int begin, int end);\n\t\tT[] GetAllOverlappingValues(ushort refIndex, int begin, int end);\n\t}\n}"
  },
  {
    "path": "Intervals/IIntervalSearch.cs",
    "content": "﻿namespace Intervals\n{\n\tpublic interface IIntervalSearch<out T>\n\t{\n        T[] GetAllOverlappingValues(int begin, int end);\n\t}\n\n\tpublic struct Interval<T>\n\t{\n\t\tpublic readonly int Begin;\n\t\tpublic readonly int End;\n\t\tpublic readonly T Value;\n\t\tpublic int Max;\n\n\t\tpublic Interval(int begin, int end, T value)\n\t\t{\n\t\t\tBegin = begin;\n\t\t\tEnd   = end;\n\t\t\tValue = value;\n\t\t\tMax   = -1;\n\t\t}\n\n\t\t/// <summary>\n\t\t/// our compare function\n\t\t/// </summary>\n\t\tpublic int CompareMax(int position)\n\t\t{\n\t\t\tif (position < Max) return -1;\n\t\t\treturn position > Max ? 1 : 0;\n\t\t}\n\n\t\t/// <summary>\n\t\t/// returns true if this interval overlaps with the specified interval\n\t\t/// </summary>\n\t\tpublic bool Overlaps(int intervalBegin, int intervalEnd)\n\t\t{\n\t\t\treturn End >= intervalBegin && Begin <= intervalEnd;\n\t\t}\n    }\n}"
  },
  {
    "path": "Intervals/Interval.cs",
    "content": "﻿namespace Intervals\n{\n    public struct Interval : IInterval\n    {\n        public int Start { get; }\n        public int End { get; }\n\n        public Interval(int start, int end)\n        {\n            Start = start;\n            End   = end;\n        }\n        \n    }\n}"
  },
  {
    "path": "Intervals/IntervalArray.cs",
    "content": "﻿using System.Collections.Generic;\n\nnamespace Intervals\n{\n\tpublic sealed class IntervalArray<T> : IIntervalSearch<T>\n    {\n\t\tpublic readonly Interval<T>[] Array;\n\n\t\tpublic IntervalArray(Interval<T>[] array)\n\t\t{\n\t\t\tArray = array;\n\t\t\tSetMaxIntervals();\n\t\t}\n\n\t\t/// <summary>\n\t\t/// returns true if there are any overlapping intervals in the specified region\n\t\t/// </summary>\n\t\tpublic bool OverlapsAny(int begin, int end) => GetFirstIndexAny(begin, end) >= 0;\n\n\t\t/// <summary>\n\t\t/// returns values for all intervals that overlap the specified interval\n\t\t/// </summary>\n\t\tpublic T[] GetAllOverlappingValues(int begin, int end)\n\t\t{\n\t\t\tint firstIndex = GetFirstIndex(begin, end);\n\t\t\treturn firstIndex == -1 ? null : AddOverlappingValues(firstIndex, begin, end);\n\t\t}\n\n        public Interval<T>[] GetAllOverlappingIntervals(int begin, int end)\n        {\n            var intervals = new List<Interval<T>>();\n            int firstIndex = GetFirstIndex(begin, end);\n            if (firstIndex == -1) return null;\n            for (int index = firstIndex; index < Array.Length; index++)\n            {\n                Interval<T> interval = Array[index];\n                if (interval.Begin > end) break;\n                if (interval.Overlaps(begin, end)) intervals.Add(interval);\n            }\n\n            return intervals.ToArray();\n        }\n\n        /// <summary>\n\t\t/// adds the overlapping values for all intervals overlapping the specified interval\n\t\t/// </summary>\n\t\tprivate T[] AddOverlappingValues(int firstIndex, int begin, int end)\n\t\t{\n\t\t\tvar values = new List<T>();\n\t\t\tfor (int index = firstIndex; index < Array.Length; index++)\n\t\t\t{\n\t\t\t\tvar interval = Array[index];\n\t\t\t\tif (interval.Begin > end) break;\n\t\t\t\tif (interval.Overlaps(begin, end)) values.Add(interval.Value);\n\t\t\t}\n\t\t\treturn values.ToArray();\n\t\t}\n\n\t\t/// <summary>\n\t\t/// finds the 
first index that overlaps on the interval [begin, max)\n\t\t/// </summary>\n\t\tprivate int GetFirstIndex(int intervalBegin, int intervalEnd)\n\t\t{\n\t\t\tvar begin = 0;\n\t\t\tint end = Array.Length - 1;\n\n\t\t\tvar lastOverlapIndex = -1;\n\n\t\t\twhile (begin <= end)\n\t\t\t{\n\t\t\t\tint index = begin + (end - begin >> 1);\n\n\t\t\t\tif (Array[index].Overlaps(intervalBegin, intervalEnd)) lastOverlapIndex = index;\n\t\t\t\tint ret = Array[index].CompareMax(intervalBegin);\n\n\t\t\t\tif (ret <= 0) end = index - 1;\n\t\t\t\telse begin = index + 1;\n\t\t\t}\n\n\t\t\treturn lastOverlapIndex;\n\t\t}\n\n\t\t/// <summary>\n\t\t/// finds the first index that overlaps on the interval [begin, max)\n\t\t/// </summary>\n\t\tprivate int GetFirstIndexAny(int intervalBegin, int intervalEnd)\n\t\t{\n\t\t\tvar begin = 0;\n\t\t\tint end = Array.Length - 1;\n\n\t\t\twhile (begin <= end)\n\t\t\t{\n\t\t\t\tint index = begin + (end - begin >> 1);\n\n\t\t\t\tif (Array[index].Overlaps(intervalBegin, intervalEnd)) return index;\n\t\t\t\tint ret = Array[index].CompareMax(intervalBegin);\n\n\t\t\t\tif (ret <= 0) end = index - 1;\n\t\t\t\telse begin = index + 1;\n\t\t\t}\n\n\t\t\treturn ~begin;\n\t\t}\n\n\t\t/// <summary>\n\t\t/// sets the max endpoint for each interval element\n\t\t/// </summary>\n\t\tprivate void SetMaxIntervals()\n\t\t{\n\t\t\tvar currentMax = int.MinValue;\n\n\t\t\tfor (var i = 0; i < Array.Length; i++)\n\t\t\t{\n\t\t\t\tif (Array[i].End > currentMax) currentMax = Array[i].End;\n\t\t\t\tArray[i].Max = currentMax;\n\t\t\t}\n\t\t}\n    }\n}"
  },
  {
    "path": "Intervals/IntervalForest.cs",
    "content": "﻿namespace Intervals\n{\n\tpublic sealed class IntervalForest<T> : IIntervalForest<T>\n\t{\n\t\tprivate readonly IntervalArray<T>[] _intervalArrays;\n\t\tprivate readonly ushort _maxIndex;\n\n\t\tpublic IntervalForest(IntervalArray<T>[] intervalArrays)\n\t\t{\n\t\t\t_intervalArrays = intervalArrays;\n\t\t\t_maxIndex       = (ushort)(intervalArrays.Length - 1);\n\t\t}\n\n\t\t/// <summary>\n\t\t/// returns whether there is any interval that overlaps the specified interval\n\t\t/// </summary>\n\t\tpublic bool OverlapsAny(ushort refIndex, int begin, int end)\n\t\t{\n\t\t\tif (refIndex > _maxIndex) return false;\n\t\t\tvar intervalArray = _intervalArrays[refIndex];\n\t\t\tif (intervalArray == null) return false;\n\t\t\treturn intervalArray.OverlapsAny(begin, end);\n\t\t}\n\n\t\t/// <summary>\n\t\t/// returns values for all intervals that overlap the specified interval\n\t\t/// </summary>\n\t\tpublic T[] GetAllOverlappingValues(ushort refIndex, int begin, int end)\n\t\t{\n\t\t\tif (refIndex > _maxIndex) return null;\n            var intervalArray = _intervalArrays[refIndex];\n            return intervalArray?.GetAllOverlappingValues(begin, end);\n\t\t}\n\t\t\n\t\tpublic Interval<T>[] GetAllOverlappingIntervals(ushort refIndex, int begin, int end)\n\t\t{\n\t\t\tif (refIndex > _maxIndex) return null;\n\t\t\tvar intervalArray = _intervalArrays[refIndex];\n\t\t\treturn intervalArray?.GetAllOverlappingIntervals(begin, end);\n\t\t}\n\t}\n}"
  },
  {
    "path": "Intervals/Intervals.csproj",
    "content": "<Project Sdk=\"Microsoft.NET.Sdk\">\n  <PropertyGroup>\n    <TargetFramework>net6.0</TargetFramework>\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\n    \n  </PropertyGroup>\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\n</Project>\n"
  },
  {
    "path": "Intervals/NullIntervalSearch.cs",
    "content": "﻿namespace Intervals\n{\n\tpublic sealed class NullIntervalSearch<T> : IIntervalForest<T>, IIntervalSearch<T>\n\t{\n\t\t#region IIntervalForest\n\n\t\tpublic bool OverlapsAny(ushort refIndex, int begin, int end)\n\t\t{\n\t\t\treturn false;\n\t\t}\n\n\t\tpublic T[] GetAllOverlappingValues(ushort refIndex, int begin, int end)\n\t\t{\n\t\t\treturn null;\n\t\t}\n\n\t\t#endregion\n\n\t\t#region IIntervalSearch\n\n\t\tpublic T[] GetAllOverlappingValues(int begin, int end)\n\t\t{\n\t\t\treturn null;\n\t\t}\n\n        #endregion\n\t}\n}"
  },
  {
    "path": "Intervals/OverlapType.cs",
    "content": "﻿namespace Intervals\n{\n    public enum OverlapType\n    {\n        None,\n        CompletelyOverlaps,\n        CompletelyWithin,\n        Partial\n    }\n\n    public enum EndpointOverlapType\n    {\n        None,\n        Start,\n        End,\n        Both\n    }\n}"
  },
  {
    "path": "Intervals/Utilities.cs",
    "content": "﻿using System;\n\nnamespace Intervals\n{\n    public static class Utilities\n    {\n        public static bool Overlaps(int firstStart, int firstEnd, int secondStart, int secondEnd) =>\n            firstStart <= secondEnd && secondStart <= firstEnd;\n\n        public static bool Contains(int firstStart, int firstEnd, int secondStart, int secondEnd) =>\n            firstStart <= secondStart && secondEnd <= firstEnd;\n\n        // given two intervals T and V, describe how V overlaps T\n        public static OverlapType GetOverlapType(int tStart, int tEnd, int vStart, int vEnd)\n        {\n            if (tEnd < vStart || vEnd < tStart) return OverlapType.None;\n\n            if (vStart >= tStart && vEnd <= tEnd) return OverlapType.CompletelyWithin;\n\n            if (tStart >= vStart && tEnd <= vEnd) return OverlapType.CompletelyOverlaps;\n            return OverlapType.Partial;\n        }\n\n        public static EndpointOverlapType GetEndpointOverlapType(int tStart, int tEnd, int vStart, int vEnd)\n        {\n            bool overlapsStart = Overlaps(tStart, tStart, vStart, vEnd);\n            bool overlapsEnd   = Overlaps(tEnd,   tEnd,   vStart, vEnd);\n\n            if (!overlapsStart && !overlapsEnd) return EndpointOverlapType.None;\n            if (overlapsStart  && overlapsEnd) return EndpointOverlapType.Both;\n            return overlapsStart ? EndpointOverlapType.Start : EndpointOverlapType.End;\n        }\n\n        public static (int Start, int End) Intersects(int firstStart, int firstEnd, int secondStart, int secondEnd) =>\n            Overlaps(firstStart, firstEnd, secondStart, secondEnd)\n                ? (Math.Max(firstStart, secondStart), Math.Min(firstEnd, secondEnd))\n                : (-1, -1);\n    }\n}"
  },
  {
    "path": "Jasix/AssemblyInfo.cs",
    "content": "﻿using System.Runtime.CompilerServices;\n\n[assembly: InternalsVisibleTo(\"UnitTests\")]"
  },
  {
    "path": "Jasix/DataStructures/JasixChrIndex.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.Linq;\nusing ErrorHandling.Exceptions;\nusing Intervals;\nusing IO;\n\nnamespace Jasix.DataStructures\n{\n    public sealed class JasixChrIndex\n    {\n        public readonly string ReferenceSequence;\n        private readonly List<JasixNode> _nodes;\n\t    private JasixNode _currentNode;\n\t\tprivate readonly List<Interval<long>> _largeVariants;\n        private IntervalArray<long> _intervalArray;\n\n        public JasixChrIndex(string refName)\n        {\n            ReferenceSequence = refName;\n            _nodes            = new List<JasixNode>();\n            _largeVariants    = new List<Interval<long>>();\n            _intervalArray    = null;\n        }\n\n        public JasixChrIndex(ExtendedBinaryReader reader) : this(\"\")\n        {\n            ReferenceSequence = reader.ReadAsciiString();\n            int count = reader.ReadOptInt32();\n            for (var i = 0; i < count; i++)\n                _nodes.Add(new JasixNode(reader));\n\n            int intervalCount = reader.ReadOptInt32();\n            if (intervalCount == 0) return;\n\n            for (var i = 0; i < intervalCount; i++)\n                _largeVariants.Add(ReadInterval(reader));\n\n            _intervalArray = new IntervalArray<long>(_largeVariants.ToArray());\n        }\n\n        private static Interval<long> ReadInterval(ExtendedBinaryReader reader)\n        {\n            int begin    = reader.ReadOptInt32();\n            int end      = reader.ReadOptInt32();\n            long position = reader.ReadOptInt64();\n\n            return new Interval<long>(begin, end, position);\n        }\n\n        public void Write(IExtendedBinaryWriter writer)\n        {\n\t\t\tif (_currentNode != null)\n\t\t        _nodes.Add(_currentNode);\n\n\t        writer.WriteOptAscii(ReferenceSequence);\n\t        writer.WriteOpt(_nodes.Count);\n\t        foreach (var node in _nodes)\n\t        {\n\t\t        node.Write(writer);\n\t        
}\n\n\t        writer.WriteOpt(_largeVariants.Count);\n\t        if (_largeVariants.Count == 0) return;\n\n\t        foreach (Interval<long> interval in _largeVariants.OrderBy(x => x.Begin).ThenBy(x => x.End))\n\t        {\n\t\t        WriteInterval(interval, writer);\n\t        }\n\t\t}\n\n        private static void WriteInterval(Interval<long> interval, IExtendedBinaryWriter writer)\n        {\n            writer.WriteOpt(interval.Begin);\n            writer.WriteOpt(interval.End);\n            writer.WriteOpt(interval.Value);\n        }\n\n        public void Add(int begin, int end, long filePosition)\n        {\n            if (begin > end)\n                throw new UserErrorException($\"start position {begin} is greater than end position {end}\");\n\n\t\t\tif (Utilities.IsLargeVariant(begin,end))\n            {\n                _largeVariants.Add(new Interval<long>(begin, end, filePosition));\n                end = begin;// large variants will be recorded as SNVs so that we can query for all entries from a given position\n            }\n\n\t\t\tif (_currentNode == null)\n\t        {\n\t\t        _currentNode = new JasixNode(begin, end, filePosition);\n\t\t        return;\n\t        }\n\n\t        if (_currentNode.TryAdd(begin, end)) return;\n\t        _nodes.Add(_currentNode);\n\t        _currentNode = new JasixNode(begin, end, filePosition);\n        }\n\n        public void Flush()\n        {\n\t\t\tif (_currentNode != null)\n\t\t        _nodes.Add(_currentNode);\n\t        if (_largeVariants.Count != 0)\n\t\t        _intervalArray = new IntervalArray<long>(_largeVariants.ToArray());\n\t\t}\n\n\n\t\tpublic long FindFirstSmallVariant(int start, int end)\n        {\n\t\t\tvar searchNode = new JasixNode(start, end, 0);\n\n\t        var firstOverlappingNode = FindFirstOverlappingNode(searchNode);\n\t        \n\t       return  firstOverlappingNode?.FileLocation ?? 
-1;\n\t\t}\n\n\t    private JasixNode FindFirstOverlappingNode(JasixNode searchNode)\n\t    {\n\t\t    int index = _nodes.BinarySearch(searchNode);\n\n\t\t    if (index < 0)\n\t\t\t    index = ~index;\n\n\t\t    // if it is to the left of the first node, check if the end overlaps\n\t\t    if (index == 0)\n\t\t    {\n\t\t\t    return _nodes[index].Overlaps(searchNode) ? _nodes[index] : null;\n\t\t    }\n\n\t\t    if (index == _nodes.Count)\n\t\t    {\n\t\t\t    // if range overlaps the last node location of the last node, otherwise, -1\n\t\t\t    return _nodes[index - 1].Overlaps(searchNode) ? _nodes[index - 1] : null;\n\t\t    }\n\n\t\t    // if some intervals from the previous node overlaps the range\n\t\t    if (_nodes[index - 1].Overlaps(searchNode))\n\t\t\t    return _nodes[index - 1];\n\n\t\t    return _nodes[index].Overlaps(searchNode) ? _nodes[index] : null;\n\t\t}\n\n\t    public long[] FindLargeVariants(int begin, int end)\n        {\n            long[] positions = _intervalArray?.GetAllOverlappingValues(begin, end);\n\n            if (positions == null || positions.Length == 0) return null;\n            return positions;\n        }\n    }\n}\n"
  },
  {
    "path": "Jasix/DataStructures/JasixCommons.cs",
    "content": "﻿\nnamespace Jasix.DataStructures\n{\n\tpublic  static class JasixCommons\n\t{\n\t    public const int Version = 1;\n\t    public const string FileExt = \".jsi\";\n\n\t    public const string GenesSectionTag = \"genes\";\n\t    public const string HeaderSectionTag = \"header\";\n\t    public const string PositionsSectionTag = \"positions\";\n\n\t    private const int MaxVariantLength = 50;\n\t\tpublic const int MinNodeWidth = MaxVariantLength;\n\t\tpublic const int PreferredNodeCount = MaxVariantLength*2;\n\t}\n}\n"
  },
  {
    "path": "Jasix/DataStructures/JasixIndex.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing ErrorHandling.Exceptions;\nusing IO;\n\nnamespace Jasix.DataStructures\n{\n    public struct FileRange\n    {\n        public readonly long Begin;\n        public long End;\n\n        public FileRange(long begin, long end = long.MaxValue)\n        {\n            Begin = begin;\n            End = end;\n        }\n    }\n\n    public sealed class JasixIndex:IDisposable\n    {\n\t    private readonly Stream _stream;\n\t\tprivate readonly Dictionary<string, JasixChrIndex> _chrIndices;\n\t    private readonly Dictionary<string, string> _synonymToChrName;\n\t    private readonly Dictionary<string, FileRange> _sectionRanges;\n\n\t\t// the json file might contain sections. We want to be able to index these sections too\n\n\t\tpublic JasixIndex()\n\t\t{\n\t\t\t_chrIndices = new Dictionary<string, JasixChrIndex>();\n            _synonymToChrName = new Dictionary<string, string>();\n            _sectionRanges = new Dictionary<string, FileRange>();\n\t\t}\n\t\t\n\n\t\tpublic JasixIndex(Stream stream) : this()\n\t\t{\n\t\t\t_stream = stream;\n\t\t\tusing (var reader = new ExtendedBinaryReader(stream))\n\t\t\t{\n\t\t\t\tint version = reader.ReadOptInt32();\n\t\t\t\tif (version != JasixCommons.Version)\n\t\t\t\t\tthrow new InvalidDataException($\"Invalid Jasix version: Observed {version}, expected{JasixCommons.Version}\");\n\n\t\t\t\tint count = reader.ReadOptInt32();\n\n\t\t\t\tfor (var i = 0; i < count; i++)\n\t\t\t\t{\n\t\t\t\t\tvar chrIndex = new JasixChrIndex(reader);\n\t\t\t\t\t_chrIndices[chrIndex.ReferenceSequence] = chrIndex;\n\t\t\t\t}\n\n\t\t\t\tint synonymCount = reader.ReadOptInt32();\n\t\t\t\tfor (var i = 0; i < synonymCount; i++)\n\t\t\t\t{\n\t\t\t\t\tstring synonym   = reader.ReadAsciiString();\n\t\t\t\t\tstring indexName = reader.ReadAsciiString();\n\t\t\t\t\t_synonymToChrName[synonym] = indexName;\n\t\t\t\t}\n\n\t\t\t\tint sectionCount = 
reader.ReadOptInt32();\n\t\t\t\tfor (var i = 0; i < sectionCount; i++)\n\t\t\t\t{\n\t\t\t\t\tstring sectionName = reader.ReadAsciiString();\n\t\t\t\t\tlong   begin       = reader.ReadOptInt64();\n\t\t\t\t\tlong   end         = reader.ReadOptInt64();\n\t\t\t\t\t_sectionRanges[sectionName] = new FileRange(begin, end);\n\t\t\t\t}\n\n\t\t\t}\n\t\t}\n\n\t\tpublic void Write(Stream writeStream)\n\t\t{\n\t\t\tvar writer = new ExtendedBinaryWriter(writeStream);\n\t\t\twriter.WriteOpt(JasixCommons.Version);\n\n\t\t\twriter.WriteOpt(_chrIndices.Count);\n\t\t\tforeach (var chrIndex in _chrIndices.Values)\n\t\t\t{\n\t\t\t\tchrIndex.Write(writer);\n\t\t\t}\n\n            writer.WriteOpt(_synonymToChrName.Count);\n\t\t    foreach ((string key, string value) in _synonymToChrName)\n\t\t    {\n\t\t        writer.Write(key);\n\t\t        writer.Write(value);\n            }\n\n            writer.WriteOpt(_sectionRanges.Count);\n\t\t    foreach ((string name, FileRange sectionRange) in _sectionRanges)\n\t\t    {\n\t\t        writer.WriteOptAscii(name);\n                writer.WriteOpt(sectionRange.Begin);\n                writer.WriteOpt(sectionRange.End);\n\t\t    }\n\n\t\t}\n\n\t    public void Flush()\n\t\t{\n\t\t\tforeach (var chrIndex in _chrIndices.Values)\n\t\t\t{\n\t\t\t\tchrIndex.Flush();\n\t\t\t}\n\t\t}\n\n\t\tpublic void Add(string chr, int start, int end, long fileLoc, string chrSynonym=null)\n\t\t{\n\t\t    if (!string.IsNullOrEmpty(chrSynonym))\n\t\t    {\n\t\t        _synonymToChrName[chrSynonym] = chr;\n\t\t    }\n\n\t\t    if (_chrIndices.TryGetValue(chr, out var chrIndex))\n\t\t    {\n                chrIndex.Add(start, end, fileLoc);\n\t\t    }\n\t\t    else\n\t\t    {\n\t\t        _chrIndices[chr] = new JasixChrIndex(chr);\n\t\t        _chrIndices[chr].Add(start, end, fileLoc);\n\n            }\n\n\t\t}\n\n\t    public void BeginSection(string section, long fileLoc)\n\t    {\n\t        if (_sectionRanges.ContainsKey(section)) \n                throw new 
UserErrorException($\"Multiple beginning for section: {section}!!\");\n\n            _sectionRanges[section] = new FileRange(fileLoc);\n\t    }\n\n\t    public void EndSection(string section, long fileLoc)\n\t    {\n\t        if (!_sectionRanges.TryGetValue(section, out var fileRange))\n\t            return;\n\t        //    throw new UserErrorException($\"Attempting to close section:{section} before opening it!!\");\n\n            if (fileRange.End!=long.MaxValue)\n                throw new UserErrorException($\"Multiple closing for section {section} !!\");\n\n            fileRange.End = fileLoc;\n\t        _sectionRanges[section] = fileRange;\n\t    }\n\n        //returns file location of the first node that overlapping the given position chr:start-end\n        public long GetFirstVariantPosition(string chr, int start, int end)\n\t\t{\n\t\t\tif (_chrIndices == null || _chrIndices.Count == 0) return -1;\n\n\t\t    if (_synonymToChrName.TryGetValue(chr, out string indexName))\n\t\t        chr = indexName;\n\n\t\t    if (_chrIndices.TryGetValue(chr, out var chrIndex))\n\t\t    {\n\t\t        return chrIndex.FindFirstSmallVariant(start, end);\n\t\t    }\n\t\t    return -1;\n\n\t\t}\n\n\n\t\tpublic long[] LargeVariantPositions(string chr, int begin, int end)\n\t\t{\n\t\t\tif (_chrIndices == null || _chrIndices.Count == 0) return null;\n\n\t\t    if (_synonymToChrName.TryGetValue(chr, out string indexName))\n\t\t        chr = indexName;\n\n\t\t    return _chrIndices.TryGetValue(chr, out var chrIndex) ? chrIndex.FindLargeVariants(begin, end) : null;\n\t\t}\n\n\t\tpublic IEnumerable<string> GetChromosomeList()\n\t\t{\n\t\t\treturn _chrIndices.Keys;\n\t\t}\n\n\t    public bool ContainsChr(string chr)\n\t    {\n\t        return _chrIndices.Keys.Contains(_synonymToChrName.TryGetValue(chr, out string indexName) ? 
indexName : chr);\n\t    }\n\n\t    public string GetIndexChromName(string chromName)\n\t    {\n\t        if (_chrIndices.ContainsKey(chromName)) return chromName;\n\t        return _synonymToChrName.TryGetValue(chromName, out string indexName) ? indexName : null;\n\t    }\n\n\t    public long GetSectionBegin(string section)\n\t    {\n\t        return _sectionRanges.ContainsKey(section)? _sectionRanges[section].Begin: -1;\n\t    }\n\t    public long GetSectionEnd(string section)\n\t    {\n\t        return _sectionRanges.ContainsKey(section)? _sectionRanges[section].End: -1;\n\t    }\n\n\t    public void Dispose()\n\t    {\n\t\t    _stream?.Dispose();\n\t    }\n\n\t    public IEnumerable<string> GetSections() => _sectionRanges.Keys;\n\t    \n    }\n}\n"
  },
  {
    "path": "Jasix/DataStructures/JasixNode.cs",
    "content": "﻿using System;\r\nusing IO;\r\n\r\nnamespace Jasix.DataStructures { \r\n\tpublic sealed class JasixNode : IComparable<JasixNode>\r\n\t{\r\n\t\tprivate readonly int _start;\r\n\t\tprivate int _end;\r\n\t\tpublic readonly long FileLocation;\r\n\t\tprivate int _count;\r\n\t\tpublic JasixNode(int start, int end, long location)\r\n\t\t{\r\n\t\t\t_start        = start;\r\n\t\t\t_end          = end;\r\n\t\t\t_count = 1;\r\n\t\t\tFileLocation = location;\r\n\t\t}\r\n\r\n\t\tpublic JasixNode(ExtendedBinaryReader reader)\r\n\t\t{\r\n\t\t\t_start        = reader.ReadOptInt32();\r\n\t\t\t//on disk we will store the end as an offset to save space\r\n\t\t\t_end          = _start + reader.ReadOptInt32();\r\n\t\t\tFileLocation = reader.ReadOptInt64();\r\n\t\t}\r\n\r\n\t\tpublic bool Overlaps(JasixNode other)\r\n\t\t{\r\n\t\t\treturn other._end >= _start && other._start <= _end;\r\n\t\t}\r\n\r\n\t\tpublic int CompareTo(JasixNode other)\r\n\t\t{\r\n\t\t\tif (other == null) return -1;\r\n\t\t\t// ReSharper disable once ImpureMethodCallOnReadonlyValueField\r\n\t\t\treturn _start.CompareTo(other._start);\r\n\t\t}\r\n\r\n\t\tpublic bool TryAdd(int start, int end)\r\n\t\t{\r\n\t\t\tif (start < _start) return false;\r\n\t\t\tif (end - _start > JasixCommons.MinNodeWidth\r\n\t\t\t\t&& _count >= JasixCommons.PreferredNodeCount)\r\n\t\t\t\treturn false;\r\n\t\t\t_end = end;\r\n\t\t\t_count++;\r\n\t\t\treturn true;\r\n\t\t}\r\n\r\n\t\tpublic void Write(IExtendedBinaryWriter writer)\r\n\t\t{\r\n\t\t\twriter.WriteOpt(_start);\r\n\t\t\twriter.WriteOpt(_end-_start);\r\n\t\t\twriter.WriteOpt(FileLocation);\r\n\t\t}\r\n\r\n\t}\r\n}\r\n"
  },
  {
    "path": "Jasix/DataStructures/JsonSchema.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Intervals;\r\n\r\n// ReSharper disable InconsistentNaming\r\n// The names have to be this way as they have to match the json schema exactly\r\n\r\nnamespace Jasix.DataStructures\r\n{\r\n    // ReSharper disable once ClassNeverInstantiated.Global\r\n    public sealed class JsonSchema : IInterval\r\n    {\r\n        // ReSharper disable UnassignedField.Global\r\n        public string        chromosome;\r\n        public int           position;\r\n        public string        refAllele;\r\n        public List<string> altAlleles;\r\n        public int           svEnd;\r\n\r\n        // ReSharper restore UnassignedField.Global\r\n        public int Start => position;\r\n        public int End   => Utilities.GetJsonEntryEnd(this);\r\n    }\r\n}"
  },
  {
    "path": "Jasix/DataStructures/Utilities.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing System.Text.RegularExpressions;\nusing ErrorHandling.Exceptions;\nusing Newtonsoft.Json.Linq;\n\nnamespace Jasix.DataStructures\n{\n    public static class Utilities\n    {\n        private const char DoubleQuote  = '\\\"';\n        private const char OpenBracket  = '[';\n        private const char CloseBracket = ']';\n\n        public static (string Chromosome, int Start, int End) ParseQuery(string position)\n        {\n            //chr1:100-101\n            //chr1:100\n            //chr1 - report all entries for chr1\n\n            var regexPos = new Regex(@\"^(\\w+)(?::(\\d+)(?:-(\\d+))?)?$\", RegexOptions.Compiled);\n\n            string trimmedPos = position.Trim(' ');\n            var match = regexPos.Match(trimmedPos);\n            if (!match.Success)\n                throw new UserErrorException($\"region {trimmedPos} is not valid, please specify a valid region, e.g., chr1, 1, 1:1234 or 1:1234-4567\");\n            string chromosome = match.Groups[1].ToString();\n            if (!match.Groups[2].Success && !match.Groups[3].Success) return (chromosome, 1, int.MaxValue);\n\n            int start = Convert.ToInt32(match.Groups[2].ToString());\n\n            int end = match.Groups[3].Success ? 
Convert.ToInt32(match.Groups[3].ToString()) : start;\n\n            return (chromosome, start, end);\n        }\n\n\t    public static void PrintQuerySectionOpening(string sectionName, StreamWriter writer)\n\t    {\n\t\t    writer.Write(DoubleQuote + sectionName + DoubleQuote+ \":\" + OpenBracket + Environment.NewLine);\n\t    }\n\n\t    public static void PrintQuerySectionClosing(StreamWriter writer)\n\t    {\n\t\t    writer.Write(Environment.NewLine + CloseBracket);\n\t    }\n\n\t    public static void PrintJsonEntry(string entry, bool needComma, StreamWriter writer)\n\t    {\n\t\t    if (needComma)\n\t\t\t    writer.Write(\",\" + writer.NewLine);\n\t\t    var jObject = JObject.Parse(entry);\n\t\t\twriter.Write(jObject.ToString());\n\t\t}\n\n\t    public static bool IsLargeVariant(int start, int end)\n        {\n            return end - start + 1 > JasixCommons.MinNodeWidth;\n        }\n\n        public static int GetJsonEntryEnd(JsonSchema jsonEntry)\n        {\n            if (jsonEntry.svEnd > 0) return jsonEntry.svEnd;\n            List<string> altAlleles = jsonEntry.altAlleles;\n            int altAlleleOffset = altAlleles != null && altAlleles.All(IsNucleotideAllele) && altAlleles.Any(x => x.Length > 1) ? 1 : 0;\n\n            return Math.Max(jsonEntry.refAllele.Length - 1, altAlleleOffset) + jsonEntry.position;\n        }\n\n        public static bool IsNucleotideAllele(string altAllele)\n        {\n            return string.IsNullOrEmpty(altAllele) || altAllele.ToCharArray().All(x => x == 'A' || x == 'T' || x == 'C' || x == 'G');\n        }\n    }\n}\n"
  },
  {
    "path": "Jasix/IndexCreator.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.IO.Compression;\r\nusing CommandLine.Utilities;\r\nusing Compression.FileHandling;\r\nusing ErrorHandling.Exceptions;\r\nusing IO;\r\nusing Jasix.DataStructures;\r\nusing Newtonsoft.Json;\r\nusing OptimizedCore;\r\n\r\nnamespace Jasix\r\n{\r\n    public sealed class IndexCreator : IDisposable\r\n    {\r\n        private readonly BgzipTextReader _reader;\r\n        private readonly Stream _writeStream;\r\n\r\n        private readonly Benchmark _chromBenchmark;\r\n        private readonly Benchmark _benchmark;\r\n\r\n        public IndexCreator(BlockGZipStream readStream, Stream writeStream)\r\n        {\r\n            _reader         = new BgzipTextReader(readStream);\r\n            _writeStream    = writeStream;\r\n            _chromBenchmark = new Benchmark();\r\n            _benchmark      = new Benchmark();\r\n        }\r\n\r\n        public IndexCreator(string fileName)\r\n            : this(\r\n                new BlockGZipStream(FileUtilities.GetReadStream(fileName), CompressionMode.Decompress),\r\n                FileUtilities.GetCreateStream(fileName + JasixCommons.FileExt))\r\n        {}\r\n\r\n        public void CreateIndex()\r\n        {\r\n            var index = new JasixIndex();\r\n            IndexHeader(index);\r\n\r\n            string lastLine = IndexPositions(index);\r\n\r\n            IndexGenes(lastLine, index);\r\n\r\n            index.Write(_writeStream);\r\n\r\n            Console.WriteLine();\r\n\r\n            long peakMemoryUsageBytes = MemoryUtilities.GetPeakMemoryUsage();\r\n            var wallTimeSpan = _benchmark.GetElapsedTime();\r\n            Console.WriteLine();\r\n            if (peakMemoryUsageBytes > 0) Console.WriteLine(\"Peak memory usage: {0}\", MemoryUtilities.ToHumanReadable(peakMemoryUsageBytes));\r\n            Console.WriteLine(\"Time: {0}\", Benchmark.ToHumanReadable(wallTimeSpan));\r\n        }\r\n\r\n        private string IndexPositions(JasixIndex 
index)\r\n        {\r\n            // we need the location before accessing the line\r\n            long linePosition = _reader.Position;\r\n            index.BeginSection(JasixCommons.PositionsSectionTag, linePosition);\r\n            Console.WriteLine($\"section:{JasixCommons.PositionsSectionTag} starts at {linePosition}\");\r\n\r\n            var previousChr = \"\";\r\n            var previousPos = 0;\r\n            string line;\r\n            while ((line = _reader.ReadLine()) != null)\r\n            {\r\n                if (line.OptimizedStartsWith(']'))\r\n                {\r\n                    index.EndSection(JasixCommons.PositionsSectionTag, linePosition);\r\n                    Console.WriteLine($\"section:{JasixCommons.PositionsSectionTag} ends at {linePosition}\");\r\n                    break;\r\n                }\r\n\r\n                line = line.TrimEnd(',');\r\n                (string chr, int position, int end) = GetChromPosition(line);\r\n\r\n                CheckSorting(chr, position, previousChr, previousPos);\r\n\r\n                index.Add(chr, position, end, linePosition);\r\n                linePosition = _reader.Position;\r\n                previousChr = chr;\r\n                previousPos = position;\r\n            }\r\n\r\n            return line;\r\n        }\r\n\r\n        private void IndexGenes(string lastLine, JasixIndex index)\r\n        {\r\n            if (lastLine == null) return;\r\n            do\r\n            {\r\n                long linePosition = _reader.Position;\r\n                \r\n                if (lastLine.EndsWith($\",\\\"{JasixCommons.GenesSectionTag}\\\":[\"))\r\n                {\r\n                    index.BeginSection(JasixCommons.GenesSectionTag, _reader.Position);\r\n                    Console.WriteLine($\"section:{JasixCommons.GenesSectionTag} starts at {_reader.Position}\");\r\n                }\r\n\r\n                if (lastLine.EndsWith(\"]}\"))\r\n                {\r\n                    
index.EndSection(JasixCommons.GenesSectionTag, linePosition);\r\n                    Console.WriteLine($\"section:{JasixCommons.GenesSectionTag} ends at {linePosition}\");\r\n                    break;\r\n                }\r\n            } while ((lastLine = _reader.ReadLine()) != null);\r\n        }\r\n\r\n        private void IndexHeader(JasixIndex index)\r\n        {\r\n            string searchTag = $\"\\\"{JasixCommons.PositionsSectionTag}\\\":[\";\r\n            string headerTag = $\"{{\\\"{JasixCommons.HeaderSectionTag}\\\":\";\r\n            string line;\r\n\r\n            long previousPosition = _reader.Position;\r\n            while ((line = _reader.ReadLine()) != null)\r\n            {\r\n                if (line.StartsWith(headerTag))\r\n                {\r\n                    index.BeginSection(JasixCommons.HeaderSectionTag, previousPosition);\r\n                    Console.WriteLine($\"section:{JasixCommons.HeaderSectionTag} starts at {previousPosition}\");\r\n                }\r\n\r\n                if (line.EndsWith(searchTag))\r\n                {\r\n                    {\r\n                        index.EndSection(JasixCommons.HeaderSectionTag, previousPosition);\r\n                        Console.WriteLine($\"section:{JasixCommons.HeaderSectionTag} ends at {previousPosition}\");\r\n                    }\r\n                    break;\r\n                }\r\n\r\n                previousPosition = _reader.Position;\r\n            }\r\n\r\n        }\r\n\r\n        // ReSharper disable once UnusedParameter.Local\r\n        // ReSharper disable once ParameterOnlyUsedForPreconditionCheck.Local\r\n        private void CheckSorting(string chr, int pos, string previousChr, int previousPos)\r\n        {\r\n            if (chr == previousChr && pos < previousPos)\r\n            {\r\n                throw new UserErrorException($\"the Json file is not sorted at {chr}: {pos}\");\r\n            }\r\n\r\n            if (chr == previousChr || previousChr == 
\"\") return;\r\n\r\n            Console.WriteLine($\"Ref Sequence {previousChr} indexed in {Benchmark.ToHumanReadable(_chromBenchmark.GetElapsedTime())}\");\r\n            _chromBenchmark.Reset();\r\n        }\r\n\r\n        internal static (string chr, int position, int end) GetChromPosition(string line)\r\n        {\r\n            JsonSchema jsonEntry;\r\n            try\r\n            {\r\n                jsonEntry = JsonConvert.DeserializeObject<JsonSchema>(line);\r\n            }\r\n            catch (Exception)\r\n            {\r\n                Console.WriteLine($\"Error in line:\\n{line}\");\r\n                throw;\r\n            }\r\n\r\n\t\t\tint end = Utilities.GetJsonEntryEnd(jsonEntry);\r\n\r\n            return (jsonEntry.chromosome, jsonEntry.position, end);\r\n        }\r\n\r\n        public void Dispose()\r\n        {\r\n            _reader?.Dispose();\r\n            _writeStream?.Dispose();\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Jasix/Jasix.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\n  <PropertyGroup>\n    <OutputType>Exe</OutputType>\n    <TargetFramework>net6.0</TargetFramework>\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\n    \n  </PropertyGroup>\n  <ItemGroup>\n    <PackageReference Include=\"Newtonsoft.Json\" Version=\"13.0.1\" />\n    <ProjectReference Include=\"..\\CommandLine\\CommandLine.csproj\" />\n    <ProjectReference Include=\"..\\Compression\\Compression.csproj\" />\n    <ProjectReference Include=\"..\\ErrorHandling\\ErrorHandling.csproj\" />\n    <ProjectReference Include=\"..\\OptimizedCore\\OptimizedCore.csproj\" />\n    <ProjectReference Include=\"..\\VariantAnnotation.Interface\\VariantAnnotation.Interface.csproj\" />\n  </ItemGroup>\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\n</Project>"
  },
  {
    "path": "Jasix/JasixMain.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing ErrorHandling.Exceptions;\nusing IO;\nusing Jasix.DataStructures;\nusing VariantAnnotation.Interface;\n\nnamespace Jasix\n{\n    public static class JasixMain \n    {\n        private static string _inputJson;\n        private static string _outputFile;\n        private static readonly List<string> Queries = new List<string>();\n        private static string _section;\n        private static bool _printHeader;\n        private static bool _printHeaderOnly;\n        private static bool _list;\n        private static bool _createIndex;\n\n        public static int Main(string[] args)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"header|t\",\n                    \"print also the header lines\",\n                    v => _printHeader = v != null\n                },\n                {\n                    \"only-header|H\",\n                    \"print only the header lines\",\n                    v => _printHeaderOnly = v != null\n                },\n                {\n                    \"list|l\",\n                    \"list chromosome and section names\",\n                    v => _list = v != null\n                },\n                {\n                    \"index|c\",\n                    \"create index\",\n                    v => _createIndex = v != null\n                },\n                {\n                    \"in|i=\",\n                    \"input\",\n                    v => _inputJson = v\n                },\n                {\n                    \"out|o=\",\n                    \"compressed output file name (default:console)\",\n                    v => _outputFile = v\n                },\n                {\n                    \"query|q=\",\n                    \"query range\",\n             
       v => Queries.Add(v)\n                },\n                {\n                    \"section|s=\",\n                    \"complete section (positions or genes) to output\",\n                    v => _section = v\n                }\n            };\n\n            var exitCode = new ConsoleAppBuilder(args, ops)\n                .Parse()\n                .CheckInputFilenameExists(_inputJson, \"input Json file\", \"[in.json.gz]\")\n                .DisableOutput(!_createIndex && _outputFile == null)\n                .ShowBanner(Constants.Authors)\n                .ShowHelpMenu(\"Indexes a Nirvana annotated JSON file\", \"-i in.json.gz [options]\")\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return (int)exitCode;\n        }\n\n        private static ExitCodes ProgramExecution()\n        {\n            if (_createIndex)\n            {\n                using (var indexCreator = new IndexCreator(_inputJson))\n                {\n                    indexCreator.CreateIndex();\n                }\n\n                return ExitCodes.Success;\n            }\n\n            string indexFileName = _inputJson + JasixCommons.FileExt;\n\n            ValidateIndexFile(indexFileName);\n            var writer = string.IsNullOrEmpty(_outputFile)\n                ? 
null : GZipUtilities.GetStreamWriter(_outputFile);\n\n            using (var queryProcessor = new QueryProcessor(GZipUtilities.GetAppropriateStreamReader(_inputJson),\n                    FileUtilities.GetReadStream(indexFileName), writer))\n            {\n                if (_list)\n                {\n                    queryProcessor.ListChromosomesAndSections();\n                    return ExitCodes.Success;\n                }\n\n                if (_printHeaderOnly)\n                {\n                    queryProcessor.PrintHeaderOnly();\n                    return ExitCodes.Success;\n                }\n\n                if (!string.IsNullOrEmpty(_section))\n                {\n                    queryProcessor.PrintSection(_section);\n                    return ExitCodes.Success;\n                }\n\n                if (Queries == null)\n                {\n                    Console.WriteLine(\"Please specify query region(s)\");\n                    return ExitCodes.BadArguments;\n                }\n                \n                queryProcessor.ProcessQuery(Queries, _printHeader);\n                \n            }\n            return ExitCodes.Success;\n        }\n\n        private static void ValidateIndexFile(string indexFileName)\n        {\n            if (!File.Exists(indexFileName))\n                throw new UserErrorException(\"No index file found,please generate index file first.\");\n            //var indexFileCreateTime = File.GetCreationTime(indexFileName).Ticks;\n            //var fileCreateTime = File.GetCreationTime(_inputJson).Ticks;\n            //if (fileCreateTime > indexFileCreateTime - 1000) // adding a 100ms buffer\n            //    throw new UserErrorException(\"Index file is older than the input file, please re-generate the index.\");\n        }\n        \n    }\n}"
  },
  {
    "path": "Jasix/OnTheFlyIndexCreator.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing ErrorHandling.Exceptions;\r\nusing Jasix.DataStructures;\r\nusing VariantAnnotation.Interface.Positions;\r\n\r\nnamespace Jasix\r\n{\r\n    public sealed class OnTheFlyIndexCreator : IDisposable\r\n    {\r\n        private readonly Stream _indexStream;\r\n        private readonly JasixIndex _jasixIndex;\r\n        private int _lastPosition;\r\n        private string _lastChromName;\r\n\r\n        \r\n        public OnTheFlyIndexCreator(Stream indexStream)\r\n        {\r\n            _indexStream = indexStream;\r\n            _jasixIndex  = new JasixIndex();\r\n        }\r\n\r\n        public void Add(IPosition position, long fileLocation)\r\n        {\r\n            string chromName = position.Chromosome.EnsemblName;\r\n            int start        = position.Start;\r\n            int? end          = position.InfoData?.End;\r\n\r\n            if (chromName == _lastChromName && start < _lastPosition)\r\n            {\r\n                throw new UserErrorException($\"The Json file is not sorted at {position.Chromosome.UcscName}: {start}\");\r\n            }\r\n\r\n            _lastPosition  = start;\r\n            _lastChromName = chromName;\r\n\r\n            if (end == null)\r\n            {\r\n                string[] altAlleles = position.AltAlleles;\r\n                int altAlleleOffset = altAlleles != null && altAlleles.All(Utilities.IsNucleotideAllele) && altAlleles.Any(x => x.Length > 1) ? 
1 : 0;\r\n\r\n                end = Math.Max(position.RefAllele.Length - 1, altAlleleOffset) + start;\r\n            }\r\n\r\n            _jasixIndex.Add(position.Chromosome.EnsemblName, start, end.Value, fileLocation, position.Chromosome.UcscName);\r\n        }\r\n\r\n        public void BeginSection(string sectionName, long fileLocation)\r\n        {\r\n            _jasixIndex.BeginSection(sectionName, fileLocation);\r\n        }\r\n\r\n        public void EndSection(string sectionName, long fileLocation)\r\n        {\r\n            _jasixIndex.EndSection(sectionName, fileLocation);\r\n        }\r\n\r\n        public void Dispose()\r\n        {\r\n            Flush();\r\n            _indexStream.Dispose();\r\n        }\r\n\r\n        public void Flush()\r\n        {\r\n            _jasixIndex.Write(_indexStream);\r\n            _indexStream.Flush();\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Jasix/QueryProcessor.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing Compression.FileHandling;\r\nusing Intervals;\r\nusing Jasix.DataStructures;\r\nusing Newtonsoft.Json;\r\nusing OptimizedCore;\r\nusing Utilities = Jasix.DataStructures.Utilities;\r\n\r\nnamespace Jasix\r\n{\r\n\tpublic sealed class QueryProcessor:IDisposable\r\n\t{\r\n\t\t#region members\r\n\t\tprivate readonly StreamReader _jsonReader;\r\n\t    private readonly StreamWriter _writer;\r\n\t\tprivate readonly Stream _indexStream;\r\n\t\tprivate readonly JasixIndex _jasixIndex;\r\n\r\n        #endregion\r\n\r\n        #region IDisposable\r\n\t    public void Dispose()\r\n\t    {\r\n\t        _jsonReader?.Dispose();\r\n\t        _writer?.Dispose();\r\n\t        _indexStream?.Dispose();\r\n\t    }\r\n        #endregion\r\n\r\n        public QueryProcessor(StreamReader jsonReader, Stream indexStream, StreamWriter writer=null)\r\n\t\t{\r\n\t\t\t_jsonReader  = jsonReader;\r\n\t\t    _writer      = writer ?? 
new StreamWriter(Console.OpenStandardOutput());\r\n\t\t\t_indexStream = indexStream;\r\n\t\t\t_jasixIndex  = new JasixIndex(_indexStream);\r\n\r\n\t\t}\r\n\r\n\t\t\r\n        public void ListChromosomesAndSections()\r\n\t\t{\r\n\t\t\tforeach (string chrName in _jasixIndex.GetChromosomeList())\r\n\t\t\t{\r\n                _writer.WriteLine(chrName);\r\n\t\t\t}\r\n\r\n\t\t\tforeach (var section in _jasixIndex.GetSections())\r\n\t\t\t{\r\n\t\t\t\t_writer.WriteLine(section);\r\n\t\t\t}\r\n\t\t}\r\n\r\n\t\tpublic void PrintHeaderOnly()\r\n\t\t{\r\n\r\n\t\t    string headerString = \"{\"+GetHeader()+\"}\";\r\n\t\t\tUtilities.PrintJsonEntry(headerString, false, _writer);\r\n\t\t\t\r\n\t\t}\r\n\r\n\t\tpublic void PrintSection(string section)\r\n\t\t{\r\n\t\t\t_writer.WriteLine(\"[\");\r\n\t\t\tvar needComma = false;\r\n\t\t\tforeach (var line in GetSectionLines(section))\r\n\t\t\t{\r\n\t\t\t\tUtilities.PrintJsonEntry(line.TrimEnd(','), needComma,_writer);\r\n\t\t\t\tneedComma = true;\r\n\t\t\t}\r\n\t\t\t_writer.WriteLine(\"]\");\r\n\t\t}\r\n\r\n\t\tpublic int ProcessQuery(IEnumerable<string> queryStrings, bool printHeader = false)\r\n\t\t{\r\n\t\t\t\r\n\t\t\tif (printHeader)\r\n\t\t\t{\r\n\t\t\t\t_writer.Write(\"{\\n\\\"header\\\":\");\r\n\t\t\t\tvar headerContent = GetHeader().Split(':',2)[1];\r\n\t\t\t\tUtilities.PrintJsonEntry(headerContent, false, _writer);\r\n\t\t\t\t_writer.WriteLine(\",\");\r\n\t\t\t}\r\n\t\t\telse _writer.Write(\"{\");\r\n\t\t\t\r\n\t\t\tUtilities.PrintQuerySectionOpening(JasixCommons.PositionsSectionTag, _writer);\r\n\r\n\t\t    var count = 0;\r\n\t\t    foreach (string queryString in queryStrings)\r\n            {\r\n                var query = Utilities.ParseQuery(queryString);\r\n                query.Chromosome = _jasixIndex.GetIndexChromName(query.Chromosome);\r\n                if (!_jasixIndex.ContainsChr(query.Chromosome)) continue;\r\n\r\n                count += PrintLargeVariantsExtendingIntoQuery(query);\r\n                count += 
PrintAllVariantsFromQueryBegin(query, count > 0);\r\n            }\r\n\r\n            Utilities.PrintQuerySectionClosing(_writer);\r\n\t\t\t_writer.WriteLine(\"}\");\r\n\t\t    return count;\r\n\r\n\t\t}\r\n\r\n\t\tprivate int PrintAllVariantsFromQueryBegin((string, int, int) query, bool needComma)\r\n\t\t{\r\n\t\t    var count = 0;\r\n\t\t\tforeach (string line in ReadOverlappingJsonLines(query))\r\n\t\t\t{\r\n\t\t\t\tUtilities.PrintJsonEntry(line, needComma, _writer);\r\n\t\t\t\tneedComma = true;\r\n\t\t\t    count++;\r\n\t\t\t}\r\n\r\n\t\t    return count;\r\n\t\t}\r\n\t\tprivate int PrintLargeVariantsExtendingIntoQuery((string, int, int) query)\r\n\t\t{\r\n\t\t    var count = 0;\r\n\t\t\tforeach (string line in ReadJsonLinesExtendingInto(query))\r\n\t\t\t{\r\n\t\t\t\tUtilities.PrintJsonEntry(line, count>0, _writer);\r\n\t\t\t    count++;\r\n\t\t\t}\r\n\r\n\t\t\treturn count;\r\n\t\t}\r\n\r\n\t\tinternal IEnumerable<string> ReadJsonLinesExtendingInto((string Chr, int Start, int End) query)\r\n\t\t{\r\n\t\t\t// query for large variants like chr1:100-99 returns all overlapping large variants that start before 100\r\n            (string chr, int start, _) = query;\r\n            long[] locations = _jasixIndex.LargeVariantPositions(chr, start, start - 1);\r\n\r\n\t\t\tif (locations == null || locations.Length == 0) yield break;\r\n\r\n\t\t\tforeach (long location in locations)\r\n\t\t\t{\r\n\t\t\t\tRepositionReader(location);\r\n\t\t\t\tstring line;\r\n\t\t\t\twhile ((line = _jsonReader.ReadLine()) != null)\r\n\t\t\t\t{\r\n\t\t\t\t\tif (!line.OptimizedStartsWith(',')) { //buffer starts with ',\\n', skip this first line\r\n\t\t\t\t\t\tline = line.TrimEnd(',');\t\t\t\t\t      \r\n\t\t\t\t\t\tyield return line;\r\n\t\t\t\t\t\tbreak;\r\n\t\t\t\t\t}\r\n\t\t\t\t}\r\n\t\t\t}\r\n\t\t}\r\n\r\n\t\tprivate void RepositionReader(long location)\r\n\t\t{\r\n\t\t\t_jsonReader.DiscardBufferedData();\r\n\t\t\t_jsonReader.BaseStream.Position = location;\r\n\t\t}\r\n\r\n\t    public 
string GetHeader()\r\n\t    {\r\n\t        long headerLocation = _jasixIndex.GetSectionBegin(JasixCommons.HeaderSectionTag);\r\n\t        RepositionReader(headerLocation);\r\n\r\n\t        string headerLine = _jsonReader.ReadLine();\r\n\t        string additionalTail = $\",\\\"{JasixCommons.PositionsSectionTag}\\\":[\";\r\n\r\n\t        return headerLine?.Substring(1, headerLine.Length - 1 - additionalTail.Length);\r\n\t    }\r\n\t    \r\n\t    private static readonly byte[] BgzBlock = new byte[BlockGZipStream.BlockGZipFormatCommon.MaxBlockSize];\r\n\t    public IEnumerable<string> GetSectionLines(string section)\r\n\t    {\r\n\t\t    if (_jasixIndex.GetSectionBegin(section) == -1) yield break;\r\n\t\t    \r\n\t\t    long sectionBegin = _jasixIndex.GetSectionBegin(section);\r\n\t\t    RepositionReader(sectionBegin);\r\n\r\n\t\t    string line = _jsonReader.ReadLine();\r\n\t\t    // at the end of both positions and genes section, we have a line that closes the array.\r\n\t\t    // So, our terminating condition can be the following\r\n\t\t    while (line != null && !line.StartsWith(\"]\"))\r\n\t\t    {\r\n\t\t\t    yield return line;\r\n\t\t\t    line = _jsonReader.ReadLine();\r\n\t\t    }\r\n\t    }\r\n        internal IEnumerable<string> ReadOverlappingJsonLines((string Chr, int Start, int End) query)\r\n\t\t{\r\n            (string chr, int start, int end) = query;\r\n            long position = _jasixIndex.GetFirstVariantPosition(chr, start, end);\r\n\r\n\t\t\tif (position == -1) yield break;\r\n\r\n\t\t\tRepositionReader(position);\r\n\r\n\t\t\tstring line;\r\n\t\t\twhile ((line = _jsonReader.ReadLine()) != null && !line.OptimizedStartsWith(']'))\r\n\t\t\t\t//The array of positions entry end with \"],\" Going past it will cause the json deserializer to crash\r\n\t\t\t{\r\n\t\t\t\tline = line.TrimEnd(',');\r\n                if (string.IsNullOrEmpty(line)) continue;\r\n\t\t\t    \r\n\t\t\t\tJsonSchema jsonEntry = ParseJsonEntry(line);\r\n\r\n\t\t\t    string 
jsonChrom = _jasixIndex.GetIndexChromName(jsonEntry.chromosome);\r\n\t\t\t\tif (jsonChrom != chr) break;\r\n\r\n\t\t\t\tif (jsonEntry.Start > end) break;\r\n\r\n\t\t\t\tif (!jsonEntry.Overlaps(start, end)) continue;\r\n\t\t\t\t// if there is an SV that starts before the query start that is printed by the large variant printer\r\n\t\t\t\tif (Utilities.IsLargeVariant(jsonEntry.Start, jsonEntry.End) && jsonEntry.Start < start) continue;\r\n\t\t\t\tyield return line;\r\n\t\t\t}\r\n\t\t}\r\n\r\n\t    private static JsonSchema ParseJsonEntry(string line)\r\n\t    {\r\n\t        JsonSchema jsonEntry;\r\n\t        try\r\n\t        {\r\n\t            jsonEntry = JsonConvert.DeserializeObject<JsonSchema>(line);\r\n\t        }\r\n\t        catch (Exception)\r\n\t        {\r\n\t            Console.WriteLine($\"Error in line:\\n{line}\");\r\n\t            throw;\r\n\t        }\r\n\r\n\t        return jsonEntry;\r\n\t    }\r\n\r\n\t    \r\n\t}\r\n}\r\n"
  },
  {
    "path": "Jist/Jist.csproj",
    "content": "<Project Sdk=\"Microsoft.NET.Sdk\">\n\n    <PropertyGroup>\n        <OutputType>Exe</OutputType>\n        <TargetFramework>net6.0</TargetFramework>\n        <OutputPath>..\\bin\\$(Configuration)</OutputPath>\n    </PropertyGroup>\n\n    <ItemGroup>\n      <ProjectReference Include=\"..\\Compression\\Compression.csproj\" />\n      <ProjectReference Include=\"..\\ErrorHandling\\ErrorHandling.csproj\" />\n      <ProjectReference Include=\"..\\IO\\IO.csproj\" />\n      <ProjectReference Include=\"..\\Jasix\\Jasix.csproj\" />\n    </ItemGroup>\n\n</Project>\n"
  },
  {
    "path": "Jist/JistMain.cs",
    "content": "﻿using System;\nusing System.IO;\nusing ErrorHandling;\nusing IO;\nusing Jasix.DataStructures;\n\nnamespace Jist\n{\n    public class JistMain\n    {\n        public static int Main(string[] args)\n        {\n            Console.WriteLine(\"Running Nirvana Json Stitching tool\");\n            if (args.Length < 1)\n            {\n                Console.WriteLine(\"Usage: dotnet jist.dll input-json.gz-prefix output-json.gz \");\n                Environment.Exit((int)ExitCodes.UserError);\n            }\n\n            var inputFilePrefix = args[0];\n            var outputFileName = args[1];\n            var directory = Path.GetDirectoryName(inputFilePrefix);\n            if (string.IsNullOrEmpty(directory)) directory = \".\";\n            var prefix = Path.GetFileName(inputFilePrefix);\n            var inputFiles = Directory.GetFiles(directory, prefix+\"*.json.gz\");\n            Array.Sort(inputFiles);\n            Console.WriteLine(\"Files to stitch\");\n            foreach (var file in inputFiles)\n            {\n                Console.WriteLine(file);\n                if (!File.Exists(file + JasixCommons.FileExt))\n                {\n                    Console.WriteLine($\"Cannot find {file +JasixCommons.FileExt}. Please provide corresponding {JasixCommons.FileExt} files for each input JSON\");\n                    return (int)ExitCodes.UserError;\n                }\n            }\n\n            if (inputFiles.Length == 0)\n            {\n                Console.WriteLine($\"Found {inputFiles.Length} files to stitch. Need at least 1.\");\n                Environment.Exit((int)ExitCodes.UserError);\n            }\n            \n            if (inputFiles.Length == 1)\n            {\n                Console.WriteLine(\"Found only one input JSON. 
Copying it to output file...\");\n                File.Copy(inputFiles[0], outputFileName, true);\n                return (int)ExitCodes.Success;\n            }\n\n            \n            var inputStreams = new Stream[inputFiles.Length];\n            var indexStreams = new Stream[inputFiles.Length];\n            for (var i = 0; i < inputFiles.Length; i++)\n            {\n                inputStreams[i] = FileUtilities.GetReadStream(inputFiles[i]);\n                indexStreams[i] = FileUtilities.GetReadStream(inputFiles[i] + JasixCommons.FileExt);\n            }\n\n            using(var outputStream = FileUtilities.GetCreateStream(outputFileName))\n            using (var stitcher = new JsonStitcher(inputStreams, indexStreams, outputStream))\n            {\n                return stitcher.Stitch();\n            }\n        }\n        \n    }\n}"
  },
  {
    "path": "Jist/JistUtilities.cs",
    "content": "using System.IO;\nusing System.IO.Compression;\nusing Compression.FileHandling;\n\nnamespace Jist\n{\n    public static class JistUtilities\n    {\n        public static byte[] GetCompressedBlock(string s, int compressionLevel=1)\n        {\n            using (var stream = new MemoryStream())\n            {\n                using(var memStream = new BlockGZipStream(stream, CompressionMode.Compress, true))\n                using (var writer = new StreamWriter(memStream))\n                {\n                    writer.Write(s);\n                }\n\n                return stream.ToArray();\n            }\n\n        }\n\n    }\n}"
  },
  {
    "path": "Jist/JsonStitcher.cs",
    "content": "using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.IO.Compression;\nusing System.Linq;\nusing System.Text;\nusing Compression.FileHandling;\nusing ErrorHandling;\nusing Jasix.DataStructures;\n\nnamespace Jist\n{\n    public sealed class JsonStitcher:IDisposable\n    {\n        private readonly Stream[] _jsonStreams;\n        private readonly Stream[] _jasixStreams;\n        private readonly Stream _outStream;\n        private readonly bool _leaveOutStreamOpen;\n        private readonly HashSet<string> _geneLines;\n\n        public JsonStitcher(Stream[] jsonStreams, Stream[] jasixStreams, Stream outStream, bool leaveOutStreamOpen=false)\n        {\n            _jasixStreams = jasixStreams;\n            _jsonStreams = jsonStreams;\n            _outStream = outStream;\n            _leaveOutStreamOpen = leaveOutStreamOpen;\n            _geneLines = new HashSet<string>();\n        }\n        \n        public const string GeneHeaderLine = \"\\n],\\\"genes\\\":[\";\n        public const string FooterLine = \"]}\";\n\n        private bool _isFirstHeaderBlock = true;\n        private static readonly byte[] BgzBlock = new byte[BlockGZipStream.BlockGZipFormatCommon.MaxBlockSize];\n        private static readonly byte[] CommaBlock = JistUtilities.GetCompressedBlock(\",\\n\");//will be added to the end of a block when needed\n        \n        \n        public int Stitch()\n        {\n            var positionBlockCount = 0;\n            var geneLineCount = 0;\n\n            using (var writer = new BinaryWriter(_outStream, Encoding.Default, _leaveOutStreamOpen))\n            {\n                var needsCommaBlock = false;\n                \n                for (var i=0; i < _jsonStreams.Length; i++)\n                {\n                    if (needsCommaBlock) writer.Write(CommaBlock, 0, CommaBlock.Length);\n                    var jsonStream = _jsonStreams[i];\n                    var jasixStream = _jasixStreams[i];\n                 
   \n                    positionBlockCount+= WritePositionBlocks(jsonStream, jasixStream, writer);\n                    geneLineCount+= ReadGeneLines(jsonStream);\n                    //after the first file, every file will need a comma block to maintain valid json after positions block\n                    // and after each gene block\n                    needsCommaBlock = true;\n                }\n                writer.Flush();\n                //write out the gene blocks\n                WriteGeneBlocks(_outStream);\n            }\n\n            Console.WriteLine($\"Total position blocks written: {positionBlockCount}\");\n            Console.WriteLine($\"Gene lines read: {geneLineCount}\");\n            Console.WriteLine($\"Unique gene lines: {_geneLines.Count}\");\n            return (int) ExitCodes.Success;\n        }\n\n        private int ReadGeneLines(Stream jsonStream)\n        {\n            var lineCount = 0;\n            using (var bGzipStream = new BlockGZipStream(jsonStream, CompressionMode.Decompress))\n            using(var reader = new StreamReader(bGzipStream))\n            {\n                string line;\n                while ((line= reader.ReadLine())!= null)\n                {\n                    if (line==string.Empty) continue;\n                    if (line == FooterLine) break;\n                    if (!line.EndsWith(',')) line += ',';\n                    lineCount++;\n                    _geneLines.Add(line);\n                }\n            }\n\n            return lineCount;\n        }\n\n        private void WriteGeneBlocks(Stream stream)\n        {\n            \n            using (var bGzipStream = new BlockGZipStream(stream, CompressionMode.Compress, _leaveOutStreamOpen)) \n            using(var writer = new StreamWriter(bGzipStream))\n            {\n                var count = _geneLines.Count;\n                if (count == 0)\n                {\n                    writer.WriteLine(FooterLine);\n                    return;\n      
          }\n                writer.WriteLine(GeneHeaderLine);\n                var i = 0;\n                foreach (var geneLine in _geneLines.OrderBy(x=>x))\n                {\n                    i++;\n                    //the last gene line shouldn't have a comma at the end\n                    writer.WriteLine(i == count ? geneLine.TrimEnd(',') : geneLine);\n                }\n                writer.WriteLine(FooterLine);\n            }\n        }\n\n        private int WritePositionBlocks(Stream jsonStream, Stream jasixStream,\n            BinaryWriter writer)\n        {\n            var blockCount = 0;\n            using (var reader = new BgzBlockReader(jsonStream, true))\n            using (var jasixIndex = new JasixIndex(jasixStream))\n            {\n                var positionSectionBegin = jasixIndex.GetSectionBegin(JasixCommons.PositionsSectionTag);\n                if (positionSectionBegin == -1) return 0;//no positions found. and therefore, cannot have genes either.\n\n                var positionSectionEnd   = jasixIndex.GetSectionEnd(JasixCommons.PositionsSectionTag);\n                var geneSectionBegin     = jasixIndex.GetSectionBegin(JasixCommons.GenesSectionTag);\n                \n                var isFirstBlock = true;\n                \n                for (int count = reader.ReadCompressedBlock(BgzBlock); count > 0; count=reader.ReadCompressedBlock(BgzBlock))\n                {\n                    if (isFirstBlock)\n                    {\n                        if (_isFirstHeaderBlock)\n                        {\n                            writer.Write(BgzBlock, 0, count);\n                            _isFirstHeaderBlock = false;\n                        }\n\n                        isFirstBlock = false;\n                        continue;\n                    }\n                    //we need the following check because there is one block between the positions and the genes block that we want to skip\n                    // the block 
that contains: ],\"genes\":[...\n                    \n                    // the 16 bit right shift is due to the representation of the position in bgzip file (the block's file offset is stored above the low 16 bits)\n                    if(reader.Position >= positionSectionEnd >>16)\n                    {\n                        //we have read the last position block\n                        blockCount++;\n                        writer.Write(BgzBlock, 0, count);\n                        if(geneSectionBegin!=-1) jsonStream.Position = geneSectionBegin >> 16;\n                        return blockCount;\n                    }\n                    \n                    blockCount++;\n                    writer.Write(BgzBlock, 0, count);\n                }\n                \n            }\n\n            return blockCount;\n        }\n\n        public void Dispose()\n        {\n            if (_jsonStreams != null)\n            {\n                foreach (Stream jsonStream in _jsonStreams)\n                {\n                    jsonStream?.Dispose();\n                }\n            }\n            \n            if (_jasixStreams != null)\n            {\n                foreach (Stream jasixStream in _jasixStreams)\n                {\n                    jasixStream?.Dispose();\n                }\n            }\n\n            if (_leaveOutStreamOpen)\n            {\n                _outStream.Flush();\n                return;\n            }\n\n            _outStream?.Dispose();\n        }\n    }\n}"
  },
  {
    "path": "LICENSE",
    "content": "# PolyForm Strict License 1.0.0\n\n<https://polyformproject.org/licenses/strict/1.0.0>\n\n## Acceptance\n\nIn order to get any license under these terms, you must agree\nto them as both strict obligations and conditions to all\nyour licenses.\n\n## Copyright License\n\nThe licensor grants you a copyright license for the software\nto do everything you might do with the software that would\notherwise infringe the licensor's copyright in it for any\npermitted purpose, other than distributing the software or\nmaking changes or new works based on the software.\n\n## Patent License\n\nThe licensor grants you a patent license for the software that\ncovers patent claims the licensor can license, or becomes able\nto license, that you would infringe by using the software.\n\n## Noncommercial Purposes\n\nAny noncommercial purpose is a permitted purpose.\n\n## Personal Uses\n\nPersonal use for research, experiment, and testing for\nthe benefit of public knowledge, personal study, private\nentertainment, hobby projects, amateur pursuits, or religious\nobservance, without any anticipated commercial application,\nis use for a permitted purpose.\n\n## Noncommercial Organizations\n\nUse by any charitable organization, educational institution,\npublic research organization, public safety or health\norganization, environmental protection organization,\nor government institution is use for a permitted purpose\nregardless of the source of funding or obligations resulting\nfrom the funding.\n\n## Fair Use\n\nYou may have \"fair use\" rights for the software under the\nlaw. These terms do not limit them.\n\n## No Other Rights\n\nThese terms do not allow you to sublicense or transfer any of\nyour licenses to anyone else, or prevent the licensor from\ngranting licenses to anyone else.  
These terms do not imply\nany other licenses.\n\n## Patent Defense\n\nIf you make any written claim that the software infringes or\ncontributes to infringement of any patent, your patent license\nfor the software granted under these terms ends immediately. If\nyour company makes such a claim, your patent license ends\nimmediately for work on behalf of your company.\n\n## Violations\n\nThe first time you are notified in writing that you have\nviolated any of these terms, or done anything with the software\nnot covered by your licenses, your licenses can nonetheless\ncontinue if you come into full compliance with these terms,\nand take practical steps to correct past violations, within\n32 days of receiving notice.  Otherwise, all your licenses\nend immediately.\n\n## No Liability\n\n***As far as the law allows, the software comes as is, without\nany warranty or condition, and the licensor will not be liable\nto you for any damages arising out of these terms or the use\nor nature of the software, under any kind of legal claim.***\n\n## Definitions\n\nThe **licensor** is the individual or entity offering these\nterms, and the **software** is the software the licensor makes\navailable under these terms.\n\n**You** refers to the individual or entity agreeing to these\nterms.\n\n**Your company** is any legal entity, sole proprietorship,\nor other kind of organization that you work for, plus all\norganizations that have control over, are under the control of,\nor are under common control with that organization.  **Control**\nmeans ownership of substantially all the assets of an entity,\nor the power to direct its management and policies by vote,\ncontract, or otherwise.  Control can be direct or indirect.\n\n**Your licenses** are all the licenses granted to you for the\nsoftware under these terms.\n\n**Use** means anything you do with the software requiring one\nof your licenses.\n"
  },
  {
    "path": "MitoHeteroplasmy/MitoHeteroplasmy.csproj",
    "content": "<Project Sdk=\"Microsoft.NET.Sdk\">\n\n  <PropertyGroup>\n    <TargetFramework>net6.0</TargetFramework>\n    \n  </PropertyGroup>\n  <ItemGroup>\n    <EmbeddedResource Include=\"Resources\\MitoHeteroplasmy.tsv.gz\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ProjectReference Include=\"..\\OptimizedCore\\OptimizedCore.csproj\" />\n    <ProjectReference Include=\"..\\RepeatExpansions\\RepeatExpansions.csproj\" />\n    <ProjectReference Include=\"..\\VariantAnnotation.Interface\\VariantAnnotation.Interface.csproj\" />\n    <ProjectReference Include=\"..\\VariantAnnotation\\VariantAnnotation.csproj\" />\n  </ItemGroup>\n\n</Project>\n"
  },
  {
    "path": "MitoHeteroplasmy/MitoHeteroplasmyProvider.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing Genome;\nusing RepeatExpansions;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Providers;\nusing Variants;\n\nnamespace MitoHeteroplasmy\n{\n    public sealed class MitoHeteroplasmyProvider : IMitoHeteroplasmyProvider\n    {\n        public string Name { get; } = \"MitochondrialHeteroplasmy\";\n        public GenomeAssembly Assembly { get; } = GenomeAssembly.rCRS;\n        public IEnumerable<IDataSourceVersion> DataSourceVersions { get; }\n\n        private const string Version = \"20180410\";\n        private const string Description = \"Variant read frequency percentiles for the Mitochondrial reference\";\n        private const string MitoChromUcscName = \"chrM\";\n\n        private static readonly long CreateDateTicks = new DateTime(2020, 5, 21).Ticks;\n        private static readonly Dictionary<string, int> AlleleToInt = new Dictionary<string, int> { { \"A\", 0 }, { \"C\", 1 }, { \"G\", 2 }, { \"T\", 3 } };\n        private const int SequenceLengthMax = int.MaxValue / 4;\n        private readonly Dictionary<int, (double[] Vrfs, double[] Percentiles)> _alleleToDistribution = new Dictionary<int, (double[], double[])>();\n\n        public MitoHeteroplasmyProvider()\n        {\n            var dataSourceVersion = new DataSourceVersion(Name, Version, CreateDateTicks, Description);\n            DataSourceVersions = new[] {dataSourceVersion};\n        }\n\n        public void Add(int position, string altAllele, double[] vrfs, int[] alleleDepths)\n        {\n            double[] percentiles = PercentileUtilities.ComputePercentiles(vrfs.Length, alleleDepths);\n            _alleleToDistribution[EncodeMitoPositionAndAltAllele(position, altAllele)] = (vrfs, percentiles);\n        }\n        \n        public double?[] GetVrfPercentiles(IVariant[] variants, double[] vrfs)\n        {\n            if (vrfs == null) return null;\n            if (variants == null 
|| variants.Length == 0) return null;\n            \n            if (variants[0].Chromosome.UcscName != MitoChromUcscName) return null;\n\n            var percentiles = vrfs.Zip(variants, (vrf, variant) => GetVrfPercentile(variant, vrf)).ToArray();\n            return percentiles.All(x => x == null) ? null : percentiles;\n        }\n\n        private double? GetVrfPercentile(IVariant variant, double vrf)\n        {\n            var position  = variant.Start;\n            var altAllele = variant.AltAllele;\n            if (string.IsNullOrEmpty(altAllele) || !AlleleToInt.ContainsKey(altAllele)) return null;\n\n            var positionAndAltAlleleIntForm = EncodeMitoPositionAndAltAllele(position, altAllele);\n\n            if (!_alleleToDistribution.TryGetValue(positionAndAltAlleleIntForm, out (double[] Vrfs, double[] Percentiles) data)) return null;\n\n            if (vrf > 0.999) vrf = 0.999;\n            return PercentileUtilities.GetPercentile(vrf, data.Vrfs, data.Percentiles);\n        }\n        \n        private static int EncodeMitoPositionAndAltAllele(int position, string altAllele) => SequenceLengthMax * AlleleToInt[altAllele] + position;\n        \n        private static double ToRoundedVrf(double vrf) => Math.Round(vrf, 3, MidpointRounding.AwayFromZero);\n    }\n}"
  },
  {
    "path": "MitoHeteroplasmy/MitoHeteroplasmyReader.cs",
    "content": "﻿using System;\nusing System.IO;\nusing System.IO.Compression;\nusing System.Linq;\nusing System.Reflection;\nusing OptimizedCore;\n\nnamespace MitoHeteroplasmy\n{\n    public static class MitoHeteroplasmyReader\n    {\n        private const int PositionIndex  = 1;\n        private const int RefIndex       = 2;\n        private const int AltIndex       = 3;\n        private const int VrfBinsIndex   = 4;\n        private const int VrfCountsIndex = 5;\n\n        private const string ResourceName = \"MitoHeteroplasmy.Resources.MitoHeteroplasmy.tsv.gz\";\n        public static MitoHeteroplasmyProvider GetProvider()\n        {\n            var assembly = Assembly.GetExecutingAssembly();\n            using var stream = assembly.GetManifestResourceStream(ResourceName);\n            if (stream == null) throw new NullReferenceException(\"Unable to read from the Mitochondrial Heteroplasmy file\");\n\n            using var gzStream = new GZipStream(stream, CompressionMode.Decompress);\n            using var reader = new StreamReader(gzStream);\n\n            string line;\n            \n            var heteroplasmyProvider = new MitoHeteroplasmyProvider();\n            while ((line = reader.ReadLine())!=null)\n            {\n                if(line.StartsWith(\"#\")) continue;\n                \n                var fields    = line.OptimizedSplit('\\t');\n                var position  = int.Parse(fields[PositionIndex]);\n                var refAllele = fields[RefIndex];\n                var altAllele = fields[AltIndex];\n                if (altAllele==\".\" || !(refAllele.Length == 1 && altAllele.Length == 1)) continue;\n                \n                var vrfs         = fields[VrfBinsIndex].Split(',').Select(double.Parse);\n                var alleleDepths = fields[VrfCountsIndex].Split(',').Select(int.Parse).ToArray();\n                heteroplasmyProvider.Add(position, altAllele, vrfs.ToArray(), alleleDepths);\n            }\n\n            return 
heteroplasmyProvider;\n        }\n    }\n}"
  },
  {
    "path": "Nirvana/AnnotationFiles.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing Cloud;\r\nusing Cloud.Messages;\r\nusing IO;\r\nusing VariantAnnotation.ProteinConservation;\r\nusing VariantAnnotation.SA;\r\n\r\nnamespace Nirvana\r\n{\r\n    public sealed class AnnotationFiles\r\n    {\r\n        public List<(string Nsa, string Idx)> NsaFiles                { get; } = new();\r\n        public List<(string Gsa, string Idx)> GsaFiles                { get; } = new();\r\n        public List<string>                   NsiFiles                { get; } = new();\r\n        public List<string>                   NgaFiles                { get; } = new();\r\n        public List<string>                   GeneFusionSourceFiles   { get; } = new();\r\n        public List<string>                   GeneFusionJsonFiles     { get; } = new();\r\n        public (string Npd, string Idx)       PhylopFile              { get; private set; }\r\n        public string                         LowComplexityRegionFile { get; private set; }\r\n        public string                         ProteinConservationFile { get; private set; }\r\n        public (string Rma, string Idx)       RefMinorFile            { get; private set; }\r\n\r\n        public void AddFiles(string saDirectoryPath)\r\n        {\r\n            foreach (string filePath in GetFiles(saDirectoryPath))\r\n            {\r\n                // ReSharper disable once SwitchStatementMissingSomeCases\r\n                switch (filePath.GetFileSuffix(true))\r\n                {\r\n                    case SaCommon.SaFileSuffix:\r\n                        NsaFiles.Add((filePath, filePath + SaCommon.IndexSuffix));\r\n                        break;\r\n                    case SaCommon.GsaFileSuffix:\r\n                        GsaFiles.Add((filePath, filePath + SaCommon.IndexSuffix));\r\n                        break;\r\n                    case SaCommon.IntervalFileSuffix:\r\n                        NsiFiles.Add(filePath);\r\n                   
     break;\r\n                    case SaCommon.GeneFileSuffix:\r\n                        NgaFiles.Add(filePath);\r\n                        break;\r\n                    case SaCommon.PhylopFileSuffix:\r\n                        PhylopFile = (filePath, filePath + SaCommon.IndexSuffix);\r\n                        break;\r\n                    case ProteinConservationCommon.FileSuffix:\r\n                        ProteinConservationFile = filePath;\r\n                        break;\r\n                    case SaCommon.LcrFileSuffix:\r\n                        LowComplexityRegionFile = filePath;\r\n                        break;\r\n                    case SaCommon.RefMinorFileSuffix:\r\n                        RefMinorFile = (filePath, filePath + SaCommon.IndexSuffix);\r\n                        break;\r\n                    case SaCommon.GeneFusionSourceSuffix:\r\n                        GeneFusionSourceFiles.Add(filePath);\r\n                        break;\r\n                    case SaCommon.GeneFusionJsonSuffix:\r\n                        GeneFusionJsonFiles.Add(filePath);\r\n                        break;\r\n                }\r\n            }\r\n        }\r\n\r\n        public void AddFiles(SaUrls saUrls)\r\n        {\r\n            switch (saUrls.SaType)\r\n            {\r\n                case CustomSaType.Nsa:\r\n                    NsaFiles.Add((saUrls.nsaUrl, saUrls.idxUrl));\r\n                    break;\r\n                case CustomSaType.Nsi:\r\n                    NsiFiles.Add(saUrls.nsiUrl);\r\n                    break;\r\n                case CustomSaType.Nga:\r\n                    NgaFiles.Add(saUrls.ngaUrl);\r\n                    break;\r\n                default:\r\n                    throw new InvalidDataException(\"Unknown custom SA type.\");\r\n            }\r\n        }\r\n\r\n        private static IEnumerable<string> GetFiles(string directoryOrManifestFilePath)\r\n        {\r\n            if 
(HttpUtilities.IsUrl(directoryOrManifestFilePath))\r\n            {\r\n                using (var reader = new StreamReader(PersistentStreamUtils.GetReadStream(directoryOrManifestFilePath)))\r\n                {\r\n                    string line;\r\n                    while ((line = reader.ReadLine()) != null)\r\n                    {\r\n                        yield return LambdaUrlHelper.GetBaseUrl() + line;\r\n                    }\r\n                }\r\n            }\r\n            else\r\n            {\r\n                foreach (string file in Directory.GetFiles(directoryOrManifestFilePath))\r\n                    yield return file;\r\n            }\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Nirvana/AnnotationResources.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing Cloud.Messages;\r\nusing CommandLine.Utilities;\r\nusing Genome;\r\nusing IO;\r\nusing MitoHeteroplasmy;\r\nusing RepeatExpansions;\r\nusing VariantAnnotation;\r\nusing VariantAnnotation.Interface;\r\nusing VariantAnnotation.Interface.GeneAnnotation;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.Providers;\r\nusing VariantAnnotation.SA;\r\nusing Vcf.VariantCreator;\r\n\r\nnamespace Nirvana\r\n{\r\n    public sealed class AnnotationResources : IAnnotationResources\r\n    {\r\n        private Dictionary<Chromosome, List<int>> _variantPositions;\r\n\r\n        public          ISequenceProvider             SequenceProvider             { get; }\r\n        public          ITranscriptAnnotationProvider TranscriptAnnotationProvider { get; }\r\n        private         ProteinConservationProvider   ProteinConservationProvider  { get; }\r\n        public          IAnnotationProvider           SaProvider                   { get; }\r\n        public          IAnnotationProvider           GsaProvider                  { get; }\r\n        public          IAnnotationProvider           ConservationProvider         { get; }\r\n        public          IRefMinorProvider             RefMinorProvider             { get; }\r\n        public          IAnnotationProvider           LcrProvider                  { get; }\r\n        public          IGeneAnnotationProvider       GeneAnnotationProvider       { get; }\r\n        public          IMitoHeteroplasmyProvider     MitoHeteroplasmyProvider     { get; }\r\n        public          IAnnotator                    Annotator                    { get; }\r\n        public          IVariantIdCreator             VidCreator                   { get; }\r\n        public          List<IDataSourceVersion>      DataSourceVersions           
{ get; }\r\n        public          string                        VepDataVersion               { get; }\r\n        public          long                          InputStartVirtualPosition    { get; set; }\r\n        public          string                        AnnotatorVersionTag          { get; set; } = \"Nirvana \" + CommandLineUtilities.Version;\r\n        public          bool                          ForceMitochondrialAnnotation { get; }\r\n        public readonly PerformanceMetrics            Metrics;\r\n\r\n        public AnnotationResources(string refSequencePath, string inputCachePrefix, List<string> saDirectoryPaths, List<SaUrls> customAnnotations,\r\n            string customStrTsvPath, bool forceMitochondrialAnnotation, bool useLegacyVids, PerformanceMetrics metrics)\r\n        {\r\n            Metrics = metrics;\r\n            PerformanceMetrics.ShowInitializationHeader();\r\n\r\n            SequenceProvider = ProviderUtilities.GetSequenceProvider(refSequencePath);\r\n\r\n            var annotationFiles = new AnnotationFiles();\r\n            saDirectoryPaths?.ForEach(x => annotationFiles.AddFiles(x));\r\n            customAnnotations?.ForEach(x => annotationFiles.AddFiles(x));\r\n\r\n            ProteinConservationProvider = ProviderUtilities.GetProteinConservationProvider(annotationFiles);\r\n            ProteinConservationProvider?.Load();\r\n\r\n            metrics.Cache.Start();\r\n            TranscriptAnnotationProvider =\r\n                ProviderUtilities.GetTranscriptAnnotationProvider(inputCachePrefix, SequenceProvider, ProteinConservationProvider);\r\n            metrics.ShowCacheLoad();\r\n\r\n            SaProvider             = ProviderUtilities.GetNsaProvider(annotationFiles);\r\n            GsaProvider            = ProviderUtilities.GetGsaProvider(annotationFiles);\r\n            ConservationProvider   = ProviderUtilities.GetConservationProvider(annotationFiles);\r\n            LcrProvider            = 
ProviderUtilities.GetLcrProvider(annotationFiles);\r\n            RefMinorProvider       = ProviderUtilities.GetRefMinorProvider(annotationFiles);\r\n            GeneAnnotationProvider = ProviderUtilities.GetGeneAnnotationProvider(annotationFiles);\r\n\r\n            IRepeatExpansionProvider repeatExpansionProvider = GetRepeatExpansionProvider(SequenceProvider.Assembly,\r\n                SequenceProvider.RefNameToChromosome, SequenceProvider.RefIndexToChromosome.Count, customStrTsvPath);\r\n\r\n            MitoHeteroplasmyProvider = MitoHeteroplasmyReader.GetProvider();\r\n\r\n            Annotator = new Annotator(\r\n                TranscriptAnnotationProvider,\r\n                SequenceProvider,\r\n                SaProvider,\r\n                ConservationProvider,\r\n                LcrProvider,\r\n                GeneAnnotationProvider,\r\n                repeatExpansionProvider,\r\n                GsaProvider\r\n            );\r\n\r\n            if (useLegacyVids) VidCreator = new LegacyVariantId(SequenceProvider.RefNameToChromosome);\r\n            else VidCreator               = new VariantId();\r\n\r\n            DataSourceVersions = GetDataSourceVersions(\r\n                    TranscriptAnnotationProvider,\r\n                    SaProvider,\r\n                    GsaProvider,\r\n                    GeneAnnotationProvider,\r\n                    ConservationProvider,\r\n                    LcrProvider,\r\n                    MitoHeteroplasmyProvider\r\n                )\r\n                .ToList();\r\n            \r\n            VepDataVersion = TranscriptAnnotationProvider.VepVersion + \".\" + CacheConstants.DataVersion + \".\" + SaCommon.DataVersion;\r\n\r\n            ForceMitochondrialAnnotation = forceMitochondrialAnnotation;\r\n        }\r\n\r\n        private static IRepeatExpansionProvider GetRepeatExpansionProvider(GenomeAssembly genomeAssembly,\r\n            Dictionary<string, Chromosome> refNameToChromosome, int numRefSeqs, string 
customStrTsvPath)\r\n        {\r\n            if (genomeAssembly != GenomeAssembly.GRCh37 && genomeAssembly != GenomeAssembly.GRCh38) return null;\r\n            return new RepeatExpansionProvider(genomeAssembly, refNameToChromosome, numRefSeqs, customStrTsvPath);\r\n        }\r\n\r\n        private static IEnumerable<IDataSourceVersion> GetDataSourceVersions(params IProvider[] providers)\r\n        {\r\n            var dataSourceVersions = new List<IDataSourceVersion>();\r\n            foreach (IProvider provider in providers)\r\n                if (provider != null)\r\n                    dataSourceVersions.AddRange(provider.DataSourceVersions);\r\n            return dataSourceVersions.ToHashSet(new DataSourceVersionComparer());\r\n        }\r\n\r\n        public void SingleVariantPreLoad(IPosition position)\r\n        {\r\n            var chromToPositions = new Dictionary<Chromosome, List<int>>();\r\n            PreLoadUtilities.TryAddPosition(chromToPositions, position.Chromosome, position.Start, position.RefAllele,\r\n                position.VcfFields[VcfCommon.AltIndex], SequenceProvider.Sequence);\r\n            _variantPositions = chromToPositions;\r\n            PreLoad(position.Chromosome);\r\n        }\r\n\r\n        public void GetVariantPositions(Stream vcfStream, GenomicRange genomicRange)\r\n        {\r\n            if (genomicRange != null)\r\n                vcfStream.Position = Tabix.VirtualPosition.From(InputStartVirtualPosition).BlockOffset;\r\n            int numPositions;\r\n\r\n            Metrics.SaPositionScan.Start();\r\n            (_variantPositions, numPositions) = PreLoadUtilities.GetPositions(vcfStream, genomicRange, SequenceProvider, RefMinorProvider);\r\n            Metrics.ShowSaPositionScanLoad(numPositions);\r\n        }\r\n\r\n        public void PreLoad(Chromosome chromosome)\r\n        {\r\n            SequenceProvider.LoadChromosome(chromosome);\r\n\r\n            if (_variantPositions == null || 
!_variantPositions.TryGetValue(chromosome, out List<int> positions)) return;\r\n            SaProvider?.PreLoad(chromosome, positions);\r\n        }\r\n\r\n        public void Dispose()\r\n        {\r\n            SequenceProvider?.Dispose();\r\n            TranscriptAnnotationProvider?.Dispose();\r\n            SaProvider?.Dispose();\r\n            GsaProvider?.Dispose();\r\n            ConservationProvider?.Dispose();\r\n            RefMinorProvider?.Dispose();\r\n            GeneAnnotationProvider?.Dispose();\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Nirvana/Nirvana.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO.Compression;\r\nusing CommandLine.Builders;\r\nusing CommandLine.NDesk.Options;\r\nusing Compression.FileHandling;\r\nusing Compression.Utilities;\r\nusing ErrorHandling;\r\nusing IO;\r\nusing Jasix.DataStructures;\r\nusing OptimizedCore;\r\nusing VariantAnnotation;\r\nusing VariantAnnotation.Interface;\r\nusing VariantAnnotation.Providers;\r\nusing Vcf;\r\n\r\nnamespace Nirvana\r\n{\r\n    public static class Nirvana\r\n    {\r\n        private static          string       _inputCachePrefix;\r\n        private static readonly List<string> SupplementaryAnnotationDirectories = new List<string>();\r\n        private static          string       _vcfPath;\r\n        private static          string       _refSequencePath;\r\n        private static          string       _outputFileName;\r\n        private static          string       _customStrTsv;\r\n        private static          string       _customInfoKeysString;\r\n        private static          string       _customSampleInfoKeysString;\r\n        \r\n        private static          bool         _forceMitochondrialAnnotation;\r\n        private static          bool         _useLegacyVids;\r\n        private static          bool         _enableDq;\r\n        \r\n\r\n        private static ExitCodes ProgramExecution()\r\n        {\r\n            var    annotationResources = GetAnnotationResources();\r\n            string jasixFileName       = _outputFileName == \"-\" ? 
null : _outputFileName + \".json.gz\" + JasixCommons.FileExt;\r\n            \r\n            var    customInfoKeys = string.IsNullOrEmpty(_customInfoKeysString) ?\r\n                null: \r\n                new HashSet<string>(_customInfoKeysString.OptimizedSplit(','));\r\n\r\n            var customSampleInfoKeys = string.IsNullOrEmpty(_customSampleInfoKeysString) ?\r\n                null: \r\n                new HashSet<string>(_customSampleInfoKeysString.OptimizedSplit(','));\r\n\r\n            using (var inputVcfStream        = _vcfPath        == \"-\"  ? Console.OpenStandardInput() : GZipUtilities.GetAppropriateReadStream(_vcfPath))\r\n            using (var outputJsonStream      = _outputFileName == \"-\"  ? Console.OpenStandardOutput() : new BlockGZipStream(FileUtilities.GetCreateStream(_outputFileName + \".json.gz\"), CompressionMode.Compress))\r\n            using (var outputJsonIndexStream = jasixFileName   == null ? null : FileUtilities.GetCreateStream(jasixFileName))\r\n                return StreamAnnotation.Annotate(null, inputVcfStream, outputJsonStream, outputJsonIndexStream, annotationResources, \r\n                    new NullVcfFilter(), false, _enableDq, customInfoKeys, customSampleInfoKeys).exitCode;\r\n        }\r\n\r\n        private static AnnotationResources GetAnnotationResources()\r\n        {\r\n            if (_outputFileName == \"-\") Logger.Silence();\r\n            var metrics = new PerformanceMetrics();\r\n            \r\n            var annotationResources = new AnnotationResources(_refSequencePath, _inputCachePrefix, \r\n                SupplementaryAnnotationDirectories, null, _customStrTsv, _forceMitochondrialAnnotation, _useLegacyVids, metrics);\r\n            \r\n            if (SupplementaryAnnotationDirectories.Count == 0) return annotationResources;\r\n\r\n            using (var preloadVcfStream = GZipUtilities.GetAppropriateStream(PersistentStreamUtils.GetReadStream(_vcfPath)))\r\n            {\r\n                
annotationResources.GetVariantPositions(preloadVcfStream, null);\r\n            }\r\n\r\n            return annotationResources;\r\n        }\r\n\r\n        public static int Main(string[] args)\r\n        {\r\n            var ops = new OptionSet\r\n            {\r\n                {\r\n                    \"cache|c=\",\r\n                    \"input cache {prefix}\",\r\n                    v => _inputCachePrefix = v\r\n                },\r\n                {\r\n                    \"in|i=\",\r\n                    \"input VCF {path}\",\r\n                    v => _vcfPath = v\r\n                },\r\n                {\r\n                    \"out|o=\",\r\n                    \"output {file path}\",\r\n                    v => _outputFileName = v\r\n                },\r\n                {\r\n                    \"ref|r=\",\r\n                    \"input compressed reference sequence {path}\",\r\n                    v => _refSequencePath = v\r\n                },\r\n                {\r\n                    \"sd=\",\r\n                    \"input supplementary annotation {directory}\",\r\n                    v => SupplementaryAnnotationDirectories.Add(v)\r\n                },\r\n                {\r\n                    \"force-mt\",\r\n                    \"forces to annotate mitochondrial variants\",\r\n                    v => _forceMitochondrialAnnotation = v != null\r\n                },\r\n                {\r\n                    \"legacy-vids\",\r\n                    \"enables support for legacy VIDs\",\r\n                    v => _useLegacyVids = v != null\r\n                },\r\n                {\r\n                    \"enable-dq\",\r\n                    \"report DQ from VCF samples field\",\r\n                    v => _enableDq = v != null\r\n                },\r\n                {\r\n                    \"str=\",\r\n                    \"user provided STR annotation TSV file\",\r\n                    v => _customStrTsv = v\r\n                },\r\n      
          {\r\n                    \"vcf-info=\",\r\n                    \"additional vcf info field keys (comma separated) desired in the output\",\r\n                    v => _customInfoKeysString = v\r\n                },\r\n                {\r\n                    \"vcf-sample-info=\",\r\n                    \"additional vcf format field keys (comma separated) desired in the output\",\r\n                    v => _customSampleInfoKeysString = v\r\n                }\r\n            };\r\n\r\n            var exitCode = new ConsoleAppBuilder(args, ops)\r\n                .UseVersionProvider(new VersionProvider())\r\n                .Parse()\r\n                .CheckInputFilenameExists(_vcfPath, \"vcf\", \"--in\", true, \"-\")\r\n                .CheckInputFilenameExists(_refSequencePath, \"reference sequence\", \"--ref\")\r\n                .CheckInputFilenameExists(CacheConstants.TranscriptPath(_inputCachePrefix), \"transcript cache\", \"--cache\")\r\n                .CheckInputFilenameExists(CacheConstants.SiftPath(_inputCachePrefix), \"SIFT cache\", \"--cache\")\r\n                .CheckInputFilenameExists(CacheConstants.PolyPhenPath(_inputCachePrefix), \"PolyPhen cache\", \"--cache\")\r\n                .CheckInputFilenameExists(_customStrTsv, \"custom STR annotation TSV\", \"--str\", false)\r\n                .HasRequiredParameter(_outputFileName, \"output file stub\", \"--out\")\r\n                .DisableOutput(_outputFileName == \"-\")\r\n                .ShowBanner(Constants.Authors)\r\n                .ShowHelpMenu(\"Annotates a set of variants\", \"-i <vcf path> -c <cache prefix> --sd <sa dir> -r <ref path> -o <base output filename>\")\r\n                .ShowErrors()\r\n                .Execute(ProgramExecution);\r\n\r\n            return (int)exitCode;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Nirvana/Nirvana.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\r\n  <PropertyGroup>\r\n    <OutputType>Exe</OutputType>\r\n    <TargetFramework>net6.0</TargetFramework>\r\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\r\n    \r\n  </PropertyGroup>\r\n  <ItemGroup>\r\n    <PackageReference Include=\"Microsoft.Extensions.Configuration\" Version=\"6.0.0\" />\r\n    <PackageReference Include=\"Microsoft.Extensions.Configuration.Binder\" Version=\"6.0.0\" />\r\n    <PackageReference Include=\"Microsoft.Extensions.Configuration.Json\" Version=\"6.0.0\" />\r\n  </ItemGroup>\r\n  <ItemGroup>\r\n    <ProjectReference Include=\"..\\Cloud\\Cloud.csproj\" />\r\n    <ProjectReference Include=\"..\\CommandLine\\CommandLine.csproj\" />\r\n    <ProjectReference Include=\"..\\Compression\\Compression.csproj\" />\r\n    <ProjectReference Include=\"..\\ErrorHandling\\ErrorHandling.csproj\" />\r\n    <ProjectReference Include=\"..\\IO\\IO.csproj\" />\r\n    <ProjectReference Include=\"..\\Jasix\\Jasix.csproj\" />\r\n    <ProjectReference Include=\"..\\VariantAnnotation.Interface\\VariantAnnotation.Interface.csproj\" />\r\n    <ProjectReference Include=\"..\\VariantAnnotation\\VariantAnnotation.csproj\" />\r\n    <ProjectReference Include=\"..\\Vcf\\Vcf.csproj\" />\r\n    <ProjectReference Include=\"..\\Tabix\\Tabix.csproj\" />\r\n  </ItemGroup>\r\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\r\n</Project>"
  },
  {
    "path": "Nirvana/PreLoadUtilities.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing Genome;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing Variants;\r\n\r\nnamespace Nirvana\r\n{\r\n    public static class PreLoadUtilities\r\n    {\r\n        public static (Dictionary<Chromosome, List<int>> PositionsByChromosome, int Count) GetPositions(Stream vcfStream, GenomicRange genomicRange,\r\n            ISequenceProvider sequenceProvider, IRefMinorProvider refMinorProvider)\r\n        {\r\n            var positionsByChromosome = new Dictionary<Chromosome, List<int>>();\r\n            var rangeChecker          = new GenomicRangeChecker(genomicRange);\r\n            var refNameToChrom        = sequenceProvider.RefNameToChromosome;\r\n\r\n            using (var reader = new StreamReader(vcfStream))\r\n            {\r\n                string      line;\r\n                string      currentReferenceName = \"\";\r\n                Chromosome chromosome           = null;\r\n                \r\n                while ((line = reader.ReadLine()) != null)\r\n                {\r\n                    if (line.StartsWith('#')) continue;\r\n\r\n                    string[] cols          = line.OptimizedSplit('\\t');\r\n                    string   referenceName = cols[VcfCommon.ChromIndex];\r\n                    \r\n                    if (referenceName != currentReferenceName)\r\n                    {\r\n                        if (!refNameToChrom.TryGetValue(referenceName, out chromosome)) continue;\r\n                        currentReferenceName = referenceName;\r\n                    }\r\n\r\n                    (int position, bool foundError) = cols[VcfCommon.PosIndex].OptimizedParseInt32();\r\n                    if (foundError) throw new InvalidDataException($\"Unable to convert the VCF position to an integer: {cols[VcfCommon.PosIndex]}\");\r\n\r\n                    if (rangeChecker.OutOfRange(chromosome, 
position)) break;\r\n\r\n                    string refAllele = cols[VcfCommon.RefIndex];\r\n                    string altAllele = cols[VcfCommon.AltIndex];\r\n\r\n                    if (altAllele == \".\" && !IsRefMinor(refMinorProvider, chromosome, position)) continue;\r\n\r\n                    sequenceProvider.LoadChromosome(chromosome);\r\n                    TryAddPosition(positionsByChromosome, chromosome, position, refAllele, altAllele, sequenceProvider.Sequence);\r\n                }\r\n            }\r\n            \r\n            int count = SortPositionsAndGetCount(positionsByChromosome);\r\n\r\n            return (positionsByChromosome, count);\r\n        }\r\n\r\n        private static bool IsRefMinor(IRefMinorProvider refMinorProvider, Chromosome chrom, int position)\r\n        {\r\n            if (refMinorProvider == null) return false;\r\n            return !string.IsNullOrEmpty(refMinorProvider.GetGlobalMajorAllele(chrom, position));\r\n        }\r\n\r\n        public static void TryAddPosition(Dictionary<Chromosome, List<int>> chromPositions, Chromosome chromosome,\r\n            int position, string refAllele, string altAllele, ISequence refSequence)\r\n        {\r\n            if (!chromPositions.ContainsKey(chromosome)) chromPositions.Add(chromosome, new List<int>(16 * 1024));\r\n\r\n            foreach (string allele in altAllele.OptimizedSplit(','))\r\n            {\r\n                if (allele.OptimizedStartsWith('<') && allele != \"<NON_REF>\") continue;\r\n\r\n                (int shiftedPos, string _, string _) =\r\n                    VariantUtils.TrimAndLeftAlign(position, refAllele, allele, refSequence);\r\n                chromPositions[chromosome].Add(shiftedPos);\r\n            }\r\n        }\r\n\r\n        private static int SortPositionsAndGetCount(Dictionary<Chromosome, List<int>> positionsByChromosome)\r\n        {\r\n            var count = 0;\r\n\r\n            foreach (var positions in positionsByChromosome.Values)\r\n     
       {\r\n                positions.Sort();\r\n                count += positions.Count;\r\n            }\r\n\r\n            return count;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Nirvana/ProviderUtilities.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing ErrorHandling.Exceptions;\r\nusing IO;\r\nusing VariantAnnotation.GeneAnnotation;\r\nusing VariantAnnotation.GeneFusions.IO;\r\nusing VariantAnnotation.GenericScore;\r\nusing VariantAnnotation.Interface.GeneAnnotation;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.NSA;\r\nusing VariantAnnotation.Providers;\r\n\r\nnamespace Nirvana\r\n{\r\n    public static class ProviderUtilities\r\n    {\r\n        public static ISequenceProvider GetSequenceProvider(string compressedReferencePath)\r\n        {\r\n            return new ReferenceSequenceProvider(PersistentStreamUtils.GetReadStream(compressedReferencePath));\r\n        }\r\n\r\n        public static ProteinConservationProvider GetProteinConservationProvider(AnnotationFiles files) =>\r\n            files == null || string.IsNullOrEmpty(files.ProteinConservationFile)\r\n                ? 
null\r\n                : new ProteinConservationProvider(PersistentStreamUtils.GetReadStream(files.ProteinConservationFile));\r\n\r\n        public static IAnnotationProvider GetConservationProvider(AnnotationFiles files)\r\n        {\r\n            if (files == null || files.PhylopFile == default) return null;\r\n            (Stream phylopStream, Stream indexStream) = GetDataAndIndexStreams(files.PhylopFile.Npd, files.PhylopFile.Idx);\r\n            return new ConservationScoreProvider()\r\n                .AddPhylopReader(phylopStream, indexStream);\r\n        }\r\n\r\n        private static (Stream, Stream) GetDataAndIndexStreams(string dataFilePath, string indexPath)\r\n        {\r\n            var dataStream = PersistentStreamUtils.GetReadStream(dataFilePath);\r\n            var indexStream = PersistentStreamUtils.GetReadStream(indexPath);\r\n            if (dataStream == null)\r\n            {\r\n                throw new UserErrorException($\"Unable to open data file {dataFilePath}\");\r\n            }\r\n\r\n            if (indexStream == null)\r\n            {\r\n                throw new UserErrorException($\"Unable to open index file {indexPath}\");\r\n            }\r\n\r\n            return (dataStream, indexStream);\r\n        }\r\n\r\n        public static IAnnotationProvider GetLcrProvider(AnnotationFiles files) =>\r\n            files?.LowComplexityRegionFile == null\r\n                ? 
null\r\n                : new LcrProvider(PersistentStreamUtils.GetReadStream(files.LowComplexityRegionFile));\r\n\r\n        public static IRefMinorProvider GetRefMinorProvider(AnnotationFiles files)\r\n        {\r\n            if( files == null || files.RefMinorFile == default) return null;\r\n            \r\n            return new RefMinorProvider(PersistentStreamUtils.GetReadStream(files.RefMinorFile.Rma),\r\n                    PersistentStreamUtils.GetReadStream(files.RefMinorFile.Idx));\r\n        }\r\n\r\n        public static IGeneAnnotationProvider GetGeneAnnotationProvider(AnnotationFiles files) => files?.NsiFiles == null\r\n            ? null\r\n            : new GeneAnnotationProvider(PersistentStreamUtils.GetStreams(files.NgaFiles));\r\n\r\n        public static IAnnotationProvider GetNsaProvider(AnnotationFiles files)\r\n        {\r\n            if (files == null) return null;\r\n\r\n            INsaReader[]          nsaReaders    = GetNsaReaders(files.NsaFiles);\r\n            INsiReader[]          nsiReaders    = GetNsiReaders(files.NsiFiles);\r\n            IGeneFusionSaReader[] fusionReaders = GetGeneFusionReaders(files.GeneFusionSourceFiles, files.GeneFusionJsonFiles);\r\n\r\n            int numReaders = nsaReaders.Length + nsiReaders.Length + fusionReaders.Length;\r\n            return numReaders == 0 ? 
null : new NsaProvider(nsaReaders, nsiReaders, fusionReaders);\r\n        }\r\n\r\n        private static INsaReader[] GetNsaReaders(IReadOnlyCollection<(string Nsa, string Idx)> filePaths)\r\n        {\r\n            var readers = new List<INsaReader>(filePaths.Count);\r\n            foreach ((string nsaPath, string idxPath) in filePaths)\r\n            {\r\n                var (nsaStream, idxStream) = GetDataAndIndexStreams(nsaPath, idxPath);\r\n                readers.Add(new NsaReader(nsaStream, idxStream));\r\n            }\r\n            return readers.SortByJsonKey();\r\n        }\r\n\r\n        public static IAnnotationProvider GetGsaProvider(AnnotationFiles files)\r\n        {\r\n            if (files?.GsaFiles == null || files.GsaFiles.Count == 0) return null;\r\n\r\n            List<(string Gsa, string Idx)> filePaths = files.GsaFiles;\r\n\r\n            var readers = new ScoreReader[filePaths.Count];\r\n\r\n            var i = 0;\r\n            foreach ((string gsaPath, string idxPath) in filePaths)\r\n            {\r\n                var (gsaStream, idxStream) = GetDataAndIndexStreams(gsaPath, idxPath);\r\n                readers[i] = ScoreReader.Read(gsaStream, idxStream);\r\n                i++;\r\n            }\r\n\r\n            readers = readers.SortByJsonKey();\r\n            return new ScoreProvider(readers);\r\n        }\r\n\r\n\r\n        private static INsiReader[] GetNsiReaders(IReadOnlyCollection<string> filePaths)\r\n        {\r\n            var readers = new List<INsiReader>(filePaths.Count);\r\n            foreach (string filePath in filePaths) readers.Add(NsiReader.Read(PersistentStreamUtils.GetReadStream(filePath)));\r\n            return readers.SortByJsonKey();\r\n        }\r\n\r\n        private static IGeneFusionSaReader[] GetGeneFusionReaders(IReadOnlyCollection<string> sourceFilePaths,\r\n            IReadOnlyCollection<string> jsonFilePaths)\r\n        {\r\n            var readers = new 
List<IGeneFusionSaReader>(jsonFilePaths.Count);\r\n            foreach (string filePath in sourceFilePaths) readers.Add(new GeneFusionSourceReader(PersistentStreamUtils.GetReadStream(filePath)));\r\n            foreach (string filePath in jsonFilePaths) readers.Add(new GeneFusionJsonReader(PersistentStreamUtils.GetReadStream(filePath)));\r\n            return readers.SortByJsonKey();\r\n        }\r\n\r\n        private static T[] SortByJsonKey<T>(this IEnumerable<T> entries) where T : ISaMetadata =>\r\n            entries.OrderBy(x => x.JsonKey, StringComparer.Ordinal).ToArray();\r\n\r\n        public static ITranscriptAnnotationProvider GetTranscriptAnnotationProvider(string path, ISequenceProvider sequenceProvider,\r\n            ProteinConservationProvider proteinConservationProvider) =>\r\n            new TranscriptAnnotationProvider(path, sequenceProvider, proteinConservationProvider);\r\n    }\r\n}"
  },
  {
    "path": "Nirvana/StreamAnnotation.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing ErrorHandling;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing IO;\r\nusing MitoHeteroplasmy;\r\nusing OptimizedCore;\r\nusing VariantAnnotation;\r\nusing VariantAnnotation.AnnotatedPositions;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Interface;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.IO;\r\nusing VariantAnnotation.Pools;\r\nusing VariantAnnotation.Utilities;\r\nusing Variants;\r\nusing Vcf;\r\n\r\nnamespace Nirvana\r\n{\r\n    public static class StreamAnnotation\r\n    {\r\n        public static (int variantCount, ExitCodes exitCode) Annotate(Stream headerStream, Stream inputVcfStream, Stream outputJsonStream,\r\n            Stream outputJsonIndexStream, AnnotationResources annotationResources, IVcfFilter vcfFilter,\r\n            bool ignoreEmptyChromosome, bool enableDq = false, HashSet<string> customInfoKeys=null, HashSet<string> customSampleInfoKeys=null)\r\n        {\r\n            var metrics = annotationResources.Metrics;\r\n            PerformanceMetrics.ShowAnnotationHeader();\r\n\r\n            Chromosome                currentChromosome        = Chromosome.GetEmptyChromosome(\"dummy\");\r\n            int                       numVariants              = 0;\r\n            int                       variantCount             = 0;\r\n            IMitoHeteroplasmyProvider mitoHeteroplasmyProvider = MitoHeteroplasmyReader.GetProvider();\r\n            using (var vcfReader  = GetVcfReader(headerStream, inputVcfStream, annotationResources, vcfFilter, mitoHeteroplasmyProvider, enableDq, customInfoKeys, customSampleInfoKeys))\r\n            using (var jsonWriter = new JsonWriter(outputJsonStream, outputJsonIndexStream, 
annotationResources, Date.CurrentTimeStamp, vcfReader.GetSampleNames(), false))\r\n            {\r\n                try\r\n                {\r\n                    CheckGenomeAssembly(annotationResources, vcfReader);\r\n                    SetMitochondrialAnnotationBehavior(annotationResources, vcfReader);\r\n                    \r\n                    IPosition position;\r\n\r\n                    while ((position = vcfReader.GetNextPosition()) != null)\r\n                    {\r\n                        Chromosome chromosome = position.Chromosome;\r\n                        if (ignoreEmptyChromosome && chromosome.IsEmpty()) continue;\r\n                        \r\n                        if (chromosome.Index != currentChromosome.Index)\r\n                        {\r\n                            if (!currentChromosome.IsEmpty())\r\n                                metrics.ShowAnnotationEntry(currentChromosome, numVariants);\r\n                            \r\n                            numVariants = 0;\r\n                            \r\n                            metrics.Preload.Start();\r\n                            annotationResources.PreLoad(chromosome);\r\n                            metrics.Preload.Stop();\r\n                            \r\n                            metrics.Annotation.Start();\r\n                            currentChromosome = chromosome;\r\n                        }\r\n\r\n                        var annotatedPosition = position.Variants != null ? 
annotationResources.Annotator.Annotate(position) : null;\r\n\r\n                        var jsb = annotatedPosition?.GetJsonStringBuilder();\r\n                        if (jsb != null) jsonWriter.WritePosition(annotatedPosition.Position, jsb);\r\n                        StringBuilderPool.Return(jsb);\r\n                        \r\n                        ReturnPoolObjects(annotatedPosition);\r\n\r\n                        numVariants++;\r\n                        variantCount += position.Variants?.Length ?? 0;\r\n                    }\r\n\r\n                    jsonWriter.WriteGenes(annotationResources.Annotator.GetGeneAnnotations());\r\n\r\n                }\r\n                catch (Exception e)\r\n                {\r\n                    e.Data[ExitCodeUtilities.VcfLine] = vcfReader.VcfLine;\r\n                    throw;\r\n                }\r\n            }\r\n            \r\n            if (!currentChromosome.IsEmpty())\r\n                metrics.ShowAnnotationEntry(currentChromosome, numVariants);\r\n\r\n            metrics.ShowSummaryTable();\r\n\r\n            return (variantCount, ExitCodes.Success);\r\n        }\r\n\r\n        private static void ReturnPoolObjects(IAnnotatedPosition annotatedPosition)\r\n        {\r\n            if (annotatedPosition?.AnnotatedVariants != null)\r\n                foreach (var annotatedVariant in annotatedPosition.AnnotatedVariants)\r\n                {\r\n                    if (annotatedVariant.Transcripts != null)\r\n                    {\r\n                        foreach (IAnnotatedTranscript annotatedTranscript in annotatedVariant.Transcripts)\r\n                        {\r\n                            AnnotatedTranscriptPool.Return((AnnotatedTranscript) annotatedTranscript);\r\n                        }\r\n                    }\r\n\r\n                    var variant = annotatedVariant.Variant;\r\n                    if (variant is Variant) VariantPool.Return((Variant) annotatedVariant.Variant);\r\n                   
 AnnotatedVariantPool.Return((AnnotatedVariant) annotatedVariant);\r\n                }\r\n\r\n            PositionPool.Return((Position) annotatedPosition?.Position);\r\n            AnnotatedPositionPool.Return((AnnotatedPosition) annotatedPosition);\r\n        }\r\n\r\n        private static void CheckGenomeAssembly(IAnnotationResources annotationResources, VcfReader vcfReader)\r\n        {\r\n            if (vcfReader.InferredGenomeAssembly != GenomeAssembly.Unknown && vcfReader.InferredGenomeAssembly != annotationResources.Annotator.Assembly)\r\n                throw new UserErrorException($\"A mismatch between genome assemblies was found. The input VCF uses {vcfReader.InferredGenomeAssembly} whereas annotation was configured for {annotationResources.Annotator.Assembly}.\");\r\n        }\r\n\r\n        private static void SetMitochondrialAnnotationBehavior(IAnnotationResources annotationResources, IVcfReader vcfReader)\r\n        {\r\n            if (vcfReader.IsRcrsMitochondrion && annotationResources.Annotator.Assembly == GenomeAssembly.GRCh37\r\n                || annotationResources.Annotator.Assembly == GenomeAssembly.GRCh38\r\n                || annotationResources.ForceMitochondrialAnnotation)\r\n                annotationResources.Annotator.EnableMitochondrialAnnotation();\r\n        }\r\n\r\n        private static VcfReader GetVcfReader(Stream headerStream, Stream vcfStream, IAnnotationResources annotationResources,\r\n            IVcfFilter vcfFilter, IMitoHeteroplasmyProvider mitoHeteroplasmyProvider, bool enableDq = false, \r\n            HashSet<string> customInfoKeys=null , HashSet<string> customSampleInfoKeys=null)\r\n        {\r\n            var vcfReader = FileUtilities.GetStreamReader(vcfStream);\r\n\r\n            StreamReader headerReader;\r\n            if (headerStream == null)\r\n                headerReader = vcfReader;\r\n            else\r\n            {\r\n                headerReader = FileUtilities.GetStreamReader(headerStream);\r\n 
               vcfStream.Position = Tabix.VirtualPosition.From(annotationResources.InputStartVirtualPosition).BlockOffset;\r\n            }\r\n\r\n            return VcfReader.Create(headerReader, vcfReader, annotationResources.SequenceProvider,\r\n                annotationResources.RefMinorProvider, vcfFilter, annotationResources.VidCreator, \r\n                mitoHeteroplasmyProvider, enableDq, customInfoKeys, customSampleInfoKeys);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Nirvana.sln",
    "content": "﻿\r\nMicrosoft Visual Studio Solution File, Format Version 12.00\r\n# Visual Studio Version 16\r\nVisualStudioVersion = 16.0.29201.188\r\nMinimumVisualStudioVersion = 10.0.40219.1\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"VariantAnnotation.Interface\", \"VariantAnnotation.Interface\\VariantAnnotation.Interface.csproj\", \"{248C8736-3A76-4F45-A131-A776BD3257C9}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"Nirvana\", \"Nirvana\\Nirvana.csproj\", \"{84CD8FB5-1071-47D5-AF1A-E028134D3C70}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"Vcf\", \"Vcf\\Vcf.csproj\", \"{0DF48817-8AED-449A-AA87-CB91040D8439}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"Compression\", \"Compression\\Compression.csproj\", \"{8E2CD866-DFCF-4486-A289-32DEFA050E87}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"VariantAnnotation\", \"VariantAnnotation\\VariantAnnotation.csproj\", \"{155E28ED-122E-49DD-A8F0-FE3F670073B8}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"ErrorHandling\", \"ErrorHandling\\ErrorHandling.csproj\", \"{A65F4919-CDB8-49C5-ADA4-66055A3F4923}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"UnitTests\", \"UnitTests\\UnitTests.csproj\", \"{0CB1644A-BEA1-4CF6-AD5F-E544512769C2}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"CommandLine\", \"CommandLine\\CommandLine.csproj\", \"{147C336A-6A6E-43F4-BDDC-8C8B72199C5D}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"CacheUtils\", \"CacheUtils\\CacheUtils.csproj\", \"{986CF15B-DFAE-4C39-98D0-75A15271B34A}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"SAUtils\", \"SAUtils\\SAUtils.csproj\", \"{F1F05D39-1BE0-4CFD-AD60-F27FB31D925A}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"Jasix\", \"Jasix\\Jasix.csproj\", 
\"{ECC7869C-1B21-42C1-B8BD-4190F15B3B6F}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"OptimizedCore\", \"OptimizedCore\\OptimizedCore.csproj\", \"{76FEE3B3-FB8E-4421-A63F-CA659FB1ACA0}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"Intervals\", \"Intervals\\Intervals.csproj\", \"{82CA75B3-37DF-40DA-AA1B-70888CF3ED05}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"Variants\", \"Variants\\Variants.csproj\", \"{0A94104A-71E7-4925-B667-C29C18E3356D}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"Genome\", \"Genome\\Genome.csproj\", \"{3B5C30A5-FBBC-4247-BE62-2B64960213FD}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"IO\", \"IO\\IO.csproj\", \"{654069F3-3B86-4325-823F-BC78946A26FF}\"\r\n\tProjectSection(ProjectDependencies) = postProject\r\n\t\t{82CA75B3-37DF-40DA-AA1B-70888CF3ED05} = {82CA75B3-37DF-40DA-AA1B-70888CF3ED05}\r\n\tEndProjectSection\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"Tabix\", \"Tabix\\Tabix.csproj\", \"{F337E3F6-72AA-44B4-B11F-D69EE14B6152}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"Cloud\", \"Cloud\\Cloud.csproj\", \"{E93914C8-2599-46BE-BE18-6229E53F581B}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"ReferenceSequence\", \"ReferenceSequence\\ReferenceSequence.csproj\", \"{234765A8-2B5C-4FD5-ACBA-6D48002E9074}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"Downloader\", \"Downloader\\Downloader.csproj\", \"{5B81B762-8A86-466A-A947-AC2CA53EE40D}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"AnnotationLambda\", \"AnnotationLambda\\AnnotationLambda.csproj\", \"{374D5D10-98DF-4D18-9ECF-D20B5C19D258}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"CustomAnnotationLambda\", \"CustomAnnotationLambda\\CustomAnnotationLambda.csproj\", 
\"{FFC36924-DA37-41E1-8FA8-5FF54AC84CC0}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"GeneAnnotationLambda\", \"GeneAnnotationLambda\\GeneAnnotationLambda.csproj\", \"{9A0F21D6-D0B0-4074-BACA-5FF179E83007}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"NirvanaLambda\", \"NirvanaLambda\\NirvanaLambda.csproj\", \"{37EEEA52-94F8-4B27-A044-1CD6DBF2F86E}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"SingleAnnotationLambda\", \"SingleAnnotationLambda\\SingleAnnotationLambda.csproj\", \"{C9B4E16E-FF30-4CE0-A617-F833696FBE10}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"RepeatExpansions\", \"RepeatExpansions\\RepeatExpansions.csproj\", \"{E586F712-DEDA-4CA2-AE97-96DE0180DB0E}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"Jist\", \"Jist\\Jist.csproj\", \"{62109AB0-2E66-4C84-8D62-7A8C9B7E335A}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"CustomStrValidationLambda\", \"CustomStrValidationLambda\\CustomStrValidationLambda.csproj\", \"{F3E60E51-EE07-4768-8EC3-E3A323DFA547}\"\r\nEndProject\r\nProject(\"{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}\") = \"MitoHeteroplasmy\", \"MitoHeteroplasmy\\MitoHeteroplasmy.csproj\", \"{387E4C8D-6A27-40DE-A305-F3F047B8D865}\"\r\nEndProject\r\nGlobal\r\n\tGlobalSection(SolutionConfigurationPlatforms) = preSolution\r\n\t\tDebug|Any CPU = Debug|Any CPU\r\n\t\tRelease|Any CPU = Release|Any CPU\r\n\tEndGlobalSection\r\n\tGlobalSection(ProjectConfigurationPlatforms) = postSolution\r\n\t\t{248C8736-3A76-4F45-A131-A776BD3257C9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{248C8736-3A76-4F45-A131-A776BD3257C9}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{248C8736-3A76-4F45-A131-A776BD3257C9}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{248C8736-3A76-4F45-A131-A776BD3257C9}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{84CD8FB5-1071-47D5-AF1A-E028134D3C70}.Debug|Any 
CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{84CD8FB5-1071-47D5-AF1A-E028134D3C70}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{84CD8FB5-1071-47D5-AF1A-E028134D3C70}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{84CD8FB5-1071-47D5-AF1A-E028134D3C70}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{0DF48817-8AED-449A-AA87-CB91040D8439}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{0DF48817-8AED-449A-AA87-CB91040D8439}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{0DF48817-8AED-449A-AA87-CB91040D8439}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{0DF48817-8AED-449A-AA87-CB91040D8439}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{8E2CD866-DFCF-4486-A289-32DEFA050E87}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{8E2CD866-DFCF-4486-A289-32DEFA050E87}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{8E2CD866-DFCF-4486-A289-32DEFA050E87}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{8E2CD866-DFCF-4486-A289-32DEFA050E87}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{155E28ED-122E-49DD-A8F0-FE3F670073B8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{155E28ED-122E-49DD-A8F0-FE3F670073B8}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{155E28ED-122E-49DD-A8F0-FE3F670073B8}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{155E28ED-122E-49DD-A8F0-FE3F670073B8}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{A65F4919-CDB8-49C5-ADA4-66055A3F4923}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{A65F4919-CDB8-49C5-ADA4-66055A3F4923}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{A65F4919-CDB8-49C5-ADA4-66055A3F4923}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{A65F4919-CDB8-49C5-ADA4-66055A3F4923}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{0CB1644A-BEA1-4CF6-AD5F-E544512769C2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{0CB1644A-BEA1-4CF6-AD5F-E544512769C2}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{0CB1644A-BEA1-4CF6-AD5F-E544512769C2}.Release|Any CPU.ActiveCfg = Release|Any 
CPU\r\n\t\t{0CB1644A-BEA1-4CF6-AD5F-E544512769C2}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{147C336A-6A6E-43F4-BDDC-8C8B72199C5D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{147C336A-6A6E-43F4-BDDC-8C8B72199C5D}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{147C336A-6A6E-43F4-BDDC-8C8B72199C5D}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{147C336A-6A6E-43F4-BDDC-8C8B72199C5D}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{986CF15B-DFAE-4C39-98D0-75A15271B34A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{986CF15B-DFAE-4C39-98D0-75A15271B34A}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{986CF15B-DFAE-4C39-98D0-75A15271B34A}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{986CF15B-DFAE-4C39-98D0-75A15271B34A}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{F1F05D39-1BE0-4CFD-AD60-F27FB31D925A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{F1F05D39-1BE0-4CFD-AD60-F27FB31D925A}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{F1F05D39-1BE0-4CFD-AD60-F27FB31D925A}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{F1F05D39-1BE0-4CFD-AD60-F27FB31D925A}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{ECC7869C-1B21-42C1-B8BD-4190F15B3B6F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{ECC7869C-1B21-42C1-B8BD-4190F15B3B6F}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{ECC7869C-1B21-42C1-B8BD-4190F15B3B6F}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{ECC7869C-1B21-42C1-B8BD-4190F15B3B6F}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{76FEE3B3-FB8E-4421-A63F-CA659FB1ACA0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{76FEE3B3-FB8E-4421-A63F-CA659FB1ACA0}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{76FEE3B3-FB8E-4421-A63F-CA659FB1ACA0}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{76FEE3B3-FB8E-4421-A63F-CA659FB1ACA0}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{82CA75B3-37DF-40DA-AA1B-70888CF3ED05}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{82CA75B3-37DF-40DA-AA1B-70888CF3ED05}.Debug|Any CPU.Build.0 = Debug|Any 
CPU\r\n\t\t{82CA75B3-37DF-40DA-AA1B-70888CF3ED05}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{82CA75B3-37DF-40DA-AA1B-70888CF3ED05}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{0A94104A-71E7-4925-B667-C29C18E3356D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{0A94104A-71E7-4925-B667-C29C18E3356D}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{0A94104A-71E7-4925-B667-C29C18E3356D}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{0A94104A-71E7-4925-B667-C29C18E3356D}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{3B5C30A5-FBBC-4247-BE62-2B64960213FD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{3B5C30A5-FBBC-4247-BE62-2B64960213FD}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{3B5C30A5-FBBC-4247-BE62-2B64960213FD}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{3B5C30A5-FBBC-4247-BE62-2B64960213FD}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{654069F3-3B86-4325-823F-BC78946A26FF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{654069F3-3B86-4325-823F-BC78946A26FF}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{654069F3-3B86-4325-823F-BC78946A26FF}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{654069F3-3B86-4325-823F-BC78946A26FF}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{F337E3F6-72AA-44B4-B11F-D69EE14B6152}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{F337E3F6-72AA-44B4-B11F-D69EE14B6152}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{F337E3F6-72AA-44B4-B11F-D69EE14B6152}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{F337E3F6-72AA-44B4-B11F-D69EE14B6152}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{E93914C8-2599-46BE-BE18-6229E53F581B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{E93914C8-2599-46BE-BE18-6229E53F581B}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{E93914C8-2599-46BE-BE18-6229E53F581B}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{E93914C8-2599-46BE-BE18-6229E53F581B}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{234765A8-2B5C-4FD5-ACBA-6D48002E9074}.Debug|Any CPU.ActiveCfg = 
Debug|Any CPU\r\n\t\t{234765A8-2B5C-4FD5-ACBA-6D48002E9074}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{234765A8-2B5C-4FD5-ACBA-6D48002E9074}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{234765A8-2B5C-4FD5-ACBA-6D48002E9074}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{5B81B762-8A86-466A-A947-AC2CA53EE40D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{5B81B762-8A86-466A-A947-AC2CA53EE40D}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{5B81B762-8A86-466A-A947-AC2CA53EE40D}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{5B81B762-8A86-466A-A947-AC2CA53EE40D}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{374D5D10-98DF-4D18-9ECF-D20B5C19D258}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{374D5D10-98DF-4D18-9ECF-D20B5C19D258}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{374D5D10-98DF-4D18-9ECF-D20B5C19D258}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{374D5D10-98DF-4D18-9ECF-D20B5C19D258}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{FFC36924-DA37-41E1-8FA8-5FF54AC84CC0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{FFC36924-DA37-41E1-8FA8-5FF54AC84CC0}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{FFC36924-DA37-41E1-8FA8-5FF54AC84CC0}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{FFC36924-DA37-41E1-8FA8-5FF54AC84CC0}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{9A0F21D6-D0B0-4074-BACA-5FF179E83007}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{9A0F21D6-D0B0-4074-BACA-5FF179E83007}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{9A0F21D6-D0B0-4074-BACA-5FF179E83007}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{9A0F21D6-D0B0-4074-BACA-5FF179E83007}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{37EEEA52-94F8-4B27-A044-1CD6DBF2F86E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{37EEEA52-94F8-4B27-A044-1CD6DBF2F86E}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{37EEEA52-94F8-4B27-A044-1CD6DBF2F86E}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{37EEEA52-94F8-4B27-A044-1CD6DBF2F86E}.Release|Any CPU.Build.0 = 
Release|Any CPU\r\n\t\t{C9B4E16E-FF30-4CE0-A617-F833696FBE10}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{C9B4E16E-FF30-4CE0-A617-F833696FBE10}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{C9B4E16E-FF30-4CE0-A617-F833696FBE10}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{C9B4E16E-FF30-4CE0-A617-F833696FBE10}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{E586F712-DEDA-4CA2-AE97-96DE0180DB0E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{E586F712-DEDA-4CA2-AE97-96DE0180DB0E}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{E586F712-DEDA-4CA2-AE97-96DE0180DB0E}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{E586F712-DEDA-4CA2-AE97-96DE0180DB0E}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{62109AB0-2E66-4C84-8D62-7A8C9B7E335A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{62109AB0-2E66-4C84-8D62-7A8C9B7E335A}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{62109AB0-2E66-4C84-8D62-7A8C9B7E335A}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{62109AB0-2E66-4C84-8D62-7A8C9B7E335A}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{F3E60E51-EE07-4768-8EC3-E3A323DFA547}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{F3E60E51-EE07-4768-8EC3-E3A323DFA547}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{F3E60E51-EE07-4768-8EC3-E3A323DFA547}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{F3E60E51-EE07-4768-8EC3-E3A323DFA547}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{387E4C8D-6A27-40DE-A305-F3F047B8D865}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{387E4C8D-6A27-40DE-A305-F3F047B8D865}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{387E4C8D-6A27-40DE-A305-F3F047B8D865}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{387E4C8D-6A27-40DE-A305-F3F047B8D865}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\tEndGlobalSection\r\n\tGlobalSection(SolutionProperties) = preSolution\r\n\t\tHideSolutionNode = FALSE\r\n\tEndGlobalSection\r\n\tGlobalSection(ExtensibilityGlobals) = postSolution\r\n\t\tSolutionGuid = 
{BA40BDB6-7E39-4F75-AC8A-EB65F7FC8209}\r\n\tEndGlobalSection\r\nEndGlobal\r\n"
  },
  {
    "path": "Nirvana.sln.DotSettings",
    "content": "﻿<wpf:ResourceDictionary xml:space=\"preserve\" xmlns:x=\"http://schemas.microsoft.com/winfx/2006/xaml\" xmlns:s=\"clr-namespace:System;assembly=mscorlib\" xmlns:ss=\"urn:shemas-jetbrains-com:settings-storage-xaml\" xmlns:wpf=\"http://schemas.microsoft.com/winfx/2006/xaml/presentation\">\n\t<s:String x:Key=\"/Default/CodeInspection/Highlighting/AnalysisEnabled/@EntryValue\">SOLUTION</s:String>\n\t<s:String x:Key=\"/Default/CodeInspection/Highlighting/InspectionSeverities/=ClassCanBeSealed_002EGlobal/@EntryIndexedValue\">WARNING</s:String>\n\t<s:String x:Key=\"/Default/CodeInspection/Highlighting/InspectionSeverities/=ClassCanBeSealed_002ELocal/@EntryIndexedValue\">WARNING</s:String>\n\t<s:String x:Key=\"/Default/CodeInspection/Highlighting/InspectionSeverities/=CommentTypo/@EntryIndexedValue\">DO_NOT_SHOW</s:String>\n\t<s:String x:Key=\"/Default/CodeInspection/Highlighting/InspectionSeverities/=ForeachCanBeConvertedToQueryUsingAnotherGetEnumerator/@EntryIndexedValue\">DO_NOT_SHOW</s:String>\n\t<s:String x:Key=\"/Default/CodeInspection/Highlighting/InspectionSeverities/=ForeachCanBePartlyConvertedToQueryUsingAnotherGetEnumerator/@EntryIndexedValue\">DO_NOT_SHOW</s:String>\n\t<s:String x:Key=\"/Default/CodeInspection/Highlighting/InspectionSeverities/=HeapView_002EBoxingAllocation/@EntryIndexedValue\">DO_NOT_SHOW</s:String>\n\t<s:String x:Key=\"/Default/CodeInspection/Highlighting/InspectionSeverities/=HeapView_002EClosureAllocation/@EntryIndexedValue\">DO_NOT_SHOW</s:String>\n\t<s:String x:Key=\"/Default/CodeInspection/Highlighting/InspectionSeverities/=HeapView_002EDelegateAllocation/@EntryIndexedValue\">DO_NOT_SHOW</s:String>\n\t<s:String x:Key=\"/Default/CodeInspection/Highlighting/InspectionSeverities/=HeapView_002EObjectAllocation/@EntryIndexedValue\">DO_NOT_SHOW</s:String>\n\t<s:String 
x:Key=\"/Default/CodeInspection/Highlighting/InspectionSeverities/=HeapView_002EObjectAllocation_002EEvident/@EntryIndexedValue\">DO_NOT_SHOW</s:String>\n\t<s:String x:Key=\"/Default/CodeInspection/Highlighting/InspectionSeverities/=HeapView_002EObjectAllocation_002EPossible/@EntryIndexedValue\">DO_NOT_SHOW</s:String>\n\t<s:String x:Key=\"/Default/CodeInspection/Highlighting/InspectionSeverities/=LoopCanBeConvertedToQuery/@EntryIndexedValue\">DO_NOT_SHOW</s:String>\n\t<s:String x:Key=\"/Default/CodeInspection/Highlighting/InspectionSeverities/=ReplaceSliceWithRangeIndexer/@EntryIndexedValue\">DO_NOT_SHOW</s:String>\n\t<s:String x:Key=\"/Default/CodeInspection/Highlighting/InspectionSeverities/=ReplaceSubstringWithRangeIndexer/@EntryIndexedValue\">DO_NOT_SHOW</s:String>\n\t<s:String x:Key=\"/Default/CodeInspection/Highlighting/InspectionSeverities/=StringLiteralTypo/@EntryIndexedValue\">DO_NOT_SHOW</s:String>\n\t<s:String x:Key=\"/Default/CodeInspection/Highlighting/InspectionSeverities/=SuggestVarOrType_005FSimpleTypes/@EntryIndexedValue\"></s:String>\n\t<s:Boolean x:Key=\"/Default/CodeInspection/Highlighting/InspectionSeverities/=SuggestVarOrType_005FSimpleTypes/@EntryIndexRemoved\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/CodeStyle/CodeFormatting/CSharpFormat/ALIGN_MULTILINE_PARAMETER/@EntryValue\">False</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/CodeStyle/CodeFormatting/CSharpFormat/INT_ALIGN_ASSIGNMENTS/@EntryValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/CodeStyle/CodeFormatting/CSharpFormat/INT_ALIGN_BINARY_EXPRESSIONS/@EntryValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/CodeStyle/CodeFormatting/CSharpFormat/INT_ALIGN_COMMENTS/@EntryValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/CodeStyle/CodeFormatting/CSharpFormat/INT_ALIGN_FIELDS/@EntryValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/CodeStyle/CodeFormatting/CSharpFormat/INT_ALIGN_INVOCATIONS/@EntryValue\">True</s:Boolean>\n\t<s:Boolean 
x:Key=\"/Default/CodeStyle/CodeFormatting/CSharpFormat/INT_ALIGN_METHODS/@EntryValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/CodeStyle/CodeFormatting/CSharpFormat/INT_ALIGN_NESTED_TERNARY/@EntryValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/CodeStyle/CodeFormatting/CSharpFormat/INT_ALIGN_PROPERTIES/@EntryValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/CodeStyle/CodeFormatting/CSharpFormat/INT_ALIGN_PROPERTY_PATTERNS/@EntryValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/CodeStyle/CodeFormatting/CSharpFormat/INT_ALIGN_SWITCH_EXPRESSIONS/@EntryValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/CodeStyle/CodeFormatting/CSharpFormat/INT_ALIGN_SWITCH_SECTIONS/@EntryValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/CodeStyle/CodeFormatting/CSharpFormat/INT_ALIGN_VARIABLES/@EntryValue\">True</s:Boolean>\n\t<s:Int64 x:Key=\"/Default/CodeStyle/CodeFormatting/CSharpFormat/WRAP_LIMIT/@EntryValue\">150</s:Int64>\n\t<s:String x:Key=\"/Default/CodeStyle/CSharpVarKeywordUsage/ForBuiltInTypes/@EntryValue\">UseVarWhenEvident</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/CSharpVarKeywordUsage/ForOtherTypes/@EntryValue\">UseVarWhenEvident</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/CSharpVarKeywordUsage/ForSimpleTypes/@EntryValue\">UseVarWhenEvident</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=CNV/@EntryIndexedValue\">CNV</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=GR/@EntryIndexedValue\">GR</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=IO/@EntryIndexedValue\">IO</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=LI/@EntryIndexedValue\">LI</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=LZ/@EntryIndexedValue\">LZ</s:String>\n\t<s:String 
x:Key=\"/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=MD/@EntryIndexedValue\">MD</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=RNA/@EntryIndexedValue\">RNA</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=SNS/@EntryIndexedValue\">SNS</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=SV/@EntryIndexedValue\">SV</s:String>\n\t<s:Boolean x:Key=\"/Default/CodeStyle/Naming/CSharpNaming/ApplyAutoDetectedRules/@EntryValue\">False</s:Boolean>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=JS_005FBLOCK_005FSCOPE_005FCONSTANT/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=JS_005FBLOCK_005FSCOPE_005FFUNCTION/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=JS_005FBLOCK_005FSCOPE_005FVARIABLE/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=JS_005FCLASS/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=JS_005FCONSTRUCTOR/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"AaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=JS_005FFUNCTION/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=JS_005FGLOBAL_005FVARIABLE/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"AaBb\" 
/&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=JS_005FLABEL/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=JS_005FLOCAL_005FCONSTRUCTOR/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"AaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=JS_005FLOCAL_005FVARIABLE/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=JS_005FOBJECT_005FPROPERTY_005FOF_005FFUNCTION/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=JS_005FPARAMETER/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FCLASS/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"AaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FENUM/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"AaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FENUM_005FMEMBER/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"AaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FINTERFACE/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"I\" Suffix=\"\" Style=\"AaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FMIXED_005FENUM/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" 
Suffix=\"\" Style=\"AaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FMODULE/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"AaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FMODULE_005FEXPORTED/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FMODULE_005FLOCAL/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FPRIVATE_005FMEMBER_005FACCESSOR/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FPRIVATE_005FSTATIC_005FTYPE_005FFIELD/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FPRIVATE_005FTYPE_005FFIELD/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FPRIVATE_005FTYPE_005FMETHOD/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FPROTECTED_005FMEMBER_005FACCESSOR/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FPROTECTED_005FSTATIC_005FTYPE_005FFIELD/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String 
x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FPROTECTED_005FTYPE_005FFIELD/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FPROTECTED_005FTYPE_005FMETHOD/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FPUBLIC_005FMEMBER_005FACCESSOR/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FPUBLIC_005FSTATIC_005FTYPE_005FFIELD/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FPUBLIC_005FTYPE_005FFIELD/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FPUBLIC_005FTYPE_005FMETHOD/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FTYPE_005FALIAS/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"AaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/JavaScriptNaming/UserRules/=TS_005FTYPE_005FPARAMETER/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"T\" Suffix=\"\" Style=\"AaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/WebNaming/UserRules/=ASP_005FFIELD/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"AaBb\" /&gt;</s:String>\n\t<s:String 
x:Key=\"/Default/CodeStyle/Naming/WebNaming/UserRules/=ASP_005FHTML_005FCONTROL/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"AaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/WebNaming/UserRules/=ASP_005FTAG_005FNAME/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"AaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/WebNaming/UserRules/=ASP_005FTAG_005FPREFIX/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"AaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/XamlNaming/UserRules/=NAMESPACE_005FALIAS/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"aaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/XamlNaming/UserRules/=XAML_005FFIELD/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"AaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/CodeStyle/Naming/XamlNaming/UserRules/=XAML_005FRESOURCE/@EntryIndexedValue\">&lt;Policy Inspect=\"True\" Prefix=\"\" Suffix=\"\" Style=\"AaBb\" /&gt;</s:String>\n\t<s:String x:Key=\"/Default/Environment/Hierarchy/PsiConfigurationSettingsKey/CustomLocation/@EntryValue\">C:\\Users\\Michael\\AppData\\Local\\JetBrains\\Transient\\ReSharperPlatformVs15\\v08_b4a306d0\\SolutionCaches</s:String>\n\t<s:String x:Key=\"/Default/Environment/Highlighting/HighlightingMode/@EntryValue\">MarkersAndFullLine</s:String>\n\t<s:String x:Key=\"/Default/Environment/PerformanceGuide/SwitchBehaviour/=Antivirus/@EntryIndexedValue\">LIVE_MONITOR</s:String>\n\t<s:String x:Key=\"/Default/Environment/PerformanceGuide/SwitchBehaviour/=AutoRecoverer/@EntryIndexedValue\">LIVE_MONITOR</s:String>\n\t<s:String x:Key=\"/Default/Environment/PerformanceGuide/SwitchBehaviour/=Format/@EntryIndexedValue\">LIVE_MONITOR</s:String>\n\t<s:String 
x:Key=\"/Default/Environment/PerformanceGuide/SwitchBehaviour/=Roslyn_002Dswea/@EntryIndexedValue\">DO_NOTHING</s:String>\n\t<s:String x:Key=\"/Default/Environment/PerformanceGuide/SwitchBehaviour/=ShowAnnotations/@EntryIndexedValue\">LIVE_MONITOR</s:String>\n\t<s:String x:Key=\"/Default/Environment/PerformanceGuide/SwitchBehaviour/=SolExp_002DTrack/@EntryIndexedValue\">LIVE_MONITOR</s:String>\n\t<s:String x:Key=\"/Default/Environment/PerformanceGuide/SwitchBehaviour/=StartPage_002DIsDownloadRefreshEnabled/@EntryIndexedValue\">LIVE_MONITOR</s:String>\n\t<s:String x:Key=\"/Default/Environment/PerformanceGuide/SwitchBehaviour/=StartPage_002DOnEnvironmentStatup/@EntryIndexedValue\">LIVE_MONITOR</s:String>\n\t<s:String x:Key=\"/Default/Environment/PerformanceGuide/SwitchBehaviour/=SyncSettings/@EntryIndexedValue\">LIVE_MONITOR</s:String>\n\t<s:String x:Key=\"/Default/Environment/PerformanceGuide/SwitchBehaviour/=TextEditor_002DCodeLens/@EntryIndexedValue\">LIVE_MONITOR</s:String>\n\t<s:String x:Key=\"/Default/Environment/PerformanceGuide/SwitchBehaviour/=TextEditor_002DTrackChanges_002D2/@EntryIndexedValue\">LIVE_MONITOR</s:String>\n\t<s:String x:Key=\"/Default/Environment/PerformanceGuide/SwitchBehaviour/=VCS/@EntryIndexedValue\">LIVE_MONITOR</s:String>\n\t<s:String x:Key=\"/Default/Environment/PerformanceGuide/SwitchBehaviour/=VsBulb/@EntryIndexedValue\">DO_NOTHING</s:String>\n\t<s:String x:Key=\"/Default/Environment/PerformanceGuide/SwitchBehaviour/=XAML_0020Designer/@EntryIndexedValue\">LIVE_MONITOR</s:String>\n\t<s:Boolean x:Key=\"/Default/Environment/SettingsMigration/IsMigratorApplied/=JetBrains_002EdotCover_002EIde_002ECore_002EFilterManagement_002EModel_002ESolutionFilterSettingsManagerMigrateSettings/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/Environment/SettingsMigration/IsMigratorApplied/=JetBrains_002EReSharper_002EPsi_002ECSharp_002ECodeStyle_002ECSharpKeepExistingMigration/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean 
x:Key=\"/Default/Environment/SettingsMigration/IsMigratorApplied/=JetBrains_002EReSharper_002EPsi_002ECSharp_002ECodeStyle_002ECSharpPlaceEmbeddedOnSameLineMigration/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/Environment/SettingsMigration/IsMigratorApplied/=JetBrains_002EReSharper_002EPsi_002ECSharp_002ECodeStyle_002ECSharpUseContinuousIndentInsideBracesMigration/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/Environment/SettingsMigration/IsMigratorApplied/=JetBrains_002EReSharper_002EPsi_002ECSharp_002ECodeStyle_002ESettingsUpgrade_002EMigrateBlankLinesAroundFieldToBlankLinesAroundProperty/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Int64 x:Key=\"/Default/Environment/UnitTesting/ParallelProcessesCount/@EntryValue\">10</s:Int64>\n\t<s:String x:Key=\"/Default/FilterSettingsManager/AttributeFilterXml/@EntryValue\">&lt;data /&gt;</s:String>\n\t<s:String x:Key=\"/Default/FilterSettingsManager/CoverageFilterXml/@EntryValue\">&lt;data&gt;&lt;IncludeFilters /&gt;&lt;ExcludeFilters /&gt;&lt;/data&gt;</s:String>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=aaalt/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=aaref/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=adjacencies/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=allelecount/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=allelefrequency/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=allelenumber/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=appsettings/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=BGZF/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Breakend/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean 
x:Key=\"/Default/UserDictionary/Words/=cdna/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Chrom/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Codon/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Codons/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=colocalized/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Contig/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Cytogenetic/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=decompressor/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=delins/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=denovo/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Downloader/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=endian/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=ensembl/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=ENSG/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=enst/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=entrez/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=exome/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=exon/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=exons/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Fasta/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean 
x:Key=\"/Default/UserDictionary/Words/=Frameshift/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Gatk/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Genbank/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Germline/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Gnomad/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=grch/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Gvcf/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Haplotype/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Haplotypes/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Heteroplasmy/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=hgnc/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=hgvs/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=hgvsc/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Hgvsg/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=hgvsp/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=hgvsr/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=histologies/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=homozygosity/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Illumina/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=intron/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean 
x:Key=\"/Default/UserDictionary/Words/=introns/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=jasix/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Mims/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Mirna/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Mito/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Ncbi/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Novo/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Omim/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Paralog/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=PEPE/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Phen/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Phylop/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Ploidies/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=ploidy/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=polyphen/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=preload/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Pseudogene/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Pubmed/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Qual/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=recalibrated/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean 
x:Key=\"/Default/UserDictionary/Words/=Recomposable/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Recomposer/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Recomposition/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=requeuing/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=sapiens/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Seqs/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Tabix/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=TCGA/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Translocation/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Translocations/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=ucsc/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=uncompressing/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Vids/@EntryIndexedValue\">True</s:Boolean>\n\t<s:Boolean x:Key=\"/Default/UserDictionary/Words/=Vrfs/@EntryIndexedValue\">True</s:Boolean></wpf:ResourceDictionary>"
  },
  {
    "path": "NirvanaLambda/AnnotationJob.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.Threading.Tasks;\r\nusing Amazon.Lambda;\r\nusing Amazon.Lambda.Core;\r\nusing Amazon.Lambda.Model;\r\nusing Cloud.Messages.Annotation;\r\nusing Cloud.Utilities;\r\nusing ErrorHandling;\r\nusing IO;\r\n\r\nnamespace NirvanaLambda\r\n{\r\n    public sealed class AnnotationJob\r\n    {\r\n        private const int MinAnnotationTime          = 5_000;\r\n        private const int ReservedPostAnnotationTime = 10_000;\r\n        private const int WaitBeforeRetry            = 2_000;\r\n        private const string UnknownErrorMessage     = \"Unknown error -1\";\r\n\r\n        private int _numRetries;\r\n        private double _annotationTimeOut;\r\n        private readonly ILambdaContext _lambdaContext;\r\n        private readonly int _jobIndex;\r\n        private ErrorCategory? _errorCategory;\r\n\r\n        public AnnotationJob(ILambdaContext context, int jobIndex)\r\n        {\r\n            _lambdaContext = context;\r\n            _jobIndex      = jobIndex;\r\n        }\r\n\r\n        public AnnotationResultSummary Invoke(string functionArn, string functionInput)\r\n        {\r\n            try\r\n            {\r\n                return InvokeAndRetryWhenThrottled(functionArn, functionInput).Result;\r\n            }\r\n            catch (Exception e)\r\n            {\r\n                Logger.Log(e);\r\n                return GetResultSummaryFromFailedInvocation(e);\r\n            }\r\n        }\r\n\r\n        private async Task<AnnotationResultSummary> InvokeAndRetryWhenThrottled(string functionArn, string functionInput)\r\n        {\r\n            AnnotationResultSummary resultSummary;\r\n\r\n            while (true)\r\n            {\r\n                try\r\n                {\r\n                    var invokeRequest = new InvokeRequest\r\n                    {\r\n                        FunctionName   = functionArn,\r\n                        Payload        = functionInput,\r\n                       
 InvocationType = \"RequestResponse\"\r\n                    };\r\n\r\n                    var payload   = GetAnnotationResult(invokeRequest);\r\n                    resultSummary = GetResultSummaryFromSuccessInvocation(payload);\r\n                    break;\r\n                }\r\n                catch (Exception e) when (ExceptionUtilities.HasException<TooManyRequestsException>(e))\r\n                {\r\n                    Logger.WriteLine($\"Job {_jobIndex}: Invocation is throttled. Retry in {WaitBeforeRetry} ms.\");\r\n                    _numRetries++;\r\n                    await Task.Delay(WaitBeforeRetry);\r\n                }\r\n                catch (Exception e) when (e.HasErrorMessage(UnknownErrorMessage))\r\n                {\r\n                    Logger.WriteLine($\"Job {_jobIndex}: {UnknownErrorMessage}. Retry in {WaitBeforeRetry} ms.\");\r\n                    _numRetries++;\r\n                    await Task.Delay(WaitBeforeRetry);\r\n                }\r\n            }\r\n\r\n            return resultSummary;\r\n        }\r\n\r\n        internal static AnnotationResultSummary GetResultSummaryFromSuccessInvocation(MemoryStream payload)\r\n        {\r\n            var annotationResult = JsonUtilities.Deserialize<AnnotationResult>(payload);\r\n            string errorMessage  = annotationResult.errorCategory == null ? 
null : annotationResult.status;\r\n            return AnnotationResultSummary.Create(annotationResult, annotationResult.errorCategory, errorMessage);\r\n        }\r\n\r\n        private MemoryStream GetAnnotationResult(InvokeRequest invokeRequest)\r\n        {\r\n            CheckRemainingTime();\r\n\r\n            var config = new AmazonLambdaConfig\r\n            {\r\n                ResignRetries = true,\r\n                Timeout = TimeSpan.FromMilliseconds(_annotationTimeOut)\r\n            };\r\n\r\n            InvokeResponse response;\r\n            using (var lambdaClient = new AmazonLambdaClient(config))\r\n            {\r\n                response = lambdaClient.InvokeAsync(invokeRequest).Result;\r\n            }\r\n\r\n            CheckResponse(response);\r\n            return response.Payload;\r\n        }\r\n\r\n        private void CheckRemainingTime()\r\n        {\r\n            double currentRemainingTime = _lambdaContext.RemainingTime.TotalMilliseconds;\r\n\r\n            if (currentRemainingTime < MinAnnotationTime + ReservedPostAnnotationTime)\r\n            {\r\n                if (_numRetries > 0)\r\n                {\r\n                    _errorCategory = ErrorCategory.InvocationThrottledError;\r\n                    throw new Exception($\"Invocation is still throttled after {_numRetries} retries.\");\r\n                }\r\n\r\n                _errorCategory = ErrorCategory.TimeOutError;\r\n                throw new Exception($\"Only {currentRemainingTime} ms left. 
Not enough time for annotation job.\");\r\n            }\r\n\r\n            _annotationTimeOut = currentRemainingTime - ReservedPostAnnotationTime;\r\n        }\r\n\r\n        // ReSharper disable once ParameterOnlyUsedForPreconditionCheck.Global\r\n        internal void CheckResponse(InvokeResponse response)\r\n        {\r\n            if (response == null)\r\n            {\r\n                _errorCategory = ErrorCategory.NirvanaError;\r\n                throw new Exception(\"Failed to get the response from annotation job\");\r\n            }\r\n\r\n            if (response.FunctionError == \"Unhandled\")\r\n            {\r\n                _errorCategory = ErrorCategory.NirvanaError;\r\n                throw new Exception(\"There is unhandled error in annotation job. A possible reason for this is the out-of-memory issue.\");\r\n            }\r\n        }\r\n\r\n        internal AnnotationResultSummary GetResultSummaryFromFailedInvocation(Exception e)\r\n        {\r\n            var additionalDescription = \"\";\r\n            if (ExceptionUtilities.HasException<TaskCanceledException>(e))\r\n            {\r\n                _errorCategory = ErrorCategory.TimeOutError;\r\n                additionalDescription = $\" Annotation job was not finished in {_annotationTimeOut} milliseconds.\";\r\n            }\r\n\r\n            if (_errorCategory == null) _errorCategory = ExceptionUtilities.ExceptionToErrorCategory(e);\r\n\r\n            e = ExceptionUtilities.GetInnermostException(e);\r\n            string errorMessage = $\"Failed job when invoking the annotation job: {e.Message}.{additionalDescription}\";\r\n\r\n            return AnnotationResultSummary.Create(null, _errorCategory, errorMessage); \r\n        }\r\n    }\r\n}"
  },
  {
    "path": "NirvanaLambda/AnnotationResultSummary.cs",
    "content": "﻿using System.IO;\r\nusing Cloud.Messages.Annotation;\r\nusing ErrorHandling;\r\n\r\nnamespace NirvanaLambda\r\n{\r\n    public sealed class AnnotationResultSummary\r\n    {\r\n        public string         ErrorMessage;\r\n        public string         FileName;\r\n        public int            VariantCount;\r\n        public ErrorCategory? ErrorCategory;\r\n\r\n        public static AnnotationResultSummary Create(AnnotationResult annotationResult, ErrorCategory? errorCategory, string errorMessage)\r\n        {\r\n            string fileName = Path.GetFileName(annotationResult?.filePath);\r\n\r\n            return new AnnotationResultSummary\r\n            {\r\n                ErrorCategory = errorCategory,\r\n                ErrorMessage  = errorMessage,\r\n                FileName      = fileName,\r\n                VariantCount  =  annotationResult?.variantCount ?? 0\r\n            };\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "NirvanaLambda/AssemblyInfo.cs",
    "content": "﻿using System.Runtime.CompilerServices;\r\n\r\n[assembly: InternalsVisibleTo(\"UnitTests\")]"
  },
  {
    "path": "NirvanaLambda/NirvanaLambda.cs",
    "content": "using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.IO.Compression;\r\nusing System.Linq;\r\nusing System.Text;\r\nusing System.Threading.Tasks;\r\nusing Amazon.Lambda.Core;\r\nusing Cloud;\r\nusing Cloud.Messages;\r\nusing Cloud.Messages.Annotation;\r\nusing Cloud.Messages.Nirvana;\r\nusing Cloud.Notifications;\r\nusing Cloud.Utilities;\r\nusing CommandLine.Utilities;\r\nusing Compression.FileHandling;\r\nusing ErrorHandling;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing Intervals;\r\nusing IO;\r\nusing Tabix;\r\nusing VariantAnnotation.Caches.Utilities;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Providers;\r\nusing JsonSerializer = Amazon.Lambda.Serialization.Json.JsonSerializer;\r\n\r\n[assembly: LambdaSerializer(typeof(JsonSerializer))]\r\n\r\nnamespace NirvanaLambda\r\n{\r\n    // ReSharper disable once UnusedMember.Global\r\n    // ReSharper disable once ClassNeverInstantiated.Global\r\n    public sealed class NirvanaLambda\r\n    {\r\n        private const string AnnotationLambdaFailedStatus = \"One or more annotation Lambdas failed\";\r\n        private const string AnnotationLambdaKey          = \"annotation_lambda_arn\";\r\n        private const string TryAgainMessage              = \"Please try again later.\";\r\n        private const int MaxNumPartitions                = 30;\r\n        private const int MinNumPartitions                = 6;\r\n        private const int MinPartitionSize                = 10_000_000;\r\n\r\n        private readonly HashSet<GenomeAssembly> _supportedAssemblies = new HashSet<GenomeAssembly> { GenomeAssembly.GRCh37, GenomeAssembly.GRCh38 };\r\n\r\n        // ReSharper disable once UnusedMember.Global\r\n        public NirvanaResult Run(NirvanaConfig config, ILambdaContext context)\r\n        {\r\n            NirvanaResult result;\r\n            string snsTopicArn = null;\r\n            var runLog = new 
StringBuilder();\r\n\r\n            try\r\n            {\r\n                LogUtilities.UpdateLogger(context.Logger, runLog);\r\n                LogUtilities.LogLambdaInfo(context, CommandLineUtilities.InformationalVersion);\r\n                LogUtilities.LogObject(\"Config\", config);\r\n                LogUtilities.Log(new[] { LambdaUrlHelper.UrlBaseEnvironmentVariableName, LambdaUtilities.SnsTopicKey, \"annotation_lambda_arn\" });\r\n\r\n                LambdaUtilities.GarbageCollect();\r\n\r\n                snsTopicArn                = LambdaUtilities.GetEnvironmentVariable(LambdaUtilities.SnsTopicKey);\r\n                string annotationLambdaArn = LambdaUtilities.GetEnvironmentVariable(AnnotationLambdaKey);\r\n                \r\n                config.Validate();\r\n\r\n                var genomeAssembly = GenomeAssemblyHelper.Convert(config.genomeAssembly);\r\n\r\n                if (!_supportedAssemblies.Contains(genomeAssembly))\r\n                    throw new UserErrorException($\"Unsupported assembly: {config.genomeAssembly}\");\r\n\r\n                AnnotationRange[] annotationRanges = GetAnnotationRanges(config, genomeAssembly);\r\n                result = GetNirvanaResult(annotationRanges, config, annotationLambdaArn, context, runLog, snsTopicArn);\r\n            }\r\n            catch (Exception exception)\r\n            {\r\n                result = HandleException(runLog, config, exception, snsTopicArn);\r\n            }\r\n\r\n            LogUtilities.LogObject(\"Result\", result);\r\n\r\n            return result;\r\n        }\r\n        \r\n        private static AnnotationRange[] GetAnnotationRanges(NirvanaConfig config, GenomeAssembly genomeAssembly)\r\n        {\r\n            string cachePathPrefix = LambdaUtilities.GetCachePathPrefix(genomeAssembly);\r\n\r\n            using Stream tabixStream      = PersistentStreamUtils.GetReadStream(config.tabixUrl);\r\n            using var    tabixReader      = new BinaryReader(new 
BlockGZipStream(tabixStream, CompressionMode.Decompress));\r\n            using Stream referenceStream  = PersistentStreamUtils.GetReadStream(LambdaUrlHelper.GetRefUrl(genomeAssembly));\r\n            using var    sequenceProvider = new ReferenceSequenceProvider(referenceStream);\r\n            \r\n            long         vcfSize          = HttpUtilities.GetLength(config.vcfUrl);\r\n            int          numPartitions    = Math.Max(Math.Min((int) ((vcfSize - 1) / MinPartitionSize + 1), MaxNumPartitions), MinNumPartitions);\r\n\r\n            Tabix.Index tabixIndex   = Reader.Read(tabixReader, sequenceProvider.RefNameToChromosome);\r\n            List<long>  blockOffsets = PartitionUtilities.GetFileOffsets(config.vcfUrl, numPartitions, tabixIndex);\r\n            \r\n            // stop early if we're going to annotate the entire file\r\n            if (blockOffsets.Count == 1 && blockOffsets[0] == 0) return null;\r\n\r\n            using var                        taProvider               = new TranscriptAnnotationProvider(cachePathPrefix, sequenceProvider, null);\r\n            IntervalArray<ITranscript>[]     transcriptIntervalArrays = taProvider.TranscriptIntervalArrays;\r\n            IntervalForest<IGene>            geneIntervalForest       = GeneForestGenerator.GetGeneForest(transcriptIntervalArrays);\r\n            Dictionary<string, Chromosome> refNameToChromosome      = sequenceProvider.RefNameToChromosome;\r\n\r\n            return PartitionUtilities.GenerateAnnotationRanges(blockOffsets, config.vcfUrl, geneIntervalForest, refNameToChromosome);\r\n        }\r\n\r\n        private static NirvanaResult HandleException(StringBuilder runLog, NirvanaConfig config, Exception e, string snsTopicArn)\r\n        {\r\n            Logger.Log(e);\r\n            var errorCategory = ExceptionUtilities.ExceptionToErrorCategory(e);\r\n            return GetNirvanaFailResult(runLog, config, errorCategory, e.Message, e.StackTrace, snsTopicArn);\r\n        }\r\n\r\n      
  private static NirvanaResult GetNirvanaFailResult(StringBuilder runLog, NirvanaConfig config, ErrorCategory errorCategory, string errorMessage, string stackTrace, string snsTopicArn)\r\n        {\r\n            string status = GetFailedRunStatus(errorCategory, errorMessage);\r\n\r\n            if (errorCategory != ErrorCategory.UserError)\r\n            {\r\n                string snsMessage = SNS.CreateMessage(runLog.ToString(), status, stackTrace);\r\n                SNS.SendMessage(snsTopicArn, snsMessage);\r\n            }\r\n\r\n            return new NirvanaResult\r\n            {\r\n                id           = config.id,\r\n                status       = status,\r\n                variantCount = 0,\r\n                jwtFields    =  config.jwtFields\r\n\r\n            };\r\n        }\r\n\r\n        internal static string GetFailedRunStatus(ErrorCategory errorCategory, string errorMessage)\r\n        {\r\n            // ReSharper disable once SwitchStatementMissingSomeCases\r\n            switch (errorCategory)\r\n            {\r\n                case ErrorCategory.TimeOutError:\r\n                    return \"Timeout error: annotation of the VCF was not finished on time due to network congestion. \" + \r\n                           TryAgainMessage;\r\n                case ErrorCategory.InvocationThrottledError:\r\n                    return \"Invocation throttled error: there are too many lambdas currently running in this account. 
\" +\r\n                           TryAgainMessage;\r\n                case ErrorCategory.UserError:\r\n                    return \"User error: \" + FirstCharToLower(errorMessage);\r\n                default:\r\n                    return \"Nirvana error: an unexpected annotation error occurred while annotating this VCF.\";\r\n            }\r\n        }\r\n\r\n        private static NirvanaResult GetNirvanaResult(AnnotationRange[] annotationRanges, NirvanaConfig config, string annotationLambdaArn, ILambdaContext context, StringBuilder runLog, string snsTopicArn)\r\n        {\r\n            Task<AnnotationResultSummary>[] annotationTasks = CallAnnotationLambdas(config, annotationLambdaArn, context, annotationRanges);\r\n            AnnotationResultSummary[] processedAnnotationResults = Task.WhenAll(annotationTasks).Result;\r\n\r\n            (ErrorCategory? errorCategory, string errorMessage) = GetMostSevereErrorCategoryAndMessage(processedAnnotationResults);\r\n            if (errorCategory != null) return GetNirvanaFailResult(runLog, config, errorCategory.Value, errorMessage, null, snsTopicArn);\r\n\r\n            string[] fileNames    = processedAnnotationResults.Select(x => x.FileName).ToArray();\r\n            int      variantCount = processedAnnotationResults.Sum(x => x.VariantCount);\r\n\r\n            return new NirvanaResult\r\n            {\r\n                id      = config.id,\r\n                status  = LambdaUrlHelper.SuccessMessage,\r\n                created = new FileList\r\n                {\r\n                    bucketName = config.outputDir.bucketName,\r\n                    outputDir  = config.outputDir.path,\r\n                    files      = fileNames\r\n                },\r\n                variantCount    = variantCount,\r\n                jwtFields =  config.jwtFields\r\n\r\n            };\r\n        }\r\n\r\n        private static (ErrorCategory?, string) GetMostSevereErrorCategoryAndMessage(IEnumerable<AnnotationResultSummary> 
annotationResultSummaries)\r\n        {\r\n            List<(AnnotationResultSummary Item, int Index)> failedJobs = annotationResultSummaries\r\n                .Select(x => x ?? AnnotationResultSummary.Create(null, ErrorCategory.NirvanaError, \"No result summary available for the annotation job.\"))\r\n                .Select((x, i) => (Item: x, Index: i)).Where(x => x.Item.ErrorCategory != null).ToList();\r\n\r\n            if (failedJobs.Count == 0) return (null, null);\r\n\r\n            Logger.WriteLine(AnnotationLambdaFailedStatus);\r\n            failedJobs.ForEach(x => Logger.WriteLine($\"Job {x.Index + 1}: {x.Item.ErrorCategory} {x.Item.ErrorMessage}\"));\r\n\r\n            ErrorCategory? mostSevereError = failedJobs.Select(x => x.Item.ErrorCategory).Min();\r\n            string errorMessage = mostSevereError == ErrorCategory.UserError \r\n                ? string.Join(\";\", failedJobs.Where(x => x.Item.ErrorCategory == mostSevereError).Select(x => x.Item.ErrorMessage).Distinct())\r\n                : \"\";\r\n\r\n            return (mostSevereError, errorMessage);\r\n        }\r\n\r\n        private static Task<AnnotationResultSummary>[] CallAnnotationLambdas(NirvanaConfig config, string annotationLambdaArn, ILambdaContext context,\r\n            IEnumerable<AnnotationRange> annotationRanges) =>\r\n            annotationRanges?.Select((x, i) => RunAnnotationJob(config, annotationLambdaArn, context, x, i + 1)).ToArray() ??\r\n            new[] {RunAnnotationJob(config, annotationLambdaArn, context, null, 1)};\r\n\r\n        private static Task<AnnotationResultSummary> RunAnnotationJob(NirvanaConfig config, string annotationLambdaArn, ILambdaContext context, AnnotationRange range, int jobIndex)\r\n        {\r\n            var annotationConfig = GetAnnotationConfig(config, range, jobIndex);\r\n            Logger.WriteLine($\"Job: {jobIndex}, Annotation region: {DescribeAnnotationRegion(range)}\");\r\n\r\n            string configString = 
JsonUtilities.Stringify(annotationConfig);\r\n\r\n            var annotationJob = new AnnotationJob(context, jobIndex);\r\n            return Task.Run(() => annotationJob.Invoke(annotationLambdaArn, configString));\r\n        }\r\n\r\n        private static string DescribeAnnotationRegion(AnnotationRange ar)\r\n        {\r\n            if (ar == null) return \"Whole VCF\";\r\n            string ret = $\"{ar.Start.Chromosome}:{ar.Start.Position}-\";\r\n            return ar.End == null ? ret : $\"{ret}{ar.End?.Chromosome}:{ar.End?.Position}\";\r\n        }\r\n\r\n        private static AnnotationConfig GetAnnotationConfig(NirvanaConfig config, AnnotationRange annotationRange, int jobIndex) => new()\r\n        {\r\n            id                = config.id + $\"_job{jobIndex}\",\r\n            genomeAssembly    = config.genomeAssembly,\r\n            vcfUrl            = config.vcfUrl,\r\n            tabixUrl          = config.tabixUrl,\r\n            outputDir         = config.outputDir,\r\n            outputPrefix      = GetIndexedPrefix(config.vcfUrl, jobIndex),\r\n            customAnnotations = config.customAnnotations,\r\n            desiredVcfInfo    = config.desiredVcfInfo,\r\n            desiredVcfSampleInfo     = config.desiredVcfSampleInfo,\r\n            customStrUrl      = config.customStrUrl,\r\n            annotationRange   = annotationRange\r\n        };\r\n\r\n        internal static string GetIndexedPrefix(string inputVcfPath, int jobIndex) =>\r\n            inputVcfPath.TrimEndFromFirst(\"?\").TrimStartToLast(\"/\").TrimEndFromFirst(\".vcf\") + \"_\" + jobIndex.ToString(\"00000\");\r\n\r\n        private static string FirstCharToLower(string input) => string.IsNullOrEmpty(input) || char.IsLower(input[0])\r\n            ? input\r\n            : char.ToLowerInvariant(input[0]) + input.Substring(1);\r\n    }\r\n}\r\n"
  },
  {
    "path": "NirvanaLambda/NirvanaLambda.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\r\n  <PropertyGroup>\r\n    <TargetFramework>net6.0</TargetFramework>\r\n    <GenerateRuntimeConfigurationFiles>true</GenerateRuntimeConfigurationFiles>\r\n    <AWSProjectType>Lambda</AWSProjectType>\r\n    <OutputPath>bin\\$(Configuration)</OutputPath>\r\n    \r\n  </PropertyGroup>\r\n  <ItemGroup>\r\n    <PackageReference Include=\"Amazon.Lambda.Core\" Version=\"2.1.0\" />\r\n    <PackageReference Include=\"Amazon.Lambda.Serialization.Json\" Version=\"2.0.0\" />\r\n    <PackageReference Include=\"AWSSDK.Lambda\" Version=\"3.7.9.3\" />\r\n    <PackageReference Include=\"AWSSDK.SimpleNotificationService\" Version=\"3.7.3.31\" />\r\n  </ItemGroup>\r\n  <ItemGroup>\r\n    <ProjectReference Include=\"..\\Genome\\Genome.csproj\" />\r\n    <ProjectReference Include=\"..\\Cloud\\Cloud.csproj\" />\r\n    <ProjectReference Include=\"..\\IO\\IO.csproj\" />\r\n    <ProjectReference Include=\"..\\Compression\\Compression.csproj\" />\r\n    <ProjectReference Include=\"..\\Tabix\\Tabix.csproj\" />\r\n    <ProjectReference Include=\"..\\VariantAnnotation\\VariantAnnotation.csproj\" />\r\n  </ItemGroup>\r\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\r\n</Project>\r\n"
  },
  {
    "path": "NirvanaLambda/PartitionUtilities.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.IO.Compression;\r\nusing System.Linq;\r\nusing Cloud.Messages.Annotation;\r\nusing Compression.FileHandling;\r\nusing Genome;\r\nusing Intervals;\r\nusing IO;\r\nusing Tabix;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.IO;\r\n\r\nnamespace NirvanaLambda\r\n{\r\n    public static class PartitionUtilities\r\n    {\r\n        public static List<long> GetFileOffsets(string vcfUrl, int numPartitions, Tabix.Index tabixIndex)\r\n        {\r\n            long fileSize = HttpUtilities.GetLength(vcfUrl);\r\n            long[] sizeBasedOffsets = GetEqualSizeOffsets(fileSize, numPartitions);\r\n            return GetBlockOffsets(sizeBasedOffsets, tabixIndex);\r\n        }\r\n\r\n        private static List<long> GetBlockOffsets(long[] sizeBasedOffsets, Tabix.Index tabixIndex)\r\n        {\r\n            long[] allLinearOffsets = GetAllLinearFileOffsets(tabixIndex);\r\n\r\n            return FindEqualOrClosestSmallerOffsets(sizeBasedOffsets, allLinearOffsets);\r\n        }\r\n\r\n        internal static List<long> FindEqualOrClosestSmallerOffsets(long[] sizeBasedOffsets, long[] allLinearOffsets)\r\n        {\r\n            if (sizeBasedOffsets == null || allLinearOffsets == null) return new List<long>();\r\n\r\n            var closestOffsets = new List<long>();\r\n            var startIndex = 0;\r\n\r\n            foreach (long offset in sizeBasedOffsets)\r\n            {\r\n                int searchedIndex = Array.BinarySearch(allLinearOffsets, startIndex, allLinearOffsets.Length - startIndex, offset);\r\n                if (searchedIndex < 0) searchedIndex = ~searchedIndex - 1;\r\n                if (searchedIndex < 0) searchedIndex = 0;\r\n\r\n                // only add new offset if it is different from the last one in the list\r\n                if (closestOffsets.Count == 0 || startIndex != searchedIndex) 
closestOffsets.Add(allLinearOffsets[searchedIndex]);\r\n                startIndex = searchedIndex;\r\n            }\r\n\r\n            return closestOffsets;\r\n        }\r\n\r\n        internal static long[] GetEqualSizeOffsets(long fileSize, int numPartitions)\r\n        {\r\n            var offsets = new long[numPartitions];\r\n            long baseSize = fileSize / numPartitions;\r\n\r\n            //put all the extra {fileSize%numPartitions} bytes to the last partition\r\n            for (var i = 0; i < numPartitions; i++) offsets[i] = baseSize * i;\r\n\r\n            return offsets;\r\n        }\r\n\r\n        private static long[] GetAllLinearFileOffsets(Tabix.Index tabixIndex)\r\n        {\r\n            if (tabixIndex.ReferenceSequences.Length == 0) return new long[] {0};\r\n            return MergeConsecutiveEqualValues(\r\n                tabixIndex.ReferenceSequences.SelectMany(x => x.LinearFileOffsets.Select(y => VirtualPosition.From((long) y).FileOffset))).ToArray();\r\n        }\r\n\r\n        public static IEnumerable<T> MergeConsecutiveEqualValues<T>(IEnumerable<T> values)\r\n        {\r\n            var isFirstValue = true;\r\n            T lastValue = default;\r\n            foreach (var value in values)\r\n            {\r\n                if (!isFirstValue && lastValue.Equals(value)) continue;\r\n\r\n                isFirstValue = false;\r\n                lastValue = value;\r\n                yield return value;\r\n            }\r\n        }\r\n\r\n        public static AnnotationRange[] GenerateAnnotationRanges(List<long> blockBasedOffsets, string vcfUrl,\r\n            IntervalForest<IGene> geneIntervalForest, Dictionary<string, Chromosome> refNameToChromosome)\r\n        {\r\n            // There may be less intervals for annotation Lambda after the adjustment\r\n            AnnotationPosition[] adjustedStarts = AdjustPartitionGenomicStarts(blockBasedOffsets, vcfUrl, geneIntervalForest, refNameToChromosome);\r\n\r\n            return 
GetRanges(adjustedStarts);\r\n        }\r\n\r\n\r\n        private static AnnotationPosition[] AdjustPartitionGenomicStarts(IReadOnlyList<long> blockBasedOffsets, string vcfUrl,\r\n            IIntervalForest<IGene> geneIntervalForest, Dictionary<string, Chromosome> refNameToChromosome)\r\n        {\r\n            var allAdjustedStarts = new AnnotationPosition[blockBasedOffsets.Count];\r\n\r\n            for (var index = 0; index < blockBasedOffsets.Count; index++)\r\n            {\r\n                long blockBasedOffset = blockBasedOffsets[index];\r\n\r\n                using (var stream     = PersistentStreamUtils.GetReadStream(vcfUrl, blockBasedOffset))\r\n                using (var gzipStream = new BlockGZipStream(stream, CompressionMode.Decompress))\r\n                {\r\n                    var annotationPosition   = GetFirstGenomicPosition(gzipStream, index == 0);\r\n                    allAdjustedStarts[index] = FindProperStartPosition(annotationPosition, geneIntervalForest, refNameToChromosome);\r\n                }\r\n            }\r\n\r\n            AnnotationPosition[] adjustedStarts = MergeConsecutiveEqualValues(allAdjustedStarts).ToArray();\r\n            return adjustedStarts;\r\n        }\r\n\r\n        private static AnnotationRange[] GetRanges(AnnotationPosition[] adjustedStarts)\r\n        {\r\n            int numStarts = adjustedStarts.Length;\r\n            int lastIndex = numStarts - 1;\r\n\r\n            if (numStarts == 1) return null;\r\n\r\n            var ranges = new AnnotationRange[numStarts];\r\n            \r\n            for (var i = 0; i < lastIndex; i++)\r\n                //The end position in an annotation range can be smaller than 1, which indicate it ends at the end of previous chromosome\r\n                ranges[i] = new AnnotationRange(adjustedStarts[i], new AnnotationPosition(adjustedStarts[i + 1].Chromosome, adjustedStarts[i + 1].Position - 1));\r\n\r\n            ranges[lastIndex] = new 
AnnotationRange(adjustedStarts[lastIndex], null);\r\n\r\n            return ranges;\r\n        }\r\n\r\n        private static AnnotationPosition GetFirstGenomicPosition(Stream vcfStream, bool isFirstBlock)\r\n        {\r\n            if (vcfStream == null) throw new ArgumentNullException(nameof(vcfStream),\"The VCF stream trying to read is null.\");\r\n\r\n            using (var streamReader = new StreamReader(vcfStream))\r\n            {\r\n                // Discard the first line if this is not the first block, as it may be a partial VCF line\r\n                if (!isFirstBlock) streamReader.ReadLine();\r\n\r\n                string line;\r\n                while ((line = streamReader.ReadLine()) != null)\r\n                {\r\n                    if (line.StartsWith('#')) continue;\r\n                    string[] splits = line.Split('\\t', 3);\r\n                    if (splits.Length < 3) continue;\r\n                    string chrom = splits[VcfCommon.ChromIndex];\r\n                    string positionString = splits[VcfCommon.PosIndex];\r\n                    if (!int.TryParse(positionString, out int position)) throw new InvalidDataException($\"Position {positionString} in VCF line {line} is not a number.\");\r\n\r\n                    return new AnnotationPosition(chrom, position);\r\n                }\r\n\r\n                throw new InvalidDataException(\"No variant found in the VCF stream.\");\r\n            }\r\n        }\r\n\r\n        private static AnnotationPosition FindProperStartPosition(AnnotationPosition genomicPosition, IIntervalForest<IGene> geneIntervalForest, Dictionary<string, Chromosome> refNameToChromosome)\r\n        {\r\n            var chromosome = ReferenceNameUtilities.GetChromosome(refNameToChromosome, genomicPosition.Chromosome);\r\n\r\n            int currentPosition = genomicPosition.Position;\r\n            IGene[] overlappingGenes;\r\n            while ((overlappingGenes = 
geneIntervalForest.GetAllOverlappingValues(chromosome.Index,\r\n                       currentPosition, currentPosition)) != null)\r\n            {\r\n                if (overlappingGenes.Length > 0) currentPosition = overlappingGenes.Select(x => x.Start).Min() - 1;\r\n            }\r\n\r\n            // Always return the position right before the overlapping genes to KISS\r\n            return new AnnotationPosition(genomicPosition.Chromosome, currentPosition < 1 ? 1 : currentPosition);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "OptimizedCore/ExpandableArray.cs",
    "content": "using System.Buffers;\n\nnamespace OptimizedCore\n{\n    public static class ExpandableArray<T>\n    {\n        public static T[] Get(int size)\n        {\n            var pool = ArrayPool<T>.Shared;\n            return pool.Rent(size);\n        }\n\n        public static T[] Resize(T[] array, int newSize)\n        {\n            var pool = ArrayPool<T>.Shared;\n            pool.Return(array);\n\n            return pool.Rent(newSize);\n        }\n\n        public static void Return(T[] array)\n        {\n            var pool = ArrayPool<T>.Shared;\n            pool.Return(array);\n        }\n    }\n}"
  },
  {
    "path": "OptimizedCore/NullSequenceEqual.cs",
    "content": "﻿namespace OptimizedCore\n{\n    public static class NullSequenceEqual\n    {\n        public static bool ArrayEqual<T>(this T[] first, T[] second)\n        {\n            if (ReferenceEquals(first, second)) return true;\n            if (first == null || second == null) return false;\n\n            if (first.Length != second.Length) return false;\n\n            for (var i = 0; i < first.Length; i++)\n                if (!first[i].Equals(second[i]))\n                    return false;\n\n            return true;\n        }\n    }\n}"
  },
  {
    "path": "OptimizedCore/OptimizedCore.csproj",
    "content": "<Project Sdk=\"Microsoft.NET.Sdk\">\r\n  <PropertyGroup>\r\n    <TargetFramework>net6.0</TargetFramework>\r\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\r\n    \r\n    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>\r\n  </PropertyGroup>\r\n  <ItemGroup>\r\n    <PackageReference Include=\"Microsoft.Extensions.ObjectPool\" Version=\"6.0.2\" />\r\n  </ItemGroup>\r\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\r\n</Project>\r\n"
  },
  {
    "path": "OptimizedCore/StringBuilderPool.cs",
    "content": "using System.Text;\nusing Microsoft.Extensions.ObjectPool;\n\nnamespace OptimizedCore\n{\n    public static class StringBuilderPool\n    {\n        private static readonly ObjectPool<StringBuilder> Pool = new DefaultObjectPool<StringBuilder>(new StringBuilderPooledObjectPolicy(), 1024);\n\n        public static StringBuilder Get()\n        {\n            var sb = Pool.Get();\n            sb.Clear();\n            return sb;\n        }\n\n        public static string GetStringAndReturn(StringBuilder sb)\n        {\n            var s = sb.ToString();\n            Return(sb);\n            return s;\n        }\n\n        public static void Return(StringBuilder sb)\n        {\n            if (sb == null) return;\n            Pool.Return(sb);\n        }\n    }\n}"
  },
  {
    "path": "OptimizedCore/StringExtensions.cs",
    "content": "﻿using System;\r\n\r\nnamespace OptimizedCore\r\n{\r\n    public static class StringExtensions\r\n    {\r\n        public static unsafe string[] OptimizedSplit(this string s, char delimiter, int numColumns = -1)\r\n        {\r\n            var numReplaces = 0;\r\n            int sLen        = s.Length;\r\n            var sepList     = new int[s.Length];\r\n\r\n            // find the locations of our tab delimiter\r\n            fixed (char* chPtr = s)\r\n            {\r\n                for (var index = 0; index < sLen; ++index)\r\n                {\r\n                    if (chPtr[index] == delimiter) sepList[numReplaces++] = index;\r\n                }\r\n            }\r\n\r\n            // extract our columns\r\n            var startIndex = 0;\r\n            var colIndex   = 0;\r\n\r\n            int numDelimitedColumns = numReplaces + 1;\r\n            if (numColumns < numDelimitedColumns) numColumns = numDelimitedColumns;\r\n\r\n            var columns = new string[numColumns];\r\n            for (var index = 0; index < numReplaces && startIndex < sLen; ++index)\r\n            {\r\n                columns[colIndex++] = s.Substring(startIndex, sepList[index] - startIndex);\r\n                startIndex = sepList[index] + 1;\r\n            }\r\n\r\n            // handle the last column\r\n            if (startIndex < sLen && numReplaces >= 0) columns[colIndex] = s.Substring(startIndex);\r\n            else if (colIndex == numReplaces) columns[colIndex] = string.Empty;\r\n\r\n            return columns;\r\n        }\r\n\r\n        public static (string Key, string Value) OptimizedKeyValue(this string s)\r\n        {\r\n            int equalPos = s.IndexOf('=');\r\n            return equalPos == -1 ? 
(s, null) : (s.Substring(0, equalPos), s.Substring(equalPos + 1));\r\n        }\r\n\r\n        /// <summary>\r\n        /// handles -2_147_483_647 to +2_147_483_647\r\n        /// </summary>\r\n        public static unsafe (int Number, bool FoundError) OptimizedParseInt32(this string s)\r\n        {\r\n            var number = 0;\r\n\r\n            // 2_147_483_647\r\n            if (string.IsNullOrEmpty(s) || s.Length > 11) return (0, true);\r\n\r\n            try\r\n            {\r\n                fixed (char* chPtr = s)\r\n                {\r\n                    int index         = s.Length - 1;\r\n                    var ptr           = chPtr;\r\n                    var applyNegative = false;\r\n\r\n                    if (*ptr == '-')\r\n                    {\r\n                        applyNegative = true;\r\n                        ptr++;\r\n                        index--;\r\n                    }\r\n\r\n                    while (index >= 0)\r\n                    {\r\n                        if (*ptr < 48 || *ptr > 57) return (0, true);\r\n\r\n                        checked\r\n                        {\r\n                            number *= 10;\r\n                            number += *ptr++ - '0';\r\n                        }\r\n\r\n                        index--;\r\n                    }\r\n\r\n                    if (applyNegative) number = -number;\r\n                }\r\n            }\r\n            catch (OverflowException)\r\n            {\r\n                return (0, true);\r\n            }\r\n\r\n            return (number, false);\r\n        }\r\n\r\n        public static bool OptimizedStartsWith(this string s, char ch) => s.Length > 0 && s[0] == ch;\r\n\r\n        public static bool OptimizedEndsWith(this string s, char ch) => s.Length > 0 && s[s.Length - 1] == ch;\r\n    }\r\n}\r\n"
  },
  {
    "path": "Phantom/AssemblyInfo.cs",
    "content": "﻿using System.Runtime.CompilerServices;\n\n[assembly: InternalsVisibleTo(\"UnitTests\")]"
  },
  {
    "path": "Phantom/Phantom.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\r\n  <PropertyGroup>\r\n    <TargetFramework>net6.0</TargetFramework>\r\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\r\n    \r\n  </PropertyGroup>\r\n  <ItemGroup>\r\n    <PackageReference Include=\"Newtonsoft.Json\" Version=\"13.0.1\" />\r\n    <ProjectReference Include=\"..\\OptimizedCore\\OptimizedCore.csproj\" />\r\n    <ProjectReference Include=\"..\\Genome\\Genome.csproj\" />\r\n    <ProjectReference Include=\"..\\VariantAnnotation.Interface\\VariantAnnotation.Interface.csproj\" />\r\n    <ProjectReference Include=\"..\\VariantAnnotation\\VariantAnnotation.csproj\" />\r\n    <ProjectReference Include=\"..\\Vcf\\Vcf.csproj\" />\r\n  </ItemGroup>\r\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\r\n</Project>\r\n"
  },
  {
    "path": "README.md",
    "content": "> [!IMPORTANT]\n> Nirvana is no longer actively maintained as an open sourced tool. Please visit [Illumina Connected Annotations](https://developer.illumina.com/illumina-connected-annotations) for the latest version. Latest documentation can be found [here](https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/)\n\nNirvana is licensed under [PolyForm Strict License 1.0.0](https://github.com/Illumina/Nirvana/blob/main/LICENSE) \n# Nirvana\n\nNirvana provides **clinical-grade annotation of genomic variants** (SNVs, MNVs, insertions, deletions, indels, and SVs (including CNVs). It can be run as a stand-alone package or integrated into larger software tools that require variant annotation.\n\nThe input to Nirvana are VCFs and the output is a structured JSON representation of all annotation and sample information (as extracted from the VCF). Optionally, a subset of the annotated data is available in VCF and/or gVCF files. Nirvana handles multiple alternate alleles and multiple samples with ease.\n\nThe software is being developed under a rigorous SDLC and testing process to ensure accuracy of the results and enable embedding in other software with regulatory needs. Nirvana uses a continuous integration pipeline where millions of variant annotations are monitored against baseline values on a daily basis.\n\nBackronym: **NI**mble and **R**obust **VA**riant a**N**not**A**tor\n<br clear=left>\n\n## Resources\n*Please note that our documentation site has moved.*\n* [Documentation](https://illumina.github.io/NirvanaDocumentation/)\n* [Release Notes](https://github.com/Illumina/Nirvana/releases)\n"
  },
  {
    "path": "ReferenceSequence/AssemblyInfo.cs",
    "content": "﻿using System.Runtime.CompilerServices;\n\n[assembly: InternalsVisibleTo(\"UnitTests\")]"
  },
  {
    "path": "ReferenceSequence/Commands/CreateReferenceMain.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.IO.Compression;\nusing System.Linq;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing ErrorHandling;\nusing Genome;\nusing IO;\nusing ReferenceSequence.Common;\nusing ReferenceSequence.Compression;\nusing ReferenceSequence.Creation;\nusing ReferenceSequence.IO;\n\nnamespace ReferenceSequence.Commands\n{\n    public static class CreateReferenceMain\n    {\n        private static string _fastaPrefix;\n        private static string _genomeAssemblyReportPath;\n        private static string _cytogeneticBandPath;\n        private static string _referenceNamesPath;\n        private static string _genomeAssembly;\n        private static string _outputCompressedPath;\n        private static byte _patchLevel;\n\n        private static ExitCodes ProgramExecution()\n        {\n            var genomeAssembly = GenomeAssemblyHelper.Convert(_genomeAssembly);\n\n            Console.Write(\"- loading previous reference names... \");\n            List<Chromosome> oldChromosomes = ReferenceNamesReader.GetReferenceNames(FileUtilities.GetReadStream(_referenceNamesPath));\n            Console.WriteLine(\"finished.\");\n\n            Dictionary<string, Chromosome> oldRefNameToChromosome = ReferenceDictionaryUtils.GetRefNameToChromosome(oldChromosomes);\n\n            Console.Write(\"- reading the genome assembly report... \");\n            List<Chromosome> chromosomes = AssemblyReader.GetChromosomes(FileUtilities.GetReadStream(_genomeAssemblyReportPath), oldRefNameToChromosome, oldChromosomes.Count);\n            int numRefSeqs  = chromosomes.Count;\n            Console.WriteLine($\"{numRefSeqs} references found.\");\n\n            Console.Write(\"- checking reference index contiguity... 
\");\n            CheckReferenceIndexContiguity(chromosomes, oldChromosomes);\n            Console.WriteLine(\"contiguous.\");\n\n            Dictionary<string, Chromosome> refNameToChromosome = ReferenceDictionaryUtils.GetRefNameToChromosome(chromosomes);\n\n            Console.Write(\"- reading cytogenetic bands... \");\n            List<Band>[] cytogeneticBandsByRef = CytogeneticBandsReader.GetCytogeneticBands(FileUtilities.GetReadStream(_cytogeneticBandPath),\n                    numRefSeqs, refNameToChromosome);\n            Console.WriteLine(\"finished.\");\n\n            Console.WriteLine(\"- reading FASTA files:\");\n            List<FastaSequence> fastaSequences = GetFastaSequences(_fastaPrefix, refNameToChromosome);\n            long genomeLength  = GetGenomeLength(fastaSequences);\n            Console.WriteLine($\"- genome length: {genomeLength:N0}\");\n\n            Console.Write(\"- check if chrY has PAR masking... \");\n            CheckChrYPadding(fastaSequences);\n            Console.WriteLine(\"unmasked.\");\n\n            Console.Write(\"- applying 2-bit compression... \");\n            List<Creation.ReferenceSequence> referenceSequences = CreateReferenceSequences(fastaSequences, cytogeneticBandsByRef);\n            Console.WriteLine(\"finished.\");\n\n            Console.Write(\"- creating reference sequence file... 
\");\n            CreateReferenceSequenceFile(genomeAssembly, _patchLevel, chromosomes, referenceSequences);\n            long fileSize = new FileInfo(_outputCompressedPath).Length;\n            Console.WriteLine($\"{fileSize:N0} bytes\");\n\n            return ExitCodes.Success;\n        }\n\n        private static long GetGenomeLength(IEnumerable<FastaSequence> fastaSequences) =>\n            fastaSequences.Aggregate<FastaSequence, long>(0, (current, fastaSequence) => current + fastaSequence.Bases.Length);\n\n        private static List<Creation.ReferenceSequence> CreateReferenceSequences(IEnumerable<FastaSequence> fastaSequences, IReadOnlyList<List<Band>> cytogeneticBandsByRef)\n        {\n            var referenceSequences = new List<Creation.ReferenceSequence>();\n\n            foreach (var fastaSequence in fastaSequences)\n            {\n                Band[] cytogeneticBands = cytogeneticBandsByRef[fastaSequence.Chromosome.Index].ToArray();\n                (byte[] buffer, MaskedEntry[] maskedEntries) = TwoBitCompressor.Compress(fastaSequence.Bases);\n                var referenceSequence = new Creation.ReferenceSequence(buffer, maskedEntries,\n                    cytogeneticBands, 0, fastaSequence.Bases.Length);\n                referenceSequences.Add(referenceSequence);\n            }\n\n            return referenceSequences;\n        }\n\n        private static void CheckChrYPadding(IEnumerable<FastaSequence> fastaSequences)\n        {\n            FastaSequence chrY = fastaSequences.FirstOrDefault(s => s.Chromosome.UcscName == \"chrY\");\n\n            if (chrY == null) return;\n\n            int numN = CountNs(chrY.Bases);\n\n            if (numN > 33720001)\n            {\n                throw new InvalidDataException($\"Found a large number of Ns ({numN}) in the Y chromosome. 
Are you sure the PAR region is unmasked?\");\n            }\n        }\n\n        private static List<FastaSequence> GetFastaSequences(string fastaPrefix, Dictionary<string, Chromosome> refNameToChromosome)\n        {\n            string directory = Path.GetDirectoryName(fastaPrefix);\n            string prefix    = Path.GetFileName(fastaPrefix);\n            string[] fastaFiles   = Directory.GetFiles(directory, $\"{prefix}*.fa.gz\");\n\n            var references = new List<FastaSequence>();\n\n            foreach (string filePath in fastaFiles)\n            {\n                Console.Write($\"  - parsing {Path.GetFileName(filePath)}... \");\n                FastaReader.AddReferenceSequences(new GZipStream(FileUtilities.GetReadStream(filePath), CompressionMode.Decompress), refNameToChromosome, references);\n                Console.WriteLine($\"total: {references.Count} sequences\");\n            }\n\n            return references.OrderBy(x => x.Chromosome.Index).ToList();\n        }\n\n        private static void CheckReferenceIndexContiguity(IEnumerable<Chromosome> chromosomes, IReadOnlyList<Chromosome> oldChromosomes)\n        {\n            ushort testRefIndex = 0;\n\n            foreach (var chromosome in chromosomes)\n            {\n                if (chromosome.Index != testRefIndex)\n                {\n                    Console.WriteLine($\"Found a non-contiguous entry at test refIndex: {testRefIndex} vs chromosome.Index: {chromosome.Index}\");\n                    Console.WriteLine($\"NEW: RefIndex: {chromosome.Index}, Ensembl: {chromosome.EnsemblName}, UCSC: {chromosome.UcscName}, GenBank: {chromosome.GenBankAccession}, RefSeq: {chromosome.RefSeqAccession}\");\n                    Console.WriteLine($\"OLD: RefIndex: {oldChromosomes[testRefIndex].Index}, Ensembl: {oldChromosomes[testRefIndex].EnsemblName}, UCSC: {oldChromosomes[testRefIndex].UcscName}, GenBank: {oldChromosomes[testRefIndex].GenBankAccession}, RefSeq: 
{oldChromosomes[testRefIndex].RefSeqAccession}\");\n                    Environment.Exit(1);\n                }\n\n                testRefIndex++;\n            }\n        }\n\n        private static void CreateReferenceSequenceFile(GenomeAssembly genomeAssembly, byte patchLevel,\n            IReadOnlyCollection<Chromosome> chromosomes, List<Creation.ReferenceSequence> referenceSequences)\n        {\n            using (var writer = new ReferenceSequenceWriter(FileUtilities.GetCreateStream(_outputCompressedPath),\n                chromosomes, genomeAssembly, patchLevel))\n            {\n                writer.Write(referenceSequences);\n            }\n        }\n\n        private static int CountNs(string s)\n        {\n            var numN = 0;\n            foreach (char c in s) if (c == 'N') numN++;\n            return numN;\n        }\n\n        public static ExitCodes Run(string command, string[] args)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"cb|c=\",\n                    \"cytogenetic band {filename}\",\n                    v => _cytogeneticBandPath = v\n                },\n                {\n                    \"ga=\",\n                    \"genome assembly {version}\",\n                    v => _genomeAssembly = v\n                },\n                {\n                    \"gar|g=\",\n                    \"genome assembly report {filename}\",\n                    v => _genomeAssemblyReportPath = v\n                },\n                {\n                    \"in|i=\",\n                    \"FASTA {prefix}\",\n                    v => _fastaPrefix = v\n                },\n                {\n                    \"patch=\",\n                    \"patch {level}\",\n                    (byte v) => _patchLevel = v\n                },\n                {\n                    \"rn=\",\n                    \"reference names {filename}\",\n                    v => _referenceNamesPath = v\n                
},\n                {\n                    \"out|o=\",\n                    \"output compressed reference {filename}\",\n                    v => _outputCompressedPath = v\n                }\n            };\n\n            string commandLineExample = $\"{command} --in <prefix> --gar <path> --cb <path> --rn <path> --ga <genome assembly> --out <path>\";\n\n            return new ConsoleAppBuilder(args, ops)\n                .Parse()\n                .CheckInputFilenameExists(_genomeAssemblyReportPath, \"genome assembly report\", \"--gar\")\n                .CheckInputFilenameExists(_cytogeneticBandPath, \"cytogenetic band\", \"--cb\")\n                .CheckInputFilenameExists(_referenceNamesPath, \"reference names\", \"--rn\")\n                .HasRequiredParameter(_fastaPrefix, \"FASTA prefix\", \"--in\")\n                .HasRequiredParameter(_genomeAssembly, \"genome assembly\", \"--ga\")\n                .HasRequiredParameter(_patchLevel, \"patch level\", \"--patch\")\n                .HasRequiredParameter(_outputCompressedPath, \"output reference\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Converts a FASTA file to the Nirvana reference format.\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n        }\n    }\n}\n"
  },
  {
    "path": "ReferenceSequence/Commands/CreateSubstringMain.cs",
    "content": "\uFEFFusing System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.IO.Compression;\nusing System.Linq;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing ErrorHandling;\nusing Genome;\nusing IO;\nusing ReferenceSequence.Common;\nusing ReferenceSequence.Compression;\nusing ReferenceSequence.Creation;\nusing ReferenceSequence.IO;\n\nnamespace ReferenceSequence.Commands\n{\n    // Extracts the 1-based, inclusive region [--begin, --end] from a gzip-compressed FASTA file that\n    // contains exactly one sequence and writes it as a compressed reference file.\n    public static class CreateSubstringMain\n    {\n        private static string _fastaPath;\n        private static string _genomeAssemblyReportPath;\n        private static string _cytogeneticBandPath;\n\n        private static string _genomeAssembly;\n        private static string _outputCompressedPath;\n\n        // 1-based, inclusive bounds of the requested region (--begin / --end)\n        private static int _beginPosition;\n        private static int _endPosition;\n\n        // Runs the pipeline: assembly report -> FASTA region -> cytogenetic bands -> 2-bit compression -> output file.\n        private static ExitCodes ProgramExecution()\n        {\n            var genomeAssembly = GenomeAssemblyHelper.Convert(_genomeAssembly);\n\n            Console.Write(\"- reading the genome assembly report... \");\n            var dummyRefNameToChromosome = new Dictionary<string, Chromosome>();\n            List<Chromosome> chromosomes = AssemblyReader.GetChromosomes(FileUtilities.GetReadStream(_genomeAssemblyReportPath), dummyRefNameToChromosome, 0);\n            int numRefSeqs  = chromosomes.Count;\n            Console.WriteLine($\"{numRefSeqs} references found.\");\n\n            Dictionary<string, Chromosome> refNameToChromosome = ReferenceDictionaryUtils.GetRefNameToChromosome(chromosomes);\n\n            Console.Write(\"- reading FASTA file... \");\n            var fastaSequence = GetFastaSequence(_fastaPath, refNameToChromosome);\n            Console.WriteLine($\"- sequence length: {fastaSequence.Bases.Length:N0}\");\n\n            Console.Write(\"- reading cytogenetic bands... \");\n            List<Band> cytogeneticBands = GetCytogeneticBands(fastaSequence.Chromosome.Index, numRefSeqs, refNameToChromosome);\n            Console.WriteLine(\"finished.\");\n\n            Console.Write(\"- applying 2-bit compression... \");\n            var referenceSequence = CreateReferenceSequence(fastaSequence, cytogeneticBands);\n            Console.WriteLine(\"finished.\");\n\n            Console.Write(\"- creating reference sequence file... \");\n            var minimalChromosomes = new List<Chromosome> { fastaSequence.Chromosome };\n            CreateReferenceSequenceFile(genomeAssembly, minimalChromosomes, referenceSequence);\n            long fileSize = new FileInfo(_outputCompressedPath).Length;\n            Console.WriteLine($\"{fileSize:N0} bytes\");\n\n            return ExitCodes.Success;\n        }\n\n        // Returns the cytogenetic bands of reference refIndex that overlap the requested region.\n        private static List<Band> GetCytogeneticBands(ushort refIndex, int numRefSeqs, Dictionary<string, Chromosome> refNameToChromosome)\n        {\n            List<Band> chrBands = CytogeneticBandsReader.GetCytogeneticBands(FileUtilities.GetReadStream(_cytogeneticBandPath), numRefSeqs, refNameToChromosome)[refIndex];\n\n            // --end is an inclusive end position (GetFastaSequence uses it the same way), not a length\n            return chrBands.Where(band => Intervals.Utilities.Overlaps(_beginPosition, _endPosition, band.Begin, band.End))\n                .ToList();\n        }\n\n        // Writes a reference file that contains the single extracted sequence (patch level 0).\n        private static void CreateReferenceSequenceFile(GenomeAssembly genomeAssembly, IReadOnlyCollection<Chromosome> chromosomes, Creation.ReferenceSequence referenceSequence)\n        {\n            using (var writer = new ReferenceSequenceWriter(FileUtilities.GetCreateStream(_outputCompressedPath),\n                chromosomes, genomeAssembly, 0))\n            {\n                writer.Write(new List<Creation.ReferenceSequence> {referenceSequence});\n            }\n        }\n\n        // 2-bit compresses the extracted bases; the sequence offset is the 0-based begin position.\n        private static Creation.ReferenceSequence CreateReferenceSequence(FastaSequence fastaSequence, List<Band> cytogeneticBands)\n        {\n            Band[] bands = cytogeneticBands.ToArray();\n            (byte[] buffer, MaskedEntry[] maskedEntries) = TwoBitCompressor.Compress(fastaSequence.Bases);\n            return new Creation.ReferenceSequence(buffer, maskedEntries, bands, _beginPosition - 1, fastaSequence.Bases.Length);\n        }\n\n        // Reads the only sequence in the gzip-compressed FASTA file and returns the requested region.\n        // Throws InvalidDataException when the file does not contain exactly one sequence or when the\n        // region does not lie within that sequence.\n        private static FastaSequence GetFastaSequence(string fastaPath, Dictionary<string, Chromosome> refNameToChromosome)\n        {\n            var references = new List<FastaSequence>();\n            FastaReader.AddReferenceSequences(new GZipStream(FileUtilities.GetReadStream(fastaPath), CompressionMode.Decompress), refNameToChromosome, references);\n\n            if (references.Count != 1)\n            {\n                throw new InvalidDataException($\"Expected 1 reference, but found {references.Count} references.\");\n            }\n\n            var reference = references[0];\n\n            // fail with a clear message instead of an ArgumentOutOfRangeException from Substring\n            if (_beginPosition < 1 || _endPosition < _beginPosition || _endPosition > reference.Bases.Length)\n            {\n                throw new InvalidDataException($\"The requested region ({_beginPosition}-{_endPosition}) is not contained in the reference sequence (1-{reference.Bases.Length}).\");\n            }\n\n            int length       = _endPosition - _beginPosition + 1;\n            string substring = reference.Bases.Substring(_beginPosition - 1, length);\n\n            return new FastaSequence(reference.Chromosome, substring);\n        }\n\n        // Parses the command line, validates the parameters and runs the command.\n        public static ExitCodes Run(string command, string[] args)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"begin=\",\n                    \"begin {position}\",\n                    (int v) => _beginPosition = v\n                },\n                {\n                    \"cb|c=\",\n                    \"cytogenetic band {filename}\",\n                    v => _cytogeneticBandPath = v\n                },\n                {\n                    \"end=\",\n                    \"end {position}\",\n                    (int v) => _endPosition = v\n                },\n                {\n                    \"ga=\",\n                    \"genome assembly {version}\",\n                    v => _genomeAssembly = v\n                },\n                {\n                    \"gar|g=\",\n                    \"genome assembly report {filename}\",\n                    v => _genomeAssemblyReportPath = v\n                },\n                {\n                    \"in|i=\",\n                    \"FASTA {filename}\",\n                    v => _fastaPath = v\n                },\n                {\n                    \"out|o=\",\n                    \"output compressed reference {filename}\",\n                    v => _outputCompressedPath = v\n                }\n            };\n\n            string commandLineExample = $\"{command} --in <path> --gar <path> --cb <path> --ga <genome assembly> --begin <position> --end <position> --out <path>\";\n\n            return new ConsoleAppBuilder(args, ops)\n                .Parse()\n                .CheckInputFilenameExists(_genomeAssemblyReportPath, \"genome assembly report\", \"--gar\")\n                .CheckInputFilenameExists(_cytogeneticBandPath, \"cytogenetic band\", \"--cb\")\n                .HasRequiredParameter(_fastaPath, \"FASTA filename\", \"--in\")\n                .HasRequiredParameter(_genomeAssembly, \"genome assembly\", \"--ga\")\n                .HasRequiredParameter(_outputCompressedPath, \"output reference\", \"--out\")\n                .HasRequiredParameter(_beginPosition, \"begin position\", \"--begin\")\n                .HasRequiredParameter(_endPosition, \"end position\", \"--end\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Converts a FASTA file to the Nirvana reference format.\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n        }\n    }\n}\n"
  },
  {
    "path": "ReferenceSequence/Commands/CreateTestSeqMain.cs",
    "content": "\uFEFFusing System;\nusing System.Collections.Generic;\nusing System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing ErrorHandling;\nusing Genome;\nusing IO;\nusing ReferenceSequence.Common;\nusing ReferenceSequence.Compression;\nusing ReferenceSequence.Creation;\nusing ReferenceSequence.IO;\n\nnamespace ReferenceSequence.Commands\n{\n    // Writes a small, hard-coded test sequence (chrTestSeq) as a compressed reference file.\n    public static class CreateTestSeqMain\n    {\n        private static string _outputCompressedPath;\n\n        // Builds the test sequence, compresses it and writes it to the output path.\n        private static ExitCodes ProgramExecution()\n        {\n            var testSeqChromosome = new Chromosome(\"chrTestSeq\", \"TestSeq\", null, null, 1, 0);\n            var chromosomes       = new List<Chromosome> {testSeqChromosome};\n\n            Console.Write(\"- creating FASTA sequence... \");\n            var fastaSequence = new FastaSequence(testSeqChromosome, \"NNATGTTTCCACTTTCTCCTCATTAGANNNTAACGAATGGGTGATTTCCCTAN\");\n            Console.WriteLine($\"- sequence length: {fastaSequence.Bases.Length:N0}\");\n\n            Console.Write(\"- applying 2-bit compression... \");\n            var referenceSequence = CreateReferenceSequence(fastaSequence);\n            Console.WriteLine(\"finished.\");\n\n            Console.Write(\"- creating reference sequence file... \");\n            CreateReferenceSequenceFile(GenomeAssembly.GRCh37, chromosomes, referenceSequence);\n            long fileSize = new FileInfo(_outputCompressedPath).Length;\n            Console.WriteLine($\"{fileSize:N0} bytes\");\n\n            return ExitCodes.Success;\n        }\n\n        // Writes a reference file that contains the single test sequence (patch level 0).\n        private static void CreateReferenceSequenceFile(GenomeAssembly genomeAssembly, IReadOnlyCollection<Chromosome> chromosomes, Creation.ReferenceSequence referenceSequence)\n        {\n            using (var writer = new ReferenceSequenceWriter(FileUtilities.GetCreateStream(_outputCompressedPath),\n                chromosomes, genomeAssembly, 0))\n            {\n                writer.Write(new List<Creation.ReferenceSequence> { referenceSequence });\n            }\n        }\n\n        // 2-bit compresses the bases; the test sequence has no cytogenetic bands and starts at offset 0.\n        private static Creation.ReferenceSequence CreateReferenceSequence(FastaSequence fastaSequence)\n        {\n            (byte[] buffer, MaskedEntry[] maskedEntries) = TwoBitCompressor.Compress(fastaSequence.Bases);\n            return new Creation.ReferenceSequence(buffer, maskedEntries, Array.Empty<Band>(), 0, fastaSequence.Bases.Length);\n        }\n\n        // Parses the command line, validates the parameters and runs the command.\n        public static ExitCodes Run(string command, string[] args)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"out|o=\",\n                    \"output compressed reference {filename}\",\n                    v => _outputCompressedPath = v\n                }\n            };\n\n            // --out is used verbatim as the output file path, so the example shows a path rather than a prefix\n            string commandLineExample = $\"{command} --out <path>\";\n\n            return new ConsoleAppBuilder(args, ops)\n                .Parse()\n                .HasRequiredParameter(_outputCompressedPath, \"output reference\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a TestSeq_reference.dat file.\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n        }\n    }\n}\n"
  },
  {
    "path": "ReferenceSequence/Common/IndexEntry.cs",
    "content": "\uFEFFnamespace ReferenceSequence.Common\n{\n    // Pairs a reference index with a file offset.\n    internal sealed class IndexEntry\n    {\n        // NOTE(review): 10 equals sizeof(ushort) + sizeof(long), presumably the number of bytes one entry\n        // occupies in the file - confirm against the index reader/writer.\n        public const int Size = sizeof(ushort) + sizeof(long);\n\n        public readonly ushort RefIndex;\n        public readonly long   FileOffset;\n\n        internal IndexEntry(ushort refIndex, long fileOffset)\n        {\n            FileOffset = fileOffset;\n            RefIndex   = refIndex;\n        }\n    }\n}"
  },
  {
    "path": "ReferenceSequence/Common/MaskedEntry.cs",
    "content": "\uFEFFnamespace ReferenceSequence.Common\n{\n    // A run of masked bases, stored as an inclusive [Begin, End] interval.\n    internal sealed class MaskedEntry\n    {\n        public readonly int Begin;\n        public readonly int End;\n\n        internal MaskedEntry(int begin, int end) => (Begin, End) = (begin, end);\n    }\n}"
  },
  {
    "path": "ReferenceSequence/Common/Sequence.cs",
    "content": "\uFEFFusing Genome;\nusing Intervals;\nusing ReferenceSequence.IO;\n\nnamespace ReferenceSequence.Common\n{\n    // A 2-bit encoded reference sequence that decodes substrings on demand and restores masked (N)\n    // regions from an interval lookup.\n    public sealed class Sequence : ISequence\n    {\n        public int Length { get; private set; }\n        public Band[] CytogeneticBands { get; private set; }\n        public GenomeAssembly Assembly { get; set; }\n\n        private byte[] _buffer;\n        private int _sequenceOffset;\n        private IIntervalSearch<MaskedEntry> _maskedIntervalSearch;\n        private bool _useNSequence;\n\n        // maps each 2-bit code to its base\n        private readonly char[] _convertNumberToBase = \"GCTA\".ToCharArray();\n\n        // scratch buffer that is reused between Substring calls and grows on demand\n        private char[] _decompressBuffer = new char[1024];\n\n        // Returns the byte index and bit shift of the base that follows referencePosition,\n        // i.e. of position (referencePosition + 1).\n        private static (int BaseIndex, int Shift) GetBaseIndexAndShift(int referencePosition)\n        {\n            int nextPosition = referencePosition + 1;\n            return (nextPosition / 4, (3 - nextPosition % 4) * 2);\n        }\n\n        internal static int GetNumBufferBytes(int numBases)\n        {\n            return (int)((double)numBases / ReferenceSequenceCommon.NumBasesPerByte + 1);\n        }\n\n        // After this call Substring returns runs of N until Set is called again.\n        public void EnableNSequence()\n        {\n            _useNSequence = true;\n        }\n\n        // Replaces the contents of this sequence and switches the N-sequence mode off.\n        internal void Set(int length, int sequenceOffset, byte[] twoBitBuffer,\n            IntervalArray<MaskedEntry> maskedEntryIntervalArray, Band[] cytogeneticBands)\n        {\n            _useNSequence         = false;\n            _sequenceOffset       = sequenceOffset;\n            _buffer               = twoBitBuffer;\n            _maskedIntervalSearch = maskedEntryIntervalArray;\n            CytogeneticBands      = cytogeneticBands;\n            Length                = length;\n        }\n\n        // Decodes up to length bases starting at offset. Returns null when the start lies outside the\n        // stored sequence or when length is smaller than 1; requests that run past the end are clipped.\n        public string Substring(int offset, int length)\n        {\n            if (_useNSequence) return new string('N', length);\n\n            // translate to coordinates relative to the stored sequence\n            int start = offset - _sequenceOffset;\n            if (start < 0 || length < 1 || start >= Length) return null;\n\n            // clip requests that run past the end of the sequence\n            int numBases = start + length > Length ? Length - start : length;\n\n            if (numBases > _decompressBuffer.Length) _decompressBuffer = new char[numBases];\n\n            // position the decoder on the first requested base\n            (int byteIndex, int shift) = GetBaseIndexAndShift(start - 1);\n            byte currentByte = _buffer[byteIndex];\n\n            MaskedEntry[] maskedEntries = _maskedIntervalSearch.GetAllOverlappingValues(start, start + numBases - 1);\n\n            bool hasMaskedEntry     = maskedEntries != null;\n            int  numMaskedEntries   = maskedEntries?.Length ?? 0;\n            var  maskedEntryIndex   = 0;\n            var  currentMaskedEntry = hasMaskedEntry ? maskedEntries[0] : null;\n\n            var baseIndex = 0;\n\n            while (baseIndex < numBases)\n            {\n                int currentPosition = start + baseIndex;\n\n                if (hasMaskedEntry && currentPosition >= currentMaskedEntry.Begin && currentPosition <= currentMaskedEntry.End)\n                {\n                    // fill in the masked run, then re-seed the decoder on the base that follows it\n                    baseIndex += MaskBases(start, numBases, baseIndex, currentMaskedEntry);\n\n                    (byteIndex, shift) = GetBaseIndexAndShift(start + baseIndex - 1);\n                    currentByte = _buffer[byteIndex];\n\n                    maskedEntryIndex++;\n                    hasMaskedEntry     = maskedEntryIndex < numMaskedEntries;\n                    currentMaskedEntry = hasMaskedEntry ? maskedEntries[maskedEntryIndex] : null;\n                    continue;\n                }\n\n                // evaluate normal bases\n                _decompressBuffer[baseIndex] = _convertNumberToBase[(currentByte >> shift) & 3];\n                baseIndex++;\n\n                shift -= 2;\n                if (shift >= 0) continue;\n\n                // the current byte is exhausted, move on to the next one\n                shift = CompressedSequenceReader.MaxShift;\n                byteIndex++;\n                currentByte = _buffer[byteIndex];\n            }\n\n            return new string(_decompressBuffer, 0, numBases);\n        }\n\n        // Writes N from baseIndex up to the end of the masked interval (or the end of the request) and\n        // returns the number of bases written.\n        private int MaskBases(int offset, int length, int baseIndex, MaskedEntry currentInterval)\n        {\n            var numBasesMasked = 0;\n\n            while (baseIndex + numBasesMasked <= currentInterval.End - offset && baseIndex + numBasesMasked < length)\n            {\n                _decompressBuffer[baseIndex + numBasesMasked] = 'N';\n                numBasesMasked++;\n            }\n\n            return numBasesMasked;\n        }\n    }\n}"
  },
  {
    "path": "ReferenceSequence/Compression/TwoBitCompressor.cs",
    "content": "\uFEFFusing System.Collections.Generic;\nusing ReferenceSequence.Common;\n\nnamespace ReferenceSequence.Compression\n{\n    // Packs a nucleotide string into 2 bits per base and records the runs of N that the 2-bit encoding\n    // cannot represent.\n    internal static class TwoBitCompressor\n    {\n        private const string Bases        = \"GCTA\";\n        private const byte UnknownBase    = 10;\n        private const int NumBasesPerByte = 4;\n\n        // character code -> 2-bit code (UnknownBase for everything except G, C, T and A in either case)\n        private static readonly byte[] ConvertBaseToNumber = new byte[256];\n\n        static TwoBitCompressor()\n        {\n            for (var index = 0; index < ConvertBaseToNumber.Length; ++index)\n                ConvertBaseToNumber[index] = UnknownBase;\n\n            for (var index = 0; index < Bases.Length; ++index)\n            {\n                ConvertBaseToNumber[Bases[index]]               = (byte)index;\n                ConvertBaseToNumber[char.ToLower(Bases[index])] = (byte)index;\n            }\n        }\n\n        // one spare byte is always allocated, matching the original (numBases / 4.0 + 1.0) sizing\n        private static int GetNumBufferBytes(int numBases) => numBases / NumBasesPerByte + 1;\n\n        // Returns the 2-bit packed bases (first base in the two most significant bits of each byte) and\n        // the 0-based, inclusive intervals of masked bases (N or n).\n        public static (byte[] Buffer, MaskedEntry[] MaskedEntries) Compress(string bases)\n        {\n            var buffer = new byte[GetNumBufferBytes(bases.Length)];\n            PackBases(bases, buffer);\n            return (buffer, GetMaskedEntries(bases));\n        }\n\n        // Packs four bases into each byte; anything that is not G, C, T or A is stored as code 0.\n        private static void PackBases(string bases, byte[] buffer)\n        {\n            byte currentByte   = 0;\n            var bufferIndex    = 0;\n            var numBasesInByte = 0;\n\n            foreach (char b in bases)\n            {\n                // characters beyond the lookup table are handled like any other unknown base instead\n                // of indexing past the end of the table\n                byte code = b < ConvertBaseToNumber.Length ? ConvertBaseToNumber[b] : UnknownBase;\n                if (code == UnknownBase) code = 0;\n\n                currentByte = (byte)((uint)currentByte << 2 | code);\n                ++numBasesInByte;\n\n                if (numBasesInByte != NumBasesPerByte) continue;\n\n                buffer[bufferIndex] = currentByte;\n                currentByte    = 0;\n                numBasesInByte = 0;\n                ++bufferIndex;\n            }\n\n            // left-align the bases of a partially filled last byte\n            if (numBasesInByte != 0) buffer[bufferIndex] = (byte)((uint)currentByte << (NumBasesPerByte - numBasesInByte) * 2);\n        }\n\n        // Collects every maximal run of masked bases as a 0-based, inclusive interval.\n        private static MaskedEntry[] GetMaskedEntries(string bases)\n        {\n            var maskedEntries = new List<MaskedEntry>();\n            var position      = 0;\n\n            while (position < bases.Length)\n            {\n                if (!IsMasked(bases[position]))\n                {\n                    position++;\n                    continue;\n                }\n\n                int begin = position;\n                while (position < bases.Length && IsMasked(bases[position])) position++;\n\n                maskedEntries.Add(new MaskedEntry(begin, position - 1));\n            }\n\n            return maskedEntries.ToArray();\n        }\n\n        // Lowercase bases are accepted by the lookup table, so a lowercase n has to be masked as well;\n        // otherwise it would silently decode as G.\n        private static bool IsMasked(char b) => b == 'N' || b == 'n';\n    }\n}"
  },
  {
    "path": "ReferenceSequence/Creation/FastaSequence.cs",
    "content": "\uFEFFusing Genome;\n\nnamespace ReferenceSequence.Creation\n{\n    // A FASTA record: the chromosome it was assigned to and its bases.\n    internal sealed class FastaSequence\n    {\n        public readonly string Bases;\n        public readonly Chromosome Chromosome;\n\n        internal FastaSequence(Chromosome chromosome, string bases)\n        {\n            Bases      = bases;\n            Chromosome = chromosome;\n        }\n    }\n}\n"
  },
  {
    "path": "ReferenceSequence/Creation/ReferenceBuffer.cs",
    "content": "namespace ReferenceSequence.Creation\n{\n    // A serialized reference sequence together with the index of the reference it belongs to.\n    public sealed class ReferenceBuffer\n    {\n        public readonly ushort RefIndex;\n        public readonly int    BufferSize;\n        public readonly byte[] Buffer;\n\n        public ReferenceBuffer(ushort refIndex, byte[] buffer, int bufferSize)\n        {\n            BufferSize = bufferSize;\n            Buffer     = buffer;\n            RefIndex   = refIndex;\n        }\n    }\n}"
  },
  {
    "path": "ReferenceSequence/Creation/ReferenceDictionaryUtils.cs",
    "content": "\uFEFFusing System.Collections.Generic;\nusing System.IO;\nusing Genome;\n\nnamespace ReferenceSequence.Creation\n{\n    internal static class ReferenceDictionaryUtils\n    {\n        // Builds a lookup from every non-empty name of a chromosome (UCSC, Ensembl, RefSeq accession,\n        // GenBank accession) to that chromosome. When two chromosomes share a name, the later one wins.\n        // Throws InvalidDataException if a chromosome has no name at all.\n        internal static Dictionary<string, Chromosome> GetRefNameToChromosome(IEnumerable<Chromosome> chromosomes)\n        {\n            var refNameToChromosome = new Dictionary<string, Chromosome>();\n\n            foreach (var chromosome in chromosomes)\n            {\n                string[] names =\n                {\n                    chromosome.UcscName, chromosome.EnsemblName, chromosome.RefSeqAccession, chromosome.GenBankAccession\n                };\n\n                var numNamesAdded = 0;\n\n                foreach (string name in names)\n                {\n                    if (string.IsNullOrEmpty(name)) continue;\n                    refNameToChromosome[name] = chromosome;\n                    numNamesAdded++;\n                }\n\n                if (numNamesAdded == 0)\n                    throw new InvalidDataException(\"Expected at least one chromosome field to be non-empty.\");\n            }\n\n            return refNameToChromosome;\n        }\n    }\n}\n"
  },
  {
    "path": "ReferenceSequence/Creation/ReferenceSequence.cs",
    "content": "\uFEFFusing System.IO;\nusing System.Text;\nusing Genome;\nusing IO;\nusing ReferenceSequence.Common;\n\nnamespace ReferenceSequence.Creation\n{\n    // A compressed reference sequence that can serialize itself into a ReferenceBuffer.\n    internal sealed class ReferenceSequence\n    {\n        private readonly int           _sequenceOffset;\n        private readonly int           _numBases;\n        private readonly byte[]        _buffer;\n        private readonly MaskedEntry[] _maskedEntries;\n        private readonly Band[]        _cytogeneticBands;\n\n        internal ReferenceSequence(byte[] buffer, MaskedEntry[] maskedEntries, Band[] cytogeneticBands,\n            int sequenceOffset, int numBases)\n        {\n            _sequenceOffset   = sequenceOffset;\n            _numBases         = numBases;\n            _buffer           = buffer;\n            _maskedEntries    = maskedEntries;\n            _cytogeneticBands = cytogeneticBands;\n        }\n\n        // Serializes this sequence in the order: start tag, metadata, 2-bit buffer, masked entries,\n        // cytogenetic bands.\n        internal ReferenceBuffer GetReferenceBuffer(ushort refIndex)\n        {\n            using (var memoryStream = new MemoryStream())\n            {\n                // NOTE(review): the third argument presumably keeps the stream open after the writer is\n                // disposed (as with BinaryWriter) - confirm against ExtendedBinaryWriter\n                using (var writer = new ExtendedBinaryWriter(memoryStream, Encoding.UTF8, true))\n                {\n                    writer.Write(ReferenceSequenceCommon.ReferenceStartTag);\n                    WriteMetadata(writer);\n                    WriteBuffer(writer);\n                    WriteMaskedEntries(writer);\n                    WriteCytogeneticBands(writer);\n                }\n\n                var numBytes   = (int) memoryStream.Position;\n                byte[] content = memoryStream.ToArray();\n\n                return new ReferenceBuffer(refIndex, content, numBytes);\n            }\n        }\n\n        // sequence offset followed by the number of bases\n        private void WriteMetadata(IExtendedBinaryWriter writer)\n        {\n            writer.WriteOpt(_sequenceOffset);\n            writer.WriteOpt(_numBases);\n        }\n\n        // length-prefixed 2-bit buffer\n        private void WriteBuffer(IExtendedBinaryWriter writer)\n        {\n            writer.WriteOpt(_buffer.Length);\n            writer.Write(_buffer);\n        }\n\n        // count followed by (begin, end) for each masked entry\n        private void WriteMaskedEntries(IExtendedBinaryWriter writer)\n        {\n            writer.WriteOpt(_maskedEntries.Length);\n\n            foreach (var entry in _maskedEntries)\n            {\n                writer.WriteOpt(entry.Begin);\n                writer.WriteOpt(entry.End);\n            }\n        }\n\n        // count followed by (begin, end, name) for each band\n        private void WriteCytogeneticBands(IExtendedBinaryWriter writer)\n        {\n            writer.WriteOpt(_cytogeneticBands.Length);\n\n            foreach (var cytogeneticBand in _cytogeneticBands)\n            {\n                writer.WriteOpt(cytogeneticBand.Begin);\n                writer.WriteOpt(cytogeneticBand.End);\n                writer.WriteOptAscii(cytogeneticBand.Name);\n            }\n        }\n    }\n}"
  },
  {
    "path": "ReferenceSequence/IO/AssemblyReader.cs",
    "content": "\uFEFFusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing Genome;\nusing OptimizedCore;\n\nnamespace ReferenceSequence.IO\n{\n    // Reads chromosomes from a tab-delimited genome assembly report.\n    public static class AssemblyReader\n    {\n        private const int EnsemblIndex          = 0;\n        private const int GenBankAccessionIndex = 4;\n        private const int RefSeqAccessionIndex  = 6;\n        private const int LengthIndex           = 8;\n        private const int UcscIndex             = 9;\n\n        // the highest column index that is read determines how many columns a line must have\n        private const int MinNumColumns = UcscIndex + 1;\n\n        // Parses every non-comment line into a chromosome and returns the chromosomes ordered by index.\n        // A chromosome whose name is found in oldRefNameToChromosome keeps that index; all others receive\n        // consecutive indices starting at oldNumRefSeqs.\n        // Throws InvalidDataException when a line has fewer columns than required.\n        public static List<Chromosome> GetChromosomes(Stream stream, Dictionary<string, Chromosome> oldRefNameToChromosome, int oldNumRefSeqs)\n        {\n            var nextRefIndex = (ushort)oldNumRefSeqs;\n            var chromosomes  = new List<Chromosome>();\n\n            using (var reader = new StreamReader(stream))\n            {\n                string line;\n\n                while ((line = reader.ReadLine()) != null)\n                {\n                    // skip blank lines and comments\n                    if (line.Length == 0 || line.OptimizedStartsWith('#')) continue;\n\n                    string[] cols = line.OptimizedSplit('\\t');\n\n                    // report malformed lines explicitly instead of failing with an IndexOutOfRangeException\n                    if (cols.Length < MinNumColumns)\n                    {\n                        throw new InvalidDataException($\"Expected at least {MinNumColumns} columns in the genome assembly report, but found {cols.Length} columns: [{line}]\");\n                    }\n\n                    string ensemblName      = cols[EnsemblIndex].Sanitize();\n                    string genBankAccession = cols[GenBankAccessionIndex].Sanitize();\n                    string refSeqAccession  = cols[RefSeqAccessionIndex].Sanitize();\n                    int length              = int.Parse(cols[LengthIndex]);\n                    string ucscName         = cols[UcscIndex].Sanitize();\n\n                    ushort refIndex = GetRefIndex(oldRefNameToChromosome, ensemblName, ucscName, genBankAccession, refSeqAccession, ref nextRefIndex);\n                    chromosomes.Add(new Chromosome(ucscName, ensemblName, refSeqAccession, genBankAccession, length, refIndex));\n                }\n            }\n\n            return chromosomes.OrderBy(x => x.Index).ToList();\n        }\n\n        // the report uses \"na\" for missing values\n        private static string Sanitize(this string s) => s == \"na\" ? null : s;\n\n        // Looks the names up in the order Ensembl, UCSC, GenBank, RefSeq and returns the index of the\n        // first hit; otherwise hands out nextRefIndex and increments it.\n        private static ushort GetRefIndex(Dictionary<string, Chromosome> refNameToChromosome, string ensemblName, string ucscName, string genBankAccession, string refSeqAccession, ref ushort nextRefIndex)\n        {\n            if (!string.IsNullOrEmpty(ensemblName)      && refNameToChromosome.TryGetValue(ensemblName, out var chromosome))  return chromosome.Index;\n            if (!string.IsNullOrEmpty(ucscName)         && refNameToChromosome.TryGetValue(ucscName, out chromosome))         return chromosome.Index;\n            if (!string.IsNullOrEmpty(genBankAccession) && refNameToChromosome.TryGetValue(genBankAccession, out chromosome)) return chromosome.Index;\n            if (!string.IsNullOrEmpty(refSeqAccession)  && refNameToChromosome.TryGetValue(refSeqAccession, out chromosome))  return chromosome.Index;\n\n            ushort refIndex = nextRefIndex;\n            nextRefIndex++;\n            return refIndex;\n        }\n    }\n}\n"
  },
  {
    "path": "ReferenceSequence/IO/CytogeneticBandsReader.cs",
    "content": "\uFEFFusing System.Collections.Generic;\nusing System.IO;\nusing Genome;\n\nnamespace ReferenceSequence.IO\n{\n    public static class CytogeneticBandsReader\n    {\n        private const int ExpectedNumColumns = 5;\n\n        // Reads tab-delimited cytogenetic bands and groups them by reference index. Reading stops at the\n        // end of the stream or at the first empty line; bands on unknown chromosomes are skipped.\n        // Throws InvalidDataException when a line does not have exactly five columns.\n        public static List<Band>[] GetCytogeneticBands(Stream stream, int numRefSeqs, Dictionary<string, Chromosome> refNameToChromosome)\n        {\n            var bandLists = new List<Band>[numRefSeqs];\n            for (var refIndex = 0; refIndex < bandLists.Length; refIndex++) bandLists[refIndex] = new List<Band>();\n\n            using (var reader = new StreamReader(stream))\n            {\n                string line;\n\n                while (!string.IsNullOrEmpty(line = reader.ReadLine()))\n                {\n                    string[] cols = line.Split('\\t');\n\n                    if (cols.Length != ExpectedNumColumns)\n                    {\n                        throw new InvalidDataException($\"Expected {ExpectedNumColumns} columns, but found {cols.Length} columns: [{line}]\");\n                    }\n\n                    // the begin column is incremented by one, the end column is used as-is\n                    string ucscName = cols[0];\n                    int begin       = int.Parse(cols[1]) + 1;\n                    int end         = int.Parse(cols[2]);\n                    string name     = cols[3];\n\n                    var chromosome = ReferenceNameUtilities.GetChromosome(refNameToChromosome, ucscName);\n\n                    if (!chromosome.IsEmpty())\n                    {\n                        bandLists[chromosome.Index].Add(new Band(begin, end, name));\n                    }\n                }\n            }\n\n            return bandLists;\n        }\n    }\n}\n"
  },
  {
    "path": "ReferenceSequence/IO/FastaReader.cs",
    "content": "\uFEFFusing System.Collections.Generic;\nusing System.IO;\nusing System.Text;\nusing System.Text.RegularExpressions;\nusing ErrorHandling.Exceptions;\nusing Genome;\nusing ReferenceSequence.Creation;\n\nnamespace ReferenceSequence.IO\n{\n    internal static class FastaReader\n    {\n        private static readonly Regex NameRegex = new Regex(\"^>(\\\\S+)\", RegexOptions.Compiled);\n\n        // >gi|224589823|ref|NC_000024.9|\n        private static readonly Regex NcbiRegex = new Regex(\"^>gi\\\\|\\\\d+\\\\|ref\\\\|([^|]+)\\\\|\", RegexOptions.Compiled);\n\n        // >ref|NC_000013.11| Homo sapiens chromosome 13, GRCh38.p12 Primary Assembly\n        private static readonly Regex NcbiRegex2 = new Regex(\"^>ref\\\\|([^|]+)\\\\|\", RegexOptions.Compiled);\n\n        // header patterns in the order in which they are tried; the first match wins\n        private static readonly Regex[] HeaderRegexes = { NcbiRegex2, NcbiRegex, NameRegex };\n\n        // Reads every FASTA record from the stream and appends it to references.\n        // Throws UserErrorException when a record does not begin with a '>' header and\n        // InvalidDataException when the header cannot be parsed or names an unknown chromosome.\n        internal static void AddReferenceSequences(Stream stream, Dictionary<string, Chromosome> refNameToChromosome, List<FastaSequence> references)\n        {\n            var sb = new StringBuilder();\n\n            using (var reader = new StreamReader(stream))\n            {\n                string header = reader.ReadLine();\n\n                while (header != null)\n                {\n                    if (!header.StartsWith(\">\")) throw new UserErrorException($\"Encountered a FASTA header that did not start with '>': {header}\");\n\n                    string name       = GetName(header);\n                    var    chromosome = GetChromosome(refNameToChromosome, name);\n                    string bases      = GetBases(sb, reader, out string nextHeader);\n\n                    references.Add(new FastaSequence(chromosome, bases));\n\n                    // GetBases has already consumed the next header unless it ran into the end of the stream\n                    header = nextHeader ?? reader.ReadLine();\n                }\n            }\n        }\n\n        // Concatenates the lines up to the next header (returned in nextHeader) or the end of the stream.\n        private static string GetBases(StringBuilder sb, StreamReader reader, out string nextHeader)\n        {\n            sb.Clear();\n            nextHeader = null;\n\n            string line;\n\n            while ((line = reader.ReadLine()) != null)\n            {\n                if (line.StartsWith('>'))\n                {\n                    nextHeader = line;\n                    break;\n                }\n\n                sb.Append(line);\n            }\n\n            return sb.ToString();\n        }\n\n        private static Chromosome GetChromosome(Dictionary<string, Chromosome> refNameToChromosome, string name)\n        {\n            var chromosome = ReferenceNameUtilities.GetChromosome(refNameToChromosome, name);\n            if (!chromosome.IsEmpty()) return chromosome;\n\n            throw new InvalidDataException($\"Could not find the chromosome ({name}) in the reference name dictionary.\");\n        }\n\n        // Extracts the sequence name from a FASTA header line.\n        private static string GetName(string s)\n        {\n            foreach (var regex in HeaderRegexes)\n            {\n                var match = regex.Match(s);\n                if (match.Success) return match.Groups[1].Value;\n            }\n\n            throw new InvalidDataException($\"Unable to match the regex to the chromosome name ({s})\");\n        }\n    }\n}"
  },
  {
    "path": "ReferenceSequence/IO/ReferenceNamesReader.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing Genome;\nusing OptimizedCore;\n\nnamespace ReferenceSequence.IO\n{\n    public static class ReferenceNamesReader\n    {\n        private const int RefIndex     = 0;\n        private const int EnsemblIndex = 1;\n        private const int UcscIndex    = 2;\n\n        public static List<Chromosome> GetReferenceNames(Stream stream)\n        {\n            var names = new List<Chromosome>();\n\n            using (var reader = new StreamReader(stream))\n            {\n                while (true)\n                {\n                    string line = reader.ReadLine();\n                    if (line == null) break;\n\n                    string[] cols        = line.OptimizedSplit('\\t');\n                    ushort   refIndex    = ushort.Parse(cols[RefIndex]);\n                    string   ensemblName = cols[EnsemblIndex];\n                    string   ucscName    = cols[UcscIndex];\n\n                    names.Add(new Chromosome(ucscName, ensemblName, null, null, 0, refIndex));\n                }\n            }\n\n            return names.OrderBy(x => x.Index).ToList();\n        }\n    }\n}\n"
  },
  {
    "path": "ReferenceSequence/IO/ReferenceSequenceReader.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing ErrorHandling.Exceptions;\nusing Genome;\nusing Intervals;\nusing IO;\nusing ReferenceSequence.Common;\n\nnamespace ReferenceSequence.IO\n{\n    public sealed class CompressedSequenceReader : IDisposable\n    {\n        public readonly Dictionary<string, Chromosome> RefNameToChromosome  = new Dictionary<string, Chromosome>();\n        public readonly Dictionary<ushort, Chromosome> RefIndexToChromosome = new Dictionary<ushort, Chromosome>();\n        private readonly Dictionary<ushort, int> _refIndexToIndex            = new Dictionary<ushort, int>();\n\n        private readonly IndexEntry[] _indexEntries;\n        public readonly Sequence Sequence = new Sequence();\n\n        public ushort NumRefSeqs { get; private set; }\n\n        public const int MaxShift = 6;\n\n        private readonly ExtendedBinaryReader _reader;\n        private readonly Stream _stream;\n\n        public GenomeAssembly Assembly => Sequence.Assembly;\n        public byte PatchLevel; // we'll use this in future version providers\n\n        public CompressedSequenceReader(Stream stream)\n        {\n            _stream = stream;\n            _reader = new ExtendedBinaryReader(stream);\n\n            CheckHeaderVersion();\n            LoadHeader();\n            AddChromosomes();\n            _indexEntries = LoadIndex();\n        }\n\n        public void Dispose()\n        {\n            _reader?.Dispose();\n            _stream?.Dispose();\n        }\n\n        private void CheckHeaderVersion()\n        {\n            string headerTag  = _reader.ReadString();\n            int headerVersion = _reader.ReadInt32();\n\n            if (headerTag != ReferenceSequenceCommon.HeaderTag || headerVersion != ReferenceSequenceCommon.HeaderVersion)\n            {\n                throw new InvalidFileFormatException($\"The header identifiers do not match the expected values: Obs: {headerTag} {headerVersion} vs Exp: 
{ReferenceSequenceCommon.HeaderTag} {ReferenceSequenceCommon.HeaderVersion}\");\n            }\n        }\n\n        public void GetCompressedSequence(Chromosome chromosome)\n        {\n            if (chromosome.IsEmpty() || !_refIndexToIndex.TryGetValue(chromosome.Index, out int index))\n            {\n                Sequence.EnableNSequence();\n                return;\n            }\n\n            var indexEntry = _indexEntries[index];\n            _stream.Position = indexEntry.FileOffset;\n            \n            uint tag = _reader.ReadUInt32();\n\n            if (tag != ReferenceSequenceCommon.ReferenceStartTag)\n            {\n                throw new InvalidDataException($\"The reference start tag does not match the expected values: Obs: {tag} vs Exp: {ReferenceSequenceCommon.ReferenceStartTag}\");\n            }\n\n            (int sequenceOffset, int numBases) = GetMetadata(_reader);\n\n            byte[]                     twoBitBuffer             = GetTwoBitBuffer(_reader);\n            IntervalArray<MaskedEntry> maskedEntryIntervalArray = GetMaskedEntries(_reader);\n            Band[]                     cytogeneticBands         = GetCytogeneticBands(_reader);\n\n            Sequence.Set(numBases, sequenceOffset, twoBitBuffer, maskedEntryIntervalArray,\n                cytogeneticBands);\n        }\n\n        private static (int SequenceOffset, int NumBases) GetMetadata(ExtendedBinaryReader reader)\n        {\n            int sequenceOffset = reader.ReadOptInt32();\n            int numBases       = reader.ReadOptInt32();\n            return (sequenceOffset, numBases);\n        }\n\n        private static Band[] GetCytogeneticBands(ExtendedBinaryReader reader)\n        {\n            int numBands = reader.ReadOptInt32();\n            var bands    = new Band[numBands];\n\n            for (var i = 0; i < numBands; i++)\n            {\n                int begin   = reader.ReadOptInt32();\n                int end     = reader.ReadOptInt32();\n           
     string name = reader.ReadAsciiString();\n\n                bands[i] = new Band(begin, end, name);\n            }\n\n            return bands;\n        }\n\n        private static IntervalArray<MaskedEntry> GetMaskedEntries(ExtendedBinaryReader reader)\n        {\n            int numEntries    = reader.ReadOptInt32();\n            var maskedEntries = new Interval<MaskedEntry>[numEntries];\n\n            for (var i = 0; i < numEntries; i++)\n            {\n                int begin = reader.ReadOptInt32();\n                int end   = reader.ReadOptInt32();\n\n                maskedEntries[i] = new Interval<MaskedEntry>(begin, end, new MaskedEntry(begin, end));\n            }\n\n            return new IntervalArray<MaskedEntry>(maskedEntries);\n        }\n\n        private static byte[] GetTwoBitBuffer(ExtendedBinaryReader reader)\n        {\n            int numBytes = reader.ReadOptInt32();\n            return reader.ReadBytes(numBytes);\n        }\n\n        private void LoadHeader()\n        {\n            Sequence.Assembly = (GenomeAssembly)_reader.ReadByte();\n            PatchLevel        = _reader.ReadByte();\n            NumRefSeqs        = (ushort)_reader.ReadOptInt32();\n        }\n\n        private void AddChromosomes()\n        {\n            for (var i = 0; i < NumRefSeqs; i++)\n            {\n                var chromosome = Chromosome.Read(_reader);\n                AddReferenceName(chromosome);\n            }\n        }\n\n        private IndexEntry[] LoadIndex()\n        {\n            uint tag = _reader.ReadUInt32();\n\n            if (tag != ReferenceSequenceCommon.IndexStartTag)\n            {\n                throw new InvalidDataException($\"The index start tag does not match the expected values: Obs: {tag} vs Exp: {ReferenceSequenceCommon.IndexStartTag}\");\n            }\n\n            int numEntries = _reader.ReadInt32();\n\n            var indexEntries = new IndexEntry[numEntries];\n\n            for (var i = 0; i < numEntries; i++)\n   
         {\n                ushort refIndex   = _reader.ReadUInt16();\n                long   fileOffset = _reader.ReadInt64();\n                indexEntries[i] = new IndexEntry(refIndex, fileOffset);\n\n                _refIndexToIndex[refIndex] = i;\n            }\n\n            return indexEntries;\n        }\n\n        private void AddReferenceName(Chromosome chromosome)\n        {\n            if (!string.IsNullOrEmpty(chromosome.UcscName))         RefNameToChromosome[chromosome.UcscName]         = chromosome;\n            if (!string.IsNullOrEmpty(chromosome.EnsemblName))      RefNameToChromosome[chromosome.EnsemblName]      = chromosome;\n            if (!string.IsNullOrEmpty(chromosome.RefSeqAccession))  RefNameToChromosome[chromosome.RefSeqAccession]  = chromosome;\n            if (!string.IsNullOrEmpty(chromosome.GenBankAccession)) RefNameToChromosome[chromosome.GenBankAccession] = chromosome;\n            RefIndexToChromosome[chromosome.Index] = chromosome;\n        }\n    }\n}"
  },
  {
    "path": "ReferenceSequence/IO/ReferenceSequenceWriter.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing Genome;\nusing IO;\nusing ReferenceSequence.Common;\nusing ReferenceSequence.Creation;\n\nnamespace ReferenceSequence.IO\n{\n    internal sealed class ReferenceSequenceWriter : IDisposable\n    {\n        private readonly Stream _stream;\n        private readonly ExtendedBinaryWriter _writer;\n\n        internal ReferenceSequenceWriter(Stream stream, IReadOnlyCollection<Chromosome> chromosomes, GenomeAssembly genomeAssembly, byte patchLevel)\n        {\n            _stream = stream;\n            _writer = new ExtendedBinaryWriter(stream);\n            WriteHeader(genomeAssembly, patchLevel, chromosomes);\n        }\n\n        public void Dispose() => _writer.Dispose();\n\n        private void WriteHeader(GenomeAssembly genomeAssembly, byte patchLevel, IReadOnlyCollection<Chromosome> chromosomes)\n        {\n            _writer.Write(ReferenceSequenceCommon.HeaderTag);\n            _writer.Write(ReferenceSequenceCommon.HeaderVersion);\n            _writer.Write((byte)genomeAssembly);\n            _writer.Write(patchLevel);\n\n            _writer.WriteOpt(chromosomes.Count);\n            foreach (var chromosome in chromosomes) chromosome.Write(_writer);\n        }\n\n        internal void Write(List<Creation.ReferenceSequence> referenceSequences)\n        {\n            _writer.Flush();\n\n            long indexOffset = _stream.Position;\n            int  indexSize   = 8 + IndexEntry.Size * referenceSequences.Count;\n\n            var    buffers  = new List<ReferenceBuffer>(referenceSequences.Count);\n            ushort refIndex = 0;\n\n            foreach (var referenceSequence in referenceSequences)\n            {\n                buffers.Add(referenceSequence.GetReferenceBuffer(refIndex));\n                refIndex++;\n            }\n\n            IndexEntry[] index = CreateIndex(buffers, indexOffset, indexSize);\n\n            WriteIndex(index);\n            
WriteReferenceBuffers(buffers);\n        }\n\n        private static IndexEntry[] CreateIndex(IReadOnlyCollection<ReferenceBuffer> referenceBuffers, long indexOffset, int indexSize)\n        {\n            var indexEntries     = new IndexEntry[referenceBuffers.Count];\n            long referenceOffset = indexOffset + indexSize;\n\n            var index = 0;\n            foreach (var block in referenceBuffers)\n            {\n                indexEntries[index] = new IndexEntry(block.RefIndex, referenceOffset);\n                referenceOffset += block.BufferSize;\n                index++;\n            }\n\n            return indexEntries;\n        }\n\n        private void WriteIndex(IReadOnlyCollection<IndexEntry> indexEntries)\n        {\n            _writer.Write(ReferenceSequenceCommon.IndexStartTag);\n            _writer.Write(indexEntries.Count);\n\n            foreach (var indexEntry in indexEntries)\n            {\n                _writer.Write(indexEntry.RefIndex);\n                _writer.Write(indexEntry.FileOffset);\n            }\n        }\n\n        private void WriteReferenceBuffers(IEnumerable<ReferenceBuffer> referenceBuffers)\n        {\n            foreach (var referenceBuffer in referenceBuffers)\n            {\n                _writer.Write(referenceBuffer.Buffer, 0, referenceBuffer.BufferSize);\n            }\n        }\n    }\n}"
  },
  {
    "path": "ReferenceSequence/ReferenceSequence.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\n  <PropertyGroup>\n    <OutputType>Exe</OutputType>\n    <TargetFramework>net6.0</TargetFramework>\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\n    \n  </PropertyGroup>\n  <ItemGroup>\n    <ProjectReference Include=\"..\\CommandLine\\CommandLine.csproj\" />\n    <ProjectReference Include=\"..\\Genome\\Genome.csproj\" />\n    <ProjectReference Include=\"..\\VariantAnnotation.Interface\\VariantAnnotation.Interface.csproj\" />\n  </ItemGroup>\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\n</Project>\n"
  },
  {
    "path": "ReferenceSequence/ReferenceSequenceCommon.cs",
    "content": "namespace ReferenceSequence\n{\n    public static class ReferenceSequenceCommon\n    {\n        public const string HeaderTag     = \"NirvanaReference\";\n        public const int    HeaderVersion = 7;\n\n        public const uint IndexStartTag     = 0x4CF76E2F;\n        public const uint ReferenceStartTag = 0xA7D8212A;\n\n        public const int NumBasesPerByte = 4;\n    }\n}"
  },
  {
    "path": "ReferenceSequence/ReferenceUtilsMain.cs",
    "content": "﻿using System.Collections.Generic;\nusing CommandLine.Builders;\nusing ReferenceSequence.Commands;\nusing VariantAnnotation.Interface;\n\nnamespace ReferenceSequence\n{\n    internal static class ReferenceUtilsMain\n    {\n        private static int Main(string[] args)\n        {\n            var ops = new Dictionary<string, TopLevelOption>\n            {\n                [\"create\"]    = new TopLevelOption(\"creates a full reference file\", CreateReferenceMain.Run),\n                [\"substring\"] = new TopLevelOption(\"creates a reference substring file\", CreateSubstringMain.Run),\n                [\"testseq\"]   = new TopLevelOption(\"creates a TestSeq_reference.dat file\", CreateTestSeqMain.Run)\n            };\n\n            var exitCode = new TopLevelAppBuilder(args, ops)\n                .Parse()\n                .ShowBanner(Constants.Authors)\n                .ShowHelpMenu(\"Utilities focused on creating the reference files\")\n                .ShowErrors()\n                .Execute();\n\n            return (int)exitCode;\n        }\n    }\n}"
  },
  {
    "path": "ReferenceSequence/Utilities/SequenceHelper.cs",
    "content": "﻿using System.Collections.Generic;\nusing Genome;\nusing IO;\nusing ReferenceSequence.IO;\n\nnamespace ReferenceSequence.Utilities\n{\n    public static class SequenceHelper\n    {\n        public static (Dictionary<ushort, Chromosome> refIndexToChromosome, Dictionary<string, Chromosome>\n            refNameToChromosome, int numRefSeqs) GetDictionaries(string referencePath)\n        {\n            Dictionary<ushort, Chromosome> refIndexToChromosome;\n            Dictionary<string, Chromosome> refNameToChromosome;\n            int numRefSeqs;\n\n            using (var reader = new CompressedSequenceReader(PersistentStreamUtils.GetReadStream(referencePath)))\n            {\n                refIndexToChromosome = reader.RefIndexToChromosome;\n                refNameToChromosome  = reader.RefNameToChromosome;\n                numRefSeqs           = reader.NumRefSeqs;\n            }\n\n            return (refIndexToChromosome, refNameToChromosome, numRefSeqs);\n        }\n    }\n}\n"
  },
  {
    "path": "RepeatExpansions/AssemblyInfo.cs",
    "content": "﻿using System.Runtime.CompilerServices;\n\n[assembly: InternalsVisibleTo(\"UnitTests\")]"
  },
  {
    "path": "RepeatExpansions/IO/RepeatExpansionReader.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing ErrorHandling;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing Intervals;\r\nusing OptimizedCore;\r\n\r\nnamespace RepeatExpansions.IO\r\n{\r\n    public static class RepeatExpansionReader\r\n    {\r\n        private const int ChromIndex          = 0;\r\n        private const int StartIndex          = 1;\r\n        private const int EndIndex            = 2;\r\n        private const int PhenotypeIndex      = 3;\r\n        private const int OmimIndex           = 4;\r\n        private const int RepeatNumbersIndex  = 5;\r\n        private const int AlleleCountsIndex   = 6;\r\n        private const int CategoriesIndex     = 7;\r\n        private const int CategoryRangesIndex = 8;\r\n        private const int MinNumberOfColumns  = 9;\r\n\r\n        public static IIntervalForest<RepeatExpansionPhenotype> Load(Stream stream, GenomeAssembly desiredGenomeAssembly,\r\n            Dictionary<string, Chromosome> refNameToChromosome, int numRefSeqs)\r\n        {\r\n            var intervalLists = new List<Interval<RepeatExpansionPhenotype>>[numRefSeqs];\r\n            for (var i = 0; i < numRefSeqs; i++) intervalLists[i] = new List<Interval<RepeatExpansionPhenotype>>();\r\n\r\n            using (stream)\r\n            {\r\n                using (var reader = new StreamReader(stream))\r\n                {\r\n                    CheckHeader(reader, desiredGenomeAssembly);\r\n\r\n                    while (true)\r\n                    {\r\n                        string line = reader.ReadLine();\r\n                        if (line == null) break;\r\n                        if(line == string.Empty) continue;\r\n\r\n                        try\r\n                        {\r\n                            (ushort refIndex, Interval<RepeatExpansionPhenotype> phenotypeInterval) = GetPhenotype(line, refNameToChromosome);\r\n                            
if(refIndex == ushort.MaxValue) throw new InvalidDataException(\"Unknown chromosome encountered in STR file.\");\r\n                            intervalLists[refIndex].Add(phenotypeInterval);\r\n                        }\r\n                        catch (Exception e)\r\n                        {\r\n                            e.Data[ExitCodeUtilities.Line] = line;\r\n                            throw;\r\n                        }\r\n                        \r\n                    }\r\n                }\r\n            }\r\n\r\n            var refIntervalArrays = new IntervalArray<RepeatExpansionPhenotype>[numRefSeqs];\r\n            for (var i = 0; i < numRefSeqs; i++)\r\n            {\r\n                refIntervalArrays[i] = new IntervalArray<RepeatExpansionPhenotype>(intervalLists[i].ToArray());\r\n            }\r\n\r\n            return new IntervalForest<RepeatExpansionPhenotype>(refIntervalArrays);\r\n        }\r\n\r\n        private static (ushort RefIndex, Interval<RepeatExpansionPhenotype> Interval) GetPhenotype(string line, Dictionary<string, Chromosome> refNameToChromosome)\r\n        {\r\n            string[] cols = line.OptimizedSplit('\\t');\r\n            if (cols.Length < MinNumberOfColumns) throw new InvalidDataException($\"Expected at least {MinNumberOfColumns} columns in the STR data file, but found only {cols.Length}.\");\r\n\r\n            string chromosomeString         = cols[ChromIndex];\r\n            int start                       = int.Parse(cols[StartIndex]);\r\n            int end                         = int.Parse(cols[EndIndex]);\r\n            string phenotype                = cols[PhenotypeIndex];\r\n            string omimId                   = cols[OmimIndex];\r\n            int[] repeatNumbers             = cols[RepeatNumbersIndex].Split(',').Select(int.Parse).ToArray();\r\n            int[] alleleCounts              = cols[AlleleCountsIndex].Split(',').Select(int.Parse).ToArray();\r\n            string[] classifications        
= cols[CategoriesIndex].Split(',').ToArray();\r\n            Interval[] classificationRanges = cols[CategoryRangesIndex].Split(',').Select(GetInterval).ToArray();\r\n\r\n            if (repeatNumbers.Length   != alleleCounts.Length)         throw new InvalidDataException($\"Inconsistent number of repeat numbers ({repeatNumbers.Length}) vs. allele counts ({alleleCounts.Length})\");\r\n            if (classifications.Length != classificationRanges.Length) throw new InvalidDataException($\"Inconsistent number of values of classifications ({classifications.Length}) vs. classification ranges ({classificationRanges.Length})\");\r\n\r\n            var chromosome         = ReferenceNameUtilities.GetChromosome(refNameToChromosome, chromosomeString);\r\n            var chromosomeInterval = new ChromosomeInterval(chromosome, start, end);\r\n            double[] percentiles   = PercentileUtilities.ComputePercentiles(repeatNumbers.Length, alleleCounts);\r\n\r\n            var rePhenotype = new RepeatExpansionPhenotype(chromosomeInterval, phenotype, omimId, repeatNumbers, percentiles, classifications, classificationRanges);\r\n            return (chromosome.Index, new Interval<RepeatExpansionPhenotype>(start, end, rePhenotype));\r\n        }\r\n\r\n        private static Interval GetInterval(string s)\r\n        {\r\n            string[] cols = s.OptimizedSplit('-');\r\n            int begin     = cols[0] == \"inf\" ? int.MaxValue : int.Parse(cols[0]);\r\n            int end       = cols[1] == \"inf\" ? 
int.MaxValue : int.Parse(cols[1]);\r\n\r\n            return new Interval(begin, end);\r\n        }\r\n\r\n        private static void CheckHeader(TextReader reader, GenomeAssembly desiredGenomeAssembly)\r\n        {\r\n            string line = reader.ReadLine();\r\n            while (line == string.Empty) line = reader.ReadLine();\r\n            if(line==null) throw new UserErrorException(\"The custom STR file provided is empty.\");\r\n\r\n            GenomeAssembly genomeAssembly = GenomeAssembly.Unknown;\r\n            var headerNum = 0;\r\n            while (line!=null && line.StartsWith(\"#\"))\r\n            {\r\n                headerNum++;\r\n                line = line.Trim();\r\n                var columns = line.Split('=','\\t');\r\n                var tag = columns[0].ToLower();\r\n                switch (headerNum)\r\n                {\r\n                    case 1:\r\n                        if(tag != \"#assembly\")\r\n                            throw new UserErrorException(\"First line in STR data file has to contain assembly. For example: #assembly=GRCh38\");\r\n                        genomeAssembly = GenomeAssemblyHelper.Convert(columns[1]);\r\n                        if (genomeAssembly != desiredGenomeAssembly) \r\n                            throw new UserErrorException($\"Expected {desiredGenomeAssembly} in the STR data file, but found {genomeAssembly}\");\r\n                        break;\r\n                    case 2:\r\n                        if(tag!=\"#chrom\")\r\n                            throw new UserErrorException(\"Second line in TSV has to contain column labels. 
For example: #Chrom\\tStart\\tEnd\\tPhenotype\\t...\");\r\n                        return; // we should not read the next line\r\n                    default:\r\n                        throw new UserErrorException($\"Unexpected header tag observed:\\n{line}\");\r\n                }\r\n                line = reader.ReadLine();\r\n            }\r\n            if(genomeAssembly == GenomeAssembly.Unknown) \r\n                throw new UserErrorException(\"Genome assembly not specified in STR header. It is a required field.\");\r\n\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "RepeatExpansions/IRepeatExpansionProvider.cs",
    "content": "﻿using VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace RepeatExpansions\r\n{\r\n    public interface IRepeatExpansionProvider\r\n    {\r\n        void Annotate(IAnnotatedPosition annotatedPosition);\r\n    }\r\n}\r\n"
  },
  {
    "path": "RepeatExpansions/Matcher.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Intervals;\r\nusing VariantAnnotation.Interface.SA;\r\nusing Variants;\r\n\r\nnamespace RepeatExpansions\r\n{\r\n    public sealed class Matcher\r\n    {\r\n        private readonly IIntervalForest<RepeatExpansionPhenotype> _phenotypeForest;\r\n\r\n        public Matcher(IIntervalForest<RepeatExpansionPhenotype> phenotypeForest) => _phenotypeForest = phenotypeForest;\r\n\r\n        public ISupplementaryAnnotation GetMatchingAnnotations(RepeatExpansion variant)\r\n        {\r\n            RepeatExpansionPhenotype[] variantPhenotypes =\r\n                _phenotypeForest.GetAllOverlappingValues(variant.Chromosome.Index, variant.Start, variant.End);\r\n            if (variantPhenotypes == null) return null;\r\n\r\n            var jsonEntries = new List<string>();\r\n\r\n            // ReSharper disable once LoopCanBeConvertedToQuery\r\n            foreach (var variantPhenotype in variantPhenotypes)\r\n            {\r\n                if (!ExactMatch(variant, variantPhenotype.ChromosomeInterval)) continue;\r\n\r\n                string json = variantPhenotype.GetAnnotation(variant.RepeatCount);\r\n                jsonEntries.Add(json);\r\n            }\r\n\r\n            return jsonEntries.Count == 0 ? null : new RepeatExpansionSupplementaryAnnotation(jsonEntries);\r\n        }\r\n\r\n        private static bool ExactMatch(IInterval variant, IInterval variantPhenotype) =>\r\n            variant.Start == variantPhenotype.Start && \r\n            variant.End   == variantPhenotype.End;\r\n    }\r\n}\r\n"
  },
  {
    "path": "RepeatExpansions/PercentileUtilities.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.Linq;\n\nnamespace RepeatExpansions\n{\n    public static class PercentileUtilities\n    {\n        public static double[] ComputePercentiles(int valueCount, IReadOnlyList<int> alleleCounts)\n        {\n            var percentiles       = new double[valueCount];\n            var smallerValueCount = 0;\n            int totalCount        = alleleCounts.Sum();\n\n            percentiles[0] = 0;\n\n            for (var i = 1; i < valueCount; i++)\n            {\n                smallerValueCount += alleleCounts[i - 1];\n                percentiles[i]    =  100.0 * smallerValueCount / totalCount;\n            }\n\n            return percentiles;\n        }\n\n        public static double GetPercentile<T>(T inputValue, T[] referenceValues, double[] referencePercentiles)\n        {\n            int index = Array.BinarySearch(referenceValues, inputValue);\n            if (index >= 0) return referencePercentiles[index];\n\n            index = ~index;\n            return index == referenceValues.Length ? 100.00 : referencePercentiles[index];\n        }\n    }\n}"
  },
  {
    "path": "RepeatExpansions/RepeatExpansionPhenotype.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Linq;\r\nusing Genome;\r\nusing Intervals;\r\n\r\nnamespace RepeatExpansions\r\n{\r\n    public sealed class RepeatExpansionPhenotype\r\n    {\r\n        public readonly ChromosomeInterval ChromosomeInterval;\r\n\r\n        // used directly in JSON output\r\n        private readonly string _phenotype;\r\n        private readonly string _omimId;\r\n\r\n        // used during annotation\r\n        private readonly int[] _repeatNumbers;\r\n        private readonly double[] _percentiles;\r\n        private readonly string[] _classifications;\r\n        private readonly Interval[] _classificationRanges;\r\n\r\n        public RepeatExpansionPhenotype(ChromosomeInterval chromosomeInterval, string phenotype, string omimId,\r\n            int[] repeatNumbers, double[] percentiles, string[] classifications, Interval[] classificationRanges)\r\n        {\r\n            ChromosomeInterval    = chromosomeInterval;\r\n            _phenotype            = phenotype;\r\n            _omimId               = omimId;\r\n            _repeatNumbers        = repeatNumbers;\r\n            _percentiles          = percentiles;\r\n            _classifications      = classifications;\r\n            _classificationRanges = classificationRanges;\r\n        }\r\n\r\n        public string GetAnnotation(int repeatNumber)\r\n        {\r\n            double percentile                   = PercentileUtilities.GetPercentile(repeatNumber, _repeatNumbers, _percentiles);\r\n            IEnumerable<string> classifications = GetClassifications(repeatNumber);\r\n\r\n            return GetJson(percentile, classifications);\r\n        }\r\n\r\n        private string GetJson(double percentile, IEnumerable<string> classifications)\r\n        {\r\n            // in net6.0, the compiler gets confused if you have }}}. Should the first two }s be a closing brace or the second? 
This results in a bug.\r\n            // we can circumvent it by taking the leading and trailing curly braces out of the main expression and adding them separately\r\n            const char openCurly  = '{';\r\n            const char closeCurly = '}';\r\n            string     joined     = string.Join(\",\", classifications.Select(classification => \"\\\"\" + classification + \"\\\"\"));\r\n            return $\"{openCurly}\\\"phenotype\\\":\\\"{_phenotype}\\\",\\\"omimId\\\":{_omimId},\\\"classifications\\\":[{joined}],\\\"percentile\\\":{percentile:0.00}{closeCurly}\";\r\n\r\n        }\r\n\r\n        private IEnumerable<string> GetClassifications(int repeatNumber)\r\n        {\r\n            var classifications = new List<string>();\r\n\r\n            for (var i = 0; i < _classificationRanges.Length; i++)\r\n            {\r\n                var range = _classificationRanges[i];\r\n                if (range.Start <= repeatNumber && repeatNumber <= range.End) classifications.Add(_classifications[i]);\r\n            }\r\n\r\n            return classifications;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "RepeatExpansions/RepeatExpansionProvider.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing Genome;\r\nusing Intervals;\r\nusing IO;\r\nusing RepeatExpansions.IO;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Variants;\r\n\r\nnamespace RepeatExpansions\r\n{\r\n    public sealed class RepeatExpansionProvider : IRepeatExpansionProvider\r\n    {\r\n        private readonly Matcher _matcher;\r\n\r\n        public RepeatExpansionProvider(GenomeAssembly genomeAssembly, Dictionary<string, Chromosome> refNameToChromosome, \r\n            int numRefSeqs, string customTsvPath)\r\n        {\r\n            using ( Stream stream = GetTsvStream(genomeAssembly, customTsvPath))\r\n            {\r\n                IIntervalForest<RepeatExpansionPhenotype> phenotypeForest = RepeatExpansionReader.Load(stream, genomeAssembly, refNameToChromosome, numRefSeqs);\r\n                _matcher = new Matcher(phenotypeForest);\r\n            }\r\n        }\r\n\r\n        private static Stream GetTsvStream(GenomeAssembly genomeAssembly, string customTsvPath)\r\n        {\r\n            //since we are using the executing assembly, we cannot move the following lines about getting stream further upstream.\r\n            var    assembly     = System.Reflection.Assembly.GetExecutingAssembly();\r\n            string resourceName = $\"RepeatExpansions.Resources.RepeatExpansions.{genomeAssembly}.tsv\";\r\n            var stream = customTsvPath != null\r\n                ? 
PersistentStreamUtils.GetReadStream(customTsvPath)\r\n                : assembly.GetManifestResourceStream(resourceName);\r\n            \r\n            if (stream == null) throw new NullReferenceException(\"Unable to read from the STR resource file\");\r\n            return stream;\r\n        }\r\n\r\n        public void Annotate(IAnnotatedPosition annotatedPosition)\r\n        {\r\n            foreach (var variant in annotatedPosition.AnnotatedVariants)\r\n            {\r\n                if (variant.Variant.Type != VariantType.short_tandem_repeat_variation) continue;\r\n                var repeatExpansion = (RepeatExpansion)variant.Variant;\r\n\r\n                var phenotypes = _matcher.GetMatchingAnnotations(repeatExpansion);\r\n                if (phenotypes == null) continue;\r\n\r\n                variant.RepeatExpansionPhenotypes = phenotypes;\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "RepeatExpansions/RepeatExpansionSupplementaryAnnotation.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Text;\r\nusing VariantAnnotation.Interface.SA;\r\n\r\nnamespace RepeatExpansions\r\n{\r\n    public sealed class RepeatExpansionSupplementaryAnnotation : ISupplementaryAnnotation\r\n    {\r\n        private readonly List<string> _jsonEntries;\r\n        public string JsonKey => \"repeatExpansionPhenotypes\";\r\n\r\n        public RepeatExpansionSupplementaryAnnotation(List<string> jsonEntries) => _jsonEntries = jsonEntries;\r\n\r\n        public void SerializeJson(StringBuilder sb) => sb.Append($\"[{string.Join(',', _jsonEntries)}]\");\r\n    }\r\n}\r\n"
  },
  {
    "path": "RepeatExpansions/RepeatExpansions.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\r\n  <PropertyGroup>\r\n    <TargetFramework>net6.0</TargetFramework>\r\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\n\r\n  </PropertyGroup>\r\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\r\n  <ItemGroup>\r\n    <EmbeddedResource Include=\"Resources\\RepeatExpansions.GRCh37.tsv\" />\r\n    <EmbeddedResource Include=\"Resources\\RepeatExpansions.GRCh38.tsv\" />\r\n  </ItemGroup>\r\n  <ItemGroup>\r\n    <ProjectReference Include=\"..\\OptimizedCore\\OptimizedCore.csproj\" />\r\n    <ProjectReference Include=\"..\\VariantAnnotation.Interface\\VariantAnnotation.Interface.csproj\" />\r\n  </ItemGroup>\r\n</Project>\r\n"
  },
  {
    "path": "RepeatExpansions/Resources/RepeatExpansions.GRCh37.tsv",
    "content": "#assembly=GRCh37\n#Chrom\tStart\tEnd\tPhenotype\tOMIM_ID\tRepeat_numbers\tAllele_counts\tClassifications\tClassification_ranges\nX\t66765159\t66765227\tSpinal and bulbar muscular atrophy of Kennedy\t313200\t9,14,15,16,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32\t1,1,2,6,19,18,20,18,28,24,13,18,13,7,4,4,3,1,1\tNormal, Expanded\t0-34, 35-inf\n12\t7045880\t7045936\tDentatorubro-pallidoluysian atrophy\t125370\t7,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,30\t1,2,1,16,7,23,5,29,49,16,57,26,36,19,6,5,1,1\tNormal, Expanded\t0-35, 36-inf\n22\t46191235\t46191304\tSpinocerebellar ataxia 10\t603516\t7,9,11,12,13,14,15,16,17,18,19\t2,1,7,43,75,87,43,21,11,6,4\tNormal, Expanded\t0-32, 33-inf\n6\t16327865\t16327954\tSpinocerebellar ataxia 1\t164400\t19,20,21,22,24,25,27,28,29,30,31,32,33,34,35,36,37,38\t1,1,1,1,2,1,26,20,43,59,84,28,21,5,1,2,3,1\tNormal, Expanded\t0-35, 36-inf\n12\t112036754\t112036822\tSpinocerebellar ataxia 2\t183090\t19,21,22,23,24,26,27,29,30,31,33\t2,1,237,48,4,1,1,2,2,1,1\tNormal, Expanded\t0-31, 32-inf\n14\t92537354\t92537386\tMachado-Joseph disease\t109150\t9,11,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,34,35,36\t1,71,2,11,10,14,24,46,14,3,7,30,17,7,6,6,5,9,8,6,1,1,1\tNormal, Expanded\t0-44, 45-inf\n3\t63898361\t63898390\tSpinocerebellar ataxia 7\t164500\t1,2,3,4,7,8,9,10,11,12,13,15,21\t1,1,5,1,5,2,4,217,25,29,8,1,1\tNormal, Expanded\t0-27, 28-inf\n13\t70713516\t70713560\tSpinocerebellar ataxia 8\t608768\t8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,33,36,37,38,71,81,118\t4,55,3,19,40,6,21,54,18,26,10,13,8,9,5,2,1,1,1,1,1,1,1\tNormal, Expanded\t0-50, 51-inf\n9\t27573527\t27573544\tFrontotemporal dementia and/or amyotrophic lateral sclerosis 1\t105550\t2,3,4,5,6,7,8,9,10,11,12,13,14,17,19,23,27,41\t147,2,22,17,24,25,26,9,13,1,4,3,2,1,1,1,1,1\tNormal, Expanded\t0-25, 26-inf\n19\t13318673\t13318711\tSpinocerebellar ataxia 6\t183086\t4,7,8,10,11,12,13,14,15,38\t2,28,1,4,102,62,85,13,2,1\tNormal, Expanded\t0-18, 
19-inf\n11\t119077000\t119077032\tJacobsen syndrome\t147791\t4,7,8,10,11,12,13,14,15,16,17,18,19,20,22,25\t1,1,21,6,192,36,8,10,8,3,3,1,3,4,1,2\tNormal, Expanded\t0-80, 81-inf\n3\t128891420\t128891499\tMyotonic dystrophy 2\t602668\t6,8,10,11,12,15,16,17,18,19,20,21,22,24,26,27,28,30,41\t3,1,2,5,1,130,68,38,12,19,4,7,4,1,1,1,1,1,1\tNormal, Expanded\t0-50, 51-inf\n21\t45196325\t45196360\tEpilepsy, progressive myoclonic 1A (Unverricht and Lundborg)\t254800\t2,3,4,6,7,10,11,13\t133,160,2,1,1,1,1,1\tNormal, Expanded\t0-3, 4-inf\n19\t46273463\t46273522\tMyotonic dystrophy 1\t160900\t5,6,7,8,9,10,11,12,13,14,15,16,17,19,21,22,24,26,27,32,33\t110,1,4,1,2,11,29,55,38,16,13,4,1,1,5,2,3,1,1,1,1\tNormal, Expanded\t0-34, 35-inf\nX\t146993569\t146993628\tFragile X syndrome\t300624\t8,11,15,20,22,23,24,25,28,29,30,31,32,33,34,35,36,37,38,39,41,43,47,52,55,56,57,60\t1,1,1,3,7,5,3,2,1,60,50,20,5,4,1,2,9,6,1,4,1,1,1,8,1,1,1,1\tNormal, Expanded\t0-44, 45-inf\n9\t71652203\t71652220\tFriedreich ataxia\t229300\t5,6,7,8,9,10,13,14,16,17,18,19,20,21,23,24,25,26\t9,7,2,113,135,1,4,1,4,5,4,6,1,3,1,1,1,2\tNormal, Expanded\t0-33, 34-inf\n4\t3076604\t3076660\tHuntington disease\t143100\t9,11,12,15,16,17,18,19,20,21,22,23,24,25,27,28\t1,1,2,33,25,99,46,31,20,10,10,8,6,2,3,3\tNormal, Expanded\t0-26, 27-inf\n16\t87637894\t87637935\tHuntington disease-like 2\t606438\t5,11,12,13,14,15,16,17,18,19,20,22,23,26,27,28,29,33\t1,5,1,20,139,41,49,17,9,5,1,1,1,4,2,1,2,1\tNormal, Expanded\t0-28, 29-inf\n20\t2633380\t2633403\tSpinocerebellar ataxia 36\t614153\t4,5,6,7,8,9,10,11\t72,37,32,122,11,21,2,3\tNormal, Expanded\t0-14, 15-inf\n5\t146258291\t146258320\tSpinocerebellar ataxia 12\t604326\t9,10,11,13,14,15,16,17,18,19,20,23\t31,129,3,43,30,28,17,13,1,3,1,1\tNormal, Expanded\t0-32, 33-inf\n18\t53253387\t53253458\tFuchs' Corneal 
Dystrophy\t613267\t5,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,31,33,34,35,36,37,46,52,54\t1,76,5,4,18,32,12,32,9,6,5,6,12,12,20,11,9,10,4,4,2,2,2,1,1,1,1,1,1\tNormal, Expanded\t0-39, 40-inf\n15\t23086367\t23086390\tAmyotrophic lateral sclerosis\t600363\t6,7,8,9,10,19,20,24,33\t1,127,162,2,4,1,1,1,1\tNormal, Expanded\t0-8, 9-inf\n2\t191745600\t191745646\tGlutaminase deficiency\t618412\t7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,25\t1,99,6,12,5,7,21,53,49,13,11,6,8,4,2,1,1,1\tNormal, Expanded\t0-89, 90-inf"
  },
  {
    "path": "RepeatExpansions/Resources/RepeatExpansions.GRCh38.tsv",
    "content": "#assembly=GRCh38\n#Chrom\tStart\tEnd\tPhenotype\tOMIM_ID\tRepeat_numbers\tAllele_counts\tClassifications\tClassification_ranges\nchrX\t67545317\t67545385\tSpinal and bulbar muscular atrophy of Kennedy\t313200\t9,14,15,16,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32\t1,1,2,6,19,18,20,18,28,24,13,18,13,7,4,4,3,1,1\tNormal, Expanded\t0-34, 35-inf\nchr12\t6936717\t6936773\tDentatorubro-pallidoluysian atrophy\t125370\t7,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,30\t1,2,1,16,7,23,5,29,49,16,57,26,36,19,6,5,1,1\tNormal, Expanded\t0-35, 36-inf\nchr22\t45795355\t45795424\tSpinocerebellar ataxia 10\t603516\t7,9,11,12,13,14,15,16,17,18,19\t2,1,7,43,75,87,43,21,11,6,4\tNormal, Expanded\t0-32, 33-inf\nchr6\t16327634\t16327723\tSpinocerebellar ataxia 1\t164400\t19,20,21,22,24,25,27,28,29,30,31,32,33,34,35,36,37,38\t1,1,1,1,2,1,26,20,43,59,84,28,21,5,1,2,3,1\tNormal, Expanded\t0-35, 36-inf\nchr12\t111598950\t111599018\tSpinocerebellar ataxia 2\t183090\t19,21,22,23,24,26,27,29,30,31,33\t2,1,237,48,4,1,1,2,2,1,1\tNormal, Expanded\t0-31, 32-inf\nchr14\t92071010\t92071042\tMachado-Joseph disease\t109150\t9,11,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,34,35,36\t1,71,2,11,10,14,24,46,14,3,7,30,17,7,6,6,5,9,8,6,1,1,1\tNormal, Expanded\t0-44, 45-inf\nchr3\t63912685\t63912714\tSpinocerebellar ataxia 7\t164500\t1,2,3,4,7,8,9,10,11,12,13,15,21\t1,1,5,1,5,2,4,217,25,29,8,1,1\tNormal, Expanded\t0-27, 28-inf\nchr13\t70139384\t70139428\tSpinocerebellar ataxia 8\t608768\t8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,33,36,37,38,71,81,118\t4,55,3,19,40,6,21,54,18,26,10,13,8,9,5,2,1,1,1,1,1,1,1\tNormal, Expanded\t0-50, 51-inf\nchr9\t27573529\t27573546\tFrontotemporal dementia and/or amyotrophic lateral sclerosis 1\t105550\t2,3,4,5,6,7,8,9,10,11,12,13,14,17,19,23,27,41\t147,2,22,17,24,25,26,9,13,1,4,3,2,1,1,1,1,1\tNormal, Expanded\t0-25, 26-inf\nchr19\t13207859\t13207897\tSpinocerebellar ataxia 
6\t183086\t4,7,8,10,11,12,13,14,15,38\t2,28,1,4,102,62,85,13,2,1\tNormal, Expanded\t0-18, 19-inf\nchr11\t119206290\t119206322\tJacobsen syndrome\t147791\t4,7,8,10,11,12,13,14,15,16,17,18,19,20,22,25\t1,1,21,6,192,36,8,10,8,3,3,1,3,4,1,2\tNormal, Expanded\t0-80, 81-inf\nchr3\t129172577\t129172656\tMyotonic dystrophy 2\t602668\t6,8,10,11,12,15,16,17,18,19,20,21,22,24,26,27,28,30,41\t3,1,2,5,1,130,68,38,12,19,4,7,4,1,1,1,1,1,1\tNormal, Expanded\t0-50, 51-inf\nchr21\t43776444\t43776479\tEpilepsy, progressive myoclonic 1A (Unverricht and Lundborg)\t254800\t2,3,4,6,7,10,11,13\t133,160,2,1,1,1,1,1\tNormal, Expanded\t0-3, 4-inf\nchr19\t45770205\t45770264\tMyotonic dystrophy 1\t160900\t5,6,7,8,9,10,11,12,13,14,15,16,17,19,21,22,24,26,27,32,33\t110,1,4,1,2,11,29,55,38,16,13,4,1,1,5,2,3,1,1,1,1\tNormal, Expanded\t0-34, 35-inf\nchrX\t147912051\t147912110\tFragile X syndrome\t300624\t8,11,15,20,22,23,24,25,28,29,30,31,32,33,34,35,36,37,38,39,41,43,47,52,55,56,57,60\t1,1,1,3,7,5,3,2,1,60,50,20,5,4,1,2,9,6,1,4,1,1,1,8,1,1,1,1\tNormal, Expanded\t0-44, 45-inf\nchr9\t69037287\t69037304\tFriedreich ataxia\t229300\t5,6,7,8,9,10,13,14,16,17,18,19,20,21,23,24,25,26\t9,7,2,113,135,1,4,1,4,5,4,6,1,3,1,1,1,2\tNormal, Expanded\t0-33, 34-inf\nchr4\t3074877\t3074933\tHuntington disease\t143100\t9,11,12,15,16,17,18,19,20,21,22,23,24,25,27,28\t1,1,2,33,25,99,46,31,20,10,10,8,6,2,3,3\tNormal, Expanded\t0-26, 27-inf\nchr16\t87604288\t87604329\tHuntington disease-like 2\t606438\t5,11,12,13,14,15,16,17,18,19,20,22,23,26,27,28,29,33\t1,5,1,20,139,41,49,17,9,5,1,1,1,4,2,1,2,1\tNormal, Expanded\t0-28, 29-inf\nchr20\t2652734\t2652757\tSpinocerebellar ataxia 36\t614153\t4,5,6,7,8,9,10,11\t72,37,32,122,11,21,2,3\tNormal, Expanded\t0-14, 15-inf\nchr5\t146878728\t146878757\tSpinocerebellar ataxia 12\t604326\t9,10,11,13,14,15,16,17,18,19,20,23\t31,129,3,43,30,28,17,13,1,3,1,1\tNormal, Expanded\t0-32, 33-inf\nchr18\t55586156\t55586227\tFuchs' Corneal 
Dystrophy\t613267\t5,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,31,33,34,35,36,37,46,52,54\t1,76,5,4,18,32,12,32,9,6,5,6,12,12,20,11,9,10,4,4,2,2,2,1,1,1,1,1,1\tNormal, Expanded\t0-39, 40-inf\nchr15\t22786678\t22786701\tAmyotrophic lateral sclerosis\t600363\t6,7,8,9,10,19,20,24,33\t1,127,162,2,4,1,1,1,1\tNormal, Expanded\t0-8, 9-inf\nchr2\t190880874\t190880920\tGlutaminase deficiency\t618412\t7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,25\t1,99,6,12,5,7,21,53,49,13,11,6,8,4,2,1,1,1\tNormal, Expanded\t0-89, 90-inf"
  },
  {
    "path": "SAUtils/AAConservation/AaConservationMain.cs",
    "content": "﻿using System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing VariantAnnotation.Caches;\nusing VariantAnnotation.ProteinConservation;\nusing VariantAnnotation.Providers;\n\nnamespace SAUtils.AAConservation\n{\n    public static class AaConservationMain\n    {\n        private static string _scoresFile;\n        private static string _compressedReference;\n        private static string _transcriptCachePrefix;\n        private static string _outputDirectory;\n        \n        public static ExitCodes Run(string command, string[] commandArgs)\n\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"ref|r=\",\n                    \"compressed reference sequence file\",\n                    v => _compressedReference = v\n                },\n                {\n                    \"cache|c=\",\n                    \"Transcript cache prefix\",\n                    v => _transcriptCachePrefix = v\n                },\n                {\n                    \"scr|s=\",\n                    \"input file path with conservation scores\",\n                    v => _scoresFile = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .CheckInputFilenameExists(CacheConstants.TranscriptPath(_transcriptCachePrefix), \"transcript cache prefix\", \"--cache\")\n                .CheckInputFilenameExists(_scoresFile, \"input file path with conservation scores\", 
\"--scr\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a supplementary database containing amino acid conservation scores\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n\n        private static ExitCodes ProgramExecution()\n        {\n            using var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\n            TranscriptCacheData transcriptData = AaConservationUtilities.GetTranscriptData(referenceProvider.RefIndexToChromosome, _transcriptCachePrefix);// we will use the transcript data to validate the protein sequence\n            \n            var    version     = DataSourceVersionReader.GetSourceVersion(_scoresFile + \".version\");\n            string outFileName = $\"{version.Name}_{version.Version}\";\n\n            //read multi-alignments\n            using (var stream = GZipUtilities.GetAppropriateReadStream(_scoresFile))\n            using(var parser = new ProteinConservationParser(stream))\n            using(var outStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName+ProteinConservationCommon.FileSuffix)))\n            using(var groupStream = FileUtilities.GetCreateStream(\"transcriptGroups.txt\"))\n            using(var writer = new ProteinConservationWriter(outStream, groupStream, transcriptData, version))    \n            {\n                writer.Write(parser.GetItems());\n            }\n\n            return ExitCodes.Success;\n        }\n\n    }\n}"
  },
  {
    "path": "SAUtils/AAConservation/AaConservationUtilities.cs",
    "content": "﻿using System.Collections.Generic;\nusing Genome;\nusing IO;\nusing VariantAnnotation.Caches;\nusing VariantAnnotation.IO.Caches;\n\nnamespace SAUtils.AAConservation\n{\n    public static class AaConservationUtilities\n    {\n        public static TranscriptCacheData GetTranscriptData(Dictionary<ushort, Chromosome> refIndexToChromosome, string transcriptCachePrefix)\n        {\n            using var transcriptCacheReader = new TranscriptCacheReader(\n                FileUtilities.GetReadStream(CacheConstants.TranscriptPath(transcriptCachePrefix)));\n            return transcriptCacheReader.Read(refIndexToChromosome);\n        }\n\n    }\n}"
  },
  {
    "path": "SAUtils/AAConservation/ProteinConservationParser.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing OptimizedCore;\nusing VariantAnnotation.ProteinConservation;\n\nnamespace SAUtils.AAConservation\n{\n    public sealed class ProteinConservationParser:IDisposable\n    {\n        private readonly Stream _stream;\n\n        private int _ensemblIdsIndex = -1;\n        private int _chromIndex = -1;\n        private int _scoresIndex = -1;\n        private int _proteinSeqIndex = -1;\n\n        private const string EnsemblIdsTag = \"Ensembl\";\n        private const string ProteinSequenceTag  = \"ProteinSequence\";\n        private const string ChromTag = \"Chromosome\";\n        private const string ScoresTag = \"Percent Conservation at each AA residue\";\n\n        public ProteinConservationParser(Stream stream)\n        {\n            _stream = stream;\n        }\n\n        public IEnumerable<ProteinConservationItem> GetItems()\n        {\n            using (var reader = new StreamReader(_stream))\n            {\n                string line;\n                while ((line = reader.ReadLine()) != null)\n                {\n                    var columns = line.OptimizedSplit('\\t');\n                    if (line.StartsWith(\"#\"))\n                    {\n                        ParseHeader(line);\n                        continue;\n                    }\n\n                    var transcriptId    = columns[_ensemblIdsIndex];\n                    var proteinSequence = columns[_proteinSeqIndex];\n                    var chromosome      = columns[_chromIndex];\n                    var scores = columns[_scoresIndex].OptimizedSplit(',').Select(x => (byte) int.Parse(x))\n                        .ToArray();\n                    \n                    yield return new ProteinConservationItem(chromosome, transcriptId, proteinSequence, scores);\n                }\n            }\n\n        }\n\n        private void ParseHeader(string line)\n        {\n            var 
columnTags = line.TrimStart('#').OptimizedSplit('\\t');\n\n            _ensemblIdsIndex = Array.IndexOf(columnTags, EnsemblIdsTag);\n            _chromIndex = Array.IndexOf(columnTags, ChromTag);\n            _scoresIndex = Array.IndexOf(columnTags, ScoresTag);\n            _proteinSeqIndex = Array.IndexOf(columnTags, ProteinSequenceTag);\n        }\n\n        public void Dispose()=>_stream?.Dispose(); \n        \n    }\n}"
  },
  {
    "path": "SAUtils/AAConservation/ProteinConservationWriter.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing Genome;\nusing IO;\nusing VariantAnnotation.Caches;\nusing VariantAnnotation.ProteinConservation;\nusing VariantAnnotation.Providers;\n\nnamespace SAUtils.AAConservation\n{\n    public sealed class ProteinConservationWriter:IDisposable\n    {\n        private readonly Stream _transcriptGroupStream;\n        private readonly GenomeAssembly _assembly;\n        private readonly ExtendedBinaryWriter _writer;\n        private readonly DataSourceVersion _version;\n\n        private readonly TranscriptCacheData _transcriptCacheData;\n        //some transcripts have multiple locations in the genome and may have conflicting scores\n        // so, we need to load them up and check for duplicates and resolve them.\n        \n\n        public ProteinConservationWriter(Stream stream, Stream groupStream, TranscriptCacheData transcriptData, DataSourceVersion version)\n        {\n            _transcriptGroupStream = groupStream;\n            _writer                = new ExtendedBinaryWriter(stream);\n            _transcriptCacheData   = transcriptData;\n            _version               = version;\n        }\n\n        public void Write(IEnumerable<ProteinConservationItem> items)\n        {\n            if (items == null) return;\n            _writer.WriteOpt(ProteinConservationCommon.SchemaVersion);\n            _writer.Write((byte) _assembly);\n            _version.Write(_writer);\n            \n            var alignedProteinsAndScores = GetProteinWithUniqueScores(items);\n            var nirvanaProteins = new HashSet<string>(_transcriptCacheData.PeptideSeqs);\n            CheckProteinSetOverlap(alignedProteinsAndScores, nirvanaProteins);\n            \n            var transcriptScores = new Dictionary<string, byte[]>();\n            //protein sequence -> transcript ids mapping\n            var transcriptGroupsByProtein = new Dictionary<string, 
List<string>>(alignedProteinsAndScores.Count);\n            foreach (var protein in alignedProteinsAndScores.Keys)\n            {\n                transcriptGroupsByProtein.Add(protein, new List<string>());\n            }\n            foreach (var transcriptIntervalArray in _transcriptCacheData.TranscriptIntervalArrays)\n            {\n                if (transcriptIntervalArray == null) continue;//may happen since for GRCh38 decoy contigs, there may be none\n                foreach (var transcriptInterval in transcriptIntervalArray.Array)\n                {\n                    var transcript = transcriptInterval.Value;\n                    if(transcript.Translation == null) continue;\n                    var peptideSeq = transcript.Translation.PeptideSeq;\n                    if(!alignedProteinsAndScores.TryGetValue(transcript.Translation.PeptideSeq, out var scores)) continue;\n\n                    transcriptScores.TryAdd(transcript.Id.WithVersion, scores);\n                    transcriptGroupsByProtein[peptideSeq].Add(transcript.Id.WithVersion);\n                }\n            }\n            \n            foreach (var (transcriptId, scores) in transcriptScores)\n            {\n                var transcriptScore = new TranscriptConservationScores(transcriptId, scores);\n                transcriptScore.Write(_writer);\n            }\n\n            WriteTranscriptGroups(transcriptGroupsByProtein);\n\n            Console.WriteLine($\"Recorded conservation scores for {transcriptScores.Count} transcripts.\");\n            //writing an empty item to indicate end of records\n            var endOfRecordItem = TranscriptConservationScores.GetEmptyItem();\n            endOfRecordItem.Write(_writer);\n        }\n\n        private void WriteTranscriptGroups(Dictionary<string, List<string>> transcriptGroupsByProtein)\n        {\n            using (var writer = new StreamWriter(_transcriptGroupStream))\n            {\n                var ensemblIds = new List<string>();\n    
            var refseqIds = new List<string>();\n                writer.WriteLine(\"#EnsemblIds\\tRefSeqIds\\tPeptide sequence\");\n                foreach (var (protein,ids) in transcriptGroupsByProtein)\n                {\n                    if(ids.Count == 0) continue;\n                    ensemblIds.Clear();\n                    refseqIds.Clear();\n                    foreach (var id in ids)\n                    {\n                        if(id.StartsWith(\"ENST\")) ensemblIds.Add(id);\n                        else refseqIds.Add(id);\n                    }\n                    writer.WriteLine($\"{string.Join(',',ensemblIds)}\\t{string.Join(',',refseqIds)}\\t{protein}\");\n                }\n            }\n        }\n\n        private static void CheckProteinSetOverlap(Dictionary<string, byte[]> proteinAndScores, HashSet<string> nirvanaProteins)\n        {\n            var count = 0;\n            foreach (var protein in proteinAndScores.Keys)\n            {\n                if (nirvanaProteins.Contains(protein)) count++;\n            }\n\n            Console.WriteLine($\"{count} aligned proteins were also in Nirvana cache\");\n        }\n\n        private static Dictionary<string, byte[]> GetProteinWithUniqueScores(IEnumerable<ProteinConservationItem> items)\n        {\n            var proteinAndScores = new Dictionary<string, byte[]>();\n            var multiAlignProteins = new HashSet<string>();\n            var proteinCount            = 0;\n            foreach (var item in items)\n            {\n                if (proteinAndScores.TryAdd(item.ProteinSequence, item.Scores)) proteinCount++;\n                else\n                {\n                    if (item.Chromosome == \"chrX\" || item.Chromosome == \"X\")\n                    {\n                        proteinAndScores[item.ProteinSequence] = item.Scores;\n                    }\n\n                    if (!item.Scores.SequenceEqual(proteinAndScores[item.ProteinSequence]))\n                        
multiAlignProteins.Add(item.ProteinSequence);\n                }\n            }\n\n            foreach (var protein in multiAlignProteins)\n            {\n                proteinAndScores.Remove(protein);\n            }\n\n            Console.WriteLine($\"Found {proteinCount} proteins with unique scores.\");\n            return proteinAndScores;\n        }\n\n        public void Dispose()=>_writer?.Dispose();\n        \n    }\n}"
  },
  {
    "path": "SAUtils/AssemblyInfo.cs",
    "content": "﻿using System.Runtime.CompilerServices;\n\n[assembly: InternalsVisibleTo(\"UnitTests\")]"
  },
  {
    "path": "SAUtils/ClinGen/DosageMapRegionItem.cs",
    "content": "using Genome;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.ClinGen\n{\n    public sealed class DosageMapRegionItem : ISuppIntervalItem\n    {\n        public          Chromosome Chromosome { get; }\n        public          int        Start      { get; }\n        public          int        End        { get; }\n        public readonly int        HiScore;\n        public readonly int        TsScore;\n        \n        public DosageMapRegionItem(Chromosome chromosome, int start, int end, int hiScore, int tsScore)\n        {\n            Chromosome = chromosome;\n            Start      = start;\n            End        = end;\n            HiScore    = hiScore;\n            TsScore    = tsScore;\n        }\n        \n        public string GetJsonString()\n        {\n            var sb= StringBuilderPool.Get();\n            var jsonObject = new JsonObject(sb);\n\n            jsonObject.AddStringValue(\"chromosome\", Chromosome.EnsemblName);\n            jsonObject.AddIntValue(\"begin\", Start);\n            jsonObject.AddIntValue(\"end\", End);\n            jsonObject.AddStringValue(\"haploinsufficiency\", Data.ScoreToDescription[HiScore]);\n            jsonObject.AddStringValue(\"triplosensitivity\",  Data.ScoreToDescription[TsScore]);\n\n            return StringBuilderPool.GetStringAndReturn(sb);\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/ClinGen/DosageMapRegionParser.cs",
    "content": "using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing Genome;\nusing OptimizedCore;\nusing VariantAnnotation.IO;\nusing Newtonsoft.Json.Linq;\nusing SAUtils.DataStructures;\n\nnamespace SAUtils.ClinGen\n{\n    public sealed class DosageMapRegionParser : IDisposable\n    {\n        private readonly Stream _stream;\n        private readonly Dictionary<string, Chromosome> _refNameToChromosome;\n\n        private const string GenomicLocation = \"Genomic Location\";\n        private const string HaploInsufficiencyScoreTag = \"Haploinsufficiency Score\";\n        private const string TriploSensitivityScoreTag  = \"Triplosensitivity Score\";\n\n        private        int _genomicLocationIndex         = -1;\n        private        int _haploInsufficiencyScoreIndex = -1;\n        private        int _triploSensitivityScoreIndex  = -1;\n        private static int _unknownRegion                = 0;\n        \n        public DosageMapRegionParser(Stream stream, Dictionary<string, Chromosome> refNameToChromosome)\n        {\n            _stream = stream;\n            _refNameToChromosome = refNameToChromosome;\n        }\n        \n        public void Dispose()\n        {\n            _stream?.Dispose();\n        }\n        \n        public IEnumerable<DosageMapRegionItem> GetItems()\n        {\n            var dosageMapRegionItems = new List<DosageMapRegionItem>();\n            using (var reader = new StreamReader(_stream))\n            {\n                string line;\n                while ((line = reader.ReadLine()) != null)\n                {\n                    if (line.StartsWith(\"#\"))\n                    {\n                        ParseHeaderLine(line);\n                    }\n                    else\n                    {\n                        var item = GetDosageMapRegionItem(line, _refNameToChromosome);\n                        if (item != null) dosageMapRegionItems.Add(item);\n                    }\n      
          }\n            }\n            ReportStatistics(dosageMapRegionItems);\n            return dosageMapRegionItems;\n        }\n\n        private DosageMapRegionItem GetDosageMapRegionItem(string line, Dictionary<string, Chromosome> refNameToChromosome)\n        {\n            var fields = line.OptimizedSplit('\\t');\n            string genomicLocation = fields[_genomicLocationIndex];\n            (string chromName, int start, int end) = ParseGenomeLocation(genomicLocation);\n            if (chromName == null) return null;\n            if (!refNameToChromosome.TryGetValue(chromName, out var chrom)) return null;\n            \n            string haploInsufficiencyScore = fields[_haploInsufficiencyScoreIndex];\n            string triploSensitivityScore = fields[_triploSensitivityScoreIndex];\n            \n            if (!int.TryParse(haploInsufficiencyScore, out int hiScore)) hiScore = -1;\n            if (!int.TryParse(triploSensitivityScore, out int tsScore)) tsScore  = -1;\n            \n            return new DosageMapRegionItem(chrom, start, end, hiScore, tsScore);\n        }\n\n\n        private void ParseHeaderLine(string line)\n        {\n            if (line.StartsWith(\"#ISCA ID\")) GetColumnIndices(line);\n        }\n        \n        private void GetColumnIndices(string line)\n        {\n            var cols = line.OptimizedSplit('\\t');\n\n            _genomicLocationIndex = Array.IndexOf(cols, GenomicLocation);\n            _haploInsufficiencyScoreIndex = Array.IndexOf(cols, HaploInsufficiencyScoreTag);\n            _triploSensitivityScoreIndex  = Array.IndexOf(cols, TriploSensitivityScoreTag);\n            \n            if (_genomicLocationIndex == -1 || _haploInsufficiencyScoreIndex == -1 || _triploSensitivityScoreIndex == -1)\n                throw new InvalidDataException(\"Column indices not set!!\");\n        }\n\n        private static (string chromName, int Start, int End) ParseGenomeLocation(string genomeLocation)\n        {\n           
 int index1 = genomeLocation.IndexOf(':');\n            int index2 = genomeLocation.IndexOf('-');\n            if (index1 < 0 || index2 < 0)\n            {\n                Console.WriteLine($\"Not able to parse {genomeLocation}\");\n                _unknownRegion ++;\n                return (null, -1, -1);\n            }\n            string chromName = genomeLocation.Substring(0, index1);\n            int start = int.Parse(genomeLocation.Substring(index1 + 1, index2 - index1 - 1));\n            int end = int.Parse(genomeLocation.Substring(index2 + 1));\n            return (chromName, start, end);\n        }\n        \n        private void ReportStatistics(IEnumerable<DosageMapRegionItem> items)\n        {\n            var       description = new List<string>(Data.ScoreToDescription.Values);\n            KeyCounts hiScore     = new KeyCounts(description);\n            KeyCounts tsScore     = new KeyCounts(description);\n            foreach (DosageMapRegionItem item in items)\n            {\n                hiScore.Increment(Data.ScoreToDescription[item.HiScore]);\n                tsScore.Increment(Data.ScoreToDescription[item.TsScore]);\n            }\n                \n            var       sb      = StringBuilderPool.Get();\n            var       jo      = new JsonObject(sb);\n            sb.Append(JsonObject.OpenBrace);\n\n            jo.AddIntValue(\"genomeLocationCount\",           items.Count());\n            jo.AddIntValue(\"unparsableGenomeLocationCount\", _unknownRegion);\n            jo.AddObjectValue(\"haploinsufficiency\", hiScore);\n            jo.AddObjectValue(\"triplosensitivity\",  tsScore);\n            sb.Append(JsonObject.CloseBrace);\n\n            Console.WriteLine(JObject.Parse(StringBuilderPool.GetStringAndReturn(sb))); \n        }\n    }\n}"
  },
  {
    "path": "SAUtils/ClinGen/DosageMapRegions.cs",
    "content": "using System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing ErrorHandling.Exceptions;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.NSA;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.ClinGen\n{\n    public static class DosageMapRegions\n    {\n        private static string _outputDirectory;\n        private static string _dosageMapRegionFile;\n        private static string _inputReferencePath;\n\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"tsv|t=\",\n                    \"input tsv file\",\n                    v => _dosageMapRegionFile = v\n                },\n                {\n                    \"ref|r=\",\n                    \"input reference {filename}\",\n                    v => _inputReferencePath = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_dosageMapRegionFile, \"dosage map region TSV file\", \"--tsv\")\n                .CheckInputFilenameExists(_inputReferencePath, \"reference sequence file\", \"--tsv\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates an interval annotation database from dbVar data\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n            return exitCode;\n        }\n\n        private 
static ExitCodes ProgramExecution()\n        {\n            var versionFileNames = Directory.GetFiles(\".\", \"*.version\");\n            if (versionFileNames.Length != 1)\n            {\n                throw new UserErrorException($\"Expected exactly one version file in directory: {Directory.GetCurrentDirectory()}\");\n            }\n            \n\n            var    sourceVersion = DataSourceVersionReader.GetSourceVersion(versionFileNames[0]);\n            string outFileName            =  $\"{sourceVersion.Name.Replace(' ', '_')}_{sourceVersion.Version}\";\n            var    referenceProvider      = new ReferenceSequenceProvider(GZipUtilities.GetAppropriateReadStream(_inputReferencePath));\n            using (var dosageSensitivityParser = new DosageMapRegionParser(GZipUtilities.GetAppropriateReadStream(_dosageMapRegionFile), referenceProvider.RefNameToChromosome))\n            using (var stream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.IntervalFileSuffix)))\n            using (var nsiWriter = new NsiWriter(stream, sourceVersion, referenceProvider.Assembly, SaCommon.DosageSensitivityTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))\n            {\n                nsiWriter.Write(dosageSensitivityParser.GetItems());\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/ClinGen/DosageSensitivity.cs",
    "content": "﻿using System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing ErrorHandling.Exceptions;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.ClinGen\n{\n    public static class DosageSensitivity\n    {\n        private static string _outputDirectory;\n        private static string _dosageSensitivityFile;\n\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"tsv|t=\",\n                    \"input tsv file\",\n                    v => _dosageSensitivityFile = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .CheckInputFilenameExists(_dosageSensitivityFile, \"dosage sensitivity TSV file\", \"--tsv\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a gene annotation database from ClinGen dosage sensitivity data\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n\n        private static ExitCodes ProgramExecution()\n        {\n            var versionFileNames = Directory.GetFiles(\".\", \"*.version\");\n            if (versionFileNames.Length != 1)\n            {\n                throw new UserErrorException($\"Expected exactly one version file in directory: {Directory.GetCurrentDirectory()}\");\n            }\n\n         
   var sourceVersion = DataSourceVersionReader.GetSourceVersion(versionFileNames[0]);\n\n            string outFileName = $\"{sourceVersion.Name.Replace(' ','_')}_{sourceVersion.Version}\";\n\n            using (var dosageSensitivityParser= new DosageSensitivityParser(GZipUtilities.GetAppropriateReadStream(_dosageSensitivityFile)))\n            using (var stream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.GeneFileSuffix)))\n            using (var ngaWriter = new NgaWriter(stream, sourceVersion, SaCommon.DosageSensitivityTag, SaCommon.SchemaVersion, false))\n            {\n                ngaWriter.Write(dosageSensitivityParser.GetItems());\n            }\n\n            return ExitCodes.Success;\n        }\n\n    }\n}"
  },
  {
    "path": "SAUtils/ClinGen/DosageSensitivityItem.cs",
    "content": "﻿using System.IO;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.ClinGen\n{\n    public sealed class DosageSensitivityItem : ISuppGeneItem\n    {\n        public          string GeneSymbol { get; }\n        public readonly int    HiScore;\n        public readonly int    TsScore;\n\n        public DosageSensitivityItem(string geneSymbol, int hiScore, int tsScore)\n        {\n            GeneSymbol = geneSymbol;\n            HiScore    = hiScore;\n            TsScore    = tsScore;\n\n            if (!Data.ScoreToDescription.ContainsKey(HiScore) || !Data.ScoreToDescription.ContainsKey(TsScore))\n            {\n                throw new InvalidDataException($\"Unexpected score ({HiScore}, {TsScore}) observed for gene: {geneSymbol}\");\n            }\n        }\n\n        public string GetJsonString()\n        {\n            var sb = StringBuilderPool.Get();\n            var jsonObject = new JsonObject(sb);\n\n            sb.Append(JsonObject.OpenBrace);\n            jsonObject.AddStringValue(\"haploinsufficiency\", Data.ScoreToDescription[HiScore]);\n            jsonObject.AddStringValue(\"triplosensitivity\",  Data.ScoreToDescription[TsScore]);\n            sb.Append(JsonObject.CloseBrace);\n\n            return StringBuilderPool.GetStringAndReturn(sb);\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/ClinGen/DosageSensitivityParser.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\nusing Newtonsoft.Json.Linq;\nusing SAUtils.DataStructures;\n\nnamespace SAUtils.ClinGen\n{\n    public sealed class DosageSensitivityParser:IDisposable\n    {\n        private readonly Stream _stream;\n\n        private const string GeneSymbolTag = \"#Gene Symbol\";\n        private const string GeneIdTag = \"Gene ID\";\n        private const string HaploInsufficiencyScoreTag = \"Haploinsufficiency Score\";\n        private const string TriploSensitivityScoreTag = \"Triplosensitivity Score\";\n\n        private int _geneSymbolIndex = -1;\n        private int _geneIdIndex = -1;\n        private int _haploInsufficiencyScoreIndex = -1;\n        private int _triploSensitivityScoreIndex = -1;\n\n        public DosageSensitivityParser(Stream stream)\n        {\n            _stream = stream;\n        }\n        public void Dispose()\n        {\n            _stream?.Dispose();\n        }\n\n        public Dictionary<string, List<ISuppGeneItem>> GetItems()\n        {\n            var geneAnnotations = new Dictionary<string, List<ISuppGeneItem>>();\n            var duplicateGenes  = new HashSet<string>();\n            \n            using (var reader = new StreamReader(_stream))\n            {\n                string line;\n                while ((line = reader.ReadLine()) != null)\n                {\n                    if (line.StartsWith(\"#\"))\n                    {\n                        ParseHeaderLine(line);\n                    }\n                    else\n                    {\n                        if (MissingIndices()) throw new InvalidDataException(\"Column indices not set!!\");\n                        var geneAnnotation = GetGeneAndScores(line);\n                        bool isDuplicate = geneAnnotations.TryAdd(geneAnnotation.GeneSymbol, new List<ISuppGeneItem> { geneAnnotation });\n   
                     if (!isDuplicate)\n                        {\n                            duplicateGenes.Add(geneAnnotation.GeneSymbol);\n                            if (geneAnnotation.GetJsonString() != geneAnnotations[geneAnnotation.GeneSymbol][0].GetJsonString())\n                            {\n                                Console.WriteLine(geneAnnotation.GetJsonString());\n                                Console.WriteLine(geneAnnotations[geneAnnotation.GeneSymbol][0].GetJsonString());\n                                throw new DataMisalignedException($\"Duplicate gene entries have conflicting information.\");\n                            }\n                        }\n                    }\n                }\n                Console.WriteLine($\"WARNING: Duplicate entries found for genes:{string.Join(',', duplicateGenes)}. But the contents were identical.\");\n            }\n            ReportStatistics(geneAnnotations);\n            return geneAnnotations;\n        }\n\n        private ISuppGeneItem GetGeneAndScores(string line)\n        {\n            var cols = line.OptimizedSplit('\\t');\n\n            var gene    = cols[_geneSymbolIndex];\n            if (!int.TryParse(cols[_haploInsufficiencyScoreIndex], out var hiScore)) hiScore = -1;\n            if (!int.TryParse(cols[_triploSensitivityScoreIndex], out var tsScore)) tsScore = -1;\n\n            return new DosageSensitivityItem(gene, hiScore, tsScore);\n        }\n\n        private bool MissingIndices()\n        {\n            return _geneSymbolIndex == -1 ||\n                   _geneIdIndex == -1 ||\n                   _haploInsufficiencyScoreIndex == -1 ||\n                   _triploSensitivityScoreIndex == -1;\n        }\n\n        private void ParseHeaderLine(string line)\n        {\n            if (line.StartsWith(\"#Gene Symbol\")) GetColumnIndices(line);\n        }\n        \n        private void GetColumnIndices(string line)\n        {\n            var cols = 
line.OptimizedSplit('\\t');\n\n            _geneSymbolIndex                    = Array.IndexOf(cols, GeneSymbolTag);\n            _geneIdIndex                        = Array.IndexOf(cols, GeneIdTag);\n            _haploInsufficiencyScoreIndex       = Array.IndexOf(cols, HaploInsufficiencyScoreTag);\n            _triploSensitivityScoreIndex        = Array.IndexOf(cols, TriploSensitivityScoreTag);\n        }\n        \n        private void ReportStatistics(Dictionary<string, List<ISuppGeneItem>> items)\n        {\n            var       genes       = new List<string>(items.Keys);\n            var       description = new List<string>(Data.ScoreToDescription.Values);\n            KeyCounts hiScore     = new KeyCounts(description);\n            KeyCounts tsScore     = new KeyCounts(description);\n            foreach (string gene in genes)\n            {\n                var item = (DosageSensitivityItem) items[gene][0];\n                hiScore.Increment(Data.ScoreToDescription[item.HiScore]);\n                tsScore.Increment(Data.ScoreToDescription[item.TsScore]);\n            }\n                \n            var sb = StringBuilderPool.Get();\n            var jo = new JsonObject(sb);\n            sb.Append(JsonObject.OpenBrace);\n\n            jo.AddIntValue(\"geneCount\", items.Count);\n            jo.AddObjectValue(\"haploinsufficiency\", hiScore);\n            jo.AddObjectValue(\"triplosensitivity\", tsScore);\n            sb.Append(JsonObject.CloseBrace);\n\n            Console.WriteLine(JObject.Parse(StringBuilderPool.GetStringAndReturn(sb))); \n        }\n    }\n}"
  },
  {
    "path": "SAUtils/ClinGen/GeneDiseaseValidity.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing ErrorHandling.Exceptions;\nusing IO;\nusing OptimizedCore;\nusing SAUtils.InputFileParsers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.ClinGen\n{\n    public static class GeneDiseaseValidity\n    {\n        private static string _outputDirectory;\n        private static string _ugaFile;\n        private static string _diseaseValidityFile;\n\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"tsv|t=\",\n                    \"ClinGen gene validity file path\",\n                    v => _diseaseValidityFile = v\n                },\n                {\n                    \"uga|u=\",\n                    \"UGA file path\",\n                    v => _ugaFile = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .CheckInputFilenameExists(_diseaseValidityFile, \"disease validity TSV file\", \"--tsv\")\n                .CheckInputFilenameExists(_ugaFile, \"UGA file path\", \"--uga\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a gene annotation database from ClinGen gene validity data\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return 
exitCode;\n        }\n\n        private static ExitCodes ProgramExecution()\n        {\n            var versionFileNames = Directory.GetFiles(\".\",\"*.version\");\n            if (versionFileNames.Length != 1)\n            {\n                throw new UserErrorException($\"Expected exactly one version file in directory: {Directory.GetCurrentDirectory()}\");\n            }\n\n            var sourceVersion = DataSourceVersionReader.GetSourceVersion(versionFileNames[0]);\n\n            string outFileName = $\"{sourceVersion.Name.Replace(' ', '_')}_{sourceVersion.Version}\";\n\n            // read uga file to get hgnc id to gene symbols dictionary\n            using (var diseaseValidityParser = new GeneDiseaseValidityParser(GZipUtilities.GetAppropriateReadStream(_diseaseValidityFile), GetHgncIdToGeneSymbols()))\n            using (var stream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.GeneFileSuffix)))\n            using (var ngaWriter = new NgaWriter(stream, sourceVersion, SaCommon.DiseaseValidityTag, SaCommon.SchemaVersion, true))\n            {\n                ngaWriter.Write(diseaseValidityParser.GetItems());\n            }\n\n            return ExitCodes.Success;\n        }\n\n        private static Dictionary<int, string> GetHgncIdToGeneSymbols()\n        {\n            var idToSymbols = new Dictionary<int, string>();\n            \n            using (var ugaStream = GZipUtilities.GetAppropriateReadStream(_ugaFile))\n            using(var reader = new StreamReader(ugaStream))\n            {\n                string line= reader.ReadLine();//first line has the count of entries\n                while ((line = reader.ReadLine()) != null)\n                {\n                    var splits = line.OptimizedSplit('\\t');\n                    var symbol = splits[2];\n                    var hgncId = int.Parse(splits[8]);\n                    if(hgncId == -1) continue;\n                    \n                    if 
(idToSymbols.TryAdd(hgncId, symbol)) continue;\n                    if(symbol != idToSymbols[hgncId]) Console.WriteLine($\"Different symbol for the same id({hgncId}). Existing: {idToSymbols[hgncId]}. New: {symbol}\");\n\n                }\n            }\n\n            return idToSymbols;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/ClinGen/GeneDiseaseValidityItem.cs",
    "content": "﻿using System;\nusing System.Globalization;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.ClinGen\n{\n    public sealed class GeneDiseaseValidityItem: ISuppGeneItem\n    {\n        public string GeneSymbol { get; }\n\n        public readonly string DiseaseId;\n        private readonly string _disease;\n        private readonly string _classification;\n        private readonly string _classificationDate;\n\n\n        public GeneDiseaseValidityItem(string geneSymbol, string diseaseId, string disease, string classification,\n            string classificationDate)\n        {\n            GeneSymbol          = geneSymbol;\n            DiseaseId           = diseaseId;\n            _disease            = disease;\n            _classification     = classification;\n            _classificationDate = classificationDate;\n        }\n\n        public string GetJsonString()\n        {\n            var sb = StringBuilderPool.Get();\n            var jsonObject = new JsonObject(sb);\n\n            sb.Append(JsonObject.OpenBrace);\n            jsonObject.AddStringValue(\"diseaseId\", DiseaseId);\n            jsonObject.AddStringValue(\"disease\", _disease);\n            jsonObject.AddStringValue(\"classification\", _classification);\n            jsonObject.AddStringValue(\"classificationDate\", _classificationDate);\n            sb.Append(JsonObject.CloseBrace);\n\n            return StringBuilderPool.GetStringAndReturn(sb);\n        }\n\n        public int CompareDate(GeneDiseaseValidityItem other)\n        {\n            var date = DateTime.ParseExact(_classificationDate, \"yyyy-MM-dd\", CultureInfo.InvariantCulture);\n            var otherDate = DateTime.ParseExact(other._classificationDate, \"yyyy-MM-dd\", CultureInfo.InvariantCulture);\n\n            return date.CompareTo(otherDate);\n        }\n    }\n\n}"
  },
  {
    "path": "SAUtils/ClinGen/GeneDiseaseValidityParser.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\nusing Newtonsoft.Json.Linq;\n\nnamespace SAUtils.ClinGen\n{\n    public sealed class GeneDiseaseValidityParser: IDisposable\n    {\n        private readonly Stream _stream;\n        private readonly Dictionary<int, string> _hgncIdToSymbols;\n\n        private readonly HashSet<int> _unknownIds = new HashSet<int>();\n        private readonly HashSet<string> _classificationSet = new HashSet<string>\n        {\n            \"no reported evidence\",\n            \"disputed\",\n            \"limited\",\n            \"moderate\",\n            \"definitive\",\n            \"strong\",\n            \"refuted\",\n            \"no known disease relationship\"\n        };\n\n        public GeneDiseaseValidityParser(Stream stream, Dictionary<int, string> hgncIdToSymbols)\n        {\n            _stream = stream;\n            _hgncIdToSymbols = hgncIdToSymbols;\n        }\n        public void Dispose()\n        {\n            _stream?.Dispose();\n        }\n\n        public Dictionary<string, List<ISuppGeneItem>> GetItems()\n        {\n            var geneAnnotations = new Dictionary<string, Dictionary<string,GeneDiseaseValidityItem>>();\n\n            using (var reader = new StreamReader(_stream))\n            {\n                string line;\n                bool isComments = true;\n                bool isHeaderLine = false;\n                while ((line = reader.ReadLine()) != null)\n                {\n                    if (isComments)\n                    {\n                        //the header starts with a bunch of '+' signs\n                        if (!line.StartsWith(\"++++\")) continue;\n                        isComments = false;\n                        isHeaderLine = true;\n                        continue;\n                    }\n\n                    if (isHeaderLine)\n                    {\n 
                       ParseHeaderLine(line);\n                        isHeaderLine = false;\n                        line = reader.ReadLine();//reading end of header line\n                        if (line.StartsWith(\"++++\")) continue;\n                    }\n\n                    if (MissingIndices()) throw new InvalidDataException(\"Column indices not set!!\");\n                    var geneAnnotation = GetAnnotationItem(line);\n                    if(geneAnnotation == null) continue;\n\n                    if (geneAnnotations.TryGetValue(geneAnnotation.GeneSymbol, out var annotations))\n                        AddLatest(annotations, geneAnnotation);\n                    else geneAnnotations.Add(geneAnnotation.GeneSymbol, new Dictionary<string, GeneDiseaseValidityItem> {{geneAnnotation.DiseaseId, geneAnnotation}});\n\n                }\n            }\n\n            Console.WriteLine($\"Number of geneIds missing from the cache:{_unknownIds.Count} ({100.0*_unknownIds.Count/_hgncIdToSymbols.Count}%)\");\n            \n            var items = GetLatestAnnotations(geneAnnotations);\n            ReportStatistics(items);\n            return items;\n        }\n\n        private static Dictionary<string, List<ISuppGeneItem>> GetLatestAnnotations(Dictionary<string, Dictionary<string, GeneDiseaseValidityItem>> annotationByDiseaseIds)\n        {\n            var latestAnnotations = new Dictionary<string, List<ISuppGeneItem>>();\n            foreach (var annotation in annotationByDiseaseIds)\n            {\n                var geneAnnotation = new List<ISuppGeneItem>();\n                foreach (var geneAnno in annotation.Value.Values)\n                {\n                    geneAnnotation.Add(geneAnno);\n                }\n\n                latestAnnotations.Add(annotation.Key, geneAnnotation);\n            }\n\n            return latestAnnotations;\n        }\n\n        private static void AddLatest(Dictionary<string, GeneDiseaseValidityItem> annotations, 
GeneDiseaseValidityItem geneAnnotation)\n        {\n            if(!annotations.TryGetValue(geneAnnotation.DiseaseId, out var diseaseItem)) annotations.Add(geneAnnotation.DiseaseId, geneAnnotation);\n            else\n            {\n                if (diseaseItem.CompareDate(geneAnnotation) < 0) annotations[geneAnnotation.DiseaseId] = geneAnnotation;\n            }\n        }\n\n\n        private GeneDiseaseValidityItem GetAnnotationItem(string line)\n        {\n            var cols = line.OptimizedSplit('\\t');\n\n            var geneId = int.Parse(cols[_geneIdIndex].OptimizedSplit(':')[1]);\n            if (!_hgncIdToSymbols.TryGetValue(geneId, out var geneSymbol))\n            {\n                _unknownIds.Add(geneId);\n                return null;\n            }\n\n            var disease = cols[_diseaseIndex].Trim('\\\"');\n            var diseaseId = cols[_diseaseIdIndex];\n            var classification = cols[_classificationIndex].ToLower();\n            if (!_classificationSet.Contains(classification))\n            {\n                throw new InvalidDataException($\"Unknown classification found: {classification}\");\n            }\n        \n            var classificationDate = cols[_classificationDateIndex].OptimizedSplit('T')[0];//2018-06-07T14:37:47.175Z\n\n            return new GeneDiseaseValidityItem(geneSymbol, diseaseId, disease, classification, classificationDate);\n        }\n\n        private void ReportStatistics(Dictionary<string, List<ISuppGeneItem>> items)\n        {\n            var sb = StringBuilderPool.Get();\n            var jo = new JsonObject(sb);\n            sb.Append(JsonObject.OpenBrace);\n\n            jo.AddIntValue(\"geneIdsCount\", items.Count);\n            jo.AddIntValue(\"unknownGeneIdsCount\", _unknownIds.Count);\n            sb.Append(JsonObject.CloseBrace);\n\n            Console.WriteLine(JObject.Parse(StringBuilderPool.GetStringAndReturn(sb))); \n        }\n\n        \n        private int _geneIdIndex = -1;\n        
private int _diseaseIdIndex = -1;\n        private int _diseaseIndex = -1;\n        private int _classificationIndex = -1;\n        private int _classificationDateIndex = -1;\n\n        private const string GeneIdTag = \"GENE ID (HGNC)\";\n        private const string DiseaseTag = \"DISEASE LABEL\";\n        private const string DiseaseIdTag = \"DISEASE ID (MONDO)\";\n        private const string ClassificationTag = \"CLASSIFICATION\";\n        private const string ClassificationDateTag = \"CLASSIFICATION DATE\";\n\n        private bool MissingIndices()\n        {\n            return _geneIdIndex            == -1 ||\n                   _diseaseIdIndex         == -1 ||\n                   _diseaseIndex           == -1 ||\n                   _classificationIndex    == -1 ||\n                   _classificationDateIndex== -1;\n        }\n\n        private void ParseHeaderLine(string line)\n        {\n            var cols = line.OptimizedSplit('\\t');\n\n            _geneIdIndex             = Array.IndexOf(cols, GeneIdTag);\n            _diseaseIndex            = Array.IndexOf(cols, DiseaseTag);\n            _diseaseIdIndex          = Array.IndexOf(cols, DiseaseIdTag);\n            _classificationIndex     = Array.IndexOf(cols, ClassificationTag);\n            _classificationDateIndex = Array.IndexOf(cols, ClassificationDateTag);\n        }\n\n    }\n}"
  },
  {
    "path": "SAUtils/ClinGen/ScoreToDescription.cs",
    "content": "using System.Collections.Generic;\n\nnamespace SAUtils.ClinGen\n{\n    public static class Data\n    {\n        public static Dictionary<int, string> ScoreToDescription { get; } = new Dictionary<int, string>\n        {\n            {-1, \"Not yet evaluated\"},\n            {0, \"no evidence to suggest that dosage sensitivity is associated with clinical phenotype\"},\n            {1, \"little evidence suggesting dosage sensitivity is associated with clinical phenotype\"},\n            {2, \"emerging evidence suggesting dosage sensitivity is associated with clinical phenotype\"},\n            {3, \"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype\"},\n            {30, \"gene associated with autosomal recessive phenotype\"},\n            {40, \"dosage sensitivity unlikely\"}\n        };\n    }\n}"
  },
  {
    "path": "SAUtils/CosmicGeneFusions/Cache/ReferenceLoader.cs",
    "content": "﻿using System.Collections.Generic;\nusing Genome;\nusing IO;\nusing VariantAnnotation.Providers;\n\nnamespace SAUtils.CosmicGeneFusions.Cache\n{\n    public static class ReferenceLoader\n    {\n        public static Dictionary<ushort, Chromosome> GetRefIndexToChromosome(string referencePath)\n        {\n            var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(referencePath));\n            return sequenceProvider.RefIndexToChromosome;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/CosmicGeneFusions/Cache/TranscriptCache.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing Genome;\nusing Intervals;\nusing VariantAnnotation.Caches;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.IO.Caches;\nusing VariantAnnotation.Utilities;\n\nnamespace SAUtils.CosmicGeneFusions.Cache\n{\n    public sealed class TranscriptCache\n    {\n        private readonly Dictionary<string, ITranscript> _idToTranscript;\n\n        public TranscriptCache(Dictionary<string, ITranscript> idToTranscript) => _idToTranscript = idToTranscript;\n\n        public static TranscriptCache Create(Stream stream, Dictionary<ushort, Chromosome> refIndexToChromosome)\n        {\n            using var           reader    = new TranscriptCacheReader(stream);\n            TranscriptCacheData cacheData = reader.Read(refIndexToChromosome);\n            return new TranscriptCache(GetTranscriptIdToTranscript(cacheData.TranscriptIntervalArrays));\n        }\n\n        // ReSharper disable once ParameterTypeCanBeEnumerable.Local\n        internal static Dictionary<string, ITranscript> GetTranscriptIdToTranscript(IntervalArray<ITranscript>[] transcriptIntervalArrays)\n        {\n            var transcriptIdToTranscript = new Dictionary<string, ITranscript>();\n\n            foreach (IntervalArray<ITranscript> refTranscriptIntervals in transcriptIntervalArrays)\n            {\n                if (refTranscriptIntervals == null) continue;\n                \n                foreach (Interval<ITranscript> transcriptInterval in refTranscriptIntervals.Array)\n                {\n                    ITranscript transcript = transcriptInterval.Value;\n                    if (transcript.Source != Source.Ensembl) continue;\n\n                    if (!transcriptIdToTranscript.ContainsKey(transcript.Id.WithVersion))\n                        transcriptIdToTranscript[transcript.Id.WithVersion] = transcript;\n\n                    if 
(!transcriptIdToTranscript.ContainsKey(transcript.Id.WithoutVersion))\n                        transcriptIdToTranscript[transcript.Id.WithoutVersion] = transcript;\n                }\n            }\n\n            return transcriptIdToTranscript;\n        }\n\n        public (string GeneId, string GeneSymbol) GetGene(string transcriptId)\n        {\n            string shortTranscriptId = FormatUtilities.SplitVersion(transcriptId).Id;\n\n            return _idToTranscript.TryGetValue(shortTranscriptId, out ITranscript transcript)\n                ? (transcript.Gene.EnsemblId.WithoutVersion, transcript.Gene.Symbol)\n                : HandleMissingTranscripts(transcriptId);\n        }\n\n        // In GRCh38, we're missing some of the transcripts specified by COSMIC. However, it's fine to substitute\n        // these transcripts with others belonging to the same gene. These are generally from transcripts that are\n        // no longer used.\n        internal static (string GeneId, string GeneSymbol) HandleMissingTranscripts(string transcriptId) =>\n            transcriptId switch\n            {\n                \"ENST00000646891.1\" => (\"ENSG00000157764\", \"BRAF\"),\n                \"ENST00000242365.4\" => (\"ENSG00000122778\", \"KIAA1549\"),\n                \"ENST00000311979.3\" => (\"ENSG00000172660\", \"TAF15\"),\n                \"ENST00000529193.1\" => (\"ENSG00000157613\", \"CREB3L1\"),\n                \"ENST00000312675.4\" => (\"ENSG00000145012\", \"LPP\"),\n                \"ENST00000556625.1\" => (\"ENSG00000258389\", \"DUX4\"),\n                _                   => throw new InvalidDataException($\"Found an unhandled transcript ID in HandleMissingTranscripts: {transcriptId}\")\n            };\n    }\n}"
  },
  {
    "path": "SAUtils/CosmicGeneFusions/Conversion/CosmicConverter.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing SAUtils.CosmicGeneFusions.Cache;\n\nnamespace SAUtils.CosmicGeneFusions.Conversion\n{\n    public static class CosmicConverter\n    {\n        public static Dictionary<ulong, string[]> Convert(Dictionary<int, HashSet<RawCosmicGeneFusion>> fusionIdToEntries,\n            TranscriptCache transcriptCache)\n        {\n            var fusionKeyToJsonList = new Dictionary<ulong, List<string>>();\n\n            foreach ((int fusionId, HashSet<RawCosmicGeneFusion> fusionEntries) in fusionIdToEntries)\n            {\n                (ulong fusionKey, string json) = GetCosmicGeneFusion(fusionId, fusionEntries, transcriptCache);\n                if (json == null) continue;\n\n                if (!fusionKeyToJsonList.TryGetValue(fusionKey, out List<string> jsonEntries))\n                {\n                    jsonEntries                    = new List<string>();\n                    fusionKeyToJsonList[fusionKey] = jsonEntries;\n                }\n\n                jsonEntries.Add(json);\n            }\n\n            return fusionKeyToJsonList.ToJsonArray();\n        }\n\n        internal static Dictionary<ulong, string[]> ToJsonArray(this Dictionary<ulong, List<string>> geneKeyToJsonList)\n        {\n            var geneKeyToJson = new Dictionary<ulong, string[]>();\n\n            foreach ((ulong geneKey, List<string> jsonList) in geneKeyToJsonList) geneKeyToJson[geneKey] = jsonList.ToArray();\n            return geneKeyToJson;\n        }\n\n        internal static (ulong FusionKey, string Json) GetCosmicGeneFusion(int fusionId, HashSet<RawCosmicGeneFusion> fusionEntries,\n            TranscriptCache transcriptCache)\n        {\n            (int[] pubMedIds, int numSamples, string hgvsNotation) = AggregateRawCosmicGeneFusions(fusionEntries);\n            if (hgvsNotation == null) return (0, null);\n\n            var           id          = $\"COSF{fusionId}\";\n            
CosmicCount[] histologies = Histology.GetCounts(fusionEntries, numSamples);\n            CosmicCount[] sites       = Site.GetCounts(fusionEntries, numSamples);\n\n            (string[] geneSymbols, ulong fusionKey) = HgvsRnaParser.GetTranscripts(hgvsNotation, transcriptCache);\n\n            var geneFusion = new CosmicGeneFusion(id, numSamples, geneSymbols, hgvsNotation, histologies, sites, pubMedIds);\n            var json       = geneFusion.ToString();\n\n            return (fusionKey, json);\n        }\n\n        internal static (int[] PubMedIds, int NumSamples, string HgvsNotation) AggregateRawCosmicGeneFusions(\n            // ReSharper disable once ParameterTypeCanBeEnumerable.Local\n            HashSet<RawCosmicGeneFusion> fusionEntries)\n        {\n            var sampleIds   = new HashSet<int>();\n            var pubMedIds   = new HashSet<int>();\n            var hgvsEntries = new HashSet<string>();\n\n            foreach (RawCosmicGeneFusion fusionEntry in fusionEntries)\n            {\n                pubMedIds.Add(fusionEntry.PubMedId);\n                sampleIds.Add(fusionEntry.SampleId);\n                hgvsEntries.Add(fusionEntry.HgvsNotation);\n            }\n\n            if (hgvsEntries.Count != 1)\n                throw new InvalidDataException($\"Expected one HGVS entry for the gene fusion, but found {hgvsEntries.Count}\");\n\n            string hgvsr = HgvsRnaFixer.Fix(hgvsEntries.First());\n            return (pubMedIds.OrderBy(x => x).ToArray(), sampleIds.Count, hgvsr);\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/CosmicGeneFusions/Conversion/CosmicGeneFusion.cs",
    "content": "﻿// ReSharper disable InconsistentNaming\n// ReSharper disable SuggestBaseTypeForParameter\n\nusing System.Text.Json;\n\nnamespace SAUtils.CosmicGeneFusions.Conversion\n{\n    public sealed record CosmicGeneFusion(string id, int numSamples, string[] geneSymbols, string hgvsr, CosmicCount[] histologies,\n        CosmicCount[] sites, int[] pubMedIds)\n    {\n        public override string ToString()\n        {\n            string json = JsonSerializer.Serialize(this);\n            return json.Substring(1, json.Length - 2);\n        }\n    }\n\n    public sealed record CosmicCount(string name, int numSamples);\n}"
  },
  {
    "path": "SAUtils/CosmicGeneFusions/Conversion/HgvsRnaFixer.cs",
    "content": "﻿using System;\nusing System.Text;\n\nnamespace SAUtils.CosmicGeneFusions.Conversion\n{\n    public static class HgvsRnaFixer\n    {\n        // COSMIC isn't using the correct HGVS notation, so we're just going to add the proper junction string (::) between each transcript\n        public static string Fix(string hgvsNotation)\n        {\n            var                sb        = new StringBuilder();\n            ReadOnlySpan<char> delimiter = \"_ENST\".AsSpan();\n            ReadOnlySpan<char> hgvsSpan  = hgvsNotation.AsSpan();\n\n            var numTranscripts = 0;\n\n            while (true)\n            {\n                int index = hgvsSpan.IndexOf(delimiter);\n                numTranscripts++;\n                \n                if (index == -1)\n                {\n                    sb.Append(hgvsSpan);\n                    break;\n                }\n\n                sb.Append(hgvsSpan.Slice(0, index));\n                sb.Append(\"::\");\n                hgvsSpan = hgvsSpan.Slice(index + 1);\n            }\n\n            // this is to capture HGVS entries like \"ENST00000283243.12(PLA2R1):r.1_2802\" which is not actually a gene fusion\n            return numTranscripts == 1 ? null : sb.ToString();\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/CosmicGeneFusions/Conversion/HgvsRnaParser.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing System.Text.RegularExpressions;\nusing SAUtils.CosmicGeneFusions.Cache;\nusing VariantAnnotation.GeneFusions.Utilities;\n\nnamespace SAUtils.CosmicGeneFusions.Conversion\n{\n    public static class HgvsRnaParser\n    {\n        private static readonly Regex HgvsRegex = new(@\"(ENST[^\\(]+)\", RegexOptions.Compiled);\n\n        public static (string[] GeneSymbols, ulong FusionKey) GetTranscripts(string hgvsNotation, TranscriptCache transcriptCache)\n        {\n            (string transcriptId5, string transcriptId3) = Parse(hgvsNotation);\n\n            (string geneId5, string geneSymbol5) = transcriptCache.GetGene(transcriptId5);\n            (string geneId3, string geneSymbol3) = transcriptCache.GetGene(transcriptId3);\n\n            ulong fusionKey = GeneFusionKey.Create(GeneFusionKey.CreateGeneKey(geneId5), GeneFusionKey.CreateGeneKey(geneId3));\n\n            return (new[] {geneSymbol5, geneSymbol3}, fusionKey);\n        }\n\n        public static (string TranscriptId5, string TranscriptId3) Parse(string hgvsString)\n        {\n            // the only gene fusion involving 3 transcripts. The middle one is a bit suspicious, so we'll use the other two. 
(GRCh37)\n            if (hgvsString == \"ENST00000305877.8(BCR):r.1_2866::ENST00000372348.2(ABL1):r.511-?_511-?::ENST00000318560.5(ABL1):r.461_5766\")\n                return (\"ENST00000305877.8\", \"ENST00000318560.5\");\n\n            // same situation in GRCh38\n            if (hgvsString == \"ENST00000305877.12(BCR):r.1_2866::ENST00000372348.6(ABL1):r.511-?_511-?::ENST00000318560.5(ABL1):r.461_5766\")\n                return (\"ENST00000305877.12\", \"ENST00000318560.5\");\n\n            var transcriptIds = new List<string>();\n            foreach (Match match in HgvsRegex.Matches(hgvsString)) transcriptIds.Add(match.Value);\n\n            string[] uniqueTranscriptIds = transcriptIds.Distinct().ToArray();\n            if (uniqueTranscriptIds.Length != 2) throw new InvalidDataException($\"Could not identify 2 transcripts in HGVS RNA parser: {hgvsString}\");\n\n            return (uniqueTranscriptIds[0], uniqueTranscriptIds[1]);\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/CosmicGeneFusions/Conversion/Histology.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing SAUtils.CosmicGeneFusions.IO;\nusing SAUtils.CosmicGeneFusions.Utilities;\n\nnamespace SAUtils.CosmicGeneFusions.Conversion\n{\n    public static class Histology\n    {\n        // ReSharper disable once ParameterTypeCanBeEnumerable.Global\n        public static CosmicCount[] GetCounts(HashSet<RawCosmicGeneFusion> fusionEntries, int numSamples)\n        {\n            var histologyCountDict = new Dictionary<string, int>();\n            var totalCount         = 0;\n\n            foreach (RawCosmicGeneFusion fusionEntry in fusionEntries)\n            {\n                string histology = GetMostSpecificValue(fusionEntry.PrimaryHistology, fusionEntry.HistologySubtype1);\n                if (histology == CosmicGeneFusionParser.MissingValue) continue;\n\n                if (histologyCountDict.TryGetValue(histology, out int count)) histologyCountDict[histology] = count + 1;\n                else histologyCountDict[histology]                                                          = 1;\n                totalCount++;\n            }\n\n            if (totalCount != numSamples)\n            {\n                throw new InvalidDataException($\"Found different histology count total ({totalCount}) than samples ({numSamples}).\");\n            }\n            \n            return histologyCountDict.GetCosmicCounts();\n        }\n\n        private static string GetMostSpecificValue(string primary, string subtype1) =>\n            subtype1 != CosmicGeneFusionParser.MissingValue ? subtype1 : primary;\n    }\n}"
  },
  {
    "path": "SAUtils/CosmicGeneFusions/Conversion/RawCosmicGeneFusion.cs",
    "content": "﻿namespace SAUtils.CosmicGeneFusions.Conversion\n{\n    public sealed record RawCosmicGeneFusion(int SampleId, int FusionId, string PrimarySite, string SiteSubtype1, string PrimaryHistology,\n        string HistologySubtype1, string HgvsNotation, int PubMedId);\n}"
  },
  {
    "path": "SAUtils/CosmicGeneFusions/Conversion/Site.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing SAUtils.CosmicGeneFusions.IO;\nusing SAUtils.CosmicGeneFusions.Utilities;\n\nnamespace SAUtils.CosmicGeneFusions.Conversion\n{\n    public static class Site\n    {\n        // ReSharper disable once ParameterTypeCanBeEnumerable.Global\n        public static CosmicCount[] GetCounts(HashSet<RawCosmicGeneFusion> fusionEntries, int numSamples)\n        {\n            var siteCountDict = new Dictionary<string, int>();\n            var totalCount    = 0;\n\n            foreach (RawCosmicGeneFusion fusionEntry in fusionEntries)\n            {\n                string site = CombineLevels(fusionEntry.PrimarySite, fusionEntry.SiteSubtype1);\n                if (site == CosmicGeneFusionParser.MissingValue) continue;\n\n                if (siteCountDict.TryGetValue(site, out int count)) siteCountDict[site] = count + 1;\n                else siteCountDict[site]                                                = 1;\n                totalCount++;\n            }\n\n            // this can be less if we had missing values\n            if (totalCount > numSamples) throw new InvalidDataException($\"Found more total sites ({totalCount}) than samples ({numSamples}).\");\n\n            return siteCountDict.GetCosmicCounts();\n        }\n\n        private static string CombineLevels(string primary, string subtype1) =>\n            subtype1 != CosmicGeneFusionParser.MissingValue ? $\"{primary} ({subtype1})\" : primary;\n    }\n}"
  },
  {
    "path": "SAUtils/CosmicGeneFusions/CreateCosmicGeneFusions.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing Genome;\nusing IO;\nusing SAUtils.CosmicGeneFusions.Cache;\nusing SAUtils.CosmicGeneFusions.Conversion;\nusing SAUtils.CosmicGeneFusions.IO;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.CosmicGeneFusions\n{\n    public static class CreateCosmicGeneFusions\n    {\n        private static string _transcriptCachePath;\n        private static string _cosmicGeneFusionsPath;\n        private static string _referencePath;\n        private static string _outputDirectory;\n        private static string _releaseDate;\n        private static string _cosmicVersion;\n\n        private static ExitCodes ProgramExecution()\n        {\n            Console.Write(\"- loading reference sequence... \");\n            Dictionary<ushort, Chromosome> refIndexToChromosome = ReferenceLoader.GetRefIndexToChromosome(_referencePath);\n            Console.WriteLine(\"finished.\");\n\n            Console.Write(\"- loading transcript cache... \");\n            using FileStream cacheStream     = FileUtilities.GetReadStream(_transcriptCachePath);\n            var              transcriptCache = TranscriptCache.Create(cacheStream, refIndexToChromosome);\n            Console.WriteLine(\"finished.\");\n            \n            Console.Write(\"- parsing COSMIC gene fusions... \");\n            using StreamReader                            cosmicReader      = GZipUtilities.GetAppropriateStreamReader(_cosmicGeneFusionsPath);\n            Dictionary<int, HashSet<RawCosmicGeneFusion>> fusionIdToEntries = CosmicGeneFusionParser.Parse(cosmicReader);\n            Console.WriteLine($\"{fusionIdToEntries.Count:N0} fusion IDs loaded\");\n\n            Console.Write(\"- converting COSMIC entries... 
\");\n            Dictionary<ulong, string[]> fusionKeyToJson = CosmicConverter.Convert(fusionIdToEntries, transcriptCache);\n            Console.WriteLine($\"{fusionKeyToJson.Count:N0} gene pairs converted\");\n            \n            DataSourceVersion version = CreateDataSourceVersion(_cosmicVersion, _releaseDate);\n            WriteGeneFusions(_outputDirectory, fusionKeyToJson, version);\n\n            Console.WriteLine();\n            Console.WriteLine($\"Total: {fusionKeyToJson.Count:N0} gene pairs in database.\");\n\n            return ExitCodes.Success;\n        }\n\n        // ReSharper disable once SuggestBaseTypeForParameter\n        private static void WriteGeneFusions(string outputDirectory, Dictionary<ulong, string[]> geneKeyToJson, DataSourceVersion version)\n        {\n            Console.Write(\"- writing gene fusions SA file... \");\n            string    outputPath = Path.Combine(outputDirectory, $\"COSMIC_GeneFusions_{version.Version}{SaCommon.GeneFusionJsonSuffix}\");\n            using var writer     = new GeneFusionJsonWriter(FileUtilities.GetCreateStream(outputPath), \"cosmicGeneFusions\", version);\n            writer.Write(geneKeyToJson);\n            Console.WriteLine(\"finished.\");\n        }\n\n        internal static DataSourceVersion CreateDataSourceVersion(string version, string releaseDate)\n        {\n            long releaseTicks = DateTime.Parse(releaseDate).Ticks;\n            return new DataSourceVersion(\"COSMIC gene fusions\", version, releaseTicks, \"manually curated somatic gene fusions\");\n        }\n\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"cache|c=\",\n                    \"transcript cache {path}\",\n                    v => _transcriptCachePath = v\n                },\n                {\n                    \"in|i=\",\n                    \"COSMIC gene fusions {path}\",\n         
           v => _cosmicGeneFusionsPath = v\n                },\n                {\n                    \"out|o=\",\n                    \"output {directory}\",\n                    v => _outputDirectory = v\n                },\n                {\n                    \"ref|r=\",\n                    \"input reference sequence {path}\",\n                    v => _referencePath = v\n                },\n                {\n                    \"releaseDate=\",\n                    \"release {date} (YYYY-MM-dd)\",\n                    v => _releaseDate = v\n                },\n                {\n                    \"cosmicVersion=\",\n                    \"COSMIC {version} (e.g. 92)\",\n                    v => _cosmicVersion = v\n                }\n            };\n\n            var commandLineExample = $\"{command} [options]\";\n\n            ExitCodes exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_referencePath,         \"reference sequence\",  \"--ref\")\n                .CheckInputFilenameExists(_transcriptCachePath,   \"transcript cache\",    \"--cache\")\n                .CheckInputFilenameExists(_cosmicGeneFusionsPath, \"COSMIC gene fusions\", \"--in\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .HasRequiredDate(_releaseDate, \"COSMIC release date\", \"--releaseDate\")\n                .HasRequiredParameter(_cosmicVersion, \"COSMIC version\", \"--cosmicVersion\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a supplementary database with COSMIC gene fusion annotations\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/CosmicGeneFusions/IO/CosmicGeneFusionParser.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing SAUtils.CosmicGeneFusions.Conversion;\n\nnamespace SAUtils.CosmicGeneFusions.IO\n{\n    public static class CosmicGeneFusionParser\n    {\n        public const string MissingValue = \"NS\";\n\n        public static Dictionary<int, HashSet<RawCosmicGeneFusion>> Parse(StreamReader reader)\n        {\n            var fusionEntries = new List<RawCosmicGeneFusion>();\n\n            // skip the first line\n            reader.ReadLine();\n\n            while (true)\n            {\n                string line = reader.ReadLine();\n                if (line == null) break;\n\n                string[] cols = line.Split('\\t');\n                if (cols.Length != 32)\n                    throw new InvalidDataException($\"Expected 32 columns in the COSMIC gene fusions file, but found {cols.Length}\");\n\n                string fusionIdString = cols[10];\n\n                // skip entries that are missing the fusion ID\n                if (string.IsNullOrEmpty(fusionIdString)) continue;\n\n                int    sampleId          = int.Parse(cols[0]);\n                string primarySite       = RemoveUnderlines(cols[2]);\n                string siteSubtype1      = RemoveUnderlines(cols[3]);\n                string primaryHistology  = RemoveUnderlines(cols[6]);\n                string histologySubtype1 = RemoveUnderlines(cols[7]);\n                int    fusionId          = int.Parse(fusionIdString);\n                string hgvsNotation      = cols[11];\n                int    pubMedId          = int.Parse(cols[31]);\n\n                fusionEntries.Add(new RawCosmicGeneFusion(sampleId, fusionId, primarySite, siteSubtype1, primaryHistology, histologySubtype1,\n                    hgvsNotation, pubMedId));\n            }\n\n            return fusionEntries.GroupByFusionId();\n        }\n\n        // ReSharper disable once ParameterTypeCanBeEnumerable.Local\n        private static Dictionary<int, 
HashSet<RawCosmicGeneFusion>> GroupByFusionId(this List<RawCosmicGeneFusion> fusionEntries)\n        {\n            var fusionIdToEntries = new Dictionary<int, HashSet<RawCosmicGeneFusion>>();\n\n            foreach (RawCosmicGeneFusion fusionEntry in fusionEntries)\n            {\n                if (!fusionIdToEntries.TryGetValue(fusionEntry.FusionId, out HashSet<RawCosmicGeneFusion> fusionEntrySet))\n                {\n                    fusionEntrySet                          = new HashSet<RawCosmicGeneFusion>();\n                    fusionIdToEntries[fusionEntry.FusionId] = fusionEntrySet;\n                }\n\n                fusionEntrySet.Add(fusionEntry);\n            }\n\n            return fusionIdToEntries;\n        }\n\n        internal static string RemoveUnderlines(string s) => s.Replace('_', ' ');\n    }\n}"
  },
  {
    "path": "SAUtils/CosmicGeneFusions/IO/GeneFusionJsonWriter.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Text;\nusing Compression.Utilities;\nusing IO;\nusing IO.v2;\nusing VariantAnnotation.GeneFusions.IO;\nusing VariantAnnotation.Interface.Providers;\n\nnamespace SAUtils.CosmicGeneFusions.IO\n{\n    public sealed class GeneFusionJsonWriter : IDisposable\n    {\n        private readonly ExtendedBinaryWriter _writer;\n\n        public GeneFusionJsonWriter(Stream stream, string jsonKey, IDataSourceVersion version, bool leaveOpen = false)\n        {\n            _writer = new ExtendedBinaryWriter(stream, Encoding.UTF8, leaveOpen);\n            WriteHeader();\n            _writer.Write(jsonKey);\n            version.Write(_writer);\n        }\n\n        private void WriteHeader()\n        {\n            var header = new Header(FileType.GeneFusionJson, GeneFusionJsonReader.SupportedFileFormatVersion);\n            header.Write(_writer);\n        }\n\n        public void Write(Dictionary<ulong, string[]> geneKeyToJson)\n        {\n            using var ms = new MemoryStream();\n            using (var writer = new ExtendedBinaryWriter(ms, Encoding.UTF8, true))\n            {\n                writer.WriteOpt(geneKeyToJson.Count);\n\n                foreach ((ulong geneKey, string[] jsonArray) in geneKeyToJson)\n                {\n                    writer.Write(geneKey);\n                    writer.WriteOpt(jsonArray.Length);\n                    foreach (string json in jsonArray) writer.Write(json);\n                }\n            }\n\n            byte[] bytes = ms.ToArray();\n            _writer.WriteCompressedByteArray(bytes, bytes.Length);\n        }\n\n        public void Dispose() => _writer.Dispose();\n    }\n}"
  },
  {
    "path": "SAUtils/CosmicGeneFusions/Utilities/CosmicCountUtilities.cs",
    "content": "﻿using System.Collections.Generic;\nusing SAUtils.CosmicGeneFusions.Conversion;\n\nnamespace SAUtils.CosmicGeneFusions.Utilities\n{\n    public static class CosmicCountUtilities\n    {\n        public static CosmicCount[] GetCosmicCounts(this Dictionary<string, int> countDict)\n        {\n            var counts = new List<CosmicCount>(countDict.Count);\n            foreach ((string histology, int count) in countDict) counts.Add(new CosmicCount(histology, count));\n            return counts.ToArray();\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/CreateClinvarDb/ClinVarMain.cs",
    "content": "﻿using System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing SAUtils.InputFileParsers.ClinVar;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.CreateClinvarDb\n{\n    public static class ClinVarMain\n    {\n        private static string _rcvFile;\n        private static string _vcvFile;\n        private static string _compressedReference;\n        private static string _outputDirectory;\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                     \"ref|r=\",\n                     \"compressed reference sequence file\",\n                     v => _compressedReference = v\n                },\n                {\n                    \"rcv|i=\",\n                    \"ClinVar Full release XML file\",\n                    v => _rcvFile = v\n                },\n                {\n                    \"vcv|c=\",\n                    \"ClinVar Variation release XML file\",\n                    v => _vcvFile = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .CheckInputFilenameExists(_rcvFile, \"ClinVar full release XML file\", \"--rcv\")\n                .CheckInputFilenameExists(_vcvFile, \"ClinVar variation release XML file\", \"--vcv\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n            
    .SkipBanner()\n                .ShowHelpMenu(\"Creates a supplementary database with ClinVar annotations\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n\n        private static ExitCodes ProgramExecution()\n        {\n            var version           = DataSourceVersionReader.GetSourceVersion(_rcvFile + \".version\");\n            string outFileName = $\"{version.Name}_{version.Version}\";\n            \n            using (var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference)))\n            using (var clinvarReader     = new ClinVarParser(GZipUtilities.GetAppropriateReadStream(_rcvFile), GZipUtilities.GetAppropriateReadStream(_vcvFile), referenceProvider))\n            using (var nsaStream         = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName+SaCommon.SaFileSuffix)))\n            using (var indexStream       = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSuffix)))\n            using (var nsaWriter         = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.ClinvarTag, false, true, SaCommon.SchemaVersion, false))\n            using (var schemaStream      = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.JsonSchemaSuffix)))\n            using (var schemaWriter      = new StreamWriter(schemaStream))\n            {\n                nsaWriter.Write(clinvarReader.GetItems());\n                schemaWriter.Write(clinvarReader.JsonSchema);\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/CreateClinvarDb/ClinVarStats.cs",
    "content": "using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing IO;\nusing OptimizedCore;\nusing SAUtils.DataStructures;\nusing SAUtils.InputFileParsers.ClinVar;\nusing VariantAnnotation.Interface.IO;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.CreateClinvarDb;\n\n\npublic class ClinVarStats\n{\n    public          int       RcvCount               = 0;\n    public          int       VcvCount               = 0;\n    public          int       InvalidRefAlleleCount  = 0;\n    public readonly KeyCounts RcvPathogenicityCounts = new KeyCounts(ClinVarCommon.ValidPathogenicity);\n    public readonly KeyCounts RcvReviewStatusCounts  = new KeyCounts(ClinVarCommon.ReviewStatusStrings.Values);\n    public readonly KeyCounts VcvPathogenicityCounts = new KeyCounts(ClinVarCommon.ValidPathogenicity);\n    public readonly KeyCounts VcvReviewStatusCounts  = new KeyCounts(ClinVarCommon.ReviewStatusStrings.Values);\n    \n    public void GetClinvarSaItemsStats(List<IClinVarSaItem> items)\n    {\n        foreach (IClinVarSaItem item in items)\n        {\n            if (item.Id.StartsWith(\"RCV\"))\n            {\n                RcvCount++;\n                foreach (string significance in item.Significances)\n                {\n                    RcvPathogenicityCounts.Increment(significance);\n                }\n\n                RcvReviewStatusCounts.Increment(ClinVarCommon.ReviewStatusStrings[item.ReviewStatus]);\n\n            }\n            else\n            {\n                VcvCount++;\n                foreach (string significance in item.Significances)\n                {\n                    VcvPathogenicityCounts.Increment(significance);\n                }\n\n                VcvReviewStatusCounts.Increment(ClinVarCommon.ReviewStatusStrings[item.ReviewStatus]);\n            }\n        }\n\n    }\n\n    public override string ToString()\n    {\n        var sb = StringBuilderPool.Get();\n        var jo = new 
JsonObject(sb);\n        sb.Append(JsonObject.OpenBrace);\n\n        jo.AddIntValue(\"rcvCount\", RcvCount);\n        jo.AddObjectValue(\"rcvPathogenicity\", RcvPathogenicityCounts);\n        jo.AddObjectValue(\"rcvReviewStatus\",  RcvReviewStatusCounts);\n        \n        jo.AddIntValue(\"vcvCount\", VcvCount);\n        jo.AddObjectValue(\"vcvPathogenicity\", VcvPathogenicityCounts);\n        jo.AddObjectValue(\"vcvReviewStatus\",  VcvReviewStatusCounts);\n        sb.Append(JsonObject.CloseBrace);\n\n        return StringBuilderPool.GetStringAndReturn(sb);\n\n    }\n}"
  },
  {
    "path": "SAUtils/CreateCosmicDb/Main.cs",
    "content": "﻿using System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing SAUtils.InputFileParsers.Cosmic;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.CreateCosmicDb\n{\n    public static class Main\n    {\n        private static string _vcfFile;\n        private static string _tsvFile;\n        private static string _compressedReference;\n        private static string _outputDirectory;\n\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"ref|r=\",\n                    \"compressed reference sequence file\",\n                    v => _compressedReference = v\n                },\n                {\n                    \"in|i=\",\n                    \"COSMIC VCF file\",\n                    v => _vcfFile = v\n                },\n                {\n                    \"tsv|t=\",\n                    \"COSMIC TSV file\",\n                    v => _tsvFile = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .HasRequiredParameter(_vcfFile, \"COSMIC VCF file\", \"--in\")\n                .CheckInputFilenameExists(_vcfFile, \"COSMIC VCF file\", \"--in\")\n                .HasRequiredParameter(_tsvFile, \"COSMIC TSV file\", \"--tsv\")\n                .CheckInputFilenameExists(_tsvFile, \"COSMIC TSV file\", \"--tsv\")\n                
.HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a supplementary database with COSMIC annotations\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n        private static ExitCodes ProgramExecution()\n        {\n            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\n            var cosmicReader = new MergedCosmicReader(_vcfFile, _tsvFile, referenceProvider);\n            var version = DataSourceVersionReader.GetSourceVersion(_vcfFile + \".version\");\n\n            string outFileName = $\"{version.Name}_{version.Version}\";\n            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))\n            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSuffix)))\n            using (var nsaWriter = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.CosmicTag, false, true, SaCommon.SchemaVersion, false))\n            {\n                nsaWriter.Write(cosmicReader.GetItems());\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/CreateDbsnpDb/Main.cs",
    "content": "﻿using System;\nusing System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing SAUtils.InputFileParsers.DbSnp;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.CreateDbsnpDb\n{\n    public static class Main\n    {\n        private static string _inputFile;\n        private static string _compressedReference;\n        private static string _outputDirectory;\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                     \"ref|r=\",\n                     \"compressed reference sequence file\",\n                     v => _compressedReference = v\n                 },\n                {\n                    \"in|i=\",\n                    \"input VCF file path\",\n                    v => _inputFile = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .HasRequiredParameter(_inputFile, \"dbSNP VCF file\", \"--in\")\n                .CheckInputFilenameExists(_inputFile, \"dbSNP VCF file\", \"--in\")\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a supplementary database with dbSNP annotations\", commandLineExample)\n           
     .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n\n        private static ExitCodes ProgramExecution()\n        {\n            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\n            var version           = DataSourceVersionReader.GetSourceVersion(_inputFile + \".version\");\n            \n            string outFileName = $\"{version.Name}_{version.Version}\";\n            using (var dbSnpReader = new DbSnpReader(GZipUtilities.GetAppropriateReadStream(_inputFile), referenceProvider))\n            using (var nsaStream   = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))\n            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSuffix)))\n            using (var nsaWriter   = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.DbsnpTag, true, true, SaCommon.SchemaVersion, false))\n            {\n                var count = nsaWriter.Write(dbSnpReader.GetItems());\n                Console.WriteLine($\"{{\\n \\\"totalCount\\\":{count} \\n}}\");\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/CreateDecipherDb/Main.cs",
    "content": "\uFEFFusing System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing SAUtils.InputFileParsers.Decipher;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.NSA;\n\nnamespace SAUtils.CreateDecipherDb\n{\n    /// <summary>\n    /// Command-line entry point that writes the items of a Decipher text file to a supplementary interval (NSI) file.\n    /// </summary>\n    public static class Main\n    {\n        private static string _inputFile;\n        private static string _compressedReference;\n        private static string _outputDirectory;\n\n        /// <summary>\n        /// Parses and validates the command-line arguments and passes <see cref=\"ProgramExecution\"/> to the console application builder.\n        /// </summary>\n        /// <param name=\"command\">name of the sub-command, only used to build the usage example</param>\n        /// <param name=\"commandArgs\">arguments passed to the sub-command</param>\n        /// <returns>the exit code returned by the console application builder</returns>\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var options = new OptionSet\n            {\n                { \"ref|r=\", \"compressed reference sequence file\", v => _compressedReference = v },\n                { \"in|i=\", \"input txt file path\", v => _inputFile = v },\n                { \"out|o=\", \"output directory\", v => _outputDirectory = v }\n            };\n\n            string usageExample = $\"{command} [options]\";\n\n            return new ConsoleAppBuilder(commandArgs, options)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .HasRequiredParameter(_inputFile, \"Decipher txt file\", \"--in\")\n                .CheckInputFilenameExists(_inputFile, \"Decipher txt file\", \"--in\")\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a supplementary database with Decipher\", usageExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n        }\n\n        /// <summary>\n        /// Reads the data source version from the \".version\" file next to the input and writes the parsed\n        /// Decipher items to a \"{name}_{version}\" interval file (spaces replaced by underscores) in the output directory.\n        /// </summary>\n        /// <returns><see cref=\"ExitCodes.Success\"/> when the file has been written</returns>\n        private static ExitCodes ProgramExecution()\n        {\n            var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\n            var sourceVersion    = DataSourceVersionReader.GetSourceVersion(_inputFile + \".version\");\n\n            // spaces in the name or version would otherwise end up in the output file name\n            string baseName = $\"{sourceVersion.Name}_{sourceVersion.Version}\".Replace(' ', '_');\n\n            using (var parser = new DecipherParser(GZipUtilities.GetAppropriateStreamReader(_inputFile), sequenceProvider.RefNameToChromosome))\n            using (FileStream intervalStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseName + SaCommon.IntervalFileSuffix)))\n            using (var intervalWriter = new NsiWriter(intervalStream, sourceVersion, sequenceProvider.Assembly, SaCommon.DecipherTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))\n            {\n                intervalWriter.Write(parser.GetItems());\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/CreateGlobalAllelesDb/Main.cs",
    "content": "\uFEFFusing System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing SAUtils.InputFileParsers.DbSnp;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.CreateGlobalAllelesDb\n{\n    /// <summary>\n    /// Command-line entry point that extracts global minor alleles from a dbSNP VCF file into a supplementary\n    /// annotation (NSA) file and its index.\n    /// </summary>\n    public static class Main\n    {\n        private static string _inputFile;\n        private static string _compressedReference;\n        private static string _outputDirectory;\n\n        /// <summary>\n        /// Parses and validates the command-line arguments and passes <see cref=\"ProgramExecution\"/> to the console application builder.\n        /// </summary>\n        /// <param name=\"command\">name of the sub-command, only used to build the usage example</param>\n        /// <param name=\"commandArgs\">arguments passed to the sub-command</param>\n        /// <returns>the exit code returned by the console application builder</returns>\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"ref|r=\",\n                    \"compressed reference sequence file\",\n                    v => _compressedReference = v\n                },\n                {\n                    \"in|i=\",\n                    \"input VCF file path\",\n                    v => _inputFile = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .HasRequiredParameter(_inputFile, \"dbSNP VCF file\", \"--in\")\n                .CheckInputFilenameExists(_inputFile, \"dbSNP VCF file\", \"--in\")\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a supplementary database containing global minor alleles\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n\n        /// <summary>\n        /// Reads the data source version from the \".version\" file next to the input and writes the items produced by\n        /// the global minor reader to \"{name}_{version}_globalMinor\" NSA and index files in the output directory.\n        /// </summary>\n        /// <returns><see cref=\"ExitCodes.Success\"/> when the files have been written</returns>\n        private static ExitCodes ProgramExecution()\n        {\n            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\n            var globalMinorReader = new GlobalMinorReader(GZipUtilities.GetAppropriateReadStream(_inputFile), referenceProvider.RefNameToChromosome);\n            var version           = DataSourceVersionReader.GetSourceVersion(_inputFile + \".version\");\n\n            string outFileName = $\"{version.Name}_{version.Version}_globalMinor\";\n            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))\n            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSuffix)))\n            using (var nsaWriter = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.GlobalAlleleTag, true, false, SaCommon.SchemaVersion, true))\n            {\n                nsaWriter.Write(globalMinorReader.GetItems());\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/CreateGmeDb/Main.cs",
    "content": "using System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing SAUtils.InputFileParsers.Gme;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.CreateGmeDb\n{\n    /// <summary>\n    /// Command-line entry point that converts a GME TSV file into a supplementary annotation (NSA) file and its index.\n    /// </summary>\n    public static class Main\n    {\n        private static string _inputFile;\n        private static string _compressedReference;\n        private static string _outputDirectory;\n\n        /// <summary>\n        /// Parses and validates the command-line arguments and passes <see cref=\"ProgramExecution\"/> to the console application builder.\n        /// </summary>\n        /// <param name=\"command\">name of the sub-command, only used to build the usage example</param>\n        /// <param name=\"commandArgs\">arguments passed to the sub-command</param>\n        /// <returns>the exit code returned by the console application builder</returns>\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"ref|r=\",\n                    \"compressed reference sequence file\",\n                    v => _compressedReference = v\n                },\n                {\n                    \"in|i=\",\n                    \"input TSV file path\",\n                    v => _inputFile = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .HasRequiredParameter(_inputFile, \"GME TSV file\", \"--in\")\n                .CheckInputFilenameExists(_inputFile, \"GME TSV file\", \"--in\")\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a supplementary database containing GME annotations\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n\n        /// <summary>\n        /// Reads the data source version from the \".version\" file next to the input and writes the parsed GME items\n        /// to \"{name}_{version}\" NSA and index files in the output directory.\n        /// </summary>\n        /// <returns><see cref=\"ExitCodes.Success\"/> when the files have been written</returns>\n        private static ExitCodes ProgramExecution()\n        {\n            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\n            var version           = DataSourceVersionReader.GetSourceVersion(_inputFile + \".version\");\n\n            string outFileName = $\"{version.Name}_{version.Version}\";\n            using (var gmeReader   = new GmeParser(GZipUtilities.GetAppropriateStreamReader(_inputFile), referenceProvider))\n            using (var nsaStream   = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))\n            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSuffix)))\n            using (var nsaWriter   = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.GmeTag, true, false, SaCommon.SchemaVersion, false))\n            {\n                nsaWriter.Write(gmeReader.GetItems());\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n}\n"
  },
  {
    "path": "SAUtils/CreateOneKgDb/Main.cs",
    "content": "\uFEFFusing System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing SAUtils.InputFileParsers.OneKGen;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.CreateOneKgDb\n{\n    /// <summary>\n    /// Command-line entry point that converts a 1000 Genomes VCF file into a supplementary annotation (NSA) file and its index.\n    /// </summary>\n    public static class Main\n    {\n        private static string _inputFile;\n        private static string _compressedReference;\n        private static string _outputDirectory;\n\n        /// <summary>\n        /// Parses and validates the command-line arguments and passes <see cref=\"ProgramExecution\"/> to the console application builder.\n        /// </summary>\n        /// <param name=\"command\">name of the sub-command, only used to build the usage example</param>\n        /// <param name=\"commandArgs\">arguments passed to the sub-command</param>\n        /// <returns>the exit code returned by the console application builder</returns>\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var options = new OptionSet\n            {\n                { \"ref|r=\", \"compressed reference sequence file\", v => _compressedReference = v },\n                { \"in|i=\", \"input VCF file path\", v => _inputFile = v },\n                { \"out|o=\", \"output directory\", v => _outputDirectory = v }\n            };\n\n            string usageExample = $\"{command} [options]\";\n\n            return new ConsoleAppBuilder(commandArgs, options)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .HasRequiredParameter(_inputFile, \"OneK Gen VCF file\", \"--in\")\n                .CheckInputFilenameExists(_inputFile, \"OneK Gen VCF file\", \"--in\")\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a supplementary database containing 1000 Genomes allele frequencies\", usageExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n        }\n\n        /// <summary>\n        /// Reads the data source version from the \".version\" file next to the input and writes the parsed items to\n        /// \"{name}_{version}\" NSA and index files (spaces replaced by underscores) in the output directory.\n        /// </summary>\n        /// <returns><see cref=\"ExitCodes.Success\"/> when the files have been written</returns>\n        private static ExitCodes ProgramExecution()\n        {\n            var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\n            var sourceVersion    = DataSourceVersionReader.GetSourceVersion(_inputFile + \".version\");\n\n            // spaces in the name or version would otherwise end up in the output file names\n            string baseName = $\"{sourceVersion.Name}_{sourceVersion.Version}\".Replace(' ', '_');\n\n            using (var reader      = new OneKGenReader(GZipUtilities.GetAppropriateReadStream(_inputFile), sequenceProvider))\n            using (var dataStream  = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseName + SaCommon.SaFileSuffix)))\n            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseName + SaCommon.SaFileSuffix + SaCommon.IndexSuffix)))\n            using (var nsaWriter   = new NsaWriter(dataStream, indexStream, sourceVersion, sequenceProvider, SaCommon.OneKgenTag, true, false, SaCommon.SchemaVersion, false))\n            {\n                nsaWriter.Write(reader.GetItems());\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/CreateTopMedDb/Main.cs",
    "content": "\uFEFFusing System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing SAUtils.InputFileParsers.TOPMed;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.CreateTopMedDb\n{\n    /// <summary>\n    /// Command-line entry point that converts a TOPMed VCF file into a supplementary annotation (NSA) file and its index.\n    /// </summary>\n    public static class Main\n    {\n        private static string _inputFile;\n        private static string _compressedReference;\n        private static string _outputDirectory;\n\n        /// <summary>\n        /// Parses and validates the command-line arguments and passes <see cref=\"ProgramExecution\"/> to the console application builder.\n        /// </summary>\n        /// <param name=\"command\">name of the sub-command, only used to build the usage example</param>\n        /// <param name=\"commandArgs\">arguments passed to the sub-command</param>\n        /// <returns>the exit code returned by the console application builder</returns>\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"ref|r=\",\n                    \"compressed reference sequence file\",\n                    v => _compressedReference = v\n                },\n                {\n                    \"in|i=\",\n                    \"input VCF file path\",\n                    v => _inputFile = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .HasRequiredParameter(_inputFile, \"TopMed VCF file\", \"--in\")\n                .CheckInputFilenameExists(_inputFile, \"TopMed VCF file\", \"--in\")\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a supplementary database containing TOPMed annotations\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n\n        /// <summary>\n        /// Reads the data source version from the \".version\" file next to the input and writes the parsed TOPMed items\n        /// to \"{name}_{version}\" NSA and index files in the output directory.\n        /// </summary>\n        /// <returns><see cref=\"ExitCodes.Success\"/> when the files have been written</returns>\n        private static ExitCodes ProgramExecution()\n        {\n            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\n            var version           = DataSourceVersionReader.GetSourceVersion(_inputFile + \".version\");\n\n            string outFileName = $\"{version.Name}_{version.Version}\";\n            using (var topMedReader = new TopMedReader(GZipUtilities.GetAppropriateStreamReader(_inputFile), referenceProvider))\n            using (var nsaStream    = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))\n            using (var indexStream  = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSuffix)))\n            using (var nsaWriter    = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.TopMedTag, true, false, SaCommon.SchemaVersion, false))\n            {\n                nsaWriter.Write(topMedReader.GetItems());\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/Custom/AllowedValues.cs",
    "content": "\uFEFFusing System;\r\nusing System.Collections.Generic;\r\nusing System.Globalization;\r\nusing System.Linq;\r\nusing ErrorHandling.Exceptions;\r\n\r\nnamespace SAUtils.Custom\r\n{\r\n    /// <summary>\r\n    /// Validators for individual values of a custom annotation file. Every validator throws a\r\n    /// <see cref=\"UserErrorException\"/> when the value is rejected and returns normally otherwise.\r\n    /// </summary>\r\n    public static class AllowedValues\r\n    {\r\n        private const int MaxFilterLength = 20;\r\n        private const int MaxIdentifierLength = 50;\r\n        private const int MaxDescriptionLength = 100;\r\n        private static readonly string[] EmptyValues = {\".\", \"\"};\r\n\r\n        // ordinal, case-insensitive lookup so that the result does not depend on the current culture\r\n        private static readonly HashSet<string> PredictionValues = new HashSet<string>(StringComparer.OrdinalIgnoreCase)\r\n        {\r\n            \"pathogenic\",\r\n            \"p\",\r\n            \"likely pathogenic\",\r\n            \"lp\",\r\n            \"vus\",\r\n            \"likely benign\",\r\n            \"lb\",\r\n            \"benign\",\r\n            \"b\"\r\n        };\r\n\r\n        /// <summary>\r\n        /// Accepts empty values and the known prediction values (case-insensitive).\r\n        /// </summary>\r\n        /// <param name=\"value\">value to validate; a null value is rejected like any other unknown value</param>\r\n        /// <param name=\"line\">input line, reported in the error message</param>\r\n        /// <exception cref=\"UserErrorException\">the value is not a valid prediction value</exception>\r\n        public static void ValidatePredictionValue(string value, string line)\r\n        {\r\n            if (IsEmptyValue(value) || PredictionValues.Contains(value)) return;\r\n\r\n            throw new UserErrorException($\"{value} is not a valid prediction value.\\nInput line: {line}\");\r\n        }\r\n\r\n        /// <summary>Rejects filter values longer than 20 characters.</summary>\r\n        public static void ValidateFilterValue(string value, string line) => CheckValueLength(value, line, MaxFilterLength, \"filters\");\r\n\r\n        /// <summary>Rejects identifier values longer than 50 characters.</summary>\r\n        public static void ValidateIdentifierValue(string value, string line) => CheckValueLength(value, line, MaxIdentifierLength, \"identifiers\");\r\n\r\n        /// <summary>Rejects description values longer than 100 characters.</summary>\r\n        public static void ValidateDescriptionValue(string value, string line) => CheckValueLength(value, line, MaxDescriptionLength, \"descriptions\");\r\n\r\n        /// <summary>\r\n        /// Accepts empty values and anything that parses as a floating-point number.\r\n        /// </summary>\r\n        /// <param name=\"value\">value to validate</param>\r\n        /// <param name=\"line\">input line, stored in the exception's Data[\"Line\"] entry</param>\r\n        /// <exception cref=\"UserErrorException\">the value is not a number</exception>\r\n        public static void ValidateScoreValue(string value, string line)\r\n        {\r\n            // empty (.) implies unknown score\r\n            if (IsEmptyValue(value)) return;\r\n\r\n            // same number styles as double.TryParse(string, out double), but parsed with the invariant culture\r\n            // so that \"0.5\" is accepted regardless of the machine's regional settings\r\n            if (double.TryParse(value, NumberStyles.Float | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out _)) return;\r\n\r\n            var e = new UserErrorException(\r\n                $\"{value} is not a valid score value. Scores are expected to be numbers.\");\r\n            e.Data[\"Line\"] = line;\r\n            throw e;\r\n        }\r\n\r\n        /// <summary>Returns true when the value is \".\" or the empty string.</summary>\r\n        public static bool IsEmptyValue(string value) => EmptyValues.Contains(value);\r\n\r\n        /// <summary>\r\n        /// Throws when a non-empty value is longer than <paramref name=\"maxLength\"/>.\r\n        /// </summary>\r\n        /// <param name=\"valueKind\">plural name of the kind of value being checked, used in the error message</param>\r\n        private static void CheckValueLength(string value, string line, int maxLength, string valueKind)\r\n        {\r\n            if (!string.IsNullOrEmpty(value) && value.Length > maxLength)\r\n                throw new UserErrorException($\"\\\"{value}\\\" exceeds the allowed length for {valueKind} ({maxLength} characters).\\nInput line:{line}\");\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/Custom/CaUtilities.cs",
    "content": "\uFEFFusing System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing SAUtils.DataStructures;\r\nusing SAUtils.Schema;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.NSA;\r\nusing VariantAnnotation.Providers;\r\nusing VariantAnnotation.SA;\r\n\r\nnamespace SAUtils.Custom\r\n{\r\n    /// <summary>\r\n    /// Factory and helper methods shared by the custom annotation tools.\r\n    /// </summary>\r\n    public static class CaUtilities\r\n    {\r\n        /// <summary>\r\n        /// Creates the data source version for a custom variant annotation file and an <see cref=\"NsaWriter\"/> that uses it.\r\n        /// </summary>\r\n        /// <param name=\"nsaStream\">stream that receives the annotation data</param>\r\n        /// <param name=\"indexStream\">stream that receives the index</param>\r\n        /// <param name=\"parser\">parser that supplies the JSON tag, version, description and matching options</param>\r\n        /// <param name=\"dataVersion\">fallback version, used only when the parser has no version</param>\r\n        /// <param name=\"referenceProvider\">reference sequence provider handed to the writer</param>\r\n        /// <param name=\"version\">receives the data source version that was created</param>\r\n        /// <param name=\"skipRefBaseValidation\">passed to the writer as its \"skip incorrect ref base\" option</param>\r\n        public static NsaWriter GetNsaWriter(Stream nsaStream, Stream indexStream, VariantAnnotationsParser parser, string dataVersion, ISequenceProvider referenceProvider, out DataSourceVersion version, bool skipRefBaseValidation)\r\n        {\r\n            dataVersion = string.IsNullOrEmpty(parser.Version) ? dataVersion : parser.Version;\r\n            version = new DataSourceVersion(parser.JsonTag, dataVersion, DateTime.Now.Ticks,\r\n                parser.DataSourceDescription);\r\n            return new NsaWriter(\r\n                nsaStream,\r\n                indexStream,\r\n                version,\r\n                referenceProvider,\r\n                parser.JsonTag,\r\n                parser.MatchByAllele, // match by allele\r\n                parser.IsArray, // is array\r\n                SaCommon.SchemaVersion,\r\n                false, // is positional\r\n                skipRefBaseValidation, // skip incorrect ref base\r\n                true // throw error on conflicting entries\r\n            );\r\n        }\r\n\r\n        /// <summary>Creates an <see cref=\"NsiWriter\"/> that uses the current SA schema version.</summary>\r\n        public static NsiWriter GetNsiWriter(Stream nsiStream, DataSourceVersion version, GenomeAssembly assembly, string jsonTag, ReportFor reportFor) => new NsiWriter(nsiStream, version, assembly, jsonTag, reportFor, SaCommon.SchemaVersion);\r\n\r\n        /// <summary>\r\n        /// Creates an <see cref=\"NgaWriter\"/> for a custom gene annotation file.\r\n        /// </summary>\r\n        /// <param name=\"ngaStream\">stream that receives the gene annotations</param>\r\n        /// <param name=\"parser\">parser that supplies the JSON tag, version and description</param>\r\n        /// <param name=\"dataVersion\">fallback version, used only when the parser has no version</param>\r\n        public static NgaWriter GetNgaWriter(Stream ngaStream, GeneAnnotationsParser parser, string dataVersion)\r\n        {\r\n            dataVersion = string.IsNullOrEmpty(parser.Version) ? dataVersion : parser.Version;\r\n            var version = new DataSourceVersion(parser.JsonTag, dataVersion, DateTime.Now.Ticks, parser.DataSourceDescription);\r\n            return new NgaWriter(ngaStream, version, parser.JsonTag, SaCommon.SchemaVersion, false);\r\n        }\r\n\r\n        /// <summary>\r\n        /// Writes the parser's small variants and its JSON schema, then collects its custom intervals.\r\n        /// </summary>\r\n        /// <returns>the JSON tag, the number of items written, the interval schema and the intervals (may be null)</returns>\r\n        /// <exception cref=\"UserErrorException\">nothing was written and the parser produced no intervals</exception>\r\n        public static (string JsonTag, int NsaItemsCount, SaJsonSchema IntervalJsonSchema, List<CustomInterval> Intervals) WriteSmallVariants(VariantAnnotationsParser parser, NsaWriter nsaWriter, StreamWriter schemaWriter)\r\n        {\r\n            int nsaItemsCount = nsaWriter.Write(parser.GetItems());\r\n            schemaWriter.Write(parser.JsonSchema);\r\n            var intervals = parser.GetCustomIntervals();\r\n\r\n            if (nsaItemsCount == 0 && intervals == null) throw new UserErrorException(GeneAnnotationsParser.NoValidEntriesErrorMessage);\r\n            return (parser.JsonTag, nsaItemsCount, parser.IntervalJsonSchema, intervals);\r\n        }\r\n\r\n        /// <summary>\r\n        /// Returns the file name portion of a path.\r\n        /// </summary>\r\n        /// <param name=\"inputFilePath\">path to an input file</param>\r\n        /// <returns>everything after the last directory separator, or the whole string when there is none</returns>\r\n        public static string GetInputFileName(string inputFilePath)\r\n        {\r\n            // Path.GetFileName recognizes every directory separator of the current platform\r\n            // (both '\\\\' and '/' on Windows), not just Path.DirectorySeparatorChar\r\n            return Path.GetFileName(inputFilePath);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/Custom/CustomGene.cs",
    "content": "\uFEFFusing System.Collections.Generic;\r\nusing ErrorHandling.Exceptions;\r\nusing SAUtils.Schema;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.IO;\r\n\r\nnamespace SAUtils.Custom\r\n{\r\n    /// <summary>\r\n    /// Custom annotation for a single gene: the gene symbol together with the annotation values,\r\n    /// the schema used to render them and the input line they came from.\r\n    /// </summary>\r\n    public sealed class CustomGene : ISuppGeneItem\r\n    {\r\n        private readonly string _inputLine;\r\n        private readonly SaJsonSchema _jsonSchema;\r\n        private readonly List<string[]> _values;\r\n\r\n        public string GeneSymbol { get; }\r\n\r\n        /// <param name=\"geneSymbol\">symbol of the annotated gene</param>\r\n        /// <param name=\"values\">annotation values handed to the schema when rendering JSON</param>\r\n        /// <param name=\"jsonSchema\">schema that renders the values</param>\r\n        /// <param name=\"inputLine\">original input line, appended to error messages</param>\r\n        public CustomGene(string geneSymbol, List<string[]> values, SaJsonSchema jsonSchema, string inputLine)\r\n        {\r\n            _inputLine  = inputLine;\r\n            _jsonSchema = jsonSchema;\r\n            _values     = values;\r\n            GeneSymbol  = geneSymbol;\r\n        }\r\n\r\n        /// <summary>\r\n        /// Renders the values with the schema and wraps the result in an opening and a closing brace.\r\n        /// </summary>\r\n        /// <exception cref=\"UserErrorException\">\r\n        /// thrown when rendering fails with a user error; the message is extended with the input line\r\n        /// </exception>\r\n        public string GetJsonString()\r\n        {\r\n            try\r\n            {\r\n                return JsonObject.OpenBrace + _jsonSchema.GetJsonString(_values) + JsonObject.CloseBrace;\r\n            }\r\n            catch (UserErrorException userError)\r\n            {\r\n                // re-throw with the offending line so that the user can locate the problem\r\n                throw new UserErrorException($\"{userError.Message}\\nInput line: {_inputLine}\");\r\n            }\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/Custom/GeneAnnotationsParser.cs",
    "content": "\uFEFFusing System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing System.Text;\r\nusing ErrorHandling;\r\nusing ErrorHandling.Exceptions;\r\nusing OptimizedCore;\r\nusing SAUtils.GeneIdentifiers;\r\nusing SAUtils.Schema;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.SA;\r\n\r\nnamespace SAUtils.Custom\r\n{\r\n    /// <summary>\r\n    /// Parses a custom gene annotation TSV file: the #title / #version / #description header lines, the\r\n    /// #geneSymbol column row, one row each for categories, descriptions and value types, and then one row per gene.\r\n    /// </summary>\r\n    public sealed class GeneAnnotationsParser : IDisposable\r\n    {\r\n        private readonly StreamReader _reader;\r\n        private readonly Dictionary<string, string> _entrezGeneIdToSymbol;\r\n        private readonly Dictionary<string, string> _ensemblIdToSymbol;\r\n\r\n        public string JsonTag;\r\n        public string Version;\r\n        public string DataSourceDescription;\r\n\r\n        private string[] _tags;\r\n        internal CustomAnnotationCategories[] Categories;\r\n        internal string[] Descriptions;\r\n        internal SaJsonValueType[] ValueTypes;\r\n        internal readonly List<string> JsonKeys = new List<string>();\r\n        public SaJsonSchema JsonSchema;\r\n\r\n        private const    int                      NumRequiredColumns = 2;\r\n        private          int                      _numAnnotationColumns;\r\n        private          Action<string, string>[] _annotationValidators;\r\n        private readonly List<string>             _unknownGenes = new List<string>();\r\n\r\n        public const string NoValidEntriesErrorMessage = \"The provided TSV has no valid custom annotation entries.\";\r\n        public const string UnknownGeneIdsErrorMessage = \"The following gene IDs were not recognized in Nirvana:\";\r\n\r\n        internal GeneAnnotationsParser(StreamReader reader, Dictionary<string, string> entrezGeneIdToSymbol, Dictionary<string, string> ensemblIdToSymbol)\r\n        {\r\n            _reader = reader;\r\n            _entrezGeneIdToSymbol = entrezGeneIdToSymbol;\r\n            _ensemblIdToSymbol = ensemblIdToSymbol;\r\n        }\r\n\r\n        /// <summary>\r\n        /// Creates a parser, reads the header section from the reader and builds the JSON schema.\r\n        /// </summary>\r\n        /// <param name=\"reader\">reader positioned at the start of the TSV file</param>\r\n        /// <param name=\"entrezGeneIdToSymbol\">lookup used to resolve gene IDs to gene symbols</param>\r\n        /// <param name=\"ensemblIdToSymbol\">lookup used to resolve gene IDs to gene symbols</param>\r\n        public static GeneAnnotationsParser Create(StreamReader reader, Dictionary<string, string> entrezGeneIdToSymbol, Dictionary<string, string> ensemblIdToSymbol)\r\n        {\r\n            var parser = new GeneAnnotationsParser(reader, entrezGeneIdToSymbol, ensemblIdToSymbol);\r\n\r\n            parser.ParseHeaderLines();\r\n            parser.InitiateSchema();\r\n            parser.AddHeaderAnnotation();\r\n\r\n            return parser;\r\n        }\r\n\r\n        /// <summary>\r\n        /// Reads the key/value header lines up to the #geneSymbol row, then the column tags, categories,\r\n        /// descriptions and value types.\r\n        /// </summary>\r\n        /// <exception cref=\"UserErrorException\">\r\n        /// an unknown header key was found or the file ended before the #geneSymbol row\r\n        /// </exception>\r\n        internal void ParseHeaderLines()\r\n        {\r\n            string line;\r\n            while ((line = _reader.ReadLine()) != null)\r\n            {\r\n                if (line.StartsWith(\"#geneSymbol\", StringComparison.Ordinal)) break;\r\n                line = line.Trim();\r\n                (string key, string value) = line.OptimizedKeyValue();\r\n                switch (key)\r\n                {\r\n                    case \"#title\":\r\n                        JsonTag = value;\r\n                        break;\r\n                    case \"#version\":\r\n                        Version = value;\r\n                        break;\r\n                    case \"#description\":\r\n                        DataSourceDescription = value;\r\n                        break;\r\n                    default:\r\n                        var e = new UserErrorException(\"Unexpected header tag observed\");\r\n                        e.Data[ExitCodeUtilities.Line] = line;\r\n                        throw e;\r\n                }\r\n            }\r\n\r\n            // the loop only finishes without a break when the reader ran out of lines, i.e. there is no column row\r\n            if (line == null)\r\n                throw new UserErrorException(\"The TSV file ended before the column header line starting with #geneSymbol.\");\r\n\r\n            _tags = ParserUtilities.ParseTags(line, \"#geneSymbol\", NumRequiredColumns);\r\n            CheckTagsAndSetJsonKeys();\r\n            Categories = ParserUtilities.ParseCategories(_reader.ReadLine(), NumRequiredColumns, _numAnnotationColumns, _annotationValidators);\r\n            Descriptions = ParserUtilities.ParseDescriptions(_reader.ReadLine(), NumRequiredColumns, _numAnnotationColumns);\r\n            ValueTypes = ParserUtilities.ParseTypes(_reader.ReadLine(), NumRequiredColumns, _numAnnotationColumns);\r\n        }\r\n\r\n        private void InitiateSchema()\r\n        {\r\n            JsonSchema = SaJsonSchema.Create(new StringBuilder(), JsonTag, SaJsonValueType.Object, JsonKeys);\r\n        }\r\n\r\n        /// <summary>\r\n        /// Requires a name for every annotation column, records the names as JSON keys and creates one\r\n        /// no-op validator per annotation column.\r\n        /// </summary>\r\n        private void CheckTagsAndSetJsonKeys()\r\n        {\r\n            for (int i = NumRequiredColumns; i < _tags.Length; i++)\r\n            {\r\n                if (string.IsNullOrWhiteSpace(_tags[i]))\r\n                    throw new UserErrorException($\"Please provide a name for column {i + 1} at the second row.\");\r\n\r\n                JsonKeys.Add(_tags[i]);\r\n            }\r\n\r\n            _numAnnotationColumns = _tags.Length - NumRequiredColumns;\r\n            _annotationValidators = Enumerable.Repeat<Action<string, string>>((a, b) => { }, _numAnnotationColumns).ToArray();\r\n        }\r\n\r\n        /// <summary>Adds the type, category and description of every annotation column to the JSON schema.</summary>\r\n        private void AddHeaderAnnotation()\r\n        {\r\n            for (var i = 0; i < _numAnnotationColumns; i++)\r\n            {\r\n                var annotation = SaJsonKeyAnnotation.CreateFromProperties(ValueTypes[i], Categories[i], Descriptions[i]);\r\n\r\n                JsonSchema?.AddAnnotation(_tags[i + NumRequiredColumns], annotation);\r\n            }\r\n        }\r\n\r\n        /// <summary>\r\n        /// Reads the remaining lines of the file (the reader is disposed afterwards) and returns the annotations\r\n        /// keyed by gene symbol. Blank lines are skipped.\r\n        /// </summary>\r\n        /// <param name=\"skipGeneIdValidation\">when true, unrecognized gene IDs are skipped instead of being reported as an error</param>\r\n        /// <param name=\"logWriter\">optional writer that receives one line per skipped gene ID</param>\r\n        /// <exception cref=\"UserErrorException\">\r\n        /// unrecognized gene IDs were collected, or no valid entry was found\r\n        /// </exception>\r\n        public Dictionary<string, List<ISuppGeneItem>> GetItems(bool skipGeneIdValidation=false, StreamWriter logWriter = null)\r\n        {\r\n            var geneAnnotations = new Dictionary<string, List<ISuppGeneItem>>();\r\n            using (_reader)\r\n            {\r\n                string line;\r\n                while ((line = _reader.ReadLine()) != null)\r\n                {\r\n                    if (string.IsNullOrWhiteSpace(line)) continue;\r\n                    AddItem(line, geneAnnotations, skipGeneIdValidation, logWriter);\r\n                }\r\n            }\r\n\r\n            if (_unknownGenes.Count > 0)\r\n            {\r\n                string message = $\"{UnknownGeneIdsErrorMessage} {string.Join(',',_unknownGenes)}.\";\r\n                if (geneAnnotations.Count == 0) message += $\" {NoValidEntriesErrorMessage}\";\r\n                throw new UserErrorException(message);\r\n            }\r\n\r\n            if (geneAnnotations.Count == 0) throw new UserErrorException(NoValidEntriesErrorMessage);\r\n            return geneAnnotations;\r\n        }\r\n\r\n        /// <summary>\r\n        /// Validates one data line and adds its annotation to <paramref name=\"geneAnnotations\"/>. Lines whose gene ID\r\n        /// cannot be resolved are logged and skipped; the ID is also remembered unless validation is skipped.\r\n        /// </summary>\r\n        /// <exception cref=\"UserErrorException\">\r\n        /// the column count differs from the header, the line has no annotation, or the gene was already seen\r\n        /// </exception>\r\n        private void AddItem(string line, Dictionary<string, List<ISuppGeneItem>> geneAnnotations, bool skipGeneIdValidation, StreamWriter logWriter)\r\n        {\r\n            var splits = line.OptimizedSplit('\\t');\r\n            if (splits.Length != _tags.Length)\r\n                throw new UserErrorException($\"Column number mismatch!! Header has {_tags.Length} columns but {line} contains {splits.Length}\");\r\n\r\n            // the gene ID is the second of the required columns\r\n            string geneId = splits[1];\r\n\r\n            var annotationValues = new string[_numAnnotationColumns];\r\n            var hasAnnotation = false;\r\n            for (var i = 0; i < _numAnnotationColumns; i++)\r\n            {\r\n                string annotationValue = splits[i + NumRequiredColumns];\r\n                if (annotationValue != \"\" && annotationValue != \".\") hasAnnotation = true;\r\n\r\n                annotationValues[i] = annotationValue;\r\n                _annotationValidators[i](annotationValues[i], line);\r\n            }\r\n\r\n            if (!hasAnnotation) throw new UserErrorException($\"No annotation provided in line {line}\");\r\n\r\n            string geneSymbol = GeneUtilities.GetGeneSymbolFromId(geneId, _entrezGeneIdToSymbol, _ensemblIdToSymbol);\r\n            if (geneSymbol == null)\r\n            {\r\n                if (!skipGeneIdValidation) _unknownGenes.Add(geneId);\r\n\r\n                logWriter?.WriteLine($\"Skipping unrecognized gene ID {geneId}\");\r\n                return;\r\n            }\r\n\r\n            if (geneAnnotations.ContainsKey(geneSymbol)) throw new UserErrorException($\"Found the same gene {geneSymbol} in different lines. Current line is: {line}\");\r\n\r\n            geneAnnotations[geneSymbol] = new List<ISuppGeneItem> {new CustomGene(geneSymbol, annotationValues.Select(x => new[] {x}).ToList(), JsonSchema, line)};\r\n        }\r\n\r\n        /// <summary>Returns the unrecognized gene IDs collected so far, sorted.</summary>\r\n        public IReadOnlyList<string> GetUnknownGenes() => _unknownGenes.OrderBy(x=>x).ToList();\r\n\r\n        public void Dispose() => _reader?.Dispose();\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/Custom/GeneMain.cs",
    "content": "﻿using System.IO;\r\nusing CommandLine.Builders;\r\nusing CommandLine.NDesk.Options;\r\nusing Compression.Utilities;\r\nusing ErrorHandling;\r\nusing ErrorHandling.Exceptions;\r\nusing IO;\r\nusing SAUtils.GeneIdentifiers;\r\nusing VariantAnnotation.SA;\r\n\r\nnamespace SAUtils.Custom\r\n{\r\n    public static class GeneMain\r\n    {\r\n        private static string _inputFile;\r\n        private static string _universalGeneArchivePath;\r\n        private static string _outputDirectory;\r\n        private static bool   _skipGeneValidation;\r\n\r\n        public static ExitCodes Run(string command, string[] commandArgs)\r\n        {\r\n            var ops = new OptionSet\r\n            {\r\n                {\r\n                    \"uga|u=\",\r\n                    \"universal gene archive file path\",\r\n                    v => _universalGeneArchivePath = v\r\n                },\r\n                {\r\n                    \"in|i=\",\r\n                    \"custom TSV file path\",\r\n                    v => _inputFile = v\r\n                },\r\n                {\r\n                    \"skip-validation\",\r\n                    \"skips gene name validation\",\r\n                    v => _skipGeneValidation = v != null\r\n                },\r\n                {\r\n                    \"out|o=\",\r\n                    \"output directory\",\r\n                    v => _outputDirectory = v\r\n                }\r\n            };\r\n\r\n            string commandLineExample = $\"{command} [options]\";\r\n\r\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\r\n                .Parse()\r\n                .CheckInputFilenameExists(_universalGeneArchivePath, \"universal gene archive\", \"--uga\")\r\n                .CheckInputFilenameExists(_inputFile, \"Custom gene annotation TSV\", \"--in\")\r\n                .CheckDirectoryExists(_outputDirectory, \"output\", \"--out\")\r\n                .SkipBanner()\r\n                
.ShowHelpMenu(\"Creates a supplementary gene annotation database from a custom input file\", commandLineExample)\r\n                .ShowErrors()\r\n                .Execute(ProgramExecution);\r\n\r\n            return exitCode;\r\n        }\r\n\r\n        private static ExitCodes ProgramExecution()\r\n        {\r\n\r\n            var (entrezGeneIdToSymbol, ensemblGeneIdToSymbol) = GeneUtilities.ParseUniversalGeneArchive(null, _universalGeneArchivePath);\r\n\r\n            string outputPrefix = GetOutputPrefix(_inputFile);\r\n            string ngaFilePath = Path.Combine(_outputDirectory, outputPrefix + SaCommon.GeneFileSuffix);\r\n            string ngaSchemaFilePath = ngaFilePath + SaCommon.JsonSchemaSuffix;\r\n\r\n            using (var parser = GeneAnnotationsParser.Create(GZipUtilities.GetAppropriateStreamReader(_inputFile), entrezGeneIdToSymbol, ensemblGeneIdToSymbol))\r\n            using (var ngaStream = FileUtilities.GetCreateStream(ngaFilePath))\r\n            using (var ngaWriter = CaUtilities.GetNgaWriter(ngaStream, parser, CaUtilities.GetInputFileName(_inputFile)))\r\n            using (var saJsonSchemaStream = FileUtilities.GetCreateStream(ngaSchemaFilePath))\r\n            using (var schemaWriter = new StreamWriter(saJsonSchemaStream))\r\n            {\r\n                ngaWriter.Write(parser.GetItems(_skipGeneValidation));\r\n                if(parser.GetUnknownGenes().Count > 0)\r\n                    throw new UserErrorException($\"{GeneAnnotationsParser.UnknownGeneIdsErrorMessage} {string.Join(',',parser.GetUnknownGenes())}.\");\r\n                schemaWriter.Write(parser.JsonSchema);\r\n            }\r\n\r\n            return ExitCodes.Success;\r\n        }\r\n\r\n        private static string GetOutputPrefix(string inputFilePath)\r\n        {\r\n            string fileName = GetInputFileName(inputFilePath);\r\n            if (fileName.EndsWith(\".tsv\"))\r\n                return fileName.Substring(0, fileName.Length - 4);\r\n            
return fileName.EndsWith(\".tsv.gz\") ? fileName.Substring(0, fileName.Length - 7) : fileName;\r\n        }\r\n\r\n        private static string GetInputFileName(string inputFilePath)\r\n        {\r\n            int fileNameIndex = inputFilePath.LastIndexOf(Path.DirectorySeparatorChar);\r\n            return fileNameIndex < 0 ? inputFilePath : inputFilePath.Substring(fileNameIndex + 1);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/Custom/ParserUtilities.cs",
    "content": "﻿using System;\r\nusing ErrorHandling.Exceptions;\r\nusing OptimizedCore;\r\nusing SAUtils.Schema;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.SA;\r\n\r\nnamespace SAUtils.Custom\r\n{\r\n    public static class ParserUtilities\r\n    {\r\n        public static (bool MatchByAllele, bool IsArray, SaJsonValueType PrimaryType, ReportFor reportFor) ParseMatchVariantsBy(string line)\r\n        {\r\n            line = line.Trim();\r\n            CheckPrefix(line, \"#matchVariantsBy\");\r\n            string firstCol = line.OptimizedSplit('\\t')[0];\r\n            (_, string matchBy) = firstCol.OptimizedKeyValue();\r\n\r\n            bool matchByAllele;\r\n            bool isArray;\r\n            SaJsonValueType primaryType;\r\n            ReportFor reportFor = ReportFor.AllVariants;\r\n            switch (matchBy)\r\n            {\r\n                case null:\r\n                    throw new UserErrorException(\"Please provide the annotation reporting criteria in the format: #matchVariantsBy=allele.\");\r\n                case \"allele\":\r\n                    matchByAllele = true;\r\n                    isArray = false;\r\n                    primaryType = SaJsonValueType.Object;\r\n                    break;\r\n                case \"position\":\r\n                    primaryType = SaJsonValueType.ObjectArray;\r\n                    matchByAllele = false;\r\n                    isArray = true;\r\n                    break;\r\n                case \"sv\":\r\n                    primaryType   = SaJsonValueType.ObjectArray;\r\n                    matchByAllele = false;\r\n                    isArray       = true;\r\n                    reportFor     = ReportFor.StructuralVariants;\r\n                    break;\r\n                default:\r\n                    throw new UserErrorException(\"matchVariantsBy tag has to be \\'allele\\', \\'sv\\' or \\'position\\'\");\r\n            }\r\n\r\n            return (matchByAllele, isArray, 
primaryType, reportFor);\r\n        }\r\n\r\n        public static string[] ParseTags(string line, string prefix, int numRequiredCols)\r\n        {\r\n            CheckPrefix(line, prefix);\r\n\r\n            var tags = line.OptimizedSplit('\\t');\r\n            if (tags.Length < numRequiredCols)\r\n                throw new UserErrorException($\"At least {numRequiredCols} columns required. Please note that the columns should be separated by tab.\");\r\n\r\n            return tags;\r\n        }\r\n\r\n\r\n        public static CustomAnnotationCategories[] ParseCategories(string line, int numRequiredColumns, int numAnnotationColumns, Action<string, string>[] annotationValidators)\r\n        {\r\n            CheckPrefix(line, \"#categories\");\r\n            var splits = line.OptimizedSplit('\\t');\r\n            if (splits.Length != numRequiredColumns + numAnnotationColumns) throw new UserErrorException(\"#categories row must have the same number of columns as the header row with column names.\");\r\n\r\n            var categories = new CustomAnnotationCategories[numAnnotationColumns];\r\n            for (var i = 0; i < numAnnotationColumns; i++)\r\n            {\r\n                switch (splits[i + numRequiredColumns].ToLower())\r\n                {\r\n                    case \"allelecount\":\r\n                        categories[i] = CustomAnnotationCategories.AlleleCount;\r\n                        break;\r\n                    case \"allelenumber\":\r\n                        categories[i] = CustomAnnotationCategories.AlleleNumber;\r\n                        break;\r\n                    case \"allelefrequency\":\r\n                        categories[i] = CustomAnnotationCategories.AlleleFrequency;\r\n                        break;\r\n                    case \"homozygouscount\":\r\n                        categories[i] = CustomAnnotationCategories.HomozygousCount;\r\n                        break;\r\n                    case \"prediction\":\r\n                
        categories[i] = CustomAnnotationCategories.Prediction;\r\n                        annotationValidators[i] = AllowedValues.ValidatePredictionValue;\r\n                        break;\r\n                    case \"filter\":\r\n                        categories[i] = CustomAnnotationCategories.Filter;\r\n                        annotationValidators[i] = AllowedValues.ValidateFilterValue;\r\n                        break;\r\n                    case \"identifier\":\r\n                        categories[i] = CustomAnnotationCategories.Identifier;\r\n                        annotationValidators[i] = AllowedValues.ValidateIdentifierValue;\r\n                        break;\r\n                    case \"description\":\r\n                        categories[i] = CustomAnnotationCategories.Description;\r\n                        annotationValidators[i] = AllowedValues.ValidateDescriptionValue;\r\n                        break;\r\n                    case \"score\":\r\n                        categories[i] = CustomAnnotationCategories.Score;\r\n                        annotationValidators[i] = AllowedValues.ValidateScoreValue;\r\n                        break;\r\n                    case \".\":\r\n                    case \"\":\r\n                        categories[i] = CustomAnnotationCategories.Unknown;\r\n                        break;\r\n                    default:\r\n                        throw new UserErrorException($\"Invalid category value: {splits[i + numRequiredColumns]}\");\r\n                }\r\n            }\r\n\r\n            return categories;\r\n        }\r\n\r\n        public static string[] ParseDescriptions(string line, int numRequiredColumns, int numAnnotationColumns)\r\n        {\r\n            CheckPrefix(line,\"#descriptions\");\r\n            var splits = line.OptimizedSplit('\\t');\r\n            if (splits.Length != numRequiredColumns + numAnnotationColumns) throw new UserErrorException(\"#descriptions row must have the same number of 
columns as the header row with column names\");\r\n\r\n            var descriptions = new string[numAnnotationColumns];\r\n            for (var i = 0; i < numAnnotationColumns; i++)\r\n            {\r\n                if (splits[i + numRequiredColumns] == \".\" || splits[i + numRequiredColumns] == \"\") descriptions[i] = null;\r\n                else descriptions[i] = splits[i + numRequiredColumns];\r\n            }\r\n\r\n            return descriptions;\r\n        }\r\n\r\n        public static SaJsonValueType[] ParseTypes(string line, int numRequiredColumns, int numAnnotationColumns)\r\n        {\r\n            CheckPrefix(line, \"#type\");\r\n            var splits = line.OptimizedSplit('\\t');\r\n            if (splits.Length != numRequiredColumns + numAnnotationColumns) throw new UserErrorException(\"#types row must have the same number of columns as the header row with column names\");\r\n\r\n            var valueTypes = new SaJsonValueType[numAnnotationColumns];\r\n            for (var i = 0; i < numAnnotationColumns; i++)\r\n            {\r\n                switch (splits[i + numRequiredColumns].ToLower())\r\n                {\r\n                    case \"bool\":\r\n                        valueTypes[i] = SaJsonValueType.Bool;\r\n                        break;\r\n                    case \"string\":\r\n                        valueTypes[i] = SaJsonValueType.String;\r\n                        break;\r\n                    case \"number\":\r\n                        valueTypes[i] = SaJsonValueType.Number;\r\n                        break;\r\n                    default:\r\n                        throw new UserErrorException(\"Invalid value for type column. 
Valid values are bool, string and number.\");\r\n                }\r\n            }\r\n\r\n            return valueTypes;\r\n        }\r\n\r\n        // ReSharper disable once ParameterOnlyUsedForPreconditionCheck.Global\r\n        internal static void CheckPrefix(string line, string prefix)\r\n        {\r\n            if (line != null && !line.StartsWith(prefix))\r\n                throw new UserErrorException($\"Expected a line starting with {prefix}. Observed \\n{line}\");\r\n        }\r\n\r\n        public static bool CheckJsonTagConflict(string value)\r\n        {\r\n            return value.Equals(SaCommon.DbsnpTag)\r\n                   || value.Equals(SaCommon.GlobalAlleleTag)\r\n                   || value.Equals(SaCommon.AncestralAlleleTag)\r\n                   || value.Equals(SaCommon.ClinGenTag)\r\n                   || value.Equals(SaCommon.ClinvarTag)\r\n                   || value.Equals(SaCommon.CosmicTag)\r\n                   || value.Equals(SaCommon.CosmicCnvTag)\r\n                   || value.Equals(SaCommon.DgvTag)\r\n                   || value.Equals(SaCommon.GnomadTag)\r\n                   || value.Equals(SaCommon.GnomadExomeTag)\r\n                   || value.Equals(SaCommon.MitoMapTag)\r\n                   || value.Equals(SaCommon.OmimTag)\r\n                   || value.Equals(SaCommon.OneKgenTag)\r\n                   || value.Equals(SaCommon.OnekSvTag)\r\n                   || value.Equals(SaCommon.PhylopTag)\r\n                   || value.Equals(SaCommon.RefMinorTag)\r\n                   || value.Equals(SaCommon.TopMedTag);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/Custom/VariantAnnotationsParser.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing System.Text;\r\nusing ErrorHandling;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing OptimizedCore;\r\nusing SAUtils.DataStructures;\r\nusing SAUtils.Schema;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.SA;\r\nusing Variants;\r\n\r\nnamespace SAUtils.Custom\r\n{\r\n    public sealed class VariantAnnotationsParser : IDisposable\r\n    {\r\n        private readonly StreamReader _reader;\r\n        public ISequenceProvider SequenceProvider;\r\n        public string JsonTag;\r\n        public GenomeAssembly Assembly;\r\n        public string Version;\r\n        public string DataSourceDescription;\r\n        public bool MatchByAllele;\r\n        public bool IsArray;\r\n        public string[] Tags;\r\n        internal CustomAnnotationCategories[] Categories;\r\n        internal string[] Descriptions;\r\n        internal SaJsonValueType[] ValueTypes;\r\n\r\n        public ReportFor ReportFor;\r\n\r\n        private int _numRequiredColumns;\r\n        private int _numAnnotationColumns;\r\n        private int _altColumnIndex = -1;\r\n        private int _endColumnIndex = -1;\r\n        private readonly HashSet<GenomeAssembly> _allowedGenomeAssemblies = new HashSet<GenomeAssembly> { GenomeAssembly.GRCh37, GenomeAssembly.GRCh38, GenomeAssembly.SARSCoV2 };\r\n        private readonly List<CustomInterval> _intervals;\r\n        private (Chromosome Chromesome, int Position) _previousPosition = (null, 0);\r\n        private Action<string, string>[] _annotationValidators;\r\n\r\n        private SaJsonValueType _primaryType;\r\n        private readonly Dictionary<string, SaJsonValueType> _predefinedTypeAnnotation = new Dictionary<string, SaJsonValueType>\r\n        {\r\n            {\"refAllele\", SaJsonValueType.String},\r\n            {\"altAllele\", SaJsonValueType.String},\r\n 
           {\"start\", SaJsonValueType.Number},\r\n            {\"end\", SaJsonValueType.Number}\r\n        };\r\n\r\n        internal readonly List<string> JsonKeys = new List<string> { \"refAllele\", \"altAllele\" };\r\n        internal readonly List<string> IntervalJsonKeys = new List<string> { \"start\", \"end\" };\r\n\r\n        public SaJsonSchema JsonSchema;\r\n        public SaJsonSchema IntervalJsonSchema;\r\n\r\n\r\n        internal VariantAnnotationsParser(StreamReader streamReader, ISequenceProvider sequenceProvider)\r\n        {\r\n            _reader = streamReader;\r\n            SequenceProvider = sequenceProvider;\r\n            _intervals = new List<CustomInterval>();\r\n        }\r\n\r\n        public static VariantAnnotationsParser Create(StreamReader streamReader, ISequenceProvider sequenceProvider = null)\r\n        {\r\n            var parser = new VariantAnnotationsParser(streamReader, sequenceProvider);\r\n\r\n            parser.ParseHeaderLines();\r\n            parser.InitiateSchema();\r\n            parser.AddPredefinedTypeAnnotation();\r\n            parser.AddHeaderAnnotation();\r\n\r\n            return parser;\r\n        }\r\n\r\n        internal void ParseHeaderLines()\r\n        {\r\n            var hasMatchByLine = false;\r\n\r\n            string line;\r\n            while ((line = _reader.ReadLine())!=null)\r\n            {\r\n                if (line.StartsWith(\"#CHROM\")) break;\r\n                line = line.Trim();\r\n                (string key, string value) = line.OptimizedKeyValue();\r\n                switch (key)\r\n                {\r\n                    case \"#title\":\r\n                        JsonTag = value;\r\n                        break;\r\n                    case \"#assembly\":\r\n                        Assembly = GenomeAssemblyHelper.Convert(value);\r\n                        break;\r\n                    case \"#matchVariantsBy\":\r\n                        (MatchByAllele, IsArray, _primaryType, 
ReportFor) = ParserUtilities.ParseMatchVariantsBy(line);\r\n                        hasMatchByLine = true;\r\n                        break;\r\n                    case \"#version\":\r\n                        Version = value;\r\n                        break;\r\n                    case \"#description\":\r\n                        DataSourceDescription = value;\r\n                        break;\r\n                    default:\r\n                        var e = new UserErrorException(\"Unexpected header tag observed:\"+value);\r\n                        e.Data[ExitCodeUtilities.Line] = line;\r\n                        throw e;\r\n                }\r\n            }\r\n            CheckRequiredFields(hasMatchByLine);\r\n\r\n            //The following lines have to appear in exact order\r\n            Tags = ParserUtilities.ParseTags(line, \"#CHROM\", _numRequiredColumns);\r\n            CheckTagsAndSetJsonKeys();\r\n            Categories = ParserUtilities.ParseCategories(_reader.ReadLine(), _numRequiredColumns, _numAnnotationColumns, _annotationValidators);\r\n            Descriptions = ParserUtilities.ParseDescriptions(_reader.ReadLine(), _numRequiredColumns, _numAnnotationColumns);\r\n            ValueTypes = ParserUtilities.ParseTypes(_reader.ReadLine(), _numRequiredColumns, _numAnnotationColumns);\r\n        }\r\n\r\n        private void CheckRequiredFields(bool hasMatchByLine)\r\n        {\r\n            if (string.IsNullOrEmpty(JsonTag))\r\n                throw new UserErrorException(\"Please provide the title in the format: #title=titleValue.\");\r\n            if (ParserUtilities.CheckJsonTagConflict(JsonTag))\r\n                throw new UserErrorException($\"{JsonTag} is a reserved supplementary annotation tag in Nirvana. 
Please use a different value.\");\r\n            if (!_allowedGenomeAssemblies.Contains(Assembly))\r\n                throw new UserErrorException(\"Only GRCh37 and GRCh38 are accepted as genome assembly.\");\r\n            if (!hasMatchByLine)\r\n                throw new UserErrorException(\r\n                    \"Please provide the annotation reporting criteria in the format: #matchVariantsBy=allele.\");\r\n        }\r\n\r\n        private void CheckTagsAndSetJsonKeys()\r\n        {\r\n            CheckPosAndRefColumns();\r\n            CheckAltAndEndColumns();\r\n\r\n            for (int i = _numRequiredColumns; i < Tags.Length; i++)\r\n            {\r\n                if (string.IsNullOrWhiteSpace(Tags[i]))\r\n                    throw new UserErrorException($\"Please provide a name for column {i + 1} at the forth row.\");\r\n\r\n                JsonKeys.Add(Tags[i]);\r\n                IntervalJsonKeys.Add(Tags[i]);\r\n            }\r\n        }\r\n\r\n        internal void CheckPosAndRefColumns()\r\n        {\r\n            if (Tags[1] != \"POS\" || Tags[2] != \"REF\")\r\n                throw new UserErrorException(\"The 2nd and 3rd columns must be POS and REF, respectively.\");\r\n        }\r\n\r\n        internal void CheckAltAndEndColumns()\r\n        {\r\n            _numRequiredColumns = 4;\r\n\r\n            switch (Tags[3])\r\n            {\r\n                case \"ALT\":\r\n                    {\r\n                        _altColumnIndex = 3;\r\n\r\n                        if (Tags.Length > 4 && Tags[4] == \"END\")\r\n                        {\r\n                            _endColumnIndex = 4;\r\n                            _numRequiredColumns = 5;\r\n                        }\r\n\r\n                        break;\r\n                    }\r\n                case \"END\":\r\n                    _endColumnIndex = 3;\r\n                    break;\r\n                default:\r\n                    throw new UserErrorException(\"Please provide at 
least one of the ALT and END columns.The END column should come after the ALT column if both are present.\");\r\n            }\r\n\r\n            _numAnnotationColumns = Tags.Length - _numRequiredColumns;\r\n            _annotationValidators = Enumerable.Repeat<Action<string, string>>((a, b) => { }, _numAnnotationColumns).ToArray();\r\n        }\r\n\r\n        public IEnumerable<CustomItem> GetItems()\r\n        {\r\n            if (SequenceProvider == null)\r\n            {\r\n                throw new Exception(\"Sequence provider is null.\");\r\n            }\r\n            using (_reader)\r\n            {\r\n                string line;\r\n                while ((line = _reader.ReadLine()) != null)\r\n                {\r\n                    if (string.IsNullOrWhiteSpace(line)) continue;\r\n                    var item = ExtractItems(line);\r\n                    if (item == null) continue;\r\n                    yield return item;\r\n                }\r\n            }\r\n        }\r\n\r\n        private void InitiateSchema()\r\n        {\r\n            if (_altColumnIndex != -1) JsonSchema = SaJsonSchema.Create(new StringBuilder(), JsonTag, _primaryType, JsonKeys);\r\n            if (_endColumnIndex != -1) IntervalJsonSchema = SaJsonSchema.Create(new StringBuilder(), JsonTag, SaJsonValueType.ObjectArray, IntervalJsonKeys);\r\n        }\r\n\r\n        private void AddPredefinedTypeAnnotation()\r\n        {\r\n            foreach ((string jsonKey, var valueType) in _predefinedTypeAnnotation)\r\n            {\r\n                JsonSchema?.AddAnnotation(jsonKey, SaJsonKeyAnnotation.CreateFromProperties(valueType, 0, null));\r\n                IntervalJsonSchema?.AddAnnotation(jsonKey, SaJsonKeyAnnotation.CreateFromProperties(valueType, 0, null));\r\n            }\r\n        }\r\n\r\n        private void AddHeaderAnnotation()\r\n        {\r\n            for (var i = 0; i < _numAnnotationColumns; i++)\r\n            {\r\n                var annotation = 
SaJsonKeyAnnotation.CreateFromProperties(ValueTypes[i], Categories[i], Descriptions[i]);\r\n\r\n                JsonSchema?.AddAnnotation(Tags[i + _numRequiredColumns], annotation);\r\n                IntervalJsonSchema?.AddAnnotation(Tags[i + _numRequiredColumns], annotation);\r\n            }\r\n        }\r\n\r\n        internal CustomItem ExtractItems(string line)\r\n        {\r\n            var splits = line.OptimizedSplit('\\t');\r\n            if (splits.Length != Tags.Length)\r\n                throw new UserErrorException($\"Column number mismatch!! Header has {Tags.Length} columns but {line} contains {splits.Length}\");\r\n\r\n            string chromosome = splits[0];\r\n\r\n            if (!SequenceProvider.RefNameToChromosome.TryGetValue(chromosome, out var chrom))\r\n            {\r\n                Console.WriteLine($\"Annotation on {chromosome} is skipped.\");\r\n                return null;\r\n            }\r\n\r\n            SequenceProvider.LoadChromosome(chrom);\r\n\r\n            if (!int.TryParse(splits[1], out var position))\r\n                throw new UserErrorException($\"POS is not an int number at: {line}.\");\r\n\r\n            CheckAnnotationSorted(chrom, position, line);\r\n\r\n            string refAllele = splits[2].ToUpper();\r\n\r\n            var annotationValues = new string[_numAnnotationColumns];\r\n            for (var i = 0; i < _numAnnotationColumns; i++)\r\n            {\r\n                annotationValues[i] = splits[i + _numRequiredColumns];\r\n                _annotationValidators[i](annotationValues[i], line);\r\n            }\r\n\r\n            if (IsInterval(splits))\r\n            {\r\n                \r\n                if (!int.TryParse(splits[_endColumnIndex], out var end))\r\n                    throw new UserErrorException($\"END is not an integer.\\nInput line: {line}.\");\r\n\r\n                //for symbolic alleles, position needs to increment to account for the padding base \r\n                if 
(_altColumnIndex >=0 && IsSymbolicAllele(splits[_altColumnIndex]))\r\n                    position++;\r\n\r\n                var jsonStringValues = new List<string> { position.ToString(), splits[_endColumnIndex] };\r\n                jsonStringValues.AddRange(annotationValues);\r\n                _intervals.Add(new CustomInterval(chrom, position, end, jsonStringValues.Select(x => new[] { x }).ToList(), IntervalJsonSchema, line));\r\n                return null;\r\n            }\r\n\r\n            string altAllele = splits[_altColumnIndex];\r\n            if (!IsValidAltAllele(altAllele))\r\n                throw new UserErrorException($\"Invalid nucleotides in ALT column: {altAllele}.\\nInput line: {line}\");\r\n\r\n            (position, refAllele, altAllele) = VariantUtils.TrimAndLeftAlign(position, refAllele, altAllele, SequenceProvider.Sequence);\r\n            return new CustomItem(chrom, position, refAllele, altAllele, annotationValues.Select(x => new[] { x }).ToArray(), JsonSchema, line);\r\n        }\r\n\r\n        private bool IsSymbolicAllele(string altAllele)\r\n        {\r\n            return altAllele.StartsWith('<') && altAllele.EndsWith('>');\r\n        }\r\n\r\n        private bool IsInterval(string[] splits) => _endColumnIndex != -1 && !AllowedValues.IsEmptyValue(splits[_endColumnIndex]);\r\n\r\n        private void CheckAnnotationSorted(Chromosome chrom, int position, string line)\r\n        {\r\n            if (chrom != _previousPosition.Chromesome)\r\n            {\r\n                _previousPosition = (chrom, position);\r\n            }\r\n            else\r\n            {\r\n                if (position < _previousPosition.Position)\r\n                    throw new UserErrorException($\"Annotation is not sorted at {line}\");\r\n                _previousPosition.Position = position;\r\n            }\r\n        }\r\n\r\n        public List<CustomInterval> GetCustomIntervals() => _intervals.Count > 0 ? 
_intervals : null;\r\n\r\n        internal static bool IsValidAltAllele(string sequence)\r\n        {\r\n            if (sequence.Contains('[') || sequence.Contains(']')) return true;\r\n            \r\n            var validNucleotides = new[] { 'a', 'c', 'g', 't', 'n' };\r\n            foreach (char nucleotide in sequence.ToLower())\r\n            {\r\n                if (!validNucleotides.Contains(nucleotide)) return false;\r\n            }\r\n\r\n            return true;\r\n        }\r\n\r\n        public void Dispose()\r\n        {\r\n            _reader?.Dispose();\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/Custom/VariantMain.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing CommandLine.Builders;\r\nusing CommandLine.NDesk.Options;\r\nusing Compression.Utilities;\r\nusing ErrorHandling;\r\nusing IO;\r\nusing SAUtils.DataStructures;\r\nusing SAUtils.Schema;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.Providers;\r\nusing VariantAnnotation.SA;\r\n\r\nnamespace SAUtils.Custom\r\n{\r\n    public static class VariantMain\r\n    {\r\n        private static string _inputFile;\r\n        private static string _compressedReference;\r\n        private static string _outputDirectory;\r\n        private static bool   _skipRefBaseValidation;\r\n        \r\n        public static ExitCodes Run(string command, string[] commandArgs)\r\n        {\r\n            var ops = new OptionSet\r\n            {\r\n                {\r\n                    \"ref|r=\",\r\n                    \"compressed reference sequence file\",\r\n                    v => _compressedReference = v\r\n                },\r\n                {\r\n                    \"in|i=\",\r\n                    \"custom TSV file path\",\r\n                    v => _inputFile = v\r\n                },\r\n                {\r\n                    \"out|o=\",\r\n                    \"output directory\",\r\n                    v => _outputDirectory = v\r\n                },\r\n                {\r\n                    \"skip-ref\",\r\n                    \"skip ref base validation\",\r\n                    v => _skipRefBaseValidation = v != null\r\n                }\r\n            };\r\n\r\n            string commandLineExample = $\"{command} [options]\";\r\n\r\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\r\n                .Parse()\r\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence\", \"--ref\")\r\n                .CheckInputFilenameExists(_inputFile, \"Custom variant annotation TSV\", \"--in\")\r\n                
.CheckDirectoryExists(_outputDirectory, \"output\", \"--out\")\r\n                .SkipBanner()\r\n                .ShowHelpMenu(\"Creates a supplementary variant annotation database from a custom input file\", commandLineExample)\r\n                .ShowErrors()\r\n                .Execute(ProgramExecution);\r\n\r\n            return exitCode;\r\n        }\r\n\r\n        private static ExitCodes ProgramExecution()\r\n        {\r\n            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\r\n            \r\n            List<CustomInterval> intervals;\r\n            SaJsonSchema         intervalJsonSchema;\r\n            string               jsonTag;\r\n            DataSourceVersion    version;\r\n            string               outputPrefix      = GetOutputPrefix(_inputFile);\r\n            string               nsaFileName       = Path.Combine(_outputDirectory, outputPrefix + SaCommon.SaFileSuffix);\r\n            string               nsaIndexFileName  = nsaFileName + SaCommon.IndexSuffix;\r\n            string               nsaSchemaFileName = nsaFileName + SaCommon.JsonSchemaSuffix;\r\n            ReportFor            reportFor;\r\n\r\n            var nsaItemCount = 0;\r\n\r\n            using (var parser = VariantAnnotationsParser.Create(GZipUtilities.GetAppropriateStreamReader(_inputFile), referenceProvider))\r\n            using (var nsaStream   = FileUtilities.GetCreateStream(nsaFileName))\r\n            using (var indexStream = FileUtilities.GetCreateStream(nsaIndexFileName))       \r\n            using (var nsaWriter = CaUtilities.GetNsaWriter(nsaStream, indexStream, parser,  CaUtilities.GetInputFileName(_inputFile),referenceProvider, out version, _skipRefBaseValidation))\r\n            using (var saJsonSchemaStream = FileUtilities.GetCreateStream(nsaSchemaFileName))\r\n            using (var schemaWriter = new StreamWriter(saJsonSchemaStream))\r\n            {\r\n                (jsonTag, 
nsaItemCount, intervalJsonSchema, intervals) = CaUtilities.WriteSmallVariants(parser, nsaWriter, schemaWriter);\r\n                reportFor = parser.ReportFor;\r\n                if (intervals == null) return ExitCodes.Success;\r\n            }\r\n\r\n            if (nsaItemCount == 0)\r\n            {\r\n                File.Delete(nsaFileName);\r\n                File.Delete(nsaIndexFileName);\r\n                File.Delete(nsaSchemaFileName);\r\n            }\r\n\r\n            using (var nsiStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outputPrefix + SaCommon.IntervalFileSuffix)))\r\n            using (var nsiWriter = CaUtilities.GetNsiWriter(nsiStream, version, referenceProvider.Assembly, jsonTag, reportFor))\r\n            using (var siJsonSchemaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outputPrefix + SaCommon.IntervalFileSuffix + SaCommon.JsonSchemaSuffix)))\r\n            using (var schemaWriter = new StreamWriter(siJsonSchemaStream))\r\n            {\r\n                nsiWriter.Write(intervals);\r\n                schemaWriter.Write(intervalJsonSchema);\r\n            }\r\n\r\n            return ExitCodes.Success;\r\n        }\r\n\r\n        private static string GetOutputPrefix(string inputFilePath)\r\n        {\r\n            string fileName = CaUtilities.GetInputFileName(inputFilePath);\r\n            if (fileName.EndsWith(\".tsv\"))\r\n                return fileName.Substring(0, fileName.Length - 4);\r\n            return fileName.EndsWith(\".tsv.gz\") ? fileName.Substring(0, fileName.Length - 7) : fileName;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/Dann/Create.cs",
    "content": "﻿using System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.GenericScore;\nusing SAUtils.GenericScore.GenericScoreParser;\nusing SAUtils.InputFileParsers;\nusing VariantAnnotation.GenericScore;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.Dann\n{\n    public static class Create\n    {\n        private static string _inputFile;\n        private static string _compressedReference;\n        private static string _outputDirectory;\n\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"ref|r=\",\n                    \"compressed reference sequence file\",\n                    v => _compressedReference = v\n                },\n                {\n                    \"in|i=\",\n                    \"input DANN file path\",\n                    v => _inputFile = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            var commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .CheckInputFilenameExists(_inputFile,           \"input DANN file Path\",                    \"--in\")\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Create a supplementary database from DANN input file \", commandLineExample)\n                
.ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n\n        private static ExitCodes ProgramExecution()\n        {\n            var nucleotides = new[] {\"A\", \"C\", \"G\", \"T\"};\n\n            var dannParserSettings = new ParserSettings(\n                new ColumnIndex(0, 2, 3, 4, 5, null),\n                nucleotides,\n                GenericScoreParser.MaxRepresentativeScores\n            );\n\n            var dannWriterSettings = new WriterSettings(\n                1_000_000,\n                nucleotides,\n                false,\n                EncoderType.ZeroToOne,\n                new ZeroToOneScoreEncoder(2, 1.0),\n                new ScoreJsonEncoder(SaCommon.DannTag + SaCommon.Score, null),\n                new SaItemValidator(true, false)\n            );\n\n            DataSourceVersion version     = DataSourceVersionReader.GetSourceVersion(_inputFile + \".version\");\n            var               outFileName = $\"{version.Name}_{version.Version}\";\n            using (var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference)))\n            using (var streamReader = GZipUtilities.GetAppropriateStreamReader(_inputFile))\n            using (var dannParser = new GenericScoreParser(dannParserSettings, streamReader, referenceProvider.RefNameToChromosome))\n            using (var saStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.GsaFileSuffix)))\n            using (var indexStream =\n                   FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.GsaFileSuffix + SaCommon.IndexSuffix)))\n            using (var saWriter = new ScoreFileWriter(dannWriterSettings, saStream, indexStream, version, referenceProvider,\n                       SaCommon.SchemaVersion, skipIncorrectRefEntries: true, leaveOpen: false))\n            {\n                
saWriter.Write(dannParser.GetItems());\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/DataStructures/AlleleFrequencyItem.cs",
    "content": "﻿using System;\nusing Genome;\nusing VariantAnnotation.Interface.SA;\n\nnamespace SAUtils.DataStructures\n{\n    public sealed class AlleleFrequencyItem:ISupplementaryDataItem\n    {\n        public Chromosome Chromosome { get; }\n        public int Position { get; set; }\n        public string RefAllele { get; set; }\n        public string AltAllele { get; set; }\n\n        public readonly double AltFrequency;\n\n        public AlleleFrequencyItem(Chromosome chromosome, int position, string refAllele, string altAllele, double altFrequency, string inputLine)\n        {\n            Chromosome   = chromosome;\n            Position     = position;\n            AltFrequency = altFrequency;\n            RefAllele    = refAllele;\n            AltAllele    = altAllele;\n            InputLine    = inputLine;\n        }\n\n        public string GetJsonString()\n        {\n            throw new NotImplementedException();\n        }\n\n        public string InputLine { get; }\n    }\n}"
  },
  {
    "path": "SAUtils/DataStructures/AncestralAlleleItem.cs",
    "content": "﻿using Genome;\nusing VariantAnnotation.Interface.SA;\n\nnamespace SAUtils.DataStructures\n{\n    public sealed class AncestralAlleleItem: ISupplementaryDataItem\n    {\n        public          Chromosome Chromosome { get; }\n        public          int         Position   { get; set; }\n        public          string      RefAllele  { get; set; }\n        public          string      AltAllele  { get; set; }\n        public          string      InputLine  { get; }\n        public readonly string      AncestralAllele;\n\n        public AncestralAlleleItem(Chromosome chromosome, int position, string refAllele, string altAllele, string ancestralAllele, string inputLine)\n        {\n            Chromosome      = chromosome;\n            Position        = position;\n            RefAllele       = refAllele;\n            AltAllele       = altAllele;\n            AncestralAllele = ancestralAllele;\n            InputLine       = inputLine;\n        }\n\n        public string GetJsonString()\n        {\n            return $\"\\\"{AncestralAllele}\\\"\";\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/DataStructures/ClinGenItem.cs",
    "content": "﻿using System.Collections.Generic;\nusing Genome;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\nusing Variants;\n\nnamespace SAUtils.DataStructures\n{\n    public enum ClinicalInterpretation\n    {\n        // ReSharper disable InconsistentNaming\n        pathogenic             = 5,\n        likely_pathogenic      = 4,\n        benign                 = 3,\n        likely_benign          = 2,\n        uncertain_significance = 1,\n        unknown                = 0\n        // ReSharper restore InconsistentNaming\n    }\n\n    public sealed class ClinGenItem:ISuppIntervalItem\n    {\n        public int Start { get; }\n        public int End { get; }\n        public Chromosome Chromosome { get; }\n\n\n        private string Id { get; }\n        private VariantType VariantType { get; }\n        private ClinicalInterpretation ClinicalInterpretation { get; }\n        private IEnumerable<string> Phenotypes => _phenotypes;\n\t    private readonly HashSet<string> _phenotypes;\n        private IEnumerable<string> PhenotypeIds => _phenotypeIds;\n\t    private readonly HashSet<string> _phenotypeIds;\n        private int ObservedGains { get; }\n        private int ObservedLosses { get; }\n        private bool Validated { get; }\n\n\n        public ClinGenItem(string id, Chromosome chromosome, int start, int end, VariantType variantType, int observedGains, int observedLosses,\n            ClinicalInterpretation clinicalInterpretation, bool validated, HashSet<string> phenotypes = null, HashSet<string> phenotypeIds = null)\n        {\n            Id                     = id;\n            Chromosome             = chromosome;\n            Start                  = start;\n            End                    = end;\n            VariantType            = variantType;\n            ClinicalInterpretation = clinicalInterpretation;\n            _phenotypes            = phenotypes ?? 
new HashSet<string>();\n            _phenotypeIds          = phenotypeIds ?? new HashSet<string>();\n            ObservedGains          = observedGains;\n            ObservedLosses         = observedLosses;\n            Validated              = validated;\n        }\n\n\n\n        public string GetJsonString()\n        {\n            var sb = StringBuilderPool.Get();\n            var jsonObject = new JsonObject(sb);\n\n            jsonObject.AddStringValue(\"chromosome\", Chromosome.EnsemblName);\n            jsonObject.AddIntValue(\"begin\", Start);\n            jsonObject.AddIntValue(\"end\", End);\n            jsonObject.AddStringValue(\"variantType\", VariantType.ToString());\n            jsonObject.AddStringValue(\"id\", Id);\n            jsonObject.AddStringValue(\"clinicalInterpretation\", GetClinicalDescription(ClinicalInterpretation));\n            jsonObject.AddStringValues(\"phenotypes\", Phenotypes);\n            jsonObject.AddStringValues(\"phenotypeIds\", PhenotypeIds);\n            if (ObservedGains>0) jsonObject.AddIntValue(\"observedGains\", ObservedGains);\n            if (ObservedLosses>0) jsonObject.AddIntValue(\"observedLosses\", ObservedLosses);\n            jsonObject.AddBoolValue(\"validated\",Validated);\n\n            return StringBuilderPool.GetStringAndReturn(sb);\n        }\n\n        \n\n        private static string GetClinicalDescription(ClinicalInterpretation clinicalInterpretation)\n        {\n            // ReSharper disable once SwitchStatementMissingSomeCases\n            switch (clinicalInterpretation)\n            {\n                case ClinicalInterpretation.uncertain_significance:\n                    return \"uncertain significance\";\n                case ClinicalInterpretation.likely_benign:\n                    return \"likely benign\";\n                case ClinicalInterpretation.likely_pathogenic:\n                    return \"likely pathogenic\";\n                case ClinicalInterpretation.unknown:\n                 
   return null;\n                default:\n                    return clinicalInterpretation.ToString();\n            }\n        }\n        \n        \n    }\n}\n"
  },
  {
    "path": "SAUtils/DataStructures/ClinVarItem.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.Linq;\r\nusing Genome;\r\nusing SAUtils.InputFileParsers.ClinVar;\r\nusing SAUtils.Schema;\r\nusing VariantAnnotation.Interface.SA;\r\n\r\n\r\nnamespace SAUtils.DataStructures\r\n{\r\n    public sealed class ClinVarItem : IClinVarSaItem\r\n    {\r\n        public Chromosome Chromosome { get; }\r\n        public int         Position   { get; set; }\r\n        public string      RefAllele  { get; set; }\r\n        public string      AltAllele  { get; set; }\r\n        public string      InputLine  { get; }\r\n\r\n        public  int                        Stop             { get; }\r\n        public  string                     VariantType      { get; }\r\n        public  string                     Id               { get; }\r\n        public  string                     VariationId      { get; set; }\r\n        public  IEnumerable<string>        AlleleOrigins    { get; }\r\n        public  IEnumerable<string>        Phenotypes       { get; }\r\n        public  IEnumerable<string>        Significances    { get; }\r\n        public  ClinVarCommon.ReviewStatus ReviewStatus     { get; }\r\n        private string                     IsAlleleSpecific { get; }\r\n        public  IEnumerable<string>        MedGenIds        { get; }\r\n        public  IEnumerable<string>        OmimIds          { get; }\r\n        public  IEnumerable<string>        OrphanetIds      { get; }\r\n\r\n        public IEnumerable<long> PubmedIds { get; }\r\n        public long LastUpdatedDate { get; }\r\n\r\n        public SaJsonSchema JsonSchema { get; }\r\n\r\n        public ClinVarItem(Chromosome chromosome,\r\n            int position,\r\n            int stop,\r\n            string refAllele,\r\n            string altAllele,\r\n            SaJsonSchema jsonSchema,\r\n            IEnumerable<string> alleleOrigins,\r\n            string variantType,\r\n            string id,\r\n            string variationId,\r\n     
       ClinVarCommon.ReviewStatus reviewStatus,\r\n            IEnumerable<string> medGenIds,\r\n            IEnumerable<string> omimIds,\r\n            IEnumerable<string> orphanetIds,\r\n            IEnumerable<string> phenotypes,\r\n            IEnumerable<string> significances,\r\n            IEnumerable<long> pubmedIds = null,\r\n            long lastUpdatedDate = long.MinValue\r\n        )\r\n        {\r\n            Chromosome       = chromosome;\r\n            Position         = position;\r\n            Stop             = stop;\r\n            AlleleOrigins    = alleleOrigins;\r\n            AltAllele        = altAllele;\r\n            JsonSchema       = jsonSchema;\r\n            VariantType      = variantType;\r\n            Id               = id;\r\n            VariationId      = variationId;\r\n            MedGenIds        = medGenIds;\r\n            OmimIds          = omimIds;\r\n            OrphanetIds      = orphanetIds;\r\n            Phenotypes       = phenotypes;\r\n            RefAllele        = refAllele;\r\n            Significances    = significances;\r\n            PubmedIds        = pubmedIds;\r\n            LastUpdatedDate  = lastUpdatedDate;\r\n            IsAlleleSpecific = null;\r\n            ReviewStatus     = reviewStatus;\r\n\r\n        }\r\n\r\n        public string GetJsonString()\r\n        {\r\n            return JsonSchema.GetJsonString(GetValues());\r\n        }\r\n\r\n        private List<string[]> GetValues()\r\n        {\r\n            var values = new List<string[]>\r\n            {\r\n                //the exact order of adding values has to be preserved. 
the order is dictated by the json schema\r\n                new[] {Id},\r\n                new[] {VariationId},\r\n                new[] {ClinVarCommon.ReviewStatusStrings[ReviewStatus]},\r\n                AlleleOrigins?.ToArray(),\r\n                new[] {ClinVarCommon.NormalizeAllele(RefAllele)},\r\n                new[] {ClinVarCommon.NormalizeAllele(AltAllele)},\r\n                Phenotypes?.ToArray(),\r\n                MedGenIds?.ToArray(),\r\n                OmimIds?.ToArray(),\r\n                OrphanetIds?.ToArray(),\r\n                Significances?.ToArray(),\r\n                new[] {new DateTime(LastUpdatedDate).ToString(\"yyyy-MM-dd\")},\r\n                PubmedIds?.OrderBy(x => x).Select(x => x.ToString()).ToArray()\r\n            };\r\n            \r\n            return values;\r\n        }\r\n\r\n        public int CompareTo(IClinVarSaItem other)\r\n        {\r\n            return Chromosome.Index != other.Chromosome.Index\r\n                ? Chromosome.Index.CompareTo(other.Chromosome.Index)\r\n                : Position.CompareTo(other.Position);\r\n        }\r\n    }\r\n\r\n    \r\n}\r\n"
  },
  {
    "path": "SAUtils/DataStructures/ComputingUtilities.cs",
    "content": "﻿using System;\r\nusing VariantAnnotation.IO;\r\n\r\nnamespace SAUtils.DataStructures\r\n{\r\n    public static class ComputingUtilities\r\n    {\r\n        public static string ComputeFrequency(int? alleleNumber, int? alleleCount)\r\n        {\r\n            return alleleNumber != null && alleleNumber.Value > 0 && alleleCount != null\r\n                ? ((double)alleleCount / alleleNumber.Value).ToString(JsonCommon.FrequencyRoundingFormat)\r\n                : null;\r\n        }\r\n        \r\n        public static int GetCoverage(double depth, double allAlleleNumber)\r\n        {\r\n            return (int) Math.Round(depth / allAlleleNumber, 0, MidpointRounding.AwayFromZero);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/DataStructures/ConservationItem.cs",
    "content": "﻿using System;\nusing Genome;\n\nnamespace SAUtils.DataStructures\n{\n    public sealed class PhylopItem\n    {\n        public Chromosome Chromosome { get; }\n        public int Position { get; }\n        public double Score { get; }\n\n        public PhylopItem(Chromosome chromosome, int position, double score)\n        {\n            Chromosome = chromosome;\n            Position   = position;\n            Score      = Math.Round(score,1, MidpointRounding.AwayFromZero);\n        }\n        \n    }\n}"
  },
  {
    "path": "SAUtils/DataStructures/CosmicItem.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing Genome;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.DataStructures\n{\n    public sealed class CosmicItem : ISupplementaryDataItem\n    {\n        public Chromosome Chromosome { get; }\n        public int         Position   { get; set; }\n        public string      RefAllele  { get; set; }\n        public string      AltAllele  { get; set; }\n        public string      InputLine  { get; }\n\n        private string Id { get; }\n        private string Gene { get; }\n        private int? SampleCount { get; }\n        public HashSet<CosmicStudy> Studies { get; }\n\n        public CosmicItem(\n            Chromosome chromosome,\n            int position,\n            string id,\n            string refAllele,\n            string altAllele,\n            string gene,\n            HashSet<CosmicStudy> studies, int? sampleCount)\n        {\n            Chromosome      = chromosome;\n            Position        = position;\n            Id              = id;\n            RefAllele = refAllele;\n            AltAllele = altAllele;\n            Gene            = gene;\n            Studies         = studies;\n            SampleCount     = sampleCount;\n\n        }\n\n        public sealed class CosmicStudy : IEquatable<CosmicStudy>\n        {\n            #region members\n\n            public string Id { get; }\n            public IEnumerable<string> Histologies { get; }\n            public IEnumerable<string> Sites { get; }\n\n            #endregion\n\n            public CosmicStudy(string studyId, IEnumerable<string> histologies, IEnumerable<string> sites)\n            {\n                Id          = studyId;\n                Sites       = sites;\n                Histologies = histologies;\n            }\n\n            public bool Equals(CosmicStudy other)\n            {\n                if (other == null) return 
false;\n                return Id.Equals(other.Id)\n                    && Histologies.SequenceEqual(other.Histologies)\n                    && Sites.SequenceEqual(other.Sites);\n            }\n\n            public override int GetHashCode()\n            {\n                var hashCode = Id?.GetHashCode() ?? 0;\n                //hashCode ^= Histologies.GetHashCode() ^ Sites.GetHashCode();\n                return hashCode;\n            }\n        }\n\n        public string GetJsonString()\n        {\n            var sb = StringBuilderPool.Get();\n\n            var jsonObject = new JsonObject(sb);\n\n            jsonObject.AddStringValue(\"id\", Id);\n            jsonObject.AddStringValue(\"refAllele\", string.IsNullOrEmpty(RefAllele) ? \"-\" : RefAllele);\n            jsonObject.AddStringValue(\"altAllele\", SaUtilsCommon.ReverseSaReducedAllele(AltAllele));\n            jsonObject.AddStringValue(\"gene\", Gene);\n            jsonObject.AddIntValue(\"sampleCount\", SampleCount);\n\n            jsonObject.AddStringValue(\"cancerTypesAndCounts\", GetJsonStringFromDict(\"cancerType\",GetCancerTypeCounts()), false);\n            jsonObject.AddStringValue(\"cancerSitesAndCounts\", GetJsonStringFromDict(\"cancerSite\",GetTissueCounts()), false);\n\n            return StringBuilderPool.GetStringAndReturn(sb);\n        }\n\n        internal Dictionary<string,int> GetTissueCounts()\n        {\n            if (Studies == null) return null;\n            var tissueCounts = new Dictionary<string, int>();\n            foreach (var study in Studies)\n            {\n                if (study.Sites == null) return null;\n\n                foreach (var site in study.Sites)\n                {\n                    if (tissueCounts.TryGetValue(site, out _))\n                    {\n                        tissueCounts[site]++;\n                    }\n                    else tissueCounts[site] = 1;\n                }\n            }\n\n            return tissueCounts; \n        }\n\n      
  internal Dictionary<string,int> GetCancerTypeCounts()\n        {\n            if (Studies == null) return null;\n            var cancerTypeCounts = new Dictionary<string, int>();\n            foreach (var study in Studies)\n            {\n                if (study.Histologies == null) return null;\n                foreach (var histology in study.Histologies)\n                {\n                    if (cancerTypeCounts.TryGetValue(histology, out _))\n                    {\n                        cancerTypeCounts[histology]++;\n                    }\n                    else cancerTypeCounts[histology] = 1;\n                }\n            }\n\n            return cancerTypeCounts;\n        }\n\n        private static string GetJsonStringFromDict(string dataType, Dictionary<string, int> dictionary)\n        {\n            if (dictionary == null) return null;\n\n            var sb = StringBuilderPool.Get();\n            sb.Append(JsonObject.OpenBracket);\n\n            bool isFirstItem = true;\n            foreach (var kvp in dictionary)\n            {\n                if (!isFirstItem)\n                    sb.Append(JsonObject.Comma);\n\n                sb.Append(JsonObject.OpenBrace);\n                sb.Append($\"\\\"{dataType}\\\":\\\"{kvp.Key}\\\",\");\n                sb.Append($\"\\\"count\\\":{kvp.Value}\");\n                sb.Append(JsonObject.CloseBrace);\n                \n                isFirstItem = false;\n            }\n\n            sb.Append(JsonObject.CloseBracket);\n\n            return StringBuilderPool.GetStringAndReturn(sb);\n        }\n\n       \n    }\n}\n\n\n"
  },
  {
    "path": "SAUtils/DataStructures/CounterDictionary.cs",
    "content": "using System.Collections.Generic;\nusing System.Text;\nusing VariantAnnotation.Interface.IO;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.DataStructures;\n\npublic sealed class CounterDictionary<TKey> : Dictionary<TKey, uint>, IJsonSerializer\n{\n    public uint Total;\n\n    public void Add(TKey key)\n    {\n        Total++;\n\n        if (TryGetValue(key, out uint _))\n        {\n            this[key]++;\n            return;\n        }\n\n        this[key] = 1;\n    }\n\n    public void SerializeJson(StringBuilder sb)\n    {\n        var jo = new JsonObject(sb);\n        sb.Append(JsonObject.OpenBrace);\n        jo.AddUIntValue(\"count\", Total);\n        foreach ((TKey key, uint count) in this)\n        {\n            jo.AddUIntValue(key.ToString(), count);\n        }\n\n        sb.Append(JsonObject.CloseBrace);\n    }\n}"
  },
  {
    "path": "SAUtils/DataStructures/CustomInterval.cs",
    "content": "﻿using System.Collections.Generic;\nusing ErrorHandling.Exceptions;\nusing Genome;\nusing SAUtils.Schema;\nusing VariantAnnotation.Interface.SA;\nusing Variants;\n\nnamespace SAUtils.DataStructures\n{\n    public sealed class CustomInterval : ISuppIntervalItem\n    {\n        public Chromosome Chromosome { get; }\n        public int Start { get; }\n        public int End { get; }\n        private VariantType VariantType { get; }\n\n        private readonly List<string[]> _values;\n        private readonly SaJsonSchema _jsonSchema;\n        private readonly string _inputLine;\n\n        /// <summary>\n        /// constructor\n        /// </summary>\n        public CustomInterval(Chromosome chromosome, int start, int end, List<string[]> values, SaJsonSchema jsonSchema, string inputLine)\n        {\n            Chromosome      = chromosome;\n            Start           = start;\n            End             = end;\n            VariantType     = VariantType.structural_alteration;\n            _values         = values;\n            _jsonSchema     = jsonSchema;\n            _inputLine      = inputLine;\n        }\n\n        public string GetJsonString()\n        {\n            try\n            {\n                return _jsonSchema.GetJsonString(_values);\n            }\n            catch (UserErrorException e)\n            {\n                throw new UserErrorException(e.Message + $\"\\nInput line: {_inputLine}\");\n            }\n        }\n\n    }\n}\n"
  },
  {
    "path": "SAUtils/DataStructures/CustomItem.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing SAUtils.Schema;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.Utilities;\r\n\r\nnamespace SAUtils.DataStructures\r\n{\r\n    public sealed class CustomItem : ISupplementaryDataItem\r\n    {\r\n        public Chromosome Chromosome { get; }\r\n        public int Position { get; set; }\r\n        public string RefAllele { get; set; }\r\n        public string AltAllele { get; set; }\r\n\r\n        private readonly string[][] _values;\r\n        private readonly SaJsonSchema _jsonSchema;\r\n\r\n        public CustomItem(Chromosome chromosome, int start, string refAllele, string altAllele, string[][] values, SaJsonSchema jsonSchema, string inputLine)\r\n        {\r\n            Chromosome = chromosome;\r\n            Position = start;\r\n            RefAllele = refAllele;\r\n            AltAllele = altAllele;\r\n            _values = values;\r\n            _jsonSchema = jsonSchema;\r\n            InputLine = inputLine;\r\n        }\r\n\r\n        public string GetJsonString()\r\n        {\r\n            var allValues = new List<string[]> {new []{BaseFormatting.EmptyToDash(RefAllele)}, new []{BaseFormatting.EmptyToDash(AltAllele)} };\r\n            allValues.AddRange(_values);\r\n            try\r\n            {\r\n                return _jsonSchema.GetJsonString(allValues);\r\n            }\r\n            catch (UserErrorException e) \r\n            {\r\n                throw new UserErrorException(e.Message + $\"\\nInput line: {InputLine}\");\r\n            }\r\n        }\r\n\r\n        public string InputLine { get; }\r\n    }\r\n}\r\n"
  },
  {
    "path": "SAUtils/DataStructures/DbSnpItem.cs",
    "content": "﻿using Genome;\nusing VariantAnnotation.Interface.SA;\n\nnamespace SAUtils.DataStructures\n{\n\tpublic sealed class DbSnpItem: ISupplementaryDataItem\n\t{\n\t    public Chromosome Chromosome { get; }\n\t    public int         Position   { get; set; }\n\t    public string      RefAllele  { get; set; }\n\t    public string      AltAllele  { get; set; }\n\t    public string      InputLine  { get; }\n\n        public long RsId { get; }\n\t    \n\t    public DbSnpItem(Chromosome chromosome,\n\t\t\tint position,\n\t\t\tlong rsId,\n\t\t\tstring refAllele,\n\t\t\tstring alternateAllele, \n\t\t\tstring inputLine)\n\t\t{\n\t\t\tChromosome = chromosome;\n\t\t\tPosition   = position;\n\t\t\tRsId       = rsId;\n\t\t\tRefAllele  = refAllele;\n\t\t\tAltAllele  = alternateAllele;\n\t\t\tInputLine  = inputLine;\n\t\t}\n\n\n\t\tpublic string GetJsonString()\n\t    {\n\t        return $\"\\\"rs{RsId}\\\"\";\n\t    }\n\t    \n\t}\n}\n"
  },
  {
    "path": "SAUtils/DataStructures/DecipherItem.cs",
    "content": "using System.Text;\nusing Genome;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.DataStructures\n{\n    public sealed class DecipherItem : ISuppIntervalItem\n    {\n        private readonly int?    _delNum;\n        private readonly double? _delFreq;\n        private readonly int?    _dupNum;\n        private readonly double? _dupFreq;\n        private readonly int?    _sampleSize;\n        \n        public Chromosome Chromosome { get; }\n        public int        Start      { get; }\n        public int        End      { get; }\n\n        public DecipherItem(Chromosome chrom, int start, int end, \n            int? delNum, double? delFreq, int? dupNum, double? dupFreq, int? sampleSize)\n        {\n            Chromosome  = chrom;\n            Start       = start;\n            End         = end;\n            _delNum     = delNum;\n            _delFreq    = delFreq;\n            _dupNum     = dupNum;\n            _dupFreq    = dupFreq;\n            _sampleSize = sampleSize;\n        }\n\n        public string GetJsonString()\n        {\n            var sb         = new StringBuilder();\n            var jsonObject = new JsonObject(sb);\n            \n            jsonObject.AddStringValue(\"chromosome\", Chromosome.EnsemblName);\n            jsonObject.AddIntValue(\"begin\", Start);\n            jsonObject.AddIntValue(\"end\",   End);\n            jsonObject.AddIntValue(\"numDeletions\", _delNum); \n            jsonObject.AddDoubleValue(\"deletionFrequency\", _delFreq, JsonCommon.FrequencyRoundingFormat);\n            jsonObject.AddIntValue(\"numDuplications\", _dupNum); \n            jsonObject.AddDoubleValue(\"duplicationFrequency\", _dupFreq, JsonCommon.FrequencyRoundingFormat);\n            jsonObject.AddIntValue(\"sampleSize\", _sampleSize); \n\n            return sb.ToString();\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/DataStructures/DgvItem.cs",
    "content": "﻿using Genome;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\nusing Variants;\n\nnamespace SAUtils.DataStructures\n{\n    public sealed class DgvItem : ISuppIntervalItem\n    {\n        public Chromosome Chromosome { get; }\n        public int Start { get; }\n        public int End { get; }\n\n        private string Id { get; }\n\n        private int ObservedGains { get; }\n\n        private int ObservedLosses { get; }\n\n        private int SampleSize { get; }\n\n        private VariantType VariantType { get; }\n\n        private double? VariantFreqAll { get; }\n\n        \n        public DgvItem(string id, Chromosome chromosome, int start, int end, int sampleSize, int observedGains, int observedLosses,\n            VariantType variantType)\n        {\n            Id             = id;\n            Chromosome     = chromosome;\n            Start          = start;\n            End            = end;\n            SampleSize     = sampleSize;\n            ObservedGains  = observedGains;\n            ObservedLosses = observedLosses;\n            VariantType    = variantType;\n\n            if (SampleSize == 0 || ObservedLosses + ObservedGains == 0) return;\n            VariantFreqAll = (ObservedLosses + ObservedGains) / (double)SampleSize;\n            VariantFreqAll = VariantFreqAll > 1.0 ? 
1.0 : VariantFreqAll;\n        }\n\n        public string GetJsonString()\n        {\n            var sb = StringBuilderPool.Get();\n            var jsonObject = new JsonObject(sb);\n\n            jsonObject.AddStringValue(\"chromosome\", Chromosome.EnsemblName);\n            jsonObject.AddIntValue(\"begin\", Start);\n            jsonObject.AddIntValue(\"end\", End);\n            jsonObject.AddStringValue(\"variantType\", VariantType.ToString());\n\n            jsonObject.AddStringValue(\"id\", Id);\n            jsonObject.AddIntValue(\"sampleSize\", SampleSize);\n            if (ObservedGains != 0) jsonObject.AddIntValue(\"observedGains\", ObservedGains);\n            if (ObservedLosses != 0) jsonObject.AddIntValue(\"observedLosses\", ObservedLosses);\n            jsonObject.AddDoubleValue(\"variantFreqAll\", VariantFreqAll, \"0.#####\");\n\n            return StringBuilderPool.GetStringAndReturn(sb);\n        }\n\n        public override bool Equals(object obj)\n        {\n            // If parameter is null return false.\n\n            if (!(obj is DgvItem otherItem)) return false;\n\n            // Return true if the fields match:\n            return Equals(Chromosome, otherItem.Chromosome)\n                   && Start          == otherItem.Start\n                   && End            == otherItem.End\n                   && ObservedGains  == otherItem.ObservedGains\n                   && SampleSize     == otherItem.SampleSize\n                   && ObservedLosses == otherItem.ObservedLosses\n                   && string.Equals(Id, otherItem.Id)\n                   && Equals(VariantType, otherItem.VariantType)\n                   && Equals(VariantFreqAll, otherItem.VariantFreqAll);\n        }\n\n        public override int GetHashCode()\n        {\n            unchecked\n            {\n                var hashCode = Id?.GetHashCode() ?? 0;\n                hashCode = (hashCode * 397) ^ (Chromosome?.GetHashCode() ?? 
0);\n                hashCode = (hashCode * 397) ^ Start.GetHashCode();\n                hashCode = (hashCode * 397) ^ End.GetHashCode();\n                hashCode = (hashCode * 397) ^ VariantType.GetHashCode();\n                hashCode = (hashCode * 397) ^ SampleSize.GetHashCode();\n                hashCode = (hashCode * 397) ^ ObservedGains.GetHashCode();\n                hashCode = (hashCode * 397) ^ ObservedLosses.GetHashCode();\n                hashCode = (hashCode * 397) ^ (VariantFreqAll?.GetHashCode() ?? 0);\n\n                return hashCode;\n            }\n        }\n\n     \n    }\n}\n"
  },
  {
    "path": "SAUtils/DataStructures/GlobalMinorItem.cs",
    "content": "﻿using Genome;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.DataStructures\n{\n    public sealed class GlobalMinorItem:ISupplementaryDataItem\n    {\n        public Chromosome Chromosome { get; }\n        public int Position { get; set; }\n        public string RefAllele { get; set; }\n        public string AltAllele { get; set; }\n\n        private readonly string _allele;\n        private readonly double _frequency;\n\n        public GlobalMinorItem(Chromosome chromosome, int position, string allele, double frequency)\n        {\n            Chromosome = chromosome;\n            Position   = position;\n            _allele    = allele;\n            _frequency = frequency;\n        }\n\n        public string GetJsonString()\n        {\n            var sb = StringBuilderPool.Get();\n            var jsonObject = new JsonObject(sb);\n\n            sb.Append(JsonObject.OpenBrace);\n            jsonObject.AddStringValue(\"globalMinorAllele\", _allele);\n            jsonObject.AddDoubleValue(\"globalMinorAlleleFrequency\", _frequency, \"0.#######\");\n            sb.Append(JsonObject.CloseBrace);\n\n            return StringBuilderPool.GetStringAndReturn(sb);\n        }\n\n        public string InputLine { get; set; }\n    }\n}"
  },
  {
    "path": "SAUtils/DataStructures/GmeItem.cs",
    "content": "using System.Text;\nusing Genome;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.DataStructures\n{\n    public sealed class GmeItem : ISupplementaryDataItem\n    {\n        private readonly int?    _alleleCount;\n        private readonly int?    _alleleNum;\n        private readonly double? _alleleFreq;\n        private readonly bool    _failedFilter;\n\n        public Chromosome Chromosome { get; }\n        public int         Position   { get; set; }\n        public string      RefAllele  { get; set; }\n        public string      AltAllele  { get; set; }\n\n\n\n        public GmeItem(Chromosome chrom, int position, string refAllele, string altAllele, \n            int? alleleCount, int? alleleNum, double? alleleFreq, bool failedFilter)\n        {\n            Chromosome    = chrom;\n            Position      = position;\n            RefAllele     = refAllele;\n            AltAllele     = altAllele;\n            _alleleCount  = alleleCount;\n            _alleleNum    = alleleNum;\n            _alleleFreq   = alleleFreq;\n            _failedFilter = failedFilter;\n        }\n\n        public string GetJsonString()\n        {\n            var sb         = new StringBuilder();\n            var jsonObject = new JsonObject(sb);\n            \n            jsonObject.AddIntValue(\"allAc\", _alleleCount); \n            jsonObject.AddIntValue(\"allAn\", _alleleNum); \n            jsonObject.AddDoubleValue(\"allAf\", _alleleFreq);\n            if (_failedFilter) jsonObject.AddBoolValue(\"failedFilter\", true);\n\n            return sb.ToString();\n        }\n\n        public string InputLine { get; set; }\n    }\n}"
  },
  {
    "path": "SAUtils/DataStructures/GnomadItem.cs",
    "content": "﻿using Genome;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.IO;\r\n\r\n// ReSharper disable NonReadonlyMemberInGetHashCode\r\n\r\nnamespace SAUtils.DataStructures\r\n{\r\n    public enum GnomadDataType : byte\r\n    {\r\n        Unknown,\r\n        Genome,\r\n        Exome\r\n    }\r\n\r\n    public sealed class GnomadItem : ISupplementaryDataItem\r\n    {\r\n        #region members\r\n\r\n        public Chromosome Chromosome { get; }\r\n        public int         Position   { get; set; }\r\n        public string      RefAllele  { get; set; }\r\n        public string      AltAllele  { get; set; }\r\n\r\n        public int? AllAlleleCount  { get; private set; }\r\n        public int? AfrAlleleCount  { get; private set; }\r\n        public int? AmrAlleleCount  { get; private set; }\r\n        public int? EasAlleleCount  { get; private set; }\r\n        public int? FinAlleleCount  { get; private set; }\r\n        public int? NfeAlleleCount  { get; private set; }\r\n        public int? OthAlleleCount  { get; private set; }\r\n        public int? AsjAlleleCount  { get; private set; }\r\n        public int? SasAlleleCount  { get; private set; }\r\n        public int? AllAlleleNumber { get; private set; }\r\n        public int? AfrAlleleNumber { get; private set; }\r\n        public int? AmrAlleleNumber { get; private set; }\r\n        public int? EasAlleleNumber { get; private set; }\r\n        public int? FinAlleleNumber { get; private set; }\r\n        public int? NfeAlleleNumber { get; private set; }\r\n        public int? OthAlleleNumber { get; private set; }\r\n        public int? AsjAlleleNumber { get; private set; }\r\n        public int? SasAlleleNumber { get; private set; }\r\n\r\n        public int? AllHomCount { get; private set; }\r\n        public int? AfrHomCount { get; private set; }\r\n        public int? AmrHomCount { get; private set; }\r\n        public int? 
EasHomCount { get; private set; }\r\n        public int? FinHomCount { get; private set; }\r\n        public int? NfeHomCount { get; private set; }\r\n        public int? OthHomCount { get; private set; }\r\n        public int? AsjHomCount { get; private set; }\r\n        public int? SasHomCount { get; private set; }\r\n\r\n        //male counts\r\n        public int? MaleAlleleCount  { get; private set; }\r\n        public int? MaleAlleleNumber { get; private set; }\r\n        public int? MaleHomCount     { get; private set; }\r\n\r\n        //female counts\r\n        public int? FemaleAlleleCount  { get; private set; }\r\n        public int? FemaleAlleleNumber { get; private set; }\r\n        public int? FemaleHomCount     { get; private set; }\r\n\r\n        //controls\r\n        public int? ControlsAllAlleleCount  { get; private set; }\r\n        public int? ControlsAllAlleleNumber { get; private set; }\r\n\r\n        public int?           Depth            { get; }\r\n        public int?           Coverage         { get; }\r\n        public bool           HasFailedFilters { get; }\r\n        public GnomadDataType DataType         { get; }\r\n\r\n        #endregion\r\n\r\n        public GnomadItem(Chromosome chromosome,\r\n            int position,\r\n            string refAllele,\r\n            string alternateAllele,\r\n            int? depth,\r\n            int? allAlleleNumber, int? afrAlleleNumber, int? amrAlleleNumber, int? easAlleleNumber,\r\n            int? finAlleleNumber, int? nfeAlleleNumber, int? othAlleleNumber, int? asjAlleleNumber, int? sasAlleleNumber,\r\n            int? maleAlleleNumber, int? femaleAlleleNumber,\r\n            int? allAlleleCount, int? afrAlleleCount, int? amrAlleleCount, int? easAlleleCount, int? finAlleleCount, int? nfeAlleleCount,\r\n            int? othAlleleCount, int? asjAlleleCount, int? sasAlleleCount,\r\n            int? maleAlleleCount, int? femaleAlleleCount,\r\n            int? allHomCount, int? afrHomCount, int? 
amrHomCount, int? easHomCount,\r\n            int? finHomCount, int? nfeHomCount, int? othHomCount, int? asjHomCount, int? sasHomCount,\r\n            int? maleHomCount, int? femaleHomCount,\r\n            int? controlsAllAlleleNumber,\r\n            int? controlsAllAlleleCount,\r\n            bool hasFailedFilters,\r\n            GnomadDataType dataType,\r\n            string inputLine)\r\n        {\r\n            Chromosome = chromosome;\r\n            Position   = position;\r\n            RefAllele  = refAllele;\r\n            AltAllele  = alternateAllele;\r\n            InputLine  = inputLine;\r\n\r\n            Depth = depth;\r\n            if (depth != null && allAlleleNumber != null && allAlleleNumber.Value > 0)\r\n                Coverage = ComputingUtilities.GetCoverage(depth.Value, allAlleleNumber.Value);\r\n\r\n            AllAlleleNumber = allAlleleNumber;\r\n            AfrAlleleNumber = afrAlleleNumber;\r\n            AmrAlleleNumber = amrAlleleNumber;\r\n            EasAlleleNumber = easAlleleNumber;\r\n            FinAlleleNumber = finAlleleNumber;\r\n            NfeAlleleNumber = nfeAlleleNumber;\r\n            OthAlleleNumber = othAlleleNumber;\r\n            AsjAlleleNumber = asjAlleleNumber;\r\n            SasAlleleNumber = sasAlleleNumber;\r\n\r\n            MaleAlleleNumber   = maleAlleleNumber;\r\n            FemaleAlleleNumber = femaleAlleleNumber;\r\n            MaleHomCount       = maleHomCount;\r\n\r\n            AllAlleleCount = allAlleleCount;\r\n            AfrAlleleCount = afrAlleleCount;\r\n            AmrAlleleCount = amrAlleleCount;\r\n            EasAlleleCount = easAlleleCount;\r\n            FinAlleleCount = finAlleleCount;\r\n            NfeAlleleCount = nfeAlleleCount;\r\n            OthAlleleCount = othAlleleCount;\r\n            AsjAlleleCount = asjAlleleCount;\r\n            SasAlleleCount = sasAlleleCount;\r\n\r\n            MaleAlleleCount   = maleAlleleCount;\r\n            FemaleAlleleCount = femaleAlleleCount;\r\n      
      FemaleHomCount    = femaleHomCount;\r\n\r\n            AllHomCount = allHomCount;\r\n            AfrHomCount = afrHomCount;\r\n            AmrHomCount = amrHomCount;\r\n            EasHomCount = easHomCount;\r\n            FinHomCount = finHomCount;\r\n            NfeHomCount = nfeHomCount;\r\n            OthHomCount = othHomCount;\r\n            AsjHomCount = asjHomCount;\r\n            SasHomCount = sasHomCount;\r\n\r\n            //controls\r\n            ControlsAllAlleleNumber = controlsAllAlleleNumber;\r\n            ControlsAllAlleleCount  = controlsAllAlleleCount;\r\n\r\n            HasFailedFilters = hasFailedFilters;\r\n            DataType         = dataType;\r\n\r\n            RemoveAlleleNumberZero();\r\n        }\r\n\r\n        private void RemoveAlleleNumberZero()\r\n        {\r\n            if (SaUtilsCommon.IsNumberNullOrZero(AllAlleleNumber))\r\n            {\r\n                AllAlleleNumber = null;\r\n                AllAlleleCount  = null;\r\n                AllHomCount     = null;\r\n            }\r\n\r\n            if (SaUtilsCommon.IsNumberNullOrZero(MaleAlleleNumber))\r\n            {\r\n                MaleAlleleNumber = null;\r\n                MaleAlleleCount  = null;\r\n                MaleHomCount     = null;\r\n            }\r\n\r\n            if (SaUtilsCommon.IsNumberNullOrZero(FemaleAlleleNumber))\r\n            {\r\n                FemaleAlleleNumber = null;\r\n                FemaleAlleleCount  = null;\r\n                FemaleHomCount     = null;\r\n            }\r\n\r\n            if (SaUtilsCommon.IsNumberNullOrZero(AfrAlleleNumber))\r\n            {\r\n                AfrAlleleNumber = null;\r\n                AfrAlleleCount  = null;\r\n                AfrHomCount     = null;\r\n            }\r\n\r\n            if (SaUtilsCommon.IsNumberNullOrZero(AmrAlleleNumber))\r\n            {\r\n                AmrAlleleNumber = null;\r\n                AmrAlleleCount  = null;\r\n                AmrHomCount     = null;\r\n        
    }\r\n\r\n            if (SaUtilsCommon.IsNumberNullOrZero(EasAlleleNumber))\r\n            {\r\n                EasAlleleNumber = null;\r\n                EasAlleleCount  = null;\r\n                EasHomCount     = null;\r\n            }\r\n\r\n            if (SaUtilsCommon.IsNumberNullOrZero(FinAlleleNumber))\r\n            {\r\n                FinAlleleNumber = null;\r\n                FinAlleleCount  = null;\r\n                FinHomCount     = null;\r\n            }\r\n\r\n            if (SaUtilsCommon.IsNumberNullOrZero(NfeAlleleNumber))\r\n            {\r\n                NfeAlleleNumber = null;\r\n                NfeAlleleCount  = null;\r\n                NfeHomCount     = null;\r\n            }\r\n\r\n            if (SaUtilsCommon.IsNumberNullOrZero(OthAlleleNumber))\r\n            {\r\n                OthAlleleNumber = null;\r\n                OthAlleleCount  = null;\r\n                OthHomCount     = null;\r\n            }\r\n\r\n            if (SaUtilsCommon.IsNumberNullOrZero(AsjAlleleNumber))\r\n            {\r\n                AsjAlleleNumber = null;\r\n                AsjAlleleCount  = null;\r\n                AsjHomCount     = null;\r\n            }\r\n\r\n            if (SaUtilsCommon.IsNumberNullOrZero(SasAlleleNumber))\r\n            {\r\n                SasAlleleNumber = null;\r\n                SasAlleleCount  = null;\r\n                SasHomCount     = null;\r\n            }\r\n\r\n            if (SaUtilsCommon.IsNumberNullOrZero(MaleAlleleNumber))\r\n            {\r\n                MaleAlleleNumber = null;\r\n                MaleAlleleCount  = null;\r\n                MaleHomCount     = null;\r\n            }\r\n\r\n            if (SaUtilsCommon.IsNumberNullOrZero(FemaleAlleleNumber))\r\n            {\r\n                FemaleAlleleNumber = null;\r\n                FemaleAlleleCount  = null;\r\n                FemaleHomCount     = null;\r\n            }\r\n\r\n            //controls\r\n            if 
(SaUtilsCommon.IsNumberNullOrZero(ControlsAllAlleleNumber))\r\n            {\r\n                ControlsAllAlleleNumber = null;\r\n                ControlsAllAlleleCount  = null;\r\n            }\r\n        }\r\n\r\n\r\n        public string GetJsonString()\r\n        {\r\n            var sb         = StringBuilderPool.Get();\r\n            var jsonObject = new JsonObject(sb);\r\n            jsonObject.AddIntValue(\"coverage\", Coverage);\r\n            if (HasFailedFilters) jsonObject.AddBoolValue(\"failedFilter\", true);\r\n\r\n            jsonObject.AddStringValue(\"allAf\", ComputingUtilities.ComputeFrequency(AllAlleleNumber, AllAlleleCount), false);\r\n            jsonObject.AddIntValue(\"allAn\", AllAlleleNumber);\r\n            jsonObject.AddIntValue(\"allAc\", AllAlleleCount);\r\n            jsonObject.AddIntValue(\"allHc\", AllHomCount);\r\n\r\n            jsonObject.AddStringValue(\"afrAf\", ComputingUtilities.ComputeFrequency(AfrAlleleNumber, AfrAlleleCount), false);\r\n            jsonObject.AddIntValue(\"afrAn\", AfrAlleleNumber);\r\n            jsonObject.AddIntValue(\"afrAc\", AfrAlleleCount);\r\n            jsonObject.AddIntValue(\"afrHc\", AfrHomCount);\r\n\r\n            jsonObject.AddStringValue(\"amrAf\", ComputingUtilities.ComputeFrequency(AmrAlleleNumber, AmrAlleleCount), false);\r\n            jsonObject.AddIntValue(\"amrAn\", AmrAlleleNumber);\r\n            jsonObject.AddIntValue(\"amrAc\", AmrAlleleCount);\r\n            jsonObject.AddIntValue(\"amrHc\", AmrHomCount);\r\n\r\n            jsonObject.AddStringValue(\"easAf\", ComputingUtilities.ComputeFrequency(EasAlleleNumber, EasAlleleCount), false);\r\n            jsonObject.AddIntValue(\"easAn\", EasAlleleNumber);\r\n            jsonObject.AddIntValue(\"easAc\", EasAlleleCount);\r\n            jsonObject.AddIntValue(\"easHc\", EasHomCount);\r\n\r\n            jsonObject.AddStringValue(\"finAf\", ComputingUtilities.ComputeFrequency(FinAlleleNumber, FinAlleleCount), false);\r\n            
jsonObject.AddIntValue(\"finAn\", FinAlleleNumber);\r\n            jsonObject.AddIntValue(\"finAc\", FinAlleleCount);\r\n            jsonObject.AddIntValue(\"finHc\", FinHomCount);\r\n\r\n            jsonObject.AddStringValue(\"nfeAf\", ComputingUtilities.ComputeFrequency(NfeAlleleNumber, NfeAlleleCount), false);\r\n            jsonObject.AddIntValue(\"nfeAn\", NfeAlleleNumber);\r\n            jsonObject.AddIntValue(\"nfeAc\", NfeAlleleCount);\r\n            jsonObject.AddIntValue(\"nfeHc\", NfeHomCount);\r\n\r\n            jsonObject.AddStringValue(\"asjAf\", ComputingUtilities.ComputeFrequency(AsjAlleleNumber, AsjAlleleCount), false);\r\n            jsonObject.AddIntValue(\"asjAn\", AsjAlleleNumber);\r\n            jsonObject.AddIntValue(\"asjAc\", AsjAlleleCount);\r\n            jsonObject.AddIntValue(\"asjHc\", AsjHomCount);\r\n\r\n            jsonObject.AddStringValue(\"sasAf\", ComputingUtilities.ComputeFrequency(SasAlleleNumber, SasAlleleCount), false);\r\n            jsonObject.AddIntValue(\"sasAn\", SasAlleleNumber);\r\n            jsonObject.AddIntValue(\"sasAc\", SasAlleleCount);\r\n            jsonObject.AddIntValue(\"sasHc\", SasHomCount);\r\n\r\n            jsonObject.AddStringValue(\"othAf\", ComputingUtilities.ComputeFrequency(OthAlleleNumber, OthAlleleCount), false);\r\n            jsonObject.AddIntValue(\"othAn\", OthAlleleNumber);\r\n            jsonObject.AddIntValue(\"othAc\", OthAlleleCount);\r\n            jsonObject.AddIntValue(\"othHc\", OthHomCount);\r\n\r\n            jsonObject.AddStringValue(\"maleAf\", ComputingUtilities.ComputeFrequency(MaleAlleleNumber, MaleAlleleCount), false);\r\n            jsonObject.AddIntValue(\"maleAn\", MaleAlleleNumber);\r\n            jsonObject.AddIntValue(\"maleAc\", MaleAlleleCount);\r\n            jsonObject.AddIntValue(\"maleHc\", MaleHomCount);\r\n\r\n            jsonObject.AddStringValue(\"femaleAf\", ComputingUtilities.ComputeFrequency(FemaleAlleleNumber, FemaleAlleleCount), false);\r\n            
jsonObject.AddIntValue(\"femaleAn\", FemaleAlleleNumber);\r\n            jsonObject.AddIntValue(\"femaleAc\", FemaleAlleleCount);\r\n            jsonObject.AddIntValue(\"femaleHc\", FemaleHomCount);\r\n\r\n            //controls\r\n            //jsonObject.AddIntValue(\"controlsCoverage\", ControlsCoverage);\r\n            jsonObject.AddStringValue(\"controlsAllAf\", ComputingUtilities.ComputeFrequency(ControlsAllAlleleNumber, ControlsAllAlleleCount), false);\r\n            jsonObject.AddIntValue(\"controlsAllAn\", ControlsAllAlleleNumber);\r\n            jsonObject.AddIntValue(\"controlsAllAc\", ControlsAllAlleleCount);\r\n\r\n            return StringBuilderPool.GetStringAndReturn(sb);\r\n        }\r\n\r\n        public string InputLine { get; }\r\n\r\n        public static int CompareTo(GnomadItem item, GnomadItem other)\r\n        {\r\n            if (other == null) return -1;\r\n            return item.Chromosome.Index == other.Chromosome.Index\r\n                ? item.Position.CompareTo(other.Position)\r\n                : item.Chromosome.Index.CompareTo(other.Chromosome.Index);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/DataStructures/GnomadSvItem.cs",
    "content": "using System.Text;\nusing Genome;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\nusing Variants;\n\nnamespace SAUtils.DataStructures;\n\npublic sealed record GnomadSvItem(Chromosome Chromosome, string InputLine) : ISuppIntervalItem\n{\n    public int         Start            { get; init; }\n    public int         End              { get; init; }\n    public bool        HasFailedFilters { get; init; }\n    public VariantType SvType           { get; init; }\n    public string      VariantId        { get; init; }\n\n    public double? AllAlleleFrequency    { get; init; }\n    public double? AfrAlleleFrequency    { get; init; }\n    public double? AmrAlleleFrequency    { get; init; }\n    public double? EasAlleleFrequency    { get; init; }\n    public double? EurAlleleFrequency    { get; init; }\n    public double? OthAlleleFrequency    { get; init; }\n    public double? FemaleAlleleFrequency { get; init; }\n    public double? MaleAlleleFrequency   { get; init; }\n\n    public int? AllAlleleCount    { get; init; }\n    public int? AfrAlleleCount    { get; init; }\n    public int? AmrAlleleCount    { get; init; }\n    public int? EasAlleleCount    { get; init; }\n    public int? EurAlleleCount    { get; init; }\n    public int? OthAlleleCount    { get; init; }\n    public int? FemaleAlleleCount { get; init; }\n    public int? MaleAlleleCount   { get; init; }\n\n    public int? AllAlleleNumber    { get; init; }\n    public int? AfrAlleleNumber    { get; init; }\n    public int? AmrAlleleNumber    { get; init; }\n    public int? EasAlleleNumber    { get; init; }\n    public int? EurAlleleNumber    { get; init; }\n    public int? OthAlleleNumber    { get; init; }\n    public int? FemaleAlleleNumber { get; init; }\n    public int? MaleAlleleNumber   { get; init; }\n\n    public int? AllHomCount    { get; init; }\n    public int? AfrHomCount    { get; init; }\n    public int? 
AmrHomCount    { get; init; }\n    public int? EasHomCount    { get; init; }\n    public int? EurHomCount    { get; init; }\n    public int? OthHomCount    { get; init; }\n    public int? FemaleHomCount { get; init; }\n    public int? MaleHomCount   { get; init; }\n\n\n    public string GetJsonString()\n    {\n        int start = Start;\n        int end   = End;\n        \n        // swap begin and end if variant is an insertion\n        if (SvType == VariantType.insertion)\n        {\n            (start, end) = (end, start);\n        }\n\n        StringBuilder sb         = StringBuilderPool.Get();\n        var           jsonObject = new JsonObject(sb);\n\n        jsonObject.AddStringValue(JsonCommon.Chromosome, Chromosome.EnsemblName);\n        jsonObject.AddIntValue(JsonCommon.Begin, start);\n        jsonObject.AddIntValue(JsonCommon.End,   end);\n\n        jsonObject.AddStringValue(JsonCommon.VariantId,   VariantId);\n        jsonObject.AddStringValue(JsonCommon.VariantType, SvType.ToString());\n        if (HasFailedFilters) jsonObject.AddBoolValue(JsonCommon.FailedFilter, true);\n\n        jsonObject.AddDoubleValue(JsonCommon.AllAlleleFrequency,    AllAlleleFrequency,    JsonCommon.FrequencyRoundingFormat);\n        jsonObject.AddDoubleValue(JsonCommon.AfrAlleleFrequency,    AfrAlleleFrequency,    JsonCommon.FrequencyRoundingFormat);\n        jsonObject.AddDoubleValue(JsonCommon.AmrAlleleFrequency,    AmrAlleleFrequency,    JsonCommon.FrequencyRoundingFormat);\n        jsonObject.AddDoubleValue(JsonCommon.EasAlleleFrequency,    EasAlleleFrequency,    JsonCommon.FrequencyRoundingFormat);\n        jsonObject.AddDoubleValue(JsonCommon.EurAlleleFrequency,    EurAlleleFrequency,    JsonCommon.FrequencyRoundingFormat);\n        jsonObject.AddDoubleValue(JsonCommon.OthAlleleFrequency,    OthAlleleFrequency,    JsonCommon.FrequencyRoundingFormat);\n        jsonObject.AddDoubleValue(JsonCommon.FemaleAlleleFrequency, FemaleAlleleFrequency, 
JsonCommon.FrequencyRoundingFormat);\n        jsonObject.AddDoubleValue(JsonCommon.MaleAlleleFrequency,   MaleAlleleFrequency,   JsonCommon.FrequencyRoundingFormat);\n\n        jsonObject.AddIntValue(JsonCommon.AllAlleleCount,    AllAlleleCount);\n        jsonObject.AddIntValue(JsonCommon.AfrAlleleCount,    AfrAlleleCount);\n        jsonObject.AddIntValue(JsonCommon.AmrAlleleCount,    AmrAlleleCount);\n        jsonObject.AddIntValue(JsonCommon.EasAlleleCount,    EasAlleleCount);\n        jsonObject.AddIntValue(JsonCommon.EurAlleleCount,    EurAlleleCount);\n        jsonObject.AddIntValue(JsonCommon.OthAlleleCount,    OthAlleleCount);\n        jsonObject.AddIntValue(JsonCommon.FemaleAlleleCount, FemaleAlleleCount);\n        jsonObject.AddIntValue(JsonCommon.MaleAlleleCount,   MaleAlleleCount);\n\n        jsonObject.AddIntValue(JsonCommon.AllAlleleNumber,    AllAlleleNumber);\n        jsonObject.AddIntValue(JsonCommon.AfrAlleleNumber,    AfrAlleleNumber);\n        jsonObject.AddIntValue(JsonCommon.AmrAlleleNumber,    AmrAlleleNumber);\n        jsonObject.AddIntValue(JsonCommon.EasAlleleNumber,    EasAlleleNumber);\n        jsonObject.AddIntValue(JsonCommon.EurAlleleNumber,    EurAlleleNumber);\n        jsonObject.AddIntValue(JsonCommon.OthAlleleNumber,    OthAlleleNumber);\n        jsonObject.AddIntValue(JsonCommon.FemaleAlleleNumber, FemaleAlleleNumber);\n        jsonObject.AddIntValue(JsonCommon.MaleAlleleNumber,   MaleAlleleNumber);\n\n        jsonObject.AddIntValue(JsonCommon.AllHomCount,    AllHomCount);\n        jsonObject.AddIntValue(JsonCommon.AfrHomCount,    AfrHomCount);\n        jsonObject.AddIntValue(JsonCommon.AmrHomCount,    AmrHomCount);\n        jsonObject.AddIntValue(JsonCommon.EasHomCount,    EasHomCount);\n        jsonObject.AddIntValue(JsonCommon.EurHomCount,    EurHomCount);\n        jsonObject.AddIntValue(JsonCommon.OthHomCount,    OthHomCount);\n        jsonObject.AddIntValue(JsonCommon.FemaleHomCount, FemaleHomCount);\n        
jsonObject.AddIntValue(JsonCommon.MaleHomCount,   MaleHomCount);\n\n        return StringBuilderPool.GetStringAndReturn(sb);\n    }\n}"
  },
  {
    "path": "SAUtils/DataStructures/KeyCounts.cs",
    "content": "using System.Collections.Generic;\nusing System.Text;\nusing VariantAnnotation.Interface.IO;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.DataStructures;\n\npublic class KeyCounts: IJsonSerializer\n{\n    public readonly Dictionary<string, int> Counts;\n\n    public KeyCounts(IEnumerable<string> keys)\n    {\n        Counts = new ();\n        foreach (var key in keys)\n        {\n            Counts[key] = 0;\n        }\n    }\n\n    public void Increment(string key)\n    {\n        Counts[key]++;\n    }\n\n    public void SerializeJson(StringBuilder sb)\n    {\n        var jo = new JsonObject(sb);\n        sb.Append(JsonObject.OpenBrace);\n        foreach (var (key, count) in Counts)\n        {\n            jo.AddIntValue(key, count);\n        }\n        \n        sb.Append(JsonObject.CloseBrace);\n    }\n}\n"
  },
  {
    "path": "SAUtils/DataStructures/MinHeap.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\n\r\nnamespace SAUtils.DataStructures\r\n{\r\n    public sealed class MinHeap<T>\r\n    {\r\n        private readonly List<T> _itemArray;\r\n        private readonly Func<T, T, int> _comparerFunc;\r\n\r\n        \r\n        public MinHeap(Func<T,T, int> comparerFunc)\r\n        {\r\n            _itemArray = new List<T>();\r\n            _comparerFunc = comparerFunc;\r\n        }\r\n\r\n        public void Add(T item)\r\n        {\r\n            _itemArray.Add(item);\r\n            Heapify();\r\n        }\r\n\r\n        private void Heapify()\r\n        {\r\n            var i = _itemArray.Count - 1;\r\n            while (i > 0)\r\n            {\r\n                var j = i % 2 == 0 ? i / 2 - 1 : i / 2;//the index of the parent\r\n                //if (_itemArray[i].CompareTo(_itemArray[j]) < 0)\r\n                if (_comparerFunc(_itemArray[i], _itemArray[j]) < 0)\r\n                    SwapItems(_itemArray, i, j);\r\n\r\n                i = j;\r\n            }\r\n        }\r\n\r\n        public T ExtractMin()\r\n        {\r\n            var min = _itemArray[0];\r\n\r\n            // the last item from the array is brought to the root and pushed down to the appropriate position\r\n            _itemArray[0] = _itemArray[_itemArray.Count - 1];\r\n            _itemArray.RemoveAt(_itemArray.Count - 1);\r\n\r\n\r\n            for (var i = 0; i < _itemArray.Count / 2;)\r\n            {\r\n                var j = 2 * i + 1;\r\n\r\n                if (j + 1 < _itemArray.Count && _comparerFunc(_itemArray[j], _itemArray[j + 1]) > 0)\r\n                    // both children are present\r\n                    j++; //A[2*i+2] is the smaller child\r\n\r\n                if (_comparerFunc(_itemArray[i],_itemArray[j]) > 0)\r\n                    SwapItems(_itemArray, i, j);\r\n\r\n                i = j;\r\n            }\r\n            return min;\r\n        }\r\n\r\n        private static void SwapItems(List<T> 
list, int i, int j)\r\n        {\r\n            var temp = list[i];\r\n            list[i] = list[j];\r\n            list[j] = temp;\r\n        }\r\n\r\n        public T GetMin()\r\n        {\r\n            return _itemArray.Count == 0 ? default : _itemArray[0];\r\n        }\r\n\r\n        public int Count()\r\n        {\r\n            return _itemArray.Count;\r\n        }\r\n\r\n        public override string ToString()\r\n        {\r\n            return string.Join(\",\", _itemArray);\r\n\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/DataStructures/OmimItem.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Linq;\r\nusing System.Text;\r\nusing Newtonsoft.Json;\r\nusing OptimizedCore;\r\nusing SAUtils.Schema;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.IO;\r\n\r\nnamespace SAUtils.DataStructures;\r\n\r\npublic sealed class OmimItem : ISuppGeneItem\r\n{\r\n    public           string          GeneSymbol { get; }\r\n    private readonly string          _geneName;\r\n    private readonly string          _description;\r\n    private readonly int             _mimNumber;\r\n    public readonly  List<Phenotype> Phenotypes;\r\n    public           SaJsonSchema    JsonSchema { get; }\r\n\r\n    public OmimItem(string geneSymbol, string geneName, string description, int mimNumber, List<Phenotype> phenotypes, SaJsonSchema jsonSchema)\r\n    {\r\n        GeneSymbol   = geneSymbol;\r\n        _geneName    = geneName;\r\n        _description = description;\r\n        _mimNumber   = mimNumber;\r\n        Phenotypes   = phenotypes;\r\n        JsonSchema   = jsonSchema;\r\n    }\r\n\r\n    public string GetJsonString()\r\n    {\r\n        var sb         = StringBuilderPool.Get();\r\n        var jsonObject = new JsonObject(sb);\r\n\r\n        sb.Append(JsonObject.OpenBrace);\r\n        JsonSchema.TotalItems++;\r\n        JsonSchema.CountKeyIfAdded(jsonObject.AddIntValue(\"mimNumber\", _mimNumber), \"mimNumber\");\r\n        JsonSchema.CountKeyIfAdded(\r\n            jsonObject.AddStringValue(\"geneName\", string.IsNullOrEmpty(_geneName) ? null : JsonConvert.SerializeObject(_geneName), false),\r\n            \"geneName\");\r\n        //Serialized string has the double quote at the beginning and the end\r\n        JsonSchema.CountKeyIfAdded(\r\n            jsonObject.AddStringValue(\"description\", string.IsNullOrEmpty(_description) ? 
null : JsonConvert.SerializeObject(_description),\r\n                false), \"description\");\r\n        if (Phenotypes.Count > 0)\r\n            JsonSchema.CountKeyIfAdded(jsonObject.AddObjectValues(\"phenotypes\", Phenotypes), \"phenotypes\");\r\n        sb.Append(JsonObject.CloseBrace);\r\n\r\n        return StringBuilderPool.GetStringAndReturn(sb);\r\n    }\r\n\r\n    public sealed class Phenotype : IJsonSerializer\r\n    {\r\n        private readonly int             _mimNumber;\r\n        public readonly  string          _phenotype;\r\n        private readonly string          _description;\r\n        public readonly  Mapping         Mapping;\r\n        private readonly Comment[]       _comments;\r\n        public readonly  HashSet<string> Inheritance;\r\n        private readonly SaJsonSchema    _jsonSchema;\r\n\r\n        public Phenotype(int mimNumber, string phenotype, string description, Mapping mapping, Comment[] comments, HashSet<string> inheritance,\r\n            SaJsonSchema schema)\r\n        {\r\n            _mimNumber   = mimNumber;\r\n            _phenotype   = phenotype;\r\n            _description = description;\r\n            Mapping      = mapping;\r\n            _comments    = comments;\r\n            Inheritance  = inheritance;\r\n            _jsonSchema  = schema;\r\n        }\r\n\r\n        public void SerializeJson(StringBuilder sb)\r\n        {\r\n            var jsonObject = new JsonObject(sb);\r\n\r\n            sb.Append(JsonObject.OpenBrace);\r\n            _jsonSchema.TotalItems++;\r\n\r\n            if (_mimNumber >= 100000)\r\n                _jsonSchema.CountKeyIfAdded(jsonObject.AddIntValue(\"mimNumber\", _mimNumber), \"mimNumber\");\r\n            _jsonSchema.CountKeyIfAdded(jsonObject.AddStringValue(\"phenotype\", _phenotype), \"phenotype\");\r\n            _jsonSchema.CountKeyIfAdded(\r\n                jsonObject.AddStringValue(\"description\", string.IsNullOrEmpty(_description) ? 
null : JsonConvert.SerializeObject(_description),\r\n                    false), \"description\");\r\n            if (Mapping != Mapping.unknown)\r\n                _jsonSchema.CountKeyIfAdded(jsonObject.AddStringValue(\"mapping\", Mapping.ToString().Replace(\"_\", \" \")), \"mapping\");\r\n            if (Inheritance != null && Inheritance.Count > 0)\r\n                _jsonSchema.CountKeyIfAdded(jsonObject.AddStringValues(\"inheritances\", Inheritance), \"inheritances\");\r\n            if (_comments.Length > 0)\r\n                _jsonSchema.CountKeyIfAdded(jsonObject.AddStringValues(\"comments\", _comments.Select(x => x.ToString().Replace(\"_\", \" \"))),\r\n                    \"comments\");\r\n\r\n            sb.Append(JsonObject.CloseBrace);\r\n        }\r\n    }\r\n\r\n    public enum Mapping : byte\r\n    {\r\n        // ReSharper disable InconsistentNaming\r\n        unknown,\r\n        mapping_of_the_wildtype_gene,\r\n        disease_phenotype_itself_was_mapped,\r\n        molecular_basis_of_the_disorder_is_known,\r\n\r\n        chromosome_deletion_or_duplication_syndrome\r\n        // ReSharper restore InconsistentNaming\r\n    }\r\n\r\n    public enum Comment : byte\r\n    {\r\n        // ReSharper disable InconsistentNaming\r\n        unknown,\r\n        unconfirmed_or_possibly_spurious_mapping,\r\n        nondiseases,\r\n\r\n        contribute_to_susceptibility_to_multifactorial_disorders_or_to_susceptibility_to_infection\r\n        // ReSharper restore InconsistentNaming\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/DataStructures/OneKGenItem.cs",
    "content": "﻿using Genome;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.DataStructures\n{\n    public sealed class OneKGenItem : ISupplementaryDataItem\n    {\n        public Chromosome Chromosome { get; }\n        public int Position { get; set; }\n        public string RefAllele { get; set; }\n        public string AltAllele { get; set; }\n\n        private string AncestralAllele { get; }\n\n        private int? AllAlleleNumber { get; }\n        private int? AfrAlleleNumber { get; }\n        private int? AmrAlleleNumber { get; }\n        private int? EurAlleleNumber { get; }\n        private int? EasAlleleNumber { get; }\n        private int? SasAlleleNumber { get; }\n\n        private int? AllAlleleCount { get; }\n        private int? AfrAlleleCount { get; }\n        private int? AmrAlleleCount { get; }\n        private int? EurAlleleCount { get; }\n        private int? EasAlleleCount { get; }\n        private int? SasAlleleCount { get; }\n\n        public OneKGenItem(Chromosome chromosome,\n            int position,\n            string refAllele,\n            string alternateAllele,\n            string ancestralAllele,\n            int? allAlleleCount,\n            int? afrAlleleCount,\n            int? amrAlleleCount,\n            int? eurAlleleCount,\n            int? easAlleleCount,\n            int? sasAlleleCount,\n            int? allAlleleNumber,\n            int? afrAlleleNumber,\n            int? amrAlleleNumber,\n            int? eurAlleleNumber,\n            int? easAlleleNumber,\n            int? 
sasAlleleNumber\n            )\n        {\n            Chromosome = chromosome;\n            Position = position;\n            RefAllele = refAllele;\n            AltAllele = alternateAllele;\n            AncestralAllele = ancestralAllele;\n\n            AllAlleleCount = allAlleleCount;\n            AfrAlleleCount = afrAlleleCount;\n            AmrAlleleCount = amrAlleleCount;\n            EurAlleleCount = eurAlleleCount;\n            EasAlleleCount = easAlleleCount;\n            SasAlleleCount = sasAlleleCount;\n\n            AllAlleleNumber = allAlleleNumber;\n            AfrAlleleNumber = afrAlleleNumber;\n            AmrAlleleNumber = amrAlleleNumber;\n            EurAlleleNumber = eurAlleleNumber;\n            EasAlleleNumber = easAlleleNumber;\n            SasAlleleNumber = sasAlleleNumber;\n        }\n\n        \n\t\tpublic string GetJsonString()\n\t\t{\n            var sb = StringBuilderPool.Get();\n\t\t\tvar jsonObject = new JsonObject(sb);\n\t\t    jsonObject.AddStringValue(\"ancestralAllele\", AncestralAllele);\n            jsonObject.AddStringValue(\"allAf\", ComputingUtilities.ComputeFrequency(AllAlleleNumber, AllAlleleCount), false);\n\t\t\tjsonObject.AddStringValue(\"afrAf\", ComputingUtilities.ComputeFrequency(AfrAlleleNumber, AfrAlleleCount), false);\n\t\t\tjsonObject.AddStringValue(\"amrAf\", ComputingUtilities.ComputeFrequency(AmrAlleleNumber, AmrAlleleCount), false);\n\t\t\tjsonObject.AddStringValue(\"easAf\", ComputingUtilities.ComputeFrequency(EasAlleleNumber, EasAlleleCount), false);\n\t\t\tjsonObject.AddStringValue(\"eurAf\", ComputingUtilities.ComputeFrequency(EurAlleleNumber, EurAlleleCount), false);\n\t\t\tjsonObject.AddStringValue(\"sasAf\", ComputingUtilities.ComputeFrequency(SasAlleleNumber, SasAlleleCount), false);\n\n\t\t\tjsonObject.AddIntValue(\"allAn\", AllAlleleNumber);\n\t\t\tjsonObject.AddIntValue(\"afrAn\", AfrAlleleNumber);\n\t\t\tjsonObject.AddIntValue(\"amrAn\", AmrAlleleNumber);\n\t\t\tjsonObject.AddIntValue(\"easAn\", 
EasAlleleNumber);\n\t\t\tjsonObject.AddIntValue(\"eurAn\", EurAlleleNumber);\n\t\t\tjsonObject.AddIntValue(\"sasAn\", SasAlleleNumber);\n\n\t\t\tjsonObject.AddIntValue(\"allAc\", AllAlleleCount);\n\t\t\tjsonObject.AddIntValue(\"afrAc\", AfrAlleleCount);\n\t\t\tjsonObject.AddIntValue(\"amrAc\", AmrAlleleCount);\n\t\t\tjsonObject.AddIntValue(\"easAc\", EasAlleleCount);\n\t\t\tjsonObject.AddIntValue(\"eurAc\", EurAlleleCount);\n\t\t\tjsonObject.AddIntValue(\"sasAc\", SasAlleleCount);\n\n\t\t    return StringBuilderPool.GetStringAndReturn(sb);\n\t\t}\n\n\t\tpublic string InputLine { get; set; }\n    }\n}\n"
  },
  {
    "path": "SAUtils/DataStructures/OnekGenSvItem.cs",
    "content": "﻿using Genome;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\nusing Variants;\n\nnamespace SAUtils.DataStructures\n{\n    public sealed class OnekGenSvItem: ISuppIntervalItem\n    {\n        public int Start { get; }\n        public int End { get; }\n        public Chromosome Chromosome { get; }\n        private VariantType VariantType { get; }\n\n        private readonly int? _allAlleleNumber;\n        private readonly int? _allAlleleCount;\n        private readonly double? _allAlleleFrequency;\n        private readonly double? _afrAlleleFrequency;\n        private readonly double? _amrAlleleFrequency;\n        private readonly double? _easAlleleFrequency;\n        private readonly double? _eurAlleleFrequency;\n        private readonly double? _sasAlleleFrequency;\n\n        public OnekGenSvItem(Chromosome chromosome, int start, int end, VariantType variantType, string id, int? allAlleleNumber, int? allAlleleCount, double? allAlleleFrequency, double? afrAlleleFrequency, double? amrAlleleFrequency, double? easAlleleFrequency, double? eurAlleleFrequency, double? 
sasAlleleFrequency)\n        {\n            Chromosome = chromosome;\n            Start = start;\n            End = end;\n            VariantType = variantType;\n            Id = id;\n            _allAlleleNumber = allAlleleNumber;\n            _allAlleleCount = allAlleleCount;\n            _allAlleleFrequency = allAlleleFrequency;\n            _afrAlleleFrequency = afrAlleleFrequency;\n            _amrAlleleFrequency = amrAlleleFrequency;\n            _easAlleleFrequency = easAlleleFrequency;\n            _eurAlleleFrequency = eurAlleleFrequency;\n            _sasAlleleFrequency = sasAlleleFrequency;\n        }\n\n        \n        private string Id { get; }\n        \n        public string GetJsonString()\n        {\n            var sb = StringBuilderPool.Get();\n            var jsonObject = new JsonObject(sb);\n\n            jsonObject.AddStringValue(\"chromosome\", Chromosome.EnsemblName);\n            jsonObject.AddIntValue(\"begin\", Start);\n            jsonObject.AddIntValue(\"end\", End);\n            jsonObject.AddStringValue(\"variantType\", VariantType.ToString());\n\n            jsonObject.AddStringValue(\"id\", Id);\n            jsonObject.AddIntValue(\"allAn\", _allAlleleNumber);\n            jsonObject.AddIntValue(\"allAc\", _allAlleleCount);\n            jsonObject.AddDoubleValue(\"allAf\", _allAlleleFrequency, \"0.######\");\n            jsonObject.AddDoubleValue(\"afrAf\", _afrAlleleFrequency, \"0.######\");\n            jsonObject.AddDoubleValue(\"amrAf\", _amrAlleleFrequency, \"0.######\");\n            jsonObject.AddDoubleValue(\"eurAf\", _eurAlleleFrequency, \"0.######\");\n            jsonObject.AddDoubleValue(\"easAf\", _easAlleleFrequency, \"0.######\");\n            jsonObject.AddDoubleValue(\"sasAf\", _sasAlleleFrequency, \"0.######\");\n\n            return StringBuilderPool.GetStringAndReturn(sb);\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/DataStructures/RefMinorItem.cs",
    "content": "﻿using Genome;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.DataStructures\n{\n    public sealed class RefMinorItem:ISupplementaryDataItem\n    {\n        public Chromosome Chromosome { get; }\n        public int Position { get; set; }\n        public string RefAllele { get; set; }\n        public string AltAllele { get; set; }\n        public string GlobalMajor { get; }\n\n        public RefMinorItem(Chromosome chromosome, int position, string globalMajor)\n        {\n            Chromosome = chromosome;\n            Position = position;\n            GlobalMajor = globalMajor;\n        }\n\n        public string GetJsonString()\n        {\n            var sb = StringBuilderPool.Get();\n            var jsonObject = new JsonObject(sb);\n\n            jsonObject.AddBoolValue(\"isReferenceMinor\", true);\n\n            return StringBuilderPool.GetStringAndReturn(sb);\n        }\n\n        public string InputLine { get; set; }\n    }\n}"
  },
  {
    "path": "SAUtils/DataStructures/SuppDataUtilities.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing ErrorHandling.Exceptions;\nusing SAUtils.PrimateAi;\nusing VariantAnnotation.Interface.SA;\nusing Variants;\n\nnamespace SAUtils.DataStructures\n{\n    public static class SuppDataUtilities\n    {\n        public static int CompareTo(ISupplementaryDataItem item, ISupplementaryDataItem other)\n        {\n            if (other == null) return -1;\n            return item.Chromosome.Index == other.Chromosome.Index ? item.Position.CompareTo(other.Position) : item.Chromosome.Index.CompareTo(other.Chromosome.Index);\n        }\n\n        public static void Trim(this ISupplementaryDataItem saItem)\n        {\n            if (saItem.RefAllele == null || saItem.AltAllele == null || saItem.Position < 0)\n                return;\n\n            (int start, string refAllele, string altAllele) = BiDirectionalTrimmer.Trim(saItem.Position, saItem.RefAllele, saItem.AltAllele);\n\n            saItem.Position  = start;\n            saItem.RefAllele = refAllele;\n            saItem.AltAllele = altAllele;\n\n        }\n        public static int BinarySearch<T>(List<T> items, int value) where T:IComparable<int>\n        {\n            var begin = 0;\n            int end   = items.Count - 1;\n\n            while (begin <= end)\n            {\n                int index = begin + (end - begin >> 1);\n\n                int ret = items[index].CompareTo(value);\n                if (ret == 0) return index;\n                if (ret < 0) begin = index + 1;\n                else end           = index - 1;\n            }\n\n            return ~begin;\n        }\n        public static List<ISupplementaryDataItem> DeDuplicatePrimateAiItems(List<ISupplementaryDataItem> saItems)\n        {\n            var maxScoreItems = new Dictionary<string, ISupplementaryDataItem>();\n\n            foreach (var supplementaryDataItem in saItems)\n            {\n                var saItem = (PrimateAiItem) 
supplementaryDataItem;\n                var refAlt = saItem.RefAllele + '>' + saItem.AltAllele;\n\n                if (maxScoreItems.TryGetValue(refAlt, out var dupItem))\n                {\n                    var dupPrimateAiItem = (PrimateAiItem) dupItem;\n                    if (saItem.ScorePercentile >= dupPrimateAiItem.ScorePercentile)\n                    {\n                        maxScoreItems[refAlt] = saItem;\n                    }\n                }\n                else maxScoreItems.Add(refAlt, saItem);\n            }\n\n            return maxScoreItems.Values.ToList();\n        }\n        public static List<ISupplementaryDataItem> RemoveConflictingAlleles(List<ISupplementaryDataItem> saItems, bool throwErrorOnConflicts)\n        {\n            var nonDuplicateSet  = new Dictionary<string, ISupplementaryDataItem>();\n            var conflictSet = new List<string>();\n\n            foreach (var saItem in saItems)\n            {\n                var refAlt = saItem.RefAllele+'>'+saItem.AltAllele;\n\n                if (nonDuplicateSet.TryGetValue(refAlt, out var dupItem))\n                {\n                    if (saItem.GetJsonString() != dupItem.GetJsonString())\n                    {\n                        if(throwErrorOnConflicts)\n                            throw new UserErrorException($\"Conflicting entries for items at {saItem.Chromosome.UcscName}:{saItem.Position} for alleles {saItem.RefAllele} > {saItem.AltAllele}\");\n                        conflictSet.Add(refAlt);\n                    }\n                }\n                else nonDuplicateSet.Add(refAlt, saItem);\n            }\n\n            var values = nonDuplicateSet.Values.ToList();\n\n            if (conflictSet.Count > 0)\n            {\n                values.RemoveAll(x => conflictSet.Contains(x.RefAllele + '>' + x.AltAllele));\n            }\n\n            return values;\n        }\n\n        public static ISupplementaryDataItem 
GetPositionalAnnotation(List<ISupplementaryDataItem> saItems)\n        {\n            // all items in the list are assumed to be objects of the same implementation\n            var firstItem = saItems[0];\n            switch (firstItem)\n            {\n                case AlleleFrequencyItem _:\n                    return GetGlobalMinor(saItems);\n                // if onekgen return Ancestral allele \n                case AncestralAlleleItem _:\n                    return GetConsensus(saItems);\n            }\n\n            return null;\n        }\n\n        private static ISupplementaryDataItem GetConsensus(List<ISupplementaryDataItem> saItems)\n        {\n            //check consistancy\n            string ancestralAllele = null;\n            foreach (var supplementaryDataItem in saItems)\n            {\n                var aaItem = (AncestralAlleleItem) supplementaryDataItem;\n                //note: aaItem.AncestralAllele cannot be null at this point\n                if (ancestralAllele == null) ancestralAllele = aaItem.AncestralAllele;\n\n                if (ancestralAllele != aaItem.AncestralAllele) return null;\n                \n            }\n\n            return ancestralAllele==null? 
null : saItems[0];\n        }\n\n        \n        private static ISupplementaryDataItem GetGlobalMinor(List<ISupplementaryDataItem> saItems)\n        {\n            var alleleFreqDict = new Dictionary<string, double>();\n\n            foreach (var supplementaryDataItem in saItems)\n            {\n                var frequencyItem = (AlleleFrequencyItem) supplementaryDataItem;\n                if (!double.MinValue.Equals(frequencyItem.AltFrequency))\n                    alleleFreqDict[frequencyItem.AltAllele] = frequencyItem.AltFrequency;\n            }\n\n            if (alleleFreqDict.Count == 0) return null;\n\n            var firstItem = saItems[0];\n            \n            string refAllele = firstItem.RefAllele;\n\n            string globalMajorAllele = GetMostFrequentAllele(alleleFreqDict, refAllele);\n            if (globalMajorAllele == null) return null;\n\n            alleleFreqDict.Remove(globalMajorAllele);\n\n            string globalMinorAllele = GetMostFrequentAllele(alleleFreqDict, refAllele, false);\n\n            if (globalMinorAllele == null) return null;\n            double frequency = alleleFreqDict[globalMinorAllele];\n            return new GlobalMinorItem(firstItem.Chromosome, firstItem.Position, globalMinorAllele, frequency);\n\n        }\n\n        public static string GetMostFrequentAllele(Dictionary<string, double> alleleFreqDict, string refAllele, bool isRefPreferred = true)\n        {\n            if (alleleFreqDict.Count == 0) return null;\n\n            // find all alleles that have max frequency.\n            double maxFreq = alleleFreqDict.Values.Max();\n            if (Math.Abs(maxFreq - double.MinValue) < double.Epsilon) return null;\n\n            var maxFreqAlleles = (from pair in alleleFreqDict where Math.Abs(pair.Value - maxFreq) < double.Epsilon select pair.Key).ToList();\n\n\n            // if there is only one with max frequency, return it\n            if (maxFreqAlleles.Count == 1)\n                return 
maxFreqAlleles[0];\n\n            // if ref is preferred (as in global major) it is returned\n            if (isRefPreferred && maxFreqAlleles.Contains(refAllele))\n                return refAllele;\n\n            // else refAllele is removed and the first of the remaining allele is returned (arbitrary selection)\n            maxFreqAlleles.Remove(refAllele);\n            return maxFreqAlleles[0];\n\n        }\n\n       \n    }\n}"
  },
  {
    "path": "SAUtils/DataStructures/TopMedItem.cs",
    "content": "﻿using System.Text;\nusing Genome;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.DataStructures\n{\n    public sealed class TopMedItem : ISupplementaryDataItem\n    {\n        private readonly int? _alleleNum;\n        private readonly int? _alleleCount;\n        private readonly int? _homCount;\n        private readonly bool _failedFilter;\n\n        public Chromosome Chromosome { get; }\n        public int Position { get; set; }\n        public string RefAllele { get; set; }\n        public string AltAllele { get; set; }\n\n\n\n        public TopMedItem(Chromosome chrom, int position, string refAllele, string altAllele, int? alleleNum,\n            int? alleleCount, int? homCount, bool failedFilter)\n        {\n            Chromosome      = chrom;\n            Position        = position;\n            RefAllele       = refAllele;\n            AltAllele       = altAllele;\n            _alleleNum      = alleleNum;\n            _alleleCount    = alleleCount;\n            _homCount       = homCount;\n            _failedFilter   = failedFilter;\n        }\n\n        public string GetJsonString()\n        {\n            var sb         = new StringBuilder();\n            var jsonObject = new JsonObject(sb);\n\n            jsonObject.AddStringValue(\"allAf\", ComputingUtilities.ComputeFrequency(_alleleNum, _alleleCount), false);\n            jsonObject.AddIntValue(\"allAn\", _alleleNum);\n            jsonObject.AddIntValue(\"allAc\", _alleleCount);            \n            jsonObject.AddIntValue(\"allHc\", _homCount);\n            if (_failedFilter) jsonObject.AddBoolValue(\"failedFilter\", true);\n\n            return sb.ToString();\n        }\n\n        public string InputLine { get; set; }\n    }\n}"
  },
  {
    "path": "SAUtils/DbSnpRemapper/ChromMapper.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing Compression.Utilities;\r\nusing Genome;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing Variants;\r\n\r\nnamespace SAUtils.DbSnpRemapper\r\n{\r\n    internal sealed class ChromMapper\r\n    {\r\n        private readonly StreamReader _srcReader;\r\n        private readonly StreamReader _destReader;\r\n        private readonly Dictionary<string, StreamWriter> _writers;\r\n        private readonly StreamWriter _leftoverWriter;\r\n        private readonly ISequenceProvider _srcSequenceProvider;\r\n        private readonly ISequenceProvider _desSequenceProvider;\r\n        private int _leftoverCount;\r\n        private readonly Dictionary<(long, int, string), List<int>> _destinationVariants;\r\n        private int _alleleMismatchCount;\r\n        \r\n        public ChromMapper(StreamReader srcReader, StreamReader destReader, StreamWriter leftoverWriter,\r\n            ISequenceProvider srcSequenceProvider, ISequenceProvider desSequenceProvider)\r\n        {\r\n            _srcReader  = srcReader;\r\n            _destReader = destReader;\r\n            _writers = new Dictionary<string, StreamWriter>();\r\n            _leftoverWriter = leftoverWriter;\r\n            _srcSequenceProvider = srcSequenceProvider;\r\n            _desSequenceProvider = desSequenceProvider;\r\n            _destinationVariants = new Dictionary<(long, int, string), List<int>>();\r\n        }\r\n\r\n\r\n        public Dictionary<string, StreamWriter> Map()\r\n        {\r\n            using (_srcReader)\r\n            using (_destReader)\r\n            {\r\n                //map all the destination rsIDs to their positions in destination\r\n\r\n                string srcLine, destLine;\r\n                //read to the first data line\r\n                while ((srcLine = _srcReader.ReadLine()) != null)\r\n      
          {\r\n                    if (!srcLine.OptimizedStartsWith('#')) break;\r\n                }\r\n                while ((destLine= _destReader.ReadLine()) != null)\r\n                {\r\n                    if (!destLine.OptimizedStartsWith('#')) break;\r\n                }\r\n\r\n                // dictionary of leftover rsIds from previous chromosomes\r\n                \r\n                //var destRsidLocations = new Dictionary<long, int>();\r\n                while (destLine != null && srcLine!=null)\r\n                {\r\n                    _destinationVariants.Clear();\r\n                    destLine = GetNextChromDestinations(destLine);\r\n                    srcLine = ProcessNextChromSource(srcLine);\r\n                }                \r\n            }\r\n\r\n            // these writers need to be kept open so that the leftover mapper can append to them\r\n            Console.WriteLine($\"Total leftover count:{_leftoverCount}\");\r\n            return _writers;\r\n        }\r\n\r\n        private string ProcessNextChromSource(string line)\r\n        {\r\n            //extracting current chrom info from first line provided\r\n            var currentChromName = line.Split('\\t', 2)[VcfCommon.ChromIndex];\r\n            var currentChrom = ReferenceNameUtilities.GetChromosome(_srcSequenceProvider.RefNameToChromosome, currentChromName);\r\n            _srcSequenceProvider.LoadChromosome(currentChrom);\r\n            \r\n            var leftoverCount=0;\r\n            do\r\n            {\r\n                var splits = line.Split('\\t', VcfCommon.InfoIndex);\r\n                var chrom = splits[VcfCommon.ChromIndex];\r\n                if (chrom != currentChromName) break;\r\n                \r\n                var refAllele = splits[VcfCommon.RefIndex];\r\n                var altAlleles = splits[VcfCommon.AltIndex].Split(',');\r\n                var position = int.Parse(splits[VcfCommon.PosIndex]);\r\n                var rsIds = 
Utilities.GetRsids(splits[VcfCommon.IdIndex]);\r\n                if (rsIds == null) continue;\r\n                \r\n                var processedVariants = altAlleles.Select(x => VariantUtils.TrimAndLeftAlign(position, refAllele, x, _srcSequenceProvider.Sequence)).ToArray();\r\n                \r\n                var foundInDest = false;\r\n                foreach (var (_, variantRef, variantAlt) in processedVariants)\r\n                foreach (var rsId in rsIds)\r\n                {\r\n                    if (! _destinationVariants.TryGetValue((rsId, variantRef.Length, variantAlt), out var targetPositions)) continue;\r\n                    \r\n                    targetPositions.ForEach(x => WriteRemappedEntry(chrom, x, variantRef, variantAlt, line));\r\n                    //flipping the sign to indicate it has been mapped\r\n                    //_destinationVariants[rsId] = (-variant.position, variant.refAllele, variant.altAlleles);\r\n\r\n                    foundInDest = true;\r\n                }\r\n                if (foundInDest) continue;\r\n\r\n                foreach (var (_, _, variantAlt) in processedVariants)\r\n                foreach (var rsId in rsIds)\r\n                    _leftoverWriter.WriteLine(string.Join('#',rsId.ToString(), variantAlt, line));\r\n                leftoverCount++;\r\n\r\n            } while ((line = _srcReader.ReadLine()) != null);\r\n            \r\n            Console.WriteLine($\"Leftover count for {currentChromName}: {leftoverCount}\");\r\n            //Console.WriteLine($\"Number of entries discarded due to allele mismatch: {_alleleMismatchCount}\");\r\n            _leftoverCount += leftoverCount;\r\n            return line;\r\n        }\r\n\r\n        private string GetNextChromDestinations(string line)\r\n        {\r\n            //extracting current chrom info from first line provided\r\n            var currentChromName = line.Split('\\t', 2)[VcfCommon.ChromIndex];\r\n            Console.Write($\"Getting 
destinations for chromosome:{currentChromName}...\");\r\n            var currentChrom = ReferenceNameUtilities.GetChromosome(_desSequenceProvider.RefNameToChromosome, currentChromName);\r\n            _desSequenceProvider.LoadChromosome(currentChrom);\r\n            do\r\n            {\r\n                var splits = line.Split('\\t', VcfCommon.InfoIndex);\r\n                var chrom = splits[VcfCommon.ChromIndex];\r\n                if (chrom != currentChromName) break;\r\n\r\n                var refAllele = splits[VcfCommon.RefIndex];\r\n                var altAlleles = splits[VcfCommon.AltIndex].Split(',');\r\n                var position = int.Parse(splits[VcfCommon.PosIndex]);\r\n                var rsIds = Utilities.GetRsids(splits[VcfCommon.IdIndex]);\r\n                if (rsIds == null) continue;\r\n\r\n                var processedVariants = altAlleles.Select(x => VariantUtils.TrimAndLeftAlign(position, refAllele, x, _desSequenceProvider.Sequence)).ToArray();\r\n\r\n                foreach (var (start, variantRef, variantAlt) in processedVariants)\r\n                foreach (var rsId in rsIds)\r\n                {\r\n                    if (!_destinationVariants.TryGetValue((rsId, variantRef.Length, variantAlt), out var variants))\r\n                    {\r\n                        variants = new List<int>();\r\n                        _destinationVariants[(rsId, variantRef.Length, variantAlt)] = variants;\r\n                    }\r\n\r\n                    variants.Add(start);\r\n                }\r\n\r\n            } while ((line = _destReader.ReadLine()) != null);\r\n\r\n            \r\n            Console.WriteLine($\"{_destinationVariants.Count} rsIds found.\");\r\n\r\n            return line;\r\n        }\r\n\r\n        private void WriteRemappedEntry(string chrom, int pos, string refAllele, string altAllele, string vcfLine)\r\n        {\r\n            if (!_writers.ContainsKey(chrom))\r\n                _writers[chrom] = 
GZipUtilities.GetStreamWriter(chrom+\".vcf.gz\");\r\n\r\n            var splits = vcfLine.Split('\\t', 6);\r\n\r\n            _writers[chrom].WriteLine(string.Join('\\t', chrom, pos.ToString(), splits[2], refAllele, altAllele, splits[5]));\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/DbSnpRemapper/DbSnpRemapperMain.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing CommandLine.Builders;\r\nusing CommandLine.NDesk.Options;\r\nusing Compression.Utilities;\r\nusing ErrorHandling;\r\nusing Nirvana;\r\nusing VariantAnnotation.Interface.Providers;\r\n\r\nnamespace SAUtils.DbSnpRemapper\r\n{\r\n    public static class DbSnpRemapperMain\r\n    {\r\n        private static string _srcMapFile;\r\n        private static string _destMapFile;\r\n        private static string _srcRefSequence;\r\n        private static string _desRefSequence;\r\n\r\n        public static ExitCodes Run(string command, string[] commandArgs)\r\n        {\r\n            var ops = new OptionSet\r\n            {\r\n                {\r\n                    \"src|s=\",\r\n                    \"VCF file with dbSNP ids and data to be remapped\",\r\n                    v => _srcMapFile = v\r\n                },\r\n                {\r\n                    \"des|d=\",\r\n                    \"VCF file (with same chromosome order as src) with destination dbSNP mapping\",\r\n                    v => _destMapFile = v\r\n                },\r\n                {\r\n                    \"sref=\",\r\n                    \"compressed reference sequence file for the source assembly\",\r\n                    v => _srcRefSequence = v\r\n                },\r\n                {\r\n                    \"dref=\",\r\n                    \"compressed reference sequence file for the destination assembly\",\r\n                    v => _desRefSequence = v\r\n                }\r\n\r\n            };\r\n\r\n            var commandLineExample = $\"{command} [options]\";\r\n\r\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\r\n                .Parse()\r\n                .CheckInputFilenameExists(_srcMapFile, \"VCF file with dbSNP ids and data to be remapped\", \"--src\")\r\n                .CheckInputFilenameExists(_destMapFile, \"VCF file with destination dbSNP mapping\", 
\"--des\")\r\n                .CheckInputFilenameExists(_srcRefSequence, \"reference sequence for source genome assembly\", \"--sref\")\r\n                .CheckInputFilenameExists(_desRefSequence, \"reference sequence for destination genome assembly\", \"--dref\")\r\n                .SkipBanner()\r\n                .ShowHelpMenu(\"Reads provided supplementary data files and populates tsv files\", commandLineExample)\r\n                .ShowErrors()\r\n                .Execute(ProgramExecution);\r\n\r\n            return exitCode;\r\n        }\r\n\r\n        private static ExitCodes ProgramExecution()\r\n        {\r\n            const string tempLeftoverFilename = \"LeftOvers.vcf.gz\";\r\n            Dictionary<string, StreamWriter> writers;\r\n\r\n            ISequenceProvider srcSequenceProvider = ProviderUtilities.GetSequenceProvider(_srcRefSequence);\r\n            ISequenceProvider desSequenceProvider = ProviderUtilities.GetSequenceProvider(_desRefSequence);\r\n            using (var srcReader = GZipUtilities.GetAppropriateStreamReader(_srcMapFile))\r\n            using (var destReader = GZipUtilities.GetAppropriateStreamReader(_destMapFile))\r\n            using (var leftoverWriter = GZipUtilities.GetStreamWriter(tempLeftoverFilename))\r\n            {\r\n                var chromMapper = new ChromMapper(srcReader, destReader, leftoverWriter, srcSequenceProvider, desSequenceProvider);\r\n                writers = chromMapper.Map();\r\n            }\r\n\r\n            //now we will try to map the leftovers\r\n            using (var destReader = GZipUtilities.GetAppropriateStreamReader(_destMapFile))\r\n            using (var leftoverReader = GZipUtilities.GetAppropriateStreamReader(tempLeftoverFilename))\r\n            {\r\n                var leftOverMapper = new LeftoverMapper(leftoverReader, destReader, writers, desSequenceProvider);\r\n                var leftoverCount = leftOverMapper.Map();\r\n                Console.WriteLine($\"{leftoverCount} 
leftovers mapped!!\");\r\n            }\r\n\r\n            foreach (var writer in writers.Values)\r\n            {\r\n                writer.Dispose();\r\n            }\r\n            \r\n            return ExitCodes.Success;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/DbSnpRemapper/GenomicLocation.cs",
    "content": "﻿namespace SAUtils.DbSnpRemapper\r\n{\r\n    public struct GenomicLocation\r\n    {\r\n        public readonly string Chrom;\r\n        public readonly int Position;\r\n        \r\n        public GenomicLocation(string chrom, int pos)\r\n        {\r\n            Chrom = chrom;\r\n            Position = pos;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/DbSnpRemapper/LeftoverMapper.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing Compression.Utilities;\r\nusing Genome;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing Variants;\r\n\r\nnamespace SAUtils.DbSnpRemapper\r\n{\r\n    public sealed class LeftoverMapper\r\n    {\r\n        private readonly StreamReader _leftoverReader;\r\n        private readonly StreamReader _destReader;\r\n        private readonly Dictionary<string, StreamWriter> _writers;\r\n        private readonly ISequenceProvider _desSequenceProvider;\r\n\r\n        public LeftoverMapper(StreamReader leftoverReader, StreamReader destReader, Dictionary<string, StreamWriter> writers,\r\n            ISequenceProvider desSequenceProvider)\r\n        {\r\n            _leftoverReader = leftoverReader;\r\n            _destReader = destReader;\r\n            _writers = writers;\r\n            _desSequenceProvider = desSequenceProvider;\r\n        }\r\n\r\n        public int Map()\r\n        {\r\n            // write out the relocated locations of the leftover rsIds whenever possible\r\n            //reading in the leftover ids\r\n            var leftoverIds = new HashSet<(long,string)>();\r\n            Console.Write(\"Loading leftover ids...\");\r\n            string line;\r\n            while ((line = _leftoverReader.ReadLine()) != null)\r\n            {\r\n                var splits = line.Split('#', 3);\r\n                var id = long.Parse(splits[0]);\r\n                var alt = splits[1];\r\n                leftoverIds.Add((id, alt));\r\n            }\r\n            Console.WriteLine($\"{leftoverIds.Count} found.\");\r\n\r\n            // stream through the dest file to find locations\r\n            var leftoversWithDest = new Dictionary<(long, string), List<GenomicLocation>>();\r\n            var currentChromName = \"\";\r\n            while ((line = _destReader.ReadLine()) != null)\r\n 
           {\r\n                if (line.OptimizedStartsWith('#')) continue;\r\n                \r\n                var splits = line.Split('\\t', VcfCommon.InfoIndex);\r\n                var chromName = splits[VcfCommon.ChromIndex];\r\n                if (chromName != currentChromName)\r\n                {\r\n                    currentChromName = chromName;\r\n                    Console.WriteLine($\"Getting destinations for chromosome:{currentChromName}...\");\r\n                    var currentChrom = ReferenceNameUtilities.GetChromosome(_desSequenceProvider.RefNameToChromosome,\r\n                        currentChromName);\r\n                    _desSequenceProvider.LoadChromosome(currentChrom);\r\n                }\r\n\r\n                var refAllele  = splits[VcfCommon.RefIndex];\r\n                var altAlleles = splits[VcfCommon.AltIndex].Split(',');\r\n                var position   = int.Parse(splits[VcfCommon.PosIndex]);\r\n                var rsIds      = Utilities.GetRsids(splits[VcfCommon.IdIndex]);\r\n                if (rsIds == null) continue;\r\n                \r\n                var processedVariants = altAlleles.Select(x => VariantUtils.TrimAndLeftAlign(position, refAllele, x, _desSequenceProvider.Sequence)).ToArray();\r\n\r\n                foreach (var (_, _, variantAlt) in processedVariants)\r\n                foreach (var rsId in rsIds)\r\n                {\r\n                    if (!leftoverIds.Contains((rsId, variantAlt))) continue;\r\n                    var pos = int.Parse(splits[VcfCommon.PosIndex]);\r\n                    if (!leftoversWithDest.TryGetValue((rsId, variantAlt), out var locations))\r\n                    {\r\n                        locations = new List<GenomicLocation>();\r\n                        leftoversWithDest[(rsId, variantAlt)] = locations;\r\n                    }\r\n                    locations.Add(new GenomicLocation(chromName, pos));\r\n                }\r\n\r\n            }\r\n\r\n            
WriteMappedLeftovers(leftoversWithDest);\r\n\r\n            return leftoversWithDest.Count;\r\n\r\n\r\n        }\r\n\r\n        private void WriteMappedLeftovers(Dictionary<(long, string), List<GenomicLocation>> leftoversWithDest)\r\n        {\r\n            //resetting the reader\r\n            _leftoverReader.DiscardBufferedData();\r\n            _leftoverReader.BaseStream.Position = 0;\r\n            \r\n            string line;\r\n            while ((line = _leftoverReader.ReadLine()) != null)\r\n            {\r\n                var splits = line.Split('#', 3);\r\n                var id     = long.Parse(splits[0]);\r\n                var alt    = splits[1];\r\n                \r\n                if (! leftoversWithDest.ContainsKey((id, alt))) continue;\r\n                AppendToChromFile(leftoversWithDest[(id, alt)], line);\r\n            }\r\n        }\r\n\r\n        \r\n        private void AppendToChromFile(List<GenomicLocation> leftoverLocations, string line)\r\n        {\r\n            foreach (GenomicLocation location in leftoverLocations)\r\n            {\r\n                var chromName = location.Chrom;\r\n                if (!chromName.StartsWith(\"chr\"))\r\n                    chromName = \"chr\" + chromName;\r\n                if (!_writers.ContainsKey(chromName))\r\n                {\r\n                    Console.WriteLine($\"Warning!! {chromName} was not present in source but is in destination\");\r\n                    _writers.Add(chromName, GZipUtilities.GetStreamWriter(chromName +\".vcf.gz\"));\r\n                }\r\n                var splits = line.Split('\\t', 3);\r\n                _writers[chromName].WriteLine($\"{chromName}\\t{location.Position}\\t{splits[2]}\");\r\n            }\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/DbSnpRemapper/Utilities.cs",
    "content": "﻿using System.Linq;\r\nusing OptimizedCore;\r\n\r\nnamespace SAUtils.DbSnpRemapper\r\n{\r\n    public static class Utilities\r\n    {\r\n        public static long[] GetRsids(string idField)\r\n        {\r\n            var ids = idField.OptimizedSplit(',')\r\n                .Where(idStr => idStr.StartsWith(\"rs\"))\r\n                .Select(idStr => long.Parse(idStr.Substring(2))).ToArray();\r\n\r\n            return ids.Length == 0 ? null : ids;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/DegenerateBaseUtilities.cs",
    "content": "﻿using System.Collections.Generic;\r\n\r\nnamespace SAUtils\r\n{\r\n    public static class DegenerateBaseUtilities\r\n    {\r\n        private static readonly Dictionary<char, List<char>> DegenerateBaseNotation = new Dictionary<char, List<char>>\r\n        {\r\n            {'B', new List<char>{'C','G','T'}},\r\n            {'D', new List<char>{'A','G','T'}},\r\n            {'H', new List<char>{'A','C','T'}},\r\n            {'K', new List<char>{'G','T'}},\r\n            {'M', new List<char>{'A','C'}},\r\n            {'R', new List<char>{'A','G'}},\r\n            {'S', new List<char>{'C','G'}},\r\n            {'V', new List<char>{'A','C','G'}},\r\n            {'W', new List<char>{'A','T'}},\r\n            {'Y', new List<char>{'C','T'}}\r\n        };\r\n\r\n        public static List<string> GetAllPossibleSequences(string sequenceWithDegenerateBases)\r\n        {\r\n            var sequences = new List<string>();\r\n            GetSequences(sequenceWithDegenerateBases.ToUpper(), sequences, 0, \"\");\r\n            return sequences;\r\n        }\r\n\r\n        private static void GetSequences(string inputSequence, ICollection<string> outputSequences, int index, string subSequence)\r\n        {\r\n            if (index == inputSequence.Length)\r\n            {\r\n                outputSequences.Add(subSequence);\r\n                return;\r\n            }\r\n            MapBase(inputSequence[index]).ForEach(x =>\r\n                GetSequences(inputSequence, outputSequences, index + 1, subSequence + x));\r\n\r\n        }\r\n\r\n        private static List<char> MapBase(char inputBase) => DegenerateBaseNotation.ContainsKey(inputBase) ? DegenerateBaseNotation[inputBase] : new List<char> {inputBase};\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/ExtractCosmicSvs/CosmicCnvItem.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing Genome;\nusing OptimizedCore;\nusing VariantAnnotation.IO;\nusing Variants;\n\nnamespace SAUtils.ExtractCosmicSvs\n{\n   \n    public sealed class CosmicCnvItem\n    {\n        public readonly int CNVId;\n        private readonly Chromosome _chromosome;\n        private readonly VariantType _cnvType;\n        private readonly int _copyNumber;\n        private readonly int _studyId;\n        private readonly Dictionary<string, int> _cancerTypes;\n        public int CancerTypeCount => _cancerTypes.Count;\n        private readonly Dictionary<string, int> _tissueTypes;\n        public int TissueTypeCount => _tissueTypes.Count;\n\n        public CosmicCnvItem(int cnvId, Chromosome chromosome, int start, int end, VariantType cnvType, int copyNumber, Dictionary<string, int> cancerTypes, Dictionary<string, int> tissueTypes, int studyId)\n        {\n            CNVId        = cnvId;\n            _chromosome  = chromosome;\n            Start        = start;\n            End          = end;\n            _cnvType     = cnvType;\n            _studyId     = studyId;\n            _copyNumber  = copyNumber;\n            _cancerTypes = cancerTypes;\n            _tissueTypes = tissueTypes;\n        }\n\n        private int Start { get; }\n        private int End { get; }\n\n        public string GetJsonString()\n        {\n            var sb = StringBuilderPool.Get();\n\n            var jsonObject = new JsonObject(sb);\n\n            jsonObject.AddIntValue(\"id\", CNVId);\n            jsonObject.AddStringValue(\"variantType\", _cnvType.ToString());\n            if (_copyNumber!=-1)\n                jsonObject.AddIntValue(\"copyNumber\", _copyNumber);\n\n            jsonObject.AddStringValues(\"cancerTypes\", GetJsonStrings(_cancerTypes), false);\n            jsonObject.AddStringValues(\"tissueTypes\", GetJsonStrings(_tissueTypes), false);\n\n            return sb.ToString();\n        }\n\n        private 
static IEnumerable<string> GetJsonStrings(Dictionary<string, int> dictionary)\n        {\n            foreach (var kvp in dictionary)\n            {\n                yield return $\"{JsonObject.OpenBrace}\\\"{kvp.Key.Replace('_', ' ')}\\\":{kvp.Value}{JsonObject.CloseBrace}\";\n            }\n        }\n\n        public void Merge(CosmicCnvItem other)\n        {\n            if (CNVId != other.CNVId \n                || _cnvType != other._cnvType \n                || _copyNumber!= other._copyNumber)\n                throw new InvalidDataException(\"Attempting to merge different cosmic CNVs\");\n\n            //avoid double counting \n            if (_studyId != other._studyId)\n            {\n                MergeCounts(_cancerTypes, other._cancerTypes);\n                MergeCounts(_tissueTypes, other._tissueTypes);\n            }\n\n        }\n\n        private static void MergeCounts(Dictionary<string, int> countDict1, Dictionary<string, int> countDict2)\n        {\n            foreach (var kvp in countDict2)\n            {\n                if (!countDict1.TryAdd(kvp.Key, kvp.Value)) // this key already exist\n                    countDict1[kvp.Key] += kvp.Value;\n            }\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/ExtractCosmicSvs/CosmicCnvReader.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing Genome;\r\nusing IO;\r\nusing OptimizedCore;\r\nusing Variants;\r\n\r\nnamespace SAUtils.ExtractCosmicSvs\r\n{\r\n    public sealed class CosmicCnvReader:IDisposable\r\n    {\r\n        private readonly StreamReader _reader;\r\n        private readonly Dictionary<string, Chromosome> _refToChrom;\r\n        private readonly GenomeAssembly _assembly;\r\n\r\n        private int _idIndex                    = -1;  \r\n        private int _primarySiteIndex           = -1;\r\n        private int _siteSubtypeOneIndex        = -1;\r\n        private int _siteSubtypeTwoIndex        = -1;\r\n        private int _siteSubtypeThreeIndex      = -1;\r\n        private int _primaryHistologyIndex      = -1;\r\n        private int _histologySubtypeOneIndex   = -1;\r\n        private int _histologySubtypeTwoIndex   = -1;\r\n        private int _histologySubtypeThreeIndex = -1;\r\n        private int _copyNumberIndex            = -1;\r\n        private int _cnvTypeIndex               = -1;\r\n        private int _assemblyIndex              = -1;\r\n        private int _chromStartStopIndex        = -1;\r\n        private int _studyIdIndex               = -1;\r\n\r\n        private static readonly char[] ChromosomeDelimiters = {':', '.'};\r\n\r\n        //CNV_ID  ID_GENE gene_name       ID_SAMPLE       ID_TUMOUR       Primary site    Site subtype 1  Site subtype 2  Site subtype 3  Primary histology       Histology subtype 1     Histology subtype 2     Histology subtype 3     SAMPLE_NAME     TOTAL_CN        MINOR_ALLELE    MUT_TYPE        ID_STUDY        GRCh    Chromosome:G_Start..G_Stop\r\n\r\n        public CosmicCnvReader(Stream cnvStream, Dictionary<string, Chromosome> refNameToChorm, GenomeAssembly assembly)\r\n        {\r\n            _reader     = FileUtilities.GetStreamReader(cnvStream);\r\n            _refToChrom = refNameToChorm; \r\n            _assembly   = assembly;\r\n        
}\r\n\r\n        public IEnumerable<CosmicCnvItem> GetEntries()\r\n        {\r\n            var cnvDictionary = new Dictionary<int, CosmicCnvItem>();\r\n            string line;\r\n            var isFirstLine = true;\r\n\r\n            while ((line = _reader.ReadLine()) != null)\r\n            {\r\n                // Skip empty lines.\r\n                if (string.IsNullOrWhiteSpace(line)) continue;\r\n\r\n                // Skip comments.\r\n                if (isFirstLine)\r\n                {\r\n                    GetColumnIndices(line);\r\n                    isFirstLine = false;\r\n                    continue;\r\n                }\r\n\r\n                try\r\n                {\r\n                    var cnvItem = ExtractCosmicCnv(line);\r\n                    if (cnvItem == null) continue;\r\n                    if (cnvDictionary.TryGetValue(cnvItem.CNVId, out var value))\r\n                        value.Merge(cnvItem);\r\n                    else cnvDictionary[cnvItem.CNVId] = cnvItem;\r\n                }\r\n                catch (Exception e)\r\n                {\r\n                    Console.WriteLine(e);\r\n                    Console.WriteLine(line);\r\n                    throw;\r\n                }                \r\n            }\r\n\r\n            Console.WriteLine($\"Found {cnvDictionary.Count} unique cosmic cnvs\");\r\n            return cnvDictionary.Values;\r\n        }\r\n\r\n        internal void GetColumnIndices(string headerLine)\r\n        {\r\n            //CNV_ID  ID_GENE gene_name       ID_SAMPLE       ID_TUMOUR       Primary site    Site subtype 1  Site subtype 2  Site subtype 3  Primary histology       Histology subtype 1     Histology subtype 2     Histology subtype 3     SAMPLE_NAME     TOTAL_CN        MINOR_ALLELE    MUT_TYPE        ID_STUDY        GRCh    Chromosome:G_Start..G_Stop\r\n\r\n            _idIndex                    = -1;\r\n            _primarySiteIndex           = -1;\r\n            _siteSubtypeOneIndex        = 
-1;\r\n            _siteSubtypeTwoIndex        = -1;\r\n            _siteSubtypeThreeIndex      = -1;\r\n            _primaryHistologyIndex      = -1;\r\n            _histologySubtypeOneIndex   = -1;\r\n            _histologySubtypeTwoIndex   = -1;\r\n            _histologySubtypeThreeIndex = -1;\r\n            _copyNumberIndex            = -1;\r\n            _cnvTypeIndex               = -1;\r\n            _assemblyIndex              = -1;\r\n            _chromStartStopIndex        = -1;\r\n            _studyIdIndex               = -1;\r\n\r\n            var columns = headerLine.OptimizedSplit('\\t');\r\n            for (int i = 0; i < columns.Length; i++)\r\n            {\r\n                switch (columns[i])\r\n                {\r\n                    case \"CNV_ID\":\r\n                        _idIndex = i;\r\n                        break;\r\n                    case \"Primary site\":\r\n                        _primarySiteIndex = i;\r\n                        break;\r\n                    case \"Site subtype 1\":\r\n                        _siteSubtypeOneIndex = i;\r\n                        break;\r\n                    case \"Site subtype 2\":\r\n                        _siteSubtypeTwoIndex = i;\r\n                        break;\r\n                    case \"Site subtype 3\":\r\n                        _siteSubtypeThreeIndex = i;\r\n                        break;\r\n                    case \"Primary histology\":\r\n                        _primaryHistologyIndex = i;\r\n                        break;\r\n                    case \"Histology subtype 1\":\r\n                        _histologySubtypeOneIndex = i;\r\n                        break;\r\n                    case \"Histology subtype 2\":\r\n                        _histologySubtypeTwoIndex = i;\r\n                        break;\r\n                    case \"Histology subtype 3\":\r\n                        _histologySubtypeThreeIndex = i;\r\n                        break;\r\n                    case 
\"TOTAL_CN\":\r\n                        _copyNumberIndex = i;\r\n                        break;\r\n                    case \"MUT_TYPE\":\r\n                        _cnvTypeIndex = i;\r\n                        break;\r\n                    case \"GRCh\":\r\n                        _assemblyIndex = i;\r\n                        break;\r\n                    case \"Chromosome:G_Start..G_Stop\":\r\n                        _chromStartStopIndex = i;\r\n                        break;\r\n                    case \"ID_STUDY\":\r\n                        _studyIdIndex = i;\r\n                        break;\r\n                }\r\n            }\r\n\r\n            if (_primarySiteIndex == -1 || _siteSubtypeThreeIndex == -1 || _siteSubtypeOneIndex == -1 || _siteSubtypeTwoIndex == -1)\r\n                throw new InvalidDataException(\"Column for some site(s) could not be detected\");\r\n            if (_primaryHistologyIndex == -1 || _histologySubtypeOneIndex == -1 || _histologySubtypeTwoIndex == -1 || _histologySubtypeThreeIndex == -1)\r\n                throw new InvalidDataException(\"Column for some histology(ies) could not be detected\");\r\n            if (_copyNumberIndex == -1 || _assemblyIndex == -1 || _chromStartStopIndex == -1 || _cnvTypeIndex == -1)\r\n                throw new InvalidDataException(\"Column for some CNV details could not be detected\");\r\n            if (_studyIdIndex == -1)\r\n                throw new InvalidDataException(\"No study Id column detected\");\r\n        }\r\n\r\n        private CosmicCnvItem ExtractCosmicCnv(string line)\r\n        {            \r\n            var splits = line.OptimizedSplit('\\t');\r\n\r\n            if (splits.Length == 1) return null;\r\n\r\n            var assembly = GenomeAssembly.Unknown;\r\n            var assemblyString = splits[_assemblyIndex];\r\n\r\n            if (assemblyString == \"37\") assembly = GenomeAssembly.GRCh37;\r\n            if (assemblyString == \"38\") assembly = 
GenomeAssembly.GRCh38;\r\n\r\n            if (assembly != _assembly) return null;\r\n\r\n            var cnvId = int.Parse(splits[_idIndex]);\r\n\r\n            var studyId = int.Parse(splits[_studyIdIndex]);\r\n\r\n            var cancerTypes = new Dictionary<string, int>();\r\n\r\n            TryAddValue(cancerTypes, splits[_primaryHistologyIndex]);\r\n            TryAddValue(cancerTypes, splits[_histologySubtypeOneIndex]);\r\n            TryAddValue(cancerTypes, splits[_histologySubtypeTwoIndex]);\r\n            TryAddValue(cancerTypes, splits[_histologySubtypeThreeIndex]);\r\n\r\n            var tissueTypes = new Dictionary<string, int>();\r\n\r\n            TryAddValue(tissueTypes, splits[_primarySiteIndex]);\r\n            TryAddValue(tissueTypes, splits[_siteSubtypeOneIndex]);\r\n            TryAddValue(tissueTypes, splits[_siteSubtypeTwoIndex]);\r\n            TryAddValue(tissueTypes, splits[_siteSubtypeThreeIndex]);\r\n\r\n            if (! int.TryParse(splits[_copyNumberIndex], out var copyNumber))\r\n            {\r\n                copyNumber = -1;\r\n            }\r\n\r\n            var cnvType = VariantType.copy_number_variation;\r\n            if (splits[_cnvTypeIndex] == \"gain\") cnvType = VariantType.copy_number_gain;\r\n            if (splits[_cnvTypeIndex] == \"loss\") cnvType = VariantType.copy_number_loss;\r\n\r\n            (string chrom, int start, int end) = GetChromStartStop(splits[_chromStartStopIndex]);\r\n\r\n            return new CosmicCnvItem(cnvId, _refToChrom[chrom], start, end, cnvType, copyNumber, cancerTypes, tissueTypes, studyId);\r\n        }\r\n\r\n        private static (string, int, int) GetChromStartStop(string chromPos)\r\n        {\r\n            // 17:18358950..18464587 Chromosome:G_Start..G_Stop\r\n            var splits   = chromPos.Split(ChromosomeDelimiters);\r\n            string chrom = splits[0];\r\n            if (chrom == \"25\") chrom = \"MT\";\r\n            return (chrom, int.Parse(splits[1]), 
int.Parse(splits[3]));\r\n        }\r\n\r\n        private static void TryAddValue(Dictionary<string, int> cancerTypes, string type)\r\n        {\r\n            if (string.IsNullOrEmpty(type) || type == \"NS\") return;\r\n            cancerTypes[type] = 1; // we don't care about overriding the old count since this is for one study. So counts should not add up\r\n        }\r\n\r\n        public void Dispose() => _reader?.Dispose();\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/ExtractCosmicSvs/CosmicSvReader.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing Genome;\nusing VariantAnnotation.Providers;\n\nnamespace SAUtils.ExtractCosmicSvs\n{\n    public sealed class CosmicSvReader:IDisposable\n    {\n        private readonly Stream _cnvStream;\n        private readonly Stream _breakendStream;\n        private readonly DataSourceVersion _version;\n        private readonly string _outputDirectory;\n        private readonly GenomeAssembly _genomeAssembly;\n        private readonly Dictionary<string, Chromosome> _refNameToChorm;\n\n        public CosmicSvReader(Stream cnvStream, Stream breakendStream, DataSourceVersion version, string outputDir, GenomeAssembly assembly, Dictionary<string, Chromosome> refNameToChromosome)\n        {\n            _cnvStream       = cnvStream;\n            _breakendStream  = breakendStream;\n            _version         = version;\n            _outputDirectory = outputDir;\n            _genomeAssembly  = assembly;\n            _refNameToChorm  = refNameToChromosome;\n        }\n\n        //public void CreateTsv()\n        //{\n        //    var benchMark = new Benchmark();\n        //    const string dataSource = \"COSMIC\";\n\n        //    if (_cnvStream != null)\n        //    {\n        //        using (var writer = new IntervalTsvWriter(_outputDirectory, _version,\n        //            _genomeAssembly.ToString(), SaTsvCommon.CosmicSvSchemaVersion, DataSourceTags.CosmicCnvTag, ReportFor.StructuralVariants))\n        //        using (var cnvReader = new CosmicCnvReader(_cnvStream, _refNameToChorm, _genomeAssembly))\n        //        {\n        //            foreach (var cnvEntry in cnvReader.GetEntries())\n        //            {\n        //                writer.AddEntry(cnvEntry.Chromosome.EnsemblName, cnvEntry.Start, cnvEntry.End, cnvEntry.GetJsonString());\n        //            }\n        //        }\n\n        //    }\n\n\n        //    var timeSpan = 
Benchmark.ToHumanReadable(benchMark.GetElapsedTime());\n        //    TsvWriterUtilities.WriteCompleteInfo(dataSource, _version.Version, timeSpan);\n        //}\n\n        public void Dispose()\n        {\n            _cnvStream?.Dispose();\n            _breakendStream?.Dispose();\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/ExtractCosmicSvs/ExtractCosmicSvsMain.cs",
    "content": "﻿using CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing VariantAnnotation.Providers;\n\nnamespace SAUtils.ExtractCosmicSvs\n{\n    public static class ExtractCosmicSvsMain\n    {\n        private static string _breakendTsv;\n        private static string _cnvTsv;\n        private static string _outputDir;\n        private static string _compressedReference;\n\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"brk|b=\",\n                    \"input TSV file with breakend data\",\n                    v => _breakendTsv = v\n                },\n                {\n                    \"cnv|c=\",\n                    \"input TSV file with CNV data\",\n                    v => _cnvTsv = v\n                },\n                {\n                    \"ref|r=\",\n                    \"compressed reference sequence file\",\n                    v => _compressedReference = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory for intermediate TSV\",\n                    v => _outputDir = v\n                }\n            };\n\n            var commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .HasRequiredParameter(_cnvTsv, \"input TSV file with CNV data\", \"--cnv\")\n                .HasRequiredParameter(_outputDir, \"output directory name\", \"--out\")\n                .CheckDirectoryExists(_outputDir, \"output directory name\", \"--out\")\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Reads provided supplementary data files 
and populates tsv files\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n\n        private static ExitCodes ProgramExecution()\n        {\n            var version = DataSourceVersionReader.GetSourceVersion(_cnvTsv+ \".version\");\n            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\n\n            var cnvStream = _cnvTsv==null? null: GZipUtilities.GetAppropriateReadStream(_cnvTsv);\n            var breakendStream = _breakendTsv == null ? null : GZipUtilities.GetAppropriateReadStream(_breakendTsv);\n\n            using (new CosmicSvReader(cnvStream, breakendStream, version, _outputDir,\n                referenceProvider.Assembly, referenceProvider.RefNameToChromosome))\n            {\n                //cosmicSvExtractor.CreateTsv();\n            }\n            \n            return ExitCodes.Success;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/ExtractMiniSa/ExtractMiniSaMain.cs",
    "content": "﻿using System;\r\nusing CommandLine.Builders;\r\nusing CommandLine.NDesk.Options;\r\nusing ErrorHandling;\r\n\r\nnamespace SAUtils.ExtractMiniSa\r\n{\r\n    internal static class ExtractMiniSaMain \r\n\t{\r\n\t    #region members\r\n\r\n\t    // filenames\r\n\t    private  static string _compressedReference;\r\n\t    private  static string _inputSuppAnnotPath;\r\n\t    private  static string _dataSourceName;\r\n\r\n\t    private  static int _begin;\r\n\t    private  static int _end;\r\n\t    private  static string _miniSaDirectory;\r\n\r\n\t    #endregion\r\n\r\n        /// <summary>\r\n        /// executes the program\r\n        /// </summary>\r\n        private static ExitCodes ProgramExecution()\r\n\t\t{\r\n\t\t\tvar extractor = new MiniSaExtractor(_compressedReference, _inputSuppAnnotPath, _begin, _end, _dataSourceName, _miniSaDirectory);\r\n\t\t\tvar count = extractor.Extract();\r\n\r\n\t\t\tConsole.WriteLine(\"Extracted {0} supplementary annotations\", count);\r\n\r\n\t\t    return ExitCodes.Success;\r\n\t\t}\r\n\t\t\r\n\t\tpublic static ExitCodes Run(string command,string[] commandArgs)\r\n\t\t{\r\n            \r\n\t\t\tvar ops = new OptionSet\r\n\t\t\t{\r\n\t\t\t\t{\r\n\t\t\t\t\t \"ref|r=\",\r\n\t\t\t\t\t \"compressed reference sequence file\",\r\n\t\t\t\t\t v => _compressedReference = v\r\n\t\t\t\t },\r\n\t\t\t\t{\r\n\t\t\t\t\t\"in|i=\",\r\n\t\t\t\t\t\"input Nirvana Supplementary Annotations {file}\",\r\n\t\t\t\t\tv => _inputSuppAnnotPath = v\r\n\t\t\t\t},\r\n\t\t\t\t{\r\n\t\t\t\t\t\"name|n=\",\r\n\t\t\t\t\t\"data source {name}\",\r\n\t\t\t\t\tv => _dataSourceName = v\r\n\t\t\t\t},\r\n\t\t\t\t{\r\n\t\t\t\t\t\"begin|b=\",\r\n\t\t\t\t\t\"reference begin {position}\",\r\n\t\t\t\t\t(int v) => _begin= v\r\n\t\t\t\t},\r\n\t\t\t\t{\r\n\t\t\t\t\t\"end|e=\",\r\n\t\t\t\t\t\"reference end {allele}\",\r\n\t\t\t\t\t(int v) => _end= v\r\n\t\t\t\t},\r\n\t\t\t\t{\r\n\t\t\t\t\t\"out|o=\",\r\n\t\t\t\t\t\"output {directory}\",\r\n\t\t\t\t\tv => 
_miniSaDirectory= v\r\n\t\t\t\t}\r\n\t\t\t};\r\n\r\n\t\t\tvar commandLineExample = $\"{command} --in <Supplementary Annotations path> --out <Supplementary Annotations Directory> --begin <position> --end <position> --name <dataSource>\";\r\n\r\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\r\n                .Parse()\r\n                .CheckInputFilenameExists(_inputSuppAnnotPath, \"Nirvana supplementary annotations\", \"--in\")\r\n                .CheckInputFilenameExists(_compressedReference, \"Compressed reference sequence file name\", \"--ref\")\r\n                .HasRequiredParameter(_miniSaDirectory, \"output directory\", \"--out\")\r\n                .SkipBanner()\r\n                .ShowHelpMenu(\"Extracts mini supplementary annotations for the given range from Nirvana Supplementary Annotations files.\", commandLineExample)\r\n                .ShowErrors()\r\n                .Execute(ProgramExecution);\r\n\r\n            return exitCode;\r\n\t    }\r\n\t}\r\n}\r\n"
  },
  {
    "path": "SAUtils/ExtractMiniSa/MiniSaExtractor.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\n\r\nnamespace SAUtils.ExtractMiniSa\r\n{\r\n    public sealed class MiniSaExtractor\r\n    {\r\n        #region members\r\n\r\n        private readonly int _begin;\r\n        private readonly int _end;\r\n        private readonly string _saPath;\r\n        private readonly string _miniSaPath;\r\n\r\n        #endregion\r\n\r\n        public MiniSaExtractor(string compressedRefFile, string saPath, int begin, int end, string datasourceName = null,\r\n            string outputDir = null)\r\n        {\r\n            _begin  = begin;\r\n            _end    = end;\r\n            _saPath = saPath;\r\n\r\n            //new ReferenceSequenceProvider(FileUtilities.GetReadStream(compressedRefFile)).RefNameToChromosome;\r\n\r\n            //string referenceName = GetReferenceName(saPath, refChromDict);\r\n            //_miniSaPath = GetMiniSaPath(referenceName, begin, end, datasourceName, outputDir);\r\n\r\n            Console.WriteLine($\"MiniSA output to: {_miniSaPath}\");\r\n        }\r\n\r\n        private static string GetMiniSaPath(string referenceName, int begin, int end, string dataSourceName, string outputDir)\r\n        {\r\n            string miniSaPath = dataSourceName == null\r\n                ? 
$\"{referenceName}_{begin}_{end}.nsa\"\r\n                : $\"{referenceName}_{begin}_{end}_{dataSourceName}.nsa\";\r\n\r\n            if (outputDir != null) miniSaPath = Path.Combine(outputDir, miniSaPath);\r\n            return miniSaPath;\r\n        }\r\n\r\n        //private static string GetReferenceName(string saPath, IDictionary<string, Chromosome> refChromDict)\r\n        //{\r\n        //    ISupplementaryAnnotationHeader header;\r\n\r\n        //    using (var stream = FileUtilities.GetReadStream(saPath))\r\n        //    using (var reader = new ExtendedBinaryReader(stream))\r\n        //    {\r\n        //        header = SaReader.GetHeader(reader);\r\n        //    }\r\n\r\n        //    return refChromDict[header.ReferenceSequenceName].UcscName;\r\n        //}\r\n\r\n        //private static SaWriter GetSaWriter(string saPath, ISupplementaryAnnotationHeader header,\r\n        //    List<ISupplementaryInterval> smallVariantIntervals, List<ISupplementaryInterval> svIntervals,\r\n        //    List<ISupplementaryInterval> allVariantIntervals,List<(int,string)> globalMajorAlleleInRefMinors)\r\n        //{\r\n        //    var stream    = FileUtilities.GetCreateStream(saPath);\r\n        //    var idxStream = FileUtilities.GetCreateStream(saPath + \".idx\");\r\n        //    return new SaWriter(stream, idxStream, header, smallVariantIntervals, svIntervals, allVariantIntervals,globalMajorAlleleInRefMinors);\r\n        //}\r\n\r\n        //private static SaReader GetSaReader(string saPath)\r\n        //{\r\n        //    var stream    = FileUtilities.GetReadStream(saPath);\r\n        //    var idxStream = FileUtilities.GetReadStream(saPath + \".idx\");\r\n        //    return new SaReader(stream, idxStream);\r\n        //}\r\n\r\n        public int Extract()\r\n        {\r\n            var count = 0;\r\n\r\n            //using (var reader = GetSaReader(_saPath))\r\n            //{\r\n            //    var smallVariantIntervals = GetIntervals(\"small 
variants\", reader.SmallVariantIntervals);\r\n            //    var svIntervals           = GetIntervals(\"SVs\",            reader.SvIntervals);\r\n            //    var allVariantIntervals   = GetIntervals(\"all variants\",   reader.AllVariantIntervals);\r\n            //    var globalMajorAlleles = GetGlobaleMajorAlleleAndRefMinors(reader.GlobalMajorAlleleInRefMinors);\r\n\r\n            //    using (var writer = GetSaWriter(_miniSaPath, reader.Header, smallVariantIntervals, svIntervals,\r\n            //            allVariantIntervals,globalMajorAlleles))\r\n            //    {\r\n            //        for (int position = _begin; position <= _end; position++)\r\n            //        {\r\n            //            var saPosition = reader.GetAnnotation(position);\r\n            //            if (saPosition == null) continue;\r\n\r\n            //            writer.Write(saPosition, position);\r\n            //            count++;\r\n            //        }\r\n            //    }\r\n            //}\r\n\r\n            return count;\r\n        }\r\n\r\n        private List<(int,string)> GetGlobaleMajorAlleleAndRefMinors(IEnumerable<(int Position, string)> readerGlobalMajorAlleleInRefMinors)\r\n        {\r\n            var overlappedRefMinors = new List<(int,string)>();\r\n            foreach (var refMinor in readerGlobalMajorAlleleInRefMinors)\r\n            {\r\n                if(refMinor.Position>=_begin && refMinor.Position<=_end)\r\n                    overlappedRefMinors.Add(refMinor);\r\n            }\r\n            return overlappedRefMinors;\r\n        }\r\n\r\n        //private List<ISupplementaryInterval> GetIntervals(string description,\r\n        //    IEnumerable<Interval<ISupplementaryInterval>> intervals)\r\n        //{\r\n        //    var miniIntervals  = new List<ISupplementaryInterval>();\r\n        //    var targetInterval = new Interval(_begin, _end);\r\n\r\n        //    var allIntervals = intervals;\r\n\r\n        //    if (allIntervals != 
null)\r\n        //    {\r\n        //        foreach (var interval in allIntervals)\r\n        //        {\r\n        //            if (targetInterval.Overlaps(interval.Begin, interval.End)) miniIntervals.Add(interval.Value);\r\n        //        }\r\n        //    }\r\n\r\n        //    Console.WriteLine($\"Found {miniIntervals.Count} supplementary intervals for {description}.\");\r\n        //    return miniIntervals;\r\n        //}\r\n    }\r\n}\r\n"
  },
  {
    "path": "SAUtils/ExtractMiniXml/ExtractMiniXmlMain.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing CommandLine.Builders;\r\nusing CommandLine.NDesk.Options;\r\nusing ErrorHandling;\r\n\r\nnamespace SAUtils.ExtractMiniXml\r\n{\r\n    public static class ExtractMiniXmlMain\r\n\t{\r\n\t    private  static string _inputXmlFile;\r\n\t    private  static string _accessions;\r\n\t    private  static string _outputDir;\r\n\r\n\t    private static ExitCodes ProgramExecution()\r\n\t    {\r\n\t\t    var accessions = GetAccessions(_accessions);\r\n\t        if (accessions.Any(x=>x.StartsWith(\"RCV\")))\r\n\t        {\r\n\t\t        var rcvExtractor = new RcvXmlExtractor(_inputXmlFile, accessions, _outputDir);\r\n\t\t        rcvExtractor.Extract();\r\n\t        }\r\n\r\n\t        if (accessions.Any(x=>x.StartsWith(\"VCV\")))\r\n\t        {\r\n\t\t        var vcvExtractor = new VcvXmlExtractor(_inputXmlFile, accessions, _outputDir);\r\n\t\t        vcvExtractor.Extract();\r\n\t        }\r\n\r\n\t        return ExitCodes.Success;\r\n        }\r\n\r\n\t    private static List<string> GetAccessions(string accString)\r\n\t    {\r\n\t\t    var accessions = new List<string>();\r\n\t\t    if (Directory.Exists(accString))\r\n\t\t    {\r\n\t\t\t    foreach (var fileName in Directory.EnumerateFiles(accString))\r\n\t\t\t    {\r\n\t\t\t\t    if(fileName.Contains(\"RCV\") || fileName.Contains(\"VCV\")) accessions.Add(Path.GetFileNameWithoutExtension(fileName));\r\n\t\t\t    }\r\n\r\n\t\t\t    return accessions;\r\n\t\t    }\r\n\r\n\t\t    return accString.Split(',').ToList();\r\n\t\t    \r\n\t    }\r\n\r\n\t    public static ExitCodes Run(string command, string[] commandArgs)\r\n        {\r\n\t\t\tvar ops = new OptionSet\r\n\t\t\t{\r\n\t\t\t\t{\r\n\t\t\t\t\t\"i|in=\",\r\n\t\t\t\t\t\"Input XML {file}\",\r\n\t\t\t\t\tv => _inputXmlFile = v\r\n\t\t\t\t},\r\n\t\t\t\t{\r\n\t\t\t\t\t\"a|acc=\",\r\n\t\t\t\t\t\"accessions\",\r\n\t\t\t\t\tv => _accessions = 
v\r\n\t\t\t\t},\r\n\t\t\t\t{\r\n\t\t\t\t\t\"o|out=\",\r\n\t\t\t\t\t\"Output {dir}\",\r\n\t\t\t\t\tv => _outputDir = v\r\n\t\t\t\t}\r\n\t\t\t};\r\n\r\n\t\t\tvar commandLineExample = $\"{command} --in <xml file> --out <output Directory> --acc <accessions>\";\r\n\r\n\t\t\tvar exitCode = new ConsoleAppBuilder(commandArgs, ops)\r\n\t            .Parse()\r\n\t            .CheckInputFilenameExists(_inputXmlFile, \"input XML file\", \"--in\")\r\n\t            .HasRequiredParameter(_outputDir, \"output directory\", \"--out\")\r\n                .HasRequiredParameter(_accessions, \"comma separated list of accessions or folder containing mini XML files to update\", \"--acc\")\r\n                .SkipBanner()\r\n                .ShowHelpMenu(\"Extracts mini XML files for the given RCV/VCV accessions from a ClinVar XML file.\", commandLineExample)\r\n\t            .ShowErrors()\r\n\t            .Execute(ProgramExecution);\r\n\t        \r\n\t        return exitCode;\r\n\t\t}\r\n\t}\r\n}\r\n"
  },
  {
    "path": "SAUtils/ExtractMiniXml/RcvXmlExtractor.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Xml;\nusing Compression.Utilities;\nusing IO;\n\nnamespace SAUtils.ExtractMiniXml\n{\n\tpublic sealed class RcvXmlExtractor\n\t{\n\t\tprivate readonly string _inputXmlFile;\n\t\tprivate readonly string _outputDir;\n\t\tprivate readonly List<string> _rcvIds;\n\n\t\tprivate const string XmlHeader = \"<?xml version=\\\"1.0\\\" encoding=\\\"utf-8\\\" standalone=\\\"yes\\\"?>\"+\"\\n\"+ \"<ReleaseSet Dated=\\\"2016-07-04\\\" xmlns:xsi=\\\"http://www.w3.org/2001/XMLSchema-instance\\\" Type=\\\"full\\\" xsi:noNamespaceSchemaLocation=\\\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\\\">\"+\"\\n\\n\";\n\n\t\tprivate const string XmlFooter = \"\\n\\n</ReleaseSet>\";\n\n\t\tpublic RcvXmlExtractor(string inputXmlFile, List<string>  rcvIds, string outputDir)\n\t\t{\n\t\t\t_inputXmlFile = inputXmlFile;\n\t\t\t_rcvIds       = rcvIds;\n            _outputDir    = outputDir;\n        }\n\n\t\tpublic void Extract()\n\t\t{\n            using (var reader = GZipUtilities.GetAppropriateStreamReader(_inputXmlFile))\n\t\t\tusing (var xmlReader = XmlReader.Create(reader, new XmlReaderSettings { DtdProcessing = DtdProcessing.Prohibit, IgnoreWhitespace = true }))\n\t\t\t{\n\t\t\t\tvar existVarSet = xmlReader.ReadToDescendant(\"ClinVarSet\");\n\n                while (_rcvIds.Count > 0 && existVarSet)\n\t\t\t\t{\n\t\t\t\t\t var rcvContents = xmlReader.ReadOuterXml();\n\n\t\t\t\t\tvar rcv = DetectRcv(_rcvIds, rcvContents);\n                    \n                    if (rcv!=null)\n                    {\n                        var targetedContent =rcvContents;\n                        var outXmlFile = Path.Combine(_outputDir, rcv + \".xml\");\n                        WriteToFile(outXmlFile, targetedContent);\n                    }\n\t\t\t\t\tif(!xmlReader.IsStartElement(\"ClinVarSet\"))\n\t\t\t\t\t\texistVarSet = 
xmlReader.ReadToNextSibling(\"ClinVarSet\");\n\t\t\t\t}\n\n\t\t\t}\n\n\t\t\tif (_rcvIds.Count > 0)\n\t\t\t{\n\t\t\t\tConsole.WriteLine($\"Failed to Find {string.Join(',',_rcvIds)}\");\n\t\t\t}\n\n\t\t}\n\n        private static string DetectRcv(List<string> rcvs, string rcvContents)\n        {\n            foreach (var rcv in rcvs)\n            {\n                if (rcvContents.Contains(rcv))\n                {\n                    rcvs.Remove(rcv);\n                    return rcv;\n                }\n            }\n\n            return null;\n        }\n\n        private static void WriteToFile(string fileName, string targetedContent)\n        {\n            using (var writer = new StreamWriter(FileUtilities.GetCreateStream(fileName)))\n            {\n                writer.Write(XmlHeader);\n                writer.Write(targetedContent);\n                writer.Write(XmlFooter);\n                Console.WriteLine($\"Creating/ updating {fileName}\");\n            }\n        }\n\n\t}\n}"
  },
  {
    "path": "SAUtils/ExtractMiniXml/VcvXmlExtractor.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Xml;\nusing Compression.Utilities;\nusing IO;\n\nnamespace SAUtils.ExtractMiniXml\n{\n    public sealed class VcvXmlExtractor\n    {\n        private readonly string _inputXmlFile;\n        private readonly string _outputDir;\n        private readonly List<string> _vcvIds;\n        \n        private const string VcvRecordTag = \"VariationArchive\";\n\n        private const string XmlHeader = \"<?xml version=\\\"1.0\\\" encoding=\\\"UTF-8\\\" standalone=\\\"yes\\\"?>\\n\" +\n                                         \"<ClinVarVariationRelease xmlns:xsi=\\\"http://www.w3.org/2001/XMLSchema-instance\\\" xsi:noNamespaceSchemaLocation=\\\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_variation/variation_archive_1.4.xsd\\\" ReleaseDate=\\\"2019-12-31\\\">\\n\";\n\n        private const string XmlFooter = \"\\n</ClinVarVariationRelease>\";\n        \n        public VcvXmlExtractor(string inputXmlFile, List<string> vcvIds, string outputDir)\n        {\n            _inputXmlFile = inputXmlFile;\n            _vcvIds       = vcvIds;\n            _outputDir    = outputDir;\n        }\n\n        public void Extract()\n        {\n            using (var reader = GZipUtilities.GetAppropriateStreamReader(_inputXmlFile))\n            using (var xmlReader = XmlReader.Create(reader, new XmlReaderSettings { DtdProcessing = DtdProcessing.Prohibit, IgnoreWhitespace = true }))\n            {\n                var existVarSet = xmlReader.ReadToDescendant(VcvRecordTag);\n\n                while (_vcvIds.Count > 0 && existVarSet)\n                {\n                    var contents = xmlReader.ReadOuterXml();\n\n                    var rcv = DetectVcv(_vcvIds, contents);\n                    \n                    if (rcv !=null)\n                    {\n                        var targetedContent =contents;\n                        var outXmlFile      = Path.Combine(_outputDir, rcv + 
\".xml\");\n                        WriteToFile(outXmlFile, targetedContent);\n                    }\n                    if(!xmlReader.IsStartElement(VcvRecordTag))\n                        existVarSet = xmlReader.ReadToNextSibling(VcvRecordTag);\n                }\n\n            }\n\n            if (_vcvIds.Count > 0)\n            {\n                Console.WriteLine($\"Failed to Find {string.Join(',',_vcvIds)}\");\n            }\n\n        }\n        \n        private static void WriteToFile(string fileName, string targetedContent)\n        {\n            using (var writer = new StreamWriter(FileUtilities.GetCreateStream(fileName)))\n            {\n                writer.Write(XmlHeader);\n                writer.Write(targetedContent);\n                writer.Write(XmlFooter);\n                Console.WriteLine($\"Creating/ updating {fileName}\");\n            }\n        }\n\n        private static string DetectVcv(List<string> vcvs, string rcvContents)\n        {\n            foreach (var vcv in vcvs)\n            {\n                if (rcvContents.Contains(vcv))\n                {\n                    vcvs.Remove(vcv);\n                    return vcv;\n                }\n            }\n\n            return null;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/FusionCatcher/CollectionType.cs",
    "content": "﻿namespace SAUtils.FusionCatcher\n{\n    public enum CollectionType : byte\n    {\n        Germline,\n        Somatic,\n        Relationships        \n    }\n}"
  },
  {
    "path": "SAUtils/FusionCatcher/CreateFusionCatcher.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing Genome;\nusing IO;\nusing VariantAnnotation.Caches;\nusing VariantAnnotation.GeneFusions.IO;\nusing VariantAnnotation.GeneFusions.SA;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.IO.Caches;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.FusionCatcher\n{\n    public static class CreateFusionCatcher\n    {\n        private static string _transcriptCache37Path;\n        private static string _transcriptCache38Path;\n        private static string _dataDirectory;\n        private static string _reference38Path;\n        private static string _outputDirectory;\n\n        private static ExitCodes ProgramExecution()\n        {\n            var geneKeyToFusion = new Dictionary<ulong, GeneFusionSourceBuilder>();\n            var knownGenes      = new HashSet<string>();\n            var oncoGenes       = new HashSet<uint>();\n\n            Dictionary<ushort, Chromosome> refIndexToChromosome = GetReferences(_reference38Path);\n\n            AddGenes(_transcriptCache37Path, refIndexToChromosome, knownGenes, \"GRCh37\");\n            AddGenes(_transcriptCache38Path, refIndexToChromosome, knownGenes, \"GRCh38\");\n\n            DataSourceVersion version = CreateDataSourceVersion(Path.Combine(_dataDirectory, \"version.txt\"));\n\n            // relationships\n            FusionCatcherDataSource.Parse(GetStream(\"pairs_pseudogenes.txt\"), GeneFusionSource.Pseudogene, CollectionType.Relationships,\n                geneKeyToFusion, knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"paralogs.txt\"), GeneFusionSource.Paralog, CollectionType.Relationships, geneKeyToFusion,\n                knownGenes);\n            
FusionCatcherDataSource.Parse(GetStream(\"readthroughs.txt\"), GeneFusionSource.Readthrough, CollectionType.Relationships, geneKeyToFusion,\n                knownGenes);\n\n            // oncogenes\n            FusionCatcherOncogenes.Parse(GetStream(\"cancer_genes.txt\"), \"Bushman\", oncoGenes, knownGenes);\n            FusionCatcherOncogenes.Parse(GetStream(\"oncogenes_more.txt\"),    \"ONGENE\",  oncoGenes, knownGenes);\n            FusionCatcherOncogenes.Parse(GetStream(\"tumor_genes.txt\"),  \"UniProt\", oncoGenes, knownGenes);\n            Console.WriteLine($\"- found a total of {oncoGenes.Count:N0} oncogenes.\");\n\n            // germline fusions\n            FusionCatcherDataSource.Parse(GetStream(\"1000genomes.txt\"), GeneFusionSource.OneK_Genomes_Project, CollectionType.Germline,\n                geneKeyToFusion, knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"banned.txt\"), GeneFusionSource.Healthy_strong_support, CollectionType.Germline, geneKeyToFusion,\n                knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"bodymap2.txt\"), GeneFusionSource.Illumina_BodyMap2, CollectionType.Germline, geneKeyToFusion,\n                knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"cacg.txt\"),     GeneFusionSource.CACG,     CollectionType.Germline, geneKeyToFusion, knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"conjoing.txt\"), GeneFusionSource.ConjoinG, CollectionType.Germline, geneKeyToFusion, knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"cortex.txt\"), GeneFusionSource.Healthy_prefrontal_cortex, CollectionType.Germline,\n                geneKeyToFusion, knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"dgd.txt\"), GeneFusionSource.Duplicated_Genes_Database, CollectionType.Germline, geneKeyToFusion,\n                knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"gtex.txt\"), GeneFusionSource.GTEx_healthy_tissues, 
CollectionType.Germline, geneKeyToFusion,\n                knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"healthy.txt\"), GeneFusionSource.Healthy, CollectionType.Germline, geneKeyToFusion, knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"hpa.txt\"), GeneFusionSource.Human_Protein_Atlas, CollectionType.Germline, geneKeyToFusion,\n                knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"non-cancer_tissues.txt\"), GeneFusionSource.Babiceanu_NonCancerTissues, CollectionType.Germline,\n                geneKeyToFusion, knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"non-tumor_cells.txt\"), GeneFusionSource.NonTumorCellLines, CollectionType.Germline,\n                geneKeyToFusion, knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"tcga-normal.txt\"), GeneFusionSource.TumorFusions_normal, CollectionType.Germline, geneKeyToFusion,\n                knownGenes);\n\n            // somatic fusions\n            FusionCatcherDataSource.Parse(GetStream(\"18cancers.txt\"), GeneFusionSource.Alaei_Mahabadi_18_Cancers, CollectionType.Somatic,\n                geneKeyToFusion, knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"ccle.txt\"),  GeneFusionSource.CCLE,       CollectionType.Somatic, geneKeyToFusion, knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"ccle2.txt\"), GeneFusionSource.CCLE_Klign, CollectionType.Somatic, geneKeyToFusion, knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"ccle3.txt\"), GeneFusionSource.CCLE_Vellichirammal, CollectionType.Somatic, geneKeyToFusion,\n                knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"cgp.txt\"), GeneFusionSource.Cancer_Genome_Project, CollectionType.Somatic, geneKeyToFusion,\n                knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"chimerdb4kb.txt\"), GeneFusionSource.ChimerKB_4, CollectionType.Somatic, 
geneKeyToFusion,\n                knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"chimerdb4pub.txt\"), GeneFusionSource.ChimerPub_4, CollectionType.Somatic, geneKeyToFusion,\n                knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"chimerdb4seq.txt\"), GeneFusionSource.ChimerSeq_4, CollectionType.Somatic, geneKeyToFusion,\n                knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"cosmic.txt\"), GeneFusionSource.COSMIC, CollectionType.Somatic, geneKeyToFusion, knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"gliomas.txt\"), GeneFusionSource.Bao_gliomas, CollectionType.Somatic, geneKeyToFusion,\n                knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"known.txt\"), GeneFusionSource.Known, CollectionType.Somatic, geneKeyToFusion, knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"mitelman.txt\"), GeneFusionSource.Mitelman_DB, CollectionType.Somatic, geneKeyToFusion,\n                knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"oesophagus.txt\"), GeneFusionSource.TCGA_oesophageal_carcinomas, CollectionType.Somatic,\n                geneKeyToFusion, knownGenes);\n            // FusionCatcherDataSource.Parse(GetStream(\"oncokb.txt\"), GeneFusionSource.OncoKB, CollectionType.Somatic, geneKeyToFusion, knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"pancreases.txt\"), GeneFusionSource.Bailey_pancreatic_cancers, CollectionType.Somatic,\n                geneKeyToFusion, knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"pcawg.txt\"), GeneFusionSource.PCAWG, CollectionType.Somatic, geneKeyToFusion, knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"prostate_cancer.txt\"), GeneFusionSource.Robinson_prostate_cancers, CollectionType.Somatic,\n                geneKeyToFusion, knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"tcga.txt\"), 
GeneFusionSource.TCGA, CollectionType.Somatic, geneKeyToFusion, knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"tcga-cancer.txt\"), GeneFusionSource.TumorFusions_tumor, CollectionType.Somatic, geneKeyToFusion,\n                knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"tcga2.txt\"), GeneFusionSource.TCGA_Gao, CollectionType.Somatic, geneKeyToFusion, knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"tcga3.txt\"), GeneFusionSource.TCGA_Vellichirammal, CollectionType.Somatic, geneKeyToFusion,\n                knownGenes);\n            FusionCatcherDataSource.Parse(GetStream(\"ticdb.txt\"), GeneFusionSource.TICdb, CollectionType.Somatic, geneKeyToFusion, knownGenes);\n\n            (GeneFusionSourceCollection[] index, GeneFusionIndexEntry[] indexEntries) = IndexBuilder.Convert(geneKeyToFusion);\n            Console.WriteLine($\"- created {index.Length:N0} index entries.\");\n\n            uint[] oncogeneKeys = oncoGenes.OrderBy(x => x).ToArray();\n            \n            WriteGeneFusions(_outputDirectory, oncogeneKeys, index, indexEntries, version);\n\n            Console.WriteLine();\n            Console.WriteLine($\"Total: {geneKeyToFusion.Count:N0} gene pairs in database.\");\n\n            return ExitCodes.Success;\n        }\n        \n        private static Dictionary<ushort, Chromosome> GetReferences(string referencePath)\n        {\n            Console.Write(\"- loading reference sequence... \");\n            var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(referencePath));\n            Console.WriteLine(\"finished.\");\n\n            return sequenceProvider.RefIndexToChromosome;\n        }\n\n        private static void AddGenes(string cachePath, Dictionary<ushort, Chromosome> refIndexToChromosome, ISet<string> knownGenes,\n            string description)\n        {\n            Console.Write($\"- loading known genes ({description})... 
\");\n            int startCount = knownGenes.Count;\n\n            using (var reader = new TranscriptCacheReader(FileUtilities.GetReadStream(cachePath)))\n            {\n                TranscriptCacheData cacheData = reader.Read(refIndexToChromosome);\n\n                foreach (IGene gene in cacheData.Genes)\n                {\n                    string ensemblId = gene.EnsemblId.WithoutVersion;\n                    if (string.IsNullOrEmpty(ensemblId)) continue;\n                    knownGenes.Add(ensemblId);\n                }\n            }\n\n            int numAdded = knownGenes.Count - startCount;\n            Console.WriteLine($\"added {numAdded:N0} Ensembl gene IDs.\");\n        }\n\n        private static void WriteGeneFusions(string outputDirectory, uint[] oncogeneKeys, GeneFusionSourceCollection[] index,\n            // ReSharper disable once SuggestBaseTypeForParameter\n            GeneFusionIndexEntry[] indexEntries, DataSourceVersion version)\n        {\n            Console.Write(\"- writing gene fusions SA file... 
\");\n            string    outputPath = Path.Combine(outputDirectory, $\"FusionCatcher_{version.Version}{SaCommon.GeneFusionSourceSuffix}\");\n            using var writer     = new GeneFusionSourceWriter(FileUtilities.GetCreateStream(outputPath), \"fusionCatcher\", version);\n            writer.Write(oncogeneKeys, index, indexEntries);\n            Console.WriteLine(\"finished.\");\n        }\n\n        private static DataSourceVersion CreateDataSourceVersion(string filePath)\n        {\n            var fi = new FileInfo(filePath);\n            long releaseDateTicks = fi.CreationTime.Ticks;\n            \n            // const string description =\n            using var reader = new StreamReader(FileUtilities.GetReadStream(filePath));\n            string    line   = reader.ReadLine();\n            if (line == null) throw new InvalidDataException(\"Could not extract the first line from version.txt\");\n\n            int    spacePos = line.LastIndexOf(' ');\n            string version  = line.Substring(spacePos + 1);\n            \n            return new DataSourceVersion(\"FusionCatcher\", version, releaseDateTicks, \"known germline and somatic gene fusions\");\n        }\n\n        private static Stream GetStream(string filename) => GZipUtilities.GetAppropriateReadStream(Path.Combine(_dataDirectory, filename));\n\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"cache37=\",\n                    \"transcript cache {path} for GRCh37\",\n                    v => _transcriptCache37Path = v\n                },\n                {\n                    \"cache38=\",\n                    \"transcript cache {path} for GRCh38\",\n                    v => _transcriptCache38Path = v\n                },\n                {\n                    \"in|i=\",\n                    \"FusionCatcher data {directory}\",\n                    v => _dataDirectory 
= v\n                },\n                {\n                    \"out|o=\",\n                    \"output {directory}\",\n                    v => _outputDirectory = v\n                },\n                {\n                    \"ref|r=\",\n                    \"input reference sequence {path} for GRCh38\",\n                    v => _reference38Path = v\n                }\n            };\n\n            var commandLineExample = $\"{command} [options]\";\n\n            ExitCodes exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_reference38Path,       \"reference sequence (GRCh38)\", \"--ref\")\n                .CheckInputFilenameExists(_transcriptCache37Path, \"transcript cache (GRCh37)\",   \"--cache37\")\n                .CheckInputFilenameExists(_transcriptCache38Path, \"transcript cache (GRCh38)\",   \"--cache38\")\n                .CheckDirectoryExists(_dataDirectory,   \"FusionCatcher data directory\", \"--in\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\",             \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a supplementary database with FusionCatcher annotations\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/FusionCatcher/FusionCatcherDataSource.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing VariantAnnotation.GeneFusions.SA;\nusing VariantAnnotation.GeneFusions.Utilities;\n\nnamespace SAUtils.FusionCatcher\n{\n    public static class FusionCatcherDataSource\n    {\n        public static void Parse(Stream stream, GeneFusionSource source, CollectionType collectionType,\n            Dictionary<ulong, GeneFusionSourceBuilder> geneKeyToFusion, HashSet<string> knownEnsemblGenes)\n        {\n            Console.Write($\"- parsing {source}... \");\n\n            using var reader              = new StreamReader(stream);\n            var       numGeneFusionsAdded = 0;\n\n            while (true)\n            {\n                string line = reader.ReadLine();\n                if (line == null) break;\n\n                string[] cols = line.Split('\\t');\n                if (cols.Length != 2) throw new InvalidDataException($\"Expected 2 columns in the FusionCatcher file, but found {cols.Length}\");\n\n                string gene  = cols[0];\n                string gene2 = cols[1];\n\n                bool hasGene  = knownEnsemblGenes.Contains(gene);\n                bool hasGene2 = knownEnsemblGenes.Contains(gene2);\n                if (!hasGene || !hasGene2) continue;\n\n                ulong fusionKey = GeneFusionKey.Create(GeneFusionKey.CreateGeneKey(cols[0]), GeneFusionKey.CreateGeneKey(cols[1]));\n\n                if (!geneKeyToFusion.TryGetValue(fusionKey, out GeneFusionSourceBuilder geneFusion))\n                {\n                    geneFusion           = new GeneFusionSourceBuilder();\n                    geneKeyToFusion[fusionKey] = geneFusion;\n                }\n\n                switch (collectionType)\n                {\n                    case CollectionType.Germline:\n                        geneFusion.GermlineSources.Add(source);\n                        break;\n                    case CollectionType.Somatic:\n                        
geneFusion.SomaticSources.Add(source);\n                        break;\n                    case CollectionType.Relationships:\n                        switch (source)\n                        {\n                            case GeneFusionSource.Pseudogene:\n                                geneFusion.IsPseudogenePair = true;\n                                break;\n                            case GeneFusionSource.Paralog:\n                                geneFusion.IsParalogPair = true;\n                                break;\n                            case GeneFusionSource.Readthrough:\n                                geneFusion.IsReadthrough = true;\n                                break;\n                            default:\n                                throw new NotSupportedException($\"Found an unsupported relationship: {source}\");\n                        }\n                        break;\n                    default:\n                        throw new NotSupportedException($\"Found an unsupported gene fusion collection type: {collectionType}\");\n                }\n\n                numGeneFusionsAdded++;\n            }\n\n            Console.WriteLine($\"added {numGeneFusionsAdded:N0} gene fusions.\");\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/FusionCatcher/FusionCatcherOncogenes.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing VariantAnnotation.GeneFusions.Utilities;\n\nnamespace SAUtils.FusionCatcher\n{\n    public static class FusionCatcherOncogenes\n    {\n        public static void Parse(Stream stream, string description, HashSet<uint> oncoGenes, HashSet<string> knownEnsemblGenes)\n        {\n            Console.Write($\"- parsing {description} oncogenes... \");\n\n            using var reader            = new StreamReader(stream);\n            var       numOncogenesAdded = 0;\n\n            while (true)\n            {\n                string line = reader.ReadLine();\n                if (line == null) break;\n\n                string[] cols = line.Split('\\t');\n                if (cols.Length != 1) throw new InvalidDataException($\"Expected 1 column in the FusionCatcher file, but found {cols.Length}\");\n\n                string gene = cols[0];\n\n                bool hasGene = knownEnsemblGenes.Contains(gene);\n                if (!hasGene) continue;\n\n                uint geneKey = GeneFusionKey.CreateGeneKey(gene);\n                oncoGenes.Add(geneKey);\n\n                numOncogenesAdded++;\n            }\n\n            Console.WriteLine($\"added {numOncogenesAdded:N0} oncogenes.\");\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/FusionCatcher/GeneFusionSourceBuilder.cs",
    "content": "﻿using System.Collections.Generic;\nusing VariantAnnotation.GeneFusions.SA;\n\nnamespace SAUtils.FusionCatcher\n{\n    public sealed class GeneFusionSourceBuilder\n    {\n        public          bool                   IsPseudogenePair;\n        public          bool                   IsParalogPair;\n        public          bool                   IsReadthrough;\n        public readonly List<GeneFusionSource> GermlineSources = new();\n        public readonly List<GeneFusionSource> SomaticSources  = new();\n\n        public GeneFusionSourceCollection Create()\n        {\n            GeneFusionSource[] germlineSources = GermlineSources.Count > 0 ? GermlineSources.ToArray() : null;\n            GeneFusionSource[] somaticSources  = SomaticSources.Count  > 0 ? SomaticSources.ToArray() : null;\n            return new GeneFusionSourceCollection(IsPseudogenePair, IsParalogPair, IsReadthrough, germlineSources, somaticSources);\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/FusionCatcher/GeneFusionSourceWriter.cs",
    "content": "﻿using System;\nusing System.IO;\nusing System.Text;\nusing Compression.Utilities;\nusing IO;\nusing IO.v2;\nusing VariantAnnotation.GeneFusions.IO;\nusing VariantAnnotation.GeneFusions.SA;\nusing VariantAnnotation.Interface.Providers;\n\nnamespace SAUtils.FusionCatcher\n{\n    public sealed class GeneFusionSourceWriter : IDisposable\n    {\n        private readonly ExtendedBinaryWriter _writer;\n\n        public GeneFusionSourceWriter(Stream stream, string jsonKey, IDataSourceVersion version, bool leaveOpen = false)\n        {\n            _writer = new ExtendedBinaryWriter(stream, Encoding.UTF8, leaveOpen);\n            WriteHeader();\n            _writer.Write(jsonKey);\n            version.Write(_writer);\n        }\n\n        private void WriteHeader()\n        {\n            var header = new Header(FileType.FusionCatcher, GeneFusionSourceReader.SupportedFileFormatVersion);\n            header.Write(_writer);\n        }\n\n        public void Write(uint[] oncogeneKeys, GeneFusionSourceCollection[] index, GeneFusionIndexEntry[] indexEntries)\n        {\n            using var ms = new MemoryStream();\n            using (var writer = new ExtendedBinaryWriter(ms, Encoding.UTF8, true))\n            {\n                writer.WriteOpt(oncogeneKeys.Length);\n                foreach (uint geneKey in oncogeneKeys) writer.WriteOpt(geneKey);\n\n                writer.WriteOpt(index.Length);\n                foreach (GeneFusionSourceCollection sourceCollection in index) sourceCollection.Write(writer);\n\n                writer.WriteOpt(indexEntries.Length);\n                foreach (GeneFusionIndexEntry indexEntry in indexEntries) indexEntry.Write(writer);\n            }\n\n            byte[] bytes = ms.ToArray();\n            _writer.WriteCompressedByteArray(bytes, bytes.Length);\n        }\n\n        public void Dispose() => _writer.Dispose();\n    }\n}"
  },
  {
    "path": "SAUtils/FusionCatcher/IndexBuilder.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing VariantAnnotation.GeneFusions.IO;\nusing VariantAnnotation.GeneFusions.SA;\n\nnamespace SAUtils.FusionCatcher\n{\n    public static class IndexBuilder\n    {\n        public static (GeneFusionSourceCollection[] Index, GeneFusionIndexEntry[] IndexEntries) Convert(Dictionary<ulong, GeneFusionSourceBuilder> geneKeyToSourceBuilder)\n        {\n            Dictionary<ulong, GeneFusionSourceCollection> geneKeyToSourceCollection = GetSourceCollection(geneKeyToSourceBuilder);\n\n            (GeneFusionSourceCollection[] index, Dictionary<GeneFusionSourceCollection, ushort> sourceCollectionToIndex) =\n                BuildIndex(geneKeyToSourceCollection.Values);\n\n            GeneFusionIndexEntry[] indexEntries = BuildIndexEntries(geneKeyToSourceCollection, sourceCollectionToIndex);\n\n            return (index, indexEntries);\n        }\n\n        private static GeneFusionIndexEntry[] BuildIndexEntries(Dictionary<ulong, GeneFusionSourceCollection> geneKeyToSourceCollection,\n            IReadOnlyDictionary<GeneFusionSourceCollection, ushort> sourceCollectionToIndex)\n        {\n            var indexEntries = new GeneFusionIndexEntry[geneKeyToSourceCollection.Count];\n            var currentIndex = 0;\n\n            foreach ((ulong geneKey, GeneFusionSourceCollection sourceCollection) in geneKeyToSourceCollection.OrderBy(x => x.Key))\n            {\n                if (!sourceCollectionToIndex.TryGetValue(sourceCollection, out ushort index))\n                    throw new InvalidDataException($\"Unable to find the gene fusion source collection for gene key: {geneKey}\");\n\n                indexEntries[currentIndex++] = new GeneFusionIndexEntry(geneKey, index);\n            }\n\n            return indexEntries;\n        }\n\n        private static (GeneFusionSourceCollection[] Index, Dictionary<GeneFusionSourceCollection, ushort> SourceCollectionToIndex) BuildIndex(\n            
Dictionary<ulong, GeneFusionSourceCollection>.ValueCollection sourceCollections)\n        {\n            var collectionToHits = new Dictionary<GeneFusionSourceCollection, BuilderMetadata>();\n\n            foreach (GeneFusionSourceCollection sourceCollection in sourceCollections)\n            {\n                if (collectionToHits.TryGetValue(sourceCollection, out BuilderMetadata metadata))\n                {\n                    metadata.NumHits++;\n                }\n                else\n                {\n                    collectionToHits[sourceCollection] = new BuilderMetadata {NumHits = 1, SourceCollection = sourceCollection};\n                }\n            }\n\n            // we want to order these in descending popularity\n            BuilderMetadata[] sortedIndex             = collectionToHits.Values.OrderByDescending(x => x.NumHits).ToArray();\n            var               index                   = new GeneFusionSourceCollection[sortedIndex.Length];\n            var               sourceCollectionToIndex = new Dictionary<GeneFusionSourceCollection, ushort>();\n\n            for (var i = 0; i < sortedIndex.Length; i++)\n            {\n                GeneFusionSourceCollection sourceCollection = sortedIndex[i].SourceCollection;\n                index[i]                                  = sourceCollection;\n                sourceCollectionToIndex[sourceCollection] = (ushort) i;\n            }\n\n            return (index, sourceCollectionToIndex);\n        }\n\n        private static Dictionary<ulong, GeneFusionSourceCollection> GetSourceCollection(\n            Dictionary<ulong, GeneFusionSourceBuilder> geneKeyToSourceBuilder)\n        {\n            var geneKeyToSourceCollection = new Dictionary<ulong, GeneFusionSourceCollection>(geneKeyToSourceBuilder.Count);\n\n            foreach ((ulong geneKey, GeneFusionSourceBuilder builder) in geneKeyToSourceBuilder)\n            {\n                GeneFusionSourceCollection sourceCollection = 
builder.Create();\n                geneKeyToSourceCollection[geneKey] = sourceCollection;\n            }\n\n            return geneKeyToSourceCollection;\n        }\n\n        private sealed class BuilderMetadata\n        {\n            public int                        NumHits;\n            public GeneFusionSourceCollection SourceCollection;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/GERP/GerpMain.cs",
    "content": "using System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.GenericScore;\nusing SAUtils.GenericScore.GenericScoreParser;\nusing SAUtils.InputFileParsers;\nusing VariantAnnotation.GenericScore;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.GERP\n{\n    public class GerpMain\n    {\n        private static string _inputFile;\n        private static string _compressedReference;\n        private static string _outputDirectory;\n\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"ref|r=\",\n                    \"compressed reference sequence file\",\n                    v => _compressedReference = v\n                },\n                {\n                    \"in|i=\",\n                    \"input file path\",\n                    v => _inputFile = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .HasRequiredParameter(_inputFile, \"GERP wiggle or TSV file\", \"--in\")\n                .CheckInputFilenameExists(_inputFile, \"GERP wiggle or TSV file\", \"--in\")\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"create Ancestral allele database from 1000Genomes 
data\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n\n        private static ExitCodes ProgramExecution()\n        {\n            var               referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\n            DataSourceVersion version           = DataSourceVersionReader.GetSourceVersion(_inputFile + \".version\");\n            var               outFileName       = $\"{version.Name}_{version.Version}\";\n\n            var nucleotides = new[] {\"N\"};\n\n            var  wigColumnIndex = new ColumnIndex(0, 2, null, null, 3, null);\n            var  tsvColumnIndex = new ColumnIndex(0, 1, null, null, 2, null);\n            bool isWig          = _inputFile.EndsWith(\"wig.gz\");\n\n            var parserSettings = new ParserSettings(\n                isWig ? wigColumnIndex : tsvColumnIndex,\n                nucleotides,\n                GenericScoreParser.NonConflictingScore\n            );\n\n            var writerSettings = new WriterSettings(\n                1_000_000,\n                nucleotides,\n                true,\n                EncoderType.Generic,\n                new GenericScoreEncoder(),\n                new ScoreJsonEncoder(SaCommon.GerpTag + SaCommon.Score, null),\n                new SaItemValidator(null, null)\n            );\n\n\n            using (var streamReader = new StreamReader(GZipUtilities.GetAppropriateReadStream(_inputFile)))\n            using (var parser = new GenericScoreParser(parserSettings, streamReader, referenceProvider.RefNameToChromosome))\n            using (var saStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.GsaFileSuffix)))\n            using (var indexStream =\n                   FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.GsaFileSuffix + SaCommon.IndexSuffix)))\n            using (var 
saWriter = new ScoreFileWriter(writerSettings, saStream, indexStream, version, referenceProvider,\n                       SaCommon.SchemaVersion, skipIncorrectRefEntries: true, leaveOpen: false))\n            {\n                saWriter.Write(parser.GetItems());\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/GeneIdentifiers/GeneSymbolUpdater.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing Newtonsoft.Json.Linq;\nusing OptimizedCore;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.GeneIdentifiers\n{\n    public sealed class GeneSymbolUpdater\n    {\n        private readonly Dictionary<string, string> _entrezGeneIdToSymbol;\n        private readonly Dictionary<string, string> _ensemblGeneIdToSymbol;\n        private readonly HashSet<string> _geneSymbols;\n\n        private readonly Dictionary<string, string> _updatedGeneSymbols;\n\n        private int _numGenesWhereBothIdsAreNull;\n        private int _numGeneSymbolsUpToDate;\n        private int _numGeneSymbolsUpdated;\n        private int _numGeneSymbolsNotInCache;\n        private int _numResolvedGeneSymbolConflicts;\n        private int _numUnresolvedGeneSymbolConflicts;\n\n        public GeneSymbolUpdater(Dictionary<string, string> entrezGeneIdToSymbol, Dictionary<string, string> ensemblGeneIdToSymbol)\n        {\n            _entrezGeneIdToSymbol = entrezGeneIdToSymbol;\n            _ensemblGeneIdToSymbol = ensemblGeneIdToSymbol;\n            _geneSymbols = new HashSet<string>();\n            _updatedGeneSymbols = new Dictionary<string, string>();\n        }\n\n        public string UpdateGeneSymbol(string oldGeneSymbol, string ensemblGeneId, string entrezGeneId)\n        {\n            if (ensemblGeneId == null && entrezGeneId == null)\n            {\n                _numGenesWhereBothIdsAreNull++;\n                return null;\n            }\n            var ensemblSymbol = GetSymbol(ensemblGeneId, _ensemblGeneIdToSymbol);\n            var entrezGeneSymbol = GetSymbol(entrezGeneId, _entrezGeneIdToSymbol);\n            _geneSymbols.Clear();\n            if (ensemblSymbol != null) _geneSymbols.Add(ensemblSymbol);\n            if (entrezGeneSymbol != null) _geneSymbols.Add(entrezGeneSymbol);\n\n            if (_geneSymbols.Count == 0)\n            {\n                
_numGeneSymbolsNotInCache++;\n                return oldGeneSymbol;\n            }\n\n            var newGeneSymbol = _geneSymbols.First();\n\n            if (_geneSymbols.Count > 1)\n            {\n                newGeneSymbol = ResolveGeneSymbolConflict(oldGeneSymbol, ensemblSymbol, entrezGeneSymbol);\n                if (newGeneSymbol == null)\n                {\n                    Console.WriteLine($\"Unable to resolve gene symbol conflict for {oldGeneSymbol}: Ensembl: [{ensemblGeneId}]: {ensemblSymbol}, Entrez Gene: [{entrezGeneId}]: {entrezGeneSymbol}\");\n                    _numUnresolvedGeneSymbolConflicts++;\n                    return null;\n                }\n                _numResolvedGeneSymbolConflicts++;\n            }\n\n            if (newGeneSymbol == oldGeneSymbol) _numGeneSymbolsUpToDate++;\n            else\n            {\n                _updatedGeneSymbols[oldGeneSymbol] = newGeneSymbol;\n                _numGeneSymbolsUpdated++;\n            }\n\n            return newGeneSymbol;\n        }\n\n        private static string ResolveGeneSymbolConflict(string oldGeneSymbol, string ensemblSymbol, string entrezGeneSymbol)\n        {\n            var symbolCounts = new Dictionary<string, int>();\n            AddSymbol(symbolCounts, oldGeneSymbol);\n            AddSymbol(symbolCounts, ensemblSymbol);\n            AddSymbol(symbolCounts, entrezGeneSymbol);\n\n            var mostFrequentSymbol = symbolCounts.OrderByDescending(x => x.Value).First();\n            if (mostFrequentSymbol.Value == 1)\n            {\n                //Console.WriteLine($\"Found unique gene symbols when trying to resolve the gene symbol conflict. 
Entrez Gene {entrezGeneSymbol}\");\n                return null;\n            }\n\n            return mostFrequentSymbol.Key;\n        }\n\n        private static void AddSymbol(Dictionary<string, int> symbolCounts, string geneSymbol)\n        {\n            if (symbolCounts.TryGetValue(geneSymbol, out int counts)) symbolCounts[geneSymbol] = counts + 1;\n            else symbolCounts[geneSymbol] = 1;\n        }\n\n        private static string GetSymbol(string geneId, IReadOnlyDictionary<string, string> geneIdToSymbol)\n        {\n            if (geneId == null) return null;\n            return geneIdToSymbol.TryGetValue(geneId, out var symbol) ? symbol : null;\n        }\n\n        public void DisplayStatistics()\n        {\n            Console.ForegroundColor = ConsoleColor.Yellow;\n            Console.WriteLine(\"Gene Symbol Update Statistics\");\n            Console.ResetColor();\n            Console.WriteLine(\"============================================\");\n\n            StringBuilder sb = StringBuilderPool.Get();\n            var jo = new JsonObject(sb);\n            sb.Append(JsonObject.OpenBrace);\n\n            jo.AddIntValue(\"NumGeneSymbolsUpToDate\",           _numGeneSymbolsUpToDate);\n            jo.AddIntValue(\"NumGeneSymbolsUpdated\",            _numGeneSymbolsUpdated);\n            jo.AddIntValue(\"NumGenesWhereBothIdsAreNull\",      _numGenesWhereBothIdsAreNull);\n            jo.AddIntValue(\"NumGeneSymbolsNotInCache\",         _numGeneSymbolsNotInCache);\n            jo.AddIntValue(\"NumResolvedGeneSymbolConflicts\",   _numResolvedGeneSymbolConflicts);\n            jo.AddIntValue(\"NumUnresolvedGeneSymbolConflicts\", _numUnresolvedGeneSymbolConflicts);\n\n            sb.Append(JsonObject.CloseBrace);\n\n            Console.WriteLine(JObject.Parse(sb.ToString())); //pretty printing json\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/GeneIdentifiers/GeneUtilities.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing CacheUtils.Genes.IO;\r\nusing Compression.Utilities;\r\nusing Genome;\r\nusing ReferenceSequence.Utilities;\r\n\r\nnamespace SAUtils.GeneIdentifiers\r\n{\r\n    public static class GeneUtilities\r\n    {\r\n        public static string GetGeneSymbolFromId(string geneId, Dictionary<string, string> entrezGeneIdToSymbol, Dictionary<string, string> ensemblIdToSymbol)\r\n        {\r\n            string geneSymbol;\r\n\r\n            if (geneId.StartsWith(\"ENSG\")) return ensemblIdToSymbol.TryGetValue(geneId, out geneSymbol) ? geneSymbol : null;\r\n            \r\n            return entrezGeneIdToSymbol.TryGetValue(geneId, out geneSymbol) ? geneSymbol : null; \r\n        }\r\n\r\n        public static (Dictionary<string, string> EntrezGeneIdToSymbol, Dictionary<string, string> EnsemblIdToSymbol) ParseUniversalGeneArchive(string inputReferencePath, string universalGeneArchivePath)\r\n        {\r\n\r\n            Dictionary<string, Chromosome> refNameToChromosome;\r\n            if (inputReferencePath == null) refNameToChromosome = null;\r\n            else (_, refNameToChromosome, _) = SequenceHelper.GetDictionaries(inputReferencePath);\r\n\r\n            UgaGene[] genes;\r\n\r\n            using (var reader = new UgaGeneReader(GZipUtilities.GetAppropriateReadStream(universalGeneArchivePath),\r\n                refNameToChromosome))\r\n            {\r\n                genes = reader.GetGenes();\r\n            }\r\n\r\n            var entrezGeneIdToSymbol = genes.GetGeneIdToSymbol(x => x.EntrezGeneId);\r\n            var ensemblIdToSymbol = genes.GetGeneIdToSymbol(x => x.EnsemblId);\r\n            return (entrezGeneIdToSymbol, ensemblIdToSymbol);\r\n        }\r\n\r\n        private static Dictionary<string, string> GetGeneIdToSymbol(this IEnumerable<UgaGene> genes,\r\n            Func<UgaGene, string> geneIdFunc)\r\n        {\r\n            var dict = new 
Dictionary<string, string>();\r\n            foreach (var gene in genes)\r\n            {\r\n                var key = geneIdFunc(gene);\r\n                var symbol = gene.Symbol;\r\n                if (string.IsNullOrEmpty(key) || string.IsNullOrEmpty(symbol)) continue;\r\n                dict[key] = symbol;\r\n            }\r\n            return dict;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/GenericScore/GenericScoreParser/GenericScoreItem.cs",
    "content": "using System;\nusing Genome;\nusing VariantAnnotation.Interface.SA;\n\nnamespace SAUtils.GenericScore.GenericScoreParser\n{\n    public sealed class GenericScoreItem : ISupplementaryDataItem\n    {\n        public Chromosome Chromosome { get; }\n        public int         Position   { get; set; }\n        public string      RefAllele  { get; set; }\n        public string      AltAllele  { get; set; }\n\n        public readonly double Score;\n\n        public GenericScoreItem(Chromosome chromosome, int position, string refAllele, string altAllele, double score)\n        {\n            Chromosome = chromosome;\n            Position   = position;\n            RefAllele  = refAllele;\n            AltAllele  = altAllele;\n            Score      = score;\n        }\n\n        [Obsolete]\n        public string GetJsonString() => $\"\\\"score\\\":{Score}\";\n\n        public string InputLine { get; }\n    }\n}"
  },
  {
    "path": "SAUtils/GenericScore/GenericScoreParser/GenericScoreParser.cs",
    "content": "using System;\nusing System.Collections.Generic;\nusing System.Globalization;\nusing System.IO;\nusing ErrorHandling.Exceptions;\nusing Genome;\nusing OptimizedCore;\n\nnamespace SAUtils.GenericScore.GenericScoreParser\n{\n    public sealed class GenericScoreParser : IDisposable\n    {\n        private readonly ParserSettings _parserSettings;\n\n        private readonly StreamReader                     _reader;\n        private readonly Dictionary<string, Chromosome> _refNameToChromosome;\n        private readonly Dictionary<string, double>      _representativeScores;\n\n        private readonly Action<string, double, Dictionary<string, double>> _updateRepresentativeScores;\n\n        public GenericScoreParser(\n            ParserSettings parserSettings,\n            StreamReader reader,\n            Dictionary<string, Chromosome> refNameToChromosome\n        )\n        {\n            _reader               = reader;\n            _refNameToChromosome  = refNameToChromosome;\n            _parserSettings       = parserSettings;\n            _representativeScores = new Dictionary<string, double>();\n            foreach (string allele in _parserSettings.PossibleAlleles)\n            {\n                _representativeScores[allele] = double.NaN;\n            }\n\n            _updateRepresentativeScores = _parserSettings.ConflictResolutionFunction;\n        }\n\n        public IEnumerable<GenericScoreItem> GetItems()\n        {\n            string      line;\n            int         currentPosition   = -1;\n            Chromosome currentChromosome = null;\n            string      refAllele         = null;\n\n            ColumnIndex columnIndex = _parserSettings.ColumnIndex;\n\n            while ((line = _reader.ReadLine()) != null)\n            {\n                if (line.StartsWith(\"#\")) continue;\n\n                string[] fields = line.OptimizedSplit('\\t');\n\n                if (!_refNameToChromosome.TryGetValue(fields[columnIndex.Chromosome], out 
var chromosome)) continue;\n                int position = int.Parse(fields[columnIndex.Position]);\n\n                if (chromosome != currentChromosome || position != currentPosition)\n                {\n                    foreach (GenericScoreItem scoreItem in GetItemsAtOnePosition(currentChromosome, currentPosition, refAllele))\n                        yield return scoreItem;\n                }\n\n                currentChromosome = chromosome;\n                currentPosition   = position;\n\n                // add null checks for alleles\n                refAllele = columnIndex.RefAllele == null ? null : fields[columnIndex.RefAllele.Value];\n                string altAllele = columnIndex.AltAllele == null ? null : fields[columnIndex.AltAllele.Value];\n\n                // set saItem.AltAllele to 'N' if positional\n                if (_parserSettings.IsPositional) altAllele = \"N\";\n\n                if (double.TryParse(fields[columnIndex.Score], NumberStyles.Number | NumberStyles.AllowExponent, CultureInfo.InvariantCulture, out double score))\n                {\n                    _updateRepresentativeScores(altAllele, score, _representativeScores);\n                }\n            }\n\n            foreach (var scoreItem in GetItemsAtOnePosition(currentChromosome, currentPosition, refAllele))\n                yield return scoreItem;\n        }\n\n        private IEnumerable<GenericScoreItem> GetItemsAtOnePosition(Chromosome currentChromosome, int currentPosition, string refAllele)\n        {\n            if (currentChromosome == null) yield break;\n            foreach (string altAllele in _parserSettings.PossibleAlleles)\n            {\n                double score = _representativeScores[altAllele];\n                if (double.IsNaN(score)) continue;\n                yield return new GenericScoreItem(currentChromosome, currentPosition, refAllele, altAllele, score);\n                _representativeScores[altAllele] = double.NaN;\n            }\n        
}\n\n        public static void MaxRepresentativeScores(string altAllele, double score, Dictionary<string, double> highestScores)\n        {\n            if (double.IsNaN(highestScores[altAllele]) || highestScores[altAllele] < score)\n                highestScores[altAllele] = score;\n        }\n\n        public static void NonConflictingScore(string altAllele, double score, Dictionary<string, double> highestScores)\n        {\n            if (!double.IsNaN(highestScores[altAllele]))\n                throw new UserErrorException(\"Multiple scores oberved.\");\n\n            highestScores[altAllele] = score;\n        }\n\n        public static void MinRepresentativeScores(string altAllele, double score, Dictionary<string, double> highestScores)\n        {\n            if (double.IsNaN(highestScores[altAllele]) || highestScores[altAllele] > score)\n                highestScores[altAllele] = score;\n        }\n\n        public void Dispose()\n        {\n            _reader?.Dispose();\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/GenericScore/GenericScoreParser/SaItemValidator.cs",
    "content": "using System.IO;\nusing System.Linq;\nusing VariantAnnotation.Interface.Providers;\n\nnamespace SAUtils.GenericScore.GenericScoreParser\n{\n    public sealed class SaItemValidator\n    {\n        private readonly bool? _strictSnvCheck;\n        private readonly bool? _strictReferenceCheck;\n        \n        \n        /// <summary>\n        /// Performs checks on each saItem, will throw exception if strict checking is enabled, otherwise returns true/false\n        /// Setting strict checking to null disables all checks and true is always returned\n        /// </summary>\n        /// <param name=\"strictSnvCheck\"></param> Set to null to disable, if true, then exception will be thrown\n        /// <param name=\"strictReferenceCheck\"></param> Set to null to disable, if true, then exception will be thrown\n        public SaItemValidator(bool? strictSnvCheck, bool? strictReferenceCheck)\n        {\n            _strictSnvCheck       = strictSnvCheck;\n            _strictReferenceCheck = strictReferenceCheck;\n        }\n\n        public bool Validate(GenericScoreItem saItem, ISequenceProvider refProvider)\n        {\n            return CheckSnv(saItem) && CheckReference(saItem, refProvider);\n        }\n\n        private bool CheckReference(GenericScoreItem saItem, ISequenceProvider refProvider)\n        {\n            if (_strictReferenceCheck == null)\n                return true;\n\n            bool   hasParRegions = CheckParRegion(saItem, refProvider);\n            string refSequence   = refProvider.Sequence.Substring(saItem.Position - 1, saItem.RefAllele.Length);\n\n            if (string.IsNullOrEmpty(saItem.RefAllele) || saItem.RefAllele == refSequence || hasParRegions)\n                return true;\n\n            if (_strictReferenceCheck == false)\n                return false;\n\n            throw new InvalidDataException(\n                $\"The provided reference allele {saItem.RefAllele} at {saItem.Chromosome.UcscName}:{saItem.Position} is 
different from {refSequence} in the reference genome sequence.\" +\n                $\"\\nInput Line:\\n {saItem.InputLine}\");\n        }\n\n        private bool CheckParRegion(GenericScoreItem saItem, ISequenceProvider refProvider)\n        {\n            return RegionUtilities.OverlapsParRegion(saItem, refProvider.Assembly)\n                   && !string.IsNullOrEmpty(saItem.RefAllele)\n                   && saItem.RefAllele.All(x => x is 'N' or 'n');\n        }\n\n        private bool CheckSnv(GenericScoreItem saItem)\n        {\n            if (_strictSnvCheck == null)\n                return true;\n\n            if (saItem.RefAllele.Length == 1 && saItem.AltAllele.Length == 1)\n                return true;\n\n            if (_strictSnvCheck == false)\n                return false;\n\n            throw new InvalidDataException($\"Only SNV is expected in the input file. Exception found: {saItem.Chromosome}:{saItem.Position}\");\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/GenericScore/ParserSettings.cs",
    "content": "using System;\nusing System.Collections.Generic;\n\nnamespace SAUtils.GenericScore\n{\n    public sealed class ParserSettings\n    {\n        public readonly ColumnIndex ColumnIndex;\n        public readonly string[]    PossibleAlleles;\n        public          bool        IsPositional => ColumnIndex.AltAllele == null;\n\n        public readonly Action<string, double, Dictionary<string, double>> ConflictResolutionFunction;\n\n        public ParserSettings(\n            ColumnIndex columnIndex,\n            string[] possibleAlleles,\n            Action<string, double, Dictionary<string, double>> conflictResolutionFunction\n        )\n        {\n            ColumnIndex                = columnIndex;\n            PossibleAlleles            = possibleAlleles;\n            ConflictResolutionFunction = conflictResolutionFunction;\n        }\n    }\n\n    public sealed class ColumnIndex\n    {\n        public readonly ushort  Chromosome;\n        public readonly ushort  Position;\n        public readonly ushort? RefAllele;\n        public readonly ushort? AltAllele;\n        public readonly ushort  Score;\n        public readonly ushort  Others;\n\n        public ColumnIndex(\n            ushort chromosome,\n            ushort position,\n            ushort? refAllele,\n            ushort? altAllele,\n            ushort score,\n            ushort? others\n        )\n        {\n            Chromosome = chromosome;\n            Position   = position;\n            RefAllele  = refAllele;\n            AltAllele  = altAllele;\n            Score      = score;\n            Others     = others ?? ushort.MaxValue;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/GenericScore/ScoreFileWriter.cs",
    "content": "using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing Compression.Algorithms;\nusing ErrorHandling.Exceptions;\nusing IO;\nusing IO.v2;\nusing SAUtils.GenericScore.GenericScoreParser;\nusing VariantAnnotation.GenericScore;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.SA;\n\n\nnamespace SAUtils.GenericScore\n{\n    public sealed class ScoreFileWriter : IDisposable\n    {\n        private readonly ExtendedBinaryWriter _writer;\n        private readonly ExtendedBinaryWriter _indexWriter;\n        private readonly ScoreIndex           _index;\n        private readonly ScoreBlock           _block;\n        private readonly WriterSettings       _writerSettings;\n        private readonly ISequenceProvider    _refProvider;\n\n        private readonly bool _leaveOpen;\n        private readonly bool _skipIncorrectRefEntries;\n\n        public ScoreFileWriter(\n            WriterSettings writerSettings,\n            Stream stream,\n            Stream indexStream,\n            IDataSourceVersion version,\n            ISequenceProvider refProvider,\n            int schemaVersion,\n            bool skipIncorrectRefEntries = true,\n            bool leaveOpen = false)\n        {\n            _leaveOpen               = leaveOpen;\n            _skipIncorrectRefEntries = skipIncorrectRefEntries;\n            _refProvider             = refProvider;\n            _writerSettings          = writerSettings;\n\n            var readerSettings = new ReaderSettings(\n                _writerSettings.IsPositional,\n                _writerSettings.EncoderType,\n                _writerSettings.ScoreEncoder,\n                _writerSettings.ScoreJsonEncoder,\n                _writerSettings.Nucleotides,\n                _writerSettings.BlockLength\n            );\n\n            _writer      = new ExtendedBinaryWriter(stream,      System.Text.Encoding.Default, _leaveOpen);\n            _indexWriter = new ExtendedBinaryWriter(indexStream, 
System.Text.Encoding.Default, _leaveOpen);\n\n            _index = new ScoreIndex(\n                _indexWriter,\n                readerSettings,\n                _refProvider.Assembly,\n                version,\n                schemaVersion,\n                _writerSettings.IndexHeader,\n                _writerSettings.FilePairId\n            );\n            _block = new ScoreBlock(\n                new Zstandard(),\n                _index.GetBlockLength()\n            );\n        }\n\n        private long FilePosition => _writer.BaseStream.Position;\n\n        private void WriteHeader()\n        {\n            _writerSettings.Header.Write(_writer);\n            _writer.WriteOpt(_writerSettings.FilePairId);\n            _writer.Write(SaCommon.GuardInt);\n        }\n\n        public void Write(IEnumerable<GenericScoreItem> saItems)\n        {\n            WriteHeader();\n\n            uint nucleotideSize  = _index.GetNucleotideCount() * _writerSettings.ScoreEncoder.BytesRequired;\n            var  nucleotideArray = new byte[nucleotideSize];\n            Array.Fill(nucleotideArray, byte.MaxValue);\n\n            var  chromosomeIndex            = ushort.MaxValue;\n            int  chromosomeStartingPosition = -1;\n            int  previousPosition           = -1;\n            uint blockNumber                = 0;\n            uint localBlockIndex            = 0;\n\n            foreach (GenericScoreItem saItem in saItems)\n            {\n                if (chromosomeStartingPosition < 0 && previousPosition < 0)\n                {\n                    (chromosomeIndex, chromosomeStartingPosition) = AddNewChromosome(saItem);\n                    previousPosition                              = chromosomeStartingPosition;\n                }\n\n                if (!_writerSettings.SaItemValidator.Validate(saItem, _refProvider))\n                {\n                    _index.TrackUnmatchedReferencePositions();\n                    continue;\n                }\n\n          
      int previousBlockNumber = _index.GetLastBlockNumber(chromosomeIndex);\n\n                int    position     = saItem.Position;\n                byte[] encodedScore = _writerSettings.ScoreEncoder.EncodeToBytes(saItem.Score);\n\n                // Still on the same chromosome and postion, hence just fill the nucleotide array only\n                if (chromosomeIndex == saItem.Chromosome.Index && position == previousPosition)\n                {\n                    // Write 4 {A,C,T,G} score values to nucleotide array\n                    AddEncodedScoreToNucleotideArray(nucleotideArray, saItem.AltAllele, encodedScore);\n                    continue;\n                }\n\n                (blockNumber, localBlockIndex) = PositionToBlockLocation(previousPosition, chromosomeStartingPosition);\n\n                // Handle empty blocks by skipping them and adding them to index\n                if (blockNumber - previousBlockNumber > 1)\n                {\n                    // Finalize previous memory buffer before writing empty blocks (creats an additional block)\n                    WriteToDiskAndUpdateIndex(chromosomeIndex);\n\n                    // write blockNumber - previousBlockNumber - 2 blank blocks and write them to disk\n                    int blankBlockCount = (int) blockNumber - previousBlockNumber - 2;\n                    WriteBlankBlocks(chromosomeIndex, blankBlockCount);\n                }\n\n                // Add nucleotide array to memory at appropriate index\n                _block.Add(localBlockIndex, nucleotideArray, nucleotideSize);\n\n                // writeout if memory buffer is full\n                if (_block.IsFull())\n                {\n                    WriteToDiskAndUpdateIndex(chromosomeIndex);\n                }\n\n                Array.Fill(nucleotideArray, byte.MaxValue);\n                AddEncodedScoreToNucleotideArray(nucleotideArray, saItem.AltAllele, encodedScore);\n\n                // A new chromosome\n               
 if (chromosomeIndex != saItem.Chromosome.Index)\n                {\n                    WriteToDiskAndUpdateIndex(chromosomeIndex);\n                    (chromosomeIndex, chromosomeStartingPosition) = AddNewChromosome(saItem);\n                }\n\n                previousPosition = position;\n            }\n\n            // Writeout the partial block at the end\n            (_, localBlockIndex) = PositionToBlockLocation(previousPosition, chromosomeStartingPosition);\n            _block.Add(localBlockIndex, nucleotideArray, nucleotideSize);\n            WriteToDiskAndUpdateIndex(chromosomeIndex);\n\n            _writer.Write(Header.NirvanaFooter);\n\n            //Write Index to disk\n            _index.Write();\n        }\n\n        private void AddEncodedScoreToNucleotideArray(byte[] nucleotideArray, string allele, byte[] encodedScore)\n        {\n            ushort? nucleotidePosition = _index.GetNucleotidePosition(allele);\n            if (nucleotidePosition == null) return;\n\n            Array.Copy(\n                encodedScore,\n                0,\n                nucleotideArray,\n                (ushort) nucleotidePosition,\n                encodedScore.Length\n            );\n        }\n\n        private (ushort chromosomeIndex, int chromosomeStartingPosition) AddNewChromosome(GenericScoreItem saItem)\n        {\n            ushort chromosomeIndex            = saItem.Chromosome.Index;\n            int    chromosomeStartingPosition = saItem.Position;\n            _refProvider.LoadChromosome(saItem.Chromosome);\n            _index.AddChromosomeBlock(chromosomeIndex, chromosomeStartingPosition);\n            return (chromosomeIndex, chromosomeStartingPosition);\n        }\n\n        private void WriteBlankBlocks(ushort chromosomeIndex, int blankBlockCount)\n        {\n            for (var i = 0; i < blankBlockCount; i++)\n            {\n                AddBlockToIndex(chromosomeIndex, -1, 0, 0);\n            }\n        }\n\n        /// <summary>\n        
/// Write the memory buffer to disk,\n        /// Add the block to index\n        /// Clear out the memory buffer\n        /// </summary>\n        /// <param name=\"chromosomeIndex\"></param>\n        private void WriteToDiskAndUpdateIndex(ushort chromosomeIndex)\n        {\n            long filePosition = FilePosition;\n            (uint uncompressedSize, int compressedSize) = _block.Write(_writer);\n            AddBlockToIndex(chromosomeIndex, filePosition, compressedSize, uncompressedSize);\n        }\n\n        private void AddBlockToIndex(ushort chromosomeIndex, long fileStartingPosition, int compressedSize, uint uncompressedSize)\n        {\n            _index.Add(chromosomeIndex, fileStartingPosition, compressedSize, uncompressedSize);\n        }\n\n        private (uint blockNumber, uint localBlockIndex) PositionToBlockLocation(int position, int startingPosition)\n        {\n            // Position is less than start position\n            if (position < startingPosition) throw new UserErrorException(\"The Positions are not in order\");\n            return ((uint blockNumber, uint localBlockIndex)) _index.PositionToBlockLocation(position, startingPosition);\n        }\n\n        public void Dispose()\n        {\n            if (_leaveOpen) return;\n            _writer?.Dispose();\n            _indexWriter?.Dispose();\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/GenericScore/WriterSettings.cs",
    "content": "using System;\nusing IO.v2;\nusing SAUtils.GenericScore.GenericScoreParser;\nusing VariantAnnotation.GenericScore;\n\nnamespace SAUtils.GenericScore\n{\n    /// <summary>\n    /// Read-only bundle of the settings ScoreFileWriter consumes: the data and index file headers,\n    /// the id that pairs the two files, and the caller-supplied encoding and validation objects.\n    /// </summary>\n    public sealed class WriterSettings\n    {\n        /// <summary>Header that ScoreFileWriter writes at the start of the score data file.</summary>\n        public readonly Header Header = new(FileType.GsaWriter, 1);\n\n        /// <summary>Header that ScoreFileWriter passes to the score index.</summary>\n        public readonly Header IndexHeader = new(FileType.GsaIndex, 1);\n\n        /// <summary>\n        /// Id stored with both the data file and the index. Drawn once per instance from the\n        /// range [1_000_000, int.MaxValue).\n        /// </summary>\n        public readonly int FilePairId = new Random().Next(1_000_000, int.MaxValue);\n\n        // Caller-supplied values; the constructor stores them unchanged.\n        public readonly int              BlockLength;\n        public readonly string[]         Nucleotides;\n        public readonly bool             IsPositional;\n        public readonly EncoderType      EncoderType;\n        public readonly IScoreEncoder    ScoreEncoder;\n        public readonly ScoreJsonEncoder ScoreJsonEncoder;\n        public readonly SaItemValidator  SaItemValidator;\n\n        /// <summary>Captures the caller-supplied settings; no validation or copying is performed.</summary>\n        public WriterSettings(\n            int blockLength,\n            string[] nucleotides,\n            bool isPositional,\n            EncoderType encoderType,\n            IScoreEncoder scoreEncoder,\n            ScoreJsonEncoder scoreJsonEncoder,\n            SaItemValidator saItemValidator\n        )\n        {\n            (BlockLength, Nucleotides, IsPositional)      = (blockLength, nucleotides, isPositional);\n            (EncoderType, ScoreEncoder, ScoreJsonEncoder) = (encoderType, scoreEncoder, scoreJsonEncoder);\n            SaItemValidator                               = saItemValidator;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/GnomadGeneScores/GnomadGeneItem.cs",
    "content": "\uFEFFusing System;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.GnomadGeneScores\n{\n    /// <summary>\n    /// Gene-level gnomAD scores (pLI, pRec, pNull, synZ, misZ, loeuf) for a single gene symbol.\n    /// Every score is nullable.\n    /// </summary>\n    public sealed class GnomadGeneItem : ISuppGeneItem, IComparable<GnomadGeneItem>\n    {\n        // numeric format handed to JsonObject.AddDoubleValue for every score\n        private const string ScoreFormat = \"0.00e0\";\n\n        public string GeneSymbol { get; }\n        private readonly double? _pLI;\n        private readonly double? _pRec;\n        private readonly double? _pNull;\n        private readonly double? _synZ;\n        private readonly double? _misZ;\n        private readonly double? _loeuf;\n\n        public GnomadGeneItem(string gene, double? pLi, double? pRec, double? pNull, double? synZ, double? misZ, double? loeuf)\n        {\n            GeneSymbol = gene;\n            _pLI       = pLi;\n            _pRec      = pRec;\n            _pNull     = pNull;\n            _synZ      = synZ;\n            _misZ      = misZ;\n            _loeuf     = loeuf;\n        }\n\n        /// <summary>\n        /// Writes the six scores between JsonObject.OpenBrace and JsonObject.CloseBrace and returns\n        /// the resulting string.\n        /// </summary>\n        public string GetJsonString()\n        {\n            var sb         = StringBuilderPool.Get();\n            var jsonObject = new JsonObject(sb);\n\n            sb.Append(JsonObject.OpenBrace);\n            jsonObject.AddDoubleValue(\"pLi\", _pLI, ScoreFormat);\n            jsonObject.AddDoubleValue(\"pRec\", _pRec, ScoreFormat);\n            jsonObject.AddDoubleValue(\"pNull\", _pNull, ScoreFormat);\n            jsonObject.AddDoubleValue(\"synZ\", _synZ, ScoreFormat);\n            jsonObject.AddDoubleValue(\"misZ\", _misZ, ScoreFormat);\n            jsonObject.AddDoubleValue(\"loeuf\", _loeuf, ScoreFormat);\n            sb.Append(JsonObject.CloseBrace);\n\n            return StringBuilderPool.GetStringAndReturn(sb);\n        }\n\n        /// <summary>\n        /// Ranks two entries; the entry that compares lower is the preferred one.\n        /// Order of precedence: lowest loeuf, then lowest pLI, then largest |synZ + misZ|.\n        /// A null loeuf or pLI ranks after any value; a null synZ or misZ counts as 0.\n        /// </summary>\n        /// <returns>\n        /// a negative value when this entry is preferred, a positive value when the other entry is\n        /// preferred (or is null), 0 when they tie\n        /// </returns>\n        public int CompareTo(GnomadGeneItem other)\n        {\n            // IComparable convention: every instance sorts after null\n            if (other is null) return 1;\n\n            if (_loeuf != other._loeuf) return CompareNullsLast(_loeuf, other._loeuf);\n\n            // same loeuf: pick the entry with the lowest pLI value\n            if (_pLI != other._pLI) return CompareNullsLast(_pLI, other._pLI);\n\n            // same pLI: pick the entry with the max absolute value of synZ + misZ.\n            // The parentheses are required: '+' binds tighter than '??', so without them misZ is\n            // ignored whenever synZ has a value.\n            double abs1 = Math.Abs((_synZ ?? 0) + (_misZ ?? 0));\n            double abs2 = Math.Abs((other._synZ ?? 0) + (other._misZ ?? 0));\n\n            // inverse compare since we want the greater value to be taken\n            return abs2.CompareTo(abs1);\n        }\n\n        // Ascending comparison of two scores where null sorts after any value.\n        // Only called when the two scores differ, so both being null is not handled here.\n        private static int CompareNullsLast(double? left, double? right)\n        {\n            if (left == null) return 1;\n            if (right == null) return -1;\n            return left.Value.CompareTo(right.Value);\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/GnomadGeneScores/GnomadGeneParser.cs",
    "content": "\uFEFFusing System;\nusing System.Collections.Generic;\nusing System.Globalization;\nusing System.IO;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.SA;\n\nnamespace SAUtils.GnomadGeneScores\n{\n    /// <summary>\n    /// Reads a tab-delimited gnomAD gene score file whose first line is the header and produces one\n    /// GnomadGeneItem per gene symbol.\n    /// </summary>\n    public sealed class GnomadGeneParser : IDisposable\n    {\n        private readonly StreamReader _reader;\n\n        private const string GeneTag   = \"gene\";\n        private const string GeneIdTag = \"gene_id\";\n        private const string PliTag    = \"pLI\";\n        private const string PrecTag   = \"pRec\";\n        private const string PnullTag  = \"pNull\";\n        private const string SynZTag   = \"syn_z\";\n        private const string MisZTag   = \"mis_z\";\n        private const string LoeufTag  = \"oe_lof_upper\";\n\n        // column positions resolved from the header line; -1 means the column is absent\n        private int _geneIndex   = -1;\n        private int _geneIdIndex = -1;\n        private int _pliIndex    = -1;\n        private int _precIndex   = -1;\n        private int _pnullIndex  = -1;\n        private int _synZIndex   = -1;\n        private int _misZIndex   = -1;\n        private int _loeufIndex  = -1;\n\n        private readonly Dictionary<string, string> _geneIdToSymbols;\n\n        public GnomadGeneParser(StreamReader reader, Dictionary<string, string> geneIdToSymbols)\n        {\n            _reader          = reader;\n            _geneIdToSymbols = geneIdToSymbols;\n        }\n\n        public void Dispose()\n        {\n            _reader?.Dispose();\n        }\n\n        /// <summary>\n        /// Parses the whole file. When several lines resolve to the same gene symbol, only the entry\n        /// that GnomadGeneItem.CompareTo ranks lowest is kept.\n        /// </summary>\n        /// <returns>\n        /// a dictionary from gene symbol to a single-element list holding the retained entry, or\n        /// null when the header lacks a required column\n        /// </returns>\n        public Dictionary<string, List<ISuppGeneItem>> GetItems()\n        {\n            var geneAnnotations = new Dictionary<string, List<ISuppGeneItem>>();\n            var isFirstLine     = true;\n\n            string line;\n            while ((line = _reader.ReadLine()) != null)\n            {\n                if (isFirstLine)\n                {\n                    if (!GetColumnIndices(line)) return null;\n                    isFirstLine = false;\n                    continue;\n                }\n\n                // a blank line has no columns to index into\n                if (string.IsNullOrWhiteSpace(line)) continue;\n\n                GnomadGeneItem geneAnnotation = GetGeneAndScores(line);\n                if (geneAnnotation == null) continue;\n\n                if (geneAnnotations.TryAdd(geneAnnotation.GeneSymbol, new List<ISuppGeneItem> {geneAnnotation}))\n                    continue;\n\n                // in case of a conflict we keep the item that compares lower (minimal loeuf first)\n                List<ISuppGeneItem> entries = geneAnnotations[geneAnnotation.GeneSymbol];\n                var existingEntry = (GnomadGeneItem) entries[0];\n                if (existingEntry.CompareTo(geneAnnotation) > 0) entries[0] = geneAnnotation;\n            }\n\n            return geneAnnotations;\n        }\n\n        /// <summary>\n        /// Builds the item for one data line. The gene symbol comes from the gene id cache; when the\n        /// id is not cached, the symbol in the file's gene column is used instead.\n        /// </summary>\n        /// <returns>the parsed item, or null when no gene symbol can be determined for the line</returns>\n        private GnomadGeneItem GetGeneAndScores(string line)\n        {\n            var cols   = line.OptimizedSplit('\\t');\n            var geneId = cols[_geneIdIndex];\n\n            if (!_geneIdToSymbols.TryGetValue(geneId, out var gene))\n            {\n                // the gene column is optional in the header, so it may not be there to fall back on\n                if (_geneIndex < 0)\n                {\n                    Console.WriteLine($\"GeneId to symbol not found in cache for: {geneId}, and the file has no gene column. Skipping entry.\");\n                    return null;\n                }\n\n                gene = cols[_geneIndex];\n                Console.WriteLine($\"GeneId to symbol not found in cache for: {geneId}, using provided name in file: {gene}\");\n            }\n\n            var pLi   = GetScore(cols[_pliIndex]);\n            var pRec  = GetScore(cols[_precIndex]);\n            var pNull = GetScore(cols[_pnullIndex]);\n            var synZ  = GetScore(cols[_synZIndex]);\n            var misZ  = GetScore(cols[_misZIndex]);\n            var loeuf = GetScore(cols[_loeufIndex]);\n\n            return new GnomadGeneItem(gene, pLi, pRec, pNull, synZ, misZ, loeuf);\n        }\n\n        // NA and NaN mean that no score is available. Everything else is parsed with the invariant\n        // culture so that the result does not depend on the regional settings of the machine.\n        private static double? GetScore(string score)\n        {\n            if (score == \"NA\" || score == \"NaN\") return null;\n            return double.Parse(score, CultureInfo.InvariantCulture);\n        }\n\n        /// <summary>\n        /// Resolves the column positions from the header line and reports the first required column\n        /// that is missing.\n        /// </summary>\n        /// <returns>true when every required column is present</returns>\n        private bool GetColumnIndices(string line)\n        {\n            var cols = line.OptimizedSplit('\\t');\n\n            _geneIndex   = Array.IndexOf(cols, GeneTag);\n            _geneIdIndex = Array.IndexOf(cols, GeneIdTag);\n            _pliIndex    = Array.IndexOf(cols, PliTag);\n            _precIndex   = Array.IndexOf(cols, PrecTag);\n            _pnullIndex  = Array.IndexOf(cols, PnullTag);\n            _synZIndex   = Array.IndexOf(cols, SynZTag);\n            _misZIndex   = Array.IndexOf(cols, MisZTag);\n            _loeufIndex  = Array.IndexOf(cols, LoeufTag);\n\n            // the gene column is not checked here: it is only consulted when a gene id is missing\n            // from the cache, and GetGeneAndScores copes with its absence\n            return HasColumn(_geneIdIndex, \"gene_id\")\n                   && HasColumn(_pliIndex, \"pLI\")\n                   && HasColumn(_precIndex, \"pRec\")\n                   && HasColumn(_pnullIndex, \"pNull\")\n                   && HasColumn(_synZIndex, \"synZ\")\n                   && HasColumn(_misZIndex, \"misZ\")\n                   && HasColumn(_loeufIndex, \"loeuf\");\n        }\n\n        // Prints a message naming the column when its index was not found in the header.\n        private static bool HasColumn(int columnIndex, string columnName)\n        {\n            if (columnIndex >= 0) return true;\n\n            Console.WriteLine($\"{columnName} column not found\");\n            return false;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/GnomadGeneScores/GnomadGenesMain.cs",
    "content": "\uFEFFusing System;\nusing System.Collections.Generic;\nusing System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing ErrorHandling.Exceptions;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing VariantAnnotation.IO.Caches;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.GnomadGeneScores\n{\n    /// <summary>\n    /// Command-line entry point that creates a gene annotation database from a gnomAD gene score TSV.\n    /// </summary>\n    public static class GnomadGenesMain\n    {\n        private static string _outputDirectory;\n        private static string _inputFile;\n        private static string _cachePrefix;\n        private static string _referenceSequencePath;\n\n        /// <summary>Parses and validates the command-line options, then runs the conversion.</summary>\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"cache|c=\",\n                    \"Cache prefix\",\n                    v => _cachePrefix = v\n                },\n                {\n                    \"ref|r=\",\n                    \"Reference sequence path\",\n                    v => _referenceSequencePath = v\n                },\n                {\n                    \"in|i=\",\n                    \"input tsv file\",\n                    v => _inputFile = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .HasRequiredParameter(_cachePrefix, \"transcript cache prefix\", \"--cache\")\n                .CheckInputFilenameExists(CacheConstants.TranscriptPath(_cachePrefix), \"transcript cache prefix\", \"--cache\")\n                .HasRequiredParameter(_referenceSequencePath, \"reference sequence path\", \"--ref\")\n                .CheckInputFilenameExists(_referenceSequencePath, \"reference sequence path\", \"--ref\")\n                .CheckInputFilenameExists(_inputFile, \"input TSV file\", \"--in\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a gene annotation database from gnomAD data\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n\n        /// <summary>\n        /// Loads the gene id to symbol mapping from the transcript cache, parses the input file and\n        /// writes the gene scores to [name]_[version] plus SaCommon.GeneFileSuffix in the output directory.\n        /// </summary>\n        /// <exception cref=\"UserErrorException\">the input header lacks a required column</exception>\n        private static ExitCodes ProgramExecution()\n        {\n            Dictionary<string, string> geneIdToSymbols;\n            using (var cacheStream = FileUtilities.GetReadStream(CacheConstants.TranscriptPath(_cachePrefix)))\n            using (var transcriptCacheReader = new TranscriptCacheReader(cacheStream))\n            using (var refProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_referenceSequencePath)))\n            {\n                geneIdToSymbols = LoadGenesFromCache(refProvider, transcriptCacheReader);\n                Console.WriteLine($\"Loaded {geneIdToSymbols.Count} gene symbols from cache.\");\n            }\n\n            var version     = DataSourceVersionReader.GetSourceVersion(_inputFile + \".version\");\n            var outFileName = $\"{version.Name}_{version.Version}\";\n\n            using (var gnomadGeneParser = new GnomadGeneParser(GZipUtilities.GetAppropriateStreamReader(_inputFile), geneIdToSymbols))\n            {\n                // Parse before the output file is created: GetItems() returns null when the header\n                // lacks a required column, and that must neither reach the writer nor leave an\n                // empty output file behind.\n                var geneItems = gnomadGeneParser.GetItems();\n                if (geneItems == null)\n                    throw new UserErrorException($\"Unable to find all the required columns in the header of {_inputFile}\");\n\n                using (var stream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.GeneFileSuffix)))\n                using (var ngaWriter = new NgaWriter(stream, version, SaCommon.GnomadGeneScoreTag, SaCommon.SchemaVersion, false))\n                {\n                    ngaWriter.Write(geneItems);\n                }\n            }\n\n            return ExitCodes.Success;\n        }\n\n        /// <summary>\n        /// Maps each unversioned Ensembl gene id in the transcript cache to its gene symbol.\n        /// Genes without an Ensembl id are skipped.\n        /// </summary>\n        /// <exception cref=\"DataMisalignedException\">the same gene id maps to two different symbols</exception>\n        private static Dictionary<string, string> LoadGenesFromCache(ReferenceSequenceProvider refProvider, TranscriptCacheReader cacheReader)\n        {\n            var transcriptData = cacheReader.Read(refProvider.RefIndexToChromosome);\n\n            var geneIdToSymbols = new Dictionary<string, string>(transcriptData.Genes.Length);\n            foreach (var gene in transcriptData.Genes)\n            {\n                var geneId = gene.EnsemblId.WithoutVersion;\n                if (string.IsNullOrEmpty(geneId)) continue;\n\n                // the same id may show up more than once as long as it always maps to the same symbol\n                if (!geneIdToSymbols.TryAdd(geneId, gene.Symbol) && geneIdToSymbols[geneId] != gene.Symbol)\n                    throw new DataMisalignedException($\"Multiple symbols found for {geneId}\");\n            }\n\n            return geneIdToSymbols;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/InputFileParsers/AncestralAlleleReader.cs",
    "content": "\uFEFFusing System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing Genome;\nusing OptimizedCore;\nusing SAUtils.DataStructures;\nusing VariantAnnotation.Interface.IO;\nusing VariantAnnotation.Interface.Providers;\nusing Variants;\n\nnamespace SAUtils.InputFileParsers\n{\n    /// <summary>\n    /// Streams AncestralAlleleItem objects out of VCF-formatted text, one item per ALT allele.\n    /// </summary>\n    public sealed class AncestralAlleleReader : IDisposable\n    {\n        // a record needs at least this many tab-separated columns to be parsed\n        private const int MinColumnCount = 8;\n\n        // split limit: we don't care about the many fields after the info field\n        private const int MaxSplitCount = 9;\n\n        private readonly StreamReader                   _streamReader;\n        private readonly Dictionary<string, Chromosome> _refNameDictionary;\n        private readonly ISequenceProvider              _sequenceProvider;\n\n        public AncestralAlleleReader(StreamReader streamReader, ISequenceProvider sequenceProvider)\n        {\n            _streamReader      = streamReader;\n            _sequenceProvider  = sequenceProvider;\n            _refNameDictionary = sequenceProvider.RefNameToChromosome;\n        }\n\n        /// <summary>\n        /// Lazily parses the stream line by line. Blank lines, lines starting with '#' and records\n        /// that ExtractItems rejects are skipped. The reader is disposed when enumeration completes.\n        /// </summary>\n        public IEnumerable<AncestralAlleleItem> GetItems()\n        {\n            using (_streamReader)\n            {\n                string line;\n                while ((line = _streamReader.ReadLine()) != null)\n                {\n                    // blank lines and comment lines carry no records\n                    if (string.IsNullOrWhiteSpace(line) || line.OptimizedStartsWith('#')) continue;\n\n                    var items = ExtractItems(line);\n                    if (items == null) continue;\n\n                    foreach (var item in items) yield return item;\n                }\n            }\n        }\n\n        /// <summary>\n        /// Converts one VCF record into one item per ALT allele, each trimmed and left-aligned.\n        /// </summary>\n        /// <returns>\n        /// the items, or null when the record has fewer than 8 columns, names a chromosome that is\n        /// not in the reference dictionary, or contains a symbolic ALT allele\n        /// </returns>\n        private List<AncestralAlleleItem> ExtractItems(string vcfLine)\n        {\n            var columns = vcfLine.Split(new[] { '\\t' }, MaxSplitCount);\n            if (columns.Length < MinColumnCount) return null;\n\n            if (!_refNameDictionary.TryGetValue(columns[VcfCommon.ChromIndex], out var chromosome)) return null;\n\n            // NOTE(review): an earlier comment on this line said the position has to be taken from\n            // RSPOS in the info field, yet the code reads the POS column - confirm which is intended\n            var position   = int.Parse(columns[VcfCommon.PosIndex]);\n            var refAllele  = columns[VcfCommon.RefIndex];\n            var altAlleles = columns[VcfCommon.AltIndex].OptimizedSplit(',');\n            var infoField  = columns[VcfCommon.InfoIndex];\n\n            // a record with any symbolic allele (wrapped in angle brackets) is dropped as a whole\n            if (altAlleles.Any(IsSymbolic)) return null;\n\n            var ancestralAllele = FindAncestralAllele(infoField);\n            var items           = new List<AncestralAlleleItem>();\n\n            // NOTE(review): the alignment uses whatever sequence the provider currently exposes and\n            // this class never calls LoadChromosome - confirm the caller keeps the provider in sync\n            foreach (string altAllele in altAlleles)\n            {\n                var (shiftedPos, shiftedRef, shiftedAlt) =\n                    VariantUtils.TrimAndLeftAlign(position, refAllele, altAllele, _sequenceProvider.Sequence);\n\n                var item = new AncestralAlleleItem(chromosome, shiftedPos, shiftedRef, shiftedAlt, ancestralAllele, vcfLine);\n                items.Add(item);\n            }\n\n            return items;\n        }\n\n        private static bool IsSymbolic(string allele)\n        {\n            return allele.OptimizedStartsWith('<') && allele.OptimizedEndsWith('>');\n        }\n\n        // Returns the ancestral allele taken from the first AA entry of the info field, or null\n        // when the field is empty, has no AA entry, or the AA value is not usable.\n        private static string FindAncestralAllele(string infoField)\n        {\n            if (infoField == \"\" || infoField == \".\") return null;\n\n            foreach (string infoItem in infoField.OptimizedSplit(';'))\n            {\n                (string key, string value) = infoItem.OptimizedKeyValue();\n                if (key == \"AA\") return GetAncestralAllele(value);\n            }\n\n            return null;\n        }\n\n        // Keeps the part of the AA value before the first '|' and accepts it only when it is\n        // non-empty and made up solely of A, C, G, T or N (either case).\n        private static string GetAncestralAllele(string value)\n        {\n            if (value == \"\" || value == \".\") return null;\n\n            string candidate = value.OptimizedSplit('|')[0];\n            bool   isUsable  = !string.IsNullOrEmpty(candidate) && candidate.All(IsNucleotide);\n\n            return isUsable ? candidate : null;\n        }\n\n        private static bool IsNucleotide(char c)\n        {\n            switch (char.ToUpper(c))\n            {\n                case 'A':\n                case 'C':\n                case 'G':\n                case 'T':\n                case 'N':\n                    return true;\n                default:\n                    return false;\n            }\n        }\n\n        public void Dispose()\n        {\n            _streamReader?.Dispose();\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/InputFileParsers/ClinGen/ClinGenReader.cs",
    "content": "using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing Genome;\nusing OptimizedCore;\nusing SAUtils.DataStructures;\nusing Variants;\n\nnamespace SAUtils.InputFileParsers.ClinGen\n{\n    /// <summary>\n    /// Reads ClinGen records from tab-delimited text and yields one ClinGenItem per data line.\n    /// </summary>\n    public sealed class ClinGenReader : IDisposable\n    {\n        private readonly StreamReader _reader;\n        private readonly Dictionary<string, Chromosome> _refNameDict;\n\n        public ClinGenReader(StreamReader reader, Dictionary<string, Chromosome> refNameDict)\n        {\n            _reader      = reader;\n            _refNameDict = refNameDict;\n        }\n\n        /// <summary>\n        /// Lazily parses the stream. Blank lines, lines starting with '#' and records whose\n        /// chromosome name is not in the reference dictionary are skipped. The reader is disposed\n        /// when enumeration completes.\n        /// </summary>\n        public IEnumerable<ClinGenItem> GetItems()\n        {\n            using (var reader = _reader)\n            {\n                string line;\n                while ((line = reader.ReadLine()) != null)\n                {\n                    // a blank line (e.g. at the end of the file) has no columns to index into\n                    if (string.IsNullOrWhiteSpace(line)) continue;\n                    if (IsClinGenHeader(line)) continue;\n\n                    var cols         = line.OptimizedSplit('\\t');\n                    string id        = cols[0];\n                    string ucscChrom = cols[1];\n                    if (!_refNameDict.TryGetValue(ucscChrom, out var chrom)) continue;\n\n                    int start              = int.Parse(cols[2]);\n                    int end                = int.Parse(cols[3]);\n                    int observedGains      = int.Parse(cols[4]);\n                    int observedLosses     = int.Parse(cols[5]);\n                    var variantType        = GetVariantType(cols[6]);\n                    var clinInterpretation = GetClinInterpretation(cols[7]);\n                    bool validated         = cols[8] == \"True\";\n                    var phenotypes         = ToSetOrNull(cols[9]);\n                    var phenotypeIds       = ToSetOrNull(cols[10]);\n\n                    yield return new ClinGenItem(id, chrom, start, end, variantType, observedGains, observedLosses,\n                        clinInterpretation, validated, phenotypes, phenotypeIds);\n                }\n            }\n        }\n\n        // An empty column yields null rather than an empty set; otherwise the comma-separated\n        // values are collected into a set.\n        private static HashSet<string> ToSetOrNull(string commaSeparated)\n        {\n            return commaSeparated == \"\" ? null : new HashSet<string>(commaSeparated.OptimizedSplit(','));\n        }\n\n        // Any description other than the three copy number terms maps to VariantType.unknown.\n        private static VariantType GetVariantType(string variantTypeDescription)\n        {\n            switch (variantTypeDescription)\n            {\n                case \"copy_number_gain\":\n                    return VariantType.copy_number_gain;\n                case \"copy_number_loss\":\n                    return VariantType.copy_number_loss;\n                case \"copy_number_variation\":\n                    return VariantType.copy_number_variation;\n                default:\n                    return VariantType.unknown;\n            }\n        }\n\n        // Any value other than the five listed terms maps to ClinicalInterpretation.unknown.\n        private static ClinicalInterpretation GetClinInterpretation(string s)\n        {\n            switch (s)\n            {\n                case \"pathogenic\":\n                    return ClinicalInterpretation.pathogenic;\n                case \"benign\":\n                    return ClinicalInterpretation.benign;\n                case \"likely_pathogenic\":\n                    return ClinicalInterpretation.likely_pathogenic;\n                case \"likely_benign\":\n                    return ClinicalInterpretation.likely_benign;\n                case \"uncertain_significance\":\n                    return ClinicalInterpretation.uncertain_significance;\n                default:\n                    return ClinicalInterpretation.unknown;\n            }\n        }\n\n        private static bool IsClinGenHeader(string line)\n        {\n            return line.OptimizedStartsWith('#');\n        }\n\n        public void Dispose()\n        {\n            _reader?.Dispose();\n        }\n    }\n}\n"
  },
  {
    "path": "SAUtils/InputFileParsers/ClinVar/ClinVarCommon.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.Linq;\n\nnamespace SAUtils.InputFileParsers.ClinVar\n{\n    public static class ClinVarCommon\n    {\n        public static string NormalizeAllele(string allele)\n        {\n            if (string.IsNullOrEmpty(allele)) return \"-\";\n            return allele == \"N\" ? null : allele;\n        }\n        \n        public static readonly HashSet<string> ValidPathogenicity = new HashSet<string>\n        {\n            \"uncertain significance\",\n            \"not provided\",\n            \"benign\",\n            \"likely benign\",\n            \"likely pathogenic\",\n            \"pathogenic\",\n            \"drug response\",\n            \"histocompatibility\",\n            \"association\",\n            \"risk factor\",\n            \"protective\",\n            \"affects\",\n            \"conflicting data from submitters\",\n            \"other\",\n            \"association not found\",\n            \"confers sensitivity\",\n            \"no interpretation for the single variant\",// observed in VCV XML only\n            \n            \"conflicting interpretations of pathogenicity\", // observed in VCV XML only\n            \"established risk allele\", // observed in VCV XML only\n            \"likely risk allele\"                            // observed in VCV XML only\n        };\n        public enum ReviewStatus\n        {\n            // ReSharper disable InconsistentNaming\n            no_assertion,\n            no_criteria,\n            single_submitter,\n            multiple_submitters,\n            multiple_submitters_no_conflict,\n            conflicting_interpretations,\n            expert_panel,\n            practice_guideline,\n            no_interpretation_single\n            // ReSharper restore InconsistentNaming\n        }\n        public static readonly Dictionary<string, ReviewStatus> ReviewStatusNameMapping = new Dictionary<string, ReviewStatus>\n        {\n            
[\"no_assertion\"] = ReviewStatus.no_assertion,\n            [\"no_criteria\"]  = ReviewStatus.no_criteria,\n            [\"guideline\"]    = ReviewStatus.practice_guideline,\n            [\"single\"]       = ReviewStatus.single_submitter,\n            [\"mult\"]         = ReviewStatus.multiple_submitters,\n            [\"conf\"]         = ReviewStatus.conflicting_interpretations,\n            [\"exp\"]          = ReviewStatus.expert_panel,\n            // the following are the long forms found in XML\n            [\"no assertion provided\"]                                = ReviewStatus.no_assertion,\n            [\"no assertion criteria provided\"]                       = ReviewStatus.no_criteria,\n            [\"practice guideline\"]                                   = ReviewStatus.practice_guideline,\n            [\"criteria provided, conflicting interpretations\"]       = ReviewStatus.conflicting_interpretations,\n            [\"reviewed by expert panel\"]                             = ReviewStatus.expert_panel,\n            [\"classified by multiple submitters\"]                    = ReviewStatus.multiple_submitters,\n            [\"criteria provided, multiple submitters, no conflicts\"] = ReviewStatus.multiple_submitters_no_conflict,\n            [\"criteria provided, single submitter\"]                  = ReviewStatus.single_submitter,\n            [\"no interpretation for the single variant\"]  = ReviewStatus.no_interpretation_single\n        };\n\n        public static readonly Dictionary<ReviewStatus, string> ReviewStatusStrings = new Dictionary<ReviewStatus, string>\n        {\n            [ReviewStatus.no_criteria]                     = \"no assertion criteria provided\",\n            [ReviewStatus.no_assertion]                    = \"no assertion provided\",\n            [ReviewStatus.expert_panel]                    = \"reviewed by expert panel\",\n            [ReviewStatus.single_submitter]                = \"criteria provided, single submitter\",\n  
          [ReviewStatus.practice_guideline]              = \"practice guideline\",\n            [ReviewStatus.multiple_submitters]             = \"classified by multiple submitters\",\n            [ReviewStatus.conflicting_interpretations]     = \"criteria provided, conflicting interpretations\",\n            [ReviewStatus.multiple_submitters_no_conflict] = \"criteria provided, multiple submitters, no conflicts\",\n            [ReviewStatus.no_interpretation_single]        = \"no interpretation for the single variant\"\n        };\n        \n        public static string[] GetSignificances(string description, string explanation)\n        {\n            if(string.IsNullOrEmpty(explanation)) return description?.ToLower().Split('/', ',', ';').Select(x=>x.Trim()).ToArray();\n            //<Explanation DataSource=\"ClinVar\" Type=\"public\">Pathogenic(1);Uncertain significance(1)</Explanation>\n            var significances =new List<string>();\n            foreach (var significance in explanation.ToLower().Split('/',';'))\n            {\n                var openParenthesisIndex = significance.IndexOf('(');\n                significances.Add(openParenthesisIndex < 0 ? significance.Trim() : significance.Substring(0, openParenthesisIndex).Trim());\n            }\n\n            return significances.ToArray();\n        }\n\n    }\n}"
  },
  {
    "path": "SAUtils/InputFileParsers/ClinVar/ClinVarParser.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing System.Xml;\nusing System.Xml.Linq;\nusing Genome;\nusing Newtonsoft.Json.Linq;\nusing SAUtils.CreateClinvarDb;\nusing SAUtils.DataStructures;\nusing SAUtils.Schema;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Interface.SA;\nusing Variants;\nusing Vcf.VariantCreator;\n\nnamespace SAUtils.InputFileParsers.ClinVar\n{\n    public sealed class ClinVarParser :IDisposable\n    {\n        #region members\n\n        private const string RefAssertionTag     = \"ReferenceClinVarAssertion\";\n        private const string ClinVarAssertionTag = \"ClinVarAssertion\";\n        private const string ReviewStatusTag     = \"ReviewStatus\";\n        private const string DescriptionTag      = \"Description\";\n        private const string ExplanationTag      = \"Explanation\";\n        private const int    MaxVariantLength    = 1000;\n\n        private readonly Dictionary<char, char[]> _iupacBases = new Dictionary<char, char[]>\n        {\n            ['R'] = new[] { 'A', 'G' },\n            ['Y'] = new[] { 'C', 'T' },\n            ['S'] = new[] { 'G', 'C' },\n            ['W'] = new[] { 'A', 'T' },\n            ['K'] = new[] { 'G', 'T' },\n            ['M'] = new[] { 'A', 'C' },\n            ['B'] = new[] { 'C', 'G', 'T' },\n            ['D'] = new[] { 'A', 'G', 'T' },\n            ['H'] = new[] { 'A', 'C', 'T' },\n            ['V'] = new[] { 'A', 'C', 'G' }\n        };\n\n        \n        private readonly Stream _rcvStream;\n        private readonly Stream _vcvStream;\n\t\tprivate readonly ISequenceProvider _sequenceProvider;\n        private readonly Dictionary<string, Chromosome> _refChromDict;\n\n        private string _lastClinvarAccession;\n        #endregion\n\n        \n        #region clinVarItem fields\n\n        private readonly List<ClinvarVariant> _variantList= new List<ClinvarVariant>();\n\t\tprivate HashSet<string> 
_alleleOrigins;\n\t\tprivate string _reviewStatus;\n\t\tprivate string _id;\n\t\tprivate HashSet<string> _prefPhenotypes;\n\t\tprivate HashSet<string> _altPhenotypes;\n\t\tprivate string[] _significances;\n\n\t\tprivate HashSet<string> _medGenIDs;\n\t\tprivate HashSet<string> _omimIDs;\n        private HashSet<string> _allilicOmimIDs;\n\t\tprivate HashSet<string> _orphanetIDs;\n\n        private HashSet<long> _pubMedIds = new HashSet<long>();\n\t\tprivate long          _lastUpdatedDate;\n\t\tprivate List<VcvItem> _vcvItems;\n\n        public SaJsonSchema JsonSchema { get; } = ClinVarSchema.Get();\n\n        #endregion\n\n        private void ClearClinvarFields()\n\t\t{\n\t\t\t_variantList.Clear();\n\t\t\t_reviewStatus      = null;\n\t\t\t_alleleOrigins     = new HashSet<string>();\n\t\t\t_significances      = null;\n\t\t\t_prefPhenotypes    = new HashSet<string>();\n\t\t\t_altPhenotypes     = new HashSet<string>();\n\t\t\t_id                = null;\n\t\t\t_medGenIDs         = new HashSet<string>();\n\t\t\t_omimIDs           = new HashSet<string>();\n            _allilicOmimIDs    = new HashSet<string>();\n            _orphanetIDs       = new HashSet<string>();\n\t\t\t_pubMedIds         = new HashSet<long>();//we need a new pubmed hash since otherwise, pubmedid hashes of different items interfere. 
\n\t\t\t_lastUpdatedDate   = long.MinValue;\n\t\t}\n\n\t\t// constructor\n        public ClinVarParser(Stream rcvStream, Stream vcvStream, ISequenceProvider sequenceProvider)\n        {\n\t        _rcvStream        = rcvStream;\n\t        _vcvStream        = vcvStream;\n\t        _sequenceProvider = sequenceProvider;\n\t        _refChromDict     = sequenceProvider?.RefNameToChromosome;\n        }\n\n        private const string ClinVarSetTag = \"ClinVarSet\";\n\n        public IEnumerable<ISupplementaryDataItem> GetItems()\n        {\n\t        _vcvItems = GetVariationRecords();\n\t        Console.WriteLine($\"Found {_vcvItems.Count} VCV records\");\n\t        \n\t        var unknownVcvs = new HashSet<int>();\n\t        var vcvSaItems  = new HashSet<VcvSaItem >();\n\t        var rcvItems    = GetRcvItems();\n\t        \n\t        foreach (var clinVarItem in rcvItems)\n\t        {\n\t\t        var vcvId = int.Parse(clinVarItem.VariationId);\n\t\t        var vcvIndex = SuppDataUtilities.BinarySearch(_vcvItems, vcvId);\n\t\t        \n\t\t        if (vcvIndex < 0)\n\t\t        {\n\t\t\t        Console.WriteLine($\"Unknown vcv id:{vcvId} found in {clinVarItem.Id}\");\n\t\t\t        unknownVcvs.Add(vcvId);\n\t\t\t        //remove the VariationId\n\t\t\t        clinVarItem.VariationId = null;\n\t\t\t        continue;\n\t\t        }\n\n\t\t        var vcvItem = _vcvItems[vcvIndex];\n\t\t        vcvSaItems.Add(new VcvSaItem(clinVarItem.Chromosome, clinVarItem.Position, clinVarItem.RefAllele, clinVarItem.AltAllele,\n\t\t\t        vcvItem.Accession, vcvItem.Version, vcvItem.LastUpdatedDate, vcvItem.ReviewStatus, vcvItem.Significances));\n\n\t\t        clinVarItem.VariationId = $\"{vcvItem.Accession}.{vcvItem.Version}\";\n\t        }\n\n\t        var allItems = new List<IClinVarSaItem>(rcvItems);\n\t        allItems.AddRange(vcvSaItems);\n\t        allItems.Sort();\n\t        ReportStatistics(allItems);\n\n\t        Console.WriteLine($\"{unknownVcvs.Count} unknown VCVs found 
in RCVs.\");\n\t        Console.WriteLine($\"{string.Join(',', unknownVcvs)}\");\n\n\t        return allItems;\n        }\n\n        private void ReportStatistics(List<IClinVarSaItem> items)\n        {\n\t        Console.WriteLine($\"{_invalidRefAlleleCount} entries were skipped due to invalid ref allele.\");\n\t        Console.WriteLine($\"{_aluCount} ALU entries found.\");\n\t        Console.WriteLine($\"{_microsatelliteCount} Microsatellite entries found.\");\n\t        Console.WriteLine($\"{_variationCount} Variation entries found.\");\n\n\t        var stats = new ClinVarStats();\n\t        stats.GetClinvarSaItemsStats(items);\n\n\t        var jo = JObject.Parse(stats.ToString());\n\t        Console.WriteLine(jo);//pretty printing json\n        }\n\n        \n        public List<ClinVarItem> GetRcvItems()\n        {\n\t        var clinVarItems = new List<ClinVarItem>();\n\n            using (var reader = new StreamReader(_rcvStream))\n\t\t\tusing (var xmlReader = XmlReader.Create(reader, new XmlReaderSettings { DtdProcessing = DtdProcessing.Prohibit, IgnoreWhitespace = true}))\n\t\t\t{\n\t\t\t\t//skipping the top level element to go down to its elements\n\t\t\t    xmlReader.ReadToDescendant(ClinVarSetTag);\n\n                do\n\t\t\t\t{\n\t\t\t\t\tvar subTreeReader = xmlReader.ReadSubtree();\n\t\t\t\t    var xElement = XElement.Load(subTreeReader);\n\t\t\t\t    List<ClinVarItem> extractedItems;\n\t\t\t\t    try\n\t\t\t\t    {\n\t\t\t\t        extractedItems = ExtractClinVarItems(xElement);\n                    }\n                    catch (Exception )\n\t\t\t\t    {\n\t\t\t\t        Console.WriteLine($\"Last clinVar accession observed {_lastClinvarAccession}\");\n\t\t\t\t        throw;\n\t\t\t\t    }\n\t\t\t\t    if (extractedItems == null) continue;\n\t\t\t\t    clinVarItems.AddRange(extractedItems);\n\n                } while (xmlReader.ReadToNextSibling(ClinVarSetTag));\n\t\t\t}\n\t\t    clinVarItems.Sort();\n\n\t\t    var validItems = 
GetValidItems(clinVarItems);\n\n\t\t    return validItems.Distinct().ToList();\n\n        }\n\n        private List<VcvItem> GetVariationRecords()\n        {\n\t        using var reader = new ClinVarVariationReader(_vcvStream);\n\t        var items= new List<VcvItem>(reader.GetItems());\n\t        items.Sort();\n\t        return items;\n        }\n\n        private int _invalidRefAlleleCount = 0;\n        private List<ClinVarItem> GetValidItems(List<ClinVarItem> clinVarItems)\n        {\n            var shiftedItems = new List<ClinVarItem>();\n            foreach (var item in clinVarItems)\n            {\n                _sequenceProvider.LoadChromosome(item.Chromosome);\n\n                if (!ValidateRefAllele(item))\n                {\n\t                _invalidRefAlleleCount++;\n\t                continue;\n                }\n\n                string refAllele= item.RefAllele, altAllele= item.AltAllele;\n                if (string.IsNullOrEmpty(item.RefAllele) && item.VariantType == \"Deletion\")\n                    refAllele = GetReferenceAllele(item, _sequenceProvider.Sequence);\n\n                if (string.IsNullOrEmpty(item.RefAllele) && item.VariantType == \"Indel\" && !string.IsNullOrEmpty(item.AltAllele))\n                    refAllele = GetReferenceAllele(item, _sequenceProvider.Sequence);\n\n                if (string.IsNullOrEmpty(item.AltAllele) && item.VariantType == \"Duplication\")\n                    altAllele = GetAltAllele(item, _sequenceProvider.Sequence);\n\n                if (string.IsNullOrEmpty(refAllele) && string.IsNullOrEmpty(altAllele)) continue;\n\n                int start;\n                (start, refAllele, altAllele) = VariantUtils.TrimAndLeftAlign(item.Position, refAllele, altAllele, _sequenceProvider.Sequence);\n                \n                shiftedItems.Add(new ClinVarItem(item.Chromosome,\n                    start,\n                    item.Stop,\n                    refAllele,\n                    altAllele,\n        
            item.JsonSchema,\n                    item.AlleleOrigins, \n                    item.VariantType, \n                    item.Id,item.VariationId, \n                    item.ReviewStatus, \n                    item.MedGenIds, \n                    item.OmimIds, \n                    item.OrphanetIds, \n                    item.Phenotypes, \n                    item.Significances, \n                    item.PubmedIds, \n                    item.LastUpdatedDate));\n            }\n\n            shiftedItems.Sort();\n            return shiftedItems;\n        }\n\n\n        \n\n        private List<ClinVarItem> ExtractClinVarItems(XElement xElement)\n\t\t{\n            ClearClinvarFields();\n\n\t\t\tif (xElement == null || xElement.IsEmpty) return null;\n\n\t\t\tParseAssertions(xElement);\n\n\t\t    var clinvarList = new List<ClinVarItem>();\n\n            foreach (var variant in _variantList)\n            {\n                if (IsInvalidVariant(variant)) continue;\n\n                var extendedOmimIds = GetOmimIds(variant);\n\n                var reviewStatEnum = ClinVarCommon.ReviewStatus.no_assertion;\n                if (ClinVarCommon.ReviewStatusNameMapping.ContainsKey(_reviewStatus))\n                    reviewStatEnum = ClinVarCommon.ReviewStatusNameMapping[_reviewStatus];\n\n                clinvarList.Add(\n                    new ClinVarItem(variant.Chromosome,\n                        variant.Start,\n                        variant.Stop,\n                        variant.RefAllele??\"\",// alleles cannot be null\n                        variant.AltAllele??\"\",\n                        JsonSchema,\n                        _alleleOrigins.Count > 0 ? _alleleOrigins : null,\n                        variant.VariantType,\n                        _id,\n                        variant.VariantId,\n                        reviewStatEnum,\n                        _medGenIDs.Count > 0 ? _medGenIDs : null,\n                        extendedOmimIds.Count > 0 ? 
extendedOmimIds : null,\n                        _orphanetIDs.Count > 0 ? _orphanetIDs : null,\n                        _prefPhenotypes.Count > 0 ? _prefPhenotypes : _altPhenotypes,\n                        _significances,\n                        _pubMedIds.Count > 0 ? _pubMedIds.OrderBy(x => x) : null, \n                        _lastUpdatedDate));\n            }\n\n\t\t\treturn clinvarList.Count > 0 ? clinvarList: null;\n\t\t}\n\n        \n        \n        private HashSet<string> GetOmimIds(ClinvarVariant variant)\n        {\n            var extendedOmimIds = new HashSet<string>(_omimIDs);\n\n            foreach (var omimId in variant.AllelicOmimIds)\n            {\n                extendedOmimIds.Add(omimId);\n            }\n\n            return extendedOmimIds;\n        }\n\n        private void ParseAssertions(XElement xElement)\n        {\n            foreach (var element in xElement.Elements(RefAssertionTag))\n                ParseRefClinVarAssertion(element);\n\n            foreach (var element in xElement.Elements(ClinVarAssertionTag))\n                ParseClinvarAssertion(element);\n        }\n\n        private int _aluCount            = 0;\n        private int _microsatelliteCount = 0;\n        private int _variationCount      = 0;\n        private bool IsInvalidVariant(ClinvarVariant variant)\n        {\n\t        switch (variant.VariantType)\n\t        {\n\t\t        case \"ALU\":\n\t\t\t        _aluCount++;\n\t\t\t        break;\n\t\t        case \"Microsatellite\":\n\t\t\t        _microsatelliteCount++;\n\t\t\t        break;\n\t\t        case \"Variation\":\n\t\t\t        _variationCount++;\n\t\t\t        break;\n\t        }\n            if (variant.VariantType == \"ALU\") return true;\n            return variant.Chromosome == null\n                   || (variant.VariantType == \"Microsatellite\" || variant.VariantType == \"Variation\" )\n                   && string.IsNullOrEmpty(variant.AltAllele);\n        }\n\n        private bool 
ValidateRefAllele(ClinVarItem clinvarVariant)\n\t    {\n\t        if (string.IsNullOrEmpty(clinvarVariant.RefAllele) || clinvarVariant.RefAllele == \"-\") return true;\n\n\t\t    string refAllele = clinvarVariant.RefAllele;\n\t\t    if (string.IsNullOrEmpty(refAllele)) return true;\n\n\t        int refLength = clinvarVariant.Stop - clinvarVariant.Position + 1;\n\t        return refLength == refAllele.Length && _sequenceProvider.Sequence.Validate(clinvarVariant.Position, clinvarVariant.Stop, refAllele);\n        }\n\n        private static string GetReferenceAllele(ClinVarItem variant, ISequence compressedSequence)\n        {\n            return variant == null ? null : compressedSequence.Substring(variant.Position - 1, variant.Stop - variant.Position + 1);\n        }\n\n        private static string GetAltAllele(ClinVarItem variant, ISequence compressedSequence)\n        {\n            return variant == null ? null : compressedSequence.Substring(variant.Position - 1, variant.Stop - variant.Position + 1);\n        }\n\n        internal static long ParseDate(string s)\n\t\t{\n\t\t\tif (string.IsNullOrEmpty(s) || s == \"-\") return long.MinValue;\n\t\t\t//Jun 29, 2010\n\t\t\treturn DateTime.Parse(s).Ticks;\n\t\t}\n\n        private const string UpdateDateTag           = \"DateLastUpdated\";\n        private const string AccessionTag            = \"Acc\";\n        private const string VersionTag              = \"Version\";\n        private const string ClinVarAccessionTag     = \"ClinVarAccession\";\n        private const string ClinicalSignificanceTag = \"ClinicalSignificance\";\n        private const string MeasureSetTag           = \"MeasureSet\";\n        private const string TraitSetTag             = \"TraitSet\";\n        private const string ObservedInTag           = \"ObservedIn\";\n        private const string SampleTag               = \"Sample\";\n\n        private void ParseRefClinVarAssertion(XElement xElement)\n\t\t{\n\t\t\tif (xElement==null || 
xElement.IsEmpty) return;\n\t\t\t//<ReferenceClinVarAssertion DateCreated=\"2013-10-28\" DateLastUpdated=\"2016-04-20\" ID=\"182406\">\n            _lastUpdatedDate      = ParseDate(xElement.Attribute(UpdateDateTag)?.Value);\n\t\t    _lastClinvarAccession = xElement.Element(ClinVarAccessionTag)?.Attribute(AccessionTag)?.Value;\n            _id                   =  _lastClinvarAccession + \".\" + xElement.Element(ClinVarAccessionTag)?.Attribute(VersionTag)?.Value;\n            \n            GetClinicalSignificance(xElement.Element(ClinicalSignificanceTag));\n            ParseGenotypeSet(xElement.Element(GenotypeSetTag));\n\t\t    ParseMeasureSet(xElement.Element(MeasureSetTag));\n\t\t    ParseTraitSet(xElement.Element(TraitSetTag));\n\t\t}\n\n        private const string CitationTag = \"Citation\";\n        private const string OriginTag = \"Origin\";\n\n        private void ParseClinvarAssertion(XElement xElement)\n\t\t{\n\t\t    if (xElement == null || xElement.IsEmpty) return;\n\n            foreach (var element in xElement.Descendants(CitationTag))\n\t\t\t\tParseCitation(element);\n\n\t\t    foreach (var element in xElement.Elements(ObservedInTag))\n                ParseObservedIn(element);\n\n        }\n\n        private void ParseObservedIn(XElement xElement)\n        {\n            var samples = xElement?.Elements(SampleTag);\n            if (samples == null) return;\n\n            foreach (var sample in samples)\n            {\n                foreach (var origin in sample.Elements(OriginTag))\n                    _alleleOrigins.Add(origin.Value);\n            }\n        }\n\n        private const string TraitTag = \"Trait\";\n\n        private void ParseTraitSet(XElement xElement)\n\t\t{\n\t\t\tif (xElement == null || xElement.IsEmpty) return;\n\n\t\t\tforeach (var element in xElement.Elements(TraitTag))\n\t\t\t    ParseTrait(element);\n\t\t}\n\n        private const string XrefTag = \"XRef\";\n        private const string NameTag = \"Name\";\n\t\tprivate 
void ParseTrait(XElement xElement)\n\t\t{\n\t\t\tif (xElement == null || xElement.IsEmpty) return;\n\n\t\t    foreach (var element in xElement.Elements(XrefTag))\n\t\t        ParseXref(element);\n\n\t\t    foreach (var element in xElement.Elements(NameTag))\n                ParsePnenotype(element);\n\t\t}\n\n        private const string ElementValueTag = \"ElementValue\";\n        private void ParsePnenotype(XElement xElement)\n\t\t{\n\t\t\tif (xElement == null || xElement.IsEmpty) return;\n\n\t        ParsePhenotypeElementValue(xElement.Element(ElementValueTag));\n\t\t}\n\n        private const string TypeTag = \"Type\";\n\n        private void ParsePhenotypeElementValue(XElement xElement)\n\t\t{\n\t\t    var phenotype = xElement.Attribute(TypeTag);\n\t\t    if (phenotype == null) return;\n\n\t\t    if (phenotype.Value == \"Preferred\")\n\t\t    {\n\t\t        _prefPhenotypes.Add(xElement.Value);\n\t\t    }\n\t\t    else if (phenotype.Value == \"Alternate\")\n\t\t    {\n\t\t        _altPhenotypes.Add(xElement.Value);\n\t\t    }\n\t\t}\n\n\n        private const string DbTag = \"DB\";\n        private const string IdTag = \"ID\";\n        private void ParseXref(XElement xElement)\n        {\n            var db = xElement.Attribute(DbTag);\n\n            if (db == null) return;\n\n\t\t\tstring id = xElement.Attribute(IdTag)?.Value.Trim(' '); // Trimming is necessary here, don't turn it off.\n\n\t\t\tswitch (db.Value)\n\t\t\t{\n\t\t\t\tcase \"MedGen\":\n\t\t\t\t\t_medGenIDs.Add(id);\n\t\t\t\t\tbreak;\n\t\t\t\tcase \"Orphanet\":\n\t\t\t\t\t_orphanetIDs.Add(id);\n\t\t\t\t\tbreak;\n\t\t\t\tcase \"OMIM\":\n\t\t\t\t    var type = xElement.Attribute(TypeTag);\n\t\t\t\t\tif (type !=null)\n\t\t\t\t\t    if (type.Value == \"Allelic variant\" )\n                            _allilicOmimIDs.Add(TrimOmimId(id));\n                        else\n                            _omimIDs.Add(TrimOmimId(id));\n\t\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\n        \n        private static string 
TrimOmimId(string id)\n\t    {\n\t\t    return id.TrimStart('P','S');\n\t    }\n\n        private const string SourceTag = \"Source\";\n        private const string PubmedIdTag = \"PubMed\";\n\n        private void ParseCitation(XElement xElement)\n\t\t{\n\t\t\tif (xElement == null || xElement.IsEmpty) return;\n\n\t\t\t\n\t\t\tforeach (var element in xElement.Elements(IdTag))\n\t\t\t{\n\t\t\t    var source = element.Attribute(SourceTag);\n\t\t\t    if (source == null) continue;\n\n\t\t\t    if (source.Value != PubmedIdTag) continue;\n\n\t\t\t    string pubmedId = element.Value.Split('.', ',')[0];\n                //pubmed ids with more than 8 digits are bad\n                if (long.TryParse(pubmedId, out long l) && l <= 99_999_999)//pubmed ids with more than 8 digits are bad\n                    _pubMedIds.Add(l);\n                //else Console.WriteLine($\"WARNING:unexpected pubmedID {pubmedId}.\");\n                \n    \t\t}\n\t\t}\n\n        private const string MeasureTag = \"Measure\";\n        private const string GenotypeSetTag = \"GenotypeSet\";\n\n        private void ParseGenotypeSet(XElement xElement)\n        {\n            if (xElement == null || xElement.IsEmpty) return;\n            \n            foreach (var measureSet in xElement.Elements(MeasureSetTag))\n            {\n                ParseMeasureSet(measureSet);\n            }\n        }\n\n        private void ParseMeasureSet(XElement xElement)\n\t\t{\n\t\t\tif (xElement == null || xElement.IsEmpty) return;\n            var variantId = xElement.Attribute(IdTag) == null ? 
null : xElement.Attribute(IdTag)?.Value;\n            foreach (var element in xElement.Elements(MeasureTag))\n\t\t    {\n\t\t        ParseMeasure(element, variantId);\n            }\n            \n\t\t}\n\n\n        private const string SeqLocationTag = \"SequenceLocation\";\n        private void ParseMeasure(XElement xElement, string variantId)\n\t\t{\n\t\t\tif (xElement == null || xElement.IsEmpty) return;\n\n            _allilicOmimIDs.Clear();\n\n\t\t\t//the variant type is available in the attributes\n            var varType = xElement.Attribute(TypeTag)?.Value;\n            \n            var variantList = new List<ClinvarVariant>();\n\n\t\t    foreach (var element in xElement.Elements(XrefTag))\n                ParseXref(element);\n\n\t\t    foreach (var element in xElement.Elements(SeqLocationTag))\n            {\n\t\t        var variant = GetClinvarVariant(element, _sequenceProvider.Assembly, _refChromDict, variantId);\n\n\t\t        if (variant == null) continue;\n\n                variant.VariantType = varType;\n                if (varType == \"Microsatellite\") UpdateVariantType(variant);\n                if (variant.AltAllele == \"Alu\") variant.VariantType = \"ALU\";\n\n\t\t        if (variant.AltAllele != null && variant.AltAllele.Length == 1 && _iupacBases.ContainsKey(variant.AltAllele[0]))\n\t\t            AddIupacVariants(variant, variantList);\n\t\t        else\n\t\t            variantList.Add(variant);\n\t\t    }\n\n            if (_allilicOmimIDs.Count != 0) \n            {\n                foreach (var variant in variantList)\n                {\n                    variant.AllelicOmimIds.AddRange(_allilicOmimIDs);\n                }\n            }\n\n            _variantList.AddRange(variantList);\n\t\t    \n\t\t}\n\n        private static void UpdateVariantType(ClinvarVariant variant)\n        {\n            var refAllele = variant.RefAllele;\n            var altAllele = variant.AltAllele;\n\n            if (refAllele == null || altAllele == 
null) return;\n\n            var variantType = SmallVariantCreator.GetVariantType(refAllele, altAllele);\n            switch (variantType)\n            {\n                case VariantType.deletion:\n                    variant.VariantType = \"Deletion\";\n                    break;\n                case VariantType.insertion:\n                    variant.VariantType = \"Insertion\";\n                    break;\n                case VariantType.indel:\n                    variant.VariantType = \"Indel\";\n                    break;\n                case VariantType.duplication:\n                    variant.VariantType = \"Duplication\";\n                    break;\n                case VariantType.SNV:\n                    variant.VariantType = \"SNV\";\n                    break;\n                case VariantType.MNV:\n                    variant.VariantType = \"MNV\";\n                    break;\n\n            }\n        }\n        private void AddIupacVariants(ClinvarVariant variant, List<ClinvarVariant> variantList)\n\t\t{\n\t\t\tforeach (char altAllele in _iupacBases[variant.AltAllele[0]])\n\t\t\t{\n\t\t\t    variantList.Add(new ClinvarVariant(variant.Chromosome,variant.Start, variant.Stop,variant.VariantId, variant.RefAllele, altAllele.ToString()));\n\t\t\t}\n\t\t}\n\n\n        private const string ChrTag          = \"Chr\";\n        private const string StopTag         = \"display_stop\";\n        private const string StartTag        = \"display_start\";\n        private const string AssemblyTag     = \"Assembly\";\n        private const string RefAlleleTag    = \"referenceAllele\";\n        private const string AltAlleleTag    = \"alternateAllele\";\n        private const string VcfPositionTag  = \"positionVCF\";\n        private const string VcfRefAlleleTag = \"referenceAlleleVCF\";\n        private const string VcfAltAlleleTag = \"alternateAlleleVCF\";\n        \n\n        private static ClinvarVariant GetClinvarVariant(XElement xElement, GenomeAssembly 
genomeAssembly, Dictionary<string, Chromosome> refChromDict, string variantId)\n        {\n\t\t    if (xElement == null ) return null;\n\t\t\t//<SequenceLocation Assembly=\"GRCh38\" Chr=\"17\" Accession=\"NC_000017.11\" start=\"43082402\" stop=\"43082402\" variantLength=\"1\" referenceAllele=\"A\" alternateAllele=\"C\" />\n\n\t\t\tif (genomeAssembly.ToString()!= xElement.Attribute(AssemblyTag)?.Value\n                && genomeAssembly != GenomeAssembly.Unknown) return null;\n\n\t\t\tvar chromosome = refChromDict.ContainsKey(xElement.Attribute(ChrTag)?.Value)\n\t\t\t\t? refChromDict[xElement.Attribute(ChrTag)?.Value]\n\t\t\t\t: null;\n\t\t\tint    start     = Convert.ToInt32(xElement.Attribute(StartTag)?.Value);\n\t\t\tint    stop      = Convert.ToInt32(xElement.Attribute(StopTag)?.Value);\n\t\t\tstring refAllele = xElement.Attribute(RefAlleleTag)?.Value;\n\t\t\tstring altAllele = xElement.Attribute(AltAlleleTag)?.Value;\n            \n\t\t\t//check if VCV values are present\n\t\t\tint vcfPosition = Convert.ToInt32(xElement.Attribute(VcfPositionTag)?.Value);\n\t\t\tstring vcfRefAllele = xElement.Attribute(VcfRefAlleleTag)?.Value;\n\t\t\tstring vcfAltAllele = xElement.Attribute(VcfAltAlleleTag)?.Value;\n\t\t\t\n\t\t\tif (vcfRefAllele != null)\n\t\t\t{\n\t\t\t\tstart = vcfPosition;\n\t\t\t\trefAllele = vcfRefAllele;\n\t\t\t\taltAllele = vcfAltAllele;\n\t\t\t\tstop = start + refAllele.Length - 1;\n\t\t\t}\n\n\t\t\tif (stop - start + 1 > MaxVariantLength) return null;\n            AdjustVariant(ref start, ref refAllele, ref altAllele);\n\t\t    \n            return new ClinvarVariant(chromosome, start, stop, variantId, refAllele, altAllele);\n\t\t}\n\n\t\tprivate static void AdjustVariant(ref int start, ref string referenceAllele, ref string altAllele)\n\t\t{\n\t\t    if (referenceAllele == \"-\")\n\t\t    {\n\t\t        referenceAllele = \"\";\n\t\t        start++;\n\t\t    }\n\n            if (altAllele == \"-\")\n\t\t\t\taltAllele = \"\";\n\t\t}\n\n        private 
void GetClinicalSignificance(XElement xElement)\n\t\t{\n\t\t\tif (xElement == null || xElement.IsEmpty) return;\n\n\t\t    _reviewStatus = xElement.Element(ReviewStatusTag)?.Value;\n            var description = xElement.Element(DescriptionTag)?.Value;\n            var explanation = xElement.Element(ExplanationTag)?.Value;\n\n            _significances = ClinVarCommon.GetSignificances(description, explanation);\n\n            ValidateSignificance(_significances);\n        }\n\n        private void ValidateSignificance(string[] significances)\n        {\n            foreach (var significance in significances)\n            {\n                if (!ClinVarCommon.ValidPathogenicity.Contains(significance)) \n                    throw new InvalidDataException($\"Invalid pathogenicity found in {_id}. Observed: {significance}\");\n            }\n        }\n\n        \n        \n\n        public void Dispose()\n        {\n\t        _rcvStream?.Dispose();\n\t        _sequenceProvider?.Dispose();\n        }\n    }\n}\n"
  },
  {
    "path": "SAUtils/InputFileParsers/ClinVar/ClinVarSchema.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Linq;\r\nusing System.Text;\r\nusing SAUtils.Schema;\r\nusing VariantAnnotation.SA;\r\n\r\nnamespace SAUtils.InputFileParsers.ClinVar\r\n{\r\n    public static class ClinVarSchema\r\n    {\r\n        private static readonly SaJsonValueType PrimaryValueType = SaJsonValueType.ObjectArray;\r\n\r\n        private static readonly string[] JsonKeys = {\r\n            \"id\",\r\n            \"variationId\",\r\n            \"reviewStatus\",\r\n            \"alleleOrigins\",\r\n            \"refAllele\",\r\n            \"altAllele\",\r\n            \"phenotypes\",\r\n            \"medGenIds\",\r\n            \"omimIds\",\r\n            \"orphanetIds\",\r\n            \"significance\",\r\n            \"lastUpdatedDate\",\r\n            \"pubMedIds\",\r\n            \"isAlleleSpecific\"\r\n        };\r\n\r\n        private static readonly List<SaJsonValueType> ValueTypes = new List<SaJsonValueType>\r\n        {\r\n            SaJsonValueType.String,\r\n            SaJsonValueType.String,\r\n            SaJsonValueType.String,\r\n            SaJsonValueType.StringArray,\r\n            SaJsonValueType.String,  \r\n            SaJsonValueType.String,\r\n            SaJsonValueType.StringArray,\r\n            SaJsonValueType.StringArray,\r\n            SaJsonValueType.StringArray,\r\n            SaJsonValueType.StringArray,\r\n            SaJsonValueType.StringArray,\r\n            SaJsonValueType.String,\r\n            SaJsonValueType.StringArray,\r\n            SaJsonValueType.Bool\r\n        };\r\n\r\n        public static SaJsonSchema Get()\r\n        {\r\n            var jsonSchema = SaJsonSchema.Create(new StringBuilder(), SaCommon.ClinvarTag, PrimaryValueType, JsonKeys);\r\n            jsonSchema.SetNonSaKeys(new []{\"isAlleleSpecific\"});\r\n\r\n            foreach((string key, var valueType) in JsonKeys.Zip(ValueTypes, (a, b) => (a, b))) \r\n                jsonSchema.AddAnnotation(key, 
SaJsonKeyAnnotation.CreateFromProperties(valueType, 0, null));\r\n\r\n            return jsonSchema;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/InputFileParsers/ClinVar/ClinVarVariationReader.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Xml;\nusing System.Xml.Linq;\nusing IO;\n\nnamespace SAUtils.InputFileParsers.ClinVar\n{\n    public sealed class ClinVarVariationReader : IDisposable\n    {\n        private const string VcvRecordTag      = \"VariationArchive\";\n        private const string AccessionTag      = \"Accession\";\n        private const string VersionTag        = \"Version\";\n        private const string DateTag           = \"DateLastUpdated\";\n        private const string ReviewStatusTag   = \"ReviewStatus\";\n        private const string InterpretedRecordTag = \"InterpretedRecord\";\n        private const string InterpretationsTag   = \"Interpretations\";\n        private const string InterpretationTag    = \"Interpretation\";\n\n        private const string IncludedRecordTag = \"IncludedRecord\";\n        \n        private const string DescriptionTag = \"Description\";\n        private const string ExplanationTag = \"Explanation\";\n        private const string TypeTag        = \"Type\";\n\n\n        private readonly Stream _readStream;\n\n        public ClinVarVariationReader(Stream readStream)\n        {\n            _readStream = readStream;\n        }\n\n        public IEnumerable<VcvItem> GetItems()\n        {\n            using (var reader = FileUtilities.GetStreamReader(_readStream))\n            using (var xmlReader = XmlReader.Create(reader, new XmlReaderSettings { DtdProcessing = DtdProcessing.Prohibit, IgnoreWhitespace = true}))\n            {\n                xmlReader.ReadToDescendant(VcvRecordTag);\n                do\n                {\n                    var  subTreeReader = xmlReader.ReadSubtree();\n                    var xElement       = XElement.Load(subTreeReader);\n                    \n                    var item = ExtractVariantRecord(xElement);\n                    \n                    if (item == null) continue;\n                    yield return item;\n\n  
              } while (xmlReader.ReadToNextSibling(VcvRecordTag));\n            }\n        }\n\n        private static VcvItem ExtractVariantRecord(XElement xElement)\n        {\n            if (xElement == null || xElement.IsEmpty) return null;\n            \n            var accession  = xElement.Attribute(AccessionTag)?.Value;\n            var version    = xElement.Attribute(VersionTag)?.Value;\n            var dateString       = xElement.Attribute(DateTag)?.Value;\n            var date        = ClinVarParser.ParseDate(dateString);\n\n            var interpretationRecord = xElement.Element(InterpretedRecordTag);\n            var includedRecord = xElement.Element(IncludedRecordTag);\n            \n            //expecting one of the two to be non-null\n            if (!((interpretationRecord == null || interpretationRecord.IsEmpty) ^\n                  (includedRecord       == null || includedRecord.IsEmpty)))\n            {\n                throw new DataMisalignedException(\"Only one of interpretation/included records should be present for \"+ accession);\n            }\n            \n            if (interpretationRecord != null && !interpretationRecord.IsEmpty)\n            {\n                var interpretedSignificances = GetSignificances(interpretationRecord.Element(InterpretationsTag));\n\n                var interpretedReviewStatusString = interpretationRecord.Element(ReviewStatusTag)?.Value;\n                if(interpretedReviewStatusString ==null) throw new MissingFieldException($\"No review status provided for {accession}.{version}\");\n            \n                var interpretedReviewStatus = ClinVarCommon.ReviewStatusNameMapping[interpretedReviewStatusString];\n                return new VcvItem(accession, version, date, interpretedReviewStatus, interpretedSignificances);\n            }\n            \n            var includedSignificances = GetSignificances(includedRecord.Element(InterpretationsTag));\n\n            var includedReviewStatusString = 
includedRecord.Element(ReviewStatusTag)?.Value;\n            if(includedReviewStatusString ==null) throw new MissingFieldException($\"No review status provided for {accession}.{version}\");\n            \n            var reviewStatus = ClinVarCommon.ReviewStatusNameMapping[includedReviewStatusString];\n            return new VcvItem(accession, version, date, reviewStatus, includedSignificances);\n        }\n\n        \n        private static List<string> GetSignificances(XElement interpretations)\n        {\n            if (interpretations == null || interpretations.IsEmpty) return null;\n            \n            var significanceList = new List<string>();\n            foreach (var interpretation in interpretations.Elements(InterpretationTag))\n            {\n                var type = interpretation.Attribute(TypeTag)?.Value;\n                if(type==null || type != \"Clinical significance\") continue;\n                \n                var description = interpretation.Element(DescriptionTag)?.Value.ToLower();\n                var explanation = interpretation.Element(ExplanationTag)?.Value.ToLower();\n                if(description == null && explanation == null) continue;\n\n                var significances = ClinVarCommon.GetSignificances(description, explanation);\n                foreach (var significance in significances)\n                {\n                    if (!ClinVarCommon.ValidPathogenicity.Contains(significance)) \n                        throw new InvalidDataException($\"Invalid clinical significance found. Observed: {significance}\");\n                    significanceList.Add(significance);\n                }\n            }\n            return significanceList;\n        }\n\n        public void Dispose()\n        {\n            _readStream?.Dispose();\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/InputFileParsers/ClinVar/ClinvarVariant.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Genome;\r\n\r\nnamespace SAUtils.InputFileParsers.ClinVar\r\n{\r\n    public sealed class ClinvarVariant\r\n    {\r\n        public readonly Chromosome Chromosome;\r\n        public int Start { get; }\r\n        public readonly int Stop;\r\n        public readonly string RefAllele;\r\n        public readonly string AltAllele;\r\n        public string VariantType;\r\n        public readonly List<string> AllelicOmimIds;\r\n        public readonly string VariantId;\r\n\r\n        public ClinvarVariant(Chromosome chr, int start, int stop, string variantId, string refAllele, string altAllele, List<string> allilicOmimIds =null)\r\n        {\r\n            Chromosome      = chr;\r\n            Start           = start;\r\n            Stop            = stop;\r\n            VariantId       = variantId;\r\n            RefAllele       = refAllele;\r\n            AltAllele       = altAllele;\r\n            AllelicOmimIds  = allilicOmimIds ?? new List<string>();\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/InputFileParsers/ClinVar/IClinVarSaItem.cs",
    "content": "using System;\nusing System.Collections.Generic;\nusing VariantAnnotation.Interface.SA;\n\nnamespace SAUtils.InputFileParsers.ClinVar;\n\npublic interface IClinVarSaItem: ISupplementaryDataItem, IComparable<IClinVarSaItem>\n{\n    string Id { get; }\n    IEnumerable<string>        Significances { get; }\n    ClinVarCommon.ReviewStatus ReviewStatus  { get; }\n}"
  },
  {
    "path": "SAUtils/InputFileParsers/ClinVar/VariantAligner.cs",
    "content": "﻿using System.IO;\r\nusing Genome;\r\nusing SAUtils.MitoMap;\r\nusing Variants;\r\n\r\nnamespace SAUtils.InputFileParsers.ClinVar\r\n{\r\n    public sealed class VariantAligner\r\n    {\r\n        private readonly ISequence _compressedSequence;\r\n        private const int MaxRotationRange = 500;\r\n\r\n        /// <summary>\r\n        /// constructor\r\n        /// </summary>\r\n        public VariantAligner(ISequence compressedSequence)\r\n        {\r\n            _compressedSequence = compressedSequence;\r\n        }\r\n\r\n        /// <summary>\r\n        /// Left aligns the variant using base rotation\r\n        /// </summary>\r\n        /// <returns>Tuple of new position, ref and alt allele</returns>\r\n        public (int RefPosition, string RefAllele, string AltAllele) LeftAlign(int refPosition, string refAllele, string altAllele, bool isCircularGenome = false)\r\n        {\r\n            var trimmedAllele = BiDirectionalTrimmer.Trim(refPosition, refAllele, altAllele);\r\n            var trimmedPos = trimmedAllele.Start;\r\n            var trimmedRefAllele = trimmedAllele.RefAllele;\r\n            var trimmedAltAllele = trimmedAllele.AltAllele;\r\n\r\n            // alignment only makes sense for insertion and deletion\r\n            if (!(trimmedAltAllele.Length == 0 || trimmedRefAllele.Length == 0)) return (refPosition, refAllele, altAllele);\r\n\r\n            var upstreamSeq = GetUpstreamSeq(trimmedPos, MaxRotationRange, isCircularGenome);\r\n            if (upstreamSeq == null)\r\n                throw new InvalidDataException(\"Reference sequence not set, please check that it is loaded\");\r\n\r\n            // compressed seq is 0 based\r\n            var combinedSeq = upstreamSeq;\r\n            int repeatLength;\r\n            int i;\r\n            if (trimmedRefAllele.Length > trimmedAltAllele.Length)\r\n            {\r\n                // deletion\r\n                combinedSeq += trimmedRefAllele;\r\n                repeatLength = 
trimmedRefAllele.Length;\r\n                for (i = combinedSeq.Length - 1; i >= repeatLength; i--, trimmedPos--)\r\n                {\r\n                    if (combinedSeq[i] != combinedSeq[i - repeatLength]) break;\r\n                }\r\n                var newRefAllele = combinedSeq.Substring(i + 1 - repeatLength, repeatLength);\r\n                return (trimmedPos, newRefAllele, \"\"); //alt is empty for deletion\r\n            }\r\n\r\n            //insertion\r\n            combinedSeq += trimmedAltAllele;\r\n            repeatLength = trimmedAltAllele.Length;\r\n\r\n            for (i = combinedSeq.Length - 1; i >= repeatLength; i--, trimmedPos--)\r\n            {\r\n                if (combinedSeq[i] != combinedSeq[i - repeatLength]) break;\r\n            }\r\n            var newAltAllele = combinedSeq.Substring(i + 1 - repeatLength, repeatLength);\r\n            return (trimmedPos, \"\", newAltAllele);\r\n        }\r\n\r\n        private string GetUpstreamSeq(int position, int length, bool isCircularGenome)\r\n        {\r\n            if (isCircularGenome)\r\n            {\r\n                var circularGenome = new CircularGenomeModel(_compressedSequence);\r\n                var interval = (position - length, position -1);\r\n                return circularGenome.ExtractIntervalSequence(interval);\r\n            }\r\n\r\n            var adjustedLength = length < position ? length : position - 1;\r\n            return _compressedSequence.Substring(position - 1 - adjustedLength, adjustedLength);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/InputFileParsers/ClinVar/VcvItem.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing OptimizedCore;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.InputFileParsers.ClinVar\n{\n    public sealed class VcvItem : IComparable<int>, IComparable<VcvItem>\n    {\n        public readonly int          VariantId;\n        public readonly string       Accession;\n        public readonly string       Version;\n        public readonly DateTime     LastUpdatedDate;\n        public readonly ClinVarCommon.ReviewStatus ReviewStatus;\n        public readonly IEnumerable<string> Significances;\n        \n        public VcvItem(string accession, string version, long updatedDateTicks, ClinVarCommon.ReviewStatus reviewStatus, IEnumerable<string> significances)\n        {\n            \n            Accession       = accession;\n            Version         = version;\n            LastUpdatedDate = new DateTime(updatedDateTicks);\n            ReviewStatus    = reviewStatus;\n            Significances   = significances;\n\n            VariantId = int.Parse(accession.Substring(3));\n        }\n        \n        public string GetJsonString()\n        {\n            var sb         = StringBuilderPool.Get();\n            var jsonObject = new JsonObject(sb);\n\n            jsonObject.AddStringValue(\"id\", $\"{Accession}.{Version}\");\n            jsonObject.AddStringValue(\"reviewStatus\", ClinVarCommon.ReviewStatusStrings[ReviewStatus]);\n            jsonObject.AddStringValue(\"lastUpdatedDate\", LastUpdatedDate.ToString(\"yyyy-MM-dd\"));\n            jsonObject.AddStringValues(\"significance\", Significances);\n\n            return StringBuilderPool.GetStringAndReturn(sb);\n        }\n\n        public int CompareTo(int vcvId)\n        {\n            return VariantId.CompareTo(vcvId);\n        }\n\n        public int CompareTo(VcvItem other)\n        {\n            if (ReferenceEquals(this, other)) return 0;\n            if (ReferenceEquals(null, other)) return 1;\n            return 
VariantId.CompareTo(other.VariantId);\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/InputFileParsers/ClinVar/VcvSaItem.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing Genome;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.InputFileParsers.ClinVar\n{\n    public sealed class VcvSaItem: IClinVarSaItem, IEquatable<VcvSaItem>\n    {\n        public Chromosome Chromosome { get; }\n        public int         Position   { get; set; }\n        public string      RefAllele  { get; set; }\n        public string      AltAllele  { get; set; }\n\n        private readonly string                     _accession;\n        private readonly string                     _version;\n        private readonly DateTime                   _lastUpdatedDate;\n        public           ClinVarCommon.ReviewStatus ReviewStatus  { get; }\n        public           IEnumerable<string>        Significances { get; }\n        public           string                     Id            => $\"{_accession}.{_version}\";\n\n        public VcvSaItem(Chromosome chromosome, int position, string refAllele, string altAllele, string accession, string version, DateTime lastUpdatedDate, ClinVarCommon.ReviewStatus reviewStatus, IEnumerable<string> significances)\n        {\n            Chromosome = chromosome;\n            Position   = position;\n            RefAllele  = refAllele;\n            AltAllele  = altAllele;\n\n            _accession       = accession;\n            _version         = version;\n            _lastUpdatedDate = lastUpdatedDate;\n            ReviewStatus    = reviewStatus;\n            Significances   = significances;\n        }\n\n        public string GetJsonString()\n        {\n            var sb= StringBuilderPool.Get();\n            var jsonObject = new JsonObject(sb);\n\n            jsonObject.AddStringValue(\"id\", $\"{_accession}.{_version}\");\n            jsonObject.AddStringValue(\"reviewStatus\", ClinVarCommon.ReviewStatusStrings[ReviewStatus]);\n            jsonObject.AddStringValues(\"significance\", 
Significances);\n            jsonObject.AddStringValue(\"refAllele\", ClinVarCommon.NormalizeAllele(RefAllele));\n            jsonObject.AddStringValue(\"altAllele\", ClinVarCommon.NormalizeAllele(AltAllele));\n            jsonObject.AddStringValue(\"lastUpdatedDate\", _lastUpdatedDate.ToString(\"yyyy-MM-dd\"));\n\n            return StringBuilderPool.GetStringAndReturn(sb);\n        }\n\n        public string InputLine { get; set; }\n\n        public int CompareTo(IClinVarSaItem other)\n        {\n            return Chromosome.Index != other.Chromosome.Index\n                ? Chromosome.Index.CompareTo(other.Chromosome.Index)\n                : Position.CompareTo(other.Position);\n        }\n\n\n        public bool Equals(VcvSaItem other)\n        {\n            if (ReferenceEquals(null, other)) return false;\n            if (ReferenceEquals(this, other)) return true;\n            return _accession == other._accession && _version == other._version;\n        }\n        public override int GetHashCode()\n        {\n            return HashCode.Combine(_accession, _version);\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/InputFileParsers/Cosmic/MergedCosmicReader.cs",
    "content": "using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing Compression.Utilities;\r\nusing Genome;\r\nusing OptimizedCore;\r\nusing SAUtils.DataStructures;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing Variants;\r\n\r\nnamespace SAUtils.InputFileParsers.Cosmic\r\n{\r\n    public sealed class MergedCosmicReader \r\n    {\r\n        private readonly StreamReader _vcfFileReader;\r\n        private readonly StreamReader _tsvFileReader;\r\n        private string _geneName;\r\n        private int? _sampleCount;\r\n\r\n        private int _mutationIdIndex       = -1;\r\n        private int _primarySiteIndex      = -1;\r\n        private int _primaryHistologyIndex = -1;\r\n        private int _studyIdIndex          = -1;\r\n\r\n        private const string StudyIdTag = \"ID_STUDY\";\r\n\r\n        private readonly Dictionary<string, Chromosome> _refChromDict;\r\n        private readonly ISequenceProvider _sequenceProvider;\r\n        private readonly Dictionary<string, HashSet<CosmicItem.CosmicStudy>> _studies;\r\n\r\n        public MergedCosmicReader(string vcfFile, string tsvFile, ISequenceProvider sequenceProvider)\r\n        {\r\n            _vcfFileReader = GZipUtilities.GetAppropriateStreamReader(vcfFile);\r\n            _tsvFileReader = GZipUtilities.GetAppropriateStreamReader(tsvFile);\r\n            _sequenceProvider = sequenceProvider;\r\n            _refChromDict  = _sequenceProvider.RefNameToChromosome;\r\n            _studies       = new Dictionary<string, HashSet<CosmicItem.CosmicStudy>>();\r\n        }\r\n        \r\n        public IEnumerable<CosmicItem> GetItems()\r\n        {\r\n            // taking up all studies in to the dictionary\r\n            using (_tsvFileReader)\r\n            {\r\n                string line;\r\n                while ((line = _tsvFileReader.ReadLine()) != null)\r\n                {\r\n                    if 
(IsHeaderLine(line))\r\n                        GetColumnIndexes(line); // the first line is supposed to be the header line\r\n                    else AddCosmicStudy(line);\r\n                }\r\n            }\r\n\r\n            using (_vcfFileReader)\r\n            {\r\n                string line;\r\n                while ((line = _vcfFileReader.ReadLine()) != null)\r\n                {\r\n                    // Skip empty lines.\r\n                    if (string.IsNullOrWhiteSpace(line)) continue;\r\n\r\n                    // Skip comments.\r\n                    if (line.OptimizedStartsWith('#')) continue;\r\n                    var cosmicItems = ExtractCosmicItems(line);\r\n                    if (cosmicItems == null) continue;\r\n\r\n                    foreach (var cosmicItem in cosmicItems)\r\n                    {\r\n                        yield return cosmicItem;\r\n                    }\r\n                }\r\n            }\r\n        }\r\n\r\n        private void AddCosmicStudy(string line)\r\n        {\r\n            var columns = line.OptimizedSplit('\\t');\r\n\r\n            string mutationId  = columns[_mutationIdIndex];\r\n            string studyId     = columns[_studyIdIndex];\r\n            var sites          = GetSites(columns);\r\n            var histologies    = GetHistologies(columns);\r\n            \r\n            if (string.IsNullOrEmpty(mutationId)) return;\r\n\r\n            var study = new CosmicItem.CosmicStudy(studyId, histologies, sites);\r\n            if (_studies.TryGetValue(mutationId, out var studySet))\r\n                studySet.Add(study);\r\n            else _studies[mutationId] = new HashSet<CosmicItem.CosmicStudy> { study };\r\n        }\r\n\r\n        private List<string> GetHistologies(string[] columns)\r\n        {\r\n            var histologies = new HashSet<string>();\r\n            var primaryHistology = columns[_primaryHistologyIndex].Replace('_', ' ');\r\n            TryAddValue(primaryHistology, 
histologies);\r\n\r\n            return histologies.ToList();\r\n        }\r\n\r\n        private List<string> GetSites(string[] columns)\r\n        {\r\n            var sites = new HashSet<string>();\r\n\r\n            var primarySite = columns[_primarySiteIndex].Replace('_', ' ');\r\n            TryAddValue(primarySite, sites);\r\n\r\n            return sites.ToList();\r\n        }\r\n\r\n        private static void TryAddValue(string value, ISet<string> sites)\r\n        {\r\n           if (!string.IsNullOrEmpty(value) && value != \"NS\")\r\n                sites.Add(value);\r\n        }\r\n\r\n        private static bool IsHeaderLine(string line) => line.Contains(StudyIdTag);\r\n\r\n        private void GetColumnIndexes(string headerLine)\r\n        {\r\n            //Gene name       Accession Number        Gene CDS length HGNC ID Sample name     ID_sample       ID_tumour       Primary site    Site subtype 1  Site subtype 2  Site subtype 3  Primary histology       Histology subtype 1     Histology subtype 2     Histology subtype 3     Genome-wide screen      Mutation ID     Mutation CDS    Mutation AA     Mutation Description    Mutation zygosity       LOH     GRCh    Mutation genome position        Mutation strand SNP     FATHMM prediction       FATHMM score    Mutation somatic status Pubmed_PMID     ID_STUDY        Sample source   Tumour origin   Age\r\n\r\n            _mutationIdIndex       = -1;\r\n            _studyIdIndex          = -1;\r\n            _primarySiteIndex      = -1;\r\n            _primaryHistologyIndex = -1;\r\n\r\n            var columns = headerLine.OptimizedSplit('\\t');\r\n            for (int i = 0; i < columns.Length; i++)\r\n            {\r\n                switch (columns[i])\r\n                {\r\n                    case \"Mutation ID\":\r\n                        _mutationIdIndex = i;\r\n                        break;\r\n                    case StudyIdTag:\r\n                        _studyIdIndex = i;\r\n                        
break;\r\n                    case \"Primary site\":\r\n                        _primarySiteIndex = i;\r\n                        break;\r\n                    case \"Primary histology\":\r\n                        _primaryHistologyIndex = i;\r\n                        break;\r\n                }\r\n            }\r\n\r\n            if (_mutationIdIndex == -1)\r\n                throw new InvalidDataException(\"Column for mutation Id could not be detected\");\r\n            if (_studyIdIndex == -1)\r\n                throw new InvalidDataException(\"Column for study Id could not be detected\");\r\n            if (_primarySiteIndex == -1)\r\n                throw new InvalidDataException(\"Column for primary site could not be detected\");\r\n            if (_primaryHistologyIndex == -1)\r\n                throw new InvalidDataException(\"Column for primary histology could not be detected\");\r\n        }\r\n\r\n        private const int MaxVariantLength= 1000;\r\n        internal List<CosmicItem> ExtractCosmicItems(string vcfLine)\r\n        {\r\n            var splitLine = vcfLine.Split(new[] { '\\t' }, 8);\r\n            //skipping large variants\r\n            if (splitLine[VcfCommon.RefIndex].Length > MaxVariantLength || splitLine[VcfCommon.AltIndex].Length > MaxVariantLength) return null;\r\n\r\n            string chromosomeName = splitLine[VcfCommon.ChromIndex];\r\n            if (!_refChromDict.ContainsKey(chromosomeName)) return null;\r\n\r\n            var chromosome    = _refChromDict[chromosomeName];\r\n            int position      = int.Parse(splitLine[VcfCommon.PosIndex]);\r\n            string cosmicId   = splitLine[VcfCommon.IdIndex];\r\n            string refAllele  = splitLine[VcfCommon.RefIndex];\r\n            var altAlleles    = splitLine[VcfCommon.AltIndex].OptimizedSplit(',');\r\n            string infoField  = splitLine[VcfCommon.InfoIndex];\r\n\r\n            Clear();\r\n\r\n            ParseInfoField(infoField);\r\n\r\n            var 
cosmicItems = new List<CosmicItem>();\r\n\r\n            foreach (string altAllele in altAlleles)\r\n            {\r\n                var (shiftedPos, shiftedRef, shiftedAlt) = VariantUtils.TrimAndLeftAlign(position, refAllele,\r\n                    altAllele, _sequenceProvider.Sequence);\r\n\r\n                cosmicItems.Add(_studies.TryGetValue(cosmicId, out var studies)\r\n                    ? new CosmicItem(chromosome, shiftedPos, cosmicId, shiftedRef, shiftedAlt, _geneName, studies,\r\n                        _sampleCount)\r\n                    : new CosmicItem(chromosome, shiftedPos, cosmicId, shiftedRef, shiftedAlt, _geneName, null,\r\n                        _sampleCount));\r\n            }\r\n\r\n            return cosmicItems;\r\n        }\r\n\r\n        private void Clear()\r\n        {\r\n            _geneName    = null;\r\n            _sampleCount = null;\r\n        }\r\n\r\n        private void ParseInfoField(string infoFields)\r\n        {\r\n            if (infoFields == \"\" || infoFields == \".\") return;\r\n            var infoItems = infoFields.OptimizedSplit(';');\r\n\r\n            foreach (string infoItem in infoItems)\r\n            {\r\n                if (string.IsNullOrEmpty(infoItem)) continue;\r\n\r\n                (string key, string value) = infoItem.OptimizedKeyValue();\r\n\r\n                // sanity check\r\n                if (value != null) SetInfoField(key, value);\r\n            }\r\n        }\r\n\r\n        private void SetInfoField(string vcfId, string value)\r\n        {\r\n            switch (vcfId)\r\n            {\r\n                case \"GENE\":\r\n                    _geneName = value;\r\n                    break;\r\n                case \"CNT\":\r\n                    _sampleCount = Convert.ToInt32(value);\r\n                    break;\r\n            }\r\n        }       \r\n    }\r\n}\r\n"
  },
  {
    "path": "SAUtils/InputFileParsers/DGV/DgvReader.cs",
    "content": "using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing Genome;\r\nusing OptimizedCore;\r\nusing SAUtils.DataStructures;\r\nusing SAUtils.ParseUtils;\r\n\r\nnamespace SAUtils.InputFileParsers.DGV\r\n{\r\n    public sealed class DgvReader: IDisposable\r\n    {\r\n        #region members\r\n\r\n        private readonly StreamReader _reader;\r\n        private readonly Dictionary<string, Chromosome> _refChromDict;\r\n\r\n        #endregion\r\n\r\n        // constructor\r\n        public DgvReader(StreamReader reader, Dictionary<string, Chromosome> refChromDict)\r\n        {\r\n            _reader = reader;\r\n            _refChromDict = refChromDict;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns a DgvItem parsed from a tab-delimited line, or null when the line has too few columns or an unknown chromosome\r\n        /// </summary>\r\n        public static DgvItem ExtractDgvItem(string line, Dictionary<string, Chromosome> refChromDict)\r\n        {\r\n            var cols = line.OptimizedSplit('\\t');\r\n            if (cols.Length < 8) return null;\r\n\r\n            string id = cols[0];\r\n            string chromosomeName = cols[1];\r\n\r\n            if (!refChromDict.ContainsKey(chromosomeName)) return null;\r\n\r\n            var chromosome = refChromDict[chromosomeName];\r\n\r\n            int start = int.Parse(cols[2]);\r\n            int end = int.Parse(cols[3]);\r\n            string variantType = cols[4];\r\n            string variantSubType = cols[5];\r\n            int sampleSize = int.Parse(cols[14]);\r\n            int observedGains = cols[15] == \"\" ? 0 : int.Parse(cols[15]);\r\n            int observedLosses = cols[16] == \"\" ? 
0 : int.Parse(cols[16]);\r\n\r\n            var seqAltType = SaParseUtilities.GetSequenceAlterationType(variantType, variantSubType);\r\n\r\n            return new DgvItem(id, chromosome, start, end, sampleSize, observedGains, observedLosses, seqAltType);\r\n        }\r\n\r\n\r\n\r\n        public IEnumerable<DgvItem> GetItems()\r\n        {\r\n            using (var reader = _reader)\r\n            {\r\n                while (true)\r\n                {\r\n                    // grab the next line\r\n                    string line = reader.ReadLine();\r\n                    if (line == null) break;\r\n\r\n                    // skip header and empty lines\r\n                    if (string.IsNullOrWhiteSpace(line) || IsDgvHeader(line)) continue;\r\n                    var dgvItem = ExtractDgvItem(line, _refChromDict);\r\n                    if (dgvItem == null) continue;\r\n                    yield return dgvItem;\r\n                }\r\n            }\r\n        }\r\n\r\n        private static bool IsDgvHeader(string line)\r\n        {\r\n            return line.StartsWith(\"variantaccession\");\r\n        }\r\n\r\n        public void Dispose()\r\n        {\r\n            _reader?.Dispose();\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/InputFileParsers/DataSourceVersionReader.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing IO;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Providers;\r\n\r\nnamespace SAUtils.InputFileParsers\r\n{\r\n    /// <inheritdoc />\r\n    /// <summary>\r\n    /// reads data version from a file that is expected to be found alongside each supplementary data file\r\n    /// </summary>\r\n    public sealed class DataSourceVersionReader : IDisposable\r\n    {\r\n        #region members\r\n\r\n        private readonly StreamReader _reader;\r\n\r\n        #endregion\r\n\r\n        public void Dispose()\r\n        {\r\n            _reader.Dispose();\r\n        }\r\n\r\n        /// <summary>\r\n        /// constructor\r\n        /// </summary>\r\n        public DataSourceVersionReader(Stream fileStream)\r\n        {\r\n            _reader = new StreamReader(fileStream);\r\n        }\r\n\r\n        public static DataSourceVersion GetSourceVersion(string versionFileName)\r\n        {\r\n            if (!versionFileName.EndsWith(\".version\")) versionFileName += \".version\";\r\n            if (!File.Exists(versionFileName))\r\n            {\r\n                throw new FileNotFoundException(versionFileName);\r\n            }\r\n\r\n            var fileStream = FileUtilities.GetReadStream(versionFileName);\r\n\r\n            return GetSourceVersion(fileStream);\r\n        }\r\n\r\n        private static DataSourceVersion GetSourceVersion(Stream versionFileStream)\r\n        {\r\n            using (var versionReader = new DataSourceVersionReader(versionFileStream))\r\n            {\r\n                var version = versionReader.GetVersion();\r\n                return version;\r\n            }\r\n        }\r\n\r\n        public DataSourceVersion GetVersion()\r\n        {\r\n            // NAME = dbSNP\r\n            // VERSION = 147\r\n            // DATE = 2016-04-08\r\n            // DESCRIPTION =\r\n\r\n            string line, name = null, version = null, date = null, description = null;\r\n\r\n            
while ((line = _reader.ReadLine()) != null)\r\n            {\r\n                (string key, string value) = line.OptimizedKeyValue();\r\n                if (key == null || value == null) continue;\r\n\r\n                switch (key)\r\n                {\r\n                    case \"NAME\":\r\n                        name = value;\r\n                        break;\r\n                    case \"VERSION\":\r\n                        version = value;\r\n                        break;\r\n                    case \"DATE\":\r\n                        date = value;\r\n                        break;\r\n                    case \"DESCRIPTION\":\r\n                        description = value;\r\n                        break;\r\n                }\r\n            }\r\n\r\n            if (date == null)\r\n            {\r\n                date = DateTime.Now.ToString(\"yyyy-MM-dd\");\r\n                Console.ForegroundColor = ConsoleColor.Red;\r\n                Console.WriteLine($\"WARNING: Date was missing from the data source. Using {date} instead.\");\r\n                Console.ResetColor();\r\n            }\r\n\r\n            return new DataSourceVersion(name, version, DateTime.Parse(date).Ticks, description);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/InputFileParsers/DbSnp/DbSnpReader.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing IO;\r\nusing OptimizedCore;\r\nusing SAUtils.DataStructures;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing Variants;\r\n\r\nnamespace SAUtils.InputFileParsers.DbSnp\r\n{\r\n    public sealed class DbSnpReader : IDisposable\r\n    {\r\n        // Key in VCF info field of the allele frequencies subfield.\r\n\t    private readonly Stream _stream;\r\n        private readonly ISequenceProvider _sequenceProvider;\r\n\r\n        public DbSnpReader(Stream stream, ISequenceProvider sequenceProvider)\r\n        {\r\n            _stream           = stream;\r\n            _sequenceProvider = sequenceProvider;\r\n        }\r\n\t    \r\n\t    public IEnumerable<DbSnpItem> GetItems()\r\n        {\r\n            using (var reader = FileUtilities.GetStreamReader(_stream))\r\n            {\r\n                string line;\r\n                while ((line = reader.ReadLine()) != null)\r\n                {\r\n                    // Skip empty lines.\r\n                    if (string.IsNullOrWhiteSpace(line)) continue;\r\n\r\n                    // Skip comments.\r\n                    if (line.OptimizedStartsWith('#')) continue;\r\n                    \r\n\t                foreach (var dbSnpItem in ExtractItem(line))\r\n\t                {\r\n\t\t\t\t\t\tyield return dbSnpItem;\r\n\t                }\r\n\t\t\t\t\t\r\n                }\r\n            }\r\n        }\r\n\r\n        /// <summary>\r\n        /// Extracts a dbSNP item from the specified VCF line.\r\n        /// </summary>\r\n        /// <param name=\"vcfLine\"></param>\r\n        /// <returns></returns>\r\n        public IEnumerable<DbSnpItem> ExtractItem(string vcfLine)\r\n        {\r\n            var splitLine = vcfLine.Split('\\t',6);\r\n            if (splitLine.Length < 5) yield break;\r\n\r\n            var chromosomeName = splitLine[VcfCommon.ChromIndex];\r\n            if 
(!_sequenceProvider.RefNameToChromosome.ContainsKey(chromosomeName)) yield break;\r\n\r\n            var chromosome = _sequenceProvider.RefNameToChromosome[chromosomeName];\r\n            var position   = int.Parse(splitLine[VcfCommon.PosIndex]);\r\n\t\t\tvar dbSnpId    = Convert.ToInt64(splitLine[VcfCommon.IdIndex].Substring(2));\r\n\t\t\tvar refAllele  = splitLine[VcfCommon.RefIndex];\r\n\t\t\tvar altAlleles = splitLine[VcfCommon.AltIndex].OptimizedSplit(',');\r\n\r\n            foreach (var altAllele in altAlleles)\r\n            {\r\n                var (shiftedPos, shiftedRef, shiftedAlt) =\r\n                    VariantUtils.TrimAndLeftAlign(position, refAllele, altAllele, _sequenceProvider.Sequence);\r\n\r\n                yield return new DbSnpItem(chromosome, shiftedPos, dbSnpId, shiftedRef, shiftedAlt, vcfLine);\r\n            }\r\n\t        \r\n        }\r\n\r\n        public void Dispose()\r\n        {\r\n            _stream?.Dispose();\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "SAUtils/InputFileParsers/DbSnp/GlobalMinorReader.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing Genome;\nusing IO;\nusing OptimizedCore;\nusing SAUtils.DataStructures;\nusing VariantAnnotation.Interface.IO;\n\nnamespace SAUtils.InputFileParsers.DbSnp\n{\n    public sealed class GlobalMinorReader \n    {\n        // Key in VCF info field of the allele frequencies subfield.\n\t    private readonly Stream _stream;\n        private readonly Dictionary<string, Chromosome> _refChromDict;\n\n        public GlobalMinorReader(Stream stream, Dictionary<string, Chromosome> refChromDict)\n        {\n            _stream = stream;\n            _refChromDict = refChromDict;\n        }\n\t    \n\t    public IEnumerable<AlleleFrequencyItem> GetItems()\n        {\n            using (var reader = FileUtilities.GetStreamReader(_stream))\n            {\n                string line;\n                while ((line = reader.ReadLine()) != null)\n                {\n                    // Skip empty lines.\n                    if (string.IsNullOrWhiteSpace(line)) continue;\n                    // Skip comments.\n                    if (line.OptimizedStartsWith('#')) continue;\n                    var items = ExtractItem(line);\n\t                if (items == null || items.Count == 0) continue;\n\t                foreach (var item in items)\n\t                {\n\t\t\t\t\t\tyield return item;\n\t                }\n\t\t\t\t\t\n                }\n            }\n        }\n\n        /// <summary>\n        /// Extracts a dbSNP item from the specified VCF line.\n        /// </summary>\n        /// <param name=\"vcfline\"></param>\n        /// <returns></returns>\n        private List<AlleleFrequencyItem> ExtractItem(string vcfline)\n        {\n            var splitLine = vcfline.OptimizedSplit('\\t');\n            if (splitLine.Length < 8) return null;\n\n            var chromosomeName = splitLine[VcfCommon.ChromIndex];\n            if (!_refChromDict.ContainsKey(chromosomeName)) 
return null;\n\n            var chromosome = _refChromDict[chromosomeName];\n\n            var position   = int.Parse(splitLine[VcfCommon.PosIndex]);\n\t\t\tvar refAllele  = splitLine[VcfCommon.RefIndex];\n\t\t\tvar altAlleles = splitLine[VcfCommon.AltIndex].OptimizedSplit(',');\n\t        var infoField  = splitLine[VcfCommon.InfoIndex];\n\t\t\t\n\t\t\tvar alleleFrequencies = GetAlleleFrequencies(infoField, refAllele, altAlleles);\n\n            var frequencyItems = new List<AlleleFrequencyItem>();\n            foreach ((string allele, double frequency) in alleleFrequencies)\n            {\n                frequencyItems.Add(new AlleleFrequencyItem(chromosome, position, refAllele, allele, frequency, vcfline));\n            }\n\n            return frequencyItems;\n        }\n\n\n        private static Dictionary<string, double> GetAlleleFrequencies(string infoField, string refAllele, string[] altAlleles)\n        {\n            var freqDict = new Dictionary<string, double> { [refAllele] = double.MinValue };\n\n            foreach (var altAllele in altAlleles)\n            {\n                freqDict[altAllele] = double.MinValue;\n            }\n\n            if (infoField.Trim() == \".\") return freqDict;\n\n            // for now we also want to disregard anything other than SNVs\n            var allSnv = refAllele.Length == 1 && altAlleles.All(altAllele => altAllele.Length == 1);\n            if (!allSnv) return freqDict;\n\n            // return if there are no freq information\n            if (!infoField.Contains(\"CAF=\"))\n                return freqDict;\n\n            foreach (var info in infoField.OptimizedSplit(';'))\n            {\n                if (!info.StartsWith(\"CAF=\")) continue;\n                var alleleFrequencies = info.OptimizedKeyValue().Value.OptimizedSplit(',');\n\n                freqDict[refAllele] = GetFrequency(alleleFrequencies[0]);\n\n                for (int i = 1; i < alleleFrequencies.Length; i++)\n                    
freqDict[altAlleles[i - 1]] = GetFrequency(alleleFrequencies[i]);\n                break;\n            }\n\n            return freqDict;\n        }\n\n        private static double GetFrequency(string alleleFrequency)\n\t    {\n\t\t    return alleleFrequency == \".\" || alleleFrequency == \"0\" ? double.MinValue : Convert.ToDouble(alleleFrequency);\n\t    }\n        \n    }\n}\n"
  },
  {
    "path": "SAUtils/InputFileParsers/Decipher/DecipherParser.cs",
    "content": "using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing Genome;\nusing OptimizedCore;\nusing SAUtils.DataStructures;\n\nnamespace SAUtils.InputFileParsers.Decipher\n{\n    public sealed class DecipherParser : IDisposable\n    {\n        private const int ChromIndex      = 1;\n        private const int StartIndex      = 2;\n        private const int EndIndex        = 3;\n        private const int DelNumIndex     = 4;\n        private const int DelFreqIndex    = 5;\n        private const int DupNumIndex     = 7;\n        private const int DupFreqIndex    = 8;\n        private const int SampleSizeIndex = 14;\n\n        private readonly StreamReader                    _reader;\n        private readonly IDictionary<string, Chromosome> _refNameDict;\n        \n        private int?    _delNum;\n        private double? _delFreq;\n        private int?    _dupNum;\n        private double? _dupFreq;\n        private int?    _sampleSize;\n        \n        public DecipherParser(StreamReader reader, IDictionary<string, Chromosome> refNameDict)\n        {\n            _reader      = reader;\n            _refNameDict = refNameDict;\n        }\n        \n        public IEnumerable<DecipherItem> GetItems()\n        {\n            using (_reader)\n            {\n                string line;\n                while ((line = _reader.ReadLine()) != null)\n                {\n                    if (string.IsNullOrWhiteSpace(line) || line.OptimizedStartsWith('#')) continue;\n\n                    // #population_cnv_id\tchr\tstart\tend\tdeletion_observations\tdeletion_frequency\tdeletion_standard_error\tduplication_observations\tduplication_frequency\tduplication_standard_error\tobservations\tfrequency\tstandard_error\ttype\tsample_size\tstudy\n                    var    splitLine = line.OptimizedSplit('\\t');                    \n                    string chromosomeName = splitLine[ChromIndex];\n                    
if(!_refNameDict.ContainsKey(chromosomeName)) continue;\n\n                    var chrom = _refNameDict[chromosomeName];\n                    int start = int.Parse(splitLine[StartIndex]);\n                    int end   = int.Parse(splitLine[EndIndex]);\n\n                    _delNum = int.Parse(splitLine[DelNumIndex]);\n                    _delFreq = double.Parse(splitLine[DelFreqIndex]);\n                    _dupNum = int.Parse(splitLine[DupNumIndex]);\n                    _dupFreq = double.Parse(splitLine[DupFreqIndex]);\n                    _sampleSize = int.Parse(splitLine[SampleSizeIndex]);\n\n                    var decipherItem = new DecipherItem(chrom, start, end, _delNum, _delFreq, _dupNum, _dupFreq, _sampleSize);\n                    \n                    yield return decipherItem;\n                }\n            }\n        }\n        \n        public void Dispose()\n        {\n            _reader?.Dispose();\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/InputFileParsers/Gme/GmeParser.cs",
    "content": "using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing Genome;\nusing OptimizedCore;\nusing SAUtils.DataStructures;\nusing VariantAnnotation.Interface.IO;\nusing VariantAnnotation.Interface.Providers;\nusing Variants;\n\nnamespace SAUtils.InputFileParsers.Gme\n{\n    public sealed class GmeParser : IDisposable\n    {\n        private readonly StreamReader                     _reader;\n        private readonly Dictionary<string, Chromosome> _refChromDict;\n        private readonly ISequenceProvider                _sequenceProvider;\n        \n        private int?    _alleleCount;\n        private int?    _alleleNum;\n        private double? _alleleFreq;\n        \n        public GmeParser(StreamReader streamReader, ISequenceProvider sequenceProvider)\n        {\n            _reader           = streamReader;\n            _sequenceProvider = sequenceProvider;\n            _refChromDict     = sequenceProvider.RefNameToChromosome;\n        }\n        \n        private void Clear()\n        {\n            _alleleCount = null;\n            _alleleNum   = null;\n            _alleleFreq  = null;\n        }\n        \n        public IEnumerable<GmeItem> GetItems()\n        {\n            using (_reader)\n            {\n                string line;\n                while ((line = _reader.ReadLine()) != null)\n                {\n                    // file has been modified to 7 columns\n                    // #chrom\tpos\tref\talt\tfilter\tGME_AC\tGME_AF\n                    \n                    if (string.IsNullOrWhiteSpace(line) || line.OptimizedStartsWith('#')) continue;\n                    \n                    var    cols      = line.OptimizedSplit('\\t');\n                    string ucscChrom = cols[0];\n                    if(!_refChromDict.ContainsKey(ucscChrom)) continue;\n\n                    var chrom     = _refChromDict[ucscChrom];\n                    int position  = int.Parse(cols[1]);\n                    var refAllele = 
cols[2];\n                    var altAllele = cols[3];\n                    var filters   = cols[4];\n                    var gmeAc     = cols[5].OptimizedSplit(',');\n                    _alleleFreq = double.Parse(cols[6]);\n                    \n                    var failedFilter = !(filters.Equals(\"PASS\") || filters.Equals(\".\"));\n                    var (shiftedPos, shiftedRef, shiftedAlt) = VariantUtils.TrimAndLeftAlign(position, refAllele,\n                        altAllele, _sequenceProvider.Sequence);\n                    \n                    _alleleCount = Convert.ToInt32(gmeAc[0]);\n                    _alleleNum   = Convert.ToInt32(gmeAc[0]) + Convert.ToInt32(gmeAc[1]);\n                    \n                    var gemItem = new GmeItem(chrom, shiftedPos, shiftedRef, shiftedAlt, _alleleCount, _alleleNum, _alleleFreq, failedFilter);\n                    \n                    yield return gemItem;\n                }\n            }\n        }\n\n        public  void    Dispose() => _reader?.Dispose();\n    }\n}\n\n"
  },
  {
    "path": "SAUtils/InputFileParsers/OneKGen/OneKGenReader.cs",
    "content": "using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing Genome;\r\nusing IO;\r\nusing OptimizedCore;\r\nusing SAUtils.DataStructures;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing Variants;\r\n\r\nnamespace SAUtils.InputFileParsers.OneKGen\r\n{\r\n    public sealed class OneKGenReader :IDisposable\r\n    {\r\n        private readonly Stream _stream;\r\n        private readonly Dictionary<string,Chromosome> _refNameDictionary;\r\n        private readonly ISequenceProvider _sequenceProvider;\r\n\r\n        private  string _ancestralAllele;\r\n\r\n        private int? _allAlleleNumber;\r\n\t    private int? _afrAlleleNumber;\r\n\t    private int? _amrAlleleNumber;\r\n\t    private int? _eurAlleleNumber;\r\n\t    private int? _easAlleleNumber;\r\n\t    private int? _sasAlleleNumber;\r\n\r\n\t\tprivate int[] _allAlleleCounts;\r\n\t\tprivate int[] _afrAlleleCounts;\r\n\t\tprivate int[] _amrAlleleCounts;\r\n\t\tprivate int[] _eurAlleleCounts;\r\n\t\tprivate int[] _easAlleleCounts;\r\n\t\tprivate int[] _sasAlleleCounts;\r\n\r\n        // empty constructor for onekg reader for unit tests.\r\n        public OneKGenReader(Stream stream, ISequenceProvider sequenceProvider) \r\n        {\r\n            _stream = stream;\r\n            _sequenceProvider = sequenceProvider;\r\n            _refNameDictionary = sequenceProvider.RefNameToChromosome;\r\n        }\r\n\r\n        private void Clear()\r\n\t    {\r\n\t\t    _ancestralAllele = null;\r\n\r\n\t\t\t_allAlleleNumber = null;\r\n\t\t\t_afrAlleleNumber = null;\r\n\t\t\t_amrAlleleNumber = null;\r\n\t\t\t_eurAlleleNumber = null;\r\n\t\t\t_easAlleleNumber = null;\r\n\t\t\t_sasAlleleNumber = null;\r\n\r\n\t\t\t_allAlleleCounts = null;\r\n\t\t\t_afrAlleleCounts = null;\r\n\t\t\t_amrAlleleCounts = null;\r\n\t\t\t_eurAlleleCounts = null;\r\n\t\t\t_easAlleleCounts = null;\r\n\t\t\t_sasAlleleCounts = null;\r\n\r\n\t\t\t// SV 
fields\r\n\t    }\r\n\r\n\t    public IEnumerable<OneKGenItem> GetItems()\r\n        {\r\n            using (var reader = FileUtilities.GetStreamReader(_stream))\r\n            {\r\n                string line;\r\n                while ((line = reader.ReadLine()) != null)\r\n                {\r\n                    // Skip empty lines.\r\n                    if (string.IsNullOrWhiteSpace(line)) continue;\r\n\r\n                    // Skip comments.\r\n                    if (line.OptimizedStartsWith('#')) continue;\r\n                    \r\n\t                foreach (var oneKGenItem in ExtractItems(line))\r\n\t                {\r\n\t\t\t\t\t\tyield return oneKGenItem;\r\n\t                }\r\n\t\t\t\t\t\r\n                }\r\n            }\r\n        }\r\n\r\n        internal IEnumerable<OneKGenItem> ExtractItems(string vcfLine)\r\n        {\r\n            var splitLine = vcfLine.OptimizedSplit('\\t');// we don't care about the many fields after info field\r\n            if (splitLine.Length < 8) yield break;\r\n\r\n            Clear();\r\n\t\t\t\r\n            var chromosomeName  = splitLine[VcfCommon.ChromIndex];\r\n            if (!_refNameDictionary.ContainsKey(chromosomeName)) yield break;\r\n            var chromosome = _refNameDictionary[chromosomeName];\r\n            var position   = int.Parse(splitLine[VcfCommon.PosIndex]);//we have to get it from RSPOS in info\r\n            var refAllele  = splitLine[VcfCommon.RefIndex];\r\n            var altAlleles = splitLine[VcfCommon.AltIndex].OptimizedSplit(',');\r\n            var infoFields = splitLine[VcfCommon.InfoIndex];\r\n\r\n            // parses the info fields and extract frequencies, ancestral allele, allele counts, etc.\r\n            var hasSymbolicAllele = altAlleles.Any(x => x.OptimizedStartsWith('<') && x.OptimizedEndsWith('>'));\r\n\t        if (hasSymbolicAllele) yield break;\r\n\r\n            // ReSharper disable once ConditionIsAlwaysTrueOrFalse\r\n\t\t\tParseInfoField(infoFields, 
hasSymbolicAllele);\r\n\r\n\t        for (var i = 0; i < altAlleles.Length; i++)\r\n            {\r\n                var (shiftedPos, shiftedRef, shiftedAlt) = VariantUtils.TrimAndLeftAlign(position, refAllele,\r\n                    altAlleles[i], _sequenceProvider.Sequence);\r\n\r\n                yield return new OneKGenItem(\r\n\t\t\t\t\tchromosome,\r\n\t\t\t\t\tshiftedPos,\r\n\t\t\t\t\tshiftedRef,\r\n\t\t\t\t\tshiftedAlt,\r\n                    _ancestralAllele,\r\n\t\t\t\t\tGetAlleleCount(_allAlleleCounts, i),\r\n\t\t\t\t\tGetAlleleCount(_afrAlleleCounts,i),\r\n\t\t\t\t\tGetAlleleCount(_amrAlleleCounts,i),\r\n\t\t\t\t\tGetAlleleCount(_eurAlleleCounts,i),\r\n\t\t\t\t\tGetAlleleCount(_easAlleleCounts,i),\r\n\t\t\t\t\tGetAlleleCount(_sasAlleleCounts, i),\r\n\t\t\t\t\t_allAlleleNumber,\r\n\t\t\t\t\t_afrAlleleNumber,\r\n\t\t\t\t\t_amrAlleleNumber,\r\n\t\t\t\t\t_eurAlleleNumber,\r\n\t\t\t\t\t_easAlleleNumber,\r\n\t\t\t\t\t_sasAlleleNumber\r\n\t\t\t\t\t);\r\n\r\n                \r\n\t\t\t}\r\n\t\t\t\r\n        }\r\n\r\n\t    private static int? 
GetAlleleCount(int[] alleleCounts, int i)\r\n\t    {\r\n\t\t    if (alleleCounts == null) return null;\r\n\t\t    if (i >= alleleCounts.Length) return null;\r\n\t\t    return alleleCounts[i];\r\n\t    }\r\n\r\n        private void ParseInfoField(string infoFields, bool hasSymbolicAllele)\r\n        {\r\n            if (infoFields == \"\" || infoFields == \".\") return;\r\n            var infoItems = infoFields.OptimizedSplit(';');\r\n\r\n            foreach (string infoItem in infoItems)\r\n            {\r\n                (string key, string value) = infoItem.OptimizedKeyValue();\r\n\r\n                // sanity check\r\n                if (value != null) SetInfoField(key, value, hasSymbolicAllele);\r\n            }\r\n        }\r\n\r\n        private  void SetInfoField(string vcfAfId, string value, bool hasSymbolicAllele)\r\n\t\t{\r\n\t\t\tswitch (vcfAfId)\r\n\t\t\t{\r\n\t\t\t\tcase \"AA\":\r\n\t\t\t\t\t_ancestralAllele = GetAncestralAllele(value);\r\n\t\t\t\t\tbreak;\r\n\t\t\t\t// the following are for SVs\r\n\t\t\t\tcase \"SVTYPE\":\r\n\t\t\t\t\tif (hasSymbolicAllele)\r\n\t\t\t\t\t{\r\n\t\t\t\t\t}\r\n\r\n\t\t\t\t    break;\r\n\t\t\t\tcase \"END\":\r\n\t\t\t\t\tif (hasSymbolicAllele)\r\n\t\t\t\t\t{\r\n\t\t\t\t\t}\r\n\r\n\t\t\t\t    break;\r\n\t\t\t\tcase \"CIEND\":\r\n\t\t\t\tcase \"CIPOS\":\r\n\t\t\t\t\tbreak;\r\n\t\t\t\tcase \"AN\":\r\n\t\t\t\t\t_allAlleleNumber = Convert.ToInt32(value);\r\n\t\t\t\t\tbreak;\r\n\t\t\t\tcase \"AFR_AN\":\r\n\t\t\t\t\t_afrAlleleNumber = Convert.ToInt32(value);\r\n\t\t\t\t\tbreak;\r\n\t\t\t\tcase \"AMR_AN\":\r\n\t\t\t\t\t_amrAlleleNumber = Convert.ToInt32(value);\r\n\t\t\t\t\tbreak;\r\n\t\t\t\tcase \"EUR_AN\":\r\n\t\t\t\t\t_eurAlleleNumber = Convert.ToInt32(value);\r\n\t\t\t\t\tbreak;\r\n\t\t\t\tcase \"EAS_AN\":\r\n\t\t\t\t\t_easAlleleNumber = Convert.ToInt32(value);\r\n\t\t\t\t\tbreak;\r\n\t\t\t\tcase \"SAS_AN\":\r\n\t\t\t\t\t_sasAlleleNumber = Convert.ToInt32(value);\r\n\t\t\t\t\tbreak;\r\n\t\t\t\tcase 
\"AC\":\r\n\t\t\t\t\t_allAlleleCounts = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\r\n\t\t\t\t\tbreak;\r\n\t\t\t\tcase \"AMR_AC\":\r\n\t\t\t\t\t_amrAlleleCounts = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\r\n\t\t\t\t\tbreak;\r\n\t\t\t\tcase \"AFR_AC\":\r\n\t\t\t\t\t_afrAlleleCounts = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\r\n\t\t\t\t\tbreak;\r\n\t\t\t\tcase \"EUR_AC\":\r\n\t\t\t\t\t_eurAlleleCounts = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\r\n\t\t\t\t\tbreak;\r\n\t\t\t\tcase \"EAS_AC\":\r\n\t\t\t\t\t_easAlleleCounts = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\r\n\t\t\t\t\tbreak;\r\n\t\t\t\tcase \"SAS_AC\":\r\n\t\t\t\t\t_sasAlleleCounts = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\r\n\t\t\t\t\tbreak;\r\n\t\t\t}\r\n\t\t}\r\n\r\n\t\tprivate static string GetAncestralAllele(string value)\r\n\t\t{\r\n\t\t\tif (value == \"\" || value == \".\") return null;\r\n\r\n\t\t\tvar ancestralAllele = value.OptimizedSplit('|')[0];\r\n\t\t\tif (string.IsNullOrEmpty(ancestralAllele)) return null;\r\n\t\t\treturn ancestralAllele.All(IsNucleotide) ? ancestralAllele : null;\r\n\t\t}\r\n\t\tprivate static bool IsNucleotide(char c)\r\n\t\t{\r\n\t\t\tc = char.ToUpper(c);\r\n\t\t\treturn c == 'A' || c == 'C' || c == 'G' || c == 'T' || c == 'N';\r\n\t\t}\r\n\r\n        public void Dispose()\r\n        {\r\n            _stream?.Dispose();\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "SAUtils/InputFileParsers/OneKGen/RefMinorReader.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing Genome;\nusing OptimizedCore;\nusing SAUtils.DataStructures;\nusing VariantAnnotation.Interface.IO;\nusing VariantAnnotation.Interface.Providers;\nusing Variants;\n\nnamespace SAUtils.InputFileParsers.OneKGen\n{\n    public sealed class RefMinorReader:IDisposable\n    {\n        private readonly StreamReader _reader;\n        private readonly Dictionary<string, Chromosome> _refNameDictionary;\n        private readonly ISequenceProvider _sequenceProvider;\n\n        private int? _allAlleleNumber;\n        private int[] _allAlleleCounts;\n        \n        public RefMinorReader(StreamReader reader, ISequenceProvider sequenceProvider)\n        {\n            _reader = reader;\n            _sequenceProvider = sequenceProvider;\n            _refNameDictionary = sequenceProvider.RefNameToChromosome;\n        }\n\n        private void Clear()\n        {\n            _allAlleleNumber = null;\n            _allAlleleCounts = null;\n        }\n\n        public IEnumerable<AlleleFrequencyItem> GetItems()\n        {\n            using (var reader = _reader)\n            {\n                string line;\n                while ((line = reader.ReadLine()) != null)\n                {\n                    // Skip empty lines.\n                    if (string.IsNullOrWhiteSpace(line)) continue;\n                    // Skip comments.\n                    if (line.OptimizedStartsWith('#')) continue;\n                    var items = ExtractItems(line);\n                    if (items == null) continue;\n                    foreach (var item in items)\n                    {\n                        yield return item;\n                    }\n\n                }\n            }\n        }\n\n        private List<AlleleFrequencyItem> ExtractItems(string vcfLine)\n        {\n            var splitLine = vcfLine.Split(new[] { '\\t' }, 9);// we don't care about the many fields after info 
field\n            if (splitLine.Length < 8) return null;\n\n            Clear();\n\n            var chromosomeName = splitLine[VcfCommon.ChromIndex];\n            if (!_refNameDictionary.ContainsKey(chromosomeName)) return null;\n\n            var chromosome = _refNameDictionary[chromosomeName];\n            var position   = int.Parse(splitLine[VcfCommon.PosIndex]);//we have to get it from RSPOS in info\n            var refAllele  = splitLine[VcfCommon.RefIndex];\n            var altAlleles = splitLine[VcfCommon.AltIndex].OptimizedSplit(',');\n            var infoFields = splitLine[VcfCommon.InfoIndex];\n\n            // parses the info fields and extract frequencies, ancestral allele, allele counts, etc.\n            ParseInfoField(infoFields);\n            if (_allAlleleNumber == null) return null;\n\n            var items = new List<AlleleFrequencyItem>();\n\n            for (var i = 0; i < altAlleles.Length; i++)\n            {\n                var alleleCount = GetAlleleCount(_allAlleleCounts, i);\n                if (alleleCount == null || alleleCount==0) continue;\n\n                var frequency = 1.0* alleleCount.Value/ _allAlleleNumber.Value ;\n\n                var (shiftedPos, shiftedRef, shiftedAlt) = VariantUtils.TrimAndLeftAlign(position, refAllele,\n                    altAlleles[i], _sequenceProvider.Sequence);\n\n                items.Add(new AlleleFrequencyItem(chromosome, shiftedPos,shiftedRef, shiftedAlt, frequency, vcfLine));\n            }\n\n            return items.Count>0? items: null;\n        }\n\n        private static int? 
GetAlleleCount(int[] alleleCounts, int i)\n        {\n            if (alleleCounts == null) return null;\n            if (i >= alleleCounts.Length) return null;\n            return alleleCounts[i];\n        }\n\n\n        private void ParseInfoField(string infoFields)\n        {\n            if (infoFields == \"\" || infoFields == \".\") return;\n            var infoItems = infoFields.OptimizedSplit(';');\n\n            foreach (string infoItem in infoItems)\n            {\n                (string key, string value) = infoItem.OptimizedKeyValue();\n\n                switch (key)\n                {\n                    case \"AN\":\n                        _allAlleleNumber = Convert.ToInt32(value);\n                        break;\n                    case \"AC\":\n                        _allAlleleCounts = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                        break;\n                }\n            }\n        }\n\n\n        public void Dispose()\n        {\n            _reader?.Dispose();\n        }\n    }\n}\n"
  },
  {
    "path": "SAUtils/InputFileParsers/OneKGen/oneKGenSvReader.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing Genome;\r\nusing OptimizedCore;\r\nusing SAUtils.DataStructures;\r\nusing SAUtils.ParseUtils;\r\n\r\nnamespace SAUtils.InputFileParsers.OneKGen\r\n{\r\n\tpublic sealed class OneKGenSvReader:IDisposable\r\n    {\r\n        private const int ChromIndex = 0;\r\n        private const int StartIndex = 1;\r\n        private const int EndIndex = 2;\r\n        private const int IdIndex = 3;\r\n        private const int AltIndex = 4;\r\n        private const int InfoIndex = 5;\r\n\r\n        private readonly StreamReader _reader;\r\n\t    private readonly Dictionary<string, Chromosome> _refNameDict;\r\n\r\n\t    private string _svType;\r\n\r\n        private int? _allAlleleNumber;\r\n\t    private int? _allAlleleCount;\r\n        private double? _allAlleleFrequency;\r\n\t    private double? _afrAlleleFrequency;\r\n\t    private double? _amrAlleleFrequency;\r\n\t    private double? _eurAlleleFrequency;\r\n\t    private double? _easAlleleFrequency;\r\n\t    private double? 
_sasAlleleFrequency;\r\n\r\n\r\n        public OneKGenSvReader(StreamReader reader, Dictionary<string, Chromosome> refNameDict)\r\n\t\t{\r\n\t\t\t_reader = reader;\r\n\t\t    _refNameDict  = refNameDict;\r\n\t\t}\r\n\r\n\t\tpublic IEnumerable<OnekGenSvItem> GetItems()\r\n\t\t{\r\n\t\t    string line;\r\n\t\t    while ((line = _reader.ReadLine()) != null)\r\n\t\t    {\r\n                // Skip empty lines.\r\n                if (string.IsNullOrWhiteSpace(line)) continue;\r\n\r\n                // Skip comments.\r\n                if (line.OptimizedStartsWith('#')) continue;\r\n\t\t        var oneKSvGenItem = ExtractOneKGenSvItem(line);\r\n\t\t        if (oneKSvGenItem == null) continue;\r\n\t\t        yield return oneKSvGenItem;\r\n\r\n\t\t    }\r\n        }\r\n\t    private void Clear()\r\n\t    {\r\n\t        _allAlleleNumber = null;\r\n\t        _allAlleleFrequency = null;\r\n\t        _afrAlleleFrequency = null;\r\n\t        _amrAlleleFrequency = null;\r\n\t        _eurAlleleFrequency = null;\r\n\t        _easAlleleFrequency = null;\r\n\t        _sasAlleleFrequency = null;\r\n\r\n\t        _svType = null;\r\n\t    }\r\n\r\n        private OnekGenSvItem ExtractOneKGenSvItem(string line)\r\n\t\t{\r\n\t\t    var splitLine = line.OptimizedSplit('\\t');\r\n            string altAllele = splitLine[AltIndex];\r\n            if (altAllele.StartsWith(\"<INS:ME:\")) return null;\r\n\r\n            string chromosomeName = splitLine[ChromIndex];\r\n\t\t    if (!_refNameDict.ContainsKey(chromosomeName)) return null;\r\n\t\t    var chromosome = _refNameDict[chromosomeName];\r\n\t\t    int start = int.Parse(splitLine[StartIndex]) + 1; // start is 0-based in BED format\r\n            int end = int.Parse(splitLine[EndIndex]);\r\n\t\t    string id = RemoveMissingValues(splitLine[IdIndex]);\r\n\r\n            string infoFields = splitLine[InfoIndex];\r\n            Clear();\r\n\t\t    ParseInfoField(infoFields);\r\n\r\n\t\t    var variantType = 
SaParseUtilities.GetSequenceAlteration(_svType);\r\n            return new OnekGenSvItem(chromosome, start, end, variantType, id,  \r\n\t\t\t\t_allAlleleNumber, _allAlleleCount,\r\n                _allAlleleFrequency, _afrAlleleFrequency, _amrAlleleFrequency, _easAlleleFrequency, _eurAlleleFrequency, _sasAlleleFrequency);\r\n\t\t}\r\n\r\n        private static string RemoveMissingValues(string idField)\r\n        {\r\n            var ids = idField.OptimizedSplit(';');\r\n            return string.Join(';', ids.Where(id => id != \".\"));\r\n        }\r\n\r\n        private void ParseInfoField(string infoFields)\r\n\t    {\r\n\t        if (infoFields == \"\" || infoFields == \".\") return;\r\n\t        var infoItems = infoFields.OptimizedSplit(';');\r\n\r\n\t        foreach (string infoItem in infoItems)\r\n\t        {\r\n\t            (string key, string value) = infoItem.OptimizedKeyValue();\r\n\r\n\t            // sanity check\r\n\t            if (value != null) SetInfoField(key, value);\r\n\t        }\r\n\t    }\r\n        //1       668630  esv3584976      G       <CN2>   100     PASS    AC=64;AF=0.0127796;AN=5008;CIEND=-150,150;CIPOS=-150,150;CS=DUP_delly;END=850204;NS=2504;SVTYPE=DUP;IMPRECISE;DP=22135;EAS_AF=0.0595;AMR_AF=0;AFR_AF=0.0015;EUR_AF=0.001;SAS_AF=0.001;VT=SV;EX_TARGET\r\n        private void SetInfoField(string vcfAfId, string value)\r\n\t    {\r\n\t        switch (vcfAfId)\r\n\t        {\r\n\t            case \"SVTYPE\":\r\n\t                _svType = value;// for SVs there is only one value in SVTYPE\r\n\t                break;\r\n                case \"AN\":\r\n\t                _allAlleleNumber = Convert.ToInt32(value);\r\n\t                break;\r\n\t            case \"AC\":\r\n\t                _allAlleleCount = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).Sum();\r\n\t                break;\r\n                case \"AF\":\r\n\t                _allAlleleFrequency = 
value.OptimizedSplit(',').Select(Convert.ToDouble).Sum();\r\n\t                break;\r\n\t            case \"AMR_AF\":\r\n\t                _amrAlleleFrequency = value.OptimizedSplit(',').Select(Convert.ToDouble).Sum();\r\n                    break;\r\n\t            case \"AFR_AF\":\r\n\t                _afrAlleleFrequency = value.OptimizedSplit(',').Select(Convert.ToDouble).Sum();\r\n                    break;\r\n\t            case \"EUR_AF\":\r\n\t                _eurAlleleFrequency = value.OptimizedSplit(',').Select(Convert.ToDouble).Sum();\r\n                    break;\r\n\t            case \"EAS_AF\":\r\n\t                _easAlleleFrequency = value.OptimizedSplit(',').Select(Convert.ToDouble).Sum();\r\n                    break;\r\n\t            case \"SAS_AF\":\r\n\t                _sasAlleleFrequency = value.OptimizedSplit(',').Select(Convert.ToDouble).Sum();\r\n                    break;\r\n\t        }\r\n\t    }\r\n\r\n\t    public void Dispose()\r\n\t    {\r\n\t        _reader?.Dispose();\r\n\t    }\r\n\t}\r\n}"
  },
  {
    "path": "SAUtils/InputFileParsers/SequenceExtensions.cs",
    "content": "﻿using Genome;\r\n\r\nnamespace SAUtils.InputFileParsers\r\n{\r\n    public static class SequenceExtensions\r\n    {\r\n        public static bool Validate( this ISequence referenceSequence, int start, int end, string testSequence)\r\n        {\r\n            var expSequence = referenceSequence.Substring(start - 1, end - start + 1);\r\n            return testSequence == expSequence;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/InputFileParsers/TOPMed/TopMedReader.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing Genome;\r\nusing OptimizedCore;\r\nusing SAUtils.DataStructures;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing Variants;\r\n\r\nnamespace SAUtils.InputFileParsers.TOPMed\r\n{\r\n    public sealed class TopMedReader : IDisposable\r\n    {\r\n        private readonly StreamReader _reader;\r\n        private readonly Dictionary<string, Chromosome> _refChromDict;\r\n        private readonly ISequenceProvider _sequenceProvider;\r\n\r\n        private int? _alleleNum;\r\n        private int? _alleleCount;\r\n        private int? _homCount;\r\n\r\n        public TopMedReader(StreamReader streamReader, ISequenceProvider sequenceProvider)\r\n        {\r\n            _reader       = streamReader;\r\n            _sequenceProvider = sequenceProvider;\r\n            _refChromDict = sequenceProvider.RefNameToChromosome;\r\n        }\r\n\r\n        private void Clear()\r\n        {\r\n            _alleleNum    = null;\r\n            _alleleCount  = null;\r\n            _homCount     = null;\r\n        }\r\n\r\n        public IEnumerable<TopMedItem> GetItems()\r\n        {\r\n            using (_reader)\r\n            {\r\n                string line;\r\n                while ((line = _reader.ReadLine()) != null)\r\n                {\r\n                    if (string.IsNullOrWhiteSpace(line) || line.OptimizedStartsWith('#')) continue;\r\n\r\n                    var topMedItem = ExtractItems(line);\r\n                    if (topMedItem == null) continue;\r\n                    yield return topMedItem;\r\n                }\r\n            }\r\n        }\r\n\r\n        private TopMedItem ExtractItems(string vcfLine)\r\n        {\r\n            if (vcfLine == null) return null;\r\n            var splitLine = vcfLine.OptimizedSplit('\\t');\r\n\r\n            if (splitLine.Length < 8) return null;\r\n\r\n            Clear();\r\n            
// chr1    10169   TOPMed_freeze_5?chr1:10,169     T       C       255     SVM     VRT=1;NS=62784;AN=125568;AC=20;AF=0.000159276;Het=20;Hom=0      NA:FRQ  125568:0.000159276\r\n\r\n            var chromosome = splitLine[VcfCommon.ChromIndex];\r\n            if (!_refChromDict.ContainsKey(chromosome)) return null;\r\n\r\n            var chrom      = _refChromDict[chromosome];\r\n            var position   = int.Parse(splitLine[VcfCommon.PosIndex]);//we have to get it from RSPOS in info\r\n            var refAllele  = splitLine[VcfCommon.RefIndex];\r\n            var altAllele  = splitLine[VcfCommon.AltIndex];\r\n            var filters    = splitLine[VcfCommon.FilterIndex];\r\n            var infoFields = splitLine[VcfCommon.InfoIndex];\r\n\r\n            if (altAllele.Contains(\",\"))\r\n            {\r\n                Console.WriteLine(vcfLine);\r\n                throw new InvalidDataException(\"het site found!!\");\r\n            }\r\n\r\n            var failedFilter = !(filters.Equals(\"PASS\") || filters.Equals(\".\"));\r\n\r\n            ParseInfoField(infoFields);\r\n\r\n            if (_alleleNum == 0) return null;\r\n            var (shiftedPos, shiftedRef, shiftedAlt) = VariantUtils.TrimAndLeftAlign(position, refAllele,\r\n                altAllele, _sequenceProvider.Sequence);\r\n\r\n            return new TopMedItem(chrom, shiftedPos, shiftedRef, shiftedAlt, _alleleNum, _alleleCount, _homCount,\r\n                failedFilter);\r\n        }\r\n\r\n        private void ParseInfoField(string infoFields)\r\n        {\r\n            if (infoFields == \"\" || infoFields == \".\") return;\r\n            var infoItems = infoFields.OptimizedSplit(';');\r\n\r\n            foreach (string infoItem in infoItems)\r\n            {\r\n                (string key, string value) = infoItem.OptimizedKeyValue();\r\n\r\n                // sanity check\r\n                if (value != null) SetInfoField(key, value);\r\n            }\r\n        }\r\n\r\n        private void 
SetInfoField(string vcfId, string value)\r\n        {\r\n            // VRT=1;NS=62784;AN=125568;AC=20;AF=0.000159276;Het=20;Hom=0\r\n            switch (vcfId)\r\n            {\r\n                case \"AN\":\r\n                    _alleleNum = Convert.ToInt32(value);\r\n                    break;\r\n                case \"AC\":\r\n                    _alleleCount = Convert.ToInt32(value);\r\n                    break;\r\n                case \"Hom\":\r\n                    _homCount = Convert.ToInt32(value);\r\n                    break;\r\n            }\r\n        }\r\n\r\n        public void Dispose() => _reader?.Dispose();\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/MakeAaDb/Main.cs",
    "content": "﻿using System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.MakeAaDb\n{\n    public static class Main\n    {\n        private static string _inputFile;\n        private static string _compressedReference;\n        private static string _outputDirectory;\n\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                     \"ref|r=\",\n                     \"compressed reference sequence file\",\n                     v => _compressedReference = v\n                 },\n                {\n                    \"in|i=\",\n                    \"input VCF file path\",\n                    v => _inputFile = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .HasRequiredParameter(_inputFile, \"OneK Gen VCFfile\", \"--in\")\n                .CheckInputFilenameExists(_inputFile, \"OneK Gen VCFfile\", \"--in\")\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"create Ancestral allele database from 1000Genomes data\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n       
     return exitCode;\n        }\n\n        private static ExitCodes ProgramExecution()\n        {\n            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\n            var version = DataSourceVersionReader.GetSourceVersion(_inputFile + \".version\");\n            \n            string outFileName = $\"{version.Name}_{version.Version}_ancestralAlleles\".Replace(' ','_');\n            using (var ancestralAlleleReader = new AncestralAlleleReader(GZipUtilities.GetAppropriateStreamReader(_inputFile), referenceProvider))\n            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))\n            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSuffix)))\n            using (var writer = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.AncestralAlleleTag, true, false, SaCommon.SchemaVersion, true))\n            {\n                writer.Write(ancestralAlleleReader.GetItems());\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n\n}"
  },
  {
    "path": "SAUtils/MakeClinGenDb/Main.cs",
    "content": "﻿using System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing SAUtils.InputFileParsers.ClinGen;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.NSA;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.MakeClinGenDb\n{\n    public static class Main\n    {\n        private static string _inputFileName;\n        private static string _compressedReference;\n        private static string _outputDirectory;\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"ref|r=\",\n                    \"compressed reference sequence file\",\n                    v => _compressedReference = v\n                },\n                {\n                    \"in|i=\",\n                    \"ClinGen VCFfile\",\n                    v => _inputFileName = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .HasRequiredParameter(_inputFileName, \"ClinGen VCFfile\", \"--in\")\n                .CheckInputFilenameExists(_inputFileName, \"ClinGen VCFfile\", \"--in\")\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a supplementary database with 
ClinGen annotations\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n\n        private static ExitCodes ProgramExecution()\n        {\n            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\n            var version = DataSourceVersionReader.GetSourceVersion(_inputFileName + \".version\");\n            string outFileName = $\"{version.Name}_{version.Version}\";\n\n            using (var clinGenReader = new ClinGenReader(GZipUtilities.GetAppropriateStreamReader(_inputFileName), referenceProvider.RefNameToChromosome))\n            using (var nsiStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.IntervalFileSuffix)))\n            using (var nsiWriter = new NsiWriter(nsiStream, version, referenceProvider.Assembly, SaCommon.ClinGenTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))\n            {\n                nsiWriter.Write(clinGenReader.GetItems());\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/MitoHeteroplasmy/MitoHeteroplasmyDb.cs",
    "content": "﻿using System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\n\nnamespace SAUtils.MitoHeteroplasmy\n{\n    public static class MitoHeteroplasmyDb\n    {\n        private static string _inputFile;\n        private static string _outputDirectory;\n        private const string OutFileName = \"MitoHeteroplasmy.tsv\";\n        private const string HeaderLine = \"#POS\\tREF\\tALT\\tVRFs\\tAlleleDepths\";\n\n        public static ExitCodes Run(string command, string[] commandArgs)\n\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"in|i=\",\n                    \"input BED file path\",\n                    v => _inputFile = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_inputFile, \"Mitochondrial Heteroplasmy BED file\", \"--in\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a TSV file with mitochondrial heteroplasmy information\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n\n        private static ExitCodes ProgramExecution()\n        {\n            using var mitoHeteroplasmyParser = new MitoHeteroplasmyParser(GZipUtilities.GetAppropriateReadStream(_inputFile));\n            using var tsvStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, OutFileName));\n            using var tsvWriter = new StreamWriter(tsvStream);\n            
tsvWriter.WriteLine(HeaderLine);\n            foreach(var line in mitoHeteroplasmyParser.GetOutputLines())\n                tsvWriter.WriteLine(line);\n\n            return ExitCodes.Success;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/MitoHeteroplasmy/MitoHeteroplasmyParser.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing IO;\nusing Newtonsoft.Json;\nusing OptimizedCore;\n\nnamespace SAUtils.MitoHeteroplasmy\n{\n    public sealed class MitoHeteroplasmyParser : IDisposable\n    {\n        private readonly Stream _stream;\n\n        public MitoHeteroplasmyParser(Stream stream)\n        {\n            _stream = stream;\n        }\n\n        public void Dispose()\n        {\n            _stream?.Dispose();\n        }\n\n        public IEnumerable<string> GetOutputLines()\n        {\n            using var reader = FileUtilities.GetStreamReader(_stream);\n            string line;\n            while ((line = reader.ReadLine()) != null)\n            {\n                // Skip empty lines.\n                if (string.IsNullOrWhiteSpace(line)) continue;\n\n                // Skip comments, headers\n                if (line.OptimizedStartsWith('#')) continue;\n\n                foreach (string item in ExtractItems(line))\n                    yield return item;\n            }\n        }\n\n        //MT      5       6       {\"C:A\":{\"ad\":[1],\"allele_type\":\"alt\",\"vrf\":[0.006329113924050633],\"vrf_stats\":{\"kurtosis\":241.00408163265314,\"max\":0.0063291139240506328,\"mean\":2.5728105382319646e-05,\"min\":0.0,\"nobs\":246,\"skewness\":15.588588185998534,\"stdev\":0.00040352956522996095,\"variance\":1.6283611001468132e-07}}}\n        private static IEnumerable<string> ExtractItems(string line)\n        {\n            var splits = line.Split('\\t');\n            if (splits.Length < 4) yield break;\n\n            var position = int.Parse(splits[1]) + 1; // since this is a bed file\n            var info = splits[3];\n            var stats = DeserializeStats(info);\n\n            foreach ((string refAllele, string altAllele, AlleleStats alleleStats) in GetAlleleStats(stats))\n            {\n                (string formattedVrfs, string alleleDepths) = 
MergeAndSortByVrf(alleleStats);\n                yield return string.Join('\\t', position, refAllele, altAllele, formattedVrfs, alleleDepths);\n            }\n\n        }\n\n        private static (string formattedVrfs, string alleleDepths) MergeAndSortByVrf(AlleleStats alleleStats)\n        {\n            var vrfToAd = new Dictionary<string, int>();\n            foreach ((string vrf, int ad) in alleleStats.vrf.Select(x => x.ToString(\"0.###\"))\n                                                            .Zip(alleleStats.ad, (a, b) => (a, b)))\n            {\n                if (vrfToAd.ContainsKey(vrf)) vrfToAd[vrf] += ad;\n                else vrfToAd[vrf] = ad;\n            }\n\n            var formattedVrfs = new string[vrfToAd.Count];\n            var alleleDepths = new int[vrfToAd.Count];\n            var i = 0;\n            foreach (var vrf in vrfToAd.Keys.OrderBy(x => double.Parse(x)))\n            {\n                formattedVrfs[i] = vrf;\n                alleleDepths[i] = vrfToAd[vrf];\n                i++;\n            }\n\n            return (string.Join(',',formattedVrfs), string.Join(',', alleleDepths));\n        }\n\n        private static IEnumerable<(string, string, AlleleStats)> GetAlleleStats(PositionStats stats)\n        {\n            if (stats.A_C != null) yield return (\"A\", \"C\", stats.A_C);\n            if (stats.A_G != null) yield return (\"A\", \"G\", stats.A_G);\n            if (stats.A_T != null) yield return (\"A\", \"T\", stats.A_T);\n\n            if (stats.C_A != null) yield return (\"C\", \"A\", stats.C_A);\n            if (stats.C_G != null) yield return (\"C\", \"G\", stats.C_G);\n            if (stats.C_T != null) yield return (\"C\", \"T\", stats.C_T);\n\n            if (stats.G_A != null) yield return (\"G\", \"A\", stats.G_A);\n            if (stats.G_C != null) yield return (\"G\", \"C\", stats.G_C);\n            if (stats.G_T != null) yield return (\"G\", \"T\", stats.G_T);\n\n            if (stats.T_A != null) yield 
return (\"T\", \"A\", stats.T_A);\n            if (stats.T_C != null) yield return (\"T\", \"C\", stats.T_C);\n            if (stats.T_G != null) yield return (\"T\", \"G\", stats.T_G);\n        }\n\n        public static PositionStats DeserializeStats(string s)\n        {\n            var charArray = s.ToCharArray();\n            for (var i = 0; i < charArray.Length - 3; i++)\n            {\n                if (IsNucleotide(charArray[i])\n                    && charArray[i + 1] == ':'\n                    && IsNucleotide(charArray[i + 2]))\n                    charArray[i + 1] = '_';\n            }\n            return JsonConvert.DeserializeObject<PositionStats>(new string(charArray));\n        }\n\n        private static bool IsNucleotide(char c)\n        {\n            return c == 'A' || c == 'C' || c == 'G' || c == 'T';\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/MitoHeteroplasmy/StatClasses.cs",
    "content": "﻿namespace SAUtils.MitoHeteroplasmy\n{\n    //{\n    //\"ad\": [1],\n    //\"allele_type\": \"alt\",\n    //\"vrf\": [0.004273504273504274],\n    //\"vrf_stats\": {\n    //    \"kurtosis\": 241.00408163265314,\n    //    \"max\": 0.0042735042735042739,\n    //    \"mean\": 1.7371968591480788e-05,\n    //    \"min\": 0.0,\n    //    \"nobs\": 246,\n    //    \"skewness\": 15.588588185998535,\n    //    \"stdev\": 0.00027246868079629845,\n    //    \"variance\": 7.4239182014875175e-08\n    //}\n    //}\n\n    public sealed class PositionStats\n    {\n        public AlleleStats A_C;\n        public AlleleStats A_G;\n        public AlleleStats A_T;\n\n        public AlleleStats C_A;\n        public AlleleStats C_G;\n        public AlleleStats C_T;\n\n        public AlleleStats G_C;\n        public AlleleStats G_A;\n        public AlleleStats G_T;\n\n        public AlleleStats T_C;\n        public AlleleStats T_G;\n        public AlleleStats T_A;\n\n    }\n    \n    public class AlleleStats\n    {\n        public int[] ad;\n        public double[] vrf;\n    }\n}"
  },
  {
    "path": "SAUtils/MitoMap/CircularGenomeModel.cs",
    "content": "﻿using System.Collections.Generic;\nusing Genome;\n\nnamespace SAUtils.MitoMap\n{\n    public sealed class CircularGenomeModel\n    {\n        private readonly int _genomeLength;\n        private readonly ISequence _compressedSequence;\n\n        public CircularGenomeModel(ISequence compressedSequence)\n        {\n            _compressedSequence = compressedSequence;\n            _genomeLength = compressedSequence.Length;\n        }\n\n        // convert linear pseudogenome position back to the circular genome position \n        private (int, int) PseudoToCircular((int, int) interval) =>  (GetCircularPosition(interval.Item1), GetCircularPosition(interval.Item2));\n\n        private int GetCircularPosition(int posi) => (posi - 1) % _genomeLength + 1;\n\n        // translate the genomic interval that may overlap with the origin of the genome, no matter on circular genome or linear pseudo genome,  into interval(s) not crossing the origin\n        private List<(int, int)> SplitInterval((int, int) interval)\n        {\n            var (circularStart, circularEnd) = PseudoToCircular(interval);\n            var intervalList = new List<(int, int)>();\n            if (circularEnd >= circularStart)\n                intervalList.Add((circularStart, circularEnd));\n            else\n            {\n                intervalList.Add((circularStart, _genomeLength));\n                intervalList.Add((1, circularEnd));\n            }\n            return intervalList;\n        }\n\n        public string ExtractIntervalSequence((int, int) interval)\n        {\n            var subSequence = \"\";\n            SplitInterval(interval).ForEach(x => subSequence += _compressedSequence.Substring(x.Item1 - 1, x.Item2 - x.Item1 + 1));\n            return subSequence;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/MitoMap/MitoMapDatabaseUtilities.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.IO.Compression;\nusing OptimizedCore;\n\nnamespace SAUtils.MitoMap\n{\n    internal static class MitoMapDatabaseUtilities\n    {\n        private const string ReferenceQueryPrefix = \"COPY mitomap.reference (\";\n        public static MitoMapInputDb Create(string mitoMapDatabase)\n        {\n            var internalReferenceIdToPubmedId = new Dictionary<string, string>();\n            using (var stream = new FileStream(mitoMapDatabase, FileMode.Open))\n            using(var gzStream = new GZipStream(stream, CompressionMode.Decompress))\n            using (var reader = new StreamReader(gzStream))\n            {\n                string line;\n                MitoMapTable currentTable = 0;\n                while ((line = reader.ReadLine()) != null)\n                {\n                    if (line == \"\\\\.\")\n                    {\n                        currentTable = 0;\n                        continue;\n                    }\n\n                    switch (currentTable)\n                    {\n                        case 0:\n                            currentTable = TryGetTable(line);\n                            continue;\n                        case MitoMapTable.Reference:\n                            ProcessReferenceInfo(line, internalReferenceIdToPubmedId);\n                            break;\n                        default:\n                            throw new ArgumentOutOfRangeException();\n                    }\n                }\n            }\n\n            return new MitoMapInputDb(internalReferenceIdToPubmedId);\n        }\n\n        private static void ProcessReferenceInfo(string line, Dictionary<string, string> internalReferenceIdToPubmedId)\n        {\n            var fields = line.OptimizedSplit('\\t');\n            if (fields.Length != 14) throw new InvalidDataException($\"Invalid reference table record: {line}\");\n            
internalReferenceIdToPubmedId[fields[0]] = fields[13];\n        }\n\n        private static MitoMapTable TryGetTable(string line)\n        {\n            return line.StartsWith(ReferenceQueryPrefix) ? MitoMapTable.Reference : 0;\n        }\n    }\n\n    public enum MitoMapTable\n    {\n        Reference = 1\n    }\n}"
  },
  {
    "path": "SAUtils/MitoMap/MitoMapInputDb.cs",
    "content": "﻿using System.Collections.Generic;\n\nnamespace SAUtils.MitoMap\n{\n    public struct MitoMapInputDb\n    {\n        public Dictionary<string, string> InternalReferenceIdToPubmedId { get; }\n\n        public MitoMapInputDb(Dictionary<string, string> internalReferenceIdToPubmedId)\n        {\n            InternalReferenceIdToPubmedId = internalReferenceIdToPubmedId;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/MitoMap/MitoMapItem.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing Genome;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.MitoMap\n{\n    public static class MitoMapDataTypes\n    {\n        public const string MitoMapMutationsCodingControl = \"MutationsCodingControl\";\n        public const string MitoMapMutationsRNA = \"MutationsRNA\";\n        public const string MitoMapPolymorphismsCoding = \"PolymorphismsCoding\";\n        public const string MitoMapPolymorphismsControl = \"PolymorphismsControl\";\n        public const string MitoMapDeletionsSingle = \"DeletionsSingle\";\n        public const string MitoMapInsertionsSimple = \"InsertionsSimple\";\n    }\n\n    public static class MitoDLoop\n    {\n        public const int Start = 16024;\n        // ReSharper disable once UnusedMember.Global\n        public const int End = 576;\n    }\n\n    public static class MitomapParsingParameters\n    {\n        public const int LargeDeletionCutoff = 100;\n    }\n\n    public sealed class MitoMapItem : ISupplementaryDataItem\n    {\n        public Chromosome Chromosome { get; }\n        public int Position { get; set; }\n        public string RefAllele { get; set; }\n        public string AltAllele { get; set; }\n        \n        private readonly List<string> _diseases;\n        private readonly bool? _homoplasmy;\n        private readonly bool? _heteroplasmy;\n        private readonly string _status;\n        private readonly string _clinicalSignificance;\n        private readonly string _scorePercentile;\n        private readonly int _numGenBankFullLengthSeqs;\n        private readonly List<string> _pubMedIds;\n\n        public MitoMapItem(Chromosome chromosome, int posi, string refAllele, string altAllele, List<string> diseases, bool? homoplasmy, bool? 
heteroplasmy, string status, string clinicalSignificance, string scorePercentile, ISequenceProvider sequenceProvider, int numGenBankFullLengthSeqs, List<string> pubMedIds)\n        {\n            Chromosome = chromosome;\n            Position = posi;\n            if (sequenceProvider == null)\n            {\n                RefAllele = refAllele;\n                AltAllele = altAllele;\n            }\n            else\n            {\n                (Position, RefAllele, AltAllele) = TryAddPaddingBase(refAllele, altAllele, Position, sequenceProvider);\n            }\n            _diseases = diseases;\n            _homoplasmy = homoplasmy;\n            _heteroplasmy = heteroplasmy;\n            _status = status;\n            _clinicalSignificance = clinicalSignificance;\n            _scorePercentile = scorePercentile;\n            _numGenBankFullLengthSeqs = numGenBankFullLengthSeqs;\n            _pubMedIds = pubMedIds;\n        }\n\n        private static (int, string, string) TryAddPaddingBase(string refAllele, string altAllele, int position, ISequenceProvider sequenceProvider)\n        {\n            // insertion\n            if (IsEmptyOrDash(refAllele)) return AddPaddingBase(altAllele, true, position, sequenceProvider);\n            // deletion\n            return IsEmptyOrDash(altAllele) ? AddPaddingBase(refAllele, false, position, sequenceProvider) : (position, refAllele, altAllele);\n        }\n\n        private static (int, string, string) AddPaddingBase(string allele, bool isInsertion, int position, ISequenceProvider sequenceProvider)\n        {\n            string paddingBase = sequenceProvider.Sequence.Substring(position - 2, 1);\n            return isInsertion ? 
(position - 1, paddingBase, paddingBase + allele) : (position - 1, paddingBase + allele, paddingBase);\n        }\n\n        private static bool IsEmptyOrDash(string allele) => string.IsNullOrEmpty(allele) || allele == \"-\";\n\n        public string GetJsonString()\n        {\n            var sb = StringBuilderPool.Get();\n            var jsonObject = new JsonObject(sb);\n\n            if (string.IsNullOrEmpty(RefAllele)) RefAllele = \"-\";\n            if (string.IsNullOrEmpty(AltAllele)) AltAllele = \"-\";\n\n            jsonObject.AddStringValue(\"refAllele\", RefAllele);\n            jsonObject.AddStringValue(\"altAllele\", AltAllele);\n            jsonObject.AddStringValues(\"diseases\", _diseases?.Distinct());\n            if (_homoplasmy.HasValue) jsonObject.AddBoolValue(\"hasHomoplasmy\", _homoplasmy.Value, true); \n            if (_heteroplasmy.HasValue) jsonObject.AddBoolValue(\"hasHeteroplasmy\", _heteroplasmy.Value, true);  \n            jsonObject.AddStringValue(\"status\", _status);\n            jsonObject.AddStringValue(\"clinicalSignificance\", _clinicalSignificance);\n            jsonObject.AddStringValue(\"scorePercentile\", _scorePercentile, false);\n            jsonObject.AddIntValue(\"numGenBankFullLengthSeqs\", _numGenBankFullLengthSeqs);\n            jsonObject.AddStringValues(\"pubMedIds\", _pubMedIds);\n            return StringBuilderPool.GetStringAndReturn(sb);\n        }\n\n        public string InputLine { get; set; }\n\n        public static Dictionary<(string, string), MitoMapItem> AggregatedMutationsSamePosition(IEnumerable<MitoMapItem> mitoMapMutItems)\n        {\n            var aggregatedMutations = new Dictionary<(string, string), MitoMapItem>();\n\n            foreach (var mitoMapMutItem in mitoMapMutItems)\n            {\n                var mutation = (mitoMapMutItem.RefAllele, mitoMapMutItem.AltAllele);\n                if (aggregatedMutations.ContainsKey(mutation))\n                {\n                    var mergedItem = 
Merge(aggregatedMutations[mutation], mitoMapMutItem);\n                    if (mergedItem == null) continue;\n                    aggregatedMutations[mutation] = mergedItem;\n                }\n                else aggregatedMutations[mutation] = mitoMapMutItem;\n            }\n            return aggregatedMutations;\n        }\n\n        private static MitoMapItem Merge(MitoMapItem mitoMapItem1, MitoMapItem mitoMapItem2)\n        {\n            if (HasConflictValue(mitoMapItem1.Chromosome, mitoMapItem2.Chromosome) || HasConflictValue(mitoMapItem1.Position, mitoMapItem2.Position) ||\n                HasConflictValue(mitoMapItem1.RefAllele, mitoMapItem2.RefAllele) || HasConflictValue(mitoMapItem1.AltAllele, mitoMapItem2.AltAllele) || \n                HasConflictValue(mitoMapItem1._homoplasmy, mitoMapItem2._homoplasmy) || HasConflictValue(mitoMapItem1._heteroplasmy, mitoMapItem2._heteroplasmy) || \n                HasConflictValue(mitoMapItem1._status, mitoMapItem2._status) || HasConflictValue(mitoMapItem1._clinicalSignificance, mitoMapItem2._clinicalSignificance) ||\n                HasConflictValue(mitoMapItem1._scorePercentile, mitoMapItem2._scorePercentile))\n            {\n                throw new InvalidDataException($\"Conflict found at {mitoMapItem1.Position} when updating MITOMAP record: first record: {mitoMapItem1.GetJsonString()}; second record: {mitoMapItem2.GetJsonString()} \");\n            }\n            var homoplasmy = mitoMapItem1._homoplasmy ?? mitoMapItem2._homoplasmy;\n            var heteroplasmy = mitoMapItem1._heteroplasmy ?? 
mitoMapItem2._heteroplasmy;\n            string alleleInfo = $\"{mitoMapItem1.Position} (Ref: {mitoMapItem1.RefAllele}, Alt: {mitoMapItem1.AltAllele})\";\n            var diseases = MergeCollections(mitoMapItem1._diseases, mitoMapItem2._diseases, alleleInfo).ToList();\n            var pubMedIds = MergeCollections(mitoMapItem1._pubMedIds, mitoMapItem2._pubMedIds, alleleInfo).ToList();\n            var status = mitoMapItem1._status ?? mitoMapItem2._status;\n            var clinicalSignificance = mitoMapItem1._clinicalSignificance ?? mitoMapItem2._clinicalSignificance;\n            var scorePercentile = mitoMapItem1._scorePercentile ?? mitoMapItem2._scorePercentile;\n            var numFullLengthSequences = Math.Max(mitoMapItem1._numGenBankFullLengthSeqs, mitoMapItem2._numGenBankFullLengthSeqs);\n            return new MitoMapItem(mitoMapItem1.Chromosome, mitoMapItem1.Position, mitoMapItem1.RefAllele, mitoMapItem1.AltAllele,\n                diseases, homoplasmy, heteroplasmy, status, clinicalSignificance, scorePercentile, null, numFullLengthSequences, pubMedIds);\n        }\n\n        private static IEnumerable<string> MergeCollections(ICollection<string> collection1, ICollection<string> collection2, string alleleInfo)\n        {\n            if (IsNullOrEmpty(collection1) || IsNullOrEmpty(collection2)) \n                return (collection1?.Count ?? -1) > 0 \n                ? collection1 \n                : collection2 ?? 
Enumerable.Empty<string>();\n            \n            Console.WriteLine($\"Merge data at {alleleInfo}: {string.Join(\",\", collection1)} and {string.Join(\",\", collection2)}\");\n            return collection1.Concat(collection2).Distinct();\n\n        }\n\n        private static bool HasConflictValue<T>(T originalValue, T newValue)\n        {\n            bool hasConflict = !IsNullOrEmpty(originalValue) && !IsNullOrEmpty(newValue) && !originalValue.Equals(newValue);\n            if (hasConflict) Console.WriteLine($\"Conflict found: {originalValue}, {newValue}\");\n\n            return hasConflict;\n        }\n\n        private static bool IsNullOrEmpty<T>(T value)\n        {\n            if (typeof(T) == typeof(string))\n                return string.IsNullOrEmpty(value as string);\n            return value == null || value.Equals(default(T));\n        }\n    }\n}\n"
  },
  {
    "path": "SAUtils/MitoMap/MitoMapSvItem.cs",
    "content": "﻿using Genome;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\nusing Variants;\n\nnamespace SAUtils.MitoMap\n{\n    public sealed class MitoMapSvItem : ISuppIntervalItem\n    {\n        public int Start { get; }\n        public int End { get; }\n        public Chromosome Chromosome { get; }\n        private VariantType VariantType { get; }\n\n        public MitoMapSvItem(Chromosome chromosome, int start, int end, VariantType variantType)\n        {\n            Chromosome = chromosome;\n            Start = start;\n            End = end;\n            VariantType = variantType;\n        }\n        \n        public string GetJsonString()\n        {\n            var sb= StringBuilderPool.Get();\n            var jsonObject = new JsonObject(sb);\n\n            // data section\n            jsonObject.AddStringValue(\"chromosome\", Chromosome.EnsemblName);\n            jsonObject.AddIntValue(\"begin\", Start);\n            jsonObject.AddIntValue(\"end\", End);\n            jsonObject.AddStringValue(\"variantType\", VariantType.ToString());\n\n            return StringBuilderPool.GetStringAndReturn(sb);\n        }\n\n    }\n}"
  },
  {
    "path": "SAUtils/MitoMap/MitoMapSvReader.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing System.Text.RegularExpressions;\nusing ErrorHandling.Exceptions;\nusing Genome;\nusing IO;\nusing OptimizedCore;\nusing SAUtils.InputFileParsers.ClinVar;\nusing VariantAnnotation.Interface.Providers;\nusing Variants;\n\nnamespace SAUtils.MitoMap\n{\n    public sealed class MitoMapSvReader\n    {\n        private readonly FileInfo _mitoMapFileInfo;\n        private readonly string _dataType;\n        private readonly ISequenceProvider _sequenceProvider;\n        private readonly VariantAligner _variantAligner;\n        private readonly Chromosome _chromosome;\n\n        private readonly HashSet<string> _mitoMapSvDataTypes = new HashSet<string>\n        {\n            MitoMapDataTypes.MitoMapDeletionsSingle,\n            MitoMapDataTypes.MitoMapInsertionsSimple\n        };\n\n        public MitoMapSvReader(FileInfo mitoMapFileInfo, ISequenceProvider sequenceProvider)\n        {\n            _mitoMapFileInfo = mitoMapFileInfo;\n            _dataType = GetDataType();\n            _sequenceProvider = sequenceProvider;\n            _chromosome = sequenceProvider?.RefNameToChromosome[\"chrM\"] ;\n            _variantAligner = new VariantAligner(sequenceProvider?.Sequence);\n        }\n\n        private string GetDataType()\n        {\n            string dataType = _mitoMapFileInfo.Name.Replace(\".html\", null, StringComparison.Ordinal);\n            if (!_mitoMapSvDataTypes.Contains(dataType)) throw new InvalidFileFormatException($\"Unexpected data file: {_mitoMapFileInfo.Name}\");\n            return dataType;\n        }\n\n\n        private IEnumerable<MitoMapSvItem> GetMitoMapSvItems()\n        {\n            bool isDataLine = false;\n            using (var reader = FileUtilities.GetStreamReader(FileUtilities.GetReadStream(_mitoMapFileInfo.FullName)))\n            {\n                string line;\n                while ((line = reader.ReadLine()) != null)\n     
           {\n                    line = line.Trim();\n                    if (!isDataLine)\n                    {\n                        if (line == \"\\\"data\\\":[\") isDataLine = true;\n                        continue;\n                    }\n                    // last item\n                    if (line.OptimizedStartsWith('[') && line.EndsWith(\"]],\", StringComparison.Ordinal)) isDataLine = false;\n\n                    foreach (var supplementaryIntervalItem in ParseLine(line))\n                    {\n                        yield return supplementaryIntervalItem;\n                    }\n                }\n            }\n        }\n\n        internal List<MitoMapSvItem> ParseLine(string line)\n        {\n            // line validation\n            if (!(line.OptimizedStartsWith('[') && line.EndsWith(\"],\", StringComparison.Ordinal)))\n                throw new InvalidFileFormatException($\"Data line doesn't start with \\\"[\\\" or end with \\\"],\\\": {line}\");\n            var info = line.TrimEnd(',').TrimEnd(']').Trim('[', ']').Split(\"\\\",\\\"\").Select(x => x.Trim('\"')).ToList();\n            return _dataType == MitoMapDataTypes.MitoMapInsertionsSimple ? 
ExtractSvItemFromSimpleInsertions(info) : ExtractSvItemFromDeletionsSingle(info);\n        }\n\n        private List<MitoMapSvItem> ExtractSvItemFromDeletionsSingle(List<string> info)\n        {\n            var junctions = info[0].OptimizedSplit(':').Select(int.Parse).ToList();\n            var start = junctions[0] + 1; \n            var end = junctions[1] - 1;\n            if (end < start)\n                throw new ArgumentOutOfRangeException($\"Deletions with end position smaller than start position: start: {start}, end: {end}\");\n            var calculatedSize = end - start + 1;\n            var size = int.Parse(info[1].Substring(1));\n            if (size <= MitomapParsingParameters.LargeDeletionCutoff) return new List<MitoMapSvItem>();\n            if (calculatedSize != size) Console.WriteLine($\"Incorrect size of deleted region: size of {start}-{end} should be {calculatedSize}, provided size is {size}. Provided size is used.\");\n            var refSequence = _sequenceProvider.Sequence.Substring(start - 1, size);\n            var newStart = _variantAligner.LeftAlign(start, refSequence, \"\").Item1;\n            if (start != newStart) Console.WriteLine($\"Deletion of {size} bps. 
Original start start position: {start}; new position after left-alignment {newStart}.\");\n            var mitoMapSvItem = new MitoMapSvItem(_chromosome, newStart, newStart + size - 1, VariantType.deletion);\n            return new List<MitoMapSvItem> { mitoMapSvItem };\n        }\n\n        // extract large insertions from this file\n        private List<MitoMapSvItem> ExtractSvItemFromSimpleInsertions(IReadOnlyList<string> info)\n        {\n            var mitoMapSvItems = new List<MitoMapSvItem>();\n            var altAlleleInfo = info[2];\n            var dLoopPattern = new Regex(@\"(?<start>^\\d+)-(?<end>(\\d+)) D-Loop region\");\n            var dLoopMatch = dLoopPattern.Match(altAlleleInfo);\n            // not a large insertion\n            if (!dLoopMatch.Success) return mitoMapSvItems;\n            var genomeStart = MitoDLoop.Start + int.Parse(dLoopMatch.Groups[\"start\"].Value) - 1;\n            var genomeEnd = MitoDLoop.Start + int.Parse(dLoopMatch.Groups[\"end\"].Value) - 1;\n            if (genomeEnd < genomeStart)\n                throw new ArgumentOutOfRangeException($\"Duplication with end position smaller than start position: start: {genomeStart}, end: {genomeEnd}\");\n            var size = genomeEnd - genomeStart + 1;\n            var refSequence = _sequenceProvider.Sequence.Substring(genomeStart - 1, size);\n            var leftAlignResults = _variantAligner.LeftAlign(genomeStart, refSequence, refSequence + refSequence); // duplication\n            var newStart = leftAlignResults.Item1;\n            if (genomeStart != newStart) Console.WriteLine($\"Duplication of {size} bps. 
Original start start position: {genomeStart}; new position after left-alignment {newStart}.\");\n            var mitoMapSvItem = new MitoMapSvItem(_chromosome, newStart, newStart + size - 1, VariantType.duplication);\n            mitoMapSvItems.Add(mitoMapSvItem);\n            return mitoMapSvItems;\n        }\n        \n        public static IEnumerable<MitoMapSvItem> GetSortedItems(IEnumerable<MitoMapSvReader> mitoMapSvReaders) => mitoMapSvReaders.SelectMany(x => x.GetMitoMapSvItems()).OrderBy(x => x.Start);\n    }\n}\n"
  },
  {
    "path": "SAUtils/MitoMap/MitoMapVariantReader.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing System.Text.RegularExpressions;\nusing ErrorHandling.Exceptions;\nusing Genome;\nusing IO;\nusing OptimizedCore;\nusing SAUtils.DataStructures;\nusing SAUtils.InputFileParsers.ClinVar;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Providers;\n\nnamespace SAUtils.MitoMap\n{\n    public sealed class MitoMapVariantReader\n    {\n        private readonly FileInfo _mitoMapFileInfo;\n        private const string DelSymbol = \"\";\n        private readonly string _dataType;\n        private readonly ReferenceSequenceProvider _sequenceProvider;\n        private readonly VariantAligner _variantAligner;\n        private readonly Chromosome _chromosome;\n        private readonly MitoMapInputDb _mitoMapInputDb;\n\n        private static readonly Dictionary<string, int[]> MitoMapMutationColumnDefinitions = new Dictionary<string, int[]>\n        {\n            {MitoMapDataTypes.MitoMapMutationsCodingControl, new[] {0, 2, 3, 6, 7, 8, -1, 10, 11}},\n            {MitoMapDataTypes.MitoMapMutationsRNA, new[] {0, 2, 3, 5, 6, 7, 8, 10, 11}},\n            {MitoMapDataTypes.MitoMapPolymorphismsCoding,  new[] {0, -1, 2, -1, -1, -1, -1, 7, 8}},\n            {MitoMapDataTypes.MitoMapPolymorphismsControl,  new[] {0, -1, 2, -1, -1, -1, -1, 4, 5}},\n            {MitoMapDataTypes.MitoMapInsertionsSimple,  new int[0]},\n            {MitoMapDataTypes.MitoMapDeletionsSingle,  new int[0]}\n        };\n\n        private static readonly Dictionary<(string, int), string> ClinicalSignificances = new Dictionary<(string, int), string>\n        {\n            {(\"up\", 3), \"confirmed pathogenic\"},\n            {(\"up\", 2), \"likely pathogenic\"},\n            {(\"up\", 1), \"possibly pathogenic\"},\n            {(\"down\", 1), \"possibly benign\"},\n            {(\"down\", 2), \"likely benign\"}\n        };\n\n        private static readonly Dictionary<string, bool> 
SymbolToBools = new Dictionary<string, bool>\n        {\n            {\"+\", true},\n            {\"-\", false}\n        };\n\n        private static readonly HashSet<string> MitoMapDelSymbolSet = new HashSet<string> { \":\", \"del\", \"d\" };\n        private static readonly HashSet<string> IgnoredStatus = new HashSet<string> { \"See 7471insC\", \"Reported  (alt loc)\" };\n\n        public MitoMapVariantReader(FileInfo mitoMapFileInfo, MitoMapInputDb mitoMapInputDb, ReferenceSequenceProvider sequenceProvider)\n        {\n            _mitoMapFileInfo = mitoMapFileInfo;\n            _mitoMapInputDb = mitoMapInputDb;\n            _dataType = GetDataType();\n            _sequenceProvider = sequenceProvider;\n            _chromosome = sequenceProvider.RefNameToChromosome[\"chrM\"];\n            _variantAligner = new VariantAligner(sequenceProvider.Sequence);\n        }\n\n        private string GetDataType()\n        {\n            var dataType = _mitoMapFileInfo.Name.Replace(\".html\", \"\");\n            if (!MitoMapMutationColumnDefinitions.ContainsKey(dataType)) throw new InvalidDataException($\"Unexpected data file: {_mitoMapFileInfo.Name}\");\n            return dataType;\n        }\n\n        private IEnumerable<MitoMapItem> GetMitoMapItems()\n        {\n            Console.WriteLine($\"Processing {_dataType} file\");\n            bool isDataLine = false;\n            using (var reader = FileUtilities.GetStreamReader(FileUtilities.GetReadStream(_mitoMapFileInfo.FullName)))\n            {\n                string line;\n                while ((line = reader.ReadLine()) != null)\n                {\n                    line = line.Trim();\n                    if (!isDataLine)\n                    {\n                        if (line == \"\\\"data\\\":[\") isDataLine = true;\n                        continue;\n                    }\n                    // last item\n                    if (line.OptimizedStartsWith('[') && line.EndsWith(\"]],\")) isDataLine = 
false;\n\n                    foreach (var mitoMapMutItem in ParseLine(line, _dataType, _sequenceProvider, _variantAligner, _chromosome, _mitoMapInputDb))\n                    {\n                        if (!string.IsNullOrEmpty(mitoMapMutItem.RefAllele) ||\n                            !string.IsNullOrEmpty(mitoMapMutItem.AltAllele))\n                            yield return mitoMapMutItem;\n                    }\n                }\n            }\n        }\n\n        internal static List<MitoMapItem> ParseLine(string line, string dataType, ISequenceProvider sequenceProvider,\n            VariantAligner variantAligner, Chromosome chromosome, MitoMapInputDb mitoMapInputDb)\n        {\n            // line validation\n            if (!(line.OptimizedStartsWith('[') && line.EndsWith(\"],\")))\n                throw new InvalidFileFormatException($\"Data line doesn't start with \\\"[\\\" or end with \\\"],\\\": {line}\");\n            /* example lines\n            [\"582\",\"<a href='/MITOMAP/GenomeLoci#MTTF'>MT-TF</a>\",\"Mitochondrial myopathy\",\"T582C\",\"tRNA Phe\",\"-\",\"+\",\"Reported\",\"<span style='display:inline-block;white-space:nowrap;'><a href='/cgi-bin/mitotip?pos=582&alt=C&quart=2'><u>72.90%</u></a> <i class='fa fa-arrow-up' style='color:orange' aria-hidden='true'></i></span>\",\"0\",\"<a href='/cgi-bin/print_ref_list?refs=90165,91590&title=RNA+Mutation+T582C' target='_blank'>2</a>\"],\n            [\"583\",\"<a href='/MITOMAP/GenomeLoci#MTTF'>MT-TF</a>\",\"MELAS / MM & EXIT\",\"G583A\",\"tRNA Phe\",\"-\",\"+\",\"Cfrm\",\"<span style='display:inline-block;white-space:nowrap;'><a href='/cgi-bin/mitotip?pos=583&alt=A&quart=0'><u>93.10%</u></a> <i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i><i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i><i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i></span>\",\"0\",\"<a href='/cgi-bin/print_ref_list?refs=2066,90532,91590&title=RNA+Mutation+G583A' 
target='_blank'>3</a>\"],\n            */\n            var info = line.TrimEnd(',').TrimEnd(']').Trim('[', ']').Split(\"\\\",\\\"\").Select(x => x.Trim('\"')).ToList();\n            switch (dataType)\n            {\n                case MitoMapDataTypes.MitoMapInsertionsSimple:\n                    return ExtractVariantItemFromInsertionsSimple(info, sequenceProvider, variantAligner, chromosome, mitoMapInputDb);\n                case MitoMapDataTypes.MitoMapDeletionsSingle:\n                    return ExtractVariantItemFromDeletionsSingle(info, sequenceProvider, variantAligner, chromosome, mitoMapInputDb);\n            }\n            return ExtractVariantItem(info, dataType, sequenceProvider, variantAligner, chromosome, mitoMapInputDb);\n        }\n\n        private static List<MitoMapItem> ExtractVariantItemFromDeletionsSingle(List<string> info,\n            ISequenceProvider sequenceProvider, VariantAligner variantAligner, Chromosome chromosome,\n            MitoMapInputDb mitoMapInputDb)\n        {\n            var junctions = info[0].OptimizedSplit(':').Select(int.Parse).ToList();\n            var start = junctions[0] + 1;\n            var end = junctions[1] - 1;\n            if (end < start)\n                throw new ArgumentOutOfRangeException($\"Deletions with end position smaller than start position: start: {start}, end: {end}\");\n            var calculatedSize = end - start + 1;\n            var size = int.Parse(info[1].Substring(1));\n            if (size > MitomapParsingParameters.LargeDeletionCutoff) return new List<MitoMapItem>();\n            if (calculatedSize != size) Console.WriteLine($\"Incorrect size of deleted region: size of {start}-{end} should be {calculatedSize}, provided size is {size}. 
Provided size is used.\");\n            var refSequence = sequenceProvider.Sequence.Substring(start - 1, size);\n            var leftAlignResults = GetLeftAlignedVariant(start, refSequence, \"\", variantAligner);\n            var pubMedIds = ParsingUtilities.GetPubMedIds(info[4], mitoMapInputDb);\n            var mitoMapItem = new MitoMapItem(chromosome, leftAlignResults.RefPosition, leftAlignResults.RefAllele, \"-\", null, null, null, \"\", \"\", \"\", sequenceProvider, default, pubMedIds);\n            return new List<MitoMapItem> { mitoMapItem };\n        }\n\n        // extract small variant from this file\n        private static List<MitoMapItem> ExtractVariantItemFromInsertionsSimple(List<string> info,\n            ISequenceProvider sequenceProvider, VariantAligner variantAligner, Chromosome chromosome,\n            MitoMapInputDb mitoMapInputDb)\n        {\n            var altAlleleInfo = info[2];\n            var dLoopPattern = new Regex(@\"(?<start>^\\d+)-(?<end>(\\d+)) D-Loop region\");\n            var dLoopMatch = dLoopPattern.Match(altAlleleInfo);\n            // not a small variant\n            if (dLoopMatch.Success)\n            {\n                return new List<MitoMapItem>();\n            }\n            string altAllele;\n            var additionalRepeatPattern = new Regex(@\"additional \\[(?<repeat>[ACTGN]+)\\] \");\n            var additionalRepeatMatch = additionalRepeatPattern.Match(altAlleleInfo);\n            if (additionalRepeatMatch.Success)\n                altAllele = additionalRepeatMatch.Groups[\"repeat\"].Value;\n            // expect a string of allele sequence then\n            else\n            {\n                if (altAlleleInfo.Contains(\" \")) throw new InvalidDataException($\"Cannot parse {altAlleleInfo}\");\n                altAllele = altAlleleInfo;\n            }\n            var firstNumberPattern = new Regex(@\"(?<firstNumber>^\\d+)\");\n            var firstNumberMatch = firstNumberPattern.Match(info[3]);\n            
if (!firstNumberMatch.Success) throw new InvalidDataException($\"Failed to extract variant position from {info[3]}\");\n            var position = int.Parse(firstNumberMatch.Groups[\"firstNumber\"].Value);\n            var leftAlgnResults = GetLeftAlignedVariant(position, \"\", altAllele, variantAligner); // insertion\n            var pubMedIds = ParsingUtilities.GetPubMedIds(info[6], mitoMapInputDb);\n            return new List<MitoMapItem>{new MitoMapItem(chromosome, leftAlgnResults.RefPosition, \"-\", leftAlgnResults.AltAllele, null, null, null, \"\", \"\", \"\", sequenceProvider, default, pubMedIds) };\n        }\n\n        private static List<MitoMapItem> ExtractVariantItem(List<string> info, string dataType,\n            ISequenceProvider sequenceProvider, VariantAligner variantAligner, Chromosome chromosome,\n            MitoMapInputDb mitoMapInputDb)\n        {\n            int[] fields = MitoMapMutationColumnDefinitions[dataType];\n            List<MitoMapItem> mitoMapVarItems = new List<MitoMapItem>();\n            int position = int.Parse(info[fields[0]]);\n            var mitomapDiseaseString = GetDiseaseInfo(info, fields[1]);\n            if (DescribedAsDuplicatedRecord(mitomapDiseaseString)) return mitoMapVarItems;\n\n            var diseases = string.IsNullOrEmpty(mitomapDiseaseString) ? 
null : new List<string> {mitomapDiseaseString};\n            var (refAllele, rawAltAllele, extractedPosition) = GetRefAltAlleles(info[fields[2]], sequenceProvider);\n\n            if (extractedPosition.HasValue && position != extractedPosition)\n                Console.WriteLine($\"Inconsistant positions found: annotated position: {position}; allele {info[fields[2]]}\");\n\n            if (string.IsNullOrEmpty(refAllele) && string.IsNullOrEmpty(rawAltAllele))\n            {\n                Console.WriteLine($\"No reference and alternative alleles could be extracted: {position}; allele {info[fields[2]]}\");\n                return mitoMapVarItems;\n            }\n\n            if (MitoMapDelSymbolSet.Contains(rawAltAllele)) rawAltAllele = DelSymbol;\n\n            var homoplasmy   = GetPlasmy(info, fields[3]);\n            var heteroplasmy = GetPlasmy(info, fields[4]);\n\n            string status = GetStatus(info, fields);\n            (string scorePercentile, string clinicalSignificance) = GetFunctionalInfo(info, fields[6]);\n            int numFullLengthSeqs = GetNumFullLengthSequences(info[fields[7]], dataType);\n            var pubMedIds = ParsingUtilities.GetPubMedIds(info[fields[8]], mitoMapInputDb);\n\n            if (!string.IsNullOrEmpty(rawAltAllele))\n            {\n                foreach (var altAllele in GetAltAlleles(rawAltAllele))\n                {\n                    var thisLeftAlignResults = GetLeftAlignedVariant(position, refAllele, altAllele, variantAligner);\n                    mitoMapVarItems.Add(new MitoMapItem(chromosome, thisLeftAlignResults.RefPosition, thisLeftAlignResults.RefAllele, thisLeftAlignResults.AltAllele, diseases, homoplasmy,heteroplasmy, status, clinicalSignificance, scorePercentile, sequenceProvider, numFullLengthSeqs, pubMedIds));\n                }\n                if (mitoMapVarItems.Count > 1) Console.WriteLine($\"Multiple Alternative Allele Sequences {info[fields[2]]} at {position}\");\n                return 
mitoMapVarItems;         \n            }\n\n            var leftAlignResults = GetLeftAlignedVariant(position, refAllele, rawAltAllele, variantAligner);\n            mitoMapVarItems.Add(new MitoMapItem(chromosome, leftAlignResults.RefPosition, leftAlignResults.RefAllele, leftAlignResults.AltAllele, diseases, homoplasmy,\n                    heteroplasmy, status, clinicalSignificance, scorePercentile, sequenceProvider, numFullLengthSeqs, pubMedIds));\n\n            return mitoMapVarItems;\n        }\n\n        private static string GetStatus(List<string> info, int[] fields)\n        {\n            string status = fields[5] == -1 ? null : info[fields[5]];\n            return IgnoredStatus.Contains(status) ? null : status;\n        }\n\n        internal static int GetNumFullLengthSequences(string field, string dataType)\n        {\n            if (!field?.OptimizedStartsWith('<') ?? true) return 0;\n\n            int leadingCharIndex = -1;\n            int trailingCharIndex = -1;\n            switch (dataType)\n            {\n                case MitoMapDataTypes.MitoMapMutationsRNA:\n                case MitoMapDataTypes.MitoMapMutationsCodingControl:\n                    leadingCharIndex = field.IndexOf('>');\n                    trailingCharIndex = field.IndexOf(\" (\", StringComparison.Ordinal);\n                    break;\n\n                case MitoMapDataTypes.MitoMapPolymorphismsCoding:\n                    leadingCharIndex = field.IndexOf('>');\n                    trailingCharIndex = field.IndexOf(\"</\", StringComparison.Ordinal);\n                break;\n\n                case MitoMapDataTypes.MitoMapPolymorphismsControl:\n                    leadingCharIndex = field.IndexOf('(');\n                    trailingCharIndex = field.IndexOf('/', leadingCharIndex+1);\n                    break;\n            }\n            string numFullLengthString = field.Substring(leadingCharIndex + 1, trailingCharIndex - leadingCharIndex - 1);\n            if 
(int.TryParse(numFullLengthString, out int numFullLength)) return numFullLength;\n            \n            throw new InvalidDataException($\"Can't extract number of full length GenBank sequences from {field} in the {dataType} dataset.\");\n        }\n\n        private static bool? GetPlasmy(List<string> info, int fields)\n        {\n            if (fields == -1 || !SymbolToBools.TryGetValue(info[fields], out bool b)) return null;\n            return b;\n        }\n\n        // there may be multiple alt alleles concatenated by \";\"\n        internal static IEnumerable<string> GetAltAlleles(string rawAltAllele) => rawAltAllele.OptimizedSplit(';').Select(DegenerateBaseUtilities.GetAllPossibleSequences).SelectMany(x => x);\n\n\n        private static bool DescribedAsDuplicatedRecord(string mitomapDiseaseString)\n        {\n            if (string.IsNullOrEmpty(mitomapDiseaseString)) return false;\n            var altNotationPattern1 = new Regex(\"alternate notation$\");\n            var altNotationMatch = altNotationPattern1.Match(mitomapDiseaseString);\n            if (!altNotationMatch.Success) return false;\n            Console.WriteLine($\"Alternate notation found: {mitomapDiseaseString}. This record is skipped.\");\n            return true;\n        }\n\n        private static string GetDiseaseInfo(List<string> info, int fieldIndex)\n        {\n            if (fieldIndex == -1) return null;\n            string diseaseString = info[fieldIndex];\n            if (string.IsNullOrEmpty(diseaseString)) return diseaseString;\n            var regexPattern = new Regex(@\"<a href=.+>(?<disease>.+)</a>$\");\n            var match = regexPattern.Match(diseaseString);\n            return match.Success ? 
match.Groups[\"disease\"].Value : diseaseString;\n        }\n\n        private static (string, string) GetFunctionalInfo(List<string> info, int fieldIndex)\n        {\n            if (fieldIndex == -1) return (null, null);\n            string functionInfoString = info[fieldIndex];\n            // <u>93.10%</u></a> <i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i><i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i><i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i></span>\n            var regexPattern = new Regex(@\"<u>(?<scoreString>[0-9.]+)%</u></a> (?<significanceString>.+)</span>$\");\n            var match = regexPattern.Match(functionInfoString);\n            var clineSignificance = GetClinicalSignificance(match.Groups[\"significanceString\"].Value);\n            return (match.Groups[\"scoreString\"].Value, clineSignificance);\n        }\n\n        private static string GetClinicalSignificance(string significanceString)\n        {\n            // < i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i><i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i><i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i>\n            // filter out the symbol for frequency alert\n            var arrows = significanceString.Split(@\"</i>\", StringSplitOptions.RemoveEmptyEntries).Where(x => !x.Contains(\"fa-asterisk\")).ToList();\n            var nArrows = arrows.Count;\n            if (nArrows == 0) return null;\n            var arrowType = arrows[0].Contains(\"fa-arrow-up\") ? \"up\" : \"down\";\n            return ClinicalSignificances[(arrowType, nArrows)];\n        }\n\n        private static (string RefAllele, string RawAltAllele, int? 
ExtractedPosition) GetRefAltAlleles(string alleleString, ISequenceProvider sequenceProvider)\n        {\n            var results = Evaluate_C123T(alleleString);\n            if (results.Success) return (results.RefAllele, results.RawAltAllele, results.ExtractedPosition);\n\n            results = Evaluate_16021_16022del(alleleString, sequenceProvider);\n            if (results.Success) return (results.RefAllele, results.RawAltAllele, results.ExtractedPosition);\n\n            results = Evaluate_8042del2(alleleString, sequenceProvider);\n            if (results.Success) return (results.RefAllele, results.RawAltAllele, results.ExtractedPosition);\n\n            results = Evaluate_C9537insC(alleleString);\n            if (results.Success) return (results.RefAllele, results.RawAltAllele, results.ExtractedPosition);\n\n            results = Evaluate_3902_3908invACCTTGC(alleleString, sequenceProvider);\n            if (results.Success) return (results.RefAllele, results.RawAltAllele, results.ExtractedPosition);\n\n            results = Evaluate_A_C_or_CC(alleleString);\n            if (results.Success) return (results.RefAllele, results.RawAltAllele, results.ExtractedPosition);\n\n            results = Evaluate_C_C_2_8(alleleString);\n            if (results.Success) return (results.RefAllele, results.RawAltAllele, results.ExtractedPosition);\n\n            results = Evaluate_8042delAT(alleleString, sequenceProvider);\n\n            return results.Success\n                ? (results.RefAllele, results.RawAltAllele, results.ExtractedPosition)\n                : (null, null, null);\n        }\n\n        // 8042delAT\n        private static (bool Success, string RefAllele, string RawAltAllele, int? 
ExtractedPosition) Evaluate_8042delAT(string alleleString, ISequenceProvider sequenceProvider)\n        {\n            var regex = new Regex(@\"(?<position>^\\d+)del(?<del>[ACGTacgtNn]+)\");\n            var match = regex.Match(alleleString);\n            if (!match.Success) return (false, null, null, null);\n\n            var extractedPosition      = int.Parse(match.Groups[\"position\"].Value);\n            string deletedSeq          = match.Groups[\"del\"].Value;\n            string deletedReferenceSeq = GetRefAllelesFromReference(sequenceProvider, extractedPosition, deletedSeq.Length);\n\n            if (deletedSeq != deletedReferenceSeq)\n            {\n                throw new InvalidDataException($\"Deleted sequence at {extractedPosition}: annotation is {deletedSeq}, reference sequence is {deletedReferenceSeq}\");\n            }\n\n            return (true, deletedReferenceSeq, \"-\", extractedPosition);\n        }\n\n        // C-C(2-8)\n        private static (bool Success, string RefAllele, string RawAltAllele, int? ExtractedPosition) Evaluate_C_C_2_8(string alleleString)\n        {\n            var regex = new Regex(@\"(?<ref>[ACGTacgtNn])[_|-](?<alt>[ACGTacgtNn])\\((?<min>\\d+)-(?<max>\\d+)\\)\");\n            var match = regex.Match(alleleString);\n            if (!match.Success) return (false, null, null, null);\n\n            var altBase = char.Parse(match.Groups[\"alt\"].Value);\n            int minRepeat = int.Parse(match.Groups[\"min\"].Value);\n            int maxRepeat = int.Parse(match.Groups[\"max\"].Value);\n            var altAlleleSequences = new List<string>();\n\n            for (int i = minRepeat; i <= maxRepeat; i++)\n            {\n                altAlleleSequences.Add(new string(altBase, i));\n            }\n\n            return (true, match.Groups[\"ref\"].Value, string.Join(\";\", altAlleleSequences), null);\n        }\n\n        //A-Cor CC\n        private static (bool Success, string RefAllele, string RawAltAllele, int? 
ExtractedPosition) Evaluate_A_C_or_CC(string alleleString)\n        {\n            var regex = new Regex(@\"(?<ref>[ACGTacgtNn]+)[_|-](?<alt1>[ACGTacgtNn]+) ?or ?(?<alt2>[ACGTacgtNn]+)\");\n            var match = regex.Match(alleleString);\n            if (!match.Success) return (false, null, null, null);\n\n            var altAllele = match.Groups[\"alt1\"].Value + \";\" + match.Groups[\"alt2\"].Value;\n            return (true, match.Groups[\"ref\"].Value, altAllele, null);\n        }\n\n        // 3902_3908invACCTTGC\n        private static (bool Success, string RefAllele, string RawAltAllele, int? ExtractedPosition) Evaluate_3902_3908invACCTTGC(string alleleString, ISequenceProvider sequenceProvider)\n        {\n            var regex = new Regex(@\"(?<start>^\\d+)[_|-](?<end>\\d+)inv(?<seq>[ACGTacgtNn]+)\");\n            var match = regex.Match(alleleString);\n            if (!match.Success) return (false, null, null, null);\n\n            var start       = int.Parse(match.Groups[\"start\"].Value);\n            var end         = int.Parse(match.Groups[\"end\"].Value);\n            var refSequence = GetRefAllelesFromReference(sequenceProvider, start, end - start + 1);\n            if (refSequence != match.Groups[\"seq\"].Value) throw new InvalidDataException($\"Inconsistent sequences: reference {refSequence}, annotation {match.Groups[\"seq\"].Value}\");\n            return (true, refSequence, ReverseSequence(refSequence), start);\n        }\n\n        // C9537insC\n        private static (bool Success, string RefAllele, string RawAltAllele, int? 
ExtractedPosition) Evaluate_C9537insC(string alleleString)\n        {\n            var regex = new Regex(@\"(?<ref>[ACGTacgtNn])(?<position>\\d+)ins(?<extra>[ACGTacgtNn]+)\");\n            var match = regex.Match(alleleString);\n            if (!match.Success) return (false, null, null, null);\n\n            var extractedPosition = int.Parse(match.Groups[\"position\"].Value);\n            var refAllele         = match.Groups[\"ref\"].Value;\n            var altAllele         = refAllele + match.Groups[\"extra\"].Value;\n            return (true, refAllele, altAllele, extractedPosition);\n        }\n\n        // 8042del2\n        private static (bool Success, string RefAllele, string RawAltAllele, int? ExtractedPosition) Evaluate_8042del2(string alleleString, ISequenceProvider sequenceProvider)\n        {\n            var regex = new Regex(@\"(?<position>^\\d+)del(?<length>\\d+)\");\n            var match = regex.Match(alleleString);\n            if (!match.Success) return (false, null, null, null);\n\n            var extractedPosition = int.Parse(match.Groups[\"position\"].Value);\n            return (true, GetRefAllelesFromReference(sequenceProvider, extractedPosition, int.Parse(match.Groups[\"length\"].Value)), \"-\", extractedPosition);\n        }\n\n        // 16021_16022del\n        private static (bool Success, string RefAllele, string RawAltAllele, int? 
ExtractedPosition) Evaluate_16021_16022del(string alleleString, ISequenceProvider sequenceProvider)\n        {\n            var regex = new Regex(@\"(?<start>^\\d+)[_|-](?<end>\\d+)del\");\n            var match = regex.Match(alleleString);\n            if (!match.Success) return (false, null, null, null);\n\n            var start = int.Parse(match.Groups[\"start\"].Value);\n            var end   = int.Parse(match.Groups[\"end\"].Value);\n            return (true, GetRefAllelesFromReference(sequenceProvider, start, end - start + 1), \"-\", start);\n        }\n\n        // C123T, A-del or A123del\n        private static (bool Success, string RefAllele, string RawAltAllele, int? ExtractedPosition) Evaluate_C123T(string alleleString)\n        {            \n            var regex = new Regex(@\"(?<ref>^[ACGTacgtNn]+)(?<position>(\\d+|-))(?<alt>([ACGTBDHKMRSVWYNacgtbdhkmrsvwyn]+|:|del[ACGTacgtNn]*|d)$)\");\n            var match = regex.Match(alleleString);\n            if (!match.Success) return (false, null, null, null);\n\n            int? 
extractedPosition = null;\n            if (match.Groups[\"position\"].Value != \"-\") extractedPosition = int.Parse(match.Groups[\"position\"].Value);\n            return (true, match.Groups[\"ref\"].Value, match.Groups[\"alt\"].Value, extractedPosition);\n        }\n\n        private static string GetRefAllelesFromReference(ISequenceProvider sequenceProvider, int start,\n            int length) => sequenceProvider.Sequence.Substring(start - 1, length);\n\n        private static string ReverseSequence(string sequence)\n        {\n            var reversedNucleotide = new char[sequence.Length];\n            var i = sequence.Length - 1;\n\n            foreach (var nucleotide in sequence)\n            {\n                reversedNucleotide[i] = nucleotide;\n                i--;\n            }\n\n            return new string(reversedNucleotide);\n        }\n\n        public static IEnumerable<MitoMapItem> GetMergeAndSortedItems(IEnumerable<MitoMapVariantReader> mitoMapMutationReaders)\n        {\n            var items = mitoMapMutationReaders.SelectMany(x => x.GetMitoMapItems()).ToList();\n            items.ForEach(x => x.Trim());\n            return items.ToLookup(x => x.Position).Select(x => MitoMapItem.AggregatedMutationsSamePosition(x.Select(i => i)).Values)\n                .SelectMany(x => x).OrderBy(x => x.Position);\n        }\n\n        private static (int RefPosition, string RefAllele, string AltAllele) GetLeftAlignedVariant(int position, string refAllele, string altAllele, VariantAligner variantAligner)\n        {\n            if (refAllele == null || altAllele == null) return (position, refAllele, altAllele);\n            if (refAllele == \"-\") refAllele = \"\";\n            if (altAllele == \"-\") altAllele = \"\";\n            var leftAlgnResults = variantAligner.LeftAlign(position, refAllele, altAllele); \n            var newPosition = leftAlgnResults.RefPosition;\n            var newRefAllele = leftAlgnResults.RefAllele;\n            var newAltAllele = 
leftAlgnResults.AltAllele;\n            if (position == newPosition) return leftAlgnResults;\n            if (newRefAllele == \"\") // insertion\n                Console.WriteLine(\n                    $\"Insertion of {altAllele}. Original start position: {position}; new position after left-alignment {newPosition}; new altAllele {newAltAllele}\");\n            else if (newAltAllele == \"\") // deletion\n                Console.WriteLine($\"Deletion of {newRefAllele.Length} bps. Original start position: {position}; new position after left-alignment {newPosition}.\");\n            else\n            {\n                throw new InvalidDataException($\"{position}:{refAllele}:{altAllele} becomes {newPosition}:{newRefAllele}:{newAltAllele} after left alignment. Left-alignment should be only performed for deletions and insertions\");\n            }\n            return leftAlgnResults;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/MitoMap/ParsingUtilities.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\n\nnamespace SAUtils.MitoMap\n{\n    public static class ParsingUtilities\n    {\n        private const string EmptyString = \"\\\\N\";\n        public static List<string> GetPubMedIds(string field, MitoMapInputDb mitoMapInputDb)\n        {\n            if (field == \"0\") return default;\n\n            var internalIds = ExtractInternalIds(field);\n            var pubMedIds = new List<string>();\n            foreach (string internalId in internalIds)\n            {\n                if (mitoMapInputDb.InternalReferenceIdToPubmedId.TryGetValue(internalId, out string pubMedId))\n                {\n                    if (pubMedId != EmptyString) pubMedIds.Add(pubMedId);\n                }\n                else\n                    throw new InvalidDataException($\"Can't find PubMedID corresponding to internal reference ID {internalId} when parsing {field}\");\n            }\n\n            return pubMedIds.Distinct().ToList();\n        }\n\n        public static string[] ExtractInternalIds(string field)\n        {\n            //\"?refs=4,140,189,91687,91737&title=\"\n            const string leadingString = \"refs=\";\n            const string trailingString = \"&title=\";\n            var leadingStringIndex = field.IndexOf(leadingString, StringComparison.Ordinal);\n            var trailingStringIndex = field.IndexOf(trailingString, StringComparison.Ordinal);\n            var startIndex = leadingStringIndex + leadingString.Length;\n            var idStringLength = trailingStringIndex - startIndex;\n            if (leadingStringIndex == -1 || trailingStringIndex == -1 || idStringLength == 0)\n                throw new InvalidDataException($\"Failed to extract reference IDs from {field}\");\n\n            return field.Substring(startIndex, idStringLength).Split(',');\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/MitoMap/SmallVarDb.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing CommandLine.Builders;\r\nusing CommandLine.NDesk.Options;\r\nusing ErrorHandling;\r\nusing IO;\r\nusing SAUtils.InputFileParsers;\r\nusing VariantAnnotation.Providers;\r\nusing VariantAnnotation.SA;\r\n\r\nnamespace SAUtils.MitoMap\r\n{\r\n    public static class SmallVarDb\r\n    {\r\n        private static string _compressedReference;\r\n        private static string _outputDirectory;\r\n        private static readonly List<string> MitoMapFileNames = new List<string>();\r\n        private static string _mitoMapDatabase;\r\n\r\n        public static ExitCodes Run(string command, string[] commandArgs)\r\n        {\r\n            var ops = new OptionSet\r\n            {\r\n                {\r\n                    \"ref|r=\",\r\n                    \"compressed reference sequence file\",\r\n                    v => _compressedReference = v\r\n                },\r\n                {\r\n                    \"in|i=\",\r\n                    \"MITOMAP small variants HTML file\",\r\n                    v => MitoMapFileNames.Add(v)\r\n                },\r\n                {\r\n                    \"database|d=\",\r\n                    \"MITOMAP database\",\r\n                    v => _mitoMapDatabase = v\r\n                },\r\n                {\r\n                    \"out|o=\",\r\n                    \"output directory\",\r\n                    v => _outputDirectory = v\r\n                }\r\n            };\r\n\r\n            string commandLineExample = $\"{command} [options]\";\r\n\r\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\r\n                .Parse()\r\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\r\n                .CheckEachFilenameExists(MitoMapFileNames, \"MITOMAP small variants HTML file\", \"--in\")\r\n                
.CheckInputFilenameExists(_mitoMapDatabase, \"MITOMAP database\", \"--database\")\r\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\r\n                .SkipBanner()\r\n                .ShowHelpMenu(\"Creates a supplementary database with MITOMAP small variants annotations\", commandLineExample)\r\n                .ShowErrors()\r\n                .Execute(ProgramExecution);\r\n\r\n            return exitCode;\r\n        }\r\n        private static ExitCodes ProgramExecution()\r\n        {\r\n\r\n            var rootDirectory = new FileInfo(MitoMapFileNames[0]).Directory;\r\n            if (rootDirectory == null) return ExitCodes.PathNotFound;\r\n            var version = DataSourceVersionReader.GetSourceVersion(Path.Combine(rootDirectory.ToString(), \"mitoMapVar\"));\r\n            var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\r\n            var chrom = sequenceProvider.RefNameToChromosome[\"chrM\"];\r\n            sequenceProvider.LoadChromosome(chrom);\r\n            MitoMapInputDb mitoMapInputDb = MitoMapDatabaseUtilities.Create(_mitoMapDatabase);\r\n            var mitoMapVarReaders = MitoMapFileNames.Select(mitoMapFileName => new MitoMapVariantReader(new FileInfo(mitoMapFileName), mitoMapInputDb, sequenceProvider)).ToList();\r\n            var mergedMitoMapVarItems = MitoMapVariantReader.GetMergeAndSortedItems(mitoMapVarReaders);\r\n\r\n            string outFileName = $\"{version.Name}_{version.Version}\";\r\n            using (var nsaStream   = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))\r\n            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSuffix)))\r\n            using (var nsaWriter   = new NsaWriter(nsaStream, indexStream, version, sequenceProvider, SaCommon.MitoMapTag, false, true, 
SaCommon.SchemaVersion, false))\r\n            {\r\n                nsaWriter.Write(mergedMitoMapVarItems);\r\n            }\r\n\r\n            return ExitCodes.Success;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/MitoMap/StructVarDb.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing ErrorHandling;\nusing Genome;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.NSA;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.MitoMap\n{\n    public static class StructVarDb\n    {\n        private static string _compressedReference;\n        private static string _outputDirectory;\n        private static readonly List<string> MitoMapFileNames = new List<string>();\n\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"ref|r=\",\n                    \"compressed reference sequence file\",\n                    v => _compressedReference = v\n                },\n                {\n                    \"in|i=\",\n                    \"MITOMAP structural variants HTML file\",\n                    v => MitoMapFileNames.Add(v)\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .CheckEachFilenameExists(MitoMapFileNames, \"MITOMAP structural variants HTML file\", \"--in\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a supplementary database with MITOMAP structural variants annotations\", commandLineExample)\n                
.ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n        private static ExitCodes ProgramExecution()\n        {\n            var rootDirectory = new FileInfo(MitoMapFileNames[0]).Directory;\n            if (rootDirectory == null) return ExitCodes.PathNotFound;\n            var version = DataSourceVersionReader.GetSourceVersion(Path.Combine(rootDirectory.ToString(), \"mitoMapSv\"));\n            var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\n            var chrom = sequenceProvider.RefNameToChromosome[\"chrM\"];\n            sequenceProvider.LoadChromosome(chrom);\n            var mitoMapSvReaders = MitoMapFileNames.Select(mitoMapFileName => new MitoMapSvReader(new FileInfo(mitoMapFileName), sequenceProvider)).ToList();\n            var sortedMitoMapVarItems = MitoMapSvReader.GetSortedItems(mitoMapSvReaders);\n\n            string outFileName = $\"{version.Name}_{version.Version}\";\n            using (var nsiStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.IntervalFileSuffix)))\n            using(var nsiWriter = new NsiWriter(nsiStream, version, GenomeAssembly.rCRS, SaCommon.MitoMapTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))\n            {\n                nsiWriter.Write(sortedMitoMapVarItems);\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/NgaWriter.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.IO.Compression;\nusing System.Text;\nusing Compression.Algorithms;\nusing Compression.FileHandling;\nusing IO;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils\n{\n    public sealed class NgaWriter : IDisposable\n    {\n        private readonly ExtendedBinaryWriter _writer;\n\n        public NgaWriter(Stream stream, ISerializable version, string jsonKey, ushort schemaVersion, bool isArray,\n            bool leaveOpen = false)\n        {\n            WriteHeader(stream, version, jsonKey, schemaVersion, isArray);\n\n            var blockStream = new BlockStream(new Zstandard(), stream, CompressionMode.Compress);\n            _writer         = new ExtendedBinaryWriter(blockStream, Encoding.UTF8, leaveOpen);\n        }\n\n        private static void WriteHeader(Stream stream, ISerializable version, string jsonKey, ushort schemaVersion, bool isArray)\n        {\n            using (var writer = new ExtendedBinaryWriter(stream, Encoding.UTF8, true))\n            {\n                writer.Write(SaCommon.NgaIdentifier);\n                version.Write(writer);\n                writer.Write(jsonKey);\n                writer.Write(isArray);\n                writer.Write(schemaVersion);\n                writer.Write(SaCommon.GuardInt);\n            }\n        }\n\n        public void Dispose() => _writer.Dispose();\n\n        public int Write(Dictionary<string, List<ISuppGeneItem>> geneToEntries)\n        {\n            _writer.WriteOpt(geneToEntries.Count);\n\n            var count = 0;\n            foreach ((string geneSymbol, List<ISuppGeneItem> entries) in geneToEntries)\n            {\n                _writer.WriteOptAscii(geneSymbol);\n                _writer.WriteOpt(entries.Count);\n\n                foreach (ISuppGeneItem geneItem in entries)\n                {\n                    count++;\n                    
_writer.Write(geneItem.GetJsonString());\n                }\n            }\n\n            return count;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/NsaConcatenator/ConcatUtilities.cs",
    "content": "﻿using IO;\nusing System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing Genome;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.NSA;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.NsaConcatenator\n{\n    public static class ConcatUtilities\n    {\n        private static (IDataSourceVersion version, string jsonKey, bool matchByAllele, bool isArray, bool isPositional, GenomeAssembly assembly) GetIndexFields(List<NsaReader> nsaReaders)\n        {\n            var version       = nsaReaders[0].Version;\n            var jsonKey       = nsaReaders[0].JsonKey;\n            var matchByAllele = nsaReaders[0].MatchByAllele;\n            var isArray       = nsaReaders[0].IsArray;\n            var isPositional  = nsaReaders[0].IsPositional;\n            var assembly      = nsaReaders[0].Assembly;\n\n            var versionComparer = new DataSourceVersionComparer();\n            for (var i = 1; i < nsaReaders.Count; i++) {\n                if (!versionComparer.Equals(version, nsaReaders[i].Version)\n                    || jsonKey       != nsaReaders[i].JsonKey\n                    || matchByAllele != nsaReaders[i].MatchByAllele\n                    || isArray       != nsaReaders[i].IsArray\n                    || isPositional  != nsaReaders[i].IsPositional\n                    || assembly      != nsaReaders[i].Assembly\n                ) \n                    return (null, null, false, false, false, GenomeAssembly.Unknown);\n            }\n\n            return (version, jsonKey, matchByAllele, isArray, isPositional, assembly);\n        }\n\n        private static NsaReader GetNsaReader(ushort chromIndex, List<NsaReader> nsaReaders)\n        {\n            if (nsaReaders == null) return null;\n\n            var hasDataArray = nsaReaders.Select(x => x.HasDataBlocks(chromIndex)).ToArray();\n            var count = hasDataArray.Count(x => x);\n\n            if (count > 1) throw new 
DataMisalignedException(\"Only one of the NSA files should have data for a given chromosome.\");\n\n            for (var i = 0; i < hasDataArray.Length; i++) {\n                if (hasDataArray[i] == false) continue;\n\n                return nsaReaders[i];\n            }\n            return null;\n        }\n\n        public static void ConcatenateNsaFiles(IEnumerable<string> filePaths, string outFilePrefix) {\n            if(filePaths == null || !filePaths.Any()) return;\n\n            var nsaReaders = new List<NsaReader>();\n\n            foreach (var fileName in filePaths)\n            {\n                nsaReaders.Add(new NsaReader(FileUtilities.GetReadStream(fileName), FileUtilities.GetReadStream(fileName + SaCommon.IndexSuffix)));\n            }\n\n            Console.WriteLine($\"Merging {nsaReaders.Count} NSA files...\");\n\n            var (version, jsonKey, matchByAllele, isArray, isPositional, assembly) = GetIndexFields(nsaReaders);\n\n            using (var nsaStream = FileUtilities.GetCreateStream(outFilePrefix + SaCommon.SaFileSuffix))\n            using (var indexStream = FileUtilities.GetCreateStream(outFilePrefix + SaCommon.SaFileSuffix + SaCommon.IndexSuffix))\n            using (var nsaWriter = new NsaWriter(nsaStream, indexStream, version, null, jsonKey, matchByAllele, isArray, SaCommon.SchemaVersion, isPositional, true, false, SaCommon.DefaultBlockSize, assembly))\n            {\n                var chromIndices = GetChromIndices(nsaReaders);\n\n                foreach (var chromIndex in chromIndices)\n                {\n                    Console.WriteLine($\"Working on chromosome index: {chromIndex}\");\n\n                    nsaWriter.Write(chromIndex, GetNsaReader(chromIndex, nsaReaders));\n                }\n\n            }\n        }\n\n        private static IEnumerable<ushort> GetChromIndices(List<NsaReader> nsaReaders)\n        {\n            var indices = new List<ushort>();\n            if (nsaReaders == null) return indices;\n     
       foreach (var reader in nsaReaders) {\n                indices.AddRange(reader.ChromosomeIndices);\n            }\n            return indices.Distinct();\n        }\n    }\n}\n"
  },
  {
    "path": "SAUtils/NsaConcatenator/NsaConcatenator.cs",
    "content": "﻿using CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing ErrorHandling;\nusing System;\nusing System.IO;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.NsaConcatenator\n{\n    public static class NsaConcatenator\n    {\n        private static string _inputDir;\n        private static string _outFileStub;\n\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"dir|d=\",\n                    \"input directory containing NSA (and index) files to be merged\",\n                    v => _inputDir = v\n                },\n                {\n                    \"out|o=\",\n                    \"output NSA file stub\",\n                    v => _outFileStub = v\n                }\n            };\n\n            var commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckDirectoryExists(_inputDir, \"input directory containing NSA files\", \"--in\")\n                .HasRequiredParameter(_outFileStub, \"output NSA file stub\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Concatenate multiple (non-overlapping) NSA files from the same data source into one\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n\n        private static ExitCodes ProgramExecution()\n        {\n            Console.WriteLine($\"Concatenating NSA files from {_inputDir}\");\n\n            ConcatUtilities.ConcatenateNsaFiles(Directory.GetFiles(_inputDir, $\"*{SaCommon.SaFileSuffix}\"), _outFileStub);\n            \n            return ExitCodes.Success;\n        }\n    }\n}\n"
  },
  {
    "path": "SAUtils/NsaIndexUpdater/UpdateIndex.cs",
    "content": "﻿using CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing VariantAnnotation.NSA;\n\nnamespace SAUtils.NsaIndexUpdater\n{\n    public static class UpdateIndex\n    {\n        private static string _inputIndexFile;\n        private static string _outputIndexFile;\n        private static string _versionFile;\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n\n            var ops = new OptionSet\n            {\n                {\n                    \"ind|i=\",\n                    \"input NSA index file path\",\n                    v => _inputIndexFile = v\n                },\n                {\n                    \"ver|r=\",\n                    \"version file path\",\n                    v => _versionFile = v\n                },\n                {\n                    \"out|o=\",\n                    \"output index file path\",\n                    v => _outputIndexFile= v\n                }\n            };\n\n            var commandLineExample = $\"{command} --ind <input NSA index file path> --out <output index file path> --ver <version file path>\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_inputIndexFile, \"input NSA index file path\", \"--ind\")\n                .HasRequiredParameter(_outputIndexFile, \"output index file path\", \"--out\")\n                .CheckInputFilenameExists(_versionFile, \"version file path\", \"--ver\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Extracts mini supplementary annotations for the given range from Nirvana Supplementary Annotations files.\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n\n        private static ExitCodes ProgramExecution()\n        {\n            using (var indexStream = 
FileUtilities.GetReadStream(_inputIndexFile))\n            using (var outStream = FileUtilities.GetCreateStream(_outputIndexFile))\n            using (var extWriter = new ExtendedBinaryWriter(outStream))\n            {\n                var version = DataSourceVersionReader.GetSourceVersion(_versionFile);\n                var oldIndex = new NsaIndex(indexStream);\n                var newIndex = new NsaIndex(extWriter, oldIndex.Assembly, version, oldIndex.JsonKey, oldIndex.MatchByAllele, oldIndex.IsArray, oldIndex.SchemaVersion, oldIndex.IsPositional);\n\n                newIndex.Write(oldIndex.GetBlocks());\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n}\n"
  },
  {
    "path": "SAUtils/NsaWriter.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing CommandLine.Utilities;\nusing Compression.Algorithms;\nusing ErrorHandling.Exceptions;\nusing Genome;\nusing IO;\nusing SAUtils.DataStructures;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.NSA;\nusing VariantAnnotation.SA;\nusing Variants;\n\nnamespace SAUtils\n{\n    public sealed class NsaWriter : IDisposable\n    {\n        private readonly ExtendedBinaryWriter _writer;\n        private readonly ExtendedBinaryWriter _indexWriter;\n        private readonly Stream _stream;\n        private readonly Stream _indexStream;\n\n        private readonly byte[] _memBuffer;\n        private readonly MemoryStream _memStream;\n        private readonly ExtendedBinaryWriter _memWriter;\n\n        private readonly NsaBlock _block;\n        private readonly NsaIndex _index;\n        private readonly bool _isPositional;\n        private readonly bool _skipIncorrectRefEntries;\n        private readonly bool _throwErrorOnConflicts;\n        private readonly ISequenceProvider _refProvider;\n        private readonly bool _leaveOpen;\n        private int _count;\n\n        private HashSet<ushort> _completedChromosomes = new HashSet<ushort>();\n\n        public NsaWriter(Stream nsaStream, Stream indexStream, IDataSourceVersion version, ISequenceProvider refProvider, string jsonKey, bool matchByAllele, bool isArray, int schemaVersion, bool isPositional, bool skipIncorrectRefEntries= true, bool throwErrorOnConflicts = false, int blockSize = SaCommon.DefaultBlockSize, GenomeAssembly assembly= GenomeAssembly.Unknown, bool leaveOpen=false)\n        {\n            _stream                  = nsaStream;\n            _indexStream             = indexStream;\n            _writer                  = new ExtendedBinaryWriter(_stream,System.Text.Encoding.Default, leaveOpen);\n            _indexWriter             = new 
ExtendedBinaryWriter(_indexStream,System.Text.Encoding.Default, leaveOpen);\n            _isPositional            = isPositional;\n            _skipIncorrectRefEntries = skipIncorrectRefEntries;\n            _throwErrorOnConflicts   = throwErrorOnConflicts;\n            _refProvider             = refProvider;\n            _leaveOpen = leaveOpen;\n\n            assembly = _refProvider?.Assembly ?? assembly;\n\n            _block     = new NsaBlock(new Zstandard(), blockSize);\n            _index     = new NsaIndex(_indexWriter, assembly, version, jsonKey, matchByAllele, isArray, schemaVersion, isPositional);\n            _memBuffer = new byte[blockSize];\n            _memStream = new MemoryStream(_memBuffer);\n            _memWriter = new ExtendedBinaryWriter(_memStream);\n        }\n\n        internal void Write(ushort chromIndex, NsaReader nsaReader)\n        {\n            if (nsaReader == null) return;\n\n            var dataBlocks  = nsaReader.GetCompressedBlocks(chromIndex);\n            var indexBlocks = nsaReader.GetIndexBlocks(chromIndex);\n\n            var i = 0;//index of the index Blocks\n            //cannot convert the dataBlocks into a list since that may take up GBs of memory (proportional to the nas file size)\n            foreach (var dataBlock in dataBlocks) {\n                if (i > indexBlocks.Count) throw new IndexOutOfRangeException(\"Nsa Index have less blocks than the Nsa file. They have to be the same.\");\n\n                var oldIndexBlock = indexBlocks[i];\n                _index.Add(chromIndex, oldIndexBlock.Start, oldIndexBlock.End, _writer.BaseStream.Position, oldIndexBlock.Length);\n                dataBlock.WriteCompressedBytes(_writer);\n                i++;\n            }\n            if (i < indexBlocks.Count) throw new IndexOutOfRangeException(\"Nsa Index have more blocks than the Nsa file. 
They have to be the same.\");\n        }\n\n        public int Write(IEnumerable<ISupplementaryDataItem> saItems)\n        { \n            var itemsMinHeap = new MinHeap<ISupplementaryDataItem>(SuppDataUtilities.CompareTo);\n            var chromIndex = ushort.MaxValue;\n            var currentEnsemblName = \"\";\n            _count = 0;\n            var benchmark = new Benchmark();\n\n            foreach (var saItem in saItems)\n            {\n                if (chromIndex != saItem.Chromosome.Index)\n                {\n                    if (chromIndex != ushort.MaxValue)\n                    {\n                        _completedChromosomes.Add(chromIndex); // this chrom is done\n                        //flushing out the remaining items in buffer\n                        WriteUptoPosition(itemsMinHeap, int.MaxValue);\n                        Flush(chromIndex);\n                        Console.WriteLine($\"Chromosome {currentEnsemblName} completed in {Benchmark.ToHumanReadable(benchmark.GetElapsedTime())}\");\n                        benchmark.Reset();\n                    }\n                    chromIndex = saItem.Chromosome.Index;\n                    currentEnsemblName = saItem.Chromosome.EnsemblName;\n                    _refProvider.LoadChromosome(saItem.Chromosome);\n                }\n\n                if (_completedChromosomes.Contains(saItem.Chromosome.Index))\n                {\n                    throw new UserErrorException(\n                        $\"The input file is not sorted by chromosomes. {saItem.Chromosome.UcscName} is observed in multiple segments.\" +\n                        $\"\\nInput Line:\\n{saItem.InputLine}\");\n                }\n\n                // the items come in sorted order of the pre-trimmed position. \n                // So when writing out, we have to make sure that we do not write past this position. 
\n                // Once a position has been seen in the stream, we can safely write all positions before that.\n                var writeToPos = saItem.Position;\n                \n                // if variant is in par region, we allow N's in ref\n                if (RegionUtilities.OverlapsParRegion(saItem, _refProvider.Assembly)\n                    && !string.IsNullOrEmpty(saItem.RefAllele) \n                    && saItem.RefAllele.All(x=> x=='N' || x=='n'))\n                {\n                    itemsMinHeap.Add(saItem);\n                    // in order to allow room for left shifted variants, we hold off on removing them from the heap\n                    WriteUptoPosition(itemsMinHeap, writeToPos - VariantUtils.MaxUpstreamLength);\n                    continue;\n                }\n                string refSequence = _refProvider.Sequence.Substring(saItem.Position - 1, saItem.RefAllele.Length);\n                if (!string.IsNullOrEmpty(saItem.RefAllele) && saItem.RefAllele != refSequence)\n                {\n                    if (_skipIncorrectRefEntries) continue;\n                    throw new UserErrorException($\"The provided reference allele {saItem.RefAllele} at {saItem.Chromosome.UcscName}:{saItem.Position} is different from {refSequence} in the reference genome sequence.\" +\n                                                 $\"\\nInput Line:\\n {saItem.InputLine}\");\n                }\n\n                itemsMinHeap.Add(saItem);\n                // in order to allow room for left shifted variants, we hold off on removing them from the heap\n                WriteUptoPosition(itemsMinHeap, writeToPos- VariantUtils.MaxUpstreamLength);\n            }\n\n            //flushing out the remaining items in buffer\n            WriteUptoPosition(itemsMinHeap, int.MaxValue);\n            Flush(chromIndex);\n            Console.WriteLine($\"Chromosome {currentEnsemblName} completed in {Benchmark.ToHumanReadable(benchmark.GetElapsedTime())}\");\n\n        
    Console.WriteLine($\"Maximum bp shifted for any variant:{VariantUtils.MaxShiftLength}\");\n            return _count;\n        }\n\n        private void WriteUptoPosition(MinHeap<ISupplementaryDataItem> itemsHeap, int position)\n        {\n            if (position < 1) return;\n            if (itemsHeap.Count() == 0) return;\n            var bufferMin = itemsHeap.GetMin();\n\n            while (bufferMin.Position < position)\n            {\n                var itemsAtMinPosition = new List<ISupplementaryDataItem>();\n\n                while (itemsHeap.Count() > 0 && SuppDataUtilities.CompareTo(bufferMin, itemsHeap.GetMin()) == 0)\n                    itemsAtMinPosition.Add(itemsHeap.ExtractMin());\n\n                if (itemsAtMinPosition.Count > 0)\n                {\n                    _count += itemsAtMinPosition.Count;\n                    WritePosition(itemsAtMinPosition);\n                }\n                if (itemsHeap.Count() == 0) break;\n\n                bufferMin = itemsHeap.GetMin();\n            }\n\n        }\n\n        private void WritePosition(List<ISupplementaryDataItem> items)\n        {\n            int position = items[0].Position;\n            _memStream.Position = 0;\n            if (_isPositional)\n            {\n                var positionalItem = SuppDataUtilities.GetPositionalAnnotation(items);\n                if (positionalItem == null) return;\n                _memWriter.Write(positionalItem.GetJsonString());\n            }\n            else\n            {\n                // any data source that is reported by allele and is not an array (e.g. 
allele frequencies) need this filtering step\n                if (_index.MatchByAllele && !_index.IsArray)\n                    items = SuppDataUtilities.RemoveConflictingAlleles(items, _throwErrorOnConflicts);\n\n                if (_index.JsonKey == SaCommon.PrimateAiTag)\n                    items = SuppDataUtilities.DeDuplicatePrimateAiItems(items);\n\n                _memWriter.WriteOpt(items.Count);\n\n                foreach (ISupplementaryDataItem saItem in items)\n                {\n                    _memWriter.WriteOptAscii(saItem.RefAllele);\n                    _memWriter.WriteOptAscii(saItem.AltAllele);\n                    _memWriter.Write(saItem.GetJsonString());\n                }\n            }\n\n            int numBytes = (int)_memStream.Position;\n            if (!_block.HasSpace(numBytes)) Flush(items[0].Chromosome.Index);\n            _block.Add(_memBuffer, numBytes, position);\n        }\n\n        private void Flush(ushort chromIndex)\n        {\n\n            if (_block.BlockOffset == 0) return;\n\n            long fileOffset = _stream.Position;\n            (int firstPosition, int lastPosition, int numBytes) = _block.Write(_writer);\n            _block.Clear();\n            _index.Add(chromIndex, firstPosition, lastPosition, fileOffset, numBytes);\n        }\n\n        public void Dispose()\n        {\n            _index.Write();\n\n            if (!_leaveOpen)\n            {\n                _writer?.Dispose();\n                _indexWriter?.Dispose();\n                _stream?.Dispose();\n                _indexStream?.Dispose();\n                _block?.Dispose();\n            }\n            \n            _memWriter?.Dispose();\n            _memStream?.Dispose();\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/Omim/Downloader.cs",
    "content": "﻿using System.IO;\r\nusing CommandLine.Builders;\r\nusing CommandLine.NDesk.Options;\r\nusing ErrorHandling;\r\nusing SAUtils.GeneIdentifiers;\r\nusing static System.Environment;\r\n\r\nnamespace SAUtils.Omim\r\n{\r\n    public static class Downloader\r\n    {\r\n        private static string _apiKey;\r\n        private static string _universalGeneArchivePath;\r\n        private static string _outputDirectory;\r\n        private static string _inputReferencePath;\r\n        private const string OmimApiKeyEnvironmentVariableName = \"OmimApiKey\";\r\n\r\n        public static ExitCodes Run(string command, string[] commandArgs)\r\n        {\r\n            var ops = new OptionSet\r\n            {\r\n                {\r\n                    \"uga|u=\",\r\n                    \"universal gene archive {path}\",\r\n                    v => _universalGeneArchivePath = v\r\n                },\r\n                {\r\n                    \"ref|r=\",\r\n                    \"input reference {filename}\",\r\n                    v => _inputReferencePath = v\r\n                },\r\n                {\r\n                    \"out|o=\",\r\n                    \"output directory\",\r\n                    v => _outputDirectory = v\r\n                }\r\n            };\r\n\r\n            string commandLineExample = $\"{command} [options]\";\r\n\r\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\r\n                .Parse()\r\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\r\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\r\n                .CheckInputFilenameExists(_universalGeneArchivePath, \"universal gene archive\", \"--uga\")\r\n                .SkipBanner()\r\n                .ShowHelpMenu(\"Download the OMIM gene annotation data\", commandLineExample)\r\n                .ShowErrors()\r\n                .Execute(ProgramExecution);\r\n\r\n            return 
exitCode;\r\n        }\r\n\r\n        private static ExitCodes ProgramExecution()\r\n        {\r\n            _apiKey = GetEnvironmentVariable(OmimApiKeyEnvironmentVariableName);\r\n            if (_apiKey == null) throw new InvalidDataException(\"Please set the OMIM API key as the environment variable \\\"OmimApiKey\\\".\");\r\n\r\n            var (entrezGeneIdToSymbol, ensemblGeneIdToSymbol) = GeneUtilities.ParseUniversalGeneArchive(_inputReferencePath, _universalGeneArchivePath);\r\n            var geneSymbolUpdater = new GeneSymbolUpdater(entrezGeneIdToSymbol, ensemblGeneIdToSymbol);\r\n\r\n            using (var omimQuery = new OmimQuery(_apiKey, _outputDirectory))\r\n            {\r\n                omimQuery.GenerateMimToGeneSymbolFile(geneSymbolUpdater);\r\n                omimQuery.GenerateJsonResponse();\r\n            }\r\n            OmimVersion.WriteToFile(OmimQuery.JsonResponseFile, _outputDirectory);\r\n\r\n            geneSymbolUpdater.DisplayStatistics();\r\n            return ExitCodes.Success;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/Omim/EntryApiResponse/EntryResponse.cs",
    "content": "﻿namespace SAUtils.Omim.EntryApiResponse\r\n{\r\n    // ReSharper disable InconsistentNaming\r\n    public sealed class EntryRoot\r\n    {\r\n        public RootItem omim;\r\n    }\r\n\r\n    // ReSharper disable ClassNeverInstantiated.Global\r\n    public class RootItem\r\n    {\r\n        public string  version;\r\n        public Entry[] entryList;\r\n    }\r\n\r\n    public class Entry\r\n    {\r\n        public EntryItem entry;\r\n    }\r\n\r\n    public class EntryItem\r\n    {\r\n        public char          prefix;\r\n        public int           mimNumber;\r\n        public string        status;\r\n        public TextSection[] textSectionList;\r\n        public GeneMap       geneMap;\r\n    }\r\n\r\n    public class TextSection\r\n    {\r\n        public TextSectionItem textSection;\r\n    }\r\n\r\n    public class TextSectionItem\r\n    {\r\n        public string textSectionName;\r\n        public string textSectionTitle;\r\n        public string textSectionContent;\r\n    }\r\n    // ReSharper restore ClassNeverInstantiated.Global\r\n    // ReSharper restore InconsistentNaming\r\n}"
  },
  {
    "path": "SAUtils/Omim/EntryApiResponse/GeneMap.cs",
    "content": "﻿namespace SAUtils.Omim.EntryApiResponse\r\n{\r\n    // ReSharper disable InconsistentNaming\r\n    // ReSharper disable ClassNeverInstantiated.Global\r\n    public class GeneMap\r\n    {\r\n        public string         geneName;\r\n        public int            mimNumber;\r\n        public PhenotypeMap[] phenotypeMapList;\r\n    }\r\n\r\n    public class PhenotypeMap\r\n    {\r\n        public PhenotypeMapItem phenotypeMap;\r\n    }\r\n\r\n    public class PhenotypeMapItem\r\n    {\r\n        public int    phenotypeMimNumber;\r\n        public string phenotype;\r\n        public int    phenotypeMappingKey;\r\n        public string phenotypeInheritance;\r\n    }\r\n    // ReSharper restore ClassNeverInstantiated.Global\r\n    // ReSharper restore InconsistentNaming\r\n}"
  },
  {
    "path": "SAUtils/Omim/Main.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing CommandLine.Builders;\r\nusing CommandLine.NDesk.Options;\r\nusing ErrorHandling;\r\nusing IO;\r\nusing Newtonsoft.Json.Linq;\r\nusing SAUtils.DataStructures;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.SA;\r\n\r\nnamespace SAUtils.Omim\r\n{\r\n    public static class Main\r\n    {\r\n\r\n        private static string _mimToGeneFile;\r\n        private static string _omimJsonFile;\r\n        private static string _outputDirectory;\r\n\r\n        public static ExitCodes Run(string command, string[] commandArgs)\r\n        {\r\n            var ops = new OptionSet\r\n            {\r\n                {\r\n                    \"m2g|m=\",\r\n                    \"MimToGeneSymbol tsv file\",\r\n                    v => _mimToGeneFile = v\r\n                },\r\n                {\r\n                    \"json|j=\",\r\n                    \"OMIM entry json file\",\r\n                    v => _omimJsonFile = v\r\n                },\r\n                {\r\n                    \"out|o=\",\r\n                    \"output directory\",\r\n                    v => _outputDirectory = v\r\n                }\r\n            };\r\n\r\n            string commandLineExample = $\"{command} [options]\";\r\n\r\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\r\n                .Parse()\r\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\r\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\r\n                .CheckInputFilenameExists(_mimToGeneFile, \"MimToGeneSymbol tsv file\", \"--m2g\")\r\n                .CheckInputFilenameExists(_omimJsonFile, \"OMIM entry json file\", \"--json\")\r\n                .SkipBanner()\r\n                .ShowHelpMenu(\"Creates a gene annotation database from OMIM data\", commandLineExample)\r\n                .ShowErrors()\r\n                
.Execute(ProgramExecution);\r\n\r\n            return exitCode;\r\n        }\r\n\r\n        private static ExitCodes ProgramExecution()\r\n        {\r\n            var omimSchema = OmimSchema.Get();\r\n\r\n            var omimParser = new OmimParser(_mimToGeneFile, _omimJsonFile, omimSchema);\r\n            var version = omimParser.GetVersion();\r\n            string outFileName = $\"{version.Name}_{version.Version}\";\r\n            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.GeneFileSuffix)))\r\n            using (var ngaWriter = new NgaWriter(nsaStream, version, SaCommon.OmimTag, SaCommon.SchemaVersion, true))\r\n            using (var saJsonSchemaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.GeneFileSuffix + SaCommon.JsonSchemaSuffix)))\r\n            using (var schemaWriter = new StreamWriter(saJsonSchemaStream))\r\n            {\r\n                IEnumerable<OmimItem>                           omimItems   = omimParser.GetItems();\r\n                Dictionary<string, List<ISuppGeneItem>> geneToItems = OmimUtilities.GetGeneToOmimEntriesAndSchema(omimItems);\r\n                ngaWriter.Write(geneToItems);\r\n                schemaWriter.Write(omimSchema);\r\n                \r\n                JObject jo = JObject.Parse(omimParser.OmimStats.ToString());\r\n                Console.WriteLine(jo); //pretty printing json\r\n            }\r\n\r\n            return ExitCodes.Success;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/Omim/OmimParser.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.IO.Compression;\r\nusing System.Linq;\r\nusing Newtonsoft.Json;\r\nusing OptimizedCore;\r\nusing SAUtils.DataStructures;\r\nusing SAUtils.InputFileParsers;\r\nusing SAUtils.Omim.EntryApiResponse;\r\nusing SAUtils.Schema;\r\nusing VariantAnnotation.Providers;\r\n\r\nnamespace SAUtils.Omim;\r\n\r\npublic sealed class OmimParser\r\n{\r\n    private readonly string       _mimToGeneSymbolFile;\r\n    private readonly string       _omimJsonFile;\r\n    private readonly SaJsonSchema _jsonSchema;\r\n\r\n    private const string CurrentOmimJsonVersion = \"1.0\";\r\n        \r\n    public readonly OmimStatistics OmimStats = new();\r\n\r\n    public OmimParser(string mimToGeneSymbolFile, string omimJsonFile, SaJsonSchema jsonSchema)\r\n    {\r\n        _mimToGeneSymbolFile = mimToGeneSymbolFile;\r\n        _omimJsonFile        = omimJsonFile;\r\n        _jsonSchema          = jsonSchema;\r\n    }\r\n\r\n    public DataSourceVersion GetVersion() => DataSourceVersionReader.GetSourceVersion(_omimJsonFile);\r\n\r\n    public IEnumerable<OmimItem> GetItems()\r\n    {\r\n        Dictionary<int, string> mimToGeneSymbol       = GetMimNumberToGeneSymbol();\r\n        EntryRoot               entryRoot             = GetEntryRootObject();\r\n        Dictionary<int, string> phenotypeDescriptions = GetPhenotypeDescriptions(entryRoot);\r\n\r\n        foreach (OmimItem omimItem in GetOmimItems(entryRoot, mimToGeneSymbol, phenotypeDescriptions))\r\n        {\r\n            OmimStats.Add(omimItem);\r\n            yield return omimItem;\r\n        }\r\n    }\r\n\r\n    private static Dictionary<int, string> GetPhenotypeDescriptions(EntryRoot entryRoot)\r\n    {\r\n            \r\n        Dictionary<int, string> phenotypeToDescription = new Dictionary<int, string>();\r\n\r\n        foreach (var entry in entryRoot.omim.entryList)\r\n        {\r\n            var item = entry.entry;\r\n            // gene only 
item\r\n            if (item.prefix == '*') continue;\r\n        \r\n            var description = OmimUtilities.ExtractAndProcessItemDescription(item);\r\n            if (string.IsNullOrEmpty(description)) continue;\r\n            phenotypeToDescription[item.mimNumber] = description;\r\n        }\r\n\r\n        return phenotypeToDescription;\r\n    }\r\n\r\n    private Dictionary<int, string> GetMimNumberToGeneSymbol()\r\n    {\r\n        var mimNumberToGeneSymbol = new Dictionary<int, string>();\r\n        using (var stream = new FileStream(_mimToGeneSymbolFile, FileMode.Open))\r\n        using (var reader = new StreamReader(stream))\r\n        {\r\n            string line;\r\n            //title line\r\n            reader.ReadLine();\r\n            while ((line = reader.ReadLine()) != null)\r\n            {\r\n                var fields = line.OptimizedSplit('\\t');\r\n                mimNumberToGeneSymbol[int.Parse(fields[0])] = fields[1];\r\n            }\r\n        }\r\n\r\n        return mimNumberToGeneSymbol;\r\n    }\r\n\r\n    private EntryRoot GetEntryRootObject()\r\n    {\r\n        using var fileStream         = new FileStream(_omimJsonFile, FileMode.Open);\r\n        using var uncompressedStream = new GZipStream(fileStream, CompressionMode.Decompress);\r\n        using var streamReader       = new StreamReader(uncompressedStream);\r\n        var       entryQueryResponse = JsonConvert.DeserializeObject<EntryRoot>(streamReader.ReadToEnd());\r\n        if (entryQueryResponse.omim.version != CurrentOmimJsonVersion)\r\n            throw new InvalidDataException($\"An unknown version of OMIM JSON schema has been used: version {entryQueryResponse.omim.version}. 
The latest known version is {CurrentOmimJsonVersion}\");\r\n\r\n        return entryQueryResponse;\r\n    }\r\n\r\n    private IEnumerable<OmimItem> GetOmimItems(EntryRoot entryRoot, Dictionary<int, string> mimToGeneSymbol, Dictionary<int, string> phenotypeDescriptions)\r\n    {\r\n        foreach (var entry in entryRoot.omim.entryList)\r\n        {\r\n            var item      = entry.entry;\r\n            var mimNumber = item.mimNumber;\r\n            //skip if not a supported gene symbol\r\n            if (!mimToGeneSymbol.TryGetValue(mimNumber, out var geneSymbol)) continue;\r\n\r\n            string description = OmimUtilities.ExtractAndProcessItemDescription(item);\r\n            string geneName    = item.geneMap?.geneName;\r\n            var phenotypes = item.geneMap?.phenotypeMapList?.Select(x => OmimUtilities.GetPhenotype(x, phenotypeDescriptions, _jsonSchema.GetSubSchema(\"phenotypes\")))\r\n                .ToList() ?? new List<OmimItem.Phenotype>();\r\n\r\n            yield return new OmimItem(geneSymbol, geneName, description, mimNumber, phenotypes, _jsonSchema);\r\n\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/Omim/OmimPhenotypeSchema.cs",
    "content": "﻿using System.Linq;\r\nusing System.Text;\r\nusing SAUtils.Schema;\r\n\r\nnamespace SAUtils.Omim\r\n{\r\n    public static class OmimPhenotypeSchema\r\n    {\r\n        private static readonly SaJsonValueType PrimaryValueType = SaJsonValueType.ObjectArray;\r\n\r\n        private static readonly (string JsonKey, SaJsonValueType ValueType)[] SchemaDescription = {\r\n            (\"mimNumber\", SaJsonValueType.Number),\r\n            (\"phenotype\", SaJsonValueType.String),\r\n            (\"description\", SaJsonValueType.String),\r\n            (\"mapping\", SaJsonValueType.String),\r\n            (\"inheritances\", SaJsonValueType.StringArray),\r\n            (\"comments\", SaJsonValueType.String)\r\n        };\r\n\r\n        public static SaJsonSchema Get()\r\n        {\r\n            var jsonSchema = SaJsonSchema.Create(new StringBuilder(), null, PrimaryValueType, SchemaDescription.Select(x => x.JsonKey));\r\n            jsonSchema.SetNonSaKeys(new[] { \"isAlleleSpecific\" });\r\n\r\n            foreach ((string key, var valueType) in SchemaDescription)\r\n                jsonSchema.AddAnnotation(key, SaJsonKeyAnnotation.CreateFromProperties(valueType, 0, null));\r\n\r\n            return jsonSchema;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/Omim/OmimQuery.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.IO.Compression;\r\nusing System.Net.Http;\r\nusing System.Text.RegularExpressions;\r\nusing OptimizedCore;\r\nusing SAUtils.GeneIdentifiers;\r\n\r\nnamespace SAUtils.Omim\r\n{\r\n\r\n    public sealed class OmimQuery : IDisposable\r\n    {\r\n        private readonly HttpClient _httpClient;\r\n        private readonly FileStream _mimToSymbolStream;\r\n        private readonly FileStream _jsonResponseStream;\r\n        private string _jsonPrefix;\r\n        private readonly string _mimTitlesUrl;\r\n\r\n        private const string Mim2GeneUrl = \"https://omim.org/static/omim/data/mim2gene.txt\";\r\n        private const string MimTitlesFileName = \"mimTitles.txt\";\r\n        private const string OmimApiUrl = \"https://api.omim.org/api/\";\r\n        private const string OmimDownloadBaseUrl = \"https://data.omim.org/downloads/\";\r\n        private const string EntryHandler = \"entry\";\r\n        private const int EntryQueryLimit = 20;\r\n        private const string ReturnDataFormat = \"json\";\r\n        private const string MimToSymbolFile = \"MimToGeneSymbol.tsv\";\r\n        public const string JsonResponseFile = \"MimEntries.json.gz\";\r\n        private const string JsonPrefixPattern = @\"^{\"\"omim\"\": { \\n\"\"version\"\": \"\"\\d+\\.\\d+\\\"\",\\n\"\"entryList\"\": \\[ \\n\";\r\n        private const string JsonTextEnding = \"] \\n} }\";\r\n\r\n        public OmimQuery(string apiKey, string outputDirectory) \r\n        {\r\n            _httpClient = new HttpClient();\r\n            _httpClient.DefaultRequestHeaders.Add(\"ApiKey\", apiKey);\r\n\r\n            if (string.IsNullOrEmpty(outputDirectory)) return;\r\n            \r\n            _mimTitlesUrl = GetMimTitlesUrl(apiKey);\r\n            _mimToSymbolStream = new FileStream(Path.Combine(outputDirectory, MimToSymbolFile), FileMode.Create);\r\n            _jsonResponseStream = new 
FileStream(Path.Combine(outputDirectory, JsonResponseFile), FileMode.Create);\r\n        }\r\n\r\n        private static string GetMimTitlesUrl(string apiKey) => $\"{OmimDownloadBaseUrl}{apiKey}/{MimTitlesFileName}\";\r\n\r\n        private List<string> GetMimsToDownload()\r\n        {\r\n            var mims = new List<string>();\r\n            using (var response = _httpClient.GetAsync(_mimTitlesUrl).Result)\r\n            using (var reader = new StreamReader(response.Content.ReadAsStreamAsync().Result))\r\n            {\r\n                string line;\r\n                while ((line = reader.ReadLine()) != null)\r\n                {\r\n                    //Caret (^)  Entry has been removed from the database or moved to another entry\r\n                    if (line.OptimizedStartsWith('#') || line.StartsWith(\"Caret\")) continue;\r\n\r\n                    var fields = line.Split('\\t', 3);\r\n                    mims.Add(fields[1]);\r\n                }\r\n            }\r\n\r\n            return mims;\r\n        }\r\n\r\n        public void GenerateMimToGeneSymbolFile(GeneSymbolUpdater geneSymbolUpdater)\r\n        {\r\n            using StreamWriter writer = new StreamWriter(_mimToSymbolStream);\r\n            using var response = _httpClient.GetAsync(Mim2GeneUrl).Result;\r\n            using var reader = new StreamReader(response.Content.ReadAsStreamAsync().Result);\r\n            writer.WriteLine(\"#MIM number\\tGene symbol\");\r\n            string line;\r\n            while ((line = reader.ReadLine()) != null)\r\n            {\r\n                if (line.OptimizedStartsWith('#')) continue;\r\n\r\n                var fields     = line.OptimizedSplit('\\t');\r\n                var geneSymbol = fields[3];\r\n                if (string.IsNullOrEmpty(geneSymbol)) continue;\r\n\r\n                var mimNumber         = fields[0];\r\n                var entrezGeneId      = fields[2];\r\n                var ensemblGeneId     = fields[4];\r\n                var 
updatedGeneSymbol = geneSymbolUpdater.UpdateGeneSymbol(geneSymbol, ensemblGeneId, entrezGeneId);\r\n                if (string.IsNullOrEmpty(updatedGeneSymbol)) continue;\r\n\r\n                writer.WriteLine($\"{mimNumber}\\t{updatedGeneSymbol}\");\r\n            }\r\n        }\r\n\r\n        public void GenerateJsonResponse()\r\n        {\r\n            var i = 0;\r\n            var mimNumbers = GetMimsToDownload();\r\n\r\n            var needComma = false;\r\n            using Stream gzStream = new GZipStream(_jsonResponseStream, CompressionMode.Compress);\r\n            using StreamWriter writer = new StreamWriter(gzStream);\r\n            while (i < mimNumbers.Count)\r\n            {\r\n                var endMimNumberIndex = Math.Min(i + EntryQueryLimit - 1, mimNumbers.Count - 1);\r\n                var mimNumberString   = GetMimNumbersString(mimNumbers, i, endMimNumberIndex);\r\n                var queryUrl = GetApiQueryUrl(OmimApiUrl, EntryHandler, (\"mimNumber\", mimNumberString),\r\n                    (\"include\", \"text:description\"), (\"include\", \"externalLinks\"), (\"include\", \"geneMap\"),\r\n                    (\"format\", ReturnDataFormat));\r\n\r\n                using (var response = _httpClient.GetAsync(queryUrl).Result)\r\n                {\r\n                    string responseContent = response.Content.ReadAsStringAsync().Result;\r\n                    string entries         = SetPrefixAndGetEntriesString(responseContent);\r\n                    if (i == 0) writer.Write(_jsonPrefix);\r\n                    if (needComma) writer.Write(',');\r\n                    writer.Write(entries);\r\n                    needComma = true;\r\n                }\r\n\r\n                i = endMimNumberIndex + 1;\r\n            }\r\n\r\n            writer.WriteLine(JsonTextEnding);\r\n        }\r\n\r\n        private string SetPrefixAndGetEntriesString(string responseContent)\r\n        {\r\n            if (string.IsNullOrEmpty(_jsonPrefix))\r\n         
   {\r\n                var prefixMatch = Regex.Match(responseContent, JsonPrefixPattern);\r\n                if (!prefixMatch.Success)\r\n                    throw new InvalidDataException(\r\n                        $\"Cannot find expected content at the beginning of the response from OMIM server. The response starts with \\\"{responseContent.Substring(0, JsonPrefixPattern.Length)}\\\"\");\r\n\r\n                _jsonPrefix = prefixMatch.Value;\r\n            }\r\n\r\n            int entriesStringLength = responseContent.Length - _jsonPrefix.Length - JsonTextEnding.Length;\r\n            return responseContent.Substring(_jsonPrefix.Length, entriesStringLength);\r\n        }\r\n\r\n        private static string GetMimNumbersString(List<string> allMimNumbers, int startIndex, int endIndex)\r\n        {\r\n            var sb = StringBuilderPool.Get();\r\n            var needComma = false;\r\n            for (int i = startIndex; i <= endIndex; i++)\r\n            {\r\n                if (needComma) sb.Append(',');\r\n                sb.Append(allMimNumbers[i]);\r\n\r\n                needComma = true;\r\n            }\r\n\r\n            return StringBuilderPool.GetStringAndReturn(sb);\r\n        }\r\n\r\n        private static string GetApiQueryUrl(string baseAddress, string handler, params (string, string)[] keyValueTuples)\r\n        {\r\n            var sb = StringBuilderPool.Get();\r\n            sb.Append(baseAddress);\r\n            sb.Append(handler);\r\n            sb.Append('?');\r\n            var needAmpersand = false;\r\n            foreach ((string key, string value) in keyValueTuples)\r\n            {\r\n                if (needAmpersand) sb.Append('&');\r\n\r\n                sb.Append(key);\r\n                sb.Append('=');\r\n                sb.Append(value);\r\n\r\n                needAmpersand = true;\r\n            }\r\n\r\n            return StringBuilderPool.GetStringAndReturn(sb);\r\n        }\r\n\r\n        public void Dispose()\r\n        
{\r\n            _httpClient?.Dispose();\r\n            _mimToSymbolStream?.Dispose();\r\n            _jsonResponseStream?.Dispose();\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/Omim/OmimSchema.cs",
    "content": "﻿using System.Linq;\r\nusing System.Text;\r\nusing SAUtils.Schema;\r\nusing VariantAnnotation.SA;\r\n\r\nnamespace SAUtils.Omim\r\n{\r\n    public static class OmimSchema\r\n    {\r\n        private static readonly SaJsonValueType PrimaryValueType = SaJsonValueType.ObjectArray;\r\n\r\n        private static readonly (string JsonKey, SaJsonValueType ValueType, SaJsonSchema subSchema)[] SchemaDescription = {\r\n            (\"mimNumber\", SaJsonValueType.Number, null),\r\n            (\"geneName\", SaJsonValueType.String, null),\r\n            (\"description\", SaJsonValueType.String, null),\r\n            (\"phenotypes\", null, OmimPhenotypeSchema.Get())\r\n        };\r\n\r\n        public static SaJsonSchema Get()\r\n        {\r\n            var jsonSchema = SaJsonSchema.Create(new StringBuilder(), SaCommon.OmimTag, PrimaryValueType, SchemaDescription.Select(x => x.JsonKey));\r\n            jsonSchema.SetNonSaKeys(new[] { \"isAlleleSpecific\" });\r\n\r\n            foreach ((string key, var valueType, var subSchema) in SchemaDescription)\r\n            {\r\n                var keyAnnotation = valueType == null\r\n                    ? SaJsonKeyAnnotation.CreateFromSubSchema(subSchema)\r\n                    : SaJsonKeyAnnotation.CreateFromProperties(valueType, 0, null);\r\n\r\n                jsonSchema.AddAnnotation(key, keyAnnotation);\r\n            }\r\n\r\n            return jsonSchema;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/Omim/OmimStatistics.cs",
    "content": "using System.Text;\nusing OptimizedCore;\nusing SAUtils.DataStructures;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.Omim;\n\npublic class OmimStatistics\n{\n    public uint                      TotalItems            = 0;\n    public uint                      TotalPhenotypes       = 0;\n    public CounterDictionary<string> PhenotypeMappings     = new();\n    public CounterDictionary<string> PhenotypeInheritances = new();\n\n    public void Add(OmimItem omimItem)\n    {\n        TotalItems++;\n\n        foreach (OmimItem.Phenotype phenotype in omimItem.Phenotypes)\n        {\n            TotalPhenotypes++;\n            PhenotypeMappings.Add(phenotype.Mapping.ToString());\n            foreach (string inheritance in phenotype.Inheritance)\n            {\n                PhenotypeInheritances.Add(inheritance);\n            }\n        }\n    }\n\n    public override string ToString()\n    {\n        StringBuilder sb = StringBuilderPool.Get();\n        var jo = new JsonObject(sb);\n        sb.Append(JsonObject.OpenBrace);\n\n        jo.AddUIntValue(\"totalItems\",      TotalItems);\n        jo.AddUIntValue(\"totalPhenotypes\", TotalPhenotypes);\n        jo.AddObjectValue(\"mappings\",     PhenotypeMappings);\n        jo.AddObjectValue(\"inheritances\", PhenotypeInheritances);\n\n        sb.Append(JsonObject.CloseBrace);\n\n        return StringBuilderPool.GetStringAndReturn(sb);\n    }\n}"
  },
  {
    "path": "SAUtils/Omim/OmimUtilities.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.Linq;\r\nusing System.Text.RegularExpressions;\r\nusing OptimizedCore;\r\nusing SAUtils.DataStructures;\r\nusing SAUtils.Omim.EntryApiResponse;\r\nusing SAUtils.Schema;\r\nusing VariantAnnotation.Interface.SA;\r\n\r\nnamespace SAUtils.Omim\r\n{\r\n    public static class OmimUtilities\r\n    {\r\n        public static OmimItem.Phenotype GetPhenotype(PhenotypeMap phenotypeMap,\r\n            Dictionary<int, string> phenotypeDescriptions, SaJsonSchema jsonSchema)\r\n        {\r\n            var phenotypeItem = phenotypeMap.phenotypeMap;\r\n            var mimNumber = phenotypeItem.phenotypeMimNumber;\r\n            phenotypeDescriptions.TryGetValue(mimNumber, out var description);\r\n\r\n            var (phenotype, comments) = ExtractPhenotypeAndComments(phenotypeItem.phenotype);\r\n            return new OmimItem.Phenotype(mimNumber, phenotype, description, (OmimItem.Mapping)phenotypeItem.phenotypeMappingKey, comments, ExtractInheritances(phenotypeItem.phenotypeInheritance), jsonSchema);\r\n        }\r\n\r\n        private static HashSet<string> ExtractInheritances(string inheritance)\r\n        {\r\n            var inheritances = new HashSet<string>();\r\n            if (String.IsNullOrEmpty(inheritance)) return inheritances;\r\n\r\n            foreach (string content in inheritance.OptimizedSplit(';'))\r\n            {\r\n                string trimmedContent = content.Trim(' ');\r\n                inheritances.Add(trimmedContent);\r\n            }\r\n\r\n            return inheritances;\r\n        }\r\n\r\n        internal static (string Phenotype, OmimItem.Comment[] Comments) ExtractPhenotypeAndComments(string phenotypeString)\r\n        {\r\n            phenotypeString = phenotypeString.Trim(' ').Trim(',').Replace(@\"\\\\'\", \"'\", StringComparison.Ordinal);\r\n            string phenotype = Regex.Replace(phenotypeString,@\" \\(\\d\\) \", \" \");\r\n\r\n            var comments 
= phenotypeString.Select(GetComment)\r\n                                          .Where(x => x != OmimItem.Comment.unknown)\r\n                                          .ToArray();\r\n\r\n            return (phenotype, comments);\r\n        }\r\n\r\n        private static OmimItem.Comment GetComment(char symbol)\r\n        {\r\n            return symbol switch\r\n            {\r\n                '?' => OmimItem.Comment.unconfirmed_or_possibly_spurious_mapping,\r\n                '[' => OmimItem.Comment.nondiseases,\r\n                '{' => OmimItem.Comment\r\n                    .contribute_to_susceptibility_to_multifactorial_disorders_or_to_susceptibility_to_infection,\r\n                _ => OmimItem.Comment.unknown\r\n            };\r\n        }\r\n\r\n        public static Dictionary<string, List<ISuppGeneItem>> GetGeneToOmimEntriesAndSchema(IEnumerable<OmimItem> omimItems)\r\n        {\r\n            var geneToOmimEntries = new Dictionary<string, List<ISuppGeneItem>>();\r\n            SaJsonSchema jsonSchema = null;\r\n\r\n            foreach (var item in omimItems)\r\n            {\r\n                if (jsonSchema == null) jsonSchema = item.JsonSchema;\r\n                if (item.GeneSymbol == null) continue;\r\n\r\n                if (geneToOmimEntries.TryGetValue(item.GeneSymbol, out var mimList))\r\n                {\r\n                    mimList.Add(item);\r\n                }\r\n                else\r\n                {\r\n                    geneToOmimEntries[item.GeneSymbol] = new List<ISuppGeneItem> { item };\r\n                }\r\n            }\r\n\r\n            return geneToOmimEntries;\r\n        }\r\n\r\n        // remove links enclosed by parentheses with only numbers, e.g. ({12345})\r\n        public static string RemoveLinks(this string text) => text == null\r\n            ? 
null\r\n            : Regex.Replace(Regex.Replace(Regex.Replace(text, \r\n                        @\"((and|see|;|(e\\.g\\.)?,) )*{\\d+(\\.\\d+)?}\", \"\"),\r\n                        @\" ?\\((\\ |/)*\\)\", \"\"),\r\n                        @\"{([\\d,]+:)?(.+?)}\", \"$2\");\r\n\r\n        public static string RemoveFormatControl(this string text) => text == null ? null : \r\n            Regex.Replace(text, \"<Subhead>\", \"\");\r\n\r\n        public static string ExtractAndProcessItemDescription(EntryItem item)\r\n        {\r\n            const string sectionName = \"description\";\r\n            return item.textSectionList?.FirstOrDefault(x => x.textSection.textSectionName == sectionName)?\r\n                .textSection.textSectionContent.RemoveLinks().RemoveFormatControl();\r\n        }\r\n    }\r\n} "
  },
  {
    "path": "SAUtils/Omim/OmimVersion.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\n\r\nnamespace SAUtils.Omim\r\n{\r\n    public static class OmimVersion\r\n    {\r\n        private const string Name = \"OMIM\";\r\n        private const string Description = \"An Online Catalog of Human Genes and Genetic Disorders\";\r\n        private const string VersionFileSuffix = \".version\";\r\n\r\n        public static void WriteToFile(string outputPrefix, string outputDirectory)\r\n        {\r\n            using (var stream = new FileStream(Path.Combine(outputDirectory, outputPrefix + VersionFileSuffix), FileMode.Create))\r\n            using (var writer = new StreamWriter(stream))\r\n            {\r\n                var currentDate = DateTime.Today;\r\n                writer.WriteLine($\"NAME={Name}\");\r\n                writer.WriteLine($\"VERSION={currentDate:yyyyMMdd}\");\r\n                writer.WriteLine($\"DATE={currentDate:yyyy-MM-dd}\");\r\n                writer.WriteLine($\"DESCRIPTION={Description}\");\r\n            }\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/OneKGenSvDb/Create.cs",
    "content": "﻿using System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing SAUtils.InputFileParsers.OneKGen;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.NSA;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.OneKGenSvDb\n{\n    public static class Create\n    {\n        private static string _inputFileName;\n        private static string _compressedReference;\n        private static string _outputDirectory;\n\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"ref|r=\",\n                    \"compressed reference sequence file\",\n                    v => _compressedReference = v\n                },\n                {\n                    \"in|i=\",\n                    \"OneKGenSv BED file\",\n                    v => _inputFileName = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .CheckInputFilenameExists(_inputFileName, \"OneKGenSv BED file\", \"--in\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a supplementary database with 1000 Genome structural variant annotations\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return 
exitCode;\n        }\n\n        private static ExitCodes ProgramExecution()\n        {\n            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\n            var version = DataSourceVersionReader.GetSourceVersion(_inputFileName + \".version\");\n\n            string outFileName = $\"{version.Name}_{version.Version}\".Replace(' ','_');\n            using(var reader = GZipUtilities.GetAppropriateStreamReader(_inputFileName))\n            using(var oneKGenSvReader = new OneKGenSvReader(reader, referenceProvider.RefNameToChromosome))\n            using(var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.IntervalFileSuffix)))\n            using(var nsiWriter = new NsiWriter(nsaStream, version, referenceProvider.Assembly,\n                SaCommon.OnekSvTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))\n            {\n                nsiWriter.Write(oneKGenSvReader.GetItems());\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/OneKGenSvDb/VcfToBed.cs",
    "content": "﻿using System.IO;\r\nusing System.IO.Compression;\r\nusing System.Linq;\r\nusing CommandLine.Builders;\r\nusing CommandLine.NDesk.Options;\r\nusing Compression.Utilities;\r\nusing ErrorHandling;\r\nusing IO;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface.IO;\r\n\r\nnamespace SAUtils.OneKGenSvDb\r\n{\r\n    public static class VcfToBed\r\n    {\r\n        private static string _inputFileName;\r\n        private static string _outputDirectory;\r\n\r\n        public static ExitCodes Run(string command, string[] commandArgs)\r\n        {\r\n            var ops = new OptionSet\r\n            {\r\n                {\r\n                    \"in|i=\",\r\n                    \"OneKGenSv VCF file\",\r\n                    v => _inputFileName = v\r\n                },\r\n                {\r\n                    \"out|o=\",\r\n                    \"output directory\",\r\n                    v => _outputDirectory = v\r\n                }\r\n            };\r\n\r\n            string commandLineExample = $\"{command} [options]\";\r\n\r\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\r\n                .Parse()\r\n                .CheckInputFilenameExists(_inputFileName, \"OneKGenSv VCF file\", \"--in\")\r\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\r\n                .SkipBanner()\r\n                .ShowHelpMenu(\"Convert the VCF file into BED-like format\", commandLineExample)\r\n                .ShowErrors()\r\n                .Execute(ProgramExecution);\r\n\r\n            return exitCode;\r\n        }\r\n\r\n        private static ExitCodes ProgramExecution()\r\n        {\r\n            string outFileName = Path.GetFileName(_inputFileName).Replace(\"vcf\", \"bed\");\r\n            using (var reader = GZipUtilities.GetAppropriateStreamReader(_inputFileName))\r\n            using (var outputStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName)))\r\n         
   using (var outputGzipStream = new GZipStream(outputStream, CompressionMode.Compress))\r\n            using (var writer = new StreamWriter(outputGzipStream))\r\n            {\r\n                string line;\r\n                while ((line = reader.ReadLine()) != null)\r\n                {\r\n                    var fields = line.OptimizedSplit('\\t', VcfCommon.InfoIndex + 2);\r\n                    if (fields.Length <= VcfCommon.InfoIndex) continue;\r\n\r\n                    string infoFields = fields[VcfCommon.InfoIndex];\r\n                    string svEnd = GetSvEndString(infoFields);\r\n                    if (svEnd == null) continue;\r\n\r\n                    // 1000 Genomes SV records include a padding base, so POS + 1 is the 1-based start position of the interval.\r\n                    // A BED file uses 0-based start positions, which requires subtracting one from that 1-based start.\r\n                    // The two adjustments cancel out, so the POS value can be used directly in the BED file.\r\n                    writer.WriteLine(string.Join('\\t', fields[VcfCommon.ChromIndex], fields[VcfCommon.PosIndex], svEnd, fields[VcfCommon.IdIndex], fields[VcfCommon.AltIndex], infoFields));\r\n                }\r\n            }\r\n\r\n            return ExitCodes.Success;\r\n        }\r\n\r\n        private static string GetSvEndString(string infoFields)\r\n        {\r\n            if (infoFields == \"\" || infoFields == \".\") return null;\r\n\r\n            string endInfo = infoFields.OptimizedSplit(';').FirstOrDefault(x => x.StartsWith(\"END=\"));\r\n\r\n            return string.IsNullOrEmpty(endInfo) ? null : endInfo.Substring(4);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "SAUtils/ParseUtils/SAParseUtilities.cs",
    "content": "﻿using Variants;\n\nnamespace SAUtils.ParseUtils\n{\n    public static class SaParseUtilities\n    {\n        public static VariantType GetSequenceAlterationType(string dgvType, string dgvSubType)\n        {\n            var sequenceAlterationType = VariantType.unknown;\n            if (dgvType == \"CNV\")\n            {\n                switch (dgvSubType)\n                {\n                    case \"deletion\":\n                        sequenceAlterationType = VariantType.copy_number_loss;\n                        break;\n                    case \"duplication\":\n                        sequenceAlterationType = VariantType.copy_number_gain;\n                        break;\n                    case \"gain\":\n                        sequenceAlterationType = VariantType.copy_number_gain;\n                        break;\n                    case \"gain+loss\":\n                        sequenceAlterationType = VariantType.copy_number_variation;\n                        break;\n                    case \"loss\":\n                        sequenceAlterationType = VariantType.copy_number_loss;\n                        break;\n                    case \"insertion\":\n                        sequenceAlterationType = VariantType.insertion;\n                        break;\n                    case \"mobile element insertion\":\n                        sequenceAlterationType = VariantType.mobile_element_insertion;\n                        break;\n                    case \"novel sequence insertion\":\n                        sequenceAlterationType = VariantType.novel_sequence_insertion;\n                        break;\n                    case \"tandem duplication\":\n                        sequenceAlterationType = VariantType.tandem_duplication;\n                        break;\n                    default:\n                        sequenceAlterationType = VariantType.unknown;\n                        break;\n                }\n\n            }\n            
else if (dgvType == \"OTHER\")\n            {\n                switch (dgvSubType)\n                {\n                    case \"complex\":\n                        sequenceAlterationType = VariantType.complex_structural_alteration;\n                        break;\n                    case \"inversion\":\n                        sequenceAlterationType = VariantType.inversion;\n                        break;\n                    case \"sequence alteration\":\n                        sequenceAlterationType = VariantType.structural_alteration;\n                        break;\n                    default:\n                        sequenceAlterationType = VariantType.unknown;\n                        break;\n\n                }\n            }\n\n            return sequenceAlterationType;\n        }\n\n        public static VariantType GetSequenceAlteration(string svType)\n        {\n            VariantType sequenceAlterationType;\n\n            switch (svType)\n            {\n                case \"DEL\":\n                    sequenceAlterationType = VariantType.copy_number_loss;\n                    break;\n                case \"DUP\":\n                    sequenceAlterationType = VariantType.copy_number_gain;\n                    break;\n                case \"CNV\":\n                    sequenceAlterationType = VariantType.copy_number_variation;\n                    break;\n                case \"INS\":\n                    sequenceAlterationType = VariantType.insertion;\n                    break;\n                case \"ALU\":\n                    sequenceAlterationType = VariantType.mobile_element_insertion;\n                    break;\n                case \"LINE1\":\n                    sequenceAlterationType = VariantType.mobile_element_insertion;\n                    break;\n                case \"SVA\":\n                    sequenceAlterationType = VariantType.mobile_element_insertion;\n                    break;\n                case \"INV\":\n             
       sequenceAlterationType = VariantType.inversion;\n                    break;\n                default:\n                    sequenceAlterationType = VariantType.unknown;\n                    break;\n            }\n\n            return sequenceAlterationType;\n        }\n\n        public static int? Add(int? x, int? y)\n        {\n            if (x == null && y == null) return null;\n            if (x == null) return y;\n            if (y == null) return x;\n            return x + y;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/ParseUtils/SplitLine.cs",
    "content": "using System.Globalization;\nusing OptimizedCore;\n\nnamespace SAUtils.ParseUtils;\n\npublic sealed class SplitLine\n{\n    private readonly string[] _splitLine;\n\n    public SplitLine(in string inputLine, in char delimiter)\n    {\n        _splitLine = inputLine.OptimizedSplit(delimiter);\n    }\n\n    public string GetString(in int index)\n    {\n        return _splitLine[index];\n    }\n\n    public int? ParseInteger(in int index)\n    {\n        return ParseInteger(_splitLine[index]);\n    }\n\n    public double? ParseDouble(in int index)\n    {\n        return ParseDouble(_splitLine[index]);\n    }\n\n    public static int? ParseInteger(string valueString)\n    {\n        return int.TryParse(\n            valueString,\n            NumberStyles.Integer | NumberStyles.AllowDecimalPoint,\n            CultureInfo.InvariantCulture,\n            out int temp\n        )\n            ? temp\n            : null;\n    }\n\n    public static double? ParseDouble(string valueString)\n    {\n        return double.TryParse(valueString, out double temp) ? temp : null;\n    }\n}"
  },
  {
    "path": "SAUtils/ParseUtils/TsvIndices.cs",
    "content": "namespace SAUtils.ParseUtils;\n\npublic struct TsvIndices\n{\n    public ushort Chromosome = ushort.MaxValue;\n    public ushort Start      = ushort.MaxValue;\n    public ushort End        = ushort.MaxValue;\n    public ushort VariantId  = ushort.MaxValue;\n    public ushort SvType     = ushort.MaxValue;\n    public ushort Filters    = ushort.MaxValue;\n\n    public ushort AllAlleleCount    = ushort.MaxValue;\n    public ushort AfrAlleleCount    = ushort.MaxValue;\n    public ushort AmrAlleleCount    = ushort.MaxValue;\n    public ushort EasAlleleCount    = ushort.MaxValue;\n    public ushort EurAlleleCount    = ushort.MaxValue;\n    public ushort OthAlleleCount    = ushort.MaxValue;\n    public ushort FemaleAlleleCount = ushort.MaxValue;\n    public ushort MaleAlleleCount   = ushort.MaxValue;\n\n    public ushort AllAlleleFrequency    = ushort.MaxValue;\n    public ushort AfrAlleleFrequency    = ushort.MaxValue;\n    public ushort AmrAlleleFrequency    = ushort.MaxValue;\n    public ushort EasAlleleFrequency    = ushort.MaxValue;\n    public ushort EurAlleleFrequency    = ushort.MaxValue;\n    public ushort OthAlleleFrequency    = ushort.MaxValue;\n    public ushort FemaleAlleleFrequency = ushort.MaxValue;\n    public ushort MaleAlleleFrequency   = ushort.MaxValue;\n\n    public ushort AllAlleleNumber    = ushort.MaxValue;\n    public ushort AfrAlleleNumber    = ushort.MaxValue;\n    public ushort AmrAlleleNumber    = ushort.MaxValue;\n    public ushort EasAlleleNumber    = ushort.MaxValue;\n    public ushort EurAlleleNumber    = ushort.MaxValue;\n    public ushort OthAlleleNumber    = ushort.MaxValue;\n    public ushort FemaleAlleleNumber = ushort.MaxValue;\n    public ushort MaleAlleleNumber   = ushort.MaxValue;\n\n    public ushort AllHomCount    = ushort.MaxValue;\n    public ushort AfrHomCount    = ushort.MaxValue;\n    public ushort AmrHomCount    = ushort.MaxValue;\n    public ushort EasHomCount    = ushort.MaxValue;\n    public ushort 
EurHomCount    = ushort.MaxValue;\n    public ushort OthHomCount    = ushort.MaxValue;\n    public ushort FemaleHomCount = ushort.MaxValue;\n    public ushort MaleHomCount   = ushort.MaxValue;\n\n    public TsvIndices()\n    {\n    }\n}"
  },
  {
    "path": "SAUtils/PhyloP/Main.cs",
    "content": "﻿using System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.PhyloP\n{\n    public static class Main\n    {\n        private static string _inputFile;\n        private static string _compressedReference;\n        private static string _outputDirectory;\n\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                     \"ref|r=\",\n                     \"compressed reference sequence file\",\n                     v => _compressedReference = v\n                 },\n                {\n                    \"in|i=\",\n                    \"input WifFix file path\",\n                    v => _inputFile = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .HasRequiredParameter(_inputFile, \"PhyloP WigFix file\", \"--in\")\n                .CheckInputFilenameExists(_inputFile, \"PhyloP WigFix file\", \"--in\")\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"create Ancestral allele database from 1000Genomes data\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n  
          return exitCode;\n        }\n\n        private static ExitCodes ProgramExecution()\n        {\n            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\n            var version           = DataSourceVersionReader.GetSourceVersion(_inputFile + \".version\");\n            string outFileName    = $\"{version.Name}_{version.Version}\";\n\n\n            using (var phylopParser = new PhylopParser(GZipUtilities.GetAppropriateReadStream(_inputFile), referenceProvider.Assembly, referenceProvider.RefNameToChromosome))\n            using (var nsaStream    = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.PhylopFileSuffix)))\n            using (var indexStream  = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.PhylopFileSuffix + SaCommon.IndexSuffix)))\n            using (var writer       = new NpdWriter(nsaStream, indexStream, version, referenceProvider.Assembly, SaCommon.PhylopTag, SaCommon.SchemaVersion))\n            {\n                writer.Write(phylopParser.GetItems());\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n\n}"
  },
  {
    "path": "SAUtils/PhyloP/NpdWriter.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing CommandLine.Utilities;\nusing Compression.Algorithms;\nusing Genome;\nusing IO;\nusing SAUtils.DataStructures;\nusing VariantAnnotation.PhyloP;\nusing VariantAnnotation.Providers;\n\nnamespace SAUtils.PhyloP\n{\n    public sealed class NpdWriter:IDisposable\n    {\n        private readonly ExtendedBinaryWriter _writer;\n\n        private readonly byte[] _scores;\n        private readonly byte[] _compressedScores;\n        private readonly MemoryStream _memStream;\n        private readonly ExtendedBinaryWriter _memWriter;\n        private readonly Zstandard _zstd;\n\n        \n\n        private readonly Dictionary<double, byte> _scoreMap;\n        private byte _nextScoreCode = 1; //0 is reserved to indicate no score\n\n        private readonly NpdIndex _index;\n\n        public NpdWriter(Stream dbStream, Stream indexStream, DataSourceVersion version, GenomeAssembly assembly, string jsonKey, int schemaVersion)\n        {\n            _writer = new ExtendedBinaryWriter( dbStream);\n            \n            _index    = new NpdIndex(indexStream, assembly, version, jsonKey, schemaVersion);\n            _scoreMap = new Dictionary<double, byte>(byte.MaxValue);\n\n            _scores = new byte[NpdIndex.MaxChromLength];\n            _memStream = new MemoryStream(_scores);\n            _memWriter = new ExtendedBinaryWriter(_memStream);\n            _zstd = new Zstandard();\n\n            _compressedScores = new byte[_zstd.GetCompressedBufferBounds(_scores.Length)];\n\n        }\n\n        private ushort _chromIndex = ushort.MaxValue;\n        private string _chromName = \"\";\n        \n        public void Write(IEnumerable<PhylopItem> items)\n        {\n            var benchmark = new Benchmark();\n            int lastPosition = 0;\n            foreach (PhylopItem item in items)\n            {\n                if (item.Chromosome.Index != _chromIndex)\n                {\n             
       //flush out old chrom \n                    if (_chromIndex != ushort.MaxValue)\n                    {\n                        WriteCompressed(lastPosition);\n                        Console.WriteLine($\"Chromosome {_chromName} completed in {Benchmark.ToHumanReadable(benchmark.GetElapsedTime())}\");\n                        benchmark.Reset();\n                    }\n\n                    _chromIndex = item.Chromosome.Index;\n                    _chromName = item.Chromosome.EnsemblName;\n                }\n\n                if (! _scoreMap.TryGetValue(item.Score, out byte _))\n                {\n                    _scoreMap.Add(item.Score, _nextScoreCode++);\n                    if (_nextScoreCode==byte.MaxValue)\n                        throw new ArgumentOutOfRangeException($\"No of distinct scores exceeded expected value of {_nextScoreCode}!!\");\n                }\n\n                _memStream.Position = item.Position - 1;\n\n                _memWriter.Write(_scoreMap[item.Score]);\n                \n                lastPosition = item.Position;\n                \n            }\n\n            //closing the last chromosome\n            WriteCompressed(lastPosition);\n            Console.WriteLine($\"Chromosome {_chromName} completed in {Benchmark.ToHumanReadable(benchmark.GetElapsedTime())}\");\n            benchmark.Reset();\n\n            Console.WriteLine($\"\\nNumber of distinct scores observed:{_scoreMap.Count}\");\n\n\n            _index.Write(_scoreMap);\n        }\n\n        private void WriteCompressed(int lastPosition)\n        {\n            var startLocation = _writer.BaseStream.Position;\n\n            int compressSize = _zstd.Compress(_scores, lastPosition, _compressedScores, _compressedScores.Length);\n            _writer.Write(_compressedScores, 0, compressSize);\n            _index.Add(_chromIndex, startLocation, compressSize);\n\n            Array.Clear(_scores, 0, _scores.Length);\n            _memStream.Position = 0;//reset the 
stream\n\n        }\n\n        public void Dispose()\n        {\n            _writer?.Dispose();\n            _memStream?.Dispose();\n            _memWriter?.Dispose();\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/PhyloP/PhylopParser.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing Genome;\nusing IO;\nusing OptimizedCore;\nusing SAUtils.DataStructures;\n\nnamespace SAUtils.PhyloP\n{\n    public sealed class PhylopParser : IDisposable\n    {\n        private readonly Stream _stream;\n        private readonly GenomeAssembly _assembly;\n        private readonly Dictionary<string, Chromosome> _refChromDict;\n\n        public PhylopParser(Stream stream, GenomeAssembly assembly, Dictionary<string, Chromosome> refChromDict)\n        {\n            _stream = stream;\n            _assembly = assembly;\n            _refChromDict = refChromDict;\n        }\n\n        public IEnumerable<PhylopItem> GetItems()\n        {\n            using (var reader = FileUtilities.GetStreamReader(_stream))\n            {\n                Chromosome chrom = null;\n                int position = 0;\n                int step = 0;\n                string line;\n                while ((line = reader.ReadLine()) != null)\n                {\n                    // Skip empty lines.\n                    if (string.IsNullOrWhiteSpace(line)) continue;\n\n                    if (double.TryParse(line, out double score))\n                    {\n                        // the chrom is unrecognized, so we skip\n                        if (chrom ==null || chrom.Index==ushort.MaxValue) continue;\n                        // since phylop used hg19, we skip entries for chrM\n                        if (_assembly == GenomeAssembly.GRCh37 && chrom.UcscName == \"chrM\") continue;\n                        // this is a phylop score\n                        yield return new PhylopItem(chrom, position, score);\n                        position += step;\n                    }\n                    else\n                    {\n                        (chrom, position, step) = StartNewInterval(line);\n                    }\n\n                }\n            }\n        }\n\n        private (Chromosome chrom, int 
position, int step) StartNewInterval(string line)\n        {\n            var words = line.Split();\n            string chromName = words[1].OptimizedKeyValue().Value;\n\n            var chrom = _refChromDict.TryGetValue(chromName, out var chromosome)? chromosome: Chromosome.GetEmptyChromosome(chromName);\n            if (chrom.Index == ushort.MaxValue) return (chrom, 0, 0);\n\n            int position = int.Parse(words[2].OptimizedKeyValue().Value);\n            int step = short.Parse(words[3].OptimizedKeyValue().Value);\n\n            return (chrom, position, step);\n        }\n\n        public void Dispose()\n        {\n            _stream?.Dispose();\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/PrimateAi/PrimateAiDb.cs",
    "content": "﻿using System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing VariantAnnotation.Caches;\nusing VariantAnnotation.IO.Caches;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.PrimateAi\n{\n    public static class PrimateAiDb\n    {\n        private static string _inputFile;\n        private static string _compressedReference;\n        private static string _outputDirectory;\n        private static string _transcriptCachePrefix;\n\n        public static ExitCodes Run(string command, string[] commandArgs)\n\n        {\n            var ops = new OptionSet\n            {\n                {\n                     \"ref|r=\",\n                     \"compressed reference sequence file\",\n                     v => _compressedReference = v\n                },\n                {\n                    \"in|i=\",\n                    \"input VCF file path\",\n                    v => _inputFile = v\n                },\n                {\n                    \"cache|c=\",\n                    \"Transcript cache prefix\",\n                    v => _transcriptCachePrefix = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .HasRequiredParameter(_inputFile, \"PrimateAI VCF file\", \"--in\")\n                .HasRequiredParameter(_transcriptCachePrefix, \"transcript cache file\", \"--cache\")\n                .CheckInputFilenameExists(_inputFile, \"PrimateAI VCF file\", 
\"--in\")\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a supplementary database containing 1000 Genomes allele frequencies\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n\n        private static ExitCodes ProgramExecution()\n        {\n            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\n            var version = DataSourceVersionReader.GetSourceVersion(_inputFile + \".version\");\n            string outFileName = $\"{version.Name}_{version.Version}\";\n\n            TranscriptCacheData transcriptData;\n            using (var transcriptCacheReader = new TranscriptCacheReader(FileUtilities.GetReadStream(CacheConstants.TranscriptPath(_transcriptCachePrefix))))\n            {\n                transcriptData = transcriptCacheReader.Read(referenceProvider.RefIndexToChromosome);\n            }\n\n            var (entrezToHgnc, ensemblToHgnc) = PrimateAiUtilities.GetIdToSymbols(transcriptData);\n\n            using (var primateAiParser = new PrimateAiParser(GZipUtilities.GetAppropriateReadStream(_inputFile),referenceProvider, entrezToHgnc, ensemblToHgnc))\n            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))\n            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSuffix)))\n            using (var nsaWriter = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.PrimateAiTag, true, true, SaCommon.SchemaVersion, false))\n            {\n                nsaWriter.Write(primateAiParser.GetItems());\n            }\n\n     
       return ExitCodes.Success;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/PrimateAi/PrimateAiItem.cs",
    "content": "﻿using Genome;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.PrimateAi\n{\n    // A single PrimateAI record: the variant coordinates plus the gene symbol and score\n    // percentile that GetJsonString() emits.\n    public sealed class PrimateAiItem : ISupplementaryDataItem\n    {\n        public Chromosome Chromosome { get; }\n        public int Position { get; set; }\n        public string RefAllele { get; set; }\n        public string AltAllele { get; set; }\n        public string InputLine { get; set; }\n        public double ScorePercentile { get; }\n        private string Hgnc { get; }\n\n        public PrimateAiItem(Chromosome chromosome, int position, string refAllele, string altAllele, string hgnc,\n            double percentile)\n        {\n            Chromosome      = chromosome;\n            Position        = position;\n            RefAllele       = refAllele;\n            AltAllele       = altAllele;\n            Hgnc            = hgnc;\n            ScorePercentile = percentile;\n        }\n\n        // Writes the hgnc and scorePercentile fields through a JsonObject backed by a pooled\n        // StringBuilder and returns the resulting text.\n        public string GetJsonString()\n        {\n            var builder = StringBuilderPool.Get();\n            var json    = new JsonObject(builder);\n\n            json.AddStringValue(\"hgnc\", Hgnc);\n            json.AddDoubleValue(\"scorePercentile\", ScorePercentile, \"0.##\");\n\n            return StringBuilderPool.GetStringAndReturn(builder);\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/PrimateAi/PrimateAiParser.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.Globalization;\nusing System.IO;\nusing IO;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.Providers;\n\nnamespace SAUtils.PrimateAi\n{\n    // Reads a tab-delimited PrimateAI file (CHROM, POS, REF, ALT, GeneId, ScorePercentile) and turns\n    // each usable line into a PrimateAiItem.\n    public sealed class PrimateAiParser : IDisposable\n    {\n        // number of leading tab-separated columns read from every data line\n        private const int NumRequiredColumns = 6;\n\n        private readonly Stream _stream;\n        private readonly ISequenceProvider _referenceProvider;\n        private readonly Dictionary<string, string> _entrezToHgnc;\n        private readonly Dictionary<string, string> _ensemblToHgnc;\n\n        // lines dropped because their gene ID has no (non-empty) gene symbol\n        private int _nullGeneCount;\n        // lines converted into items\n        private int _count;\n\n        public PrimateAiParser(Stream stream, ISequenceProvider referenceProvider, Dictionary<string, string> entrezToHgnc, Dictionary<string, string> ensemblToHgnc)\n        {\n            _stream            = stream;\n            _entrezToHgnc      = entrezToHgnc;\n            _ensemblToHgnc     = ensemblToHgnc;\n            _referenceProvider = referenceProvider;\n        }\n\n        // Lazily enumerates the items in the stream. Blank lines, lines starting with '#' and lines\n        // rejected by ExtractItem are skipped. A summary is printed once the stream is exhausted.\n        public IEnumerable<PrimateAiItem> GetItems()\n        {\n            using (var reader = FileUtilities.GetStreamReader(_stream))\n            {\n                string line;\n                while ((line = reader.ReadLine()) != null)\n                {\n                    // Skip empty lines.\n                    if (string.IsNullOrWhiteSpace(line)) continue;\n\n                    // Skip comments.\n                    if (line.OptimizedStartsWith('#')) continue;\n\n                    var item = ExtractItem(line);\n                    if (item == null) continue;\n                    yield return item;\n                }\n            }\n\n            // the percentage is taken over all lines on a known chromosome (kept + dropped) and stays\n            // finite when there were none\n            int total              = _count + _nullGeneCount;\n            double nullGenePercent = total == 0 ? 0.0 : 100.0 * _nullGeneCount / total;\n            Console.WriteLine($\"Number of entries:{_count}. Entries without hgnc:{_nullGeneCount} ({nullGenePercent} %)\");\n        }\n\n        //#CHROM  POS     REF     ALT     GeneId  ScorePercentile\n        //1       69094   G A       79501   0.79\n        // Returns null when the chromosome is unknown to the reference provider or when the gene ID\n        // cannot be mapped to a gene symbol. Throws InvalidDataException for lines with too few columns.\n        private PrimateAiItem ExtractItem(string line)\n        {\n            var splits = line.Split('\\t');\n            if (splits.Length < NumRequiredColumns)\n                throw new InvalidDataException($\"Expected at least {NumRequiredColumns} tab-separated columns but found {splits.Length}: {line}\");\n\n            if (!_referenceProvider.RefNameToChromosome.TryGetValue(splits[0], out var chromosome)) return null;\n\n            // numbers in the file use '.' as decimal separator (see the sample line above), so they are\n            // parsed independently of the culture of the machine running the tool\n            var position   = int.Parse(splits[1], CultureInfo.InvariantCulture);\n            var refAllele  = splits[2];\n            var altAllele  = splits[3];\n            var geneId     = splits[4];\n            var percentile = double.Parse(splits[5], CultureInfo.InvariantCulture);\n\n            // an Ensembl mapping takes precedence over an Entrez mapping for the same ID\n            if (!_ensemblToHgnc.TryGetValue(geneId, out string hgnc)) _entrezToHgnc.TryGetValue(geneId, out hgnc);\n\n            if (string.IsNullOrEmpty(hgnc))\n            {\n                _nullGeneCount++;\n                return null;\n            }\n\n            _count++;\n            return new PrimateAiItem(chromosome, position, refAllele, altAllele, hgnc, percentile);\n        }\n\n        // Disposes the input stream and also the reference provider handed to the constructor, so the\n        // provider must not be used after the parser has been disposed.\n        public void Dispose()\n        {\n            _stream?.Dispose();\n            _referenceProvider?.Dispose();\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/PrimateAi/PrimateAiUtilities.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing VariantAnnotation.Caches;\n\nnamespace SAUtils.PrimateAi\n{\n    public static class PrimateAiUtilities\n    {\n        // Builds two lookup tables from the genes in the transcript cache: Entrez gene ID -> gene symbol\n        // and Ensembl gene ID -> gene symbol, both keyed by the ID without its version. Genes with an\n        // empty ID are left out of the respective table; if an ID occurs more than once, the gene seen\n        // last determines the symbol.\n        public static (Dictionary<string, string> entrezToHgnc, Dictionary<string, string> ensemblToHgnc)\n            GetIdToSymbols(TranscriptCacheData transcriptData)\n        {\n            var entrezToHgnc  = new Dictionary<string, string>();\n            var ensemblToHgnc = new Dictionary<string, string>();\n\n            foreach (var gene in transcriptData.Genes)\n            {\n                string entrezId  = gene.EntrezGeneId.WithoutVersion;\n                string ensemblId = gene.EnsemblId.WithoutVersion;\n\n                if (!string.IsNullOrEmpty(entrezId))  entrezToHgnc[entrezId]   = gene.Symbol;\n                if (!string.IsNullOrEmpty(ensemblId)) ensemblToHgnc[ensemblId] = gene.Symbol;\n            }\n\n            return (entrezToHgnc, ensemblToHgnc);\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/ProcessSpliceNetTsv/PredictionFilter.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.Globalization;\r\nusing System.Linq;\r\nusing Compression.Utilities;\r\nusing Intervals;\r\n\r\nnamespace SAUtils.ProcessSpliceNetTsv\r\n{\r\n    // Filters SpliceNet prediction lines: a line is kept when its position lies in a region flanking\r\n    // an intron boundary of the supplied gene models, or when at least one of its score columns\r\n    // reaches the cutoff.\r\n    public static class PredictionFilter\r\n    {\r\n        private const int GffChrColumn = 0;\r\n        private const int GffFeatureColumn = 2;\r\n        private const int GffStartColumn = 3;\r\n        private const int GffEndColumn = 4;\r\n        private const int NumChrs = 25;\r\n        private const int PredChrColumn = 0;\r\n        private const int PredPosColumn = 1;\r\n        private static readonly int[] PredScoreColumns = { 6, 8, 10, 12 };\r\n        private const double FreqCutoff = 0.05;\r\n        private const int IntronBoundaryDistanceCutoff = 15;\r\n\r\n        // Copies every prediction line that passes the filter from the input TSV to the output TSV and\r\n        // reports progress on the console every million lines.\r\n        public static void Filter(string intputTsv, string gffFile1, string gffFile2, string outputTsv)\r\n        {\r\n            var intronFlankingRegions = GetIntronFlankingRegions(gffFile1, gffFile2);\r\n\r\n            using (var resultsReader = GZipUtilities.GetAppropriateStreamReader(intputTsv))\r\n            using (var resultsWriter = GZipUtilities.GetStreamWriter(outputTsv))\r\n            {\r\n                long lineCount = 0;\r\n                string line;\r\n                while ((line = resultsReader.ReadLine()) != null)\r\n                {\r\n                    var info = line.TrimEnd().Split('\\t');\r\n                    ushort chrIndex = GetChrIndex(info[PredChrColumn]);\r\n                    int pos = int.Parse(info[PredPosColumn], CultureInfo.InvariantCulture);\r\n\r\n                    // unrecognized contigs have no flanking regions, so only their scores can qualify them\r\n                    bool nearIntronBoundary = chrIndex < NumChrs && intronFlankingRegions.OverlapsAny(chrIndex, pos, pos);\r\n                    if (nearIntronBoundary || AnyScorePassTheCutoff(info, PredScoreColumns, FreqCutoff))\r\n                    {\r\n                        resultsWriter.WriteLine(line);\r\n                    }\r\n                    lineCount++;\r\n                    if (lineCount % 1_000_000 == 0)\r\n                    {\r\n                        Console.WriteLine($\"Processed {lineCount} lines. Current position: {info[PredChrColumn]}:{info[PredPosColumn]}\");\r\n                    }\r\n                }\r\n            }\r\n        }\r\n\r\n        // True when any of the given columns holds a value >= scoreCutoff. Scores are parsed with the\r\n        // invariant culture so that '.' is always the decimal separator.\r\n        private static bool AnyScorePassTheCutoff(string[] columns, int[] scoreColumnIndices, double scoreCutoff)\r\n        {\r\n            foreach (int columnIndex in scoreColumnIndices)\r\n            {\r\n                if (double.Parse(columns[columnIndex], CultureInfo.InvariantCulture) >= scoreCutoff) return true;\r\n            }\r\n            return false;\r\n        }\r\n\r\n        // Merges the flanking-region starts of both GFF files per chromosome and turns each start into\r\n        // an interval of 2 * IntronBoundaryDistanceCutoff bases.\r\n        private static IntervalForest<byte> GetIntronFlankingRegions(string gffFile1, string gffFile2)\r\n        {\r\n            var flankingRegions = new IntervalArray<byte>[NumChrs];\r\n            var flankingRegionStarts1 = GetIntronFlankingRegionStarts(gffFile1);\r\n            var flankingRegionStarts2 = GetIntronFlankingRegionStarts(gffFile2);\r\n            for (var i = 0; i < NumChrs; i++)\r\n            {\r\n                var allStartsThisChr = new HashSet<int>(flankingRegionStarts1[i]);\r\n                allStartsThisChr.UnionWith(flankingRegionStarts2[i]);\r\n                var intervals = GetIntervals(allStartsThisChr, IntronBoundaryDistanceCutoff * 2);\r\n                flankingRegions[i] = new IntervalArray<byte>(intervals.ToArray());\r\n            }\r\n            return new IntervalForest<byte>(flankingRegions);\r\n        }\r\n\r\n        private static IEnumerable<Interval<byte>> GetIntervals(IEnumerable<int> starts, int size) => starts.Select(x => new Interval<byte>(x, x + size - 1, 0));\r\n\r\n        // Reads gene, transcript and exon records from a GFF file and returns, per chromosome index,\r\n        // the start positions of the regions flanking each intron boundary. Exons are buffered per\r\n        // transcript; the buffer is converted whenever a new transcript or a new chromosome begins.\r\n        private static HashSet<int>[] GetIntronFlankingRegionStarts(string gffFile)\r\n        {\r\n            var flankingRegionStarts = new HashSet<int>[NumChrs];\r\n            for (var i = 0; i < NumChrs; i++) flankingRegionStarts[i] = new HashSet<int>();\r\n\r\n            using (var gffReader = GZipUtilities.GetAppropriateStreamReader(gffFile))\r\n            {\r\n                string line;\r\n                var previousChrIndex = ushort.MaxValue;\r\n                var exonBoundaries = new List<Interval>();\r\n                while ((line = gffReader.ReadLine()) != null)\r\n                {\r\n                    // header/comment lines and truncated records have no feature columns to inspect\r\n                    if (line.StartsWith(\"#\", StringComparison.Ordinal)) continue;\r\n                    var info = line.Split('\\t');\r\n                    if (info.Length <= GffEndColumn) continue;\r\n\r\n                    if (info[GffFeatureColumn] == \"gene\")\r\n                    {\r\n                        ushort chrIndex = GetChrIndex(info[GffChrColumn]);\r\n                        if (chrIndex != previousChrIndex)\r\n                        {\r\n                            // finish the last transcript of the previous chromosome and continue with an\r\n                            // empty buffer so that its exons cannot leak into the new chromosome\r\n                            AddBufferedBoundaries(exonBoundaries, flankingRegionStarts, previousChrIndex);\r\n                            exonBoundaries = new List<Interval>();\r\n                        }\r\n                        previousChrIndex = chrIndex;\r\n                    }\r\n                    else if (info[GffFeatureColumn] == \"transcript\")\r\n                    {\r\n                        AddBufferedBoundaries(exonBoundaries, flankingRegionStarts, previousChrIndex);\r\n                        exonBoundaries = new List<Interval>();\r\n                    }\r\n                    else if (info[GffFeatureColumn] == \"exon\")\r\n                    {\r\n                        int start = int.Parse(info[GffStartColumn], CultureInfo.InvariantCulture);\r\n                        int end = int.Parse(info[GffEndColumn], CultureInfo.InvariantCulture);\r\n                        exonBoundaries.Add(new Interval(start, end));\r\n                    }\r\n                }\r\n\r\n                // the last transcript of the file is still buffered\r\n                AddBufferedBoundaries(exonBoundaries, flankingRegionStarts, previousChrIndex);\r\n            }\r\n\r\n            return flankingRegionStarts;\r\n        }\r\n\r\n        // Adds the flanks of the buffered exons to the set of the given chromosome. Exons that belong\r\n        // to an unrecognized chromosome (index outside the array) are ignored; sets accumulate, so a\r\n        // chromosome that shows up in several stretches of the file keeps all of its entries.\r\n        private static void AddBufferedBoundaries(List<Interval> exonBoundaries, HashSet<int>[] flankingRegionStarts, ushort chrIndex)\r\n        {\r\n            if (chrIndex >= NumChrs) return;\r\n            ProcessBufferedBoundaries(exonBoundaries, flankingRegionStarts[chrIndex]);\r\n        }\r\n\r\n        // For every pair of consecutive exons, records the start of the region around the end of the\r\n        // first exon and the start of the region around the start of the second exon.\r\n        private static void ProcessBufferedBoundaries(List<Interval> exonBoundaries, HashSet<int> flankingRegionStartsthisChr)\r\n        {\r\n            for (var i = 1; i < exonBoundaries.Count; i++)\r\n            {\r\n                // Donor site for intron i\r\n                flankingRegionStartsthisChr.Add(exonBoundaries[i - 1].End - IntronBoundaryDistanceCutoff + 1);\r\n                // Acceptor site for intron i\r\n                flankingRegionStartsthisChr.Add(exonBoundaries[i].Start - IntronBoundaryDistanceCutoff);\r\n            }\r\n        }\r\n\r\n        // Maps a chromosome name (with or without the chr prefix) to a zero-based index: numeric names\r\n        // 1..NumChrs map to 0..NumChrs-1, X to 22, Y to 23 and M/MT to 24. Everything else yields\r\n        // ushort.MaxValue.\r\n        private static ushort GetChrIndex(string chrName)\r\n        {\r\n            if (chrName.StartsWith(\"chr\", StringComparison.Ordinal)) chrName = chrName.Substring(3);\r\n            if (ushort.TryParse(chrName, out ushort chrNum))\r\n            {\r\n                // 0 and numbers beyond NumChrs have no slot in the per-chromosome arrays\r\n                return chrNum >= 1 && chrNum <= NumChrs ? (ushort)(chrNum - 1) : ushort.MaxValue;\r\n            }\r\n            switch (chrName)\r\n            {\r\n                case \"X\":\r\n                    return 22;\r\n                case \"Y\":\r\n                    return 23;\r\n                case \"M\":\r\n                case \"MT\":\r\n                    return 24;\r\n                default:\r\n                    return ushort.MaxValue;\r\n            }\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/ProcessSpliceNetTsv/SpliceNetPredictionFilterMain.cs",
    "content": "﻿using CommandLine.Builders;\r\nusing CommandLine.NDesk.Options;\r\nusing ErrorHandling;\r\n\r\nnamespace SAUtils.ProcessSpliceNetTsv\r\n{\r\n    // Command-line entry point of the SpliceNet prediction filter.\r\n    public static class SpliceNetPredictionFilterMain\r\n    {\r\n        private static string _spliceNetResultsFile;\r\n        private static string _filteredResultsFile;\r\n        private static string _gffFile1;\r\n        private static string _gffFile2;\r\n\r\n        // Parses and validates the command-line arguments and, when they are valid, runs the filter.\r\n        public static ExitCodes Run(string command, string[] commandArgs)\r\n        {\r\n            var ops = new OptionSet\r\n            {\r\n                {\r\n                    \"in|i=\",\r\n                    \"SpliceNet prediction results\",\r\n                    v => _spliceNetResultsFile = v\r\n                },\r\n                {\r\n                    \"gff1|g1=\",\r\n                    \"Gene structures file 1\",\r\n                    v => _gffFile1 = v\r\n                },\r\n                {\r\n                    \"gff2|g2=\",\r\n                    \"Gene structures file 2\",\r\n                    v => _gffFile2 = v\r\n                },\r\n                {\r\n                    \"out|o=\",\r\n                    \"Filtered SpliceNet results\",\r\n                    v => _filteredResultsFile = v\r\n                }\r\n            };\r\n\r\n            var commandLineExample = $\"{command} [options]\";\r\n\r\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\r\n                .Parse()\r\n                .CheckInputFilenameExists(_spliceNetResultsFile, \"SpliceNet prediction results\", \"--in\")\r\n                .CheckInputFilenameExists(_gffFile1, \"Gene structures file 1\", \"--gff1\")\r\n                .CheckInputFilenameExists(_gffFile2, \"Gene structures file 2\", \"--gff2\")\r\n                // without this check a missing --out would only surface as a failure while opening the writer\r\n                .HasRequiredParameter(_filteredResultsFile, \"Filtered SpliceNet results\", \"--out\")\r\n                .SkipBanner()\r\n                .ShowHelpMenu(\"Filter SpliceNet results based on predicted scores and variant location\",\r\n                    commandLineExample)\r\n                .ShowErrors()\r\n                .Execute(ProgramExecution);\r\n\r\n            return exitCode;\r\n        }\r\n\r\n        private static ExitCodes ProgramExecution()\r\n        {\r\n            PredictionFilter.Filter(_spliceNetResultsFile, _gffFile1, _gffFile2, _filteredResultsFile);\r\n            return ExitCodes.Success;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/RefMinorDb/Main.cs",
    "content": "﻿using System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing SAUtils.InputFileParsers.OneKGen;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.RefMinorDb\n{\n    // Command-line entry point that creates the reference minor database from a 1000 Genomes VCF file.\n    public static class Main\n    {\n        private static string _inputFile;\n        private static string _compressedReference;\n        private static string _outputDirectory;\n\n        // Parses and validates the command-line arguments and, when they are valid, builds the database.\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"ref|r=\",\n                    \"compressed reference sequence file\",\n                    v => _compressedReference = v\n                },\n                {\n                    \"in|i=\",\n                    \"input VCF file path\",\n                    v => _inputFile = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .HasRequiredParameter(_inputFile, \"1000 Genomes VCF file\", \"--in\")\n                .CheckInputFilenameExists(_inputFile, \"1000 Genomes VCF file\", \"--in\")\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a reference minor database from 1000 Genomes allele frequencies\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n\n        // Reads the data source version from the .version file next to the input and writes the\n        // reference minor data file and its index into the output directory.\n        private static ExitCodes ProgramExecution()\n        {\n            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\n            var version           = DataSourceVersionReader.GetSourceVersion(_inputFile + \".version\");\n            string outFileName = $\"{version.Name}_{version.Version}_{SaCommon.RefMinorTag}\".Replace(' ','_');\n\n            using (var refMinorReader = new RefMinorReader(GZipUtilities.GetAppropriateStreamReader(_inputFile), referenceProvider))\n            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.RefMinorFileSuffix)))\n            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.RefMinorFileSuffix + SaCommon.IndexSuffix)))\n            using (var writer = new RefMinorDbWriter(new ExtendedBinaryWriter(nsaStream), new ExtendedBinaryWriter(indexStream), version, referenceProvider, SaCommon.SchemaVersion))\n            {\n                writer.Write(refMinorReader.GetItems());\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n\n}"
  },
  {
    "path": "SAUtils/RefMinorDb/RefMinorDbWriter.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing CommandLine.Utilities;\nusing IO;\nusing SAUtils.DataStructures;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.NSA;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.RefMinorDb\n{\n    // Writes reference minor positions (position followed by the global major allele) to the data\n    // stream and registers the stream offset of every written position in a RefMinorIndex.\n    public sealed class RefMinorDbWriter:IDisposable\n    {\n        private readonly ExtendedBinaryWriter _writer;\n        private readonly Stream _stream;\n\n        private readonly ISequenceProvider _refProvider;\n        private readonly RefMinorIndex _refMinorIndex;\n\n        public RefMinorDbWriter(ExtendedBinaryWriter writer, ExtendedBinaryWriter indexWriter, DataSourceVersion version, ISequenceProvider refProvider, int schemaVersion)\n        {\n            _writer        = writer;\n            _stream        = writer.BaseStream;\n            _refProvider   = refProvider;\n            _refMinorIndex = new RefMinorIndex(indexWriter, _refProvider.Assembly, version, schemaVersion);\n        }\n\n        // Processes the items chromosome by chromosome. Items whose reference allele differs from the\n        // reference sequence are dropped; the others are trimmed, buffered in a min-heap and written\n        // once the input has moved past their position. The index is written at the very end.\n        public void Write(IEnumerable<ISupplementaryDataItem> saItems)\n        {\n            var heap               = new MinHeap<ISupplementaryDataItem>(SuppDataUtilities.CompareTo);\n            var chromIndex         = ushort.MaxValue; // sentinel: no chromosome has been started yet\n            var currentEnsemblName = \"\";\n\n            var benchmark = new Benchmark();\n\n            foreach (var saItem in saItems)\n            {\n                if (chromIndex != saItem.Chromosome.Index)\n                {\n                    if (chromIndex != ushort.MaxValue)\n                    {\n                        FlushAndReport(heap, currentEnsemblName, benchmark);\n                        benchmark.Reset();\n                    }\n\n                    chromIndex         = saItem.Chromosome.Index;\n                    currentEnsemblName = saItem.Chromosome.EnsemblName;\n                    _refProvider.LoadChromosome(saItem.Chromosome);\n                }\n\n                var referenceBases = _refProvider.Sequence.Substring(saItem.Position-1, saItem.RefAllele.Length);\n                if (saItem.RefAllele != referenceBases) continue;\n\n                // Items arrive sorted by their position before trimming, and trimming can move an item\n                // to a later position. Remember the untrimmed position: everything strictly before it\n                // is complete and can be written, nothing at or after it may be written yet.\n                var writeToPos = saItem.Position;\n\n                saItem.Trim();\n                heap.Add(saItem);\n                WriteUptoPosition(heap, writeToPos);\n            }\n\n            FlushAndReport(heap, currentEnsemblName, benchmark);\n\n            _refMinorIndex.Write(_stream.Position);\n        }\n\n        // Writes whatever is still buffered and prints how long the chromosome took.\n        private void FlushAndReport(MinHeap<ISupplementaryDataItem> heap, string currentEnsemblName, Benchmark benchmark)\n        {\n            WriteUptoPosition(heap, int.MaxValue);\n            Console.WriteLine($\"Chromosome {currentEnsemblName} completed in {Benchmark.ToHumanReadable(benchmark.GetElapsedTime())}\");\n        }\n\n        // Removes, in heap order, all items positioned before the given position and writes them one\n        // group at a time; a group consists of the items that compare equal to the current minimum.\n        private void WriteUptoPosition(MinHeap<ISupplementaryDataItem> itemsHeap, int position)\n        {\n            while (itemsHeap.Count() > 0)\n            {\n                var smallest = itemsHeap.GetMin();\n                if (smallest.Position >= position) break;\n\n                var group = new List<ISupplementaryDataItem>();\n                while (itemsHeap.Count() > 0 && SuppDataUtilities.CompareTo(smallest, itemsHeap.GetMin()) == 0)\n                    group.Add(itemsHeap.ExtractMin());\n\n                WritePosition(group);\n            }\n        }\n\n        // Writes one position if the group qualifies as reference minor and registers its stream\n        // offset in the index beforehand.\n        private void WritePosition(List<ISupplementaryDataItem> saItems)\n        {\n            var refMinorItem = (RefMinorItem)GetRefMinorItem(saItems);\n            if (refMinorItem == null) return;\n\n            _refMinorIndex.Add(refMinorItem.Chromosome.Index, _stream.Position);\n            _writer.WriteOpt(refMinorItem.Position);\n            _writer.WriteOptAscii(refMinorItem.GlobalMajor);\n        }\n\n        // Sums the alternate allele frequencies of the SNV items in the group. Returns null when the\n        // sum is below SaCommon.RefMinorThreshold, otherwise an item carrying the most frequent allele.\n        private static ISupplementaryDataItem GetRefMinorItem(List<ISupplementaryDataItem> saItems)\n        {\n            var altFrequencySum = 0.0;\n            var frequencyByAlt  = new Dictionary<string, double>();\n            string refAllele    = null;\n\n            foreach (var saItem in saItems)\n            {\n                var frequencyItem = (AlleleFrequencyItem) saItem;\n                bool bothSnv = IsSnv(frequencyItem.RefAllele) && IsSnv(frequencyItem.AltAllele);\n                if (!bothSnv) continue;\n\n                refAllele        = frequencyItem.RefAllele;\n                altFrequencySum += frequencyItem.AltFrequency;\n                frequencyByAlt[frequencyItem.AltAllele] = frequencyItem.AltFrequency;\n            }\n\n            if (!(altFrequencySum >= SaCommon.RefMinorThreshold)) return null;\n\n            string globalMajor = SuppDataUtilities.GetMostFrequentAllele(frequencyByAlt, refAllele);\n            return new RefMinorItem(saItems[0].Chromosome, saItems[0].Position, globalMajor);\n        }\n\n        // True for a single base A, C, G or T in either case.\n        private static bool IsSnv(string allele)\n        {\n            if (allele.Length != 1) return false;\n\n            switch (allele.ToUpper())\n            {\n                case \"A\":\n                case \"C\":\n                case \"G\":\n                case \"T\":\n                    return true;\n                default:\n                    return false;\n            }\n        }\n\n        public void Dispose()\n        {\n            _writer?.Dispose();\n            _stream?.Dispose();\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/RegionUtilities.cs",
    "content": "using System;\nusing Genome;\nusing Intervals;\nusing VariantAnnotation.Interface.SA;\n\nnamespace SAUtils\n{\n    public static class RegionUtilities\n    {\n        private static readonly IInterval Grch37Par1 = new Interval(10_001,     2_649_520);\n        private static readonly IInterval Grch37Par2 = new Interval(59_034_050, 59_363_566);\n\n        private static readonly IInterval Grch38Par1 = new Interval(10_001,     2_781_479);\n        private static readonly IInterval Grch38Par2 = new Interval(56_887_903, 57_217_415);\n\n        // Returns true when the variant is on chrY and the range from its position to its position\n        // plus the longer of its two alleles overlaps one of the two intervals defined above for the\n        // assembly. Variants on other chromosomes and assemblies other than hg19, GRCh37 and GRCh38\n        // always yield false.\n        public static bool OverlapsParRegion(ISupplementaryDataItem variant, GenomeAssembly assembly)\n        {\n            if (variant.Chromosome.UcscName != \"chrY\") return false;\n\n            var longestAllele = Math.Max(variant.AltAllele.Length, variant.RefAllele.Length);\n            var start         = variant.Position;\n            var end           = variant.Position + longestAllele;\n\n            IInterval par1;\n            IInterval par2;\n\n            switch (assembly)\n            {\n                case GenomeAssembly.hg19:\n                case GenomeAssembly.GRCh37:\n                    par1 = Grch37Par1;\n                    par2 = Grch37Par2;\n                    break;\n                case GenomeAssembly.GRCh38:\n                    par1 = Grch38Par1;\n                    par2 = Grch38Par2;\n                    break;\n                default:\n                    return false;\n            }\n\n            return par1.Overlaps(start, end) || par2.Overlaps(start, end);\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/Revel/Create.cs",
    "content": "﻿using System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.GenericScore;\nusing SAUtils.GenericScore.GenericScoreParser;\nusing SAUtils.InputFileParsers;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.Revel\n{\n    // Command-line entry point that creates the REVEL supplementary database.\n    public static class Create\n    {\n        private static string _inputFile;\n        private static string _compressedReference;\n        private static string _outputDirectory;\n\n        // Parses and validates the command-line arguments and, when they are valid, builds the database.\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var options = new OptionSet\n            {\n                { \"ref|r=\", \"compressed reference sequence file\", v => _compressedReference = v },\n                { \"in|i=\",  \"input REVEL file path\",              v => _inputFile           = v },\n                { \"out|o=\", \"output directory\",                   v => _outputDirectory     = v }\n            };\n\n            var commandLineExample = $\"{command} [options]\";\n\n            return new ConsoleAppBuilder(commandArgs, options)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .CheckInputFilenameExists(_inputFile,           \"input REVEL file Path\",                   \"--in\")\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Create a supplementary database from REVEL input file \", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n        }\n\n        // Reads the data source version from the .version file next to the input, parses the REVEL\n        // file with the generic score parser and writes the data file and its index into the output\n        // directory.\n        private static ExitCodes ProgramExecution()\n        {\n            var columns  = new ColumnIndex(0, 1, 2, 3, 6, null);\n            var alleles  = new[] {\"A\", \"C\", \"G\", \"T\"};\n            var settings = new ParserSettings(columns, alleles, GenericScoreParser.MaxRepresentativeScores);\n\n            var version   = DataSourceVersionReader.GetSourceVersion(_inputFile + \".version\");\n            var baseName  = $\"{version.Name}_{version.Version}\";\n            var nsaPath   = Path.Combine(_outputDirectory, baseName + SaCommon.SaFileSuffix);\n            var indexPath = Path.Combine(_outputDirectory, baseName + SaCommon.SaFileSuffix + SaCommon.IndexSuffix);\n\n            // the resources are opened in this order and released in reverse order\n            using (var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference)))\n            using (var streamReader = GZipUtilities.GetAppropriateStreamReader(_inputFile))\n            using (var revelParser = new GenericScoreParser(settings, streamReader, referenceProvider.RefNameToChromosome))\n            using (var nsaStream = FileUtilities.GetCreateStream(nsaPath))\n            using (var indexStream = FileUtilities.GetCreateStream(indexPath))\n            using (var nsaWriter = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.RevelTag, true, false,\n                       SaCommon.SchemaVersion, false))\n            {\n                nsaWriter.Write(revelParser.GetItems());\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/SAUtils.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing CommandLine.Builders;\r\nusing ErrorHandling;\r\nusing SAUtils.ClinGen;\r\nusing SAUtils.CosmicGeneFusions;\r\nusing SAUtils.CreateClinvarDb;\r\nusing SAUtils.DbSnpRemapper;\r\nusing SAUtils.ExtractCosmicSvs;\r\nusing SAUtils.ExtractMiniSa;\r\nusing SAUtils.ExtractMiniXml;\r\nusing SAUtils.FusionCatcher;\r\nusing SAUtils.GERP;\r\nusing SAUtils.gnomAD;\r\nusing SAUtils.GnomadGeneScores;\r\nusing SAUtils.MitoHeteroplasmy;\r\nusing SAUtils.MitoMap;\r\nusing SAUtils.NsaIndexUpdater;\r\nusing SAUtils.PrimateAi;\r\nusing SAUtils.ProcessSpliceNetTsv;\r\nusing SAUtils.SpliceAi;\r\nusing VariantAnnotation.Interface;\r\n\r\nnamespace SAUtils\r\n{\r\n    public static class SaUtils\r\n    {\r\n        public static int Main(string[] args)\r\n        {\r\n            var ops = new Dictionary<string, TopLevelOption>\r\n            {\r\n                [\"AaCon\"]           = new(\"create AA conservation database\", AAConservation.AaConservationMain.Run),\r\n                [\"ancestralAllele\"] = new(\"create Ancestral allele database from 1000Genomes data\", MakeAaDb.Main.Run),\r\n                [\"ClinGen\"]         = new(\"create ClinGen database\", MakeClinGenDb.Main.Run),\r\n                [\"clinvar\"]         = new(\"create ClinVar database\", ClinVarMain.Run),\r\n                [\"concat\"] = new(\"merge multiple NSA files for the same data source having non-overlapping regions\",\r\n                    NsaConcatenator.NsaConcatenator.Run),\r\n                [\"Cosmic\"]             = new(\"create COSMIC database\", CreateCosmicDb.Main.Run),\r\n                [\"CosmicSv\"]           = new(\"create COSMIC SV database\", ExtractCosmicSvsMain.Run),\r\n                [\"CosmicFusion\"]       = new(\"create COSMIC gene fusion database\", CreateCosmicGeneFusions.Run),\r\n                [\"CustomGene\"]         = new(\"create custom gene annotation database\", Custom.GeneMain.Run),\r\n                
[\"CustomVar\"]          = new(\"create custom variant annotation database\", Custom.VariantMain.Run),\r\n                [\"Dann\"]               = new(\"create DANN database\", Dann.Create.Run),\r\n                [\"Dbsnp\"]              = new(\"create dbSNP database\", CreateDbsnpDb.Main.Run),\r\n                [\"Dgv\"]                = new(\"create DGV database\", makeDgvDb.Main.Run),\r\n                [\"DiseaseValidity\"]    = new(\"create disease validity database\", GeneDiseaseValidity.Run),\r\n                [\"DosageMapRegions\"]   = new(\"create dosage map regions\", DosageMapRegions.Run),\r\n                [\"DosageSensitivity\"]  = new(\"create dosage sensitivity database\", DosageSensitivity.Run),\r\n                [\"DownloadOmim\"]       = new(\"download OMIM database\", Omim.Downloader.Run),\r\n                [\"ExtractMiniSA\"]      = new(\"extracts mini SA\", ExtractMiniSaMain.Run),\r\n                [\"ExtractMiniXml\"]     = new(\"extracts mini XML (ClinVar)\", ExtractMiniXmlMain.Run),\r\n                [\"FilterSpliceNetTsv\"] = new(\"filter SpliceNet predictions\", SpliceNetPredictionFilterMain.Run),\r\n                [\"FusionCatcher\"]      = new(\"create FusionCatcher database\", CreateFusionCatcher.Run),\r\n                [\"Gerp\"]               = new(\"create GERP conservation database\", GerpMain.Run),\r\n                [\"GlobalMinor\"]        = new(\"create global minor allele database\", CreateGlobalAllelesDb.Main.Run),\r\n                [\"Gnomad\"]             = new(\"create gnomAD database\", GnomadSnvMain.Run),\r\n                [\"Gnomad-lcr\"]         = new(\"create gnomAD low complexity region database\", LcrRegionsMain.Run),\r\n                [\"GnomadGeneScores\"]   = new(\"create gnomAD gene scores database\", GnomadGenesMain.Run),\r\n                [\"GnomadSV\"]           = new(\"create gnomAD structural variant database\", GnomadSvMain.Run),\r\n                [\"Index\"]              = new(\"edit an 
index file\", UpdateIndex.Run),\r\n                [\"MitoHet\"]            = new(\"create mitochondrial Heteroplasmy database\", MitoHeteroplasmyDb.Run),\r\n                [\"MitomapSvDb\"]        = new(\"create MITOMAP structural variants database\", StructVarDb.Run),\r\n                [\"MitomapVarDb\"]       = new(\"create MITOMAP small variants database\", SmallVarDb.Run),\r\n                [\"Omim\"]               = new(\"create OMIM database\", Omim.Main.Run),\r\n                [\"OneKGen\"]            = new(\"create 1000 Genome small variants database\", CreateOneKgDb.Main.Run),\r\n                [\"OneKGenSv\"]          = new(\"create 1000 Genomes structural variants database\", OneKGenSvDb.Create.Run),\r\n                [\"OneKGenSvVcfToBed\"] = new(\"convert 1000 Genomes structural variants VCF file into a BED-like file\",\r\n                    OneKGenSvDb.VcfToBed.Run),\r\n                [\"PhyloP\"]         = new(\"create PhyloP database\", PhyloP.Main.Run),\r\n                [\"PrimateAi\"]      = new(\"create PrimateAI database\", PrimateAiDb.Run),\r\n                [\"RefMinor\"]       = new(\"create Reference Minor database from 1000 Genome \", RefMinorDb.Main.Run),\r\n                [\"RemapWithDbsnp\"] = new(\"remap a VCF file given source and destination rsID mappings\", DbSnpRemapperMain.Run),\r\n                [\"Revel\"]          = new(\"create REVEL database\", Revel.Create.Run),\r\n                [\"SpliceAi\"]       = new(\"create SpliceAI database\", SpliceAiDb.Run),\r\n                [\"TopMed\"]         = new(\"create TOPMed database\", CreateTopMedDb.Main.Run),\r\n                [\"Gme\"]            = new(\"create GME Variome database\", CreateGmeDb.Main.Run),\r\n                [\"Decipher\"]       = new(\"create Decipher database\", CreateDecipherDb.Main.Run)\r\n            };\r\n\r\n            ExitCodes exitCode = new TopLevelAppBuilder(args, ops)\r\n                .Parse()\r\n                
.ShowBanner(Constants.Authors)\r\n                .ShowHelpMenu(\"Utilities focused on supplementary annotation\")\r\n                .ShowErrors()\r\n                .Execute();\r\n\r\n            return (int) exitCode;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/SAUtils.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\r\n  <PropertyGroup>\r\n    <OutputType>Exe</OutputType>\r\n    <TargetFramework>net6.0</TargetFramework>\r\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\r\n    \r\n  </PropertyGroup>\r\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\r\n  <ItemGroup>\r\n    <PackageReference Include=\"Newtonsoft.Json\" Version=\"13.0.1\" />\r\n    <PackageReference Include=\"System.Text.Json\" Version=\"6.0.2\" />\r\n  </ItemGroup>\r\n  <ItemGroup>\r\n    <ProjectReference Include=\"..\\CacheUtils\\CacheUtils.csproj\" />\r\n    <ProjectReference Include=\"..\\CommandLine\\CommandLine.csproj\" />\r\n    <ProjectReference Include=\"..\\Compression\\Compression.csproj\" />\r\n    <ProjectReference Include=\"..\\IO\\IO.csproj\" />\r\n    <ProjectReference Include=\"..\\Nirvana\\Nirvana.csproj\" />\r\n    <ProjectReference Include=\"..\\OptimizedCore\\OptimizedCore.csproj\" />\r\n    <ProjectReference Include=\"..\\VariantAnnotation\\VariantAnnotation.csproj\" />\r\n    <ProjectReference Include=\"..\\Vcf\\Vcf.csproj\" />\r\n  </ItemGroup>\r\n</Project>\r\n"
  },
  {
    "path": "SAUtils/SaUtilsCommon.cs",
    "content": "﻿using System.Linq;\n\nnamespace SAUtils\n{\n    public static class SaUtilsCommon\n    {\n        public static bool IsNumberNullOrZero(int? item)\n        {\n            return item is null or 0;\n        }\n\n        /// <summary>\n        /// Returns a regular alternate allele when a provided with one have SA format.\n        /// In case of long insertions or InsDel, where the saAltAllele contains an MD5 hash, the hash is returned.\n        /// </summary>\n        /// <param name=\"saAltAllele\"> supplementary annotation alternate allele</param>\n        /// <param name=\"emptyAllele\">The way the calling function wants to represent an empty allele</param>\n        /// <returns>regular alternate allele</returns>\n        public static string ReverseSaReducedAllele(string saAltAllele, string emptyAllele = \"-\")\n        {\n            if (saAltAllele == null) return null;\n            if (saAltAllele.All(char.IsDigit)) return emptyAllele; // this was a deletion\n\n            int firstBaseIndex;\n            for (firstBaseIndex = 0; firstBaseIndex < saAltAllele.Length; firstBaseIndex++)\n            {\n                if (saAltAllele[firstBaseIndex] != 'i' && saAltAllele[firstBaseIndex] != '<' &&\n                    !char.IsDigit(saAltAllele[firstBaseIndex]))\n                    break;\n            }\n\n            if (saAltAllele.Substring(firstBaseIndex) == \"\") return emptyAllele;\n\n            return firstBaseIndex > 0 && firstBaseIndex < saAltAllele.Length\n                ? saAltAllele.Substring(firstBaseIndex)\n                : saAltAllele;\n        }\n\n        public static bool HasFailedFilters(string filters)\n        {\n            return !(filters.Equals(\"PASS\") || filters.Equals(\".\"));\n        }\n    }\n}\n"
  },
  {
    "path": "SAUtils/Schema/SaJsonKeyAnnotation.cs",
    "content": "﻿using VariantAnnotation.SA;\r\n\r\nnamespace SAUtils.Schema\r\n{\r\n    public sealed class SaJsonKeyAnnotation\r\n    {\r\n        public SaJsonKeyProperties Properties;\r\n        public SaJsonSchema Schema;\r\n\r\n        private SaJsonKeyAnnotation() { }\r\n\r\n        public static SaJsonKeyAnnotation CreateFromProperties(SaJsonValueType valueType, CustomAnnotationCategories category, string description)\r\n        {\r\n            return new SaJsonKeyAnnotation {Properties = new SaJsonKeyProperties(valueType, category, description)};\r\n        }\r\n\r\n        public static SaJsonKeyAnnotation CreateFromSubSchema(SaJsonSchema schema)\r\n        {\r\n            return new SaJsonKeyAnnotation { Schema = schema};\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/Schema/SaJsonKeyProperties.cs",
    "content": "﻿using VariantAnnotation.SA;\r\n\r\nnamespace SAUtils.Schema\r\n{\r\n    public sealed class SaJsonKeyProperties\r\n    {\r\n        public readonly SaJsonValueType            ValueType;\r\n        public readonly CustomAnnotationCategories Category;\r\n        public readonly string                     Description;\r\n\r\n        public SaJsonKeyProperties(SaJsonValueType valueType, CustomAnnotationCategories category, string description)\r\n        {\r\n            ValueType   = valueType;\r\n            Category    = category;\r\n            Description = description;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/Schema/SaJsonSchema.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.Linq;\r\nusing System.Text;\r\nusing ErrorHandling.Exceptions;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.IO;\r\nusing VariantAnnotation.SA;\r\n\r\nnamespace SAUtils.Schema\r\n{\r\n    public sealed class SaJsonSchema\r\n    {\r\n        private const string SchemaVersion = \"http://json-schema.org/draft-06/schema#\";\r\n\r\n        public int TotalItems { get; set; }\r\n\r\n        private readonly StringBuilder _sb;\r\n        private readonly JsonObject _jsonObject;\r\n        private readonly Dictionary<string, SaJsonKeyAnnotation> _keyAnnotation = new Dictionary<string, SaJsonKeyAnnotation>();\r\n        private IEnumerable<string> Keys { get; set; }\r\n        // Keys not used to generate the NSA file, but in the Nirvana JSON output\r\n        private string[] NonSaKeys { get; set; } = { };\r\n        internal readonly Dictionary<string, int> KeyCounts = new Dictionary<string, int>();\r\n        private Action<JsonObject, List<string[]>> _jsonStringGenerationAction;\r\n        private bool _finalized;\r\n\r\n        internal SaJsonSchema(StringBuilder sb)\r\n        {\r\n            _sb = sb;\r\n            _jsonObject = new JsonObject(sb);\r\n        }\r\n\r\n        public static SaJsonSchema Create(StringBuilder sb, string jsonTag, SaJsonValueType primaryType, IEnumerable<string> jsonKeys)\r\n        {\r\n            var jsonSchema = new SaJsonSchema(sb) { Keys = jsonKeys };\r\n\r\n            // The root level schema for a SA\r\n            if (jsonTag != null)\r\n            {\r\n                jsonSchema._jsonObject.StartObject();\r\n                jsonSchema.AddSchemaVersion();\r\n                // SA json is an object\r\n                jsonSchema.AddJsonDataType(JsonDataType.Object);\r\n                jsonSchema._jsonObject.StartObjectWithKey(jsonTag);\r\n            }\r\n\r\n            jsonSchema.AddValueTypes(primaryType);\r\n            return 
jsonSchema;\r\n        }\r\n\r\n        public void SetNonSaKeys(string[] nonSaKeys)\r\n        {\r\n            NonSaKeys = nonSaKeys;\r\n        }\r\n\r\n        private void AddAnnotation(SaJsonKeyAnnotation annotation)\r\n        {\r\n            if (annotation.Properties != null)\r\n            {\r\n                AddAnnotationProperties(annotation);\r\n            }\r\n            else\r\n            {\r\n                _sb.Append(annotation.Schema);\r\n                _jsonObject.EndObject();\r\n            }\r\n        }\r\n\r\n        private void AddAnnotationProperties(SaJsonKeyAnnotation annotation)\r\n        {\r\n            AddValueTypes(annotation.Properties.ValueType);\r\n            int numComplexTypes = annotation.Properties.ValueType.JsonDataTypes.Count(x => x.IsComplexType());\r\n            while (numComplexTypes > 0)\r\n            {\r\n                _jsonObject.EndObject();\r\n                numComplexTypes--;\r\n            }\r\n            if (annotation.Properties.Category != CustomAnnotationCategories.Unknown)\r\n                _jsonObject.AddStringValue(\"category\", annotation.Properties.Category.ToString());\r\n            if (annotation.Properties.Description != null)\r\n                _jsonObject.AddStringValue(\"description\", annotation.Properties.Description);\r\n        }\r\n\r\n        private void AddValueTypes(SaJsonValueType jsonValueType)\r\n        {\r\n            foreach (var dataType in jsonValueType.JsonDataTypes)\r\n            {\r\n                AddJsonDataType(dataType);\r\n            }\r\n\r\n        }\r\n\r\n        private void AddJsonDataType(JsonDataType jsonType)\r\n        {\r\n            _jsonObject.AddStringValue(\"type\", jsonType.ToTypeString());\r\n\r\n            if (jsonType.IsComplexType()) _jsonObject.StartObjectWithKey(jsonType.GetSchemaKey());\r\n        }\r\n\r\n        private void AddSchemaVersion() => _jsonObject.AddStringValue(\"$schema\", SchemaVersion);\r\n\r\n        private 
SaJsonValueType GetJsonType(string key) => _keyAnnotation[key].Properties?.ValueType;\r\n        private CustomAnnotationCategories GetCategory(string key) => _keyAnnotation[key].Properties?.Category ?? 0;\r\n\r\n        public void AddAnnotation(string key, SaJsonKeyAnnotation annotation)\r\n        {\r\n            _keyAnnotation.Add(key, annotation);\r\n            KeyCounts.Add(key, 0);\r\n        }\r\n\r\n        public override string ToString()\r\n        {\r\n            if (!_finalized) FinalizeSchema();\r\n            return _sb.ToString();\r\n        }\r\n\r\n        private void FinalizeSchema()\r\n        {\r\n            var requiredKeys = new List<string>();\r\n\r\n            foreach (string key in Keys)\r\n            {\r\n                int counts = KeyCounts[key];\r\n                if (counts == 0 && !NonSaKeys.Contains(key)) continue;\r\n                // boolean is always considered as optional\r\n                if (counts == TotalItems && !GetJsonType(key).Equals(SaJsonValueType.Bool)) requiredKeys.Add(key);\r\n\r\n                OutputKeyAnnotation(key);\r\n            }\r\n\r\n            _jsonObject.EndObject();\r\n            OutputRequiredKeys(requiredKeys);\r\n            DisallowExtraProperites();\r\n\r\n            _jsonObject.EndAllObjects();\r\n            _finalized = true;\r\n        }\r\n\r\n        private void OutputRequiredKeys(IReadOnlyCollection<string> requiredKeys)\r\n        {\r\n            if (requiredKeys.Count > 0) _jsonObject.AddStringValues(\"required\", requiredKeys);\r\n        }\r\n\r\n        private void DisallowExtraProperites()\r\n        {\r\n            _jsonObject.AddStringValue(\"additionalProperties\", \"false\", false);\r\n        }\r\n\r\n        private Action<JsonObject, List<string[]>> GetJsonStringGenerationAction()\r\n        {\r\n            var actions = new List<Action<JsonObject, string[]>>();\r\n\r\n            foreach (string key in Keys)\r\n            {\r\n                if 
(NonSaKeys.Contains(key)) continue;\r\n                var intendedType = GetJsonType(key);\r\n\r\n                if (intendedType.Equals(SaJsonValueType.String))\r\n                {\r\n                    actions.Add((jsonObject, value) => CountKeyIfAdded(jsonObject.AddStringValue(key, value[0]), key));\r\n                }\r\n\r\n                else if (intendedType.Equals(SaJsonValueType.Bool))\r\n                {\r\n                    actions.Add((jsonObject, value) => CountKeyIfAdded(jsonObject.AddBoolValue(key, CheckAndGetBoolFromString(value[0])), key));\r\n                }\r\n\r\n                else if (intendedType.Equals(SaJsonValueType.Number))\r\n                {\r\n                    actions.Add((jsonObject, value) =>\r\n                    {\r\n                        if (value[0] == null) return;\r\n                        var doubleValue = CheckAndGetNullableDoubleFromString(value[0]);\r\n                        CustomAnnotationCategories keyCategory = GetCategory(key);\r\n                        CountKeyIfAdded(keyCategory == CustomAnnotationCategories.AlleleFrequency\r\n                            ? 
jsonObject.AddDoubleValue(key, doubleValue, \"0.######\")\r\n                            : jsonObject.AddStringValue(key, value[0], false), key);\r\n                    });\r\n                }\r\n\r\n                else if (intendedType.Equals(SaJsonValueType.StringArray))\r\n                {\r\n                    actions.Add((jsonObject, value) => CountKeyIfAdded(jsonObject.AddStringValues(key, value), key));\r\n                }\r\n\r\n                else\r\n                {\r\n                    throw new Exception($\"Unknown data type {intendedType}\");\r\n                }\r\n            }\r\n\r\n            return (jsonObject, strings) =>\r\n            {\r\n                foreach (var (action, str) in actions.Zip(strings, (a, b) => (a, b)))\r\n                {\r\n                    action(jsonObject, str);\r\n                }\r\n\r\n                TotalItems++;\r\n            };\r\n        }\r\n\r\n\r\n        public void CountKeyIfAdded(bool keyAdded, string key)\r\n        {\r\n            if (keyAdded) KeyCounts[key]++;\r\n        }\r\n\r\n        public string GetJsonString(List<string[]> values)\r\n        {\r\n            if (_jsonStringGenerationAction == null) _jsonStringGenerationAction = GetJsonStringGenerationAction();\r\n\r\n            if (values.Count != Keys.Count(x => !NonSaKeys.Contains(x)))\r\n                throw new UserErrorException(\"Please provide one and only one value for each JSON key.\");\r\n\r\n            var sb = StringBuilderPool.Get();\r\n            var jsonObject = new JsonObject(sb);\r\n\r\n            _jsonStringGenerationAction(jsonObject, values);\r\n\r\n            return StringBuilderPool.GetStringAndReturn(sb);\r\n        }\r\n\r\n        internal void OutputKeyAnnotation(string key)\r\n        {\r\n            _jsonObject.StartObjectWithKey(key);\r\n\r\n            var annotation = _keyAnnotation[key];\r\n            AddAnnotation(annotation);\r\n\r\n            _jsonObject.EndObject();\r\n        
}\r\n\r\n        internal static bool CheckAndGetBoolFromString(string value)\r\n        {\r\n            switch (value.ToLower())\r\n            {\r\n                case \"true\":\r\n                    return true;\r\n                case \"false\":\r\n                case \"\":\r\n                case \".\":\r\n                    return false;\r\n                default:\r\n                    throw new UserErrorException($\"{value} is not a valid boolean.\");\r\n            }\r\n        }\r\n\r\n        internal static double? CheckAndGetNullableDoubleFromString(string value)\r\n        {\r\n            if (value == \".\" || value == \"\") return null;\r\n\r\n            if (double.TryParse(value, out double doubleValue))\r\n                return doubleValue;\r\n\r\n            throw new UserErrorException($\"{value} is not a valid number.\");\r\n        }\r\n\r\n        public SaJsonSchema GetSubSchema(string key)\r\n        {\r\n            if (!_keyAnnotation.TryGetValue(key, out var annotation))\r\n                throw new KeyNotFoundException($\"{key} is not JSON key.\");\r\n\r\n            return annotation.Schema;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/Schema/SaJsonValueType.cs",
    "content": "﻿using System;\r\nusing System.Linq;\r\nusing VariantAnnotation.SA;\r\n\r\nnamespace SAUtils.Schema\r\n{\r\n    public sealed class SaJsonValueType : IEquatable<SaJsonValueType>\r\n    {\r\n        public JsonDataType[] JsonDataTypes { get; }\r\n\r\n        public static readonly SaJsonValueType Number = Create(JsonDataType.Number);\r\n        public static readonly SaJsonValueType Bool = Create(JsonDataType.Bool);\r\n        public static readonly SaJsonValueType String = Create(JsonDataType.String);\r\n        public static readonly SaJsonValueType Object = Create(JsonDataType.Object);\r\n        public static readonly SaJsonValueType Array = Create(JsonDataType.Array);\r\n        public static readonly SaJsonValueType StringArray = Create(JsonDataType.Array, JsonDataType.String);\r\n        public static readonly SaJsonValueType ObjectArray = Create(JsonDataType.Array, JsonDataType.Object);\r\n\r\n        private SaJsonValueType(params JsonDataType[] dataTypes)\r\n        {\r\n            JsonDataTypes = dataTypes;\r\n        }\r\n\r\n        private static SaJsonValueType Create(params JsonDataType[] dataTypes)\r\n        {\r\n            if (dataTypes.Length > 2) throw new ArgumentException(\"At most two JSON data types are allowed.\");\r\n            if (dataTypes.Length == 2 && !dataTypes[0].IsComplexType()) throw new ArgumentException(\"The first data type must a complex type when two data types provided.\");\r\n            \r\n            return new SaJsonValueType(dataTypes);\r\n        }\r\n\r\n        private bool JsonTypeEquals(params JsonDataType[] dataTypes) => JsonDataTypes.Length == dataTypes.Length &&\r\n            JsonDataTypes.SequenceEqual(dataTypes);\r\n\r\n\r\n        public bool Equals(SaJsonValueType other)\r\n        {\r\n            if (ReferenceEquals(null, other)) return false;\r\n            return ReferenceEquals(this, other) || JsonTypeEquals(other.JsonDataTypes);\r\n        }\r\n\r\n        public override int 
GetHashCode()\r\n        {\r\n            if (JsonDataTypes == null) return 0;\r\n\r\n            unchecked\r\n            {\r\n                return JsonDataTypes.Aggregate(17, (current, jsonDataType) => (current * 1201) ^ jsonDataType.GetHashCode());\r\n            }\r\n        }\r\n\r\n    }\r\n}"
  },
  {
    "path": "SAUtils/SpliceAi/SpliceAiDb.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing VariantAnnotation.Caches;\nusing VariantAnnotation.IO.Caches;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.SpliceAi\n{\n    public static class SpliceAiDb\n    {\n        private static string _inputFile;\n        private static string _compressedReference;\n        private static string _transcriptCachePrefix;\n        private static string _outputDirectory;\n        private static string _geneInfoFile;\n        public static ExitCodes Run(string command, string[] commandArgs)\n\n        {\n            var ops = new OptionSet\n            {\n                {\n                     \"ref|r=\",\n                     \"compressed reference sequence file\",\n                     v => _compressedReference = v\n                },\n                {\n                    \"cache|c=\",\n                    \"Transcript cache prefix\",\n                    v => _transcriptCachePrefix = v\n                },\n                {\n                    \"gene|g=\",\n                    \"SpliceAi gene data\",\n                    v => _geneInfoFile = v\n                },\n                {\n                    \"in|i=\",\n                    \"input VCF file path\",\n                    v => _inputFile = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n             
   .HasRequiredParameter(_transcriptCachePrefix, \"transcript cache file\", \"--cache\")\n                .CheckInputFilenameExists(CacheConstants.TranscriptPath(_transcriptCachePrefix), \"transcript cache prefix\", \"--cache\")\n                .HasRequiredParameter(_inputFile, \"SpliceAI VCF file\", \"--in\")\n                .CheckInputFilenameExists(_inputFile, \"SpliceAI VCF file\", \"--in\")\n                .HasRequiredParameter(_geneInfoFile, \"SpliceAi gene data\", \"--gene\")\n                .CheckInputFilenameExists(_geneInfoFile, \"SpliceAi gene data\", \"--gene\")\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a supplementary database containing 1000 Genomes allele frequencies\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n\n        private static ExitCodes ProgramExecution()\n        {\n            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\n            TranscriptCacheData transcriptData;\n            using (var transcriptCacheReader = new TranscriptCacheReader(FileUtilities.GetReadStream(CacheConstants.TranscriptPath(_transcriptCachePrefix))))\n            {\n                transcriptData = transcriptCacheReader.Read(referenceProvider.RefIndexToChromosome);\n            }\n\n            var spliceIntervals    = SpliceUtilities.GetSpliceIntervals(referenceProvider, transcriptData);\n            var nirEnstToGeneSymbols  = SpliceUtilities.GetEnstToGeneSymbols(referenceProvider, transcriptData);\n\n            Dictionary<string, string> spliceAiEnstToGeneSymbols;\n            using (var reader = new StreamReader(GZipUtilities.GetAppropriateReadStream(_geneInfoFile)))\n            {\n            
    spliceAiEnstToGeneSymbols = SpliceUtilities.GetSpliceAiGeneSymbols(reader);\n            }\n\n            var spliceAiToNirvanaGeneSymbols =\n                SpliceUtilities.GetSymbolMapping(spliceAiEnstToGeneSymbols, nirEnstToGeneSymbols);\n\n            Console.WriteLine($\"Mapped {spliceAiToNirvanaGeneSymbols.Count} spliceAI gene symbols to Nirvana gene symbols (out of {spliceAiEnstToGeneSymbols.Count})\");\n\n            var version        = DataSourceVersionReader.GetSourceVersion(_inputFile + \".version\");\n            string outFileName = $\"{version.Name}_{version.Version}\";\n\n            using (var spliceAiParser = new SpliceAiParser(\n                GZipUtilities.GetAppropriateReadStream(_inputFile), \n                referenceProvider, spliceIntervals, spliceAiToNirvanaGeneSymbols))\n            using (var nsaStream   = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix)))\n            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSuffix)))\n            using (var nsaWriter   = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.SpliceAiTag, true, true, SaCommon.SchemaVersion, false))\n            {\n                nsaWriter.Write(spliceAiParser.GetItems());\n            }\n\n            Console.WriteLine($\"Total number of entries from Splice AI: {SpliceAiParser.Count}\");\n            return ExitCodes.Success;\n        }\n\n        \n    }\n}"
  },
  {
    "path": "SAUtils/SpliceAi/SpliceAiItem.cs",
    "content": "﻿using Genome;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\n\nnamespace SAUtils.SpliceAi\n{\n    public sealed class SpliceAiItem:ISupplementaryDataItem\n    {\n        public Chromosome Chromosome { get; }\n        public int Position { get; set; }\n        public string RefAllele { get; set; }\n        public string AltAllele { get; set; }\n        public string Hgnc { get; set; }\n        public const double MinSpliceAiScore = 0.1;\n        private readonly bool _isSpliceAdjacent;\n\n        private readonly double _acceptorGainScore;\n        private readonly double _acceptorLossScore;\n        private readonly double _donorGainScore;\n        private readonly double _donorLossScore;\n\n        private readonly int _acceptorGainPosition;\n        private readonly int _acceptorLossPosition;\n        private readonly int _donorGainPosition;\n        private readonly int _donorLossPosition;\n\n        public SpliceAiItem(Chromosome chromosome, int position, string refAllele, string altAllele, string hgnc,\n            double acceptorGainScore, double acceptorLossScore, double donorGainScore, double donorLossScore,\n            int acceptorGainPosition, int acceptorLossPosition, int donorGainPosition, int donorLossPosition,\n            bool isSpliceAdjacent)\n        {\n            Chromosome = chromosome;\n            Position   = position;\n            RefAllele  = refAllele;\n            AltAllele  = altAllele;\n\n            Hgnc                  = hgnc;\n            _acceptorGainScore    = acceptorGainScore;\n            _acceptorLossScore    = acceptorLossScore;\n            _donorGainScore       = donorGainScore;\n            _donorLossScore        = donorLossScore;\n            _acceptorGainPosition = acceptorGainPosition;\n            _acceptorLossPosition = acceptorLossPosition;\n            _donorGainPosition    = donorGainPosition;\n            _donorLossPosition    = donorLossPosition;\n    
        _isSpliceAdjacent     = isSpliceAdjacent;\n        }\n\n        public string GetJsonString()\n        {\n            var sb = StringBuilderPool.Get();\n            var jsonObject = new JsonObject(sb);\n\n            jsonObject.AddStringValue(\"hgnc\", Hgnc);\n            if (_isSpliceAdjacent)\n            {\n                jsonObject.AddDoubleValue(\"acceptorGainScore\", _acceptorGainScore, \"0.#\");\n                jsonObject.AddDoubleValue(\"acceptorGainDistance\", _acceptorGainPosition);\n\n                jsonObject.AddDoubleValue(\"acceptorLossScore\", _acceptorLossScore, \"0.#\");\n                jsonObject.AddDoubleValue(\"acceptorLossDistance\", _acceptorLossPosition);\n\n                jsonObject.AddDoubleValue(\"donorGainScore\", _donorGainScore, \"0.#\");\n                jsonObject.AddDoubleValue(\"donorGainDistance\", _donorGainPosition);\n\n                jsonObject.AddDoubleValue(\"donorLossScore\", _donorLossScore, \"0.#\");\n                jsonObject.AddDoubleValue(\"donorLossDistance\", _donorLossPosition);\n            }\n            else\n            {\n                if (_acceptorGainScore >= MinSpliceAiScore)\n                {\n                    jsonObject.AddDoubleValue(\"acceptorGainScore\", _acceptorGainScore, \"0.#\");\n                    jsonObject.AddDoubleValue(\"acceptorGainDistance\", _acceptorGainPosition);\n                }\n\n                if (_acceptorLossScore >= MinSpliceAiScore)\n                {\n                    jsonObject.AddDoubleValue(\"acceptorLossScore\", _acceptorLossScore, \"0.#\");\n                    jsonObject.AddDoubleValue(\"acceptorLossDistance\", _acceptorLossPosition);\n                }\n\n                if (_donorGainScore >= MinSpliceAiScore)\n                {\n                    jsonObject.AddDoubleValue(\"donorGainScore\", _donorGainScore, \"0.#\");\n                    jsonObject.AddDoubleValue(\"donorGainDistance\", _donorGainPosition);\n\n                }\n\n             
   if (_donorLossScore >= MinSpliceAiScore)\n                {\n                    jsonObject.AddDoubleValue(\"donorLossScore\", _donorLossScore, \"0.#\");\n                    jsonObject.AddDoubleValue(\"donorLossDistance\", _donorLossPosition);\n\n                }\n            }\n\n            return StringBuilderPool.GetStringAndReturn(sb);\n        }\n\n        public string InputLine { get; set; }\n    }\n}"
  },
  {
    "path": "SAUtils/SpliceAi/SpliceAiParser.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.Data;\nusing System.IO;\nusing Intervals;\nusing IO;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.IO;\nusing VariantAnnotation.Interface.Providers;\nusing Variants;\n\nnamespace SAUtils.SpliceAi\n{\n    public sealed class SpliceAiParser:IDisposable\n    {\n        private readonly Stream _stream;\n        private readonly ISequenceProvider _sequenceProvider;\n        private readonly Dictionary<ushort, IntervalArray<byte>> _spliceIntervals;\n        private readonly HashSet<string> _unresolvedSymbols;\n        public static int Count;\n\n        private string _geneSymbol;\n        private double _acceptorGainScore;\n        private double _acceptorLossScore;\n        private double _donorGainScore;\n        private double _donorLossScore;\n\n        private int _acceptorGainPosition;\n        private int _acceptorLossPosition;\n        private int _donorGainPosition;\n        private int _donorLossPosition;\n\n        private readonly Dictionary<string, string> _spliceToNirvanaSymbols;\n\n        public SpliceAiParser(Stream stream, ISequenceProvider sequenceProvider, Dictionary<ushort, IntervalArray<byte>> spliceIntervals, Dictionary<string, string> spliceToNirGeneSymbols)\n        {\n            _stream                 = stream;\n            _sequenceProvider       = sequenceProvider;\n            _spliceIntervals        = spliceIntervals;\n            _spliceToNirvanaSymbols = spliceToNirGeneSymbols;\n            _unresolvedSymbols      = new HashSet<string>();\n        }\n\n        public IEnumerable<SpliceAiItem> GetItems()\n        {\n            using (var reader = FileUtilities.GetStreamReader(_stream))\n            {\n                string line;\n                while ((line = reader.ReadLine()) != null)\n                {\n                    // Skip empty lines.\n                    if (string.IsNullOrWhiteSpace(line)) continue;\n\n                    // 
comments may contain the Format field\n                    if (line.OptimizedStartsWith('#'))\n                    {\n                        if (line.Contains(\"Format:\")) GetFieldIndices(line);\n                        continue;\n                    }\n\n                    var item = ExtractItem(line);\n                    if (item == null) continue;\n                    UpdateGeneSymbol(item);\n                    if (string.IsNullOrEmpty(item.Hgnc)) continue;\n                    yield return item;\n                    \n                }\n            }\n            \n            Console.WriteLine($\"{_unresolvedSymbols.Count} unresolved gene symbols encountered. Symbols:\");\n            foreach (var symbol in _unresolvedSymbols)\n            {\n                Console.Write(symbol+',');\n            }\n        }\n\n        //##INFO=<ID=SpliceAI,Number=.,Type=String,Description=\"SpliceAIv1.3 variant annotation. These include delta scores (DS) and delta positions (DP) \n        //for acceptor gain (AG), acceptor loss (AL), donor gain (DG), and donor loss (DL). 
Format: ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL\">\n        private int _geneSymbolIndex = -1;\n        private int _dsAgIndex = -1;\n        private int _dsAlIndex = -1;\n        private int _dsDgIndex = -1;\n        private int _dsDlIndex = -1;\n        private int _dpAgIndex = -1;\n        private int _dpAlIndex = -1;\n        private int _dpDgIndex = -1;\n        private int _dpDlIndex = -1;\n\n        private const string GeneSymbolTag = \"SYMBOL\";\n        private const string DsAgTag = \"DS_AG\";\n        private const string DsAlTag = \"DS_AL\";\n        private const string DsDgTag = \"DS_DG\";\n        private const string DsDlTag = \"DS_DL\";\n        private const string DpAgTag = \"DP_AG\";\n        private const string DpAlTag = \"DP_AL\";\n        private const string DpDgTag = \"DP_DG\";\n        private const string DpDlTag = \"DP_DL\";\n\n        private void GetFieldIndices(string line) {\n            var format = line.Split(\"Format:\")[1];\n            format = format.EndsWith(\"\\\">\") ? 
format.Substring(0, format.Length - 2): format;\n            var fields = format.OptimizedSplit('|');\n            \n            _geneSymbolIndex = Array.IndexOf(fields, GeneSymbolTag);\n\n            _dsAgIndex = Array.IndexOf(fields, DsAgTag);\n            _dsDgIndex = Array.IndexOf(fields, DsDgTag);\n            _dsAlIndex = Array.IndexOf(fields, DsAlTag);\n            _dsDlIndex = Array.IndexOf(fields, DsDlTag);\n\n            _dpAgIndex = Array.IndexOf(fields, DpAgTag);\n            _dpDgIndex = Array.IndexOf(fields, DpDgTag);\n            _dpAlIndex = Array.IndexOf(fields, DpAlTag);\n            _dpDlIndex = Array.IndexOf(fields, DpDlTag);\n        }\n        \n        \n        /// <summary>\n        /// Extracts a splice AI item from the specified VCF line.\n        /// </summary>\n        /// <param name=\"vcfLine\"></param>\n        /// <returns></returns>\n        private SpliceAiItem ExtractItem(string vcfLine)\n        {\n            var splitLine = vcfLine.Split('\\t');\n            if (splitLine.Length < VcfCommon.InfoIndex+1) return null;\n\n            var chromosomeName = splitLine[VcfCommon.ChromIndex];\n            if (!_sequenceProvider.RefNameToChromosome.ContainsKey(chromosomeName)) return null;\n\n            var chromosome = _sequenceProvider.RefNameToChromosome[chromosomeName];\n            var position   = int.Parse(splitLine[VcfCommon.PosIndex]);\n            var refAllele  = splitLine[VcfCommon.RefIndex];\n            var altAllele  = splitLine[VcfCommon.AltIndex];\n            \n            if (altAllele.Contains(',')) throw new DataException($\"multiple alt allele present for {chromosome}-{position}\");\n\n            var start = position;\n            //skipping insertions/deletions that were shifted\n            if (VariantUtils.IsLeftShiftPossible(refAllele, altAllele)) return null;\n            (start, refAllele, altAllele) = BiDirectionalTrimmer.Trim(start, refAllele, altAllele);\n            \n            var end = start + 
refAllele.Length - 1;\n            var isSpliceAdjacent = _spliceIntervals[chromosome.Index].OverlapsAny(start, end);\n            \n            ParseInfoField(splitLine[VcfCommon.InfoIndex]);\n            \n            if (!HasSignificantScore() && !isSpliceAdjacent) return null;\n            \n            Count++;\n            return new SpliceAiItem(chromosome, start, refAllele, altAllele, _geneSymbol,\n                _acceptorGainScore, _acceptorLossScore, _donorGainScore, _donorLossScore,\n                _acceptorGainPosition, _acceptorLossPosition, _donorGainPosition, _donorLossPosition, isSpliceAdjacent);\n        }\n\n        private void UpdateGeneSymbol(SpliceAiItem item)\n        {\n            if (_spliceToNirvanaSymbols.TryGetValue(item.Hgnc, out var nirHgnc)) item.Hgnc = nirHgnc;\n            else\n            {\n                _unresolvedSymbols.Add(item.Hgnc);\n            }\n        }\n\n        private bool HasSignificantScore()\n        {\n            return _acceptorLossScore >= SpliceAiItem.MinSpliceAiScore ||\n                   _acceptorGainScore >= SpliceAiItem.MinSpliceAiScore ||\n                   _donorGainScore    >= SpliceAiItem.MinSpliceAiScore ||\n                   _donorLossScore    >= SpliceAiItem.MinSpliceAiScore;\n        }\n\n        //1       69091   .       A       C       .       .       
SpliceAI=C|OR4F5|0.01|0.00|0.00|0.00|42|25|24|2\n        private void ParseInfoField(string infoFields)\n        {\n            Clear();\n            if (infoFields == \"\" || infoFields == \".\") return;\n            var values = infoFields.OptimizedSplit('|');\n\n            _geneSymbol = values[_geneSymbolIndex];\n            _acceptorGainScore = Convert.ToDouble(values[_dsAgIndex]);\n            _acceptorLossScore = Convert.ToDouble(values[_dsAlIndex]);\n            _donorGainScore = Convert.ToDouble(values[_dsDgIndex]);\n            _donorLossScore = Convert.ToDouble(values[_dsDlIndex]);\n\n            _acceptorGainPosition = Convert.ToInt32(values[_dpAgIndex]);\n            _acceptorLossPosition = Convert.ToInt32(values[_dpAlIndex]);\n            _donorGainPosition = Convert.ToInt32(values[_dpDgIndex]);\n            _donorLossPosition = Convert.ToInt32(values[_dpDlIndex]);\n        }\n\n        private void Clear()\n        {\n            _geneSymbol = null;\n\n            _acceptorGainScore = 0;\n            _acceptorLossScore = 0;\n            _donorGainScore    = 0;\n            _donorLossScore    = 0;\n\n            _acceptorGainPosition = int.MaxValue;\n            _acceptorLossPosition = int.MaxValue;\n            _donorGainPosition    = int.MaxValue;\n            _donorLossPosition    = int.MaxValue;\n        }\n\n        public void Dispose()\n        {\n            _stream?.Dispose();\n        }\n    }\n\n}"
  },
  {
    "path": "SAUtils/SpliceAi/SpliceUtilities.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing Intervals;\nusing OptimizedCore;\nusing VariantAnnotation.Caches;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.Interface.Providers;\n\nnamespace SAUtils.SpliceAi\n{\n    public static class SpliceUtilities\n    {\n        public const int SpliceFlankLength = 15;\n        public static Dictionary<ushort, IntervalArray<byte>> GetSpliceIntervals(ISequenceProvider sequenceProvider, TranscriptCacheData transcriptData)\n        {\n            var cache = transcriptData.GetCache();\n\n            var spliceIntervalDict = new Dictionary<ushort, IntervalArray<byte>>(sequenceProvider.RefIndexToChromosome.Count);\n\n            foreach (var chromIndex in sequenceProvider.RefIndexToChromosome.Keys)\n            {\n                var spliceIntervals = new List<Interval<byte>>(8 * 1024);\n                var overlappingTranscripts =\n                    cache.TranscriptIntervalForest.GetAllOverlappingValues(chromIndex, 1, int.MaxValue);\n\n                if (overlappingTranscripts == null) continue;\n\n                foreach (var transcript in overlappingTranscripts)\n                {\n                    bool isFirstExon = true;\n                    foreach (var transcriptRegion in transcript.TranscriptRegions)\n                    {\n                        if (transcriptRegion.Type != TranscriptRegionType.Exon) continue;\n                        var firstSplicePosition = transcriptRegion.Start;\n                        var secondSplicePosition = transcriptRegion.End;\n\n                        var firstInterval = new Interval<byte>(firstSplicePosition - SpliceFlankLength, firstSplicePosition + SpliceFlankLength, 0);\n                        var secondInterval = new Interval<byte>(secondSplicePosition - SpliceFlankLength, secondSplicePosition + SpliceFlankLength, 0);\n\n                        if(!isFirstExon) 
spliceIntervals.Add(firstInterval);\n                        spliceIntervals.Add(secondInterval);\n                        isFirstExon = false;\n                    }\n                    //remove the last added interval since this is the tail of the last exon- which is not a splice site\n                    if(spliceIntervals.Count > 0)spliceIntervals.RemoveAt(spliceIntervals.Count - 1);\n\n                }\n\n                spliceIntervalDict[chromIndex] = new IntervalArray<byte>(spliceIntervals.OrderBy(x => x.Begin).ThenBy(x => x.End).ToArray());\n            }\n\n            return spliceIntervalDict;\n        }\n\n        public static Dictionary<string, string> GetEnstToGeneSymbols(ISequenceProvider sequenceProvider, TranscriptCacheData transcriptData)\n        {\n            var cache = transcriptData.GetCache();\n            var enstToGeneSymbols = new Dictionary<string, string>();\n\n            foreach (var chromIndex in sequenceProvider.RefIndexToChromosome.Keys)\n            {\n                var overlappingTranscripts =\n                    cache.TranscriptIntervalForest.GetAllOverlappingValues(chromIndex, 1, int.MaxValue);\n\n                if (overlappingTranscripts == null) continue;\n\n                foreach (var transcript in overlappingTranscripts)\n                {\n                    if (transcript.Id.WithoutVersion.StartsWith(\"ENST\"))\n                        enstToGeneSymbols[transcript.Id.WithoutVersion] = transcript.Gene.Symbol;\n                }\n\n            }\n\n            return enstToGeneSymbols;\n        }\n\n        public static Dictionary<string, string> GetSpliceAiGeneSymbols(StreamReader reader)\n        {\n            var enstToGeneSymbols = new Dictionary<string, string>();\n            string line;\n            while ((line = reader.ReadLine()) != null)\n            {\n                var splits = line.OptimizedSplit('\\t');\n                var geneSymbol = splits[0];\n                var ensemblId = 
splits[1].OptimizedSplit('.')[0];\n\n                enstToGeneSymbols[ensemblId] = geneSymbol;\n            }\n\n            return enstToGeneSymbols;\n        }\n\n        public static Dictionary<string, string> GetSymbolMapping(Dictionary<string, string> spliceAiEnstToGeneSymbols, Dictionary<string, string> nirEnstToGeneSymbols)\n        {\n            var spliceToNirSymbols= new Dictionary<string, string>();\n            foreach (var (spliceEnst, spliceGene) in spliceAiEnstToGeneSymbols)\n            {\n                if (nirEnstToGeneSymbols.TryGetValue(spliceEnst, out var nirGene))\n                    spliceToNirSymbols[spliceGene] = nirGene;\n            }\n\n            return spliceToNirSymbols;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/gnomAD/GnomadSnvMain.cs",
    "content": "﻿using System;\nusing System.IO;\nusing System.Threading.Tasks;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing ErrorHandling.Exceptions;\nusing IO;\nusing OptimizedCore;\nusing SAUtils.InputFileParsers;\nusing SAUtils.NsaConcatenator;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.gnomAD\n{\n    public sealed class GnomadSnvMain\n    {\n        private static string _genomeDirectory;\n        private static string _exomeDirectory;\n        private static string _compressedReference;\n        private static string _outputDirectory;\n        private static string _tempDirectory;\n\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                     \"ref|r=\",\n                     \"compressed reference sequence file\",\n                     v => _compressedReference = v\n                 },\n                {\n                    \"genome|g=\",\n                    \"input directory containing VCF (and .version) files with genomic frequencies\",\n                    v => _genomeDirectory = v\n                },\n                {\n                    \"exome|e=\",\n                    \"input directory containing VCF (and .version) files with exomic frequencies\",\n                    v => _exomeDirectory = v\n                },\n                {\n                    \"temp|t=\",\n                    \"output temp directory for intermediate (per chrom) NSA files\",\n                    v => _tempDirectory = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory for NSA file\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            var commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new 
ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .CheckDirectoryExists(_genomeDirectory, \"input directory containing genome vcf files\", \"--genome\")\n                .CheckDirectoryExists(_outputDirectory, \"output Supplementary directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Reads provided supplementary data files and populates tsv files\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n        \n        private static ExitCodes ProgramExecution()\n        {\n            //clearing temp directory\n            if (!Directory.Exists(_tempDirectory)) Directory.CreateDirectory(_tempDirectory);\n            Console.WriteLine($\"Cleaning {SaCommon.SaFileSuffix} and {SaCommon.IndexSuffix} files from temp directory {_tempDirectory}\");\n            foreach (var file in Directory.GetFiles(_tempDirectory, $\"*{SaCommon.SaFileSuffix}\"))\n            {\n                File.Delete(file);\n            }\n            foreach (var file in Directory.GetFiles(_tempDirectory, $\"*{SaCommon.SaFileSuffix}{SaCommon.IndexSuffix}\"))\n            {\n                File.Delete(file);\n            }\n\n            var version     = GetVersion();\n\n            var genomeFiles = GetVcfFiles(_genomeDirectory);\n            var exomeFiles = GetVcfFiles(_exomeDirectory);\n            const int degOfParalleleism = 12; //hard coding since we are IO bound and stressing the disk doesn't help\n            Console.WriteLine($\"Creating merged gnomAD database file from {genomeFiles.Length + exomeFiles.Length} input files. 
Degree of parallelism {degOfParalleleism}\");\n\n            Parallel.ForEach(\n                genomeFiles,\n                new ParallelOptions { MaxDegreeOfParallelism = degOfParalleleism },\n                genomeFile => CreateNsa(exomeFiles, genomeFile, version)\n                );\n            string outFileName = Path.Combine(_outputDirectory, $\"{version.Name}_{version.Version}\");\n\n            //concat the nsa files\n            Console.WriteLine(\"Concatenating per chromosome nsa files\");\n            var tempNsaFiles = Directory.GetFiles(_tempDirectory, $\"*{SaCommon.SaFileSuffix}\");\n            ConcatUtilities.ConcatenateNsaFiles(tempNsaFiles, outFileName);\n\n            return ExitCodes.Success;\n        }\n\n        private static void CreateNsa(string[] exomeFiles, string genomeFile, DataSourceVersion version) {\n            Console.WriteLine($\"Processing file: {genomeFile}\");\n            var outName = Path.GetFileNameWithoutExtension(genomeFile);\n\n            using (var exomeReader = GetExomeReader(exomeFiles, genomeFile))\n            using (var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference)))\n            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_tempDirectory, outName + SaCommon.SaFileSuffix)))\n            using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_tempDirectory, outName + SaCommon.SaFileSuffix + SaCommon.IndexSuffix)))\n            using (var nsaWriter = new NsaWriter(nsaStream, indexStream, version, referenceProvider, SaCommon.GnomadTag, true, false, SaCommon.SchemaVersion, false))\n            using (var reader = GZipUtilities.GetAppropriateStreamReader(genomeFile))\n            {\n                var gnomadReader = new GnomadSnvReader(reader, exomeReader, referenceProvider);\n                var count = nsaWriter.Write(gnomadReader.GetCombinedItems());\n                Console.WriteLine($\"Wrote {count} items to NSA file.\");\n  
          }\n        }\n\n        private static StreamReader GetExomeReader(string[] exomeFileNames, string genomeFileName)\n        {\n            if (exomeFileNames == null || exomeFileNames.Length == 0) return null;\n            string chromName = GetChromName(genomeFileName);\n            string exomeFileName = null;\n            foreach (string fileName in exomeFileNames)\n            {\n                string exomeChrom = GetChromName(fileName);\n                if (chromName != exomeChrom) continue;\n                exomeFileName = fileName;\n                break;\n            }\n            return string.IsNullOrEmpty(exomeFileName) ? null : GZipUtilities.GetAppropriateStreamReader(exomeFileName);\n        }\n\n        private static string GetChromName(string filePath)\n        {\n            // the files are named in a consistent format that allows us to match files by chrom names\n            // e.g. gnomad.exomes.r2.1.sites.grch38.chr1_noVEP.vcf.gz or chr18.vcf.bgz\n            var fileName = Path.GetFileName(filePath);\n            foreach (var component in fileName.OptimizedSplit('.'))\n            {\n                if (component.StartsWith(\"chr\")) return component.OptimizedSplit('_')[0];\n            }\n\n            return null;\n        }\n\n        private static DataSourceVersion GetVersion()\n        {\n            var genomeVersionFiles = Directory.GetFiles(_genomeDirectory, \"*.version\");\n            if (genomeVersionFiles.Length != 1)\n                throw new InvalidDataException($\"Only one .version file should exist in: {_genomeDirectory}\");\n            var genomeVersion = DataSourceVersionReader.GetSourceVersion(genomeVersionFiles[0]);\n\n            if (string.IsNullOrEmpty(_exomeDirectory)) return genomeVersion;\n\n            var exomeVersionFiles = Directory.GetFiles(_exomeDirectory, \"*.version\");\n            if (exomeVersionFiles.Length != 1)\n                throw new InvalidDataException($\"Only one .version file 
should exist in: {_exomeDirectory}\");\n            var exomeVersion = DataSourceVersionReader.GetSourceVersion(exomeVersionFiles[0]);\n\n            if (genomeVersion.Version != exomeVersion.Version)\n                throw new DataMisalignedException(\n                    $\"Version mismatch! Genome version: {genomeVersion.Version}, Exome Version: {exomeVersion.Version}.\");\n            return genomeVersion;\n        }\n\n        private static string[] GetVcfFiles(string directory)\n        {\n            if (string.IsNullOrEmpty(directory)) return new string[]{};\n            // the files might have gz or bgz extensions\n            var files = Directory.GetFiles(directory, \"*.vcf.bgz\");\n            if(files.Length == 0)\n                files = Directory.GetFiles(directory, \"*.vcf.gz\");\n\n            if (files.Length == 0)\n                throw new UserErrorException($\"{directory} does not contain any VCF files\");\n\n            return files;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/gnomAD/GnomadSnvReader.cs",
    "content": "using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing OptimizedCore;\nusing SAUtils.DataStructures;\nusing VariantAnnotation.Interface.IO;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Interface.SA;\nusing Variants;\n\nnamespace SAUtils.gnomAD\n{\n    public sealed class GnomadSnvReader\n    {\n        private readonly StreamReader      _genomeReader;\n        private readonly StreamReader      _exomeReader;\n        private readonly ISequenceProvider _sequenceProvider;\n\n        private int[] _acAll;\n        private int[] _acAfr;\n        private int[] _acAmr;\n        private int[] _acEas;\n        private int[] _acFin;\n        private int[] _acNfe;\n        private int[] _acOth;\n        private int[] _acAsj;\n        private int[] _acSas;\n\n        private int _anAll;\n        private int _anAfr;\n        private int _anAmr;\n        private int _anEas;\n        private int _anFin;\n        private int _anNfe;\n        private int _anOth;\n        private int _anAsj;\n        private int _anSas;\n\n        //male numbers\n        private int[] _acMale;\n        private int   _anMale;\n        private int[] _hcMale;\n\n        //female numbers\n        private int[] _acFemale;\n        private int   _anFemale;\n        private int[] _hcFemale;\n\n        private int[] _hcAll;\n        private int[] _hcAfr;\n        private int[] _hcAmr;\n        private int[] _hcEas;\n        private int[] _hcFin;\n        private int[] _hcNfe;\n        private int[] _hcOth;\n        private int[] _hcAsj;\n        private int[] _hcSas;\n\n        // controls\n        private int[] _control_acAll;\n        private int   _control_anAll;\n\n        private int? 
_totalDepth;\n\n        public GnomadSnvReader(StreamReader genomeReader, StreamReader exomeReader, ISequenceProvider sequenceProvider)\n        {\n            _genomeReader     = genomeReader;\n            _exomeReader      = exomeReader;\n            _sequenceProvider = sequenceProvider;\n        }\n\n        private void Clear()\n        {\n            _acAll = null;\n            _acAfr = null;\n            _acAmr = null;\n            _acEas = null;\n            _acFin = null;\n            _acNfe = null;\n            _acOth = null;\n            _acAsj = null;\n            _acSas = null;\n\n            _anAll = 0;\n            _anAfr = 0;\n            _anAmr = 0;\n            _anEas = 0;\n            _anFin = 0;\n            _anNfe = 0;\n            _anOth = 0;\n            _anAsj = 0;\n            _anSas = 0;\n\n            _acMale = null;\n            _anMale = 0;\n            _hcMale = null;\n\n            _acFemale = null;\n            _anFemale = 0;\n            _hcFemale = null;\n\n            _hcAll = null;\n            _hcAfr = null;\n            _hcAmr = null;\n            _hcEas = null;\n            _hcFin = null;\n            _hcNfe = null;\n            _hcOth = null;\n            _hcAsj = null;\n            _hcSas = null;\n\n            //control\n            _control_acAll = null;\n            _control_anAll = 0;\n\n            _totalDepth = null;\n        }\n\n        /// <summary>\n        /// Merging genomic an exomic items to create one stream of gnomad entries\n        /// </summary>\n        /// <returns></returns>\n        public IEnumerable<GnomadItem> GetCombinedItems()\n        {\n            using (var genomeEnumerator = GetItems(_genomeReader, GnomadDataType.Genome).GetEnumerator())\n            using (var exomeEnumerator = GetItems(_exomeReader, GnomadDataType.Exome).GetEnumerator())\n            {\n                var hasGenomicItem = genomeEnumerator.MoveNext();\n                var hasExomeItem   = exomeEnumerator.MoveNext();\n\n      
          var minHeap = new MinHeap<GnomadItem>(GnomadItem.CompareTo);\n                while (hasExomeItem && hasGenomicItem)\n                {\n                    var genomeItem = genomeEnumerator.Current;\n                    var exomeItem  = exomeEnumerator.Current;\n                    var position   = Math.Min(genomeItem.Position, exomeItem.Position);\n\n                    while (hasGenomicItem && genomeItem.Position == position)\n                    {\n                        //all items for a position should be gathered so as to resolve conflicts properly\n                        minHeap.Add(GnomadUtilities.GetNormalizedItem(genomeItem, _sequenceProvider));\n                        hasGenomicItem = genomeEnumerator.MoveNext();\n                        genomeItem     = genomeEnumerator.Current;\n                    }\n\n                    while (hasExomeItem && exomeItem.Position == position)\n                    {\n                        minHeap.Add(GnomadUtilities.GetNormalizedItem(exomeItem, _sequenceProvider));\n                        hasExomeItem = exomeEnumerator.MoveNext();\n                        exomeItem    = exomeEnumerator.Current;\n                    }\n\n                    // at this point, the min heap should not be empty\n                    int heapPosition = minHeap.GetMin().Position;\n\n                    while (minHeap.Count() > 0 && heapPosition < position - VariantUtils.MaxUpstreamLength)\n                    {\n                        var (genomeItems, exomeItems) = GetMinItems(minHeap);\n                        foreach (var item in GnomadUtilities.GetMergedItems(genomeItems, exomeItems).Values)\n                        {\n                            if (item.AllAlleleNumber == null || item.AllAlleleNumber.Value == 0) continue;\n                            yield return item;\n                        }\n                    }\n                }\n\n                //flush out the last positions in heap\n                while 
(minHeap.Count() > 0)\n                {\n                    var (genomeItems, exomeItems) = GetMinItems(minHeap);\n                    foreach (var item in GnomadUtilities.GetMergedItems(genomeItems, exomeItems).Values)\n                        yield return item;\n                }\n\n                //now, only one of the iterator is left\n                if (hasGenomicItem)\n                    foreach (var item in GetRemainingItems(genomeEnumerator))\n                        yield return item;\n\n                if (hasExomeItem)\n                    foreach (var item in GetRemainingItems(exomeEnumerator))\n                        yield return item;\n            }\n        }\n\n        private static (Dictionary<(string refAllele, string altAllele), GnomadItem> genomeItems,\n            Dictionary<(string refAllele, string altAllele), GnomadItem> exomeItems) GetMinItems(MinHeap<GnomadItem> minHeap)\n        {\n            var genomeItems = new List<ISupplementaryDataItem>();\n            var exomeItems  = new List<ISupplementaryDataItem>();\n\n            if (minHeap.Count() == 0) return (null, null);\n            var position = minHeap.GetMin().Position;\n\n            while (minHeap.Count() > 0 && minHeap.GetMin().Position == position)\n            {\n                var item = minHeap.ExtractMin();\n                if (item.DataType == GnomadDataType.Genome) genomeItems.Add(item);\n                else exomeItems.Add(item);\n            }\n\n            genomeItems = SuppDataUtilities.RemoveConflictingAlleles(genomeItems, false);\n            exomeItems  = SuppDataUtilities.RemoveConflictingAlleles(exomeItems,  false);\n\n            var genomeItemsByAllele = new Dictionary<(string refAllele, string altAllele), GnomadItem>();\n            foreach (var item in genomeItems)\n            {\n                genomeItemsByAllele.Add((item.RefAllele, item.AltAllele), (GnomadItem) item);\n            }\n\n            var exomeItemsByAllele = new Dictionary<(string 
refAllele, string altAllele), GnomadItem>();\n            foreach (var item in exomeItems)\n            {\n                exomeItemsByAllele.Add((item.RefAllele, item.AltAllele), (GnomadItem) item);\n            }\n\n            return (genomeItemsByAllele, exomeItemsByAllele);\n        }\n\n        private IEnumerable<GnomadItem> GetRemainingItems(IEnumerator<GnomadItem> enumerator)\n        {\n            do\n            {\n                var item = enumerator.Current;\n                if (item == null) yield break;\n                if (item.AllAlleleNumber == null || item.AllAlleleNumber.Value == 0) continue;\n                yield return GnomadUtilities.GetNormalizedItem(item, _sequenceProvider);\n            } while (enumerator.MoveNext());\n        }\n\n        /// <summary>\n        /// Parses a source file and return an enumeration object containing \n        /// all the data objects that have been extracted.\n        /// </summary>\n        /// <returns></returns>\n        private IEnumerable<GnomadItem> GetItems(StreamReader reader, GnomadDataType type)\n        {\n            if (reader == null) yield break;\n            using (reader)\n            {\n                string line;\n                while ((line = reader.ReadLine()) != null)\n                {\n                    // Skip empty lines.\n                    if (string.IsNullOrWhiteSpace(line)) continue;\n\n                    // Skip comments.\n                    if (line.OptimizedStartsWith('#')) continue;\n                    var items = ExtractItems(line, type);\n                    if (items == null) continue;\n                    foreach (var item in items)\n                    {\n                        yield return item;\n                    }\n                }\n            }\n        }\n\n\n        /// <summary>\n        /// Extracts a gnomad item(s) from the specified VCF line.\n        /// </summary>\n        /// <param name=\"line\"></param>\n        /// <param 
name=\"type\"></param>\n        /// <returns></returns>\n        private List<GnomadItem> ExtractItems(string line, GnomadDataType type)\n        {\n            if (line == null) return null;\n            var splitLine = line.OptimizedSplit('\\t');\n\n            if (splitLine.Length < 8) return null;\n\n            Clear();\n\n            var chromosome = splitLine[VcfCommon.ChromIndex];\n            if (!_sequenceProvider.RefNameToChromosome.ContainsKey(chromosome)) return null;\n\n            var chrom      = _sequenceProvider.RefNameToChromosome[chromosome];\n            var position   = int.Parse(splitLine[VcfCommon.PosIndex]);\n            var refAllele  = splitLine[VcfCommon.RefIndex];\n            var altAlleles = splitLine[VcfCommon.AltIndex].OptimizedSplit(',');\n            var filters    = splitLine[VcfCommon.FilterIndex];\n            var infoFields = splitLine[VcfCommon.InfoIndex];\n\n            var hasFailedFilters = !(filters.Equals(\"PASS\") || filters.Equals(\".\"));\n\n            // parses the info fields and extract frequencies, coverage, num samples.\n            ParseInfoField(infoFields);\n\n            var gnomadItemsList = new List<GnomadItem>();\n\n            for (int i = 0; i < altAlleles.Length; i++)\n            {\n                gnomadItemsList.Add(new GnomadItem(\n                    chrom,\n                    position,\n                    refAllele,\n                    altAlleles[i],\n                    _totalDepth,\n                    _anAll, _anAfr, _anAmr, _anEas, _anFin, _anNfe, _anOth, _anAsj, _anSas, _anMale, _anFemale,\n                    GetCount(_acAll,  i), GetCount(_acAfr,    i), GetCount(_acAmr,    i), GetCount(_acEas, i),\n                    GetCount(_acFin,  i), GetCount(_acNfe,    i), GetCount(_acOth,    i), GetCount(_acAsj, i),\n                    GetCount(_acSas,  i), GetCount(_acMale,   i), GetCount(_acFemale, i),\n                    GetCount(_hcAll,  i), GetCount(_hcAfr,    i), GetCount(_hcAmr,    i), 
GetCount(_hcEas, i), GetCount(_hcFin, i),\n                    GetCount(_hcNfe,  i), GetCount(_hcOth,    i), GetCount(_hcAsj,    i), GetCount(_hcSas, i),\n                    GetCount(_hcMale, i), GetCount(_hcFemale, i),\n                    //controls\n                    _control_anAll,\n                    GetCount(_control_acAll, i),\n                    hasFailedFilters,\n                    type,\n                    line\n                ));\n            }\n\n            return gnomadItemsList;\n        }\n\n        private static int? GetCount(int[] counts, int i)\n        {\n            if (counts == null) return null;\n\n            if (i >= counts.Length) return null;\n\n            return counts[i];\n        }\n\n        /// <summary>\n        /// Splits the info field on ';' and passes every key/value entry that has a value to SetInfoField.\n        /// </summary>\n        /// <param name=\"infoFields\"></param>\n        private void ParseInfoField(string infoFields)\n        {\n            if (infoFields == \"\" || infoFields == \".\") return;\n            var infoItems = infoFields.OptimizedSplit(';');\n\n            foreach (string infoItem in infoItems)\n            {\n                (string key, string value) = infoItem.OptimizedKeyValue();\n                // sanity check\n                if (value != null) SetInfoField(key, value);\n            }\n        }\n\n        /// <summary>\n        /// Stores the parsed value in the field that matches the given info key; keys without a matching case are ignored.\n        /// </summary>\n        /// <param name=\"vcfId\"></param>\n        /// <param name=\"value\"></param>\n        private void SetInfoField(string vcfId, string value)\n        {\n            switch (vcfId)\n            {\n                case \"AC\":\n                    _acAll = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"AC_XY\":\n                    _acMale = value.OptimizedSplit(',').Select(val => 
Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"AC_XX\":\n                    _acFemale = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"AC_afr\":\n                    _acAfr = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"AC_amr\":\n                    _acAmr = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"AC_eas\":\n                    _acEas = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"AC_fin\":\n                    _acFin = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"AC_nfe\":\n                    _acNfe = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"AC_oth\":\n                    _acOth = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"AC_asj\":\n                    _acAsj = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"AC_sas\":\n                    _acSas = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"AN\":\n                    _anAll = Convert.ToInt32(value);\n                    break;\n\n                case \"AN_XY\":\n                    _anMale = Convert.ToInt32(value);\n                    break;\n\n                case \"AN_XX\":\n                    _anFemale = Convert.ToInt32(value);\n                    break;\n\n                case \"AN_afr\":\n                    _anAfr = 
Convert.ToInt32(value);\n                    break;\n\n                case \"AN_amr\":\n                    _anAmr = Convert.ToInt32(value);\n                    break;\n\n                case \"AN_eas\":\n                    _anEas = Convert.ToInt32(value);\n                    break;\n\n                case \"AN_fin\":\n                    _anFin = Convert.ToInt32(value);\n                    break;\n\n                case \"AN_nfe\":\n                    _anNfe = Convert.ToInt32(value);\n                    break;\n\n                case \"AN_oth\":\n                    _anOth = Convert.ToInt32(value);\n                    break;\n\n                case \"AN_asj\":\n                    _anAsj = Convert.ToInt32(value);\n                    break;\n\n                case \"AN_sas\":\n                    _anSas = Convert.ToInt32(value);\n                    break;\n\n                case \"nhomalt\":\n                    _hcAll = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"nhomalt_XY\":\n                    _hcMale = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n                case \"nhomalt_XX\":\n                    _hcFemale = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"nhomalt_afr\":\n                    _hcAfr = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"nhomalt_amr\":\n                    _hcAmr = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"nhomalt_eas\":\n                    _hcEas = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"nhomalt_fin\":\n                    _hcFin = 
value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"nhomalt_nfe\":\n                    _hcNfe = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"nhomalt_oth\":\n                    _hcOth = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"nhomalt_asj\":\n                    _hcAsj = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"nhomalt_sas\":\n                    _hcSas = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                // controls\n                case \"AN_controls_and_biobanks\":\n                    _control_anAll = Convert.ToInt32(value);\n                    break;\n\n                case \"AC_controls_and_biobanks\":\n                    _control_acAll = value.OptimizedSplit(',').Select(val => Convert.ToInt32(val)).ToArray();\n                    break;\n\n                case \"VarDP\":\n                    _totalDepth = Convert.ToInt32(value);\n                    break;\n            }\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/gnomAD/GnomadSvBedParser.cs",
    "content": "using System.Collections.Generic;\nusing System.IO;\nusing Genome;\nusing SAUtils.DataStructures;\nusing SAUtils.ParseUtils;\nusing Variants;\n\nnamespace SAUtils.gnomAD;\n\npublic sealed class GnomadSvBedParser : GnomadSvParser\n{\n    public GnomadSvBedParser(\n        StreamReader reader,\n        Dictionary<string, Chromosome> refNameDict\n    ) : base(reader, refNameDict)\n    {\n        TsvIndices = new TsvIndices\n        {\n            Chromosome = 0,\n            Start      = 1,\n            End        = 2,\n            VariantId  = 3,\n            SvType     = 4,\n            Filters    = 241,\n\n            AllAlleleNumber    = 35,\n            AllAlleleCount     = 36,\n            AllAlleleFrequency = 37,\n            AllHomCount        = 41,\n\n            MaleAlleleNumber    = 45,\n            MaleAlleleCount     = 46,\n            MaleAlleleFrequency = 47,\n            MaleHomCount        = 51,\n\n            FemaleAlleleNumber    = 60,\n            FemaleAlleleCount     = 61,\n            FemaleAlleleFrequency = 62,\n            FemaleHomCount        = 66,\n\n            AfrAlleleNumber    = 71,\n            AfrAlleleCount     = 72,\n            AfrAlleleFrequency = 73,\n            AfrHomCount        = 77,\n\n            AmrAlleleNumber    = 105,\n            AmrAlleleCount     = 106,\n            AmrAlleleFrequency = 107,\n            AmrHomCount        = 111,\n\n            EasAlleleNumber    = 139,\n            EasAlleleCount     = 140,\n            EasAlleleFrequency = 141,\n            EasHomCount        = 145,\n\n            EurAlleleNumber    = 173,\n            EurAlleleCount     = 174,\n            EurAlleleFrequency = 175,\n            EurHomCount        = 179,\n\n            OthAlleleNumber    = 207,\n            OthAlleleCount     = 208,\n            OthAlleleFrequency = 209,\n            OthHomCount        = 211,\n        };\n    }\n\n    protected override GnomadSvItem ParseLine(string inputLine)\n    {\n        var    
splitLine      = new SplitLine(in inputLine, in Delimiter);\n        string chromosomeName = splitLine.GetString(TsvIndices.Chromosome);\n        if (!RefNameDict.ContainsKey(chromosomeName))\n            return null;\n\n        Chromosome chromosome = RefNameDict[chromosomeName];\n        int?       start      = splitLine.ParseInteger(TsvIndices.Start);\n        int?       end        = splitLine.ParseInteger(TsvIndices.End);\n        if (start == null || end == null)\n            throw new InvalidDataException($\"Invalid Data on Line {inputLine}\");\n        \n        VariantType svType = SvTypeMapper(splitLine.GetString(TsvIndices.SvType));\n\n        // Ignoring BND for now\n        if (svType == VariantType.translocation_breakend)\n            return null;\n        \n        // For some reason, in the source file the end position is +1 for insertions\n        if (svType == VariantType.insertion)\n            end--;\n\n        start += 2; // +1 because the start is 0-based in BED format, and +1 for the padding base\n        if (start > end)\n            (start, end) = (end, start);\n        \n        string filters          = splitLine.GetString(TsvIndices.Filters);\n        bool   hasFailedFilters = SaUtilsCommon.HasFailedFilters(filters);\n        return new GnomadSvItem(chromosome, inputLine)\n        {\n            Start     = (int) start,\n            End       = (int) end,\n            VariantId = splitLine.GetString(TsvIndices.VariantId),\n            SvType    = SvTypeMapper(splitLine.GetString(TsvIndices.SvType)),\n\n            AllAlleleNumber    = splitLine.ParseInteger(TsvIndices.AllAlleleNumber),\n            AllAlleleCount     = splitLine.ParseInteger(TsvIndices.AllAlleleCount),\n            AllAlleleFrequency = splitLine.ParseDouble(TsvIndices.AllAlleleFrequency),\n            AllHomCount        = splitLine.ParseInteger(TsvIndices.AllHomCount),\n\n            MaleAlleleNumber    = splitLine.ParseInteger(TsvIndices.MaleAlleleNumber),\n            
MaleAlleleCount     = splitLine.ParseInteger(TsvIndices.MaleAlleleCount),\n            MaleAlleleFrequency = splitLine.ParseDouble(TsvIndices.MaleAlleleFrequency),\n            MaleHomCount        = splitLine.ParseInteger(TsvIndices.MaleHomCount),\n\n            FemaleAlleleNumber    = splitLine.ParseInteger(TsvIndices.FemaleAlleleNumber),\n            FemaleAlleleCount     = splitLine.ParseInteger(TsvIndices.FemaleAlleleCount),\n            FemaleAlleleFrequency = splitLine.ParseDouble(TsvIndices.FemaleAlleleFrequency),\n            FemaleHomCount        = splitLine.ParseInteger(TsvIndices.FemaleHomCount),\n\n            AfrAlleleNumber    = splitLine.ParseInteger(TsvIndices.AfrAlleleNumber),\n            AfrAlleleCount     = splitLine.ParseInteger(TsvIndices.AfrAlleleCount),\n            AfrAlleleFrequency = splitLine.ParseDouble(TsvIndices.AfrAlleleFrequency),\n            AfrHomCount        = splitLine.ParseInteger(TsvIndices.AfrHomCount),\n\n            AmrAlleleNumber    = splitLine.ParseInteger(TsvIndices.AmrAlleleNumber),\n            AmrAlleleCount     = splitLine.ParseInteger(TsvIndices.AmrAlleleCount),\n            AmrAlleleFrequency = splitLine.ParseDouble(TsvIndices.AmrAlleleFrequency),\n            AmrHomCount        = splitLine.ParseInteger(TsvIndices.AmrHomCount),\n\n            EasAlleleNumber    = splitLine.ParseInteger(TsvIndices.EasAlleleNumber),\n            EasAlleleCount     = splitLine.ParseInteger(TsvIndices.EasAlleleCount),\n            EasAlleleFrequency = splitLine.ParseDouble(TsvIndices.EasAlleleFrequency),\n            EasHomCount        = splitLine.ParseInteger(TsvIndices.EasHomCount),\n\n            EurAlleleNumber    = splitLine.ParseInteger(TsvIndices.EurAlleleNumber),\n            EurAlleleCount     = splitLine.ParseInteger(TsvIndices.EurAlleleCount),\n            EurAlleleFrequency = splitLine.ParseDouble(TsvIndices.EurAlleleFrequency),\n            EurHomCount        = splitLine.ParseInteger(TsvIndices.EurHomCount),\n\n          
  OthAlleleNumber    = splitLine.ParseInteger(TsvIndices.OthAlleleNumber),\n            OthAlleleCount     = splitLine.ParseInteger(TsvIndices.OthAlleleCount),\n            OthAlleleFrequency = splitLine.ParseDouble(TsvIndices.OthAlleleFrequency),\n            OthHomCount        = splitLine.ParseInteger(TsvIndices.OthHomCount),\n\n            HasFailedFilters = hasFailedFilters\n        };\n    }\n\n    private static VariantType SvTypeMapper(string svType)\n    {\n        // All possible values found in data (with counts):\n        //      BND: 52604\n        //      CN=0: 1108\n        //      CPX: 4778\n        //      CTX: 8\n        //      DEL: 169635\n        //      DUP: 49571\n        //      INS: 31443\n        //      INS:ME: 672\n        //      INS:ME:ALU: 60475\n        //      INS:ME:LINE1: 10018\n        //      INS:ME:SVA: 6417\n        //      INV: 748\n        //      Total: 387477\n        return svType switch\n        {\n            \"BND\"          => VariantType.translocation_breakend,\n            \"CN=0\"         => VariantType.deletion,\n            \"CPX\"          => VariantType.complex_structural_alteration,\n            \"CTX\"          => VariantType.translocation_breakend,\n            \"DEL\"          => VariantType.deletion,\n            \"DUP\"          => VariantType.duplication,\n            \"INS\"          => VariantType.insertion,\n            \"INS:ME\"       => VariantType.mobile_element_insertion,\n            \"INS:ME:ALU\"   => VariantType.mobile_element_insertion,\n            \"INS:ME:LINE1\" => VariantType.mobile_element_insertion,\n            \"INS:ME:SVA\"   => VariantType.mobile_element_insertion,\n            \"INV\"          => VariantType.inversion,\n            _              => throw new InvalidDataException(\"unknown svType\")\n        };\n    }\n}"
  },
  {
    "path": "SAUtils/gnomAD/GnomadSvMain.cs",
    "content": "using System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing ErrorHandling.Exceptions;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.NSA;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.gnomAD;\n\npublic static class GnomadSvMain\n{\n    private static string _inputFileName;\n    private static string _compressedReference;\n    private static string _outputDirectory;\n\n    public static ExitCodes Run(string command, string[] commandArgs)\n    {\n        var ops = new OptionSet\n        {\n            {\n                \"ref|r=\",\n                \"compressed reference sequence file\",\n                v => _compressedReference = v\n            },\n            {\n                \"in|i=\",\n                \"gnomADV2 BED or TSV file\",\n                v => _inputFileName = v\n            },\n            {\n                \"out|o=\",\n                \"output directory\",\n                v => _outputDirectory = v\n            }\n        };\n\n        var commandLineExample = $\"{command} [options]\";\n\n        ExitCodes exitCode = new ConsoleAppBuilder(commandArgs, ops)\n            .Parse()\n            .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n            .CheckInputFilenameExists(_inputFileName,       \"gnomADV2 BED or TSV file\",                \"--in\")\n            .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n            .SkipBanner()\n            .ShowHelpMenu(\"Creates a supplementary database from gnomAD v2 structural variant annotations\", commandLineExample)\n            .ShowErrors()\n            .Execute(ProgramExecution);\n\n        return exitCode;\n    }\n\n    private static ExitCodes ProgramExecution()\n    {\n        var               referenceProvider = new 
ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\n        DataSourceVersion version           = DataSourceVersionReader.GetSourceVersion(_inputFileName + \".version\");\n\n        string             outFileName = $\"{version.Name}_{version.Version}\".Replace(' ', '_');\n        using StreamReader reader      = GZipUtilities.GetAppropriateStreamReader(_inputFileName);\n        using GnomadSvParser gnomadSvParser = _inputFileName.Substring(_inputFileName.Length - 6) switch\n        {\n            \"tsv.gz\" => new GnomadSvTsvParser(reader, referenceProvider.RefNameToChromosome),\n            \"bed.gz\" => new GnomadSvBedParser(reader, referenceProvider.RefNameToChromosome),\n            _        => throw new InvalidFileFormatException(\"Input file should end in '.tsv.gz' or '.bed.gz'\")\n        };\n\n        using FileStream nsiStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.IntervalFileSuffix));\n        using var nsiWriter = new NsiWriter(\n            nsiStream,\n            version,\n            referenceProvider.Assembly,\n            SaCommon.GnomadStructuralVariant,\n            ReportFor.StructuralVariants,\n            SaCommon.SchemaVersion\n        );\n        nsiWriter.Write(gnomadSvParser.GetItems());\n\n        return ExitCodes.Success;\n    }\n}"
  },
  {
    "path": "SAUtils/gnomAD/GnomadSvParser.cs",
    "content": "using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing Genome;\nusing OptimizedCore;\nusing SAUtils.DataStructures;\nusing SAUtils.ParseUtils;\n\nnamespace SAUtils.gnomAD;\n\npublic abstract class GnomadSvParser : IDisposable\n{\n    private const      char                             CommentChar = '#';\n    private readonly   StreamReader                     _reader;\n    protected readonly Dictionary<string, Chromosome> RefNameDict;\n\n    protected readonly char       Delimiter = '\\t';\n    protected          TsvIndices TsvIndices;\n\n    protected GnomadSvParser(\n        StreamReader reader,\n        Dictionary<string, Chromosome> refNameDict\n    )\n    {\n        _reader     = reader;\n        RefNameDict = refNameDict;\n    }\n\n    public IEnumerable<GnomadSvItem> GetItems()\n    {\n        string line;\n        while ((line = _reader.ReadLine()) != null)\n        {\n            // Skip empty lines and comment lines\n            if (string.IsNullOrWhiteSpace(line) || line.OptimizedStartsWith(CommentChar))\n                continue;\n\n            GnomadSvItem gnomadSvItem = ParseLine(line);\n            if (gnomadSvItem == null)\n                continue;\n\n            yield return gnomadSvItem;\n        }\n    }\n\n    protected abstract GnomadSvItem ParseLine(string inputLine);\n\n    public void Dispose()\n    {\n        _reader?.Dispose();\n    }\n}"
  },
  {
    "path": "SAUtils/gnomAD/GnomadSvTsvParser.cs",
    "content": "using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing Genome;\nusing OptimizedCore;\nusing SAUtils.DataStructures;\nusing SAUtils.ParseUtils;\nusing Variants;\n\nnamespace SAUtils.gnomAD;\n\npublic sealed class GnomadSvTsvParser : GnomadSvParser\n{\n    public GnomadSvTsvParser(\n        StreamReader reader,\n        Dictionary<string, Chromosome> refNameDict\n    ) : base(reader, refNameDict)\n    {\n        TsvIndices = new TsvIndices()\n        {\n            Chromosome = 7,\n            Start      = 10,\n            End        = 13,\n            VariantId  = 1,\n            SvType     = 2,\n\n            AllAlleleCount     = 33,\n            AllAlleleFrequency = 34,\n            AllAlleleNumber    = 35\n        };\n    }\n\n\n    protected override GnomadSvItem ParseLine(string inputLine)\n    {\n        var    splitLine      = new SplitLine(in inputLine, in Delimiter);\n        string chromosomeName = splitLine.GetString(TsvIndices.Chromosome);\n        if (!RefNameDict.ContainsKey(chromosomeName))\n            return null;\n\n        Chromosome chromosome = RefNameDict[chromosomeName];\n        int?       start      = splitLine.ParseInteger(TsvIndices.Start);\n        int?       
end        = splitLine.ParseInteger(TsvIndices.End);\n        if (start == null || end == null)\n            throw new InvalidDataException($\"Invalid Data on Line {inputLine}\");\n\n        VariantType svType = SvTypeMapper(splitLine.GetString(TsvIndices.SvType));\n        \n        // Ignoring BND for now\n        if (svType == VariantType.translocation_breakend)\n            return null;\n\n        start += 1; // +1 for padding base\n        if (start > end)\n        {\n            (start, end) = (end, start);\n        }\n\n        // 'allele_count': 'AC=21,AFR_AC=10,AMR_AC=9,EAS_AC=0,EUR_AC=2,OTH_AC=0'\n        // 'allele_frequency': 'AF=0.038889,AFR_AF=0.044643,AMR_AF=0.03913,EAS_AF=0,EUR_AF=0.023256,OTH_AF=0'\n        // 'allele_number': 'AN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0'\n        Dictionary<string, int?>    countDict     = ParseValues(splitLine.GetString(TsvIndices.AllAlleleCount),     \"AC\", SplitLine.ParseInteger);\n        Dictionary<string, double?> frequencyDict = ParseValues(splitLine.GetString(TsvIndices.AllAlleleFrequency), \"AF\", SplitLine.ParseDouble);\n        Dictionary<string, int?>    numberDict    = ParseValues(splitLine.GetString(TsvIndices.AllAlleleNumber),    \"AN\", SplitLine.ParseInteger);\n\n        return new GnomadSvItem(chromosome, inputLine)\n        {\n            Start     = (int) start,\n            End       = (int) end,\n            VariantId = splitLine.GetString(TsvIndices.VariantId),\n            SvType    = svType,\n\n            AllAlleleNumber    = numberDict[\"ALL\"],\n            AllAlleleCount     = countDict[\"ALL\"],\n            AllAlleleFrequency = frequencyDict[\"ALL\"],\n\n            AfrAlleleNumber    = numberDict[\"AFR\"],\n            AfrAlleleCount     = countDict[\"AFR\"],\n            AfrAlleleFrequency = frequencyDict[\"AFR\"],\n\n            AmrAlleleNumber    = numberDict[\"AMR\"],\n            AmrAlleleCount     = countDict[\"AMR\"],\n            AmrAlleleFrequency = 
frequencyDict[\"AMR\"],\n\n            EasAlleleNumber    = numberDict[\"EAS\"],\n            EasAlleleCount     = countDict[\"EAS\"],\n            EasAlleleFrequency = frequencyDict[\"EAS\"],\n\n            EurAlleleNumber    = numberDict[\"EUR\"],\n            EurAlleleCount     = countDict[\"EUR\"],\n            EurAlleleFrequency = frequencyDict[\"EUR\"],\n\n            OthAlleleNumber    = numberDict[\"OTH\"],\n            OthAlleleCount     = countDict[\"OTH\"],\n            OthAlleleFrequency = frequencyDict[\"OTH\"]\n        };\n    }\n\n    private static Dictionary<string, T> ParseValues<T>(string subString, string keyType, Func<string, T> parsingFunction)\n    {\n        // 'allele_count': 'AC=21,AFR_AC=10,AMR_AC=9,EAS_AC=0,EUR_AC=2,OTH_AC=0'\n        string[]              splitValues = subString.OptimizedSplit(',');\n        Dictionary<string, T> parsedDict  = new();\n\n        foreach (string splitValue in splitValues)\n        {\n            (string key, string value) = splitValue.OptimizedKeyValue();\n            if (!key.Equals(keyType))\n            {\n                string dictKey = key.Replace($\"_{keyType}\", \"\");\n                parsedDict[dictKey] = parsingFunction(value);\n                continue;\n            }\n\n            parsedDict[\"ALL\"] = parsingFunction(value);\n        }\n\n        return parsedDict;\n    }\n\n    private static VariantType SvTypeMapper(string svType)\n    {\n        // https://www.ncbi.nlm.nih.gov/dbvar/content/var_summary/#nstd166\n        // All possible values found in data (with counts):\n        //      alu insertion: 61351\n        //      copy number variation: 11383\n        //      deletion: 161218\n        //      duplication: 44560\n        //      insertion: 26038\n        //      inversion: 727\n        //      line1 insertion: 10017\n        //      mobile element insertion: 655\n        //      sequence alteration: 4733\n        //      sva insertion: 6547\n        //      Total: 327229\n      
  return svType switch\n        {\n            \"alu insertion\"            => VariantType.mobile_element_insertion,\n            \"copy number variation\"    => VariantType.copy_number_variation,\n            \"deletion\"                 => VariantType.deletion,\n            \"duplication\"              => VariantType.duplication,\n            \"insertion\"                => VariantType.insertion,\n            \"inversion\"                => VariantType.inversion,\n            \"line1 insertion\"          => VariantType.mobile_element_insertion,\n            \"mobile element insertion\" => VariantType.mobile_element_insertion,\n            \"sequence alteration\"      => VariantType.structural_alteration,\n            \"sva insertion\"            => VariantType.mobile_element_insertion,\n            _                          => throw new InvalidDataException(\"unknown svType\")\n        };\n    }\n}"
  },
  {
    "path": "SAUtils/gnomAD/GnomadUtilities.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing SAUtils.DataStructures;\nusing SAUtils.ParseUtils;\nusing VariantAnnotation.Interface.Providers;\nusing Variants;\n\nnamespace SAUtils.gnomAD\n{\n    public static class GnomadUtilities\n    {\n        public static Dictionary<(string refAllele, string altAllele), GnomadItem> GetMergedItems(\n            Dictionary<(string refAllele, string altAllele), GnomadItem> genomeItems,\n            Dictionary<(string refAllele, string altAllele), GnomadItem> exomeItems)\n        {\n            if (genomeItems == null) return exomeItems;\n            if (exomeItems  == null) return genomeItems;\n\n            var mergedItems = new Dictionary<(string refAllele, string altAllele), GnomadItem>();\n            // take care of the genomeItems and merge if needed\n            foreach (var (key, value) in genomeItems)\n            {\n                mergedItems.Add(key, exomeItems.TryGetValue(key, out var exomeValue) ? MergeItems(value, exomeValue) : value);\n\n                exomeItems.Remove(key);\n            }\n\n            foreach (var (key, value) in exomeItems)\n            {\n                mergedItems.Add(key, value);\n            }\n\n            return mergedItems;\n        }\n\n        public static GnomadItem GetNormalizedItem(GnomadItem item, ISequenceProvider sequenceProvider)\n        {\n            var (alignedPos, alignedRef, alignedAlt) =\n                VariantUtils.TrimAndLeftAlign(item.Position, item.RefAllele, item.AltAllele, sequenceProvider.Sequence);\n\n            if (item.Position == alignedPos && item.RefAllele == alignedRef && item.AltAllele == alignedAlt)\n                return item;\n\n            return new GnomadItem(\n                    item.Chromosome,\n                    alignedPos,\n                    alignedRef,\n                    alignedAlt,\n                    item.Depth,\n                    item.AllAlleleNumber,\n                    item.AfrAlleleNumber,\n  
                  item.AmrAlleleNumber,\n                    item.EasAlleleNumber,\n                    item.FinAlleleNumber,\n                    item.NfeAlleleNumber,\n                    item.OthAlleleNumber,\n                    item.AsjAlleleNumber,\n                    item.SasAlleleNumber,\n                    item.MaleAlleleNumber,\n                    item.FemaleAlleleNumber,\n                    item.AllAlleleCount,\n                    item.AfrAlleleCount,\n                    item.AmrAlleleCount,\n                    item.EasAlleleCount,\n                    item.FinAlleleCount,\n                    item.NfeAlleleCount,\n                    item.OthAlleleCount,\n                    item.AsjAlleleCount,\n                    item.SasAlleleCount,\n                    item.MaleAlleleCount,\n                    item.FemaleAlleleCount,\n                    item.AllHomCount,\n                    item.AfrHomCount,\n                    item.AmrHomCount,\n                    item.EasHomCount,\n                    item.FinHomCount,\n                    item.NfeHomCount,\n                    item.OthHomCount,\n                    item.AsjHomCount,\n                    item.SasHomCount,\n                    item.MaleHomCount,\n                    item.FemaleHomCount,\n                    //controls\n                    item.ControlsAllAlleleNumber,\n                    item.ControlsAllAlleleCount,\n                    item.HasFailedFilters,\n                    item.DataType,\n                    item.InputLine)\n                ;\n        }\n\n        private static GnomadItem MergeItems(GnomadItem item1, GnomadItem item2)\n        {\n            if (item1.Chromosome.Index != item2.Chromosome.Index\n                || item1.Position      != item2.Position\n                || item1.RefAllele     != item2.RefAllele\n                || item1.AltAllele     != item2.AltAllele)\n                throw new DataMisalignedException(\n                    $\"Trying to merge 
unequal variants at {item1.Chromosome.UcscName}:{item1.Position} and {item2.Chromosome.UcscName}:{item2.Position}\");\n\n            if (item1.DataType == item2.DataType)\n                throw new DataMisalignedException($\"Trying to merge items of the same data type at {item1.Chromosome.UcscName}:{item1.Position}\");\n\n            return new GnomadItem(item1.Chromosome,\n                item1.Position,\n                item1.RefAllele,\n                item1.AltAllele,\n                SaParseUtilities.Add(item1.Depth,              item2.Depth),\n                SaParseUtilities.Add(item1.AllAlleleNumber,    item2.AllAlleleNumber),\n                SaParseUtilities.Add(item1.AfrAlleleNumber,    item2.AfrAlleleNumber),\n                SaParseUtilities.Add(item1.AmrAlleleNumber,    item2.AmrAlleleNumber),\n                SaParseUtilities.Add(item1.EasAlleleNumber,    item2.EasAlleleNumber),\n                SaParseUtilities.Add(item1.FinAlleleNumber,    item2.FinAlleleNumber),\n                SaParseUtilities.Add(item1.NfeAlleleNumber,    item2.NfeAlleleNumber),\n                SaParseUtilities.Add(item1.OthAlleleNumber,    item2.OthAlleleNumber),\n                SaParseUtilities.Add(item1.AsjAlleleNumber,    item2.AsjAlleleNumber),\n                SaParseUtilities.Add(item1.SasAlleleNumber,    item2.SasAlleleNumber),\n                SaParseUtilities.Add(item1.MaleAlleleNumber,   item2.MaleAlleleNumber),\n                SaParseUtilities.Add(item1.FemaleAlleleNumber, item2.FemaleAlleleNumber),\n                SaParseUtilities.Add(item1.AllAlleleCount,     item2.AllAlleleCount),\n                SaParseUtilities.Add(item1.AfrAlleleCount,     item2.AfrAlleleCount),\n                SaParseUtilities.Add(item1.AmrAlleleCount,     item2.AmrAlleleCount),\n                SaParseUtilities.Add(item1.EasAlleleCount,     item2.EasAlleleCount),\n                SaParseUtilities.Add(item1.FinAlleleCount,     item2.FinAlleleCount),\n                
SaParseUtilities.Add(item1.NfeAlleleCount,     item2.NfeAlleleCount),\n                SaParseUtilities.Add(item1.OthAlleleCount,     item2.OthAlleleCount),\n                SaParseUtilities.Add(item1.AsjAlleleCount,     item2.AsjAlleleCount),\n                SaParseUtilities.Add(item1.SasAlleleCount,     item2.SasAlleleCount),\n                SaParseUtilities.Add(item1.MaleAlleleCount,    item2.MaleAlleleCount),\n                SaParseUtilities.Add(item1.FemaleAlleleCount,  item2.FemaleAlleleCount),\n                SaParseUtilities.Add(item1.AllHomCount,        item2.AllHomCount),\n                SaParseUtilities.Add(item1.AfrHomCount,        item2.AfrHomCount),\n                SaParseUtilities.Add(item1.AmrHomCount,        item2.AmrHomCount),\n                SaParseUtilities.Add(item1.EasHomCount,        item2.EasHomCount),\n                SaParseUtilities.Add(item1.FinHomCount,        item2.FinHomCount),\n                SaParseUtilities.Add(item1.NfeHomCount,        item2.NfeHomCount),\n                SaParseUtilities.Add(item1.OthHomCount,        item2.OthHomCount),\n                SaParseUtilities.Add(item1.AsjHomCount,        item2.AsjHomCount),\n                SaParseUtilities.Add(item1.SasHomCount,        item2.SasHomCount),\n                SaParseUtilities.Add(item1.MaleHomCount,       item2.MaleHomCount),\n                SaParseUtilities.Add(item1.FemaleHomCount,     item2.FemaleHomCount),\n                //control\n                SaParseUtilities.Add(item1.ControlsAllAlleleNumber, item2.ControlsAllAlleleNumber),\n                SaParseUtilities.Add(item1.ControlsAllAlleleCount,  item2.ControlsAllAlleleCount),\n                item1.HasFailedFilters || item2.HasFailedFilters,\n                item1.DataType,\n                item1.InputLine + '\\n' + item2.InputLine\n            );\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/gnomAD/LcrInterval.cs",
    "content": "using Genome;\nusing VariantAnnotation.Interface.SA;\n\nnamespace SAUtils.gnomAD\n{\n    public class LcrInterval:ISuppIntervalItem\n    {\n        public int Start { get; }\n        public int End { get; }\n        public Chromosome Chromosome { get; }\n        public string GetJsonString() => string.Empty;\n\n        public LcrInterval(Chromosome chromosome, int start, int end)\n        {\n            Chromosome = chromosome;\n            Start = start;\n            End = end;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/gnomAD/LcrRegionParser.cs",
    "content": "using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing Genome;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Interface.SA;\n\nnamespace SAUtils.gnomAD\n{\n    public sealed class LcrRegionParser:IDisposable\n    {\n        private readonly StreamReader _reader;\n        private readonly ISequenceProvider _refProvider;\n\n        private int _nRegionSize;\n\n        public LcrRegionParser(StreamReader reader, ISequenceProvider refProvider)\n        {\n            _reader = reader;\n            _refProvider = refProvider;\n        }\n\n        public void Dispose() => _reader?.Dispose();\n        public IEnumerable<ISuppIntervalItem> GetItems()\n        {\n            using (var reader = _reader)\n            {\n                string line;\n                \n                while ((line = reader.ReadLine()) != null)\n                {\n                    if (line == string.Empty || line.StartsWith(\"#\")) continue;\n\n                    ISuppIntervalItem region;\n                    try\n                    {\n                        region = GetLcrRegion(line);\n                        if(region ==null) continue;\n\n                    }\n                    catch (Exception e)\n                    {\n                        Console.WriteLine(e);\n                        e.Data[\"Line\"] = line;\n                        throw;\n                    }\n                    yield return region;\n                }\n            }\n\n            Console.WriteLine($\"Total size of N-regions:{_nRegionSize}\");\n        }\n\n        private ISuppIntervalItem GetLcrRegion(string line)\n        {\n            (string chromName, int start, int end) = ParsePosition(line);\n            if (chromName==null) return null; //unknown chromosome\n            \n            var chromosome = _refProvider.RefNameToChromosome[chromName];\n            if (chromosome.IsEmpty()) return null;\n\n            if (_refProvider.Assembly 
== GenomeAssembly.GRCh38) start++;\n            \n            return IsNRegion(chromosome, start, end) ? null : new LcrInterval(chromosome, start, end);\n        }\n\n        private bool IsNRegion(Chromosome chrom, int start, int end)\n        {\n            if (_refProvider == null) return false;\n            \n            _refProvider.LoadChromosome(chrom);\n            var sequence = _refProvider.Sequence.Substring(start - 1, end - start + 1);\n\n            if (sequence == null) return false;\n            \n            foreach (char c in sequence)\n            {\n                if (c != 'N' && c != 'n') return false;\n            }\n\n            _nRegionSize+=end-start+1;\n            return true;\n        }\n\n        private (string ChromName, int Start, int End) ParsePosition(string line)\n        {\n            var splits = line.Split(':', '-', '\\t');\n            var chrom = splits[0];\n            if (!_refProvider.RefNameToChromosome.ContainsKey(chrom)) return (null, 0, 0);\n            \n            var start = int.Parse(splits[1]);\n            var end = int.Parse(splits[2]);\n\n            return (chrom, start, end);\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/gnomAD/LcrRegionsMain.cs",
    "content": "using System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.NSA;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.gnomAD\n{\n    public class LcrRegionsMain\n    {\n        private static string _referenceSequencePath;\n        private static string _inputFile;\n        private static string _outputDirectory;\n\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                     \"ref|r=\",\n                     \"compressed reference sequence file\",\n                     v => _referenceSequencePath = v\n                 },\n                {\n                    \"in|i=\",\n                    \"input file path (along with a .version file)\",\n                    v => _inputFile = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory for NSI file\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            var commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_referenceSequencePath, \"compressed reference sequence file name\", \"--ref\")\n                .CheckInputFilenameExists(_inputFile, \"input file with LCR regions\", \"--ref\")\n                .CheckDirectoryExists(_outputDirectory, \"output Supplementary directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Reads provided supplementary data files and populates tsv files\", commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n     
   }\n\n        private static ExitCodes ProgramExecution()\n        {\n            var refProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_referenceSequencePath));\n            var version     = DataSourceVersionReader.GetSourceVersion(_inputFile + \".version\");\n            var outFileName = $\"{version.Name}_{version.Version}\";\n            \n            using (var parser = new LcrRegionParser(GZipUtilities.GetAppropriateStreamReader(_inputFile), refProvider))\n            using (var stream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.LcrFileSuffix)))\n            using (var writer = new NsiWriter(stream, version, refProvider.Assembly, SaCommon.LowComplexityRegionTag, ReportFor.AllVariants, SaCommon.NsiSchemaVersion))\n            {\n                writer.Write(parser.GetItems());\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n}"
  },
  {
    "path": "SAUtils/makeDgvDb/Main.cs",
    "content": "﻿using System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing SAUtils.InputFileParsers.DGV;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.NSA;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace SAUtils.makeDgvDb\n{\n    public static class Main\n    {\n        private static string _inputFileName;\n        private static string _compressedReference;\n        private static string _outputDirectory;\n        public static ExitCodes Run(string command, string[] commandArgs)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"ref|r=\",\n                    \"compressed reference sequence file\",\n                    v => _compressedReference = v\n                },\n                {\n                    \"in|i=\",\n                    \"DGV VCFfile\",\n                    v => _inputFileName = v\n                },\n                {\n                    \"out|o=\",\n                    \"output directory\",\n                    v => _outputDirectory = v\n                }\n            };\n\n            string commandLineExample = $\"{command} [options]\";\n\n            var exitCode = new ConsoleAppBuilder(commandArgs, ops)\n                .Parse()\n                .CheckInputFilenameExists(_compressedReference, \"compressed reference sequence file name\", \"--ref\")\n                .HasRequiredParameter(_inputFileName, \"DGV VCFfile\", \"--in\")\n                .CheckInputFilenameExists(_inputFileName, \"DGV VCFfile\", \"--in\")\n                .HasRequiredParameter(_outputDirectory, \"output directory\", \"--out\")\n                .CheckDirectoryExists(_outputDirectory, \"output directory\", \"--out\")\n                .SkipBanner()\n                .ShowHelpMenu(\"Creates a supplementary database with ClinVar annotations\", 
commandLineExample)\n                .ShowErrors()\n                .Execute(ProgramExecution);\n\n            return exitCode;\n        }\n\n        private static ExitCodes ProgramExecution()\n        {\n            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));\n            var version = DataSourceVersionReader.GetSourceVersion(_inputFileName + \".version\");\n            string outFileName = $\"{version.Name}_{version.Version}\";\n\n            using(var dgvReader = new DgvReader(GZipUtilities.GetAppropriateStreamReader(_inputFileName), referenceProvider.RefNameToChromosome))\n            using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outFileName + SaCommon.IntervalFileSuffix)))\n            using(var nsiWriter = new NsiWriter(nsaStream, version, referenceProvider.Assembly, SaCommon.DgvTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))\n            {\n                nsiWriter.Write(dgvReader.GetItems());\n            }\n\n            return ExitCodes.Success;\n        }\n    }\n}"
  },
  {
    "path": "Sandbox/AminoAcidAligner/AlignmentBuilder.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing System.Text;\n\nnamespace AminoAcidAligner\n{\n    public class AlignmentBuilder\n    {\n        public readonly string TranscriptId;\n        public string Chromosome;\n        private readonly Dictionary<string, StringBuilder> _speciesAlignment;\n\n        public AlignmentBuilder(string id)\n        {\n            TranscriptId = id;\n            _speciesAlignment = new Dictionary<string, StringBuilder>(100);//since we are doing 100 way alignment\n        }\n\n        public void Add(string transcriptId, string species, string sequence)\n        {\n            if (TranscriptId != transcriptId) return;\n            if (_speciesAlignment.TryGetValue(species, out var sb))\n            {\n                sb.Append(sequence);\n            }\n            else\n            {\n                _speciesAlignment[species] = new StringBuilder();\n                _speciesAlignment[species].Append(sequence);\n            }\n        }\n\n        public override string ToString()\n        {\n            if(!CheckAlignments()) throw new DataMisalignedException($\"Alignment issues found for {TranscriptId}\");\n\n            var sb = new StringBuilder();\n            foreach (var (species, sequence) in _speciesAlignment)\n            {\n                sb.Append($\"{species}\\t{sequence}\\n\");\n            }\n\n            return sb.ToString();\n        }\n\n        private bool CheckAlignments()\n        {\n            var length = -1;\n            //checking if all the alignments have same length\n            foreach (var sequence in _speciesAlignment.Values)\n            {\n                if (length == -1) length = sequence.Length;\n                if (length != sequence.Length) return false;\n            }\n\n            //check if there are any '-' es in Human\n            StringBuilder humanSb;\n            if (!_speciesAlignment.TryGetValue(\"hg38\", out humanSb) 
&& !_speciesAlignment.TryGetValue(\"hg19\", out humanSb)) return true;\n            var hg38Sequence = humanSb.ToString();\n            if (hg38Sequence.Contains('-'))\n                Console.WriteLine($\"Human sequence contains - in {TranscriptId}\");\n\n            return true;\n        }\n\n        public string GetScoresLine()\n        {\n            var sb = new StringBuilder();\n            \n            StringBuilder humanSb;\n            string humanSequence=null;\n            if (_speciesAlignment.TryGetValue(\"hg38\", out humanSb) ||\n                _speciesAlignment.TryGetValue(\"hg19\", out humanSb))\n            {\n                humanSequence = humanSb.ToString();\n            }\n\n            if(humanSequence == null) throw new InvalidDataException($\"No human sequence available for {TranscriptId}\");\n\n            sb.Append($\"{TranscriptId}\\t{Chromosome}\\t{humanSequence}\");\n\n            var residueCount = new int[humanSequence.Length];\n            Array.Fill(residueCount, 0);\n\n            foreach (var alignment in _speciesAlignment.Values)\n            {\n                for (int i = 0; i < humanSequence.Length; i++)\n                {\n                    if (humanSequence[i] == alignment[i]) residueCount[i]++;\n                }\n            }\n\n            sb.Append('\\t');\n            sb.Append(string.Join(',', residueCount.Select(x => 100 * x / _speciesAlignment.Count)));\n            \n            return sb.ToString();\n        }\n    }\n}"
  },
  {
    "path": "Sandbox/AminoAcidAligner/AminoAcidAligner.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\n    <PropertyGroup>\n        <OutputType>Exe</OutputType>\n        <TargetFramework>net6.0</TargetFramework>\n    </PropertyGroup>\n    <ItemGroup>\n      <ProjectReference Include=\"..\\..\\Compression\\Compression.csproj\" />\n      <ProjectReference Include=\"..\\..\\ErrorHandling\\ErrorHandling.csproj\" />\n      <ProjectReference Include=\"..\\..\\IO\\IO.csproj\" />\n    </ItemGroup>\n</Project>\n"
  },
  {
    "path": "Sandbox/AminoAcidAligner/ExonToTranscript.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing Compression.Utilities;\n\nnamespace AminoAcidAligner\n{\n    public static class ExonToTranscript\n    {\n        public static void Main(string[] args)\n        {\n            Console.WriteLine(\"Aggregate exon alignments into transcript alignments\");\n\n            if (args.Length != 3)\n            {\n                Console.WriteLine(\"usage: dotnet AminoAcidAligner.dll [input exon alignment FASTA file] [output transcript alignment file] [output AA conservation scores file]\");\n                return;\n            }\n\n            var exonAlignmentFile = args[0];\n            var transcriptAlignmentFile = args[1];\n            var conservationScoresFile = args[2];\n\n            using (var reader = GZipUtilities.GetAppropriateStreamReader(exonAlignmentFile))\n            using (var writer = GZipUtilities.GetStreamWriter(transcriptAlignmentFile))\n            using (var scoresWriter = GZipUtilities.GetStreamWriter(conservationScoresFile))\n            {\n                var count = CreateTranscriptAlignments(reader, writer, scoresWriter);\n                Console.WriteLine($\"Created {count} transcript alignments\");\n            }\n        }\n\n        \n        /// <summary>\n        /// merges multiple exon (amino acid) alignments to create transcript alignment\n        /// </summary>\n        /// <param name=\"reader\">Stream reader for the input FASTA file with exon alignment</param>\n        /// <param name=\"writer\">Stream writer for the output file with transcript alignment</param>\n        /// <param name=\"scoresWriter\">Stream writer for the output file with conservation scores(percentage) </param>\n        /// <returns>number if transcripts alignments created</returns>\n        /// <exception cref=\"NotImplementedException\"></exception>\n        private static int CreateTranscriptAlignments(StreamReader reader, StreamWriter writer, StreamWriter 
scoresWriter)\n        {\n            string name = null;\n            string sequence = null;\n            var count = 0;\n            AlignmentBuilder alignmentBuilder = null;\n            scoresWriter.WriteLine(\"#Ensembl\\tChromosome\\tProteinSequence\\tPercent Conservation at each AA residue\");\n            while (((name, sequence)= GetNextSequence(reader)) != (null, null))\n            {\n                (string transcriptId, string species, string chromosome) = Utilities.ParseSequenceName(name);\n                \n                if(alignmentBuilder == null) alignmentBuilder = new AlignmentBuilder(transcriptId);\n                \n                if (alignmentBuilder.TranscriptId != transcriptId)\n                {\n                    writer.WriteLine(alignmentBuilder.Chromosome);\n                    writer.WriteLine(alignmentBuilder.TranscriptId);\n                    writer.WriteLine(alignmentBuilder.ToString());\n                    scoresWriter.WriteLine(alignmentBuilder.GetScoresLine());\n                    alignmentBuilder = new AlignmentBuilder(transcriptId);\n                    count++;\n                }\n                \n                alignmentBuilder.Add(transcriptId, species, sequence);\n                if (species == \"hg38\" || species == \"hg19\") alignmentBuilder.Chromosome = chromosome;\n\n            }\n\n            return count;\n        }\n\n        \n        private static (string name, string sequence) GetNextSequence(StreamReader reader)\n        {\n            var name = reader.ReadLine();\n            while (name==\"\")\n            {\n                name = reader.ReadLine();\n            }\n            if (name == null) return (null, null);\n            if(!name.StartsWith('>')) throw new DataMisalignedException($\"FASTA entry does not start with >. 
Observed name: {name}\");\n            var sequence = reader.ReadLine();\n            if (sequence == null) throw new DataMisalignedException($\"No sequence found for {name}\");\n\n            return (name, sequence);\n        }\n    }\n}"
  },
  {
    "path": "Sandbox/AminoAcidAligner/Utilities.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing Compression.Utilities;\n\nnamespace AminoAcidAligner\n{\n    public static class Utilities\n    {\n        public static (string TranscriptId, string Species, string Chromosome) ParseSequenceName(string name)\n        {\n            //>ENST00000641515.2_hg38_1_2 3 0 0 chr1:65565-65573+\n            var terms = name.Split('_', ' ', '\\t');\n            var transcriptId = terms[0].TrimStart('>'); \n            //remove versions for Ensembl transcripts\n            if (transcriptId.StartsWith(\"ENST\")) transcriptId = transcriptId.Split('.')[0];\n\n            var species = terms[1];\n            string chrom = null;\n            chrom = terms.Length > 7 && string.IsNullOrEmpty(terms[7])? null: terms[7].Split(':')[0];\n            \n            return (transcriptId, species, chrom);\n        }\n\n        /// <summary>\n        /// using the CCDS file to find equivalence between Ensembl \n        /// </summary>\n        /// <param name=\"fileName\">input file name</param>\n        /// <returns>ensembl to RefSeq transcript mapping</returns>\n        //#ccds   original_member current_member  source  nucleotide_ID   protein_ID      status_in_CCDS  sequence_status\n        // CCDS2.2 1       0       NCBI    NM_152486.2     NP_689699.2     Updated 0\n        // CCDS2.2 0       1       NCBI    NM_152486.3     NP_689699.2     Accepted        1\n        // CCDS2.2 1       1       EBI,WTSI        ENST00000342066.7       ENSP00000342313.3       Accepted \n        public static List<HashSet<string>> GroupTranscripts(string fileName)\n        {\n            var ccdsToTranscriptIds = new Dictionary<string, HashSet<string>>();\n            \n            const int ccdsIndex = 0;\n            const int transcriptIndex = 4;\n            using (var reader = GZipUtilities.GetAppropriateStreamReader(fileName))\n            {\n                string line;\n                while ((line = 
reader.ReadLine()) != null)\n                {\n                    if(line.StartsWith('#')) continue;\n                    var terms = line.Split('\\t');\n                    var ccds = terms[ccdsIndex];\n                    var transcriptId = terms[transcriptIndex];\n                    //remove versions for Ensembl transcripts\n                    if (transcriptId.StartsWith(\"ENST\")) transcriptId = transcriptId.Split('.')[0];\n\n                    if (ccdsToTranscriptIds.TryGetValue(ccds, out var transcriptIds))\n                    {\n                        transcriptIds.Add(transcriptId);\n                    }\n                    else ccdsToTranscriptIds.Add(ccds, new HashSet<string>(){transcriptId});\n                    \n                }\n            }\n\n            return ccdsToTranscriptIds.Values.ToList();\n        }\n\n        public static Dictionary<string, HashSet<string>> GetEquivalentIds(List<HashSet<string>> transcriptGroups)\n        {\n            var idToGroup = new Dictionary<string, HashSet<string>>();\n            foreach (var transcriptGroup in transcriptGroups)\n            {\n                foreach (var transcript in transcriptGroup)\n                {\n                    idToGroup.Add(transcript, transcriptGroup);\n                }\n            }\n\n            return idToGroup;\n        }\n\n    }\n}"
  },
  {
    "path": "Sandbox/GenerateMustGenotypeVcf/ConfigurationSettings.cs",
    "content": "﻿namespace GenerateMustGenotypeVcf\n{\n    public static class ConfigurationSettings\n    {\n        #region members\n\n        // filenames\n        public static string OneKGenomeVcf;\n\t    public static string ClinVarVcf;\n\t    public static string CosmicVcf;\n        public static string CompressedReferencePath;\n\n        public static bool IsHg19;\n\n        #endregion\n    }\n}\n"
  },
  {
    "path": "Sandbox/GenerateMustGenotypeVcf/GenerateMustGenotypeVcf.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\n  <PropertyGroup>\n    <OutputType>Exe</OutputType>\n    <TargetFramework>net6.0</TargetFramework>\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\n    \n  </PropertyGroup>\n  <ItemGroup>\n    <ProjectReference Include=\"..\\..\\CommandLine\\CommandLine.csproj\" />\n    <ProjectReference Include=\"..\\..\\VariantAnnotation.Interface\\VariantAnnotation.Interface.csproj\" />\n    <ProjectReference Include=\"..\\..\\VariantAnnotation\\VariantAnnotation.csproj\" />\n  </ItemGroup>\n  <Import Project=\"..\\..\\VariantAnnotation\\CommonAssemblyInfo.props\" />\n</Project>"
  },
  {
    "path": "Sandbox/GenerateMustGenotypeVcf/GenerateMustGenotypeVcfsMain.cs",
    "content": "﻿using CommandLine.Handlers;\nusing CommandLine.NDesk.Options;\nusing CommandLine.VersionProvider;\nusing VariantAnnotation.DataStructures;\n\nnamespace GenerateMustGenotypeVcf\n{\n    sealed class GenerateMustGenotypeVcfsMain : AbstractCommandLineHandler\n\t{\n\t\tpublic static int Main(string[] args)\n\t\t{\n\t\t\tvar ops = new OptionSet\n\t\t\t{\n\t\t\t\t{\n\t\t\t\t\t\"onek=\",\n\t\t\t\t\t\"input 1000Genomes vcf file\",\n\t\t\t\t\tv => ConfigurationSettings.OneKGenomeVcf= v\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"cvr=\",\n\t\t\t\t\t\"input clinvar vcf file\",\n\t\t\t\t\tv => ConfigurationSettings.ClinVarVcf= v\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"cos=\",\n\t\t\t\t\t\"input cosmic vcf file\",\n\t\t\t\t\tv => ConfigurationSettings.CosmicVcf= v\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"ref=\",\n\t\t\t\t\t\"compressed reference sequence\",\n\t\t\t\t\tv => ConfigurationSettings.CompressedReferencePath= v\r\n                },\n\t\t\t    {\n\t\t\t        \"hg19\",\n                    \"need file for hg19\",\n                    v=>ConfigurationSettings.IsHg19 = v !=null\n\t\t\t    }\n\n\t\t\t};\n\n\t\t\tvar commandLineExample = \"--onek <input 1000 genomes vcf file> --cos <cosmic vcf file> --cvr <clinvar vcf file> --out <Output file name> --ref <GRCh37/GRCh38>\";\n\t\t\tvar generateMustGenotype = new GenerateMustGenotypeVcfsMain(\"Generates a must genotype vcf containing all ref minor positions in 1000 Genomes\",ops, commandLineExample, Constants.Authors);\n\t\t\tgenerateMustGenotype.Execute(args);\n\n\t\t\treturn generateMustGenotype.ExitCode;\n\t\t}\n\n\t\tpublic GenerateMustGenotypeVcfsMain(string programDescription, OptionSet ops, string commandLineExample, string programAuthors, IVersionProvider versionProvider = null) : base(programDescription, ops, commandLineExample, programAuthors, versionProvider)\n\t\t{\n\t\t}\n\n\t\tprotected override void ValidateCommandLine()\n\t\t{\n\t\t\tCheckInputFilenameExists(ConfigurationSettings.OneKGenomeVcf, \"input 1000 
genomes vcf\", \"--onek\", false);\n\t\t\tCheckInputFilenameExists(ConfigurationSettings.ClinVarVcf, \"input clinvar vcf\", \"--cvr\",false);\n\t\t\tCheckInputFilenameExists(ConfigurationSettings.CosmicVcf, \"input cosmic vcf\", \"--cos\", false);\n\t\t\tCheckInputFilenameExists(ConfigurationSettings.CompressedReferencePath, \"compressed reference sequence\", \"--ref\");\n\t\t}\n\n\t\tprotected override void ProgramExecution()\n\t\t{\n\t\t\tusing (var refMinorExtractor = new MustGenotypeExtractor(ConfigurationSettings.CompressedReferencePath,ConfigurationSettings.OneKGenomeVcf,\n\t\t\t\tConfigurationSettings.ClinVarVcf,\n\t\t\t\tConfigurationSettings.CosmicVcf,ConfigurationSettings.IsHg19\n\t\t\t\t))\n\t\t\t{\n\t\t\t\trefMinorExtractor.ExtractEntries();\n\t\t\t}\n\t\t\t\n\t\t}\n\t}\n}\n"
  },
  {
    "path": "Sandbox/GenerateMustGenotypeVcf/MustGenotypeExtractor.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing VariantAnnotation.Algorithms;\nusing VariantAnnotation.DataStructures;\nusing VariantAnnotation.DataStructures.CompressedSequence;\nusing VariantAnnotation.FileHandling;\nusing VariantAnnotation.FileHandling.Compression;\nusing VariantAnnotation.FileHandling.VCF;\nusing VariantAnnotation.Interface;\nusing VariantAnnotation.Utilities;\n\nnamespace GenerateMustGenotypeVcf\n{\n\tpublic sealed class MustGenotypeExtractor:IDisposable\n\t{\n\t\tprivate readonly StreamReader _oneKGenomeReader;\n\t\tprivate readonly StreamReader _clinvarReader;\n\t\tprivate readonly StreamReader _cosmicReader;\n\t\tprivate readonly GenomeAssembly _assembly;\n\n\t    private readonly DataFileManager _dataFileManager;\n\t    private readonly ICompressedSequence _compressedSequence;\n\n\t\tprivate int _refMinorCount;\n\t\tprivate int _clinvarCount;\n\t\tprivate int _cosmicCount;\n\n\t\tprivate const int CosmicMinCount = 5;\n\t\tprivate const double RefMinorFreq = 0.95;\n\t\tprivate const int SmallVariantMaxLength = 50;\n\t\tprivate const string RefMinorFileName = \"RefMinorAllelev5.preprocess.vcf.gz\";\n\t\tprivate const string IsisClinicalIndelFileName = \"IsasClinicalIndelsv4.preprocess.vcf.gz\";\n\t\tprivate const string OncogenicFileName = \"OncogenicSitesv3.preprocess.vcf.gz\";\n\n\t\tprivate readonly List<string> _grch37Contigs = new 
List<string>()\n\t\t{\n\t\t\t\"##contig=<ID=1,assembly=b37,length=249250621>\",\n\t\t\t\"##contig=<ID=2,assembly=b37,length=243199373>\",\n\t\t\t\"##contig=<ID=3,assembly=b37,length=198022430>\",\n\t\t\t\"##contig=<ID=4,assembly=b37,length=191154276>\",\n\t\t\t\"##contig=<ID=5,assembly=b37,length=180915260>\",\n\t\t\t\"##contig=<ID=6,assembly=b37,length=171115067>\",\n\t\t\t\"##contig=<ID=7,assembly=b37,length=159138663>\",\n\t\t\t\"##contig=<ID=8,assembly=b37,length=146364022>\",\n\t\t\t\"##contig=<ID=9,assembly=b37,length=141213431>\",\n\t\t\t\"##contig=<ID=10,assembly=b37,length=135534747>\",\n\t\t\t\"##contig=<ID=11,assembly=b37,length=135006516>\",\n\t\t\t\"##contig=<ID=12,assembly=b37,length=133851895>\",\n\t\t\t\"##contig=<ID=13,assembly=b37,length=115169878>\",\n\t\t\t\"##contig=<ID=14,assembly=b37,length=107349540>\",\n\t\t\t\"##contig=<ID=15,assembly=b37,length=102531392>\",\n\t\t\t\"##contig=<ID=16,assembly=b37,length=90354753>\",\n\t\t\t\"##contig=<ID=17,assembly=b37,length=81195210>\",\n\t\t\t\"##contig=<ID=18,assembly=b37,length=78077248>\",\n\t\t\t\"##contig=<ID=19,assembly=b37,length=59128983>\",\n\t\t\t\"##contig=<ID=20,assembly=b37,length=63025520>\",\n\t\t\t\"##contig=<ID=21,assembly=b37,length=48129895>\",\n\t\t\t\"##contig=<ID=22,assembly=b37,length=51304566>\",\n\t\t\t\"##contig=<ID=MT,assembly=b37,length=16569>\",\n\t\t\t\"##contig=<ID=X,assembly=b37,length=155270560>\",\n\t\t\t\"##contig=<ID=Y,assembly=b37,length=59373566>\",\n\n\t\t};\n\n\t\tprivate readonly List<string> _hg19Contigs = new 
List<string>()\n\t\t{\n\t\t\t\"##contig=<ID=chr1,assembly=b37,length=249250621>\",\n\t\t\t\"##contig=<ID=chr2,assembly=b37,length=243199373>\",\n\t\t\t\"##contig=<ID=chr3,assembly=b37,length=198022430>\",\n\t\t\t\"##contig=<ID=chr4,assembly=b37,length=191154276>\",\n\t\t\t\"##contig=<ID=chr5,assembly=b37,length=180915260>\",\n\t\t\t\"##contig=<ID=chr6,assembly=b37,length=171115067>\",\n\t\t\t\"##contig=<ID=chr7,assembly=b37,length=159138663>\",\n\t\t\t\"##contig=<ID=chr8,assembly=b37,length=146364022>\",\n\t\t\t\"##contig=<ID=chr9,assembly=b37,length=141213431>\",\n\t\t\t\"##contig=<ID=chr10,assembly=b37,length=135534747>\",\n\t\t\t\"##contig=<ID=chr11,assembly=b37,length=135006516>\",\n\t\t\t\"##contig=<ID=chr12,assembly=b37,length=133851895>\",\n\t\t\t\"##contig=<ID=chr13,assembly=b37,length=115169878>\",\n\t\t\t\"##contig=<ID=chr14,assembly=b37,length=107349540>\",\n\t\t\t\"##contig=<ID=chr15,assembly=b37,length=102531392>\",\n\t\t\t\"##contig=<ID=chr16,assembly=b37,length=90354753>\",\n\t\t\t\"##contig=<ID=chr17,assembly=b37,length=81195210>\",\n\t\t\t\"##contig=<ID=chr18,assembly=b37,length=78077248>\",\n\t\t\t\"##contig=<ID=chr19,assembly=b37,length=59128983>\",\n\t\t\t\"##contig=<ID=chr20,assembly=b37,length=63025520>\",\n\t\t\t\"##contig=<ID=chr21,assembly=b37,length=48129895>\",\n\t\t\t\"##contig=<ID=chr22,assembly=b37,length=51304566>\",\n\t\t\t\"##contig=<ID=chrX,assembly=b37,length=155270560>\",\n\t\t\t\"##contig=<ID=chrY,assembly=b37,length=59373566>\",\n\n\t\t};\n\t\tprivate readonly List<string> _grch38Contigs = new 
List<string>()\n\t\t{\n\t\t\t\"##contig=<ID=chr1,assembly=GCF_000001405.26,length=248956422>\",\n\t\t\t\"##contig=<ID=chr2,assembly=GCF_000001405.26,length=242193529>\",\n\t\t\t\"##contig=<ID=chr3,assembly=GCF_000001405.26,length=198295559>\",\n\t\t\t\"##contig=<ID=chr4,assembly=GCF_000001405.26,length=190214555>\",\n\t\t\t\"##contig=<ID=chr5,assembly=GCF_000001405.26,length=181538259>\",\n\t\t\t\"##contig=<ID=chr6,assembly=GCF_000001405.26,length=170805979>\",\n\t\t\t\"##contig=<ID=chr7,assembly=GCF_000001405.26,length=159345973>\",\n\t\t\t\"##contig=<ID=chr8,assembly=GCF_000001405.26,length=145138636>\",\n\t\t\t\"##contig=<ID=chr9,assembly=GCF_000001405.26,length=138394717>\",\n\t\t\t\"##contig=<ID=chr10,assembly=GCF_000001405.26,length=133797422>\",\n\t\t\t\"##contig=<ID=chr11,assembly=GCF_000001405.26,length=135086622>\",\n\t\t\t\"##contig=<ID=chr12,assembly=GCF_000001405.26,length=133275309>\",\n\t\t\t\"##contig=<ID=chr13,assembly=GCF_000001405.26,length=114364328>\",\n\t\t\t\"##contig=<ID=chr14,assembly=GCF_000001405.26,length=107043718>\",\n\t\t\t\"##contig=<ID=chr15,assembly=GCF_000001405.26,length=90338345>\",\n\t\t\t\"##contig=<ID=chr16,assembly=GCF_000001405.26,length=83257441>\",\n\t\t\t\"##contig=<ID=chr17,assembly=GCF_000001405.26,length=83257441>\",\n\t\t\t\"##contig=<ID=chr18,assembly=GCF_000001405.26,length=80373285>\",\n\t\t\t\"##contig=<ID=chr19,assembly=GCF_000001405.26,length=58617616>\",\n\t\t\t\"##contig=<ID=chr20,assembly=GCF_000001405.26,length=64444167>\",\n\t\t\t\"##contig=<ID=chr21,assembly=GCF_000001405.26,length=46709983>\",\n\t\t\t\"##contig=<ID=chr22,assembly=GCF_000001405.26,length=50818468>\",\n\t\t\t\"##contig=<ID=chrM,assembly=GCF_000001405.26,length=16569>\",\n\t\t\t\"##contig=<ID=chrX,assembly=GCF_000001405.26,length=156040895>\",\n\t\t\t\"##contig=<ID=chrY,assembly=GCF_000001405.26,length=57227415>\"\n\n\t\t};\n\n\t\tprivate readonly List<string> _refMinorGrch37HeaderLines = new 
List<string>()\n\t\t{\n\t\t\t\"##fileformat=VCFv4.1\",\n\t\t\t\"##Description=RefMinor positions (ref allele frequency < 0.05) extracted from 1000 Genomes data\",\n\t\t\t\"##FILTER=<ID=PASS,Description=\\\"All filters passed\\\">\",\n\t\t\t\"##reference=ftp://ftp.1000genomes.ebi.ac.uk//vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz\",\n\t\t\t\"##source=1000GenomesPhase3Pipeline\",\n\t\t\t\"##contig=<ID=1,assembly=b37,length=249250621>\",\n\t\t\t\"##contig=<ID=2,assembly=b37,length=243199373>\",\n\t\t\t\"##contig=<ID=3,assembly=b37,length=198022430>\",\n\t\t\t\"##contig=<ID=4,assembly=b37,length=191154276>\",\n\t\t\t\"##contig=<ID=5,assembly=b37,length=180915260>\",\n\t\t\t\"##contig=<ID=6,assembly=b37,length=171115067>\",\n\t\t\t\"##contig=<ID=7,assembly=b37,length=159138663>\",\n\t\t\t\"##contig=<ID=8,assembly=b37,length=146364022>\",\n\t\t\t\"##contig=<ID=9,assembly=b37,length=141213431>\",\n\t\t\t\"##contig=<ID=10,assembly=b37,length=135534747>\",\n\t\t\t\"##contig=<ID=11,assembly=b37,length=135006516>\",\n\t\t\t\"##contig=<ID=12,assembly=b37,length=133851895>\",\n\t\t\t\"##contig=<ID=13,assembly=b37,length=115169878>\",\n\t\t\t\"##contig=<ID=14,assembly=b37,length=107349540>\",\n\t\t\t\"##contig=<ID=15,assembly=b37,length=102531392>\",\n\t\t\t\"##contig=<ID=16,assembly=b37,length=90354753>\",\n\t\t\t\"##contig=<ID=17,assembly=b37,length=81195210>\",\n\t\t\t\"##contig=<ID=18,assembly=b37,length=78077248>\",\n\t\t\t\"##contig=<ID=19,assembly=b37,length=59128983>\",\n\t\t\t\"##contig=<ID=20,assembly=b37,length=63025520>\",\n\t\t\t\"##contig=<ID=21,assembly=b37,length=48129895>\",\n\t\t\t\"##contig=<ID=22,assembly=b37,length=51304566>\",\n\t\t\t\"##contig=<ID=MT,assembly=b37,length=16569>\",\n\t\t\t\"##contig=<ID=X,assembly=b37,length=155270560>\",\n\t\t\t\"##contig=<ID=Y,assembly=b37,length=59373566>\",\n\t\t\t\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\"\n\t\t};\n\n\t\tprivate readonly List<string> _refMinorHg19HeaderLines 
= new List<string>()\n\t\t{\n\t\t\t\"##fileformat=VCFv4.1\",\n\t\t\t\"##Description=RefMinor positions (ref allele frequency < 0.05) extracted from 1000 Genomes data\",\n\t\t\t\"##FILTER=<ID=PASS,Description=\\\"All filters passed\\\">\",\n\t\t\t\"##reference=ftp://ftp.1000genomes.ebi.ac.uk//vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz\",\n\t\t\t\"##source=1000GenomesPhase3Pipeline\",\n\t\t\t\"##contig=<ID=chr1,assembly=b37,length=249250621>\",\n\t\t\t\"##contig=<ID=chr2,assembly=b37,length=243199373>\",\n\t\t\t\"##contig=<ID=chr3,assembly=b37,length=198022430>\",\n\t\t\t\"##contig=<ID=chr4,assembly=b37,length=191154276>\",\n\t\t\t\"##contig=<ID=chr5,assembly=b37,length=180915260>\",\n\t\t\t\"##contig=<ID=chr6,assembly=b37,length=171115067>\",\n\t\t\t\"##contig=<ID=chr7,assembly=b37,length=159138663>\",\n\t\t\t\"##contig=<ID=chr8,assembly=b37,length=146364022>\",\n\t\t\t\"##contig=<ID=chr9,assembly=b37,length=141213431>\",\n\t\t\t\"##contig=<ID=chr10,assembly=b37,length=135534747>\",\n\t\t\t\"##contig=<ID=chr11,assembly=b37,length=135006516>\",\n\t\t\t\"##contig=<ID=chr12,assembly=b37,length=133851895>\",\n\t\t\t\"##contig=<ID=chr13,assembly=b37,length=115169878>\",\n\t\t\t\"##contig=<ID=chr14,assembly=b37,length=107349540>\",\n\t\t\t\"##contig=<ID=chr15,assembly=b37,length=102531392>\",\n\t\t\t\"##contig=<ID=chr16,assembly=b37,length=90354753>\",\n\t\t\t\"##contig=<ID=chr17,assembly=b37,length=81195210>\",\n\t\t\t\"##contig=<ID=chr18,assembly=b37,length=78077248>\",\n\t\t\t\"##contig=<ID=chr19,assembly=b37,length=59128983>\",\n\t\t\t\"##contig=<ID=chr20,assembly=b37,length=63025520>\",\n\t\t\t\"##contig=<ID=chr21,assembly=b37,length=48129895>\",\n\t\t\t\"##contig=<ID=chr22,assembly=b37,length=51304566>\",\n\t\t\t\"##contig=<ID=chrX,assembly=b37,length=155270560>\",\n\t\t\t\"##contig=<ID=chrY,assembly=b37,length=59373566>\",\n\t\t\t\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\"\n\t\t};\n\n\n\t    private readonly 
List<string> _refMinorGRCh38HeaderLines = new List<string>()\n\t    {\n\t        \"##fileformat=VCFv4.1\",\n\t        \"##Description=RefMinor positions (ref allele frequency < 0.05) extracted from 1000 Genomes data\",\n\t        \"##FILTER=<ID=PASS,Description=\\\"All filters passed\\\">\",\n\t        \"##reference=ftp://ftp.1000genomes.ebi.ac.uk//vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz\",\n\t        \"##source=1000GenomesPhase3Pipeline\",\n\t        \"##contig=<ID=chr1,assembly=GCF_000001405.26,length=248956422>\",\n\t        \"##contig=<ID=chr2,assembly=GCF_000001405.26,length=242193529>\",\n\t        \"##contig=<ID=chr3,assembly=GCF_000001405.26,length=198295559>\",\n\t        \"##contig=<ID=chr4,assembly=GCF_000001405.26,length=190214555>\",\n\t        \"##contig=<ID=chr5,assembly=GCF_000001405.26,length=181538259>\",\n\t        \"##contig=<ID=chr6,assembly=GCF_000001405.26,length=170805979>\",\n\t        \"##contig=<ID=chr7,assembly=GCF_000001405.26,length=159345973>\",\n\t        \"##contig=<ID=chr8,assembly=GCF_000001405.26,length=145138636>\",\n\t        \"##contig=<ID=chr9,assembly=GCF_000001405.26,length=138394717>\",\n\t        \"##contig=<ID=chr10,assembly=GCF_000001405.26,length=133797422>\",\n\t        \"##contig=<ID=chr11,assembly=GCF_000001405.26,length=135086622>\",\n\t        \"##contig=<ID=chr12,assembly=GCF_000001405.26,length=133275309>\",\n\t        \"##contig=<ID=chr13,assembly=GCF_000001405.26,length=114364328>\",\n\t        \"##contig=<ID=chr14,assembly=GCF_000001405.26,length=107043718>\",\n\t        \"##contig=<ID=chr15,assembly=GCF_000001405.26,length=90338345>\",\n\t        \"##contig=<ID=chr16,assembly=GCF_000001405.26,length=83257441>\",\n\t        \"##contig=<ID=chr17,assembly=GCF_000001405.26,length=83257441>\",\n\t        \"##contig=<ID=chr18,assembly=GCF_000001405.26,length=80373285>\",\n\t        \"##contig=<ID=chr19,assembly=GCF_000001405.26,length=58617616>\",\n\t        
\"##contig=<ID=chr20,assembly=GCF_000001405.26,length=64444167>\",\n\t        \"##contig=<ID=chr21,assembly=GCF_000001405.26,length=46709983>\",\n\t        \"##contig=<ID=chr22,assembly=GCF_000001405.26,length=50818468>\",\n\t        \"##contig=<ID=chrM,assembly=GCF_000001405.26,length=16569>\",\n\t        \"##contig=<ID=chrX,assembly=GCF_000001405.26,length=156040895>\",\n\t        \"##contig=<ID=chrY,assembly=GCF_000001405.26,length=57227415>\",\r\n            \"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\"\n\t    };\r\n\r\n        public MustGenotypeExtractor(string compressedSeqPath, string oneKGenomeVcf,string clinvarVcf, string cosmicVcf, bool isHg19 = false)\n\t\t{\n            _compressedSequence = new CompressedSequence();\n            _dataFileManager = new DataFileManager(new CompressedSequenceReader(FileUtilities.GetReadStream(compressedSeqPath),_compressedSequence ),_compressedSequence);\n\t\t    _assembly = _compressedSequence.GenomeAssembly == GenomeAssembly.GRCh37 && isHg19? GenomeAssembly.hg19:_compressedSequence.GenomeAssembly;\n\n\t\t\tif (_assembly == GenomeAssembly.Unknown)\n\t\t\t\tthrow new Exception(\"Genome assembly must be either GRCh37 or GRCh38\");\n            if(_compressedSequence.GenomeAssembly == GenomeAssembly.GRCh38 && isHg19)\n                throw new Exception(\"reference sequence is GRCh38 while generating hg19 files\");\n\n\t\t\t_oneKGenomeReader  = string.IsNullOrEmpty(oneKGenomeVcf)? null: GZipUtilities.GetAppropriateStreamReader(oneKGenomeVcf);\n\t\t\t_clinvarReader     = string.IsNullOrEmpty(clinvarVcf) ? null : GZipUtilities.GetAppropriateStreamReader(clinvarVcf);\n\t\t\t_cosmicReader      = string.IsNullOrEmpty(cosmicVcf) ? null : GZipUtilities.GetAppropriateStreamReader(cosmicVcf);\n\t\t}\n\n\t\t#region IDisposable\n\n\t\tprivate bool _isDisposed;\n\t\t\n\t\t/// <summary>\n\t\t/// public implementation of Dispose pattern callable by consumers. 
\n\t\t/// </summary>\n\t\tpublic void Dispose()\n\t\t{\n\t\t\tDispose(true);\n\t\t}\n\n\t\t/// <summary>\n\t\t/// protected implementation of Dispose pattern. \n\t\t/// </summary>\n\t\tprivate void Dispose(bool disposing)\n\t\t{\n\t\t\tlock (this)\n\t\t\t{\n\t\t\t\tif (_isDisposed) return;\n\n\t\t\t\tif (disposing)\n\t\t\t\t{\n\t\t\t\t\t// Free any other managed objects here. \n\t\t\t\t\tClose();\n\t\t\t\t}\n\n\t\t\t\t// Free any unmanaged objects here. \n\t\t\t\t_isDisposed = true;\n\t\t\t}\n\t\t}\n\n\t\tprivate void Close()\n\t\t{\n\t\t\t_oneKGenomeReader?.Dispose();\n\t\t\t_clinvarReader?.Dispose();\n\t\t\t_cosmicReader?.Dispose();\n\t\t}\n\n\t\t#endregion\n\n\t\tpublic void ExtractEntries()\n\t\t{\n\t\t\tExtractFromClinVar();\n\t\t\tif (_clinvarCount > 0)\n\t\t\t\tConsole.WriteLine($\"{_clinvarCount} non-SNVs extracted from clinvar\");\n\n\t\t\tExtractFromCosmic();\n\t\t\tif (_cosmicCount > 0)\n\t\t\t\tConsole.WriteLine($\"{_cosmicCount} entries with count > {CosmicMinCount} extracted from Cosmic\");\n\n\t\t\tExtractFromOneKg();\n\t\t\tif (_refMinorCount > 0)\n\t\t\t\tConsole.WriteLine($\"{_refMinorCount} ref minor positions extracted from 1000 G\");\n\t\t\t\t\n\t\t\t\n\t\t}\n\n\t\tprivate void ExtractFromCosmic()\n\t\t{\n\t\t\tif (_cosmicReader == null) return;\n\n\t\t    var needParseHeaderLine = true;\n\t\t\tusing (var writer = GZipUtilities.GetStreamWriter(OncogenicFileName))\n\t\t\t{\n\t\t\t\tstring line;\n\t\t\t\t\n\t\t\t\twhile ((line = _cosmicReader.ReadLine()) != null)\n\t\t\t\t{\n\t\t\t\t\t// Skip empty lines.\n\t\t\t\t\tif (string.IsNullOrWhiteSpace(line)) continue;\n\t\t\t\t\t//copy required header lines\n\t\t\t\t\tif (line.StartsWith(\"#\") && needParseHeaderLine)\n\t\t\t\t\t{\n\t\t\t\t\t\tProcessHeaderLine(writer, line);\n\t\t\t\t\t\tcontinue;\n\t\t\t\t\t}\n\n                    if(line.StartsWith(\"#\")) continue;\n\n\t\t\t\t    needParseHeaderLine = false;\r\n                    var fields = line.Split('\\t');\n\n\t\t\t\t\tif 
(IsLargeVariants(fields[VcfCommon.RefIndex], fields[VcfCommon.AltIndex]))\n\t\t\t\t\t\tcontinue;\n\t\t\t\t\tif (! HasMinCount(fields[VcfCommon.InfoIndex]))\n\t\t\t\t\t\tcontinue;\n\n\t\t\t\t\t_cosmicCount++;\n\n\n\t\t\t\t\tvar chrName = GetChrName(fields[VcfCommon.ChromIndex]);\n\n\t\t\t\t\t//skip mito for hg19\n\t\t\t\t\tif (_assembly == GenomeAssembly.hg19 && (chrName == \"chrM\" || chrName == \"MT\"))\n\t\t\t\t\t\tcontinue;\n\n\t\t\t\t    var pos = Convert.ToInt32(fields[VcfCommon.PosIndex]);\n\t\t\t\t    var refAllele = fields[VcfCommon.RefIndex];\n\n\t\t\t\t    if (ValidateReference(chrName, pos, refAllele))\r\n                        writer.Write(chrName + '\\t' +\n\t\t\t\t\t\tpos + '\\t' +\n\t\t\t\t\t\tfields[VcfCommon.IdIndex] + '\\t' +\n\t\t\t\t\t\trefAllele + '\\t' +\n\t\t\t\t\t\tfields[VcfCommon.AltIndex] + '\\t' +\n\t\t\t\t\t\t\".\\t.\\t.\\n\");\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tprivate void ProcessHeaderLine(StreamWriter writer, string line)\n\t\t{\n\t\t    if (line.StartsWith(\"##fileformat=\"))\n\t\t    {\n\t\t        writer.WriteLine(\"##fileformat=VCFv4.1\");\n\t\t    }\n\t\t\tif (IsRequiredHeaderLine(line))\n\t\t\t{\n\t\t\t\twriter.Write(line + \"\\n\");\n\t\t\t\treturn;\n\t\t\t}\n\n\t\t\t//if we have seen the chrom header\n\t\t\tif (!line.StartsWith(\"#CHROM\")) return;\n\n\t\t\twriter.Write($\"##Description=COSMIC variants having count greater or equal to {CosmicMinCount}\" + \"\\n\");\n\t\t\tWriteContigLines(writer);\n\t\t\twriter.Write(line + \"\\n\");\n\t\t\t\n\t\t}\n\n\t\tprivate void WriteContigLines(StreamWriter writer)\n\t\t{\n\t\t\tList<string> contigLines = null;\n\t\t\tif (_assembly == GenomeAssembly.GRCh37) contigLines = _grch37Contigs;\n\t\t\tif (_assembly == GenomeAssembly.GRCh38) contigLines = _grch38Contigs;\n\t\t\tif (_assembly == GenomeAssembly.hg19) contigLines = _hg19Contigs;\n\n\t\t\tif (contigLines == null) return;\n\n\t\t\tforeach (var contigLine in contigLines)\n\t\t\t{\n\t\t\t\twriter.Write(contigLine + 
\"\\n\");\n\t\t\t}\n\t\t}\n\n\t\tprivate static bool IsRequiredHeaderLine(string line)\n\t\t{\n\t\t\treturn line.StartsWith(\"##source=\") ||\n\t\t\t       line.StartsWith(\"##reference=\");\n\t\t}\n\n\t\tprivate static bool IsLargeVariants(string refAllele, string altAlleles)\n\t\t{\n\t\t\tforeach (var altAllele in altAlleles.Split(','))\n\t\t\t{\n\t\t\t\tvar trimmedAlleles = BiDirectionalTrimmer.Trim(1, refAllele, altAllele);\n\t\t\t\tvar trimmedRef = trimmedAlleles.Item2;\n\t\t\t\tvar trimmedAlt = trimmedAlleles.Item3;\n\n\t\t\t\tif (trimmedRef.Length > SmallVariantMaxLength || trimmedAlt.Length > SmallVariantMaxLength) return true;\n\t\t\t}\n\n\t\t\treturn false;\n\n\t\t}\n\n\t\tprivate static bool HasMinCount(string info)\n\t\t{\n\t\t\tvar infoFields = info.Split(';');\n\t\t\tforeach (var infoField in infoFields)\n\t\t\t{\n\t\t\t\tif (!infoField.StartsWith(\"CNT=\")) continue;\n\n\t\t\t\tvar count = Convert.ToInt32(infoField.Substring(4));\n\t\t\t\t\n\t\t\t\treturn count >= CosmicMinCount;\n\t\t\t}\n\t\t\treturn false;\n\t\t}\n\n\t\tprivate void ExtractFromClinVar()\n\t\t{\n\t\t\tif (_clinvarReader == null) return;\n\t\t\t\n\t\t\tusing (var writer = GZipUtilities.GetStreamWriter(IsisClinicalIndelFileName))\n\t\t\t{\n\t\t\t\tstring line;\n\t\t\t\twhile ((line = _clinvarReader.ReadLine()) != null)\n\t\t\t\t{\n\t\t\t\t\t// Skip empty lines.\n\t\t\t\t\tif (string.IsNullOrWhiteSpace(line)) continue;\n\t\t\t\t\t\n\t\t\t\t\t//copy required header lines\n\t\t\t\t\tif (line.StartsWith(\"#\"))\n\t\t\t\t\t{\n\t\t\t\t\t\tProcessHeaderLine(writer, line);\n\t\t\t\t\t\tcontinue;\n\t\t\t\t\t}\n\n\t\t\t\t\tvar fields = line.Split('\\t');\n\n\t\t\t\t\tif (IsSnv(fields[VcfCommon.RefIndex], fields[VcfCommon.AltIndex]))\n\t\t\t\t\t\tcontinue;\n\n\t\t\t\t\t_clinvarCount++;\n\t\t\t\t\tvar chrName = GetChrName(fields[VcfCommon.ChromIndex]);\n\n                    //skip mito for hg19\n                    if (_assembly == GenomeAssembly.hg19 && (chrName == \"chrM\" || chrName == 
\"MT\"))\n\t\t\t\t\t\tcontinue;\n\t\t\t\t    var pos = Convert.ToInt32(fields[VcfCommon.PosIndex]);\n\t\t\t\t    var refAllele = fields[VcfCommon.RefIndex];\n\n                    if(ValidateReference(chrName,pos,refAllele))\n                        writer.Write(chrName + '\\t' +\n\t\t\t\t\t\t\t\t pos + '\\t' +\n\t\t\t\t\t\t\t\t fields[VcfCommon.IdIndex] + '\\t' +\n\t\t\t\t\t\t\t\t refAllele + '\\t' +\n\t\t\t\t\t\t\t\t fields[VcfCommon.AltIndex] + '\\t' +\n\t\t\t\t\t\t\t\t \".\\t.\\t.\\n\");\n\t\t\t\t}\n\t\t\t}\n\t\t\t\n\t\t}\n\n\t    private bool ValidateReference(string chromosome, int pos, string refAllele)\n\t    {\n\t        var refIndex = _compressedSequence.Renamer.GetReferenceIndex(chromosome);\n\t        if (refIndex == ChromosomeRenamer.UnknownReferenceIndex) return false;\r\n            _dataFileManager.LoadReference(refIndex, () => { });\n\t        return _compressedSequence.Substring(pos - 1, refAllele.Length) == refAllele;\n\t    }\n\n\t    private void ExtractFromOneKg()\n\t\t{\n\t\t\tif (_oneKGenomeReader == null) return;\n\n\t\t\tusing (var writer = GZipUtilities.GetStreamWriter(RefMinorFileName))\n\t\t\t{\n\t\t\t\tList<string> headerLines = null;\n\t\t\t\tif (_assembly == GenomeAssembly.GRCh37) headerLines = _refMinorGrch37HeaderLines;\n\t\t\t\tif (_assembly == GenomeAssembly.hg19) headerLines = _refMinorHg19HeaderLines;\n\t\t\t    if (_assembly == GenomeAssembly.GRCh38) headerLines = _refMinorGRCh38HeaderLines;\r\n\r\n                if (headerLines == null) \n\t\t\t\t\tthrow new Exception(\"Unknown assembly for RefMinor Extraction\");\n\n\t\t\t\tforeach (var headerLine in headerLines)\n\t\t\t\t\twriter.Write(headerLine + \"\\n\");\n\n\t\t\t\tstring line;\n\t\t\t\twhile ((line = _oneKGenomeReader.ReadLine()) != null)\n\t\t\t\t{\n\t\t\t\t\t// Skip empty lines.\n\t\t\t\t\tif (string.IsNullOrWhiteSpace(line)) continue;\n\t\t\t\t\t// Skip comments.\n\t\t\t\t\tif (line.StartsWith(\"#\"))continue;\n\n\t\t\t\t\tvar fields = 
line.Split('\\t');\n\n\t\t\t\t\tif (!IsRefMinorPosition(fields[VcfCommon.InfoIndex])) continue;\n\n\n\t\t\t\t\t_refMinorCount++;\n\t\t\t\t\tvar chrName = GetChrName(fields[VcfCommon.ChromIndex]);\n\n\t\t\t\t\t//skip mito for hg19\n\t\t\t\t\tif (_assembly == GenomeAssembly.hg19 && (chrName == \"chrM\" || chrName == \"MT\"))\n\t\t\t\t\t\tcontinue;\n\n\t\t\t\t    var pos = Convert.ToInt32(fields[VcfCommon.PosIndex]);\n\t\t\t\t    var refAllele = fields[VcfCommon.RefIndex];\n\n\t\t\t\t    if (ValidateReference(chrName, pos, refAllele))\r\n                        writer.Write(chrName + '\\t' +\n\t\t\t\t\t\t\t\t pos + '\\t' +\n\t\t\t\t\t\t\t\t fields[VcfCommon.IdIndex] + '\\t' +\n\t\t\t\t\t\t\t\t refAllele + '\\t' +\n\t\t\t\t\t\t\t\t fields[VcfCommon.AltIndex] + '\\t' +\n\t\t\t\t\t\t\t\t \".\\t.\\t.\\n\");\n\t\t\t\t}\n\t\t\t}\n\t\t\t\n\t\t}\n\n\t\tprivate string GetChrName(string chromosome)\n\t\t{\n\t\t\tvar chrName = _assembly == GenomeAssembly.GRCh38 || _assembly == GenomeAssembly.hg19\n\t\t\t\t? \"chr\" + chromosome\n\t\t\t\t: chromosome;\n\t\t\tif (chrName == \"chrMT\")\n\t\t\t\tchrName = \"chrM\";\n\t\t\treturn chrName;\n\t\t}\n\n\t\tprivate static bool IsSnv(string refAllele, string altAlleles)\n\t\t{\n\t\t\tif (!IsSnv(refAllele)) return false;\n\n\t\t\treturn altAlleles.Split(',').All(IsSnv);\n\t\t}\n\n\t\tprivate static bool IsSnv(string allele)\n\t\t{\n\t\t\tif (allele.Length != 1) return false;\n\n\t\t\tallele = allele.ToUpper();\n\n\t\t\tif (allele == \"A\" || allele == \"C\" || allele == \"G\" || allele == \"T\") return true;\n\n\t\t\treturn false;\n\t\t}\n\n\t\tprivate static bool IsRefMinorPosition(string info)\n\t\t{\n\t\t\tvar infoFields = info.Split(';');\n\t\t\tforeach (var infoField in infoFields)\n\t\t\t{\n\t\t\t\tif (! 
infoField.StartsWith(\"AF=\")) continue;\n\n\t\t\t\tvar totalAltAlleleFreq = 0.0;\n\n\t\t\t\tforeach (var freq in infoField.Substring(3).Split(','))\n\t\t\t\t{\n\t\t\t\t\ttotalAltAlleleFreq+=Convert.ToDouble(freq);\n\t\t\t\t}\n\t\t\t\t\t\n\t\t\t\treturn totalAltAlleleFreq >= RefMinorFreq;\n\t\t\t}\n\t\t\treturn false;\n\t\t}\n\n\t\t\n\t}\n}"
  },
  {
    "path": "Sandbox/Piano/ConfigurationSettings.cs",
    "content": "﻿namespace Piano\n{\n\tpublic static class ConfigurationSettings\n\t{\n\t\t// filenames\n\t\tpublic static string InputCachePrefix;\n\t\tpublic static string VcfPath;\n\t\tpublic static string RefSequencePath;\n\t\tpublic static string OutputFileName;\n\n        // parameters\n\t\tpublic static bool ForceMitochondrialAnnotation;\n\t}\n}"
  },
  {
    "path": "Sandbox/Piano/Piano.cs",
    "content": "﻿\nusing System;\nusing System.Collections.Generic;\nusing System.IO;\nusing CommandLine.Builders;\nusing CommandLine.NDesk.Options;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing VariantAnnotation;\nusing VariantAnnotation.Interface.Positions;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Interface.Sequence;\nusing VariantAnnotation.IO.Caches;\nusing Vcf;\nusing VariantAnnotation.Interface;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.Interface.IO;\nusing VariantAnnotation.Providers;\n\nnamespace Piano\n{\n    sealed class Piano\n    {\n        #region members\n\n        private const string OutHeader =\n            \"#Chrom\\tPos\\tRefAllele\\tAltAllele\\tGeneSymbol\\tGeneId\\tTranscriptID\\tProteinID\\tProteinPos\\tUpstream\\tAAchange\\tDownstream\\tConsequences\";\n        private readonly PerformanceMetrics _performanceMetrics = PerformanceMetrics.Instance;\r\n\r\n        #endregion\n\r\n\r\n        private  ExitCodes ProgramExecution()\n        {\n            var sequenceProvider = ProviderUtilities.GetSequenceProvider(ConfigurationSettings.RefSequencePath);\n            var transcriptAnnotationProvider =\n                ProviderUtilities.GetTranscriptAnnotationProvider(ConfigurationSettings.InputCachePrefix, sequenceProvider);\n\n           \n\n\n            var annotator = ProviderUtilities.GetAnnotator(transcriptAnnotationProvider, sequenceProvider);\n\n            var dataSourceVesions = new List<IDataSourceVersion>();\n            dataSourceVesions.AddRange(transcriptAnnotationProvider.DataSourceVersions);\n\n\n            using (var outputWriter = new StreamWriter(ConfigurationSettings.OutputFileName))\n            using (var vcfReader =new VcfReader(GZipUtilities.GetAppropriateReadStream(ConfigurationSettings.VcfPath), sequenceProvider.GetChromosomeDictionary(), null, false))\n            {\n                try\n                {\n\n                    if 
(vcfReader.IsRcrsMitochondrion && annotator.GenomeAssembly == GenomeAssembly.GRCh37\n                        || annotator.GenomeAssembly == GenomeAssembly.GRCh38\n                        || ConfigurationSettings.ForceMitochondrialAnnotation)\n                        annotator.EnableMitochondrialAnnotation();\n\n                    int previousChromIndex = -1;\n                    IPosition position;\n                   // var sortedVcfChecker = new SortedVcfChecker();\n                   outputWriter.WriteLine(OutHeader);\n\n                    while ((position = vcfReader.GetNextPosition()) != null)\n                    {\n                        // sortedVcfChecker.CheckVcfOrder(position.Chromosome.UcscName);\n                         previousChromIndex = UpdatePerformanceMetrics(previousChromIndex, position.Chromosome);\n\n                        var annotatedPosition = annotator.Annotate(position);\n                        WriteAnnotatedPostion(annotatedPosition, outputWriter);\n\n                    }\n                }\n                catch (Exception e)\n                {\n                    e.Data[ExitCodeUtilities.VcfLine] = vcfReader.VcfLine;\n                    throw;\n                }\n            }\n\n            return ExitCodes.Success;\n        }\n\n        private int UpdatePerformanceMetrics(int previousChromIndex, Chromosome chromosome)\n        {\n            if (chromosome.Index != previousChromIndex)\n            {\n                if (previousChromIndex != -1) _performanceMetrics.StopReference();\n                _performanceMetrics.StartReference(chromosome.UcscName);\n\n                previousChromIndex = chromosome.Index;\n            }\n\n            return previousChromIndex;\n        }\n        private static void WriteAnnotatedPostion(IAnnotatedPosition annotatedPosition, StreamWriter writer)\n        {\n            
//\"#Chrom\\tPos\\tRefAllele\\tAltAllele\\tGeneSymbol\\tGeneId\\tTranscriptID\\tProteinID\\tProteinPos\\tUpstream\\tAAchange\\tDownstream\\tConsequences\";\n            if (annotatedPosition.AnnotatedVariants == null || annotatedPosition.AnnotatedVariants.Length == 0) return;\n\n            for (int i = 0; i < annotatedPosition.AnnotatedVariants.Length; i++)\n            {\n\n                var annotatedVariant = annotatedPosition.AnnotatedVariants[i];\n                var chromosome = annotatedPosition.Position.VcfFields[VcfCommon.ChromIndex];\n                var position = annotatedPosition.Position.Start;\n                var refAllele = annotatedPosition.Position.RefAllele;\n                var altAllele = annotatedPosition.Position.AltAlleles[i];\n\n                foreach (var ensemblTranscript in annotatedVariant.EnsemblTranscripts)\n                {\n                    var transcript = ensemblTranscript;\n\n                    if(transcript.ToString()==null) continue;\n\n                    var line = chromosome + \"\\t\" + position + \"\\t\" + refAllele +\n                               \"\\t\" + altAllele + \"\\t\" + transcript;\n                    writer.WriteLine(line);\n                }\n                foreach (var refSeqTranscript in annotatedVariant.RefSeqTranscripts)\n                {\n                    var transcript = refSeqTranscript ;\n                    if (transcript.ToString() == null) continue;\n\n                    var line = chromosome + \"\\t\" + position + \"\\t\" + refAllele +\n                               \"\\t\" + altAllele + \"\\t\" + transcript;\n                    writer.WriteLine(line);\n                }\n            }\n            \n        }\r\n\r\n        static int Main(string[] args)\n        {\n            var ops = new OptionSet\n            {\n                {\n                    \"cache|c=\",\n                    \"input cache {prefix}\",\n                    v => ConfigurationSettings.InputCachePrefix = 
v\n                },\n                {\n                    \"in|i=\",\n                    \"input VCF {path}\",\n                    v => ConfigurationSettings.VcfPath = v\n                },\n                {\n                    \"out|o=\",\n                    \"output {file path} \",\n                    v => ConfigurationSettings.OutputFileName = v\n                },\n                {\n                    \"ref|r=\",\n                    \"input compressed reference sequence {path}\",\n                    v => ConfigurationSettings.RefSequencePath = v\n                },\n                {\n                    \"force-mt\",\n                    \"forces to annotate mitochondria variants\",\n                    v => ConfigurationSettings.ForceMitochondrialAnnotation = v != null\n                }\n            };\n\n            var commandLineExample = \"-i <vcf path> -d <cache dir> -r <ref path> -o <base output filename>\";\n\n            var piano = new Piano();\n            var exitCode = new ConsoleAppBuilder(args, ops)\n                .UseVersionProvider(new VersionProvider())\n                .Parse()\n                .CheckInputFilenameExists(ConfigurationSettings.VcfPath, \"vcf\", \"--in\", true, \"-\")\n                .CheckInputFilenameExists(ConfigurationSettings.RefSequencePath, \"reference sequence\", \"--ref\")\n                .CheckInputFilenameExists(CacheConstants.TranscriptPath(ConfigurationSettings.InputCachePrefix), \"transcript cache\", \"--cache\")\n                .HasRequiredParameter(ConfigurationSettings.OutputFileName, \"output file stub\", \"--out\")\n                .ShowBanner(Constants.Authors)\n                .ShowHelpMenu(\"peptide annotation\", commandLineExample)\n                .ShowErrors()\n                .Execute(piano.ProgramExecution);\n\n            return (int)exitCode;\n        }\n\n       \n    }\n}"
  },
  {
    "path": "Sandbox/Piano/Piano.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\n  <PropertyGroup>\n    <OutputType>Exe</OutputType>\n    <TargetFramework>net6.0</TargetFramework>\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\n    \n  </PropertyGroup>\n  <ItemGroup>\n    <ProjectReference Include=\"..\\..\\CommandLine\\CommandLine.csproj\" />\n    <ProjectReference Include=\"..\\..\\VariantAnnotation.Interface\\VariantAnnotation.Interface.csproj\" />\n    <ProjectReference Include=\"..\\..\\VariantAnnotation\\VariantAnnotation.csproj\" />\n    <ProjectReference Include=\"..\\..\\Vcf\\Vcf.csproj\" />\n  </ItemGroup>\n  <Import Project=\"..\\..\\VariantAnnotation\\CommonAssemblyInfo.props\" />\n</Project>"
  },
  {
    "path": "Sandbox/Piano/PianoAnnotatedTranscript.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.Globalization;\r\nusing System.Linq;\r\nusing System.Text;\r\nusing VariantAnnotation.Algorithms;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Intervals;\r\n\r\nnamespace Piano\r\n{\r\n    public class PianoAnnotatedTranscript:IAnnotatedTranscript\r\n    {\r\n        public void SerializeJson(StringBuilder sb)\r\n        {\r\n            throw new System.NotImplementedException();\r\n        }\r\n\r\n        public ITranscript Transcript { get; }\r\n        public string ReferenceAminoAcids { get; }\r\n        public string AlternateAminoAcids { get; }\r\n        public string ReferenceCodons { get; }\r\n        public string AlternateCodons { get; }\r\n        public IMappedPositions MappedPositions { get; }\r\n        public string HgvsCoding { get; }\r\n        public string HgvsProtein { get; }\r\n        public PredictionScore Sift { get; }\r\n        public PredictionScore PolyPhen { get; }\r\n        public IEnumerable<ConsequenceTag> Consequences { get; }\r\n        public IGeneFusionAnnotation GeneFusionAnnotation { get; }\r\n\r\n        public string UpstreamAminoAcids { get; }\r\n        public string DownStreamAminoAcids { get; }\r\n\r\n        public PianoAnnotatedTranscript(ITranscript transcript, string referenceAminoAcids, string alternateAminoAcids, IMappedPositions mappedPositions, string upstreamAminoAcids,\r\n            string downStreamAminoAcids,IEnumerable<ConsequenceTag> consequences)\r\n        {\r\n            Transcript           = transcript;\r\n            ReferenceAminoAcids  = referenceAminoAcids;\r\n            AlternateAminoAcids  = alternateAminoAcids;\r\n            MappedPositions      = mappedPositions;\r\n            UpstreamAminoAcids   = upstreamAminoAcids;\r\n            DownStreamAminoAcids = downStreamAminoAcids;\r\n            Consequences = consequences;\r\n        }\r\n\r\n        public 
override string ToString()\r\n        {\r\n            if (MappedPositions.ProteinInterval.Start == null || MappedPositions.ProteinInterval.End == null)\r\n                return null;\r\n            var geneId = Transcript.Source == Source.Ensembl\r\n                ? Transcript.Gene.EnsemblId.ToString()\r\n                : Transcript.Gene.EntrezGeneId.ToString();\r\n            var downStreamAminoAcids = string.IsNullOrEmpty(DownStreamAminoAcids) ? \".\" : DownStreamAminoAcids;\r\n            var upstreamAminoAcids = string.IsNullOrEmpty(UpstreamAminoAcids) ? \".\" : UpstreamAminoAcids;\r\n\r\n            var line = Transcript.Gene.Symbol + \"\\t\" + geneId + \"\\t\" + CombineIdAndVersion(Transcript.Id,Transcript.Version) + \"\\t\" +\r\n                       CombineIdAndVersion(Transcript.Translation.ProteinId,Transcript.Translation.ProteinVersion) + \"\\t\" +\r\n                       GetNullablePositionRange(MappedPositions.ProteinInterval) + \"\\t\" + upstreamAminoAcids + \"\\t\" +\r\n                       GetAlleleString(ReferenceAminoAcids, AlternateAminoAcids) + \"\\t\" + downStreamAminoAcids+\"\\t\"+string.Join(',', Consequences?.Select(ConsequenceUtil.GetConsequence));\r\n            return line;\r\n        }\r\n\r\n        private static string GetAlleleString(string a, string b)\r\n        {\r\n            return a == b ? a : $\"{(string.IsNullOrEmpty(a) ? \"-\" : a)}/{(string.IsNullOrEmpty(b) ? 
\"-\" : b)}\";\r\n        }\r\n\r\n        private static string CombineIdAndVersion(ICompactId id, byte version) => id + \".\" + version;\r\n\r\n        private static string GetNullablePositionRange(NullableInterval interval)\r\n        {\r\n            if (interval.Start == null && interval.End == null) return null;\r\n            if (interval.Start == null) return \"?-\" + interval.End.Value;\r\n            if (interval.End == null) return interval.Start.Value + \"-?\";\r\n            var start = interval.Start.Value;\r\n            var end = interval.End.Value;\r\n            if (start > end) Swap.Int(ref start, ref end);\r\n            return start == end ? start.ToString(CultureInfo.InvariantCulture) : start + \"-\" + end;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Sandbox/Piano/PianoAnnotationProvider.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing VariantAnnotation.Caches;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Caches;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.Interface.Sequence;\r\nusing VariantAnnotation.IO.Caches;\r\nusing VariantAnnotation.TranscriptAnnotation;\r\nusing VariantAnnotation.Utilities;\r\n\r\nnamespace Piano\r\n{\r\n    public class PianoAnnotationProvider:IAnnotationProvider\r\n    {\r\n        public string Name { get; }\r\n        public GenomeAssembly GenomeAssembly { get; }\r\n        public IEnumerable<IDataSourceVersion> DataSourceVersions { get; }\r\n        private readonly ITranscriptCache _transcriptCache;\r\n        private readonly ISequence _sequence;\r\n        private ushort _currentRefIndex = ushort.MaxValue;\r\n\r\n\r\n        public PianoAnnotationProvider(string pathPrefix, ISequenceProvider sequenceProvider)\r\n        {\r\n            Name = \"Transcript annotation provider\";\r\n            _sequence = sequenceProvider.Sequence;\r\n            _transcriptCache = InitiateCache(FileUtilities.GetReadStream(CacheConstants.TranscriptPath(pathPrefix)), sequenceProvider.GetChromosomeIndexDictionary(), sequenceProvider.GenomeAssembly, sequenceProvider.NumRefSeqs);\r\n            GenomeAssembly = _transcriptCache.GenomeAssembly;\r\n            DataSourceVersions = _transcriptCache.DataSourceVersions;\r\n\r\n\r\n        }\r\n\r\n        private static TranscriptCache InitiateCache(Stream stream,\r\n            IDictionary<ushort, Chromosome> chromosomeIndexDictionary, GenomeAssembly genomeAssembly, ushort numRefSeq)\r\n        {\r\n            TranscriptCache cache;\r\n            using (var reader = new TranscriptCacheReader(stream, genomeAssembly, numRefSeq)) cache = reader.Read(chromosomeIndexDictionary);\r\n            return cache;\r\n        }\r\n\r\n        public void Annotate(IAnnotatedPosition 
annotatedPosition)\r\n        {\r\n            if (annotatedPosition.AnnotatedVariants == null || annotatedPosition.AnnotatedVariants.Length == 0) return;\r\n\r\n            var refIndex = annotatedPosition.Position.Chromosome.Index;\r\n            LoadPredictionCaches(refIndex);\r\n\r\n            AddTranscripts(annotatedPosition);\r\n        }\r\n\r\n        private void LoadPredictionCaches(ushort refIndex)\r\n        {\r\n            if (refIndex == _currentRefIndex) return;\r\n            if (refIndex == ushort.MaxValue)\r\n            {\r\n                ClearCache();\r\n                return;\r\n            }\r\n            _currentRefIndex = refIndex;\r\n        }\r\n\r\n        private void ClearCache()\r\n        {\r\n            _currentRefIndex = ushort.MaxValue;\r\n        }\r\n\r\n\r\n        private void AddTranscripts(IAnnotatedPosition annotatedPosition)\r\n        {\r\n            var overlappingTranscripts = _transcriptCache.GetOverlappingFlankingTranscripts(annotatedPosition.Position);\r\n\r\n            if (overlappingTranscripts == null)\r\n            {\r\n                // todo: handle intergenic variants\r\n                return;\r\n            }\r\n\r\n            foreach (var annotatedVariant in annotatedPosition.AnnotatedVariants)\r\n            {\r\n                var annotatedTranscripts = new List<IAnnotatedTranscript>();\r\n\r\n                PianoAnnotationUtils.GetAnnotatedTranscripts(annotatedVariant.Variant, overlappingTranscripts,\r\n                    _sequence, annotatedTranscripts);\r\n\r\n                if (annotatedTranscripts.Count == 0) continue;\r\n\r\n                foreach (var annotatedTranscript in annotatedTranscripts)\r\n                {\r\n                    if (annotatedTranscript.Transcript.Source == Source.Ensembl)\r\n                        annotatedVariant.EnsemblTranscripts.Add(annotatedTranscript);\r\n                    else annotatedVariant.RefSeqTranscripts.Add(annotatedTranscript);\r\n        
        }\r\n            }\r\n        }\r\n\r\n    }\r\n}"
  },
  {
    "path": "Sandbox/Piano/PianoAnnotationUtils.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing VariantAnnotation.Algorithms;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Intervals;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.Sequence;\r\n\r\nnamespace Piano\r\n{\r\n    public static class PianoAnnotationUtils\r\n    {\r\n        private static readonly AminoAcids AminoAcidsProvider = new AminoAcids(false);\r\n        private static readonly AminoAcids MitoAminoAcidsProvider = new AminoAcids(true);\r\n\r\n        public static void GetAnnotatedTranscripts(IVariant variant, ITranscript[] transcriptCandidates,\r\n            ISequence compressedSequence, IList<IAnnotatedTranscript> annotatedTranscripts)\r\n        {\r\n            foreach (var transcript in transcriptCandidates)\r\n            {\r\n                if (transcript.Overlaps(variant) && !variant.Behavior.ReducedTranscriptAnnotation)\r\n                {\r\n                    var annotatedTranscript = GetAnnotatedTranscript(variant, compressedSequence, transcript);\r\n                    if (annotatedTranscript != null) annotatedTranscripts.Add(annotatedTranscript);\r\n                }\r\n\r\n\r\n            }\r\n\r\n        }\r\n\r\n\r\n        private static IAnnotatedTranscript GetAnnotatedTranscript(IVariant variant, ISequence compressedSequence, ITranscript transcript)\r\n        {\r\n            var acidsProvider = variant.Chromosome.UcscName == \"chrM\"\r\n                        ? MitoAminoAcidsProvider\r\n                        : AminoAcidsProvider;\r\n            var annotatedTranscript =\r\n                PianoTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, compressedSequence,acidsProvider);\r\n\r\n            return annotatedTranscript;\r\n        }\r\n\r\n\r\n       \r\n    }\r\n}"
  },
  {
    "path": "Sandbox/Piano/PianoAnnotator.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.Linq;\r\nusing ErrorHandling.Exceptions;\r\nusing VariantAnnotation.AnnotatedPositions;\r\nusing VariantAnnotation.Interface;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.GeneAnnotation;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.Interface.Sequence;\r\n\r\nnamespace Piano\r\n{\r\n    public class PianoAnnotator:IAnnotator\r\n    {\r\n\r\n        private readonly IAnnotationProvider _taProvider;\r\n        private readonly ISequenceProvider _sequenceProvider;\r\n        private bool _annotateMito;\r\n        public GenomeAssembly GenomeAssembly { get; }\r\n\r\n        public PianoAnnotator(IAnnotationProvider taProvider, ISequenceProvider sequenceProvider)\r\n        {\r\n            _taProvider = taProvider;\r\n            _sequenceProvider = sequenceProvider;\r\n\r\n            GenomeAssembly = GetGenomeAssembly();\r\n\r\n        }\r\n\r\n        private GenomeAssembly GetGenomeAssembly()\r\n        {\r\n            var assemblies = new Dictionary<GenomeAssembly, string>();\r\n            if (_taProvider != null) assemblies[_taProvider.GenomeAssembly] = _taProvider.Name;\r\n            if (_sequenceProvider != null) assemblies[_sequenceProvider.GenomeAssembly] = _sequenceProvider.Name;\r\n\r\n            if (assemblies.Count == 0) return GenomeAssembly.Unknown;\r\n            if (assemblies.Count == 1) return assemblies.First().Key;\r\n            foreach (var assembly in assemblies)\r\n            {\r\n                Console.WriteLine($\"{assembly.Value} has genome assembly {assembly.Key}\");\r\n            }\r\n            throw new InconsistantGenomeAssemblyException();\r\n        }\r\n\r\n       \r\n        public IAnnotatedPosition Annotate(IPosition position)\r\n        {\r\n            if (position == null) return null;\r\n\r\n            var 
annotatedVariants = GetAnnotatedVariants(position.Variants);\r\n            var annotatedPosition = new AnnotatedPosition(position, annotatedVariants);\r\n\r\n            if (annotatedPosition.AnnotatedVariants == null\r\n                || annotatedPosition.AnnotatedVariants.Length == 0\r\n                || position.Chromosome.UcscName == \"chrM\" && !_annotateMito\r\n            ) return annotatedPosition;\r\n\r\n            _sequenceProvider?.Annotate(annotatedPosition);\r\n\r\n            _taProvider.Annotate(annotatedPosition);\r\n\r\n            return annotatedPosition;\r\n        }\r\n        private static IAnnotatedVariant[] GetAnnotatedVariants(IVariant[] variants)\r\n        {\r\n            if (variants?[0].Behavior == null) return null;\r\n\r\n            var numVariants = variants.Length;\r\n            var annotatedVariants = new IAnnotatedVariant[numVariants];\r\n            for (var i = 0; i < numVariants; i++) annotatedVariants[i] = new AnnotatedVariant(variants[i]);\r\n\r\n            return annotatedVariants;\r\n        }\r\n        public IList<IAnnotatedGene> GetAnnotatedGenes()\r\n        {\r\n            return null;\r\n        }\r\n\r\n        public void EnableMitochondrialAnnotation()\r\n        {\r\n            _annotateMito = true;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Sandbox/Piano/PianoTranscriptAnnotator.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing VariantAnnotation.AnnotatedPositions;\r\nusing VariantAnnotation.AnnotatedPositions.Consequence;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Intervals;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.Sequence;\r\n\r\nnamespace Piano\r\n{\r\n    public static class PianoTranscriptAnnotator\r\n    {\r\n        private const int FlankingAminoAcidLength = 15;\r\n        public static IAnnotatedTranscript GetAnnotatedTranscript(ITranscript transcript, IVariant variant, ISequence refSequence,\r\n            AminoAcids aminoAcidsProvider)\r\n        {\r\n\r\n            var mappedPositions = MappedPositionsUtils.ComputeMappedPositions(variant.Start, variant.End, transcript);\r\n\r\n            var transcriptRefAllele = HgvsUtilities.GetTranscriptAllele(variant.RefAllele, transcript.Gene.OnReverseStrand);\r\n            var transcriptAltAllele = HgvsUtilities.GetTranscriptAllele(variant.AltAllele, transcript.Gene.OnReverseStrand);\r\n\r\n            var codonsAndAminoAcids = GetCodonsAndAminoAcids(transcript, refSequence, transcriptRefAllele, transcriptAltAllele, variant, mappedPositions, aminoAcidsProvider);\r\n\r\n            var referenceCodons = codonsAndAminoAcids.Item1;\r\n            var alternateCodons = codonsAndAminoAcids.Item2;\r\n            var referenceAminoAcids = codonsAndAminoAcids.Item3;\r\n            var alternateAminoAcids = codonsAndAminoAcids.Item4;\r\n\r\n\r\n            var insertionInStartCodonAndNoimpact = variant.Type == VariantType.insertion &&\r\n                                                   mappedPositions.ProteinInterval.Start <= 1 &&\r\n                                                   alternateAminoAcids.EndsWith(referenceAminoAcids);\r\n\r\n            var variantEffect = GetVariantEffect(transcript, variant, 
mappedPositions, referenceAminoAcids,\r\n                alternateAminoAcids, referenceCodons, alternateCodons, insertionInStartCodonAndNoimpact);\r\n\r\n\r\n            var consequences = GetConsequences(transcript, variant, variantEffect);\r\n\r\n            var proteinBegin = mappedPositions.ProteinInterval.Start == null\r\n                ? -1\r\n                : mappedPositions.ProteinInterval.Start.Value;\r\n\r\n            var proteinEnd = mappedPositions.ProteinInterval.End == null\r\n                ? -1\r\n                : mappedPositions.ProteinInterval.End.Value;\r\n\r\n            var upStreamAminoAcids = GetFlankingPeptides(transcript.Translation?.PeptideSeq, proteinBegin, proteinEnd, FlankingAminoAcidLength, true);\r\n            var downStreamAminoAcids = consequences.Contains(ConsequenceTag.frameshift_variant)? null: GetFlankingPeptides(transcript.Translation?.PeptideSeq, proteinBegin, proteinEnd, FlankingAminoAcidLength, false);\r\n\r\n            return new PianoAnnotatedTranscript(transcript,referenceAminoAcids, alternateAminoAcids, mappedPositions,upStreamAminoAcids,downStreamAminoAcids,consequences);\r\n        }\r\n\r\n        private static string GetFlankingPeptides(string peptideSeq, int proteinBegin,int proteinEnd, int nBase, bool upStrem)\r\n        {\r\n            if (peptideSeq == null) return null;\r\n            if (proteinBegin == -1 && proteinEnd == -1) return null;\r\n            if (proteinBegin == -1) proteinBegin = proteinEnd;\r\n            if (proteinEnd == -1) proteinEnd = proteinBegin;\r\n\r\n            if (upStrem)\r\n            {\r\n                var peptideStart = Math.Max(1, proteinBegin - nBase);\r\n                return peptideSeq.Substring(peptideStart - 1, (proteinBegin - peptideStart));\r\n            }\r\n\r\n            var peptideEnd = Math.Min(peptideSeq.Length, proteinEnd + nBase);\r\n            return peptideEnd > proteinEnd + 1 ? 
peptideSeq.Substring(proteinEnd, (peptideEnd - proteinEnd)) : \"\";\r\n        }\r\n\r\n\r\n        private static Tuple<string, string, string, string> GetCodonsAndAminoAcids(ITranscript transcript, ISequence refSequence,\r\n            string transcriptRefAllele, string transcriptAltAllele, ISimpleVariant variant,\r\n            IMappedPositions mappedPositions, AminoAcids aminoAcidsProvider)\r\n        {\r\n            var codingSequence = transcript.Translation == null\r\n                ? null\r\n                : new CodingSequence(refSequence, transcript.Translation.CodingRegion.Start,\r\n                    transcript.Translation.CodingRegion.End, transcript.CdnaMaps, transcript.Gene.OnReverseStrand,\r\n                    transcript.StartExonPhase);\r\n\r\n            // compute codons and amino acids\r\n            AssignCodonsAndAminoAcids(transcriptRefAllele, transcriptAltAllele, mappedPositions,\r\n                codingSequence, aminoAcidsProvider, out string referenceCodons,\r\n                out string alternateCodons, out string referenceAminoAcids, out string alternateAminoAcids);\r\n\r\n            return Tuple.Create(referenceCodons ?? \"\", alternateCodons ?? \"\", referenceAminoAcids ?? \"\",\r\n                alternateAminoAcids ?? 
\"\");\r\n        }\r\n\r\n        private static void AssignCodonsAndAminoAcids(string transcriptRefAllele, string transcriptAltAllele,\r\n            IMappedPositions mappedPositions, ISequence codingSequence, AminoAcids aminoAcidProvier, out string refCodons,\r\n            out string altCodons, out string refAminoAcids, out string altAminoAcids)\r\n        {\r\n            AssignExtended(transcriptRefAllele, transcriptAltAllele, mappedPositions.CdsInterval,\r\n                mappedPositions.ProteinInterval, codingSequence, out refCodons, out altCodons);\r\n\r\n\r\n            aminoAcidProvier.Assign(refCodons, altCodons, out refAminoAcids, out altAminoAcids);\r\n        }\r\n\r\n        private static List<ConsequenceTag> GetConsequences(ITranscript transcript, IVariant variant, VariantEffect variantEffect)\r\n        {\r\n            var featureEffect = new FeatureVariantEffects(transcript, variant.Type, variant.Start, variant.End,\r\n                variant.Behavior.StructuralVariantConsequence);\r\n\r\n            var consequence = new Consequences(variantEffect, featureEffect);\r\n            consequence.DetermineSmallVariantEffects();\r\n            return consequence.GetConsequences();\r\n        }\r\n\r\n        private static VariantEffect GetVariantEffect(ITranscript transcript, ISimpleVariant variant, IMappedPositions mappedPositions, string refAminoAcids, string altAminoAcids, string refCodons, string altCodons, bool insertionInStartAndNoImpact)\r\n        {\r\n            var positionalEffect = new TranscriptPositionalEffect();\r\n            positionalEffect.DetermineIntronicEffect(transcript.Introns, variant, variant.Type);\r\n            positionalEffect.DetermineExonicEffect(transcript, variant, mappedPositions, variant.AltAllele, insertionInStartAndNoImpact);\r\n\r\n            var variantEffect = new VariantEffect(positionalEffect, variant, transcript, refAminoAcids,\r\n                altAminoAcids,\r\n                refCodons, altCodons, 
mappedPositions.ProteinInterval.Start);\r\n            return variantEffect;\r\n        }\r\n\r\n        private static void AssignExtended(string transcriptReferenceAllele, string transcriptAlternateAllele,\r\n            NullableInterval cdsInterval, NullableInterval proteinInterval, ISequence codingSequence, out string refCodons, out string altCodons)\r\n        {\r\n            refCodons = null;\r\n            altCodons = null;\r\n\r\n            if (cdsInterval.Start == null || cdsInterval.End == null || proteinInterval.Start == null ||\r\n                proteinInterval.End == null) return;\r\n\r\n            int aminoAcidStart = proteinInterval.Start.Value * 3 - 2;\r\n            int aminoAcidEnd = proteinInterval.End.Value * 3;\r\n\r\n            int prefixLen = cdsInterval.Start.Value - aminoAcidStart;\r\n            int suffixLen = aminoAcidEnd - cdsInterval.End.Value;\r\n\r\n            int start1 = aminoAcidStart - 1;\r\n            int start2 = aminoAcidEnd - suffixLen;\r\n\r\n            int maxSuffixLen = codingSequence.Length - start2;\r\n\r\n            var atTailEnd = false;\r\n            if (suffixLen > maxSuffixLen)\r\n            {\r\n                suffixLen = maxSuffixLen;\r\n                atTailEnd = true;\r\n            }\r\n\r\n            if (suffixLen > maxSuffixLen) suffixLen = maxSuffixLen;\r\n\r\n            string prefix = start1 + prefixLen < codingSequence.Length\r\n                ? codingSequence.Substring(start1, prefixLen).ToLower()\r\n                : \"AAA\";\r\n\r\n            string suffix = suffixLen > 0\r\n                ? codingSequence.Substring(start2, suffixLen).ToLower()\r\n                : \"\";\r\n\r\n            var needExtend = !atTailEnd && !Codons.IsTriplet(prefixLen + suffixLen + transcriptAlternateAllele.Length);\r\n            var extendedLen = (maxSuffixLen - suffixLen) > 45 ? 
45 : (maxSuffixLen - suffixLen) / 3 * 3;\r\n            if (needExtend) suffix = codingSequence.Substring(start2, suffixLen + extendedLen);\r\n\r\n\r\n            refCodons = Codons.GetCodon(transcriptReferenceAllele, prefix, suffix);\r\n            altCodons = Codons.GetCodon(transcriptAlternateAllele, prefix, suffix);\r\n        }\r\n\r\n\r\n    }\r\n}"
  },
  {
    "path": "Sandbox/Piano/ProviderUtilities.cs",
    "content": "﻿using VariantAnnotation;\r\nusing VariantAnnotation.Interface;\r\nusing VariantAnnotation.Interface.GeneAnnotation;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.Providers;\r\nusing VariantAnnotation.Utilities;\r\n\r\nnamespace Piano\r\n{\r\n    public static class ProviderUtilities\r\n    {\r\n        public static ISequenceProvider GetSequenceProvider(string compressedReferencePath)\r\n        {\r\n            return new ReferenceSequenceProvider(FileUtilities.GetReadStream(compressedReferencePath));\r\n        }\r\n\r\n        public static IAnnotationProvider GetTranscriptAnnotationProvider(string path, ISequenceProvider sequenceProvider)\r\n        {\r\n            return new PianoAnnotationProvider(path, sequenceProvider);\r\n        }\r\n\r\n        public static IAnnotator GetAnnotator(IAnnotationProvider taProvider, ISequenceProvider sequenceProvider)\r\n        {\r\n            return new PianoAnnotator(taProvider, sequenceProvider);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Sandbox/Sandbox.sln",
    "content": "﻿\r\nMicrosoft Visual Studio Solution File, Format Version 12.00\r\n# Visual Studio 15\r\nVisualStudioVersion = 15.0.26730.16\r\nMinimumVisualStudioVersion = 10.0.40219.1\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"VariantAnnotation.Interface\", \"..\\VariantAnnotation.Interface\\VariantAnnotation.Interface.csproj\", \"{3D09B50F-73B4-4021-B5F0-2100574BD54B}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"ErrorHandling\", \"..\\ErrorHandling\\ErrorHandling.csproj\", \"{2B6B916D-B9DD-4156-A486-F2835C1EE992}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"VariantAnnotation\", \"..\\VariantAnnotation\\VariantAnnotation.csproj\", \"{7030787E-D41A-4397-9472-40D79EEB3DD2}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"SAUtils\", \"..\\SAUtils\\SAUtils.csproj\", \"{DDAF33F9-5925-4689-B438-D339A49E52CD}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"CommandLine\", \"..\\CommandLine\\CommandLine.csproj\", \"{34A0FF3C-8E65-4378-B2FE-41E661C0B7B6}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"CommonUtilities\", \"..\\CommonUtilities\\CommonUtilities.csproj\", \"{44D54D5C-E5E8-4622-9701-6B26C9A831A1}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"Compression\", \"..\\Compression\\Compression.csproj\", \"{82F0CE32-D465-4E7B-91CA-A4B67763F433}\"\r\nEndProject\r\nProject(\"{9A19103F-16F7-4668-BE54-9A1E7A4F7556}\") = \"Vcf\", \"..\\Vcf\\Vcf.csproj\", \"{BCB37D8F-9B8A-4846-B441-6975CF67BADF}\"\r\nEndProject\r\nProject(\"{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}\") = \"AminoAcidAligner\", \"AminoAcidAligner\\AminoAcidAligner.csproj\", \"{793E5969-C6A0-4072-9D6B-4878AA79C917}\"\r\nEndProject\r\nProject(\"{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}\") = \"IO\", \"..\\IO\\IO.csproj\", \"{5A95D583-1B37-4AE9-BC38-FDCCCB3183CE}\"\r\nEndProject\r\nGlobal\r\n\tGlobalSection(SolutionConfigurationPlatforms) 
= preSolution\r\n\t\tDebug|Any CPU = Debug|Any CPU\r\n\t\tRelease|Any CPU = Release|Any CPU\r\n\tEndGlobalSection\r\n\tGlobalSection(ProjectConfigurationPlatforms) = postSolution\r\n\t\t{E435C06D-762A-4BB8-9EF8-B75D02812737}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{E435C06D-762A-4BB8-9EF8-B75D02812737}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{E435C06D-762A-4BB8-9EF8-B75D02812737}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{E435C06D-762A-4BB8-9EF8-B75D02812737}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{D71384D9-A24C-4F7A-BE25-AEA088C36E7B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{D71384D9-A24C-4F7A-BE25-AEA088C36E7B}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{D71384D9-A24C-4F7A-BE25-AEA088C36E7B}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{D71384D9-A24C-4F7A-BE25-AEA088C36E7B}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{3D09B50F-73B4-4021-B5F0-2100574BD54B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{3D09B50F-73B4-4021-B5F0-2100574BD54B}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{3D09B50F-73B4-4021-B5F0-2100574BD54B}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{3D09B50F-73B4-4021-B5F0-2100574BD54B}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{2B6B916D-B9DD-4156-A486-F2835C1EE992}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{2B6B916D-B9DD-4156-A486-F2835C1EE992}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{2B6B916D-B9DD-4156-A486-F2835C1EE992}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{2B6B916D-B9DD-4156-A486-F2835C1EE992}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{7030787E-D41A-4397-9472-40D79EEB3DD2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{7030787E-D41A-4397-9472-40D79EEB3DD2}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{7030787E-D41A-4397-9472-40D79EEB3DD2}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{7030787E-D41A-4397-9472-40D79EEB3DD2}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{DDAF33F9-5925-4689-B438-D339A49E52CD}.Debug|Any CPU.ActiveCfg = 
Debug|Any CPU\r\n\t\t{DDAF33F9-5925-4689-B438-D339A49E52CD}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{DDAF33F9-5925-4689-B438-D339A49E52CD}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{DDAF33F9-5925-4689-B438-D339A49E52CD}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{34A0FF3C-8E65-4378-B2FE-41E661C0B7B6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{34A0FF3C-8E65-4378-B2FE-41E661C0B7B6}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{34A0FF3C-8E65-4378-B2FE-41E661C0B7B6}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{34A0FF3C-8E65-4378-B2FE-41E661C0B7B6}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{44D54D5C-E5E8-4622-9701-6B26C9A831A1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{44D54D5C-E5E8-4622-9701-6B26C9A831A1}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{44D54D5C-E5E8-4622-9701-6B26C9A831A1}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{44D54D5C-E5E8-4622-9701-6B26C9A831A1}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{82F0CE32-D465-4E7B-91CA-A4B67763F433}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{82F0CE32-D465-4E7B-91CA-A4B67763F433}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{82F0CE32-D465-4E7B-91CA-A4B67763F433}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{82F0CE32-D465-4E7B-91CA-A4B67763F433}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{BCB37D8F-9B8A-4846-B441-6975CF67BADF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{BCB37D8F-9B8A-4846-B441-6975CF67BADF}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{BCB37D8F-9B8A-4846-B441-6975CF67BADF}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{BCB37D8F-9B8A-4846-B441-6975CF67BADF}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\t\t{793E5969-C6A0-4072-9D6B-4878AA79C917}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{793E5969-C6A0-4072-9D6B-4878AA79C917}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{793E5969-C6A0-4072-9D6B-4878AA79C917}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{793E5969-C6A0-4072-9D6B-4878AA79C917}.Release|Any CPU.Build.0 = 
Release|Any CPU\r\n\t\t{5A95D583-1B37-4AE9-BC38-FDCCCB3183CE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU\r\n\t\t{5A95D583-1B37-4AE9-BC38-FDCCCB3183CE}.Debug|Any CPU.Build.0 = Debug|Any CPU\r\n\t\t{5A95D583-1B37-4AE9-BC38-FDCCCB3183CE}.Release|Any CPU.ActiveCfg = Release|Any CPU\r\n\t\t{5A95D583-1B37-4AE9-BC38-FDCCCB3183CE}.Release|Any CPU.Build.0 = Release|Any CPU\r\n\tEndGlobalSection\r\n\tGlobalSection(SolutionProperties) = preSolution\r\n\t\tHideSolutionNode = FALSE\r\n\tEndGlobalSection\r\n\tGlobalSection(ExtensibilityGlobals) = postSolution\r\n\t\tSolutionGuid = {46D55784-DE62-489B-A761-1BF0A66DD2C1}\r\n\tEndGlobalSection\r\nEndGlobal\r\n"
  },
  {
    "path": "Sandbox/Sandbox.sln.DotSettings",
    "content": "﻿<wpf:ResourceDictionary xml:space=\"preserve\" xmlns:x=\"http://schemas.microsoft.com/winfx/2006/xaml\" xmlns:s=\"clr-namespace:System;assembly=mscorlib\" xmlns:ss=\"urn:shemas-jetbrains-com:settings-storage-xaml\" xmlns:wpf=\"http://schemas.microsoft.com/winfx/2006/xaml/presentation\">\n\t<s:String x:Key=\"/Default/CodeInspection/Highlighting/AnalysisEnabled/@EntryValue\">SOLUTION</s:String></wpf:ResourceDictionary>"
  },
  {
    "path": "Sandbox/Scripts/ConvertCacheMatrix.pl",
    "content": "#!/usr/bin/perl\n\nuse File::Find;\nuse Data::Dumper;\nuse Storable qw(fd_retrieve dclone);\nuse Compress::Zlib;\nuse MIME::Base64;\n\nuse strict;\r\n\n$Data::Dumper::Sortkeys = 1; # Sort the keys in the output\n$Data::Dumper::Deepcopy = 1; # Enable deep copies of structures\n\nmy @transcriptFiles = ();\nmy @regulatoryFiles = ();\n\nmy $numArgs = @ARGV;\n\nif($numArgs != 1) {\n\tprint \"USAGE: ConvertCacheMatrix.pl <input VEP directory>\\n\";\n\texit 1;\n}\n\nmy ($srcDir) = @ARGV;\n\nif(! -d $srcDir) {\n\tprint \"ERROR: The directory ($srcDir) does not exist.\\n\";\n\texit 1;\n}\n\nfind(\\&wanted, $srcDir);\n\nforeach my $transcriptPath (@transcriptFiles) {\n\n\tprint \"- Dumping $transcriptPath.\\n\";\n\t\n    open my $fh, \"zcat \".$transcriptPath.\" |\";\n    my $cache;\n    $cache = fd_retrieve($fh);\n    close $fh;\n\t\n\tmy $outputCache = dclone($cache);\n\t\n\tmy $newPath = $transcriptPath;\n\t$newPath =~ s/\\.gz$/_transcripts_data_dumper.txt.gz/g;\n\n\t# loop through each reference sequence\n\tforeach my $refSeq (keys %{$cache}) {\n\n\t\tprint \"refSeq: $refSeq\\n\";\n\n\t\t# loop through each transcript\n\t\tmy $numTranscripts = scalar @{$cache->{$refSeq}};\n\t\tprint \"# transcripts: $numTranscripts\\n\";\n\t\t\n\t\tfor(my $transcriptIndex = 0; $transcriptIndex < $numTranscripts; $transcriptIndex++) {\n\t\t\n\t\t\tprint \"- evaluating transcript \".($transcriptIndex + 1).\"... 
\";\n\t\t\n\t\t\t# evaluate the SIFT entry\n\t\t\tmy $sift = $cache->{$refSeq}[$transcriptIndex]->{'_variation_effect_feature_cache'}->{'protein_function_predictions'}->{'sift'}->{'matrix'};\n\t\t\t\n\t\t\tif(defined($sift)) {\n\t\t\t\tmy $dest = Compress::Zlib::memGunzip($sift) \n\t\t\t\t\tor die \"Cannot uncompress SIFT matrix: $gzerrno\";\n\t\t\t\t\n\t\t\t\t$outputCache->{$refSeq}[$transcriptIndex]->{'_variation_effect_feature_cache'}->{'protein_function_predictions'}->{'sift'}->{'matrix'} = encode_base64($dest, \"\");\n\t\t\t}\n\t\t\t\n\t\t\t# evaluate the PolyPhen humvar entry\n\t\t\tmy $polyphen = $cache->{$refSeq}[$transcriptIndex]->{'_variation_effect_feature_cache'}->{'protein_function_predictions'}->{'polyphen_humvar'}->{'matrix'};\n\t\t\t\n\t\t\tif(defined($polyphen)) {\n\t\t\t\tmy $dest = Compress::Zlib::memGunzip($polyphen) \n\t\t\t\t\tor die \"Cannot uncompress PolyPhen matrix: $gzerrno\";\n\t\t\t\t\n\t\t\t\t$outputCache->{$refSeq}[$transcriptIndex]->{'_variation_effect_feature_cache'}->{'protein_function_predictions'}->{'polyphen_humvar'}->{'matrix'} = encode_base64($dest, \"\");\n\t\t\t}\n\t\t\t\n\t\t\t# evaluate the PolyPhen humdiv entry\n\t\t\tmy $polyphenDiv = $cache->{$refSeq}[$transcriptIndex]->{'_variation_effect_feature_cache'}->{'protein_function_predictions'}->{'polyphen_humdiv'}->{'matrix'};\n\t\t\t\n\t\t\tif(defined($polyphenDiv)) {\n\t\t\t\tmy $dest = Compress::Zlib::memGunzip($polyphenDiv) \n\t\t\t\t\tor die \"Cannot uncompress PolyPhen humdiv matrix: $gzerrno\";\n\t\t\t\t\n\t\t\t\t$outputCache->{$refSeq}[$transcriptIndex]->{'_variation_effect_feature_cache'}->{'protein_function_predictions'}->{'polyphen_humdiv'}->{'matrix'} = encode_base64($dest, \"\");\n\t\t\t}\n\t\t\t\n\t\t\tprint \"finished.\\n\";\n\t\t}\n\t}\n\t\n\topen (my $MPS, \"| /bin/gzip -9 -c > $newPath\") or die \"error starting gzip $!\";\n\tprint $MPS Dumper($outputCache);\n\tclose $MPS;\n}\n\nforeach my $regulatoryPath (@regulatoryFiles) {\n\n\tprint \"- Dumping 
$regulatoryPath.\\n\";\n\t\n    open my $fh, \"zcat \".$regulatoryPath.\" |\";\n    my $cache;\n    $cache = fd_retrieve($fh);\n   close $fh;\n \t\n\tmy $newPath = $regulatoryPath;\n\t$newPath =~ s/\\.gz$/_regulatory_regions_data_dumper.txt.gz/g;\n\t\n\topen (my $MPS, \"| /bin/gzip -9 -c > $newPath\") or die \"error starting gzip $!\";\n\tprint $MPS Dumper($cache);\n\tclose $MPS;\n}\n\n# ========================================\n\nsub wanted {\n\n\tmy $filePath = $File::Find::name;\n\t\n\tif($filePath =~ /data_dumper/) { return; }\n\t\n\tif($filePath =~ /_reg.gz$/) { \n\t\tpush(@regulatoryFiles, $filePath) if -f $filePath;\n\t\treturn;\n\t}\n\t\n\tif($filePath =~ /_var.gz$/) { return; }\n\n\tif($filePath =~ /.gz$/) {\n\t\tpush(@transcriptFiles, $filePath) if -f $filePath;\n\t\treturn; \n\t}\n}\n"
  },
  {
    "path": "Sandbox/Scripts/StressTestUnitTests.ps1",
    "content": "# ================\n# global variables\n# ================\n\n# =========\n# main loop\n# =========\n\ncd D:\\Projects\\NirvanaDevelopment\\UnitTests\ndotnet build\n$loopCount = 1\n\ndo {\n\tWrite-Host\n\tWrite-Host \"********************************\"\n\tWrite-Host \"*** current loop: $loopCount\"\n\tWrite-Host \"********************************\"\n\tWrite-Host\n\n\t$loopCount++\n\tiex \"dotnet test --no-build\"\n\tWrite-Host \"last exit code: $lastExitCode or $?\"\n} while ($LastExitCode -eq 0)\n"
  },
  {
    "path": "Sandbox/Scripts/UpdateCacheFiles.ps1",
    "content": "# Configuration\n\n$UnfilteredRefSeq72Path = \"E:\\Data\\Nirvana\\Cache\\Test\\RefSeq\\72\\chr1.ndb\"\n$Ensembl72Chr1Path = \"E:\\Data\\Nirvana\\Cache\\12\\Ensembl\\72\\chr1.ndb\"\n$Ensembl79Chr1Path = \"E:\\Data\\Nirvana\\Cache\\12\\Ensembl\\79\\chr1.ndb\"\n$Ensembl72Chr3Path = \"E:\\Data\\Nirvana\\Cache\\12\\Ensembl\\72\\chr3.ndb\"\n$Ensembl72Chr4Path = \"E:\\Data\\Nirvana\\Cache\\12\\Ensembl\\72\\chr4.ndb\"\n$Ensembl72Chr7Path = \"E:\\Data\\Nirvana\\Cache\\12\\Ensembl\\72\\chr7.ndb\"\n$Ensembl72Chr10Path = \"E:\\Data\\Nirvana\\Cache\\12\\Ensembl\\72\\chr10.ndb\"\n$Ensembl72Chr15Path = \"E:\\Data\\Nirvana\\Cache\\12\\Ensembl\\72\\chr15.ndb\"\n$Ensembl72Chr17Path = \"E:\\Data\\Nirvana\\Cache\\12\\Ensembl\\72\\chr17.ndb\"\n$OutputDir = \"D:\\Projects\\Nirvana\\NirvanaUnitTests\\Resources\\Caches\"\n\n$ExtractTranscriptsBin = \"d:\\Projects\\Nirvana\\Sandbox\\x64\\Release\\ExtractTranscripts.exe\"\n$ExtractRegulatoryFeaturesBin = \"d:\\Projects\\Nirvana\\Sandbox\\x64\\Release\\ExtractRegulatoryFeatures.exe\"\n\n# =======================================\n# extract the Ensembl regulatory features\n# =======================================\n\n$Ensembl72RegulatoryFeatures = @(\"ENSR00000079256\")\n\nForEach ($regFeature in $Ensembl72RegulatoryFeatures) {\n\t$outputPath = \"$($OutputDir)\\$($regFeature)_Ensembl72.ndb\"\n\t& $ExtractRegulatoryFeaturesBin -i $Ensembl72Chr1Path -o $outputPath -r $regFeature\n}\n\n$Ensembl79RegulatoryFeatures = @(\"ENSR00001584270\")\n\nForEach ($regFeature in $Ensembl79RegulatoryFeatures) {\n\t$outputPath = \"$($OutputDir)\\$($regFeature)_Ensembl79.ndb\"\n\t& $ExtractRegulatoryFeaturesBin -i $Ensembl79Chr1Path -o $outputPath -r $regFeature\n}\n\n# ==============================\n# extract the RefSeq transcripts\n# ==============================\n\n$RefSeqTranscripts = @(\"CCDS30708.1\", \"CCDS58003.1\", \"CCDS877.1\", \"ENSESTT00000006045\", \"ENSESTT00000008349\", \"ENSESTT00000011387\", \"ENSESTT00000011417\", 
\"ENSESTT00000012399\", \"ENSESTT00000034529\", \"ENSESTT00000034591\", \"ENSESTT00000034721\", \"ENSESTT00000034761\", \"ENSESTT00000051657\", \"ENSESTT00000056515\", \"ENSESTT00000058286\", \"ENSESTT00000064454\", \"ENSESTT00000064869\", \"ENSESTT00000079558\", \"ENSESTT00000082723\", \"ENSESTT00000082768\", \"ENSESTT00000083199\", \"ENSESTT00000083507\", \"ENSESTT00000085167\", \"ENSESTT00000086709\", \"NM_000644.2\", \"NM_001258340.1\", \"NM_002524.4\", \"NM_007158.5\", \"NM_024011.2\", \"NM_152665.2\", \"NM_176877.2\", \"NM_178221.2\", \"NR_024321.1\", \"NR_026752.1\", \"NR_027120.1\", \"NR_034014.1\", \"NR_034015.1\", \"NR_039983.2\", \"NR_046018.2\", \"XM_003846383.1\", \"NM_001080484.1\")\n\nForEach ($transcript in $RefSeqTranscripts) {\n\t$outputPath = \"$($OutputDir)\\$($transcript)_RefSeq72.ndb\"\n\t& $ExtractTranscriptsBin -i $UnfilteredRefSeq72Path -o $outputPath -t $transcript\n}\n\n# handle vcf entries\n& $ExtractTranscriptsBin -i $UnfilteredRefSeq72Path -o \"$($OutputDir)\\chr1_115256529_RefSeq72.ndb\" -v \"chr1\\t115256529\\t.\\tT\\tA\\t.\\tPASS\\t.\\tGT:GQX:DP:DPF\\t0/0:99:34:2\"\n& $ExtractTranscriptsBin -i $UnfilteredRefSeq72Path -o \"$($OutputDir)\\chr1_59758869_RefSeq72.ndb\" -n chr1 -p 59758869 -r T -a G\n\n# ======================================\n# extract the Ensembl transcripts (chr1)\n# ======================================\n\n$EnsemblChr1Transcripts = @(\"ENST00000371614\", \"ENST00000255416\", \"ENST00000310991\", \"ENST00000327044\", \"ENST00000355439\", \"ENST00000368246\", \"ENST00000369535\", \"ENST00000374163\", \"ENST00000375759\", \"ENST00000378635\", \"ENST00000379407\", \"ENST00000487053\", \"ENST00000518655\", \"ENST00000391369\")\n\nForEach ($transcript in $EnsemblChr1Transcripts) {\n\t$outputPath = \"$($OutputDir)\\$($transcript)_Ensembl72.ndb\"\n\t& $ExtractTranscriptsBin -i $Ensembl72Chr1Path -o $outputPath -t $transcript\n}\n\n# ======================================\n# extract the Ensembl transcripts (chr3)\n# 
======================================\n\n$EnsemblChr3Transcripts = @(\"ENST00000422325\")\n\nForEach ($transcript in $EnsemblChr3Transcripts) {\n\t$outputPath = \"$($OutputDir)\\$($transcript)_Ensembl72.ndb\"\n\t& $ExtractTranscriptsBin -i $Ensembl72Chr3Path -o $outputPath -t $transcript\n}\n\n# ======================================\n# extract the Ensembl transcripts (chr4)\n# ======================================\n\n$EnsemblChr4Transcripts = @(\"ENST00000288135\")\n\nForEach ($transcript in $EnsemblChr4Transcripts) {\n\t$outputPath = \"$($OutputDir)\\$($transcript)_Ensembl72.ndb\"\n\t& $ExtractTranscriptsBin -i $Ensembl72Chr4Path -o $outputPath -t $transcript\n}\n\n# ======================================\n# extract the Ensembl transcripts (chr7)\n# ======================================\n\n$EnsemblChr7Transcripts = @(\"ENST00000275493\")\n\nForEach ($transcript in $EnsemblChr7Transcripts) {\n\t$outputPath = \"$($OutputDir)\\$($transcript)_Ensembl72.ndb\"\n\t& $ExtractTranscriptsBin -i $Ensembl72Chr7Path -o $outputPath -t $transcript\n}\n\n# =======================================\n# extract the Ensembl transcripts (chr10)\n# =======================================\n\n$EnsemblChr10Transcripts = @(\"ENST00000348795\")\n\nForEach ($transcript in $EnsemblChr10Transcripts) {\n\t$outputPath = \"$($OutputDir)\\$($transcript)_Ensembl72.ndb\"\n\t& $ExtractTranscriptsBin -i $Ensembl72Chr10Path -o $outputPath -t $transcript\n}\n\n# =======================================\n# extract the Ensembl transcripts (chr15)\n# =======================================\n\n$EnsemblChr15Transcripts = @(\"ENST00000543887\")\n\nForEach ($transcript in $EnsemblChr15Transcripts) {\n\t$outputPath = \"$($OutputDir)\\$($transcript)_Ensembl72.ndb\"\n\t& $ExtractTranscriptsBin -i $Ensembl72Chr15Path -o $outputPath -t $transcript\n}\n\n# =======================================\n# extract the Ensembl transcripts (chr17)\n# =======================================\n\n$EnsemblChr17Transcripts = 
@(\"ENST00000269305\", \"ENST00000576171\")\n\nForEach ($transcript in $EnsemblChr17Transcripts) {\n\t$outputPath = \"$($OutputDir)\\$($transcript)_Ensembl72.ndb\"\n\t& $ExtractTranscriptsBin -i $Ensembl72Chr17Path -o $outputPath -t $transcript\n}"
  },
  {
    "path": "Sandbox/Scripts/UpdateMiniSaFiles.ps1",
    "content": "####################################################################\r\n# This program is used to update just the miniSA, CA, and CI files #\r\n####################################################################\r\n\r\n# ================\r\n# global variables\r\n# ================\r\n\r\n$NirvanaRootDir=\"E:\\Data\\Nirvana\"\r\n$SaRootDir=\"$NirvanaRootDir\\SA\"\r\n$IntermediateTsvsDir=\"$NirvanaRootDir\\IntermediateTsvs\"\r\n\r\n$NirvanaSourceDir=\"D:\\Projects\\NirvanaDevelopment\"\r\n$ResourcesDir=\"$NirvanaSourceDir\\UnitTests\\Resources\"\r\n\r\n$RefVersion=\"5\"\r\n$SaVersion=\"38.2\"\r\n\r\n$CustomIntervalsBed=\"$ResourcesDir\\customIntervals.bed\"\r\n\r\n$GRCh37=\"$NirvanaRootDir\\References\\$RefVersion\\Homo_sapiens.GRCh37.Nirvana.dat\"\r\n$GRCh38=\"$NirvanaRootDir\\References\\$RefVersion\\Homo_sapiens.GRCh38.Nirvana.dat\"\r\n\r\n# unit test resource directories\r\n$miniSAGRCh37=\"$ResourcesDir\\MiniSuppAnnot\"\r\n$miniSAGRCh38=\"$ResourcesDir\\MiniSuppAnnot\\hg38\"\r\n$miniCIGRCh37=\"$ResourcesDir\\MiniSuppAnnot\\CustomIntervals\"\r\n$miniCAGRCh37=\"$ResourcesDir\\MiniSuppAnnot\\CustomAnnotations\"\r\n$directoryIntegrity=\"$ResourcesDir\\DirectoryIntegrity\"\r\n\r\n# intermediate TSV directories\r\n$HgmdTsv=\"$IntermediateTsvsDir\\HGMD\"\r\n$IcslIntervalsTsv=\"$IntermediateTsvsDir\\IcslIntervals\"\r\n$InternalAfTsv=\"$IntermediateTsvsDir\\InternalAF\"\r\n\r\n# SA directories\r\n$SaGRCh37=\"$SaRootDir\\$SaVersion\\GRCh37\"\r\n$SaGRCh38=\"$SaRootDir\\$SaVersion\\GRCh38\"\r\n$SaHgmd=\"$SaRootDir\\HGMD\"\r\n$SaIcslIntervals=\"$SaRootDir\\IcslIntervals\"\r\n$SaInternalAF=\"$SaRootDir\\InternalAF\"\r\n\r\n$SaUtils=\"$NirvanaSourceDir\\bin\\Release\\netcoreapp1.1\\SAUtils.dll\"\r\n$ExtractMiniSA=\"dotnet $SaUtils extractMiniSA\"\r\n\r\n# =========\r\n# functions\r\n# =========\r\n\r\nfunction bg() {\r\n\tParam ($name, $job)\r\n\t$script=[scriptblock]::Create($job)\r\n\tStart-Job -Name $name -ScriptBlock $script\r\n}\r\n\r\nfunction 
updateMiniSA(){\r\n\tParam($name,$miniSADir,$SADir,$ref)\r\n\tGet-ChildItem $miniSADir -Filter *.nsa | \r\n\tForeach-Object {\r\n\t\t$miniSAfile=$_.BaseName\r\n\t\t$refName,$start,$end = $miniSAfile.Split('_',3)\r\n\t\tbg $name \"$ExtractMiniSA --in $SADir\\$refName.nsa --begin $start --end $end --ref $Ref --out $miniSADir\"\r\n\t}\r\n}\r\n\r\nfunction updateMiniCA(){\r\n\tParam($name,$outputDir,$SADir,$ref,$targetDataSource)\r\n\tGet-ChildItem $outputDir -Filter *.nsa | \r\n\tForeach-Object {\r\n\t\t$miniCAfile=$_.BaseName\r\n\t\t$refName,$start,$end,$dataSource = $miniCAfile.Split('_',4)\r\n\t\tif($dataSource -match $targetDataSource) {\r\n\t\t\tbg $name \"$ExtractMiniSA --in $SADir\\$refName.nsa --begin $start --end $end --ref $Ref --out $outputDir -n $targetDataSource\"\r\n\t\t}\r\n\t}\r\n}\r\n\r\nfunction copyIfNewer() {\r\n\tParam($sourceDir, $destDir, $filename)\r\n\t$localFile = Get-Item \"$destDir\\$filename\"\r\n\t$remoteFile = Get-Item \"$sourceDir\\$filename\"\r\n\r\n\tif ($remoteFile.LastWriteTime -gt $localFile.LastWriteTime)\r\n\t{\r\n\t\tCopy-Item $remoteFile $localFile\r\n\t}\r\n}\r\n\r\n# ===========================\r\n# create the IcslIntervals SA\r\n# ===========================\r\n\r\n$IcslIntervalsChr1 = \"$SaIcslIntervals\\chr1.nsa\"\r\n\r\nif (!(Test-Path $IcslIntervalsChr1)) {\r\n\tNew-Item -ItemType Directory -Force -Path $IcslIntervalsTsv | Out-Null\r\n\t& dotnet $SaUtils createTSV --bed $CustomIntervalsBed -r $GRCh37 -o $IcslIntervalsTsv\r\n\t& dotnet $SaUtils createSA -r $GRCh37 -d $IcslIntervalsTsv -o $SaIcslIntervals\r\n}\r\n\r\n# ==================\r\n# create the HGMD SA\r\n# ==================\r\n\r\n$HgmdChr1 = \"$SaHgmd\\chr1.nsa\"\r\n\r\nif (!(Test-Path $HgmdChr1)) {\r\n\tNew-Item -ItemType Directory -Force -Path $SaHgmd | Out-Null\r\n\t& dotnet $SaUtils createSA -r $GRCh37 -d $HgmdTsv -o $SaHgmd\r\n}\r\n\r\n# ========================\r\n# create the InternalAF SA\r\n# ========================\r\n\r\n$InternalAfChr1 = 
\"$SaInternalAF\\chr1.nsa\"\r\n\r\nif (!(Test-Path $InternalAfChr1)) {\r\n\tNew-Item -ItemType Directory -Force -Path $SaInternalAF | Out-Null\r\n\t& dotnet $SaUtils createSA -r $GRCh37 -d $InternalAfTsv -o $SaInternalAF\r\n}\r\n\r\n# ===============================\r\n# copy chrM to DirectoryIntegrity\r\n# ===============================\r\n\r\ncopyIfNewer $SaGRCh37 $directoryIntegrity \"chrM.nsa\"\r\ncopyIfNewer $SaGRCh37 $directoryIntegrity \"chrM.nsa.idx\"\r\n\r\n# =============\r\n# update miniSA \r\n# =============\r\n\r\nupdateMiniSA \"SA-37\" $miniSAGRCh37 $SaGRCh37 $GRCh37\r\nupdateMiniSA \"SA-38\" $miniSAGRCh38 $SaGRCh38 $GRCh38\r\n\r\n# ====================================\r\n# update the mini-CA and mini-CI files \r\n# ====================================\r\n\r\nupdateMiniCA \"hgmd-37\" $miniCAGRCh37 $SaHgmd $GRCh37 \"hgmd\"\r\nupdateMiniCA \"internalAF-37\" $miniCAGRCh37 $SaInternalAF $GRCh37 \"internalAF\"\r\nupdateMiniCA \"IcslIntervals-37\" $miniCIGRCh37 $SaIcslIntervals $GRCh37 \"IcslIntervals\"\r\n\r\nGet-Job | Wait-Job"
  },
  {
    "path": "Sandbox/Scripts/updateSA.ps1",
    "content": "##############\r\n# This program is used to update SA , miniSA and minCA when the SA schema changes.\r\n# please update the file path whenever updated the datasource\r\n##############\r\n\r\n# ================\r\n# global variables\r\n# ================\r\n\r\n$NirvanaRootDir=\"E:\\Data\\Nirvana\"\r\n$NirvanaSourceDir=\"D:\\Projects\\Nirvana\"\r\n$ExternalDataRootDir=\"\\\\ussd-prd-isi04\\Nirvana\\Development\\IntermediateTsvs\"\r\n\r\n$RefVersion=5.2\r\n$currentSAversion=40.1\r\n\r\n$GRCh37=\"$NirvanaRootDir\\References\\$RefVersion\\Homo_sapiens.GRCh37.Nirvana.dat\"\r\n$GRCh38=\"$NirvanaRootDir\\References\\$RefVersion\\Homo_sapiens.GRCh38.Nirvana.dat\"\r\n\r\n\r\n$miniSAGRCh37=\"$NirvanaSourceDir\\UnitTests\\Resources\\MiniSuppAnnot\"\r\n$miniSAGRCh38=\"$NirvanaSourceDir\\UnitTests\\Resources\\MiniSuppAnnot\\hg38\"\r\n\r\n$SAOutGRCh37=\"$NirvanaRootDir\\SupplementaryDatabase\\$currentSAversion\\GRCh37\"\r\n$SAOutGRCh38=\"$NirvanaRootDir\\SupplementaryDatabase\\$currentSAversion\\GRCh38\"\r\n\r\n\r\n$CreateSupplementaryDatabase=\"dotnet $NirvanaSourceDir\\bin\\Release\\netcoreapp1.1\\SAUtils.dll createSA\"\r\n$ExtractMiniSAdb=\"dotnet $NirvanaSourceDir\\bin\\Release\\netcoreapp1.1\\SAUtils.dll extractMiniSA\"\r\n\r\n$SAisilonPath=\"\\\\ussd-prd-isi04\\Nirvana\\Development\\SupplementaryDatabase\\$currentSAversion\"\r\n$PhylopFolder=\"\\\\ussd-prd-isi04\\Nirvana\\SupplementaryDatabase\\PhyloP\\latest\"\r\n#$OmimDatabase=\"\\\\ussd-prd-isi04\\Nirvana\\Development\\OmimDatabase\\3\\genePhenotypeMap.mim\"\r\n# ================\r\n# update files\r\n# 
================\r\n\r\n$CVR37=\"$ExternalDataRootDir\\2017-04\\GRCh37\\clinvar_20170403.tsv.gz\"\r\n$DBS37=\"$ExternalDataRootDir\\2017-04\\GRCh37\\dbsnp_150.tsv.gz\"\r\n$GLOBAl37=\"$ExternalDataRootDir\\2017-04\\GRCh37\\globalAllele_150.tsv.gz\"\r\n$CSM37=\"$ExternalDataRootDir\\2017-04\\GRCh37\\cosmic_80.tsv.gz\"\r\n$DGV37=\"$ExternalDataRootDir\\2017-04\\GRCh37\\dgv_20160515.interval.tsv.gz\"\r\n$CLINGEN37=\"$ExternalDataRootDir\\2017-04\\GRCh37\\clinGen_20160414.interval.tsv.gz\"\r\n\r\n\r\n$CVR38=\"$ExternalDataRootDir\\2017-04\\GRCh38\\clinvar_20170403.tsv.gz\"\r\n$DBS38=\"$ExternalDataRootDir\\2017-04\\GRCh38\\dbsnp_150.tsv.gz\"\r\n$GLOBAl38=\"$ExternalDataRootDir\\2017-04\\GRCh38\\globalAllele_150.tsv.gz\"\r\n$CSM38=\"$ExternalDataRootDir\\2017-04\\GRCh38\\cosmic_80.tsv.gz\"\r\n$DGV38=\"$ExternalDataRootDir\\2017-04\\GRCh38\\dgv_20160515.interval.tsv.gz\"\r\n$CLINGEN38=\"$ExternalDataRootDir\\2017-04\\GRCh38\\clinGen_unknown.interval.tsv.gz\"\r\n\r\n# ==================\r\n# files won't update\r\n# ==================\r\n$ONEK37=\"$ExternalDataRootDir\\2017-04\\GRCh37\\oneKg_Phase_3_v5a.tsv.gz\"\r\n$ONEKSV37=\"$ExternalDataRootDir\\2017-04\\GRCh37\\oneKg_Phase_3_v5a.interval.tsv.gz\"\r\n$EXAC37=\"$ExternalDataRootDir\\2017-04\\GRCh37\\exac_0.3.1.tsv.gz\"\r\n$EVS37=\"$ExternalDataRootDir\\2017-04\\GRCh37\\evs_2.tsv.gz\"\r\n$RefMinor37=\"$ExternalDataRootDir\\2017-04\\GRCh37\\RefMinor_Phase_3_v5a.tsv.gz\"\r\n\r\n$EVS38=\"$ExternalDataRootDir\\2017-04\\GRCh38\\evs_2.tsv.gz\"\r\n$ONEK38=\"$ExternalDataRootDir\\2017-04\\GRCh38\\oneKg_Phase_3_v3plus.tsv.gz\"\r\n$RefMinor38=\"$ExternalDataRootDir\\2017-04\\GRCh38\\RefMinor_Phase_3_v3plus.tsv.gz\"\r\n\r\n\r\n\r\n\r\nfunction bg() {\r\n\tParam ($name, $job)\r\n\t$script=[scriptblock]::Create($job)\r\n\tStart-Job -Name $name -ScriptBlock $script\r\n}\r\n\r\n\r\n\r\n\r\nfunction updateMiniSA(){\r\n\tParam($name,$miniSADir,$SADir,$ref)\r\n\tGet-ChildItem $miniSADir -Filter *.nsa | \r\n\tForeach-Object 
{\r\n\t\t$miniSAfile=$_.BaseName\r\n\t\t$refName,$start,$end = $miniSAfile.Split('_',3)\r\n\t\tbg $name \"$ExtractMiniSAdb --in $SADir\\$refName.nsa --begin $start --end $end --ref $Ref --out $miniSADir\"\r\n\t}\r\n\r\n}\r\n\r\n\r\n# =========================================\r\n# Create Supplementary database \r\n# =========================================\r\n\r\nmkdir $SAOutGRCh37\r\nmkdir $SAOutGRCh38\r\n\r\n#============================\r\n# copy OMIM\r\n#============================\r\n\tCopy-Item $OmimDatabase $SAOutGRCh37\r\n\tCopy-Item $OmimDatabase $SAOutGRCh38\r\n\r\n\t\r\n\r\nbg \"SA-37\" \"$CreateSupplementaryDatabase --out $SAOutGRCh37 --ref $GRCh37 -t $DBS37 -t $CSM37 -t $EVS37 -t $CVR37 -t $ONEK37  -i $ONEKSV37 -i $DGV37 -i $CLINGEN37 -t $EXAC37 -t $GLOBAl37 -t $RefMinor37\"\r\n\r\n\r\nbg \"SA-38\" \"$CreateSupplementaryDatabase --out $SAOutGRCh38 --ref $GRCh38 -t $DBS38 -t $CSM38 -t $EVS38 -t $CVR38 -t $ONEK38  -i $DGV38 -i $CLINGEN38  -t $GLOBAl38 -t $RefMinor38\"\r\n\r\nget-job|wait-job\r\n\r\n\r\n# =========================\r\n# update miniSA \r\n# =========================\r\n\r\nupdateMiniSA \"update-37\" $miniSAGRCh37 $SAOutGRCh37 $GRCh37\r\n\r\nupdateMiniSA \"update-38\" $miniSAGRCh38 $SAOutGRCh38 $GRCh38\r\n\r\nget-job|wait-job\r\n\r\n\r\n\r\n#===========================\r\n#update custom annotation\r\n#===========================\r\n\r\nfunction updateMiniCA(){\r\n\tParam($name,$miniCADir,$CADir,$ref)\r\n\tGet-ChildItem $miniCADir -Filter *.nsa | \r\n\tForeach-Object {\r\n\t\t$miniCAfile=$_.BaseName\r\n\t\t$refName,$start,$end = $miniCAfile.Split('_',3)\r\n\t\tbg $name \"$ExtractMiniSAdb --in $CADir\\$refName.nsa --begin $start --end $end --ref $Ref --name --out $miniSADir\"\r\n\t}\r\n\r\n}\r\n\r\n\r\n\r\n##########\r\n# copy the SA to isilon\r\n#########\r\nmkdir $SAisilonPath\r\n\r\nCopy-Item $SAOutGRCh37 $SAisilonPath\\GRCh37 -Force -Recurse\r\nCopy-Item $SAOutGRCh38 $SAisilonPath\\GRCh38 -Force -Recurse\r\n\r\nImport-Module 
PSCX\r\n\r\n#============================\r\n# Add hardLink to phylop\r\n#============================\r\nGet-ChildItem \"$PhylopFolder\\GRCh37\" -Filter *.npd |\r\n\tForeach-Object {\r\n\t$npdFile=$_.Name\r\n\tNew-HardLink \"$SAisilonPath\\GRCh37\\$npdFile\" \"$PhylopFolder\\GRCh37\\$npdFile\"\r\n\t}\r\n\r\n\tGet-ChildItem \"$PhylopFolder\\GRCh38\" -Filter *.npd |\r\n\tForeach-Object {\r\n\t$npdFile=$_.Name\r\n\tNew-HardLink \"$SAisilonPath\\GRCh38\\$npdFile\" \"$PhylopFolder\\GRCh38\\$npdFile\"\r\n\t}\r\n\r\n"
  },
  {
    "path": "Sandbox/UnitTests/Piano/PianoAnnotatedTranscriptTests.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing Moq;\r\nusing Piano;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Intervals;\r\nusing Xunit;\r\n\r\nnamespace UnitTests\r\n{\r\n    public class PianoAnnotatedTranscriptTests\r\n    {\r\n        [Fact]\r\n        public void Empty_upstreamAminoAcids_return_dot()\r\n        {\r\n            var mockedTranscript = new Mock<ITranscript>();\r\n            mockedTranscript.Setup(x => x.Source).Returns(Source.Ensembl);\r\n            mockedTranscript.Setup(x => x.Gene.EnsemblId.ToString()).Returns(\"ENSG12345\");\r\n            mockedTranscript.Setup(x => x.Gene.Symbol).Returns(\"TestGene\");\r\n            mockedTranscript.Setup(x => x.Id).Returns(CompactId.Convert(\"ENST124\"));\r\n            mockedTranscript.Setup(x => x.Version).Returns(1);\r\n            mockedTranscript.Setup(x => x.Translation.ProteinId).Returns(CompactId.Convert(\"ENSP123456\"));\r\n            mockedTranscript.Setup(x => x.Translation.ProteinVersion).Returns(2);\r\n\r\n\r\n\r\n            var mappedPosition = new Mock<IMappedPositions>();\r\n            mappedPosition.Setup(x => x.ProteinInterval).Returns(new NullableInterval(100, 100));\r\n            var transcript = new PianoAnnotatedTranscript(mockedTranscript.Object, \"A\", \"R\",mappedPosition.Object, \"\", \"ATYRGD\",\r\n                new List<ConsequenceTag> {ConsequenceTag.missense_variant});\r\n            var expectedOut = \"TestGene\tENSG12345\tENST124.1\tENSP123456.2\t100\t.\tA/R\tATYRGD\tmissense_variant\";\r\n\r\n            Assert.Equal(expectedOut,transcript.ToString());\r\n        }\r\n\r\n        [Fact]\r\n        public void refSeq_gene_return_entrezId()\r\n        {\r\n            var mockedTranscript = new Mock<ITranscript>();\r\n            mockedTranscript.Setup(x => x.Source).Returns(Source.RefSeq);\r\n            
mockedTranscript.Setup(x => x.Gene.EntrezGeneId.ToString()).Returns(\"12345\");\r\n            mockedTranscript.Setup(x => x.Gene.Symbol).Returns(\"TestGene\");\r\n            mockedTranscript.Setup(x => x.Id).Returns(CompactId.Convert(\"NM_124\"));\r\n            mockedTranscript.Setup(x => x.Version).Returns(1);\r\n            mockedTranscript.Setup(x => x.Translation.ProteinId).Returns(CompactId.Convert(\"NP_342\"));\r\n            mockedTranscript.Setup(x => x.Translation.ProteinVersion).Returns(2);\r\n\r\n\r\n\r\n            var mappedPosition = new Mock<IMappedPositions>();\r\n            mappedPosition.Setup(x => x.ProteinInterval).Returns(new NullableInterval(100, 101));\r\n            var transcript = new PianoAnnotatedTranscript(mockedTranscript.Object, \"AT\", \"GR\", mappedPosition.Object, \"KILGF\", \"ATYRGD\",\r\n                new List<ConsequenceTag> { ConsequenceTag.missense_variant ,ConsequenceTag.splice_region_variant});\r\n            var expectedOut = \"TestGene\t12345\tNM_124.1\tNP_342.2\t100-101\tKILGF\tAT/GR\tATYRGD\tmissense_variant,splice_region_variant\";\r\n\r\n            Assert.Equal(expectedOut, transcript.ToString());\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Sandbox/UnitTests/Piano/PianoTests.cs",
    "content": "﻿using Moq;\nusing Piano;\nusing VariantAnnotation.AnnotatedPositions.Transcript;\nusing VariantAnnotation.Caches.DataStructures;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.Interface.Intervals;\nusing VariantAnnotation.Interface.Positions;\nusing VariantAnnotation.Sequence;\nusing Vcf;\nusing Xunit;\n\nnamespace UnitTests\r\n{\n    public class PianoTests\n    {\n        private const string Enst00000343938GenomicSequence = \"GAGGGCGGGGCGAGGGCGGGGCGGTGGGCGGGGACGGGGCCCGCACGGCGGCTACGGCCTAGGTGAGCGGCTCGGACTCGGCGGCCGCACCTGCCCAACCCAACCCGCACGGTCCGGAAGTCGCCGAGGGGCCGGGAGCGGGAGGGGACGTCGTCCTAGAGGGCCGGAGCGGGCGGGCGGCCGAGGACCCGGCTCCCGCGCAGGACGGAGCCGTGGCTCAGGTCGGCCCCTCCCCAACACCACCCCGGGCCTCCGCCCCTTCCTGGGCCTCTCGGTGGAGCAGGGACCCGAACCGGTGCCCATCCAGTCCGGTGCCATCTGAAGCCCCCTTCCCAGGTGAGACTCGTAGCGCTCGCTCGACAGGGTCTGGTCCCACCCACAAGGCCTGGGGCGCCGTGGGGCCCCGTCTCCTGCTGGCCCCCCAGCCTGCTGTCAGCCCCCGTGCTCTGTGCTCAGGCCGCCCTCGCGCCCGGCCCTGACCTTGGGCCGTTGGGCTGCCCTGGGAAAGGCCTGGAGGTGTCCTGGGTCACCTTCCTGGGCTGGCAAGCTGCCTGCCTCCTGCACAGCCACTGCCCTTCCTGTTGTTACCGAGCCACCAGCCACAGCTCTGAGAAGCTCCTGGCAGCTTCTGTTTGCCACTGGCTCGAATCTGGGCAGGAAGGCAAGGCCCGCAGAATATCTGGTGACCAAGAAGGAAACCCCAGAGCCTCAGAGACCATCTTCTCAGTGGACAAAATTAAGGCCCGAGGAGGGGAGGGGCGTGCTGGAAGTCTATGGGACTGCATCTTTCTGAGGCCCAGGAGCAGCCATCCCCCACACCTGAAGCCCGGTGAGCTCACATCTGGGGCCTCCGCCTGGTGCCAAGCATGCAACCCAACCTGTGGGGCCTGCAACGCCAGGCTTCAGCACCCTGCAGGCACCAGTGCTCCAGCAGCCTGGGCCACGGGCTGGGCAGGGCTTGCAGCCCATGATCCCTAGTGATGAAGGGCCCAGTCCTAGGGTGCTGAGCAACCTGCCCACCTGCTCCTGGCCAGGAGCTCTCACCACGGCTGGGTGCCCTTCCCCCTCCCCCACCGATGGAGTCCCTGCAGCCAGGGAGGCCAGGACAGGGCTCCCAGCACCAACCGGCCTAGGAACCCCCAGGCCCTCTTCCTGGTCGAGGTGGAATGCAGCTGACTCTCAGGTTCCCCAGAGCAGGTGCGGGCCCGTGGGGCACCCGGGGAGACAGGGCAAGGGTGCTTGGCAACACTCACACAAAGCATGGGTGCCTGGATGTCTGTGGATCTGTGGAGTGACTATGTGAATGCCAGCAGAATCCAAAGCAGGGCCTGGGCCACTCGTGGAAGGCTCCCTAGGGCTAGTACAAGAGCCTCGTGGCAATCTTCTGAGTGGTAAAACCCATCTGTGTGGGACATGGAGTTTCAGCAACAGGAGTGAAAACACGTGTCCATCCATCCAGCAAGTGCCAGCCCTACAGCCTCTTTTCTGCTTTTGGGGATGTAGCAGTGAGGAAGATG
GGGCAGCCTGCCCGGCAGCATCCCCCCACCCCCGGCCCCACCTGTCTCTGCTTTCTGCTGTGTCTGTTTTCTTGTCTAGGACTTCAGAACTTCCTGTCTTTGTTGTCATCTGACCCCACCCCAGATGGCTGCTCGCACTCCCCATGCACCCAGATAGATGGCTAGGATGGTGCTTGGCTCTCGGCAGGGGCTTAGTATTTCTCCAGCTGGTAAAAGCAGATACAGCATCTAGAGAGAGAAACAAAAACAAGAAAGCACCAGCAGAGACACCTGCTGCAGACAGCGGGGCCTAGTGGTCTGATAAAGCCAGAGGGGGCCACTCTCGGGGTCAGGGACTGACACGGAGTCAGTGGCCTGATCCACAGGAGGGGCTGTGCCAAGGTCCCTGAATGCGCAATCCTGATGAAGGGTGGGTCAGGGTGGTGTGCCTGAGAGCCTGCGGCTTGGCTGGGAGCAGAGCCAGGCAGCTCCTGGGAGGAAGCTCCATGAGGGGCATGAGTGTTCAGTGAGCGGCAATGGGATCGCAGCTATTTTGTTCCCCTCCACACACAGAAAATGAGCCACAGAGCAAGCTGACCCCAGCGACACAGCCCCCCAGCCCTACTGTATTTCCGTTCCTATCAAAAAATGGATGACTCGGAGACAGGTTTCAATCTGAAAGTCGTCCTGGTCAGTTTCAAGCAGTGTCTCGATGAGAAGGAAGAGGTCTTGCTGGACCCCTACATTGCCAGCTGGAAGGGCCTGGTCAGGTGCGTGTGCCAGGGCTGCCTCCTGAGGTGGGCGCTCCCCTGGCCCGAGTCCCATATGTGGCATCTGCCTCCCGACTGCCTGTCCCCACCAGCTTTGCTGCCCGTTTCCAGATGGGTGTGAGCCCCCGCAGGCTGGGCAGCGTCCCCTGCACCCCAGGCGGGCTGCCCCAGGCCTGGGCGAGGACTCGAGCCCCGCTCCCTTCCACAGGTTTCTGAACAGCCTGGGCACCATCTTCTCATTCATCTCCAAGGACGTGGTCTCCAAGCTGCGGATCATGGAGCGCCTCAGGGGCGGCCCGCAGAGCGAGCACTACCGCAGCCTGCAGGCCATGGTGGCCCACGAGCTGAGCAACCGGCTGGTGGACCTGGAGCGCCGCTCCCACCACCCGGAGTCTGGCTGCCGGACGGTGCTGCGCCTGCACCGCGCCCTGCACTGGCTGCAGCTGTTCCTGGAGGGCCTGCGTACCAGCCCCGAGGACGCACGCACCTCCGCGCTCTGCGCCGACTCCTACAACGCCTCGCTGGCCGCCTACCACCCCTGGGTCGTGCGCCGCGCCGTCACCGTGGCCTTCTGCACGCTGCCCACACGCGAGGTCTTCCTGGAGGCCATGAACGTGGGGCCCCCGGAGCAGGCCGTGCAGATGCTAGGCGAGGCCCTCCCCTTCATCCAGCGTGTCTACAACGTCTCCCAGAAGCTCTACGCCGAGCACTCCCTGCTGGACCTGCCCTAGGGGCGGGAAGCCAGGGCCGCACCGGCTTTCCTGCTGCAGATCTGGGCTGCGGTGGCCAGGGCCGTGAGTCCCGTGGCAGAGCCTTCTGGGCGCTGCGGGAACAGGAGATCCTCTGTCGCCCCTGTGAGCTGAGCTGGTTAGGAACCACAGACTGTGACAGAGAAGGTGGCGACCAGCCCAGAAGAGGCCCACCCTCTCGGTCCGGAACAAGACGCCTCGGCCACGGCTCCCCCTCGGCCTATTACACGCGTGCGCAGCCAGGCCTCGCCAGGGTGCGGTGCAGAGCAGAGCAGGCAGGGGTGGGGGCCGGGCCTGCAAGAGCCCGAAAGGTCGCCACCCCCTAGCCTGTGGGGTGCATCTGCGAACCAGGGTGAAGTCACAGGTCCCGGGGTGTGGAGGCTCCATCCTTTCTCCTTTCTGCCAGCCGATGTGTCCTCATCTCAGGCCCGTGCCTGGGACCCCGTGTCTGCCCAGGTGGGCAGCCTTGAGCCCAGGGGACTCAGTGCCCTCCATGCCCTGG
CTGGCAGAAACCCTCAACAGCAGTCTGGGCACTGTGGGGCTCTCCCCGCCTCTCCTGCCTTGTTTGCCCCTCAGCGTGCCAGGCAGACTGGGGGCAGGACAGCCGGAAGCTGAGACCAAGGCTCCTCACAGAAGGGCCCAGGAAGTCCCCGCCCTTGGGACAGCCTCCTCCGTAGCCCCTGCACGGCACCAGTTCCCCGAGGGACGCAGCAGGCCGCCTCCCGCAGCGGCCGTGGGTCTGCACAGCCCAGCCCAGCCCAAGGCCCCCAGGAGCTGGGACTCTGCTACACCCAGTGAAATGCTGTGTCCCTTCTCCCCCGTGCCCCTTGATGCCCCCTCCCCACAGTGCTCAGGAGACCCGTGGGGCACGGAACAGGAGGGTCTGGACCCTGTGGCCCAGCCAAAGGCTACCAGACAGCCACAACCAGCCCAGCCACCATCCAGTGCCTGGGGCCTGGCCACTGGCTCTTCACAGTGGACCCCAGCACCTCGGGGTGGCAGAGGGACGGCCCCCACGGCCCAGCAGACATGCGAGCTTCCAGAGTGCAATCTATGTGATGTCTTCCAACGTTAATAAATCACACAGCCTCCCAGGAGGGAGACGCTGGGGTGCAC\";\n\n        private static ITranscript GetMockedTranscriptOnForwardStrand()\n        {\n            var mockedTranscript = new Mock<ITranscript>(); //get info from ENST00000343938.4\n            var chromosome = new Chromosome(\"chr1\", \"1\", 0);\n            var start = 1260147;\n            var end = 1264277;\n\n            var introns = new IInterval[]\n            {\n                new Interval(1260483, 1262215),\n                new Interval(1262413, 1262620)\n            };\n\n            var cdnaMaps = new ICdnaCoordinateMap[]\n            {\n                new CdnaCoordinateMap(1260147, 1260482, 1, 336),\n                new CdnaCoordinateMap(1262216, 1262412, 337, 533),\n                new CdnaCoordinateMap(1262621, 1264277, 534, 2160),\n            };\n\n            var translation = new Mock<ITranslation>();\n            translation.SetupGet(x => x.CodingRegion).Returns(new CdnaCoordinateMap(1262291, 1263143, 412, 1056));\n            translation.SetupGet(x => x.ProteinId).Returns(CompactId.Convert(\"ENST00000343938\"));\n            translation.SetupGet(x => x.ProteinVersion).Returns(4);\n            translation.SetupGet(x => x.PeptideSeq).Returns(\n                
\"MDDSETGFNLKVVLVSFKQCLDEKEEVLLDPYIASWKGLVRFLNSLGTIFSFISKDVVSKLRIMERLRGGPQSEHYRSLQAMVAHELSNRLVDLERRSHHPESGCRTVLRLHRALHWLQLFLEGLRTSPEDARTSALCADSYNASLAAYHPWVVRRAVTVAFCTLPTREVFLEAMNVGPPEQAVQMLGEALPFIQRVYNVSQKLYAEHSLLDLP\");\n\n            var gene = new Mock<IGene>();\n            gene.SetupGet(x => x.OnReverseStrand).Returns(false);\n            gene.SetupGet(x => x.EnsemblId).Returns(CompactId.Convert(\"ENSG00000224051 \"));\n            gene.SetupGet(x => x.Symbol).Returns(\"CPTP\");\n            mockedTranscript.SetupGet(x => x.Id).Returns(CompactId.Convert(\"ENST00000343938\"));\n            mockedTranscript.SetupGet(x => x.Source).Returns(Source.Ensembl);\n            mockedTranscript.SetupGet(x => x.Version).Returns(4);\n            mockedTranscript.SetupGet(x => x.Chromosome).Returns(chromosome);\n            mockedTranscript.SetupGet(x => x.Start).Returns(start);\n            mockedTranscript.SetupGet(x => x.End).Returns(end);\n            mockedTranscript.SetupGet(x => x.Gene).Returns(gene.Object);\n            mockedTranscript.SetupGet(x => x.Introns).Returns(introns);\n            mockedTranscript.SetupGet(x => x.CdnaMaps).Returns(cdnaMaps);\n            mockedTranscript.SetupGet(x => x.Translation).Returns(translation.Object);\n            mockedTranscript.SetupGet(x => x.TotalExonLength).Returns(2190);\n\n            return mockedTranscript.Object;\n\n        }\n\n        [Fact]\n        public void MissenseVariant()\n        {\n            var transcript = GetMockedTranscriptOnForwardStrand();\n            var chromosome = new Chromosome(\"chr1\", \"1\", 0);\n            var variant = new Variant(chromosome, 1262295, 1262295, \"A\", \"C\", VariantType.SNV, \"1:1262295:A>C\", false, false, null, null, new AnnotationBehavior(false, false, false, false, false, false));\n            var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1);\n            var result = PianoTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, 
refSequence, new AminoAcids(false));\n            var expectedResult =\n                \"CPTP\tENSG000000224051\tENST00000343938.4\tENST00000343938.4\t2\tM\tD/A\tDSETGFNLKVVLVSF\tmissense_variant\";\n            Assert.Equal(expectedResult, result.ToString());\n\n        }\n\n        [Fact]\n        public void missense_variant_in_TSS_returns()\n        {\n            var transcript = GetMockedTranscriptOnForwardStrand();\n            var chromosome = new Chromosome(\"chr1\", \"1\", 0);\n            var variant = new Variant(chromosome, 1262291, 1262291, \"A\", \"C\", VariantType.SNV, \"1:1262291:A>C\", false, false, null, null, new AnnotationBehavior(false, false, false, false, false, false));\n            var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1);\n            var result = PianoTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, new AminoAcids(false));\n            var expectedResult =\n                \"CPTP\tENSG000000224051\tENST00000343938.4\tENST00000343938.4\t1\t.\tM/L\tDDSETGFNLKVVLVS\tstart_lost\";\n            Assert.Equal(expectedResult, result.ToString());\n        }\n\n        [Fact]\n        public void synounymous_mutation_returns_no_change()\n        {\n            var transcript = GetMockedTranscriptOnForwardStrand();\n            var chromosome = new Chromosome(\"chr1\", \"1\", 0);\n            var variant = new Variant(chromosome, 1262347, 1262347, \"G\", \"A\", VariantType.SNV, \"1:1262347:G>A\", false, false, null, null, new AnnotationBehavior(false, false, false, false, false, false));\n            var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1);\n            var result = PianoTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, new AminoAcids(false));\n            var expectedResult =\n                
\"CPTP\tENSG000000224051\tENST00000343938.4\tENST00000343938.4\t19\tSETGFNLKVVLVSFK\tQ\tCLDEKEEVLLDPYIA\tsynonymous_variant\";\n            Assert.Equal(expectedResult, result.ToString());\n        }\n\n        [Fact]\n        public void Frameshift_mutation_returns_no_downStreamAminoAcids()\n        {\n            var transcript = GetMockedTranscriptOnForwardStrand();\n            var chromosome = new Chromosome(\"chr1\", \"1\", 0);\n            var variant = new Variant(chromosome, 1262347, 1262348, \"GT\", \"G\", VariantType.deletion, \"vid\", false, false, null, null, new AnnotationBehavior(false, false, false, false, false, false));\n            var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1);\n            var result = PianoTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, new AminoAcids(false));\n            var expectedResult =\n                \"CPTP\tENSG000000224051\tENST00000343938.4\tENST00000343938.4\t19-20\tSETGFNLKVVLVSFK\tQCLDEKEEVLLDPYIAS/QVSMRRKRSCWTPTLPX\t.\tframeshift_variant\";\n            Assert.Equal(expectedResult, result.ToString());\n        }\n\r\n    }\n\n}"
  },
  {
    "path": "Sandbox/UnitTests/Piano/SimpleSequence.cs",
    "content": "﻿using VariantAnnotation.Interface.Sequence;\r\n\r\nnamespace UnitTests\r\n{\r\n    public sealed class SimpleSequence : ISequence\r\n    {\r\n        private readonly string _sequence;\r\n        private readonly int _zeroBasedStartOffset;\r\n        public int Length => _zeroBasedStartOffset + _sequence.Length;\r\n\r\n        public SimpleSequence(string s, int zeroBasedStartOffset = 0)\r\n        {\r\n            _zeroBasedStartOffset = zeroBasedStartOffset;\r\n            _sequence = s;\r\n        }\r\n\r\n        public string Substring(int offset, int length)\r\n        {\r\n            if (offset - _zeroBasedStartOffset + length > _sequence.Length\r\n                || offset < _zeroBasedStartOffset)\r\n                return \"\";\r\n            return _sequence.Substring(offset - _zeroBasedStartOffset, length);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Sandbox/UnitTests/Resources/ConflicitingEntries1000G.vcf",
    "content": "1\t90\t.\tAT\tA,AC\t100\t.\t.\n1\t91\t.\tT\tA,G\t100\t.\t.\n1\t99\t.\tAT\tA,AC\t100\t.\t.\n1\t100\t.\tT\tTC,G\t100\t.\t.\n1\t100\t.\tT\tC,G\t100\t.\t.\nX\t60072\t.\tG\tC\t100\tPASS\tAC=64;AF=0.0127796;AN=5008;NS=2504;DP=12897;AMR_AF=0.0144;AFR_AF=0.0363;EUR_AF=0.005;SAS_AF=0.001;EAS_AF=0;AA=.|||;VT=SNP\nX\t60072\t.\tG\tC,T\t100\tPASS\tAC=71,462;AF=0.0141773,0.0922524;AN=5008;NS=2504;DP=12897;AMR_AF=0.0159,0.0173;AFR_AF=0.0408,0.1165;EUR_AF=0.005,0.0318;SAS_AF=0.001,0.1728;EAS_AF=0,0.0942;AA=.|||;VT=SNP;MULTI_ALLELIC\n4\t47016909\trs552911847;rs71193895\tGTATT\tGTATTTATT,G\t100\tPASS\tAC=17,843;AF=0.00339457,0.168331;AN=5008;NS=2504;DP=14760;EAS_AF=0.001,0.0526;AMR_AF=0,0.2608;AFR_AF=0.0121,0.0545;EUR_AF=0,0.332;SAS_AF=0,0.2076;VT=INDEL;MULTI_ALLELIC\n4\t47016909\trs111662489\tGTATTTATT\tG\t100\tPASS\tAC=2944;AF=0.587859;AN=5008;NS=2504;DP=14760;EAS_AF=0.6617;AMR_AF=0.549;AFR_AF=0.6634;EUR_AF=0.4911;SAS_AF=0.5368;VT=INDEL\n4\t47016909\trs202176827\tGTATTTATTTATT\tG\t100\tPASS\tAC=414;AF=0.0826677;AN=5008;NS=2504;DP=14760;EAS_AF=0.0883;AMR_AF=0.0764;AFR_AF=0.0204;EUR_AF=0.0805;SAS_AF=0.1677;VT=INDEL\n4\t47016909\trs558472223;rs557145274;rs553321222;rs202176827\tGTATTTATTTATT\tGTATTTATTTATTTATT,GTATTTATT,GTATT,G\t100\tPASS\tAC=16,849,2937,405;AF=0.00319489,0.169529,0.586462,0.0808706;AN=5008;NS=2504;DP=14760;EAS_AF=0.001,0.0496,0.6607,0.0883;AMR_AF=0,0.2651,0.5461,0.0749;AFR_AF=0.0113,0.0545,0.6604,0.0189;EUR_AF=0,0.338,0.4911,0.0746;SAS_AF=0,0.2076,0.5368,0.1677;VT=INDEL;MULTI_ALLELIC\n"
  },
  {
    "path": "Sandbox/UnitTests/Resources/RefMinorAllele.vcf",
    "content": "﻿1\t15255\trs541857151\tG\tC\t100\tPASS\tAC=1;AF=0.000199681;AN=5008;NS=2504;DP=27519;EAS_AF=0;AMR_AF=0.0014;AFR_AF=0;EUR_AF=0;SAS_AF=0;AA=g|||;VT=SNP\n1\t15260\trs561825427\tC\tT\t100\tPASS\tAC=2;AF=0.000399361;AN=5008;NS=2504;DP=26100;EAS_AF=0;AMR_AF=0;AFR_AF=0.0015;EUR_AF=0;SAS_AF=0;AA=c|||;VT=SNP\n1\t15274\trs62636497\tA\tG,T\t100\tPASS\tAC=1739,3230;AF=0.349244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC\n1\t15418\trs564536632\tG\tA\t100\tPASS\tAC=1;AF=0.000199681;AN=5008;NS=2504;DP=42394;EAS_AF=0;AMR_AF=0;AFR_AF=0.0008;EUR_AF=0;SAS_AF=0;AA=g|||;VT=SNP\n1\t15585\trs533630043\tG\tA\t100\tPASS\tAC=5;AF=0.998403;AN=5008;NS=2504;DP=29383;EAS_AF=0;AMR_AF=0.0014;AFR_AF=0.0008;EUR_AF=0.002;SAS_AF=0.001;AA=g|||;VT=SNP\n"
  },
  {
    "path": "Sandbox/UnitTests/Resources/Test1000GFile.vcf",
    "content": "1\t10177\t.\tA\tAC\t100\tPASS\tAC=2130;AF=0.425319;AN=5008;NS=2504;DP=103152;EAS_AF=0.3363;AMR_AF=0.3602;AFR_AF=0.4909;EUR_AF=0.4056;SAS_AF=0.4949;AA=|||unknown(NO_COVERAGE)\t\n1\t10235\t.\tT\tTA\t100\tPASS\tAC=6;AF=0.00119808;AN=5008;NS=2504;DP=78015;EAS_AF=0;AMR_AF=0.0014;AFR_AF=0;EUR_AF=0;SAS_AF=0.0051;AA=|||unknown(NO_COVERAGE)\t\n1\t10352\trs145072688\tT\tTA\t100\tPASS\tAC=2191;AF=0.4375;AN=5008;NS=2504;DP=88915;EAS_AF=0.4306;AMR_AF=0.4107;AFR_AF=0.4788;EUR_AF=0.4264;SAS_AF=0.4192;AA=|||unknown(NO_COVERAGE)\t\n1\t10505\t.\tA\tT\t100\tPASS\tAC=1;AF=0.000199681;AN=5008;NS=2504;DP=9632;EAS_AF=0;AMR_AF=0;AFR_AF=0.0008;EUR_AF=0;SAS_AF=0;AA=.|||\t\n1\t10506\t.\tC\tG\t100\tPASS\tAC=1;AF=0.000199681;AN=5008;NS=2504;DP=9676;EAS_AF=0;AMR_AF=0;AFR_AF=0.0008;EUR_AF=0;SAS_AF=0;AA=.|||\t\n1\t15274\trs201931625\tA\tG,T\t100\tPASS\tAC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||\n"
  },
  {
    "path": "Sandbox/UnitTests/Resources/TestCosmicParser.Coding.vcf",
    "content": "﻿##fileformat=VCFv4.1\n##source=COSMICv71\n##reference=GRCh37\n##fileDate=20141104\n##comment=\"Missing nucleotide details indicate ambiguity during curation process\"\n##comment=\"URL stub for COSM ID field (use numeric portion of ID)='http://cancer.sanger.ac.uk/cosmic/mutation/overview?id='\"\n##comment=\"REF and ALT sequences are both forward strand\n##INFO=<ID=GENE,Number=1,Type=String,Description=\"Gene name\">\n##INFO=<ID=STRAND,Number=1,Type=String,Description=\"Gene strand\">\n##INFO=<ID=CDS,Number=1,Type=String,Description=\"CDS annotation\">\n##INFO=<ID=AA,Number=1,Type=String,Description=\"Peptide annotation\">\n##INFO=<ID=CNT,Number=1,Type=Integer,Description=\"How many samples have this mutation\">\n#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n17\t7577520\tCOSM11929\tAT\tGA\t.\t.\tGENE=TP53;STRAND=+;CDS=c.146A>C;AA=p.H49P;CNT=1\n3\t41266082\tCOSM27285\tC\tT\t.\t.\tGENE=CTNNB1;STRAND=+;CDS=c.134A>C;AA=p.D45A;CNT=1\n7\t55242484\tCOSM29274\tT\tC\t.\t.\tGENE=EGFR;STRAND=+;CDS=c.140A>C;AA=p.H47P;CNT=1\n"
  },
  {
    "path": "Sandbox/UnitTests/Resources/TestCosmicParser.NonCoding.vcf",
    "content": "﻿##fileformat=VCFv4.1\n##source=COSMICv71\n##reference=GRCh37\n##fileDate=20141104\n##comment=\"Missing nucleotide details indicate ambiguity during curation process\"\n##comment=\"URL stub for COSM ID field (use numeric portion of ID)='http://cancer.sanger.ac.uk/cosmic/mutation/overview?id='\"\n##comment=\"REF and ALT sequences are both forward strand\n##INFO=<ID=GENE,Number=1,Type=String,Description=\"Gene name\">\n##INFO=<ID=STRAND,Number=1,Type=String,Description=\"Gene strand\">\n##INFO=<ID=CDS,Number=1,Type=String,Description=\"CDS annotation\">\n##INFO=<ID=AA,Number=1,Type=String,Description=\"Peptide annotation\">\n##INFO=<ID=CNT,Number=1,Type=Integer,Description=\"How many samples have this mutation\">\n#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n14\t81610259\tCOSN26416\tA\tG\t.\t.\tGENE=TSHR;STRAND=+;CDS=c.134A>C;AA=p.D45A;CNT=1\n3\t178936116\tCOSN27489\tGT\tC\t.\t.\tGENE=PIK3CA;STRAND=+;CDS=c.146A>C;AA=p.H49P;CNT=1\n3\t178916648\tCOSN27496\tG\tA\t.\t.\tGENE=PIK3CA;STRAND=+;CDS=c.255C>A;AA=p.I85I;CNT=1\n4\t178916648\tCOSN27497\tG\tA\t.\t.\tGENE=PIK3CA;STRAND=+;CDS=c.255C>A;AA=p.I85I;CNT=1\n"
  },
  {
    "path": "Sandbox/UnitTests/Resources/TestCosmicParser.tsv",
    "content": "Gene name\tAccession Number\tGene CDS length\tHGNC ID\tSample name\tID_sample\tID_tumour\tPrimary site\tSite subtype\tPrimary histology\tHistology subtype\tGenome-wide screen\tMutation ID\tMutation CDS\tMutation AA\tMutation Description\tMutation zygosity\tMutation GRCh37 genome position\tMutation GRCh37 strand\tSNP\tFATHMM prediction\tMutation somatic status\tPubmed_PMID\tID_STUDY\tSample source\tTumour origin\tAge\tComments\nTSHR\tENST00000541158\t2295\t12373\t1103576\t1103576\t1017828\tthyroid\tNS\tadenoma-nodule-goitre\tNS\tn\tCOSN26415\tc.1856A>G\tp.D619G\tSubstitution - Missense\thet\t14:81610258-81610258\t+\tn\tPASSENGER/OTHER\tConfirmed somatic variant\t.\t.\t10595453\t\tsurgery fresh/frozen\tNS\t\t\nTSHR\tENST00000541158\t2295\t12373\t1136601\t1136601\t1049165\tthyroid\tNS\tadenoma-nodule-goitre\tNS\tn\tCOSN26416\tc.1856A>G\tp.D619G\tSubstitution - Missense\t\t14:81610259-81610259\t+\tn\tPASSENGER/OTHER\tReported in another cancer sample as somatic\t.\t.\t18694911\t\tsurgery - NOS\tNS\t\t\nTP53\tENST00000269305\t1182\t11998\tG1205\t1378050\t1288091\thaematopoietic_and_lymphoid_tissue\tlymph_node\tlymphoid_neoplasm\tMALT_lymphoma\tn\tCOSM11929\tc.760_761AT>GA\tp.I254D\tSubstitution - Missense\t\t17:7577520-7577521\t-\t\t\tVariant of unknown origin\t.\t.\t8541549\t\tNS\tNS\t\tGrade:High grade\nCTNNB1\tENST00000349496\t2346\t2514\t1127061\t1127061\t1039915\tliver\tNS\tother\thepatoblastoma\tn\tCOSM27285\tc.79C>T\tp.Q27*\tSubstitution - Nonsense\t\t3:41266082-41266082\t+\tn\t\tConfirmed somatic variant\t.\t.\t17962810\t\tsurgery-fixed\tNS\t10.8\t\nPIK3CA\tNM_006218.1\t3207\t8975\t1747707\t1747707\t1652683\tliver\tNS\tcarcinoma\thepatocellular_carcinoma\tn\tCOSN27489\tc.1658_1659GT>C\tp.S553fs*7\tComplex - frameshift\t\t3:178936116-178936117\t+\t\t\tConfirmed somatic 
variant\t.\t.\t22258409\t\tsurgery-fixed\tNS\t\t\nPIK3CA\tNM_006218.1\t3207\t8975\t2023854\t2023854\t1906049\tsalivary_gland\tNS\tcarcinoma\tmyoepithelial_carcinoma\tn\tCOSN27496\tc.35G>A\tp.G12D\tSubstitution - Missense\t\t3:178916648-178916648\t+\tn\tCANCER\tReported in another cancer sample as somatic\t.\t.\t23933559\t\tsurgery - NOS\tNS\t\t\nEGFR\tENST00000275493\t3633\t3236\t1188169\t1188169\t1100068\tthyroid\tNS\tcarcinoma\tpapillary_carcinoma\tn\tCOSM29274\tc.2254T>C\tp.S752P\tSubstitution - Missense\thet\t7:55242484-55242484\t+\tn\tCANCER\tVariant of unknown origin\t.\t.\t19253367\t\tsurgery-fixed\tprimary\t66\tDrug Response:Gefitinib clinical partial response,Grade:Some Grade data are given in publication,Metastatic site:brain,Metastatic site:lung,Metastatic site:lymph node,Stage:Some Stage data are given in publication\n"
  },
  {
    "path": "Sandbox/UnitTests/Resources/TestWigParser.wig",
    "content": "fixedStep chrom=chr3 start=400601 step=100\n11\n22\n33\nfixedStep chrom=chr3 start=400601 step=100 span=5\n11\n22\n33\n"
  },
  {
    "path": "Sandbox/UnitTests/Resources/mini.WigFix",
    "content": "fixedStep chrom=chr1 start=100 step=1\n0.064\n0.058\n0.064\n0.058\n0.064\n0.064\n0.064\n0.064\n0.064\n0.058\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.058\n0.064\n0.000\n0.000\n0.000\n0.000\n0.000\n0.000\n0.058\nfixedStep chrom=chr1 start=175 step=1\n0.064\n0.058\n0.064\n0.058\n0.064\n0.058\n0.058\n-2.088\n0.064\n0.058\n0.058\n0.064\n0.064\n0.064\n0.064\n0.064\n0.058\n0.064\n0.064\n-2.363\n0.064\n0.064\n0.064\n0.064\n0.000\n0.064\n0.064\n0.058\n0.064\n0.064\n-2.096\n0.064\n-2.039\n0.064\n0.064\n0.064\n0.064\n0.064\n-2.363\n0.064\n-2.381\n0.064\n0.064\n0.064\n-2.305\n0.064\n0.058\n0.064\nfixedStep chrom=chr1 start=250 step=1\n0.058\n0.064\n0.000\n0.064\n0.058\n-2.305\n0.064\n0.064\n0.064\n0.058\n0.058\n-2.096\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.058\n0.064\n0.058\n0.064\n0.058\n0.058\n-2.088\n0.064\n0.058\n0.058\n0.064\n0.064\n0.064\n0.064\n0.064\n0.058\n0.064\n0.064\n-2.363\n0.064\n0.064\n0.064\n0.064\n0.000\n0.064\n0.064\n0.058\n0.064\n0.064\n-2.096\n0.064\n-2.039\n0.064\n0.064\n0.064\n0.064\n0.064\n-2.363\n0.064\n-2.381\n0.064\n0.064\n0.064\n-2.305\n0.064\n0.058\n0.064\n0.064\n0.058\n0.064\n0.058\n0.064\n0.058\n0.058\n-2.088\n0.064\n0.058\n0.058\n0.064\n0.064\n0.064\n0.064\n0.064\n0.058\n0.064\n0.064\n-2.363\n0.064\n0.064\n0.064\n0.064\n0.000\n0.064\n0.064\n0.058\n0.064\n0.064\n-2.096\n0.064\n-2.039\n0.064\n0.064\n0.064\n0.064\n0.064\n-2.363\n0.064\n-2.381\n0.064\n0.064\n0.064\n-2.305\n0.064\n0.058\n0.064\n0.064\n0.058\n0.064\n0.058\n0.064\n0.058\n0.058\n-2.088\n0.064\n0.058\n0.058\n0.064\n0.064\n0.064\n0.064\n0.064\n0.058\n0.064\n0.064\n-2.363\n0.064\n0.064\n0.064\n0.064\n0.000\n0.064\n0.064\n0.058\n0.064\n0.064\n-2.096\n0.064\n-2.039\n0.064\n0.064\n0.064\n0.064\n0.064\n-2.363\n0.064\n-2.381\n0.064\n0.064\n0.064\n-2.305\n0.064\n0.058\n0.064"
  },
  {
    "path": "Sandbox/UnitTests/Resources/missingLastVariantHgmd.vcf",
    "content": "##fileformat=VCFv4.1\n##Copyright=HGMD. Not for redistribution.\n##source=HGMD_PRO_2015.2\n##reference=hg19\n##comment=\"REF and ALT sequences are both on forward strand of reference assembly\"\n##IAE_TOP=<KEY=HGMD,MATCH=Position>\n##IAE_INFO=<INFO=CLASS,Type=String,JSON=class>\n##IAE_INFO=<INFO=MUT,Type=String,JSON=mutant>\n##IAE_INFO=<INFO=GENE,Type=String,JSON=gene>\n##IAE_INFO=<INFO=STRAND,Type=String,JSON=strand>\n##IAE_INFO=<INFO=DNA,Type=String,JSON=hgvsc>\n##IAE_INFO=<INFO=PROT,Type=String,JSON=hgvsp>\n##IAE_INFO=<INFO=DB,Type=String,JSON=dbsnp137>\n##IAE_INFO=<INFO=PHEN,Type=String,JSON=phenotype>\n##IAE_INFO=<INFO=ACC,Type=String,JSON=accession>\n##INFO=<ID=CLASS,Number=1,Type=String,Description=\"Mutation Category, https://portal.biobase-international.com/hgmd/pro/global.php#cats\">\n##INFO=<ID=MUT,Number=1,Type=String,Description=\"HGMD mutant allele\">\n##INFO=<ID=GENE,Number=1,Type=String,Description=\"Gene symbol\">\n##INFO=<ID=STRAND,Number=1,Type=String,Description=\"Gene strand\">\n##INFO=<ID=DNA,Number=1,Type=String,Description=\"DNA annotation\">\n##INFO=<ID=PROT,Number=1,Type=String,Description=\"Protein annotation\">\n##INFO=<ID=DB,Number=1,Type=String,Description=\"dbSNP identifier, build 137\">\n##INFO=<ID=PHEN,Number=1,Type=String,Description=\"HGMD primary phenotype\">\n##INFO=<ID=ACC,Number=1,Type=String,Description=\"HGMD 
accession\"\n#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\nchr1\t899318\t.\tCCT\tC\t.\t.\tCLASS=DM?;MUT=ALT;GENE=KLHL17;STRAND=+;DNA=NM_198317.2:c.1375_1376delCT;PHEN=Schizophrenia;ACC=CD142720\nchr1\t949523\t.\tC\tT\t.\t.\tCLASS=DM;MUT=ALT;GENE=ISG15;STRAND=+;DNA=NM_005101.3:c.163C>T;PROT=NP_005092.1:p.Q55*;PHEN=Idiopathic_basal_ganglia_calcification;ACC=CM1411641\nchr1\t949696\t.\tC\tCG\t.\t.\tCLASS=DM;MUT=ALT;GENE=ISG15;STRAND=+;DNA=NM_005101.3:c.339dupG;PHEN=Mycobacterial_disease_mendelian_susceptibility_to;ACC=CI128669\nchr3\t361508\t.\tC\tT\t.\t.\tCLASS=DP;MUT=ALT;GENE=CHL1;STRAND=+;DNA=NM_006614.3:c.49C>T;PROT=NP_006605.2:p.L17F;DB=rs2272522;PHEN=Schizophrenia_association_with;ACC=CM023348\nchr3\t1269501\t.\tG\tA\t.\t.\tCLASS=DM?;MUT=ALT;GENE=CNTN6;STRAND=+;DNA=NM_014461.3:c.183-1G>A;PHEN=Adenomatous_polyposis_coli;ACC=CS1410394\nchr3\t1363515\t.\tTA\tT\t.\t.\tCLASS=DM?;MUT=ALT;GENE=CNTN6;STRAND=+;DNA=NM_014461.3:c.944delA;PHEN=Adenomatous_polyposis_coli;ACC=CD1410396\nchr4\t367647\t.\tC\tT\t.\t.\tCLASS=DM;MUT=ALT;GENE=ZNF141;STRAND=+;DNA=NM_003441.2:c.1421C>T;PROT=NP_003432.1:p.T474I;PHEN=Postaxial_polydactyly_type_A;ACC=CM130005\nchr4\t437663\t.\tC\tT\t.\t.\tCLASS=DM?;MUT=ALT;GENE=ZNF721;STRAND=-;DNA=NM_133474.3:c.593G>A;PROT=NP_597731.2:p.R198H;PHEN=Schizophrenia;ACC=CM142691\nchr4\t619535\t.\tCCCGCC\tCGAGGACGGCCTGCGA\t.\t.\tCLASS=DM;MUT=ALT;GENE=PDE6B;STRAND=+;DNA=NM_000283.3:c.121_125delCCGCCinsGAGGACGGCCTGCGA;PHEN=Retinitis_pigmentosa_autosomal_recessive;ACC=CX148735"
  },
  {
    "path": "Sandbox/UnitTests/Resources/testClinGenUnifier.txt",
    "content": "9\tchr1\t757092\t2394455\tnssv1604129\t0\t.\t757092\t2394455\t17\t\"ID,Name,Alias,parent,Dbxref,var_origin,Start_range,End_range,clinical_int,copy_number,remapScore,validated,sample_name,phenotype,Variant_seq,Reference_seq,var_type,\"\t\"16087,nssv1604129,ISCA_INST_v5_2927,nsv869079,URL:www.ncbi.nlm.nih.gov/dbvar/variants/nsv869079%2CClinVar:SCV000178149,Not tested,.%2C757093,2394455%2C.,Pathogenic,1,1,Pass,Unknown,Developmental delay AND/OR other significant developmental or morphological phenotypes,-%2C.,~,copy_number_loss,\"\n9\tchr1\t757092\t2394455\tnssv1495164\t0\t.\t757092\t2394455\t17\t\"ID,Name,Alias,parent,Dbxref,var_origin,Start_range,End_range,clinical_int,copy_number,remapScore,validated,sample_name,phenotype,Variant_seq,Reference_seq,var_type,\"\t\"12784,nssv1495164,ISCA_INST_8724,nsv869079,URL:www.ncbi.nlm.nih.gov/dbvar/variants/nsv869079%2CClinVar:SCV000178148,Maternal,.%2C757093,2394455%2C.,Uncertain significance,4,1,Pass,Unknown,Developmental delay AND/OR other significant developmental or morphological phenotypes,~%2C.,-,copy_number_gain,\"\n9\tchr1\t779726\t2558913\tnssv582353\t0\t.\t779726\t2558913\t18\t\"ID,Name,Alias,parent,Dbxref,var_origin,Start_range,End_range,clinical_int,copy_number,remapScore,validated,sample_name,phenotype,phenotype_id,Variant_seq,Reference_seq,var_type,\"\t\"8885,nssv582353,ISCA_ret_INST_5468,nsv529358,URL:www.ncbi.nlm.nih.gov/dbvar/variants/nsv529358%2CClinVar:SCV000196301,Not tested,.%2C779727,2558913%2C.,Pathogenic,1,1,Pass,Unknown,Hypotelorism%2CMicrocephaly%2CShort 
stature,HP:0000252%2CHP:0000601%2CHP:0004322%2CMedGen:C0349588%2CMedGen:C1845868%2CMedGen:CN000563,-%2C.,~,copy_number_loss,\"\n9\tchr1\t779726\t2558913\tnssv582220\t0\t.\t779726\t2558913\t16\t\"ID,Name,Alias,parent,Dbxref,var_origin,Start_range,End_range,clinical_int,copy_number,remapScore,sample_name,phenotype,Variant_seq,Reference_seq,var_type,\"\t\"8754,nssv582220,ISCA_ret_INST_5335,nsv529358,URL:www.ncbi.nlm.nih.gov/dbvar/variants/nsv529358%2CClinVar:SCV000196302,Not tested,.%2C779727,2558913%2C.,Pathogenic,1,1,Unknown,Developmental delay AND/OR other significant developmental or morphological phenotypes,-%2C.,~,copy_number_loss,\"\n0\tchr1\t65410207\t68057686\tnssv1610460\t0\t.\t65410207\t68057686\t17\tID,Name,Alias,parent,Dbxref,var_origin,Start_range,End_range,clinical_int,copy_number,validated,sample_name,phenotype,phenotype_id,Variant_seq,Reference_seq,var_type,\t6399,nssv1610460,ISCA_INST_v6_4056,nsv932267,URL:www.ncbi.nlm.nih.gov/dbvar/variants/nsv932267%2CClinVar:SCV000181748,Not tested,.%2C65410208,68057686%2C.,Uncertain significance,1,Pass,Unknown,Intellectual disability%2CPanhypopituitarism%2CShort stature,HP:0000871%2CHP:0001249%2CHP:0004322%2CMedGen:C0349588%2CMedGen:C1843367%2CMedGen:CN000817,-%2C.,~,copy_number_loss,\n26\tchr1\t145601945\t146944906\tnssv581879\t0\t.\t145601945\t146944906\t16\t\"ID,Name,Alias,parent,Dbxref,var_origin,Start_range,End_range,clinical_int,copy_number,remapScore,sample_name,phenotype,Variant_seq,Reference_seq,var_type,\"\t\"12076,nssv581879,ISCA_ret_INST_4990,nsv530955,URL:www.ncbi.nlm.nih.gov/dbvar/variants/nsv530955%2CClinVar:SCV000175616,Not tested,.%2C145601946,146944906%2C.,Benign,3,1.60363,Unknown,Developmental delay AND/OR other significant developmental or morphological 
phenotypes,~%2C.,-,copy_number_gain,\"\n26\tchr1\t145601945\t146944906\tnssv584556\t0\t.\t145601945\t146944906\t18\t\"ID,Name,Alias,parent,Dbxref,var_origin,Start_range,End_range,clinical_int,copy_number,remapScore,sample_name,phenotype,phenotype_id,gender,Variant_seq,Reference_seq,var_type,\"\t\"14691,nssv584556,ISCA_INST_2924,nsv530955,URL:www.ncbi.nlm.nih.gov/dbvar/variants/nsv530955%2CClinVar:SCV000175617,Not tested,.%2C145601946,146944906%2C.,Uncertain significance,3,1.60363,ISCA_id_2774,Developmental delay AND/OR other significant developmental or morphological phenotypes%2CSpecific learning disability,HP:0001328%2CMedGen:CN001216,F,~%2C.,-,copy_number_gain,\"\n26\tchr1\t146987840\t148234205\tnssv581879\t0\t.\t146987840\t148234205\t16\t\"ID,Name,Alias,parent,Dbxref,var_origin,Start_range,End_range,clinical_int,copy_number,remapScore,sample_name,phenotype,Variant_seq,Reference_seq,var_type,\"\t\"12077,nssv581879,ISCA_ret_INST_4990,nsv530955,URL:www.ncbi.nlm.nih.gov/dbvar/variants/nsv530955%2CClinVar:SCV000175616,Not tested,.%2C146987841,148234205%2C.,Benign,3,1.48829,Unknown,Developmental delay AND/OR other significant developmental or morphological phenotypes,~%2C.,-,copy_number_gain,\"\n26\tchr1\t146987840\t148234205\tnssv584556\t0\t.\t146987840\t148234205\t18\t\"ID,Name,Alias,parent,Dbxref,var_origin,Start_range,End_range,clinical_int,copy_number,remapScore,sample_name,phenotype,phenotype_id,gender,Variant_seq,Reference_seq,var_type,\"\t\"14692,nssv584556,ISCA_INST_2924,nsv530955,URL:www.ncbi.nlm.nih.gov/dbvar/variants/nsv530955%2CClinVar:SCV000175617,Not tested,.%2C146987841,148234205%2C.,Uncertain significance,3,1.48829,ISCA_id_2774,Developmental delay AND/OR other significant developmental or morphological phenotypes%2CSpecific learning disability,HP:0001328%2CMedGen:CN001216,F,~%2C.,-,copy_number_gain,\"\n"
  },
  {
    "path": "Sandbox/UnitTests/Resources/tmpPopInfo.txt",
    "content": "Population Code\tPopulation Description\tSuper Population Code\tSequence Data Available\tAlignment Data Available\tVariant Data Available\nCHB\tHan Chinese in Bejing, China\tEAS\t1\t1\t1\nJPT\tJapanese in Tokyo, Japan\tEAS\t1\t1\t1\nCEU\tUtah Residents (CEPH) with Northern and Western Ancestry\tEUR\t1\t1\t1\nTSI\tToscani in Italia\tEUR\t1\t1\t1\nFIN\tFinnish in Finland\tEUR\t1\t1\t1\n"
  },
  {
    "path": "Sandbox/UnitTests/Resources/tmpSampleInfo.txt",
    "content": "Sample\tPopulation\tGender\nCHB0001\tCHB\tmale\nCHB0002\tCHB\tmale\nCHB0003\tCHB\tfemale\nJPT0001\tJPT\tfemale\nCEU0001\tCEU\tmale\nTSI0001\tTSI\tmale\nFIN0001\tFIN\tmale\nJPT0002\tJPT\tfemale\nCEU0002\tCEU\tfemale\nTSI0002\tTSI\tfemale\nFIN0002\tFIN\tfemale\nJPT0003\tJPT\tmale\nCEU0003\tCEU\tmale\nTSI0003\tTSI\tmale\nFIN0003\tFIN\tmale\t\nFIN0004\tFIN\tmale"
  },
  {
    "path": "Sandbox/UnitTests/UnitTests.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\n  <PropertyGroup>\n    <TargetFramework>net6.0</TargetFramework>\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\n    \n  </PropertyGroup>\n  <ItemGroup>\n    <PackageReference Include=\"Microsoft.NET.Test.Sdk\" Version=\"15.0.0\" />\n    <PackageReference Include=\"Moq\" Version=\"4.7.137\" />\n    <PackageReference Include=\"xunit\" Version=\"2.2.0\" />\n    <PackageReference Include=\"xunit.runner.visualstudio\" Version=\"2.2.0\" />\n  </ItemGroup>\n  <ItemGroup>\n    <ProjectReference Include=\"..\\..\\SAUtils\\SAUtils.csproj\" />\n    <ProjectReference Include=\"..\\..\\VariantAnnotation.Interface\\VariantAnnotation.Interface.csproj\" />\n    <ProjectReference Include=\"..\\..\\VariantAnnotation\\VariantAnnotation.csproj\" />\n    <ProjectReference Include=\"..\\Piano\\Piano.csproj\" />\n  </ItemGroup>\n  <ItemGroup>\r\n    <Service Include=\"{82a7f48d-3b50-4b1e-b82e-3ada8210c358}\" />\r\n  </ItemGroup>\n  <Import Project=\"..\\..\\VariantAnnotation\\CommonAssemblyInfo.props\" />\n</Project>"
  },
  {
    "path": "Sandbox/UnitTests/Utilities/ResourceUtilities.cs",
    "content": "﻿using System.IO;\nusing System.Reflection;\n\nnamespace UnitTests.Utilities\n{\n    public static class ResourceUtilities\n    {\n        /// <summary>\n        /// given a resource filename, this method returns a stream corresponding to the file if\n        /// it exists. Otherwise a file not found exception is thrown.\n        /// </summary>\n        // ReSharper disable once UnusedParameter.Global\n        public static Stream GetResourceStream(string resourcePath, bool checkMissingFile = true)\n        {\n            var stream = Assembly.GetEntryAssembly().GetManifestResourceStream(resourcePath);\n\n            if (checkMissingFile && stream == null)\n            {\n                throw new FileNotFoundException($\"ERROR: The embedded resource file ({resourcePath}) was not found.\");\n            }\n\n            return stream;\n        }\n    }\n}\n"
  },
  {
    "path": "Sandbox/UnitTests/Utilities/Resources.cs",
    "content": "﻿using System;\nusing System.IO;\n\nnamespace UnitTests.Utilities\n{\n    public static class Resources\n    {\n        public static readonly string Top;\n        public static string TopPath(string path) => Path.Combine(Top, path);\n\n        static Resources()\n        {\n            var solutionDir = GetParentDirectory(AppContext.BaseDirectory, 3);\n            Top = Path.Combine(solutionDir, \"UnitTests\", \"Resources\");\n        }\n\n        private static string GetParentDirectory(string directory, int numLevels)\n        {\n            for (int i = 0; i < numLevels; i++) directory = Path.GetDirectoryName(directory);\n            return directory;\n        }\n    }\n}\n"
  },
  {
    "path": "SingleAnnotationLambda/CacheConfiguration.cs",
    "content": "﻿using System;\r\nusing Genome;\r\n\r\nnamespace SingleAnnotationLambda\r\n{\r\n    public sealed class CacheConfiguration : IEquatable<CacheConfiguration>\r\n    {\r\n        private readonly GenomeAssembly _genomeAssembly;\r\n        private readonly string _supplementaryAnnotations;\r\n        private readonly int _vepVersion;\r\n\r\n        public CacheConfiguration(GenomeAssembly genomeAssembly, string supplementaryAnnotations, int vepVersion)\r\n        {\r\n            _genomeAssembly           = genomeAssembly;\r\n            _supplementaryAnnotations = supplementaryAnnotations?.ToLower();\r\n            _vepVersion               = vepVersion;\r\n        }\r\n\r\n        public bool Equals(CacheConfiguration other)\r\n        {\r\n            if (ReferenceEquals(null, other)) return false;\r\n            if (ReferenceEquals(this, other)) return true;\r\n            return _genomeAssembly == other._genomeAssembly &&\r\n                   string.Equals(_supplementaryAnnotations, other._supplementaryAnnotations) &&\r\n                   _vepVersion == other._vepVersion;\r\n        }\r\n\r\n        public override int GetHashCode()\r\n        {\r\n            unchecked\r\n            {\r\n                var hashCode = (int) _genomeAssembly;\r\n                if (_supplementaryAnnotations != null) hashCode = (hashCode * 397) ^ _supplementaryAnnotations.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ _vepVersion;\r\n                return hashCode;\r\n            }\r\n        }\r\n\r\n        public override string ToString()\r\n        {\r\n            return $\"genome assembly: {_genomeAssembly}, SA: {_supplementaryAnnotations}, VEP: {_vepVersion}\";\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "SingleAnnotationLambda/CacheUtilities.cs",
    "content": "﻿using System.Linq;\r\nusing Cloud;\r\nusing Genome;\r\n\r\nnamespace SingleAnnotationLambda\r\n{\r\n    public static class CacheUtilities\r\n    {\r\n        public const int DefaultVepVersion = 91;\r\n\r\n        private static readonly int[] SupportedVepVersions = { 84, 91 };\r\n\r\n        public static bool IsVepVersionSupported(int vepVersion) =>\r\n            SupportedVepVersions.Any(supportedVepVersion => vepVersion == supportedVepVersion);\r\n\r\n        public static string GetSupportedVersions() => string.Join(\", \", SupportedVepVersions);\r\n\r\n        private static string UrlCombine(string baseUrl, string relativeUrl) => baseUrl.TrimEnd('/') + '/' + relativeUrl.TrimStart('/');\r\n\r\n        public static string GetCachePathPrefix(int vepVersion, GenomeAssembly genomeAssembly)\r\n        {\r\n            string suffix = $\"{genomeAssembly}/{LambdaUrlHelper.DefaultCacheSource}\";\r\n            \r\n            switch (vepVersion)\r\n            {\r\n                case 84:\r\n                    return UrlCombine($\"{LambdaUrlHelper.GetBaseUrl() +LambdaUrlHelper.S3CacheFolderBase}/26/VEP84/\", suffix);\r\n                default:\r\n                    return UrlCombine($\"{LambdaUrlHelper.GetCacheFolder()}\", suffix);\r\n            }\r\n            \r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "SingleAnnotationLambda/ExceptionHandler.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing Cloud.Notifications;\r\nusing Cloud.Utilities;\r\nusing ErrorHandling;\r\nusing IO;\r\n\r\nnamespace SingleAnnotationLambda\r\n{\r\n    public static class ExceptionHandler\r\n    {\r\n        public static Stream GetStream(string id, string snsTopicArn, Exception e)\r\n        {\r\n            Logger.Log(e);\r\n            GC.Collect();\r\n\r\n            string snsMessage = SNS.CreateMessage(e.Message, \"exception\", e.StackTrace);\r\n            SNS.SendMessage(snsTopicArn, snsMessage);\r\n\r\n            ErrorCategory errorCategory = ExceptionUtilities.ExceptionToErrorCategory(e);\r\n            string message = GetMessage(errorCategory, e.Message);\r\n\r\n            LogUtilities.LogObject(\"Result\", message);\r\n\r\n            return SingleResult.Create(id, message, null);\r\n        }\r\n\r\n        private static string GetMessage(ErrorCategory errorCategory, string exceptionMessage)\r\n        {\r\n            if (errorCategory == ErrorCategory.UserError) return \"User error: \" + FirstCharToLower(exceptionMessage);\r\n            return \"Nirvana error: an unexpected annotation error occurred while annotating this variant.\";\r\n        }\r\n\r\n        private static string FirstCharToLower(string input) => string.IsNullOrEmpty(input) || char.IsLower(input[0])\r\n            ? input\r\n            : char.ToLowerInvariant(input[0]) + input.Substring(1);\r\n    }\r\n}\r\n"
  },
  {
    "path": "SingleAnnotationLambda/SingleAnnotationLambda.cs",
    "content": "using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Reflection;\r\nusing System.Text;\r\nusing Amazon.Lambda.Core;\r\nusing Cloud;\r\nusing Cloud.Messages.Single;\r\nusing Cloud.Utilities;\r\nusing CommandLine.Utilities;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing IO;\r\nusing Nirvana;\r\nusing OptimizedCore;\r\nusing VariantAnnotation;\r\nusing VariantAnnotation.Interface;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Utilities;\r\nusing JsonWriter = VariantAnnotation.IO.JsonWriter;\r\n\r\n[assembly: LambdaSerializer(typeof(Amazon.Lambda.Serialization.Json.JsonSerializer))]\r\n\r\nnamespace SingleAnnotationLambda\r\n{\r\n    // ReSharper disable once UnusedMember.Global\r\n    // ReSharper disable once UnusedType.Global\r\n    public class SingleAnnotationLambda\r\n    {\r\n        private const int MaxNumCacheConfigurations = 2;\r\n        private readonly Dictionary<CacheConfiguration, AnnotationResources> _cacheConfigurationToAnnotationResources = new Dictionary<CacheConfiguration, AnnotationResources>();\r\n        private readonly LinkedList<CacheConfiguration> _recentCacheConfigurations = new LinkedList<CacheConfiguration>();\r\n\r\n        // ReSharper disable once UnusedMember.Global\r\n        public Stream Run(SingleConfig config, ILambdaContext context)\r\n        {\r\n            string snsTopicArn = null;\r\n            Stream response;\r\n\r\n            try\r\n            {\r\n                LogUtilities.UpdateLogger(context.Logger, null);\r\n                LogUtilities.LogLambdaInfo(context, CommandLineUtilities.InformationalVersion);\r\n                LogUtilities.LogObject(\"Config\", config);\r\n                LogUtilities.Log(new[] { LambdaUrlHelper.UrlBaseEnvironmentVariableName, LambdaUtilities.SnsTopicKey });\r\n\r\n                
LambdaUtilities.GarbageCollect();\r\n\r\n                snsTopicArn = LambdaUtilities.GetEnvironmentVariable(LambdaUtilities.SnsTopicKey);\r\n\r\n                config.Validate();\r\n\r\n                GenomeAssembly genomeAssembly = GenomeAssemblyHelper.Convert(config.genomeAssembly);\r\n                \r\n                var cacheConfiguration        = new CacheConfiguration(genomeAssembly, config.supplementaryAnnotations, config.vepVersion);\r\n                bool preloadRequired          = !string.IsNullOrEmpty(config.supplementaryAnnotations);\r\n                AnnotationResources annotationResources = GetAndCacheAnnotationResources(config, cacheConfiguration);\r\n\r\n                if (genomeAssembly!=GenomeAssembly.hg19) annotationResources.Annotator.EnableMitochondrialAnnotation();\r\n\r\n                (IPosition position, string[] sampleNames) = config.GetPositionAndSampleNames(annotationResources.SequenceProvider, annotationResources.RefMinorProvider);\r\n                if (position.Chromosome.IsEmpty()) throw new UserErrorException($\"An unknown chromosome was specified ({config.variant.chromosome})\");\r\n\r\n                string annotationResult = GetPositionAnnotation(position, annotationResources, sampleNames, preloadRequired);\r\n                response = SingleResult.Create(config.id, LambdaUrlHelper.SuccessMessage, annotationResult);\r\n            }\r\n            catch (Exception exception)\r\n            {\r\n                response = ExceptionHandler.GetStream(config.id, snsTopicArn, exception);\r\n            }\r\n\r\n            return response;\r\n        }\r\n\r\n        private AnnotationResources GetAndCacheAnnotationResources(SingleConfig input, CacheConfiguration cacheConfiguration)\r\n        {\r\n            if (_cacheConfigurationToAnnotationResources.TryGetValue(cacheConfiguration, out AnnotationResources annotationResources))\r\n            {\r\n                if 
(!_recentCacheConfigurations.Last.Value.Equals(cacheConfiguration))\r\n                {\r\n                    _recentCacheConfigurations.Remove(cacheConfiguration);\r\n                    _recentCacheConfigurations.AddLast(cacheConfiguration);\r\n                    Logger.WriteLine($\"Cached configurations: {string.Join(\"; \", _recentCacheConfigurations)}\");\r\n                }\r\n\r\n                return annotationResources;\r\n            }\r\n\r\n            if (_recentCacheConfigurations.Count == MaxNumCacheConfigurations)\r\n            {\r\n                CacheConfiguration configurationToRemove = _recentCacheConfigurations.First.Value;\r\n                _recentCacheConfigurations.RemoveFirst();\r\n                _cacheConfigurationToAnnotationResources.Remove(configurationToRemove);\r\n                GC.Collect();\r\n                GC.WaitForPendingFinalizers();\r\n            }\r\n\r\n            Logger.WriteLine($\"Creating annotation resources for {cacheConfiguration}\");\r\n            annotationResources = GetAnnotationResources(input);\r\n            _cacheConfigurationToAnnotationResources[cacheConfiguration] = annotationResources;\r\n            _recentCacheConfigurations.AddLast(cacheConfiguration);\r\n            Logger.WriteLine($\"Cached configurations: {string.Join(\"; \", _recentCacheConfigurations)}\");\r\n\r\n            return annotationResources;\r\n        }\r\n\r\n        private static AnnotationResources GetAnnotationResources(SingleConfig lambdaConfig)\r\n        {\r\n            GenomeAssembly genomeAssembly = GenomeAssemblyHelper.Convert(lambdaConfig.genomeAssembly);\r\n            string cachePathPrefix        = CacheUtilities.GetCachePathPrefix(lambdaConfig.vepVersion, genomeAssembly);\r\n            string nirvanaS3Ref           = LambdaUrlHelper.GetRefUrl(genomeAssembly);\r\n\r\n            string annotatorVersion     = \"Nirvana \" + 
CommandLineUtilities.GetVersion(Assembly.GetAssembly(typeof(SingleAnnotationLambda)));\r\n            var metrics = new PerformanceMetrics();\r\n\r\n            Logger.WriteLine($\"Cache prefix: {cachePathPrefix}\");\r\n            //todo: get customStrTsv from lambdaConfig\r\n            var annotationResources = new AnnotationResources(nirvanaS3Ref, cachePathPrefix,\r\n                null, lambdaConfig.customAnnotations, null, false, false, metrics)\r\n            {\r\n                AnnotatorVersionTag = annotatorVersion\r\n            };\r\n\r\n            return annotationResources;\r\n        }\r\n\r\n        private static string GetPositionAnnotation(IPosition position, IAnnotationResources resources, string[] sampleNames, bool preloadRequired)\r\n        {\r\n            if (preloadRequired) resources.SingleVariantPreLoad(position);\r\n            IAnnotatedPosition annotatedPosition = resources.Annotator.Annotate(position);\r\n            var                sb                = annotatedPosition?.GetJsonStringBuilder();\r\n            if (sb == null) throw new UserErrorException(\"No variant is provided for annotation\");\r\n            \r\n            string json = StringBuilderPool.GetStringAndReturn(sb);\r\n            if (json == null) throw new UserErrorException(\"No variant is provided for annotation\");\r\n\r\n            var outputJsonStream = new MemoryStream();\r\n            using (var jsonWriter = new JsonWriter(outputJsonStream, null, resources, Date.CurrentTimeStamp, sampleNames, true))\r\n            {\r\n                WriteAnnotatedPosition(annotatedPosition, jsonWriter, json);\r\n                jsonWriter.WriteGenes(resources.Annotator.GetGeneAnnotations());\r\n            }\r\n\r\n            outputJsonStream.Position = 0;\r\n            return Encoding.UTF8.GetString(outputJsonStream.ToArray());\r\n        }\r\n\r\n        private static void WriteAnnotatedPosition(IAnnotatedPosition annotatedPosition, IJsonWriter jsonWriter,\r\n   
         string jsonOutput) => jsonWriter.WritePosition(annotatedPosition.Position, jsonOutput);\r\n    }\r\n}\r\n"
  },
  {
    "path": "SingleAnnotationLambda/SingleAnnotationLambda.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\r\n  <PropertyGroup>\r\n    <TargetFramework>net6.0</TargetFramework>\r\n    <GenerateRuntimeConfigurationFiles>true</GenerateRuntimeConfigurationFiles>\r\n    <AWSProjectType>Lambda</AWSProjectType>\r\n    <OutputPath>bin\\$(Configuration)</OutputPath>\r\n    \r\n  </PropertyGroup>\r\n  <ItemGroup>\r\n    <PackageReference Include=\"Amazon.Lambda.Core\" Version=\"2.1.0\" />\r\n    <PackageReference Include=\"Amazon.Lambda.Serialization.Json\" Version=\"2.0.0\" />\r\n  </ItemGroup>\r\n  <ItemGroup>\r\n    <ProjectReference Include=\"..\\Cloud\\Cloud.csproj\" />\r\n    <ProjectReference Include=\"..\\ErrorHandling\\ErrorHandling.csproj\" />\r\n    <ProjectReference Include=\"..\\Genome\\Genome.csproj\" />\r\n    <ProjectReference Include=\"..\\Nirvana\\Nirvana.csproj\" />\r\n    <ProjectReference Include=\"..\\Vcf\\Vcf.csproj\" />\r\n  </ItemGroup>\r\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\r\n</Project>\r\n"
  },
  {
    "path": "SingleAnnotationLambda/SingleConfigExtensions.cs",
    "content": "﻿using System.IO;\r\nusing Cloud.Messages.Single;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing MitoHeteroplasmy;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing Vcf;\r\nusing Vcf.VariantCreator;\r\n\r\nnamespace SingleAnnotationLambda\r\n{\r\n    public static class SingleConfigExtensions\r\n    {\r\n        public static void Validate(this SingleConfig config)\r\n        {\r\n            if (string.IsNullOrEmpty(config.id)) throw new UserErrorException(\"Please specify the id.\");\r\n            if (string.IsNullOrEmpty(config.genomeAssembly)) throw new UserErrorException(\"Please specify the genome assembly.\");\r\n            if (config.variant == null) throw new UserErrorException(\"Please specify the variant (chromosome, position, reference allele, and alt alleles).\");\r\n            config.ValidateSupplementaryAnnotations();\r\n            config.ValidateVepVersion();\r\n            config.variant?.Validate();\r\n        }\r\n\r\n        private static void ValidateSupplementaryAnnotations(this SingleConfig config)\r\n        {\r\n            if (string.IsNullOrEmpty(config.supplementaryAnnotations)) return;\r\n            if (SupplementaryAnnotationUtilities.IsValueSupported(config.supplementaryAnnotations)) return;\r\n            throw new UserErrorException($\"An invalid supplementary annotation value ({config.supplementaryAnnotations}) was specified. 
Please choose one of the following values: {SupplementaryAnnotationUtilities.GetSupportedValues()}\");\r\n        }\r\n\r\n        private static void ValidateVepVersion(this SingleConfig config)\r\n        {\r\n            if (config.vepVersion == 0) config.vepVersion = CacheUtilities.DefaultVepVersion;\r\n            if (CacheUtilities.IsVepVersionSupported(config.vepVersion)) return;\r\n            throw new UserErrorException($\"An invalid VEP version ({config.vepVersion}) was specified. Please choose one of the following versions: {CacheUtilities.GetSupportedVersions()}\");\r\n        }\r\n\r\n        public static (IPosition, string[]) GetPositionAndSampleNames(this SingleConfig config, ISequenceProvider sequenceProvider,\r\n            IRefMinorProvider refMinorProvider) => (ToPosition(config.variant.GetVcfFields(), sequenceProvider, refMinorProvider), config.variant.sampleNames);\r\n\r\n        private static IPosition ToPosition(string[] vcfFields, ISequenceProvider sequenceProvider, IRefMinorProvider refMinorProvider)\r\n        {\r\n            Chromosome chromosome = ReferenceNameUtilities.GetChromosome(sequenceProvider.RefNameToChromosome, vcfFields[VcfCommon.ChromIndex]);\r\n\r\n            sequenceProvider.LoadChromosome(chromosome);\r\n\r\n            (int start, bool foundError) = vcfFields[VcfCommon.PosIndex].OptimizedParseInt32();\r\n            if (foundError) throw new InvalidDataException($\"Unable to convert the VCF position to an integer: {vcfFields[VcfCommon.PosIndex]}\");\r\n\r\n            SimplePosition simplePosition = SimplePosition.GetSimplePosition(chromosome, start, vcfFields, new NullVcfFilter());\r\n            var variantFactory = new VariantFactory(sequenceProvider.Sequence, new VariantId());\r\n            var mitoHeteroplasmyProvider = new MitoHeteroplasmyProvider();\r\n            return Position.ToPosition(simplePosition, refMinorProvider, sequenceProvider, mitoHeteroplasmyProvider, variantFactory);\r\n        }\r\n    
}\r\n}\r\n"
  },
  {
    "path": "SingleAnnotationLambda/SingleResult.cs",
    "content": "﻿using System.Data;\r\nusing System.IO;\r\nusing System.Text;\r\nusing Cloud;\r\nusing Cloud.Utilities;\r\nusing Newtonsoft.Json;\r\n\r\nnamespace SingleAnnotationLambda\r\n{\r\n    public static class SingleResult\r\n    {\r\n        private const string OutputBeforeNirvanaJson = \",\\\"annotation\\\":\";\r\n        private const string OutputEnd = \"}\";\r\n\r\n        public static Stream Create(string id, string status, string nirvanaJson)\r\n        {\r\n            string statusJson = JsonConvert.SerializeObject(status);\r\n            string outputStart = $\"{{\\\"id\\\":\\\"{id}\\\",\\\"status\\\":{statusJson}\";\r\n            string output;\r\n\r\n            if (status == LambdaUrlHelper.SuccessMessage)\r\n            {\r\n                if (nirvanaJson == null)  throw new NoNullAllowedException(\"Nirvana annotation cannot be null when the job is successful.\");\r\n                output = outputStart + OutputBeforeNirvanaJson + nirvanaJson + OutputEnd;\r\n            }\r\n            else\r\n            {\r\n                output = outputStart + OutputEnd;\r\n            }\r\n\r\n            LogUtilities.LogObject(\"Result\", output);\r\n\r\n            var outputStream = new MemoryStream(Encoding.UTF8.GetBytes(output));\r\n            return outputStream;\r\n        }   \r\n    }\r\n}\r\n"
  },
  {
    "path": "SingleAnnotationLambda/SupplementaryAnnotationUtilities.cs",
    "content": "﻿using System.Linq;\r\n\r\nnamespace SingleAnnotationLambda\r\n{\r\n    public static class SupplementaryAnnotationUtilities\r\n    {\r\n        private static readonly string[] SupportedValues = { \"latest\", \"release\" };\r\n\r\n        public static bool IsValueSupported(string supplementaryAnnotations)\r\n        {\r\n            string sa = supplementaryAnnotations?.ToLower();\r\n            return SupportedValues.Any(supportedValue => sa == supportedValue);\r\n        }\r\n\r\n        public static string GetSupportedValues() => string.Join(\", \", SupportedValues);\r\n    }\r\n}\r\n"
  },
  {
    "path": "Tabix/AssemblyInfo.cs",
    "content": "﻿using System.Runtime.CompilerServices;\n\n[assembly: InternalsVisibleTo(\"UnitTests\")]"
  },
  {
    "path": "Tabix/BgzfBlockVcfReader.cs",
    "content": "﻿using System.IO;\r\nusing Compression.FileHandling;\r\nusing Genome;\r\nusing OptimizedCore;\r\n\r\nnamespace Tabix\r\n{\r\n    public static class BgzfBlockVcfReader\r\n    {\r\n        public static bool FindVariantsInBlocks(Stream stream, long beginOffset, long endOffset, BgzfBlock block,\r\n            Chromosome chromosome, int start, int end)\r\n        {\r\n            stream.Position = beginOffset;\r\n\r\n            while (stream.Position <= endOffset)\r\n            {\r\n                string blockString = block.Read(stream);\r\n                if (HasVcfPositionsOnInterval(blockString, chromosome, start, end)) return true;\r\n            }\r\n\r\n            return false;\r\n        }\r\n\r\n        internal static bool HasVcfPositionsOnInterval(string s, Chromosome chromosome, int start, int end)\r\n        {\r\n            string[] rawLines = s.OptimizedSplit('\\n');\r\n\r\n            foreach (string line in rawLines)\r\n            {\r\n                string[] cols = line.Split('\\t', 3);\r\n                if (cols.Length < 2) continue;\r\n\r\n                string chromosomeName = cols[0];\r\n                string positionString = cols[1];\r\n\r\n                if (chromosomeName != chromosome.EnsemblName && chromosomeName != chromosome.UcscName) continue;\r\n                if (!int.TryParse(positionString, out int position)) continue;\r\n\r\n                if (position > end) break;\r\n                if (position >= start && position <= end) return true;\r\n            }\r\n\r\n            return false;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Tabix/BinUtilities.cs",
    "content": "﻿using System.Collections.Generic;\r\n\r\nnamespace Tabix\r\n{\r\n    internal static class BinUtilities\r\n    {\r\n        internal static int FirstBin(int bin) => ((1 << ((bin << 1) + bin)) - 1) / 7;\r\n        internal static int ParentBin(int bin) => (bin - 1) >> 3;\r\n\r\n        internal static int BottomBin(int bin)\r\n        {\r\n            var level = 0;\r\n            for (int b = bin; b != 0; b = ParentBin(b)) level++;\r\n            return (bin - FirstBin(level)) << (Constants.NumLevels - level) * 3;\r\n        }\r\n\r\n        /// <summary>\r\n        /// assumes begin is 0-based\r\n        /// </summary>\r\n        internal static int ConvertPositionToBin(int begin) => 4681 + (begin >> Constants.MinShift);\r\n\r\n        internal static IEnumerable<int> OverlappingBinsWithVariants(int begin, int end, Dictionary<int, Interval[]> idToChunks)\r\n        {\r\n            var overlappingBins = new List<int>();\r\n            if (begin >= end) return overlappingBins;\r\n\r\n            int shift = Constants.InitialShift;\r\n            if (end >= Constants.MaxReferenceLength) end = Constants.MaxReferenceLength;\r\n\r\n            var level = 0;\r\n            var levelStartBin = 0;\r\n\r\n            for (--end; level <= Constants.NumLevels; shift -= 3, levelStartBin += 1 << ((level << 1) + level), level++)\r\n            {\r\n                int beginBin = levelStartBin + (begin >> shift);\r\n                int endBin   = levelStartBin + (end >> shift);\r\n\r\n                for (int bin = beginBin; bin <= endBin; bin++)\r\n                {\r\n                    if (idToChunks.ContainsKey(bin)) overlappingBins.Add(bin);\r\n                }\r\n            }\r\n\r\n            return overlappingBins;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Tabix/Constants.cs",
    "content": "﻿namespace Tabix\r\n{\r\n    internal static class Constants\r\n    {\r\n        internal const int TabixMagic = 21578324;\r\n        internal const int MinShift   = 14;\r\n        internal const int NumLevels  = 5;\r\n        // ReSharper disable once UnusedMember.Global\r\n        internal const int VcfFormat  = 2;\r\n\r\n        internal const int InitialShift       = 29;\r\n        internal const int MaxReferenceLength = 536_870_912;\r\n    }\r\n}\r\n"
  },
  {
    "path": "Tabix/Index.cs",
    "content": "﻿using System.Collections.Generic;\r\n\r\nnamespace Tabix\r\n{\r\n    public sealed class Index\r\n    {\r\n        // ReSharper disable NotAccessedField.Global\r\n        // ReSharper disable MemberCanBePrivate.Global\r\n        public readonly int Format;\r\n        public readonly int SequenceNameIndex;\r\n        public readonly int BeginIndex;\r\n        public readonly int EndIndex;\r\n        public readonly char CommentChar;\r\n        public readonly int NumLinesToSkip;\r\n        // ReSharper restore MemberCanBePrivate.Global\r\n        // ReSharper restore NotAccessedField.Global\r\n        public readonly ReferenceIndex[] ReferenceSequences;\r\n\r\n        internal readonly Dictionary<string, ushort> RefNameToTabixIndex;\r\n\r\n        public Index(int format, int sequenceNameIndex, int beginIndex, int endIndex, char commentChar,\r\n            int numLinesToSkip, ReferenceIndex[] referenceSequences, Dictionary<string, ushort> refNameToTabixIndex)\r\n        {\r\n            Format              = format;\r\n            SequenceNameIndex   = sequenceNameIndex;\r\n            BeginIndex          = beginIndex;\r\n            EndIndex            = endIndex;\r\n            CommentChar         = commentChar;\r\n            NumLinesToSkip      = numLinesToSkip;\r\n            ReferenceSequences  = referenceSequences;\r\n            RefNameToTabixIndex = refNameToTabixIndex;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Tabix/Interval.cs",
    "content": "﻿namespace Tabix\r\n{\r\n    public struct Interval\r\n    {\r\n        public readonly ulong Begin;\r\n        public readonly ulong End;\r\n\r\n        public Interval(ulong begin, ulong end)\r\n        {\r\n            Begin = begin;\r\n            End   = end;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Tabix/Reader.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.IO.Compression;\r\nusing System.Text;\r\nusing Compression.FileHandling;\r\nusing Genome;\r\n\r\nnamespace Tabix\r\n{\r\n    public static class Reader\r\n    {\r\n        // ReSharper disable once MemberCanBePrivate.Global\r\n        public static Index Read(BinaryReader reader, Dictionary<string, Chromosome> refNameToChromosome)\r\n        {\r\n            int magic = reader.ReadInt32();\r\n            if (magic != Constants.TabixMagic) throw new InvalidDataException(\"This does not seem to be a tabix file. Did you use a GZipStream?\");\r\n\r\n            int numReferenceSequences       = reader.ReadInt32();\r\n            int format                      = reader.ReadInt32();\r\n            int sequenceNameIndex           = reader.ReadInt32() - 1;\r\n            int sequenceBeginIndex          = reader.ReadInt32() - 1;\r\n            int sequenceEndIndex            = reader.ReadInt32() - 1;\r\n            var commentChar                 = (char)reader.ReadInt32();\r\n            int numLinesToSkip              = reader.ReadInt32();\r\n            int concatenatedSequenceNameLen = reader.ReadInt32();\r\n            byte[] concatenatedNames           = reader.ReadBytes(concatenatedSequenceNameLen);\r\n\r\n            string[] referenceSequenceNames = GetReferenceSequenceNames(concatenatedNames, numReferenceSequences);\r\n            var referenceSequences     = new ReferenceIndex[numReferenceSequences];\r\n            var refNameToTabixIndex    = new Dictionary<string, ushort>(numReferenceSequences);\r\n\r\n            for (ushort i = 0; i < numReferenceSequences; i++)\r\n            {\r\n                string chromosomeName = referenceSequenceNames[i];\r\n                var chromosome        = ReferenceNameUtilities.GetChromosome(refNameToChromosome, chromosomeName);\r\n\r\n                referenceSequences[i] = ReadReferenceSequence(reader, chromosome);\r\n                
refNameToTabixIndex[chromosome.UcscName]    = i;\r\n                refNameToTabixIndex[chromosome.EnsemblName] = i;\r\n            }\r\n\r\n            return new Index(format, sequenceNameIndex, sequenceBeginIndex, sequenceEndIndex, commentChar,\r\n                numLinesToSkip, referenceSequences, refNameToTabixIndex);\r\n        }\r\n\r\n        public static Index GetTabixIndex(Stream tabixStream, Dictionary<string, Chromosome> refNameToChromosome)\r\n        {\r\n            using (var binaryReader = new BinaryReader(new BlockGZipStream(tabixStream, CompressionMode.Decompress)))\r\n            {\r\n                return Read(binaryReader, refNameToChromosome);\r\n            }\r\n        }\r\n\r\n        private static string[] GetReferenceSequenceNames(byte[] concatenatedBytes, int numRefSeqs)\r\n        {\r\n            var refSeqNames = new string[numRefSeqs];\r\n            IEnumerable<int> nullIndexes = GetNullIndexes(concatenatedBytes, numRefSeqs);\r\n            var startIndex = 0;\r\n\r\n            var index = 0;\r\n            foreach (int nullIndex in nullIndexes)\r\n            {\r\n                refSeqNames[index++] = Encoding.ASCII.GetString(concatenatedBytes, startIndex, nullIndex - startIndex);\r\n                startIndex = nullIndex + 1;\r\n            }\r\n\r\n            return refSeqNames;\r\n        }\r\n\r\n        private static IEnumerable<int> GetNullIndexes(IReadOnlyList<byte> bytes, int numRefSeqs)\r\n        {\r\n            var nullPositions = new int[numRefSeqs];\r\n            var index = 0;\r\n            for (var pos = 0; pos < bytes.Count; pos++) if (bytes[pos] == 0) nullPositions[index++] = pos;\r\n            return nullPositions;\r\n        }\r\n\r\n        private static ReferenceIndex ReadReferenceSequence(BinaryReader reader, Chromosome chromosome)\r\n        {\r\n            int numBins = reader.ReadInt32();\r\n            var idToChunks = new Dictionary<int, Interval[]>();\r\n\r\n            for (var i = 0; i < 
numBins; i++)\r\n            {\r\n                (int id, Interval[] chunks) = ReadBin(reader);\r\n                idToChunks[id] = chunks;\r\n            }\r\n\r\n            int numLinearFileOffsets = reader.ReadInt32();\r\n            var linearFileOffsets    = new ulong[numLinearFileOffsets];\r\n            int firstNonZero = -1;\r\n\r\n            for (var i = 0; i < numLinearFileOffsets; i++)\r\n            {\r\n                linearFileOffsets[i] = reader.ReadUInt64();\r\n                if (firstNonZero == -1 && linearFileOffsets[i] != 0) firstNonZero = i;\r\n            }\r\n\r\n            for (var i = 0; i < firstNonZero; i++) linearFileOffsets[i] = linearFileOffsets[firstNonZero];\r\n            return new ReferenceIndex(chromosome, idToChunks, linearFileOffsets);\r\n        }\r\n\r\n        private static (int Id, Interval[] Chunks) ReadBin(BinaryReader reader)\r\n        {\r\n            int id        = reader.ReadInt32();\r\n            int numChunks = reader.ReadInt32();\r\n\r\n            var chunks = new Interval[numChunks];\r\n            for (var i = 0; i < numChunks; i++) chunks[i] = ReadChunk(reader);\r\n\r\n            return (id, chunks);\r\n        }\r\n\r\n        private static Interval ReadChunk(BinaryReader reader)\r\n        {\r\n            ulong begin = reader.ReadUInt64();\r\n            ulong end   = reader.ReadUInt64();\r\n            return new Interval(begin, end);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Tabix/ReferenceIndex.cs",
    "content": "﻿using System.Collections.Generic;\nusing Genome;\n\nnamespace Tabix\n{\n    public sealed class ReferenceIndex\n    {\n        public readonly Chromosome Chromosome;\n        public readonly Dictionary<int, Interval[]> IdToChunks;\n\n        // for each 16 kbp interval\n        public readonly ulong[] LinearFileOffsets;\n\n        public ReferenceIndex(Chromosome chromosome, Dictionary<int, Interval[]> idToChunks, ulong[] linearFileOffsets)\n        {\n            Chromosome        = chromosome;\n            IdToChunks        = idToChunks;\n            LinearFileOffsets = linearFileOffsets;\n        }\n    }\n}\n"
  },
  {
    "path": "Tabix/Search.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing Compression.FileHandling;\r\nusing Genome;\r\n\r\nnamespace Tabix\r\n{\r\n    // ReSharper disable once UnusedMember.Global\r\n    public sealed class Search\r\n    {\r\n        private readonly Index _index;\r\n        private readonly Stream _vcfStream;\r\n\r\n        public Search(Index index, Stream vcfStream)\r\n        {\r\n            _index     = index;\r\n            _vcfStream = vcfStream;\r\n        }\r\n\r\n        // ReSharper disable once UnusedMember.Global\r\n        public bool HasVariants(string chromosomeName, int begin, int end)\r\n        {\r\n            var refSeq = _index.GetTabixReferenceSequence(chromosomeName);\r\n            if (refSeq == null) return false;\r\n\r\n            int adjBegin = SearchUtilities.AdjustBegin(begin);\r\n\r\n            IEnumerable<int> bins = BinUtilities.OverlappingBinsWithVariants(adjBegin, end, refSeq.IdToChunks);\r\n\r\n            var block = new BgzfBlock();\r\n            foreach (int bin in bins)\r\n            {\r\n                refSeq.IdToChunks.TryGetValue(bin, out Interval[] chunks);\r\n                if (HasVariantsInBin(refSeq.Chromosome, begin, end, block, chunks)) return true;\r\n            }\r\n\r\n            return false;\r\n        }\r\n\r\n        private bool HasVariantsInBin(Chromosome chromosome, int begin, int end, BgzfBlock block, Interval[] intervals)\r\n        {\r\n            (long minVirtualOffset, long maxVirtualOffset) = SearchUtilities.GetMinMaxVirtualFileOffset(intervals);\r\n\r\n            long minOffset = VirtualPosition.From(minVirtualOffset).FileOffset;\r\n            long maxOffset = VirtualPosition.From(maxVirtualOffset).FileOffset;\r\n\r\n            return BgzfBlockVcfReader.FindVariantsInBlocks(_vcfStream, minOffset, maxOffset, block, chromosome, begin, end);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Tabix/SearchUtilities.cs",
    "content": "﻿namespace Tabix\r\n{\r\n    public static class SearchUtilities\r\n    {\r\n        // ReSharper disable once UnusedMember.Global\r\n        public static long GetOffset(this Index index, string chromosomeName, int begin)\r\n        {\r\n            var refSeq = index.GetTabixReferenceSequence(chromosomeName);\r\n            if (refSeq == null) return -1;\r\n\r\n            // N.B. tabix assumes begin is 0-based and end is 1-based\r\n            int end = begin;\r\n            begin = AdjustBegin(begin);\r\n\r\n            if (begin == 0) return refSeq.LinearFileOffsets.FirstNonZeroValue();\r\n\r\n            ulong minOffset = GetMinOffset(refSeq, begin);\r\n            ulong maxOffset = GetMaxOffset(refSeq, end);\r\n\r\n            int bin = BinUtilities.ConvertPositionToBin(begin);\r\n\r\n            if (refSeq.IdToChunks.TryGetValue(bin, out Interval[] chunks))\r\n                return GetMinOverlapOffset(chunks, minOffset, maxOffset);\r\n\r\n            int linearIndex = begin >> Constants.MinShift;\r\n            if (linearIndex >= refSeq.LinearFileOffsets.Length) return -1;\r\n\r\n            return (long)refSeq.LinearFileOffsets[linearIndex];\r\n        }\r\n\r\n        internal static int AdjustBegin(int begin)\r\n        {\r\n            // N.B. 
tabix assumes begin is 0-based and end is 1-based\r\n            begin--;\r\n            if (begin < 0) begin = 0;\r\n            return begin;\r\n        }\r\n\r\n        // ReSharper disable once ParameterTypeCanBeEnumerable.Global\r\n        internal static long FirstNonZeroValue(this ulong[] offsets)\r\n        {\r\n            foreach (ulong offset in offsets)\r\n            {\r\n                if (offset == 0) continue;\r\n                return (long)offset;\r\n            }\r\n\r\n            return -1;\r\n        }\r\n\r\n        internal static ReferenceIndex GetTabixReferenceSequence(this Index index, string chromosomeName)\r\n        {\r\n            if (string.IsNullOrEmpty(chromosomeName)) return null;\r\n            return !index.RefNameToTabixIndex.TryGetValue(chromosomeName, out ushort tabixIndex)\r\n                ? null\r\n                : index.ReferenceSequences[tabixIndex];\r\n        }\r\n\r\n        internal static long GetMinOverlapOffset(Interval[] chunks, ulong minOffset, ulong maxOffset)\r\n        {\r\n            if (chunks == null) return 0;\r\n\r\n            var minOverlapOffset = ulong.MaxValue;\r\n\r\n            // ReSharper disable once LoopCanBeConvertedToQuery\r\n            foreach (var chunk in chunks)\r\n            {\r\n                if (chunk.End > minOffset && chunk.Begin < maxOffset && chunk.Begin < minOverlapOffset)\r\n                    minOverlapOffset = chunk.Begin;\r\n            }\r\n\r\n            return (long)minOverlapOffset;\r\n        }\r\n\r\n        internal static ulong GetMinOffset(ReferenceIndex refSeq, int begin)\r\n        {\r\n            int bin = BinUtilities.FirstBin(Constants.NumLevels) + (begin >> Constants.MinShift);\r\n\r\n            do\r\n            {\r\n                if (refSeq.IdToChunks.ContainsKey(bin)) break;\r\n\r\n                int firstBin = (BinUtilities.ParentBin(bin) << 3) + 1;\r\n\r\n                if (bin > firstBin) bin--;\r\n                else bin = 
BinUtilities.ParentBin(bin);\r\n\r\n            } while (bin != 0);\r\n\r\n            int bottomBin = BinUtilities.BottomBin(bin);\r\n\r\n            return refSeq.LinearFileOffsets[bottomBin];\r\n        }\r\n\r\n        internal static ulong GetMaxOffset(ReferenceIndex refSeq, int end)\r\n        {\r\n            int bin = BinUtilities.FirstBin(Constants.NumLevels) + ((end - 1) >> Constants.MinShift) + 1;\r\n\r\n            while (true)\r\n            {\r\n                while (bin % 8 == 1) bin = BinUtilities.ParentBin(bin);\r\n\r\n                if (bin == 0) return ulong.MaxValue;\r\n                if (refSeq.IdToChunks.TryGetValue(bin, out Interval[] chunks) && chunks.Length > 0) return chunks[0].Begin;\r\n\r\n                bin++;\r\n            }\r\n        }\r\n\r\n        internal static (long MinOffset, long MaxOffset) GetMinMaxVirtualFileOffset(Interval[] intervals)\r\n        {\r\n            int numIntervals = intervals.Length;\r\n\r\n            var minBegin = (long)intervals[0].Begin;\r\n            var minEnd   = (long)intervals[0].End;\r\n\r\n            for (var i = 1; i < numIntervals; i++)\r\n            {\r\n                var interval = intervals[i];\r\n                var begin    = (long)interval.Begin;\r\n                var end      = (long)interval.End;\r\n\r\n                if (begin < minBegin) minBegin = begin;\r\n                if (end   > minEnd)   minEnd   = end;\r\n            }\r\n\r\n            return (minBegin, minEnd);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Tabix/Tabix.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\r\n  <PropertyGroup>\r\n    <TargetFramework>net6.0</TargetFramework>\r\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\n    \r\n  </PropertyGroup>\r\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\r\n  <ItemGroup>\r\n    <ProjectReference Include=\"..\\Compression\\Compression.csproj\" />\r\n    <ProjectReference Include=\"..\\Genome\\Genome.csproj\" />\r\n    <ProjectReference Include=\"..\\OptimizedCore\\OptimizedCore.csproj\" />\r\n  </ItemGroup>\r\n</Project>\r\n"
  },
  {
    "path": "Tabix/VirtualPosition.cs",
    "content": "﻿namespace Tabix\r\n{\r\n    public static class VirtualPosition\r\n    {\r\n        public static (long FileOffset, int BlockOffset) From(long virtualPosition)\r\n        {\r\n            unchecked\r\n            {\r\n                return ((virtualPosition >> 16) & 0xFFFFFFFFFFFFL, (int)(virtualPosition & 0xffff));\r\n            }\r\n        }\r\n\r\n        // ReSharper disable once UnusedMember.Global\r\n        public static long To(long fileOffset, int blockOffset) => (fileOffset << 16) | ((long)blockOffset & 0xffff);\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/AnnotationLambda/AnnotationLambdaTests.cs",
    "content": "using System.IO;\r\nusing Cloud.Messages.Annotation;\r\nusing Compression.Utilities;\r\nusing IO;\r\nusing Tabix;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.AnnotationLambda\r\n{\r\n    public sealed class AnnotationLambdaTests\r\n    {\r\n        [Fact]\r\n        public void GetTabixVirtualPosition_AsExpected()\r\n        {\r\n            var annotationConfig = new AnnotationConfig\r\n            {\r\n                vcfUrl = \"anywhere/input.vcf.gz\",\r\n                tabixUrl = Resources.TopPath(\"Mother_chr22.genome.vcf.gz.tbi\"),\r\n                annotationRange = new AnnotationRange(new AnnotationPosition(\"chr22\", 20_000_000),\r\n                    new AnnotationPosition(\"chr22\", 30_000_000))\r\n            };\r\n\r\n            var tabixStream = FileUtilities.GetReadStream(annotationConfig.tabixUrl);\r\n            \r\n            var indexReader = new BinaryReader(GZipUtilities.GetAppropriateReadStream(annotationConfig.tabixUrl));\r\n            var expectedPosition = Reader.Read(indexReader, ChromosomeUtilities.RefNameToChromosome).GetOffset(\"chr22\", annotationConfig.annotationRange.Start.Position);\r\n\r\n            var virtualPosition = global::AnnotationLambda.AnnotationLambda.GetTabixVirtualPosition(annotationConfig.annotationRange, tabixStream, ChromosomeUtilities.RefNameToChromosome);\r\n\r\n            Assert.Equal(expectedPosition, virtualPosition);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetTabixVirtualPosition_ReturnZeroWhenNoRangeSpecified()\r\n        {\r\n            Assert.Equal(0, global::AnnotationLambda.AnnotationLambda.GetTabixVirtualPosition(null, null, null));\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/AnnotationLambda/S3UtilitiesTests.cs",
    "content": "﻿using AnnotationLambda;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.AnnotationLambda\r\n{\r\n    public sealed class S3UtilitiesTests\r\n    {\r\n        [Theory]\r\n        [InlineData(\"/Test/\", \"bob\", \"Test/bob\")]\r\n        [InlineData(\"Test/\", \"bob\", \"Test/bob\")]\r\n        [InlineData(\"/Test\", \"bob\", \"Test/bob\")]\r\n        [InlineData(\"Test\", \"bob\", \"Test/bob\")]\r\n        [InlineData(\"\", \"bob\", \"bob\")]\r\n        [InlineData(null, \"bob\", \"bob\")]\r\n        [InlineData(\"/\", \"bob\", \"bob\")]\r\n        public void GetKey_Theory(string outputDir, string filename, string expectedResult)\r\n        {\r\n            var observedResult = S3Utilities.GetKey(outputDir, filename);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CacheUtils/DataDumperImport/DataStructures/Import/ImportNodeExtensionsTests.cs",
    "content": "﻿using System.IO;\r\nusing CacheUtils.DataDumperImport.DataStructures.Import;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CacheUtils.DataDumperImport.DataStructures.Import\r\n{\r\n    public sealed class ImportNodeExtensionsTests\r\n    {\r\n        [Fact]\r\n        public void GetInt32_Nominal()\r\n        {\r\n            var node = new StringKeyValueNode(\"bob\", \"123\");\r\n            var observedResult = node.GetInt32();\r\n            Assert.Equal(123, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetInt32_ReturnMinusOne_WhenNull()\r\n        {\r\n            var node = new StringKeyValueNode(\"bob\", null);\r\n            var observedResult = node.GetInt32();\r\n            Assert.Equal(-1, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetInt32_ThrowException_When_NotNumber()\r\n        {\r\n            var node = new StringKeyValueNode(\"bob\", \"123N\");\r\n\r\n            Assert.Throws<InvalidDataException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var observedResult = node.GetInt32();\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void GetString_ThrowException_When_NotCorrectType()\r\n        {\r\n            var node = new ObjectKeyValueNode(\"bob\", null);\r\n\r\n            Assert.Throws<InvalidDataException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var observedResult = node.GetString();\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void GetString_ReturnNull_IfEmptyOrMinus()\r\n        {\r\n            var node = new StringKeyValueNode(\"bob\", \"-\");\r\n            var observedResult = node.GetString();\r\n            Assert.Null(observedResult);\r\n\r\n            node = new StringKeyValueNode(\"bob\", \"\");\r\n            observedResult = node.GetString();\r\n            
Assert.Null(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetBool_ReturnTrue()\r\n        {\r\n            var node = new StringKeyValueNode(\"bob\", \"1\");\r\n            var observedResult = node.GetBool();\r\n            Assert.True(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetBool_ReturnFalse()\r\n        {\r\n            var node = new StringKeyValueNode(\"bob\", \"0\");\r\n            var observedResult = node.GetBool();\r\n            Assert.False(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void IsUndefined_ReturnTrue()\r\n        {\r\n            var node = new StringKeyValueNode(\"bob\", null);\r\n            var observedResult = node.IsUndefined();\r\n            Assert.True(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void IsUndefined_ReturnFalse()\r\n        {\r\n            var node = new StringKeyValueNode(\"bob\", \"test\");\r\n            var observedResult = node.IsUndefined();\r\n            Assert.False(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void IsUndefined_ReturnFalse_IncorrectType()\r\n        {\r\n            var node = new ObjectKeyValueNode(\"bob\", null);\r\n            var observedResult = node.IsUndefined();\r\n            Assert.False(observedResult);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CacheUtils/DataDumperImport/FauxRegex/RegexDecisionTreeTests.cs",
    "content": "﻿using System;\r\nusing CacheUtils.DataDumperImport.FauxRegex;\r\nusing CacheUtils.DataDumperImport.IO;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CacheUtils.DataDumperImport.FauxRegex\r\n{\r\n    public sealed class RegexDecisionTreeTests\r\n    {\r\n        [Fact]\r\n        public void GetEntryType_RootObjectKeyValue()\r\n        {\r\n            var results = RegexDecisionTree.GetEntryType(\"$VAR1 = {\");\r\n            Assert.Equal(EntryType.RootObjectKeyValue, results.Type);\r\n            Assert.Equal(\"$VAR1\", results.Key);\r\n            Assert.Null(results.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetEntryType_ListObjectKeyValue()\r\n        {\r\n            var results = RegexDecisionTree.GetEntryType(\"          '1' => [\");\r\n            Assert.Equal(EntryType.ListObjectKeyValue, results.Type);\r\n            Assert.Equal(\"1\", results.Key);\r\n            Assert.Null(results.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetEntryType_OpenBraces()\r\n        {\r\n            var results = RegexDecisionTree.GetEntryType(\"                   bless( {\");\r\n            Assert.Equal(EntryType.OpenBraces, results.Type);\r\n            Assert.Null(results.Key);\r\n            Assert.Null(results.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetEntryType_StringKeyValue()\r\n        {\r\n            var results = RegexDecisionTree.GetEntryType(\"                            '_ccds' => 'CCDS44137.1',\");\r\n            Assert.Equal(EntryType.StringKeyValue, results.Type);\r\n            Assert.Equal(\"_ccds\", results.Key);\r\n            Assert.Equal(\"CCDS44137.1\", results.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetEntryType_DigitKeyValue()\r\n        {\r\n            var results = RegexDecisionTree.GetEntryType(\"                                                              'phase' => -1,\");\r\n            Assert.Equal(EntryType.DigitKeyValue, 
results.Type);\r\n            Assert.Equal(\"phase\", results.Key);\r\n            Assert.Equal(\"-1\", results.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetEntryType_EndBracesWithDataType()\r\n        {\r\n            var results = RegexDecisionTree.GetEntryType(\"                                                            }, 'Bio::EnsEMBL::Exon' ),\");\r\n            Assert.Equal(EntryType.EndBracesWithDataType, results.Type);\r\n            Assert.Equal(\"Bio::EnsEMBL::Exon\", results.Key);\r\n            Assert.Null(results.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetEntryType_EndBraces()\r\n        {\r\n            var results = RegexDecisionTree.GetEntryType(\"                                                                },\");\r\n            Assert.Equal(EntryType.EndBraces, results.Type);\r\n            Assert.Null(results.Key);\r\n            Assert.Null(results.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetEntryType_ObjectKeyValue()\r\n        {\r\n            var results = RegexDecisionTree.GetEntryType(\"                                                                                           'next' => bless( {\");\r\n            Assert.Equal(EntryType.ObjectKeyValue, results.Type);\r\n            Assert.Equal(\"next\", results.Key);\r\n            Assert.Null(results.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetEntryType_UndefKeyValue()\r\n        {\r\n            var results = RegexDecisionTree.GetEntryType(\"                                                                                           'adaptor' => undef,\");\r\n            Assert.Equal(EntryType.UndefKeyValue, results.Type);\r\n            Assert.Equal(\"adaptor\", results.Key);\r\n            Assert.Null(results.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetEntryType_EmptyListKeyValue()\r\n        {\r\n            var results = RegexDecisionTree.GetEntryType(\"       
                                                             'seq_edits' => [],\");\r\n            Assert.Equal(EntryType.EmptyListKeyValue, results.Type);\r\n            Assert.Equal(\"seq_edits\", results.Key);\r\n            Assert.Null(results.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetEntryType_EmptyValueKeyValue()\r\n        {\r\n            var results = RegexDecisionTree.GetEntryType(\"                                                'cell_types' => {},\");\r\n            Assert.Equal(EntryType.EmptyValueKeyValue, results.Type);\r\n            Assert.Equal(\"cell_types\", results.Key);\r\n            Assert.Null(results.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetEntryType_ReferenceStringKeyValue()\r\n        {\r\n            var results = RegexDecisionTree.GetEntryType(\"                                                       'transcript' => $VAR1->{'22'}[0],\");\r\n            Assert.Equal(EntryType.ReferenceStringKeyValue, results.Type);\r\n            Assert.Equal(\"transcript\", results.Key);\r\n            Assert.Equal(\"$VAR1->{'22'}[0]\", results.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetEntryType_DigitKey()\r\n        {\r\n            var results = RegexDecisionTree.GetEntryType(\"                                                                            0,\");\r\n            Assert.Equal(EntryType.DigitKey, results.Type);\r\n            Assert.Equal(\"0\", results.Key);\r\n            Assert.Null(results.Value);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(\"'next' => bless( [\")]\r\n        [InlineData(\"A.B,\")]\r\n        [InlineData(\"$VAR1 = [\")]\r\n        public void GetEntryType_ThrowsNotImplementedException(string s)\r\n        {\r\n            Assert.Throws<NotImplementedException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var results = RegexDecisionTree.GetEntryType(s);\r\n           
 });\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(\"123\", true)]\r\n        [InlineData(\"-123\", true)]\r\n        [InlineData(\"12A\", false)]\r\n        public void OnlyDigits(string s, bool expectedResult)\r\n        {\r\n            var observedResult = RegexDecisionTree.OnlyDigits(s);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CacheUtils/DataDumperImport/FileHandling/DataDumperReaderTests.cs",
    "content": "﻿using System.IO;\r\nusing System.Text;\r\nusing CacheUtils.DataDumperImport.DataStructures.Import;\r\nusing CacheUtils.DataDumperImport.IO;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CacheUtils.DataDumperImport.FileHandling\r\n{\r\n    public sealed class DataDumperReaderTests\r\n    {\r\n        [Fact]\r\n        public void GetRootNode_EndToEnd()\r\n        {\r\n            ObjectKeyValueNode rootNode;\r\n\r\n            using (var ms = new MemoryStream())\r\n            {\r\n                using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\r\n                {\r\n                    writer.WriteLine(\"$VAR1 = {\");\r\n                    writer.WriteLine(\"          '22' => {\");\r\n                    writer.WriteLine(\"                    'RegulatoryFeature' => [\");\r\n                    writer.WriteLine(\"                                             bless( {\");\r\n                    writer.WriteLine(\"                                                      'seq' => 'AGGGG'\");\r\n                    writer.WriteLine(\"                                                      'tmp_frequencies' => '87 167 281 56 8 744 40 107 851 5 333 54 12 56 104 372 82 117 402\");\r\n                    writer.WriteLine(\"291 145 49 800 903 13 528 433 11 0 3 12 0 8 733 13 482 322 181\");\r\n                    writer.WriteLine(\"76 414 449 21 0 65 334 48 32 903 566 504 890 775 5 507 307 73 266\");\r\n                    writer.WriteLine(\"459 187 134 36 2 91 11 324 18 3 9 341 8 71 67 17 37 396 59\");\r\n                    writer.WriteLine(\"'\");\r\n                    writer.WriteLine(\"                                                      'cell_types' => {},\");\r\n                    writer.WriteLine(\"                                                      '_bound_lengths' => [\");\r\n                    writer.WriteLine(\"                                                                            0,\");\r\n                    
writer.WriteLine(\"                                                                            0\");\r\n                    writer.WriteLine(\"                                                                          ],\");\r\n                    writer.WriteLine(\"                                                      'transcript' => $VAR1->{'1'}[0],\");\r\n                    writer.WriteLine(\"                                                    }, 'Bio::EnsEMBL::Funcgen::RegulatoryFeature' )\");\r\n                    writer.WriteLine(\"                                           ]\");\r\n                    writer.WriteLine(\"                  }\");\r\n                    writer.WriteLine(\"        };\");\r\n                }\r\n\r\n                ms.Position = 0;\r\n\r\n                using (var reader = new DataDumperReader(ms)) rootNode = reader.GetRootNode();\r\n            }\r\n\r\n            Assert.NotNull(rootNode);\r\n            var node = rootNode;\r\n            Assert.Equal(\"$VAR1\", node.Key);\r\n\r\n            var chr22Node = node.Value.Values[0] as ObjectKeyValueNode;\r\n            Assert.NotNull(chr22Node);\r\n            Assert.Equal(\"22\", chr22Node.Key);\r\n\r\n            var rfNode = chr22Node.Value.Values[0] as ListObjectKeyValueNode;\r\n            Assert.NotNull(rfNode);\r\n            Assert.Equal(\"RegulatoryFeature\", rfNode.Key);\r\n\r\n            var blessNode = rfNode.Values[0] as ObjectValueNode;\r\n            Assert.NotNull(blessNode);\r\n            Assert.Null(blessNode.Key);\r\n            Assert.Equal(\"Bio::EnsEMBL::Funcgen::RegulatoryFeature\", blessNode.Type);\r\n\r\n            var nodes = blessNode.Values;\r\n            var seqNode = nodes[0] as StringKeyValueNode;\r\n            Assert.NotNull(seqNode);\r\n            Assert.Equal(\"seq\", seqNode.Key);\r\n            Assert.Equal(\"AGGGG\", seqNode.Value);\r\n\r\n            var tmpFreqNode = nodes[1] as StringKeyValueNode;\r\n            
Assert.NotNull(tmpFreqNode);\r\n            Assert.Equal(\"tmp_frequencies\", tmpFreqNode.Key);\r\n            Assert.Equal(\"87 167 281 56 8 744 40 107 851 5 333 54 12 56 104 372 82 117 402 291 145 49 800 903 13 528 433 11 0 3 12 0 8 733 13 482 322 181 76 414 449 21 0 65 334 48 32 903 566 504 890 775 5 507 307 73 266 459 187 134 36 2 91 11 324 18 3 9 341 8 71 67 17 37 396 59\", tmpFreqNode.Value);\r\n\r\n            var cellTypesNode = nodes[2] as StringKeyValueNode;\r\n            Assert.NotNull(cellTypesNode);\r\n            Assert.Equal(\"cell_types\", cellTypesNode.Key);\r\n            Assert.Null(cellTypesNode.Value);\r\n\r\n            var boundLengthsNode = nodes[3] as ListObjectKeyValueNode;\r\n            Assert.NotNull(boundLengthsNode);\r\n            Assert.Equal(\"_bound_lengths\", boundLengthsNode.Key);\r\n\r\n            var bl1Node = boundLengthsNode.Values[0] as StringValueNode;\r\n            Assert.NotNull(bl1Node);\r\n            Assert.Equal(\"0\", bl1Node.Key);\r\n\r\n            var bl2Node = boundLengthsNode.Values[1] as StringValueNode;\r\n            Assert.NotNull(bl2Node);\r\n            Assert.Equal(\"0\", bl2Node.Key);\r\n\r\n            var transcriptNode = nodes[4] as StringKeyValueNode;\r\n            Assert.NotNull(transcriptNode);\r\n            Assert.Equal(\"transcript\", transcriptNode.Key);\r\n            Assert.Equal(\"$VAR1->{'1'}[0]\", transcriptNode.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetRootNode_ObjectValue_UnhandledEntryType_ThrowsException()\r\n        {\r\n            Assert.Throws<InvalidDataException>(delegate\r\n            {\r\n                using (var ms = new MemoryStream())\r\n                {\r\n                    using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\r\n                    {\r\n                        writer.WriteLine(\"$VAR1 = {\");\r\n                        writer.WriteLine(\"                bless( {\");\r\n                        
writer.WriteLine(\"                        0\");\r\n                        writer.WriteLine(\"                }, 'Bio::EnsEMBL::Funcgen::RegulatoryFeature' )\");\r\n                        writer.WriteLine(\"        };\");\r\n                    }\r\n\r\n                    ms.Position = 0;\r\n                    using (var reader = new DataDumperReader(ms)) reader.GetRootNode();\r\n                }\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void GetRootNode_ListObjectKeyValue_UnhandledEntryType_ThrowsException()\r\n        {\r\n            Assert.Throws<InvalidDataException>(delegate\r\n            {\r\n                using (var ms = new MemoryStream())\r\n                {\r\n                    using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\r\n                    {\r\n                        writer.WriteLine(\"$VAR1 = {\");\r\n                        writer.WriteLine(\"                '_bound_lengths' => [\");\r\n                        writer.WriteLine(\"                        'seq' => 'AGGGG'\");\r\n                        writer.WriteLine(\"                ]\");\r\n                        writer.WriteLine(\"        };\");\r\n                    }\r\n\r\n                    ms.Position = 0;\r\n                    using (var reader = new DataDumperReader(ms)) reader.GetRootNode();\r\n                }\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void GetRootNode_EmptyStream_ThrowsException()\r\n        {\r\n            Assert.Throws<InvalidDataException>(delegate\r\n            {\r\n                using (var ms = new MemoryStream())\r\n                {\r\n                    using (var reader = new DataDumperReader(ms)) reader.GetRootNode();\r\n                }\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void GetRootNode_NoRootObject_ThrowsException()\r\n        {\r\n            Assert.Throws<InvalidDataException>(delegate\r\n            {\r\n        
        using (var ms = new MemoryStream())\r\n                {\r\n                    using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\r\n                    {\r\n                        writer.WriteLine(\"'seq' => 'AGGGG'\");\r\n                    }\r\n\r\n                    ms.Position = 0;\r\n                    using (var reader = new DataDumperReader(ms)) reader.GetRootNode();\r\n                }\r\n            });\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CacheUtils/DataDumperImport/Import/ImportRegulatoryFeatureTests.cs",
    "content": "﻿using System.IO;\r\nusing System.Text;\r\nusing CacheUtils.DataDumperImport.DataStructures.Import;\r\nusing CacheUtils.DataDumperImport.Import;\r\nusing CacheUtils.DataDumperImport.IO;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.Interface.Caches;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CacheUtils.DataDumperImport.Import\r\n{\r\n    public sealed class ImportRegulatoryFeatureTests\r\n    {\r\n        private readonly ObjectValueNode _regulatoryFeatureNode;\r\n\r\n        public ImportRegulatoryFeatureTests()\r\n        {\r\n            var dataDumperOutput   = GetDataDumperOutput();\r\n            _regulatoryFeatureNode = GetObjectValueNode(dataDumperOutput);\r\n        }\r\n\r\n        #region Data::Dumper output data\r\n\r\n        private static string GetDataDumperOutput()\r\n        {\r\n            return @\"$VAR1 = {\r\n          '22' => {\r\n                    'RegulatoryFeature' => [\r\n                                             bless( {\r\n                                                      '_analysis_id' => 16,\r\n                                                      '_bound_lengths' => [\r\n                                                                            0,\r\n                                                                            0\r\n                                                                          ],\r\n                                                      '_vep_feature_type' => 'RegulatoryFeature',\r\n                                                      'cell_types' => {\r\n                                                                        'A549' => 'INACTIVE',\r\n                                                                        'Aorta' => 'NA',\r\n                                                                        'B_cells_(PB)_Roadmap' => 'NA',\r\n                                                                        'CD14+CD16-_monocyte_(CB)' => 'NA',\r\n             
                                                           'CD14+CD16-_monocyte_(VB)' => 'NA',\r\n                                                                        'CD4+_ab_T_cell_(VB)' => 'NA',\r\n                                                                        'CD8+_ab_T_cell_(CB)' => 'NA',\r\n                                                                        'CM_CD4+_ab_T_cell_(VB)' => 'NA',\r\n                                                                        'DND-41' => 'INACTIVE',\r\n                                                                        'EPC_(VB)' => 'NA',\r\n                                                                        'Fetal_Adrenal_Gland' => 'NA',\r\n                                                                        'Fetal_Intestine_Large' => 'NA',\r\n                                                                        'Fetal_Intestine_Small' => 'NA',\r\n                                                                        'Fetal_Muscle_Leg' => 'NA',\r\n                                                                        'Fetal_Muscle_Trunk' => 'NA',\r\n                                                                        'Fetal_Stomach' => 'NA',\r\n                                                                        'Fetal_Thymus' => 'NA',\r\n                                                                        'GM12878' => 'INACTIVE',\r\n                                                                        'Gastric' => 'NA',\r\n                                                                        'H1-mesenchymal' => 'NA',\r\n                                                                        'H1-neuronal_progenitor' => 'NA',\r\n                                                                        'H1-trophoblast' => 'NA',\r\n                                                                        'H1ESC' => 'INACTIVE',\r\n                                                       
                 'H9' => 'NA',\r\n                                                                        'HMEC' => 'INACTIVE',\r\n                                                                        'HSMM' => 'INACTIVE',\r\n                                                                        'HSMMtube' => 'INACTIVE',\r\n                                                                        'HUVEC' => 'INACTIVE',\r\n                                                                        'HUVEC_prol_(CB)' => 'NA',\r\n                                                                        'HeLa-S3' => 'INACTIVE',\r\n                                                                        'HepG2' => 'REPRESSED',\r\n                                                                        'IMR90' => 'INACTIVE',\r\n                                                                        'K562' => 'ACTIVE',\r\n                                                                        'Left_Ventricle' => 'NA',\r\n                                                                        'Lung' => 'NA',\r\n                                                                        'M0_macrophage_(CB)' => 'NA',\r\n                                                                        'M0_macrophage_(VB)' => 'NA',\r\n                                                                        'M1_macrophage_(CB)' => 'NA',\r\n                                                                        'M1_macrophage_(VB)' => 'NA',\r\n                                                                        'M2_macrophage_(CB)' => 'NA',\r\n                                                                        'M2_macrophage_(VB)' => 'NA',\r\n                                                                        'MSC_(VB)' => 'NA',\r\n                                                                        'Monocytes-CD14+' => 'INACTIVE',\r\n                                                      
                  'Monocytes-CD14+_(PB)_Roadmap' => 'NA',\r\n                                                                        'NH-A' => 'INACTIVE',\r\n                                                                        'NHDF-AD' => 'INACTIVE',\r\n                                                                        'NHEK' => 'INACTIVE',\r\n                                                                        'NHLF' => 'INACTIVE',\r\n                                                                        'Natural_Killer_cells_(PB)' => 'NA',\r\n                                                                        'Osteobl' => 'INACTIVE',\r\n                                                                        'Ovary' => 'NA',\r\n                                                                        'Pancreas' => 'NA',\r\n                                                                        'Placenta' => 'NA',\r\n                                                                        'Psoas_Muscle' => 'NA',\r\n                                                                        'Right_Atrium' => 'NA',\r\n                                                                        'Small_Intestine' => 'NA',\r\n                                                                        'Spleen' => 'NA',\r\n                                                                        'T_cells_(PB)_Roadmap' => 'NA',\r\n                                                                        'Thymus' => 'NA',\r\n                                                                        'eosinophil_(VB)' => 'NA',\r\n                                                                        'erythroblast_(CB)' => 'NA',\r\n                                                                        'iPS-20b' => 'NA',\r\n                                                                        'iPS_DF_19.11' => 'NA',\r\n                                                              
          'iPS_DF_6.9' => 'NA',\r\n                                                                        'naive_B_cell_(VB)' => 'NA',\r\n                                                                        'neutrophil_(CB)' => 'NA',\r\n                                                                        'neutrophil_(VB)' => 'NA',\r\n                                                                        'neutrophil_myelocyte_(BM)' => 'NA'\r\n                                                                      },\r\n                                                      'dbID' => '71269',\r\n                                                      'end' => '50555915',\r\n                                                      'epigenome_count' => 1,\r\n                                                      'feature_type' => 'TF_binding_site',\r\n                                                      'regulatory_build_id' => 1,\r\n                                                      'slice' => bless( {\r\n                                                                          'circular' => 0,\r\n                                                                          'coord_system' => bless( {\r\n                                                                                                     'dbID' => '2',\r\n                                                                                                     'default' => 1,\r\n                                                                                                     'name' => 'chromosome',\r\n                                                                                                     'rank' => '1',\r\n                                                                                                     'sequence_level' => 0,\r\n                                                                                                     'top_level' => 0,\r\n                                              
                                                       'version' => 'GRCh37'\r\n                                                                                                   }, 'Bio::EnsEMBL::CoordSystem' ),\r\n                                                                          'end' => '51304566',\r\n                                                                          'seq_region_length' => '51304566',\r\n                                                                          'seq_region_name' => '22',\r\n                                                                          'start' => 1,\r\n                                                                          'strand' => 1\r\n                                                                        }, 'Bio::EnsEMBL::Slice' ),\r\n                                                      'stable_id' => 'ENSR00000394520',\r\n                                                      'start' => '50555633',\r\n                                                      'strand' => 0\r\n                                                    }, 'Bio::EnsEMBL::Funcgen::RegulatoryFeature' )\r\n                                           ]\r\n                  }\r\n        };\";\r\n        }\r\n\r\n        #endregion\r\n\r\n        private static ObjectValueNode GetObjectValueNode(string dataDumperOutput)\r\n        {\r\n            ObjectKeyValueNode rootNode;\r\n\r\n            using (var ms = new MemoryStream())\r\n            {\r\n                using (var reader = new StringReader(dataDumperOutput))\r\n                using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\r\n                {\r\n                    while (true)\r\n                    {\r\n                        var line = reader.ReadLine();\r\n                        if (line == null) break;\r\n                        writer.WriteLine(line);\r\n                    }\r\n                }\r\n\r\n                ms.Position = 
0;\r\n\r\n                using (var reader = new DataDumperReader(ms)) rootNode = reader.GetRootNode();\r\n            }\r\n\r\n            var chr22Node = rootNode.Value.Values[0] as ObjectKeyValueNode;\r\n            Assert.NotNull(chr22Node);\r\n\r\n            var regulatoryFeatureNodes = chr22Node.Value.Values[0] as ListObjectKeyValueNode;\r\n            Assert.NotNull(regulatoryFeatureNodes);\r\n\r\n            return regulatoryFeatureNodes.Values[0] as ObjectValueNode;\r\n        }\r\n\r\n        [Fact]\r\n        public void Parse_Nominal()\r\n        {\r\n            var regulatoryRegion = ImportRegulatoryFeature.Parse(_regulatoryFeatureNode, ChromosomeUtilities.Chr1);\r\n            Assert.NotNull(regulatoryRegion);\r\n\r\n            Assert.Equal(ChromosomeUtilities.Chr1.Index, regulatoryRegion.Chromosome.Index);\r\n            Assert.Equal(50555633, regulatoryRegion.Start);\r\n            Assert.Equal(50555915, regulatoryRegion.End);\r\n            Assert.Equal(\"ENSR00000394520\", regulatoryRegion.Id.WithoutVersion);\r\n            Assert.Equal(RegulatoryRegionType.TF_binding_site, regulatoryRegion.Type);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CacheUtils/DataDumperImport/Import/ImportTranscriptTests.cs",
    "content": "﻿using System.IO;\r\nusing System.Text;\r\nusing CacheUtils.DataDumperImport.DataStructures;\r\nusing CacheUtils.DataDumperImport.DataStructures.Import;\r\nusing CacheUtils.DataDumperImport.Import;\r\nusing CacheUtils.DataDumperImport.IO;\r\nusing Intervals;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CacheUtils.DataDumperImport.Import\r\n{\r\n    public sealed class ImportTranscriptTests\r\n    {\r\n        private readonly ObjectValueNode _transcriptNode;\r\n\r\n        public ImportTranscriptTests()\r\n        {\r\n            var dataDumperOutput = GetDataDumperOutput();\r\n            _transcriptNode      = GetObjectValueNode(dataDumperOutput);\r\n        }\r\n\r\n        #region Data::Dumper output data\r\n\r\n        private static string GetDataDumperOutput()\r\n        {\r\n            return @\"$VAR1 = {\r\n          '22' => [\r\n                    bless( {\r\n                             '_ccds' => 'CCDS14080.1',\r\n                             '_gene' => bless( {\r\n                                                 'end' => '50051190',\r\n                                                 'stable_id' => 'ENSG00000188511',\r\n                                                 'start' => '49808176',\r\n                                                 'strand' => -1\r\n                                               }, 'Bio::EnsEMBL::Gene' ),\r\n                             '_gene_hgnc_id' => '28010',\r\n                             '_gene_phenotype' => 0,\r\n                             '_gene_stable_id' => 'ENSG00000188511',\r\n                             '_gene_symbol' => 'C22orf34',\r\n                             '_gene_symbol_source' => 'HGNC',\r\n                             '_protein' => 'ENSP00000394865',\r\n                             '_refseq' => 'NM_014577.1',\r\n                             '_swissprot' => '-',\r\n                             
'_trans_exon_array' => [\r\n                                                      bless( {\r\n                                                               'end' => '50051152',\r\n                                                               'end_phase' => 1,\r\n                                                               'phase' => -1,\r\n                                                               'stable_id' => 'ENSE00001657619',\r\n                                                               'start' => '50051053',\r\n                                                               'strand' => -1\r\n                                                             }, 'Bio::EnsEMBL::Exon' ),\r\n                                                      bless( {\r\n                                                               'end' => '49834861',\r\n                                                               'end_phase' => -1,\r\n                                                               'phase' => 1,\r\n                                                               'stable_id' => 'ENSE00001694252',\r\n                                                               'start' => '49834525',\r\n                                                               'strand' => -1\r\n                                                             }, 'Bio::EnsEMBL::Exon' ),\r\n                                                      bless( {\r\n                                                               'end' => '49810577',\r\n                                                               'end_phase' => -1,\r\n                                                               'phase' => -1,\r\n                                                               'stable_id' => 'ENSE00001775575',\r\n                                                               'start' => '49810464',\r\n                                                               'strand' => -1\r\n                         
                                    }, 'Bio::EnsEMBL::Exon' ),\r\n                                                      bless( {\r\n                                                               'end' => '49810384',\r\n                                                               'end_phase' => -1,\r\n                                                               'phase' => -1,\r\n                                                               'stable_id' => 'ENSE00001669960',\r\n                                                               'start' => '49810251',\r\n                                                               'strand' => -1\r\n                                                             }, 'Bio::EnsEMBL::Exon' ),\r\n                                                      bless( {\r\n                                                               'end' => '49809684',\r\n                                                               'end_phase' => -1,\r\n                                                               'phase' => -1,\r\n                                                               'stable_id' => 'ENSE00001595042',\r\n                                                               'start' => '49808176',\r\n                                                               'strand' => -1\r\n                                                             }, 'Bio::EnsEMBL::Exon' )\r\n                                                    ],\r\n                             '_trembl' => 'F2Z342',\r\n                             '_uniparc' => 'UPI00004105EF',\r\n                             '_variation_effect_feature_cache' => {\r\n                                                                    'codon_table' => 1,\r\n                                                                    'five_prime_utr' => bless( {\r\n                                                                                                 '_root_verbose' => 0,\r\n               
                                                                                  'primary_seq' => bless( {\r\n                                                                                                                           '_nowarnonempty' => undef,\r\n                                                                                                                           '_root_verbose' => 0,\r\n                                                                                                                           'alphabet' => 'dna',\r\n                                                                                                                           'display_id' => 'ENST00000414287',\r\n                                                                                                                           'length' => 45,\r\n                                                                                                                           'seq' => 'GCT'\r\n                                                                                                                         }, 'Bio::PrimarySeq' )\r\n                                                                                               }, 'Bio::Seq' ),\r\n                                                                    'introns' => [\r\n                                                                                   bless( {\r\n                                                                                            'adaptor' => undef,\r\n                                                                                            'analysis' => undef,\r\n                                                                                            'dbID' => undef,\r\n                                                                                            'end' => '50051052',\r\n                                                                                            
'next' => bless( {\r\n                                                                                                               'end' => '49834861',\r\n                                                                                                               'end_phase' => -1,\r\n                                                                                                               'phase' => 1,\r\n                                                                                                               'stable_id' => 'ENSE00001694252',\r\n                                                                                                               'start' => '49834525',\r\n                                                                                                               'strand' => -1\r\n                                                                                                             }, 'Bio::EnsEMBL::Exon' ),\r\n                                                                                            'prev' => bless( {\r\n                                                                                                               'end' => '50051152',\r\n                                                                                                               'end_phase' => 1,\r\n                                                                                                               'phase' => -1,\r\n                                                                                                               'stable_id' => 'ENSE00001657619',\r\n                                                                                                               'start' => '50051053',\r\n                                                                                                               'strand' => -1\r\n                                                                                                         
    }, 'Bio::EnsEMBL::Exon' ),\r\n                                                                                            'seqname' => undef,\r\n                                                                                            'slice' => bless( {\r\n                                                                                                                'circular' => 0,\r\n                                                                                                                'coord_system' => bless( {\r\n                                                                                                                                           'dbID' => '2',\r\n                                                                                                                                           'default' => 1,\r\n                                                                                                                                           'name' => 'chromosome',\r\n                                                                                                                                           'rank' => '1',\r\n                                                                                                                                           'sequence_level' => 0,\r\n                                                                                                                                           'top_level' => 0,\r\n                                                                                                                                           'version' => 'GRCh37'\r\n                                                                                                                                         }, 'Bio::EnsEMBL::CoordSystem' ),\r\n                                                                                                                'end' => '51304566',\r\n                         
                                                                                       'seq_region_length' => '51304566',\r\n                                                                                                                'seq_region_name' => '22',\r\n                                                                                                                'start' => 1,\r\n                                                                                                                'strand' => 1\r\n                                                                                                              }, 'Bio::EnsEMBL::Slice' ),\r\n                                                                                            'start' => '49834862',\r\n                                                                                            'strand' => -1\r\n                                                                                          }, 'Bio::EnsEMBL::Intron' ),\r\n                                                                                   bless( {\r\n                                                                                            'adaptor' => undef,\r\n                                                                                            'analysis' => undef,\r\n                                                                                            'dbID' => undef,\r\n                                                                                            'end' => '49834524',\r\n                                                                                            'next' => bless( {\r\n                                                                                                               'end' => '49810577',\r\n                                                                                                               'end_phase' => -1,\r\n                                                
                                                               'phase' => -1,\r\n                                                                                                               'stable_id' => 'ENSE00001775575',\r\n                                                                                                               'start' => '49810464',\r\n                                                                                                               'strand' => -1\r\n                                                                                                             }, 'Bio::EnsEMBL::Exon' ),\r\n                                                                                            'prev' => bless( {\r\n                                                                                                               'end' => '49834861',\r\n                                                                                                               'end_phase' => -1,\r\n                                                                                                               'phase' => 1,\r\n                                                                                                               'stable_id' => 'ENSE00001694252',\r\n                                                                                                               'start' => '49834525',\r\n                                                                                                               'strand' => -1\r\n                                                                                                             }, 'Bio::EnsEMBL::Exon' ),\r\n                                                                                            'seqname' => undef,\r\n                                                                                            'slice' => bless( {\r\n                                                                         
                                       'circular' => 0,\r\n                                                                                                                'coord_system' => bless( {\r\n                                                                                                                                           'dbID' => '2',\r\n                                                                                                                                           'default' => 1,\r\n                                                                                                                                           'name' => 'chromosome',\r\n                                                                                                                                           'rank' => '1',\r\n                                                                                                                                           'sequence_level' => 0,\r\n                                                                                                                                           'top_level' => 0,\r\n                                                                                                                                           'version' => 'GRCh37'\r\n                                                                                                                                         }, 'Bio::EnsEMBL::CoordSystem' ),\r\n                                                                                                                'end' => '51304566',\r\n                                                                                                                'seq_region_length' => '51304566',\r\n                                                                                                                'seq_region_name' => '22',\r\n                                                                      
                                          'start' => 1,\r\n                                                                                                                'strand' => 1\r\n                                                                                                              }, 'Bio::EnsEMBL::Slice' ),\r\n                                                                                            'start' => '49810578',\r\n                                                                                            'strand' => -1\r\n                                                                                          }, 'Bio::EnsEMBL::Intron' ),\r\n                                                                                   bless( {\r\n                                                                                            'adaptor' => undef,\r\n                                                                                            'analysis' => undef,\r\n                                                                                            'dbID' => undef,\r\n                                                                                            'end' => '49810463',\r\n                                                                                            'next' => bless( {\r\n                                                                                                               'end' => '49810384',\r\n                                                                                                               'end_phase' => -1,\r\n                                                                                                               'phase' => -1,\r\n                                                                                                               'stable_id' => 'ENSE00001669960',\r\n                                                                                                            
   'start' => '49810251',\r\n                                                                                                               'strand' => -1\r\n                                                                                                             }, 'Bio::EnsEMBL::Exon' ),\r\n                                                                                            'prev' => bless( {\r\n                                                                                                               'end' => '49810577',\r\n                                                                                                               'end_phase' => -1,\r\n                                                                                                               'phase' => -1,\r\n                                                                                                               'stable_id' => 'ENSE00001775575',\r\n                                                                                                               'start' => '49810464',\r\n                                                                                                               'strand' => -1\r\n                                                                                                             }, 'Bio::EnsEMBL::Exon' ),\r\n                                                                                            'seqname' => undef,\r\n                                                                                            'slice' => bless( {\r\n                                                                                                                'circular' => 0,\r\n                                                                                                                'coord_system' => bless( {\r\n                                                                                                                                       
    'dbID' => '2',\r\n                                                                                                                                           'default' => 1,\r\n                                                                                                                                           'name' => 'chromosome',\r\n                                                                                                                                           'rank' => '1',\r\n                                                                                                                                           'sequence_level' => 0,\r\n                                                                                                                                           'top_level' => 0,\r\n                                                                                                                                           'version' => 'GRCh37'\r\n                                                                                                                                         }, 'Bio::EnsEMBL::CoordSystem' ),\r\n                                                                                                                'end' => '51304566',\r\n                                                                                                                'seq_region_length' => '51304566',\r\n                                                                                                                'seq_region_name' => '22',\r\n                                                                                                                'start' => 1,\r\n                                                                                                                'strand' => 1\r\n                                                                                                              }, 'Bio::EnsEMBL::Slice' ),\r\n       
                                                                                     'start' => '49810385',\r\n                                                                                            'strand' => -1\r\n                                                                                          }, 'Bio::EnsEMBL::Intron' ),\r\n                                                                                   bless( {\r\n                                                                                            'adaptor' => undef,\r\n                                                                                            'analysis' => undef,\r\n                                                                                            'dbID' => undef,\r\n                                                                                            'end' => '49810250',\r\n                                                                                            'next' => bless( {\r\n                                                                                                               'end' => '49809684',\r\n                                                                                                               'end_phase' => -1,\r\n                                                                                                               'phase' => -1,\r\n                                                                                                               'stable_id' => 'ENSE00001595042',\r\n                                                                                                               'start' => '49808176',\r\n                                                                                                               'strand' => -1\r\n                                                                                                             }, 'Bio::EnsEMBL::Exon' ),\r\n                                       
                                                     'prev' => bless( {\r\n                                                                                                               'end' => '49810384',\r\n                                                                                                               'end_phase' => -1,\r\n                                                                                                               'phase' => -1,\r\n                                                                                                               'stable_id' => 'ENSE00001669960',\r\n                                                                                                               'start' => '49810251',\r\n                                                                                                               'strand' => -1\r\n                                                                                                             }, 'Bio::EnsEMBL::Exon' ),\r\n                                                                                            'seqname' => undef,\r\n                                                                                            'slice' => bless( {\r\n                                                                                                                'circular' => 0,\r\n                                                                                                                'coord_system' => bless( {\r\n                                                                                                                                           'dbID' => '2',\r\n                                                                                                                                           'default' => 1,\r\n                                                                                                                                           'name' => 
'chromosome',\r\n                                                                                                                                           'rank' => '1',\r\n                                                                                                                                           'sequence_level' => 0,\r\n                                                                                                                                           'top_level' => 0,\r\n                                                                                                                                           'version' => 'GRCh37'\r\n                                                                                                                                         }, 'Bio::EnsEMBL::CoordSystem' ),\r\n                                                                                                                'end' => '51304566',\r\n                                                                                                                'seq_region_length' => '51304566',\r\n                                                                                                                'seq_region_name' => '22',\r\n                                                                                                                'start' => 1,\r\n                                                                                                                'strand' => 1\r\n                                                                                                              }, 'Bio::EnsEMBL::Slice' ),\r\n                                                                                            'start' => '49809685',\r\n                                                                                            'strand' => -1\r\n                                                                                          }, 
'Bio::EnsEMBL::Intron' )\r\n                                                                                 ],\r\n                                                                    'mapper' => bless( {\r\n                                                                                         'cdna_coding_end' => '225',\r\n                                                                                         'cdna_coding_start' => 46,\r\n                                                                                         'exon_coord_mapper' => bless( {\r\n                                                                                                                         '_is_sorted' => 0,\r\n                                                                                                                         '_pair_cdna' => {\r\n                                                                                                                                           'CDNA' => [\r\n                                                                                                                                                       bless( {\r\n                                                                                                                                                                'from' => bless( {\r\n                                                                                                                                                                                   'end' => 100,\r\n                                                                                                                                                                                   'id' => 'cdna',\r\n                                                                                                                                                                                   'start' => 1\r\n                                                         
                                                                                                                        }, 'Bio::EnsEMBL::Mapper::Unit' ),\r\n                                                                                                                                                                'ori' => -1,\r\n                                                                                                                                                                'to' => bless( {\r\n                                                                                                                                                                                 'end' => '50051152',\r\n                                                                                                                                                                                 'id' => 'genome',\r\n                                                                                                                                                                                 'start' => '50051053'\r\n                                                                                                                                                                               }, 'Bio::EnsEMBL::Mapper::Unit' )\r\n                                                                                                                                                              }, 'Bio::EnsEMBL::Mapper::Pair' ),\r\n                                                                                                                                                       bless( {\r\n                                                                                                                                                                'from' => bless( {\r\n                                                                                                                                    
                                               'end' => '437',\r\n                                                                                                                                                                                   'id' => 'cdna',\r\n                                                                                                                                                                                   'start' => 101\r\n                                                                                                                                                                                 }, 'Bio::EnsEMBL::Mapper::Unit' ),\r\n                                                                                                                                                                'ori' => -1,\r\n                                                                                                                                                                'to' => bless( {\r\n                                                                                                                                                                                 'end' => '49834861',\r\n                                                                                                                                                                                 'id' => 'genome',\r\n                                                                                                                                                                                 'start' => '49834525'\r\n                                                                                                                                                                               }, 'Bio::EnsEMBL::Mapper::Unit' )\r\n                                                                                                                                                           
   }, 'Bio::EnsEMBL::Mapper::Pair' ),\r\n                                                                                                                                                       bless( {\r\n                                                                                                                                                                'from' => bless( {\r\n                                                                                                                                                                                   'end' => '551',\r\n                                                                                                                                                                                   'id' => 'cdna',\r\n                                                                                                                                                                                   'start' => '438'\r\n                                                                                                                                                                                 }, 'Bio::EnsEMBL::Mapper::Unit' ),\r\n                                                                                                                                                                'ori' => -1,\r\n                                                                                                                                                                'to' => bless( {\r\n                                                                                                                                                                                 'end' => '49810577',\r\n                                                                                                                                                                                 'id' => 'genome',\r\n                                                 
                                                                                                                                'start' => '49810464'\r\n                                                                                                                                                                               }, 'Bio::EnsEMBL::Mapper::Unit' )\r\n                                                                                                                                                              }, 'Bio::EnsEMBL::Mapper::Pair' ),\r\n                                                                                                                                                       bless( {\r\n                                                                                                                                                                'from' => bless( {\r\n                                                                                                                                                                                   'end' => '685',\r\n                                                                                                                                                                                   'id' => 'cdna',\r\n                                                                                                                                                                                   'start' => '552'\r\n                                                                                                                                                                                 }, 'Bio::EnsEMBL::Mapper::Unit' ),\r\n                                                                                                                                                                'ori' => -1,\r\n                                                                                                            
                                                    'to' => bless( {\r\n                                                                                                                                                                                 'end' => '49810384',\r\n                                                                                                                                                                                 'id' => 'genome',\r\n                                                                                                                                                                                 'start' => '49810251'\r\n                                                                                                                                                                               }, 'Bio::EnsEMBL::Mapper::Unit' )\r\n                                                                                                                                                              }, 'Bio::EnsEMBL::Mapper::Pair' ),\r\n                                                                                                                                                       bless( {\r\n                                                                                                                                                                'from' => bless( {\r\n                                                                                                                                                                                   'end' => '2194',\r\n                                                                                                                                                                                   'id' => 'cdna',\r\n                                                                                                                                                                                 
  'start' => '686'\r\n                                                                                                                                                                                 }, 'Bio::EnsEMBL::Mapper::Unit' ),\r\n                                                                                                                                                                'ori' => -1,\r\n                                                                                                                                                                'to' => bless( {\r\n                                                                                                                                                                                 'end' => '49809684',\r\n                                                                                                                                                                                 'id' => 'genome',\r\n                                                                                                                                                                                 'start' => '49808176'\r\n                                                                                                                                                                               }, 'Bio::EnsEMBL::Mapper::Unit' )\r\n                                                                                                                                                              }, 'Bio::EnsEMBL::Mapper::Pair' )\r\n                                                                                                                                                     ]\r\n                                                                                                                                         },\r\n                                                                                                      
                   '_pair_genomic' => {\r\n                                                                                                                                              'GENOME' => [\r\n                                                                                                                                                            bless( {\r\n                                                                                                                                                                     'from' => bless( {\r\n                                                                                                                                                                                        'end' => 100,\r\n                                                                                                                                                                                        'id' => 'cdna',\r\n                                                                                                                                                                                        'start' => 1\r\n                                                                                                                                                                                      }, 'Bio::EnsEMBL::Mapper::Unit' ),\r\n                                                                                                                                                                     'ori' => -1,\r\n                                                                                                                                                                     'to' => bless( {\r\n                                                                                                                                                                                      'end' => '50051152',\r\n                                               
                                                                                                                                       'id' => 'genome',\r\n                                                                                                                                                                                      'start' => '50051053'\r\n                                                                                                                                                                                    }, 'Bio::EnsEMBL::Mapper::Unit' )\r\n                                                                                                                                                                   }, 'Bio::EnsEMBL::Mapper::Pair' ),\r\n                                                                                                                                                            bless( {\r\n                                                                                                                                                                     'from' => bless( {\r\n                                                                                                                                                                                        'end' => '437',\r\n                                                                                                                                                                                        'id' => 'cdna',\r\n                                                                                                                                                                                        'start' => 101\r\n                                                                                                                                                                                      }, 'Bio::EnsEMBL::Mapper::Unit' ),\r\n                                    
                                                                                                                                 'ori' => -1,\r\n                                                                                                                                                                     'to' => bless( {\r\n                                                                                                                                                                                      'end' => '49834861',\r\n                                                                                                                                                                                      'id' => 'genome',\r\n                                                                                                                                                                                      'start' => '49834525'\r\n                                                                                                                                                                                    }, 'Bio::EnsEMBL::Mapper::Unit' )\r\n                                                                                                                                                                   }, 'Bio::EnsEMBL::Mapper::Pair' ),\r\n                                                                                                                                                            bless( {\r\n                                                                                                                                                                     'from' => bless( {\r\n                                                                                                                                                                                        'end' => '551',\r\n                                                                              
                                                                                                          'id' => 'cdna',\r\n                                                                                                                                                                                        'start' => '438'\r\n                                                                                                                                                                                      }, 'Bio::EnsEMBL::Mapper::Unit' ),\r\n                                                                                                                                                                     'ori' => -1,\r\n                                                                                                                                                                     'to' => bless( {\r\n                                                                                                                                                                                      'end' => '49810577',\r\n                                                                                                                                                                                      'id' => 'genome',\r\n                                                                                                                                                                                      'start' => '49810464'\r\n                                                                                                                                                                                    }, 'Bio::EnsEMBL::Mapper::Unit' )\r\n                                                                                                                                                                   }, 'Bio::EnsEMBL::Mapper::Pair' ),\r\n                                                   
                                                                                                         bless( {\r\n                                                                                                                                                                     'from' => bless( {\r\n                                                                                                                                                                                        'end' => '685',\r\n                                                                                                                                                                                        'id' => 'cdna',\r\n                                                                                                                                                                                        'start' => '552'\r\n                                                                                                                                                                                      }, 'Bio::EnsEMBL::Mapper::Unit' ),\r\n                                                                                                                                                                     'ori' => -1,\r\n                                                                                                                                                                     'to' => bless( {\r\n                                                                                                                                                                                      'end' => '49810384',\r\n                                                                                                                                                                                      'id' => 'genome',\r\n                                                                                           
                                                                                           'start' => '49810251'\r\n                                                                                                                                                                                    }, 'Bio::EnsEMBL::Mapper::Unit' )\r\n                                                                                                                                                                   }, 'Bio::EnsEMBL::Mapper::Pair' ),\r\n                                                                                                                                                            bless( {\r\n                                                                                                                                                                     'from' => bless( {\r\n                                                                                                                                                                                        'end' => '2194',\r\n                                                                                                                                                                                        'id' => 'cdna',\r\n                                                                                                                                                                                        'start' => '686'\r\n                                                                                                                                                                                      }, 'Bio::EnsEMBL::Mapper::Unit' ),\r\n                                                                                                                                                                     'ori' => -1,\r\n                                                                                                   
                                                                  'to' => bless( {\r\n                                                                                                                                                                                      'end' => '49809684',\r\n                                                                                                                                                                                      'id' => 'genome',\r\n                                                                                                                                                                                      'start' => '49808176'\r\n                                                                                                                                                                                    }, 'Bio::EnsEMBL::Mapper::Unit' )\r\n                                                                                                                                                                   }, 'Bio::EnsEMBL::Mapper::Pair' )\r\n                                                                                                                                                          ]\r\n                                                                                                                                            },\r\n                                                                                                                         'from' => 'cdna',\r\n                                                                                                                         'from_cs' => undef,\r\n                                                                                                                         'pair_count' => 5,\r\n                                                                                                                         'to' => 'genomic',\r\n    
                                                                                                                     'to_cs' => undef\r\n                                                                                                                       }, 'Bio::EnsEMBL::Mapper' ),\r\n                                                                                         'start_phase' => -1\r\n                                                                                       }, 'Bio::EnsEMBL::TranscriptMapper' ),\r\n                                                                    'peptide' => 'MIV',\r\n                                                                    'protein_features' => [\r\n                                                                                            bless( {\r\n                                                                                                     'analysis' => bless( {\r\n                                                                                                                            '_display_label' => 'Low complexity (Seg)'\r\n                                                                                                                          }, 'Bio::EnsEMBL::Analysis' ),\r\n                                                                                                     'end' => '58',\r\n                                                                                                     'hseqname' => 'seg',\r\n                                                                                                     'start' => '39'\r\n                                                                                                   }, 'Bio::EnsEMBL::ProteinFeature' )\r\n                                                                                          ],\r\n                                                                    'protein_function_predictions' => {\r\n                       
                                                                                 'polyphen_humdiv' => bless( {\r\n                                                                                                                                      'analysis' => 'polyphen',\r\n                                                                                                                                      'matrix' => 'VkVQ-humdiv',\r\n                                                                                                                                      'matrix_compressed' => 1,\r\n                                                                                                                                      'peptide_length' => undef,\r\n                                                                                                                                      'sub_analysis' => 'humdiv',\r\n                                                                                                                                      'translation_md5' => '84229aef711b14371f4c0c6f5ec78ebe'\r\n                                                                                                                                    }, 'Bio::EnsEMBL::Variation::ProteinFunctionPredictionMatrix' ),\r\n                                                                                                        'polyphen_humvar' => bless( {\r\n                                                                                                                                      'analysis' => 'polyphen',\r\n                                                                                                                                      'matrix' => 'VkVQ-humvar',\r\n                                                                                                                                      'matrix_compressed' => 1,\r\n                                               
                                                                                       'peptide_length' => undef,\r\n                                                                                                                                      'sub_analysis' => 'humvar',\r\n                                                                                                                                      'translation_md5' => '84229aef711b14371f4c0c6f5ec78ebe'\r\n                                                                                                                                    }, 'Bio::EnsEMBL::Variation::ProteinFunctionPredictionMatrix' ),\r\n                                                                                                        'sift' => bless( {\r\n                                                                                                                           'analysis' => 'sift',\r\n                                                                                                                           'matrix' => 'VkVQ-sift',\r\n                                                                                                                           'matrix_compressed' => 1,\r\n                                                                                                                           'peptide_length' => undef,\r\n                                                                                                                           'sub_analysis' => undef,\r\n                                                                                                                           'translation_md5' => '63fc5b02b6c430f970688d120e14647c'\r\n                                                                                                                         }, 'Bio::EnsEMBL::Variation::ProteinFunctionPredictionMatrix' )\r\n                                                                          
                            },\r\n                                                                    'seq_edits' => [\r\n                                                                                     bless( {\r\n                                                                                              'alt_seq' => 'U',\r\n                                                                                              'code' => '_selenocysteine',\r\n                                                                                              'description' => undef,\r\n                                                                                              'end' => '667',\r\n                                                                                              'name' => 'Selenocysteine',\r\n                                                                                              'start' => '667'\r\n                                                                                            }, 'Bio::EnsEMBL::SeqEdit' )\r\n                                                                                   ],\r\n                                                                    'sorted_exons' => [\r\n                                                                                        bless( {\r\n                                                                                                 'end' => '49809684',\r\n                                                                                                 'end_phase' => -1,\r\n                                                                                                 'phase' => -1,\r\n                                                                                                 'stable_id' => 'ENSE00001595042',\r\n                                                                                                 'start' => '49808176',\r\n                                                   
                                              'strand' => -1\r\n                                                                                               }, 'Bio::EnsEMBL::Exon' ),\r\n                                                                                        bless( {\r\n                                                                                                 'end' => '49810384',\r\n                                                                                                 'end_phase' => -1,\r\n                                                                                                 'phase' => -1,\r\n                                                                                                 'stable_id' => 'ENSE00001669960',\r\n                                                                                                 'start' => '49810251',\r\n                                                                                                 'strand' => -1\r\n                                                                                               }, 'Bio::EnsEMBL::Exon' ),\r\n                                                                                        bless( {\r\n                                                                                                 'end' => '49810577',\r\n                                                                                                 'end_phase' => -1,\r\n                                                                                                 'phase' => -1,\r\n                                                                                                 'stable_id' => 'ENSE00001775575',\r\n                                                                                                 'start' => '49810464',\r\n                                                                                                 'strand' => -1\r\n                                
                                                               }, 'Bio::EnsEMBL::Exon' ),\r\n                                                                                        bless( {\r\n                                                                                                 'end' => '49834861',\r\n                                                                                                 'end_phase' => -1,\r\n                                                                                                 'phase' => 1,\r\n                                                                                                 'stable_id' => 'ENSE00001694252',\r\n                                                                                                 'start' => '49834525',\r\n                                                                                                 'strand' => -1\r\n                                                                                               }, 'Bio::EnsEMBL::Exon' ),\r\n                                                                                        bless( {\r\n                                                                                                 'end' => '50051152',\r\n                                                                                                 'end_phase' => 1,\r\n                                                                                                 'phase' => -1,\r\n                                                                                                 'stable_id' => 'ENSE00001657619',\r\n                                                                                                 'start' => '50051053',\r\n                                                                                                 'strand' => -1\r\n                                                                                               }, 'Bio::EnsEMBL::Exon' )\r\n      
                                                                                ],\r\n                                                                    'three_prime_utr' => bless( {\r\n                                                                                                  '_root_verbose' => 0,\r\n                                                                                                  'primary_seq' => bless( {\r\n                                                                                                                            '_nowarnonempty' => undef,\r\n                                                                                                                            '_root_verbose' => 0,\r\n                                                                                                                            'alphabet' => 'dna',\r\n                                                                                                                            'display_id' => 'ENST00000414287',\r\n                                                                                                                            'length' => '1969',\r\n                                                                                                                            'seq' => 'CAC'\r\n                                                                                                                          }, 'Bio::PrimarySeq' )\r\n                                                                                                }, 'Bio::Seq' ),\r\n                                                                    'translateable_seq' => 'ATG'\r\n                                                                  },\r\n                             '_vep_lazy_loaded' => 1,\r\n                             'attributes' => [\r\n                                               bless( {\r\n                                                        
'code' => 'miRNA',\r\n                                                        'name' => 'Micro RNA',\r\n                                                        'value' => '62-83'\r\n                                                      }, 'Bio::EnsEMBL::Attribute' ),\r\n                                               bless( {\r\n                                                        'code' => 'cds_start_NF',\r\n                                                        'name' => 'CDS start not found',\r\n                                                        'value' => '1'\r\n                                                      }, 'Bio::EnsEMBL::Attribute' )\r\n                                             ],\r\n                             'biotype' => 'nonsense_mediated_decay',\r\n                             'cdna_coding_end' => '225',\r\n                             'cdna_coding_start' => 46,\r\n                             'coding_region_end' => undef,\r\n                             'coding_region_start' => undef,\r\n                             'dbID' => '2441076',\r\n                             'description' => undef,\r\n                             'end' => '50051152',\r\n                             'is_canonical' => 1,\r\n                             'slice' => bless( {\r\n                                                 'circular' => 0,\r\n                                                 'coord_system' => bless( {\r\n                                                                            'dbID' => '2',\r\n                                                                            'default' => 1,\r\n                                                                            'name' => 'chromosome',\r\n                                                                            'rank' => '1',\r\n                                                                            'sequence_level' => 0,\r\n                                                            
                'top_level' => 0,\r\n                                                                            'version' => 'GRCh37'\r\n                                                                          }, 'Bio::EnsEMBL::CoordSystem' ),\r\n                                                 'end' => '51304566',\r\n                                                 'seq_region_length' => '51304566',\r\n                                                 'seq_region_name' => '22',\r\n                                                 'start' => 1,\r\n                                                 'strand' => 1\r\n                                               }, 'Bio::EnsEMBL::Slice' ),\r\n                             'source' => 'havana',\r\n                             'stable_id' => 'ENST00000414287',\r\n                             'start' => '49808176',\r\n                             'strand' => -1,\r\n                             'translation' => bless( {\r\n                                                       'dbID' => '1232784',\r\n                                                       'end' => 125,\r\n                                                       'end_exon' => bless( {\r\n                                                                              'end' => '49834861',\r\n                                                                              'end_phase' => -1,\r\n                                                                              'phase' => 1,\r\n                                                                              'stable_id' => 'ENSE00001694252',\r\n                                                                              'start' => '49834525',\r\n                                                                              'strand' => -1\r\n                                                                            }, 'Bio::EnsEMBL::Exon' ),\r\n                                                       'seq' => 
undef,\r\n                                                       'stable_id' => 'ENSP00000394865',\r\n                                                       'start' => 46,\r\n                                                       'start_exon' => bless( {\r\n                                                                                'end' => '50051152',\r\n                                                                                'end_phase' => 1,\r\n                                                                                'phase' => 1,\r\n                                                                                'stable_id' => 'ENSE00001657619',\r\n                                                                                'start' => '50051053',\r\n                                                                                'strand' => -1\r\n                                                                              }, 'Bio::EnsEMBL::Exon' ),\r\n                                                       'transcript' => $VAR1->{'22'}[0],\r\n                                                       'version' => 1\r\n                                                     }, 'Bio::EnsEMBL::Translation' ),\r\n                             'version' => 1\r\n                           }, 'Bio::EnsEMBL::Transcript' )\r\n                  ]\r\n        };\";\r\n        }\r\n\r\n        #endregion\r\n\r\n        private static ObjectValueNode GetObjectValueNode(string dataDumperOutput)\r\n        {\r\n            ObjectKeyValueNode rootNode;\r\n\r\n            using (var ms = new MemoryStream())\r\n            {\r\n                using (var reader = new StringReader(dataDumperOutput))\r\n                using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\r\n                {\r\n                    while (true)\r\n                    {\r\n                        var line = reader.ReadLine();\r\n                        if (line == null) 
break;\r\n                        writer.WriteLine(line);\r\n                    }\r\n                }\r\n\r\n                ms.Position = 0;\r\n\r\n                using (var reader = new DataDumperReader(ms)) rootNode = reader.GetRootNode();\r\n            }\r\n\r\n            var chr22Node = rootNode.Value.Values[0] as ListObjectKeyValueNode;\r\n            Assert.NotNull(chr22Node);\r\n\r\n            return chr22Node.Values[0] as ObjectValueNode;\r\n        }\r\n\r\n        [Fact]\r\n        public void Parse_Nominal()\r\n        {\r\n            var mutableTranscript = ImportTranscript.Parse(_transcriptNode, ChromosomeUtilities.Chr1, Source.Ensembl);\r\n            Assert.NotNull(mutableTranscript);\r\n\r\n            Assert.Equal(ChromosomeUtilities.Chr1.Index, mutableTranscript.Chromosome.Index);\r\n            Assert.Equal(49808176, mutableTranscript.Start);\r\n            Assert.Equal(50051152, mutableTranscript.End);\r\n            Assert.Equal(\"ENST00000414287\", mutableTranscript.Id);\r\n            Assert.Equal(1, mutableTranscript.Version);\r\n            Assert.Equal(\"CCDS14080.1\", mutableTranscript.CcdsId);\r\n            Assert.Equal(\"NM_014577.1\", mutableTranscript.RefSeqId);\r\n            Assert.Equal(Source.Ensembl, mutableTranscript.Source);\r\n            Assert.Equal(49808176, mutableTranscript.Gene.Start);\r\n            Assert.Equal(50051190, mutableTranscript.Gene.End);\r\n            Assert.Equal(\"ENSG00000188511\", mutableTranscript.Gene.GeneId);\r\n            Assert.Equal(\"C22orf34\", mutableTranscript.Gene.Symbol);\r\n            Assert.Equal(28010, mutableTranscript.Gene.HgncId);\r\n            Assert.Equal(ChromosomeUtilities.Chr1.Index, mutableTranscript.Gene.Chromosome.Index);\r\n            Assert.True(mutableTranscript.Gene.OnReverseStrand);\r\n            Assert.Equal(GeneSymbolSource.HGNC, mutableTranscript.Gene.SymbolSource);\r\n            Assert.Equal(5, mutableTranscript.Exons.Length);\r\n            
Assert.Equal(50051053, mutableTranscript.Exons[0].Start);\r\n            Assert.Equal(50051152, mutableTranscript.Exons[0].End);\r\n            Assert.Equal(-1, mutableTranscript.Exons[0].Phase);\r\n            Assert.Equal(2194, mutableTranscript.TotalExonLength);\r\n            Assert.Equal(4, mutableTranscript.Introns.Length);\r\n            Assert.Equal(49834862, mutableTranscript.Introns[0].Start);\r\n            Assert.Equal(50051052, mutableTranscript.Introns[0].End);\r\n            Assert.Equal(\"ATG\", mutableTranscript.TranslateableSequence);\r\n            Assert.Equal(new IInterval[] { new Interval(62, 83) }, mutableTranscript.MicroRnas);\r\n            Assert.True(mutableTranscript.CdsStartNotFound);\r\n            Assert.False(mutableTranscript.CdsEndNotFound);\r\n            Assert.Equal(new[] { 667 }, mutableTranscript.SelenocysteinePositions);\r\n            Assert.Equal(1, mutableTranscript.StartExonPhase);\r\n            Assert.Equal(BioType.nonsense_mediated_decay, mutableTranscript.BioType);\r\n            Assert.True(mutableTranscript.IsCanonical);\r\n            Assert.Equal(5, mutableTranscript.CdnaMaps.Length);\r\n            Assert.Equal(50051053, mutableTranscript.CdnaMaps[0].Start);\r\n            Assert.Equal(50051152, mutableTranscript.CdnaMaps[0].End);\r\n            Assert.Equal(1, mutableTranscript.CdnaMaps[0].CdnaStart);\r\n            Assert.Equal(100, mutableTranscript.CdnaMaps[0].CdnaEnd);\r\n            Assert.Equal(49834737, mutableTranscript.CodingRegion.Start);\r\n            Assert.Equal(50051107, mutableTranscript.CodingRegion.End);\r\n            Assert.Equal(46, mutableTranscript.CodingRegion.CdnaStart);\r\n            Assert.Equal(225, mutableTranscript.CodingRegion.CdnaEnd);\r\n            Assert.Equal(\"ENSP00000394865\", mutableTranscript.ProteinId);\r\n            Assert.Equal(1, mutableTranscript.ProteinVersion);\r\n            Assert.Equal(\"MIV\", mutableTranscript.PeptideSequence);\r\n            
Assert.Equal(\"VkVQ-sift\", mutableTranscript.SiftData);\r\n            Assert.Equal(\"VkVQ-humvar\", mutableTranscript.PolyphenData);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CacheUtils/Genes/Combiners/CombinerUtilsTests.cs",
    "content": "﻿using System.IO;\r\nusing CacheUtils.Genes.Combiners;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing Intervals;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CacheUtils.Genes.Combiners\r\n{\r\n    public sealed class CombinerUtilsTests\r\n    {\r\n        [Fact]\r\n        public void Merge_DifferentCombinations()\r\n        {\r\n            var interval = new Interval(17369, 17436);\r\n            var uga37 = new UgaGene(ChromosomeUtilities.Chr1, interval, null, true, \"102466751\", null, \"MIR6859-1\", 50039);\r\n            var uga38 = new UgaGene(ChromosomeUtilities.Chr1, null, interval, true, null, \"ENSG00000278267\", \"MIR6859-1\", 50039);\r\n\r\n            var observedResult = CombinerUtils.Merge(uga37, uga38);\r\n            Assert.Equal(\"102466751\", observedResult.EntrezGeneId);\r\n            Assert.Equal(\"ENSG00000278267\", observedResult.EnsemblId);\r\n        }\r\n\r\n        [Fact]\r\n        public void Merge_ThrowException_IfValuesDifferent()\r\n        {\r\n            var interval = new Interval(17369, 17436);\r\n            var uga37 = new UgaGene(ChromosomeUtilities.Chr1, interval, null, true, \"102466751\", \"ENSG00000278267\", \"MIR6859-1\", 50039);\r\n            var uga38 = new UgaGene(ChromosomeUtilities.Chr1, null, interval, true, \"000000000\", \"ENSG00000278267\", \"MIR6859-1\", 50039);\r\n\r\n            Assert.Throws<InvalidDataException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var observedResult = CombinerUtils.Merge(uga37, uga38);\r\n            });\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CacheUtils/Genes/Combiners/HgncIdCombinerTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing CacheUtils.Genes.Combiners;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing Intervals;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CacheUtils.Genes.Combiners\r\n{\r\n    public sealed class HgncIdCombinerTests\r\n    {\r\n        private readonly HgncIdCombiner _combiner = new HgncIdCombiner();\r\n\r\n        [Fact]\r\n        public void Combine_CombineWhenAllIdsMatch()\r\n        {\r\n            var interval = new Interval(17369, 17436);\r\n            var uga37 = new HashSet<UgaGene> { new UgaGene(ChromosomeUtilities.Chr1, interval, null, true, \"102466751\", \"ENSG00000278267\", \"MIR6859-1\", 50039) };\r\n            var uga38 = new HashSet<UgaGene> { new UgaGene(ChromosomeUtilities.Chr1, null, interval, true, \"102466751\", \"ENSG00000278267\", \"MIR6859-1\", 50039) };\r\n\r\n            var observedResults = new List<UgaGene>();\r\n            _combiner.Combine(observedResults, uga37, uga38);\r\n\r\n            Assert.Single(observedResults);\r\n\r\n            var observedGene = observedResults[0];\r\n            Assert.Equal(\"102466751\", observedGene.EntrezGeneId);\r\n            Assert.Equal(\"ENSG00000278267\", observedGene.EnsemblId);\r\n            Assert.Equal(interval, observedGene.GRCh37);\r\n            Assert.Equal(interval, observedGene.GRCh38);\r\n        }\r\n\r\n        [Fact]\r\n        public void Combine_DoNotCombine_MixedStrands()\r\n        {\r\n            var interval = new Interval(17369, 17436);\r\n            var uga37 = new HashSet<UgaGene> { new UgaGene(ChromosomeUtilities.Chr1, interval, null, true, \"102466751\", \"ENSG00000278267\", \"MIR6859-1\", 50039) };\r\n            var uga38 = new HashSet<UgaGene> { new UgaGene(ChromosomeUtilities.Chr1, null, interval, false, \"102466751\", \"ENSG00000278267\", \"MIR6859-1\", 50039) };\r\n\r\n            var observedResults = new List<UgaGene>();\r\n            
_combiner.Combine(observedResults, uga37, uga38);\r\n\r\n            Assert.Equal(2, observedResults.Count);\r\n            Assert.True(observedResults[0].OnReverseStrand);\r\n            Assert.False(observedResults[1].OnReverseStrand);\r\n        }\r\n\r\n        [Fact]\r\n        public void Combine_MIR6859_CombineWhenMissingGeneId()\r\n        {\r\n            var interval = new Interval(17369, 17436);\r\n            var uga37 = new HashSet<UgaGene> { new UgaGene(ChromosomeUtilities.Chr1, interval, null, true, \"102466751\", null, \"MIR6859-1\", 50039) };\r\n            var uga38 = new HashSet<UgaGene> { new UgaGene(ChromosomeUtilities.Chr1, null, interval, true, \"102466751\", \"ENSG00000278267\", \"MIR6859-1\", 50039) };\r\n\r\n            var observedResults = new List<UgaGene>();\r\n            _combiner.Combine(observedResults, uga37, uga38);\r\n\r\n            Assert.Single(observedResults);\r\n\r\n            var observedGene = observedResults[0];\r\n            Assert.Equal(\"102466751\", observedGene.EntrezGeneId);\r\n            Assert.Equal(\"ENSG00000278267\", observedGene.EnsemblId);\r\n            Assert.Equal(interval, observedGene.GRCh37);\r\n            Assert.Equal(interval, observedGene.GRCh38);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CacheUtils/Genes/Combiners/PartitionCombinerTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing CacheUtils.Genes.Combiners;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing Intervals;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CacheUtils.Genes.Combiners\r\n{\r\n    public sealed class PartitionCombinerTests\r\n    {\r\n        private readonly PartitionCombiner _combiner = new PartitionCombiner();\r\n\r\n        [Fact]\r\n        public void Combine_MergeIfSameIds_EntrezGeneOnly()\r\n        {\r\n            var interval = new Interval(17369, 17436);\r\n            var uga37 = new HashSet<UgaGene> { new UgaGene(ChromosomeUtilities.Chr1, interval, null, true, \"102466751\", null, \"MIR6859-1\", 50039) };\r\n            var uga38 = new HashSet<UgaGene> { new UgaGene(ChromosomeUtilities.Chr1, null, interval, true, \"102466751\", null, \"MIR6859-1\", 50039) };\r\n\r\n            var observedResults = new List<UgaGene>();\r\n            _combiner.Combine(observedResults, uga37, uga38);\r\n\r\n            Assert.Single(observedResults);\r\n\r\n            var observedGene = observedResults[0];\r\n            Assert.Equal(\"102466751\", observedGene.EntrezGeneId);\r\n            Assert.Null(observedGene.EnsemblId);\r\n            Assert.Equal(interval, observedGene.GRCh37);\r\n            Assert.Equal(interval, observedGene.GRCh38);\r\n        }\r\n\r\n        [Fact]\r\n        public void Combine_MergeIfSameIds_EnsemblOnly()\r\n        {\r\n            var interval = new Interval(17369, 17436);\r\n            var uga37 = new HashSet<UgaGene> { new UgaGene(ChromosomeUtilities.Chr1, interval, null, true, null, \"ENSG00000278267\", \"MIR6859-1\", 50039) };\r\n            var uga38 = new HashSet<UgaGene> { new UgaGene(ChromosomeUtilities.Chr1, null, interval, true, null, \"ENSG00000278267\", \"MIR6859-1\", 50039) };\r\n\r\n            var observedResults = new List<UgaGene>();\r\n            _combiner.Combine(observedResults, uga37, uga38);\r\n\r\n            
Assert.Single(observedResults);\r\n\r\n            var observedGene = observedResults[0];\r\n            Assert.Equal(\"ENSG00000278267\", observedGene.EnsemblId);\r\n            Assert.Null(observedGene.EntrezGeneId);\r\n            Assert.Equal(interval, observedGene.GRCh37);\r\n            Assert.Equal(interval, observedGene.GRCh38);\r\n        }\r\n\r\n        [Fact]\r\n        public void Combine_DoNotCombine_MixedIds()\r\n        {\r\n            var interval = new Interval(17369, 17436);\r\n            var uga37 = new HashSet<UgaGene> { new UgaGene(ChromosomeUtilities.Chr1, interval, null, true, \"102466751\", null, \"MIR6859-1\", 50039) };\r\n            var uga38 = new HashSet<UgaGene> { new UgaGene(ChromosomeUtilities.Chr1, null, interval, true, \"102466751\", \"ENSG00000278267\", \"MIR6859-1\", 50039) };\r\n\r\n            var observedResults = new List<UgaGene>();\r\n            _combiner.Combine(observedResults, uga37, uga38);\r\n\r\n            Assert.Equal(2, observedResults.Count);\r\n            Assert.Equal(\"ENSG00000278267\", observedResults[0].EnsemblId);\r\n            Assert.Null(observedResults[1].EnsemblId);            \r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CacheUtils/Genes/GeneFlattenerTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing CacheUtils.DataDumperImport.DataStructures;\r\nusing CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing CacheUtils.Genes;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CacheUtils.Genes\r\n{\r\n    public sealed class GeneFlattenerTests\r\n    {\r\n        [Fact]\r\n        public void Flatten_AllGenesShouldBeCombined()\r\n        {\r\n            var genes = new List<MutableGene>\r\n            {\r\n                new MutableGene(null, 100, 120, false, null, GeneSymbolSource.Unknown, \"test\", -1),\r\n                new MutableGene(null, 110, 115, false, null, GeneSymbolSource.Unknown, \"test\", -1),\r\n                new MutableGene(null, 120, 130, false, null, GeneSymbolSource.Unknown, \"test\", -1)\r\n            };\r\n\r\n            var flatGenes = GeneFlattener.FlattenWithSameId(genes);\r\n\r\n            Assert.Single(flatGenes);\r\n\r\n            var flatGene = flatGenes[0];\r\n            Assert.Equal(100, flatGene.Start);\r\n            Assert.Equal(130, flatGene.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void Flatten_ReturnSameGene_WhenListHasOneEntry()\r\n        {\r\n            var genes = new List<MutableGene>\r\n            {\r\n                new MutableGene(null, 100, 120, false, null, GeneSymbolSource.Unknown, \"test\", -1)\r\n            };\r\n\r\n            var flatGenes = GeneFlattener.FlattenWithSameId(genes);\r\n\r\n            Assert.Single(flatGenes);\r\n            Assert.Equal(genes[0].Start, flatGenes[0].Start);\r\n            Assert.Equal(genes[0].End, flatGenes[0].End);\r\n        }\r\n\r\n        [Fact]\r\n        public void Flatten_ReturnNull_WhenInputNull()\r\n        {\r\n            var flatGenes = GeneFlattener.FlattenWithSameId(null as List<MutableGene>);\r\n            Assert.Null(flatGenes);\r\n        }\r\n\r\n        [Fact]\r\n        public void Flatten_NoGenesShouldBeCombined()\r\n        {\r\n            var genes = new 
List<MutableGene>\r\n            {\r\n                new MutableGene(null, 100, 120, false, null, GeneSymbolSource.Unknown, \"test\", -1),\r\n                new MutableGene(null, 130, 140, false, null, GeneSymbolSource.Unknown, \"test\", -1),\r\n                new MutableGene(null, 150, 160, false, null, GeneSymbolSource.Unknown, \"test\", -1)\r\n            };\r\n\r\n            var flatGenes = GeneFlattener.FlattenWithSameId(genes);\r\n\r\n            Assert.Equal(3, flatGenes.Count);\r\n            for (int i = 0; i < flatGenes.Count; i++)\r\n            {\r\n                Assert.Equal(genes[i].Start, flatGenes[i].Start);\r\n                Assert.Equal(genes[i].End, flatGenes[i].End);\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CacheUtils/Genes/Utilities/DictionaryUtilitiesTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.Genes.DataStructures;\r\nusing CacheUtils.Genes.Utilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CacheUtils.Genes.Utilities\r\n{\r\n    public sealed class DictionaryUtilitiesTests\r\n    {\r\n        [Fact]\r\n        public void GetSingleValueDict_OneKey_OneValue()\r\n        {\r\n            var uga1 = new UgaGene(null, null, null, true, \"102466751\", null, \"MIR6859-1\", 50039);\r\n            var uga2 = new UgaGene(null, null, null, true, null, \"ENSG00000278267\", \"MIR6859-1\", 50039);\r\n            var genes = new List<UgaGene> { uga1, uga2 };\r\n\r\n            var observedResult = genes.GetSingleValueDict(x => x.EnsemblId);\r\n            Assert.NotNull(observedResult);\r\n            Assert.Single(observedResult);\r\n            Assert.True(observedResult.ContainsKey(\"ENSG00000278267\"));\r\n        }\r\n\r\n        [Fact]\r\n        public void GetSingleValueDict_ThrowException_IfMultipleValuesShareKey()\r\n        {\r\n            var uga1 = new UgaGene(null, null, null, true, null, \"ENSG00000278267\", \"MIR6859-1\", 50039);\r\n            var uga2 = new UgaGene(null, null, null, true, null, \"ENSG00000278267\", \"MIR6859-1\", 50039);\r\n            var genes = new List<UgaGene> { uga1, uga2 };\r\n\r\n            Assert.Throws<InvalidDataException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var observedResult = genes.GetSingleValueDict(x => x.EnsemblId);\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void GetMultiValueDict_OneKey_WithTwoValues()\r\n        {\r\n            var uga1 = new UgaGene(null, null, null, true, \"102466751\", null, \"MIR6859-1\", 50039);\r\n            var uga2 = new UgaGene(null, null, null, true, null, \"ENSG00000278267\", \"MIR6859-1\", 50039);\r\n            var uga3 = new UgaGene(null, null, null, true, null, \"ENSG00000278267\", 
\"MIR6859-1\", 50039);\r\n            var genes = new List<UgaGene> { uga1, uga2, uga3 };\r\n\r\n            var observedResult = genes.GetMultiValueDict(x => x.EnsemblId);\r\n            Assert.NotNull(observedResult);\r\n            Assert.Single(observedResult);\r\n\r\n            var firstEntry = observedResult[\"ENSG00000278267\"];\r\n            Assert.NotNull(firstEntry);\r\n            Assert.Equal(2, firstEntry.Count);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetKeyValueDict_OneKey_OneValue()\r\n        {\r\n            var uga1 = new UgaGene(null, null, null, true, \"102466751\", null, \"MIR6859-1\", 50039);\r\n            var uga2 = new UgaGene(null, null, null, true, null, \"ENSG00000278267\", \"MIR6859-1\", 50039);\r\n            var uga3 = new UgaGene(null, null, null, true, null, \"ENSG00000278267\", \"MIR6859-1\", 50039);\r\n            var genes = new List<UgaGene> { uga1, uga2, uga3 };\r\n\r\n            var observedResult = genes.GetKeyValueDict(x => x.EnsemblId, x => x.HgncId);\r\n            Assert.NotNull(observedResult);\r\n            Assert.Single(observedResult);\r\n\r\n            var hgncId = observedResult[\"ENSG00000278267\"];\r\n            Assert.Equal(50039, hgncId);\r\n        }\r\n\r\n        [Fact]\r\n        public void CreateIndex_ThreeValues()\r\n        {\r\n            const string a = \"tom\";\r\n            const string b = \"jane\";\r\n            const string c = \"sally\";\r\n            var genes = new List<string> { a, b, c };\r\n\r\n            var observedResult = genes.CreateIndex();\r\n            Assert.NotNull(observedResult);\r\n            Assert.Equal(3, observedResult.Count);\r\n\r\n            Assert.Equal(0, observedResult[a]);\r\n            Assert.Equal(1, observedResult[b]);\r\n            Assert.Equal(2, observedResult[c]);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CacheUtils/IO/Caches/TranscriptCacheWriterTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing CacheUtils.TranscriptCache;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CacheUtils.IO.Caches\r\n{\r\n    public sealed class TranscriptCacheWriterTests\r\n    {\r\n        [Fact]\r\n        public void CreateIndex_PopulatedDictionary()\r\n        {\r\n            var strings = new[] { \"A\", \"B\", \"D\", \"P\", \"Z\" };\r\n            var dict = TranscriptCacheWriter.CreateIndex(strings, EqualityComparer<string>.Default);\r\n            Assert.NotNull(dict);\r\n            Assert.Equal(3, dict[\"P\"]);\r\n        }\r\n\r\n        [Fact]\r\n        public void CreateIndex_EmptyDictionary_WhenInputNull()\r\n        {\r\n            var dict = TranscriptCacheWriter.CreateIndex(null, EqualityComparer<string>.Default);\r\n            Assert.NotNull(dict);\r\n            Assert.Empty(dict);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CacheUtils/TranscriptCache/TranscriptRegionMergerTests.cs",
    "content": "﻿using CacheUtils.DataDumperImport.DataStructures.Mutable;\r\nusing CacheUtils.TranscriptCache;\r\nusing CacheUtils.TranscriptCache.Comparers;\r\nusing Intervals;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CacheUtils.TranscriptCache\r\n{\r\n    public sealed class TranscriptRegionMergerTests\r\n    {\r\n        private readonly TranscriptRegionComparer _comparer = new TranscriptRegionComparer();\r\n\r\n        [Fact]\r\n        public void GetTranscriptRegions_OneExon()\r\n        {\r\n            var cdnaMaps = new[]\r\n            {\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 64571756, 64572037, 2569, 2850)\r\n            };\r\n\r\n            var exons = new[]\r\n            {\r\n                new MutableExon(ChromosomeUtilities.Chr5, 64571756, 64572037, 0)\r\n            };\r\n\r\n            var expectedRegions = new ITranscriptRegion[]\r\n            {\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 1, 64571756, 64572037, 2569, 2850)\r\n            };\r\n\r\n            var observedRegions = TranscriptRegionMerger.GetTranscriptRegions(cdnaMaps, exons, null, false);\r\n            Assert.Single(observedRegions);\r\n            Assert.Equal(expectedRegions, observedRegions, _comparer);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetTranscriptRegions_WithGap_Forward()\r\n        {\r\n            var exons = new[]\r\n            {\r\n                new MutableExon(ChromosomeUtilities.Chr5, 89623195, 89624305, 0),\r\n                new MutableExon(ChromosomeUtilities.Chr5, 89653782, 89653866, 0),\r\n                new MutableExon(ChromosomeUtilities.Chr5, 89690803, 89690846, 0),\r\n                new MutableExon(ChromosomeUtilities.Chr5, 89692770, 89693008, 0),\r\n                new MutableExon(ChromosomeUtilities.Chr5, 89702368, 
89702526, 0),\r\n                new MutableExon(ChromosomeUtilities.Chr5, 89711875, 89712016, 0),\r\n                new MutableExon(ChromosomeUtilities.Chr5, 89717610, 89717776, 0),\r\n                new MutableExon(ChromosomeUtilities.Chr5, 89720651, 89720875, 0),\r\n                new MutableExon(ChromosomeUtilities.Chr5, 89725044, 89731687, 0)\r\n            };\r\n\r\n            var introns = new IInterval[]\r\n            {\r\n                new Interval(89624306, 89653781),\r\n                new Interval(89653867, 89690802),\r\n                new Interval(89690847, 89692769),\r\n                new Interval(89693009, 89702367),\r\n                new Interval(89702527, 89711874),\r\n                new Interval(89712017, 89717609),\r\n                new Interval(89717777, 89720650),\r\n                new Interval(89720876, 89725043)\r\n            };\r\n\r\n            var cdnaMaps = new[]\r\n            {\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 89623195, 89623860, 1, 666),\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 89623862, 89624305, 667, 1110),\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 89653782, 89653866, 1111, 1195),\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 89690803, 89690846, 1196, 1239),\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 89692770, 89693008, 1240, 1478),\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 89702368, 89702526, 1479, 1637),\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 89711875, 89712016, 1638, 1779),\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 89717610, 89717776, 1780, 1946),\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 89720651, 89720875, 1947, 2171),\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 
89725044, 89731687, 2172, 8815)\r\n            };\r\n\r\n            var expectedRegions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 89623195, 89623860, 1, 666),\r\n                new TranscriptRegion(TranscriptRegionType.Gap, 1, 89623861, 89623861, 666, 667),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 89623862, 89624305, 667, 1110),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 89624306, 89653781, 1110, 1111),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 2, 89653782, 89653866, 1111, 1195),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 2, 89653867, 89690802, 1195, 1196),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 3, 89690803, 89690846, 1196, 1239),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 3, 89690847, 89692769, 1239, 1240),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 4, 89692770, 89693008, 1240, 1478),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 4, 89693009, 89702367, 1478, 1479),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 5, 89702368, 89702526, 1479, 1637),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 5, 89702527, 89711874, 1637, 1638),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 6, 89711875, 89712016, 1638, 1779),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 6, 89712017, 89717609, 1779, 1780),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 7, 89717610, 89717776, 1780, 1946),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 7, 89717777, 89720650, 1946, 1947),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 8, 89720651, 89720875, 1947, 2171),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 8, 89720876, 89725043, 2171, 2172),\r\n   
             new TranscriptRegion(TranscriptRegionType.Exon, 9, 89725044, 89731687, 2172, 8815)\r\n            };\r\n\r\n            var observedRegions = TranscriptRegionMerger.GetTranscriptRegions(cdnaMaps, exons, introns, false);\r\n            Assert.Equal(19, observedRegions.Length);\r\n            Assert.Equal(expectedRegions, observedRegions, _comparer);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetTranscriptRegions_WithGap_Reverse()\r\n        {\r\n            var exons = new[]\r\n            {\r\n                new MutableExon(ChromosomeUtilities.Chr5, 64571756, 64574228, 2),\r\n                new MutableExon(ChromosomeUtilities.Chr5, 64575621, 64575829, 0),\r\n                new MutableExon(ChromosomeUtilities.Chr5, 64578301, 64578407, 0),\r\n                new MutableExon(ChromosomeUtilities.Chr5, 64578866, 64578927, 0)\r\n            };\r\n\r\n            var introns = new IInterval[]\r\n            {\r\n                new Interval(64574229, 64575620),\r\n                new Interval(64575830, 64578300),\r\n                new Interval(64578408, 64578865)\r\n            };\r\n\r\n            var cdnaMaps = new[]\r\n            {\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 64571756, 64572037, 2569, 2850),\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 64572039, 64574228, 379, 2568),\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 64575621, 64575829, 170, 378),\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 64578301, 64578407, 63, 169),\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 64578866, 64578927, 1, 62)\r\n            };\r\n\r\n            var expectedRegions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 4, 64571756, 64572037, 2569, 2850),\r\n                new TranscriptRegion(TranscriptRegionType.Gap, 4, 64572038, 
64572038, 2568, 2569),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 4, 64572039, 64574228, 379, 2568),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 3, 64574229, 64575620, 378, 379),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 3, 64575621, 64575829, 170, 378),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 2, 64575830, 64578300, 169, 170),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 2, 64578301, 64578407, 63, 169),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 64578408, 64578865, 62, 63),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 64578866, 64578927, 1, 62)\r\n            };\r\n\r\n            var observedRegions = TranscriptRegionMerger.GetTranscriptRegions(cdnaMaps, exons, introns, true);\r\n            Assert.Equal(9, observedRegions.Length);\r\n            Assert.Equal(expectedRegions, observedRegions, _comparer);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetTranscriptRegions_Reverse()\r\n        {\r\n            var exons = new[]\r\n            {\r\n                new MutableExon(ChromosomeUtilities.Chr1, 20977055, 20977207, 1),\r\n                new MutableExon(ChromosomeUtilities.Chr1, 20976856, 20977050, 1)\r\n            };\r\n\r\n            var introns = new IInterval[]\r\n            {\r\n                new Interval(20977051, 20977054)\r\n            };\r\n\r\n            var cdnaMaps = new[]\r\n            {\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 20977055, 20977207, 1, 153),\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 20976856, 20977050, 154, 348)\r\n            };\r\n\r\n            var expectedRegions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 2, 20976856, 20977050, 154, 348),\r\n                new 
TranscriptRegion(TranscriptRegionType.Intron, 1, 20977051, 20977054, 153, 154),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 20977055, 20977207, 1, 153)\r\n            };\r\n\r\n            var observedRegions = TranscriptRegionMerger.GetTranscriptRegions(cdnaMaps, exons, introns, true);\r\n            Assert.Equal(3, observedRegions.Length);\r\n            Assert.Equal(expectedRegions, observedRegions, _comparer);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetTranscriptRegions_TwoExonsNoGap_Forward()\r\n        {\r\n            var exons = new[]\r\n            {\r\n                new MutableExon(ChromosomeUtilities.Chr12, 7079944, 7080253, 1),\r\n                new MutableExon(ChromosomeUtilities.Chr12, 7083501, 7083602, 2),\r\n                new MutableExon(ChromosomeUtilities.Chr12, 7083714, 7083855, 2),\r\n                new MutableExon(ChromosomeUtilities.Chr12, 7084252, 7084310, 1),\r\n                new MutableExon(ChromosomeUtilities.Chr12, 7084391, 7084540, 2),\r\n                new MutableExon(ChromosomeUtilities.Chr12, 7084858, 7085165, 2)\r\n            };\r\n\r\n            var introns = new IInterval[]\r\n            {\r\n                new Interval(7080254, 7083500),\r\n                new Interval(7083603, 7083713),\r\n                new Interval(7083856, 7084251),\r\n                new Interval(7084311, 7084390),\r\n                new Interval(7084541, 7084857)\r\n            };\r\n\r\n            var cdnaMaps = new[]\r\n            {\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 7079944, 7080212, 1, 269),\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 7080213, 7080253, 271, 311),\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 7083501, 7083602, 312, 413),\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 7083714, 7083855, 414, 555),\r\n                new 
MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 7084252, 7084310, 556, 614),\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 7084391, 7084540, 615, 764),\r\n                new MutableTranscriptRegion(TranscriptRegionType.Exon, 0, 7084858, 7085165, 765, 1072)\r\n            };\r\n\r\n            var expectedRegions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 7079944, 7080212, 1, 269), \r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 7080213, 7080253, 271, 311),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 7080254, 7083500, 311, 312),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 2, 7083501, 7083602, 312, 413),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 2, 7083603, 7083713, 413, 414),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 3, 7083714, 7083855, 414, 555),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 3, 7083856, 7084251, 555, 556),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 4, 7084252, 7084310, 556, 614),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 4, 7084311, 7084390, 614, 615),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 5, 7084391, 7084540, 615, 764),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 5, 7084541, 7084857, 764, 765),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 6, 7084858, 7085165, 765, 1072)\r\n            };\r\n\r\n            var observedRegions = TranscriptRegionMerger.GetTranscriptRegions(cdnaMaps, exons, introns, false);\r\n            Assert.Equal(12, observedRegions.Length);\r\n            Assert.Equal(expectedRegions, observedRegions, _comparer);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CacheUtils/Utilities/AccessionUtilitiesTests.cs",
    "content": "﻿using System.IO;\r\nusing CacheUtils.Utilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CacheUtils.Utilities\r\n{\r\n    public sealed class AccessionUtilitiesTests\r\n    {\r\n        [Fact]\r\n        public void GetMaxVersion_Dupl()\r\n        {\r\n            const string expectedId = \"NM_004522.2_dupl6\";\r\n            const byte expectedVersion = 1;\r\n            var observedResult = AccessionUtilities.GetMaxVersion(\"NM_004522.2_dupl6\", 1);\r\n            Assert.Equal(expectedId, observedResult.Id);\r\n            Assert.Equal(expectedVersion, observedResult.Version);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetMaxVersion_IdVersionMax()\r\n        {\r\n            const string expectedId = \"NM_004522\";\r\n            const byte expectedVersion = 2;\r\n            var observedResult = AccessionUtilities.GetMaxVersion(\"NM_004522.2\", 1);\r\n            Assert.Equal(expectedId, observedResult.Id);\r\n            Assert.Equal(expectedVersion, observedResult.Version);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetMaxVersion_SuppliedVersionMax()\r\n        {\r\n            const string expectedId = \"NM_004522\";\r\n            const byte expectedVersion = 3;\r\n            var observedResult = AccessionUtilities.GetMaxVersion(\"NM_004522.2\", 3);\r\n            Assert.Equal(expectedId, observedResult.Id);\r\n            Assert.Equal(expectedVersion, observedResult.Version);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetAccessionNumber_ReturnNumber_RefSeq()\r\n        {\r\n            const int expectedResult = 4522;\r\n            var observedResult = AccessionUtilities.GetAccessionNumber(\"NM_004522\");\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetAccessionNumber_ReturnNumber_Ensembl()\r\n        {\r\n            const int expectedResult = 515242;\r\n            var observedResult = 
AccessionUtilities.GetAccessionNumber(\"ENST00000515242\");\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetAccessionNumber_ReturnMinusOne()\r\n        {\r\n            const int expectedResult = -1;\r\n            var observedResult = AccessionUtilities.GetAccessionNumber(null);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetAccessionNumber_ThrowException_IfUnderlineMissingRefSeq()\r\n        {\r\n            Assert.Throws<InvalidDataException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var observedResult = AccessionUtilities.GetAccessionNumber(\"NM004522\");\r\n            });\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CacheUtils/Utilities/RemoteFileTests.cs",
    "content": "﻿using System;\r\nusing CacheUtils.Utilities;\r\nusing VariantAnnotation.Utilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CacheUtils.Utilities\r\n{\r\n    public sealed class RemoteFileTests\r\n    {\r\n        [Fact]\r\n        public void GetFilename_WithoutUrlPrefix()\r\n        {\r\n            string expectedResult = $\"ccds_1000_{Date.GetDate(DateTime.Now.Ticks)}.txt\";\r\n            var observedResult = RemoteFile.GetFilename(\"ccds_1000.txt\", true);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetFilename_WithoutDate()\r\n        {\r\n            const string expectedResult = \"CCDS2Sequence.20160908.txt\";\r\n            var observedResult = RemoteFile.GetFilename(\"ftp://ftp.ncbi.nlm.nih.gov/pub/CCDS/current_human/CCDS2Sequence.20160908.txt\", false);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetFilename_WithUrlPrefix()\r\n        {\r\n            string expectedResult = $\"CCDS2Sequence.20160908_{Date.GetDate(DateTime.Now.Ticks)}.txt\";\r\n            var observedResult = RemoteFile.GetFilename(\"ftp://ftp.ncbi.nlm.nih.gov/pub/CCDS/current_human/CCDS2Sequence.20160908.txt\", true);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Cloud/ConsistencyTests.cs",
    "content": "using Cloud;\nusing VariantAnnotation.SA;\nusing Xunit;\n\nnamespace UnitTests.Cloud;\n\npublic sealed class ConsistencyTests\n{\n    [Fact]\n    public void Consistency_with_SAUtils()\n    {\n        Assert.Equal(LambdaUrlHelper.SaSchemaVersion, SaCommon.SchemaVersion);\n    }\n}"
  },
  {
    "path": "UnitTests/Cloud/JsonUtilitiesTests.cs",
    "content": "﻿using System.IO;\r\nusing System.Linq;\r\nusing System.Text;\r\nusing Cloud.Utilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Cloud\r\n{\r\n    public sealed class JsonUtilitiesTests\r\n    {\r\n\r\n        [Fact]\r\n        public void Serialize_AsExpected()\r\n        {\r\n            var inputObject = new[]\r\n            {\r\n                new ObjectExample {Name = \"Ada\", Age = 8, Skills = new []{\"dancing\", \"skating\"}},\r\n                new ObjectExample {Name = \"Bob\", Age = 10, Skills = new []{\"programming\"}}\r\n            };\r\n            var memStream = JsonUtilities.Serialize(inputObject);\r\n\r\n            const string expectedString = \"[{\\\"Name\\\":\\\"Ada\\\",\\\"Age\\\":8,\\\"Skills\\\":[\\\"dancing\\\",\\\"skating\\\"]},{\\\"Name\\\":\\\"Bob\\\",\\\"Age\\\":10,\\\"Skills\\\":[\\\"programming\\\"]}]\";\r\n            var expectedStream = new MemoryStream(Encoding.ASCII.GetBytes(expectedString));\r\n\r\n            Assert.Equal(expectedStream.Length, memStream.Length);\r\n            Assert.True(expectedStream.ToArray().SequenceEqual(memStream.ToArray()));\r\n        }\r\n\r\n        [Fact]\r\n        public void Stringify_AsExpected()\r\n        {\r\n            var inputObject = new[]\r\n            {\r\n                new ObjectExample {Name = \"Ken\", Age = 16, Skills = new[] {\"boxing\"}},\r\n                new ObjectExample {Name = \"Armanda\", Age = 18, Skills = new[] {\"cooking\"}}\r\n            };\r\n\r\n            const string expectedString = \"[{\\\"Name\\\":\\\"Ken\\\",\\\"Age\\\":16,\\\"Skills\\\":[\\\"boxing\\\"]},{\\\"Name\\\":\\\"Armanda\\\",\\\"Age\\\":18,\\\"Skills\\\":[\\\"cooking\\\"]}]\";\r\n\r\n            Assert.Equal(expectedString, JsonUtilities.Stringify(inputObject));\r\n        }\r\n    }\r\n\r\n    public sealed class ObjectExample\r\n    {\r\n        public string Name;\r\n        public int Age;\r\n        public string[] Skills;\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/Cloud/LambdaUrlHelperTests.cs",
    "content": "﻿using System;\nusing Cloud;\nusing Cloud.Utilities;\nusing Genome;\nusing IO;\nusing ReferenceSequence;\nusing VariantAnnotation.SA;\nusing Xunit;\n\nnamespace UnitTests.Cloud\n{\n    public sealed class LambdaUrlHelperTests\n    {\n        [Fact]\n        public void GetDataUrlBase_AsExpected()\n        {\n            Environment.SetEnvironmentVariable(\"NirvanaDataUrlBase\", \"http://somewhere.on.the.earth/\");\n            Assert.Equal($\"http://somewhere.on.the.earth/ab0cf104f39708eabd07b8cb67e149ba-Cache/{CacheConstants.DataVersion}/\", LambdaUrlHelper.GetCacheFolder());\n            Assert.Equal($\"http://somewhere.on.the.earth/d95867deadfe690e40f42068d6b59df8-References/{ReferenceSequenceCommon.HeaderVersion}/Homo_sapiens.\", LambdaUrlHelper.GetRefPrefix());\n        }\n\n        [Fact]\n        public void GetS3RefLocation_AsExpected()\n        {\n            Environment.SetEnvironmentVariable(\"NirvanaDataUrlBase\", \"whatever\");\n            Assert.Equal(LambdaUrlHelper.GetRefPrefix() + \"GRCh37\" + LambdaUrlHelper.RefSuffix, LambdaUrlHelper.GetRefUrl(GenomeAssembly.GRCh37));\n        }\n        \n        [Fact]\n        public void GetS3_SaManifest_Location_AsExpected()\n        {\n            Environment.SetEnvironmentVariable(\"NirvanaDataUrlBase\", \"http://nirvana-annotations.s3.us-west-2.amazonaws.com/\");\n            var saManifestUrl = LambdaUtilities.GetManifestUrl(\"latest\", GenomeAssembly.GRCh38, SaCommon.SchemaVersion);\n            HttpUtilities.ValidateUrl(saManifestUrl);\n        }\n        \n        [Fact]\n        public void GetS3_SaManifest_Location_from_config()\n        {\n            var saManifestUrl = LambdaUtilities.GetManifestUrl(\"latest\", GenomeAssembly.GRCh38, SaCommon.SchemaVersion);\n            HttpUtilities.ValidateUrl(saManifestUrl);\n        }\n    }\n}\n"
  },
  {
    "path": "UnitTests/Cloud/RedactionUtilitiesTests.cs",
    "content": "﻿using Cloud;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Cloud\r\n{\r\n    public sealed class RedactionUtilitiesTests\r\n    {\r\n        [Fact]\r\n        public void Redact_PresignedUrl()\r\n        {\r\n            const string json     = \"{\\\"id\\\":\\\"e96a15ab-13f8-48cd-b3b8-ca37aca8480f\\\",\\\"genomeAssembly\\\":\\\"GRCh37\\\",\\\"vcfUrl\\\":\\\"https://s3.amazonaws.com/illumina-early-access/Test.vcf.gz?AWSAccessKeyId=AKISKSD87A3C4&Expires=109838429&Signature=s98df7s8df12f2jo4lfjfs9d0fu0sd9f\\\",\\\"tabixUrl\\\":\\\"https://s3.amazonaws.com/illumina-early-access/Test.vcf.gz.tbi?AWSAccessKeyId=AKISKSD87A3C4&Expires=109838429&Signature=s98df7s8df12f2jo4lfjfs9d0fu0sd9f\\\",\\\"outputDir\\\":{\\\"bucketName\\\":\\\"illumina-early-access\\\",\\\"region\\\":\\\"us-east-1\\\",\\\"path\\\":\\\"/5a2a3c8c-3744-422d-b343/\\\",\\\"accessKey\\\":\\\"AKIAIOSFODNN7EXAMPLE\\\",\\\"secretKey\\\":\\\"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY\\\",\\\"sessionToken\\\":\\\"AQoEXAMPLEH4aoAH0gNCAPyJxz4BlCFFxWNE1OPTgk5TthT+FvwqnKwRcOIfrRh3c/LTo6UDdyJwOOvEVPvLXCrrrUtdnniCEXAMPLE/IvU1dYUg2RVAJBanLiHb4IgRmpRV3zrkuWJOgQs8IZZaIv2BXIa2R4OlgkBN9bkUDNCJiBeb/AXlzBBko7b15fjrBs2+cTQtpZ3CYWFXG8C5zqx37wnOE49mRl/+OtkIKGO7fAE\\\"},\\\"supplementaryAnnotations\\\":\\\"latest\\\",\\\"customAnnotations\\\":[{\\\"nsaUrl\\\":\\\"https://s3.amazonaws.com/illumina-early-access/ClinVar.nsa?AWSAccessKeyId=AKISKSD87A3C4&Expires=109838429&Signature=s98df7s8df12f2jo4lfjfs9d0fu0sd9f\\\",\\\"idxUrl\\\":\\\"https://s3.amazonaws.com/illumina-early-access/ClinVar.nsa.idx?AWSAccessKeyId=AKISKSD87A3C4&Expires=109838429&Signature=s98df7s8df12f2jo4lfjfs9d0fu0sd9f\\\"},{\\\"nsiUrl\\\":\\\"https://s3.amazonaws.com/illumina-early-access/ClinVar.nsi?AWSAccessKeyId=AKISKSD87A3C4&Expires=109838429&Signature=s98df7s8df12f2jo4lfjfs9d0fu0sd9f\\\"},{\\\"ngaUrl\\\":\\\"https://s3.amazonaws.com/illumina-early-access/ClinVar.nga?AWSAccessKeyId=AKISKSD87A3C4&Expires=109838429&Signature=s98df7s8df12f2jo4lfjfs9d0fu
0sd9f\\\"}]}\";\r\n            const string expected = \"{\\\"id\\\":\\\"e96a15ab-13f8-48cd-b3b8-ca37aca8480f\\\",\\\"genomeAssembly\\\":\\\"GRCh37\\\",\\\"vcfUrl\\\":\\\"https://s3.amazonaws.com/illumina-early-access/Test.vcf.gz?AWSAccessKeyId=XXXXXXXXXXXXX&Expires=109838429&Signature=s98df7s8df12f2jo4lfjfs9d0fu0sd9f\\\",\\\"tabixUrl\\\":\\\"https://s3.amazonaws.com/illumina-early-access/Test.vcf.gz.tbi?AWSAccessKeyId=XXXXXXXXXXXXX&Expires=109838429&Signature=s98df7s8df12f2jo4lfjfs9d0fu0sd9f\\\",\\\"outputDir\\\":{\\\"bucketName\\\":\\\"illumina-early-access\\\",\\\"region\\\":\\\"us-east-1\\\",\\\"path\\\":\\\"/5a2a3c8c-3744-422d-b343/\\\",\\\"accessKey\\\":\\\"XXXXXXXXXXXXXXXXXXXX\\\",\\\"secretKey\\\":\\\"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\\\",\\\"sessionToken\\\":\\\"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\\\"},\\\"supplementaryAnnotations\\\":\\\"latest\\\",\\\"customAnnotations\\\":[{\\\"nsaUrl\\\":\\\"https://s3.amazonaws.com/illumina-early-access/ClinVar.nsa?AWSAccessKeyId=XXXXXXXXXXXXX&Expires=109838429&Signature=s98df7s8df12f2jo4lfjfs9d0fu0sd9f\\\",\\\"idxUrl\\\":\\\"https://s3.amazonaws.com/illumina-early-access/ClinVar.nsa.idx?AWSAccessKeyId=XXXXXXXXXXXXX&Expires=109838429&Signature=s98df7s8df12f2jo4lfjfs9d0fu0sd9f\\\"},{\\\"nsiUrl\\\":\\\"https://s3.amazonaws.com/illumina-early-access/ClinVar.nsi?AWSAccessKeyId=XXXXXXXXXXXXX&Expires=109838429&Signature=s98df7s8df12f2jo4lfjfs9d0fu0sd9f\\\"},{\\\"ngaUrl\\\":\\\"https://s3.amazonaws.com/illumina-early-access/ClinVar.nga?AWSAccessKeyId=XXXXXXXXXXXXX&Expires=109838429&Signature=s98df7s8df12f2jo4lfjfs9d0fu0sd9f\\\"}]}\";\r\n\r\n            string observed = json.Redact();\r\n            Assert.Equal(expected, observed);\r\n        }\r\n\r\n        [Fact]\r\n        public void 
Redact_AwsSignatureVersion4()\r\n        {\r\n            const string json     = \"{\\\"id\\\":\\\"Test\\\",\\\"genomeAssembly\\\":\\\"GRCh38\\\",\\\"vcfUrl\\\":\\\"https://illumina-dev.s3.us-west-2.amazonaws.com/Annotation/input/test.vcf.gz?X-Amz-Expires=604800&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAZYATIEHL37L46ZIO/20191007/us-west-2/s3/aws4_request&X-Amz-Date=20191007T222533Z&X-Amz-SignedHeaders=host&X-Amz-Signature=44433f0ec4875323d8e82084469f4e34b6384aead83f9c176595b96badaba3f8\\\",\\\"tabixUrl\\\":\\\"https://illumina-dev.s3.us-west-2.amazonaws.com/Annotation/input/test.vcf.gz.tbi?X-Amz-Expires=604800&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAZYATIEHL37L46ZIO/20191007/us-west-2/s3/aws4_request&X-Amz-Date=20191007T222533Z&X-Amz-SignedHeaders=host&X-Amz-Signature=19cd9c1244cf156952746e85bfc4977946a80a1110205a3dae9b578647dacd50\\\",\\\"outputDir\\\":{\\\"bucketName\\\":\\\"illumina-early-access\\\",\\\"region\\\":\\\"us-east-1\\\",\\\"path\\\":\\\"/5a2a3c8c-3744-422d-b343/\\\",\\\"accessKey\\\":\\\"AKIAIOSFODNN7EXAMPLE\\\",\\\"secretKey\\\":\\\"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY\\\",\\\"sessionToken\\\":\\\"AQoEXAMPLEH4aoAH0gNCAPyJxz4BlCFFxWNE1OPTgk5TthT+FvwqnKwRcOIfrRh3c/LTo6UDdyJwOOvEVPvLXCrrrUtdnniCEXAMPLE/IvU1dYUg2RVAJBanLiHb4IgRmpRV3zrkuWJOgQs8IZZaIv2BXIa2R4OlgkBN9bkUDNCJiBeb/AXlzBBko7b15fjrBs2+cTQtpZ3CYWFXG8C5zqx37wnOE49mRl/+OtkIKGO7fAE\\\"},\\\"supplementaryAnnotations\\\":\\\"latest\\\",\\\"customAnnotations\\\":null}\";\r\n            const string expected = 
\"{\\\"id\\\":\\\"Test\\\",\\\"genomeAssembly\\\":\\\"GRCh38\\\",\\\"vcfUrl\\\":\\\"https://illumina-dev.s3.us-west-2.amazonaws.com/Annotation/input/test.vcf.gz?X-Amz-Expires=604800&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=XXXXXXXXXXXXXXXXXXXX/20191007/us-west-2/s3/aws4_request&X-Amz-Date=20191007T222533Z&X-Amz-SignedHeaders=host&X-Amz-Signature=44433f0ec4875323d8e82084469f4e34b6384aead83f9c176595b96badaba3f8\\\",\\\"tabixUrl\\\":\\\"https://illumina-dev.s3.us-west-2.amazonaws.com/Annotation/input/test.vcf.gz.tbi?X-Amz-Expires=604800&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=XXXXXXXXXXXXXXXXXXXX/20191007/us-west-2/s3/aws4_request&X-Amz-Date=20191007T222533Z&X-Amz-SignedHeaders=host&X-Amz-Signature=19cd9c1244cf156952746e85bfc4977946a80a1110205a3dae9b578647dacd50\\\",\\\"outputDir\\\":{\\\"bucketName\\\":\\\"illumina-early-access\\\",\\\"region\\\":\\\"us-east-1\\\",\\\"path\\\":\\\"/5a2a3c8c-3744-422d-b343/\\\",\\\"accessKey\\\":\\\"XXXXXXXXXXXXXXXXXXXX\\\",\\\"secretKey\\\":\\\"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\\\",\\\"sessionToken\\\":\\\"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\\\"},\\\"supplementaryAnnotations\\\":\\\"latest\\\",\\\"customAnnotations\\\":null}\";\r\n\r\n            string observed = json.Redact();\r\n            Assert.Equal(expected, observed);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Cloud/S3PathTests.cs",
    "content": "﻿using Cloud.Messages;\r\nusing ErrorHandling.Exceptions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Cloud\r\n{\r\n    public sealed class S3PathTests\r\n    {\r\n        [Theory]\r\n        [InlineData(\"/this/is/a/folder/\", false)]\r\n        [InlineData(\"/this/is/a/file\", true)]\r\n        public void ValidatePathFormat_AsExpected(string path, bool isDirectory)\r\n        {\r\n            Assert.Throws<UserErrorException>(() => S3Path.ValidatePathFormat(path, isDirectory));\r\n        }\r\n\r\n        [Fact]\r\n        public void FormatPath_AsExpected()\r\n        {\r\n            Assert.Equal(\"to/the/file\", S3Path.FormatPath(\"/to/the/file\"));\r\n            Assert.Equal(\"to/the/directory/\", S3Path.FormatPath(\"/to/the/directory/\"));\r\n        }\r\n    }\r\n}\r\n\r\n"
  },
  {
    "path": "UnitTests/Cloud/SaUrlsTests.cs",
    "content": "﻿using Cloud.Messages;\r\nusing ErrorHandling.Exceptions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Cloud\r\n{\r\n    public sealed class SaUrlsTests\r\n    {\r\n        [Theory]\r\n        [InlineData(\"test.nsa\", \"test.idx\", \"test.nsi\", \"test.nga\")]\r\n        [InlineData(null, \"test.idx\", \"test.nsi\", \"test.nga\")]\r\n        [InlineData(\"test.nsa\", \"test.idx\", null, \"test.nga\")]\r\n        [InlineData(\"test.nsa\", \"test.idx\", \"test.nsi\", null)]\r\n        [InlineData(null, \"test.idx\", null, null)]\r\n        [InlineData(null, null, null, null)]\r\n        [InlineData(\"test.nsa\", null, null, null)]\r\n        public void SetSaType_InvalidValues_ThrowException(string nsaUrl, string idxUrl, string nsiUrl, string ngaUrl)\r\n        {\r\n            var saUrls = new SaUrls{nsaUrl = nsaUrl, idxUrl = idxUrl, nsiUrl = nsiUrl, ngaUrl = ngaUrl};\r\n            Assert.Throws<UserErrorException>(() => saUrls.GetSaType());\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(\"test.nsa\", \"test.idx\", null, null, CustomSaType.Nsa)]\r\n        [InlineData(null, null, \"test.nsi\", null, CustomSaType.Nsi)]\r\n        [InlineData(null, null, null, \"test.nga\", CustomSaType.Nga)]\r\n        public void SetSaType_AsExpected(string nsaUrl, string idxUrl, string nsiUrl, string ngaUrl, CustomSaType expectSaType)\r\n        {\r\n            var saUrls = new SaUrls { nsaUrl = nsaUrl, idxUrl = idxUrl, nsiUrl = nsiUrl, ngaUrl = ngaUrl };\r\n            Assert.Equal(expectSaType, saUrls.SaType);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Cloud/UploadUtilitiesTests.cs",
    "content": "﻿using System;\r\nusing System.Net;\r\nusing System.Security.Cryptography;\r\nusing System.Threading.Tasks;\r\nusing Amazon.S3.Model;\r\nusing Cloud.Utilities;\r\nusing IO;\r\nusing Moq;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Cloud\r\n{\r\n    public sealed class UploadUtilitiesTests\r\n    {\r\n        private readonly FileMetadata _metadata = new FileMetadata(new byte[] { 0, 1, 2, 3, 4, 5, 6 }, 1);\r\n        private readonly string _filePath = Resources.TopPath(\"clinvar.dict\");\r\n        private readonly AesCryptoServiceProvider _aes = new AesCryptoServiceProvider();\r\n\r\n        private static Mock<IS3Client> GetS3ClientMock()\r\n        {\r\n            var s3ClientMock = new Mock<IS3Client>();\r\n            s3ClientMock.Setup(x => x.PutObjectAsync(It.IsAny<PutObjectRequest>())).ReturnsAsync(new PutObjectResponse());\r\n            return s3ClientMock;\r\n        }\r\n\r\n        private static Mock<IS3Client> GetS3ClientMockAlwaysFail()\r\n        {\r\n            var s3ClientMock = new Mock<IS3Client>();\r\n            s3ClientMock.Setup(x => x.PutObjectAsync(It.IsAny<PutObjectRequest>())).ThrowsAsync(new WebException());\r\n            return s3ClientMock;\r\n        }\r\n\r\n        private static Mock<IS3Client> GetS3ClientMockCanWorkAfterRetries()\r\n        {\r\n            var s3ClientMock = new Mock<IS3Client>();\r\n            s3ClientMock.SetupSequence(x => x.PutObjectAsync(It.IsAny<PutObjectRequest>()))\r\n                .ThrowsAsync(new WebException())\r\n                .ThrowsAsync(new WebException())\r\n                .ReturnsAsync(new PutObjectResponse())\r\n                .ThrowsAsync(new WebException());\r\n\r\n            return s3ClientMock;\r\n        }\r\n\r\n        [Fact]\r\n        public void TryDecryptUpload_AsExpected()\r\n        {\r\n            var s3ClientMock = GetS3ClientMock();\r\n            Assert.True(s3ClientMock.Object.TryDecryptUpload(\"bucket\", 
\"bob.json.gz\", _filePath, _aes, _metadata));\r\n            s3ClientMock.Verify(x => x.PutObjectAsync(It.IsAny<PutObjectRequest>()), Times.Once);\r\n        }\r\n\r\n        [Fact]\r\n        public void TryDecryptUpload_FileNotFound()\r\n        {\r\n            var s3ClientMock = GetS3ClientMock();\r\n            Assert.False(s3ClientMock.Object.TryDecryptUpload(\"bucket\", \"bob.json.gz\", \"bob123\", _aes, _metadata));\r\n            s3ClientMock.Verify(x => x.PutObjectAsync(It.IsAny<PutObjectRequest>()), Times.Never);\r\n        }\r\n\r\n        [Fact]\r\n        public void DecryptUpload_OnlyPutOnceWhenSuccess()\r\n        {\r\n            var s3ClientMock = GetS3ClientMock();\r\n\r\n            s3ClientMock.Object.DecryptUpload(\"bucket\", \"bob.json.gz\", _filePath, _aes, _metadata, 1);\r\n            s3ClientMock.Verify(x => x.PutObjectAsync(It.IsAny<PutObjectRequest>()), Times.Once);\r\n        }\r\n\r\n        [Fact]\r\n        public void DecryptUpload_SuccessWithRetries()\r\n        {\r\n            var s3ClientMock = GetS3ClientMockCanWorkAfterRetries();\r\n\r\n            s3ClientMock.Object.DecryptUpload(\"bucket\", \"bob.json.gz\", _filePath, _aes, _metadata, 1);\r\n            s3ClientMock.Verify(x => x.PutObjectAsync(It.IsAny<PutObjectRequest>()), Times.Exactly(3));\r\n        }\r\n\r\n        [Fact]\r\n        public void DecryptUpload_TimeOutWhenFail()\r\n        {\r\n            var timeOut = TimeSpan.FromMilliseconds(500);\r\n            var s3ClientMockAlwaysFail = GetS3ClientMockAlwaysFail();\r\n            var failTask = Task.Run(() => s3ClientMockAlwaysFail.Object.DecryptUpload(\"bucket\", \"bob.json.gz\", _filePath, _aes, _metadata, 1));\r\n      \r\n            Assert.False(Task.WaitAll(new[] { failTask }, timeOut));\r\n            s3ClientMockAlwaysFail.Verify(x => x.PutObjectAsync(It.IsAny<PutObjectRequest>()), Times.AtLeast(2));\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/CommandLine/Builders/ConsoleAppBuilderDataTests.cs",
    "content": "using CommandLine.Builders;\r\nusing CommandLine.NDesk.Options;\r\nusing VariantAnnotation.Providers;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CommandLine.Builders\r\n{\r\n    public sealed class ConsoleAppBuilderDataTests\r\n    {\r\n        [Fact]\r\n        public void VersionProvider_Set()\r\n        {\r\n            var ops = new OptionSet { { \"test=\", \"test\", v => { } } };\r\n\r\n            var data = new ConsoleAppBuilder(null, ops).UseVersionProvider(new VersionProvider())\r\n                .Parse()\r\n                .Data;\r\n\r\n            Assert.True(data.VersionProvider is VersionProvider);\r\n        }\r\n    }\r\n\r\n    public sealed class ConsoleAppValidatorTests\r\n    {\r\n        [Fact]\r\n        public void ShowBanner_EnabledOutput()\r\n        {\r\n            var ops = new OptionSet { { \"test=\", \"test\", v => { } } };\r\n\r\n            var banner = new ConsoleAppBuilder(null, ops).UseVersionProvider(new VersionProvider())\r\n                .Parse()\r\n                .ShowBanner(\"authors\");\r\n\r\n            Assert.True(banner is ConsoleAppBanner);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/CommandLine/Builders/ConsoleAppBuilderTests.cs",
    "content": "﻿using CommandLine.Builders;\r\nusing CommandLine.NDesk.Options;\r\nusing ErrorHandling;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CommandLine.Builders\r\n{\r\n    public sealed class ConsoleAppBuilderTests\r\n    {\r\n        [Fact]\r\n        public void Parse_UnsupportedOption()\r\n        {\r\n            var ops = new OptionSet { { \"test=\", \"test\", v => { } } };\r\n\r\n            var data = new ConsoleAppBuilder(new[] { \"--if\", \"-\" }, ops)\r\n                .Parse()\r\n                .Data;\r\n\r\n            Assert.Single(data.Errors);\r\n            Assert.Equal(2, data.UnsupportedOps.Count);\r\n        }\r\n\r\n        [Fact]\r\n        public void Parse_Version()\r\n        {\r\n            var ops = new OptionSet { { \"test=\", \"test\", v => { } } };\r\n\r\n            var validator = new ConsoleAppBuilder(new[] {\"--version\"}, ops)\r\n                .Parse();\r\n\r\n            Assert.True(validator.Data.ShowVersion);\r\n\r\n            var exitCode = validator\r\n                .CheckInputFilenameExists(\"dummy\", \"vcf\", \"--in\")\r\n                .ShowBanner(\"authors\")\r\n                .ShowHelpMenu(\"description\", \"example\")\r\n                .ShowErrors()\r\n                .Execute(() => ExitCodes.Success);\r\n\r\n            Assert.Equal(ExitCodes.Success, exitCode);\r\n        }\r\n\r\n        [Fact]\r\n        public void Parse_HelpMenu()\r\n        {\r\n            var ops = new OptionSet { { \"test=\", \"test\", v => { } } };\r\n\r\n            var validator = new ConsoleAppBuilder(new[] { \"--help\" }, ops)\r\n                .Parse();\r\n\r\n            Assert.True(validator.Data.ShowHelpMenu);\r\n\r\n            var exitCode = validator\r\n                .CheckInputFilenameExists(\"dummy\", \"vcf\", \"--in\")\r\n                .ShowBanner(\"authors\")\r\n                .ShowHelpMenu(\"description\", \"example\")\r\n                .ShowErrors()\r\n                .Execute(() => 
ExitCodes.Success);\r\n\r\n            Assert.Equal(ExitCodes.Success, exitCode);\r\n        }\r\n\r\n        [Fact]\r\n        public void Parse_ShowOutput()\r\n        {\r\n            var ops = new OptionSet { { \"test=\", \"test\", v => { } } };\r\n\r\n            var exitCode = new ConsoleAppBuilder(new[] { \"--test\", \"test\" }, ops)\r\n                .Parse()\r\n                .ShowBanner(\"authors\")\r\n                .ShowHelpMenu(\"description\", \"example\")\r\n                .ShowErrors()\r\n                .Execute(() => ExitCodes.Success);\r\n\r\n            Assert.Equal(ExitCodes.Success, exitCode);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CommandLine/Builders/TopLevelAppBuilderTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing CommandLine.Builders;\r\nusing ErrorHandling;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CommandLine.Builders\r\n{\r\n    public sealed class TopLevelAppBuilderTests\r\n    {\r\n        private readonly Dictionary<string, TopLevelOption> _ops;\r\n\r\n        public TopLevelAppBuilderTests()\r\n        {\r\n            _ops = new Dictionary<string, TopLevelOption>\r\n            {\r\n                [\"combine\"] = new TopLevelOption(\"combine cache directories\", EmptyMethod)\r\n            };\r\n        }\r\n\r\n        private static ExitCodes EmptyMethod(string command, string[] args) => ExitCodes.Success;\r\n\r\n        [Fact]\r\n        public void Parse_UnsupportedOption()\r\n        {\r\n            var validator = new TopLevelAppBuilder(new[] {\"--if\", \"-\"}, _ops).Parse();\r\n            Assert.True(validator.Data.Errors.Count > 0);\r\n\r\n            var exitCode = validator\r\n                .ShowBanner(\"banner\")\r\n                .ShowHelpMenu(\"help\")\r\n                .ShowErrors()\r\n                .Execute();\r\n\r\n            Assert.Equal(ExitCodes.UnknownCommandLineOption, exitCode);\r\n        }\r\n\r\n        [Fact]\r\n        public void Parse_ShowHelpMenu()\r\n        {\r\n            var validator = new TopLevelAppBuilder(null, _ops).Parse();\r\n            Assert.True(validator.Data.ShowHelpMenu);\r\n\r\n            var exitCode = validator\r\n                .ShowBanner(\"banner\")\r\n                .ShowHelpMenu(\"help\")\r\n                .ShowErrors()\r\n                .Execute();\r\n\r\n            Assert.Equal(ExitCodes.MissingCommandLineOption, exitCode);\r\n        }\r\n\r\n        [Fact]\r\n        public void Parse_Nominal()\r\n        {\r\n            var exitCode = new TopLevelAppBuilder(new[] { \"combine\", \"dummy\" }, _ops)\r\n                .Parse()\r\n                .ShowBanner(\"banner\")\r\n                .ShowHelpMenu(\"help\")\r\n                
.ShowErrors()\r\n                .Execute();\r\n\r\n            Assert.Equal(ExitCodes.Success, exitCode);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CommandLine/Builders/ValidationExtensionsTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing CommandLine.Builders;\r\nusing CommandLine.NDesk.Options;\r\nusing ErrorHandling;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CommandLine.Builders\r\n{\r\n    public sealed class ValidationExtensionsTests\r\n    {\r\n        private static ExitCodes Execute(IConsoleAppValidator validator)\r\n        {\r\n            return validator\r\n                .DisableOutput()\r\n                .ShowBanner(\"authors\")\r\n                .ShowHelpMenu(\"description\", \"example\")\r\n                .ShowErrors()\r\n                .Execute(() => ExitCodes.Success);\r\n        }\r\n\r\n        [Fact]\r\n        public void CheckInputFilenameExists_FileExists_SuccessExitCode()\r\n        {\r\n            string randomPath = RandomPath.GetRandomPath();\r\n            File.Create(randomPath);\r\n\r\n            var ops = new OptionSet { { \"if=\", \"if\", v => { } } };\r\n\r\n            var exitCode = Execute(new ConsoleAppBuilder(new[] { \"--if\", randomPath }, ops)\r\n                .Parse()\r\n                .CheckInputFilenameExists(randomPath, \"test\", \"--if\"));\r\n\r\n            Assert.Equal(ExitCodes.Success, exitCode);\r\n        }\r\n\r\n        [Fact]\r\n        public void CheckInputFilenameExists_MissingFiles_FileNotFoundExitCode()\r\n        {\r\n            string randomPath = RandomPath.GetRandomPath() + \".anavrin\";\r\n\r\n            var ops = new OptionSet { { \"id=\", \"id\", v => { } } };\r\n\r\n            var exitCode = Execute(new ConsoleAppBuilder(new[] { \"--if\", randomPath }, ops)\r\n                .Parse()\r\n                .CheckInputFilenameExists(randomPath, \"test\", \"--if\"));\r\n\r\n            Assert.Equal(ExitCodes.FileNotFound, exitCode);\r\n        }\r\n\r\n        [Fact]\r\n        public void CheckInputFilenameExists_MissingArguments_MissingCommandLineExitCode()\r\n        {\r\n            var ops = new OptionSet { 
{ \"if=\", \"if\", v => { } } };\r\n\r\n            var exitCode = Execute(new ConsoleAppBuilder(null, ops)\r\n                .Parse()\r\n                .CheckInputFilenameExists(null, \"test\", \"--if\"));\r\n\r\n            Assert.Equal(ExitCodes.MissingCommandLineOption, exitCode);\r\n        }\r\n\r\n        [Fact]\r\n        public void CheckInputFilenameExists_EmptyPath_MissingCommandLineExitCode()\r\n        {\r\n            var ops = new OptionSet { { \"if=\", \"if\", v => { } } };\r\n\r\n            var exitCode = Execute(new ConsoleAppBuilder(new[] { \"--if\" }, ops)\r\n                .Parse()\r\n                .CheckInputFilenameExists(null, \"test\", \"--if\"));\r\n\r\n            Assert.Equal(ExitCodes.MissingCommandLineOption, exitCode);\r\n        }\r\n\r\n        [Fact]\r\n        public void CheckInputFilenameExists_IgnoredPath_SuccessExitCode()\r\n        {\r\n            var ops = new OptionSet { { \"if=\", \"if\", v => { } } };\r\n\r\n            var exitCode = Execute(new ConsoleAppBuilder(new[] { \"--if\", \"-\" }, ops)\r\n                .Parse()\r\n                .CheckInputFilenameExists(\"-\", \"test\", \"--if\",true, \"-\"));\r\n\r\n            Assert.Equal(ExitCodes.Success, exitCode);\r\n        }\r\n\r\n        [Fact]\r\n        public void CheckDirectoryExists_MissingDirectory_PathNotFoundExitCode()\r\n        {\r\n            var ops = new OptionSet { { \"if=\", \"if\", v => { } } };\r\n\r\n            var exitCode = Execute(new ConsoleAppBuilder(new[] { \"--if\", \"-\" }, ops)\r\n                .Parse()\r\n                .CheckDirectoryExists(\"-\", \"test\", \"--if\"));\r\n\r\n            Assert.Equal(ExitCodes.PathNotFound, exitCode);\r\n        }\r\n\r\n        [Fact]\r\n        public void CheckDirectoryExists_EmptyPath_MissingCommandLineOptionExitCode()\r\n        {\r\n            var ops = new OptionSet { { \"if=\", \"if\", v => { } } };\r\n\r\n            var exitCode = Execute(new ConsoleAppBuilder(new[] { \"--if\", 
\"-\" }, ops)\r\n                .Parse()\r\n                .CheckDirectoryExists(null, \"test\", \"--if\"));\r\n\r\n            Assert.Equal(ExitCodes.MissingCommandLineOption, exitCode);\r\n        }\r\n\r\n        [Fact]\r\n        public void CheckEachFilenameExists_MissingFile_MissingCommandLineOptionExitCode()\r\n        {\r\n            var ops = new OptionSet { { \"if=\", \"if\", v => { } } };\r\n            var filenames = new List<string> { \"bob\", null };\r\n\r\n            var exitCode = Execute(new ConsoleAppBuilder(new[] { \"--if\", \"-\" }, ops)\r\n                .Parse()\r\n                .CheckEachFilenameExists(filenames, \"test\", \"--if\"));\r\n\r\n            Assert.Equal(ExitCodes.MissingCommandLineOption, exitCode);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasRequiredParameter_Exists_SuccessExitCode()\r\n        {\r\n            string observedString       = default;\r\n            const string expectedString = \"foo\";\r\n\r\n            var ops = new OptionSet { { \"test=\", \"test\", v => observedString = v } };\r\n\r\n            var exitCode = Execute(new ConsoleAppBuilder(new[] { \"--test\", expectedString }, ops)\r\n                .Parse()\r\n                .HasRequiredParameter(observedString, \"test\", \"--test\"));\r\n\r\n            Assert.Equal(expectedString, observedString);\r\n            Assert.Equal(ExitCodes.Success, exitCode);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasRequiredDate_Exists_SuccessExitCode()\r\n        {\r\n            string observedDate = default;\r\n            const string expectedDate = \"2018-03-14\";\r\n\r\n            var ops = new OptionSet { { \"date=\", \"date\", v => observedDate = v } };\r\n\r\n            var exitCode = Execute(new ConsoleAppBuilder(new[] { \"--date\", expectedDate }, ops)\r\n                .Parse()\r\n                .HasRequiredDate(observedDate, \"date\", \"--date\"));\r\n\r\n            Assert.Equal(expectedDate, observedDate);\r\n   
         Assert.Equal(ExitCodes.Success, exitCode);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasRequiredDate_Exists_BadFormat()\r\n        {\r\n            string observedDate = default;\r\n            var ops = new OptionSet { { \"date=\", \"date\", v => observedDate = v } };\r\n\r\n            var validator = new ConsoleAppBuilder(new[] { \"--date\", \"garbage\" }, ops)\r\n                .Parse()\r\n                .HasRequiredDate(observedDate, \"date\", \"--date\");\r\n\r\n            Assert.True(validator.Data.Errors.Count > 0);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasRequiredDate_DoesNotExist_MissingCommandLineExitCode()\r\n        {\r\n            string observedDate = default;\r\n\r\n            var ops = new OptionSet { { \"date=\", \"date\", v => observedDate = v } };\r\n\r\n            var exitCode = Execute(new ConsoleAppBuilder(new[] { \"--bar\", \"bar\" }, ops)\r\n                .Parse()\r\n                .HasRequiredDate(observedDate, \"date\", \"--date\"));\r\n\r\n            Assert.Equal(ExitCodes.MissingCommandLineOption, exitCode);\r\n        }\r\n\r\n        [Fact]\r\n        public void CheckOutputFilenameSuffix_True()\r\n        {\r\n            var ops = new OptionSet { { \"date=\", \"date\", v => { } } };\r\n\r\n            var validator = new ConsoleAppBuilder(new[] {\"--date\", \"2018-03-14\" }, ops)\r\n                .Parse()\r\n                .CheckOutputFilenameSuffix(\"test.json\", \".json\", \"temp\");\r\n\r\n            Assert.Equal(ExitCodes.Success, validator.Data.ExitCode);\r\n            Assert.Empty(validator.Data.Errors);\r\n        }\r\n\r\n        [Fact]\r\n        public void CheckOutputFilenameSuffix_False()\r\n        {\r\n            var ops = new OptionSet { { \"date=\", \"date\", v => { } } };\r\n\r\n            var validator = new ConsoleAppBuilder(new[] { \"--date\", \"2018-03-14\" }, ops)\r\n                .Parse()\r\n                
.CheckOutputFilenameSuffix(\"test.json\", \".gz\", \"temp\");\r\n\r\n            Assert.NotEqual(ExitCodes.Success, validator.Data.ExitCode);\r\n            Assert.True(validator.Data.Errors.Count > 0);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasRequiredParameter_DoesNotExist_MissingCommandLineExitCode()\r\n        {\r\n            string testString           = default;\r\n            const string expectedString = default;\r\n\r\n            var ops = new OptionSet\r\n            {\r\n                {\"test=\", \"test\", v => testString = v},\r\n                {\"bar=\", \"bar\", v => { } }\r\n            };\r\n\r\n            var exitCode = Execute(new ConsoleAppBuilder(new[] { \"--bar\", \"bar\" }, ops)\r\n                .Parse()\r\n                .HasRequiredParameter(testString, \"test\", \"--test\"));\r\n\r\n            Assert.Equal(expectedString, testString);\r\n            Assert.Equal(ExitCodes.MissingCommandLineOption, exitCode);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasRequiredParameter_MissingArguments_MissingCommandLineExitCode()\r\n        {\r\n            string observedString = default;\r\n            var ops = new OptionSet { { \"test=\", \"test\", v => observedString = v } };\r\n\r\n            var exitCode = Execute(new ConsoleAppBuilder(null, ops)\r\n                .Parse()\r\n                .HasRequiredParameter(observedString, \"test\", \"--test\"));\r\n\r\n            Assert.Equal(ExitCodes.MissingCommandLineOption, exitCode);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CommandLine/NDesk.Options/OptionContextTests.cs",
    "content": "﻿using System;\r\nusing CommandLine.NDesk.Options;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CommandLine.NDesk.Options\r\n{\r\n    public sealed class OptionContextTests\r\n    {\r\n        private readonly OptionSet _optionSet;\r\n\r\n        public OptionContextTests()\r\n        {\r\n            _optionSet = new OptionSet\r\n            {\r\n                { \"a=\", \"test\", v => { /* ignore */ } }\r\n            };\r\n        }\r\n\r\n        [Fact]\r\n        public void Should_ThrowException_When_ContextIsEmpty()\r\n        {\r\n            var optionContext = new OptionContext();\r\n\r\n            Assert.Throws<InvalidOperationException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                string ignore = optionContext.OptionValues[0];\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void Should_ThrowException_When_IndexGreaterThanLength()\r\n        {\r\n            var optionContext = new OptionContext { Option = _optionSet[0] };\r\n\r\n            Assert.Throws<ArgumentOutOfRangeException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                string ignore = optionContext.OptionValues[2];\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void Should_ThrowException_When_RequiredValueMissing()\r\n        {\r\n            var optionContext = new OptionContext { Option = _optionSet[0], OptionName = \"-a\" };\r\n\r\n            Assert.Throws<OptionException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                string ignore = optionContext.OptionValues[0];\r\n            });\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CommandLine/NDesk.Options/OptionSetTests.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Text;\r\nusing CommandLine.NDesk.Options;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CommandLine.NDesk.Options\r\n{\r\n    public sealed class OptionSetTests\r\n    {\r\n        private readonly OptionSet _optionSet;\r\n        private string _a;\r\n\r\n        public OptionSetTests()\r\n        {\r\n            _optionSet = new OptionSet\r\n            {\r\n                {\"a=\", \"\", v => _a = v},\r\n                {\"b\", \"\", v => { }},\r\n                {\"c\", \"\", v => { }},\r\n                {\"n=\", \"\", (int v) => { }}\r\n            };\r\n        }\r\n\r\n        [Fact]\r\n        public void BundledValues()\r\n        {\r\n            var defines = new List<string>();\r\n            var libs = new List<string>();\r\n            bool debug = false;\r\n\r\n            var optionSet = new OptionSet\r\n            {\r\n                { \"D|define=\",  \"\", v => defines.Add (v) },\r\n                { \"L|library:\", \"\", v => libs.Add (v) },\r\n                { \"Debug\",      \"\", v => debug = v != null },\r\n                { \"E\",          \"\", v => { /* ignore */ } }\r\n            };\r\n\r\n            optionSet.Parse(new[] { \"-DNAME\", \"-D\", \"NAME2\", \"-Debug\", \"-L/foo\", \"-L\", \"/bar\", \"-EDNAME3\" });\r\n\r\n            Assert.Equal(3, defines.Count);\r\n            Assert.Equal(\"NAME\", defines[0]);\r\n            Assert.Equal(\"NAME2\", defines[1]);\r\n            Assert.Equal(\"NAME3\", defines[2]);\r\n            Assert.True(debug);\r\n\r\n            Assert.Equal(2, libs.Count);\r\n            Assert.Equal(\"/foo\", libs[0]);\r\n            Assert.Null(libs[1]);\r\n\r\n            Assert.Throws<OptionException>(delegate\r\n            {\r\n                optionSet.Parse(new[] { \"-EVALUENOTSUP\" });\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void RequiredValues()\r\n        {\r\n        
    string a = null;\r\n            int n = 0;\r\n\r\n            var optionSet = new OptionSet\r\n            {\r\n                { \"a=\", \"\", v => a = v },\r\n                { \"n=\", \"\",(int v) => n = v }\r\n            };\r\n\r\n            var extra = optionSet.Parse(new[] { \"a\", \"-a\", \"s\", \"-n=42\", \"n\" });\r\n            Assert.Equal(2, extra.Count);\r\n            Assert.Equal(\"a\", extra[0]);\r\n            Assert.Equal(\"n\", extra[1]);\r\n            Assert.Equal(\"s\", a);\r\n            Assert.Equal(42, n);\r\n\r\n            extra = optionSet.Parse(new[] { \"-a=\" });\r\n            Assert.Empty(extra);\r\n            Assert.Equal(\"\", a);\r\n        }\r\n\r\n        [Fact]\r\n        public void OptionalValues()\r\n        {\r\n            string a = null;\r\n            int n = -1;\r\n            Foo foo = null;\r\n\r\n            var optionSet = new OptionSet\r\n            {\r\n                {\"a:\", \"\", v => a = v},\r\n                {\"n:\", \"\", (int v) => n = v},\r\n                {\"f:\", \"\", (Foo v) => foo = v}\r\n            };\r\n\r\n            optionSet.Parse(new[] { \"-a=s\" });\r\n            Assert.Equal(\"s\", a);\r\n            optionSet.Parse(new[] { \"-a\" });\r\n            Assert.Null(a);\r\n            optionSet.Parse(new[] { \"-a=\" });\r\n            Assert.Equal(\"\", a);\r\n\r\n            optionSet.Parse(new[] { \"-f\", \"A\" });\r\n            Assert.Null(foo);\r\n            optionSet.Parse(new[] { \"-f\" });\r\n            Assert.Null(foo);\r\n\r\n            optionSet.Parse(new[] { \"-n42\" });\r\n            Assert.Equal(42, n);\r\n            optionSet.Parse(new[] { \"-n=42\" });\r\n            Assert.Equal(42, n);\r\n\r\n            Assert.Throws<OptionException>(delegate\r\n            {\r\n                optionSet.Parse(new[] { \"-n=\" });\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void BooleanValues()\r\n        {\r\n            bool a = false;\r\n          
  var optionSet = new OptionSet\r\n            {\r\n                { \"a\", \"\", v => a = v != null }\r\n            };\r\n\r\n            optionSet.Parse(new[] { \"-a\" });\r\n            Assert.True(a);\r\n\r\n            optionSet.Parse(new[] { \"-a+\" });\r\n            Assert.True(a);\r\n\r\n            optionSet.Parse(new[] { \"-a-\" });\r\n            Assert.False(a);\r\n        }\r\n\r\n        [Fact]\r\n        public void CombinationPlatter()\r\n        {\r\n            int a = -1, b = -1;\r\n            string av = null, bv = null;\r\n            int help = 0;\r\n            int verbose = 0;\r\n\r\n            var optionSet = new OptionSet\r\n            {\r\n                { \"a=\", \"\", v => { a = 1; av = v; } },\r\n                { \"b\", \"desc\", v => {b = 2; bv = v;} },\r\n                { \"v\", \"\", v => { ++verbose; } },\r\n                { \"h|?|help\", \"\", v =>\r\n                {\r\n                    switch (v)\r\n                    {\r\n                        case \"h\":\r\n                            help |= 0x1;\r\n                            break;\r\n                        case \"?\":\r\n                            help |= 0x2;\r\n                            break;\r\n                        case \"help\":\r\n                            help |= 0x4;\r\n                            break;\r\n                    }\r\n                } }\r\n            };\r\n\r\n            var e = optionSet.Parse(new[] { \"foo\", \"-v\", \"-a=42\", \"/b-\", \"-a\", \"64\", \"bar\", \"/h\", \"-?\", \"--help\", \"-v\" });\r\n\r\n            Assert.Equal(2, e.Count);\r\n            Assert.Equal(\"foo\", e[0]);\r\n            Assert.Equal(\"bar\", e[1]);\r\n            Assert.Equal(1, a);\r\n            Assert.Equal(\"64\", av);\r\n            Assert.Equal(2, b);\r\n            Assert.Null(bv);\r\n            Assert.Equal(2, verbose);\r\n            Assert.Equal(0x7, help);\r\n        }\r\n\r\n        [Fact]\r\n        public void 
Should_ThrowException_When_MissingRequiredValue()\r\n        {\r\n            Assert.Throws<OptionException>(delegate\r\n            {\r\n                _optionSet.Parse(new[] { \"-a\" });\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void ShouldNot_ThrowException_When_ProvidingMoreOptionsThanExpected()\r\n        {\r\n            var ex = Record.Exception(() =>\r\n            {\r\n                _optionSet.Parse(new[] { \"-a\", \"-a\" });\r\n            });\r\n\r\n            Assert.Null(ex);\r\n            Assert.Equal(\"-a\", _a);\r\n        }\r\n\r\n        [Fact]\r\n        public void ShouldNot_ThrowException_When_ProvidingUnregisteredNamedOption()\r\n        {\r\n            var ex = Record.Exception(() =>\r\n            {\r\n                _optionSet.Parse(new[] { \"-a\", \"-b\" });\r\n            });\r\n\r\n            Assert.Null(ex);\r\n            Assert.Equal(\"-b\", _a);\r\n        }\r\n\r\n        [Fact]\r\n        public void Should_ThrowException_When_ArgumentNull()\r\n        {\r\n            Assert.Throws<ArgumentNullException>(delegate\r\n            {\r\n                _optionSet.Add(null);\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void Should_ThrowException_With_InvalidType()\r\n        {\r\n            Assert.Throws<OptionException>(delegate\r\n            {\r\n                _optionSet.Parse(new[] { \"-n\", \"value\" });\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void Should_ThrowException_When_BundlingWithOptionRequiringValue()\r\n        {\r\n            Assert.Throws<OptionException>(delegate\r\n            {\r\n                _optionSet.Parse(new[] { \"-cz\", \"extra\" });\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void WriteOptionDescriptions()\r\n        {\r\n            var optionSet = new OptionSet\r\n            {\r\n                { \"p|indicator-style=\", \"append / indicator to directories\",    v => {} },\r\n  
              { \"color:\",             \"controls color info\",                  v => {} },\r\n                { \"color2:\",            \"set {color}\",                          v => {} },\r\n                { \"long-desc\",\r\n                    \"This has a really\\nlong, multi-line description that also\\ntests\\n\" +\r\n                    \"the-builtin-supercalifragilisticexpialidicious-break-on-hyphen.  \" +\r\n                    \"Also, a list:\\n\" +\r\n                    \"  item 1\\n\" +\r\n                    \"  item 2\",\r\n                    v => {} },\r\n                { \"long-desc2\",\r\n                    \"IWantThisDescriptionToBreakInsideAWordGeneratingAutoWordHyphenation.\",\r\n                    v => {} },\r\n                { \"long-desc3\",\r\n                    \"OnlyOnePeriod.AndNoWhitespaceShouldBeSupportedEvenWithLongDescriptions\",\r\n                    v => {} },\r\n                { \"long-desc4\",\r\n                    \"Lots of spaces in the middle 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 and more until the end.\",\r\n                    v => {} },\r\n                { \"long-desc5\",\r\n                    \"Lots of spaces in the middle - . - . - . - . - . - . - . 
- and more until the end.\",\r\n                    v => {} },\r\n                { \"h|?|help\",           \"show help text\",                       v => {} },\r\n                { \"version\",            \"output version information and exit\",  v => {} },\r\n                { \"<>\", \"\", v => {} }\r\n            };\r\n\r\n            var expected = new StringBuilder();\r\n            expected.AppendLine(\"  -p, --indicator-style <VALUE>\");\r\n            expected.AppendLine(\"                             append / indicator to directories\");\r\n            expected.AppendLine(\"      --color [<VALUE>]      controls color info\");\r\n            expected.AppendLine(\"      --color2 [<color>]     set color\");\r\n            expected.AppendLine(\"      --long-desc            This has a really\");\r\n            expected.AppendLine(\"                               long, multi-line description that also\");\r\n            expected.AppendLine(\"                               tests\");\r\n            expected.AppendLine(\"                               the-builtin-supercalifragilisticexpialidicious-\");\r\n            expected.AppendLine(\"                               break-on-hyphen.  
Also, a list:\");\r\n            expected.AppendLine(\"                                 item 1\");\r\n            expected.AppendLine(\"                                 item 2\");\r\n            expected.AppendLine(\"      --long-desc2           IWantThisDescriptionToBreakInsideAWordGeneratingAu-\");\r\n            expected.AppendLine(\"                               toWordHyphenation.\");\r\n            expected.AppendLine(\"      --long-desc3           OnlyOnePeriod.\");\r\n            expected.AppendLine(\"                               AndNoWhitespaceShouldBeSupportedEvenWithLongDesc-\");\r\n            expected.AppendLine(\"                               riptions\");\r\n            expected.AppendLine(\"      --long-desc4           Lots of spaces in the middle 1 2 3 4 5 6 7 8 9 0\");\r\n            expected.AppendLine(\"                               1 2 3 4 5 and more until the end.\");\r\n            expected.AppendLine(\"      --long-desc5           Lots of spaces in the middle - . - . - . - . - . -\");\r\n            expected.AppendLine(\"                                . - . 
- and more until the end.\");\r\n            expected.AppendLine(\"  -h, -?, --help             show help text\");\r\n            expected.AppendLine(\"      --version              output version information and exit\");\r\n\r\n            var actual = new StringWriter();\r\n            optionSet.WriteOptionDescriptions(actual);\r\n\r\n            Assert.Equal(expected.ToString(), actual.ToString());\r\n        }\r\n\r\n        [Fact]\r\n        public void OptionBundling()\r\n        {\r\n            string a, b, c, f;\r\n\r\n            a = b = c = f = null;\r\n            var optionSet = new OptionSet\r\n            {\r\n                { \"a\", \"\", v => a = \"a\" },\r\n                { \"b\", \"\", v => b = \"b\" },\r\n                { \"c\", \"\", v => c = \"c\" },\r\n                { \"f=\", \"\", v => f = v }\r\n            };\r\n\r\n            var extra = optionSet.Parse(new[] { \"-abcf\", \"foo\", \"bar\" });\r\n            Assert.Single(extra);\r\n            Assert.Equal(\"bar\", extra[0]);\r\n            Assert.Equal(\"a\", a);\r\n            Assert.Equal(\"b\", b);\r\n            Assert.Equal(\"c\", c);\r\n            Assert.Equal(\"foo\", f);\r\n        }\r\n\r\n        [Fact]\r\n        public void HaltProcessing()\r\n        {\r\n            var optionSet = new OptionSet\r\n            {\r\n                { \"a\", \"\", v => {} },\r\n                { \"b\", \"\", v => {} }\r\n            };\r\n\r\n            var e = optionSet.Parse(new[] { \"-a\", \"-b\", \"--\", \"-a\", \"-b\" });\r\n            Assert.Equal(2, e.Count);\r\n            Assert.Equal(\"-a\", e[0]);\r\n            Assert.Equal(\"-b\", e[1]);\r\n        }\r\n\r\n        private sealed class ContextCheckerOption : Option\r\n        {\r\n            private readonly string _eName;\r\n            private readonly string _eValue;\r\n            private readonly int _index;\r\n\r\n            public ContextCheckerOption(string p, string d, string eName, string eValue, int 
index)\r\n                : base(p, d, 1)\r\n            {\r\n                _eName = eName;\r\n                _eValue = eValue;\r\n                _index = index;\r\n            }\r\n\r\n            protected override void OnParseComplete(OptionContext c)\r\n            {\r\n                Assert.Equal(1, c.OptionValues.Count);\r\n                Assert.Equal(c.OptionValues[0], _eValue);\r\n                Assert.Equal(c.OptionName, _eName);\r\n                Assert.Equal(c.OptionIndex, _index);\r\n                Assert.Equal(c.Option, this);\r\n                Assert.Equal(c.Option.Description, Description);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void OptionContext()\r\n        {\r\n            var optionSet = new OptionSet\r\n            {\r\n                new ContextCheckerOption (\"a=\", \"a desc\", \"/a\",   \"a-val\", 1),\r\n                new ContextCheckerOption (\"b\",  \"b desc\", \"--b+\", \"--b+\",  2),\r\n                new ContextCheckerOption (\"c=\", \"c desc\", \"--c\",  \"C\",     3),\r\n                new ContextCheckerOption (\"d\",  \"d desc\", \"/d-\",  null,    4)\r\n            };\r\n            Assert.Equal(4, optionSet.Count);\r\n            optionSet.Parse(new[] { \"/a\", \"a-val\", \"--b+\", \"--c=C\", \"/d-\" });\r\n        }\r\n\r\n        [Fact]\r\n        public void DefaultHandler()\r\n        {\r\n            var extra = new List<string>();\r\n            var optionSet = new OptionSet\r\n            {\r\n                { \"<>\", \"\", v => extra.Add (v) }\r\n            };\r\n            var e = optionSet.Parse(new[] { \"-a\", \"b\", \"--c=D\", \"E\" });\r\n            Assert.Empty(e);\r\n            Assert.Equal(4, extra.Count);\r\n            Assert.Equal(\"-a\", extra[0]);\r\n            Assert.Equal(\"b\", extra[1]);\r\n            Assert.Equal(\"--c=D\", extra[2]);\r\n            Assert.Equal(\"E\", extra[3]);\r\n        }\r\n\r\n        [Fact]\r\n        public void 
MixedDefaultHandler()\r\n        {\r\n            var tests = new List<string>();\r\n            var optionSet = new OptionSet\r\n            {\r\n                { \"t|<>=\", \"\", v => tests.Add (v) }\r\n            };\r\n            var e = optionSet.Parse(new[] { \"-tA\", \"-t:B\", \"-t=C\", \"D\", \"--E=F\" });\r\n            Assert.Empty(e);\r\n            Assert.Equal(5, tests.Count);\r\n            Assert.Equal(\"A\", tests[0]);\r\n            Assert.Equal(\"B\", tests[1]);\r\n            Assert.Equal(\"C\", tests[2]);\r\n            Assert.Equal(\"D\", tests[3]);\r\n            Assert.Equal(\"--E=F\", tests[4]);\r\n        }\r\n\r\n        [Fact]\r\n        public void DefaultHandlerRuns()\r\n        {\r\n            var formats = new Dictionary<string, List<string>>();\r\n            string format = \"foo\";\r\n\r\n            var optionSet = new OptionSet\r\n            {\r\n                { \"f|format=\", \"\", v => format = v },\r\n                { \"<>\",\r\n                    \"\", v => {\r\n                        if (!formats.TryGetValue (format, out var f)) {\r\n                            f = new List<string> ();\r\n                            formats.Add (format, f);\r\n                        }\r\n                        f.Add (v);\r\n                    } }\r\n            };\r\n\r\n            var e = optionSet.Parse(new[] { \"a\", \"b\", \"-fbar\", \"c\", \"d\", \"--format=baz\", \"e\", \"f\" });\r\n            Assert.Empty(e);\r\n            Assert.Equal(3, formats.Count);\r\n            Assert.Equal(2, formats[\"foo\"].Count);\r\n            Assert.Equal(\"a\", formats[\"foo\"][0]);\r\n            Assert.Equal(\"b\", formats[\"foo\"][1]);\r\n            Assert.Equal(2, formats[\"bar\"].Count);\r\n            Assert.Equal(\"c\",formats[\"bar\"][0]);\r\n            Assert.Equal(\"d\",formats[\"bar\"][1]);\r\n            Assert.Equal(2, formats[\"baz\"].Count);\r\n            Assert.Equal(\"e\", formats[\"baz\"][0]);\r\n            
Assert.Equal(\"f\", formats[\"baz\"][1]);\r\n        }\r\n\r\n        // ReSharper disable once ClassNeverInstantiated.Local\r\n        private class Foo\r\n        {\r\n            private readonly string _s;\r\n\r\n            private Foo(string s) { _s = s; }\r\n            public override string ToString() { return _s; }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CommandLine/NDesk.Options/OptionsTests.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing CommandLine.NDesk.Options;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CommandLine.NDesk.Options\r\n{\r\n    public sealed class OptionsTests\r\n    {\r\n        [Fact]\r\n        public void Should_ThrowException_When_PrototypeNull()\r\n        {\r\n            Assert.Throws<ArgumentNullException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var option = new DefaultOption(null, null);\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void Should_ThrowException_When_PrototypeEmpty()\r\n        {\r\n            Assert.Throws<ArgumentException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var option = new DefaultOption(\"\", null);\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void Should_ThrowException_When_OptionNameEmpty()\r\n        {\r\n            Assert.Throws<InvalidDataException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var option = new DefaultOption(\"a|b||c=\", null);\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void Should_ThrowException_When_OptionTypesConflict()\r\n        {\r\n            Assert.Throws<InvalidDataException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var option = new DefaultOption(\"a=|b:\", null);\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void Should_ThrowException_When_DefaultHandlerRequiresValue()\r\n        {\r\n            Assert.Throws<ArgumentException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var option = new DefaultOption(\"<>=\", null);\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void 
Should_ThrowException_When_DefaultHandlerRequiresValues()\r\n        {\r\n            Assert.Throws<ArgumentException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var option = new DefaultOption(\"<>:\", null);\r\n            });\r\n\r\n            Assert.Throws<ArgumentException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var option = new DefaultOption(\"t|<>=\", null, 2);\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void Should_Not_ThrowException()\r\n        {\r\n            // ReSharper disable NotAccessedVariable\r\n            // ReSharper disable RedundantAssignment\r\n            var ex = Record.Exception(() =>\r\n            {\r\n                var option = new DefaultOption(\"a|b=\", null, 2);                \r\n                option     = new DefaultOption(\"t|<>=\", null, 1);\r\n                option     = new DefaultOption(\"a\", null, 0);\r\n            });\r\n            // ReSharper restore RedundantAssignment\r\n            // ReSharper restore NotAccessedVariable\r\n\r\n            Assert.Null(ex);\r\n        }\r\n\r\n        [Fact]\r\n        public void Should_ThrowException_When_MaxValueCountOutOfRange()\r\n        {\r\n            Assert.Throws<ArgumentOutOfRangeException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var option = new DefaultOption(\"a\", null, -1);\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void Should_ThrowException_When_MaxValueCountZero_And_RequiredType()\r\n        {\r\n            Assert.Throws<ArgumentException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var option = new DefaultOption(\"a=\", null, 0);\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void 
Should_ThrowException_With_IllFormedSeparator()\r\n        {\r\n            Assert.Throws<ArgumentException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var option = new DefaultOption(\"a={\", null);\r\n            });\r\n\r\n            Assert.Throws<ArgumentException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var option = new DefaultOption(\"a=}\", null);\r\n            });\r\n\r\n            Assert.Throws<ArgumentException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var option = new DefaultOption(\"a={{}}\", null);\r\n            });\r\n\r\n            Assert.Throws<ArgumentException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var option = new DefaultOption(\"a={}}\", null);\r\n            });\r\n\r\n            Assert.Throws<ArgumentException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var option = new DefaultOption(\"a={}{\", null);\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void Should_ThrowException_When_CannotProvideSeparatorsWhenTakingOneValue()\r\n        {\r\n            Assert.Throws<InvalidDataException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var option = new DefaultOption(\"a==\", null);\r\n            });\r\n\r\n            Assert.Throws<InvalidDataException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var option = new DefaultOption(\"a={}\", null);\r\n            });\r\n\r\n            Assert.Throws<InvalidDataException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var option = new DefaultOption(\"a=+-*/\", null);\r\n            
});\r\n        }\r\n\r\n        private sealed class DefaultOption : Option\r\n        {\r\n            public DefaultOption(string prototypes, string description)\r\n                : base(prototypes, description, 1)\r\n            {}\r\n\r\n            public DefaultOption(string prototypes, string description, int c)\r\n                : base(prototypes, description, c)\r\n            {}\r\n\r\n            protected override void OnParseComplete(OptionContext c)\r\n            {\r\n                throw new NotImplementedException();\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CommandLine/Utilities/BenchmarkTests.cs",
    "content": "﻿using System;\r\nusing CommandLine.Utilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CommandLine.Utilities\r\n{\r\n    public sealed class BenchmarkTests\r\n    {\r\n        [Fact]\r\n        public void ToHumanReadable_Days()\r\n        {\r\n            const string expectedString = \"1:02:03:04.5\";\r\n            var timeSpan = new TimeSpan(1, 2, 3, 4, 500);\r\n            var observedString = Benchmark.ToHumanReadable(timeSpan);\r\n            Assert.Equal(expectedString, observedString);\r\n        }\r\n\r\n        [Fact]\r\n        public void ToHumanReadable_LessThanOneDay()\r\n        {\r\n            const string expectedString = \"01:02:03.4\";\r\n            var timeSpan = new TimeSpan(0, 1, 2, 3, 400);\r\n            var observedString = Benchmark.ToHumanReadable(timeSpan);\r\n            Assert.Equal(expectedString, observedString);\r\n        }\r\n\r\n        [Fact]\r\n        public void Benchmark_EndToEnd()\r\n        {\r\n            var benchmark   = new Benchmark();\r\n            var elapsedTime = benchmark.GetElapsedTime();\r\n            // perform some work\r\n            double unitsPerSecond = Benchmark.GetElapsedIterationsPerSecond(elapsedTime, 100);\r\n            Assert.True(unitsPerSecond > 0);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CommandLine/Utilities/MemoryUtilitiesTests.cs",
    "content": "﻿using CommandLine.Utilities;\nusing Xunit;\n\nnamespace UnitTests.CommandLine.Utilities\n{\n    public sealed class MemoryUtilitiesTests\n    {\n        [Fact]\n        public void ToHumanReadable_Convert_Bytes()\n        {\n            var observedValue = MemoryUtilities.ToHumanReadable(123);\n            Assert.Equal(\"123 B\", observedValue);\n        }\n\n        [Fact]\n        public void ToHumanReadable_Convert_KiloBytes()\n        {\n            var observedValue = MemoryUtilities.ToHumanReadable(1_234);\n            Assert.Equal(\"1.2 KB\", observedValue);\n        }\n\n        [Fact]\n        public void ToHumanReadable_Convert_MegaBytes()\n        {\n            var observedValue = MemoryUtilities.ToHumanReadable(1_234_567);\n            Assert.Equal(\"1.2 MB\", observedValue);\n        }\n\n        [Fact]\n        public void ToHumanReadable_Convert_GigaBytes()\n        {\n            var observedValue = MemoryUtilities.ToHumanReadable(1_234_567_890);\n            Assert.Equal(\"1.150 GB\", observedValue);\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/CommandLine/VersionProviders/DefaultVersionProviderTests.cs",
    "content": "﻿using CommandLine.VersionProviders;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CommandLine.VersionProviders\r\n{\r\n    public sealed class DefaultVersionProviderTests\r\n    {\r\n        [Fact]\r\n        public void GetProgramVersion()\r\n        {\r\n            var versionProvider = new DefaultVersionProvider();\r\n            Assert.Equal(string.Empty, versionProvider.DataVersion);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Compression/CompressionAlgorithmTests.cs",
    "content": "﻿using System;\r\nusing Compression.Algorithms;\r\nusing UnitTests.Compression.FileHandling;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Compression\r\n{\r\n    public sealed class CompressionAlgorithmTests\r\n    {\r\n        private const int NumOriginalBytes = 20000;\r\n        private readonly byte[] _originalBytes;\r\n\r\n        public CompressionAlgorithmTests()\r\n        {\r\n            _originalBytes = BlockStreamTests.GetRandomBytes(NumOriginalBytes);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(CompressionAlgorithms.Zlib)]\r\n        [InlineData(CompressionAlgorithms.Zstandard)]\r\n        public void RoundTrip(CompressionAlgorithms ca)\r\n        {\r\n            var compressionAlgorithm = GetCompressionAlgorithm(ca);\r\n\r\n            int compressedBufferSize    = compressionAlgorithm.GetCompressedBufferBounds(NumOriginalBytes);\r\n            var observedCompressedBytes = new byte[compressedBufferSize];\r\n            var smallBuffer             = new byte[10];\r\n\r\n            Assert.Throws<InvalidOperationException>(delegate\r\n            {\r\n                compressionAlgorithm.Compress(_originalBytes, NumOriginalBytes, null, compressedBufferSize);\r\n            });\r\n\r\n            Assert.Throws<InvalidOperationException>(delegate\r\n            {\r\n                compressionAlgorithm.Compress(_originalBytes, NumOriginalBytes, smallBuffer, compressedBufferSize);\r\n            });\r\n\r\n            int numCompressedBytes = compressionAlgorithm.Compress(_originalBytes, NumOriginalBytes, observedCompressedBytes,\r\n                compressedBufferSize);\r\n\r\n            int decompressedBufferSize = compressionAlgorithm.GetDecompressedLength(observedCompressedBytes, numCompressedBytes);\r\n            var observedDecompressedBytes = new byte[decompressedBufferSize];\r\n\r\n            Assert.Throws<InvalidOperationException>(delegate\r\n            {\r\n                
compressionAlgorithm.Decompress(observedCompressedBytes, numCompressedBytes, null, decompressedBufferSize);\r\n            });\r\n\r\n            int numDecompressedBytes = compressionAlgorithm.Decompress(observedCompressedBytes, numCompressedBytes,\r\n                observedDecompressedBytes, decompressedBufferSize);\r\n\r\n            Assert.Equal(NumOriginalBytes, numDecompressedBytes);\r\n            Assert.Equal(_originalBytes, observedDecompressedBytes);\r\n        }\r\n\r\n        private static ICompressionAlgorithm GetCompressionAlgorithm(CompressionAlgorithms ca)\r\n        {\r\n            switch (ca)\r\n            {\r\n                case CompressionAlgorithms.Zlib:\r\n                    return new Zlib();\r\n                case CompressionAlgorithms.Zstandard:\r\n                    return new Zstandard();\r\n                default:\r\n                    throw new InvalidOperationException($\"Unknown compression algorithm: {ca}\");\r\n            }\r\n        }\r\n    }\r\n\r\n    public enum CompressionAlgorithms\r\n    {\r\n        Zlib,\r\n        Zstandard\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/Compression/DataStructures/BlockTests.cs",
    "content": "﻿using System;\nusing System.IO;\nusing Compression.Algorithms;\nusing Compression.DataStructures;\nusing Compression.FileHandling;\nusing ErrorHandling.Exceptions;\nusing UnitTests.Compression.FileHandling;\nusing Xunit;\n\nnamespace UnitTests.Compression.DataStructures\n{\n    public sealed class BlockTests\n    {\n        private static readonly byte[] ExpectedDecompressedBytes;\n        private static readonly int NumExpectedUncompressedBytes;\n        private static readonly Zstandard Zstd = new Zstandard(1);\n\n        static BlockTests()\n        {\n            const string polyPhenBase64 = \"G4D6gHGAUIA0gDSA+oAagDKACoD//3GAUIBQgE2ANIAygBqAbYFxgAOASoAAgP//NIACgCSAGoACgAyASoACgBCAB4AMgAKABYAJgJWANIAEQ8gDiAOsA8oD///KA8oDvAO8A8oDjAO8A7wDugNOQ6wDugPKA8oDp4FpQ+2A//86Q6eBOkOqQkaBKkJpQ7KBKkJBQrKBsoGygSBCaUMGQwCAAIAHgASACIAEgBmAAoACgAKAGYAEgBSAB4D//wKAAYABgCSAC4ABgCSAB4ADgAKABYAZgAGAAYAAgAOAB4AUgAeA//8DgAKAAYAkgAiA//9mQ/BCdEJmQ/5BiwN+Qn5CCUJmQ/BC6ELwQvBC/kH+Qf5BiwNmQ3mAA0NhgWGBW0K7gAND1UFhgWWBA0NhgVeB1UHVQf//tYBhgYFDW0IBgDqAA4ABgBSAA4AMgA2AAIACgB6AAoAHgP//AIACgAOAA4A6gA2A//9KgAWADIA0gACASoAQgAWAB4BKgAmAEIAagBCAAoAHgAqASoA0gACAAIAAgACAAIAAgACAAIAAgACAAIAAgP//AIAAgACAAIAAgACAAIB5gPtCYYFhgVFCu4ADQ9VBYYFhgQNDYYFXgdVB1UH//7WAYYE/Q1FCAYAZgASAA4ACgAGAGYADgAOAAYAZgAeAC4AEgAiA//8DgAOAAIABgAuA4YAAgAiArID//6qAe4AXgESA6IAOgFyAXIBZgA2AIIBZgJCB6IAJQosDZkPwQnRC8EKIA4iBfkL///BCZkM3Q2ZD6EJ+Qn5CiIGLA/BCckKsA+NC40KsA+NCrAOTA+NCQUOsA3ZD//+lA6UD20LbQuNCrAOTA///SoACgACANIADgBqAEIACgAOASoAEgAuABIAEgAKAA4AEgEqANIAEQ8oDjAOqA8oD///KA8oDvAO8A8oDjAO8A7wDvQNTQ6wDugPXA8oDBEPIA4gDrAPKA///ygPKA7wDvAPKA4wDvAO8A7oDTkOsA7oDygPKA///dYAAgASAVIAEgDqAG4AGgAyAdYAGgByAFIAUgAeABYAJgOKAVIAAgACAAIAAgACAAIAAgACAAIAAgACA//8AgACAAIAAgACAAIABgACAAICqgCyAH4B7gP//6IAygBeAF4CqgCyARIBcgEaACIAOgA2AXoF7gAOASoAAgP//NIACgCSAGoACgAyASoACgBCAB4AMgAKABYAJgJWANIB+gPtCYYFhgVtCtYADQ8tBYYFhgQNDV4FhgdVBy0H//7WAYYE/Q1tCAIAzgAeABYAkgAKASoAGgASAAoAzgAyA//8VgBWAAoACgAKASoAkgARDygOMA6oDygP//8oDygO8A7wDygOMA7wDvAO6A1NDr
AO6A8gDygMCgCOAAIACgA6A//8lgAGAA4ABgBuAA4AMgA6ADYACgAOAAIA1gBqAAYBXgACAAoAkgP//M4AagASABoBKgASADIAMgBiAAIAEgAiASoAkgP//JYADgAWACoAAgDWAAYAEgAGAE4AGgBWAE4AMgAOAAoAAgDWAGoByQqwD40LjQqwD40KqA5MD40I4Q6wDdkP//3ZDckPbQttC40KsA5MDAIAAgACAAID//wCAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAYAAgHJCrAPjQuNCrAPjQqwDkwPjQkFDrAN2Q///pQOlA9tC20LjQqwDkwMAgKSAKoAggHuA///ogDKAF4AXgKqALIBEgFyAQoAIgA6ADYDogHuAAYBTgAWABYAtgACAPYATgAWACIA9gASAC4ATgBOA//8FgAuAVoA9gACAGIADgAOAAoADgBmAAIADgACABoAEgAeAC4AHgP//AYAAgCSACIABgACAA4AGgCSA//8zgAaABIAEgEqABIAMgBOADYAAgAKAA4BKgCSAeYD7QmGBYYFRQruAA0PVQWGBYYEDQ2GBV4HVQdVB//+1gGGBP0NRQgGAOIADgAKAJ4ACgCiACYACgAKAKIAFgACAB4AHgP//A4ADgHeAJ4D//0eABYAFgDKAA4BHgBGABYADgGWABYAegBGADIAAgAOAA4BlgDKACUKLA2ZD8EJ+QvBCiwOSgX5C///oQmZDN0NiQ+hCfkJ+QoiBiwPwQglCiwNmQ/BCfkLwQosDkoF+Qv//6EJmQzdDYkPoQn5CfkKIgYsD8EIBgDqAA4ABgB6AAoAcgAWAAYACgCiABYAAgP//BIACgAOAA4B3gBOA//9mQ/BC90JmQ5NCiwN+Qn5CCUJmQ/BC6ELwQvBC/kH+QQ5CiwNmQwWAdYAAgP//VIAEgDqAK4AEgBSAdYAEgBuAC4AUgASACIAPgOKAVIC1gANDW0LLQdVBV4E/Q36A1UF5gFFCW0JbQltCxULVQWGB//8/Q1tCCUKLA2ZD8EJ+QvBCiwOSgX5C///oQmZDN0NiQ+hCfkJ+QoiBiwPwQnJCrAP///1BkwNyQpADkwPjQj5DrANyQj5DPkN2Q3xC40I4Q6wDkAMJQosDZkPwQn5C8EKLA5KBfkL//+hCZkM3Q2JD6EJ+Qn5CiIGLA/BCckKsA///kkKTA3JCkAOTA+NCPkOsA3JCPkM+Q3ZDfELjQjhDrAOQA6eBaUPtgP//OkOngTpDqkJGgSpCaUOygSpCQUKygbKBsoEgQmlDBkNyQqwD//+SQpMDckKQA5MD40I+Q6wDckI+Qz5DdkN8QuNCOEOsA5ADp4FpQ+2A//86Q6eBOkOqQkaBKkJpQ7KBKkJBQrKBsoGygSBCaUMGQ3JCrAP//5JCkwNyQpADkwPjQj5DrANyQj5DPkN2Q3xC40I4Q6wDkANyQqwD///9QZMDckK3A5MD40I+Q6wDgUI+Qz5DdkN8QuNCOEOsA7cDCUKLA2ZD8EJ+QvBCiwOSgX5C///oQmZDN0NiQ+hCfkJ+QoiBiwPwQqeBaUPtgP//OkOngTpDqkI4gSpCaUOygSpCp4GygbKBsoEgQmlDBkO1gANDW0LLQdVBV4E/Q36A1UF5gFFCW0JbQltCxULVQWGB//8/Q1tCcoFhQzVD9EL//0dCs0LsQfRCmIBmQzVDZkM1Q/RCPEKzQmiBMkMPgI2AH0NPgU+B5ELmgCZD60ELgQuBJkNGgQaAoUI7Qv//EYDSgIVD5ELEgBdDJ4FdgBdDbYHPQn5C//8ngRBDY4EwQhCAjoDEgGOBu0F9Q9JCMIGNA///ooGNAw2AiQNpQ1pCN0ONAz+AaUNpQzdD1EGlQmRDvQOJA///U0M5Qs9BHENEgVND9kHPQc6AU0OQQoZC1UKQQoWADYDHgJ4DU0MOgF1Dr4FigSpDlYBdQ+VBYoEVgV1DNkIrQqRCpEL//xmA3YCkAypDbEK1A58DnwN8Q58D0
ANzgJ8D//98Q7UDtQO1A58DgkMbQ0yBzwOfAwKA50FGgEaAx4AXgOdBMoAygA+A//80gAGAXoBDgAOAAoAPgPFCLIHdQsMD//8hgMMDFkOwA8MDn0KBQ8MDFkObA/1CYEPmQl9DmAPiA8EDAYArgSyAIYARgACAK4EAgB+AAYD//yGARoBGgEOAC4AQgACAf4FGgBxC0QMLQxJDxgNvQsUDxQMLQ4JD0QP//8YDpQOCQ5WAhkKlA90DxQOtQt0DoAOCQ9ADREPRA8YDgkOCQ9EDoANRgMYDxgP//z5DugPdA9ADUoF0Q01DE0P//9dCeEMBgRNDCoBDQk1DeEN4QxNDz0KSQgGBeEPhQeVCzQN7QgNDwwNQQsMDwgMDQ5oDzgNQgLYDoAN1Q///mUKaA9wDwwOKgM4DtgN5Q84De0LNA7MDeUNPQ84DwwP//8MDsANxQk9DKUPkA84DA4C9gA2ADYAPgAmAvYADgA2AAID//w2AKIAogBGAAYAAgASA/YAogEGAykIpQrRBpIBXgcpCA4DhgAKA//8pQilCKUK9QemAhYADgMpCyEHZQsoDdkPKQpcDZkPKA09DykJZgKwDqQP//1CBckM3Q2FDykLKA8oDJ4CXAytCd0IXQ1SBlwM5gG1CnoBPQy+AEENIQxBD6YD//56AwQN2Q11D+0HOA84D3wO8A+QD1gPOA7QD4APOA9sD3APcA///ugPQA+cD3wPmA+gD5wPnA+cD5wPnA+cD5wPnA+cD5wP//+cD5wPnA+cD5wPoA+cDIoB4Q9RBUoEXQ7eAeEPiQVKB//9NQ9RB2ELYQpNCEYC3gOiAsgNNQ4BD5APOA84D4AO8A+AD3APCA9cD5APOA4WB3APcA///ugPcA+cD4AOaQs4DwgN1Q7QDdUPOAytDcUOQQv//sQPCA7QDsgO+QsaAkELcA8IDUYG0A54DfkNbQ1tDtAP//3pD4IB6Q54DngO0A5sDW0OSQhWA0AOeA+cD5wPmA+YD5wPlA+cD5wPmA+cD5wP//+cD5wPnA+QD5gPnA+gD5wPhA+cD5APcA+cD5APnA+YD4QPkA+cD5QPmA///5APkA+UD5QPnA+YDGYB8gaCAY4AOgEqAtYD//0qABoDigJqA6YA2gemAR4AYgAGAKIEBgG5D4APHA0FD1wO0A9cD0AP//50D1wOaA9ADmgN3gJ0DsgPQA+AD1wNuQ9cD0APQA///xwPgA9FC0AN3gMUD0APXA9cD0APGA7QD0ULgA48DwAPnAy+B///gA8AD4APgA8AD3APkA8cD3APXA9wDwgPXA9sD5wPkA7ED5AP//9aA4APAA98D4AOHQ9YD4APAA9wDuwPQA7QD0APWA+cD3wO6QbQDKID//7QDJYC0A54DXUJ+Q7QD3UKeA0xDW0PFQRhDVkPQA7QDJIBjQgWAA4AHQj2AY0K2QWCAlYC7QmCA///FgMWAA4BcgMyAu0IHQhBCtAP//wyAtANdQpoDngPFQS9DtAPVQltDS0L7QhpC3UJWQ9ADsgNZgL0DyEJ9Qm5D+UG9AwaB+UH//4KBfUJzQ01DTUNLgFmAMYDUA6sDlgPgA8QDTkPgA8QD3APXA///tgPgA7QD3QOlQo+AlgPCA9ED5APdA+gD6AP//+cD6APnA+gD6APoA+gD6APoA+gD6APoA+gD6APoA+gD6AO9A+QD5APiA9wD4gPkA3CB4gP//+AD5APkA+QD5APiA9gDIkPmA+IDxgPkA+ID4gP//98D5gOPA+ID8EHfA+ID5APkA+ID3wPfA48D5gPQAyxC0gPIA6kDY0OpA9ID//+pAyJCuAPHA8gDyAPIA4RDOkMygN4DC0K6A+YD4gPiA+YD2gPmA+UD4QPeA+YD4gPlA+UD4wN3Q///3gPnA+YD5wPoA+gD6APoA+gD6APnA+gD5wPoA+gD6APoA+gD6APnA///6APoA+gD6AP//+cD6APnA+gD6APoA+gD6APoA+gD6APoA+gD6
APoA+gD6AMTgIwDCoD//2dDGoFLgNFCZIGPgagDNYDyQo6AM0IjgY+BhYHKAz5D5wPoA+gD6APoA+gD6APoA+gD6APoA+gD///oA+gD6APoA+gD6APoA+cD6APnA///6APoA+gD6APnA+gD6APoA+gD5wPoA+cD6APoA+gD6APlA+gD5wPnA+gD5gPoA+gD5wPnA+gD5wPoA+gD6AP//+YD6APoA+gDYEPaA6cD/kJKQ6oD///AA1FDTEPaA6cD0wMVQnRDlAPAA58D1AODgDBD2gPTA9QDsAPTA9oDVoDLAxVCwAPaA9QD2gPaA8sDlAP//+YD1AO8A+QDygPYA+QDuAPiA+ID0gPWA+QDQELiA+AD3AOWA///1gPnA+QDHYDSA4gDZ0O6AyxC0gNjQzpDLELSA4gDhEO6A6cDIkL//yxC3gPIA75BmwN7Q3tDLENYQ7ID//9SQ9uAUkOcA3tDsgOYAytDOkIXgLIDe0PnA+gD5wP//+gD6APoA+gD5wPoA+gD6APoA+cD6APnA+gD6APoA+gD5wPoA+gD6APoA+gD6APoA+gD5wPoA+gD6APoA+gD5wP//+cD6APoA94D5gPlA+UD///lA+QD4QPjA8cD5gPmA+YD5gPmA+MD5QPaA+YDzEL+QtoD1APLA78DywPaA///ywOTQsoD0wPUA9oD2gO/A3RDjYDiA9QDc0PhA9oD0gPUA9ID4QNlQ9wDAYEmQ9oD3gPeA+IDwgP///lB5QPeA+gD6APoA+gD5wPoA+gD6APoA+cD6APoA+gD6APoA+gD6APoA+gD///nA+gD6APnA+gD5wPoA+gD5gPnA+gD5wPoA+cD///nA+cD6APoA+gDLELSA8gDqQOQA4gD0gP//6kDMYGnA8cDyAPIA7oDY0PdQh2A3gO6AwqAjQMzQjNCqEIjgagD///kQVmAjoBfQoBC8kKxQhqBIIAKgMoD8kLnA+gD6APoA+gD6APoA+gD6APnA+gD6APoA+gD6APnA///5wPoA+gD/kLaA58D9kLaA5QD1APUA///eEPTA5wDywN+gRWBUUOwA8AD4wPaA4ND4QPeA94D1QPaA+EDLELZAxxD2gPeA94D4QPeA8cD//8cQ+UD4QOnQtsDakNqQ9UDKkPbA4FDakMOQ9sDakOsA8MDrwP//1SADkPiA9UD5wPoA+gD5wPoA+gD6APoA+YD6APoA+cD6APnA///6APnA+gD6APoAwVD4AO2A7QD4AP//+AD1wOlA8gD4AOlA9cD1wPQA92AjAPQA+QD4AN2Q+AD4ID//+ADtAPcA9cDTkPIA+ADtgPYA8tBjAN6Q7YD0APkA90D5wPoA+gD6AP//+gD6APnA+gD5gPoA+gD6APoA+gD6APoA+cD6APnA+gD6AP//+cD6APnA+gD6APoA+gD6APoA+gD6APoA+gD6APoA+gD6ANWgH6BqgOUA8oD/kLaA7IDeEP+QtoDqgPKA8sDywP//zBDUUPiA9MDnEKDgL0DzQPaA31D2gO/A70DUUPaA8wD1APUA9MD//9aQ58D4gPaA+cD6APnA///6APoA+gD6APnA+gD6APoA+gD5wPoA+cD6APoA+gD6AMdQ9EDyAOpA///iAOUA2NDqQMiQtIDyAPIA8gDugOEQ4gD3UK6Ax2A3gPoA9YD///mA+ED5gPmA8cD4wPmA+MD5QOfA98D3gPiA+UD6APmA+cD6APoA+gD6APoA+gD5wPoA+cD6APoA+gD6APoA+gD5wP//+gD6APGA+YD3wPGA+ID2gP6QeIDsAPaA+QD1APiA9QD///TA9oD4gPmA9sD5wPoA+gD5wPoA+cD6APoA+YD5wPoA+cD6APnA///5wPnA+gD6APoA+cD6APoA+cD6APoA+gD6APmA+gD6APnA+gD5wP//+cD5wPoA+gD6APoA+gD6APoA+cD6APoA+gD6APnA+gD6APoA+gD6APoA+gD6APoA///5gPoA+cD5QPoA+cD6
APoA+YD5wPoA+cD6AP//+cD5wPnA+cD6APoA+gD6AP//+cD6APnA+gD6APoA+gD6APoA+gD6APoA+gD6APoA+gD6APnA+gD6APoA///6APoA+cD6APmA+gD6APoA+gD6APoA+gD5wPoA+cDvQPkA+QD4gPcA+ID5ANAQuID///cA+QD5APkA+QD4APYA3hC5gPkA+gD6APoA+gD6APoA+gD6APoA+gD6APoA+gD6APoA+gD6APoA///5wPnA+gD6APoA+gD6APoA+cD6AP//+gD6APoA+gD6APoA+gD5wPoA+gDxgPmA98DuAPkA9oD5APiA///1APkA9MD4gPTA1yB1APaA+ID5gPkA+SAlQNNgReAlQP//0tDREPPQSRClQMkQhNDQ0KGQhiATYHPQpUDlQOWA+AD0QN2Q+ADxAPgA9wD//+2A+ADtAPdA7QDj4C2A8ID0QPkA+AD5wPoA+gD6APoA+gD6APnA+gD///oA+gD6APoA+gD6APoA+cD6APoA+cD6APnA///6APoA+gD6APnA+gD6APoA+gD5wPoA+cD6APoA+gD6AMJQrIDF4D//7IDOkKbA3tDs0FYQ7IDRUJ7Q89CWEO+QZVCd0PfA7ID///kA8MDwQPkA7gD5APiA8JC1APkA8JC4gPbA7QD10KnA9MD5gPkAyND4QPHA7MDlwOgA///2QPIA7sD4QOwA94D2gPHA3lCrQPIA+ED7kHnA+gD6APoA+gD6APoA+gD6APoA+gD6AP//+gD6APoA+gD6APoA+gD5wPoA+gD6APoA+gD6APoA+gD5wPoA+gD6APoA+gD5wP//+cD6APoA+cD6APoA+gD6APoA+gD5wPoA///6APoA+gD6APoA+gD6APnA+gD6APmA+gD6APoA+gD6APoA///6APlA+gD6APoA+gD6APoA+cD5QPoA+gDVEPhA94D2gPTA9oD4QP//9oDxELZA94D3gPeA94DxwOdA+6A5QPeA5sD5AOEQ4RD0wOEQ8SA2gOeA7oD3gOigf//ywOpA4FDmwPKA+QDwQPFA+YDXIH//+QD0wPkA+IDxQPiA+YDzQPiA9oD4gO6A9QD4gPnA+QDHkPNA80DwAOXgJsDvwNhQq8D//+yA80DzAPNA78DmwOvA2FCwANdQ2GAwQP2QhtCRUP4QcIDmIBuQjaAmwO1QkVD6YAbQ///N4CjgdcDnAOcA+YD2gPUA+QDxgPkA+ID1APUA+QD2gPrgOID4gP//8UD2gPmA+QDHUPSA6IDiAOiQogD//+UA2dDA0PSA4RDxwOnA4RDZ0OpA0BDyAM4gN4D5wPlA9oD5gPkA+YD5gP//+ID5gPjA+UD4wPMQt4D5APlA+gD5gPnA+gD6APnA+gD6APoA+gD///oA+gD6APoA+gD5wPnA+gD6APoA+gD5gPoA+cD5QPoA+cD6APoA+YD5wPoA+cD6AP//+cD5wPnA+cD6APoA70D5QPjA+ED4QP//+UDyAPhA8gD4QPjA+UD5QPjA94D1QNVQucD4wM3Q+YD5QPjA+YD2gPmA+UD4wPhA+YD5gP//+YD5gPcA+ED4gPoA+YD5wPoA+gD6APoA///6APoA+gD6APoA+gD6APoA+gD6APoA+gD6APoA8YD5APiA+IDXIHiA+QDqgPiA///4gPkA+QD5APiA98D3wO4A+QDtAOrA+AD4APdA9mA3QPgA05D3QP//90D4APgA+AD3APdA9gDdkPgA60DI0PhA70DvQPeA4ND4QPAA60DlwPhA6sD2QPZA9MD//9agbQD5wPhA/5C2gPUA6oDwgOnA9oDngNaQwBCFULAA9QDsgP//5EDkQOEQ9oD1APNgF1D80KSQvhBkkKGQ3+AtUETgP//X0M0QzRD60K/QR6BBoC5A/NC5wPoA+gD6APoA///6APoA+gD6APoA+gD6APoA+gD6APoA+gD6APoA1BD5APmQmuA2gOeA9kD2gPeQpsD3gOeA8gD//+BQ1BDn
gO8A+QD2gPfQcADMUMxQ8AD4ULAA5gDMUPhQs0DK0OYA5sDgUNjgP//4ULNA8ADvAPkA+QD4gPhA+ID5APyQeIDc0PiA+QD5APkA+QD4gPPA///5wPkA3pD4APIA1RD4AO0A+AD1wPggLYD4AO2A9cDpQP//6IDtAPRA+QD3QP//94DrgOdA9kDK0PeA8sDsQOUA94DsQPSA8sDywNYgXFDkAPmA94D0kLYA8QDuAOMA7YD2ANoQqgDU4HxQdAD0APHA8cDuANkQ///2APFA///6APoA+gD6APoA+gD6APoA+cD6APoA+gD6APoA+cD6APoA+gD6AMPgAdDu4AwgdSABYAjgFiA8IAFgICBD4AEQq2BrYH//52AW4ACgNSA5QPoA+cD5wPoA+YD6APoA+cD5wPoA+cD6APoA+gD///mA+gD6APoAyiAwEKBgc6AAoDOgMBCDoDIgAGA//+BgYGBgYEggZ6AdIABgAhDWYDnA+gD6APnA+gD5wPoA+gD5gPnA+gD5wPoA+cD///nA+cD6APoA+gD5wPoA+gD6APoA///6APoA+gD6APoA+gD6APoA+gD6APoA+gD6APoAyND2wPVA88DwgPOA9sD3oDDA5iAzQPWAzCB1gPWA68DakP//+ID1gM2Q9oDwAMwQ9oDlAPUA8oD//94Q9oDkQPUA3RDVoB4Q5EDywPiA9oDcoBIQ6yABYEOQxKAB0PKQv2AeoF2Q///jUIcQs1BBYCsgHqBdkMHQ+cD6APoA+cD6APoA+gD6APmA+gD6APnA+gD5wP//+gD5wPoA+gD6APnA+gD6APoA+gD6APoA+gD6APoA+gD6AP//+gD6APoA+gD6APoA+gDWkPaA1aA///aA3RD2gPLAzBDywPaA1FDywORA8sD/kKUA8oD5gPaA71BtAMlgP//MkNDQp0Dy0G9QUqB6YBOQlxD10IxQ8dBnkI/gLQDngPnA+gD6APoA///6APoA+cD6APmA+gD6APoA+gD6APoA+gD5wPoA+cDBIAoQ4CAq4D7gFuAMkIKgHuAIoD//waAhIE6gfOAE4AEgCKAtQNcQiKBqgMUgP//bEPjQY8DmIGOgT6AbEMfQvlCXIAfQpiBPEJcgKoDkANqQ9sD1gPPA6UDzwPbA6dCvwNegP//1gPWA9YD1QPBA6wDp0LbA9YDI4GoA2SBI4EMQxSAfYAygDqAw4BnQ///8kI8gI+BIIAagcOAygPqQiOBqAMKgAqAZ0MjgWND6kIageRBjQP//4BCI4E8gCCAGoEzQsoDakMlQrwDlgM4Q///3EIfQ45COEN4gasDc0OrA6sDc0P9QjhDy0GQAw+AI4GoAzND8kLZQYBCqAP//zNCCoBZgDhDM0M+QwxDKEJkgRGAqAPyQpVC0gMdgP//0gNWgNIDyAMWQ6kD0gMKQ70DhEOpA59CiAOnA94D0gMDgExCA4AJgFaAN4BMQgGATID//wiATIDDgJWAlYAjgAOACICjQi+B5wPoA+gD6AP//+gD6APnA+gD5gPoA+gD6APoA+gD6APoA+cD6APnA8ID5wPhA+ED5gPzQeYD5QPhA+QD5wPhA+UD5QPlA///3APlA+gD5wMqgaoD7UEigURDw0EUgGCAbYEigZADYIAyQ///40FlgO1BmIGqAx9ClwPhA5cDNYHhA8wD3gPdA///zAPhA8sD2QOyA6sDlwPLA9QD5wPhA7oD5APiA+ID3wPiA+QD///eA19DX0PkA+QD5APiA98D0wNfQ+YD4gOwQiKBZUNqQ9YDsELbA8wDPUObA9sD///NA80DtgP4gAZDmwPiA9UDgoFogAVDBUNHQ05CvQNOgAVD//9uQwVDlgOQA5ADVoCCgTGA0wOrAyVCvQOrA2pDyEJqQ70D//8yQw+AEIGrA6sDqwONAzhD00IugL0DlgPoA+gD///nA+gD5wPoA+gD6APoA+gD6APoA+gD6APoA+gD6APoA+gD5wPoA+gD5wPoA+gD6
APoA///6APoA+gD6APoA+cD5wPoA+gD6APoA6oD5APiA+ID3wPiA+QD///iA19D3gPiA+ID5APkA9sDywPwQeYD4gNdgNEDiAMdQ8cDLELSA5QDZ0OfQtIDiAOnA7oDqQP//1iAn0LeA8cDd0LJA99Cy0GoA99CKICoA8tB10LJA05CqQP//6+A30JlQ2VD2gPIQucD6APoA+cD6APnA+gD6APmA+cD6APnA+gD5wP//+cD5wPoA+gD6AOZA+QD4gPiA98D4gPkA///4ANpQ+ID5APkA+QD5APfA84DcIHnA+IDZYDBA+5CtULuQkZCM4BlgLVCo4GbA9tCeEMbQxtDmoD4QWWA1wP//8YD5gPfA7gD5APUA+QD4gP//80D5APTA+IDywNcgdQD0wPiA+YD5APnA+gD5wP//+gD6APoA+gD5wPoA+gD6APoA+cD6APnA+gD6APoA+gDRIGVA+SA//+VAxpCS0MTQ+SAz0FxQ89B10IOgM9BTYEkQhpClQNEQ/5C2gOyA/5C2gN0Q9oDywNWgJQD2gOUA8sDUUP//0xDkQOyA+ED1AOoQtQDPID//9QDXYDUA74D7kK+A9QDGEO+A2xDmgOxQnFDrAPfA9QD2gPmA+YD5QPQA+UD5QPOA+UD1ELkA+UD5gPlA+UD5QPjA84D5wP//ziAu0LbgNuA//+MgCJCNoCMgAKAgIHbgNFB0UGEgQeAW4APgMRCCIAigaoD//8KgNVCIoE+QxJDmIEfQqoDH4D5Qh9CH0JggJiBsEKPA0uAlgPnA8gD///kA12B5APkA9UD4APkA9wD4gPgA+IDzwPcA+ID5wPkAw6AMEOcQrlBJoEEgWJDlICvgQSA//+cQpxCnEKSQsyAPoFhgGJDnEKpgDFDSoEdgO9Ck4HvQplC//8DgTFDQYGiQvuABoCpgEGBCEIxQ+9CxoDOAytD///OA/RCzgOxA/FBUEPOA3VDtAMJgRKBvkJ1Q0pD3APCA2SBHYCGQ2pDHYDqQoZDhYFqQxqBpQOGQ4oDigOKA+JC6kJZgKUD//8cgdcDtgM2Q9cD///XA8UDY0OnA9cDpwPQA3BCtgNvgWNDpAPgA9cD5gPoA+cD5wPnA+cD5wPnA+cD5wPnA+cD///nA+cD5wPnA+cD6APnAxmAfIFKgEqAWIBKgAmA//83gAmAD4BHgKCAoIB5gAaAAoABgNtBoIBUgLtCPoGqgCWAqoD//5qAqoBcgGNC1YANQj6B/ICqgASBYIAUQgKAAYAhgS2ALYBVgBiAeIH//xiABoCpgCCAYIBIgAKABoABgAGAk0KbgKSBlwNIQ9ZBSkJ3QpcDVIHWQf//QkNIQ3FD6YA5gL9Cd0JGgDeAlkLHA98D4APcA8cD1wPgA7oD3AOFgdcD4APgA+AD3wPbA9YDqQP//9cDhoDOA3JC/ELCAxmAwQOxA/xCSkPOA3uAsgOxA4ND///0QlBD3APCA///UkPNgA6A1EIOgBlDn4HNgM2AUkMfgIVCN4AzgYSAFIDQgJ4DG0MygDlCUoBwgIyBNIA5QoWAUoAEgDlCBYA5gUKBsoD//wSAUoCWQtxBaUPXA48D///FA7ID4ANxQ48DJYHQA8cD0AOyA8cDtAO0A15D4APXAxyB1wNBQ///1wOAQ9cDxQPRQoRD1wO4A8cDb4FjQxdDnQOAQ+AD0ANKgZcD//8IgJcD6EFwQ0hDVIF3QpcDmoEXQ3aA1kFUgXdCbULBA3FDIkPNA80DzQOdA8IDzQOpgLED//+aA80DzAPMA8wDsQOdA+ZB5APNAwGAOULdgLKA0UFPgJZCMoCFgDKA3EFCgeWAjIFCgTSACYD//5ZC3EHpgJcD//8IgHZD6EFwQ0hDVIF3QpcD6EEXQ9ZBmkJ2gKSBtkLBA3FDA4DKQkGAQYB5gUGAd0IGgEGAKIDIQWKAAoASgRKBA4D//yiATkMpQqgD4APgA9wD1gPcA+ADR0PcA///AkPgA98D3wPbA9wD0
ANwQ+QD3ANUgZcDK0I4QpcDD4CXA3ZD//+/QpMDRoB2Q7ZCRoCkgbZCF0PBA3ZDBIAGgf//DoBKgACAkIAJgA+ACYDEgAGALIAsgCGAAIAPgAGAWIFfgG+ANkOhQmBCZoFWQjZDCoARQgaAFICqQvlCYUJhQp2BU4H//zZDqkL//+QD2wPCA9wDugOYQtwD0APOA+AD1gPbA9YD1gOlA80DzQPkA9YDYYASQwiAlIDUQgWA0UJ2QpSAb4EZQwWAj0LiQWWB//8ZgG+BfkPUQuED///mA+cD5wPmA+cD5gPmA+QD5wPnA+cD5wPnA+UD5QPkA+cD5wMmgORBhYDigF+AbYBGQv//roABgLyACYCIgehBPoFpgDmAAYBGQj6BukG0A///F4CdA9VCmgO3QktCS0K0A5JCfkO3QvdCxUHdQpeA3wOyAxmA20EogBmAf4EYgOmAroABgCiAKIEBgHmAAYD//xiAJoB5gNtBKIGwgP//6UHpQQRDbYF0QwKBd4EggAxDOkIMQ89Cx0IbgAKBSYGuA0JDVYH//xpD4EJFQmFCn4AaQ+BCHkK1A+BCnwN/Q1hDboAeQmtCsgM/gLeAeEPUQTOAE0MjQktDgIH//zWADEPKQdhCKYAQgAqBkoE1gHhDTUP//6UDe4EggEiAbEKlAxKBhYEagYgDOENlQ8hCBUMeQh5CMYClA8hCvUHgA9wD0APgA6ID4APWA9YDtAPgA9sD2wPcA9wDcEP//7QD5APgA9lCygMogP//qQM3Q8oDIEPZQvaAvAPKQrwDYUOTA6BCZkMEQ9oDygOJgFJDzYANgBtDTYHUQvRB///NgExDQ4EVgAKBB4CJgBSAzYCeAxtDiYBSQ4GBiYBFQtZBFYAzgYmAFoCPQguBBEMOgP//zYBNgTOBVENDQjKAUkJZgJ6AWYA5gPJBAYBLgAGA//8FgI6BNYGZgDmAZIAFgKlCNYEBgASAaIA5gAOAOYDyQSKACYADgFWBToDHgAmAOYD//wKAA4CpQgmACIATQwiAWoA8gf//DoDYQQ6Av4ATQ5CAtEEkgBiAXoD1gIuAE0MkgN+AMUPyQqJCDoBXQudCo4CiQv//V0IxQypDMUOZQldC40EdgO9CtEEKgHhDCoGcgU1Dt4B1gJNCnIECgXhDHYDPQthCcUL//zOAgIGyAxND0EG4AyiA//+kAyiA94CkAyZCJkO4A69CjAP4gO5CfEImQ1dD0gOGQ///dIEXgAKAA4AXgAKADoAYgA6A44ABgF2ABoA1gASAAYABgNNBWICoA+QD1gOHQ9wD1gPgA7wDuwP//9sD1gPbA/pCzwPQA8IDugPkA9wD2oCNA1lCWUIegAxCiQMjgFlC//82Q8NCY0M2QzBDQYBBgCiAjQMrQuYD6APnA+cD5wPnA+cD5wPnA+cD5wPnA///5wPnA+cD5wPnA+gD5wMDgNpCcIF6geJBBYAgQwmAM4E8gD1C7UE9QopCPUKagM2A//+CQ91CLIGXA3ZDcUOaQktDlwMZgBdDCIDeQnZDdkN2Q0hDv0LzQf//lwN2QzBCuAPmQttBYUJ8Qv//hkPbQeZCuANyQqEDdoBWQjBC7kImQ7gDdoBpQ+ADs0L//9cDgEPXA9ADaUPHA9cDJYHQA7IDuANHQ50DzwPlA9cDnQPXA88D0ANpQ9AD3AM2Q9ADJYHQA88D1wPQA9ADxgPHAxdD4AP//3BCygO8A7sDl4CqA8oD2EG8A0iAlAO8A7wDygO8A60DZkP//8oDUkOoA+AD4APcA9YD3APkAxxC3AP//9wD4APfA+AD3wPcA9cD+kLkA9wDVYEYgBNDGkP5QhmAnAMaQxpDpoG1AxNDhUOFQ4VDdIBrQmtC0AP//zeAwQNxQvdCswPwQcMDFkO3QnFCwwOUgHlDeUNHQ///YoBxQtcDswNpQ9cDsYD//8UDmgPXA3FDaUMlgdADhEPQA7IDxwNuQ7QDXkPgA9cDBoAJgRyAHIABgByAC
YEAgBOAAYD//xyAPIA8gCqACYAAgAGACYEUgCWB1wPQA8UDwwOnA9cDHIHFA///0APQA9AD0APQA15DnQMcgeAD0AMFgFJCnoBOgPWAOYBSQiGABYAOgP//eYDOgJ6ANoAJgDaAAYA5QzWBAYAvQgSADoDcQf//6UH9gIWAhYA5QgSAQoFCgfmAMoBSgICAlkLcQfiAnQOkQupBekM1QnxDTUIagD6BnQOtQlNDqUJeQn+AuUH//8QDfkOngBNDh0IVQgWAFUITQw+ALYECgP//h0KHQodCqYE2gS2BOoATQ+CAGkK0A1tDEEK0A91CnQN6Q///m0K0A9VCngOSQheAm0LVQltD0AO0A7CAS0PCQcWA40G3QUtDiYEMgAyA40HCQYRCPID///mAt0EpgUtDykK5QZ0DfkPQQvRCHkOdA59CTUIagPRCfkN+Q35D//8eQ4FCXkKdA3xD5gPnA///4wPnA+UD5wPnA+cD5wPnA+UD5wPnA+cD5QPnA+cD6APnAyeAT0IngDSABYA2gAOAPoA0gCWA8kECgAKA//9SgDaAC4AGgE9CPoBIgIxClIFJgQKBAoHfQv//+oAEgDyBRYHiQeJBioGzgAaADIDfQjNCg4BLQ4OAB4ATQ0KBMoDKQn+AOYFLQwGBCEL//76APIABgcJBmgOEQv//nQPqQeBBU0MZgJ0DqULqQepBnQNeQk1DJUMlQxuAK4DgQcQDnQO+gEtDBID//+NBC4ARQ8pC+YDCQUtDKYGEQrdBN0LFgMJBCEJLQzSArIAZQwiAlIAFgMWAMkKUgBqA//92QueAdkLXQd+AlID8gF2A0UIJgLCAeEP//wWAE0MCgUZDE0MKgdRBeEMCgXFCVoDUQR2ACoEYQrIDSEMDgMFCA4BBgJ6AQYDIQWaAioAogBmAZoBOgRKBEoH//3OAC4B0QgmAsIB0QwKBMYBGQwKBFUPHQv//ykFGQ22Bz0IngBCAHID6gMpBdENIQwWAGUO/gPyAioGPgBlDBYDFgDmAMkIRgL1BMkJvgWGACYD//35DgELpgJcDD4D//5cDmoGXA0hDF4ArQpcD1kEXQ8tB1kF2gCtCbULBA5cDAIDEgA6AAIBfgAmAxIAOgA+ACYCWgBSAHIAXgBeAAoAJgP//WIFjgLCAdEPPQopCgkJ3gXRDqoDKQf//bICMQgxDz0LPQjGAAoEbgHBDDEMBgNtBKIA3gEKAAYDpgAKAKIABgAWAAoCagHmAW4AYgP//D4B8gQOABIC9gA6AD4ABgA+AbYAPgA+AAYCWgACALIAsgCGAAIABgA+ACYH//6SBlwO/QkqBlwN3QnRDQkP//ytClwNtQnZDnoAPgKSBbUIXQ8EDdkNhgBlDCIBdgIBCCIAZQyaA//9hgBJD34C9QSSADoCUgPSAGoB+Q9RC//8QgNOAzIAsgQKAfkIEgKOAHIDCQaOABIDCQSyBLYACgASAG0MkQi2ACoD//wGAwkECgBVCDIBKgC+AIEJigA6BeoCjgC+AB4AtgBtDGUIAgN+AEYARgECAAIDfgAGAAIAGgKyAAYAygAOAAYAKgP//A4B/gXOARIDaQv//AoCKQkSA0kHcQUeAI4HaQgaAaoGvgBOAR4ATgN+AWUOBQgKAIEJKgEqATIAvgC6BDIBKgP//uEEHgAaBHYDMgASAL4AEgCBCBoD8gEhDD0MPQxCAfkJCQ7qAfkL//4CAD0MHQw9DvEJ+QrtBIoAtgI2BAoBmgAaAA4AxgASARoAHgACABIAxgAeA//8BgAGAAIAAgAeAxIAdgE5CzgMWgP//sQNOQrEDmQNngNFCsQOOQn5DyUIngFhCDkMGQ84DmQNOQrEDI4D//7EDTkKxA5kDO4HRQrEDjkJ+Q5CAPYBYQg5DTkPOA5kDRYDUQoNCY4EUgWOB1EL//2OBA4DKQXpCg0KDQiVCFIGLgAOADoAdgROAF4ALgAqA+EL//zZDqkKugFKBNkOcgWBCCoEYgJuAa
IGSgY8D+EJGgLED0UJOQnhDC0KxAzJC//+1QbEDyUJ+Q05DBkNGgKuBa4DOA5kDw0LFA8UDtgNgQ7YDwwMhQp8D//+zQcUDwwO2A5wDhkOGQyFCxoCGQzCAmwP4Qf//fEOqgZsDUENLgANCmwPMQllDaoBOgLRBUkJ8QsQDfEOrgbED//+aQnhDI4CuA9yADkM8QpgDBkOZA5kDeEOogI5CMkLOA64Di4GbA2KBMIB8Q1JCmwPbQf//YoGbA8NCFUMxQhuAA0JIQk6AxAN8QxOASEPFQiVC4UEbgUhDgIC7QQyAIoDFQsVCxULFQiOBPIH//0hDxULEQtYDOEP//9YDV0PWA9YDekPDA9YDowPOA5YDwwNqgS1DwQPlA9YDN4D//6+ASoAEgG+Av0EvgC+ABIBxgdGAX4EMgAKAU4B0gC+ABIBHgP//34DNA84D1gOWA9YDwwPOA1xD1gPWA80D1gPOA5YDlgN6Q+UD1gPlA+cD4wPkA+cD4QPnA+cD4wPmA+cD///nA+YD5gPgA+MD5gPoA+cDgIBIQzOBwYDFQt2AB0PSQbqAI4FIQ///fkIzgG+BOIATgCOBmAMHQ///xQOCQ/BCxQO6QsUDtgPbgFRDxQOGQ7MDhkMHQ5yA6EJOQ+IDxQPkA+cD5wPnA+YD5wPnA+ED5gP//+cD5wPnA+cD5wPmA+YD4APoA+cDX4F+QzFCDYF+QzFCU0MVQ///4kF+QyZCVUNMgBGAX4EmQuNCtQNVQ///sQMLQmeAmQMLgLEDmQNYQg5DsQPRQnpDJkNTQ2eAq4GaQs4DmQPmA+gD///gA+cD4wPnA+cD5gPnA+cD5QPmA+YD5wPlA+YD5wPoA+cD5gPnA+cD5wPlA+UD5wPnA+cD5QPnA+cD5wPnA+cD5gPmA+YD///kAyyACYACgP//bYECgHdCdoBfgASAu0FIgAiBAIEIgS6AB4AsgBZDHELfA+UD5APfA+UD4gPlA+UD2APiA+UD4gPkA+ID///iA+ID5APnA+UD5gPnA+cD5wPlA+UD5wPnA+cD5QPnA+cD5wPnA+cD5gPmA+YD///kA1qAykJmgD+AeEKKgBlCD0L//4qAykIJgLVBCoAFgFqAroATgU5DeEIMgCiBAYAHgHyAEIABgHyAAoAZgOCADIBOgP//AYAMgCSAJIAogSCAc4A2QxiBIYGqQgaA8EICgd2AroD1Qv//7EHsQZyBE4BugB+AjwPwQttBxANZQ8xCFUMVQ5sDo0L1gF2B//8VQ1lDzEJLgFJCo0IxQsQDfEMdgAdCL4AcgKmBHYBEgc+AAYAcgKmBL4CHgP//AYAdgEGAV4DAQqmBB4B3QlKAVYB2gC6AH4EqgD2ALoC7Qf//CIEIgYOAB4ACgASABYAEgOYD6AP//+AD5wPjA+cD5wPmA+cD5wPlA+YD5gPnA+UD5gPnA+gD5wM/gHdCy0EIgUaACIF3Qv//g4AAgAuAwUHLQW2BCIF+gH6AE4B3QgiBRYDUQmuAQoCDQkWAJUIlQv//kIDMQgmAY4HagAKAEoBngB2BVUODQgCAo4ATgBOAK4AMgN2AB4AMgP//BIATgDyAPIAsgACADIABgCiBfID//35D40ImQuNCoIF+QxmBoEK5gFND40IVQxxDHEMNgROAHoC1A35D3wPlA+UD5AP//+QD5QPiA+QD2APlA+UD5QPlA+UD5APkA98D5QPfAwKAGUKVgF2AcoAfgBlCAoBDgAOADIBygL2AGIGQgAWA//8FgBlCGIERgIiB//8BgDOBEYCsgOeAAoBZgIiBAYB3gDCAMIADgB2AVoBLQiqBAoCVgVCAM4BQgCaAlYEPgCSAC4D//zyAbIBsgE2AG4ABgAGArkLrgP//sQMgQ+9CsQOqgbEDeEMnQ9FCsQMnQ3NDeENTQ4+AMULIQt4DsQOggX5DC4D//35D2EF+QxxDoIEcQ35DjoEcQ3VCHENfgTFCFUPVA35DLIB3QiyA//8cQgKAb
YEIgQGAP4C7QQGArYAEgAGALoA/gH6Ad0LLQbRCqQN6Qm1CqQODQosDiwOJgQ5DqQP//44DQUMtgCxC7EJpQ90DpgNegBNDzUIqQhqBtUETQ///KkIFgNFBxULNQs1CeEKqgbmABIATQ81CAoAAgASAAYALgASA//8EgAKAAIATgAeAEYACgACAAIAEgAeAxIAcgLRCpgNkQ9RCekJAQyyAjgNBQ/RCqQMJQ44DQUNBQwdDaUM1Q8oD///lA+cD5wPmA+AD5gPmA+YD5gPkA+cD5wPnA+cD5wPmA+YD5QPnA///YkPgA2JD///WA5YD1gPOAwVDtQPWA7UDzgPgQX5DmQOZA8MD4APWA0WA1EJFgAKAg0K8gBtC1UEGgLaAg0K8gFmB//9CgEWAvIDygFVDg0JOgP//RkJGQgaAeIHuQkuA9UEvgFJCoUKhQqFCO0IvgXiAB4DmQgmBtIA8Q7NCs0IPgLNCNkMKgLNC//8dQjxDNkM8Q6pCs0LMQSiAPEPHQf///UK9QXmBHUKugP1CGIB5gXOAvkLHQWBCakIdQhGACoFugKID/UIagI6BqoCqgCuAaoBEgQKA0YABgJSA6YDhgEeBcYFMgP//AYCOgQKAuIA6Q85Bf4ELgLGAQEPOQX+BsYACQ85BuEJwQmZCEoAXgWGB//+4gK6APEMhgf//x0GcgTxDB4ERgAqAJUIbQrNCUoESgeyApoEKgTxDs0JLgJ1COIE4gVaB0YDuQkuAOIEGgFaBOIHrQUZC9UH//wuAB4DuQkZC4gPmA+UD5QP//+UD5gPhA+UD3QPmA+YD5gPmA+YD5QPlA+ED5gPdAyqBPEMAQ7NCD4AbQjZDGYCzQv//akI8QzZDPEOqQmpCpoFHgBiAeYEAgIiAEoANgBWAAIAAgACACYABgEyADYAcgDGAHIABgP//AYD9gA+ABIDuQjiBOIFGQgGA7kIRgfmAdIDuQvmA60FGQqWB//8LgL6A7kKhQjiAp0JYgAWAVYGdgORBB4AggCCAn4G9gJWB//8EgFiAnYA2gKdC70EhgFFCkICQgBCBWYBRQouAkIADgBCBi4BNgaOBFIEFgP//WYCrQvRBBIDEgACAA4BMgASAIoAvgAKAB4BmgP//HYABgACABIAJgBWAxIAvgCmAW0KQgEGASIEAgAWAloADgP///kGQgJOBoIBmgEGAkIBygFtCoIAEgMSAB4AEgAeACoD//xWAB4AHgGaAB4AAgAGAAIAHgAqACoCNgACAAYDdgCCAE4AfgAGAKIEHgBOA//8CgCCAUYA8gDyAAYAMgAGAH4F8gByAB0IBgP//0YAtgE2BAoABgBKACoEvgLiALYAvgB2AQYARgMBCTYE3gKtCD4H//xCB7oCrQjeAmoADgBaAN4EEQu6A9oCfgHmAN4CrQvRB///dgACADIAggAuA3YABgAGAAYBEgBOAAYAggAuAC4ABgAeAKIF8gLmAekOqgaqBT0M6gX5DHEOqgTFCfkN/QguA40LjQv//MYExQrUDT0OzgAxDBYD//wxDhoAfQm5ChoBegQxDCIAfQlSBGoGLgIuAoIF3Q8VCg4AIQ///BYAIQwGAs0JoQtWAGUIIQwyBGUIvgXeBiIBYgb5BCEMAQx2AB0IvgByATYEdgAqByID//0GAB0IBgLiAPoABgAOAHIBrgMBCqYHGA+ID1QPVA+ID1QPiA98D1QPbA+ID3wP//98D3wPUA9QD3APmA+ID\";\n\n            ExpectedDecompressedBytes    = Convert.FromBase64String(polyPhenBase64);\n            NumExpectedUncompressedBytes = ExpectedDecompressedBytes.Length;\n        }\n\n        private static MemoryStream GetBlockStream(ICompressionAlgorithm 
compressionAlgorithm, byte[] bytes, int numBytes, out int copyLength)\n        {\n            var ms = new MemoryStream();\n            var writeBlock = new Block(compressionAlgorithm);\n\n            copyLength = writeBlock.CopyTo(bytes, 0, numBytes);\n            writeBlock.Write(ms);\n            writeBlock.WriteEof(ms);\n\n            return ms;\n        }\n\n        [Fact]\n        public void QuickLzBlock()\n        {\n            var ms = GetBlockStream(Zstd, ExpectedDecompressedBytes, NumExpectedUncompressedBytes, out var copyLength);\n            ms.Seek(0, SeekOrigin.Begin);\n\n            var readBlock = new Block(Zstd);\n            readBlock.Read(ms);\n\n            var observedDecompressedBytes = new byte[NumExpectedUncompressedBytes];\n            var numObservedDecompressedBytes = readBlock.CopyFrom(observedDecompressedBytes, 0, copyLength);\n\n            Assert.False(readBlock.IsFull);\n            Assert.False(readBlock.HasMoreData);\n            Assert.Equal(NumExpectedUncompressedBytes, numObservedDecompressedBytes);\n            Assert.Equal(ExpectedDecompressedBytes, observedDecompressedBytes);\n        }\n\n        [Fact]\n        public void BlockZeroLengthCopy()\n        {\n            var writeBlock = new Block(Zstd);\n            const int expectedCopyLength = 0;\n\n            Assert.Equal(expectedCopyLength, writeBlock.CopyTo(ExpectedDecompressedBytes, 0, 0));\n            Assert.Equal(expectedCopyLength, writeBlock.CopyFrom(ExpectedDecompressedBytes, 0, 0));\n        }\n\n        [Fact]\n        public void BlockWrongSize()\n        {\n            var ms = GetBlockStream(Zstd, ExpectedDecompressedBytes, NumExpectedUncompressedBytes, out _);\n            ms.Seek(0, SeekOrigin.Begin);\n\n            var readBlock = new Block(Zstd);\n\n            using (var updatedMs = new MemoryStream())\n            {\n                updatedMs.Write(ms.ToArray(), 0, (int)ms.Length);\n                updatedMs.Seek(0, SeekOrigin.Begin);\n\n            
    // read the original header\n                var header = new BlockHeader();\n                header.Read(updatedMs);\n\n                // change and write the header\n                header.NumUncompressedBytes--;\n                updatedMs.Seek(0, SeekOrigin.Begin);\n                header.Write(updatedMs);\n\n                // read the updated block\n                updatedMs.Seek(0, SeekOrigin.Begin);\n                // ReSharper disable once AccessToDisposedClosure\n                Assert.Throws<CompressionException>(delegate { readBlock.Read(updatedMs); });\n            }\n        }\n\n        [Fact]\n        public void BlockTruncation()\n        {\n            var ms = GetBlockStream(Zstd, ExpectedDecompressedBytes, NumExpectedUncompressedBytes, out _);\n            ms.Seek(0, SeekOrigin.Begin);\n\n            var readBlock = new Block(Zstd);\n\n            using (var truncatedMs = new MemoryStream())\n            {\n                truncatedMs.Write(ms.ToArray(), 0, (int)ms.Length - 100);\n                truncatedMs.Seek(0, SeekOrigin.Begin);\n                // ReSharper disable once AccessToDisposedClosure\n                Assert.Throws<IOException>(delegate { readBlock.Read(truncatedMs); });\n            }\n        }\n\n        [Fact]\n        public void UncompressedBlockTruncation()\n        {\n            const int bufferSize = 10000;\n            var buffer = BlockStreamTests.GetRandomBytes(bufferSize);\n\n            var ms = GetBlockStream(Zstd, buffer, bufferSize, out _);\n            ms.Seek(0, SeekOrigin.Begin);\n\n            var readBlock = new Block(Zstd);\n\n            using (var truncatedMs = new MemoryStream())\n            {\n                truncatedMs.Write(ms.ToArray(), 0, (int)ms.Length - 100);\n                truncatedMs.Seek(0, SeekOrigin.Begin);\n                // ReSharper disable once AccessToDisposedClosure\n                Assert.Throws<IOException>(delegate { readBlock.Read(truncatedMs); });\n            }\n        
}\n    }\n}\n"
  },
  {
    "path": "UnitTests/Compression/FileHandling/BgzipTextWriterTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.IO.Compression;\r\nusing Compression.FileHandling;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Compression.FileHandling\r\n{\r\n    public sealed class BgzipTextWriterTests\r\n    {\r\n        [Fact]\r\n        public void BgzipTextWriter_EndToEnd()\r\n        {\r\n            var asterisks         = new string('*', BlockGZipStream.BlockGZipFormatCommon.BlockSize);\r\n            var observedLines     = new List<string>();\r\n            var observedPositions = new List<long>();\r\n\r\n            using (var ms = new MemoryStream())\r\n            {\r\n                using (var stream = new BlockGZipStream(ms, CompressionMode.Compress, true, 1))\r\n                using (var writer = new BgzipTextWriter(stream))\r\n                {\r\n                    writer.Flush();\r\n                    writer.WriteLine(\"BOB\");\r\n                    writer.WriteLine();\r\n                    writer.Flush();\r\n                    writer.Write(\"AB\");\r\n                    writer.Write(\"\");\r\n                    writer.Write(\"C\");\r\n                    writer.Write(\" \");\r\n                    writer.WriteLine(\"123\");\r\n                    writer.WriteLine(asterisks);\r\n                    writer.WriteLine(asterisks);\r\n                    writer.WriteLine(asterisks);\r\n                }\r\n\r\n                ms.Position = 0;\r\n\r\n                using (var reader = new BgzipTextReader(new BlockGZipStream(ms, CompressionMode.Decompress)))\r\n                {\r\n                    while (true)\r\n                    {\r\n                        string line = reader.ReadLine();\r\n                        observedPositions.Add(reader.Position);\r\n                        if (line == null) break;\r\n                        observedLines.Add(line);\r\n                    }\r\n                }\r\n            }\r\n\r\n            Assert.Equal(6,         
observedLines.Count);\r\n            Assert.Equal(\"BOB\",     observedLines[0]);\r\n            Assert.Equal(0,         observedLines[1].Length);\r\n            Assert.Equal(\"ABC 123\", observedLines[2]);\r\n            Assert.Equal(asterisks, observedLines[3]);\r\n            Assert.Equal(2162687,   observedPositions[0]);\r\n            Assert.Equal(2162688,   observedPositions[1]);\r\n            Assert.Equal(2162696,   observedPositions[2]);\r\n            Assert.Equal(45678601,  observedPositions[3]);\r\n            Assert.Equal(88932362,  observedPositions[4]);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/Compression/FileHandling/BlockGZipStreamTests.cs",
    "content": "﻿using System;\nusing System.IO;\nusing System.IO.Compression;\nusing System.Text;\nusing Compression.FileHandling;\nusing ErrorHandling.Exceptions;\nusing IO;\nusing UnitTests.TestUtilities;\nusing Xunit;\n\nnamespace UnitTests.Compression.FileHandling\n{\n    public sealed class BlockGZipStreamTests\n    {\n        #region members\n\n        private readonly byte[] _expectedDecompressedBuffer;\n\n        #endregion\n\n        public BlockGZipStreamTests()\n        {\n            // TODO: Fix fragile constructor\n            _expectedDecompressedBuffer = GrabBytes(ResourceUtilities.GetReadStream(Resources.TopPath(\"HelloWorld_original.dat\")));\n        }\n\n        [Fact]\n\t\tpublic void FileIO()\n        {\n            var observedDecompressedBuffer = new byte[_expectedDecompressedBuffer.Length];\n            string randomPath = RandomPath.GetRandomPath();\n\n            // compress the data\n            long observedPosition;\n\n            using (var writer = new BlockGZipStream(FileUtilities.GetCreateStream(randomPath), CompressionMode.Compress, false, 1))\n            {\n                writer.Write(_expectedDecompressedBuffer, 0, _expectedDecompressedBuffer.Length);\n                observedPosition = writer.Position;\n\n                var exception = Record.Exception(() =>\n                {\n                    var buffer = new byte[10];\n                    // ReSharper disable once AccessToDisposedClosure\n                    writer.Read(buffer, 0, 1);\n                });\n\n                Assert.NotNull(exception);\n                Assert.IsType<CompressionException>(exception);\n            }\n\n            const long expectedPosition = 979042574;\n            Assert.Equal(expectedPosition, observedPosition);\n\n            // decompress the data\n            using (var reader = new BlockGZipStream(FileUtilities.GetReadStream(randomPath), CompressionMode.Decompress))\n            {\n                
reader.Read(observedDecompressedBuffer, 0, _expectedDecompressedBuffer.Length);\n\n                var exception = Record.Exception(() =>\n                {\n                    var buffer = new byte[10];\n                    // ReSharper disable once AccessToDisposedClosure\n                    reader.Write(buffer, 0, 1);\n                });\n\n                Assert.NotNull(exception);\n                Assert.IsType<CompressionException>(exception);\n            }\n\n            Assert.Equal(_expectedDecompressedBuffer, observedDecompressedBuffer);\n        }\n\n        [Fact]\n        public void InvalidHeader()\n        {\n            const string dummyString = \"The quick brown fox jumped over the lazy dog.\";\n\n            using (var ms          = new MemoryStream())\n            using (var truncatedMs = new MemoryStream())\n            {\n                using (var writer = new StreamWriter(ms, Encoding.ASCII, 4096, true))\n                {\n                    writer.WriteLine(dummyString);\n                }\n\n                var observedCompressedBuffer = ms.ToArray();\n                truncatedMs.Write(ms.ToArray(), 0, 17);\n\n                ms.Seek(0, SeekOrigin.Begin);\n                truncatedMs.Seek(0, SeekOrigin.Begin);\n\n                // attempt to decompress the data\n                Assert.Throws<CompressionException>(delegate\n                {\n                    using (var reader = new BlockGZipStream(ms, CompressionMode.Decompress, true))\n                    {\n                        reader.Read(observedCompressedBuffer, 0, observedCompressedBuffer.Length);\n                    }\n                });\n\n                Assert.Throws<CompressionException>(delegate\n                {\n                    using (var reader = new BlockGZipStream(truncatedMs, CompressionMode.Decompress, true))\n                    {\n                        reader.Read(observedCompressedBuffer, 0, observedCompressedBuffer.Length);\n                    
}\n                });\n            }\n        }\n\n        [Fact]\n        public void NullStream()\n        {\n            Assert.Throws<ArgumentNullException>(delegate\n            {\n                using (new BlockGZipStream(null, CompressionMode.Decompress))\n                {\n                }\n            });\n        }\n\n        [Fact]\n        public void NotImplementedMethods()\n        {\n            using (var ms = new MemoryStream())\n            {\n                // ReSharper disable AccessToDisposedClosure\n                using (var writer = new BlockGZipStream(ms, CompressionMode.Compress, true))\n                {\n                    Assert.Throws<NotSupportedException>(delegate\n                    {\n                        // ReSharper disable once UnusedVariable\n                        long len = writer.Length;\n                    });\n\n                    Assert.Throws<NotSupportedException>(delegate { writer.SetLength(10); });\n\n                    Assert.Throws<NotSupportedException>(delegate { writer.Seek(0, SeekOrigin.Begin); });\n                }\n                // ReSharper restore AccessToDisposedClosure\n            }\n        }\n\n        [Fact]\n        public void StreamIO()\n        {\n            byte[] observedCompressedBuffer;\n            var observedDecompressedBuffer = new byte[_expectedDecompressedBuffer.Length];\n\n            using (var ms = new MemoryStream())\n            {\n                // compress the data\n                using (var writer = new BlockGZipStream(ms, CompressionMode.Compress, true, 9))\n                {\n                    Assert.Throws<CompressionException>(delegate\n                    {\n                        // ReSharper disable once AccessToDisposedClosure\n                        writer.Read(observedDecompressedBuffer, 0, 1);\n                    });\n\n                    Assert.True(writer.CanWrite);\n                    Assert.False(writer.CanRead);\n                    
Assert.False(writer.CanSeek);\n\n                    writer.Write(_expectedDecompressedBuffer, 0, _expectedDecompressedBuffer.Length);\n                }\n\n                observedCompressedBuffer = ms.ToArray();\n                ms.Seek(0, SeekOrigin.Begin);\n\n                // decompress the data\n                using (var reader = new BlockGZipStream(ms, CompressionMode.Decompress))\n                {\n                    Assert.Throws<CompressionException>(delegate\n                    {\n                        // ReSharper disable once AccessToDisposedClosure\n                        reader.Write(_expectedDecompressedBuffer, 0, 1);\n                    });\n\n                    Assert.False(reader.CanWrite);\n                    Assert.True(reader.CanRead);\n                    Assert.True(reader.CanSeek);\n\n                    reader.Read(observedDecompressedBuffer, 0, _expectedDecompressedBuffer.Length);\n                }\n            }\n\n            Assert.Equal(_expectedDecompressedBuffer, observedDecompressedBuffer);\n            Assert.Equal(9629, observedCompressedBuffer.Length);\n        }\n\n        [Fact]\n        public void StreamTypeMismatch()\n        {\n            string randomPath = RandomPath.GetRandomPath();\n\n            using (var writeStream = new FileStream(randomPath, FileMode.Create, FileAccess.Write))\n            {\n                Assert.Throws<CompressionException>(delegate\n                {\n                    // ReSharper disable once AccessToDisposedClosure\n                    using (new BlockGZipStream(writeStream, CompressionMode.Decompress))\n                    {\n                    }\n                });\n            }\n\n            using (var readStream = FileUtilities.GetReadStream(randomPath))\n            {\n                Assert.Throws<CompressionException>(delegate\n                {\n                    // ReSharper disable once AccessToDisposedClosure\n                    using (new 
BlockGZipStream(readStream, CompressionMode.Compress))\n                    {\n                    }\n                });\n            }\n        }\n\n        [Theory]\n        [InlineData(650*1024)]\n        [InlineData(65*1024)]\n        [InlineData(1024)]\n        public void VariableDataLength(int numBytesToBeWritten)\n        {\n            using (var ms = new MemoryStream())\n            {\n                // compress our data\n                using (var writer = new StreamWriter(new BlockGZipStream(ms, CompressionMode.Compress, true)))\n                {\n                    var currentIndex = 1;\n                    var numBytes     = 0;\n\n                    while (true)\n                    {\n                        string s = $\"Hello World {currentIndex}\";\n                        writer.WriteLine(s);\n                        currentIndex++;\n                        numBytes += s.Length;\n                        if (numBytes > numBytesToBeWritten) break;\n                    }\n                }\n\n                ms.Seek(0, SeekOrigin.Begin);\n\n                // decompress our data\n                using (var reader = FileUtilities.GetStreamReader(new BlockGZipStream(ms, CompressionMode.Decompress)))\n                {\n                    var index = 1;\n\n                    while (true)\n                    {\n                        string expected = $\"Hello World {index}\";\n                        index++;\n\n                        string observed = reader.ReadLine();\n                        if (observed == null) break;\n                        Assert.Equal(expected, observed);\n                    }\n                }\n            }\n        }\n\n        [Fact]\n        public void EndOfFile()\n        {\n            using (var ms = new MemoryStream())\n            {\n                var writeBuffer = ByteUtilities.GetRandomBytes(100);\n                var readBuffer = new byte[60];\n\n                using (var bgzipStream = new 
BlockGZipStream(ms, CompressionMode.Compress, true))\n                {\n                    bgzipStream.Write(writeBuffer, 0, writeBuffer.Length);\n                }\n\n                ms.Position = 0;\n\n                using (var bgzipStream = new BlockGZipStream(ms, CompressionMode.Decompress))\n                {\n                    int numBytesRead = bgzipStream.Read(readBuffer, 0, 0);\n                    Assert.Equal(0, numBytesRead);\n\n                    numBytesRead = bgzipStream.Read(readBuffer, 0, readBuffer.Length);\n                    Assert.Equal(readBuffer.Length, numBytesRead);\n\n                    numBytesRead = bgzipStream.Read(readBuffer, 0, readBuffer.Length);\n                    Assert.Equal(writeBuffer.Length - readBuffer.Length, numBytesRead);\n\n                    numBytesRead = bgzipStream.Read(readBuffer, 0, readBuffer.Length);\n                    Assert.Equal(0, numBytesRead);\n                }\n            }\n        }\n\n        [Fact]\n        public void ReadBlockCorrupted()\n        {\n            using (var ms          = new MemoryStream())\n            using (var truncatedMs = new MemoryStream())\n            using (var corruptMs   = new MemoryStream())\n            {\n                using (var bgzipStream = new BlockGZipStream(ms, CompressionMode.Compress, true))\n                using (var writer      = new StreamWriter(bgzipStream, Encoding.ASCII, 4096))\n                {\n                    writer.WriteLine(\"The quick brown fox jumped over the lazy dog.\");\n                }\n\n                var compressedData = ms.ToArray();\n\n                truncatedMs.Write(compressedData, 0, compressedData.Length - 10);\n                truncatedMs.Position = 0;\n\n                corruptMs.Write(compressedData, 0, BlockGZipStream.BlockGZipFormatCommon.BlockHeaderLength);\n                corruptMs.Write(_expectedDecompressedBuffer, 0, _expectedDecompressedBuffer.Length);\n                corruptMs.Position = 0;\n\n       
         var readBuffer = new byte[60];\n\n                Assert.Throws<CompressionException>(delegate\n                {\n                    using (var bgzipStream = new BlockGZipStream(truncatedMs, CompressionMode.Decompress))\n                    {\n                        bgzipStream.Read(readBuffer, 0, readBuffer.Length);\n                    }\n                });\n\n                Assert.Throws<CompressionException>(delegate\n                {\n                    using (var bgzipStream = new BlockGZipStream(corruptMs, CompressionMode.Decompress))\n                    {\n                        bgzipStream.Read(readBuffer, 0, readBuffer.Length);\n                    }\n                });\n            }\n        }\n\n        [Fact]\n        public void DoubleDispose()\n        {\n            using (var ms = new MemoryStream())\n            {\n                var bgzipStream = new BlockGZipStream(ms, CompressionMode.Compress);\n                bgzipStream.Dispose();\n                bgzipStream.Dispose();\n            }\n        }\n\n        private static byte[] GrabBytes(Stream s)\n        {\n            byte[] buffer;\n\n            using (var ms = new MemoryStream())\n            {\n                s.CopyTo(ms);\n                buffer = ms.ToArray();\n            }\n\n            return buffer;\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/Compression/FileHandling/BlockHeaderTests.cs",
    "content": "﻿using System.IO;\nusing Compression.FileHandling;\nusing ErrorHandling.Exceptions;\nusing Xunit;\n\nnamespace UnitTests.Compression.FileHandling\n{\n    public sealed class BlockHeaderTests\n    {\n        [Fact]\n        public void ReadAndWrite()\n        {\n            const int expectedNumUncompressedBytes = 100;\n            const int expectedNumCompressedBytes   = 50;\n\n            var header = new BlockHeader\n            {\n                NumUncompressedBytes = expectedNumUncompressedBytes,\n                NumCompressedBytes   = expectedNumCompressedBytes\n            };\n\n            using (var ms = new MemoryStream())\n            {\n                header.Write(ms);\n\n                ms.Seek(0, SeekOrigin.Begin);\n\n                header.NumUncompressedBytes = -1;\n                header.NumCompressedBytes   = -1;\n\n                header.Read(ms);\n            }\n\n            Assert.Equal(expectedNumUncompressedBytes, header.NumUncompressedBytes);\n            Assert.Equal(expectedNumCompressedBytes, header.NumCompressedBytes);\n        }\n\n        [Fact]\n        public void SizeMismatch()\n        {\n            using (var ms = new MemoryStream())\n            {\n                var array = new byte[10];\n                ms.Write(array, 0, array.Length);\n\n                ms.Seek(0, SeekOrigin.Begin);\n\n                var header = new BlockHeader();\n                // ReSharper disable once AccessToDisposedClosure\n                Assert.Throws<IOException>(delegate { header.Read(ms); });\n            }\n        }\n\n        [Fact]\n        public void WrongHeaderId()\n        {\n            using (var ms = new MemoryStream())\n            {\n                var array = new byte[12];\n                ms.Write(array, 0, array.Length);\n\n                ms.Seek(0, SeekOrigin.Begin);\n\n                var header = new BlockHeader();\n                // ReSharper disable once AccessToDisposedClosure\n                
Assert.Throws<CompressionException>(delegate { header.Read(ms); });\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "UnitTests/Compression/FileHandling/BlockStreamTests.cs",
    "content": "﻿using System;\nusing System.IO;\nusing System.IO.Compression;\nusing System.Linq;\nusing System.Security.Cryptography;\nusing System.Text;\nusing Compression.Algorithms;\nusing Compression.DataStructures;\nusing Compression.FileHandling;\nusing ErrorHandling.Exceptions;\nusing Genome;\nusing IO;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.IO.Caches;\nusing Xunit;\n\nnamespace UnitTests.Compression.FileHandling\n{\n    public sealed class BlockStreamTests\n    {\n        private const long NumTicks                   = 3;\n        private const GenomeAssembly ExpectedAssembly = GenomeAssembly.hg19;\n        private const string SmallString              = \"Testing 123\";\n        private const string FinalString              = \"Squeamish Ossifrage\";\n\n        private static readonly Random Random  = new Random(10);\n        private static readonly Zstandard Zstd = new Zstandard(1);\n\n        [Fact]\n        public void BlockStream_EndToEnd()\n        {\n            string expectedString = GetRandomString(Block.DefaultSize + 10000);\n\n            var customHeader = new DemoCustomHeader(-1, -1);\n            var header = new DemoHeader(CacheConstants.Identifier, CacheConstants.SchemaVersion,\n                CacheConstants.DataVersion, Source.Ensembl, NumTicks, ExpectedAssembly, customHeader);\n\n            using (var ms = new MemoryStream())\n            {                \n                WriteBlockStream(Zstd, header, customHeader, ms, expectedString);\n                ms.Position = 0;\n                ReadFromBlockStream(Zstd, ms, expectedString);\n            }\n        }\n\n        private static string GetRandomString(int length)\n        {\n            const string chars = \" !\\\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\";\n            return new string(Enumerable.Repeat(chars, length).Select(s => 
s[Random.Next(s.Length)]).ToArray());\n        }\n\n        public static byte[] GetRandomBytes(int numBytes)\n        {\n            var buffer = new byte[numBytes];\n            using (var csp = RandomNumberGenerator.Create()) csp.GetBytes(buffer);\n            return buffer;\n        }\n\n        // ReSharper disable once UnusedParameter.Local\n        private static void ReadFromBlockStream(ICompressionAlgorithm compressionAlgorithm, Stream ms, string expectedRandomString)\n        {\n            // grab the header\n            var header = DemoHeader.Read(ms);\n            Assert.Equal(ExpectedAssembly, header.Assembly);\n\n            using (var blockStream = new BlockStream(compressionAlgorithm, ms, CompressionMode.Decompress))\n            using (var reader      = new ExtendedBinaryReader(blockStream))\n            {\n                CheckWriteException(blockStream);\n\n                // sequential string check\n                CheckString(reader, expectedRandomString);\n                CheckString(reader, SmallString);\n                CheckString(reader, FinalString);\n\n                // random access string check\n                blockStream.SetBlockPosition(header.Custom.FileOffset, header.Custom.InternalOffset);\n                //reader.Reset();\n\n                CheckString(reader, SmallString);\n            }\n        }\n\n        // ReSharper disable once UnusedParameter.Local\n        // ReSharper disable once ParameterOnlyUsedForPreconditionCheck.Local\n        private static void CheckString(ExtendedBinaryReader reader, string expectedString)\n        {\n            string s = reader.ReadAsciiString();           \n            Assert.NotNull(s);\n            Assert.Equal(expectedString.Length, s.Length);\n            Assert.Equal(expectedString, s);\n        }\n\n        private static void WriteBlockStream(ICompressionAlgorithm compressionAlgorithm, DemoHeader header,\n            DemoCustomHeader customHeader, Stream ms, string s)\n        
{\n            using (var blockStream  = new BlockStream(compressionAlgorithm, ms, CompressionMode.Compress, true))\n            using (var writer       = new ExtendedBinaryWriter(blockStream))\n            {\n                CheckReadException(blockStream);\n\n                blockStream.WriteHeader(header.Write);\n\n                writer.WriteOptAscii(s);\n\n                (customHeader.FileOffset, customHeader.InternalOffset) = blockStream.GetBlockPosition();\n                Assert.Equal(customHeader.FileOffset, blockStream.Position);\n\n                writer.WriteOptAscii(SmallString);\n                blockStream.Flush();\n\n                // this will be flushed during dispose\n                writer.WriteOptAscii(FinalString);\n            }\n        }\n\n        private static void CheckReadException(Stream writer)\n        {\n            var exception = Record.Exception(() =>\n            {\n                var buffer = new byte[10];\n                // ReSharper disable once AccessToDisposedClosure\n                writer.Read(buffer, 0, 1);\n            });\n\n            Assert.NotNull(exception);\n            Assert.IsType<CompressionException>(exception);\n        }\n\n        private static void CheckWriteException(Stream reader)\n        {\n            var exception = Record.Exception(() =>\n            {\n                var buffer = new byte[10];\n                // ReSharper disable once AccessToDisposedClosure\n                reader.Write(buffer, 0, 1);\n            });\n\n            Assert.NotNull(exception);\n            Assert.IsType<CompressionException>(exception);\n        }\n\n        [Fact]\n        public void NullStream()\n        {\n            Assert.Throws<ArgumentNullException>(delegate\n            {\n                using (new BlockStream(Zstd, null, CompressionMode.Decompress))\n                {\n                }\n            });\n        }\n\n        [Fact]\n        public void NotImplementedMethods()\n        {\n      
      using (var ms = new MemoryStream())\n            {\n                // ReSharper disable AccessToDisposedClosure\n                using (var writer = new BlockStream(Zstd, ms, CompressionMode.Compress, true))\n                {\n                    Assert.Throws<NotSupportedException>(delegate\n                    {\n                        // ReSharper disable once UnusedVariable\n                        long len = writer.Length;\n                    });\n\n                    Assert.Throws<NotSupportedException>(delegate { writer.SetLength(10); });\n\n                    Assert.Throws<NotSupportedException>(delegate { writer.Seek(0, SeekOrigin.Begin); });\n\n                    Assert.Throws<NotSupportedException>(delegate { writer.Position = 0; });\n                }\n                // ReSharper restore AccessToDisposedClosure\n            }\n        }\n\n        [Fact]\n        public void StreamTypeMismatch()\n        {\n            string randomPath = RandomPath.GetRandomPath();\n\n            using (var writeStream = new FileStream(randomPath, FileMode.Create, FileAccess.Write))\n            {\n                Assert.Throws<ArgumentException>(delegate\n                {\n                    // ReSharper disable once AccessToDisposedClosure\n                    using (new BlockStream(Zstd, writeStream, CompressionMode.Decompress))\n                    {\n                    }\n                });\n            }\n\n            using (var readStream = FileUtilities.GetReadStream(randomPath))\n            {\n                Assert.Throws<ArgumentException>(delegate\n                {\n                    // ReSharper disable once AccessToDisposedClosure\n                    using (new BlockStream(Zstd, readStream, CompressionMode.Compress))\n                    {\n                    }\n                });\n            }\n        }\n\n        [Fact]\n        public void CanReadWriteSeek()\n        {\n            string randomPath = 
RandomPath.GetRandomPath();\n\n            using (var writeStream = new FileStream(randomPath, FileMode.Create, FileAccess.Write))\n            using (var blockStream = new BlockStream(Zstd, writeStream, CompressionMode.Compress))\n            {\n                Assert.False(blockStream.CanRead);\n                Assert.True(blockStream.CanWrite);\n                Assert.True(blockStream.CanSeek);\n            }\n        }\n\n        [Fact]\n        public void ValidateParameters()\n        {\n            using (var ms = new MemoryStream())\n            {\n                using (var blockStream = new BlockStream(Zstd, ms, CompressionMode.Compress))\n                {\n                    var buffer = new byte[10];\n\n                    // ReSharper disable once AssignNullToNotNullAttribute\n                    Assert.Throws<ArgumentNullException>(delegate       { blockStream.Write(null, 10, 10);   });\n                    Assert.Throws<ArgumentOutOfRangeException>(delegate { blockStream.Write(buffer, -1, 10); });\n                    Assert.Throws<ArgumentOutOfRangeException>(delegate { blockStream.Write(buffer, 10, -1); });\n                    Assert.Throws<ArgumentException>(delegate           { blockStream.Write(buffer, 5, 10);  });\n                }\n            }\n        }\n\n        [Fact]\n        public void EndOfFile()\n        {\n            using (var ms = new MemoryStream())\n            {\n                var writeBuffer = GetRandomBytes(100);\n                var readBuffer  = new byte[60];\n\n                using (var blockStream = new BlockStream(Zstd, ms, CompressionMode.Compress, true))\n                {\n                    blockStream.Write(writeBuffer, 0, writeBuffer.Length);\n                }\n\n                ms.Position = 0;\n\n                using (var blockStream = new BlockStream(Zstd, ms, CompressionMode.Decompress))\n                {\n                    int numBytesRead = blockStream.Read(readBuffer, 0, readBuffer.Length);\n  
                  Assert.Equal(readBuffer.Length, numBytesRead);\n\n                    numBytesRead = blockStream.Read(readBuffer, 0, readBuffer.Length);\n                    Assert.Equal(writeBuffer.Length - readBuffer.Length, numBytesRead);\n\n                    numBytesRead = blockStream.Read(readBuffer, 0, readBuffer.Length);\n                    Assert.Equal(0, numBytesRead);\n                }\n            }\n        }\n\n        [Fact]\n        public void DoubleDispose()\n        {\n            using (var ms = new MemoryStream())\n            {\n                var blockStream = new BlockStream(Zstd, ms, CompressionMode.Compress);\n                blockStream.Dispose();\n                blockStream.Dispose();\n            }\n        }\n    }\n\n    public sealed class DemoHeader : Header\n    {\n        public readonly DemoCustomHeader Custom;\n\n        public DemoHeader(string identifier, ushort schemaVersion, ushort dataVersion, Source source,\n            long creationTimeTicks, GenomeAssembly genomeAssembly, DemoCustomHeader customHeader) : base(\n            identifier, schemaVersion, dataVersion, source, creationTimeTicks, genomeAssembly)\n        {\n            Custom = customHeader;\n        }\n\n        public new void Write(BinaryWriter writer)\n        {\n            base.Write(writer);\n            Custom.Write(writer);\n        }\n\n        public static DemoHeader Read(Stream stream)\n        {\n            DemoHeader header;\n\n            using (var reader = new BinaryReader(stream, Encoding.Default, true))\n            {\n                var baseHeader   = Read(reader);\n                var customHeader = DemoCustomHeader.Read(reader);\n\n                header = new DemoHeader(baseHeader.Identifier, baseHeader.SchemaVersion, baseHeader.DataVersion,\n                    baseHeader.Source, baseHeader.CreationTimeTicks, baseHeader.Assembly, customHeader);\n            }\n\n            return header;\n        }\n    }\n\n    public sealed 
class DemoCustomHeader\n    {\n        public long FileOffset;\n        public int InternalOffset;\n\n        public DemoCustomHeader(long fileOffset, int internalOffset)\n        {\n            FileOffset     = fileOffset;\n            InternalOffset = internalOffset;\n        }\n\n        public void Write(BinaryWriter writer)\n        {\n            writer.Write(FileOffset);\n            writer.Write(InternalOffset);\n        }\n\n        public static DemoCustomHeader Read(BinaryReader reader)\n        {\n            long fileOffset    = reader.ReadInt64();\n            int internalOffset = reader.ReadInt32();\n            return new DemoCustomHeader(fileOffset, internalOffset);\n        }\n    }\n}\n"
  },
  {
    "path": "UnitTests/Compression/Utilities/GZipUtilitiesTests.cs",
    "content": "﻿using System.IO;\r\nusing System.IO.Compression;\r\nusing Compression.Utilities;\r\nusing IO;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Compression.Utilities\r\n{\r\n    public sealed class GZipUtilitiesTests\r\n    {\r\n        private const string ExpectedString = \"charlie\";\r\n\r\n        [Fact]\r\n        public void GetAppropriateReadStream_Handle_TextFile()\r\n        {\r\n            string randomPath = RandomPath.GetRandomPath();\r\n\r\n            using (var writer = new StreamWriter(FileUtilities.GetCreateStream(randomPath)))\r\n            {\r\n                writer.WriteLine(ExpectedString);\r\n            }\r\n\r\n            string observedString;\r\n            using (var reader = GZipUtilities.GetAppropriateStreamReader(randomPath))\r\n            {\r\n                observedString = reader.ReadLine();\r\n            }\r\n\r\n            Assert.Equal(ExpectedString, observedString);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetAppropriateReadStream_Handle_GZipFile()\r\n        {\r\n            string randomPath = RandomPath.GetRandomPath();            \r\n\r\n            using (var writer = new StreamWriter(new GZipStream(FileUtilities.GetCreateStream(randomPath), CompressionMode.Compress)))\r\n            {\r\n                writer.WriteLine(ExpectedString);\r\n            }\r\n\r\n            string observedString;\r\n            using (var reader = GZipUtilities.GetAppropriateStreamReader(randomPath))\r\n            {\r\n                observedString = reader.ReadLine();\r\n            }\r\n\r\n            Assert.Equal(ExpectedString, observedString);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetAppropriateReadStream_Handle_BlockGZipFile()\r\n        {\r\n            string randomPath = RandomPath.GetRandomPath();\r\n\r\n            using (var writer = GZipUtilities.GetStreamWriter(randomPath))\r\n            {\r\n                
writer.WriteLine(ExpectedString);\r\n            }\r\n\r\n            string observedString;\r\n            using (var reader = GZipUtilities.GetAppropriateStreamReader(randomPath))\r\n            {\r\n                observedString = reader.ReadLine();\r\n            }\r\n\r\n            Assert.Equal(ExpectedString, observedString);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Compression/Utilities/LibraryUtilitiesTests.cs",
    "content": "﻿using Xunit;\r\nusing Compression.Utilities;\r\n\r\nnamespace UnitTests.Compression.Utilities\r\n{\r\n    public sealed class LibraryUtilitiesTests\r\n    {\r\n        [Fact]\r\n        public void CheckLibrary_ValidLibrary_NoExceptionThrown()\r\n        {\r\n            var ex = Record.Exception(LibraryUtilities.CheckLibrary);\r\n            Assert.Null(ex);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/CustomAnnotationLambda/CustomAnnotationConfigTests.cs",
    "content": "﻿using Cloud.Messages;\r\nusing Cloud.Messages.Custom;\r\nusing CustomAnnotationLambda;\r\nusing ErrorHandling.Exceptions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.CustomAnnotationLambda\r\n{\r\n    public sealed class CustomAnnotationConfigTests\r\n    {\r\n        [Fact]\r\n        public void CheckFieldsNotNull_AsExpected()\r\n        {\r\n            var config = GetConfig();\r\n            config.id = null;\r\n            var exception = Assert.Throws<UserErrorException>(() => config.CheckRequiredFieldsNotNull());\r\n            Assert.Equal(\"id cannot be null.\", exception.Message);\r\n\r\n            config = GetConfig();\r\n            config.tsvUrl = null;\r\n            exception = Assert.Throws<UserErrorException>(() => config.CheckRequiredFieldsNotNull());\r\n            Assert.Equal(\"tsvUrl cannot be null.\", exception.Message);\r\n\r\n            config = GetConfig();\r\n            config.outputDir = null;\r\n            exception = Assert.Throws<UserErrorException>(() => config.CheckRequiredFieldsNotNull());\r\n            Assert.Equal(\"outputDir cannot be null.\", exception.Message);\r\n\r\n            config = GetConfig();\r\n            config.outputDir.bucketName = null;\r\n            exception = Assert.Throws<UserErrorException>(() => config.CheckRequiredFieldsNotNull());\r\n            Assert.Equal(\"bucketName of outputDir cannot be null.\", exception.Message);\r\n\r\n            config = GetConfig();\r\n            config.outputDir.path = null;\r\n            exception = Assert.Throws<UserErrorException>(() => config.CheckRequiredFieldsNotNull());\r\n            Assert.Equal(\"path of outputDir cannot be null.\", exception.Message);\r\n\r\n            config = GetConfig();\r\n            config.outputDir.region = null;\r\n            exception = Assert.Throws<UserErrorException>(() => config.CheckRequiredFieldsNotNull());\r\n            Assert.Equal(\"region of outputDir cannot be null.\", exception.Message);\r\n\r\n    
        config = GetConfig();\r\n            config.outputDir.accessKey = null;\r\n            exception = Assert.Throws<UserErrorException>(() => config.CheckRequiredFieldsNotNull());\r\n            Assert.Equal(\"accessKey of outputDir cannot be null.\", exception.Message);\r\n\r\n            config = GetConfig();\r\n            config.outputDir.secretKey = null;\r\n            exception = Assert.Throws<UserErrorException>(() => config.CheckRequiredFieldsNotNull());\r\n            Assert.Equal(\"secretKey of outputDir cannot be null.\", exception.Message);\r\n\r\n            config = GetConfig();\r\n            config.outputDir.sessionToken = null;\r\n            exception = Assert.Throws<UserErrorException>(() => config.CheckRequiredFieldsNotNull());\r\n            Assert.Equal(\"sessionToken of outputDir cannot be null.\", exception.Message);\r\n\r\n        }\r\n\r\n        private static CustomConfig GetConfig() => new CustomConfig\r\n        {\r\n            id = \"Test\",\r\n            tsvUrl = \"https://somewhere.org/input.tsv\",\r\n            outputDir = new S3Path\r\n            {\r\n                bucketName = \"OutputBucket\",\r\n                path = \"/OutputDir/\",\r\n                region = \"nowhere\",\r\n                accessKey = \"access\",\r\n                secretKey = \"show me the money\",\r\n                sessionToken = \"314159265\"\r\n            }\r\n        };\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/Downloader/AnnotationRepositoryTests.cs",
    "content": "using System.Collections.Generic;\nusing System.IO;\nusing Downloader;\nusing IO;\nusing Moq;\nusing Xunit;\n\nnamespace UnitTests.Downloader\n{\n    public class AnnotationRepositoryTests\n    {\n        [Fact]\n        public void DownloadFiles_Nominal()\n        {\n            const ushort dataVersion = CacheConstants.DataVersion;\n\n            var clientMock = new Mock<IClient>();\n            clientMock.Setup(x => x.DownloadFile(It.IsAny<RemoteFile>())).Returns(true).Verifiable();\n            \n            var files = new List<RemoteFile>\n            {\n                new RemoteFile($\"remote/{dataVersion}/GRCh37/Both.transcripts.ndb\", Path.Combine(\"local\", \"GRCh37\", \"Both.transcripts.ndb\"), \"Both.transcripts.ndb (GRCh37)\"),\n                new RemoteFile($\"remote/{dataVersion}/GRCh37/Both.sift.ndb\",        Path.Combine(\"local\", \"GRCh37\", \"Both.sift.ndb\"),        \"Both.sift.ndb (GRCh37)\"),\n                new RemoteFile($\"remote/{dataVersion}/GRCh37/Both.polyphen.ndb\",    Path.Combine(\"local\", \"GRCh37\", \"Both.polyphen.ndb\"),    \"Both.polyphen.ndb (GRCh37)\"),\n                new RemoteFile($\"remote/{dataVersion}/GRCh38/Both.transcripts.ndb\", Path.Combine(\"local\", \"GRCh38\", \"Both.transcripts.ndb\"), \"Both.transcripts.ndb (GRCh38)\"),\n                new RemoteFile($\"remote/{dataVersion}/GRCh38/Both.sift.ndb\",        Path.Combine(\"local\", \"GRCh38\", \"Both.sift.ndb\"),        \"Both.sift.ndb (GRCh38)\"),\n                new RemoteFile($\"remote/{dataVersion}/GRCh38/Both.polyphen.ndb\",    Path.Combine(\"local\", \"GRCh38\", \"Both.polyphen.ndb\"),    \"Both.polyphen.ndb (GRCh38)\")\n            };\n            \n            AnnotationRepository.DownloadFiles(clientMock.Object, files);\n            clientMock.Verify(x => x.DownloadFile(It.IsAny<RemoteFile>()), Times.Exactly(6));\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/Downloader/ConfigurationTests.cs",
    "content": "﻿using Downloader;\r\nusing VariantAnnotation.SA;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Downloader\r\n{\r\n    public sealed class ConfigurationTests\r\n    {\r\n        [Fact]\r\n        public void Load_ExpectedResults()\r\n        {\r\n            (string hostName, string cacheDir, string referencesDir, string manifestGRCh37, string manifestGRCh38) = Configuration.Load(null, null);\r\n            Assert.EndsWith(\"annotations.nirvana.illumina.com\", hostName);\r\n            Assert.StartsWith(\"/\", cacheDir);\r\n            Assert.EndsWith(\"Cache\", cacheDir);\r\n            Assert.StartsWith(\"/\", referencesDir);\r\n            Assert.EndsWith(\"References\", referencesDir);\r\n            Assert.Contains(\"GRCh37\", manifestGRCh37);\r\n            Assert.Contains(\"GRCh38\", manifestGRCh38);\r\n        }\r\n\r\n        [Fact]\r\n        public void Load_OverrideHostName()\r\n        {\r\n            (string hostName, string _, string _, string _, string _) = Configuration.Load(\"www.illumina.com\", null);\r\n            Assert.Equal(\"www.illumina.com\", hostName);\r\n        }\r\n\r\n        [Fact]\r\n        public void Load_OverrideManifest()\r\n        {\r\n            var config = new global::Cloud.Configuration();\r\n            (string _, string _, string _, string manifestGRCh37, string manifestGRCh38) = Configuration.Load(null, \"Schema23\");\r\n            Assert.Equal($\"http://annotations.nirvana.illumina.com/{config.ManifestDirectory}/{SaCommon.SchemaVersion}/Schema23_SA_GRCh37.txt\", manifestGRCh37);\r\n            Assert.Equal($\"http://annotations.nirvana.illumina.com/{config.ManifestDirectory}/{SaCommon.SchemaVersion}/Schema23_SA_GRCh38.txt\", manifestGRCh38);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/Downloader/FileExtensions/CacheFileExtensionsTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing Downloader;\r\nusing Downloader.FileExtensions;\r\nusing Genome;\r\nusing IO;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Downloader.FileExtensions\r\n{\r\n    public sealed class CacheFileExtensionsTests\r\n    {\r\n        [Fact]\r\n        public void AddCacheFiles_Nominal()\r\n        {\r\n            var comparer = new RemoteFileComparer();\r\n            var genomeAssemblies              = new List<GenomeAssembly> { GenomeAssembly.GRCh37, GenomeAssembly.GRCh38 };\r\n            const string remoteCacheDirectory = \"remote\";\r\n            const string cacheDirectory       = \"local\";\r\n\r\n            const ushort dataVersion = CacheConstants.DataVersion;\r\n\r\n            var expectedFiles = new List<RemoteFile>\r\n            {\r\n                new RemoteFile($\"remote/{dataVersion}/GRCh37/Both.transcripts.ndb\", Path.Combine(\"local\", \"GRCh37\", \"Both.transcripts.ndb\"), \"Both.transcripts.ndb (GRCh37)\"),\r\n                new RemoteFile($\"remote/{dataVersion}/GRCh37/Both.sift.ndb\",        Path.Combine(\"local\", \"GRCh37\", \"Both.sift.ndb\"),        \"Both.sift.ndb (GRCh37)\"),\r\n                new RemoteFile($\"remote/{dataVersion}/GRCh37/Both.polyphen.ndb\",    Path.Combine(\"local\", \"GRCh37\", \"Both.polyphen.ndb\"),    \"Both.polyphen.ndb (GRCh37)\"),\r\n                new RemoteFile($\"remote/{dataVersion}/GRCh38/Both.transcripts.ndb\", Path.Combine(\"local\", \"GRCh38\", \"Both.transcripts.ndb\"), \"Both.transcripts.ndb (GRCh38)\"),\r\n                new RemoteFile($\"remote/{dataVersion}/GRCh38/Both.sift.ndb\",        Path.Combine(\"local\", \"GRCh38\", \"Both.sift.ndb\"),        \"Both.sift.ndb (GRCh38)\"),\r\n                new RemoteFile($\"remote/{dataVersion}/GRCh38/Both.polyphen.ndb\",    Path.Combine(\"local\", \"GRCh38\", \"Both.polyphen.ndb\"),    \"Both.polyphen.ndb (GRCh38)\")\r\n            };\r\n\r\n            var files = new 
List<RemoteFile>();\r\n            files.AddCacheFiles(genomeAssemblies, remoteCacheDirectory, cacheDirectory);\r\n\r\n            Assert.Equal(expectedFiles, files, comparer);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Downloader/FileExtensions/ReferencesFileExtensionTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing Downloader;\r\nusing Downloader.FileExtensions;\r\nusing Genome;\r\nusing ReferenceSequence;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Downloader.FileExtensions\r\n{\r\n    public sealed class ReferencesFileExtensionsTests\r\n    {\r\n        [Fact]\r\n        public void AddReferenceFiles_Nominal()\r\n        {\r\n            var comparer                           = new RemoteFileComparer();\r\n            var genomeAssemblies                   = new List<GenomeAssembly> { GenomeAssembly.GRCh37, GenomeAssembly.GRCh38 };\r\n            const string remoteReferencesDirectory = \"remote\";\r\n            const string referencesDirectory       = \"local\";\r\n\r\n            const ushort dataVersion = ReferenceSequenceCommon.HeaderVersion;\r\n\r\n            var expectedFiles = new List<RemoteFile>\r\n            {\r\n                new RemoteFile($\"remote/{dataVersion}/Homo_sapiens.GRCh37.Nirvana.dat\", Path.Combine(\"local\", \"Homo_sapiens.GRCh37.Nirvana.dat\"), \"Homo_sapiens.GRCh37.Nirvana.dat\"),\r\n                new RemoteFile($\"remote/{dataVersion}/Homo_sapiens.GRCh38.Nirvana.dat\", Path.Combine(\"local\", \"Homo_sapiens.GRCh38.Nirvana.dat\"), \"Homo_sapiens.GRCh38.Nirvana.dat\")\r\n            };\r\n\r\n            var files = new List<RemoteFile>();\r\n            files.AddReferenceFiles(genomeAssemblies, remoteReferencesDirectory, referencesDirectory);\r\n\r\n            Assert.Equal(expectedFiles, files, comparer);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/Downloader/FileExtensions/SupplementaryAnnotationFileExtensionsTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing Downloader;\r\nusing Downloader.FileExtensions;\r\nusing Genome;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Downloader.FileExtensions\r\n{\r\n    public sealed class SupplementaryAnnotationFileExtensionsTests\r\n    {\r\n        [Fact]\r\n        public void AddSupplementaryAnnotationFiles_Nominal()\r\n        {\r\n            var comparer             = new RemoteFileComparer();\r\n            const string saDirectory = \"local\";\r\n\r\n            var remotePaths37 = new List<string>\r\n            {\r\n                \"/0bf0cb93e64824b20f0b551a629596fd-TopMed/2/GRCh37/TOPMed_freeze_5.nsa\"\r\n            };\r\n\r\n\r\n            var remotePaths38 = new List<string>\r\n            {\r\n                \"/43cafec8b0624b77663e2ba1dec32883-gnomAD-exome/2/GRCh38/gnomAD_exome_2.0.2.nsa\",\r\n                \"/2551e067cb59c540a4da905a99ee5ff4-ClinGen/2/GRCh38/ClinGen_20160414.nsi\"\r\n            };\r\n\r\n            var remotePathsByGenomeAssembly = new Dictionary<GenomeAssembly, List<string>>\r\n            {\r\n                [GenomeAssembly.GRCh37] = remotePaths37,\r\n                [GenomeAssembly.GRCh38] = remotePaths38\r\n            };\r\n\r\n            var expectedFiles = new List<RemoteFile>\r\n            {\r\n                new RemoteFile(\"/0bf0cb93e64824b20f0b551a629596fd-TopMed/2/GRCh37/TOPMed_freeze_5.nsa\", Path.Combine(\"local\", \"GRCh37\", \"TOPMed_freeze_5.nsa\"), \"TOPMed_freeze_5.nsa (GRCh37)\"),\r\n                new RemoteFile(\"/0bf0cb93e64824b20f0b551a629596fd-TopMed/2/GRCh37/TOPMed_freeze_5.nsa.idx\", Path.Combine(\"local\", \"GRCh37\", \"TOPMed_freeze_5.nsa.idx\"), \"TOPMed_freeze_5.nsa.idx (GRCh37)\"),\r\n                new RemoteFile(\"/43cafec8b0624b77663e2ba1dec32883-gnomAD-exome/2/GRCh38/gnomAD_exome_2.0.2.nsa\", Path.Combine(\"local\", \"GRCh38\", \"gnomAD_exome_2.0.2.nsa\"), \"gnomAD_exome_2.0.2.nsa (GRCh38)\"),\r\n                new 
RemoteFile(\"/43cafec8b0624b77663e2ba1dec32883-gnomAD-exome/2/GRCh38/gnomAD_exome_2.0.2.nsa.idx\", Path.Combine(\"local\", \"GRCh38\", \"gnomAD_exome_2.0.2.nsa.idx\"), \"gnomAD_exome_2.0.2.nsa.idx (GRCh38)\"),\r\n                new RemoteFile(\"/2551e067cb59c540a4da905a99ee5ff4-ClinGen/2/GRCh38/ClinGen_20160414.nsi\", Path.Combine(\"local\", \"GRCh38\", \"ClinGen_20160414.nsi\"), \"ClinGen_20160414.nsi (GRCh38)\")\r\n            };\r\n\r\n            var files = new List<RemoteFile>();\r\n            files.AddSupplementaryAnnotationFiles(remotePathsByGenomeAssembly, saDirectory);\r\n\r\n            Assert.Equal(expectedFiles, files, comparer);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/Downloader/GenomeAssemblyHelperTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing Xunit;\r\nusing du = Downloader.Utilities;\r\n\r\nnamespace UnitTests.Downloader\r\n{\r\n    public sealed class GenomeAssemblyHelperTests\r\n    {\r\n        [Fact]\r\n        public void GetGenomeAssemblies_GRCh37()\r\n        {\r\n            List<GenomeAssembly> genomeAssemblies = du.GenomeAssemblyHelper.GetGenomeAssemblies(\"GRCh37\");\r\n            Assert.Single(genomeAssemblies);\r\n            Assert.Equal(GenomeAssembly.GRCh37, genomeAssemblies[0]);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetGenomeAssemblies_GRCh38()\r\n        {\r\n            List<GenomeAssembly> genomeAssemblies = du.GenomeAssemblyHelper.GetGenomeAssemblies(\"GrcH38\");\r\n            Assert.Single(genomeAssemblies);\r\n            Assert.Equal(GenomeAssembly.GRCh38, genomeAssemblies[0]);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetGenomeAssemblies_Both()\r\n        {\r\n            List<GenomeAssembly> genomeAssemblies = du.GenomeAssemblyHelper.GetGenomeAssemblies(\"BoTh\");\r\n            Assert.Equal(2, genomeAssemblies.Count);\r\n            Assert.Equal(GenomeAssembly.GRCh37, genomeAssemblies[0]);\r\n            Assert.Equal(GenomeAssembly.GRCh38, genomeAssemblies[1]);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetGenomeAssemblies_Unknown()\r\n        {\r\n            Assert.Throws<UserErrorException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                List<GenomeAssembly> genomeAssemblies = du.GenomeAssemblyHelper.GetGenomeAssemblies(\"hg19\");\r\n            });\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Downloader/ManifestTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Linq;\r\nusing Downloader;\r\nusing Genome;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Downloader\r\n{\r\n    // Checks which (genome assembly, manifest path) pairs Manifest.CreateGenomeAssemblyPaths\r\n    // produces for each combination of requested genome assemblies.\r\n    public sealed class ManifestTests\r\n    {\r\n        private const string ManifestGRCh37 = \"Manifest_GRCh37\";\r\n        private const string ManifestGRCh38 = \"Manifest_GRCh38\";\r\n\r\n        // Calls CreateGenomeAssemblyPaths with both manifest constants and materializes the result.\r\n        private static List<(GenomeAssembly GenomeAssembly, string ManifestPath)> GetPaths(params GenomeAssembly[] requested)\r\n        {\r\n            var genomeAssemblies = new List<GenomeAssembly>(requested);\r\n            return Manifest.CreateGenomeAssemblyPaths(ManifestGRCh37, ManifestGRCh38, genomeAssemblies).ToList();\r\n        }\r\n\r\n        // Requesting only GRCh37 must yield exactly one entry that points at the GRCh37 manifest.\r\n        [Fact]\r\n        public void CreateGenomeAssemblyPaths_GRCh37()\r\n        {\r\n            var paths = GetPaths(GenomeAssembly.GRCh37);\r\n\r\n            Assert.Single(paths);\r\n            (GenomeAssembly assembly, string manifestPath) = paths[0];\r\n            Assert.Equal(GenomeAssembly.GRCh37, assembly);\r\n            Assert.Equal(ManifestGRCh37, manifestPath);\r\n        }\r\n\r\n        // Requesting only GRCh38 must yield exactly one entry that points at the GRCh38 manifest.\r\n        [Fact]\r\n        public void CreateGenomeAssemblyPaths_GRCh38()\r\n        {\r\n            var paths = GetPaths(GenomeAssembly.GRCh38);\r\n\r\n            Assert.Single(paths);\r\n            (GenomeAssembly assembly, string manifestPath) = paths[0];\r\n            Assert.Equal(GenomeAssembly.GRCh38, assembly);\r\n            Assert.Equal(ManifestGRCh38, manifestPath);\r\n        }\r\n\r\n        // Requesting both assemblies must yield two entries.\r\n        [Fact]\r\n        public void CreateGenomeAssemblyPaths_Both()\r\n        {\r\n            var paths = GetPaths(GenomeAssembly.GRCh37, GenomeAssembly.GRCh38);\r\n\r\n            Assert.Equal(2, paths.Count);\r\n        }\r\n\r\n        // Requesting hg19 must yield no entries.\r\n        [Fact]\r\n        public void CreateGenomeAssemblyPaths_Unknown()\r\n        {\r\n            var paths = GetPaths(GenomeAssembly.hg19);\r\n\r\n            Assert.Empty(paths);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Downloader/RemoteFileComparer.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Downloader;\r\n\r\nnamespace UnitTests.Downloader\r\n{\r\n    // Test-only comparer that treats two RemoteFile instances as equal when their\r\n    // LocalPath, RemotePath and Description members all match.\r\n    internal sealed class RemoteFileComparer : EqualityComparer<RemoteFile>\r\n    {\r\n        // Returns true when both arguments are the same reference (including both null) or when\r\n        // all three compared members are equal. A single null argument is never equal to a\r\n        // non-null one; previously a null argument caused a NullReferenceException.\r\n        public override bool Equals(RemoteFile x, RemoteFile y)\r\n        {\r\n            if (ReferenceEquals(x, y)) return true;\r\n            if (ReferenceEquals(x, null) || ReferenceEquals(y, null)) return false;\r\n\r\n            return x.LocalPath   == y.LocalPath  &&\r\n                   x.RemotePath  == y.RemotePath &&\r\n                   x.Description == y.Description;\r\n        }\r\n\r\n        // Combines the hash codes of the same three members used by Equals, so that equal\r\n        // instances produce equal hash codes. A null member contributes 0 instead of throwing.\r\n        // Throws ArgumentNullException when obj itself is null.\r\n        public override int GetHashCode(RemoteFile obj)\r\n        {\r\n            if (ReferenceEquals(obj, null)) throw new System.ArgumentNullException(nameof(obj));\r\n\r\n            // unchecked: the multiplication is allowed to wrap around on overflow\r\n            unchecked\r\n            {\r\n                int hashCode = obj.RemotePath?.GetHashCode() ?? 0;\r\n                hashCode = (hashCode * 397) ^ (obj.LocalPath?.GetHashCode() ?? 0);\r\n                hashCode = (hashCode * 397) ^ (obj.Description?.GetHashCode() ?? 0);\r\n                return hashCode;\r\n            }\r\n        }\r\n    }\r\n}\r\n\r\n"
  },
  {
    "path": "UnitTests/EndToEndTests.cs",
    "content": "﻿using System;\r\nusing OptimizedCore;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests\r\n{\r\n    // NOTE: these tests do not include phyloP scores yet\r\n    // CACHE: v26 (VEP91)\r\n    // SA: SA38 based on intermediate TSV from /illumina/development/Nirvana/Development/IntermediateTsvs/2017-05/GRCh37\r\n    public sealed class EndToEndTests\r\n    {\r\n        private readonly string _cacheFilePrefix;\r\n\r\n        public EndToEndTests() => _cacheFilePrefix = Resources.EndToEnd37(\"chr12_7018490_7086889_Both\");\r\n\r\n        [Fact]\r\n        public void Annotation_RefMinor_not_annotated_when_no_SA()\r\n        {\r\n            const string vcfLine           = \"chr12\t7054859\t.\tG\t.\t100\tPASS\t.\t.\t.\";\r\n            var          annotatedPosition = AnnotationUtilities.GetAnnotatedPosition(_cacheFilePrefix, null, null, vcfLine);\r\n            var          sb                = annotatedPosition.GetJsonStringBuilder();\r\n            \r\n            Assert.Null(sb);\r\n        }\r\n\r\n        [Obsolete(\"We need to extract the SA files to enable these again.\")]\r\n        [Theory(Skip = \"We need to extract the SA files to enable these again.\")]\r\n        [InlineData(\"chr12\t7045879\t.\tC\t<STR14>,<STR22>\t.\tPASS\tSVTYPE=STR;END=7045936;REF=19;RL=57;RU=CAG;REPID=DRPLA\tGT:SO:CN:CI:AD_SP:AD_FL:AD_IR\t1/2:SPANNING/SPANNING:14/22:9/4:19/20:0/0\", 
\"{\\\"chromosome\\\":\\\"chr12\\\",\\\"position\\\":7045879,\\\"repeatUnit\\\":\\\"CAG\\\",\\\"refRepeatCount\\\":19,\\\"svEnd\\\":7045936,\\\"refAllele\\\":\\\"C\\\",\\\"altAlleles\\\":[\\\"<STR14>\\\",\\\"<STR22>\\\"],\\\"filters\\\":[\\\"PASS\\\"],\\\"cytogeneticBand\\\":\\\"12p13.31\\\",\\\"samples\\\":[{\\\"genotype\\\":\\\"1/2\\\",\\\"repeatNumbers\\\":\\\"14/22\\\",\\\"repeatNumberSpans\\\":\\\"9/4\\\"}],\\\"variants\\\":[{\\\"vid\\\":\\\"12:7045880:7045936:CAG:14\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7045880,\\\"end\\\":7045936,\\\"refAllele\\\":\\\"C\\\",\\\"altAllele\\\":\\\"<STR14>\\\",\\\"variantType\\\":\\\"short_tandem_repeat_variation\\\",\\\"transcripts\\\":{\\\"refSeq\\\":[{\\\"transcript\\\":\\\"NM_001007026.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"exons\\\":\\\"5/10\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"short_tandem_repeat_contraction\\\"],\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"NP_001007027.1\\\"},{\\\"transcript\\\":\\\"XM_005253672.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"exons\\\":\\\"5/10\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"short_tandem_repeat_contraction\\\"],\\\"proteinId\\\":\\\"XP_005253729.1\\\"},{\\\"transcript\\\":\\\"NM_001940.3\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"exons\\\":\\\"5/10\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"short_tandem_repeat_contraction\\\"],\\\"proteinId\\\":\\\"NP_001931.2\\\"}],\\\"ensembl\\\":[{\\\"transcript\\\":\\\"ENST00000356654.4\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"exons\\\":\\\"5/10\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"short_tandem_repeat_contraction\\\"],\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"ENSP00000349076.3\\\"},{\\\"transcript\\\":\\\"ENST00000396684.2\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"exons\\\":\\\"5/10\\\",\\\"geneId\\\":\\\"ENSG00000
111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"short_tandem_repeat_contraction\\\"],\\\"proteinId\\\":\\\"ENSP00000379915.2\\\"}]}},{\\\"vid\\\":\\\"12:7045880:7045936:CAG:22\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7045880,\\\"end\\\":7045936,\\\"refAllele\\\":\\\"C\\\",\\\"altAllele\\\":\\\"<STR22>\\\",\\\"variantType\\\":\\\"short_tandem_repeat_variation\\\",\\\"transcripts\\\":{\\\"refSeq\\\":[{\\\"transcript\\\":\\\"NM_001007026.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"exons\\\":\\\"5/10\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"short_tandem_repeat_expansion\\\"],\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"NP_001007027.1\\\"},{\\\"transcript\\\":\\\"XM_005253672.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"exons\\\":\\\"5/10\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"short_tandem_repeat_expansion\\\"],\\\"proteinId\\\":\\\"XP_005253729.1\\\"},{\\\"transcript\\\":\\\"NM_001940.3\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"exons\\\":\\\"5/10\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"short_tandem_repeat_expansion\\\"],\\\"proteinId\\\":\\\"NP_001931.2\\\"}],\\\"ensembl\\\":[{\\\"transcript\\\":\\\"ENST00000356654.4\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"exons\\\":\\\"5/10\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"short_tandem_repeat_expansion\\\"],\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"ENSP00000349076.3\\\"},{\\\"transcript\\\":\\\"ENST00000396684.2\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"exons\\\":\\\"5/10\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"short_tandem_repeat_expansion\\\"],\\\"proteinId\\\":\\\"ENSP00000379915.2\\\"}]}}]}\")]\r\n        [InlineData(\"chr12\t7048190\t.\tG\tA\t322\tPASS\tSB=0.1234567\t.\t.\", 
\"{\\\"chromosome\\\":\\\"chr12\\\",\\\"position\\\":7048190,\\\"refAllele\\\":\\\"G\\\",\\\"altAlleles\\\":[\\\"A\\\"],\\\"quality\\\":322,\\\"filters\\\":[\\\"PASS\\\"],\\\"strandBias\\\":0.123457,\\\"cytogeneticBand\\\":\\\"12p13.31\\\",\\\"samples\\\":[{\\\"isEmpty\\\":true}],\\\"variants\\\":[{\\\"vid\\\":\\\"12:7048190:A\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7048190,\\\"end\\\":7048190,\\\"refAllele\\\":\\\"G\\\",\\\"altAllele\\\":\\\"A\\\",\\\"variantType\\\":\\\"SNV\\\",\\\"transcripts\\\":{\\\"refSeq\\\":[{\\\"transcript\\\":\\\"NM_001007026.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3301\\\",\\\"cdsPos\\\":\\\"3064\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1022\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"NM_001007026.1:c.3064G>A\\\",\\\"hgvsp\\\":\\\"NP_001007027.1:p.(Ala1022Thr)\\\",\\\"isCanonical\\\":true,\\\"polyPhenScore\\\":0.36,\\\"polyPhenPrediction\\\":\\\"benign\\\",\\\"proteinId\\\":\\\"NP_001007027.1\\\",\\\"siftScore\\\":0.1,\\\"siftPrediction\\\":\\\"tolerated - low 
confidence\\\"},{\\\"transcript\\\":\\\"XM_005253672.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3391\\\",\\\"cdsPos\\\":\\\"3061\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1021\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"XM_005253672.1:c.3061G>A\\\",\\\"hgvsp\\\":\\\"XP_005253729.1:p.(Ala1021Thr)\\\",\\\"proteinId\\\":\\\"XP_005253729.1\\\"},{\\\"transcript\\\":\\\"NM_001940.3\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3294\\\",\\\"cdsPos\\\":\\\"3064\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1022\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"NM_001940.3:c.3064G>A\\\",\\\"hgvsp\\\":\\\"NP_001931.2:p.(Ala1022Thr)\\\",\\\"polyPhenScore\\\":0.36,\\\"polyPhenPrediction\\\":\\\"benign\\\",\\\"proteinId\\\":\\\"NP_001931.2\\\",\\\"siftScore\\\":0.1,\\\"siftPrediction\\\":\\\"tolerated - low 
confidence\\\"},{\\\"transcript\\\":\\\"XM_005253669.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"XP_005253726.1\\\"},{\\\"transcript\\\":\\\"NM_001301836.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"NP_001288765.1\\\"},{\\\"transcript\\\":\\\"NM_001301834.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"NP_001288763.1\\\"},{\\\"transcript\\\":\\\"XM_005253670.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"XP_005253727.1\\\"},{\\\"transcript\\\":\\\"NR_023317.1\\\",\\\"bioType\\\":\\\"snRNA\\\",\\\"geneId\\\":\\\"100147744\\\",\\\"hgnc\\\":\\\"RNU7-1\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true},{\\\"transcript\\\":\\\"NM_001301838.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"NP_001288767.1\\\"},{\\\"transcript\\\":\\\"NM_001301837.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"NP_001288766.1\\\"},{\\\"transcript\\\":\\\"NR_126035.1\\\",\\\"bioType\\\":\\\"misc_RNA\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"NM_138425.3\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"conse
quence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"NP_612434.1\\\"}],\\\"ensembl\\\":[{\\\"transcript\\\":\\\"ENST00000356654.4\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3301\\\",\\\"cdsPos\\\":\\\"3064\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1022\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000356654.4:c.3064G>A\\\",\\\"hgvsp\\\":\\\"ENSP00000349076.3:p.(Ala1022Thr)\\\",\\\"isCanonical\\\":true,\\\"polyPhenScore\\\":0.36,\\\"polyPhenPrediction\\\":\\\"benign\\\",\\\"proteinId\\\":\\\"ENSP00000349076.3\\\",\\\"siftScore\\\":0.1,\\\"siftPrediction\\\":\\\"tolerated - low confidence\\\"},{\\\"transcript\\\":\\\"ENST00000396684.2\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3298\\\",\\\"cdsPos\\\":\\\"3064\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1022\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000396684.2:c.3064G>A\\\",\\\"hgvsp\\\":\\\"ENSP00000379915.2:p.(Ala1022Thr)\\\",\\\"polyPhenScore\\\":0.36,\\\"polyPhenPrediction\\\":\\\"benign\\\",\\\"proteinId\\\":\\\"ENSP00000379915.2\\\",\\\"siftScore\\\":0.1,\\\"siftPrediction\\\":\\\"tolerated - low 
confidence\\\"},{\\\"transcript\\\":\\\"ENST00000541029.1\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000537488.1\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000538392.1\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000542222.1\\\",\\\"bioType\\\":\\\"processed_transcript\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000545581.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000440602.1\\\"},{\\\"transcript\\\":\\\"ENST00000607421.1\\\",\\\"bioType\\\":\\\"antisense\\\",\\\"geneId\\\":\\\"ENSG00000272173\\\",\\\"hgnc\\\":\\\"U47924.2\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"isCanonical\\\":true},{\\\"transcript\\\":\\\"ENST00000458811.1\\\",\\\"bioType\\\":\\\"snRNA\\\",\\\"geneId\\\":\\\"ENSG00000238923\\\",\\\"hgnc\\\":\\\"RNU7-1\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true},{\\\"transcript\\\":\\\"ENST00000544681.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000475422.1\\\"},{\\\"transcript\\\":\\\"ENST00000537087.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\
\\"],\\\"proteinId\\\":\\\"ENSP00000440937.1\\\"},{\\\"transcript\\\":\\\"ENST00000229281.5\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"ENSP00000229281.5\\\"}]}}]}\")]\r\n        [InlineData(\"chr12\t7054859\t.\tG\t.\t100\tPASS\t.\t.\t.\", \"{\\\"chromosome\\\":\\\"chr12\\\",\\\"position\\\":7054859,\\\"refAllele\\\":\\\"G\\\",\\\"quality\\\":100,\\\"filters\\\":[\\\"PASS\\\"],\\\"cytogeneticBand\\\":\\\"12p13.31\\\",\\\"samples\\\":[{\\\"isEmpty\\\":true}],\\\"variants\\\":[{\\\"vid\\\":\\\"12:7054859:G\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7054859,\\\"end\\\":7054859,\\\"isReferenceMinorAllele\\\":true,\\\"refAllele\\\":\\\"A\\\",\\\"altAllele\\\":\\\"G\\\",\\\"variantType\\\":\\\"SNV\\\",\\\"globalAllele\\\":{\\\"globalMinorAllele\\\":\\\"G\\\",\\\"globalMinorAlleleFrequency\\\":0.003794},\\\"transcripts\\\":{\\\"refSeq\\\":[{\\\"transcript\\\":\\\"NM_001007026.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"NP_001007027.1\\\"},{\\\"transcript\\\":\\\"XM_005253672.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"proteinId\\\":\\\"XP_005253729.1\\\"},{\\\"transcript\\\":\\\"NM_001940.3\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"proteinId\\\":\\\"NP_001931.2\\\"},{\\\"transcript\\\":\\\"XM_005253669.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"proteinId\\\":\\\"XP_005253726.1\\\"},{\\\"transcript\\\":\\\"NM_
001301836.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"introns\\\":\\\"2/2\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"intron_variant\\\"],\\\"hgvsc\\\":\\\"NM_001301836.1:c.191-75A>G\\\",\\\"proteinId\\\":\\\"NP_001288765.1\\\"},{\\\"transcript\\\":\\\"NM_001301834.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"introns\\\":\\\"3/3\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"intron_variant\\\"],\\\"hgvsc\\\":\\\"NM_001301834.1:c.230-75A>G\\\",\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"NP_001288763.1\\\"},{\\\"transcript\\\":\\\"XM_005253670.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"introns\\\":\\\"2/2\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"intron_variant\\\"],\\\"hgvsc\\\":\\\"XM_005253670.1:c.191-75A>G\\\",\\\"proteinId\\\":\\\"XP_005253727.1\\\"},{\\\"transcript\\\":\\\"NR_023317.1\\\",\\\"bioType\\\":\\\"snRNA\\\",\\\"geneId\\\":\\\"100147744\\\",\\\"hgnc\\\":\\\"RNU7-1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"isCanonical\\\":true},{\\\"transcript\\\":\\\"NM_001301838.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"introns\\\":\\\"2/2\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"intron_variant\\\"],\\\"hgvsc\\\":\\\"NM_001301838.1:c.125-75A>G\\\",\\\"proteinId\\\":\\\"NP_001288767.1\\\"},{\\\"transcript\\\":\\\"NM_001301837.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"introns\\\":\\\"2/2\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"intron_variant\\\"],\\\"hgvsc\\\":\\\"NM_001301837.1:c.143-75A>G\\\",\\\"proteinId\\\":\\\"NP_001288766.1\\\"},{\\\"transcript\\\":\\\"NR_126035.1\\\",\\\"bioType\\\":\\\"misc_RNA\\\",\\\"introns\\\":\\\"2/2\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"intron_variant\\\",\\\"non_coding_transcript_variant\\\"],\\\"hgvsc\\\":\\\
"NR_126035.1:n.544-75A>G\\\"},{\\\"transcript\\\":\\\"NM_138425.3\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"introns\\\":\\\"2/2\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"intron_variant\\\"],\\\"hgvsc\\\":\\\"NM_138425.3:c.230-75A>G\\\",\\\"proteinId\\\":\\\"NP_612434.1\\\"},{\\\"transcript\\\":\\\"NM_138425.2\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"introns\\\":\\\"2/2\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"intron_variant\\\"],\\\"hgvsc\\\":\\\"NM_138425.2:c.230-75A>G\\\",\\\"proteinId\\\":\\\"NP_612434.1\\\"},{\\\"transcript\\\":\\\"NM_080548.4\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"5777\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"NP_536858.1\\\"},{\\\"transcript\\\":\\\"XM_005253719.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"5777\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"XP_005253776.1\\\"}],\\\"ensembl\\\":[{\\\"transcript\\\":\\\"ENST00000356654.4\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"ENSP00000349076.3\\\"},{\\\"transcript\\\":\\\"ENST00000396684.2\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000379915.2\\\"},{\\\"transcript\\\":\\\"ENST00000537488.1\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000538392.1\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\
":[\\\"downstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000542222.1\\\",\\\"bioType\\\":\\\"processed_transcript\\\",\\\"introns\\\":\\\"2/2\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"intron_variant\\\",\\\"non_coding_transcript_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000542222.1:n.408-75A>G\\\"},{\\\"transcript\\\":\\\"ENST00000545581.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"introns\\\":\\\"3/3\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"intron_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000545581.1:c.230-75A>G\\\",\\\"proteinId\\\":\\\"ENSP00000440602.1\\\"},{\\\"transcript\\\":\\\"ENST00000607421.1\\\",\\\"bioType\\\":\\\"antisense\\\",\\\"geneId\\\":\\\"ENSG00000272173\\\",\\\"hgnc\\\":\\\"U47924.2\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true},{\\\"transcript\\\":\\\"ENST00000458811.1\\\",\\\"bioType\\\":\\\"snRNA\\\",\\\"geneId\\\":\\\"ENSG00000238923\\\",\\\"hgnc\\\":\\\"RNU7-1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"isCanonical\\\":true},{\\\"transcript\\\":\\\"ENST00000544681.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000475422.1\\\"},{\\\"transcript\\\":\\\"ENST00000537087.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"introns\\\":\\\"2/2\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"intron_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000537087.1:c.143-75A>G\\\",\\\"proteinId\\\":\\\"ENSP00000440937.1\\\"},{\\\"transcript\\\":\\\"ENST00000229281.5\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"introns\\\":\\\"2/2\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"intron_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000229281.5:c.230-75A>G\\\",\\\"isCan
onical\\\":true,\\\"proteinId\\\":\\\"ENSP00000229281.5\\\"},{\\\"transcript\\\":\\\"ENST00000540506.2\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"introns\\\":\\\"2/2\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"intron_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000540506.2:c.125-75A>G\\\",\\\"proteinId\\\":\\\"ENSP00000475635.1\\\"},{\\\"transcript\\\":\\\"ENST00000543115.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000443393.1\\\"},{\\\"transcript\\\":\\\"ENST00000542848.1\\\",\\\"bioType\\\":\\\"nonsense_mediated_decay\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000444805.1\\\"},{\\\"transcript\\\":\\\"ENST00000543120.1\\\",\\\"bioType\\\":\\\"processed_transcript\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000399448.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000382376.1\\\"},{\\\"transcript\\\":\\\"ENST00000534900.1\\\",\\\"bioType\\\":\\\"processed_transcript\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000447931.2\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000415979.2\\\"},{\\\"transcript\\\":\\\"ENST00000538318.1\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"upstream_gene_var
iant\\\"]}]}}]}\")]\r\n        [InlineData(\"chr12\t7073931\t.\tT\t<INV>\t100\tPASS\tSVTYPE=INV;END=7074100\t.\t.\", \"{\\\"chromosome\\\":\\\"chr12\\\",\\\"position\\\":7073931,\\\"svEnd\\\":7074100,\\\"refAllele\\\":\\\"T\\\",\\\"altAlleles\\\":[\\\"<INV>\\\"],\\\"quality\\\":100,\\\"filters\\\":[\\\"PASS\\\"],\\\"cytogeneticBand\\\":\\\"12p13.31\\\",\\\"samples\\\":[{\\\"isEmpty\\\":true}],\\\"clingen\\\":[{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":173786,\\\"end\\\":34835837,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv995956\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"validated\\\":true,\\\"phenotypes\\\":[\\\"Decreased calvarial ossification\\\",\\\"Delayed gross motor development\\\",\\\"Feeding difficulties\\\",\\\"Frontal bossing\\\",\\\"Morphological abnormality of the central nervous system\\\",\\\"Patchy alopecia\\\"],\\\"phenotypeIds\\\":[\\\"HP:0002007\\\",\\\"HP:0002011\\\",\\\"HP:0002194\\\",\\\"HP:0002232\\\",\\\"HP:0005474\\\",\\\"HP:0011968\\\",\\\"MedGen:C0232466\\\",\\\"MedGen:C1862862\\\",\\\"MedGen:CN001816\\\",\\\"MedGen:CN001820\\\",\\\"MedGen:CN001989\\\",\\\"MedGen:CN004852\\\"],\\\"reciprocalOverlap\\\":0},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":7425202,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv532325\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\",\\\"Global developmental delay\\\"],\\\"phenotypeIds\\\":[\\\"HP:0001263\\\",\\\"MedGen:CN001157\\\"],\\\"reciprocalOverlap\\\":0.00002},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":8514342,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv532326\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or 
morphological phenotypes\\\"],\\\"reciprocalOverlap\\\":0.00002},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":25623263,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv532324\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Abnormality of cardiac morphology\\\",\\\"Agenesis of corpus callosum\\\",\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\"],\\\"phenotypeIds\\\":[\\\"HP:0001274\\\",\\\"HP:0001627\\\",\\\"MedGen:C1837248\\\",\\\"MedGen:CN001482\\\"],\\\"reciprocalOverlap\\\":0.00001},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":28568117,\\\"variantType\\\":\\\"copy_number_loss\\\",\\\"id\\\":\\\"nsv531493\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedLosses\\\":1,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\",\\\"Global developmental delay\\\"],\\\"phenotypeIds\\\":[\\\"HP:0001263\\\",\\\"MedGen:CN001157\\\"],\\\"reciprocalOverlap\\\":0.00001},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":34533111,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv532323\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":2,\\\"phenotypes\\\":[\\\"Coarse facial features\\\",\\\"Abnormal facial shape\\\",\\\"Abnormality of cardiac morphology\\\",\\\"Cleft upper lip\\\",\\\"Global developmental delay\\\",\\\"Hearing impairment\\\",\\\"Short 
stature\\\"],\\\"phenotypeIds\\\":[\\\"HP:0000280\\\",\\\"MedGen:C1854600\\\",\\\"HP:0000204\\\",\\\"HP:0000365\\\",\\\"HP:0001263\\\",\\\"HP:0001627\\\",\\\"HP:0001999\\\",\\\"HP:0004322\\\",\\\"MedGen:C0349588\\\",\\\"MedGen:C1384666\\\",\\\"MedGen:CN000197\\\",\\\"MedGen:CN001157\\\",\\\"MedGen:CN001482\\\",\\\"MedGen:CN001810\\\"],\\\"reciprocalOverlap\\\":0},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":34756196,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv916406\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Ambiguous genitalia\\\",\\\"Delayed fine motor development\\\",\\\"Delayed gross motor development\\\",\\\"Delayed speech and language development\\\",\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\",\\\"Intellectual disability\\\",\\\"Short stature\\\"],\\\"phenotypeIds\\\":[\\\"HP:0000062\\\",\\\"HP:0000750\\\",\\\"HP:0001249\\\",\\\"HP:0002194\\\",\\\"HP:0004322\\\",\\\"HP:0010862\\\",\\\"MedGen:C0349588\\\",\\\"MedGen:C1843367\\\",\\\"MedGen:CN000062\\\",\\\"MedGen:CN000706\\\",\\\"MedGen:CN001989\\\",\\\"MedGen:CN116596\\\"],\\\"reciprocalOverlap\\\":0},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":34756209,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv533931\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"validated\\\":true,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\"],\\\"reciprocalOverlap\\\":0},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":34761006,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv917315\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"validated\\\":true,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological 
phenotypes\\\"],\\\"reciprocalOverlap\\\":0},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":133773393,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv917029\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Abnormality of toe\\\",\\\"Defect in the atrial septum\\\",\\\"Downslanted palpebral fissures\\\",\\\"Frontal bossing\\\",\\\"Low-set ears\\\",\\\"Overlapping fingers\\\",\\\"Patent ductus arteriosus\\\",\\\"Sacral dimple\\\",\\\"Sandal gap\\\",\\\"Single transverse palmar crease\\\"],\\\"phenotypeIds\\\":[\\\"HP:0000369\\\",\\\"HP:0000494\\\",\\\"HP:0000954\\\",\\\"HP:0000960\\\",\\\"HP:0001631\\\",\\\"HP:0001643\\\",\\\"HP:0001780\\\",\\\"HP:0001852\\\",\\\"HP:0002007\\\",\\\"HP:0010557\\\",\\\"MedGen:C0426848\\\",\\\"MedGen:C1865016\\\",\\\"MedGen:C1873502\\\",\\\"MedGen:CN000345\\\",\\\"MedGen:CN001485\\\",\\\"MedGen:CN001496\\\",\\\"MedGen:CN001615\\\",\\\"MedGen:CN001674\\\",\\\"MedGen:CN001816\\\",\\\"MedGen:CN009386\\\"],\\\"reciprocalOverlap\\\":0},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":322142,\\\"end\\\":34079848,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv532328\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\"],\\\"reciprocalOverlap\\\":0.00001},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":1367440,\\\"end\\\":20810511,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv995558\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"validated\\\":true,\\\"phenotypes\\\":[\\\"Feeding 
difficulties\\\",\\\"Laryngomalacia\\\"],\\\"phenotypeIds\\\":[\\\"HP:0001601\\\",\\\"HP:0011968\\\",\\\"MedGen:C0232466\\\",\\\"MedGen:CN001457\\\"],\\\"reciprocalOverlap\\\":0.00001},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":2980907,\\\"end\\\":15140282,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv868869\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\"],\\\"reciprocalOverlap\\\":0.00001},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":6837831,\\\"end\\\":7858216,\\\"variantType\\\":\\\"copy_number_loss\\\",\\\"id\\\":\\\"nsv531496\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedLosses\\\":1,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\"],\\\"reciprocalOverlap\\\":0.00017}],\\\"dgv\\\":[{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":6985480,\\\"end\\\":7103003,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"variantFreqAll\\\":0.00003,\\\"id\\\":\\\"nsv1035811\\\",\\\"sampleSize\\\":29084,\\\"observedGains\\\":1,\\\"reciprocalOverlap\\\":0.00144},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":7005694,\\\"end\\\":7115157,\\\"variantType\\\":\\\"insertion\\\",\\\"variantFreqAll\\\":0.25,\\\"id\\\":\\\"nsv509453\\\",\\\"sampleSize\\\":4,\\\"observedGains\\\":1,\\\"reciprocalOverlap\\\":0.00154},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":7012055,\\\"end\\\":7163058,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"variantFreqAll\\\":0.00003,\\\"id\\\":\\\"nsv1047373\\\",\\\"sampleSize\\\":29084,\\\"observedGains\\\":1,\\\"reciprocalOverlap\\\":0.00112}],\\\"variants\\\":[{\\\"vid\\\":\\\"12:7073932:7074100:Inverse\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7073932,\\\"end\\\":7074100,\\\"refAllele\\\":\\\"T\\\",\\\"altAllele\\\":\\\"<INV>\\\",\\\"variantType\\\":\\\"inversion\\\",\\\"overlappingGenes\\\":[\\\"EMG1\\\"],\
\\"transcripts\\\":{\\\"ensembl\\\":[{\\\"transcript\\\":\\\"ENST00000607161.1\\\",\\\"bioType\\\":\\\"processed_transcript\\\",\\\"introns\\\":\\\"1/5\\\",\\\"geneId\\\":\\\"ENSG00000126749\\\",\\\"hgnc\\\":\\\"EMG1\\\",\\\"consequence\\\":[\\\"transcript_variant\\\"]}]}}]}\")]\r\n        [InlineData(\"chr12\t7040534\t.\tACATA\tA\t100\tPASS\t.\t.\t.\", \"{\\\"chromosome\\\":\\\"chr12\\\",\\\"position\\\":7040534,\\\"refAllele\\\":\\\"ACATA\\\",\\\"altAlleles\\\":[\\\"A\\\"],\\\"quality\\\":100,\\\"filters\\\":[\\\"PASS\\\"],\\\"cytogeneticBand\\\":\\\"12p13.31\\\",\\\"samples\\\":[{\\\"isEmpty\\\":true}],\\\"variants\\\":[{\\\"vid\\\":\\\"12:7040535:7040538\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7040535,\\\"end\\\":7040538,\\\"refAllele\\\":\\\"CATA\\\",\\\"altAllele\\\":\\\"-\\\",\\\"variantType\\\":\\\"deletion\\\",\\\"transcripts\\\":{\\\"refSeq\\\":[{\\\"transcript\\\":\\\"NM_001007026.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"introns\\\":\\\"1/9\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"intron_variant\\\"],\\\"hgvsc\\\":\\\"NM_001007026.1:c.-162-2468_-162-2465delCATA\\\",\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"NP_001007027.1\\\"},{\\\"transcript\\\":\\\"XM_005253672.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"introns\\\":\\\"1/9\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"intron_variant\\\"],\\\"hgvsc\\\":\\\"XM_005253672.1:c.-162-2468_-162-2465delCATA\\\",\\\"proteinId\\\":\\\"XP_005253729.1\\\"},{\\\"transcript\\\":\\\"NM_001940.3\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"introns\\\":\\\"1/9\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"intron_variant\\\"],\\\"hgvsc\\\":\\\"NM_001940.3:c.-162-2468_-162-2465delCATA\\\",\\\"proteinId\\\":\\\"NP_001931.2\\\"}],\\\"ensembl\\\":[{\\\"transcript\\\":\\\"ENST00000356654.4\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"introns\\\":\\\"1/9\\\",\\\"geneId\\\":\\\"ENSG
00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"intron_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000356654.4:c.-162-2468_-162-2465delCATA\\\",\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"ENSP00000349076.3\\\"},{\\\"transcript\\\":\\\"ENST00000396684.2\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"introns\\\":\\\"1/9\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"intron_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000396684.2:c.-162-2468_-162-2465delCATA\\\",\\\"proteinId\\\":\\\"ENSP00000379915.2\\\"}]}}]}\")]\r\n        [InlineData(\"chr12\t7045274\t.\tT\t<DEL>\t100\tPASS\tSVTYPE=DEL;END=7084024\t.\t.\", \"{\\\"chromosome\\\":\\\"chr12\\\",\\\"position\\\":7045274,\\\"svEnd\\\":7084024,\\\"refAllele\\\":\\\"T\\\",\\\"altAlleles\\\":[\\\"<DEL>\\\"],\\\"quality\\\":100,\\\"filters\\\":[\\\"PASS\\\"],\\\"cytogeneticBand\\\":\\\"12p13.31\\\",\\\"samples\\\":[{\\\"isEmpty\\\":true}],\\\"clingen\\\":[{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":147099,\\\"end\\\":7054359,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv498529\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\"],\\\"reciprocalOverlap\\\":0.00132},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":173786,\\\"end\\\":34835837,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv995956\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"validated\\\":true,\\\"phenotypes\\\":[\\\"Decreased calvarial ossification\\\",\\\"Delayed gross motor development\\\",\\\"Feeding difficulties\\\",\\\"Frontal bossing\\\",\\\"Morphological abnormality of the central nervous system\\\",\\\"Patchy 
alopecia\\\"],\\\"phenotypeIds\\\":[\\\"HP:0002007\\\",\\\"HP:0002011\\\",\\\"HP:0002194\\\",\\\"HP:0002232\\\",\\\"HP:0005474\\\",\\\"HP:0011968\\\",\\\"MedGen:C0232466\\\",\\\"MedGen:C1862862\\\",\\\"MedGen:CN001816\\\",\\\"MedGen:CN001820\\\",\\\"MedGen:CN001989\\\",\\\"MedGen:CN004852\\\"],\\\"reciprocalOverlap\\\":0.00112},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":7425202,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv532325\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\",\\\"Global developmental delay\\\"],\\\"phenotypeIds\\\":[\\\"HP:0001263\\\",\\\"MedGen:CN001157\\\"],\\\"reciprocalOverlap\\\":0.00543},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":8514342,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv532326\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\"],\\\"reciprocalOverlap\\\":0.00471},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":25623263,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv532324\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Abnormality of cardiac morphology\\\",\\\"Agenesis of corpus callosum\\\",\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\"],\\\"phenotypeIds\\\":[\\\"HP:0001274\\\",\\\"HP:0001627\\\",\\\"MedGen:C1837248\\\",\\\"MedGen:CN001482\\\"],\\\"reciprocalOverlap\\\":0.00153},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":28568117,\\\"variantType\\\":\\\"copy_number_loss\\\",\\\"id\\\":\\\"nsv531493\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedLosses\\\":1,\\\"phenotypes\\\":[\\\"Developmental delay 
AND/OR other significant developmental or morphological phenotypes\\\",\\\"Global developmental delay\\\"],\\\"phenotypeIds\\\":[\\\"HP:0001263\\\",\\\"MedGen:CN001157\\\"],\\\"reciprocalOverlap\\\":0.00137},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":34533111,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv532323\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":2,\\\"phenotypes\\\":[\\\"Coarse facial features\\\",\\\"Abnormal facial shape\\\",\\\"Abnormality of cardiac morphology\\\",\\\"Cleft upper lip\\\",\\\"Global developmental delay\\\",\\\"Hearing impairment\\\",\\\"Short stature\\\"],\\\"phenotypeIds\\\":[\\\"HP:0000280\\\",\\\"MedGen:C1854600\\\",\\\"HP:0000204\\\",\\\"HP:0000365\\\",\\\"HP:0001263\\\",\\\"HP:0001627\\\",\\\"HP:0001999\\\",\\\"HP:0004322\\\",\\\"MedGen:C0349588\\\",\\\"MedGen:C1384666\\\",\\\"MedGen:CN000197\\\",\\\"MedGen:CN001157\\\",\\\"MedGen:CN001482\\\",\\\"MedGen:CN001810\\\"],\\\"reciprocalOverlap\\\":0.00113},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":34756196,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv916406\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Ambiguous genitalia\\\",\\\"Delayed fine motor development\\\",\\\"Delayed gross motor development\\\",\\\"Delayed speech and language development\\\",\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\",\\\"Intellectual disability\\\",\\\"Short 
stature\\\"],\\\"phenotypeIds\\\":[\\\"HP:0000062\\\",\\\"HP:0000750\\\",\\\"HP:0001249\\\",\\\"HP:0002194\\\",\\\"HP:0004322\\\",\\\"HP:0010862\\\",\\\"MedGen:C0349588\\\",\\\"MedGen:C1843367\\\",\\\"MedGen:CN000062\\\",\\\"MedGen:CN000706\\\",\\\"MedGen:CN001989\\\",\\\"MedGen:CN116596\\\"],\\\"reciprocalOverlap\\\":0.00112},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":34756209,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv533931\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"validated\\\":true,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\"],\\\"reciprocalOverlap\\\":0.00112},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":34761006,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv917315\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"validated\\\":true,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\"],\\\"reciprocalOverlap\\\":0.00112},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":133773393,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv917029\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Abnormality of toe\\\",\\\"Defect in the atrial septum\\\",\\\"Downslanted palpebral fissures\\\",\\\"Frontal bossing\\\",\\\"Low-set ears\\\",\\\"Overlapping fingers\\\",\\\"Patent ductus arteriosus\\\",\\\"Sacral dimple\\\",\\\"Sandal gap\\\",\\\"Single transverse palmar 
crease\\\"],\\\"phenotypeIds\\\":[\\\"HP:0000369\\\",\\\"HP:0000494\\\",\\\"HP:0000954\\\",\\\"HP:0000960\\\",\\\"HP:0001631\\\",\\\"HP:0001643\\\",\\\"HP:0001780\\\",\\\"HP:0001852\\\",\\\"HP:0002007\\\",\\\"HP:0010557\\\",\\\"MedGen:C0426848\\\",\\\"MedGen:C1865016\\\",\\\"MedGen:C1873502\\\",\\\"MedGen:CN000345\\\",\\\"MedGen:CN001485\\\",\\\"MedGen:CN001496\\\",\\\"MedGen:CN001615\\\",\\\"MedGen:CN001674\\\",\\\"MedGen:CN001816\\\",\\\"MedGen:CN009386\\\"],\\\"reciprocalOverlap\\\":0.00029},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":322142,\\\"end\\\":34079848,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv532328\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\"],\\\"reciprocalOverlap\\\":0.00115},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":1367440,\\\"end\\\":20810511,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv995558\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"validated\\\":true,\\\"phenotypes\\\":[\\\"Feeding difficulties\\\",\\\"Laryngomalacia\\\"],\\\"phenotypeIds\\\":[\\\"HP:0001601\\\",\\\"HP:0011968\\\",\\\"MedGen:C0232466\\\",\\\"MedGen:CN001457\\\"],\\\"reciprocalOverlap\\\":0.00199},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":2980907,\\\"end\\\":15140282,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv868869\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\"],\\\"reciprocalOverlap\\\":0.00319},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":6837831,\\\"end\\\":7858216,\\\"variantType\\\":\\\"copy_number_loss\\\",\\\"id\\\":\\\"nsv531496\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedLosses\\\":1,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant 
developmental or morphological phenotypes\\\"],\\\"reciprocalOverlap\\\":0.03798}],\\\"dgv\\\":[{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":6985480,\\\"end\\\":7103003,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"variantFreqAll\\\":0.00003,\\\"id\\\":\\\"nsv1035811\\\",\\\"sampleSize\\\":29084,\\\"observedGains\\\":1,\\\"reciprocalOverlap\\\":0.32972},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":7005694,\\\"end\\\":7115157,\\\"variantType\\\":\\\"insertion\\\",\\\"variantFreqAll\\\":0.25,\\\"id\\\":\\\"nsv509453\\\",\\\"sampleSize\\\":4,\\\"observedGains\\\":1,\\\"reciprocalOverlap\\\":0.354},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":7012055,\\\"end\\\":7163058,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"variantFreqAll\\\":0.00003,\\\"id\\\":\\\"nsv1047373\\\",\\\"sampleSize\\\":29084,\\\"observedGains\\\":1,\\\"reciprocalOverlap\\\":0.25662},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":7053085,\\\"end\\\":7063682,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"variantFreqAll\\\":0.00006,\\\"id\\\":\\\"nsv557262\\\",\\\"sampleSize\\\":17421,\\\"observedGains\\\":1,\\\"reciprocalOverlap\\\":0.2735},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":7054324,\\\"end\\\":7054491,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"esv5830\\\",\\\"sampleSize\\\":1,\\\"reciprocalOverlap\\\":0.00434},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":7054931,\\\"end\\\":7065635,\\\"variantType\\\":\\\"copy_number_loss\\\",\\\"variantFreqAll\\\":0.00011,\\\"id\\\":\\\"dgv2324n54\\\",\\\"sampleSize\\\":17421,\\\"observedLosses\\\":2,\\\"reciprocalOverlap\\\":0.27626},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":7055492,\\\"end\\\":7070110,\\\"variantType\\\":\\\"copy_number_loss\\\",\\\"variantFreqAll\\\":0.00017,\\\"id\\\":\\\"dgv2325n54\\\",\\\"sampleSize\\\":17421,\\\"observedLosses\\\":3,\\\"reciprocalOverlap\\\":0.37726},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":7058967,\\\"end\\\":7059349,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"variantFreqAll\\\":0
.00541,\\\"id\\\":\\\"esv3356433\\\",\\\"sampleSize\\\":185,\\\"observedGains\\\":1,\\\"reciprocalOverlap\\\":0.00988},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":7059100,\\\"end\\\":7059327,\\\"variantType\\\":\\\"insertion\\\",\\\"variantFreqAll\\\":1,\\\"id\\\":\\\"nsv513351\\\",\\\"sampleSize\\\":1,\\\"observedGains\\\":1,\\\"reciprocalOverlap\\\":0.00588},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":7059146,\\\"end\\\":7059890,\\\"variantType\\\":\\\"insertion\\\",\\\"variantFreqAll\\\":0.33333,\\\"id\\\":\\\"esv994245\\\",\\\"sampleSize\\\":3,\\\"observedGains\\\":1,\\\"reciprocalOverlap\\\":0.01923},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":7059221,\\\"end\\\":7059221,\\\"variantType\\\":\\\"insertion\\\",\\\"variantFreqAll\\\":0.01622,\\\"id\\\":\\\"esv3381477\\\",\\\"sampleSize\\\":185,\\\"observedGains\\\":3},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":7060846,\\\"end\\\":7070110,\\\"variantType\\\":\\\"copy_number_loss\\\",\\\"variantFreqAll\\\":0.00011,\\\"id\\\":\\\"dgv2326n54\\\",\\\"sampleSize\\\":17421,\\\"observedLosses\\\":2,\\\"reciprocalOverlap\\\":0.2391},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":7063701,\\\"end\\\":7070500,\\\"variantType\\\":\\\"copy_number_loss\\\",\\\"variantFreqAll\\\":1,\\\"id\\\":\\\"nsv952791\\\",\\\"sampleSize\\\":1,\\\"observedLosses\\\":1,\\\"reciprocalOverlap\\\":0.17548}],\\\"variants\\\":[{\\\"vid\\\":\\\"12:7045275:7084024\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7045275,\\\"end\\\":7084024,\\\"refAllele\\\":\\\"T\\\",\\\"altAllele\\\":\\\"<DEL>\\\",\\\"variantType\\\":\\\"deletion\\\",\\\"regulatoryRegions\\\":[{\\\"id\\\":\\\"ENSR00000361210\\\",\\\"type\\\":\\\"promoter\\\",\\\"consequence\\\":[\\\"regulatory_region_ablation\\\",\\\"regulatory_region_variant\\\"]},{\\\"id\\\":\\\"ENSR00000361211\\\",\\\"type\\\":\\\"promoter\\\",\\\"consequence\\\":[\\\"regulatory_region_ablation\\\",\\\"regulatory_region_variant\\\"]},{\\\"id\\\":\\\"ENSR00000361212\\\",\\\"type\\\":\\\"CTCF_binding_site\\\",
\\\"consequence\\\":[\\\"regulatory_region_ablation\\\",\\\"regulatory_region_variant\\\"]},{\\\"id\\\":\\\"ENSR00000361213\\\",\\\"type\\\":\\\"promoter\\\",\\\"consequence\\\":[\\\"regulatory_region_ablation\\\",\\\"regulatory_region_variant\\\"]},{\\\"id\\\":\\\"ENSR00000361214\\\",\\\"type\\\":\\\"promoter\\\",\\\"consequence\\\":[\\\"regulatory_region_ablation\\\",\\\"regulatory_region_variant\\\"]},{\\\"id\\\":\\\"ENSR00000361215\\\",\\\"type\\\":\\\"promoter\\\",\\\"consequence\\\":[\\\"regulatory_region_ablation\\\",\\\"regulatory_region_variant\\\"]},{\\\"id\\\":\\\"ENSR00000361216\\\",\\\"type\\\":\\\"promoter\\\",\\\"consequence\\\":[\\\"regulatory_region_ablation\\\",\\\"regulatory_region_variant\\\"]}],\\\"overlappingGenes\\\":[\\\"ATN1\\\",\\\"C12orf57\\\",\\\"U47924.2\\\",\\\"RNU7-1\\\",\\\"PTPN6\\\",\\\"MIR200CHG\\\",\\\"EMG1\\\",\\\"MIR200C\\\",\\\"MIR141\\\",\\\"U47924.1\\\",\\\"PHB2\\\",\\\"SCARNA12\\\"],\\\"transcripts\\\":{\\\"refSeq\\\":[{\\\"transcript\\\":\\\"NM_001007026.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"exons\\\":\\\"5-10/10\\\",\\\"introns\\\":\\\"5-9/9\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"transcript_truncation\\\",\\\"unidirectional_gene_fusion\\\"],\\\"geneFusion\\\":{\\\"exon\\\":5,\\\"fusions\\\":[{\\\"hgvsc\\\":\\\"ATN1{NM_001007026.1}:c.1_844_EMG1{NM_006331.7}:c.412+170_735\\\",\\\"intron\\\":3},{\\\"hgvsc\\\":\\\"ATN1{NM_001007026.1}:c.1_844_EMG1{NM_001320049.1}:c.409+170_582\\\",\\\"intron\\\":4}]},\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"NP_001007027.1\\\"},{\\\"transcript\\\":\\\"XM_005253672.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"exons\\\":\\\"5-10/10\\\",\\\"introns\\\":\\\"5-9/9\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"transcript_truncation\\\",\\\"unidirectional_gene_fusion\\\"],\\\"geneFusion\\\":{\\\"exon\\\":5,\\\"fusions\\\":[{\\\"hgvsc\\\":\\\"ATN1{XM_005253672.1}:c.1_841_EMG1{NM_006331.7}:c.412+170_735\
\\",\\\"intron\\\":3},{\\\"hgvsc\\\":\\\"ATN1{XM_005253672.1}:c.1_841_EMG1{NM_001320049.1}:c.409+170_582\\\",\\\"intron\\\":4}]},\\\"proteinId\\\":\\\"XP_005253729.1\\\"},{\\\"transcript\\\":\\\"NM_001940.3\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"exons\\\":\\\"5-10/10\\\",\\\"introns\\\":\\\"5-9/9\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"transcript_truncation\\\",\\\"unidirectional_gene_fusion\\\"],\\\"geneFusion\\\":{\\\"exon\\\":5,\\\"fusions\\\":[{\\\"hgvsc\\\":\\\"ATN1{NM_001940.3}:c.1_844_EMG1{NM_006331.7}:c.412+170_735\\\",\\\"intron\\\":3},{\\\"hgvsc\\\":\\\"ATN1{NM_001940.3}:c.1_844_EMG1{NM_001320049.1}:c.409+170_582\\\",\\\"intron\\\":4}]},\\\"proteinId\\\":\\\"NP_001931.2\\\"},{\\\"transcript\\\":\\\"NM_006331.7\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"exons\\\":\\\"1-3/6\\\",\\\"introns\\\":\\\"1-3/5\\\",\\\"geneId\\\":\\\"10436\\\",\\\"hgnc\\\":\\\"EMG1\\\",\\\"consequence\\\":[\\\"transcript_truncation\\\",\\\"unidirectional_gene_fusion\\\"],\\\"geneFusion\\\":{\\\"intron\\\":3,\\\"fusions\\\":[{\\\"hgvsc\\\":\\\"ATN1{NM_001007026.1}:c.1_844_EMG1{NM_006331.7}:c.412+170_735\\\",\\\"exon\\\":5},{\\\"hgvsc\\\":\\\"ATN1{XM_005253672.1}:c.1_841_EMG1{NM_006331.7}:c.412+170_735\\\",\\\"exon\\\":5},{\\\"hgvsc\\\":\\\"ATN1{NM_001940.3}:c.1_844_EMG1{NM_006331.7}:c.412+170_735\\\",\\\"exon\\\":5}]},\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"NP_006322.4\\\"},{\\\"transcript\\\":\\\"NM_001320049.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"exons\\\":\\\"1-4/6\\\",\\\"introns\\\":\\\"1-4/5\\\",\\\"geneId\\\":\\\"10436\\\",\\\"hgnc\\\":\\\"EMG1\\\",\\\"consequence\\\":[\\\"transcript_truncation\\\",\\\"unidirectional_gene_fusion\\\"],\\\"geneFusion\\\":{\\\"intron\\\":4,\\\"fusions\\\":[{\\\"hgvsc\\\":\\\"ATN1{NM_001007026.1}:c.1_844_EMG1{NM_001320049.1}:c.409+170_582\\\",\\\"exon\\\":5},{\\\"hgvsc\\\":\\\"ATN1{XM_005253672.1}:c.1_841_EMG1{NM_001320049.1}:c.409+170_582\\\",\\\"exon\\\":5},{\\\"hgvsc\\\":\\
\"ATN1{NM_001940.3}:c.1_844_EMG1{NM_001320049.1}:c.409+170_582\\\",\\\"exon\\\":5}]},\\\"proteinId\\\":\\\"NP_001306978.1\\\"},{\\\"transcript\\\":\\\"NR_135131.1\\\",\\\"bioType\\\":\\\"misc_RNA\\\",\\\"exons\\\":\\\"1-3/8\\\",\\\"introns\\\":\\\"1-3/7\\\",\\\"geneId\\\":\\\"10436\\\",\\\"hgnc\\\":\\\"EMG1\\\",\\\"consequence\\\":[\\\"transcript_truncation\\\"]}],\\\"ensembl\\\":[{\\\"transcript\\\":\\\"ENST00000356654.4\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"exons\\\":\\\"5-10/10\\\",\\\"introns\\\":\\\"5-9/9\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"transcript_truncation\\\",\\\"unidirectional_gene_fusion\\\"],\\\"geneFusion\\\":{\\\"exon\\\":5,\\\"fusions\\\":[{\\\"hgvsc\\\":\\\"ATN1{ENST00000356654.4}:c.1_844_EMG1{ENST00000261406.6}:c.409+170_732\\\",\\\"intron\\\":4}]},\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"ENSP00000349076.3\\\"},{\\\"transcript\\\":\\\"ENST00000396684.2\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"exons\\\":\\\"5-10/10\\\",\\\"introns\\\":\\\"5-9/9\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"transcript_truncation\\\",\\\"unidirectional_gene_fusion\\\"],\\\"geneFusion\\\":{\\\"exon\\\":5,\\\"fusions\\\":[{\\\"hgvsc\\\":\\\"ATN1{ENST00000396684.2}:c.1_844_EMG1{ENST00000261406.6}:c.409+170_732\\\",\\\"intron\\\":4}]},\\\"proteinId\\\":\\\"ENSP00000379915.2\\\"},{\\\"transcript\\\":\\\"ENST00000541029.1\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"exons\\\":\\\"1-2/2\\\",\\\"introns\\\":\\\"1/1\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"transcript_ablation\\\"]},{\\\"transcript\\\":\\\"ENST00000537488.1\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"exons\\\":\\\"1-3/3\\\",\\\"introns\\\":\\\"1-2/2\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"transcript_ablation\\\"]},{\\\"transcript\\\":\\\"ENST00000607161.1\\\",\\\"bioType\\
\":\\\"processed_transcript\\\",\\\"exons\\\":\\\"1-3/6\\\",\\\"introns\\\":\\\"1-3/5\\\",\\\"geneId\\\":\\\"ENSG00000126749\\\",\\\"hgnc\\\":\\\"EMG1\\\",\\\"consequence\\\":[\\\"transcript_truncation\\\"]},{\\\"transcript\\\":\\\"ENST00000261406.6\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"exons\\\":\\\"1-4/7\\\",\\\"introns\\\":\\\"1-4/6\\\",\\\"geneId\\\":\\\"ENSG00000126749\\\",\\\"hgnc\\\":\\\"EMG1\\\",\\\"consequence\\\":[\\\"transcript_truncation\\\",\\\"unidirectional_gene_fusion\\\"],\\\"geneFusion\\\":{\\\"intron\\\":4,\\\"fusions\\\":[{\\\"hgvsc\\\":\\\"ATN1{ENST00000356654.4}:c.1_844_EMG1{ENST00000261406.6}:c.409+170_732\\\",\\\"exon\\\":5},{\\\"hgvsc\\\":\\\"ATN1{ENST00000396684.2}:c.1_844_EMG1{ENST00000261406.6}:c.409+170_732\\\",\\\"exon\\\":5}]},\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"ENSP00000476966.1\\\"},{\\\"transcript\\\":\\\"ENST00000546220.1\\\",\\\"bioType\\\":\\\"processed_transcript\\\",\\\"exons\\\":\\\"1-3/6\\\",\\\"introns\\\":\\\"1-3/5\\\",\\\"geneId\\\":\\\"ENSG00000126749\\\",\\\"hgnc\\\":\\\"EMG1\\\",\\\"consequence\\\":[\\\"transcript_truncation\\\"]},{\\\"transcript\\\":\\\"ENST00000539440.1\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"exons\\\":\\\"1-3/4\\\",\\\"introns\\\":\\\"1-2/3\\\",\\\"geneId\\\":\\\"ENSG00000126749\\\",\\\"hgnc\\\":\\\"EMG1\\\",\\\"consequence\\\":[\\\"transcript_truncation\\\"]},{\\\"transcript\\\":\\\"ENST00000564245.1\\\",\\\"bioType\\\":\\\"processed_transcript\\\",\\\"exons\\\":\\\"1-3/8\\\",\\\"introns\\\":\\\"1-3/7\\\",\\\"geneId\\\":\\\"ENSG00000126749\\\",\\\"hgnc\\\":\\\"EMG1\\\",\\\"consequence\\\":[\\\"transcript_truncation\\\"]},{\\\"transcript\\\":\\\"ENST00000451846.2\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"exons\\\":\\\"1-2/2\\\",\\\"introns\\\":\\\"1/1\\\",\\\"geneId\\\":\\\"ENSG00000126749\\\",\\\"hgnc\\\":\\\"EMG1\\\",\\\"consequence\\\":[\\\"transcript_truncation\\\"]},{\\\"transcript\\\":\\\"ENST00000539535.2\\\",\\\"bioType\\\":\\\"processed_transcript\\\",\\\"ex
ons\\\":\\\"1-3/8\\\",\\\"introns\\\":\\\"1-3/7\\\",\\\"geneId\\\":\\\"ENSG00000126749\\\",\\\"hgnc\\\":\\\"EMG1\\\",\\\"consequence\\\":[\\\"transcript_truncation\\\"]},{\\\"transcript\\\":\\\"ENST00000541016.1\\\",\\\"bioType\\\":\\\"processed_transcript\\\",\\\"exons\\\":\\\"1-3/5\\\",\\\"introns\\\":\\\"1-3/4\\\",\\\"geneId\\\":\\\"ENSG00000126749\\\",\\\"hgnc\\\":\\\"EMG1\\\",\\\"consequence\\\":[\\\"transcript_truncation\\\"]},{\\\"transcript\\\":\\\"ENST00000539196.1\\\",\\\"bioType\\\":\\\"processed_transcript\\\",\\\"exons\\\":\\\"1-3/5\\\",\\\"introns\\\":\\\"1-3/4\\\",\\\"geneId\\\":\\\"ENSG00000126749\\\",\\\"hgnc\\\":\\\"EMG1\\\",\\\"consequence\\\":[\\\"transcript_truncation\\\"]}]}}]}\")]\r\n        [InlineData(\"chr12\t7067124\t.\tGGCC\tATTG\t100\tPASS\t.\t.\t.\", \"{\\\"chromosome\\\":\\\"chr12\\\",\\\"position\\\":7067124,\\\"refAllele\\\":\\\"GGCC\\\",\\\"altAlleles\\\":[\\\"ATTG\\\"],\\\"quality\\\":100,\\\"filters\\\":[\\\"PASS\\\"],\\\"cytogeneticBand\\\":\\\"12p13.31\\\",\\\"samples\\\":[{\\\"isEmpty\\\":true}],\\\"variants\\\":[{\\\"vid\\\":\\\"12:7067124:7067127:ATTG\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7067124,\\\"end\\\":7067127,\\\"refAllele\\\":\\\"GGCC\\\",\\\"altAllele\\\":\\\"ATTG\\\",\\\"variantType\\\":\\\"MNV\\\",\\\"transcripts\\\":{\\\"refSeq\\\":[{\\\"transcript\\\":\\\"NM_080548.4\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"GGCCcc/ATTGcc\\\",\\\"aminoAcids\\\":\\\"GP/IA\\\",\\\"cdnaPos\\\":\\\"1404-1407\\\",\\\"cdsPos\\\":\\\"1255-1258\\\",\\\"exons\\\":\\\"11/16\\\",\\\"proteinPos\\\":\\\"419-420\\\",\\\"geneId\\\":\\\"5777\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"NM_080548.4:c.1255_1258delGGCCinsATTG\\\",\\\"hgvsp\\\":\\\"NP_536858.1:p.(Gly419_Pro420delinsIleAla)\\\",\\\"proteinId\\\":\\\"NP_536858.1\\\"},{\\\"transcript\\\":\\\"XM_005253719.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"GGCCcc/ATTGcc\\\",\\\"aminoAcids\\\"
:\\\"GP/IA\\\",\\\"cdnaPos\\\":\\\"1248-1251\\\",\\\"cdsPos\\\":\\\"1132-1135\\\",\\\"exons\\\":\\\"10/15\\\",\\\"proteinPos\\\":\\\"378-379\\\",\\\"geneId\\\":\\\"5777\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"XM_005253719.1:c.1132_1135delGGCCinsATTG\\\",\\\"hgvsp\\\":\\\"XP_005253776.1:p.(Gly378_Pro379delinsIleAla)\\\",\\\"proteinId\\\":\\\"XP_005253776.1\\\"},{\\\"transcript\\\":\\\"NM_002831.5\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"GGCCcc/ATTGcc\\\",\\\"aminoAcids\\\":\\\"GP/IA\\\",\\\"cdnaPos\\\":\\\"1491-1494\\\",\\\"cdsPos\\\":\\\"1249-1252\\\",\\\"exons\\\":\\\"11/16\\\",\\\"proteinPos\\\":\\\"417-418\\\",\\\"geneId\\\":\\\"5777\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"NM_002831.5:c.1249_1252delGGCCinsATTG\\\",\\\"hgvsp\\\":\\\"NP_002822.2:p.(Gly417_Pro418delinsIleAla)\\\",\\\"proteinId\\\":\\\"NP_002822.2\\\"},{\\\"transcript\\\":\\\"NM_080549.3\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"GGCCcc/ATTGcc\\\",\\\"aminoAcids\\\":\\\"GP/IA\\\",\\\"cdnaPos\\\":\\\"1491-1494\\\",\\\"cdsPos\\\":\\\"1249-1252\\\",\\\"exons\\\":\\\"11/16\\\",\\\"proteinPos\\\":\\\"417-418\\\",\\\"geneId\\\":\\\"5777\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"NM_080549.3:c.1249_1252delGGCCinsATTG\\\",\\\"hgvsp\\\":\\\"NP_536859.1:p.(Gly417_Pro418delinsIleAla)\\\",\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"NP_536859.1\\\"}],\\\"ensembl\\\":[{\\\"transcript\\\":\\\"ENST00000542848.1\\\",\\\"bioType\\\":\\\"nonsense_mediated_decay\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000444805.1\\\"},{\\\"transcript\\\":\\\"ENST00000543120.1\\\",\\\"bioType\\\":\\\"processed_transcript\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"downstrea
m_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000399448.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"GGCCcc/ATTGcc\\\",\\\"aminoAcids\\\":\\\"GP/IA\\\",\\\"cdnaPos\\\":\\\"1404-1407\\\",\\\"cdsPos\\\":\\\"1255-1258\\\",\\\"exons\\\":\\\"11/16\\\",\\\"proteinPos\\\":\\\"419-420\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000399448.1:c.1255_1258delGGCCinsATTG\\\",\\\"hgvsp\\\":\\\"ENSP00000382376.1:p.(Gly419_Pro420delinsIleAla)\\\",\\\"proteinId\\\":\\\"ENSP00000382376.1\\\"},{\\\"transcript\\\":\\\"ENST00000534900.1\\\",\\\"bioType\\\":\\\"processed_transcript\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000447931.2\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"GGCCcc/ATTGcc\\\",\\\"aminoAcids\\\":\\\"GP/IA\\\",\\\"cdnaPos\\\":\\\"1248-1251\\\",\\\"cdsPos\\\":\\\"1132-1135\\\",\\\"exons\\\":\\\"10/15\\\",\\\"proteinPos\\\":\\\"378-379\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000447931.2:c.1132_1135delGGCCinsATTG\\\",\\\"hgvsp\\\":\\\"ENSP00000415979.2:p.(Gly378_Pro379delinsIleAla)\\\",\\\"proteinId\\\":\\\"ENSP00000415979.2\\\"},{\\\"transcript\\\":\\\"ENST00000538318.1\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000538715.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000438740.1\\\"},{\\\"transcript\\\":\\\"ENST00000318974.9\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"GGCCcc/ATTGcc\\\",\\\"aminoAcids\\\":\\\"GP/IA\\
\",\\\"cdnaPos\\\":\\\"1493-1496\\\",\\\"cdsPos\\\":\\\"1249-1252\\\",\\\"exons\\\":\\\"11/16\\\",\\\"proteinPos\\\":\\\"417-418\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000318974.9:c.1249_1252delGGCCinsATTG\\\",\\\"hgvsp\\\":\\\"ENSP00000326010.9:p.(Gly417_Pro418delinsIleAla)\\\",\\\"proteinId\\\":\\\"ENSP00000326010.9\\\"},{\\\"transcript\\\":\\\"ENST00000456013.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"GGCCcc/ATTGcc\\\",\\\"aminoAcids\\\":\\\"GP/IA\\\",\\\"cdnaPos\\\":\\\"1491-1494\\\",\\\"cdsPos\\\":\\\"1249-1252\\\",\\\"exons\\\":\\\"11/16\\\",\\\"proteinPos\\\":\\\"417-418\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000456013.1:c.1249_1252delGGCCinsATTG\\\",\\\"hgvsp\\\":\\\"ENSP00000391592.1:p.(Gly417_Pro418delinsIleAla)\\\",\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"ENSP00000391592.1\\\"},{\\\"transcript\\\":\\\"ENST00000543744.1\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000540740.1\\\",\\\"bioType\\\":\\\"processed_transcript\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000416215.2\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"cdnaPos\\\":\\\"1657-1660\\\",\\\"exons\\\":\\\"10/15\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"non_coding_transcript_exon_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000416215.2:n.1657_1660delGGCCinsATTG\\\"},{\\\"transcript\\\":\\\"ENST00000545153.1\\\",\\\"bioType\\\":\\\"nonsense_mediated_decay\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"downstream_gene_var
iant\\\"],\\\"proteinId\\\":\\\"ENSP00000476175.1\\\"},{\\\"transcript\\\":\\\"ENST00000535462.1\\\",\\\"bioType\\\":\\\"nonsense_mediated_decay\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000441044.1\\\"},{\\\"transcript\\\":\\\"ENST00000541698.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000445646.1\\\"},{\\\"transcript\\\":\\\"ENST00000542462.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000440114.1\\\"},{\\\"transcript\\\":\\\"ENST00000542277.1\\\",\\\"bioType\\\":\\\"processed_transcript\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000536013.1\\\",\\\"bioType\\\":\\\"nonsense_mediated_decay\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000446345.1\\\"},{\\\"transcript\\\":\\\"ENST00000539365.1\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000539029.1\\\",\\\"bioType\\\":\\\"processed_transcript\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000542761.1\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000537533.1\\\",\\\"bioType\\\":\\\"processed_transcript\\\",\\
\"geneId\\\":\\\"ENSG00000111679\\\",\\\"hgnc\\\":\\\"PTPN6\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]}]}}]}\")]\r\n        [InlineData(\"chr12\t7033330\t.\tT\t<INS>\t100\tPASS\tSVTYPE=INS;END=7033330;SVLEN=1350\t.\t.\", \"{\\\"chromosome\\\":\\\"chr12\\\",\\\"position\\\":7033330,\\\"svEnd\\\":7033330,\\\"refAllele\\\":\\\"T\\\",\\\"altAlleles\\\":[\\\"<INS>\\\"],\\\"quality\\\":100,\\\"filters\\\":[\\\"PASS\\\"],\\\"svLength\\\":1350,\\\"cytogeneticBand\\\":\\\"12p13.31\\\",\\\"samples\\\":[{\\\"isEmpty\\\":true}],\\\"clingen\\\":[{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":147099,\\\"end\\\":7054359,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv498529\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\"]},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":173786,\\\"end\\\":34835837,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv995956\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"validated\\\":true,\\\"phenotypes\\\":[\\\"Decreased calvarial ossification\\\",\\\"Delayed gross motor development\\\",\\\"Feeding difficulties\\\",\\\"Frontal bossing\\\",\\\"Morphological abnormality of the central nervous system\\\",\\\"Patchy alopecia\\\"],\\\"phenotypeIds\\\":[\\\"HP:0002007\\\",\\\"HP:0002011\\\",\\\"HP:0002194\\\",\\\"HP:0002232\\\",\\\"HP:0005474\\\",\\\"HP:0011968\\\",\\\"MedGen:C0232466\\\",\\\"MedGen:C1862862\\\",\\\"MedGen:CN001816\\\",\\\"MedGen:CN001820\\\",\\\"MedGen:CN001989\\\",\\\"MedGen:CN004852\\\"]},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":7425202,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv532325\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological 
phenotypes\\\",\\\"Global developmental delay\\\"],\\\"phenotypeIds\\\":[\\\"HP:0001263\\\",\\\"MedGen:CN001157\\\"]},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":8514342,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv532326\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\"]},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":25623263,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv532324\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Abnormality of cardiac morphology\\\",\\\"Agenesis of corpus callosum\\\",\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\"],\\\"phenotypeIds\\\":[\\\"HP:0001274\\\",\\\"HP:0001627\\\",\\\"MedGen:C1837248\\\",\\\"MedGen:CN001482\\\"]},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":28568117,\\\"variantType\\\":\\\"copy_number_loss\\\",\\\"id\\\":\\\"nsv531493\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedLosses\\\":1,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\",\\\"Global developmental delay\\\"],\\\"phenotypeIds\\\":[\\\"HP:0001263\\\",\\\"MedGen:CN001157\\\"]},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":34533111,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv532323\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":2,\\\"phenotypes\\\":[\\\"Coarse facial features\\\",\\\"Abnormal facial shape\\\",\\\"Abnormality of cardiac morphology\\\",\\\"Cleft upper lip\\\",\\\"Global developmental delay\\\",\\\"Hearing impairment\\\",\\\"Short 
stature\\\"],\\\"phenotypeIds\\\":[\\\"HP:0000280\\\",\\\"MedGen:C1854600\\\",\\\"HP:0000204\\\",\\\"HP:0000365\\\",\\\"HP:0001263\\\",\\\"HP:0001627\\\",\\\"HP:0001999\\\",\\\"HP:0004322\\\",\\\"MedGen:C0349588\\\",\\\"MedGen:C1384666\\\",\\\"MedGen:CN000197\\\",\\\"MedGen:CN001157\\\",\\\"MedGen:CN001482\\\",\\\"MedGen:CN001810\\\"]},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":34756196,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv916406\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Ambiguous genitalia\\\",\\\"Delayed fine motor development\\\",\\\"Delayed gross motor development\\\",\\\"Delayed speech and language development\\\",\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\",\\\"Intellectual disability\\\",\\\"Short stature\\\"],\\\"phenotypeIds\\\":[\\\"HP:0000062\\\",\\\"HP:0000750\\\",\\\"HP:0001249\\\",\\\"HP:0002194\\\",\\\"HP:0004322\\\",\\\"HP:0010862\\\",\\\"MedGen:C0349588\\\",\\\"MedGen:C1843367\\\",\\\"MedGen:CN000062\\\",\\\"MedGen:CN000706\\\",\\\"MedGen:CN001989\\\",\\\"MedGen:CN116596\\\"]},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":34756209,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv533931\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"validated\\\":true,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\"]},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":34761006,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv917315\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"validated\\\":true,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological 
phenotypes\\\"]},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":282465,\\\"end\\\":133773393,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv917029\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Abnormality of toe\\\",\\\"Defect in the atrial septum\\\",\\\"Downslanted palpebral fissures\\\",\\\"Frontal bossing\\\",\\\"Low-set ears\\\",\\\"Overlapping fingers\\\",\\\"Patent ductus arteriosus\\\",\\\"Sacral dimple\\\",\\\"Sandal gap\\\",\\\"Single transverse palmar crease\\\"],\\\"phenotypeIds\\\":[\\\"HP:0000369\\\",\\\"HP:0000494\\\",\\\"HP:0000954\\\",\\\"HP:0000960\\\",\\\"HP:0001631\\\",\\\"HP:0001643\\\",\\\"HP:0001780\\\",\\\"HP:0001852\\\",\\\"HP:0002007\\\",\\\"HP:0010557\\\",\\\"MedGen:C0426848\\\",\\\"MedGen:C1865016\\\",\\\"MedGen:C1873502\\\",\\\"MedGen:CN000345\\\",\\\"MedGen:CN001485\\\",\\\"MedGen:CN001496\\\",\\\"MedGen:CN001615\\\",\\\"MedGen:CN001674\\\",\\\"MedGen:CN001816\\\",\\\"MedGen:CN009386\\\"]},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":322142,\\\"end\\\":34079848,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv532328\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\"]},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":1367440,\\\"end\\\":20810511,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv995558\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"validated\\\":true,\\\"phenotypes\\\":[\\\"Feeding 
difficulties\\\",\\\"Laryngomalacia\\\"],\\\"phenotypeIds\\\":[\\\"HP:0001601\\\",\\\"HP:0011968\\\",\\\"MedGen:C0232466\\\",\\\"MedGen:CN001457\\\"]},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":2980907,\\\"end\\\":15140282,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"nsv868869\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedGains\\\":1,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\"]},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":6837831,\\\"end\\\":7858216,\\\"variantType\\\":\\\"copy_number_loss\\\",\\\"id\\\":\\\"nsv531496\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"observedLosses\\\":1,\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\"]}],\\\"dgv\\\":[{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":6889463,\\\"end\\\":7041469,\\\"variantType\\\":\\\"copy_number_loss\\\",\\\"variantFreqAll\\\":0.02105,\\\"id\\\":\\\"nsv832324\\\",\\\"sampleSize\\\":95,\\\"observedLosses\\\":2},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":6948468,\\\"end\\\":7033823,\\\"variantType\\\":\\\"copy_number_loss\\\",\\\"variantFreqAll\\\":0.00006,\\\"id\\\":\\\"nsv557261\\\",\\\"sampleSize\\\":17421,\\\"observedLosses\\\":1},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":6985480,\\\"end\\\":7103003,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"variantFreqAll\\\":0.00003,\\\"id\\\":\\\"nsv1035811\\\",\\\"sampleSize\\\":29084,\\\"observedGains\\\":1},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":7005694,\\\"end\\\":7115157,\\\"variantType\\\":\\\"insertion\\\",\\\"variantFreqAll\\\":0.25,\\\"id\\\":\\\"nsv509453\\\",\\\"sampleSize\\\":4,\\\"observedGains\\\":1},{\\\"chromosome\\\":\\\"12\\\",\\\"begin\\\":7012055,\\\"end\\\":7163058,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"variantFreqAll\\\":0.00003,\\\"id\\\":\\\"nsv1047373\\\",\\\"sampleSize\\\":29084,\\\"observedGains\\\":1}],\\\"variants\\\":[{\\\"vid\\\
":\\\"12:7033331:7033330:INS\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7033331,\\\"end\\\":7033330,\\\"refAllele\\\":\\\"T\\\",\\\"altAllele\\\":\\\"<INS>\\\",\\\"variantType\\\":\\\"insertion\\\",\\\"regulatoryRegions\\\":[{\\\"id\\\":\\\"ENSR00000361206\\\",\\\"type\\\":\\\"CTCF_binding_site\\\",\\\"consequence\\\":[\\\"regulatory_region_variant\\\"]}]}]}\")]\r\n        [InlineData(\"chr12\t7043410\t.\tC\tCTCC\t50\tPASS\t.\t.\t.\", \"{\\\"chromosome\\\":\\\"chr12\\\",\\\"position\\\":7043410,\\\"refAllele\\\":\\\"C\\\",\\\"altAlleles\\\":[\\\"CTCC\\\"],\\\"quality\\\":50,\\\"filters\\\":[\\\"PASS\\\"],\\\"cytogeneticBand\\\":\\\"12p13.31\\\",\\\"samples\\\":[{\\\"isEmpty\\\":true}],\\\"variants\\\":[{\\\"vid\\\":\\\"12:7043411:7043410:TCC\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7043411,\\\"end\\\":7043410,\\\"refAllele\\\":\\\"-\\\",\\\"altAllele\\\":\\\"TCC\\\",\\\"variantType\\\":\\\"insertion\\\",\\\"transcripts\\\":{\\\"refSeq\\\":[{\\\"transcript\\\":\\\"NM_001007026.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"-/TCC\\\",\\\"aminoAcids\\\":\\\"-/S\\\",\\\"cdnaPos\\\":\\\"336-337\\\",\\\"cdsPos\\\":\\\"99-100\\\",\\\"exons\\\":\\\"3/10\\\",\\\"proteinPos\\\":\\\"33-34\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"inframe_insertion\\\"],\\\"hgvsc\\\":\\\"NM_001007026.1:c.100_102dupTCC\\\",\\\"hgvsp\\\":\\\"NP_001007027.1:p.(Ser34dup)\\\",\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"NP_001007027.1\\\"},{\\\"transcript\\\":\\\"XM_005253672.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"-/TCC\\\",\\\"aminoAcids\\\":\\\"-/S\\\",\\\"cdnaPos\\\":\\\"429-430\\\",\\\"cdsPos\\\":\\\"99-100\\\",\\\"exons\\\":\\\"3/10\\\",\\\"proteinPos\\\":\\\"33-34\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"inframe_insertion\\\"],\\\"hgvsc\\\":\\\"XM_005253672.1:c.100_102dupTCC\\\",\\\"hgvsp\\\":\\\"XP_005253729.1:p.(Ser34dup)\\\",\\\"proteinId\\\":\\\"X
P_005253729.1\\\"},{\\\"transcript\\\":\\\"NM_001940.3\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"-/TCC\\\",\\\"aminoAcids\\\":\\\"-/S\\\",\\\"cdnaPos\\\":\\\"329-330\\\",\\\"cdsPos\\\":\\\"99-100\\\",\\\"exons\\\":\\\"3/10\\\",\\\"proteinPos\\\":\\\"33-34\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"inframe_insertion\\\"],\\\"hgvsc\\\":\\\"NM_001940.3:c.100_102dupTCC\\\",\\\"hgvsp\\\":\\\"NP_001931.2:p.(Ser34dup)\\\",\\\"proteinId\\\":\\\"NP_001931.2\\\"}],\\\"ensembl\\\":[{\\\"transcript\\\":\\\"ENST00000356654.4\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"-/TCC\\\",\\\"aminoAcids\\\":\\\"-/S\\\",\\\"cdnaPos\\\":\\\"336-337\\\",\\\"cdsPos\\\":\\\"99-100\\\",\\\"exons\\\":\\\"3/10\\\",\\\"proteinPos\\\":\\\"33-34\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"inframe_insertion\\\"],\\\"hgvsc\\\":\\\"ENST00000356654.4:c.100_102dupTCC\\\",\\\"hgvsp\\\":\\\"ENSP00000349076.3:p.(Ser34dup)\\\",\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"ENSP00000349076.3\\\"},{\\\"transcript\\\":\\\"ENST00000396684.2\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"-/TCC\\\",\\\"aminoAcids\\\":\\\"-/S\\\",\\\"cdnaPos\\\":\\\"333-334\\\",\\\"cdsPos\\\":\\\"99-100\\\",\\\"exons\\\":\\\"3/10\\\",\\\"proteinPos\\\":\\\"33-34\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"inframe_insertion\\\"],\\\"hgvsc\\\":\\\"ENST00000396684.2:c.100_102dupTCC\\\",\\\"hgvsp\\\":\\\"ENSP00000379915.2:p.(Ser34dup)\\\",\\\"proteinId\\\":\\\"ENSP00000379915.2\\\"},{\\\"transcript\\\":\\\"ENST00000541029.1\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000537488.1\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\
\\":[\\\"upstream_gene_variant\\\"]}]}}]}\")]\r\n        [InlineData(\"chr12\t7043410\t.\tCT\tGATG\t50\tPASS\t.\t.\t.\", \"{\\\"chromosome\\\":\\\"chr12\\\",\\\"position\\\":7043410,\\\"refAllele\\\":\\\"CT\\\",\\\"altAlleles\\\":[\\\"GATG\\\"],\\\"quality\\\":50,\\\"filters\\\":[\\\"PASS\\\"],\\\"cytogeneticBand\\\":\\\"12p13.31\\\",\\\"samples\\\":[{\\\"isEmpty\\\":true}],\\\"variants\\\":[{\\\"vid\\\":\\\"12:7043410:7043411:GATG\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7043410,\\\"end\\\":7043411,\\\"refAllele\\\":\\\"CT\\\",\\\"altAllele\\\":\\\"GATG\\\",\\\"variantType\\\":\\\"indel\\\",\\\"transcripts\\\":{\\\"refSeq\\\":[{\\\"transcript\\\":\\\"NM_001007026.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"gcCTcc/gcGATGcc\\\",\\\"aminoAcids\\\":\\\"AS/AMX\\\",\\\"cdnaPos\\\":\\\"336-337\\\",\\\"cdsPos\\\":\\\"99-100\\\",\\\"exons\\\":\\\"3/10\\\",\\\"proteinPos\\\":\\\"33-34\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"frameshift_variant\\\"],\\\"hgvsc\\\":\\\"NM_001007026.1:c.99_100delCTinsGATG\\\",\\\"hgvsp\\\":\\\"NP_001007027.1:p.(Ser34MetfsTer27)\\\",\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"NP_001007027.1\\\"},{\\\"transcript\\\":\\\"XM_005253672.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"gcCTcc/gcGATGcc\\\",\\\"aminoAcids\\\":\\\"AS/AMX\\\",\\\"cdnaPos\\\":\\\"429-430\\\",\\\"cdsPos\\\":\\\"99-100\\\",\\\"exons\\\":\\\"3/10\\\",\\\"proteinPos\\\":\\\"33-34\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"frameshift_variant\\\"],\\\"hgvsc\\\":\\\"XM_005253672.1:c.99_100delCTinsGATG\\\",\\\"hgvsp\\\":\\\"XP_005253729.1:p.(Ser34MetfsTer27)\\\",\\\"proteinId\\\":\\\"XP_005253729.1\\\"},{\\\"transcript\\\":\\\"NM_001940.3\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"gcCTcc/gcGATGcc\\\",\\\"aminoAcids\\\":\\\"AS/AMX\\\",\\\"cdnaPos\\\":\\\"329-330\\\",\\\"cdsPos\\\":\\\"99-100\\\",\\\"exons\\\":\\\"3/10\\\",\\\"proteinPos
\\\":\\\"33-34\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"frameshift_variant\\\"],\\\"hgvsc\\\":\\\"NM_001940.3:c.99_100delCTinsGATG\\\",\\\"hgvsp\\\":\\\"NP_001931.2:p.(Ser34MetfsTer27)\\\",\\\"proteinId\\\":\\\"NP_001931.2\\\"}],\\\"ensembl\\\":[{\\\"transcript\\\":\\\"ENST00000356654.4\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"gcCTcc/gcGATGcc\\\",\\\"aminoAcids\\\":\\\"AS/AMX\\\",\\\"cdnaPos\\\":\\\"336-337\\\",\\\"cdsPos\\\":\\\"99-100\\\",\\\"exons\\\":\\\"3/10\\\",\\\"proteinPos\\\":\\\"33-34\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"frameshift_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000356654.4:c.99_100delCTinsGATG\\\",\\\"hgvsp\\\":\\\"ENSP00000349076.3:p.(Ser34MetfsTer27)\\\",\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"ENSP00000349076.3\\\"},{\\\"transcript\\\":\\\"ENST00000396684.2\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"gcCTcc/gcGATGcc\\\",\\\"aminoAcids\\\":\\\"AS/AMX\\\",\\\"cdnaPos\\\":\\\"333-334\\\",\\\"cdsPos\\\":\\\"99-100\\\",\\\"exons\\\":\\\"3/10\\\",\\\"proteinPos\\\":\\\"33-34\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"frameshift_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000396684.2:c.99_100delCTinsGATG\\\",\\\"hgvsp\\\":\\\"ENSP00000379915.2:p.(Ser34MetfsTer27)\\\",\\\"proteinId\\\":\\\"ENSP00000379915.2\\\"},{\\\"transcript\\\":\\\"ENST00000541029.1\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000537488.1\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]}]}}]}\")]\r\n        public void Annotate_with_SA(string vcfLine, string expectedResults)\r\n        {\r\n            var    annotatedPosition = 
AnnotationUtilities.GetAnnotatedPosition(_cacheFilePrefix, null, null, vcfLine);\r\n            var    sb  = annotatedPosition.GetJsonStringBuilder();\r\n            var    observedResults   = StringBuilderPool.GetStringAndReturn(sb);\r\n\r\n            Assert.Equal(expectedResults, observedResults);\r\n        }\r\n\r\n#if (NI_ALLELE)\r\n        [Theory]\r\n        [InlineData(\"chr12\t7048190\t.\tG\tA,*\t322\tPASS\tSB=0.1234567\t.\t.\", \"{\\\"chromosome\\\":\\\"chr12\\\",\\\"position\\\":7048190,\\\"refAllele\\\":\\\"G\\\",\\\"altAlleles\\\":[\\\"A\\\",\\\"*\\\"],\\\"quality\\\":322,\\\"filters\\\":[\\\"PASS\\\"],\\\"strandBias\\\":0.123457,\\\"cytogeneticBand\\\":\\\"12p13.31\\\",\\\"samples\\\":[{\\\"isEmpty\\\":true}],\\\"variants\\\":[{\\\"vid\\\":\\\"12:7048190:A\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7048190,\\\"end\\\":7048190,\\\"refAllele\\\":\\\"G\\\",\\\"altAllele\\\":\\\"A\\\",\\\"variantType\\\":\\\"SNV\\\",\\\"transcripts\\\":[{\\\"transcript\\\":\\\"ENST00000356654.4\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3301\\\",\\\"cdsPos\\\":\\\"3064\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1022\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000356654.4:c.3064G>A\\\",\\\"hgvsp\\\":\\\"ENSP00000349076.3:p.(Ala1022Thr)\\\",\\\"isCanonical\\\":true,\\\"polyPhenScore\\\":0.36,\\\"polyPhenPrediction\\\":\\\"benign\\\",\\\"proteinId\\\":\\\"ENSP00000349076.3\\\",\\\"siftScore\\\":0.1,\\\"siftPrediction\\\":\\\"tolerated - low 
confidence\\\"},{\\\"transcript\\\":\\\"NM_001007026.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3301\\\",\\\"cdsPos\\\":\\\"3064\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1022\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"NM_001007026.1:c.3064G>A\\\",\\\"hgvsp\\\":\\\"NP_001007027.1:p.(Ala1022Thr)\\\",\\\"isCanonical\\\":true,\\\"polyPhenScore\\\":0.36,\\\"polyPhenPrediction\\\":\\\"benign\\\",\\\"proteinId\\\":\\\"NP_001007027.1\\\",\\\"siftScore\\\":0.1,\\\"siftPrediction\\\":\\\"tolerated - low confidence\\\"},{\\\"transcript\\\":\\\"ENST00000396684.2\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3298\\\",\\\"cdsPos\\\":\\\"3064\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1022\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000396684.2:c.3064G>A\\\",\\\"hgvsp\\\":\\\"ENSP00000379915.2:p.(Ala1022Thr)\\\",\\\"polyPhenScore\\\":0.36,\\\"polyPhenPrediction\\\":\\\"benign\\\",\\\"proteinId\\\":\\\"ENSP00000379915.2\\\",\\\"siftScore\\\":0.1,\\\"siftPrediction\\\":\\\"tolerated - low 
confidence\\\"},{\\\"transcript\\\":\\\"NM_001940.3\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3294\\\",\\\"cdsPos\\\":\\\"3064\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1022\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"NM_001940.3:c.3064G>A\\\",\\\"hgvsp\\\":\\\"NP_001931.2:p.(Ala1022Thr)\\\",\\\"polyPhenScore\\\":0.36,\\\"polyPhenPrediction\\\":\\\"benign\\\",\\\"proteinId\\\":\\\"NP_001931.2\\\",\\\"siftScore\\\":0.1,\\\"siftPrediction\\\":\\\"tolerated - low confidence\\\"},{\\\"transcript\\\":\\\"ENST00000541029.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000537488.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000538392.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000542222.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"processed_transcript\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"NM_001301836.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"NP_001288765.1\\\"},{\\\"transcript\\\":\\\"ENST00000545581.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\
\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000440602.1\\\"},{\\\"transcript\\\":\\\"NM_001301834.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"NP_001288763.1\\\"},{\\\"transcript\\\":\\\"ENST00000607421.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"antisense\\\",\\\"geneId\\\":\\\"ENSG00000272173\\\",\\\"hgnc\\\":\\\"U47924.2\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"isCanonical\\\":true},{\\\"transcript\\\":\\\"ENST00000458811.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"snRNA\\\",\\\"geneId\\\":\\\"ENSG00000238923\\\",\\\"hgnc\\\":\\\"RNU7-1\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true},{\\\"transcript\\\":\\\"NR_023317.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"snRNA\\\",\\\"geneId\\\":\\\"100147744\\\",\\\"hgnc\\\":\\\"RNU7-1\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true},{\\\"transcript\\\":\\\"ENST00000544681.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000475422.1\\\"},{\\\"transcript\\\":\\\"NM_001301838.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"NP_001288767.1\\\"},{\\\"transcript\\\":\\\"NM_001301837.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"
],\\\"proteinId\\\":\\\"NP_001288766.1\\\"},{\\\"transcript\\\":\\\"NR_126035.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"misc_RNA\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"NM_138425.3\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"NP_612434.1\\\"},{\\\"transcript\\\":\\\"ENST00000537087.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000440937.1\\\"},{\\\"transcript\\\":\\\"ENST00000229281.5\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"ENSP00000229281.5\\\"}]},{\\\"vid\\\":\\\"12:7048190:*\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7048190,\\\"end\\\":7048190,\\\"refAllele\\\":\\\"G\\\",\\\"altAllele\\\":\\\"*\\\",\\\"variantType\\\":\\\"non_informative_allele\\\"}]}\")]\r\n        [InlineData(\"chr12\t7048190\t.\tG\tA,<*>\t322\tPASS\tSB=0.1234567\t.\t.\", 
\"{\\\"chromosome\\\":\\\"chr12\\\",\\\"position\\\":7048190,\\\"refAllele\\\":\\\"G\\\",\\\"altAlleles\\\":[\\\"A\\\",\\\"<*>\\\"],\\\"quality\\\":322,\\\"filters\\\":[\\\"PASS\\\"],\\\"strandBias\\\":0.123457,\\\"cytogeneticBand\\\":\\\"12p13.31\\\",\\\"samples\\\":[{\\\"isEmpty\\\":true}],\\\"variants\\\":[{\\\"vid\\\":\\\"12:7048190:A\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7048190,\\\"end\\\":7048190,\\\"refAllele\\\":\\\"G\\\",\\\"altAllele\\\":\\\"A\\\",\\\"variantType\\\":\\\"SNV\\\",\\\"transcripts\\\":[{\\\"transcript\\\":\\\"ENST00000356654.4\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3301\\\",\\\"cdsPos\\\":\\\"3064\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1022\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000356654.4:c.3064G>A\\\",\\\"hgvsp\\\":\\\"ENSP00000349076.3:p.(Ala1022Thr)\\\",\\\"isCanonical\\\":true,\\\"polyPhenScore\\\":0.36,\\\"polyPhenPrediction\\\":\\\"benign\\\",\\\"proteinId\\\":\\\"ENSP00000349076.3\\\",\\\"siftScore\\\":0.1,\\\"siftPrediction\\\":\\\"tolerated - low confidence\\\"},{\\\"transcript\\\":\\\"NM_001007026.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3301\\\",\\\"cdsPos\\\":\\\"3064\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1022\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"NM_001007026.1:c.3064G>A\\\",\\\"hgvsp\\\":\\\"NP_001007027.1:p.(Ala1022Thr)\\\",\\\"isCanonical\\\":true,\\\"polyPhenScore\\\":0.36,\\\"polyPhenPrediction\\\":\\\"benign\\\",\\\"proteinId\\\":\\\"NP_001007027.1\\\",\\\"siftScore\\\":0.1,\\\"siftPrediction\\\":\\\"tolerated - low 
confidence\\\"},{\\\"transcript\\\":\\\"ENST00000396684.2\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3298\\\",\\\"cdsPos\\\":\\\"3064\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1022\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000396684.2:c.3064G>A\\\",\\\"hgvsp\\\":\\\"ENSP00000379915.2:p.(Ala1022Thr)\\\",\\\"polyPhenScore\\\":0.36,\\\"polyPhenPrediction\\\":\\\"benign\\\",\\\"proteinId\\\":\\\"ENSP00000379915.2\\\",\\\"siftScore\\\":0.1,\\\"siftPrediction\\\":\\\"tolerated - low confidence\\\"},{\\\"transcript\\\":\\\"NM_001940.3\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3294\\\",\\\"cdsPos\\\":\\\"3064\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1022\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"NM_001940.3:c.3064G>A\\\",\\\"hgvsp\\\":\\\"NP_001931.2:p.(Ala1022Thr)\\\",\\\"polyPhenScore\\\":0.36,\\\"polyPhenPrediction\\\":\\\"benign\\\",\\\"proteinId\\\":\\\"NP_001931.2\\\",\\\"siftScore\\\":0.1,\\\"siftPrediction\\\":\\\"tolerated - low 
confidence\\\"},{\\\"transcript\\\":\\\"ENST00000541029.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000537488.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000538392.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000542222.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"processed_transcript\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"NM_001301836.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"NP_001288765.1\\\"},{\\\"transcript\\\":\\\"ENST00000545581.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000440602.1\\\"},{\\\"transcript\\\":\\\"NM_001301834.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"NP_001288763.1\\\"},{\\\"transcript\\\":\\\"ENST00000607421.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"antisense\\\",\\\"geneId\\\":\\\"ENSG00000272173\\\",\\\"hgnc\\\":\\\"U47924.2\\\",\\\"conseq
uence\\\":[\\\"downstream_gene_variant\\\"],\\\"isCanonical\\\":true},{\\\"transcript\\\":\\\"ENST00000458811.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"snRNA\\\",\\\"geneId\\\":\\\"ENSG00000238923\\\",\\\"hgnc\\\":\\\"RNU7-1\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true},{\\\"transcript\\\":\\\"NR_023317.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"snRNA\\\",\\\"geneId\\\":\\\"100147744\\\",\\\"hgnc\\\":\\\"RNU7-1\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true},{\\\"transcript\\\":\\\"ENST00000544681.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000475422.1\\\"},{\\\"transcript\\\":\\\"NM_001301838.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"NP_001288767.1\\\"},{\\\"transcript\\\":\\\"NM_001301837.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"NP_001288766.1\\\"},{\\\"transcript\\\":\\\"NR_126035.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"misc_RNA\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"NM_138425.3\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"NP_612434.1\\\"},{\\\"transcript\\\":\\\"ENST00000537087.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG0000011167
8\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000440937.1\\\"},{\\\"transcript\\\":\\\"ENST00000229281.5\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"ENSP00000229281.5\\\"}]},{\\\"vid\\\":\\\"12:7048190:*\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7048190,\\\"end\\\":7048190,\\\"refAllele\\\":\\\"G\\\",\\\"altAllele\\\":\\\"<*>\\\",\\\"variantType\\\":\\\"non_informative_allele\\\"}]}\")]\r\n        [InlineData(\"chr12\t7048190\t.\tG\tA,<M>\t322\tPASS\tSB=0.1234567\t.\t.\", \"{\\\"chromosome\\\":\\\"chr12\\\",\\\"position\\\":7048190,\\\"refAllele\\\":\\\"G\\\",\\\"altAlleles\\\":[\\\"A\\\",\\\"<M>\\\"],\\\"quality\\\":322,\\\"filters\\\":[\\\"PASS\\\"],\\\"strandBias\\\":0.123457,\\\"cytogeneticBand\\\":\\\"12p13.31\\\",\\\"samples\\\":[{\\\"isEmpty\\\":true}],\\\"variants\\\":[{\\\"vid\\\":\\\"12:7048190:A\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7048190,\\\"end\\\":7048190,\\\"refAllele\\\":\\\"G\\\",\\\"altAllele\\\":\\\"A\\\",\\\"variantType\\\":\\\"SNV\\\",\\\"transcripts\\\":[{\\\"transcript\\\":\\\"ENST00000356654.4\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3301\\\",\\\"cdsPos\\\":\\\"3064\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1022\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000356654.4:c.3064G>A\\\",\\\"hgvsp\\\":\\\"ENSP00000349076.3:p.(Ala1022Thr)\\\",\\\"isCanonical\\\":true,\\\"polyPhenScore\\\":0.36,\\\"polyPhenPrediction\\\":\\\"benign\\\",\\\"proteinId\\\":\\\"ENSP00000349076.3\\\",\\\"siftScore\\\":0.1,\\\"siftPrediction\\\":\\\"tolerated - low 
confidence\\\"},{\\\"transcript\\\":\\\"NM_001007026.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3301\\\",\\\"cdsPos\\\":\\\"3064\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1022\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"NM_001007026.1:c.3064G>A\\\",\\\"hgvsp\\\":\\\"NP_001007027.1:p.(Ala1022Thr)\\\",\\\"isCanonical\\\":true,\\\"polyPhenScore\\\":0.36,\\\"polyPhenPrediction\\\":\\\"benign\\\",\\\"proteinId\\\":\\\"NP_001007027.1\\\",\\\"siftScore\\\":0.1,\\\"siftPrediction\\\":\\\"tolerated - low confidence\\\"},{\\\"transcript\\\":\\\"ENST00000396684.2\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3298\\\",\\\"cdsPos\\\":\\\"3064\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1022\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000396684.2:c.3064G>A\\\",\\\"hgvsp\\\":\\\"ENSP00000379915.2:p.(Ala1022Thr)\\\",\\\"polyPhenScore\\\":0.36,\\\"polyPhenPrediction\\\":\\\"benign\\\",\\\"proteinId\\\":\\\"ENSP00000379915.2\\\",\\\"siftScore\\\":0.1,\\\"siftPrediction\\\":\\\"tolerated - low 
confidence\\\"},{\\\"transcript\\\":\\\"NM_001940.3\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3294\\\",\\\"cdsPos\\\":\\\"3064\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1022\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"NM_001940.3:c.3064G>A\\\",\\\"hgvsp\\\":\\\"NP_001931.2:p.(Ala1022Thr)\\\",\\\"polyPhenScore\\\":0.36,\\\"polyPhenPrediction\\\":\\\"benign\\\",\\\"proteinId\\\":\\\"NP_001931.2\\\",\\\"siftScore\\\":0.1,\\\"siftPrediction\\\":\\\"tolerated - low confidence\\\"},{\\\"transcript\\\":\\\"ENST00000541029.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000537488.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000538392.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000542222.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"processed_transcript\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"NM_001301836.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"NP_001288765.1\\\"},{\\\"transcript\\\":\\\"ENST00000545581.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\
\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000440602.1\\\"},{\\\"transcript\\\":\\\"NM_001301834.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"NP_001288763.1\\\"},{\\\"transcript\\\":\\\"ENST00000607421.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"antisense\\\",\\\"geneId\\\":\\\"ENSG00000272173\\\",\\\"hgnc\\\":\\\"U47924.2\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"isCanonical\\\":true},{\\\"transcript\\\":\\\"ENST00000458811.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"snRNA\\\",\\\"geneId\\\":\\\"ENSG00000238923\\\",\\\"hgnc\\\":\\\"RNU7-1\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true},{\\\"transcript\\\":\\\"NR_023317.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"snRNA\\\",\\\"geneId\\\":\\\"100147744\\\",\\\"hgnc\\\":\\\"RNU7-1\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true},{\\\"transcript\\\":\\\"ENST00000544681.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000475422.1\\\"},{\\\"transcript\\\":\\\"NM_001301838.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"NP_001288767.1\\\"},{\\\"transcript\\\":\\\"NM_001301837.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"
],\\\"proteinId\\\":\\\"NP_001288766.1\\\"},{\\\"transcript\\\":\\\"NR_126035.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"misc_RNA\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"NM_138425.3\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"NP_612434.1\\\"},{\\\"transcript\\\":\\\"ENST00000537087.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000440937.1\\\"},{\\\"transcript\\\":\\\"ENST00000229281.5\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"ENSP00000229281.5\\\"}]},{\\\"vid\\\":\\\"12:7048190:*\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7048190,\\\"end\\\":7048190,\\\"refAllele\\\":\\\"G\\\",\\\"altAllele\\\":\\\"<M>\\\",\\\"variantType\\\":\\\"non_informative_allele\\\"}]}\")]\r\n        [InlineData(\"chr12\t7048190\t.\tG\tA,<NON_REF>\t322\tPASS\tSB=0.1234567\t.\t.\", 
\"{\\\"chromosome\\\":\\\"chr12\\\",\\\"position\\\":7048190,\\\"refAllele\\\":\\\"G\\\",\\\"altAlleles\\\":[\\\"A\\\",\\\"<NON_REF>\\\"],\\\"quality\\\":322,\\\"filters\\\":[\\\"PASS\\\"],\\\"strandBias\\\":0.123457,\\\"cytogeneticBand\\\":\\\"12p13.31\\\",\\\"samples\\\":[{\\\"isEmpty\\\":true}],\\\"variants\\\":[{\\\"vid\\\":\\\"12:7048190:A\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7048190,\\\"end\\\":7048190,\\\"refAllele\\\":\\\"G\\\",\\\"altAllele\\\":\\\"A\\\",\\\"variantType\\\":\\\"SNV\\\",\\\"transcripts\\\":[{\\\"transcript\\\":\\\"ENST00000356654.4\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3301\\\",\\\"cdsPos\\\":\\\"3064\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1022\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000356654.4:c.3064G>A\\\",\\\"hgvsp\\\":\\\"ENSP00000349076.3:p.(Ala1022Thr)\\\",\\\"isCanonical\\\":true,\\\"polyPhenScore\\\":0.36,\\\"polyPhenPrediction\\\":\\\"benign\\\",\\\"proteinId\\\":\\\"ENSP00000349076.3\\\",\\\"siftScore\\\":0.1,\\\"siftPrediction\\\":\\\"tolerated - low confidence\\\"},{\\\"transcript\\\":\\\"NM_001007026.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3301\\\",\\\"cdsPos\\\":\\\"3064\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1022\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"NM_001007026.1:c.3064G>A\\\",\\\"hgvsp\\\":\\\"NP_001007027.1:p.(Ala1022Thr)\\\",\\\"isCanonical\\\":true,\\\"polyPhenScore\\\":0.36,\\\"polyPhenPrediction\\\":\\\"benign\\\",\\\"proteinId\\\":\\\"NP_001007027.1\\\",\\\"siftScore\\\":0.1,\\\"siftPrediction\\\":\\\"tolerated - low 
confidence\\\"},{\\\"transcript\\\":\\\"ENST00000396684.2\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3298\\\",\\\"cdsPos\\\":\\\"3064\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1022\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"ENST00000396684.2:c.3064G>A\\\",\\\"hgvsp\\\":\\\"ENSP00000379915.2:p.(Ala1022Thr)\\\",\\\"polyPhenScore\\\":0.36,\\\"polyPhenPrediction\\\":\\\"benign\\\",\\\"proteinId\\\":\\\"ENSP00000379915.2\\\",\\\"siftScore\\\":0.1,\\\"siftPrediction\\\":\\\"tolerated - low confidence\\\"},{\\\"transcript\\\":\\\"NM_001940.3\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"codons\\\":\\\"Gca/Aca\\\",\\\"aminoAcids\\\":\\\"A/T\\\",\\\"cdnaPos\\\":\\\"3294\\\",\\\"cdsPos\\\":\\\"3064\\\",\\\"exons\\\":\\\"7/10\\\",\\\"proteinPos\\\":\\\"1022\\\",\\\"geneId\\\":\\\"1822\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"missense_variant\\\"],\\\"hgvsc\\\":\\\"NM_001940.3:c.3064G>A\\\",\\\"hgvsp\\\":\\\"NP_001931.2:p.(Ala1022Thr)\\\",\\\"polyPhenScore\\\":0.36,\\\"polyPhenPrediction\\\":\\\"benign\\\",\\\"proteinId\\\":\\\"NP_001931.2\\\",\\\"siftScore\\\":0.1,\\\"siftPrediction\\\":\\\"tolerated - low 
confidence\\\"},{\\\"transcript\\\":\\\"ENST00000541029.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000537488.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111676\\\",\\\"hgnc\\\":\\\"ATN1\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000538392.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"retained_intron\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"ENST00000542222.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"processed_transcript\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"NM_001301836.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"NP_001288765.1\\\"},{\\\"transcript\\\":\\\"ENST00000545581.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000440602.1\\\"},{\\\"transcript\\\":\\\"NM_001301834.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"NP_001288763.1\\\"},{\\\"transcript\\\":\\\"ENST00000607421.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"antisense\\\",\\\"geneId\\\":\\\"ENSG00000272173\\\",\\\"hgnc\\\":\\\"U47924.2\\\",\\\"conseq
uence\\\":[\\\"downstream_gene_variant\\\"],\\\"isCanonical\\\":true},{\\\"transcript\\\":\\\"ENST00000458811.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"snRNA\\\",\\\"geneId\\\":\\\"ENSG00000238923\\\",\\\"hgnc\\\":\\\"RNU7-1\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true},{\\\"transcript\\\":\\\"NR_023317.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"snRNA\\\",\\\"geneId\\\":\\\"100147744\\\",\\\"hgnc\\\":\\\"RNU7-1\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true},{\\\"transcript\\\":\\\"ENST00000544681.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000475422.1\\\"},{\\\"transcript\\\":\\\"NM_001301838.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"NP_001288767.1\\\"},{\\\"transcript\\\":\\\"NM_001301837.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"NP_001288766.1\\\"},{\\\"transcript\\\":\\\"NR_126035.1\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"misc_RNA\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"]},{\\\"transcript\\\":\\\"NM_138425.3\\\",\\\"source\\\":\\\"RefSeq\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"113246\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"NP_612434.1\\\"},{\\\"transcript\\\":\\\"ENST00000537087.1\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG0000011167
8\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"proteinId\\\":\\\"ENSP00000440937.1\\\"},{\\\"transcript\\\":\\\"ENST00000229281.5\\\",\\\"source\\\":\\\"Ensembl\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"ENSG00000111678\\\",\\\"hgnc\\\":\\\"C12orf57\\\",\\\"consequence\\\":[\\\"upstream_gene_variant\\\"],\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"ENSP00000229281.5\\\"}]},{\\\"vid\\\":\\\"12:7048190:*\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7048190,\\\"end\\\":7048190,\\\"refAllele\\\":\\\"G\\\",\\\"altAllele\\\":\\\"<NON_REF>\\\",\\\"variantType\\\":\\\"non_informative_allele\\\"}]}\")]\r\n        [InlineData(\"chr12\t7048190\t.\tG\t*\t322\tPASS\tSB=0.1234567\t.\t.\", \"{\\\"chromosome\\\":\\\"chr12\\\",\\\"position\\\":7048190,\\\"refAllele\\\":\\\"G\\\",\\\"altAlleles\\\":[\\\"*\\\"],\\\"quality\\\":322,\\\"filters\\\":[\\\"PASS\\\"],\\\"strandBias\\\":0.123457,\\\"cytogeneticBand\\\":\\\"12p13.31\\\",\\\"samples\\\":[{\\\"isEmpty\\\":true}],\\\"variants\\\":[{\\\"vid\\\":\\\"12:7048190:*\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7048190,\\\"end\\\":7048190,\\\"refAllele\\\":\\\"G\\\",\\\"altAllele\\\":\\\"*\\\",\\\"variantType\\\":\\\"non_informative_allele\\\"}]}\")]\r\n        [InlineData(\"chr12\t7048190\t.\tG\t<*>\t322\tPASS\tSB=0.1234567\t.\t.\", \"{\\\"chromosome\\\":\\\"chr12\\\",\\\"position\\\":7048190,\\\"refAllele\\\":\\\"G\\\",\\\"altAlleles\\\":[\\\"<*>\\\"],\\\"quality\\\":322,\\\"filters\\\":[\\\"PASS\\\"],\\\"strandBias\\\":0.123457,\\\"cytogeneticBand\\\":\\\"12p13.31\\\",\\\"samples\\\":[{\\\"isEmpty\\\":true}],\\\"variants\\\":[{\\\"vid\\\":\\\"12:7048190:*\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7048190,\\\"end\\\":7048190,\\\"refAllele\\\":\\\"G\\\",\\\"altAllele\\\":\\\"<*>\\\",\\\"variantType\\\":\\\"non_informative_allele\\\"}]}\")]\r\n        [InlineData(\"chr12\t7048190\t.\tG\t<M>\t322\tPASS\tSB=0.1234567\t.\t.\", 
\"{\\\"chromosome\\\":\\\"chr12\\\",\\\"position\\\":7048190,\\\"refAllele\\\":\\\"G\\\",\\\"altAlleles\\\":[\\\"<M>\\\"],\\\"quality\\\":322,\\\"filters\\\":[\\\"PASS\\\"],\\\"strandBias\\\":0.123457,\\\"cytogeneticBand\\\":\\\"12p13.31\\\",\\\"samples\\\":[{\\\"isEmpty\\\":true}],\\\"variants\\\":[{\\\"vid\\\":\\\"12:7048190:*\\\",\\\"chromosome\\\":\\\"chr12\\\",\\\"begin\\\":7048190,\\\"end\\\":7048190,\\\"refAllele\\\":\\\"G\\\",\\\"altAllele\\\":\\\"<M>\\\",\\\"variantType\\\":\\\"non_informative_allele\\\"}]}\")]\r\n        [InlineData(\"chr12\t7048190\t.\tG\t<NON_REF>\t322\tPASS\tSB=0.1234567\t.\t.\", null)]\r\n        public void Annotate_NonInformativeAllele_MinimalAnnotation_NoSa_AsExpected(string vcfLine, string expectedResults)\r\n        {\r\n            var annotatedPosition = AnnotationUtilities.GetAnnotatedPosition(_cacheFilePrefix, null, vcfLine);\r\n            string observedResults = annotatedPosition.GetJsonString();\r\n            Assert.Equal(expectedResults, observedResults);\r\n        }\r\n#endif\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/ErrorHandling/ExceptionUtilitiesTests.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.Net;\r\nusing System.Threading.Tasks;\r\nusing Xunit;\r\nusing static ErrorHandling.ExceptionUtilities;\r\n\r\nnamespace UnitTests.ErrorHandling\r\n{\r\n    public sealed class ExceptionUtilitiesTests\r\n    {\r\n        private readonly Exception _generalException = new Exception(\"first level\", new Exception(\"second level\", new Exception(\"third level\")));\r\n        private readonly Exception _taskCancellation1 = new Exception(\"first level\", new TaskCanceledException(\"second level\", new Exception(\"third level\")));\r\n        private readonly Exception _taskCancellation2 = new Exception(\"first level\", new Exception(\"second level\", new TaskCanceledException(\"third level\")));\r\n\r\n        [Fact]\r\n        public void HasException_AsExpected()\r\n        {\r\n            Assert.False(HasException<TaskCanceledException>(_generalException));\r\n            Assert.True(HasException<TaskCanceledException>(_taskCancellation1));\r\n            Assert.True(HasException<TaskCanceledException>(_taskCancellation2));\r\n        }\r\n\r\n        [Fact]\r\n        public void GetInnermostException_AsExpected()\r\n        {\r\n            Assert.Equal(\"third level\", GetInnermostException(_generalException).Message);\r\n            Assert.Equal(\"third level\", GetInnermostException(_taskCancellation1).Message);\r\n            Assert.Equal(\"third level\", GetInnermostException(_taskCancellation2).Message);\r\n        }\r\n    }\r\n\r\n    public sealed class MockHttpWebResponse : WebResponse\r\n    {\r\n        private readonly Stream _stream;\r\n        public HttpStatusCode StatusCode { get; }\r\n\r\n        public MockHttpWebResponse(Stream stream, HttpStatusCode statusCode)\r\n        {\r\n            _stream = stream;\r\n            StatusCode = statusCode;\r\n        }\r\n\r\n        public override Stream GetResponseStream() => _stream;\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/ErrorHandling/Exceptions/ExceptionsTests.cs",
    "content": "﻿using System;\r\nusing System.Collections;\r\nusing System.Collections.Generic;\r\nusing ErrorHandling;\r\nusing ErrorHandling.Exceptions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.ErrorHandling.Exceptions\r\n{\r\n    public sealed class ExceptionsTests\r\n    {\r\n        private sealed class ExceptionGenerator : IEnumerable<object[]>\r\n        {\r\n            private readonly List<object[]> _data = new List<object[]>\r\n            {\r\n                new object[] { new CompressionException(\"test\"),               ExitCodes.Compression},\r\n                new object[] { new FileNotSortedException(\"test\"),             ExitCodes.FileNotSorted},\r\n                new object[] { new InvalidFileFormatException(\"test\"),         ExitCodes.InvalidFileFormat},\r\n                new object[] { new MissingCompressionLibraryException(\"test\"), ExitCodes.MissingCompressionLibrary},\r\n                new object[] { new ProcessLockedFileException(\"test\"),         ExitCodes.SharingViolation},\r\n                new object[] { new UserErrorException(\"test\"),                 ExitCodes.UserError}\r\n            };\r\n\r\n            public IEnumerator<object[]> GetEnumerator() => _data.GetEnumerator();\r\n            IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();\r\n        }\r\n\r\n        [Theory]\r\n        [ClassData(typeof(ExceptionGenerator))]\r\n        public void Check_ExceptionToExitCode_Mapping(Exception ex, ExitCodes expectedExitCode)\r\n        {\r\n            ExitCodes observedExitCode = ExitCodeUtilities.GetExitCode(ex.GetType());\r\n            Assert.Equal(expectedExitCode, observedExitCode);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/ErrorHandling/ExitCodeUtilitiesTests.cs",
    "content": "﻿using System;\r\nusing System.Threading;\r\nusing ErrorHandling;\r\nusing ErrorHandling.Exceptions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.ErrorHandling\r\n{\r\n    public sealed class ExitCodeUtilitiesTests\r\n    {\r\n        [Fact]\r\n        public void ShowException_CompressionException_CheckExitCode()\r\n        {\r\n            var compressionException = new CompressionException(\"test\");\r\n            compressionException.Data[ExitCodeUtilities.VcfLine] = \"chr1\\t100\\tA\\tC\";\r\n            var exitCode = ExitCodeUtilities.ShowException(compressionException);\r\n            Assert.Equal(ExitCodes.Compression, exitCode);\r\n        }\r\n\r\n        [Fact]\r\n        public void ShowException_UnknownException_ExitCode_ShouldBeOne()\r\n        {\r\n            var unknownException = new AbandonedMutexException();\r\n            var exitCode = ExitCodeUtilities.ShowException(unknownException);\r\n            Assert.Equal(ExitCodes.InvalidFunction, exitCode);\r\n        }\r\n\r\n        [Fact]\r\n        public void ShowException_AggregateException_ExitCode_ShouldBeOne()\r\n        {\r\n            // TODO: It would be great to verify which exception was shown\r\n            var refNullException   = new NullReferenceException();\r\n            var aggregateException = new AggregateException(refNullException);\r\n            var exitCode           = ExitCodeUtilities.ShowException(aggregateException);\r\n            Assert.Equal(ExitCodes.InvalidFunction, exitCode);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/ErrorHandling/ExitCodesTests.cs",
    "content": ""
  },
  {
    "path": "UnitTests/GeneAnnotationLambda/GeneAnnotationLambdaTests.cs",
    "content": "using System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing Cloud.Messages.Gene;\nusing UnitTests.TestUtilities;\nusing Xunit;\n\nnamespace UnitTests.GeneAnnotationLambda\n{\n    public sealed class GeneAnnotationLambdaTests\n    {\n        private readonly string _manifestPath  = Resources.TopPath(\"manifest.txt\");\n        private readonly string _customNgaPath = Resources.TopPath(\"custom_gene.nga\");\n        private readonly string _prefix        = Resources.Top + Path.DirectorySeparatorChar;\n\n        [Fact]\n        public void GetNgaFiles_AsExpected()\n        {\n            IEnumerable<string> ngaFiles = global::GeneAnnotationLambda.GeneAnnotationLambda.GetNgaFileList(_manifestPath, _prefix, new[] { _customNgaPath });\n\n            IEnumerable<string> expectedFiles = new[]\n            {\n                \"ClinGen_Dosage_Sensitivity_Map_20190507.nga\",\n                \"gnomAD_gene_scores_2.1.nga\",\n                \"OMIM_20190812.nga\",\n                \"custom_gene.nga\"\n            }.Select(Resources.TopPath);\n\n            Assert.Equal(expectedFiles, ngaFiles);\n        }\n\n        [Fact]\n        public void GetGeneAnnotation_AsExpected()\n        {\n            var input = new GeneConfig\n            {\n                id = \"test\",\n                geneSymbols = new[] { \"TP53\", \"ZIC2\", \"LOC645752\" },\n                ngaUrls = new[] { _customNgaPath }\n            };\n\n            string responseString = global::GeneAnnotationLambda.GeneAnnotationLambda.GetGeneAnnotation(input, _manifestPath, _prefix);\r\n\r\n            Assert.Contains(\"header\", responseString);\n            Assert.Contains(\"TP53\", responseString);\n            Assert.Contains(\"ZIC2\", responseString);\n            Assert.Contains(\"clingenDosageSensitivityMap\", responseString);\n            Assert.Contains(\"gnomAD\", responseString);\n            Assert.Contains(\"omim\", responseString);\n            
Assert.Contains(\"InternalGeneAnnotation\", responseString);\n            Assert.DoesNotContain(\"LOC645752\", responseString);\n        }\n    }\n}\n"
  },
  {
    "path": "UnitTests/GeneAnnotationLambda/GeneConfigTests.cs",
    "content": "﻿using Cloud.Messages.Gene;\r\nusing ErrorHandling.Exceptions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.GeneAnnotationLambda\r\n{\r\n    public sealed class GeneConfigTests\r\n    {\r\n        [Fact]\r\n        public void Validate_NoId_ThrowException()\r\n        {\r\n            var input = new GeneConfig {geneSymbols = new[] {\"TP53\"}};\r\n            Assert.Throws<UserErrorException>(() => input.Validate());\r\n        }\r\n\r\n        [Fact]\r\n        public void Validate_NoGeneSymbols_ThrowException()\r\n        {\r\n            var input = new GeneConfig { id = \"test\" };\r\n            Assert.Throws<UserErrorException>(() => input.Validate());\r\n        }\r\n\r\n        [Fact]\r\n        public void Validate_EmptyGeneSymbols_ThrowException()\r\n        {\r\n            var input = new GeneConfig { id = \"test\", geneSymbols = new string[]{}};\r\n            Assert.Throws<UserErrorException>(() => input.Validate());\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/Genome/ChromosomeIntervalTests.cs",
    "content": "﻿using Genome;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Genome\r\n{\r\n    public sealed class ChromosomeIntervalTests\r\n    {\r\n        [Fact]\r\n        public void ChromosomeInterval_Setup()\r\n        {\r\n            const int expectedStart = 100;\r\n            const int expectedEnd = 200;\r\n\r\n            var observedInterval = new ChromosomeInterval(ChromosomeUtilities.Chr1, 100, 200);\r\n\r\n            Assert.Equal(ChromosomeUtilities.Chr1, observedInterval.Chromosome);\r\n            Assert.Equal(expectedStart, observedInterval.Start);\r\n            Assert.Equal(expectedEnd, observedInterval.End);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Genome/CytogeneticBandTests.cs",
    "content": "﻿using Genome;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Genome\r\n{\r\n    public sealed class CytogeneticBandTests\r\n    {\r\n        private static readonly Band[] CytogeneticBands = {\r\n            new Band(88300001, 92800000, \"q14.3\"),\r\n            new Band(92800001, 97200000, \"q21\")\r\n        };\r\n\r\n        [Theory]\r\n        [InlineData(88400000, 92900000, \"11q14.3-q21\")]\r\n        [InlineData(88400000, 92400000, \"11q14.3\")]\r\n        [InlineData(92820001, 92900000, \"11q21\")]\r\n        [InlineData(92820001, 92820001, \"11q21\")]\r\n        [InlineData(1, 1, null)]\r\n        [InlineData(97000000, 98200000, null)]\r\n        public void GetCytogeneticBand_Range(int start, int end, string expectedCytogeneticBand)\r\n        {\r\n            string observedCytogeneticBand = CytogeneticBands.Find(ChromosomeUtilities.Chr11, start, end);\r\n\r\n            Assert.Equal(expectedCytogeneticBand, observedCytogeneticBand);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCytogeneticBand_UnknownReference_ReturnNull()\r\n        {\r\n            string observedCytogeneticBand = CytogeneticBands.Find(ChromosomeUtilities.Chr12, 100, 200);\r\n            Assert.Null(observedCytogeneticBand);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCytogeneticBand_UnknownReferenceIndex_ReturnNull()\r\n        {\r\n            string observedCytogeneticBand = CytogeneticBands.Find(ChromosomeUtilities.Bob, 100, 200);\r\n            Assert.Null(observedCytogeneticBand);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Genome/EmptyChromosomeTests.cs",
    "content": "﻿using Genome;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Genome\r\n{\r\n    public sealed class EmptyChromosomeTests\r\n    {\r\n        private readonly Chromosome _emptyChromosome  = Chromosome.GetEmptyChromosome(\"chr1\");\r\n        private readonly Chromosome _emptyChromosome2 = Chromosome.GetEmptyChromosome(\"chr1\");\r\n\r\n        [Fact]\r\n        public void Equals_True()\r\n        {\r\n            Assert.True(_emptyChromosome.Equals(_emptyChromosome2));\r\n        }\r\n\r\n        [Fact]\r\n        public void Equals_False()\r\n        {\r\n            Assert.False(_emptyChromosome.Equals(ChromosomeUtilities.Chr1));\r\n            Assert.False(ChromosomeUtilities.Chr1.Equals(_emptyChromosome));\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHashCode_True()\r\n        {\r\n            Assert.Equal(_emptyChromosome.GetHashCode(), _emptyChromosome2.GetHashCode());\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHashCode_False()\r\n        {\r\n            Assert.NotEqual(_emptyChromosome.GetHashCode(), ChromosomeUtilities.Chr1.GetHashCode());\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Genome/GenomeAssemblyHelperTests.cs",
    "content": "﻿using ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Genome\r\n{\r\n    public sealed class GenomeAssemblyHelperTests\r\n    {\r\n        [Theory]\r\n        [InlineData(\"GRCH37\", GenomeAssembly.GRCh37)]\r\n        [InlineData(\"GRCH38\", GenomeAssembly.GRCh38)]\r\n        [InlineData(\"HG19\",   GenomeAssembly.hg19)]\r\n        [InlineData(\"\",       GenomeAssembly.Unknown)]\r\n        [InlineData(\"RCRS\",   GenomeAssembly.rCRS)]\r\n        public void Convert_GenomeAssemblyExists(string s, GenomeAssembly expectedGenomeAssembly)\r\n        {\r\n            var observedResult = GenomeAssemblyHelper.Convert(s);\r\n            Assert.Equal(expectedGenomeAssembly, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void Convert_GenomeAssemblyDoesNotExist()\r\n        {\r\n            Assert.Throws<UserErrorException>(delegate\r\n            {\r\n                GenomeAssemblyHelper.Convert(\"dummy\");\r\n            });\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Genome/ReferenceNameUtilitiesTests.cs",
    "content": "﻿using System.IO;\r\nusing Genome;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Genome\r\n{\r\n    public sealed class ReferenceNameUtilitiesTests\r\n    {\r\n        [Fact]\r\n        public void GetChromosome_RefIndex_Exists()\r\n        {\r\n            var chromosome = ReferenceNameUtilities.GetChromosome(ChromosomeUtilities.RefIndexToChromosome, 2);\r\n            Assert.Equal(\"3\", chromosome.EnsemblName);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetChromosome_RefIndex_DoesNotExist()\r\n        {\r\n            Assert.Throws<InvalidDataException>(delegate\r\n            {\r\n                ReferenceNameUtilities.GetChromosome(ChromosomeUtilities.RefIndexToChromosome, 171);\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void GetChromosome_RefName_Exists()\r\n        {\r\n            var chromosome = ReferenceNameUtilities.GetChromosome(ChromosomeUtilities.RefNameToChromosome, \"1\");\r\n            Assert.Equal(0, chromosome.Index);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetChromosome_RefName_DoesNotExist()\r\n        {\r\n            const string chromosomeName = \"dummy\";\r\n            var chromosome = ReferenceNameUtilities.GetChromosome(ChromosomeUtilities.RefNameToChromosome, chromosomeName);\r\n            Assert.Equal(chromosomeName, chromosome.EnsemblName);\r\n            Assert.Equal(chromosomeName, chromosome.UcscName);\r\n            Assert.True(chromosome.IsEmpty());\r\n        }\r\n\r\n        [Fact]\r\n        public void GetChromosome_RefName_NullName()\r\n        {\r\n            var chromosome = ReferenceNameUtilities.GetChromosome(ChromosomeUtilities.RefNameToChromosome, null);\r\n            Assert.Equal(string.Empty, chromosome.EnsemblName);\r\n            Assert.Equal(string.Empty, chromosome.UcscName);\r\n            Assert.True(chromosome.IsEmpty());\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Genome/SequenceUtilitiesTests.cs",
    "content": "﻿using Genome;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Genome\r\n{\r\n    public sealed class SequenceUtilitiesTests\r\n    {\r\n        [Theory]\r\n        [InlineData(\"ACGTTTGA\", \"TCAAACGT\")]\r\n        [InlineData(null, null)]\r\n        public void GetReverseComplement(string bases, string expectedResult)\r\n        {\r\n            var observedResult = SequenceUtilities.GetReverseComplement(bases);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(\"ACGT\", false)]\r\n        [InlineData(\"ACXT\", true)]\r\n        [InlineData(null, false)]\r\n        public void HasNonCanonicalBase(string bases, bool expectedResult)\r\n        {\r\n            var observedResult = SequenceUtilities.HasNonCanonicalBase(bases);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/IO/BufferedBinaryReaderTests.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.Text;\r\nusing IO;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.IO\r\n{\r\n    public sealed class BufferedBinaryReaderTests\r\n    {\r\n        [Fact]\r\n        public void ReadBoolean()\r\n        {\r\n            const bool expectedValue = true;\r\n            bool observedValue = GetObservedValue(writer => writer.Write(expectedValue), reader => reader.ReadBoolean());\r\n            Assert.Equal(expectedValue, observedValue);\r\n        }\r\n\r\n        [Fact]\r\n        public void ReadByte()\r\n        {\r\n            const byte expectedValue = byte.MaxValue;\r\n            byte observedValue = GetObservedValue(writer => writer.Write(expectedValue), reader => reader.ReadByte());\r\n            Assert.Equal(expectedValue, observedValue);\r\n        }\r\n\r\n        [Fact]\r\n        public void ReadBytes()\r\n        {\r\n            byte[] expectedValue = Encoding.ASCII.GetBytes(\"Hello world\");\r\n            byte[] observedValue = GetObservedValue(writer => writer.Write(expectedValue), reader => reader.ReadBytes(expectedValue.Length));\r\n            Assert.Equal(expectedValue, observedValue);\r\n        }\r\n\r\n        [Fact]\r\n        public void ReadUInt16()\r\n        {\r\n            const ushort expectedValue = ushort.MaxValue;\r\n            ushort observedValue = GetObservedValue(writer => writer.Write(expectedValue), reader => reader.ReadUInt16());\r\n            Assert.Equal(expectedValue, observedValue);\r\n        }\r\n\r\n        [Fact]\r\n        public void ReadUInt32()\r\n        {\r\n            const uint expectedValue = uint.MaxValue;\r\n            uint observedValue = GetObservedValue(writer => writer.Write(expectedValue), reader => reader.ReadUInt32());\r\n            Assert.Equal(expectedValue, observedValue);\r\n        }\r\n\r\n        [Fact]\r\n        public void ReadAsciiString()\r\n        {\r\n            const string expectedValue = \"Hello world\";\r\n       
     string observedValue = GetObservedValue(writer => writer.Write(expectedValue), reader => reader.ReadAsciiString());\r\n            Assert.Equal(expectedValue, observedValue);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(ushort.MaxValue)]\r\n        [InlineData(3)]\r\n        [InlineData(0)]\r\n        public void ReadOptUInt16_HandleExtremeIntegers(ushort expectedValue)\r\n        {\r\n            ushort observedValue = GetObservedValue(writer => writer.WriteOpt(expectedValue), reader => reader.ReadOptUInt16());\r\n            Assert.Equal(expectedValue, observedValue);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(int.MaxValue)]\r\n        [InlineData(-1)]\r\n        [InlineData(int.MinValue)]\r\n        public void ReadOptInt32_HandleExtremeIntegers(int expectedValue)\r\n        {\r\n            int observedValue = GetObservedValue(writer => writer.WriteOpt(expectedValue), reader => reader.ReadOptInt32());\r\n            Assert.Equal(expectedValue, observedValue);\r\n        }\r\n\r\n        private static T GetObservedValue<T>(Action<ExtendedBinaryWriter> writeMethod, Func<BufferedBinaryReader, T> readMethod)\r\n        {\r\n            T observedValue;\r\n            using (var memoryStream = new MemoryStream())\r\n            {\r\n                using (var writer = new ExtendedBinaryWriter(memoryStream, Encoding.UTF8, true))\r\n                {\r\n                    writeMethod(writer);\r\n                }\r\n\r\n                memoryStream.Position = 0;\r\n\r\n                using (var reader = new BufferedBinaryReader(memoryStream))\r\n                {\r\n                    observedValue = readMethod(reader);\r\n                }\r\n            }\r\n\r\n            return observedValue;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/IO/ExtendedBinaryReaderTests.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.Text;\r\nusing IO;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.IO\r\n{\r\n    public sealed class ExtendedBinaryReaderTests\r\n    {\r\n        [Theory]\r\n        [InlineData(3)]\r\n        [InlineData(0)]\r\n        [InlineData(-2)]\r\n        public void ReadOptInt32_HandleSmallIntegers(int expectedValue)\r\n        {\r\n            int observedValue = GetObservedValue(writer => writer.WriteOpt(expectedValue), reader => reader.ReadOptInt32());\r\n            Assert.Equal(expectedValue, observedValue);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(int.MaxValue)]\r\n        [InlineData(int.MinValue)]\r\n        public void ReadOptInt32_HandleExtremeIntegers(int expectedValue)\r\n        {\r\n            int observedValue = GetObservedValue(writer => writer.WriteOpt(expectedValue), reader => reader.ReadOptInt32());\r\n            Assert.Equal(expectedValue, observedValue);\r\n        }\r\n\r\n        [Fact]\r\n        public void ReadOptInt32_ThrowException_WithCorruptData()\r\n        {\r\n            Assert.Throws<FormatException>(delegate\r\n            {\r\n                using (var ms = new MemoryStream())\r\n                {\r\n                    using (var writer = new BinaryWriter(ms, Encoding.UTF8, true))\r\n                    {\r\n                        const ulong corruptInt = 0xffffffffffffffff;\r\n                        writer.Write(corruptInt);\r\n                    }\r\n\r\n                    ms.Position = 0;\r\n\r\n                    using (var reader = new ExtendedBinaryReader(ms))\r\n                    {\r\n                        reader.ReadOptInt32();\r\n                    }\r\n                }\r\n            });\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(ushort.MaxValue)]\r\n        [InlineData(ushort.MinValue)]\r\n        public void ReadOptUInt16_HandleExtremeIntegers(ushort expectedValue)\r\n        {\r\n            ushort 
observedValue = GetObservedValue(writer => writer.WriteOpt(expectedValue), reader => reader.ReadOptUInt16());\r\n            Assert.Equal(expectedValue, observedValue);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(3)]\r\n        [InlineData(0)]\r\n        [InlineData(-2)]\r\n        public void ReadOptInt64_HandleSmallIntegers(long expectedValue)\r\n        {\r\n            long observedValue = GetObservedValue(writer => writer.WriteOpt(expectedValue), reader => reader.ReadOptInt64());\r\n            Assert.Equal(expectedValue, observedValue);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(long.MaxValue)]\r\n        [InlineData(long.MinValue)]\r\n        public void ReadOptInt64_HandleExtremeIntegers(long expectedValue)\r\n        {\r\n            long observedValue = GetObservedValue(writer => writer.WriteOpt(expectedValue), reader => reader.ReadOptInt64());\r\n            Assert.Equal(expectedValue, observedValue);\r\n        }\r\n\r\n        [Fact]\r\n        public void ReadOptInt64_ThrowException_WithCorruptData()\r\n        {\r\n            Assert.Throws<FormatException>(delegate\r\n            {\r\n                using (var ms = new MemoryStream())\r\n                {\r\n                    using (var writer = new BinaryWriter(ms, Encoding.UTF8, true))\r\n                    {\r\n                        const ulong corruptData = 0xffffffffffffffff;\r\n                        writer.Write(corruptData);\r\n                        writer.Write(corruptData);\r\n                    }\r\n\r\n                    ms.Position = 0;\r\n\r\n                    using (var reader = new ExtendedBinaryReader(ms))\r\n                    {\r\n                        reader.ReadOptInt64();\r\n                    }\r\n                }\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void ReadAsciiString_NullString()\r\n        {\r\n            string observedValue = GetObservedValue(writer => writer.WriteOptAscii(null), 
reader => reader.ReadAsciiString());\r\n            Assert.Null(observedValue);\r\n        }\r\n\r\n        [Fact]\r\n        public void BufferedBinaryReader_EndToEnd_DoNotLeaveOpen()\r\n        {\r\n            const int expectedResult = 5;\r\n            int observedResult;\r\n            byte[] data;\r\n\r\n            using (var ms = new MemoryStream())\r\n            using (var writer = new ExtendedBinaryWriter(ms))\r\n            {\r\n                writer.Write(expectedResult);\r\n                data = ms.ToArray();\r\n            }\r\n\r\n            using (var ms = new MemoryStream(data))\r\n            using (var reader = new ExtendedBinaryReader(ms))\r\n            {\r\n                observedResult = reader.ReadInt32();\r\n            }\r\n\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        private static T GetObservedValue<T>(Action<ExtendedBinaryWriter> writeMethod, Func<ExtendedBinaryReader, T> readMethod)\r\n        {\r\n            T observedValue;\r\n            using (var memoryStream = new MemoryStream())\r\n            {\r\n                using (var writer = new ExtendedBinaryWriter(memoryStream, Encoding.UTF8, true))\r\n                {\r\n                    writeMethod(writer);\r\n                }\r\n\r\n                memoryStream.Position = 0;\r\n\r\n                using (var reader = new ExtendedBinaryReader(memoryStream))\r\n                {\r\n                    observedValue = readMethod(reader);\r\n                }\r\n            }\r\n\r\n            return observedValue;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/IO/FilePathUtilitiesTests.cs",
    "content": "﻿using IO;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.IO\r\n{\r\n    public sealed class FilePathUtilitiesTests\r\n    {\r\n        [InlineData(\"C:\\\\Input files\\\\input.test.mp3\", \".mp3\", true)]\r\n        [InlineData(\"C:\\\\Input files\\\\input\", \"C:\\\\Input files\\\\input\", true)]\r\n        [InlineData(\"\\\\\\\\ussd-prd-isi04\\\\Nirvana\\\\input.vcf\", \"vcf\", false)]\r\n        [InlineData(\"/d/Projects/Nirvana/input.vcf\", \".vcf\", true)]\r\n        [InlineData(\"https://illumina.s3.amazonaws.com/input/Custom_SA/Custom-annotations_short-GRCh37.nsa?AWSAccessKeyId=UUNE5Q&Expires=asdf223&Signature=asdfasd\", \".nsa\", true)]\r\n        [InlineData(\"https://stratus-gds-stage.s3.us-west-2.amazonaws.com/b9077f78-6b4e-4068-b4b2-08d6d80d1d7d/custom-filter-file/custom-annotation/2b8e155e-9046-4ef5-9ec0-374ccc98a93c/2b8e155e-9046-4ef5-9ec0-374ccc98a93c.nsa?X-Amz-Expires=604800&response-content-disposition=attachment%3Bfilename%3D%222b8e155e-9046-4ef5-9ec0-374ccc98a93c.nsa%22&x-userId=086723b2-1e53-32cd-a410-80cb885de66c&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ7P2VLXQJYGXATTA/20190708/us-west-2/s3/aws4_request&X-Amz-Date=20190708T163940Z&X-Amz-SignedHeaders=host&X-Amz-Signature=d386f9d0aa7aab1a1a67c3ee625a208589924a51e384840ce9159a88b6c8363a\", \"nsa\", false)]\r\n        [Theory]\r\n        public void GetFileSuffix_AsExpected(string filePath, string suffix, bool includeDot)\r\n        {\r\n            Assert.Equal(suffix, filePath.GetFileSuffix(includeDot));\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/IO/FileUtilitiesTests.cs",
    "content": "﻿using System.IO;\r\nusing IO;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.IO\r\n{\r\n    public sealed class FileUtilitiesTests\r\n    {\r\n        [Fact]\r\n        public void GetReadStream_GetCreateStream_Loopback()\r\n        {\r\n            string random = RandomPath.GetRandomPath();\r\n            const string expectedString = \"charlie\";\r\n\r\n            using (var writer = new StreamWriter(FileUtilities.GetCreateStream(random)))\r\n            {\r\n                writer.WriteLine(expectedString);\r\n            }\r\n\r\n            string observedString;\r\n            using (var reader = FileUtilities.GetStreamReader(FileUtilities.GetReadStream(random)))\r\n            {\r\n                observedString = reader.ReadLine();\r\n            }\r\n\r\n            Assert.Equal(expectedString, observedString);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/IO/HttpUtilitiesTests.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.Net;\r\nusing System.Xml.Linq;\r\nusing ErrorHandling.Exceptions;\r\nusing UnitTests.ErrorHandling;\r\nusing Xunit;\r\nusing IO;\r\n\r\nnamespace UnitTests.IO\r\n{\r\n    public sealed class HttpUtilitiesTests\r\n    {\r\n        [Fact]\r\n        public void IsWebProtocolErrorException_AsExpected()\r\n        {\r\n            Assert.False(HttpUtilities.IsWebProtocolErrorException(new Exception(\"An exception\")));\r\n            Assert.False(HttpUtilities.IsWebProtocolErrorException(new WebException(\"web exception\")));\r\n            Assert.False(HttpUtilities.IsWebProtocolErrorException(new WebException(\"web exception\", WebExceptionStatus.ConnectFailure)));\r\n            Assert.True(HttpUtilities.IsWebProtocolErrorException(new WebException(\"web exception\", null, WebExceptionStatus.ProtocolError, new MockHttpWebResponse(null, HttpStatusCode.NotFound))));\r\n            Assert.True(HttpUtilities.IsWebProtocolErrorException(new WebException(\"web exception\", null, WebExceptionStatus.ProtocolError, new MockHttpWebResponse(null, HttpStatusCode.Forbidden))));\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(\"InvalidAccessKeyId\", \"The AWS Access Key Id you provided does not exist in our records\", \"https://unit.test/bob.vcf.gz\", \"Something wrong.\", \"Authentication error while reading from URL for bob.vcf.gz.\")]\r\n        [InlineData(\"AccessDenied\", \"Request has expired\", \"https://expired.url/bob.vcf.gz\", \"Something wrong again.\", \"The provided URL for bob.vcf.gz has expired.\")]\r\n        public void ProcessHttpRequestForbiddenException_AsExpected(string errorCode, string message, string url, string exceptionMessage, string newErrorMessage)\r\n        {\r\n            XElement xmlMessage = new XElement(\"Root\", new XElement(\"Code\", errorCode), new XElement(\"Message\", message));\r\n            var stream = new MemoryStream();\r\n            
xmlMessage.Save(stream);\r\n            stream.Position = 0;\r\n\r\n            var response = new MockHttpWebResponse(stream, HttpStatusCode.Forbidden);\r\n            var inputException = new WebException(exceptionMessage, null, WebExceptionStatus.ProtocolError, response);\r\n\r\n            var outputException = HttpUtilities.ProcessHttpRequestWebProtocolErrorException(inputException, url);\r\n            Assert.IsType<UserErrorException>(outputException);\r\n            Assert.Equal(newErrorMessage, outputException.Message);\r\n        }\r\n\r\n        [Fact]\r\n        public void ValidateUrl_invalid_user_provided()\r\n        {\r\n            Assert.Throws<UserErrorException>(() =>\r\n                HttpUtilities.ValidateUrl(\r\n                    \"https://nirvana-annotations.s3.us-west-2.amazonaws.com/645778a7d475ac437d15765ef3c6f50c-OMIM/0/OMIM_20191004.nga\"));\r\n        }\r\n\r\n        [Fact]\r\n        public void ValidateUrl_invalid_deployment()\r\n        {\r\n            Assert.Throws<DeploymentErrorException>(() =>\r\n                HttpUtilities.ValidateUrl(\r\n                    \"https://nirvana-annotations.s3.us-west-2.amazonaws.com/645778a7d475ac437d15765ef3c6f50c-OMIM/0/OMIM_20191004.nga\", false));\r\n        }\r\n\r\n        [Fact]\r\n        public void ValidateUrl_valid()\r\n        {\r\n            HttpUtilities.ValidateUrl(\r\n                    \"https://nirvana-annotations.s3.us-west-2.amazonaws.com/645778a7d475ac437d15765ef3c6f50c-OMIM/6/OMIM_20191004.nga\", false);\r\n        }\r\n\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/IO/LengthStreamTests.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.Text;\r\nusing IO;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.IO\r\n{\r\n    public sealed class LengthStreamTests\r\n    {\r\n        [Fact]\r\n        public void Length_AsExpected()\r\n        {\r\n            long trueLength, modifiedLength;\r\n\r\n            using (var memoryStream = new MemoryStream())\r\n            {\r\n                using (var writer = new StreamWriter(memoryStream, Encoding.ASCII, 1024, true))\r\n                {\r\n                    writer.Write(\"The quick brown fox jumps over the lazy dog\");\r\n                }\r\n\r\n                trueLength = memoryStream.Length;\r\n\r\n\r\n                using (var lengthStream = new LengthStream(memoryStream, 3))\r\n                {\r\n                    modifiedLength = lengthStream.Length;\r\n                }\r\n            }\r\n\r\n            Assert.Equal(43, trueLength);\r\n            Assert.Equal(3, modifiedLength);\r\n        }\r\n\r\n        [Fact]\r\n        public void StreamTests_AsExpected()\r\n        {\r\n            using (var memoryStream = new MemoryStream())\r\n            {\r\n                using (var writer = new StreamWriter(memoryStream, Encoding.ASCII, 1024, true))\r\n                {\r\n                    writer.Write(\"The quick brown fox jumps over the lazy dog\");\r\n                }\r\n\r\n                long expectedPosition = memoryStream.Position;\r\n                memoryStream.Position = 0;\r\n\r\n                using (var lengthStream = new LengthStream(memoryStream, 3))\r\n                using (var reader = new StreamReader(lengthStream))\r\n                {\r\n                    reader.ReadToEnd();\r\n                    Assert.True(lengthStream.CanRead);\r\n                    Assert.True(lengthStream.CanWrite);\r\n                    Assert.True(lengthStream.CanSeek);\r\n                    Assert.Equal(3, lengthStream.Length);\r\n                    
Assert.True(lengthStream.Position >= expectedPosition);\r\n                }\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void StreamTests_Throws_NotSupportedException()\r\n        {\r\n            using (var memoryStream = new MemoryStream())\r\n            using (var lengthStream = new LengthStream(memoryStream, 3))\r\n            {\r\n                var buffer = new byte[10];\r\n\r\n                ThrowsNotSupportedException(lengthStream, stream => stream.Position = 5);\r\n                ThrowsNotSupportedException(lengthStream, stream => stream.Seek(0, SeekOrigin.Begin));\r\n                ThrowsNotSupportedException(lengthStream, stream => stream.Write(buffer, 0, buffer.Length));\r\n                ThrowsNotSupportedException(lengthStream, stream => stream.SetLength(7));\r\n                ThrowsNotSupportedException(lengthStream, stream => stream.Flush());\r\n            }\r\n        }\r\n\r\n        private static void ThrowsNotSupportedException<T>(LengthStream lengthStream, Func<LengthStream, T> exceptionFunc)\r\n        {\r\n            Assert.Throws<NotSupportedException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                exceptionFunc(lengthStream);\r\n            });\r\n        }\r\n\r\n        private static void ThrowsNotSupportedException(LengthStream lengthStream, Action<LengthStream> exceptionAction)\r\n        {\r\n            Assert.Throws<NotSupportedException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                exceptionAction(lengthStream);\r\n            });\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/IO/MD5StreamTests.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing System.Text;\r\nusing IO;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.IO\r\n{\r\n    public sealed class MD5StreamTests\r\n    {\r\n        [Fact]\r\n        public void GetFileMetadata_AsExpected()\r\n        {\r\n            FileMetadata observed, cachedObserved;\r\n\r\n            using (var memoryStream = new MemoryStream())\r\n            using (var md5Stream    = new MD5Stream(memoryStream))\r\n            {\r\n                using (var writer = new StreamWriter(md5Stream, Encoding.ASCII))\r\n                {\r\n                    writer.Write(\"The quick brown fox jumps over the lazy dog\");\r\n                }\r\n\r\n                observed       = md5Stream.GetFileMetadata();\r\n                cachedObserved = md5Stream.GetFileMetadata();\r\n            }\r\n\r\n            byte[] expectedMd5 = StringToByteArray(\"9e107d9d372bb6826bd81d3542a419d6\");\r\n            const int expectedLength = 43;\r\n\r\n            Assert.Equal(expectedMd5, observed.MD5);\r\n            Assert.Equal(expectedLength, observed.Length);\r\n            Assert.Equal(expectedMd5, cachedObserved.MD5);\r\n            Assert.Equal(expectedLength, cachedObserved.Length);\r\n        }\r\n\r\n        [Fact]\r\n        public void StreamTests_AsExpected()\r\n        {\r\n            using (var memoryStream = new MemoryStream())\r\n            using (var md5Stream = new MD5Stream(memoryStream))\r\n            {\r\n                using (var writer = new StreamWriter(md5Stream, Encoding.ASCII))\r\n                {\r\n                    writer.Write(\"The quick brown fox jumps over the lazy dog\");\r\n                    md5Stream.Flush();\r\n                }\r\n\r\n                Assert.True(md5Stream.CanRead);\r\n                Assert.True(md5Stream.CanWrite);\r\n                Assert.True(md5Stream.CanSeek);\r\n                Assert.Equal(43, md5Stream.Length);\r\n                
Assert.Equal(43, md5Stream.Position);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void StreamTests_Throws_NotSupportedException()\r\n        {\r\n            using (var memoryStream = new MemoryStream())\r\n            using (var md5Stream    = new MD5Stream(memoryStream))\r\n            {\r\n                var buffer = new byte[10];\r\n\r\n                ThrowsNotSupportedException(md5Stream, stream => stream.Read(buffer, 0, buffer.Length));\r\n                ThrowsNotSupportedException(md5Stream, stream => stream.Position = 5);\r\n                ThrowsNotSupportedException(md5Stream, stream => stream.Seek(0, SeekOrigin.Begin));\r\n                ThrowsNotSupportedException(md5Stream, stream => stream.SetLength(7));\r\n            }\r\n        }\r\n\r\n        private static void ThrowsNotSupportedException<T>(MD5Stream md5Stream, Func<MD5Stream, T> exceptionFunc)\r\n        {\r\n            Assert.Throws<NotSupportedException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                exceptionFunc(md5Stream);\r\n            });\r\n        }\r\n\r\n        private static void ThrowsNotSupportedException(MD5Stream lengthStream, Action<MD5Stream> exceptionAction)\r\n        {\r\n            Assert.Throws<NotSupportedException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                exceptionAction(lengthStream);\r\n            });\r\n        }\r\n\r\n        private static byte[] StringToByteArray(string hex)\r\n        {\r\n            return Enumerable.Range(0, hex.Length)\r\n                .Where(x => x % 2 == 0)\r\n                .Select(x => Convert.ToByte(hex.Substring(x, 2), 16))\r\n                .ToArray();\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/IO/PersistentStreamTests.cs",
    "content": "﻿using System.IO;\nusing System.Text;\nusing IO;\nusing Moq;\nusing Xunit;\n\nnamespace UnitTests.IO\n{\n    public sealed class PersistentStreamTests\n    {\n        private static Stream GetMockStream()\n        {\n            var memStream = new MemoryStream();\n            using (var writer = new StreamWriter(memStream, Encoding.Default, 4096, true))\n            {\n                writer.WriteLine(\"2551e067cb59c540a4da905a99ee5ff4-ClinGen/2/GRCh37/ClinGen_20160414.nsi\");\n                writer.WriteLine(\"43321b1a4f1c73724c00223e07d5e812-1kgSv/3/GRCh37/1000_Genomes_Project_Phase_3_v5a.nsi\");\n                writer.WriteLine(\"929439472713ec609b92b97dc22a2d42-dbSNP/4/GRCh37/dbSNP_151.nsa\");\n            }\n\n            memStream.Position = 0;\n            return memStream;\n        }\n        \n        private static IConnect GetWebRequest_connect_on_third()\n        {\n            var moqRequest = new Mock<IConnect>();\n\n            //Connect succeeds on 3rd attempt\n            moqRequest.SetupSequence(x => x.Connect(0))\n                .Throws(new IOException())\n                .Throws(new IOException())\n                .Returns((null,GetMockStream()));\n\n            return moqRequest.Object;\n        }\n\n        private static IConnect GetWebRequest_flaky_stream()\n        {\n            var moqRequest = new Mock<IConnect>();\n\n            moqRequest.SetupSequence(x => x.Connect(0))\n                .Returns((null, null))\n                .Returns((null, GetMockStream()));\n            \n            return moqRequest.Object;\n        }\n\n        private static IConnect GetWebRequest_connect_on_seventh()\n        {\n            var moqRequest = new Mock<IConnect>();\n\n            //Connect succeeds on 3rd attempt\n            moqRequest.SetupSequence(x => x.Connect(0))\n                .Throws(new IOException())\n                .Throws(new IOException())\n                .Throws(new IOException())\n                .Throws(new 
IOException())\n                .Throws(new IOException())\n                .Throws(new IOException())\n                .Returns((null, GetMockStream()));\n\n            return moqRequest.Object;\n        }\n\n        [Fact]\n        public void TestFlakyConnection()\n        {\n            // pStream attempts to connect at construction time. It should succeed at the third attempt\n            new PersistentStream(GetWebRequest_connect_on_third(), 0);\n            // no exception thrown means this test succeeded\n        }\n\n        [Fact]\n        public void FailToConnect()\n        {\n            Assert.Throws<IOException>(() => new PersistentStream(GetWebRequest_connect_on_seventh(), 0));\n        }\n\n        [Fact]\n        public void ReadFlakyStream()\n        {\n            var pStream = new PersistentStream(GetWebRequest_flaky_stream(),0);\n            var buffer = new byte[4096];\n            Assert.Equal(100, pStream.Read(buffer, 0, 100));\n        }\n    }\n}\n"
  },
  {
    "path": "UnitTests/IO/UrlUtilitiesTests.cs",
    "content": "﻿using IO;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.IO\r\n{\r\n    public sealed class UrlUtilitiesTests\r\n    {\r\n        [Theory]\r\n        [InlineData(\"http://www.illumina.com\", \"bob\", \"http://www.illumina.com/bob\")]\r\n        [InlineData(\"http://www.illumina.com/\", \"bob\", \"http://www.illumina.com/bob\")]\r\n        [InlineData(\"http://www.illumina.com\", \"/bob\", \"http://www.illumina.com/bob\")]\r\n        [InlineData(\"http://www.illumina.com/\", \"/bob\", \"http://www.illumina.com/bob\")]\r\n        public void Combine_Nominal(string prefix, string suffix, string expected)\r\n        {\r\n            string observed = prefix.UrlCombine(suffix);\r\n            Assert.Equal(expected, observed);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetFileName_Nominal()\r\n        {\r\n            const string url = \"https://illumina-usw2-olympia-dev.s3.amazonaws.com/Annotation/input/Mother.vcf.gz?AWSAccessKeyId=AKIAI774CQHRMUZUNE5Q&Signature=W7Rofh4%2BFXPrPE9ONrdk2iKrGqE%3D&Expires=1561072628\";\r\n            string observed = UrlUtilities.GetFileName(url);\r\n            Assert.Equal(\"Mother.vcf.gz\", observed);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Intervals/IntervalArrayTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Linq;\r\nusing Intervals;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Intervals\r\n{\r\n    public sealed class IntervalArrayTests\r\n    {\r\n        private readonly IntervalArray<string> _intervalArray;\r\n\r\n        public IntervalArrayTests()\r\n        {\r\n            var intervals = new List<Interval<string>>\r\n            {\r\n                new Interval<string>(10, 20, \"bob\"),\r\n                new Interval<string>(5, 7, \"mary\"),\r\n                new Interval<string>(7, 9, \"jane\")\r\n            };\r\n\r\n            // interval array expects a sorted array of intervals\r\n            _intervalArray = new IntervalArray<string>(intervals.OrderBy(x => x.Begin).ThenBy(x => x.End).ToArray());\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(4, 4, false)]\r\n        [InlineData(5, 6, true)]\r\n        [InlineData(7, 11, true)]\r\n        [InlineData(21, 23, false)]\r\n        public void OverlapsAny(int begin, int end, bool expectedResult)\r\n        {\r\n            Assert.Equal(expectedResult, _intervalArray.OverlapsAny(begin, end));\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(6, 9, new[] { \"mary\", \"jane\" })]\r\n        [InlineData(8, 10, new[] { \"jane\", \"bob\" })]\r\n        [InlineData(11, 50, new[] { \"bob\" })]\r\n        [InlineData(21, 23, null)]\r\n        public void GetAllOverlappingValues(int begin, int end, string[] expectedValues)\r\n        {\r\n            var observedValues = _intervalArray.GetAllOverlappingValues(begin, end);\r\n            Assert.Equal(expectedValues, observedValues);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Intervals/IntervalExtensionsTests.cs",
    "content": "﻿using Intervals;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Intervals\r\n{\r\n    public sealed class IntervalExtensionsTests\r\n    {\r\n        [Theory]\r\n        [InlineData(1, 3, 5, 7, 0, false)]\r\n        [InlineData(1, 3, 5, 7, 2, true)]\r\n        public void Overlaps_TwoIntervalsWithFlankingLength(int start1, int end1, int start2, int end2,\r\n            int flankingLength, bool expectedResult)\r\n        {\r\n            var interval  = new Interval(start1, end1);\r\n            var interval2 = new Interval(start2, end2);\r\n            bool observedResult = interval.Overlaps(interval2, flankingLength);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(5, 7, 1, 3, false)]\r\n        [InlineData(1, 7, 5, 10, true)]\r\n        public void Overlaps_IntervalAndCoordinates(int start1, int end1, int start2, int end2, bool expectedResult)\r\n        {\r\n            var interval        = new Interval(start1, end1);\r\n            bool observedResult = interval.Overlaps(start2, end2);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void Contains_TwoIntervals()\r\n        {\r\n            var interval1 = new Interval(1, 10);\r\n            var interval2 = new Interval(5, 6);\r\n            bool observedResult = interval1.Contains(interval2);\r\n            Assert.True(observedResult);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(1, 3, 5, 7, -1, -1)]\r\n        [InlineData(1, 7, 5, 7, 5, 7)]\r\n        public void Intersects_TwoIntervals(int start1, int end1, int start2, int end2, int expectedStart,\r\n            int expectedEnd)\r\n        {\r\n            var interval         = new Interval(start1, end1);\r\n            var interval2        = new Interval(start2, end2);\r\n            var observedInterval = interval.Intersects(interval2);\r\n            Assert.Equal(expectedStart, 
observedInterval.Start);\r\n            Assert.Equal(expectedEnd, observedInterval.End);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Intervals/IntervalForestTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Linq;\r\nusing Intervals;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Intervals\r\n{\r\n    public sealed class IntervalForestTests\r\n    {\r\n        private readonly IntervalForest<string> _intervalForest;\r\n\r\n        public IntervalForestTests()\r\n        {\r\n            var intervalArraysByRefIndex = new IntervalArray<string>[3];\r\n            intervalArraysByRefIndex[0] = GetIntervalArrayRefIndex0();\r\n            intervalArraysByRefIndex[1] = GetIntervalArrayRefIndex1();\r\n            intervalArraysByRefIndex[2] = GetIntervalArrayRefIndex2();\r\n            _intervalForest = new IntervalForest<string>(intervalArraysByRefIndex);\r\n        }\r\n\r\n        private static IntervalArray<string> GetIntervalArrayRefIndex0()\r\n        {\r\n            return GetIntervalArray(new List<Interval<string>>\r\n            {\r\n                new Interval<string>(10, 20, \"bob\"),\r\n                new Interval<string>(5, 7, \"mary\"),\r\n                new Interval<string>(7, 9, \"jane\")\r\n            });\r\n        }\r\n\r\n        private static IntervalArray<string> GetIntervalArrayRefIndex1()\r\n        {\r\n            return GetIntervalArray(new List<Interval<string>>\r\n            {\r\n                new Interval<string>(100, 200, \"jones\"),\r\n                new Interval<string>(125, 150, \"smith\")\r\n            });\r\n        }\r\n\r\n        private static IntervalArray<string> GetIntervalArrayRefIndex2()\r\n        {\r\n            return GetIntervalArray(new List<Interval<string>>\r\n            {\r\n                new Interval<string>(9, 28, \"zoe\"),\r\n                new Interval<string>(1, 7, \"clive\")\r\n            });\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(0, 4, 4, false)]\r\n        [InlineData(0, 5, 6, true)]\r\n        [InlineData(1, 90, 95, false)]\r\n        [InlineData(2, 5, 6, true)]\r\n        public void OverlapsAny(ushort refIndex, 
int begin, int end, bool expectedResult)\r\n        {\r\n            Assert.Equal(expectedResult, _intervalForest.OverlapsAny(refIndex, begin, end));\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(0, 6, 9, new[] { \"mary\", \"jane\" })]\r\n        [InlineData(1, 180, 190, new[] { \"jones\" })]\r\n        [InlineData(2, 6, 10, new[] { \"clive\", \"zoe\" })]\r\n        [InlineData(3, 23, 25, null)]\r\n        public void GetAllOverlappingValues(ushort refIndex, int begin, int end, string[] expectedValues)\r\n        {\r\n            var observedValues = _intervalForest.GetAllOverlappingValues(refIndex, begin, end);\r\n            Assert.Equal(expectedValues, observedValues);\r\n        }\r\n\r\n        private static IntervalArray<string> GetIntervalArray(List<Interval<string>> intervals) => new\r\n            IntervalArray<string>(intervals.OrderBy(x => x.Begin).ThenBy(x => x.End).ToArray());\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Intervals/IntervalOperationsTests.cs",
    "content": "﻿using Intervals;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Intervals\r\n{\r\n    public sealed class IntervalOperationsTests\r\n    {\r\n        [Theory]\r\n        [InlineData(1, 10, 5, 6, true)]\r\n        [InlineData(5, 6, 1, 10, true)]\r\n        [InlineData(1, 3, 5, 7, false)]\r\n        [InlineData(5, 7, 1, 3, false)]\r\n        [InlineData(1, 7, 5, 10, true)]\r\n        [InlineData(5, 10, 1, 7, true)]\r\n        public void Overlaps(int start1, int end1, int start2, int end2, bool expectedResult)\r\n        {\r\n            bool observedResult = Utilities.Overlaps(start1, end1, start2, end2);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(1, 10, 5, 6, true)]\r\n        [InlineData(5, 6, 1, 10, false)]\r\n        [InlineData(1, 3, 5, 7, false)]\r\n        [InlineData(5, 7, 1, 3, false)]\r\n        [InlineData(1, 7, 5, 10, false)]\r\n        [InlineData(5, 10, 1, 7, false)]\r\n        public void Contains(int start1, int end1, int start2, int end2, bool expectedResult)\r\n        {\r\n            bool observedResult = Utilities.Contains(start1, end1, start2, end2);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(1, 10, 5, 6, 5, 6)]\r\n        [InlineData(5, 6, 1, 10, 5, 6)]\r\n        [InlineData(1, 3, 5, 7, -1, -1)]\r\n        [InlineData(5, 7, 1, 3, -1, -1)]\r\n        [InlineData(1, 7, 5, 10, 5, 7)]\r\n        [InlineData(5, 10, 1, 7, 5, 7)]\r\n        public void Intersects(int start1, int end1, int start2, int end2, int expectedStart, int expectedEnd)\r\n        {\r\n            (int observedStart, int observedEnd) = Utilities.Intersects(start1, end1, start2, end2);\r\n            Assert.Equal(expectedStart, observedStart);\r\n            Assert.Equal(expectedEnd, observedEnd);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Intervals/NullIntervalSearchTests.cs",
    "content": "﻿using Intervals;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Intervals\r\n{\r\n    public sealed class NullIntervalSearchTests\r\n    {\r\n        [Fact]\r\n        public void OverlapsAny_IIntervalForest()\r\n        {\r\n            var intervalForest = new NullIntervalSearch<string>();\r\n            Assert.False(intervalForest.OverlapsAny(1, 2, 3));\r\n        }\r\n\r\n        [Fact]\r\n        public void GetAllOverlappingValues_IIntervalForest()\r\n        {\r\n            var intervalForest = new NullIntervalSearch<string>();\r\n            Assert.Null(intervalForest.GetAllOverlappingValues(1, 2, 3));\r\n        }\r\n\r\n        [Fact]\r\n        public void GetAllOverlappingValues_IIntervalSearch()\r\n        {\r\n            var intervalSearch = new NullIntervalSearch<string>();\r\n            Assert.Null(intervalSearch.GetAllOverlappingValues(1, 2));\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Intervals/OverlapTypeTests.cs",
    "content": "﻿using Intervals;\nusing Xunit;\n\nnamespace UnitTests.Intervals\n{\n    public sealed class OverlapTypeTests\n    {\n        // given two intervals T and V, describe how V overlaps T\n        [Theory]\n        [InlineData(400, 500, OverlapType.Partial)]\n        [InlineData(200, 400, OverlapType.CompletelyWithin)]\n        [InlineData(100, 200, OverlapType.Partial)]\n        [InlineData(100, 500, OverlapType.CompletelyOverlaps)]\n        [InlineData(200, 500, OverlapType.CompletelyOverlaps)]\n        [InlineData(100, 400, OverlapType.CompletelyOverlaps)]\n        [InlineData(500, 600, OverlapType.None)]\n        [InlineData(0,   100, OverlapType.None)]\n        public void GetOverlapType(int vStart, int vEnd, OverlapType expectedResults)\n        {\n            const int tStart = 200;\n            const int tEnd   = 400;\n\n            OverlapType observedResults = Utilities.GetOverlapType(tStart, tEnd, vStart, vEnd);\n\n            Assert.Equal(expectedResults, observedResults);\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/Jasix/IndexTests.cs",
    "content": "﻿using System.IO;\nusing System.IO.Compression;\nusing System.Text;\nusing Jasix;\nusing Jasix.DataStructures;\nusing Xunit;\nusing UnitTests.TestUtilities;\nusing Compression.FileHandling;\nusing ErrorHandling.Exceptions;\n\nnamespace UnitTests.Jasix\n{\n    public sealed class IndexTests\n    {\n        [Fact]\n        public void Query_succeedes_when_it_overlaps_tail_of_previous_bin()\n        {\n            var chrIndex = new JasixChrIndex(\"chr1\");\n\n            for (var i = 100; i < 100 + JasixCommons.PreferredNodeCount; i++)\n            {\n                chrIndex.Add(i, i + 5, 100_000 + i);\n            }\n\n            for (int i = 102 + JasixCommons.PreferredNodeCount; i < 152 + JasixCommons.PreferredNodeCount; i++)\n            {\n                chrIndex.Add(i, i + 5, 100_020 + i);\n            }\n\n            //close current node\n            chrIndex.Flush();\n\n            Assert.Equal(100_100, chrIndex.FindFirstSmallVariant(102, 103));\n        }\n\n        [Fact]\n        public void Add_fill_node_and_start_another()\n        {\n            var index = new JasixIndex();\n\n            //creating two nodes each containing 50 entries\n            for (var i = 0; i < 2 * JasixCommons.PreferredNodeCount; i++)\n            {\n                index.Add(\"chr1\", 100 + i, 101 + i, 100_000 + i);\n            }\n\n            index.Add(\"chr1\", 160 + 2 * JasixCommons.PreferredNodeCount, 166 + 2 * JasixCommons.PreferredNodeCount, 200_100);\n            index.Add(\"chr2\", 100, 100, 200_150);\n            index.Add(\"chr2\", 102, 105, 200_200);\n\n            index.Flush();\n\n            Assert.Equal(100_000, index.GetFirstVariantPosition(\"chr1\", 100, 102));\n            Assert.Equal(100_000 + JasixCommons.PreferredNodeCount, index.GetFirstVariantPosition(\"chr1\", 2 * JasixCommons.PreferredNodeCount + 55, 2 * JasixCommons.PreferredNodeCount + 55));\n            Assert.Equal(-1, index.GetFirstVariantPosition(\"chr1\", 2 * 
JasixCommons.PreferredNodeCount + 120, 2 * JasixCommons.PreferredNodeCount + 124));\n            Assert.Equal(200_100, index.GetFirstVariantPosition(\"chr1\", 2 * JasixCommons.PreferredNodeCount + 158, 2 * JasixCommons.PreferredNodeCount + 160));\n            Assert.Equal(200_150, index.GetFirstVariantPosition(\"chr2\", 103, 105));\n        }\n\n\n        [Fact]\n        public void GetFirstVariantPosition_multi_chrom_index()\n        {\n            var index = new JasixIndex();\n\n            index.Add(\"chr1\", 100, 101, 100000);\n            index.Add(\"chr1\", 105, 109, 100050);\n            index.Add(\"chr1\", 160, 166, 100100);\n            index.Add(\"chr2\", 100, 100, 100150);\n            index.Add(\"chr2\", 102, 105, 100200);\n\n            index.Flush();\n\n            var chrPos = Utilities.ParseQuery(\"chr1\");\n\n            Assert.Equal(100000, index.GetFirstVariantPosition(chrPos.Item1, chrPos.Item2, chrPos.Item3));\n\n            chrPos = Utilities.ParseQuery(\"chr2\");\n            Assert.Equal(100150, index.GetFirstVariantPosition(chrPos.Item1, chrPos.Item2, chrPos.Item3));\n        }\n\n        [Fact]\n        public void FindLargeVaritants_method_does_not_return_small_variants()\n        {\n            var index = new JasixIndex();\n\n            index.Add(\"chr1\", 100, 101, 100_000);\n            index.Add(\"chr1\", 105, 109, 100_050);\n            index.Add(\"chr1\", 160, 166, 100_100);\n            index.Add(\"chr1\", 200, 1000, 100_075);//large variant\n            index.Add(\"chr2\", 100, 100, 100_150);\n            index.Add(\"chr2\", 102, 105, 100_200);\n\n            index.Flush();\n\n            //checking large variants\n            Assert.Null(index.LargeVariantPositions(\"chr1\", 100, 199));\n            var largeVariants = index.LargeVariantPositions(\"chr1\", 100, 201);\n            Assert.NotNull(largeVariants);\n            Assert.Single(largeVariants);\n            Assert.Equal(100075, largeVariants[0]);\n        }\n\n        
[Fact]\n        public void Write_and_read_back()\n        {\n            var index = new JasixIndex();\n\n            index.Add(\"chr1\", 100, 101, 100000,\"1\");\n            index.Add(\"chr1\", 105, 109, 100050,\"1\");\n            index.Add(\"chr1\", 150, 1000, 100075,\"1\");//large variant\n            index.Add(\"chr1\", 160, 166, 100100, \"1\");\n            index.Add(\"chr2\", 100, 100, 100150, \"2\");\n            index.Add(\"chr2\", 102, 105, 100200, \"2\");\n\n            var writeStream = new MemoryStream();\n            using (writeStream)\n            {\n                index.Write(writeStream);\n            }\n\n            var readStream= new MemoryStream(writeStream.ToArray());\n            readStream.Seek(0,SeekOrigin.Begin);\n\n            JasixIndex readBackIndex;\n            using (readStream)\n            {\n                readBackIndex = new JasixIndex(readStream);\n            }\n\n            Assert.Equal(100000, readBackIndex.GetFirstVariantPosition(\"chr1\", 100, 102));\n            Assert.Equal(100000, readBackIndex.GetFirstVariantPosition(\"chr1\", 103, 104));\n            Assert.Equal(100000, readBackIndex.GetFirstVariantPosition(\"chr1\", 120, 124));\n            Assert.Equal(100000, readBackIndex.GetFirstVariantPosition(\"chr1\", 158, 160));\n            Assert.Equal(100150, readBackIndex.GetFirstVariantPosition(\"chr2\", 103, 105));\n\n            //checking large variants\n            Assert.Null(readBackIndex.LargeVariantPositions(\"chr1\", 100, 149));\n            var largeVariants = readBackIndex.LargeVariantPositions(\"chr1\", 100, 201);\n            Assert.NotNull(largeVariants);\n            Assert.Single(largeVariants);\n            Assert.Equal(100075, largeVariants[0]);\n        }\n\n        [Fact]\n        public void BgzipTestReader_basic()\n        {\n            var stream = ResourceUtilities.GetReadStream(Resources.TopPath(\"TinyAnnotated.json\"));\n\n            var lineCount = 0;\n            using (var 
jasixReader = new StreamReader(stream))\n            {\n                while (jasixReader.ReadLine() != null)\n                {\n                    lineCount++;\n                }\n            }\n\n            Assert.Equal(4, lineCount);\n        }\n\n        [Fact]\n        public void IndexCreation_multChromosome()\n        {\n            var jsonStream = new BlockGZipStream(ResourceUtilities.GetReadStream(Resources.TopPath(\"cosmicv72.indels.json.gz\")), CompressionMode.Decompress);\n\n            var writeStream = new MemoryStream();\n            using (var indexCreator = new IndexCreator(jsonStream, writeStream))\n            {\n                indexCreator.CreateIndex();\n            }\n\n            JasixIndex readBackIndex;\n            var        readStream = new MemoryStream(writeStream.ToArray());\n            readStream.Seek(0, SeekOrigin.Begin);\n\n            using (readStream)\n            {\n                readBackIndex = new JasixIndex(readStream);\n            }\n\n            Assert.Equal(2268, readBackIndex.GetFirstVariantPosition(\"chr1\", 9775924, 9775924));\n            Assert.Equal(14035925971, readBackIndex.GetFirstVariantPosition(\"chr2\", 16081096, 16081096));\n            Assert.Equal(433156622693, readBackIndex.GetFirstVariantPosition(\"chr20\", 36026164, 36026164));\n            Assert.Equal(439602269527, readBackIndex.GetFirstVariantPosition(\"chrX\", 66765044, 66765044));\n        }\n\n        [Fact]\n        public void Begin_end_section_and_readback()\n        {\n            var index = new JasixIndex();\n            const string section = \"section1\";\n            index.BeginSection(section, 0);\n            Assert.Throws<UserErrorException>(() => index.BeginSection(section, 1));\n            index.EndSection(section, 100);\n            Assert.Throws<UserErrorException>(() => index.EndSection(section, 101));\n\n            Assert.Equal(0, index.GetSectionBegin(section));\n            Assert.Equal(100, 
index.GetSectionEnd(section));\n        }\n\n        [Fact]\n        public void GetChromosomeList()\n        {\n            var readStream = new BlockGZipStream(ResourceUtilities.GetReadStream(Resources.TopPath(\"Clinvar20150901.json.gz\")), CompressionMode.Decompress);\n            var indexStream = ResourceUtilities.GetReadStream(Resources.TopPath(\"Clinvar20150901.json.gz.jsi\"));\n\n            var outStream = new MemoryStream();\n            using (var writer = new StreamWriter(outStream, Encoding.UTF8, 512, true))\n            using (var qp = new QueryProcessor(new StreamReader(readStream), indexStream, writer))\n            {\n                writer.NewLine = \"\\r\\n\";\n                qp.ListChromosomesAndSections();\n            }\n\n            Assert.NotEqual(0, outStream.Length);\n            outStream.Position = 0;\n            \n            using (var reader = new StreamReader(outStream))\n            {\n                string chromList = reader.ReadToEnd();\n                Assert.Equal(\"1\\r\\n2\\r\\n3\\r\\n4\\r\\n5\\r\\n6\\r\\n7\\r\\n8\\r\\n9\\r\\n10\\r\\n11\\r\\n12\\r\\n13\\r\\n14\\r\\n15\\r\\n16\\r\\n17\\r\\n18\\r\\n19\\r\\n20\\r\\n21\\r\\nX\\r\\nY\\r\\nheader\\r\\npositions\\r\\ngenes\\r\\n\", chromList);\n            }\n        }\n\n        [Fact]\n        public void GetHeaderOnly()\n        {\n            var readStream = new BlockGZipStream(ResourceUtilities.GetReadStream(Resources.TopPath(\"Clinvar20150901.json.gz\")),\n                CompressionMode.Decompress);\n            var indexStream = ResourceUtilities.GetReadStream(Resources.TopPath(\"Clinvar20150901.json.gz.jsi\"));\n\n            var outStream = new MemoryStream();\n            using (var writer = new StreamWriter(outStream, Encoding.UTF8, 512, true))\n            using (var qp = new QueryProcessor(new StreamReader(readStream), indexStream, writer))\n            {\n                qp.PrintHeaderOnly();\n            }\n\n            Assert.NotEqual(0, outStream.Length);\n    
        outStream.Position = 0;\n            using (var reader = new StreamReader(outStream))\n            {\n                string actualHeaderLine = reader.ReadToEnd().Replace(\"\\r\\n\", \"\\n\");\n                Assert.Equal(\n                    \"{\\n  \\\"header\\\": {\\n    \\\"annotator\\\": \\\"Nirvana 2.0.9.0\\\",\\n    \\\"creationTime\\\": \\\"2018-04-30 17:17:23\\\",\\n    \\\"genomeAssembly\\\": \\\"GRCh37\\\",\\n    \\\"schemaVersion\\\": 6,\\n    \\\"dataVersion\\\": \\\"91.26.45\\\",\\n    \\\"dataSources\\\": [\\n      {\\n        \\\"name\\\": \\\"VEP\\\",\\n        \\\"version\\\": \\\"91\\\",\\n        \\\"description\\\": \\\"Ensembl\\\",\\n        \\\"releaseDate\\\": \\\"2018-03-05\\\"\\n      },\\n      {\\n        \\\"name\\\": \\\"ClinVar\\\",\\n        \\\"version\\\": \\\"20180129\\\",\\n        \\\"description\\\": \\\"A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence\\\",\\n        \\\"releaseDate\\\": \\\"2018-01-29\\\"\\n      },\\n      {\\n        \\\"name\\\": \\\"COSMIC\\\",\\n        \\\"version\\\": \\\"84\\\",\\n        \\\"description\\\": \\\"somatic mutation and related details and information relating to human cancers\\\",\\n        \\\"releaseDate\\\": \\\"2018-02-13\\\"\\n      },\\n      {\\n        \\\"name\\\": \\\"dbSNP\\\",\\n        \\\"version\\\": \\\"150\\\",\\n        \\\"description\\\": \\\"Identifiers for observed variants\\\",\\n        \\\"releaseDate\\\": \\\"2017-04-03\\\"\\n      },\\n      {\\n        \\\"name\\\": \\\"gnomAD_exome\\\",\\n        \\\"version\\\": \\\"2.0.2\\\",\\n        \\\"description\\\": \\\"Exome allele frequencies from Genome Aggregation Database (gnomAD)\\\",\\n        \\\"releaseDate\\\": \\\"2017-10-05\\\"\\n      },\\n      {\\n        \\\"name\\\": \\\"gnomAD\\\",\\n        \\\"version\\\": \\\"2.0.2\\\",\\n        \\\"description\\\": \\\"Whole genome allele frequencies from Genome 
Aggregation Database (gnomAD)\\\",\\n        \\\"releaseDate\\\": \\\"2017-10-05\\\"\\n      },\\n      {\\n        \\\"name\\\": \\\"MITOMAP\\\",\\n        \\\"version\\\": \\\"20180228\\\",\\n        \\\"description\\\": \\\"Small variants in the MITOMAP human mitochondrial genome database\\\",\\n        \\\"releaseDate\\\": \\\"2018-02-28\\\"\\n      },\\n      {\\n        \\\"name\\\": \\\"1000 Genomes Project\\\",\\n        \\\"version\\\": \\\"Phase 3 v5a\\\",\\n        \\\"description\\\": \\\"A public catalogue of human variation and genotype data\\\",\\n        \\\"releaseDate\\\": \\\"2013-05-27\\\"\\n      },\\n      {\\n        \\\"name\\\": \\\"TOPMed\\\",\\n        \\\"version\\\": \\\"freeze_5\\\",\\n        \\\"description\\\": \\\"Allele frequencies from TOPMed data lifted over using dbSNP ids.\\\",\\n        \\\"releaseDate\\\": \\\"2017-08-28\\\"\\n      },\\n      {\\n        \\\"name\\\": \\\"ClinGen\\\",\\n        \\\"version\\\": \\\"20160414\\\",\\n        \\\"releaseDate\\\": \\\"2016-04-14\\\"\\n      },\\n      {\\n        \\\"name\\\": \\\"DGV\\\",\\n        \\\"version\\\": \\\"20160515\\\",\\n        \\\"description\\\": \\\"Provides a comprehensive summary of structural variation in the human genome\\\",\\n        \\\"releaseDate\\\": \\\"2016-05-15\\\"\\n      },\\n      {\\n        \\\"name\\\": \\\"MITOMAP\\\",\\n        \\\"version\\\": \\\"20180228\\\",\\n        \\\"description\\\": \\\"Large structural variants in the MITOMAP human mitochondrial genome database\\\",\\n        \\\"releaseDate\\\": \\\"2018-02-28\\\"\\n      },\\n      {\\n        \\\"name\\\": \\\"ExAC\\\",\\n        \\\"version\\\": \\\"0.3.1\\\",\\n        \\\"description\\\": \\\"Gene scores from the ExAC project\\\",\\n        \\\"releaseDate\\\": \\\"2016-03-16\\\"\\n      },\\n      {\\n        \\\"name\\\": \\\"OMIM\\\",\\n        \\\"version\\\": \\\"20180213\\\",\\n        \\\"description\\\": \\\"An Online Catalog of Human Genes and Genetic 
Disorders\\\",\\n        \\\"releaseDate\\\": \\\"2018-02-13\\\"\\n      },\\n      {\\n        \\\"name\\\": \\\"phyloP\\\",\\n        \\\"version\\\": \\\"hg19\\\",\\n        \\\"description\\\": \\\"46 way conservation score between humans and 45 other vertebrates\\\",\\n        \\\"releaseDate\\\": \\\"2009-11-10\\\"\\n      }\\n    ]\\n  }\\n}\",\n                    actualHeaderLine);\n            }\n        }\n\n        [Fact]\n        public void GetGeneSection()\n        {\n            var readStream = new BlockGZipStream(ResourceUtilities.GetReadStream(Resources.TopPath(\"Clinvar20150901.json.gz\")), CompressionMode.Decompress);\n            var indexStream = ResourceUtilities.GetReadStream(Resources.TopPath(\"Clinvar20150901.json.gz.jsi\"));\n\n            var outStream = new MemoryStream();\n            using (var writer = new StreamWriter(outStream, Encoding.UTF8, 512, true))\n            using (var qp = new QueryProcessor(new StreamReader(readStream), indexStream, writer))\n            {\n                writer.NewLine = \"\\r\\n\";\n                qp.PrintSection(\"genes\");\n            }\n\n            Assert.NotEqual(0, outStream.Length);\n            outStream.Position = 0;\n            using (var reader = new StreamReader(outStream))\n            {\n                var count = 0;\n                var line = reader.ReadLine();\n                while (line != null)\n                {\n                    count++;\n                    line = reader.ReadLine();\n                }\n                \n                Assert.Equal(4382, count);\n            }\n        }\n\n    }\n}\n"
  },
  {
    "path": "UnitTests/Jasix/JasixFunctionalityTests.cs",
    "content": "﻿using Jasix;\nusing Xunit;\n\nnamespace UnitTests.Jasix\n{\n    public sealed class JasixFunctionalityTests\n    {\n        [Fact]\n        public void ParsingDeletionJsonLine()\n        {\n            const string jsonLine =\n                \"{\\\"chromosome\\\":\\\"chr1\\\",\\\"refAllele\\\":\\\"GT\\\",\\\"position\\\":2337967,\\\"altAlleles\\\":[\\\"G\\\"],\\\"cyt\\r\\nogeneticBand\\\":\\\"1p36.32\\\",\\\"variants\\\":[{\\\"altAllele\\\":\\\"C\\\",\\\"refAllele\\\":\\\"-\\\",\\\"begin\\\":2337968,\\\"chromosome\\\":\\\"chr1\\\",\\\"dbsnp\\\":[\\\"rs797044762\\\"],\\\"end\\\":2337967,\\\"variantType\\\":\\\"insertion\\\",\\\"vid\\\":\\\"1:2337968:2337967:C\\\",\\\"regulatoryRegions\\\":[{\\\"id\\\":\\\"ENSR00001576444\\\",\\\"consequence\\\":[\\\"regulatory_region_variant\\\"]}],\\\"transcripts\\\":{\\\"refSeq\\\":[{\\\"transcript\\\":\\\"XM_005244712.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"11079\\\",\\\"hgnc\\\":\\\"RER1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"proteinId\\\":\\\"XP_005244769.1\\\"},{\\\"transcript\\\":\\\"NM_007033.4\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"11079\\\",\\\"hgnc\\\":\\\"RER1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"NP_008964.3\\\"},{\\\"transcript\\\":\\\"XM_005244713.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"11079\\\",\\\"hgnc\\\":\\\"RER1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"proteinId\\\":\\\"XP_005244770.1\\\"},{\\\"transcript\\\":\\\"NM_002617.3\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"aminoAcids\\\":\\\"-/X\\\",\\\"cDnaPos\\\":\\\"936-937\\\",\\\"codons\\\":\\\"-/G\\\",\\\"cdsPos\\\":\\\"867-868\\\",\\\"exons\\\":\\\"5/6\\\",\\\"geneId\\\":\\\"5192\\\",\\\"hgnc\\\":\\\"PEX10\\\",\\\"consequence\\\":[\\\"frameshift_variant\\\"],\\\"hgvsc\\\":\\\"NM_002617.3:c.867_868insG\\\",\\\"hgvsp\\\":\\\"NP_002608.1:p.His290AlafsTer49\\\",\\\"
proteinId\\\":\\\"NP_002608.1\\\",\\\"proteinPos\\\":\\\"289-290\\\"},{\\\"transcript\\\":\\\"NM_153818.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"aminoAcids\\\":\\\"-/X\\\",\\\"cDnaPos\\\":\\\"996-997\\\",\\\"codons\\\":\\\"-/G\\\",\\\"cdsPos\\\":\\\"927-928\\\",\\\"exons\\\":\\\"5/6\\\",\\\"geneId\\\":\\\"5192\\\",\\\"hgnc\\\":\\\"PEX10\\\",\\\"consequence\\\":[\\\"frameshift_variant\\\"],\\\"hgvsc\\\":\\\"NM_153818.1:c.927_928insG\\\",\\\"hgvsp\\\":\\\"NP_722540.1:p.His310AlafsTer49\\\",\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"NP_722540.1\\\",\\\"proteinPos\\\":\\\"309-310\\\"}]}}]}\";\n\n            var chrPos = IndexCreator.GetChromPosition(jsonLine);\n            Assert.Equal(\"chr1\", chrPos.Item1);\n            Assert.Equal(2337967, chrPos.Item2);\n            Assert.Equal(2337968, chrPos.Item3);\n        }\n\n        [Fact]\n        public void ParsingSnvJsonLine()\n        {\n            const string jsonLine =\n                \"{\\\"chromosome\\\":\\\"chr1\\\",\\\"refAllele\\\":\\\"G\\\",\\\"position\\\":2337967,\\\"altAlleles\\\":[\\\"C\\\",\\\"T\\\"],\\\"cyt\\r\\nogeneticBand\\\":\\\"1p36.32\\\",\\\"variants\\\":[{\\\"altAllele\\\":\\\"C\\\",\\\"refAllele\\\":\\\"-\\\",\\\"begin\\\":2337968,\\\"chromosome\\\":\\\"chr1\\\",\\\"dbsnp\\\":[\\\"rs797044762\\\"],\\\"end\\\":2337967,\\\"variantType\\\":\\\"insertion\\\",\\\"vid\\\":\\\"1:2337968:2337967:C\\\",\\\"regulatoryRegions\\\":[{\\\"id\\\":\\\"ENSR00001576444\\\",\\\"consequence\\\":[\\\"regulatory_region_variant\\\"]}],\\\"transcripts\\\":{\\\"refSeq\\\":[{\\\"transcript\\\":\\\"XM_005244712.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"11079\\\",\\\"hgnc\\\":\\\"RER1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"proteinId\\\":\\\"XP_005244769.1\\\"},{\\\"transcript\\\":\\\"NM_007033.4\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"11079\\\",\\\"hgnc\\\":\\\"RER1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"isCanonical\\\"
:true,\\\"proteinId\\\":\\\"NP_008964.3\\\"},{\\\"transcript\\\":\\\"XM_005244713.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"11079\\\",\\\"hgnc\\\":\\\"RER1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"proteinId\\\":\\\"XP_005244770.1\\\"},{\\\"transcript\\\":\\\"NM_002617.3\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"aminoAcids\\\":\\\"-/X\\\",\\\"cDnaPos\\\":\\\"936-937\\\",\\\"codons\\\":\\\"-/G\\\",\\\"cdsPos\\\":\\\"867-868\\\",\\\"exons\\\":\\\"5/6\\\",\\\"geneId\\\":\\\"5192\\\",\\\"hgnc\\\":\\\"PEX10\\\",\\\"consequence\\\":[\\\"frameshift_variant\\\"],\\\"hgvsc\\\":\\\"NM_002617.3:c.867_868insG\\\",\\\"hgvsp\\\":\\\"NP_002608.1:p.His290AlafsTer49\\\",\\\"proteinId\\\":\\\"NP_002608.1\\\",\\\"proteinPos\\\":\\\"289-290\\\"},{\\\"transcript\\\":\\\"NM_153818.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"aminoAcids\\\":\\\"-/X\\\",\\\"cDnaPos\\\":\\\"996-997\\\",\\\"codons\\\":\\\"-/G\\\",\\\"cdsPos\\\":\\\"927-928\\\",\\\"exons\\\":\\\"5/6\\\",\\\"geneId\\\":\\\"5192\\\",\\\"hgnc\\\":\\\"PEX10\\\",\\\"consequence\\\":[\\\"frameshift_variant\\\"],\\\"hgvsc\\\":\\\"NM_153818.1:c.927_928insG\\\",\\\"hgvsp\\\":\\\"NP_722540.1:p.His310AlafsTer49\\\",\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"NP_722540.1\\\",\\\"proteinPos\\\":\\\"309-310\\\"}]}}]}\";\n\n            var chrPos = IndexCreator.GetChromPosition(jsonLine);\n            Assert.Equal(\"chr1\", chrPos.Item1);\n            Assert.Equal(2337967, chrPos.Item2);\n            Assert.Equal(2337967, chrPos.Item3);\n        }\n\n        [Fact]\n        public void ParsingJsonInsertionLine()\n        {\n            const string jsonLine =\n                
\"{\\\"chromosome\\\":\\\"chr1\\\",\\\"refAllele\\\":\\\"G\\\",\\\"position\\\":2337967,\\\"altAlleles\\\":[\\\"GCC\\\"],\\\"cyt\\r\\nogeneticBand\\\":\\\"1p36.32\\\",\\\"variants\\\":[{\\\"altAllele\\\":\\\"C\\\",\\\"refAllele\\\":\\\"-\\\",\\\"begin\\\":2337968,\\\"chromosome\\\":\\\"chr1\\\",\\\"dbsnp\\\":[\\\"rs797044762\\\"],\\\"end\\\":2337967,\\\"variantType\\\":\\\"insertion\\\",\\\"vid\\\":\\\"1:2337968:2337967:C\\\",\\\"regulatoryRegions\\\":[{\\\"id\\\":\\\"ENSR00001576444\\\",\\\"consequence\\\":[\\\"regulatory_region_variant\\\"]}],\\\"transcripts\\\":{\\\"refSeq\\\":[{\\\"transcript\\\":\\\"XM_005244712.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"11079\\\",\\\"hgnc\\\":\\\"RER1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"proteinId\\\":\\\"XP_005244769.1\\\"},{\\\"transcript\\\":\\\"NM_007033.4\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"11079\\\",\\\"hgnc\\\":\\\"RER1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"NP_008964.3\\\"},{\\\"transcript\\\":\\\"XM_005244713.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"geneId\\\":\\\"11079\\\",\\\"hgnc\\\":\\\"RER1\\\",\\\"consequence\\\":[\\\"downstream_gene_variant\\\"],\\\"proteinId\\\":\\\"XP_005244770.1\\\"},{\\\"transcript\\\":\\\"NM_002617.3\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"aminoAcids\\\":\\\"-/X\\\",\\\"cDnaPos\\\":\\\"936-937\\\",\\\"codons\\\":\\\"-/G\\\",\\\"cdsPos\\\":\\\"867-868\\\",\\\"exons\\\":\\\"5/6\\\",\\\"geneId\\\":\\\"5192\\\",\\\"hgnc\\\":\\\"PEX10\\\",\\\"consequence\\\":[\\\"frameshift_variant\\\"],\\\"hgvsc\\\":\\\"NM_002617.3:c.867_868insG\\\",\\\"hgvsp\\\":\\\"NP_002608.1:p.His290AlafsTer49\\\",\\\"proteinId\\\":\\\"NP_002608.1\\\",\\\"proteinPos\\\":\\\"289-290\\\"},{\\\"transcript\\\":\\\"NM_153818.1\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"aminoAcids\\\":\\\"-/X\\\",\\\"cDnaPos\\\":\\\"996-997\\\",\\\"codons\\\":\\\"-/G\\\",\\\"cdsPos\\\":\\\"9
27-928\\\",\\\"exons\\\":\\\"5/6\\\",\\\"geneId\\\":\\\"5192\\\",\\\"hgnc\\\":\\\"PEX10\\\",\\\"consequence\\\":[\\\"frameshift_variant\\\"],\\\"hgvsc\\\":\\\"NM_153818.1:c.927_928insG\\\",\\\"hgvsp\\\":\\\"NP_722540.1:p.His310AlafsTer49\\\",\\\"isCanonical\\\":true,\\\"proteinId\\\":\\\"NP_722540.1\\\",\\\"proteinPos\\\":\\\"309-310\\\"}]}}]}\";\n\n            var chrPos = IndexCreator.GetChromPosition(jsonLine);\n            Assert.Equal(\"chr1\", chrPos.Item1);\n            Assert.Equal(2337967, chrPos.Item2);\n            Assert.Equal(2337968, chrPos.Item3);\n        }\n\n        [Fact]\n        public void ParseJsonStructuralVariant()\n        {\n            const string jsonLine =\n                \"{\\\"chromosome\\\":\\\"chr3\\\",\\\"refAllele\\\":\\\"A\\\",\\\"position\\\":62431401,\\\"svEnd\\\":62431801,\\\"altAlleles\\\":[\\\"<DEL>\\\"],\\\"cytogeneticBand\\\":\\\"3p14.2\\\",\\\"variants\\\":[{\\\"altAllele\\\":\\\"<DEL>\\\",\\\"refAllele\\\":\\\"A\\\",\\\"begin\\\":62431402,\\\"chromosome\\\":\\\"chr3\\\",\\\"end\\\":62431801,\\\"variantType\\\":\\\"unknown\\\",\\\"vid\\\":\\\"3:62431402:62431401\\\",\\\"globalAllele\\\":{\\\"globalMajorAllele\\\":\\\"C\\\",\\\"globalMajorAlleleFrequency\\\":0.9856,\\\"globalMinorAllele\\\":\\\"A\\\",\\\"globalMinorAlleleFrequency\\\":0.01438}}]}\";\n\n            var chrPos = IndexCreator.GetChromPosition(jsonLine);\n            Assert.Equal(\"chr3\", chrPos.Item1);\n            Assert.Equal(62431401, chrPos.Item2);\n            Assert.Equal(62431801, chrPos.Item3);\n        }\n\n        [Fact]\n        public void ParseJsonBreakEnd()\n        {\n            const string jsonLine =\n                
\"{\\\"chromosome\\\":\\\"2\\\",\\\"refAllele\\\":\\\"G\\\",\\\"position\\\":321681,\\\"quality\\\":6,\\\"filters\\\":[\\\"PASS\\\"],\\\"altAlleles\\\":[\\\"G]2:421681]\\\"],\\\"cytogeneticBand\\\":\\\"2p25.3\\\",\\\"oneKg\\\":[{\\\"chromosome\\\":\\\"2\\\",\\\"begin\\\":314969,\\\"end\\\":694521,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"variantFreqAll\\\":0.0008,\\\"variantFreqEas\\\":0.00397,\\\"id\\\":\\\"esv3589600\\\",\\\"sampleSize\\\":2504,\\\"sampleSizeAfr\\\":661,\\\"sampleSizeAmr\\\":347,\\\"sampleSizeEas\\\":504,\\\"sampleSizeEur\\\":503,\\\"sampleSizeSas\\\":489,\\\"observedGains\\\":2}],\\\"variants\\\":[{\\\"altAllele\\\":\\\"G]2:421681]\\\",\\\"refAllele\\\":\\\"G\\\",\\\"begin\\\":321681,\\\"chromosome\\\":\\\"2\\\",\\\"end\\\":321686,\\\"variantType\\\":\\\"translocation_breakend\\\",\\\"vid\\\":\\\"2:321681:+:2:421681:-\\\",\\\"overlappingGenes\\\":[\\\"AC079779.6\\\"]}]}\";\n\n            var chrPos = IndexCreator.GetChromPosition(jsonLine);\n            Assert.Equal(\"2\", chrPos.Item1);\n            Assert.Equal(321681, chrPos.Item2);\n            Assert.Equal(321681, chrPos.Item3);\n        }\n    }\n}\n"
  },
  {
    "path": "UnitTests/Jasix/JasixQueryProcessingTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.IO.Compression;\r\nusing System.Linq;\r\nusing Jasix;\r\nusing Jasix.DataStructures;\r\nusing Xunit;\r\nusing Compression.FileHandling;\r\nusing IO;\r\nusing Newtonsoft.Json.Linq;\r\nusing UnitTests.TestUtilities;\r\n\r\nnamespace UnitTests.Jasix\r\n{\r\n    public sealed class JasixQueryProcessingTests\r\n    {\r\n        [Fact]\r\n        public void Combination_of_large_and_small_variants()\r\n        {\r\n            var index = new JasixIndex();\r\n\r\n            //query range 10,000- 10,020\r\n            index.Add(\"chr1\", 8_000, 9_900, 90_000);//SV not overlapping the query\r\n            index.Add(\"chr1\", 9_000, 10_005, 90_100);// partially overlapping\r\n            index.Add(\"chr1\", 9_500, 10_050, 90_200);//completely overlapping\r\n            index.Add(\"chr1\", 10_000, 10_001, 100_000);\r\n            index.Add(\"chr1\", 10_004, 10_006, 100_100);\r\n            index.Add(\"chr1\", 10_009, 10_550, 100_200);//SV starting from the middle of the range\r\n            index.Add(\"chr1\", 10_008, 10_010, 100_300);\r\n            index.Add(\"chr1\", 10_011, 10_020, 100_400);\r\n            index.Add(\"chr1\", 10_039, 10_550, 100_200);//SV past the range\r\n\r\n            index.Flush();\r\n\r\n            var firstSmallVarLocation = index.GetFirstVariantPosition(\"chr1\", 10_000, 10_020);\r\n            var largeVariantLocations = index.LargeVariantPositions(\"chr1\", 10_000, 10_020);\r\n\r\n            Assert.Equal(90_000, firstSmallVarLocation);\r\n            Assert.True(largeVariantLocations.SequenceEqual(new List<long> { 90_100, 90_200, 100_200 }));\r\n        }\r\n\r\n        [Fact]\r\n        public void Quiring_large_variants_overlapping_range_but_starting_before()\r\n        {\r\n            var index = new JasixIndex();\r\n\r\n            //query range 10,000- 10,020\r\n            index.Add(\"chr1\", 8_000, 10_000, 80_000);//SV ending at the start of 
query\r\n            index.Add(\"chr1\", 8_000, 9_900, 90_000);//SV not overlapping the query\r\n            index.Add(\"chr1\", 9_000, 10_005, 90_100);// partially overlapping\r\n            index.Add(\"chr1\", 9_500, 10_050, 90_200);//completely overlapping\r\n            index.Add(\"chr1\", 10_000, 10_001, 100_000);\r\n            index.Add(\"chr1\", 10_000, 10_701, 100_050);//starting at the begin of query\r\n            index.Add(\"chr1\", 10_004, 10_006, 100_100);\r\n            index.Add(\"chr1\", 10_009, 10_550, 100_200);//SV starting from the middle of the range\r\n            index.Add(\"chr1\", 10_008, 10_010, 100_300);\r\n            index.Add(\"chr1\", 10_011, 10_020, 100_400);\r\n            index.Add(\"chr1\", 10_039, 10_550, 100_200);//SV past the range\r\n\r\n            index.Flush();\r\n\r\n            var largeVariantBefore = index.LargeVariantPositions(\"chr1\", 10_000, 9_999);\r\n\r\n            Assert.True(largeVariantBefore.SequenceEqual(new List<long> { 80_000, 90_100, 90_200 }));\r\n        }\r\n\r\n        [Fact]\r\n        public void First_variant_position_when_the_first_variant_is_large()\r\n        {\r\n            var index = new JasixIndex();\r\n\r\n            //query range 10,000- 10,020\r\n            index.Add(\"chr1\", 10_000, 10_701, 100_050);//SV at the begin of query\r\n            index.Add(\"chr1\", 10_004, 10_006, 100_100);\r\n            index.Add(\"chr1\", 10_009, 10_550, 100_200);//SV starting from the middle of the range\r\n            index.Add(\"chr1\", 10_008, 10_010, 100_300);\r\n            index.Add(\"chr1\", 10_011, 10_020, 100_400);\r\n            index.Add(\"chr1\", 10_039, 10_550, 100_200);//SV past the range\r\n\r\n            index.Flush();\r\n\r\n            var firstVariantLocation = index.GetFirstVariantPosition(\"chr1\", 10_000, 10_010);\r\n\r\n            Assert.Equal(100_050, firstVariantLocation);\r\n        }\r\n\r\n        [Fact]\r\n        public void TestQuerySingle()\r\n        {\r\n            
var readStream = new BlockGZipStream(ResourceUtilities.GetReadStream(Resources.TopPath(\"cosmicv72.indels.json.gz\")), CompressionMode.Decompress);\r\n            var indexStream = ResourceUtilities.GetReadStream(Resources.TopPath(\"cosmicv72.indels.json.gz.jsi\"));\r\n\r\n            using (var qp = new QueryProcessor(FileUtilities.GetStreamReader(readStream), indexStream))\r\n            {\r\n                var header = qp.GetHeader();\r\n                Assert.Equal(\"\\\"header\\\":{\\\"annotator\\\":\\\"Nirvana 2.0.9.0\\\",\\\"creationTime\\\":\\\"2018-04-30 15:44:31\\\",\\\"genomeAssembly\\\":\\\"GRCh37\\\",\\\"schemaVersion\\\":6,\\\"dataVersion\\\":\\\"91.26.45\\\",\\\"dataSources\\\":[{\\\"name\\\":\\\"VEP\\\",\\\"version\\\":\\\"91\\\",\\\"description\\\":\\\"Ensembl\\\",\\\"releaseDate\\\":\\\"2018-03-05\\\"},{\\\"name\\\":\\\"ClinVar\\\",\\\"version\\\":\\\"20180129\\\",\\\"description\\\":\\\"A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence\\\",\\\"releaseDate\\\":\\\"2018-01-29\\\"},{\\\"name\\\":\\\"COSMIC\\\",\\\"version\\\":\\\"84\\\",\\\"description\\\":\\\"somatic mutation and related details and information relating to human cancers\\\",\\\"releaseDate\\\":\\\"2018-02-13\\\"},{\\\"name\\\":\\\"dbSNP\\\",\\\"version\\\":\\\"150\\\",\\\"description\\\":\\\"Identifiers for observed variants\\\",\\\"releaseDate\\\":\\\"2017-04-03\\\"},{\\\"name\\\":\\\"gnomAD_exome\\\",\\\"version\\\":\\\"2.0.2\\\",\\\"description\\\":\\\"Exome allele frequencies from Genome Aggregation Database (gnomAD)\\\",\\\"releaseDate\\\":\\\"2017-10-05\\\"},{\\\"name\\\":\\\"gnomAD\\\",\\\"version\\\":\\\"2.0.2\\\",\\\"description\\\":\\\"Whole genome allele frequencies from Genome Aggregation Database (gnomAD)\\\",\\\"releaseDate\\\":\\\"2017-10-05\\\"},{\\\"name\\\":\\\"MITOMAP\\\",\\\"version\\\":\\\"20180228\\\",\\\"description\\\":\\\"Small variants in the MITOMAP human mitochondrial 
genome database\\\",\\\"releaseDate\\\":\\\"2018-02-28\\\"},{\\\"name\\\":\\\"1000 Genomes Project\\\",\\\"version\\\":\\\"Phase 3 v5a\\\",\\\"description\\\":\\\"A public catalogue of human variation and genotype data\\\",\\\"releaseDate\\\":\\\"2013-05-27\\\"},{\\\"name\\\":\\\"TOPMed\\\",\\\"version\\\":\\\"freeze_5\\\",\\\"description\\\":\\\"Allele frequencies from TOPMed data lifted over using dbSNP ids.\\\",\\\"releaseDate\\\":\\\"2017-08-28\\\"},{\\\"name\\\":\\\"ClinGen\\\",\\\"version\\\":\\\"20160414\\\",\\\"releaseDate\\\":\\\"2016-04-14\\\"},{\\\"name\\\":\\\"DGV\\\",\\\"version\\\":\\\"20160515\\\",\\\"description\\\":\\\"Provides a comprehensive summary of structural variation in the human genome\\\",\\\"releaseDate\\\":\\\"2016-05-15\\\"},{\\\"name\\\":\\\"MITOMAP\\\",\\\"version\\\":\\\"20180228\\\",\\\"description\\\":\\\"Large structural variants in the MITOMAP human mitochondrial genome database\\\",\\\"releaseDate\\\":\\\"2018-02-28\\\"},{\\\"name\\\":\\\"ExAC\\\",\\\"version\\\":\\\"0.3.1\\\",\\\"description\\\":\\\"Gene scores from the ExAC project\\\",\\\"releaseDate\\\":\\\"2016-03-16\\\"},{\\\"name\\\":\\\"OMIM\\\",\\\"version\\\":\\\"20180213\\\",\\\"description\\\":\\\"An Online Catalog of Human Genes and Genetic Disorders\\\",\\\"releaseDate\\\":\\\"2018-02-13\\\"},{\\\"name\\\":\\\"phyloP\\\",\\\"version\\\":\\\"hg19\\\",\\\"description\\\":\\\"46 way conservation score between humans and 45 other vertebrates\\\",\\\"releaseDate\\\":\\\"2009-11-10\\\"}]}\", header);\r\n\r\n                var results =\r\n                    qp.ReadOverlappingJsonLines(Utilities.ParseQuery(\"1:9775924\"));\r\n                Assert.Single(results);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void TestQueryMultiple()\r\n        {\r\n            var readStream = new BlockGZipStream(ResourceUtilities.GetReadStream(Resources.TopPath(\"cosmicv72.indels.json.gz\")), CompressionMode.Decompress);\r\n            var indexStream = 
ResourceUtilities.GetReadStream(Resources.TopPath(\"cosmicv72.indels.json.gz.jsi\"));\r\n\r\n            using (var qp = new QueryProcessor(FileUtilities.GetStreamReader(readStream), indexStream))\r\n            {\r\n                var results =\r\n                    qp.ReadOverlappingJsonLines(Utilities.ParseQuery(\"1:9775924-9778952\"));\r\n                Assert.Equal(3, results.Count());\r\n\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void TestQueryMultipleWithSkippingMiddleOne()\r\n        {\r\n            var readStream = new BlockGZipStream(ResourceUtilities.GetReadStream(Resources.TopPath(\"cosmicv72.indels.json.gz\")), CompressionMode.Decompress);\r\n            var indexStream = ResourceUtilities.GetReadStream(Resources.TopPath(\"cosmicv72.indels.json.gz.jsi\"));\r\n\r\n            using (var qp = new QueryProcessor(FileUtilities.GetStreamReader(readStream), indexStream))\r\n            {\r\n                var results =\r\n                    qp.ReadOverlappingJsonLines(Utilities.ParseQuery(\"1:27023180-27023190\"));\r\n                Assert.Equal(2, results.Count());\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void TestQueryChr1()\r\n        {\r\n            var readStream = new BlockGZipStream(ResourceUtilities.GetReadStream(Resources.TopPath(\"cosmicv72.indels.json.gz\")), CompressionMode.Decompress);\r\n            var indexStream = ResourceUtilities.GetReadStream(Resources.TopPath(\"cosmicv72.indels.json.gz.jsi\"));\r\n\r\n            using (var qp = new QueryProcessor(FileUtilities.GetStreamReader(readStream), indexStream))\r\n            {\r\n                var results =\r\n                    qp.ReadOverlappingJsonLines(Utilities.ParseQuery(\"1\"));\r\n\r\n                Assert.Equal(422, results.Count());\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void Query_onthefly_Ensembl_and_Ucsc()\r\n        {\r\n            var readStream = new 
BlockGZipStream(ResourceUtilities.GetReadStream(Resources.TopPath(\"Clinvar20150901.json.gz\")), CompressionMode.Decompress);\r\n            var indexStream = ResourceUtilities.GetReadStream(Resources.TopPath(\"Clinvar20150901.json.gz.jsi\"));\r\n\r\n            using (var qp = new QueryProcessor(FileUtilities.GetStreamReader(readStream), indexStream))\r\n            {\r\n                int ucscCount = qp.ProcessQuery(new[] {\"chr1\"});\r\n                int ensemblCount = qp.ProcessQuery(new[] { \"1\" });\r\n\r\n                Assert.Equal(13, ucscCount);\r\n                Assert.Equal(13, ensemblCount);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void Query_with_header()\r\n        {\r\n            var readStream = new BlockGZipStream(ResourceUtilities.GetReadStream(Resources.TopPath(\"Clinvar20150901.json.gz\")), CompressionMode.Decompress);\r\n            var indexStream = ResourceUtilities.GetReadStream(Resources.TopPath(\"Clinvar20150901.json.gz.jsi\"));\r\n\r\n            using( var stream = new MemoryStream())\r\n            using (var writer = new StreamWriter(stream))\r\n            using (var qp = new QueryProcessor(FileUtilities.GetStreamReader(readStream), indexStream, writer))\r\n            {\r\n                qp.ProcessQuery(new[] {\"chr1\"}, true);\r\n                writer.Flush();\r\n                \r\n                var jsonString = System.Text.Encoding.UTF8.GetString(stream.ToArray(), 0, (int) stream.Length);\r\n                Assert.NotEmpty(jsonString);\r\n                var jObject = JObject.Parse(jsonString);\r\n                Assert.NotNull(jObject);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void Report_overlapping_small_and_extending_large_variants()\r\n        {\r\n            var readStream = new BlockGZipStream(ResourceUtilities.GetReadStream(Resources.TopPath(\"JasixTest.json.gz\")), CompressionMode.Decompress);\r\n            var indexStream = 
ResourceUtilities.GetReadStream(Resources.TopPath(\"JasixTest.json.gz.jsi\"));\r\n\r\n            using (var qp = new QueryProcessor(FileUtilities.GetStreamReader(readStream), indexStream))\r\n            {\r\n                var results =\r\n                    qp.ReadOverlappingJsonLines(Utilities.ParseQuery(\"chr1:16378-17000\"));\r\n\r\n                Assert.Equal(3, results.Count());\r\n\r\n                results =\r\n                    qp.ReadJsonLinesExtendingInto(Utilities.ParseQuery(\"chr1:16378-17000\"));\r\n\r\n                Assert.Single(results);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void Report_overlapping_small_and_extending_multiple_large_variants()\r\n        {\r\n            var readStream = new BlockGZipStream(ResourceUtilities.GetReadStream(Resources.TopPath(\"JasixTest.json.gz\")), CompressionMode.Decompress);\r\n            var indexStream = ResourceUtilities.GetReadStream(Resources.TopPath(\"JasixTest.json.gz.jsi\"));\r\n\r\n            using (var qp = new QueryProcessor(FileUtilities.GetStreamReader(readStream), indexStream))\r\n            {\r\n                var results =\r\n                    qp.ReadOverlappingJsonLines(Utilities.ParseQuery(\"chr1:19004-20000\"));\r\n\r\n                Assert.Equal(3, results.Count());\r\n\r\n                results =\r\n                    qp.ReadJsonLinesExtendingInto(Utilities.ParseQuery(\"chr1:19004-20000\"));\r\n\r\n                Assert.Equal(2, results.Count());\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void Report_overlapping_small_and_large_variants_starting_at_same_location()\r\n        {\r\n            var readStream = new BlockGZipStream(ResourceUtilities.GetReadStream(Resources.TopPath(\"JasixTest.json.gz\")), CompressionMode.Decompress);\r\n            var indexStream = ResourceUtilities.GetReadStream(Resources.TopPath(\"JasixTest.json.gz.jsi\"));\r\n\r\n            using (var qp = new 
QueryProcessor(FileUtilities.GetStreamReader(readStream), indexStream))\r\n            {\r\n                var results =\r\n                    qp.ReadOverlappingJsonLines(Utilities.ParseQuery(\"chr1:46993-50000\"));\r\n\r\n                Assert.Equal(5, results.Count());\r\n\r\n                results =\r\n                    qp.ReadJsonLinesExtendingInto(Utilities.ParseQuery(\"chr1:46993-50000\"));\r\n\r\n                Assert.Empty(results);\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Jasix/OtfIndexCreatorTests.cs",
    "content": "﻿using System.IO;\nusing Jasix;\nusing Jasix.DataStructures;\nusing Moq;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.Interface.Positions;\nusing Xunit;\n\nnamespace UnitTests.Jasix\n{\n    public sealed class OtfIndexCreatorTests\n    {\n        [Fact]\n        public void Add_one_chrom()\n        {\n            var position1 = new Mock<IPosition>();\n            position1.SetupGet(x => x.Chromosome).Returns(ChromosomeUtilities.Chr1);\n            position1.SetupGet(x => x.Start).Returns(100);\n            position1.SetupGet(x => x.RefAllele).Returns(\"A\");\n            position1.SetupGet(x => x.AltAlleles).Returns(new []{\"C\"});\n\n            var memStream = new MemoryStream();\n            using (var indexCreator = new OnTheFlyIndexCreator(memStream))\n            {\n                indexCreator.BeginSection(\"positions\", 100);\n                indexCreator.Add(position1.Object, 2588);\n                indexCreator.EndSection(\"positions\",2699 );\n            }\n\n            var readStream = new MemoryStream(memStream.ToArray());\n            readStream.Seek(0, SeekOrigin.Begin);\n            var index = new JasixIndex(readStream);\n\n            Assert.Equal(100, index.GetSectionBegin(\"positions\"));\n            Assert.Equal(2588, index.GetFirstVariantPosition(\"chr1\", 100,102));\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/Jist/JiSTtests.cs",
    "content": "using System.Collections.Generic;\nusing System.IO;\nusing System.IO.Compression;\nusing System.Text;\nusing Compression.FileHandling;\nusing Genome;\nusing Jasix.DataStructures;\nusing Jist;\nusing Moq;\nusing Newtonsoft.Json.Linq;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.Interface;\nusing VariantAnnotation.Interface.Positions;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.IO;\nusing Xunit;\n\nnamespace UnitTests.Jist\n{\n    public sealed class JiSTtests\n    {\n        private const string NirvanaHeader = \"{\\\"header\\\":\\\"Jist test header\\\",\\\"positions\\\":[\\n\";\n        private const string NirvanaGenes = JsonStitcher.GeneHeaderLine;\n        private const string NirvanaFooter = JsonStitcher.FooterLine;\n\n        private static (Stream jsonStream, Stream jasixStream) GetJsonStreams(Chromosome chromosome, bool withGenes)\n        {\n            var jsonStream  = new MemoryStream();\n            var jasixStream = new MemoryStream();\n            var annotationResources = new Mock<IAnnotationResources>();\n            annotationResources.SetupGet(x => x.AnnotatorVersionTag).Returns(\"NirvanaTest\");\n            annotationResources.SetupGet(x => x.VepDataVersion).Returns(\"VEPTest\");\n            annotationResources.SetupGet(x => x.DataSourceVersions).Returns(new List<IDataSourceVersion>());\n            annotationResources.SetupGet(x => x.SequenceProvider.Assembly).Returns(GenomeAssembly.GRCh38);\n\n            using (var jsonWriter = new JsonWriter(new BlockGZipStream(jsonStream, CompressionMode.Compress, true), jasixStream, annotationResources.Object, \"2020-05-17\", null, true))\n            {\n                var position = new Mock<IPosition>();\n                position.SetupGet(x => x.Chromosome).Returns(chromosome);\n                \n                for (int i = 100 * (chromosome.Index+1); i < 123 *(chromosome.Index +1); i++)\n                {\n                    position.SetupGet(x 
=> x.Start).Returns(i);\n                    position.SetupGet(x => x.RefAllele).Returns(\"A\");\n                    position.SetupGet(x => x.AltAlleles).Returns(new []{\"T\"});\n                    jsonWriter.WritePosition(position.Object, $\"{JsonObject.OpenBrace}\\\"chromosome\\\":\\\"{chromosome.UcscName}\\\",\\\"position\\\":{i}{JsonObject.CloseBrace}\");\n                }\n\n                if (withGenes)\n                {\n                    var geneEntries = new string[]\n                    {\n                        $\"{{\\\"gene{chromosome.EnsemblName}A\\\":\\\"gene annotation\\\"}}\",\n                        $\"{{\\\"gene{chromosome.EnsemblName}B\\\":\\\"gene annotation\\\"}}\"\n\n                    };\n                    jsonWriter.WriteGenes(geneEntries);\n\n                }\n            }\n\n            jsonStream.Position = 0;\n            jasixStream.Position = 0;\n            return (jsonStream, jasixStream);\n        }\n\n        [Fact]\n        public void All_jsons_with_genes()\n        {\n            var jsonStreams = new Stream[3];\n            var jasixSteams = new Stream[3];\n\n            (jsonStreams[0], jasixSteams[0]) = GetJsonStreams(ChromosomeUtilities.Chr1, true);\n            (jsonStreams[1], jasixSteams[1]) = GetJsonStreams(ChromosomeUtilities.Chr2, true);\n            (jsonStreams[2], jasixSteams[2]) = GetJsonStreams(ChromosomeUtilities.Chr3, true);\n\n            var outStream = new MemoryStream();\n            using (var stitcher = new JsonStitcher(jsonStreams, jasixSteams, outStream, true))\n            {\n                stitcher.Stitch();\n            }\n\n            outStream.Position = 0;\n            var sb = new StringBuilder();\n            using (var bgZipStream = new BlockGZipStream(outStream, CompressionMode.Decompress))\n            using (var reader = new StreamReader(bgZipStream))\n            {\n                string line;\n                while ((line = reader.ReadLine())!=null)\n                {\n     
               sb.Append(line+'\\n');\n                }\n            }\n\n            var fullJson = sb.ToString();\n            //making sure all the first and last positions are present in the merged JSON\n            Assert.Contains(\"\\\"header\\\":{\\\"annotator\\\":\\\"NirvanaTest\\\"\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr1\\\",\\\"position\\\":100}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr1\\\",\\\"position\\\":122}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr2\\\",\\\"position\\\":200}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr2\\\",\\\"position\\\":222}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr3\\\",\\\"position\\\":300}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr3\\\",\\\"position\\\":322}\", fullJson);\n            \n            //checking if all the genes are there\n            Assert.Contains(\"gene1A\", fullJson);\n            Assert.Contains(\"gene1B\", fullJson);\n            Assert.Contains(\"gene2A\", fullJson);\n            Assert.Contains(\"gene2B\", fullJson);\n            Assert.Contains(\"gene3A\", fullJson);\n            Assert.Contains(\"gene3B\", fullJson);\n\n            \n            //need to check if this is a valid json\n            var jObject = JObject.Parse(fullJson);\n            Assert.NotNull(jObject);\n        }\n        \n        [Fact]\n        public void Some_with_genes()\n        {\n            var jsonStreams = new Stream[3];\n            var jasixSteams = new Stream[3];\n\n            (jsonStreams[0], jasixSteams[0]) = GetJsonStreams(ChromosomeUtilities.Chr1, true);\n            (jsonStreams[1], jasixSteams[1]) = GetJsonStreams(ChromosomeUtilities.Chr2, false);\n            (jsonStreams[2], jasixSteams[2]) = GetJsonStreams(ChromosomeUtilities.Chr3, true);\n\n            var outStream = new MemoryStream();\n            using (var stitcher = 
new JsonStitcher(jsonStreams, jasixSteams, outStream, true))\n            {\n                stitcher.Stitch();\n            }\n\n            outStream.Position = 0;\n            var sb = new StringBuilder();\n            using (var bgZipStream = new BlockGZipStream(outStream, CompressionMode.Decompress))\n            using (var reader = new StreamReader(bgZipStream))\n            {\n                string line;\n                while ((line = reader.ReadLine())!=null)\n                {\n                    sb.Append(line+'\\n');\n                }\n            }\n\n            var fullJson = sb.ToString();\n            //making sure all the first and last positions are present in the merged JSON\n            Assert.Contains(\"\\\"header\\\":{\\\"annotator\\\":\\\"NirvanaTest\\\"\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr1\\\",\\\"position\\\":100}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr1\\\",\\\"position\\\":122}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr2\\\",\\\"position\\\":200}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr2\\\",\\\"position\\\":222}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr3\\\",\\\"position\\\":300}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr3\\\",\\\"position\\\":322}\", fullJson);\n            \n            //checking if all the genes are there\n            Assert.Contains(\"gene1A\", fullJson);\n            Assert.Contains(\"gene1B\", fullJson);\n            Assert.DoesNotContain(\"gene2A\", fullJson);\n            Assert.DoesNotContain(\"gene2B\", fullJson);\n            Assert.Contains(\"gene3A\", fullJson);\n            Assert.Contains(\"gene3B\", fullJson);\n\n            \n            //need to check if this is a valid json\n            var jObject = JObject.Parse(fullJson);\n            Assert.NotNull(jObject);\n        }\n\n\n        [Fact]\n        
public void All_jsons_without_genes()\n        {\n            var jsonStreams = new Stream[3];\n            var jasixSteams = new Stream[3];\n\n            (jsonStreams[0], jasixSteams[0]) = GetJsonStreams(ChromosomeUtilities.Chr1, false);\n            (jsonStreams[1], jasixSteams[1]) = GetJsonStreams(ChromosomeUtilities.Chr2, false);\n            (jsonStreams[2], jasixSteams[2]) = GetJsonStreams(ChromosomeUtilities.Chr3, false);\n\n            var outStream = new MemoryStream();\n            using (var stitcher = new JsonStitcher(jsonStreams, jasixSteams, outStream, true))\n            {\n                stitcher.Stitch();\n            }\n\n            outStream.Position = 0;\n            var sb = new StringBuilder();\n            using (var bgZipStream = new BlockGZipStream(outStream, CompressionMode.Decompress))\n            using (var reader = new StreamReader(bgZipStream))\n            {\n                string line;\n                while ((line = reader.ReadLine())!=null)\n                {\n                    sb.Append(line+'\\n');\n                }\n            }\n\n            var fullJson = sb.ToString();\n            //making sure all the first and last positions are present in the merged JSON\n            Assert.Contains(\"\\\"header\\\":{\\\"annotator\\\":\\\"NirvanaTest\\\"\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr1\\\",\\\"position\\\":100}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr1\\\",\\\"position\\\":122}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr2\\\",\\\"position\\\":200}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr2\\\",\\\"position\\\":222}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr3\\\",\\\"position\\\":300}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr3\\\",\\\"position\\\":322}\", fullJson);\n            \n            //checking if all the genes are there\n        
    Assert.DoesNotContain(\"gene1A\", fullJson);\n            Assert.DoesNotContain(\"gene1B\", fullJson);\n            Assert.DoesNotContain(\"gene2A\", fullJson);\n            Assert.DoesNotContain(\"gene2B\", fullJson);\n            Assert.DoesNotContain(\"gene3A\", fullJson);\n            Assert.DoesNotContain(\"gene3B\", fullJson);\n\n            \n            //need to check if this is a valid json\n            var jObject = JObject.Parse(fullJson);\n            Assert.NotNull(jObject);\n        }\n\n        //The following tests don't use JsonWriter. They are intended to isolate issues that might be due to some \n        // error in the json writer. The following tests try to create the ideal json output.\n        private static (Stream jsonStream, Stream jasixStream) GetNirvanaJsonStream(int chromNumber)\n        {\n            var jsonStream = new MemoryStream();\n            var jasixStream = new MemoryStream();\n\n            using (var bgZipStream = new BlockGZipStream(jsonStream, CompressionMode.Compress, true))\n            using (var writer = new BgzipTextWriter(bgZipStream))\n            using(var jasixIndex = new JasixIndex())\n            {\n                writer.Write(NirvanaHeader);\n                writer.Flush();\n                jasixIndex.BeginSection(JasixCommons.PositionsSectionTag, writer.Position);\n                for (int i = 100*chromNumber; i < 123*chromNumber; i++)\n                {\n                    writer.WriteLine($\"{JsonObject.OpenBrace}\\\"chromosome\\\":\\\"chr{chromNumber}\\\",\\\"position\\\":{i}{JsonObject.CloseBrace},\");\n                    if(i%50==0) writer.Flush();//creating another block\n                }\n                writer.WriteLine($\"{JsonObject.OpenBrace}\\\"chromosome\\\":\\\"chr{chromNumber}\\\",\\\"position\\\":{100*chromNumber+25}{JsonObject.CloseBrace}\");\n                writer.Flush();\n                jasixIndex.EndSection(JasixCommons.PositionsSectionTag, writer.Position);\n                
\n                writer.Write(NirvanaGenes);\n                writer.Flush();\n                \n                jasixIndex.BeginSection(JasixCommons.GenesSectionTag, writer.Position);\n                writer.WriteLine($\"{{\\\"gene{chromNumber}A\\\":\\\"gene annotation\\\"}},\");\n                writer.WriteLine($\"{{\\\"gene{chromNumber}B\\\":\\\"gene annotation\\\"}}\");\n                writer.Flush();\n                jasixIndex.EndSection(JasixCommons.GenesSectionTag, writer.Position);\n                writer.Write(NirvanaFooter);\n                jasixIndex.Write(jasixStream);\n            }\n\n            jsonStream.Position = 0;\n            jasixStream.Position = 0;\n            return (jsonStream, jasixStream);\n        }\n\n        [Fact]\n        public void EndToEndStitching()\n        {\n            var jsonStreams = new Stream[3];\n            var jasixSteams = new Stream[3];\n\n            (jsonStreams[0], jasixSteams[0]) = GetNirvanaJsonStream(1);\n            (jsonStreams[1], jasixSteams[1]) = GetNirvanaJsonStream(2);\n            (jsonStreams[2], jasixSteams[2]) = GetNirvanaJsonStream(3);\n\n            var outStream = new MemoryStream();\n            using (var stitcher = new JsonStitcher(jsonStreams, jasixSteams, outStream, true))\n            {\n                stitcher.Stitch();\n            }\n\n            outStream.Position = 0;\n            var sb = new StringBuilder();\n            using (var bgZipStream = new BlockGZipStream(outStream, CompressionMode.Decompress))\n            using (var reader = new StreamReader(bgZipStream))\n            {\n                string line;\n                while ((line = reader.ReadLine())!=null)\n                {\n                    sb.Append(line+'\\n');\n                }\n            }\n\n            var fullJson = sb.ToString();\n            //making sure all the first and last positions are present in the merged JSON\n            Assert.Contains(NirvanaHeader, fullJson);\n            
Assert.Contains(\"{\\\"chromosome\\\":\\\"chr1\\\",\\\"position\\\":100}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr1\\\",\\\"position\\\":125}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr2\\\",\\\"position\\\":200}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr2\\\",\\\"position\\\":225}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr3\\\",\\\"position\\\":300}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr3\\\",\\\"position\\\":325}\", fullJson);\n            \n            //checking if all the genes are there\n            Assert.Contains(\"gene1A\", fullJson);\n            Assert.Contains(\"gene1B\", fullJson);\n            Assert.Contains(\"gene2A\", fullJson);\n            Assert.Contains(\"gene2B\", fullJson);\n            Assert.Contains(\"gene3A\", fullJson);\n            Assert.Contains(\"gene3B\", fullJson);\n\n            \n            //need to check if this is a valid json\n            var jObject = JObject.Parse(fullJson);\n            Assert.NotNull(jObject);\n        }\n\n        private static (Stream jsonStream, Stream jasixStream) GetNirvanaJsonStreamWithoutGenes(int chromNumber)\n        {\n            var jsonStream  = new MemoryStream();\n            var jasixStream = new MemoryStream();\n\n            using (var bgZipStream = new BlockGZipStream(jsonStream, CompressionMode.Compress, true))\n            using (var writer = new BgzipTextWriter(bgZipStream))\n            using(var jasixIndex = new JasixIndex())\n            {\n                writer.Write(NirvanaHeader);\n                writer.Flush();\n                jasixIndex.BeginSection(JasixCommons.PositionsSectionTag, writer.Position);\n                for (int i = 100 *chromNumber; i < 123 *chromNumber; i++)\n                {\n                    
writer.WriteLine($\"{JsonObject.OpenBrace}\\\"chromosome\\\":\\\"chr{chromNumber}\\\",\\\"position\\\":{i}{JsonObject.CloseBrace},\");\n                    if(i %50 ==0) writer.Flush();//creating another block\n                }\n                writer.WriteLine($\"{JsonObject.OpenBrace}\\\"chromosome\\\":\\\"chr{chromNumber}\\\",\\\"position\\\":{100 *chromNumber +25}{JsonObject.CloseBrace}\");\n                writer.Flush();\n                jasixIndex.EndSection(JasixCommons.PositionsSectionTag, writer.Position);\n                \n                writer.Write(NirvanaFooter);\n                jasixIndex.Write(jasixStream);\n            }\n\n            jsonStream.Position  = 0;\n            jasixStream.Position = 0;\n            return (jsonStream, jasixStream);\n        }\n        [Fact]\n        public void StitchingWithoutGenes()\n        {\n            var jsonStreams = new Stream[3];\n            var jasixSteams = new Stream[3];\n\n            (jsonStreams[0], jasixSteams[0]) = GetNirvanaJsonStream(1);\n            (jsonStreams[1], jasixSteams[1]) = GetNirvanaJsonStreamWithoutGenes(2);\n            (jsonStreams[2], jasixSteams[2]) = GetNirvanaJsonStream(3);\n\n            var outStream = new MemoryStream();\n            using (var stitcher = new JsonStitcher(jsonStreams, jasixSteams, outStream, true))\n            {\n                stitcher.Stitch();\n            }\n\n            outStream.Position = 0;\n            var sb = new StringBuilder();\n            using (var bgZipStream = new BlockGZipStream(outStream, CompressionMode.Decompress))\n            using (var reader = new StreamReader(bgZipStream))\n            {\n                string line;\n                while ((line = reader.ReadLine())!=null)\n                {\n                    sb.Append(line+'\\n');\n                }\n            }\n\n            var fullJson = sb.ToString();\n            //making sure all the first and last positions are present in the merged JSON\n            
Assert.Contains(NirvanaHeader, fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr1\\\",\\\"position\\\":100}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr1\\\",\\\"position\\\":125}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr2\\\",\\\"position\\\":200}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr2\\\",\\\"position\\\":225}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr3\\\",\\\"position\\\":300}\", fullJson);\n            Assert.Contains(\"{\\\"chromosome\\\":\\\"chr3\\\",\\\"position\\\":325}\", fullJson);\n            \n            //checking if all the genes are there\n            Assert.Contains(\"gene1A\", fullJson);\n            Assert.Contains(\"gene1B\", fullJson);\n            Assert.Contains(\"gene3A\", fullJson);\n            Assert.Contains(\"gene3B\", fullJson);\n\n            \n            //need to check if this is a valid json\n            var jObject = JObject.Parse(fullJson);\n            Assert.NotNull(jObject);\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/MitoHeteroplasmy/MitoHeteroplasmyProviderTests.cs",
    "content": "﻿using MitoHeteroplasmy;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.Pools;\nusing Variants;\nusing Xunit;\n\nnamespace UnitTests.MitoHeteroplasmy\n{\n    public sealed class MitoHeteroplasmyProviderTests\n    {\n\n        private static MitoHeteroplasmyProvider GetProvider()\n        {\n            var provider = new MitoHeteroplasmyProvider();\n            provider.Add(1, \"C\", new[] { 0.123, 0.200, 0.301 }, new[] { 1, 3, 4 });\n            provider.Add(1, \"G\", new[] { 0.101, 0.201 }, new[] { 1, 2 });\n            provider.Add(2, \"T\", new[] { 0, 0.001, 0.002, 0.003 }, new[] { 134, 1111, 936, 203 });\n\n            return provider;\n        }\n\n        [Fact]\n        public void GetVrfPercentiles_AsExpected()\n        {\n            var provider = GetProvider();\n\n            var position = 1;\n\n            IVariant[] variants          = {\n                VariantPool.Get(ChromosomeUtilities.ChrM, position, position, \"N\", \"C\", VariantType.SNV,\n                    null, false, false, false, null, AnnotationBehavior.SmallVariants, false),\n                VariantPool.Get(ChromosomeUtilities.ChrM, position, position, \"N\", \"G\", VariantType.SNV,\n                    null, false, false, false, null, AnnotationBehavior.SmallVariants, false),\n                VariantPool.Get(ChromosomeUtilities.ChrM, position, position, \"N\", \"T\", VariantType.SNV,\n                    null, false, false, false, null, AnnotationBehavior.SmallVariants, false)\n            };\n            var percentilesSample = provider.GetVrfPercentiles(variants, new[] { 0.2, 0.15, 0.02 });\n\n            Assert.Equal(3, percentilesSample.Length);\n            Assert.True(percentilesSample[0].HasValue);\n            Assert.Equal(100 / 8.0, percentilesSample[0].Value, 3);\n            Assert.True(percentilesSample[1].HasValue);\n            Assert.Equal(100 / 3.0, percentilesSample[1].Value, 3);\n            Assert.Null(percentilesSample[2]);\n\n            
foreach (IVariant variant in variants)\n            {\n                VariantPool.Return((Variant) variant);\n            }\n        }\n\n        [Fact]\n        public void GetVrfPercentiles_NullIfNoValue()\n        {\n            var provider = GetProvider();\n\n            var position = 1;\n            \n            IVariant[] variants = {\n                VariantPool.Get(ChromosomeUtilities.ChrM, position, position, \"N\", \"T\", VariantType.SNV,\n                    null, false, false, false, null, AnnotationBehavior.SmallVariants, false),\n                VariantPool.Get(ChromosomeUtilities.ChrM, position, position, \"N\", \"ACC\", VariantType.insertion,\n                    null, false, false, false, null, AnnotationBehavior.SmallVariants, false)\n            };\n            var percentiles = provider.GetVrfPercentiles(variants, new[] { 0.24, 0.12 });\n\n            Assert.Null(percentiles);\n            foreach (IVariant variant in variants)\n            {\n                VariantPool.Return((Variant) variant);\n            }\n        }\n\n        [Fact]\n        public void GetVrfPercentiles_ProperRounding()\n        {\n            var provider = GetProvider();\n            var position = 2;\n            \n            IVariant[] variants = {\n                VariantPool.Get(ChromosomeUtilities.ChrM, position, position, \"N\", \"T\", VariantType.SNV,\n                    null, false, false, false, null, AnnotationBehavior.SmallVariants, false)\n            };\n\n\n            var percentilesSample = provider.GetVrfPercentiles(variants, new[] { 0.0014 });\n            \n            Assert.Single(percentilesSample);\n            Assert.True(percentilesSample[0].HasValue);\n            Assert.Equal(52.22, percentilesSample[0].Value, 2);\n            foreach (IVariant variant in variants)\n            {\n                VariantPool.Return((Variant) variant);\n            }\n        }\n        [Fact]\n        public void GetVrfPercentiles_zero()\n        {\n   
         var provider   = GetProvider();\n            var position   = 1;\n            \n            IVariant[] variants = {\n                VariantPool.Get(ChromosomeUtilities.ChrM, position, position, \"N\", \"G\", VariantType.SNV,\n                    null, false, false, false, null, AnnotationBehavior.SmallVariants, false)\n            };\n\n            var percentilesSample = provider.GetVrfPercentiles(variants, new[] { 0.0034 });\n            \n            Assert.Single(percentilesSample);\n            Assert.True(percentilesSample[0].HasValue);\n            Assert.Equal(0, percentilesSample[0].Value, 2);\n            foreach (IVariant variant in variants)\n            {\n                VariantPool.Return((Variant) variant);\n            }\n        }\n        \n        [Fact]\n        public void GetVrfPercentiles_100()\n        {\n            var provider   = GetProvider();\n            var position   = 2;\n\n            IVariant[] variants = {\n                VariantPool.Get(ChromosomeUtilities.ChrM, position, position, \"N\", \"T\", VariantType.SNV,\n                    null, false, false, false, null, AnnotationBehavior.SmallVariants, false)\n            };\n            \n            var percentilesSample = provider.GetVrfPercentiles(variants, new[] { 0.0034 });\n            \n            Assert.Single(percentilesSample);\n            Assert.True(percentilesSample[0].HasValue);\n            Assert.Equal(100, percentilesSample[0].Value, 2);\n            foreach (IVariant variant in variants)\n            {\n                VariantPool.Return((Variant) variant);\n            }\n        }\n\n        [Fact]\n        public void CapVrf()\n        {\n            var provider = new MitoHeteroplasmyProvider();\n            provider.Add(750, \"G\", new[] { 0.0,0.001,0.002,0.991,0.994,0.995,0.996,0.997,0.998,0.999 }, new[] { 24,4,2,3,2,1,1,4,3,2460});\n            var position   = 750;\n            \n            IVariant[] variants = {\n                
VariantPool.Get(ChromosomeUtilities.ChrM, position, position, \"N\", \"G\", VariantType.SNV,\n                    null, false, false, false, null, AnnotationBehavior.SmallVariants, false)\n            };\n            var percentilesSample = provider.GetVrfPercentiles(variants, new[] { 1.0 });\n            \n            Assert.Single(percentilesSample);\n            Assert.True(percentilesSample[0].HasValue);\n            Assert.Equal(1.76, percentilesSample[0].Value, 2);\n            foreach (IVariant variant in variants)\n            {\n                VariantPool.Return((Variant) variant);\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "UnitTests/MockedData/Genes.cs",
    "content": "﻿using UnitTests.TestUtilities;\nusing VariantAnnotation.AnnotatedPositions.Transcript;\nusing VariantAnnotation.Caches.DataStructures;\n\n// ReSharper disable InconsistentNaming\nnamespace UnitTests.MockedData\n{\n    public static class Genes\n    {\n        public static readonly Gene MED8 =\n            new(ChromosomeUtilities.Chr1, 43383908, 43389812, true, \"MED8\", 19971, CompactId.Convert(\"112950\"), CompactId.Convert(\"ENSG00000159479\"));\n\n        public static readonly Gene SAMD13 =\n            new(ChromosomeUtilities.Chr1, 84298366, 84389957, false, \"SAMD13\", 24582, CompactId.Convert(\"148418\"), CompactId.Convert(\n                \"ENSG00000203943\"));\n\n        public static readonly Gene POTEI =\n            new(ChromosomeUtilities.Chr2, 130459455, 131626428, true, \"POTEI\", 37093, CompactId.Convert(\"653269\"), CompactId.Convert(\n                \"ENSG00000196834\"));\n\n        public static readonly Gene PTPN18 =\n            new(ChromosomeUtilities.Chr2, 130356007, 130375409, false, \"PTPN18\", 9649, CompactId.Convert(\"26469\"), CompactId.Convert(\n                \"ENSG00000072135\"));\n\n        public static readonly Gene AL078459_1 =\n            new(ChromosomeUtilities.Chr1, 85276715, 85448124, false, \"AL078459.1\", -1, CompactId.Empty, CompactId.Convert(\"ENSG00000223653\"));\n    }\n}"
  },
  {
    "path": "UnitTests/MockedData/TranscriptRegions.cs",
    "content": "﻿using VariantAnnotation.Caches.DataStructures;\nusing VariantAnnotation.Interface.AnnotatedPositions;\n\n// ReSharper disable InconsistentNaming\nnamespace UnitTests.MockedData\n{\n    public static class TranscriptRegions\n    {\n        public static readonly ITranscriptRegion[] ENST00000290663 =\n        {\n            new TranscriptRegion(TranscriptRegionType.Exon,   8, 43383917, 43384552, 848, 1483),\n            new TranscriptRegion(TranscriptRegionType.Intron, 7, 43384553, 43385045, 847, 848),\n            new TranscriptRegion(TranscriptRegionType.Exon,   7, 43385046, 43385106, 787, 847),\n            new TranscriptRegion(TranscriptRegionType.Intron, 6, 43385107, 43385977, 786, 787),\n            new TranscriptRegion(TranscriptRegionType.Exon,   6, 43385978, 43386226, 538, 786),\n            new TranscriptRegion(TranscriptRegionType.Intron, 5, 43386227, 43386588, 537, 538),\n            new TranscriptRegion(TranscriptRegionType.Exon,   5, 43386589, 43386670, 456, 537),\n            new TranscriptRegion(TranscriptRegionType.Intron, 4, 43386671, 43386857, 455, 456),\n            new TranscriptRegion(TranscriptRegionType.Exon,   4, 43386858, 43386998, 315, 455),\n            new TranscriptRegion(TranscriptRegionType.Intron, 3, 43386999, 43387502, 314, 315),\n            new TranscriptRegion(TranscriptRegionType.Exon,   3, 43387503, 43387647, 170, 314),\n            new TranscriptRegion(TranscriptRegionType.Intron, 2, 43387648, 43388309, 169, 170),\n            new TranscriptRegion(TranscriptRegionType.Exon,   2, 43388310, 43388428, 51,  169),\n            new TranscriptRegion(TranscriptRegionType.Intron, 1, 43388429, 43389758, 50,  51),\n            new TranscriptRegion(TranscriptRegionType.Exon,   1, 43389759, 43389808, 1,   50)\n        };\n\n        public static readonly ITranscriptRegion[] ENST00000370673 =\n        {\n            new TranscriptRegion(TranscriptRegionType.Exon,   1, 84298366, 84298567, 1,   202),\n            new 
TranscriptRegion(TranscriptRegionType.Intron, 1, 84298568, 84303202, 202, 203),\n            new TranscriptRegion(TranscriptRegionType.Exon,   2, 84303203, 84303287, 203, 287),\n            new TranscriptRegion(TranscriptRegionType.Intron, 2, 84303288, 84325636, 287, 288),\n            new TranscriptRegion(TranscriptRegionType.Exon,   3, 84325637, 84325748, 288, 399),\n            new TranscriptRegion(TranscriptRegionType.Intron, 3, 84325749, 84349630, 399, 400),\n            new TranscriptRegion(TranscriptRegionType.Exon,   4, 84349631, 84350798, 400, 1567)\n        };\n\n        public static readonly ITranscriptRegion[] ENST00000615053 =\n        {\n            new TranscriptRegion(TranscriptRegionType.Exon,   13, 130463799, 130464144, 1581, 1926),\n            new TranscriptRegion(TranscriptRegionType.Intron, 12, 130464145, 130465651, 1580, 1581),\n            new TranscriptRegion(TranscriptRegionType.Exon,   12, 130465652, 130465664, 1568, 1580),\n            new TranscriptRegion(TranscriptRegionType.Intron, 11, 130465665, 130465666, 1567, 1568),\n            new TranscriptRegion(TranscriptRegionType.Exon,   11, 130465667, 130465772, 1462, 1567),\n            new TranscriptRegion(TranscriptRegionType.Intron, 10, 130465773, 130474377, 1461, 1462),\n            new TranscriptRegion(TranscriptRegionType.Exon,   10, 130474378, 130474534, 1305, 1461),\n            new TranscriptRegion(TranscriptRegionType.Intron, 9,  130474535, 130488188, 1304, 1305),\n            new TranscriptRegion(TranscriptRegionType.Exon,   9,  130488189, 130488201, 1292, 1304),\n            new TranscriptRegion(TranscriptRegionType.Intron, 8,  130488202, 130489237, 1291, 1292),\n            new TranscriptRegion(TranscriptRegionType.Exon,   8,  130489238, 130489279, 1250, 1291),\n            new TranscriptRegion(TranscriptRegionType.Intron, 7,  130489280, 130490669, 1249, 1250),\n            new TranscriptRegion(TranscriptRegionType.Exon,   7,  130490670, 130490740, 1179, 1249),\n            
new TranscriptRegion(TranscriptRegionType.Intron, 6,  130490741, 130496551, 1178, 1179),\n            new TranscriptRegion(TranscriptRegionType.Exon,   6,  130496552, 130496622, 1108, 1178),\n            new TranscriptRegion(TranscriptRegionType.Intron, 5,  130496623, 130499083, 1107, 1108),\n            new TranscriptRegion(TranscriptRegionType.Exon,   5,  130499084, 130499221, 970,  1107),\n            new TranscriptRegion(TranscriptRegionType.Intron, 4,  130499222, 130500535, 969,  970),\n            new TranscriptRegion(TranscriptRegionType.Exon,   4,  130500536, 130500642, 863,  969),\n            new TranscriptRegion(TranscriptRegionType.Intron, 3,  130500643, 130503445, 862,  863),\n            new TranscriptRegion(TranscriptRegionType.Exon,   3,  130503446, 130503619, 689,  862),\n            new TranscriptRegion(TranscriptRegionType.Intron, 2,  130503620, 130503779, 688,  689),\n            new TranscriptRegion(TranscriptRegionType.Exon,   2,  130503780, 130503894, 574,  688),\n            new TranscriptRegion(TranscriptRegionType.Intron, 1,  130503895, 130508714, 573,  574),\n            new TranscriptRegion(TranscriptRegionType.Exon,   1,  130508715, 130509287, 1,    573)\n        };\n\n        public static readonly ITranscriptRegion[] ENST00000347849 =\n        {\n            new TranscriptRegion(TranscriptRegionType.Exon,   1,  130356045, 130356200, 1,    156),\n            new TranscriptRegion(TranscriptRegionType.Intron, 1,  130356201, 130369132, 156,  157),\n            new TranscriptRegion(TranscriptRegionType.Exon,   2,  130369133, 130369201, 157,  225),\n            new TranscriptRegion(TranscriptRegionType.Intron, 2,  130369202, 130369764, 225,  226),\n            new TranscriptRegion(TranscriptRegionType.Exon,   3,  130369765, 130369827, 226,  288),\n            new TranscriptRegion(TranscriptRegionType.Intron, 3,  130369828, 130370047, 288,  289),\n            new TranscriptRegion(TranscriptRegionType.Exon,   4,  130370048, 130370190, 289,  
431),\n            new TranscriptRegion(TranscriptRegionType.Intron, 4,  130370191, 130370556, 431,  432),\n            new TranscriptRegion(TranscriptRegionType.Exon,   5,  130370557, 130370623, 432,  498),\n            new TranscriptRegion(TranscriptRegionType.Intron, 5,  130370624, 130370704, 498,  499),\n            new TranscriptRegion(TranscriptRegionType.Exon,   6,  130370705, 130370782, 499,  576),\n            new TranscriptRegion(TranscriptRegionType.Intron, 6,  130370783, 130370874, 576,  577),\n            new TranscriptRegion(TranscriptRegionType.Exon,   7,  130370875, 130370964, 577,  666),\n            new TranscriptRegion(TranscriptRegionType.Intron, 7,  130370965, 130371198, 666,  667),\n            new TranscriptRegion(TranscriptRegionType.Exon,   8,  130371199, 130371287, 667,  755),\n            new TranscriptRegion(TranscriptRegionType.Intron, 8,  130371288, 130372256, 755,  756),\n            new TranscriptRegion(TranscriptRegionType.Exon,   9,  130372257, 130372483, 756,  982),\n            new TranscriptRegion(TranscriptRegionType.Intron, 9,  130372484, 130372872, 982,  983),\n            new TranscriptRegion(TranscriptRegionType.Exon,   10, 130372873, 130372947, 983,  1057),\n            new TranscriptRegion(TranscriptRegionType.Intron, 10, 130372948, 130373156, 1057, 1058),\n            new TranscriptRegion(TranscriptRegionType.Exon,   11, 130373157, 130374571, 1058, 2472)\n        };\n\n        public static readonly ITranscriptRegion[] ENST00000427819 =\n        {\n            new TranscriptRegion(TranscriptRegionType.Exon,   1, 85276715, 85276797, 1,   83),\n            new TranscriptRegion(TranscriptRegionType.Intron, 1, 85276798, 85277640, 83,  84),\n            new TranscriptRegion(TranscriptRegionType.Exon,   2, 85277641, 85277738, 84,  181),\n            new TranscriptRegion(TranscriptRegionType.Intron, 2, 85277739, 85376765, 181, 182),\n            new TranscriptRegion(TranscriptRegionType.Exon,   3, 85376766, 85376835, 182, 
251),\n            new TranscriptRegion(TranscriptRegionType.Intron, 3, 85376836, 85380373, 251, 252),\n            new TranscriptRegion(TranscriptRegionType.Exon,   4, 85380374, 85380565, 252, 443),\n            new TranscriptRegion(TranscriptRegionType.Intron, 4, 85380566, 85398456, 443, 444),\n            new TranscriptRegion(TranscriptRegionType.Exon,   5, 85398457, 85399963, 444, 1950)\n        };\n    }\n}"
  },
  {
    "path": "UnitTests/MockedData/Transcripts.cs",
    "content": "﻿using UnitTests.TestUtilities;\nusing VariantAnnotation.AnnotatedPositions.Transcript;\nusing VariantAnnotation.Caches.DataStructures;\nusing VariantAnnotation.Interface.AnnotatedPositions;\n\n// ReSharper disable InconsistentNaming\nnamespace UnitTests.MockedData\n{\n    public static class Transcripts\n    {\n        // the following 5 transcripts were chosen to stress test our gene fusions:\n        public static readonly ITranscript ENST00000290663 = new Transcript(ChromosomeUtilities.Chr1, 43383917, 43389808,\n            CompactId.Convert(\"ENST00000290663\", 10), Translations.ENST00000290663, BioType.protein_coding, Genes.MED8, 1483, 0, true,\n            TranscriptRegions.ENST00000290663, 8, null, 0, 0, Source.Ensembl, false, false, null, null);\n        \n        public static readonly ITranscript ENST00000370673 = new Transcript(ChromosomeUtilities.Chr1, 84298366, 84350798,\n            CompactId.Convert(\"ENST00000370673\", 7), Translations.ENST00000370673, BioType.protein_coding, Genes.SAMD13, 1567, 0, false,\n            TranscriptRegions.ENST00000370673, 4, null, 0, 0, Source.Ensembl, false, false, null, null);\n        \n        public static readonly ITranscript ENST00000615053 = new Transcript(ChromosomeUtilities.Chr2, 130463799, 130509287,\n            CompactId.Convert(\"ENST00000615053\", 3), Translations.ENST00000615053, BioType.protein_coding, Genes.POTEI, 1926, 0, false,\n            TranscriptRegions.ENST00000615053, 13, null, 0, 0, Source.Ensembl, false, false, null, null);\n        \n        public static readonly ITranscript ENST00000347849 = new Transcript(ChromosomeUtilities.Chr2, 130356045, 130374571,\n            CompactId.Convert(\"ENST00000347849\", 7), Translations.ENST00000347849, BioType.protein_coding, Genes.PTPN18, 2472, 0, false,\n            TranscriptRegions.ENST00000347849, 11, null, 0, 0, Source.Ensembl, false, false, null, null);\n        \n        // antisense RNA\n        public static readonly 
ITranscript ENST00000427819 = new Transcript(ChromosomeUtilities.Chr1, 85276715, 85399963,\n            CompactId.Convert(\"ENST00000427819\", 5), null, BioType.antisense_RNA, Genes.AL078459_1, 1950, 0, false,\n            TranscriptRegions.ENST00000427819, 5, null, 0, 0, Source.Ensembl, false, false, null, null);\n    }\n}"
  },
  {
    "path": "UnitTests/MockedData/Translations.cs",
    "content": "﻿using VariantAnnotation.AnnotatedPositions.Transcript;\nusing VariantAnnotation.Caches.DataStructures;\nusing VariantAnnotation.Interface.AnnotatedPositions;\n\n// ReSharper disable InconsistentNaming\nnamespace UnitTests.MockedData\n{\n    public static class Translations\n    {\n        public static readonly ITranslation ENST00000290663 = new Translation(new CodingRegion(43384450, 43389764, 45, 950, 906),\n            CompactId.Convert(\"ENSP00000290663\", 6),\n            \"MQREEKQLEASLDALLSQVADLKNSLGSFICKLENEYGRLTWPSVLDSFALLSGQLNTLNKVLKHEKTPLFRNQVIIPLVLSPDRDEDLMRQTEGRVPVFSHEVVPDHLRTKPDPEVEEQEKQLTTDAARIGADAAQKQIQSLNKMCSNLLEKISKEERESESGGLRPNKQTFNPTDTNALVAAVAFGKGLSNWRPSGSSGPGQAGQPGAGTILAGTSGLQQVQMAGAPSQQQPMLSGVQMAQAGQPGKMPSGIKTNIKSASMHPYQRPSCLGFILAIPLRRKVKKLLGQEGKKNAHLQLW\");\n\n        public static readonly ITranslation ENST00000370673 = new Translation(new CodingRegion(84298558, 84349774, 193, 543, 351),\n            CompactId.Convert(\"ENSP00000359707\", 3),\n            \"MRGVAEVKEPCSLPMLSVDMENKENGSVGVKNSMENGRPPDPADWAVMDVVNYFRTVGFEEQASAFQEQEIDGKSLLLMTRNDVLTGLQLKLGPALKIYEYHVKPLQTKHLKNNSS\");\n\n        public static readonly ITranslation ENST00000615053 = new Translation(new CodingRegion(130465653, 130509235, 53, 1579, 1527),\n            CompactId.Convert(\"ENSP00000483193\", 1),\n            \"MVAEVDSMPAASSVKKPFVLRSKMGKWCRHCFPCCRGSGKSNVGTSGDQDDSTMKTLRSKMGKWCCHCFPCCRGSGKSNVGTSGDHDDSAMKTLRSKMGKWCCHCFPCCRGSGKSNVGAWGDYDDSAFVEPRYHVRREDLDKLHRAAWWGKVARKDLIVMLRDTDVNKQDKQKRTALHLASANGNSGVVKLLLDRRCQLNVLDNKKRTALTKAVQCQEDECALMLLEHGTDPNIPDEYGNTTLHYAIYNEDKLMAKALLLYGADIESKNKHGLTPLLLGVHEQKQQVVKFLIKKKANLNALDRYGRTALILAVCCGSASIVSLLLEQNIDVSSQDLSGQTAREYAVSSHHHVICQLLSDYKEKQMLKISSENSNPEQDLKLTSEEESQRFKGSENSQPEKMSQEPEINKDGDRKVEEEMKKHGSTHVGFPENLTNGATAGNGDDGLIPPRKSRTPESQQFPDTENEEYHSDEQNDTQKQFCEEQNTGILHDEILIHEEKQIEVVENEF\");\n\n        public static readonly ITranslation ENST00000347849 = new Translation(new CodingRegion(130356108, 130373224, 64, 1125, 1062),\n         
   CompactId.Convert(\"ENSP00000310092\", 5),\n            \"MSRSLDSARSFLERLEARGGREGAVLAGEFSKRCERYWAQEQEPLQTGLFCITLIKEKWLNEDIMLRTLKVTFQKESRSVYQLQYMSWPDRGVPSSPDHMLAMVEEARRLQGSGPEPLCVHCSAGCGRTGVLCTVDYVRQLLLTQMIPPDFSLFDVVLKMRKQRPAAVQTEEQYRFLYHTVAQMFCSTLQNASPHYQNIKENCAPLYDDALFLRTPQALLAIPRPPGGVLRSISVPGSPGHAMADTYAVVQKRGAPAGAGSGTQTGTGTGTGARSAEEAPLYSKVTPRAQRPGAHAEDARGTLPGRVPADQSPAGSGAYEDVAGGAQTGGLGFNLRIGRPKGPRDPPAEWTRV\");\n    }\n}"
  },
  {
    "path": "UnitTests/Nirvana/AnnotationFilesTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing Nirvana;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Nirvana\r\n{\r\n    public sealed class AnnotationFilesTests\r\n    {\r\n        [Fact]\r\n        public void GetFiles_FromDirectory_AsExpected()\r\n        {\r\n            var files = new AnnotationFiles();\r\n            var saDirectory = Resources.MockSaFiles;\r\n            files.AddFiles(saDirectory);\r\n\r\n            var expectedNsaFiles = new List<(string, string)>\r\n            {\r\n                (Path.Combine(saDirectory, \"sa1.nsa\"), Path.Combine(saDirectory, \"sa1.nsa.idx\")),\r\n                (Path.Combine(saDirectory, \"sa2.nsa\"), Path.Combine(saDirectory, \"sa2.nsa.idx\"))\r\n            };\r\n\r\n            var expectedNsiFiles = new List<string>\r\n            {\r\n                Path.Combine(saDirectory, \"sa3.nsi\"),\r\n                Path.Combine(saDirectory, \"sa4.nsi\")\r\n            };\r\n\r\n            var expectedConservationFile = (Path.Combine(saDirectory, \"sa5.npd\"), Path.Combine(saDirectory, \"sa5.npd.idx\"));\r\n\r\n            var expectedNgaFiles = new List<string>\r\n            {\r\n                Path.Combine(saDirectory, \"sa6.nga\"),\r\n                Path.Combine(saDirectory, \"sa7.nga\")\r\n            };\r\n\r\n            var expectedRefMinorFile = (Path.Combine(saDirectory, \"sa8.rma\"), Path.Combine(saDirectory, \"sa8.rma.idx\"));\r\n\r\n            Assert.Equal(expectedNsaFiles, files.NsaFiles.OrderBy(x=> x.Nsa));\r\n            Assert.Equal(expectedNsiFiles, files.NsiFiles.OrderBy(x=>x));\r\n            Assert.Equal(expectedConservationFile, files.PhylopFile);\r\n            Assert.Equal(expectedNgaFiles, files.NgaFiles.OrderBy(x=>x));\r\n            Assert.Equal(expectedRefMinorFile, files.RefMinorFile);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetFiles_FromDirectoryNoSa_NoFileAdded()\r\n        
{\r\n            var files = new AnnotationFiles();\r\n            files.AddFiles(\".\");\r\n\r\n            Assert.Empty(files.NsaFiles);\r\n            Assert.Empty(files.NsiFiles);\r\n            Assert.Empty(files.NgaFiles);\r\n            Assert.Equal(default, files.PhylopFile);\r\n            Assert.Equal(default, files.RefMinorFile);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Nirvana/PreLoadUtilitiesTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing Genome;\r\nusing Nirvana;\r\nusing UnitTests.SAUtils.InputFileParsers;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Nirvana\r\n{\r\n    public sealed class PreLoadUtilitiesTests\r\n    {\r\n        private static Stream GetVcfStream()\r\n        {\r\n            var stream = new MemoryStream();\r\n            var writer = new StreamWriter(stream);\r\n\r\n            writer.WriteLine(\"##dbSNP\");\r\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\r\n            writer.WriteLine(\"1\\t10019\\trs775809821\\tTA\\tT\\t.\\t.\\tRS=775809821;RSPOS=10020;dbSNPBuildID=144;SSR=0;SAO=0;VP=0x050000020005000002000200;GENEINFO=DDX11L1:100287102;WGT=1;VC=DIV;R5;ASP\");\r\n            writer.WriteLine(\"1\\t10285\\trs866375379\\tT\\tA,C\\t.\\t.\\tRS=866375379;RSPOS=10285;dbSNPBuildID=147;SSR=0;SAO=0;VP=0x050100020005000002000100;GENEINFO=DDX11L1:100287102;WGT=1;VC=SNV;SLO;R5;ASP\");\r\n            writer.WriteLine(\"1\\t10329\\trs150969722\\tAC\\tA\\t.\\t.\\tRS=150969722;RSPOS=10330;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050000020005000002000200;GENEINFO=DDX11L1:100287102;WGT=1;VC=DIV;R5;ASP\");\r\n            writer.WriteLine(\"2\\t10019\\trs775809821\\tTA\\tT\\t.\\t.\\tRS=775809821;RSPOS=10020;dbSNPBuildID=144;SSR=0;SAO=0;VP=0x050000020005000002000200;GENEINFO=DDX11L1:100287102;WGT=1;VC=DIV;R5;ASP\");\r\n            writer.WriteLine(\"2\\t10285\\trs866375379\\tT\\tA,C\\t.\\t.\\tRS=866375379;RSPOS=10285;dbSNPBuildID=147;SSR=0;SAO=0;VP=0x050100020005000002000100;GENEINFO=DDX11L1:100287102;WGT=1;VC=SNV;SLO;R5;ASP\");\r\n            writer.WriteLine(\"2\\t10329\\trs150969722\\tAC\\tA\\t.\\t.\\tRS=150969722;RSPOS=10330;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050000020005000002000200;GENEINFO=DDX11L1:100287102;WGT=1;VC=DIV;R5;ASP\");\r\n\r\n            writer.Flush();\r\n\r\n            stream.Position = 0;\r\n            return stream;\r\n       
 }\r\n\r\n        [Fact]\r\n        public void GetAllPositions()\r\n        {\r\n            //we only need the sequence provider for variant rotation. \r\n            var seqProvider = ParserTestUtils.GetSequenceProvider(10329, \"AC\", 'A', ChromosomeUtilities.RefNameToChromosome);\r\n            (var positions, _) = PreLoadUtilities.GetPositions(GetVcfStream(), null, seqProvider, null);\r\n\r\n            Assert.Equal(2, positions.Count);\r\n            Assert.Equal(4, positions[ChromosomeUtilities.Chr1].Count);\r\n            Assert.Equal(4, positions[ChromosomeUtilities.Chr2].Count);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetPositions_inRange()\r\n        {\r\n            var annotationRange = new GenomicRange(new GenomicPosition(ChromosomeUtilities.Chr1, 10019), new GenomicPosition(ChromosomeUtilities.Chr1, 10290));\r\n            var seqProvider = ParserTestUtils.GetSequenceProvider(10329, \"AC\", 'A', ChromosomeUtilities.RefNameToChromosome);\r\n            (var positions, _) = PreLoadUtilities.GetPositions(GetVcfStream(), annotationRange, seqProvider, null);\r\n\r\n            Assert.Single(positions);\r\n            Assert.Equal(3, positions[ChromosomeUtilities.Chr1].Count);\r\n        }\r\n\r\n        private static Stream GetRefMinorVcfStream()\r\n        {\r\n            var stream = new MemoryStream();\r\n            var writer = new StreamWriter(stream);\r\n\r\n            writer.WriteLine(\"##dbSNP\");\r\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\r\n            writer.WriteLine(\"1\\t10019\\trs775809821\\tTA\\tT\\t.\\t.\\tRS=775809821;RSPOS=10020;dbSNPBuildID=144;SSR=0;SAO=0;VP=0x050000020005000002000200;GENEINFO=DDX11L1:100287102;WGT=1;VC=DIV;R5;ASP\");\r\n            //ref minor position\r\n            
writer.WriteLine(\"1\\t10275\\trs866375379\\tT\\t.\\t.\\t.\\tRS=866375379;RSPOS=10285;dbSNPBuildID=147;SSR=0;SAO=0;VP=0x050100020005000002000100;GENEINFO=DDX11L1:100287102;WGT=1;VC=SNV;SLO;R5;ASP\");\r\n            writer.WriteLine(\"1\\t10285\\trs866375379\\tT\\tA,C\\t.\\t.\\tRS=866375379;RSPOS=10285;dbSNPBuildID=147;SSR=0;SAO=0;VP=0x050100020005000002000100;GENEINFO=DDX11L1:100287102;WGT=1;VC=SNV;SLO;R5;ASP\");\r\n            //ref position. not ref minor\r\n            writer.WriteLine(\"1\\t10289\\trs866375379\\tT\\t.\\t.\\t.\\tRS=866375379;RSPOS=10285;dbSNPBuildID=147;SSR=0;SAO=0;VP=0x050100020005000002000100;GENEINFO=DDX11L1:100287102;WGT=1;VC=SNV;SLO;R5;ASP\");\r\n            writer.WriteLine(\"1\\t10329\\trs150969722\\tAC\\tA\\t.\\t.\\tRS=150969722;RSPOS=10330;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050000020005000002000200;GENEINFO=DDX11L1:100287102;WGT=1;VC=DIV;R5;ASP\");\r\n            writer.WriteLine(\"2\\t10019\\trs775809821\\tTA\\tT\\t.\\t.\\tRS=775809821;RSPOS=10020;dbSNPBuildID=144;SSR=0;SAO=0;VP=0x050000020005000002000200;GENEINFO=DDX11L1:100287102;WGT=1;VC=DIV;R5;ASP\");\r\n            writer.WriteLine(\"2\\t10285\\trs866375379\\tT\\tA,C\\t.\\t.\\tRS=866375379;RSPOS=10285;dbSNPBuildID=147;SSR=0;SAO=0;VP=0x050100020005000002000100;GENEINFO=DDX11L1:100287102;WGT=1;VC=SNV;SLO;R5;ASP\");\r\n            writer.WriteLine(\"2\\t10329\\trs150969722\\tAC\\tA\\t.\\t.\\tRS=150969722;RSPOS=10330;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050000020005000002000200;GENEINFO=DDX11L1:100287102;WGT=1;VC=DIV;R5;ASP\");\r\n\r\n            writer.Flush();\r\n\r\n            stream.Position = 0;\r\n            return stream;\r\n        }\r\n\r\n        [Fact]\r\n        public void GetAllPositions_skip_refs()\r\n        {\r\n            //we only need the sequence provider for variant rotation. 
\r\n            var seqProvider = ParserTestUtils.GetSequenceProvider(10329, \"AC\", 'A', ChromosomeUtilities.RefNameToChromosome);\r\n            var refMinorProvider = ParserTestUtils.GetRefMinorProvider(\r\n                new List<(Chromosome chrom, int position, string globalMinor)>\r\n                {\r\n                    (ChromosomeUtilities.Chr1, 10275, \"A\" )\r\n                }\r\n            );\r\n            (var positions, _) = PreLoadUtilities.GetPositions(GetRefMinorVcfStream(), null, seqProvider, refMinorProvider);\r\n\r\n            Assert.Equal(2, positions.Count);\r\n            Assert.Equal(5, positions[ChromosomeUtilities.Chr1].Count);\r\n            Assert.Equal(4, positions[ChromosomeUtilities.Chr2].Count);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/Nirvana/ProviderUtilitiesTests.cs",
    "content": "﻿using Nirvana;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Nirvana\r\n{\r\n    public sealed class ProviderUtilitiesTests\r\n    {\r\n        [Fact]\r\n        public void GetNsaProvider_NoSaFile_ReturnNull()\r\n        {\r\n            var annotationFiles = new AnnotationFiles();\r\n            var nsaProvider = ProviderUtilities.GetNsaProvider(annotationFiles);\r\n\r\n            Assert.Null(nsaProvider);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/NirvanaLambda/AnnotationJobTests.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.Text;\r\nusing System.Threading.Tasks;\r\nusing Amazon.Lambda.Model;\r\nusing ErrorHandling;\r\nusing NirvanaLambda;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.NirvanaLambda\r\n{\r\n    public sealed class AnnotationJobTests\r\n    {\r\n        [Fact]\r\n        public void GetResultSummaryFromSuccessInvocation_AsExpected()\r\n        {\r\n            const string annotationResult = \"{\\\"id\\\":\\\"Test\\\",\\\"status\\\":\\\"Success\\\",\\\"filePath\\\":\\\"result/input_00001.json.gz\\\"}\";\r\n            var memoryStream = new MemoryStream(Encoding.UTF8.GetBytes(annotationResult));\r\n\r\n            var processed = AnnotationJob.GetResultSummaryFromSuccessInvocation(memoryStream);\r\n\r\n            Assert.Equal(\"input_00001.json.gz\", processed.FileName);\r\n            Assert.Null(processed.ErrorMessage);\r\n            Assert.Null(processed.ErrorCategory);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetResultSummaryFromSuccessInvocation_PassFailedStatus_FromAnnotationJob()\r\n        {\r\n            const string annotationResult = \"{\\\"id\\\":\\\"Test\\\",\\\"status\\\":\\\"Something Wrong!\\\",\\\"filePath\\\":\\\"\\\",\\\"ErrorCategory\\\":\\\"NirvanaError\\\"}\";\r\n            var memoryStream = new MemoryStream(Encoding.UTF8.GetBytes(annotationResult));\r\n\r\n            var processed = AnnotationJob.GetResultSummaryFromSuccessInvocation(memoryStream);\r\n\r\n            Assert.Equal(\"\", processed.FileName);\r\n            Assert.Equal(\"Something Wrong!\", processed.ErrorMessage);\r\n            Assert.Equal(ErrorCategory.NirvanaError, processed.ErrorCategory);\r\n        }\r\n\r\n        [Fact]\r\n        public void CheckResponse_AsExpected()\r\n        {\r\n            Assert.Throws<Exception>(() => new AnnotationJob(null, 1).CheckResponse(new InvokeResponse {FunctionError = \"Unhandled\"}));\r\n            Assert.Throws<Exception>(() => new AnnotationJob(null, 
1).CheckResponse(null));\r\n        }\r\n\r\n        [Fact]\r\n        public void GetResultSummaryFromFailedInvocation_AsExpected()\r\n        {\r\n            var job = new AnnotationJob(null, 1);\r\n            var generalException = new Exception(\"first level exception\", new Exception(\"second level exception\", new Exception(\"third level exception\")));\r\n            var taskCanceledException = new Exception(\"first level exception\", new TaskCanceledException(\"second level exception\", new Exception(\"third level exception\")));\r\n\r\n            var generalResult = job.GetResultSummaryFromFailedInvocation(generalException);\r\n            var taskCanceledResult = job.GetResultSummaryFromFailedInvocation(taskCanceledException);\r\n\r\n            Assert.Equal(ErrorCategory.NirvanaError, generalResult.ErrorCategory);\r\n            Assert.Equal(\"Failed job when invoking the annotation job: third level exception.\", generalResult.ErrorMessage);\r\n\r\n            Assert.Equal(ErrorCategory.TimeOutError, taskCanceledResult.ErrorCategory);\r\n            Assert.Equal(\"Failed job when invoking the annotation job: third level exception. Annotation job was not finished in 0 milliseconds.\", taskCanceledResult.ErrorMessage);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/NirvanaLambda/NirvanaConfigTests.cs",
    "content": "﻿using Cloud.Messages;\r\nusing Cloud.Messages.Nirvana;\r\nusing ErrorHandling.Exceptions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.NirvanaLambda\r\n{\r\n    public sealed class NirvanaConfigTests\r\n    {\r\n        [Fact]\r\n        public void CheckFieldsNotNull_AsExpected()\r\n        {\r\n            var config = GetConfig();\r\n            config.id = null;\r\n            var exception = Assert.Throws<UserErrorException>(() =>config.CheckRequiredFieldsNotNull());\r\n            Assert.Equal(\"id cannot be null.\", exception.Message);\r\n\r\n            config = GetConfig();\r\n            config.genomeAssembly = null;\r\n            exception = Assert.Throws<UserErrorException>(() => config.CheckRequiredFieldsNotNull());\r\n            Assert.Equal(\"genomeAssembly cannot be null.\", exception.Message);\r\n\r\n            config = GetConfig();\r\n            config.outputDir = null;\r\n            exception = Assert.Throws<UserErrorException>(() => config.CheckRequiredFieldsNotNull());\r\n            Assert.Equal(\"outputDir cannot be null.\", exception.Message);\r\n\r\n            config = GetConfig();\r\n            config.outputDir.bucketName = null;\r\n            exception = Assert.Throws<UserErrorException>(() => config.CheckRequiredFieldsNotNull());\r\n            Assert.Equal(\"bucketName of outputDir cannot be null.\", exception.Message);\r\n\r\n            config = GetConfig();\r\n            config.outputDir.path = null;\r\n            exception = Assert.Throws<UserErrorException>(() => config.CheckRequiredFieldsNotNull());\r\n            Assert.Equal(\"path of outputDir cannot be null.\", exception.Message);\r\n            \r\n        }\r\n\r\n        private static NirvanaConfig GetConfig() => new NirvanaConfig\r\n        {\r\n            id = \"Test\",\r\n            genomeAssembly = \"Assembly\",\r\n            vcfUrl = \"https://s3.amazonaws.com/input/input.vcf.gz?SomeStuff\",\r\n            tabixUrl = 
\"https://s3.amazonaws.com/input/input.vcf.gz.tbi?SomeStuff\",\r\n            outputDir = new S3Path\r\n            {\r\n                bucketName = \"OutputBucket\",\r\n                region = \"us-west-2\",\r\n                path = \"/OutputDir/\",\r\n                accessKey = \"1234567\",\r\n                secretKey = \"show me the money\",\r\n                sessionToken = \"a token\"\r\n            }\r\n        };\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/NirvanaLambda/NirvanaLambdaTests.cs",
    "content": "using ErrorHandling;\r\nusing NL = NirvanaLambda.NirvanaLambda;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.NirvanaLambda\r\n{\r\n    public sealed class NirvanaLambdaTests\r\n    {\r\n        [Theory]\r\n        [InlineData(\"/tmp/ada.vcf\", 0, \"ada_00000\")]\r\n        [InlineData(\"/ada.vcf\", 1, \"ada_00001\")]\r\n        [InlineData(\"ada.vcf\", 2, \"ada_00002\")]\r\n        [InlineData(\"ada.vcf.gz\", 3, \"ada_00003\")]\r\n        [InlineData(\"ada.vcf.data.vcf.gz\", 4, \"ada_00004\")]\r\n        [InlineData(\"https://s3.amazonaws.com/illumina-early-access-zeus/Olympia.vcf.gz?AWSAccessKeyId=AKISKSD87A3C4&Expires=109838429&Signature=s98df7s8df12f2jo4lfjfs9d0fu0sd9f\", 5, \"Olympia_00005\")]\r\n        [InlineData(\"https://stratus-gds-stage.s3.us-west-2.amazonaws.com/d3a56bf8-5528-4b4d-b5bb-08d6c9c1c9dd/test-data/vcf/some-chroms/dq/DQ-Strelka-Germline-chr22-hg38.vcf.gz?X-Amz-Expires=604800&response-content-disposition=attachment%3Bfilename%3D%22DQ-Strelka-Germline-chr22-hg38.vcf.gz%22&x-userId=fb2136c7-01c2-32cc-8d53-b78db2c022de&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ7P2VLXQJYGXATTA/20190516/us-west-2/s3/aws4_request&X-Amz-Date=20190516T160606Z&X-Amz-SignedHeaders=host&X-Amz-Signature=8b2f512998b820e8fb18433b5fd2de1c189c157accff92d5d5316a9fa3684d19\", 6, \"DQ-Strelka-Germline-chr22-hg38_00006\")]\r\n        public void GetIndexedPrefix_AsExpected(string inputVcfPath, int jobIndex, string expectedPrefix)\r\n        {\r\n            Assert.Equal(NL.GetIndexedPrefix(inputVcfPath, jobIndex), expectedPrefix);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(ErrorCategory.UserError, \"Wrong input.\", \"User error: wrong input.\")]\r\n        [InlineData(ErrorCategory.NirvanaError, null, \"Nirvana error: an unexpected annotation error occurred while annotating this VCF.\")]\r\n        [InlineData(ErrorCategory.TimeOutError, null, \"Timeout error: annotation of the VCF was not finished on time due to network congestion. 
Please try again later.\")]\r\n        [InlineData(ErrorCategory.InvocationThrottledError, null, \"Invocation throttled error: there are too many lambdas currently running in this account. Please try again later.\")]\r\n        public void GetFailedRunStatus_AsExpected(ErrorCategory errorCategory, string errorMessage, string expectedStatus)\r\n        {\r\n            Assert.Equal(expectedStatus, NL.GetFailedRunStatus(errorCategory, errorMessage));\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/NirvanaLambda/PartitionUtilitiesTests.cs",
    "content": "﻿using System.Linq;\r\nusing NirvanaLambda;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.NirvanaLambda\r\n{\r\n    public sealed class PartitionUtilitiesTests\r\n    {\r\n        [Fact]\r\n        public void FindEqualOrClosestSmallerOffsets_AsExpected()\r\n        {\r\n            var sizeBasedOffsets = new long[] { 0, 100, 200, 300, 400 };\r\n            var allLinearIndexes = new long[] { 15, 45, 97, 123, 146, 175, 200, 234, 265, 293, 401 };\r\n\r\n            var blockOffsets = PartitionUtilities.FindEqualOrClosestSmallerOffsets(sizeBasedOffsets, allLinearIndexes);\r\n\r\n            var expected = new long[] { 15, 97, 200, 293 };\r\n            Assert.Equal(expected, blockOffsets);\r\n        }\r\n\r\n        [Fact]\r\n        public void MergeConsecutiveEqualValues_AsExpected()\r\n        {\r\n            var input = new[] { 1, 2, 3, 3, 2, 5, 4, 4 };\r\n\r\n            var expected = new[] { 1, 2, 3, 2, 5, 4 };\r\n\r\n            Assert.Equal(expected, PartitionUtilities.MergeConsecutiveEqualValues(input).ToArray());\r\n        }\r\n\r\n        [Fact]\r\n        public void GetEqualSizeOffsets_AsExpected()\r\n        {\r\n            const int fileSize      = 1001;\r\n            const int numPartitions = 3;\r\n\r\n            var expected = new long[] { 0, 333, 666 };\r\n\r\n            Assert.Equal(expected, PartitionUtilities.GetEqualSizeOffsets(fileSize, numPartitions));\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/OptimizedCore/StringBuilderCacheTests.cs",
    "content": "﻿using OptimizedCore;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.OptimizedCore\r\n{\r\n    public sealed class StringBuilderCacheTests\r\n    {\r\n        [Fact]\r\n        public void Acquire_UseAndRelease()\r\n        {\r\n            const string expectedString  = \"ABC123\";\r\n            const string expectedString2 = \"The quick brown fox jumps over the lazy dog.\";\r\n\r\n            var sb = StringBuilderPool.Get();\r\n            sb.Append(expectedString);\r\n            Assert.Equal(expectedString, StringBuilderPool.GetStringAndReturn(sb));\r\n\r\n            // acquire an existing string builder\r\n            sb = StringBuilderPool.Get();\r\n            sb.Append(expectedString2);\r\n            Assert.Equal(expectedString2, StringBuilderPool.GetStringAndReturn(sb));\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/OptimizedCore/StringExtensionsTests.cs",
    "content": "﻿using OptimizedCore;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.OptimizedCore\r\n{\r\n    public sealed class StringExtensionsTests\r\n    {\r\n        [Theory]\r\n        [InlineData(\"\\tjane\\tjim\")]\r\n        [InlineData(\"bob\\tjane\\t\")]\r\n        [InlineData(\"bob\\tjane\\tjim\")]\r\n        public void OptimizedSplit(string s)\r\n        {\r\n            var observedResult = s.OptimizedSplit('\\t');\r\n            var expectedResult = s.Split('\\t');\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(null)]\r\n        [InlineData(\"\")]\r\n        [InlineData(\"0\")]\r\n        [InlineData(\"123\")]\r\n        [InlineData(\"-123\")]\r\n        [InlineData(\"2147483647\")]\r\n        [InlineData(\"-2147483647\")]\r\n        [InlineData(\"4444444444\")]\r\n        [InlineData(\"123.3\")]\r\n        public void OptimizedParseInt32(string s)\r\n        {\r\n            var observedResult = s.OptimizedParseInt32();\r\n            bool expectedFoundError = !int.TryParse(s, out int expectedResult);\r\n\r\n            Assert.Equal(expectedFoundError, observedResult.FoundError);\r\n            Assert.Equal(expectedResult, observedResult.Number);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(\"#CHROM\", '#')]\r\n        [InlineData(\"#CHROM\", 'L')]\r\n        public void OptimizedStartsWith(string s, char leadingChar)\r\n        {\r\n            bool observedResult = s.OptimizedStartsWith(leadingChar);\r\n            bool expectedResult = s.StartsWith(leadingChar);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(\"END=123\")]\r\n        [InlineData(\"RECOMPOSED\")]\r\n        public void OptimizedKeyValue(string s)\r\n        {\r\n            var observedResult = s.OptimizedKeyValue();\r\n            var expectedResult = s.Split('=');\r\n\r\n            Assert.Equal(expectedResult[0], 
observedResult.Key);\r\n            if (expectedResult.Length == 1) Assert.Null(observedResult.Value);\r\n            else Assert.Equal(expectedResult[1], observedResult.Value);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(\"<CNV>\", '>')]\r\n        [InlineData(\"<CNV>\", 'L')]\r\n        public void OptimizedEndsWith(string s, char trailingChar)\r\n        {\r\n            bool observedResult = s.OptimizedEndsWith(trailingChar);\r\n            bool expectedResult = s.EndsWith(trailingChar);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/RepeatExpansions/MatcherTests.cs",
    "content": "﻿using System.Text;\r\nusing Genome;\r\nusing Intervals;\r\nusing RepeatExpansions;\r\nusing UnitTests.TestUtilities;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.RepeatExpansions\r\n{\r\n    public sealed class MatcherTests\r\n    {\r\n        private readonly Matcher _matcher;\r\n\r\n        public MatcherTests()\r\n        {\r\n            var repeatNumbers    = new[] { 7, 8, 9 };\r\n            double[] percentiles = { 0, 1, 1.5 };\r\n\r\n            var classificationRanges = new[] { new Interval(0, 27) };\r\n            var classifications      = new[] { \"Normal\" };\r\n\r\n            var aInterval = new ChromosomeInterval(ChromosomeUtilities.Chr1, 100, 200);\r\n            var aPhenotype = new RepeatExpansionPhenotype(aInterval, \"A\", null, repeatNumbers, percentiles, classifications, classificationRanges);\r\n\r\n            var chr1Phenotypes = new Interval<RepeatExpansionPhenotype>[1];\r\n            chr1Phenotypes[0] = new Interval<RepeatExpansionPhenotype>(aInterval.Start, aInterval.End, aPhenotype);\r\n\r\n            var intervalArrays = new IntervalArray<RepeatExpansionPhenotype>[1];\r\n            intervalArrays[ChromosomeUtilities.Chr1.Index] = new IntervalArray<RepeatExpansionPhenotype>(chr1Phenotypes);\r\n\r\n            var phenotypeForest = new IntervalForest<RepeatExpansionPhenotype>(intervalArrays);\r\n            _matcher = new Matcher(phenotypeForest);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetMatchingAnnotations_Overlap_ReturnEntry()\r\n        {\r\n            var variant = new RepeatExpansion(ChromosomeUtilities.Chr1, 100, 200, null, null, null, 9, 7);\r\n            var sa      = _matcher.GetMatchingAnnotations(variant);\r\n\r\n            var sb = new StringBuilder();\r\n            sa.SerializeJson(sb);\r\n            string observedResult = sb.ToString();\r\n\r\n            Assert.Contains(\"{\\\"phenotype\\\":\\\"A\\\"\", observedResult);\r\n        }\r\n\r\n        [Fact]\r\n   
     public void GetMatchingAnnotations_NoOverlap_ReturnNull()\r\n        {\r\n            var variant = new RepeatExpansion(ChromosomeUtilities.Chr1, 220, 230, null, null, null, 9, 7);\r\n            var sa = _matcher.GetMatchingAnnotations(variant);\r\n            Assert.Null(sa);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/RepeatExpansions/PercentileUtilitiesTests.cs",
    "content": "﻿using RepeatExpansions;\nusing Xunit;\n\nnamespace UnitTests.RepeatExpansions\n{\n    public sealed class PercentileUtilitiesTests\n    {\n        private readonly int[] _values = { 7, 8, 9, 10, 11, 12, 13, 15 };\n        private readonly double[] _percentiles  = { 0, 1, 1.5, 3.5, 75.5, 86.5, 98.5, 99.5 };\n\n        [Fact]\n        public void ComputePercentiles_Nominal()\n        {\n            var repeatNumbers = new[] { 10, 15, 20, 100, 200 };\n            var alleleCounts = new[] { 550, 34, 78, 30, 45 };\n\n            double[] expectedPercentiles = {\n                0, 550 * 100.0 / 737, (550 + 34) * 100.0 / 737, (550 + 34 + 78) * 100.0 / 737,\n                (550 + 34 + 78 + 30) * 100.0 / 737\n            };\n\n            double[] observedResults = PercentileUtilities.ComputePercentiles(repeatNumbers.Length, alleleCounts);\n            Assert.Equal(expectedPercentiles, observedResults);\n        }\n\n        [Fact]\n        public void GetPercentile_RepeatNumberInRange_PositiveIndex()\n        {\n            double observedResult = PercentileUtilities.GetPercentile(14, _values, _percentiles);\n            Assert.Equal(99.5, observedResult);\n        }\n\n        [Fact]\n        public void GetPercentile_RepeatNumberOutOfRange_NegativeIndex()\n        {\n            double observedResult = PercentileUtilities.GetPercentile(20, _values, _percentiles);\n            Assert.Equal(100, observedResult);\n        }\n    }\n}\n"
  },
  {
    "path": "UnitTests/RepeatExpansions/RepeatExpansionProviderTests.cs",
    "content": "﻿using Genome;\r\nusing OptimizedCore;\r\nusing RepeatExpansions;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.IO;\r\nusing VariantAnnotation.Pools;\r\nusing Variants;\r\nusing Vcf;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.RepeatExpansions\r\n{\r\n    public sealed class RepeatExpansionProviderTests\r\n    {\r\n        private readonly RepeatExpansionProvider _provider;\r\n\r\n        private const int Start = 63898361;\r\n        private const int End   = 63898390;\r\n\r\n        public RepeatExpansionProviderTests()\r\n        {\r\n            _provider = new RepeatExpansionProvider(GenomeAssembly.GRCh37, ChromosomeUtilities.RefNameToChromosome, 23, null);\r\n        }\r\n\r\n        \r\n        [Fact]\r\n        public void Annotate_NotRepeatExpansion_NullPhenotypes()\r\n        {\r\n            var variant = VariantPool.Get(ChromosomeUtilities.Chr3, Start, End, \"A\", \"C\", VariantType.SNV, null, false, false, false, null, \r\n                AnnotationBehavior.SmallVariants, false);\r\n\r\n            var annotatedPosition = GetAnnotatedPosition(variant);\r\n            _provider.Annotate(annotatedPosition);\r\n            \r\n            var firstVariant = annotatedPosition.AnnotatedVariants[0];\r\n            Assert.Null(firstVariant.RepeatExpansionPhenotypes);\r\n            \r\n            VariantPool.Return(variant);\r\n            PositionPool.Return((Position)annotatedPosition.Position);\r\n            AnnotatedVariantPool.Return((AnnotatedVariant)firstVariant);\r\n            AnnotatedPositionPool.Return((AnnotatedPosition) annotatedPosition);\r\n        }\r\n\r\n        [Fact]\r\n        public void Annotate_RepeatExpansion_NotExactMatch_NullPhenotypes()\r\n        {\r\n            var variant = new RepeatExpansion(ChromosomeUtilities.Chr3, Start, End + 1, \"A\", \"<STR3>\", null, 10, 5);\r\n\r\n            var 
annotatedPosition = GetAnnotatedPosition(variant);\r\n            _provider.Annotate(annotatedPosition);\r\n\r\n            var firstVariant = annotatedPosition.AnnotatedVariants[0];\r\n            Assert.Null(firstVariant.RepeatExpansionPhenotypes);\r\n            \r\n            PositionPool.Return((Position)annotatedPosition.Position);\r\n            AnnotatedVariantPool.Return((AnnotatedVariant)firstVariant);\r\n            AnnotatedPositionPool.Return((AnnotatedPosition) annotatedPosition);\r\n        }\r\n\r\n        [Fact]\r\n        public void Annotate_RepeatExpansion_no_refRepeatCount()\r\n        {\r\n            var variant = new RepeatExpansion(ChromosomeUtilities.Chr3, Start, End + 1, \"A\", \"<STR3>\", null, 10, null);\r\n\r\n            var annotatedPosition = GetAnnotatedPosition(variant);\r\n            _provider.Annotate(annotatedPosition);\r\n\r\n            var firstVariant = annotatedPosition.AnnotatedVariants[0];\r\n            Assert.NotNull(firstVariant);\r\n            \r\n            PositionPool.Return((Position)annotatedPosition.Position);\r\n            AnnotatedVariantPool.Return((AnnotatedVariant)firstVariant);\r\n            AnnotatedPositionPool.Return((AnnotatedPosition) annotatedPosition);\r\n        }\r\n\r\n        [Fact]\r\n        public void Annotate_RepeatExpansion_ExactMatch_OnePhenotype()\r\n        {\r\n            var variant = new RepeatExpansion(ChromosomeUtilities.Chr3, Start, End, \"A\", \"<STR3>\", null, 10, 5);\r\n\r\n            var annotatedPosition = GetAnnotatedPosition(variant);\r\n            _provider.Annotate(annotatedPosition);\r\n\r\n            var firstVariant = annotatedPosition.AnnotatedVariants[0];\r\n            Assert.NotNull(firstVariant.RepeatExpansionPhenotypes);\r\n\r\n            var sb = StringBuilderPool.Get();\r\n            var jsonObject = new JsonObject(sb);\r\n\r\n            jsonObject.AddObjectValue(firstVariant.RepeatExpansionPhenotypes.JsonKey,\r\n                
firstVariant.RepeatExpansionPhenotypes);\r\n\r\n            const string expectedJson = \"\\\"repeatExpansionPhenotypes\\\":[{\\\"phenotype\\\":\\\"Spinocerebellar ataxia 7\\\",\\\"omimId\\\":164500,\\\"classifications\\\":[\\\"Normal\\\"],\\\"percentile\\\":6.33}]\";\r\n            string observedJson = sb.ToString();\r\n            Assert.Equal(expectedJson, observedJson);\r\n            \r\n            PositionPool.Return((Position)annotatedPosition.Position);\r\n            AnnotatedVariantPool.Return((AnnotatedVariant)firstVariant);\r\n            AnnotatedPositionPool.Return((AnnotatedPosition) annotatedPosition);\r\n        }\r\n\r\n        private static IAnnotatedPosition GetAnnotatedPosition(IVariant variant)\r\n        {\r\n            IVariant[] variants = { variant };\r\n            var position = PositionPool.Get(ChromosomeUtilities.Chr3, Start, End, null, null, null, null, variants, null, null, null, null,\r\n                false);\r\n\r\n            var                 annotatedVariant  = AnnotatedVariantPool.Get(variant);\r\n            IAnnotatedVariant[] annotatedVariants = { annotatedVariant };\r\n\r\n            return AnnotatedPositionPool.Get(position, annotatedVariants);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Resources/COSM18152.tsv",
    "content": "Gene name\tAccession Number\tGene CDS length\tHGNC ID\tSample name\tID_sample\tID_tumour\tPrimary site\tSite subtype 1\tSite subtype 2\tSite subtype 3\tPrimary histology\tHistology subtype 1\tHistology subtype 2\tHistology subtype 3\tGenome-wide screen\tMutation ID\tMutation CDS\tMutation AA\tMutation Description\tMutation zygosity\tLOH\tGRCh\tMutation genome position\tMutation strand\tSNP\tResistance Mutation\tFATHMM prediction\tFATHMM score\tMutation somatic status\tPubmed_PMID\tID_STUDY\tSample source\tTumour origin\tAge\nVHL\tENST00000256474\t642\t12687\t264\t1776734\t1680780\tkidney\tNS\tNS\tNS\tcarcinoma\tclear_cell_renal_cell_carcinoma\tNS\tNS\tn\tCOSM18152\tc.463G>A\tp.V155M\tSubstitution - Missense\t\tu\t37\t3:10188320-10188320\t+\tn\t-\tPATHOGENIC\t.97012\tConfirmed somatic variant\t23036577\t\tsurgery fresh/frozen\tNS\t\nVHL\tENST00000256474\t642\t12687\t2146326\t2146326\t2015515\tkidney\tNS\tNS\tNS\tcarcinoma\tclear_cell_renal_cell_carcinoma\tNS\tNS\tn\tCOSM18152\tc.463G>A\tp.V155M\tSubstitution - Missense\t\tu\t37\t3:10188320-10188320\t+\tn\t-\tPATHOGENIC\t.97012\tConfirmed somatic variant\t24471421\t\tsurgery-fixed\tNS\t68\nVHL\tENST00000256474\t642\t12687\t980518\t980518\t896240\tkidney\tNS\tNS\tNS\tcarcinoma\tclear_cell_renal_cell_carcinoma\tNS\tNS\tn\tCOSM18152\tc.463G>A\tp.V155M\tSubstitution - Missense\t\tu\t37\t3:10188320-10188320\t+\tn\t-\tPATHOGENIC\t.97012\tConfirmed somatic variant\t11505222\t\tsurgery fresh/frozen\tNS\t56\nVHL\tENST00000256474\t642\t12687\tPD3476a\t1101397\t1015974\tkidney\tNS\tNS\tNS\tcarcinoma\tclear_cell_renal_cell_carcinoma\tNS\tNS\tn\tCOSM18152\tc.463G>A\tp.V155M\tSubstitution - Missense\thet\tu\t37\t3:10188320-10188320\t+\tn\t-\tPATHOGENIC\t.97012\tConfirmed somatic variant\t20054297\t255\tNS\tprimary\t46\nVHL\tENST00000256474\t642\t12687\t2146325\t2146325\t2015515\tkidney\tNS\tNS\tNS\tcarcinoma\tclear_cell_renal_cell_carcinoma\tNS\tNS\tn\tCOSM18152\tc.463G>A\tp.V155M\tSubstitution - 
Missense\t\tu\t37\t3:10188320-10188320\t+\tn\t-\tPATHOGENIC\t.97012\tConfirmed somatic variant\t24471421\t\tsurgery-fixed\tNS\t68\nVHL\tENST00000256474\t642\t12687\tK112\t1692686\t1600914\tkidney\tNS\tNS\tNS\tcarcinoma\tclear_cell_renal_cell_carcinoma\tNS\tNS\tn\tCOSM18152\tc.463G>A\tp.V155M\tSubstitution - Missense\t\tu\t37\t3:10188320-10188320\t+\tn\t-\tPATHOGENIC\t.97012\tConfirmed somatic variant\t22138691\t\tsurgery fresh/frozen\tprimary\t44\nVHL\tENST00000256474\t642\t12687\tMEL-JWCI-WGS-12\t1838362\t1732464\tskin\tupper_arm\tNS\tNS\tmalignant_melanoma\tNS\tNS\tNS\ty\tCOSM18152\tc.463G>A\tp.V155M\tSubstitution - Missense\t\tu\t37\t3:10188320-10188320\t+\tn\t-\tPATHOGENIC\t.97012\tConfirmed somatic variant\t22817889\t\tNS\tprimary\t46\n"
  },
  {
    "path": "UnitTests/Resources/COSM18152.vcf",
    "content": "3\t10188320\tCOSM18152\tG\tA\t.\t.\tGENE=VHL;STRAND=+;CDS=c.463G>A;AA=p.V155M;CNT=7\n"
  },
  {
    "path": "UnitTests/Resources/COSM983708.tsv",
    "content": "Gene name\tAccession Number\tGene CDS length\tHGNC ID\tSample name\tID_sample\tID_tumour\tPrimary site\tSite subtype 1\tSite subtype 2\tSite subtype 3\tPrimary histology\tHistology subtype 1\tHistology subtype 2\tHistology subtype 3\tGenome-wide screen\tMutation ID\tMutation CDS\tMutation AA\tMutation Description\tMutation zygosity\tLOH\tGRCh\tMutation genome position\tMutation strand\tSNP\tResistance Mutation\tFATHMM prediction\tFATHMM score\tMutation somatic status\tPubmed_PMID\tID_STUDY\tSample source\tTumour origin\tAge\nBCL6B\tENST00000293805\t1440\t1002\tTCGA-AX-A0J0-01\t1783376\t1687375\tendometrium\tNS\tNS\tNS\tcarcinoma\tendometrioid_carcinoma\tNS\tNS\ty\tCOSM983708\tc.701_702insCAG\tp.S244_E245insS\tInsertion - In frame\thet\tu\t37\t17:6928019-6928020\t+\t\t-\t\t\tReported in another cancer sample as somatic\t\t419\tfresh/frozen - NOS\tprimary\t47\n"
  },
  {
    "path": "UnitTests/Resources/COSM983708.vcf",
    "content": "17\t6928019\tCOSM983708\tC\tCCAG\t.\t.\tGENE=BCL6B;STRAND=+;CDS=c.701_702insCAG;AA=p.S244_E245insS;CNT=27\n"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000000101.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"47064274\"><RecordStatus>current</RecordStatus><Title>NM_000235.4(LIPA):c.894+1G&gt;A AND Wolman disease</Title><ReferenceClinVarAssertion DateCreated=\"2012-08-13\" DateLastUpdated=\"2019-11-02\" ID=\"57604\"><ClinVarAccession Acc=\"RCV000000101\" Version=\"3\" Type=\"RCV\" DateUpdated=\"2019-11-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"1996-04-01\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData ID=\"47897969\"><Attribute Type=\"Description\">In 2 sibs with Wolman disease (278000) from a consanguineous family, Aslanidis et al. (1996) detected homozygosity for a G-to-A mutation at position +1 of the splice donor site following exon 8 of the LIPA gene. Both children died within the first year of life. The parents, who were heterozygous for the mutation, had reduced enzymatic activity, while no enzymatic activity was detectable in fibroblasts from the affected children. Although the same donor splice site is involved as in the mutation reported in CESD (934G-A, 613497.0002), the nucleotide at position +1 was changed in the Wolman disease mutation while the nucleotide at position -1 was changed in the CESD mutation. 
Both mutations result in deletion of the same 24 amino acids (exon 8), but the effects are dramatically different: the -1 mutation allowed some correct splicing (3% of total LIPA RNA), but the +1 splice site mutation, which affects one of the invariable nucleotides of the splice consensus sequences, permits no correct splicing. Aslanidis et al. (1996) suggested that the residual activity in CESD patients compared to Wolman patients may result either from a partially active enzyme with the internal deletion of 24 amino acids (skipping of exon 8) or from the production of low amounts of the full size of the protein due to inefficient exon exclusion from the mutated allele.</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">8617513</ID></Citation></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"81\" Acc=\"VCV000000081\" Version=\"1\"><Measure Type=\"single nucleotide variant\" ID=\"15120\"><Name><ElementValue Type=\"Preferred\">NM_000235.4(LIPA):c.894+1G&gt;A</ElementValue></Name><AttributeSet><Attribute Accession=\"NM_001288979\" Version=\"1\" Change=\"c.546+1G&gt;A\" Type=\"HGVS, coding, RefSeq\">NM_001288979.1:c.546+1G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_000235\" Version=\"4\" Change=\"c.894+1G&gt;A\" Type=\"HGVS, coding, RefSeq\">NM_000235.4:c.894+1G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001127605\" Version=\"3\" Change=\"c.894+1G&gt;A\" Type=\"HGVS, coding, RefSeq\">NM_001127605.3:c.894+1G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_008194\" Version=\"1\" Change=\"g.34394G&gt;A\" Type=\"HGVS, genomic, RefSeqGene\">NG_008194.1:g.34394G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000010\" Version=\"11\" Change=\"g.89222510C&gt;T\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000010.11:g.89222510C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000010\" Version=\"10\" 
Change=\"g.90982267C&gt;T\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000010.10:g.90982267C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_000235\" Version=\"3\" Change=\"c.894+1G&gt;A\" Type=\"HGVS, previous\">NM_000235.3:c.894+1G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">splice donor variant</Attribute><XRef ID=\"SO:0001575\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_000235.4:c.894+1G&gt;A\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">splice donor variant</Attribute><XRef ID=\"SO:0001575\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001127605.3:c.894+1G&gt;A\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">splice donor variant</Attribute><XRef ID=\"SO:0001575\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001288979.1:c.546+1G&gt;A\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"nucleotide change\">IVS8, G-A, +1</Attribute></AttributeSet><CytogeneticLocation>10q23.31</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"10\" Accession=\"NC_000010.11\" start=\"89222510\" stop=\"89222510\" display_start=\"89222510\" display_stop=\"89222510\" variantLength=\"1\" positionVCF=\"89222510\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"T\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"10\" Accession=\"NC_000010.10\" start=\"90982267\" stop=\"90982267\" display_start=\"90982267\" display_stop=\"90982267\" variantLength=\"1\" positionVCF=\"90982267\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"T\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">lipase A, lysosomal acid type</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">LIPA</ElementValue></Symbol><SequenceLocation 
Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"10\" Accession=\"NC_000010.11\" start=\"89213569\" stop=\"89252039\" display_start=\"89213569\" display_stop=\"89252039\" Strand=\"-\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"10\" Accession=\"NC_000010.10\" start=\"90973325\" stop=\"91011659\" display_start=\"90973325\" display_stop=\"91011659\" variantLength=\"38335\" Strand=\"-\" /><XRef ID=\"3988\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"613497\" DB=\"OMIM\" /><XRef ID=\"HGNC:6617\" DB=\"HGNC\" /></MeasureRelationship><XRef Type=\"Allelic variant\" ID=\"613497.0005\" DB=\"OMIM\" /><XRef Type=\"rs\" ID=\"1564751995\" DB=\"dbSNP\" /><Comment DataSource=\"ClinGen\" Type=\"public\">ClinGen staff contributed the HGVS expression for this variant.</Comment></Measure><Name><ElementValue Type=\"Preferred\">NM_000235.4(LIPA):c.894+1G&gt;A</ElementValue></Name></MeasureSet><TraitSet Type=\"Disease\" ID=\"40\"><Trait ID=\"9048\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Wolman disease</ElementValue><XRef ID=\"7899\" DB=\"Office of Rare Diseases\" /></Name><XRef ID=\"CN438428\" DB=\"MedGen\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"20244\"><ClinVarSubmissionID localKey=\"613497.0005_WOLMAN DISEASE\" submitter=\"OMIM\" submitterDate=\"2017-12-22\" title=\"LIPA, IVS8, G-A, +1_WOLMAN DISEASE\" /><ClinVarAccession Acc=\"SCV000020244\" Version=\"2\" Type=\"SCV\" OrgID=\"3\" OrganizationCategory=\"resource\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"1996-04-01\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"OMIM\" ID=\"613497.0005\" Type=\"Allelic variant\" 
/><ObservedIn><Sample><Origin>germline</Origin><Species>human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData><Attribute Type=\"Description\">In 2 sibs with Wolman disease (278000) from a consanguineous family, Aslanidis et al. (1996) detected homozygosity for a G-to-A mutation at position +1 of the splice donor site following exon 8 of the LIPA gene. Both children died within the first year of life. The parents, who were heterozygous for the mutation, had reduced enzymatic activity, while no enzymatic activity was detectable in fibroblasts from the affected children. Although the same donor splice site is involved as in the mutation reported in CESD (934G-A, 613497.0002), the nucleotide at position +1 was changed in the Wolman disease mutation while the nucleotide at position -1 was changed in the CESD mutation. Both mutations result in deletion of the same 24 amino acids (exon 8), but the effects are dramatically different: the -1 mutation allowed some correct splicing (3% of total LIPA RNA), but the +1 splice site mutation, which affects one of the invariable nucleotides of the splice consensus sequences, permits no correct splicing. Aslanidis et al. 
(1996) suggested that the residual activity in CESD patients compared to Wolman patients may result either from a partially active enzyme with the internal deletion of 24 amino acids (skipping of exon 8) or from the production of low amounts of the full size of the protein due to inefficient exon exclusion from the mutated allele.</Attribute><Citation><ID Source=\"PubMed\">8617513</ID></Citation><XRef DB=\"OMIM\" ID=\"278000\" Type=\"MIM\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue Type=\"Preferred\">LIPA, IVS8, G-A, +1</ElementValue></Name><AttributeSet><Attribute Type=\"NonHGVS\">IVS8, G-A, +1</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">LIPA</ElementValue></Symbol></MeasureRelationship><XRef DB=\"OMIM\" ID=\"613497.0005\" Type=\"Allelic variant\" /></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">WOLMAN DISEASE</ElementValue></Name></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000000734.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"47064847\"><RecordStatus>current</RecordStatus><Title>NM_000512.4(GALNS):c.413T&gt;C (p.Val138Ala) AND Mucopolysaccharidosis, MPS-IV-A</Title><ReferenceClinVarAssertion DateCreated=\"2012-08-13\" DateLastUpdated=\"2019-11-02\" ID=\"58237\"><ClinVarAccession Acc=\"RCV000000734\" Version=\"3\" Type=\"RCV\" DateUpdated=\"2019-11-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2018-05-30\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData ID=\"47899666\"><Attribute Type=\"Description\">In a patient with a severe form of Morquio syndrome A (253000), Tomatsu et al. (1992) identified a homozygous 468T-C transition in the GALNS gene, resulting in an ala138-to-val (A138V) substitution.</Attribute><Citation Type=\"general\"><CitationText>Tomatsu, S., Fukuda, S., Masue, M., Sukegawa, K., Masuno, M., Orii, T. Mucopolysaccharidosis type IVA: characterization and chromosomal localization of N-acetylgalactosamine-6-sulfate sulfatase gene and genetic heterogeneity. (Abstract) Am. J. Hum. Genet. 
51 (suppl.): A178, 1992.</CitationText></Citation></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"699\" Acc=\"VCV000000699\" Version=\"1\"><Measure Type=\"single nucleotide variant\" ID=\"15738\"><Name><ElementValue Type=\"Preferred\">NM_000512.4(GALNS):c.413T&gt;C (p.Val138Ala)</ElementValue></Name><AttributeSet><Attribute Accession=\"NM_001323543\" Version=\"2\" Change=\"c.-143T&gt;C\" Type=\"HGVS, coding, RefSeq\">NM_001323543.2:c.-143T&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_000512\" Version=\"5\" Change=\"c.413T&gt;C\" Type=\"HGVS, coding, RefSeq\">NM_000512.5:c.413T&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001323544\" Version=\"2\" Change=\"c.431T&gt;C\" Type=\"HGVS, coding, RefSeq\">NM_001323544.2:c.431T&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_008667\" Version=\"1\" Change=\"g.20966T&gt;C\" Type=\"HGVS, genomic, RefSeqGene\">NG_008667.1:g.20966T&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000016\" Version=\"10\" Change=\"g.88841001A&gt;G\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000016.10:g.88841001A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000016\" Version=\"9\" Change=\"g.88907409A&gt;G\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000016.9:g.88907409A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"P34059\" Change=\"p.Val138Ala\" Type=\"HGVS, protein\">P34059:p.Val138Ala</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_000503\" Version=\"1\" Change=\"p.Val138Ala\" Type=\"HGVS, protein, RefSeq\">NP_000503.1:p.Val138Ala</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001310473\" Version=\"1\" Change=\"p.Val144Ala\" Type=\"HGVS, protein, RefSeq\">NP_001310473.1:p.Val144Ala</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">5 prime UTR variant</Attribute><XRef 
ID=\"SO:0001623\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001323543.2:c.-143T&gt;C\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_000512.5:c.413T&gt;C\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001323544.2:c.431T&gt;C\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">A138V</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">V138A</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">V144A</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange3LetterCode\">ALA138VAL</Attribute></AttributeSet><CytogeneticLocation>16q24.3</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"16\" Accession=\"NC_000016.10\" start=\"88841001\" stop=\"88841001\" display_start=\"88841001\" display_stop=\"88841001\" variantLength=\"1\" positionVCF=\"88841001\" referenceAlleleVCF=\"A\" alternateAlleleVCF=\"G\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"16\" Accession=\"NC_000016.9\" start=\"88907409\" stop=\"88907409\" display_start=\"88907409\" display_stop=\"88907409\" variantLength=\"1\" positionVCF=\"88907409\" referenceAlleleVCF=\"A\" alternateAlleleVCF=\"G\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">galactosamine (N-acetyl)-6-sulfatase</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">GALNS</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"16\" Accession=\"NC_000016.10\" start=\"88813734\" 
stop=\"88856966\" display_start=\"88813734\" display_stop=\"88856966\" Strand=\"-\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"16\" Accession=\"NC_000016.9\" start=\"88880141\" stop=\"88923373\" display_start=\"88880141\" display_stop=\"88923373\" variantLength=\"43233\" Strand=\"-\" /><XRef ID=\"2588\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"612222\" DB=\"OMIM\" /><XRef ID=\"HGNC:4122\" DB=\"HGNC\" /></MeasureRelationship><Citation Type=\"general\"><ID Source=\"PubMed\">8651279</ID></Citation><XRef ID=\"P34059#VAR_007188\" DB=\"UniProtKB\" /><XRef Type=\"Allelic variant\" ID=\"612222.0002\" DB=\"OMIM\" /><XRef Type=\"rs\" ID=\"118204436\" DB=\"dbSNP\" /><Comment DataSource=\"NCBI curation\" Type=\"public\">NCBI staff reviewed the sequence information reported in PubMed 8651279 to confirm this allele on current reference sequence (V138A).</Comment></Measure><Name><ElementValue Type=\"Preferred\">NM_000512.4(GALNS):c.413T&gt;C (p.Val138Ala)</ElementValue></Name><XRef ID=\"CA251564\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"175\"><Trait ID=\"3036\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Mucopolysaccharidosis, MPS-IV-A</ElementValue><XRef ID=\"Morquio+syndrome+A/4876\" DB=\"Genetic Alliance\" /><XRef ID=\"7259005\" DB=\"SNOMED CT\" /></Name><Name><ElementValue Type=\"Alternate\">MPS IVA</ElementValue><XRef Type=\"MIM\" ID=\"253000\" DB=\"OMIM\" /></Name><Name><ElementValue Type=\"Alternate\">Mucopolysaccharidosis type IV A</ElementValue></Name><Name><ElementValue Type=\"Alternate\">Morquio syndrome A, mild</ElementValue></Name><Name><ElementValue Type=\"Alternate\">Mucopolysaccharidosis Type IVA</ElementValue><XRef ID=\"NBK148668\" DB=\"GeneReviews\" /></Name><Symbol><ElementValue Type=\"Preferred\">MPS4A</ElementValue><XRef Type=\"MIM\" ID=\"253000\" DB=\"OMIM\" /></Symbol><AttributeSet><Attribute Type=\"public definition\">The phenotypic spectrum of 
mucopolysaccharidosis IVA (MPS IVA) is a continuum that ranges from a severe and rapidly progressive early-onset form to a slowly progressive later-onset form. Children with MPS IVA have no distinctive clinical findings at birth. The severe form is usually apparent between ages one and three years, often first manifesting as kyphoscoliosis, knock-knee (genu valgum), and pectus carinatum; the slowly progressive form may not become evident until late childhood or adolescence often first manifesting as hip problems (pain, stiffness, and Legg Perthes disease). Progressive bone and joint involvement leads to short stature, and eventually to disabling pain and arthritis. Involvement of other organ systems can lead to significant morbidity, including respiratory compromise, obstructive sleep apnea, valvular heart disease, hearing impairment, visual impairment from corneal clouding, dental abnormalities, and hepatomegaly. Compression of the spinal cord is a common complication that results in neurologic impairment. 
Children with MPS IVA have normal intellectual abilities at the outset of the disease.</Attribute><XRef ID=\"NBK148668\" DB=\"GeneReviews\" /></AttributeSet><Citation Type=\"review\" Abbrev=\"GeneReviews\"><ID Source=\"PubMed\">23844448</ID><ID Source=\"BookShelf\">NBK148668</ID></Citation><XRef ID=\"C0086651\" DB=\"MedGen\" /><XRef ID=\"309297\" DB=\"Orphanet\" /><XRef ID=\"582\" DB=\"Orphanet\" /><XRef Type=\"MIM\" ID=\"253000\" DB=\"OMIM\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"20884\"><ClinVarSubmissionID localKey=\"612222.0002_MUCOPOLYSACCHARIDOSIS, TYPE IVA\" submitter=\"OMIM\" submitterDate=\"2018-05-30\" title=\"GALNS, ALA138VAL_MUCOPOLYSACCHARIDOSIS, TYPE IVA\" /><ClinVarAccession Acc=\"SCV000020884\" Version=\"3\" Type=\"SCV\" OrgID=\"3\" OrganizationCategory=\"resource\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2018-05-30\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"OMIM\" ID=\"612222.0002\" Type=\"Allelic variant\" /><ObservedIn><Sample><Origin>germline</Origin><Species>human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData><Attribute Type=\"Description\">In a patient with a severe form of Morquio syndrome A (253000), Tomatsu et al. (1992) identified a homozygous 468T-C transition in the GALNS gene, resulting in an ala138-to-val (A138V) substitution.</Attribute><Citation><CitationText>Tomatsu, S., Fukuda, S., Masue, M., Sukegawa, K., Masuno, M., Orii, T. Mucopolysaccharidosis type IVA: characterization and chromosomal localization of N-acetylgalactosamine-6-sulfate sulfatase gene and genetic heterogeneity. (Abstract) Am. J. Hum. Genet. 
51 (suppl.): A178, 1992.</CitationText></Citation><XRef DB=\"OMIM\" ID=\"253000\" Type=\"MIM\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue Type=\"Preferred\">GALNS, ALA138VAL</ElementValue></Name><AttributeSet><Attribute Type=\"NonHGVS\">ALA138VAL</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">GALNS</ElementValue></Symbol></MeasureRelationship><XRef DB=\"OMIM\" ID=\"612222.0002\" Type=\"Allelic variant\" /></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">MUCOPOLYSACCHARIDOSIS, TYPE IVA</ElementValue></Name></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000001054.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"47065141\"><RecordStatus>current</RecordStatus><Title>NM_013236.3(ATXN10):c.1173+54822_1173+54826ATTCT(10_32) AND Spinocerebellar ataxia 10</Title><ReferenceClinVarAssertion DateCreated=\"2012-08-13\" DateLastUpdated=\"2019-11-02\" ID=\"58557\"><ClinVarAccession Acc=\"RCV000001054\" Version=\"4\" Type=\"RCV\" DateUpdated=\"2019-11-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2018-11-07\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData ID=\"47898978\"><Attribute Type=\"Description\">In all affected members of 5 Mexican families with SCA10 (603516), Matsuura et al. (2000) detected expansion of a pentanucleotide (ATTCT) repeat in intron 9 of the ATXN10 gene. There was an inverse correlation between the expansion size, up to 22.5 kb larger than the normal allele, and the age of onset. Analysis of 562 chromosomes from unaffected individuals of various ethnic origins, including 242 chromosomes from Mexicans, showed a range of 10 to 22 ATTCT repeats with no evidence of expansions. 
The data indicated that the ATXN10 intronic ATTCT pentanucleotide repeat in SCA10 patients is unstable and represented the largest microsatellite expansion found to that time in the human genome.</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">11017075</ID></Citation></ObservedData><ObservedData ID=\"47898978\"><Attribute Type=\"Description\">In a multigenerational study, Matsuura et al. (2004) demonstrated that (1) the expanded ATTCT repeats are highly unstable when paternally transmitted, whereas maternal transmission results in significantly smaller changes in repeat size; (2) blood leukocytes, lymphoblastoid cells, buccal cells, and sperm have a variable degree of mosaicism in ATTCT expansion; (3) the length of the expanded repeat was not observed to change in individuals over a 5-year period; and (4) clinically determined anticipation is sometimes associated with intergenerational contraction rather than expansion of the ATTCT repeat.</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">15127363</ID></Citation></ObservedData></ObservedIn><ObservedIn><Sample><Origin>not provided</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><Purpose>assert pathogenicity</Purpose><MethodType>curation</MethodType></Method><ObservedData ID=\"47898979\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"999\" Acc=\"VCV000000999\" Version=\"1\"><Measure Type=\"Microsatellite\" ID=\"16038\"><Name><ElementValue Type=\"Preferred\">NM_013236.3(ATXN10):c.1173+54822_1173+54826ATTCT(10_32)</ElementValue></Name><Name><ElementValue Type=\"Alternate\">ATXN10, (ATTCT)n EXPANSION</ElementValue><XRef Type=\"Allelic variant\" ID=\"611150.0001\" DB=\"OMIM\" /></Name><AttributeSet><Attribute Accession=\"NM_013236\" Version=\"2\" Change=\"c.1173+54822_1173+54826ATTCT(10_32)\" Type=\"HGVS, 
previous\">NM_013236.2:c.1173+54822_1173+54826ATTCT(10_32)</Attribute></AttributeSet><CytogeneticLocation>22q13.31</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"22\" Accession=\"NC_000022.11\" start=\"45795360\" stop=\"45795364\" display_start=\"45795360\" display_stop=\"45795364\" positionVCF=\"45795354\" referenceAlleleVCF=\"GATTCTATTCTATTCTATTCT\" alternateAlleleVCF=\"G\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"22\" Accession=\"NC_000022.10\" start=\"46191240\" stop=\"46191244\" display_start=\"46191240\" display_stop=\"46191244\" positionVCF=\"46191234\" referenceAlleleVCF=\"GATTCTATTCTATTCTATTCT\" alternateAlleleVCF=\"G\" /><MeasureRelationship Type=\"within multiple genes by overlap\"><Name><ElementValue Type=\"Preferred\">ataxin 10 repeat instability region</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">LOC108660404</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"22\" Accession=\"NC_000022.11\" start=\"45795355\" stop=\"45795424\" display_start=\"45795355\" display_stop=\"45795424\" Strand=\"+\" /><XRef ID=\"108660404\" DB=\"Gene\" /></MeasureRelationship><MeasureRelationship Type=\"within multiple genes by overlap\"><Name><ElementValue Type=\"Preferred\">ataxin 10</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">ATXN10</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"22\" Accession=\"NC_000022.11\" start=\"45671798\" stop=\"45845307\" display_start=\"45671798\" display_stop=\"45845307\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"22\" Accession=\"NC_000022.10\" start=\"46067677\" 
stop=\"46241186\" display_start=\"46067677\" display_stop=\"46241186\" variantLength=\"173510\" Strand=\"+\" /><XRef ID=\"25814\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"611150\" DB=\"OMIM\" /><XRef ID=\"HGNC:10549\" DB=\"HGNC\" /></MeasureRelationship><MeasureRelationship Type=\"within multiple genes by overlap\"><Name><ElementValue Type=\"Preferred\">origin of replication for ATXN10 repeat region</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">LOC107181287</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"22\" Accession=\"NC_000022.11\" start=\"45794654\" stop=\"45795563\" display_start=\"45794654\" display_stop=\"45795563\" Strand=\"+\" /><XRef ID=\"107181287\" DB=\"Gene\" /></MeasureRelationship><XRef Type=\"Allelic variant\" ID=\"611150.0001\" DB=\"OMIM\" /><XRef Type=\"rs\" ID=\"60726084\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_013236.3(ATXN10):c.1173+54822_1173+54826ATTCT(10_32)</ElementValue></Name></MeasureSet><TraitSet Type=\"Disease\" ID=\"264\"><Trait ID=\"4178\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Spinocerebellar ataxia 10</ElementValue><XRef ID=\"Spinocerebellar+ataxia+10/6744\" DB=\"Genetic Alliance\" /><XRef ID=\"10474\" DB=\"Office of Rare Diseases\" /></Name><Name><ElementValue Type=\"Alternate\">Spinocerebellar Ataxia Type10</ElementValue><XRef ID=\"NBK1175\" DB=\"GeneReviews\" /></Name><Symbol><ElementValue Type=\"Preferred\">SCA10</ElementValue><XRef Type=\"MIM\" ID=\"603516\" DB=\"OMIM\" /><XRef ID=\"10474\" DB=\"Office of Rare Diseases\" /></Symbol><AttributeSet><Attribute Type=\"public definition\">Spinocerebellar ataxia type 10 (SCA10) is characterized by slowly progressive cerebellar ataxia that usually starts as poor balance and unsteady gait, followed by upper-limb ataxia, scanning dysarthria, and dysphagia. Abnormal tracking eye movements are common. 
Recurrent seizures after the onset of gait ataxia have been reported with variable frequencies among different families. Some individuals have cognitive dysfunction, behavioral disturbances, mood disorders, mild pyramidal signs, and peripheral neuropathy. Age of onset ranges from 12 to 48 years.</Attribute><XRef ID=\"NBK1175\" DB=\"GeneReviews\" /></AttributeSet><Citation Type=\"review\" Abbrev=\"GeneReviews\"><ID Source=\"PubMed\">20301354</ID><ID Source=\"BookShelf\">NBK1175</ID></Citation><Citation Type=\"review\" Abbrev=\"GeneReviews\"><ID Source=\"PubMed\">20301317</ID><ID Source=\"BookShelf\">NBK1138</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"EFNS, 2010\"><ID Source=\"PubMed\">20050888</ID></Citation><XRef ID=\"C1963674\" DB=\"MedGen\" /><XRef ID=\"98761\" DB=\"Orphanet\" /><XRef Type=\"MIM\" ID=\"603516\" DB=\"OMIM\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"21204\"><ClinVarSubmissionID localKey=\"611150.0001_SPINOCEREBELLAR ATAXIA 10\" submitter=\"OMIM\" submitterDate=\"2007-08-20\" title=\"ATXN10, (ATTCT)n EXPANSION _SPINOCEREBELLAR ATAXIA 10\" /><ClinVarAccession Acc=\"SCV000021204\" Version=\"2\" Type=\"SCV\" OrgID=\"3\" OrganizationCategory=\"resource\" OrgType=\"primary\" DateUpdated=\"2019-05-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2018-11-07\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"OMIM\" ID=\"611150.0001\" Type=\"Allelic variant\" /><ObservedIn><Sample><Origin>germline</Origin><Species>human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData><Attribute Type=\"Description\">In all affected members of 5 Mexican families with SCA10 (603516), Matsuura et al. 
(2000) detected expansion of a pentanucleotide (ATTCT) repeat in intron 9 of the ATXN10 gene. There was an inverse correlation between the expansion size, up to 22.5 kb larger than the normal allele, and the age of onset. Analysis of 562 chromosomes from unaffected individuals of various ethnic origins, including 242 chromosomes from Mexicans, showed a range of 10 to 22 ATTCT repeats with no evidence of expansions. The data indicated that the ATXN10 intronic ATTCT pentanucleotide repeat in SCA10 patients is unstable and represented the largest microsatellite expansion found to that time in the human genome.</Attribute><Citation><ID Source=\"PubMed\">11017075</ID></Citation><XRef DB=\"OMIM\" ID=\"603516\" Type=\"MIM\" /></ObservedData><ObservedData><Attribute Type=\"Description\">In a multigenerational study, Matsuura et al. (2004) demonstrated that (1) the expanded ATTCT repeats are highly unstable when paternally transmitted, whereas maternal transmission results in significantly smaller changes in repeat size; (2) blood leukocytes, lymphoblastoid cells, buccal cells, and sperm have a variable degree of mosaicism in ATTCT expansion; (3) the length of the expanded repeat was not observed to change in individuals over a 5-year period; and (4) clinically determined anticipation is sometimes associated with intergenerational contraction rather than expansion of the ATTCT repeat.</Attribute><Citation><ID Source=\"PubMed\">15127363</ID></Citation></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue Type=\"Preferred\">ATXN10, (ATTCT)n EXPANSION</ElementValue></Name><AttributeSet><Attribute Type=\"NonHGVS\">(ATTCT)n EXPANSION</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">ATXN10</ElementValue></Symbol></MeasureRelationship><XRef DB=\"OMIM\" ID=\"611150.0001\" Type=\"Allelic variant\" /></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait 
Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">SPINOCEREBELLAR ATAXIA 10</ElementValue></Name></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"92367\"><ClinVarSubmissionID localKey=\"NM_013236.2:c.1173+54822_1173+54826ATTCT(10_32)_NBK1175\" submitter=\"GeneReviews\" submitterDate=\"2013-01-08\" title=\"NM_013236.2:c.1173+54822_1173+54826ATTCT(10_32) and Spinocerebellar Ataxia Type10\" /><ClinVarAccession Acc=\"SCV000055894\" Version=\"1\" Type=\"SCV\" OrgID=\"500062\" OrganizationCategory=\"resource\" OrgType=\"primary\" DateUpdated=\"2019-07-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2012-09-20\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>non-pathogenic</Description><Comment Type=\"ConvertedByNCBI\">Converted during submission to Benign.</Comment></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"GeneReviews\" ID=\"NBK1175\" /><ObservedIn><Sample><Origin>not provided</Origin><Species>human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><Purpose>Assert pathogenicity</Purpose><MethodType>curation</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_013236.2:c.1173+54822_1173+54826ATTCT(10_32)</Attribute></AttributeSet></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Spinocerebellar Ataxia Type10</ElementValue></Name><XRef DB=\"GeneReviews\" ID=\"NBK1175\" /><XRef DB=\"OMIM\" ID=\"603516\" Type=\"MIM Number\" /></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000001373.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"47065424\"><RecordStatus>current</RecordStatus><Title>NM_001174089.2(SLC4A11):c.2019-16_2019-6delinsGGCCGGCCGG AND Corneal endothelial dystrophy</Title><ReferenceClinVarAssertion DateCreated=\"2012-08-13\" DateLastUpdated=\"2019-11-02\" ID=\"58876\"><ClinVarAccession Acc=\"RCV000001373\" Version=\"3\" Type=\"RCV\" DateUpdated=\"2019-11-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2008-03-01\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData ID=\"47899162\"><Attribute Type=\"Description\">In a consanguineous family in India, Vithana et al. 
(2006) found that corneal endothelial dystrophy (CHED; 217700) cosegregated with a deletion-insertion mutation in intron 15 of the SLC4A11 gene that inactivated the splice acceptor site.</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">18024964</ID></Citation></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"1310\" Acc=\"VCV000001310\" Version=\"1\"><Measure Type=\"Indel\" ID=\"16349\"><Name><ElementValue Type=\"Preferred\">NM_001174089.2(SLC4A11):c.2019-16_2019-6delinsGGCCGGCCGG</ElementValue></Name><AttributeSet><Attribute Accession=\"NM_001363745\" Version=\"2\" Change=\"c.1905-16_1905-6delinsGGCCGGCCGG\" Type=\"HGVS, coding, RefSeq\">NM_001363745.2:c.1905-16_1905-6delinsGGCCGGCCGG</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001174089\" Version=\"2\" Change=\"c.2019-16_2019-6delinsGGCCGGCCGG\" Type=\"HGVS, coding, RefSeq\">NM_001174089.2:c.2019-16_2019-6delinsGGCCGGCCGG</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001174090\" Version=\"1\" Change=\"c.2148-16_2148-6delinsGGCCGGCCGG\" Type=\"HGVS, coding, RefSeq\">NM_001174090.1:c.2148-16_2148-6delinsGGCCGGCCGG</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_017072\" Version=\"1\" Change=\"g.15215_15225delinsGGCCGGCCGG\" Type=\"HGVS, genomic, RefSeqGene\">NG_017072.1:g.15215_15225delinsGGCCGGCCGG</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_012093\" Version=\"2\" Change=\"g.25151_25161delinsCCGGCCGGCC\" Type=\"HGVS, genomic, RefSeqGene\">NG_012093.2:g.25151_25161delinsCCGGCCGGCC</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000020\" Version=\"11\" Change=\"g.3229017_3229027delinsCCGGCCGGCC\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000020.11:g.3229017_3229027delinsCCGGCCGGCC</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000020\" Version=\"11\" Change=\"g.3229017_3229027delGCAGACGGGCAinsCCGGCCGGCC\" Type=\"HGVS, genomic, top level, other\" 
integerValue=\"38\">NC_000020.11:g.3229017_3229027delGCAGACGGGCAinsCCGGCCGGCC</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000020\" Version=\"10\" Change=\"g.3209663_3209673delinsCCGGCCGGCC\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000020.10:g.3209663_3209673delinsCCGGCCGGCC</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">intron variant</Attribute><XRef ID=\"SO:0001627\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001174089.2:c.2019-16_2019-6delinsGGCCGGCCGG\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">intron variant</Attribute><XRef ID=\"SO:0001627\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001174090.1:c.2148-16_2148-6delinsGGCCGGCCGG\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">intron variant</Attribute><XRef ID=\"SO:0001627\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001363745.2:c.1905-16_1905-6delinsGGCCGGCCGG\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"nucleotide change\">IVS15AS, -6, DEL/INS</Attribute></AttributeSet><CytogeneticLocation>20p13</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"20\" Accession=\"NC_000020.11\" start=\"3229017\" stop=\"3229027\" display_start=\"3229017\" display_stop=\"3229027\" variantLength=\"1\" positionVCF=\"3229017\" referenceAlleleVCF=\"GCAGACGGGCA\" alternateAlleleVCF=\"CCGGCCGGCC\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"20\" Accession=\"NC_000020.10\" start=\"3209663\" stop=\"3209673\" display_start=\"3209663\" display_stop=\"3209673\" variantLength=\"1\" positionVCF=\"3209663\" referenceAlleleVCF=\"GCAGACGGGCA\" alternateAlleleVCF=\"CCGGCCGGCC\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">solute carrier family 4 member 
11</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">SLC4A11</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"20\" Accession=\"NC_000020.11\" start=\"3227417\" stop=\"3241484\" display_start=\"3227417\" display_stop=\"3241484\" Strand=\"-\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"20\" Accession=\"NC_000020.10\" start=\"3208062\" stop=\"3219886\" display_start=\"3208062\" display_stop=\"3219886\" variantLength=\"11825\" Strand=\"-\" /><XRef ID=\"83959\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"610206\" DB=\"OMIM\" /><XRef ID=\"HGNC:16438\" DB=\"HGNC\" /></MeasureRelationship><XRef Type=\"Allelic variant\" ID=\"610206.0007\" DB=\"OMIM\" /><XRef Type=\"rs\" ID=\"869320617\" DB=\"dbSNP\" /><Comment DataSource=\"NCBI curation\" Type=\"public\">NCBI staff reviewed the sequence information reported in PubMed 16767101 Supplementary Fig. 
3 to determine the location of this allele on the current reference sequence.</Comment></Measure><Name><ElementValue Type=\"Preferred\">NM_001174089.2(SLC4A11):c.2019-16_2019-6delinsGGCCGGCCGG</ElementValue></Name><XRef ID=\"CA354154\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"25737\"><Trait ID=\"34501\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Corneal endothelial dystrophy</ElementValue><XRef Type=\"Phenotypic series\" ID=\"PS121700\" DB=\"OMIM\" /></Name><Symbol><ElementValue Type=\"Alternate\">CHED</ElementValue><XRef Type=\"MIM\" ID=\"121700\" DB=\"OMIM\" /></Symbol><XRef ID=\"C0544008\" DB=\"MedGen\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"21523\"><ClinVarSubmissionID localKey=\"610206.0007_CORNEAL ENDOTHELIAL DYSTROPHY\" submitter=\"OMIM\" submitterDate=\"2016-12-29\" title=\"SLC4A11, IVS15AS, -6, DEL/INS_CORNEAL ENDOTHELIAL DYSTROPHY\" /><ClinVarAccession Acc=\"SCV000021523\" Version=\"3\" Type=\"SCV\" OrgID=\"3\" OrganizationCategory=\"resource\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2008-03-01\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"OMIM\" ID=\"610206.0007\" Type=\"Allelic variant\" /><ObservedIn><Sample><Origin>germline</Origin><Species>human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData><Attribute Type=\"Description\">In a consanguineous family in India, Vithana et al. 
(2006) found that corneal endothelial dystrophy (CHED; 217700) cosegregated with a deletion-insertion mutation in intron 15 of the SLC4A11 gene that inactivated the splice acceptor site.</Attribute><Citation><ID Source=\"PubMed\">18024964</ID></Citation><XRef DB=\"OMIM\" ID=\"217700\" Type=\"MIM\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue Type=\"Preferred\">SLC4A11, IVS15AS, -6, DEL/INS</ElementValue></Name><AttributeSet><Attribute Type=\"NonHGVS\">IVS15AS, -6, DEL/INS</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">SLC4A11</ElementValue></Symbol></MeasureRelationship><XRef DB=\"OMIM\" ID=\"610206.0007\" Type=\"Allelic variant\" /></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">CORNEAL ENDOTHELIAL DYSTROPHY</ElementValue></Name></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000001752.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"47065757\"><RecordStatus>current</RecordStatus><Title>NM_000528.4(MAN2B1):c.215A&gt;T (p.His72Leu) AND Deficiency of alpha-mannosidase</Title><ReferenceClinVarAssertion DateCreated=\"2012-08-13\" DateLastUpdated=\"2019-11-02\" ID=\"59255\"><ClinVarAccession Acc=\"RCV000001752\" Version=\"3\" Type=\"RCV\" DateUpdated=\"2019-11-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2012-06-07\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Conflicting interpretations of pathogenicity</Description><Explanation DataSource=\"ClinVar\" Type=\"public\">Pathogenic(1);Uncertain significance(1)</Explanation></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData ID=\"47899427\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData ID=\"47900897\"><Attribute Type=\"Description\">In 2 sibs with alpha-mannosidosis (MANSA; 248500), born of consanguineous parents, Nilssen et al. (1997) identified a homozygous 212A-T transversion in exon 2 of the MANB gene, resulting in a his71-to-leu (H71L) substitution. Residue his71 is conserved among lysosomal alpha-mannosidases from several species. 
The sibs were thought to be mildly affected and residual acidic alpha-mannosidase activity of 20% of normal was detected in the patient's fibroblasts, according to the report of this family by Bach et al. (1978). Nevertheless, the patients showed vacuolated leukocytes and fibroblasts consistent with the disease phenotype. The authors suggested that mutant mannosidase enzymes, even though containing residual activity upon testing at the appropriate pH, may be mislocalized to nonlysosomal compartments and therefore functionally inactive.</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">724292</ID></Citation><Citation Type=\"general\"><ID Source=\"PubMed\">9158146</ID></Citation></ObservedData><ObservedData ID=\"47900897\"><Attribute Type=\"Description\">Gotoda et al. (1998) identified the same mutation, which they designated HIS72LEU in keeping with the codon numbering system of Wakamatsu et al. (1997). The patient, represented by cell line GM2051, was one of the patients reported by Nilssen et al. 
(1997).</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">9158146</ID></Citation><Citation Type=\"general\"><ID Source=\"PubMed\">9370301</ID></Citation><Citation Type=\"general\"><ID Source=\"PubMed\">9758606</ID></Citation></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"1684\" Acc=\"VCV000001684\" Version=\"1\"><Measure Type=\"single nucleotide variant\" ID=\"16723\"><Name><ElementValue Type=\"Preferred\">NM_000528.4(MAN2B1):c.215A&gt;T (p.His72Leu)</ElementValue></Name><AttributeSet><Attribute Accession=\"NM_000528\" Version=\"4\" Change=\"c.215A&gt;T\" Type=\"HGVS, coding, RefSeq\">NM_000528.4:c.215A&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001173498\" Version=\"1\" Change=\"c.215A&gt;T\" Type=\"HGVS, coding, RefSeq\">NM_001173498.1:c.215A&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_015814\" Version=\"1\" Change=\"g.3947T&gt;A\" Type=\"HGVS, genomic, RefSeqGene\">NG_015814.1:g.3947T&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_008318\" Version=\"1\" Change=\"g.6028A&gt;T\" Type=\"HGVS, genomic, RefSeqGene\">NG_008318.1:g.6028A&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000019\" Version=\"10\" Change=\"g.12665750T&gt;A\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000019.10:g.12665750T&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000019\" Version=\"9\" Change=\"g.12776564T&gt;A\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000019.9:g.12776564T&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_000528\" Version=\"3\" Change=\"c.215A&gt;T\" Type=\"HGVS, previous\">NM_000528.3:c.215A&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"O00754\" Change=\"p.His72Leu\" Type=\"HGVS, protein\">O00754:p.His72Leu</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_000519\" Version=\"2\" Change=\"p.His72Leu\" Type=\"HGVS, protein, 
RefSeq\">NP_000519.2:p.His72Leu</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_000519\" Version=\"2\" Change=\"p.His72Leu\" Type=\"HGVS, protein, RefSeq\">NP_000519.2:p.His72Leu</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_000519\" Version=\"2\" Change=\"p.His72Leu\" Type=\"HGVS, protein, RefSeq\">NP_000519.2:p.His72Leu</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001166969\" Version=\"1\" Change=\"p.His72Leu\" Type=\"HGVS, protein, RefSeq\">NP_001166969.1:p.His72Leu</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_000528.4:c.215A&gt;T\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001173498.1:c.215A&gt;T\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">H71L</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">H72L</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange3LetterCode\">HIS71LEU</Attribute></AttributeSet><CytogeneticLocation>19p13.13</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"19\" Accession=\"NC_000019.10\" start=\"12665750\" stop=\"12665750\" display_start=\"12665750\" display_stop=\"12665750\" variantLength=\"1\" positionVCF=\"12665750\" referenceAlleleVCF=\"T\" alternateAlleleVCF=\"A\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"19\" Accession=\"NC_000019.9\" start=\"12776564\" stop=\"12776564\" display_start=\"12776564\" display_stop=\"12776564\" variantLength=\"1\" positionVCF=\"12776564\" referenceAlleleVCF=\"T\" alternateAlleleVCF=\"A\" /><MeasureRelationship Type=\"within 
single gene\"><Name><ElementValue Type=\"Preferred\">mannosidase alpha class 2B member 1</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">MAN2B1</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"19\" Accession=\"NC_000019.10\" start=\"12646508\" stop=\"12666777\" display_start=\"12646508\" display_stop=\"12666777\" Strand=\"-\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"19\" Accession=\"NC_000019.9\" start=\"12757321\" stop=\"12777590\" display_start=\"12757321\" display_stop=\"12777590\" variantLength=\"20270\" Strand=\"-\" /><XRef ID=\"4125\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"609458\" DB=\"OMIM\" /><XRef ID=\"HGNC:6826\" DB=\"HGNC\" /></MeasureRelationship><XRef ID=\"O00754#VAR_003338\" DB=\"UniProtKB\" /><XRef Type=\"Allelic variant\" ID=\"609458.0001\" DB=\"OMIM\" /><XRef Type=\"rs\" ID=\"387906261\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_000528.4(MAN2B1):c.215A&gt;T (p.His72Leu)</ElementValue></Name><XRef ID=\"CA339900\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"428\"><Trait ID=\"2728\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Deficiency of alpha-mannosidase</ElementValue><XRef ID=\"Alpha-Mannosidosis+Type+1/332\" DB=\"Genetic Alliance\" /><XRef ID=\"124466001\" DB=\"SNOMED CT\" /></Name><Name><ElementValue Type=\"Alternate\">Alpha-Mannosidosis</ElementValue><XRef ID=\"NBK1396\" DB=\"GeneReviews\" /></Name><Symbol><ElementValue Type=\"Preferred\">MANSA</ElementValue><XRef Type=\"MIM\" ID=\"248500\" DB=\"OMIM\" /></Symbol><AttributeSet><Attribute Type=\"public definition\">Alpha-mannosidosis encompasses a continuum of clinical findings from mild to severe. 
Three major clinical subtypes have been suggested: A mild form recognized after age ten years with absence of skeletal abnormalities, myopathy, and slow progression (type 1). A moderate form recognized before age ten years with presence of skeletal abnormalities, myopathy, and slow progression (type 2). A severe form manifested as prenatal loss or early death from progressive central nervous system involvement or infection (type 3). Individuals with a milder phenotype have mild-to-moderate intellectual disability, impaired hearing, characteristic coarse features, clinical or radiographic skeletal abnormalities, immunodeficiency, and primary central nervous system disease – mainly cerebellar involvement causing ataxia. Periods of psychiatric symptoms are common. Associated medical problems can include corneal opacities, hepatosplenomegaly, aseptic destructive arthritis, and metabolic myopathy. Alpha-mannosidosis is insidiously progressive; some individuals may live into the sixth decade.</Attribute><XRef ID=\"NBK1396\" DB=\"GeneReviews\" /></AttributeSet><Citation Type=\"review\" Abbrev=\"GeneReviews\"><ID Source=\"PubMed\">20301570</ID><ID Source=\"BookShelf\">NBK1396</ID></Citation><Citation Type=\"Translational/Evidence-based\" Abbrev=\"EuroGentest, 2011\"><ID Source=\"PubMed\">21368911</ID></Citation><XRef ID=\"C0024748\" DB=\"MedGen\" /><XRef ID=\"61\" DB=\"Orphanet\" /><XRef Type=\"MIM\" ID=\"248500\" DB=\"OMIM\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"21908\"><ClinVarSubmissionID localKey=\"609458.0001_ALPHA-MANNOSIDOSIS\" submitter=\"OMIM\" submitterDate=\"2013-04-04\" title=\"MAN2B1, HIS71LEU_ALPHA-MANNOSIDOSIS\" /><ClinVarAccession Acc=\"SCV000021908\" Version=\"1\" Type=\"SCV\" OrgID=\"3\" OrganizationCategory=\"resource\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"1998-10-01\"><ReviewStatus>no assertion criteria 
provided</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"OMIM\" ID=\"609458.0001\" Type=\"Allelic variant\" /><ObservedIn><Sample><Origin>germline</Origin><Species>human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData><Attribute Type=\"Description\">In 2 sibs with alpha-mannosidosis (MANSA; 248500), born of consanguineous parents, Nilssen et al. (1997) identified a homozygous 212A-T transversion in exon 2 of the MANB gene, resulting in a his71-to-leu (H71L) substitution. Residue his71 is conserved among lysosomal alpha-mannosidases from several species. The sibs were thought to be mildly affected and residual acidic alpha-mannosidase activity of 20% of normal was detected in the patient's fibroblasts, according to the report of this family by Bach et al. (1978). Nevertheless, the patients showed vacuolated leukocytes and fibroblasts consistent with the disease phenotype. The authors suggested that mutant mannosidase enzymes, even though containing residual activity upon testing at the appropriate pH, may be mislocalized to nonlysosomal compartments and therefore functionally inactive.</Attribute><Citation><ID Source=\"PubMed\">9158146</ID></Citation><Citation><ID Source=\"PubMed\">724292</ID></Citation><XRef DB=\"OMIM\" ID=\"248500\" Type=\"MIM\" /></ObservedData><ObservedData><Attribute Type=\"Description\">Gotoda et al. (1998) identified the same mutation, which they designated HIS72LEU in keeping with the codon numbering system of Wakamatsu et al. (1997). The patient, represented by cell line GM2051, was one of the patients reported by Nilssen et al. 
(1997).</Attribute><Citation><ID Source=\"PubMed\">9758606</ID></Citation><Citation><ID Source=\"PubMed\">9370301</ID></Citation><Citation><ID Source=\"PubMed\">9158146</ID></Citation></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue Type=\"Preferred\">MAN2B1, HIS71LEU</ElementValue></Name><AttributeSet><Attribute Type=\"NonHGVS\">HIS71LEU</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">MAN2B1</ElementValue></Symbol></MeasureRelationship><XRef DB=\"OMIM\" ID=\"609458.0001\" Type=\"Allelic variant\" /></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">ALPHA-MANNOSIDOSIS</ElementValue></Name></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"453008\"><ClinVarSubmissionID localKey=\"NM_000528.3:c.215A&gt;T|OMIM:248500\" submitter=\"ClinVar Staff, National Center for Biotechnology Information (NCBI)\" submitterDate=\"2015-08-25\" /><ClinVarAccession Acc=\"SCV000243988\" Version=\"1\" Type=\"SCV\" OrgID=\"500139\" OrganizationCategory=\"resource\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2012-06-07\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Uncertain significance</Description><Citation><ID Source=\"PubMed\">9758606</ID></Citation><Citation><URL>http://web.expasy.org/variant_pages/VAR_003338.html</URL></Citation></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue 
Type=\"Alternate\">p.His72Leu</ElementValue></Name><AttributeSet><Attribute Type=\"HGVS\">NM_000528.3:c.215A&gt;T</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">MAN2B1</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Deficiency of alpha-mannosidase</ElementValue></Name><XRef DB=\"OMIM\" ID=\"248500\" Type=\"MIM\" /></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000003254.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"47067068\"><RecordStatus>current</RecordStatus><Title>NM_144701.3(IL23R):c.1142G&gt;A (p.Arg381Gln) AND Inflammatory bowel disease 17, protection against</Title><ReferenceClinVarAssertion DateCreated=\"2012-08-13\" DateLastUpdated=\"2019-11-02\" ID=\"60757\"><ClinVarAccession Acc=\"RCV000003254\" Version=\"6\" Type=\"RCV\" DateUpdated=\"2019-11-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2010-04-01\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>protective</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData ID=\"47901593\"><Attribute Type=\"Description\">Inflammatory Bowel Disease</Attribute></ObservedData><ObservedData ID=\"47901593\"><Attribute Type=\"Description\">Using a large-scale genomewide association study, Duerr et al. (2006) identified an uncommon coding mutation in the IL23R gene, a 1142G-A transition, resulting in an arg381-to-gln (R381Q) substitution (rs11209026), that confers strong protection against Crohn disease (see 612261). This SNP was identified in 1 cohort and replicated in 2 others. The gln381 allele was found in 7% of non-Jewish controls and 1.9% of non-Jewish patients with ileal Crohn disease.</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">17068223</ID></Citation></ObservedData><ObservedData ID=\"47901593\"><Attribute Type=\"Description\">Libioulle et al. 
(2007) performed a genomewide association study with more than 300,000 SNPs in 547 Caucasian patients with Crohn disease from Belgium and 928 controls and found the strongest association (p less than 10(-9)) with markers of the IL23R gene, including rs11209026, which corresponds to the R381Q substitution. The association with R381Q was replicated in 1,255 additional Caucasian CD patients and 550 controls (combined p = 2.2 x 10(-18)).</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">17447842</ID></Citation></ObservedData><ObservedData ID=\"47901593\"><Attribute Type=\"Description\">Raelson et al. (2007) analyzed the IL23R region in 477 parent-proband trios with Crohn disease from the Quebec Founder Population and 2 independent German samples involving 521 affected-child trios, 752 cases, and 828 independent controls. The authors found that the R381Q SNP did not occur consistently in all risk and protective haplotypes, and concluded that it is highly unlikely that R381Q fully explains the functional role of this gene in CD etiology.</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">17804789</ID></Citation></ObservedData><ObservedData ID=\"47901593\"><Attribute Type=\"Description\">In a Caucasian German IBD cohort that included 833 CD patients, 456 patients with ulcerative colitis (UC), and 1,381 unrelated controls, Glas et al. (2007) confirmed the association between rs11209026 and protection against CD (OR, 0.43; p = 8.04 x 10(-8)) and also found a significant association with protection from UC (OR, 0.70; p = 0.00361).</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">17786191</ID></Citation></ObservedData><ObservedData ID=\"47901593\"><Attribute Type=\"Description\">In a study involving 1,841 ulcerative colitis cases and 1,470 controls, Fisher et al. 
(2008) found the strongest signal at rs11209026 (p = 8.0 x 10(-8); OR, 0.53), with evidence that additional independent IL23R variants also contribute to ulcerative colitis risk.</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">18438406</ID></Citation></ObservedData><ObservedData ID=\"47901593\"><Attribute Type=\"Description\">McGovern et al. (2010) combined new data from 2 genomewide association studies of ulcerative colitis involving 266,047 SNPs and performed a metaanalysis with previously published data (Silverberg et al., 2009), thus bringing together a discovery set of 2,693 European UC patients and 6,791 controls; the top results from the metaanalysis were then independently replicated with 2,009 additional European UC cases and 1,580 controls. McGovern et al. (2010) confirmed association with UC at rs11209026 (combined p = 1.9 x 10(-13)).</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">19122664</ID></Citation><Citation Type=\"general\"><ID Source=\"PubMed\">20228799</ID></Citation></ObservedData><ObservedData ID=\"47901593\"><Attribute Type=\"Description\">Psoriasis</Attribute></ObservedData><ObservedData ID=\"47901593\"><Attribute Type=\"Description\">Capon et al. (2007) reported a significant association between the R381Q variant and protection against psoriasis (PSORS7; 605606) among 318 British patients with the disorder. The findings were replicated in a second group of 519 British patients. Together, the association yielded an odds ratio of 0.49 (p = 0.00014), with the gln381 allele offering protection from the disease. Capon et al. 
(2007) noted that the arg381 residue is highly conserved among higher vertebrates and is located within the binding domain for JAK2 kinase, which is the first mediator of the IL23R signaling cascade (Parham et al., 2002).</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">12023369</ID></Citation><Citation Type=\"general\"><ID Source=\"PubMed\">17587057</ID></Citation></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"3108\" Acc=\"VCV000003108\" Version=\"2\"><Measure Type=\"single nucleotide variant\" ID=\"18147\"><Name><ElementValue Type=\"Preferred\">NM_144701.3(IL23R):c.1142G&gt;A (p.Arg381Gln)</ElementValue></Name><AttributeSet><Attribute Accession=\"NM_144701\" Version=\"3\" Change=\"c.1142G&gt;A\" Type=\"HGVS, coding, RefSeq\">NM_144701.3:c.1142G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_011498\" Version=\"1\" Change=\"g.78790G&gt;A\" Type=\"HGVS, genomic, RefSeqGene\">NG_011498.1:g.78790G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000001\" Version=\"11\" Change=\"g.67240275G&gt;A\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000001.11:g.67240275G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000001\" Version=\"10\" Change=\"g.67705958G&gt;A\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000001.10:g.67705958G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"Q5VWK5\" Change=\"p.Arg381Gln\" Type=\"HGVS, protein\">Q5VWK5:p.Arg381Gln</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_653302\" Version=\"2\" Change=\"p.Arg381Gln\" Type=\"HGVS, protein, RefSeq\">NP_653302.2:p.Arg381Gln</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_144701.3:c.1142G&gt;A\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute 
Type=\"ProteinChange1LetterCode\">R381Q</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange3LetterCode\">ARG381GLN</Attribute></AttributeSet><AlleleFrequencyList><AlleleFrequency Value=\"0.02276\" Source=\"1000 Genomes Project\" /><AlleleFrequency Value=\"0.04221\" Source=\"Exome Aggregation Consortium (ExAC)\" /><AlleleFrequency Value=\"0.04539\" Source=\"The Genome Aggregation Database (gnomAD)\" /><AlleleFrequency Value=\"0.04220\" Source=\"The Genome Aggregation Database (gnomAD), exomes\" /><AlleleFrequency Value=\"0.04453\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\" /></AlleleFrequencyList><GlobalMinorAlleleFrequency Value=\"0.02276\" Source=\"1000 Genomes Project\" MinorAllele=\"A\" /><CytogeneticLocation>1p31.3</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"67240275\" stop=\"67240275\" display_start=\"67240275\" display_stop=\"67240275\" variantLength=\"1\" positionVCF=\"67240275\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"A\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"67705958\" stop=\"67705958\" display_start=\"67705958\" display_stop=\"67705958\" variantLength=\"1\" positionVCF=\"67705958\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"A\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">interleukin 23 receptor</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">IL23R</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"67138639\" stop=\"67259979\" display_start=\"67138639\" display_stop=\"67259979\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" 
AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"67632168\" stop=\"67725661\" display_start=\"67632168\" display_stop=\"67725661\" variantLength=\"93494\" Strand=\"+\" /><XRef ID=\"149233\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"607562\" DB=\"OMIM\" /><XRef ID=\"HGNC:19100\" DB=\"HGNC\" /></MeasureRelationship><XRef ID=\"Q5VWK5#VAR_029754\" DB=\"UniProtKB\" /><XRef Type=\"Allelic variant\" ID=\"607562.0001\" DB=\"OMIM\" /><XRef Type=\"rs\" ID=\"11209026\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_144701.3(IL23R):c.1142G&gt;A (p.Arg381Gln)</ElementValue></Name><XRef ID=\"CA116015\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"793\"><Trait ID=\"9850\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Inflammatory bowel disease 17, protection against</ElementValue></Name><XRef ID=\"C4017090\" DB=\"MedGen\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"23412\"><ClinVarSubmissionID localKey=\"607562.0001_INFLAMMATORY BOWEL DISEASE 17, PROTECTION AGAINST\" submitter=\"OMIM\" submitterDate=\"2019-07-23\" title=\"IL23R, ARG381GLN_INFLAMMATORY BOWEL DISEASE 17, PROTECTION AGAINST\" /><ClinVarAccession Acc=\"SCV000023412\" Version=\"5\" Type=\"SCV\" OrgID=\"3\" OrganizationCategory=\"resource\" OrgType=\"primary\" DateUpdated=\"2019-07-27\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2010-04-01\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>protective</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"OMIM\" ID=\"607562.0001\" Type=\"Allelic variant\" /><ObservedIn><Sample><Origin>germline</Origin><Species>human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData><Attribute Type=\"Description\">Inflammatory Bowel Disease</Attribute></ObservedData><ObservedData><Attribute 
Type=\"Description\">Using a large-scale genomewide association study, Duerr et al. (2006) identified an uncommon coding mutation in the IL23R gene, a 1142G-A transition, resulting in an arg381-to-gln (R381Q) substitution (rs11209026), that confers strong protection against Crohn disease (see 612261). This SNP was identified in 1 cohort and replicated in 2 others. The gln381 allele was found in 7% of non-Jewish controls and 1.9% of non-Jewish patients with ileal Crohn disease.</Attribute><Citation><ID Source=\"PubMed\">17068223</ID></Citation><XRef DB=\"OMIM\" ID=\"612261\" Type=\"MIM\" /></ObservedData><ObservedData><Attribute Type=\"Description\">Libioulle et al. (2007) performed a genomewide association study with more than 300,000 SNPs in 547 Caucasian patients with Crohn disease from Belgium and 928 controls and found the strongest association (p less than 10(-9)) with markers of the IL23R gene, including rs11209026, which corresponds to the R381Q substitution. The association with R381Q was replicated in 1,255 additional Caucasian CD patients and 550 controls (combined p = 2.2 x 10(-18)).</Attribute><Citation><ID Source=\"PubMed\">17447842</ID></Citation></ObservedData><ObservedData><Attribute Type=\"Description\">Raelson et al. (2007) analyzed the IL23R region in 477 parent-proband trios with Crohn disease from the Quebec Founder Population and 2 independent German samples involving 521 affected-child trios, 752 cases, and 828 independent controls. The authors found that the R381Q SNP did not occur consistently in all risk and protective haplotypes, and concluded that it is highly unlikely that R381Q fully explains the functional role of this gene in CD etiology.</Attribute><Citation><ID Source=\"PubMed\">17804789</ID></Citation></ObservedData><ObservedData><Attribute Type=\"Description\">In a Caucasian German IBD cohort that included 833 CD patients, 456 patients with ulcerative colitis (UC), and 1,381 unrelated controls, Glas et al. 
(2007) confirmed the association between rs11209026 and protection against CD (OR, 0.43; p = 8.04 x 10(-8)) and also found a significant association with protection from UC (OR, 0.70; p = 0.00361).</Attribute><Citation><ID Source=\"PubMed\">17786191</ID></Citation></ObservedData><ObservedData><Attribute Type=\"Description\">In a study involving 1,841 ulcerative colitis cases and 1,470 controls, Fisher et al. (2008) found the strongest signal at rs11209026 (p = 8.0 x 10(-8); OR, 0.53), with evidence that additional independent IL23R variants also contribute to ulcerative colitis risk.</Attribute><Citation><ID Source=\"PubMed\">18438406</ID></Citation></ObservedData><ObservedData><Attribute Type=\"Description\">McGovern et al. (2010) combined new data from 2 genomewide association studies of ulcerative colitis involving 266,047 SNPs and performed a metaanalysis with previously published data (Silverberg et al., 2009), thus bringing together a discovery set of 2,693 European UC patients and 6,791 controls; the top results from the metaanalysis were then independently replicated with 2,009 additional European UC cases and 1,580 controls. McGovern et al. (2010) confirmed association with UC at rs11209026 (combined p = 1.9 x 10(-13)).</Attribute><Citation><ID Source=\"PubMed\">20228799</ID></Citation><Citation><ID Source=\"PubMed\">19122664</ID></Citation></ObservedData><ObservedData><Attribute Type=\"Description\">Psoriasis</Attribute></ObservedData><ObservedData><Attribute Type=\"Description\">Capon et al. (2007) reported a significant association between the R381Q variant and protection against psoriasis (PSORS7; 605606) among 318 British patients with the disorder. The findings were replicated in a second group of 519 British patients. Together, the association yielded an odds ratio of 0.49 (p = 0.00014), with the gln381 allele offering protection from the disease. Capon et al. 
(2007) noted that the arg381 residue is highly conserved among higher vertebrates and is located within the binding domain for JAK2 kinase, which is the first mediator of the IL23R signaling cascade (Parham et al., 2002).</Attribute><Citation><ID Source=\"PubMed\">17587057</ID></Citation><Citation><ID Source=\"PubMed\">12023369</ID></Citation><XRef DB=\"OMIM\" ID=\"605606\" Type=\"MIM\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue Type=\"Preferred\">IL23R, ARG381GLN</ElementValue></Name><AttributeSet><Attribute Type=\"NonHGVS\">ARG381GLN</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">IL23R</ElementValue></Symbol></MeasureRelationship><XRef DB=\"OMIM\" ID=\"607562.0001\" Type=\"Allelic variant\" /></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">INFLAMMATORY BOWEL DISEASE 17, PROTECTION AGAINST</ElementValue></Name></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000005426.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"47068954\"><RecordStatus>current</RecordStatus><Title>NM_001271604.2(JPH3):c.431_433CTG(6_27) (p.Ala150_Ala157del) AND Huntington disease-like 2</Title><ReferenceClinVarAssertion DateCreated=\"2012-08-13\" DateLastUpdated=\"2019-11-02\" ID=\"62929\"><ClinVarAccession Acc=\"RCV000005426\" Version=\"3\" Type=\"RCV\" DateUpdated=\"2019-11-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2001-12-01\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData ID=\"47904718\"><Attribute Type=\"Description\">In affected members of an African American family with Huntington disease-like-2 (HDL2; 606438), Holmes et al. (2001) demonstrated a CAG/CTG repeat expansion of about 40 or more triplets in an alternatively spliced exon of the JPH3 gene. Holmes et al. (2001) found the same mutation in 4 other African American individuals from the southeastern United States, each of whom had a familial Huntington disease-like disorder and had tested negative for the Huntington disease mutation in the IT15 gene (613004).</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">11694876</ID></Citation></ObservedData><ObservedData ID=\"47904718\"><Attribute Type=\"Description\">Among 74 patients with an HD-like phenotype but without CAG repeat expansions in the IT15 gene, Stevanin et al. 
(2002) identified 1 patient with a pure uninterrupted 50 CAG/CTG repeat in the JPH3 gene. The patient was a 44-year-old Moroccan woman with subcortical dementia, mild choreic movements, and atrophy of the cerebral cortex.</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">11914418</ID></Citation></ObservedData><ObservedData ID=\"47904718\"><Attribute Type=\"Description\">In 3 members of a family with HLD2, originally reported by Walker et al. (2002) as having choreoacanthocytosis, Walker et al. (2003) identified trinucleotide repeat expansions of 51, 58, and 57 triplets in the JPH3 gene. The authors identified affected members of 2 other families with trinucleotide repeats in the JPH3 gene.</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">11940688</ID></Citation><Citation Type=\"general\"><ID Source=\"PubMed\">14557581</ID></Citation></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"5119\" Acc=\"VCV000005119\" Version=\"1\"><Measure Type=\"Microsatellite\" ID=\"20158\"><Name><ElementValue Type=\"Preferred\">NM_001271604.2(JPH3):c.431_433CTG(6_27) (p.Ala150_Ala157del)</ElementValue></Name><Name><ElementValue Type=\"Alternate\">JPH3, CAG(n) EXPANSION</ElementValue><XRef Type=\"Allelic variant\" ID=\"605268.0001\" DB=\"OMIM\" /></Name><AttributeSet><Attribute Accession=\"NM_001271604\" Version=\"2\" Change=\"c.431CTG(&gt;40)\" Type=\"HGVS, non-validated\">NM_001271604.2:c.431CTG(&gt;40)</Attribute></AttributeSet><CytogeneticLocation>16q24.2</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"16\" Accession=\"NC_000016.10\" start=\"87604288\" stop=\"87604290\" display_start=\"87604288\" display_stop=\"87604290\" positionVCF=\"87604287\" referenceAlleleVCF=\"CCTGCTGCTGCTGCTGCTGCTGCTG\" alternateAlleleVCF=\"C\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"16\" 
Accession=\"NC_000016.9\" start=\"87637894\" stop=\"87637896\" display_start=\"87637894\" display_stop=\"87637896\" positionVCF=\"87637893\" referenceAlleleVCF=\"CCTGCTGCTGCTGCTGCTGCTGCTG\" alternateAlleleVCF=\"C\" /><MeasureRelationship Type=\"within multiple genes by overlap\"><Name><ElementValue Type=\"Preferred\">junctophilin 3 repeat instability region</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">LOC109029536</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"16\" Accession=\"NC_000016.10\" start=\"87604283\" stop=\"87604329\" display_start=\"87604283\" display_stop=\"87604329\" Strand=\"+\" /><XRef ID=\"109029536\" DB=\"Gene\" /></MeasureRelationship><MeasureRelationship Type=\"within multiple genes by overlap\"><Name><ElementValue Type=\"Preferred\">junctophilin 3</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">JPH3</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"16\" Accession=\"NC_000016.10\" start=\"87601835\" stop=\"87698156\" display_start=\"87601835\" display_stop=\"87698156\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"16\" Accession=\"NC_000016.9\" start=\"87636498\" stop=\"87731761\" display_start=\"87636498\" display_stop=\"87731761\" variantLength=\"95264\" Strand=\"+\" /><XRef ID=\"57338\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"605268\" DB=\"OMIM\" /><XRef ID=\"HGNC:14203\" DB=\"HGNC\" /></MeasureRelationship><XRef Type=\"Allelic variant\" ID=\"605268.0001\" DB=\"OMIM\" /><XRef Type=\"rs\" ID=\"71156237\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_001271604.2(JPH3):c.431_433CTG(6_27) (p.Ala150_Ala157del)</ElementValue></Name></MeasureSet><TraitSet Type=\"Disease\" ID=\"1416\"><Trait ID=\"5270\" 
Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Huntington disease-like 2</ElementValue><XRef ID=\"Huntington+disease-like+2/8568\" DB=\"Genetic Alliance\" /></Name><Symbol><ElementValue Type=\"Preferred\">HDL2</ElementValue><XRef Type=\"MIM\" ID=\"606438\" DB=\"OMIM\" /></Symbol><AttributeSet><Attribute Type=\"public definition\">Huntington disease-like 2 (HDL2) typically presents in midlife with a relentless progressive triad of movement, emotional, and cognitive abnormalities which lead to death within ten to 20 years. HDL2 cannot be differentiated from Huntington disease clinically. Neurologic abnormalities include chorea, hypokinesia (rigidity, bradykinesia), dysarthria, and hyperreflexia in the later stages of the disease. There is a strong correlation between the duration of the disease and the progression of the motor and cognitive disorder.</Attribute><XRef ID=\"NBK1529\" DB=\"GeneReviews\" /></AttributeSet><Citation Type=\"review\" Abbrev=\"GeneReviews\"><ID Source=\"PubMed\">20301701</ID><ID Source=\"BookShelf\">NBK1529</ID></Citation><XRef ID=\"C1847987\" DB=\"MedGen\" /><XRef ID=\"98934\" DB=\"Orphanet\" /><XRef Type=\"MIM\" ID=\"606438\" DB=\"OMIM\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"25608\"><ClinVarSubmissionID localKey=\"605268.0001_HUNTINGTON DISEASE-LIKE 2\" submitter=\"OMIM\" submitterDate=\"2018-08-16\" title=\"JPH3, CAG(n) EXPANSION_HUNTINGTON DISEASE-LIKE 2\" /><ClinVarAccession Acc=\"SCV000025608\" Version=\"2\" Type=\"SCV\" OrgID=\"3\" OrganizationCategory=\"resource\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2001-12-01\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"OMIM\" ID=\"605268.0001\" Type=\"Allelic variant\" 
/><ObservedIn><Sample><Origin>germline</Origin><Species>human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData><Attribute Type=\"Description\">In affected members of an African American family with Huntington disease-like-2 (HDL2; 606438), Holmes et al. (2001) demonstrated a CAG/CTG repeat expansion of about 40 or more triplets in an alternatively spliced exon of the JPH3 gene. Holmes et al. (2001) found the same mutation in 4 other African American individuals from the southeastern United States, each of whom had a familial Huntington disease-like disorder and had tested negative for the Huntington disease mutation in the IT15 gene (613004).</Attribute><Citation><ID Source=\"PubMed\">11694876</ID></Citation><XRef DB=\"OMIM\" ID=\"606438\" Type=\"MIM\" /><XRef DB=\"OMIM\" ID=\"613004\" Type=\"MIM\" /></ObservedData><ObservedData><Attribute Type=\"Description\">Among 74 patients with an HD-like phenotype but without CAG repeat expansions in the IT15 gene, Stevanin et al. (2002) identified 1 patient with a pure uninterrupted 50 CAG/CTG repeat in the JPH3 gene. The patient was a 44-year-old Moroccan woman with subcortical dementia, mild choreic movements, and atrophy of the cerebral cortex.</Attribute><Citation><ID Source=\"PubMed\">11914418</ID></Citation></ObservedData><ObservedData><Attribute Type=\"Description\">In 3 members of a family with HLD2, originally reported by Walker et al. (2002) as having choreoacanthocytosis, Walker et al. (2003) identified trinucleotide repeat expansions of 51, 58, and 57 triplets in the JPH3 gene. 
The authors identified affected members of 2 other families with trinucleotide repeats in the JPH3 gene.</Attribute><Citation><ID Source=\"PubMed\">11940688</ID></Citation><Citation><ID Source=\"PubMed\">14557581</ID></Citation></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue Type=\"Preferred\">JPH3, CAG(n) EXPANSION</ElementValue></Name><AttributeSet><Attribute Type=\"NonHGVS\">CAG(n) EXPANSION</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">JPH3</ElementValue></Symbol></MeasureRelationship><XRef DB=\"OMIM\" ID=\"605268.0001\" Type=\"Allelic variant\" /></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">HUNTINGTON DISEASE-LIKE 2</ElementValue></Name></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000007484.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\"\n            xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n    <ClinVarSet ID=\"47070679\">\n        <RecordStatus>current</RecordStatus>\n        <Title>NM_007262.4(PARK7):c.[-24+75_-24+92dup;487G&gt;A] AND Parkinson disease 7</Title>\n        <ReferenceClinVarAssertion DateCreated=\"2012-08-13\" DateLastUpdated=\"2019-11-02\" ID=\"64987\">\n            <ClinVarAccession Acc=\"RCV000007484\" Version=\"2\" Type=\"RCV\" DateUpdated=\"2019-11-02\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2005-11-01\">\n                <ReviewStatus>no assertion criteria provided</ReviewStatus>\n                <Description>Pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>not provided</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>literature only</MethodType>\n                </Method>\n                <ObservedData ID=\"47906392\">\n                    <Attribute Type=\"Description\">In 3 affected sibs from a consanguineous southern Italian family with\n                        early-onset parkinsonism (606324), Annesi et al. (2005) identified double homozygosity for\n                        mutations in the DJ1 gene. One was a 3385G-A transition in exon 7, resulting in a glu163-to-lys\n                        (E163K) substitution, and the other was an 18-bp duplication (168-185dup) in the promoter\n                        region. 
Age at disease onset was 36, 35, and 24 years, respectively. Severe amyotrophic lateral\n                        sclerosis and cognitive impairment were prominent in 1 sib, while the other 2 had prominent\n                        parkinsonism and behavioral abnormalities.\n                    </Attribute>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">16240358</ID>\n                    </Citation>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Haplotype\" ID=\"446717\" Acc=\"VCV000446717\" Version=\"1\">\n                <Measure Type=\"single nucleotide variant\" ID=\"22107\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">NM_007262.5(PARK7):c.487G&gt;A (p.Glu163Lys)</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_001123377\" Version=\"1\" Change=\"c.487G&gt;A\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_001123377.1:c.487G&gt;A\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_007262\" Version=\"5\" Change=\"c.487G&gt;A\" Type=\"HGVS, coding, RefSeq\">\n                            NM_007262.5:c.487G&gt;A\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_008271\" Version=\"1\" Change=\"g.28318G&gt;A\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_008271.1:g.28318G&gt;A\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000001\" Version=\"11\" Change=\"g.7984971G&gt;A\"\n                                   Type=\"HGVS, genomic, top level\" 
integerValue=\"38\">NC_000001.11:g.7984971G&gt;A\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000001\" Version=\"10\" Change=\"g.8045031G&gt;A\"\n                                   Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000001.10:g.8045031G&gt;A\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"Q99497\" Change=\"p.Glu163Lys\" Type=\"HGVS, protein\">Q99497:p.Glu163Lys\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_001116849\" Version=\"1\" Change=\"p.Glu163Lys\"\n                                   Type=\"HGVS, protein, RefSeq\">NP_001116849.1:p.Glu163Lys\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_009193\" Version=\"2\" Change=\"p.Glu163Lys\" Type=\"HGVS, protein, RefSeq\">\n                            NP_009193.2:p.Glu163Lys\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">missense variant</Attribute>\n                        <XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_001123377.1:c.487G&gt;A\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">missense variant</Attribute>\n                        <XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_007262.5:c.487G&gt;A\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                   
     <Attribute Type=\"ProteinChange1LetterCode\">E163K</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"ProteinChange3LetterCode\">GLU163LYS</Attribute>\n                    </AttributeSet>\n                    <AlleleFrequencyList>\n                        <AlleleFrequency Value=\"0.00002\" Source=\"Exome Aggregation Consortium (ExAC)\"/>\n                        <AlleleFrequency Value=\"0.00001\" Source=\"The Genome Aggregation Database (gnomAD), exomes\"/>\n                    </AlleleFrequencyList>\n                    <CytogeneticLocation>1p36.23</CytogeneticLocation>\n                    <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                      AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"7984971\"\n                                      stop=\"7984971\" display_start=\"7984971\" display_stop=\"7984971\" variantLength=\"1\"\n                                      positionVCF=\"7984971\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"A\"/>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"8045031\"\n                                      stop=\"8045031\" display_start=\"8045031\" display_stop=\"8045031\" variantLength=\"1\"\n                                      positionVCF=\"8045031\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"A\"/>\n                    <MeasureRelationship Type=\"within single gene\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">Parkinsonism associated deglycase</ElementValue>\n                        </Name>\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">PARK7</ElementValue>\n                        </Symbol>\n   
                     <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"7961654\"\n                                          stop=\"7985505\" display_start=\"7961654\" display_stop=\"7985505\" Strand=\"+\"/>\n                        <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                          AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"8021713\"\n                                          stop=\"8045341\" display_start=\"8021713\" display_stop=\"8045341\"\n                                          variantLength=\"23629\" Strand=\"+\"/>\n                        <XRef ID=\"11315\" DB=\"Gene\"/>\n                        <XRef Type=\"MIM\" ID=\"602533\" DB=\"OMIM\"/>\n                        <XRef ID=\"HGNC:16369\" DB=\"HGNC\"/>\n                    </MeasureRelationship>\n                    <XRef ID=\"Q99497#VAR_034801\" DB=\"UniProtKB\"/>\n                    <XRef Type=\"Allelic variant\" ID=\"602533.0006\" DB=\"OMIM\"/>\n                    <XRef Type=\"rs\" ID=\"74315354\" DB=\"dbSNP\"/>\n                </Measure>\n                <Measure Type=\"Duplication\" ID=\"75266\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">NM_007262.5(PARK7):c.-24+75_-24+92dup</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_001123377\" Version=\"1\" Change=\"c.-24+133_-24+150dup\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_001123377.1:c.-24+133_-24+150dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_007262\" Version=\"5\" Change=\"c.-24+75_-24+92dup\"\n                                 
  Type=\"HGVS, coding, RefSeq\">NM_007262.5:c.-24+75_-24+92dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_008271\" Version=\"1\" Change=\"g.5215_5232dup\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_008271.1:g.5215_5232dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000001\" Version=\"11\" Change=\"g.7961868_7961885dup\"\n                                   Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000001.11:g.7961868_7961885dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000001\" Version=\"10\" Change=\"g.8021928_8021945dup\"\n                                   Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">\n                            NC_000001.10:g.8021928_8021945dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_007262\" Version=\"4\" Change=\"c.-24+75_-24+92dup\" Type=\"HGVS, previous\">\n                            NM_007262.4:c.-24+75_-24+92dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">intron variant</Attribute>\n                        <XRef ID=\"SO:0001627\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_001123377.1:c.-24+133_-24+150dup\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">intron variant</Attribute>\n                        <XRef ID=\"SO:0001627\" DB=\"Sequence 
Ontology\"/>\n                        <XRef ID=\"NM_007262.5:c.-24+75_-24+92dup\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <GlobalMinorAlleleFrequency Value=\"0.09285\" Source=\"1000 Genomes Project\"\n                                                MinorAllele=\"GTGCTGGACGGTGTCCC\"/>\n                    <CytogeneticLocation>1p36.23</CytogeneticLocation>\n                    <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                      AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"7961868\"\n                                      stop=\"7961885\" display_start=\"7961868\" display_stop=\"7961885\" variantLength=\"18\"\n                                      positionVCF=\"7961850\" referenceAlleleVCF=\"G\"\n                                      alternateAlleleVCF=\"GGTGCTGGACGGTGTCCCT\"/>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"8021928\"\n                                      stop=\"8021945\" display_start=\"8021928\" display_stop=\"8021945\" variantLength=\"18\"\n                                      positionVCF=\"8021910\" referenceAlleleVCF=\"G\"\n                                      alternateAlleleVCF=\"GGTGCTGGACGGTGTCCCT\"/>\n                    <MeasureRelationship Type=\"within single gene\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">Parkinsonism associated deglycase</ElementValue>\n                        </Name>\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">PARK7</ElementValue>\n                        </Symbol>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          
AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"7961654\"\n                                          stop=\"7985505\" display_start=\"7961654\" display_stop=\"7985505\" Strand=\"+\"/>\n                        <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                          AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"8021713\"\n                                          stop=\"8045341\" display_start=\"8021713\" display_stop=\"8045341\"\n                                          variantLength=\"23629\" Strand=\"+\"/>\n                        <XRef ID=\"11315\" DB=\"Gene\"/>\n                        <XRef Type=\"MIM\" ID=\"602533\" DB=\"OMIM\"/>\n                        <XRef ID=\"HGNC:16369\" DB=\"HGNC\"/>\n                    </MeasureRelationship>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">16240358</ID>\n                    </Citation>\n                    <XRef Type=\"Allelic variant\" ID=\"602533.0006\" DB=\"OMIM\"/>\n                    <XRef Type=\"rs\" ID=\"200968609\" DB=\"dbSNP\"/>\n                </Measure>\n                <Name>\n                    <ElementValue Type=\"Preferred\">NM_007262.4(PARK7):c.[-24+75_-24+92dup;487G&gt;A]</ElementValue>\n                </Name>\n                <Name>\n                    <ElementValue Type=\"Alternate\">PARK7, GLU163LYS AND 18-BP DUP</ElementValue>\n                    <XRef Type=\"Allelic variant\" ID=\"602533.0006\" DB=\"OMIM\"/>\n                </Name>\n                <AttributeSet>\n                    <Attribute Type=\"HGVS, genomic, RefSeqGene\" Change=\"g.[5215_5232dup;28318G&gt;A]\">\n                        NG_008271.1:g.[5215_5232dup;28318G&gt;A]\n                    </Attribute>\n                </AttributeSet>\n                <AttributeSet>\n                    <Attribute Type=\"HGVS, coding, RefSeq\" 
Change=\"c.[-24+75_-24+92dup;487G&gt;A]\">\n                        NM_007262.4:c.[-24+75_-24+92dup;487G&gt;A]\n                    </Attribute>\n                </AttributeSet>\n                <XRef ID=\"CA030604\" DB=\"ClinGen\"/>\n                <XRef Type=\"Allelic variant\" ID=\"602533.0006\" DB=\"OMIM\"/>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\" ID=\"1946\">\n                <Trait ID=\"3764\" Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Parkinson disease 7</ElementValue>\n                        <XRef ID=\"Parkinson+disease+7/9090\" DB=\"Genetic Alliance\"/>\n                    </Name>\n                    <Name>\n                        <ElementValue Type=\"Alternate\">PARKINSON DISEASE 7, AUTOSOMAL RECESSIVE EARLY-ONSET\n                        </ElementValue>\n                        <XRef Type=\"MIM\" ID=\"606324\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"602533.0001\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"602533.0002\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"602533.0003\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"602533.0004\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"602533.0005\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"602533.0006\" DB=\"OMIM\"/>\n                    </Name>\n                    <Symbol>\n                        <ElementValue Type=\"Preferred\">PARK7</ElementValue>\n                        <XRef Type=\"MIM\" ID=\"606324\" DB=\"OMIM\"/>\n                    </Symbol>\n                    <AttributeSet>\n                        <Attribute Type=\"public definition\">Parkinson disease is a progressive disorder of the nervous\n                            system. 
The disorder affects several regions of the brain, especially an area called the\n                            substantia nigra that controls balance and movement.Often the first symptom of Parkinson\n                            disease is trembling or shaking (tremor) of a limb, especially when the body is at rest.\n                            Typically, the tremor begins on one side of the body, usually in one hand. Tremors can also\n                            affect the arms, legs, feet, and face. Other characteristic symptoms of Parkinson disease\n                            include rigidity or stiffness of the limbs and torso, slow movement (bradykinesia) or an\n                            inability to move (akinesia), and impaired balance and coordination (postural instability).\n                            These symptoms worsen slowly over time.Parkinson disease can also affect emotions and\n                            thinking ability (cognition). Some affected individuals develop psychiatric conditions such\n                            as depression and visual hallucinations. People with Parkinson disease also have an\n                            increased risk of developing dementia, which is a decline in intellectual functions\n                            including judgment and memory.Generally, Parkinson disease that begins after age 50 is\n                            called late-onset disease. The condition is described as early-onset disease if signs and\n                            symptoms begin before age 50. 
Early-onset cases that begin before age 20 are sometimes\n                            referred to as juvenile-onset Parkinson disease.\n                        </Attribute>\n                        <XRef ID=\"parkinson-disease\" DB=\"Genetics Home Reference\"/>\n                    </AttributeSet>\n                    <Citation Type=\"review\" Abbrev=\"GeneReviews\">\n                        <ID Source=\"PubMed\">20301402</ID>\n                        <ID Source=\"BookShelf\">NBK1223</ID>\n                    </Citation>\n                    <Citation Type=\"Recommendation\" Abbrev=\"EFNS/MDS-ES, 2013\">\n                        <ID Source=\"PubMed\">23279440</ID>\n                    </Citation>\n                    <XRef ID=\"C1853445\" DB=\"MedGen\"/>\n                    <XRef ID=\"2828\" DB=\"Orphanet\"/>\n                    <XRef Type=\"MIM\" ID=\"606324\" DB=\"OMIM\"/>\n                </Trait>\n            </TraitSet>\n        </ReferenceClinVarAssertion>\n        <ClinVarAssertion ID=\"27684\">\n            <ClinVarSubmissionID localKey=\"602533.0006_PARKINSON DISEASE 7, AUTOSOMAL RECESSIVE EARLY-ONSET\"\n                                 submitter=\"OMIM\" submitterDate=\"2017-12-08\"\n                                 title=\"PARK7, GLU163LYS AND 18-BP DUP_PARKINSON DISEASE 7, AUTOSOMAL RECESSIVE EARLY-ONSET\"/>\n            <ClinVarAccession Acc=\"SCV000027684\" Version=\"2\" Type=\"SCV\" OrgID=\"3\" OrganizationCategory=\"resource\"\n                              OrgType=\"primary\" DateUpdated=\"2019-08-03\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2005-11-01\">\n                <ReviewStatus>no assertion criteria provided</ReviewStatus>\n                <Description>Pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ExternalID DB=\"OMIM\" ID=\"602533.0006\" Type=\"Allelic variant\"/>\n            
<ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species>human</Species>\n                    <AffectedStatus>not provided</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>literature only</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"Description\">In 3 affected sibs from a consanguineous southern Italian family with\n                        early-onset parkinsonism (606324), Annesi et al. (2005) identified double homozygosity for\n                        mutations in the DJ1 gene. One was a 3385G-A transition in exon 7, resulting in a glu163-to-lys\n                        (E163K) substitution, and the other was an 18-bp duplication (168-185dup) in the promoter\n                        region. Age at disease onset was 36, 35, and 24 years, respectively. Severe amyotrophic lateral\n                        sclerosis and cognitive impairment were prominent in 1 sib, while the other 2 had prominent\n                        parkinsonism and behavioral abnormalities.\n                    </Attribute>\n                    <Citation>\n                        <ID Source=\"PubMed\">16240358</ID>\n                    </Citation>\n                    <XRef DB=\"OMIM\" ID=\"606324\" Type=\"MIM\"/>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">PARK7, GLU163LYS AND 18-BP DUP</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Type=\"NonHGVS\">GLU163LYS AND 18-BP DUP</Attribute>\n                    </AttributeSet>\n                    <MeasureRelationship Type=\"variant in gene\">\n                        <Symbol>\n                            <ElementValue 
Type=\"Preferred\">PARK7</ElementValue>\n                        </Symbol>\n                    </MeasureRelationship>\n                    <XRef DB=\"OMIM\" ID=\"602533.0006\" Type=\"Allelic variant\"/>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">PARKINSON DISEASE 7, AUTOSOMAL RECESSIVE EARLY-ONSET\n                        </ElementValue>\n                    </Name>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n    </ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000010551.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\"\n            xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n    <ClinVarSet ID=\"47073318\">\n        <RecordStatus>current</RecordStatus>\n        <Title>NM_000451.3(SHOX):c.394C&gt;G (p.Leu132Val) AND Leri Weill dyschondrosteosis</Title>\n        <ReferenceClinVarAssertion DateCreated=\"2012-08-13\" DateLastUpdated=\"2019-11-02\" ID=\"68054\">\n            <ClinVarAccession Acc=\"RCV000010551\" Version=\"3\" Type=\"RCV\" DateUpdated=\"2019-11-02\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2000-08-01\">\n                <ReviewStatus>no assertion criteria provided</ReviewStatus>\n                <Description>Pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>not provided</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>literature only</MethodType>\n                </Method>\n                <ObservedData ID=\"47909786\">\n                    <Attribute Type=\"Description\">In a patient with Leri-Weill dyschondrosteosis (127300), Grigelioniene\n                        et al. 
(2000) identified a 485C-G transversion in the SHOX gene, resulting in a leu132-to-val\n                        amino acid substitution.\n                    </Attribute>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">11030412</ID>\n                    </Citation>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\" ID=\"9875\" Acc=\"VCV000009875\" Version=\"1\">\n                <Measure Type=\"single nucleotide variant\" ID=\"24914\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">NM_000451.3(SHOX):c.394C&gt;G (p.Leu132Val)</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_000451\" Version=\"3\" Change=\"c.394C&gt;G\" Type=\"HGVS, coding, RefSeq\">\n                            NM_000451.3:c.394C&gt;G\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_006883\" Version=\"2\" Change=\"c.394C&gt;G\" Type=\"HGVS, coding, RefSeq\">\n                            NM_006883.2:c.394C&gt;G\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_009385\" Version=\"2\" Change=\"g.15391C&gt;G\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_009385.2:g.15391C&gt;G\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_046891\" Version=\"1\" Change=\"g.1782C&gt;G\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_046891.1:g.1782C&gt;G\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute 
Accession=\"NC_000023\" Version=\"11\" Change=\"g.634734C&gt;G\"\n                                   Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000023.11:g.634734C&gt;G\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000024\" Version=\"10\" Change=\"g.634734C&gt;G\"\n                                   Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000024.10:g.634734C&gt;G\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000024\" Version=\"9\" Change=\"g.545469C&gt;G\"\n                                   Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000024.9:g.545469C&gt;G\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000023\" Version=\"10\" Change=\"g.595469C&gt;G\"\n                                   Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000023.10:g.595469C&gt;G\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"O15266\" Change=\"p.Leu132Val\" Type=\"HGVS, protein\">O15266:p.Leu132Val\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_000442\" Version=\"1\" Change=\"p.Leu132Val\" Type=\"HGVS, protein, RefSeq\">\n                            NP_000442.1:p.Leu132Val\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_006874\" Version=\"1\" Change=\"p.Leu132Val\" Type=\"HGVS, protein, RefSeq\">\n                            
NP_006874.1:p.Leu132Val\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">missense variant</Attribute>\n                        <XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_000451.3:c.394C&gt;G\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">missense variant</Attribute>\n                        <XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_006883.2:c.394C&gt;G\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"ProteinChange1LetterCode\">L132V</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"ProteinChange3LetterCode\">LEU132VAL</Attribute>\n                    </AttributeSet>\n                    <CytogeneticLocation>Xp22.33</CytogeneticLocation>\n                    <CytogeneticLocation>Yp11.2</CytogeneticLocation>\n                    <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                      AssemblyStatus=\"current\" Chr=\"X\" Accession=\"NC_000023.11\" start=\"634734\"\n                                      stop=\"634734\" display_start=\"634734\" display_stop=\"634734\" variantLength=\"1\"\n                                      positionVCF=\"634734\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"G\"/>\n                    <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                      AssemblyStatus=\"current\" Chr=\"Y\" Accession=\"NC_000024.10\" start=\"634734\"\n                                      stop=\"634734\" display_start=\"634734\" 
display_stop=\"634734\" variantLength=\"1\"\n                                      positionVCF=\"634734\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"G\"/>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"Y\" Accession=\"NC_000024.9\" start=\"545469\"\n                                      stop=\"545469\" display_start=\"545469\" display_stop=\"545469\" variantLength=\"1\"\n                                      positionVCF=\"545469\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"G\"/>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"X\" Accession=\"NC_000023.10\" start=\"595469\"\n                                      stop=\"595469\" display_start=\"595469\" display_stop=\"595469\" variantLength=\"1\"\n                                      positionVCF=\"595469\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"G\"/>\n                    <MeasureRelationship Type=\"within multiple genes by overlap\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">meiotic recombination hotspot SHOX</ElementValue>\n                        </Name>\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">LOC107652445</ElementValue>\n                        </Symbol>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"X\" Accession=\"NC_000023.11\" start=\"633053\"\n                                          stop=\"636275\" display_start=\"633053\" display_stop=\"636275\" Strand=\"+\"/>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                            
              AssemblyStatus=\"current\" Chr=\"Y\" Accession=\"NC_000024.10\" start=\"633053\"\n                                          stop=\"636275\" display_start=\"633053\" display_stop=\"636275\" Strand=\"+\"/>\n                        <XRef ID=\"107652445\" DB=\"Gene\"/>\n                    </MeasureRelationship>\n                    <MeasureRelationship Type=\"within multiple genes by overlap\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">short stature homeobox</ElementValue>\n                        </Name>\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">SHOX</ElementValue>\n                        </Symbol>\n                        <AttributeSet>\n                            <Attribute dateValue=\"2015-10-15\" Type=\"Haploinsufficiency\">Sufficient evidence for dosage\n                                pathogenicity\n                            </Attribute>\n                            <Citation>\n                                <URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=SHOX</URL>\n                            </Citation>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute dateValue=\"2015-10-15\" Type=\"Triplosensitivity\">No evidence available</Attribute>\n                            <Citation>\n                                <URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=SHOX</URL>\n                            </Citation>\n                        </AttributeSet>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"X\" Accession=\"NC_000023.11\" start=\"624344\"\n                                          stop=\"659411\" display_start=\"624344\" display_stop=\"659411\" Strand=\"+\"/>\n                       
 <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"Y\" Accession=\"NC_000024.10\" start=\"624344\"\n                                          stop=\"659411\" display_start=\"624344\" display_stop=\"659411\" Strand=\"+\"/>\n                        <XRef ID=\"6473\" DB=\"Gene\"/>\n                        <XRef Type=\"MIM\" ID=\"312865\" DB=\"OMIM\"/>\n                        <XRef Type=\"MIM\" ID=\"400020\" DB=\"OMIM\"/>\n                        <XRef ID=\"HGNC:10853\" DB=\"HGNC\"/>\n                    </MeasureRelationship>\n                    <XRef ID=\"O15266#VAR_019414\" DB=\"UniProtKB\"/>\n                    <XRef Type=\"Allelic variant\" ID=\"312865.0004\" DB=\"OMIM\"/>\n                    <XRef Type=\"rs\" ID=\"137852554\" DB=\"dbSNP\"/>\n                </Measure>\n                <Name>\n                    <ElementValue Type=\"Preferred\">NM_000451.3(SHOX):c.394C&gt;G (p.Leu132Val)</ElementValue>\n                </Name>\n                <XRef ID=\"CA254914\" DB=\"ClinGen\"/>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\" ID=\"2777\">\n                <Trait ID=\"2677\" Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Leri Weill dyschondrosteosis</ElementValue>\n                        <XRef ID=\"Leri+Weill+dyschondrosteosis/4180\" DB=\"Genetic Alliance\"/>\n                        <XRef ID=\"3224\" DB=\"Office of Rare Diseases\"/>\n                    </Name>\n                    <Symbol>\n                        <ElementValue Type=\"Preferred\">LWD</ElementValue>\n                        <XRef Type=\"MIM\" ID=\"127300\" DB=\"OMIM\"/>\n                        <XRef ID=\"3224\" DB=\"Office of Rare Diseases\"/>\n                    </Symbol>\n                    <Symbol>\n                        <ElementValue Type=\"Alternate\">DCO</ElementValue>\n                        
<XRef Type=\"MIM\" ID=\"127300\" DB=\"OMIM\"/>\n                        <XRef ID=\"3224\" DB=\"Office of Rare Diseases\"/>\n                    </Symbol>\n                    <AttributeSet>\n                        <Attribute Type=\"public definition\">The phenotypic spectrum of SHOX deficiency disorders, caused\n                            by haploinsufficiency of the short stature homeobox-containing gene (SHOX), ranges from\n                            Leri-Weill dyschondrosteosis (LWD) at the severe end of the spectrum to nonspecific short\n                            stature at the mild end of the spectrum. In adults with SHOX deficiency, the proportion of\n                            LWD versus short stature without features of LWD is not well defined. In LWD the classic\n                            clinical triad is short stature, mesomelia, and Madelung deformity. Mesomelia, in which the\n                            middle portion of a limb is shortened in relation to the proximal portion, can be evident\n                            first in school-aged children and increases with age in frequency and severity. Madelung\n                            deformity (abnormal alignment of the radius, ulna, and carpal bones at the wrist) typically\n                            develops in mid-to-late childhood and is more common and severe in females. 
The phenotype of\n                            short stature caused by SHOX deficiency in the absence of mesomelia and Madelung deformity\n                            (called SHOX-deficient short stature in this GeneReview) is highly variable, even within the\n                            same family.\n                        </Attribute>\n                        <XRef ID=\"NBK1215\" DB=\"GeneReviews\"/>\n                    </AttributeSet>\n                    <Citation Type=\"review\" Abbrev=\"GeneReviews\">\n                        <ID Source=\"PubMed\">20301394</ID>\n                        <ID Source=\"BookShelf\">NBK1215</ID>\n                    </Citation>\n                    <XRef ID=\"C0265309\" DB=\"MedGen\"/>\n                    <XRef Type=\"MIM\" ID=\"127300\" DB=\"OMIM\"/>\n                </Trait>\n            </TraitSet>\n        </ReferenceClinVarAssertion>\n        <ClinVarAssertion ID=\"30777\">\n            <ClinVarSubmissionID localKey=\"312865.0004_LERI-WEILL DYSCHONDROSTEOSIS\" submitter=\"OMIM\"\n                                 submitterDate=\"2018-11-07\" title=\"SHOX, LEU132VAL_LERI-WEILL DYSCHONDROSTEOSIS\"/>\n            <ClinVarAccession Acc=\"SCV000030777\" Version=\"2\" Type=\"SCV\" OrgID=\"3\" OrganizationCategory=\"resource\"\n                              OrgType=\"primary\" DateUpdated=\"2019-03-31\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2000-08-01\">\n                <ReviewStatus>no assertion criteria provided</ReviewStatus>\n                <Description>Pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ExternalID DB=\"OMIM\" ID=\"312865.0004\" Type=\"Allelic variant\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species>human</Species>\n                    <AffectedStatus>not 
provided</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>literature only</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"Description\">In a patient with Leri-Weill dyschondrosteosis (127300), Grigelioniene\n                        et al. (2000) identified a 485C-G transversion in the SHOX gene, resulting in a leu132-to-val\n                        amino acid substitution.\n                    </Attribute>\n                    <Citation>\n                        <ID Source=\"PubMed\">11030412</ID>\n                    </Citation>\n                    <XRef DB=\"OMIM\" ID=\"127300\" Type=\"MIM\"/>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">SHOX, LEU132VAL</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Type=\"NonHGVS\">LEU132VAL</Attribute>\n                    </AttributeSet>\n                    <MeasureRelationship Type=\"variant in gene\">\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">SHOX</ElementValue>\n                        </Symbol>\n                    </MeasureRelationship>\n                    <XRef DB=\"OMIM\" ID=\"312865.0004\" Type=\"Allelic variant\"/>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">LERI-WEILL DYSCHONDROSTEOSIS</ElementValue>\n                    </Name>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n    </ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000016673.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\"\n            xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n    <ClinVarSet ID=\"49090582\">\n        <RecordStatus>current</RecordStatus>\n        <Title>NM_000518.4(HBB):c.126_129delCTTT (p.Phe42fs) AND beta^0^ Thalassemia</Title>\n        <ReferenceClinVarAssertion DateCreated=\"2012-08-13\" DateLastUpdated=\"2019-12-23\" ID=\"74176\">\n            <ClinVarAccession Acc=\"RCV000016673\" Version=\"29\" Type=\"RCV\" DateUpdated=\"2019-12-23\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2002-09-28\">\n                <ReviewStatus>no assertion criteria provided</ReviewStatus>\n                <Description>Pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>not provided</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>literature only</MethodType>\n                </Method>\n                <ObservedData ID=\"49933449\">\n                    <Attribute Type=\"Description\">Frameshift, -4, codons 41/42, TTCTTT to TT, was found in an Asian\n                        Indian with beta-zero-thalassemia (613985) by Kazazian et al. (1984) and in Chinese by Kimura et\n                        al. 
(1983).\n                    </Attribute>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">6826539</ID>\n                    </Citation>\n                    <Citation Type=\"general\">\n                        <CitationText>Kazazian, H. H., Jr. Personal Communication. 1982. Baltimore, Md.</CitationText>\n                    </Citation>\n                </ObservedData>\n                <ObservedData ID=\"49933449\">\n                    <Attribute Type=\"Description\">Lau et al. (1997) found that the deletion of CTTT at codons 41/42\n                        accounted for 40% of all beta-thalassemia alleles in Hong Kong. Chiu et al. (2002) designed\n                        allele-specific primers and a fluorescent probe for detection of this mutation in the HBB gene\n                        from maternal plasma by real-time PCR. Using this method, they showed that beta-thalassemia\n                        major could be excluded from fetal inheritance by demonstrating absence of inheritance of the\n                        paternally transmitted mutation. By studying circulating fetal DNA in the maternal plasma for\n                        this mutation, Chiu et al. 
(2002) added beta-thalassemia to the list of disorders that could be\n                        prenatally diagnosed using this noninvasive method, which had previously demonstrated usefulness\n                        in diagnosing sex-linked diseases (Costa et al., 2002) and fetal rhesus D status (Lo et al.,\n                        1998).\n                    </Attribute>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">12000828</ID>\n                    </Citation>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">12383672</ID>\n                    </Citation>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">9113933</ID>\n                    </Citation>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">9845707</ID>\n                    </Citation>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\" ID=\"15417\" Acc=\"VCV000015417\" Version=\"5\">\n                <Measure Type=\"Deletion\" ID=\"30456\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">NM_000518.4(HBB):c.126_129delCTTT (p.Phe42fs)</ElementValue>\n                    </Name>\n                    <Name>\n                        <ElementValue Type=\"Alternate\">41/42-TTCT</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_000518\" Version=\"4\" Change=\"c.124_127delTTCT\" Type=\"HGVS, coding\">\n                            NM_000518.4:c.124_127delTTCT\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_000518\" Version=\"5\" Change=\"c.126_129del\" Type=\"HGVS, coding, RefSeq\">\n                            NM_000518.5:c.126_129del\n               
         </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_042296\" Version=\"1\" Change=\"g.296_299del\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_042296.1:g.296_299del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_046672\" Version=\"1\" Change=\"g.4700_4703del\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_046672.1:g.4700_4703del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_059281\" Version=\"1\" Change=\"g.5306_5309del\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_059281.1:g.5306_5309del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_000007\" Version=\"3\" Change=\"g.70850_70853del\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_000007.3:g.70850_70853del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000011\" Version=\"10\" Change=\"g.5226765_5226768del\"\n                                   Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000011.10:g.5226765_5226768del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000011\" Version=\"10\" Change=\"g.5226763_5226766del\"\n                                   Type=\"HGVS, genomic, top level, other\" integerValue=\"38\">\n                            NC_000011.10:g.5226763_5226766del\n                        </Attribute>\n                    
</AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000011\" Version=\"9\" Change=\"g.5247993_5247996del\"\n                                   Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">\n                            NC_000011.9:g.5247993_5247996del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS, legacy\">HBB:c.126_129delCTTT</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_000518\" Version=\"4\" Change=\"c.126_129delCTTT\" Type=\"HGVS, previous\">\n                            NM_000518.4:c.126_129delCTTT\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS, protein\">p.Phe42Leufs*19</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS, protein\">p.Phe42LeufsTer17</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_000509\" Version=\"1\" Change=\"p.Phe42fs\" Type=\"HGVS, protein\">\n                            NP_000509.1:p.Phe42fs\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_000509\" Version=\"1\" Change=\"p.Phe42fs\" Type=\"HGVS, protein\">\n                            NP_000509.1:p.Phe42fs\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_000509\" Version=\"1\" Change=\"p.Phe42fs\" Type=\"HGVS, protein\">\n                            NP_000509.1:p.Phe42fs\n                        </Attribute>\n               
     </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_000509\" Version=\"1\" Change=\"p.Phe42fs\" Type=\"HGVS, protein, RefSeq\">\n                            NP_000509.1:p.Phe42fs\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">frameshift variant</Attribute>\n                        <XRef ID=\"SO:0001589\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_000518.5:c.126_129del\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"ProteinChange1LetterCode\">F42fs</Attribute>\n                    </AttributeSet>\n                    <GlobalMinorAlleleFrequency Value=\"0.00100\" Source=\"1000 Genomes Project\" MinorAllele=\"AA\"/>\n                    <CytogeneticLocation>11p15.4</CytogeneticLocation>\n                    <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                      AssemblyStatus=\"current\" Chr=\"11\" Accession=\"NC_000011.10\" start=\"5226763\"\n                                      stop=\"5226766\" display_start=\"5226763\" display_stop=\"5226766\" variantLength=\"4\"\n                                      positionVCF=\"5226762\" referenceAlleleVCF=\"CAAAG\" alternateAlleleVCF=\"C\"/>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"11\" Accession=\"NC_000011.9\" start=\"5247993\"\n                                      stop=\"5247996\" display_start=\"5247993\" display_stop=\"5247996\" variantLength=\"4\"\n                                      positionVCF=\"5247992\" referenceAlleleVCF=\"CAAAG\" alternateAlleleVCF=\"C\"/>\n                    <MeasureRelationship Type=\"within multiple 
genes by overlap\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">HBB recombination region</ElementValue>\n                        </Name>\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">LOC106099062</ElementValue>\n                        </Symbol>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"11\" Accession=\"NC_000011.10\" start=\"5226570\"\n                                          stop=\"5228834\" display_start=\"5226570\" display_stop=\"5228834\" Strand=\"+\"/>\n                        <XRef ID=\"106099062\" DB=\"Gene\"/>\n                    </MeasureRelationship>\n                    <MeasureRelationship Type=\"within multiple genes by overlap\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">hemoglobin subunit beta</ElementValue>\n                        </Name>\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">HBB</ElementValue>\n                        </Symbol>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"11\" Accession=\"NC_000011.10\" start=\"5225464\"\n                                          stop=\"5227071\" display_start=\"5225464\" display_stop=\"5227071\" Strand=\"-\"/>\n                        <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                          AssemblyStatus=\"previous\" Chr=\"11\" Accession=\"NC_000011.9\" start=\"5246695\"\n                                          stop=\"5248300\" display_start=\"5246695\" display_stop=\"5248300\"\n                                          variantLength=\"1606\" 
Strand=\"-\"/>\n                        <XRef ID=\"3043\" DB=\"Gene\"/>\n                        <XRef Type=\"MIM\" ID=\"141900\" DB=\"OMIM\"/>\n                        <XRef ID=\"HGNC:4827\" DB=\"HGNC\"/>\n                    </MeasureRelationship>\n                    <MeasureRelationship Type=\"within multiple genes by overlap\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">origin of replication at HBB</ElementValue>\n                        </Name>\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">LOC107133510</ElementValue>\n                        </Symbol>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"11\" Accession=\"NC_000011.10\" start=\"5222166\"\n                                          stop=\"5229620\" display_start=\"5222166\" display_stop=\"5229620\" Strand=\"+\"/>\n                        <XRef ID=\"107133510\" DB=\"Gene\"/>\n                    </MeasureRelationship>\n                    <XRef ID=\"GTR000500319\" DB=\"Genetic Testing Registry (GTR)\"/>\n                    <XRef ID=\"849\" DB=\"HBVAR\"/>\n                    <XRef ID=\"698148\" DB=\"Illumina Clinical Services Laboratory,Illumina\"/>\n                    <XRef Type=\"Allelic variant\" ID=\"141900.0326\" DB=\"OMIM\"/>\n                    <XRef Type=\"rs\" ID=\"80356821\" DB=\"dbSNP\"/>\n                </Measure>\n                <Name>\n                    <ElementValue Type=\"Preferred\">NM_000518.4(HBB):c.126_129delCTTT (p.Phe42fs)</ElementValue>\n                </Name>\n                <XRef ID=\"CA125284\" DB=\"ClinGen\"/>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\" ID=\"4683\">\n                <Trait ID=\"11956\" Type=\"Disease\">\n                    <Name>\n                        <ElementValue 
Type=\"Preferred\">beta^0^ Thalassemia</ElementValue>\n                        <XRef ID=\"86715000\" DB=\"SNOMED CT\"/>\n                    </Name>\n                    <Name>\n                        <ElementValue Type=\"Alternate\">Beta-zero-thalassemia</ElementValue>\n                    </Name>\n                    <XRef ID=\"C0271980\" DB=\"MedGen\"/>\n                </Trait>\n            </TraitSet>\n        </ReferenceClinVarAssertion>\n        <ClinVarAssertion ID=\"36943\">\n            <ClinVarSubmissionID localKey=\"141900.0326_BETA-ZERO-THALASSEMIA\" submitter=\"OMIM\"\n                                 submitterDate=\"2017-12-12\" title=\"HBB, 4-BP DEL, 41/42CTTT_BETA-ZERO-THALASSEMIA\"/>\n            <ClinVarAccession Acc=\"SCV000036943\" Version=\"3\" Type=\"SCV\" OrgID=\"3\" OrganizationCategory=\"resource\"\n                              OrgType=\"primary\" DateUpdated=\"2019-05-02\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2002-09-28\">\n                <ReviewStatus>no assertion criteria provided</ReviewStatus>\n                <Description>Pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ExternalID DB=\"OMIM\" ID=\"141900.0326\" Type=\"Allelic variant\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species>human</Species>\n                    <AffectedStatus>not provided</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>literature only</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"Description\">Frameshift, -4, codons 41/42, TTCTTT to TT, was found in an Asian\n                        Indian with beta-zero-thalassemia (613985) by Kazazian et al. (1984) and in Chinese by Kimura et\n                        al. 
(1983).\n                    </Attribute>\n                    <Citation>\n                        <CitationText>Kazazian, H. H., Jr. Personal Communication. 1982. Baltimore, Md.</CitationText>\n                    </Citation>\n                    <Citation>\n                        <ID Source=\"PubMed\">6826539</ID>\n                    </Citation>\n                    <XRef DB=\"OMIM\" ID=\"613985\" Type=\"MIM\"/>\n                </ObservedData>\n                <ObservedData>\n                    <Attribute Type=\"Description\">Lau et al. (1997) found that the deletion of CTTT at codons 41/42\n                        accounted for 40% of all beta-thalassemia alleles in Hong Kong. Chiu et al. (2002) designed\n                        allele-specific primers and a fluorescent probe for detection of this mutation in the HBB gene\n                        from maternal plasma by real-time PCR. Using this method, they showed that beta-thalassemia\n                        major could be excluded from fetal inheritance by demonstrating absence of inheritance of the\n                        paternally transmitted mutation. By studying circulating fetal DNA in the maternal plasma for\n                        this mutation, Chiu et al. 
(2002) added beta-thalassemia to the list of disorders that could be\n                        prenatally diagnosed using this noninvasive method, which had previously demonstrated usefulness\n                        in diagnosing sex-linked diseases (Costa et al., 2002) and fetal rhesus D status (Lo et al.,\n                        1998).\n                    </Attribute>\n                    <Citation>\n                        <ID Source=\"PubMed\">9113933</ID>\n                    </Citation>\n                    <Citation>\n                        <ID Source=\"PubMed\">12383672</ID>\n                    </Citation>\n                    <Citation>\n                        <ID Source=\"PubMed\">12000828</ID>\n                    </Citation>\n                    <Citation>\n                        <ID Source=\"PubMed\">9845707</ID>\n                    </Citation>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">HBB, 4-BP DEL, 41/42CTTT</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Type=\"NonHGVS\">4-BP DEL, 41/42CTTT</Attribute>\n                    </AttributeSet>\n                    <MeasureRelationship Type=\"variant in gene\">\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">HBB</ElementValue>\n                        </Symbol>\n                    </MeasureRelationship>\n                    <XRef DB=\"OMIM\" ID=\"141900.0326\" Type=\"Allelic variant\"/>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">BETA-ZERO-THALASSEMIA</ElementValue>\n                    </Name>\n                
</Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n    </ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000017510.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"47079000\"><RecordStatus>current</RecordStatus><Title>NM_004285.4(H6PD):c.1860_1861insACAGGTGGTTGACCTGTGGCCGGGTCTGA (p.Glu621delinsThrGlyGlyTer) AND Cortisone reductase deficiency 1</Title><ReferenceClinVarAssertion DateCreated=\"2012-08-13\" DateLastUpdated=\"2019-11-02\" ID=\"75013\"><ClinVarAccession Acc=\"RCV000017510\" Version=\"24\" Type=\"RCV\" DateUpdated=\"2019-11-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2008-10-01\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><Method><MethodType>literature only</MethodType></Method><ObservedData ID=\"47915430\"><Attribute Type=\"Description\">In the Scottish female with apparent cortisone reductase deficiency (CORTRD1; 604931) reported by Jamieson et al. (1999), Draper et al. (2003) detected a heterozygous 29-bp insertion between nucleotides 620 and 621 of the H6PD gene. Functional studies in hepatic WRL68 cells demonstrated that the 620ins29bp mutant was devoid of H6PDH activity. The 620_621ins29 mutation was found in none of 100 Scottish controls. In this patient, Draper et al. (2003) also detected homozygosity for a pair of linked intronic mutations in the HSD11B1 gene (600713.0001). In 100 Scottish controls homozygosity for these intronic changes had a frequency of 2%. The patient reported by Jamieson et al. 
(1999) presented at the age of 36 years with hirsutism, oligomenorrhea, obesity, acne, and infertility, features resembling those of polycystic ovary syndrome (PCOS; 184700).</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">10522997</ID></Citation><Citation Type=\"general\"><ID Source=\"PubMed\">12858176</ID></Citation></ObservedData><ObservedData ID=\"47915430\"><Attribute Type=\"Description\">In the Scottish woman with cortisone reductase deficiency reported by Jamieson et al. (1999), Lavery et al. (2008) detected compound heterozygosity for the 620_621ins29 mutation in H6PD and a 960G-A transition in exon 4 (138090.0003). The 29-bp insertion caused a frameshift predicted to result in an in-frame stop codon that truncates the protein by 268 amino acids (Asp620fsTer3). No mutations or sequence variants were detected in the HSD11B1 gene.</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">10522997</ID></Citation><Citation Type=\"general\"><ID Source=\"PubMed\">18628520</ID></Citation></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"16130\" Acc=\"VCV000016130\" Version=\"1\"><Measure Type=\"Insertion\" ID=\"31169\"><Name><ElementValue Type=\"Preferred\">NM_004285.4(H6PD):c.1860_1861insACAGGTGGTTGACCTGTGGCCGGGTCTGA (p.Glu621delinsThrGlyGlyTer)</ElementValue></Name><Name><ElementValue Type=\"Alternate\">H6PD, 29-BP INS, NT620</ElementValue><XRef Type=\"Allelic variant\" ID=\"138090.0001\" DB=\"OMIM\" /></Name><AttributeSet><Attribute Accession=\"NM_004285\" Version=\"4\" Change=\"c.1860_1861insACAGGTGGTTGACCTGTGGCCGGGTCTGA\" Type=\"HGVS, coding, RefSeq\">NM_004285.4:c.1860_1861insACAGGTGGTTGACCTGTGGCCGGGTCTGA</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001282587\" Version=\"2\" Change=\"c.1893_1894insACAGGTGGTTGACCTGTGGCCGGGTCTGA\" Type=\"HGVS, coding, RefSeq\">NM_001282587.2:c.1893_1894insACAGGTGGTTGACCTGTGGCCGGGTCTGA</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_012218\" Version=\"1\" 
Change=\"g.34550_34551insACAGGTGGTTGACCTGTGGCCGGGTCTGA\" Type=\"HGVS, genomic, RefSeqGene\">NG_012218.1:g.34550_34551insACAGGTGGTTGACCTGTGGCCGGGTCTGA</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000001\" Version=\"11\" Change=\"g.9264353_9264354insACAGGTGGTTGACCTGTGGCCGGGTCTGA\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000001.11:g.9264353_9264354insACAGGTGGTTGACCTGTGGCCGGGTCTGA</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000001\" Version=\"10\" Change=\"g.9324412_9324413insACAGGTGGTTGACCTGTGGCCGGGTCTGA\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000001.10:g.9324412_9324413insACAGGTGGTTGACCTGTGGCCGGGTCTGA</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_004285\" Version=\"3\" Change=\"c.1860_1861insACAGGTGGTTGACCTGTGGCCGGGTCTGA\" Type=\"HGVS, previous\">NM_004285.3:c.1860_1861insACAGGTGGTTGACCTGTGGCCGGGTCTGA</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_004276\" Version=\"2\" Change=\"p.Glu621delinsThrGlyGlyTer\" Type=\"HGVS, protein, RefSeq\">NP_004276.2:p.Glu621delinsThrGlyGlyTer</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001269516\" Version=\"1\" Change=\"p.Glu632delinsThrGlyGlyTer\" Type=\"HGVS, protein, RefSeq\">NP_001269516.1:p.Glu632delinsThrGlyGlyTer</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">nonsense</Attribute><XRef ID=\"SO:0001587\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001282587.2:c.1893_1894insACAGGTGGTTGACCTGTGGCCGGGTCTGA\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">nonsense</Attribute><XRef ID=\"SO:0001587\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_004285.4:c.1860_1861insACAGGTGGTTGACCTGTGGCCGGGTCTGA\" DB=\"RefSeq\" /></AttributeSet><AlleleFrequencyList><AlleleFrequency Value=\"0.00001\" Source=\"Exome Aggregation Consortium (ExAC)\" /></AlleleFrequencyList><CytogeneticLocation>1p36.22</CytogeneticLocation><SequenceLocation 
Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"9264353\" stop=\"9264354\" display_start=\"9264353\" display_stop=\"9264354\" variantLength=\"29\" positionVCF=\"9264353\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"CACAGGTGGTTGACCTGTGGCCGGGTCTGA\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"9324412\" stop=\"9324413\" display_start=\"9324412\" display_stop=\"9324413\" variantLength=\"29\" positionVCF=\"9324412\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"CACAGGTGGTTGACCTGTGGCCGGGTCTGA\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">hexose-6-phosphate dehydrogenase/glucose 1-dehydrogenase</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">H6PD</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"9234767\" stop=\"9271337\" display_start=\"9234767\" display_stop=\"9271337\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"9294862\" stop=\"9331395\" display_start=\"9294862\" display_stop=\"9331395\" variantLength=\"36534\" Strand=\"+\" /><XRef ID=\"9563\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"138090\" DB=\"OMIM\" /><XRef ID=\"HGNC:4795\" DB=\"HGNC\" /></MeasureRelationship><Citation Type=\"general\"><ID Source=\"PubMed\">12858176</ID></Citation><XRef Type=\"Allelic variant\" ID=\"138090.0001\" DB=\"OMIM\" /><XRef Type=\"rs\" ID=\"606231222\" DB=\"dbSNP\" /><Comment DataSource=\"NCBI curation\" Type=\"public\">NCBI staff reviewed the sequence information reported in PubMed 12858176 Fig. 
4a to determine the location of this insertion on the current reference sequence.</Comment></Measure><Name><ElementValue Type=\"Preferred\">NM_004285.4(H6PD):c.1860_1861insACAGGTGGTTGACCTGTGGCCGGGTCTGA (p.Glu621delinsThrGlyGlyTer)</ElementValue></Name><XRef ID=\"CA126205\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"2453\"><Trait ID=\"1021\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Cortisone reductase deficiency 1</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">CORTRD1</ElementValue><XRef Type=\"MIM\" ID=\"604931\" DB=\"OMIM\" /></Symbol><XRef ID=\"C3551716\" DB=\"MedGen\" /><XRef ID=\"168588\" DB=\"Orphanet\" /><XRef Type=\"MIM\" ID=\"604931\" DB=\"OMIM\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"37782\"><ClinVarSubmissionID localKey=\"138090.0001_CORTISONE REDUCTASE DEFICIENCY 1\" submitter=\"OMIM\" submitterDate=\"2014-11-17\" title=\"H6PD, 29-BP INS, NT620_CORTISONE REDUCTASE DEFICIENCY 1\" /><ClinVarAccession Acc=\"SCV000037782\" Version=\"2\" Type=\"SCV\" OrgID=\"3\" OrganizationCategory=\"resource\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2008-10-01\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"OMIM\" ID=\"138090.0001\" Type=\"Allelic variant\" /><ObservedIn><Sample><Origin>germline</Origin><Species>human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData><Attribute Type=\"Description\">In the Scottish female with apparent cortisone reductase deficiency (CORTRD1; 604931) reported by Jamieson et al. (1999), Draper et al. (2003) detected a heterozygous 29-bp insertion between nucleotides 620 and 621 of the H6PD gene. 
Functional studies in hepatic WRL68 cells demonstrated that the 620ins29bp mutant was devoid of H6PDH activity. The 620_621ins29 mutation was found in none of 100 Scottish controls. In this patient, Draper et al. (2003) also detected homozygosity for a pair of linked intronic mutations in the HSD11B1 gene (600713.0001). In 100 Scottish controls homozygosity for these intronic changes had a frequency of 2%. The patient reported by Jamieson et al. (1999) presented at the age of 36 years with hirsutism, oligomenorrhea, obesity, acne, and infertility, features resembling those of polycystic ovary syndrome (PCOS; 184700).</Attribute><Citation><ID Source=\"PubMed\">10522997</ID></Citation><Citation><ID Source=\"PubMed\">12858176</ID></Citation><XRef DB=\"OMIM\" ID=\"604931\" Type=\"MIM\" /><XRef DB=\"OMIM\" ID=\"184700\" Type=\"MIM\" /></ObservedData><ObservedData><Attribute Type=\"Description\">In the Scottish woman with cortisone reductase deficiency reported by Jamieson et al. (1999), Lavery et al. (2008) detected compound heterozygosity for the 620_621ins29 mutation in H6PD and a 960G-A transition in exon 4 (138090.0003). The 29-bp insertion caused a frameshift predicted to result in an in-frame stop codon that truncates the protein by 268 amino acids (Asp620fsTer3). 
No mutations or sequence variants were detected in the HSD11B1 gene.</Attribute><Citation><ID Source=\"PubMed\">10522997</ID></Citation><Citation><ID Source=\"PubMed\">18628520</ID></Citation></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue Type=\"Preferred\">H6PD, 29-BP INS, NT620</ElementValue></Name><AttributeSet><Attribute Type=\"NonHGVS\">29-BP INS, NT620</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">H6PD</ElementValue></Symbol></MeasureRelationship><XRef DB=\"OMIM\" ID=\"138090.0001\" Type=\"Allelic variant\" /></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">CORTISONE REDUCTASE DEFICIENCY 1</ElementValue></Name></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000021819.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"47082554\"><RecordStatus>current</RecordStatus><Title>NM_020975.6(RET):c.1896_1900delinsCGTGC (p.Glu632_Cys634delinsAspValArg) AND Multiple endocrine neoplasia, type 2</Title><ReferenceClinVarAssertion DateCreated=\"2012-08-13\" DateLastUpdated=\"2019-11-02\" ID=\"79322\"><ClinVarAccession Acc=\"RCV000021819\" Version=\"2\" Type=\"RCV\" DateUpdated=\"2019-11-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2018-05-04\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData ID=\"47921399\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"38624\" Acc=\"VCV000038624\" Version=\"1\"><Measure Type=\"Indel\" ID=\"47228\"><Name><ElementValue Type=\"Preferred\">NM_020975.6(RET):c.1896_1900delinsCGTGC (p.Glu632_Cys634delinsAspValArg)</ElementValue></Name><AttributeSet><Attribute Accession=\"LRG_518t1\" Change=\"c.1896_1900delGCTGTinsCGTGC\" Type=\"HGVS, coding, LRG\">LRG_518t1:c.1896_1900delGCTGTinsCGTGC</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_518t2\" Change=\"c.1896_1900delinsCGTGC\" Type=\"HGVS, coding, LRG\">LRG_518t2:c.1896_1900delinsCGTGC</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001355216\" Version=\"1\" Change=\"c.1134_1138delinsCGTGC\" Type=\"HGVS, coding, 
RefSeq\">NM_001355216.1:c.1134_1138delinsCGTGC</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_020630\" Version=\"5\" Change=\"c.1896_1900delinsCGTGC\" Type=\"HGVS, coding, RefSeq\">NM_020630.5:c.1896_1900delinsCGTGC</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_020975\" Version=\"6\" Change=\"c.1896_1900delinsCGTGC\" Type=\"HGVS, coding, RefSeq\">NM_020975.6:c.1896_1900delinsCGTGC</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_518\" Change=\"g.42428_42432delinsCGTGC\" Type=\"HGVS, genomic, LRG\">LRG_518:g.42428_42432delinsCGTGC</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_007489\" Version=\"1\" Change=\"g.42428_42432delinsCGTGC\" Type=\"HGVS, genomic, RefSeqGene\">NG_007489.1:g.42428_42432delinsCGTGC</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000010\" Version=\"11\" Change=\"g.43114496_43114500delinsCGTGC\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000010.11:g.43114496_43114500delinsCGTGC</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000010\" Version=\"10\" Change=\"g.43609944_43609948delinsCGTGC\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000010.10:g.43609944_43609948delinsCGTGC</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_020975\" Version=\"4\" Change=\"c.1896_1900delGCTGTinsCGTGC\" Type=\"HGVS, previous\">NM_020975.4:c.1896_1900delGCTGTinsCGTGC</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001342145\" Version=\"1\" Change=\"p.Glu378_Cys380delinsAspValArg\" Type=\"HGVS, protein, RefSeq\">NP_001342145.1:p.Glu378_Cys380delinsAspValArg</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_065681\" Version=\"1\" Change=\"p.Glu632_Cys634delinsAspValArg\" Type=\"HGVS, protein, RefSeq\">NP_065681.1:p.Glu632_Cys634delinsAspValArg</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_066124\" Version=\"1\" Change=\"p.Glu632_Cys634delinsAspValArg\" 
Type=\"HGVS, protein, RefSeq\">NP_066124.1:p.Glu632_Cys634delinsAspValArg</Attribute></AttributeSet><AttributeSet><Attribute Type=\"Location\">exon 11</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001355216.1:c.1134_1138delinsCGTGC\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_020630.5:c.1896_1900delinsCGTGC\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_020975.6:c.1896_1900delinsCGTGC\" DB=\"RefSeq\" /></AttributeSet><CytogeneticLocation>10q11.21</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"10\" Accession=\"NC_000010.11\" start=\"43114496\" stop=\"43114500\" display_start=\"43114496\" display_stop=\"43114500\" variantLength=\"5\" positionVCF=\"43114496\" referenceAlleleVCF=\"GCTGT\" alternateAlleleVCF=\"CGTGC\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"10\" Accession=\"NC_000010.10\" start=\"43609944\" stop=\"43609948\" display_start=\"43609944\" display_stop=\"43609948\" variantLength=\"5\" positionVCF=\"43609944\" referenceAlleleVCF=\"GCTGT\" alternateAlleleVCF=\"CGTGC\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">ret proto-oncogene</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">RET</ElementValue></Symbol><AttributeSet><Attribute dateValue=\"2011-10-28\" Type=\"Haploinsufficiency\">Sufficient evidence for dosage 
pathogenicity</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=RET</URL></Citation></AttributeSet><AttributeSet><Attribute dateValue=\"2011-10-28\" Type=\"Triplosensitivity\">No evidence available</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=RET</URL></Citation></AttributeSet><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"10\" Accession=\"NC_000010.11\" start=\"43077027\" stop=\"43130351\" display_start=\"43077027\" display_stop=\"43130351\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"10\" Accession=\"NC_000010.10\" start=\"43572516\" stop=\"43625798\" display_start=\"43572516\" display_stop=\"43625798\" variantLength=\"53283\" Strand=\"+\" /><XRef ID=\"5979\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"164761\" DB=\"OMIM\" /><XRef ID=\"HGNC:9967\" DB=\"HGNC\" /><Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations of 2013 (PubMed 23788249) for reporting incidental findings in exons.</Comment><Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations of 2016 (PubMed 27854360) for reporting incidental findings in exons.</Comment></MeasureRelationship><Citation Type=\"general\"><ID Source=\"PubMed\">8099202</ID></Citation><XRef Type=\"rs\" ID=\"377767408\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_020975.6(RET):c.1896_1900delinsCGTGC (p.Glu632_Cys634delinsAspValArg)</ElementValue></Name><XRef ID=\"CA008271\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"8702\"><Trait ID=\"14899\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Multiple endocrine neoplasia, type 2</ElementValue><XRef ID=\"Multiple+endocrine+neoplasia%2C+type+2/4957\" DB=\"Genetic Alliance\" /><XRef ID=\"3830\" DB=\"Office of 
Rare Diseases\" /><XRef ID=\"61808009\" DB=\"SNOMED CT\" /></Name><Symbol><ElementValue Type=\"Preferred\">MEN2</ElementValue><XRef ID=\"3830\" DB=\"Office of Rare Diseases\" /></Symbol><AttributeSet><Attribute Type=\"public definition\">Multiple endocrine neoplasia type 2 (MEN 2) includes the following phenotypes: MEN 2A, FMTC (familial medullary thyroid carcinoma, which may be a variant of MEN 2A), and MEN 2B. All three phenotypes involve high risk for development of medullary carcinoma of the thyroid (MTC); MEN 2A and MEN 2B involve an increased risk for pheochromocytoma; MEN 2A involves an increased risk for parathyroid adenoma or hyperplasia. Additional features in MEN 2B include mucosal neuromas of the lips and tongue, distinctive facies with enlarged lips, ganglioneuromatosis of the gastrointestinal tract, and a marfanoid habitus. MTC typically occurs in early childhood in MEN 2B, early adulthood in MEN 2A, and middle age in FMTC.</Attribute><XRef ID=\"NBK1257\" DB=\"GeneReviews\" /></AttributeSet><AttributeSet><Attribute Type=\"disease mechanism\" integerValue=\"274\">gain of function</Attribute><XRef ID=\"GTR000021494\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000260603\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000325033\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000501433\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000501488\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000508573\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509383\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509879\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520029\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520186\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000521505\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000522280\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000522559\" DB=\"Genetic Testing Registry (GTR)\" /><XRef 
ID=\"GTR000528040\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000528367\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000528651\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000528909\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000528911\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000528912\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000528913\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000530118\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000552183\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000552245\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000553206\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000553528\" DB=\"Genetic Testing Registry (GTR)\" /></AttributeSet><Citation Type=\"review\" Abbrev=\"GeneReviews\"><ID Source=\"PubMed\">20301434</ID><ID Source=\"BookShelf\">NBK1257</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"Brandi et al., 2001\"><ID Source=\"PubMed\">11739416</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"NSGC, 2004\"><ID Source=\"PubMed\">15604628</ID></Citation><XRef ID=\"C4048306\" DB=\"MedGen\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"46066\" SubmissionName=\"MEN2 update May 2018\"><ClinVarSubmissionID localKey=\"176|MedGen:CN073359\" submittedAssembly=\"not applicable\" submitter=\"Research and Development, ARUP Laboratories\" submitterDate=\"2018-05-10\" /><ClinVarAccession Acc=\"SCV000042485\" Version=\"2\" Type=\"SCV\" OrgID=\"506018\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-08-03\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2018-05-04\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Pathogenic</Description><Citation><ID Source=\"PubMed\">8099202</ID></Citation><Citation><ID Source=\"PubMed\">8612479</ID></Citation><Citation><ID 
Source=\"PubMed\">7595167</ID></Citation><Citation><URL>http://www.arup.utah.edu/database/MEN2/MEN2_display.php</URL></Citation><Comment>This indel changes three amino acids (ELC&gt;DVR) which results in a p.C634R mutation. In vitro studies: RET activation (PMID 8612479). In the oldest reference, codon 634 was called codon 380. Additional reference: PMID 7595167.</Comment></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"ARUP Institute\" ID=\"176\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_020975.4:c.1896_1900delGCTGTinsCGTGC</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">RET</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><XRef DB=\"MedGen\" ID=\"CN073359\" Type=\"CUI\" /></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000030349.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"47086145\"><RecordStatus>current</RecordStatus><Title>NM_174936.3(PCSK9):c.2009G&gt;A (p.Gly670Glu) AND Familial hypercholesterolemia</Title><ReferenceClinVarAssertion DateCreated=\"2012-08-13\" DateLastUpdated=\"2019-11-02\" ID=\"87852\"><ClinVarAccession Acc=\"RCV000030349\" Version=\"7\" Type=\"RCV\" DateUpdated=\"2019-11-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2017-06-22\"><ReviewStatus>criteria provided, multiple submitters, no conflicts</ReviewStatus><Description>Benign/Likely benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><AttributeSet><Attribute Type=\"ModeOfInheritance\">autosomal unknown</Attribute></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>research</MethodType></Method><Method><Purpose>assert pathogenicity</Purpose><MethodType>curation</MethodType></Method><ObservedData ID=\"47929462\"><Attribute integerValue=\"1\" Type=\"VariantAlleles\" /></ObservedData><ObservedData ID=\"47929462\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData><ObservedData ID=\"47929462\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><Method><MethodType>research</MethodType></Method><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"47929463\"><Attribute Type=\"Description\">not 
provided</Attribute></ObservedData><ObservedData ID=\"47929463\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData><ObservedData ID=\"47929463\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData><ObservedData ID=\"47929463\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData><ObservedData ID=\"47929463\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData><ObservedData ID=\"47929463\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData><ObservedData ID=\"47929463\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"36670\" Acc=\"VCV000036670\" Version=\"1\"><Measure Type=\"single nucleotide variant\" ID=\"45331\"><Name><ElementValue Type=\"Preferred\">NM_174936.3(PCSK9):c.2009G&gt;A (p.Gly670Glu)</ElementValue></Name><AttributeSet><Attribute Accession=\"LRG_275t1\" Change=\"c.2009G&gt;A\" Type=\"HGVS, coding, LRG\">LRG_275t1:c.2009G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_174936\" Version=\"3\" Change=\"c.2009G&gt;A\" Type=\"HGVS, coding, RefSeq\">NM_174936.3:c.2009G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_275\" Change=\"g.28968G&gt;A\" Type=\"HGVS, genomic, LRG\">LRG_275:g.28968G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_009061\" Version=\"1\" Change=\"g.28968G&gt;A\" Type=\"HGVS, genomic, RefSeqGene\">NG_009061.1:g.28968G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000001\" Version=\"11\" Change=\"g.55063514G&gt;A\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000001.11:g.55063514G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000001\" Version=\"10\" Change=\"g.55529187G&gt;A\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000001.10:g.55529187G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NR_110451\" 
Version=\"1\" Change=\"n.1616G&gt;A\" Type=\"HGVS, non-coding\">NR_110451.1:n.1616G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_275p1\" Change=\"p.Gly670Glu\" Type=\"HGVS, protein\">LRG_275p1:p.Gly670Glu</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"Q8NBP7\" Change=\"p.Gly670Glu\" Type=\"HGVS, protein\">Q8NBP7:p.Gly670Glu</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_777596\" Version=\"2\" Change=\"p.Gly670Glu\" Type=\"HGVS, protein, RefSeq\">NP_777596.2:p.Gly670Glu</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_174936.3:c.2009G&gt;A\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">non-coding transcript variant</Attribute><XRef ID=\"SO:0001619\" DB=\"Sequence Ontology\" /><XRef ID=\"NR_110451.1:n.1616G&gt;A\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">G670E</Attribute></AttributeSet><AlleleFrequencyList><AlleleFrequency Value=\"0.88944\" Source=\"NHLBI Exome Sequencing Project (ESP) Exome Variant Server\" /><AlleleFrequency Value=\"0.89896\" Source=\"1000 Genomes Project\" /><AlleleFrequency Value=\"0.94328\" Source=\"Exome Aggregation Consortium (ExAC)\" /><AlleleFrequency Value=\"0.89402\" Source=\"The Genome Aggregation Database (gnomAD)\" /><AlleleFrequency Value=\"0.94770\" Source=\"The Genome Aggregation Database (gnomAD), exomes\" /><AlleleFrequency Value=\"0.89336\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\" /></AlleleFrequencyList><GlobalMinorAlleleFrequency Value=\"0.10104\" Source=\"1000 Genomes Project\" MinorAllele=\"G\" /><CytogeneticLocation>1p32.3</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"55063514\" stop=\"55063514\" 
display_start=\"55063514\" display_stop=\"55063514\" variantLength=\"1\" positionVCF=\"55063514\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"A\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"55529187\" stop=\"55529187\" display_start=\"55529187\" display_stop=\"55529187\" variantLength=\"1\" positionVCF=\"55529187\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"A\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">proprotein convertase subtilisin/kexin type 9</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">PCSK9</ElementValue></Symbol><AttributeSet><Attribute dateValue=\"2015-12-17\" Type=\"Haploinsufficiency\">Dosage sensitivity unlikely</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=PCSK9</URL></Citation></AttributeSet><AttributeSet><Attribute dateValue=\"2015-12-17\" Type=\"Triplosensitivity\">No evidence available</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=PCSK9</URL></Citation></AttributeSet><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"55039548\" stop=\"55064853\" display_start=\"55039548\" display_stop=\"55064853\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"55505148\" stop=\"55530525\" display_start=\"55505148\" display_stop=\"55530525\" variantLength=\"25378\" Strand=\"+\" /><XRef ID=\"255738\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"607786\" DB=\"OMIM\" /><XRef ID=\"HGNC:20001\" DB=\"HGNC\" /><Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations of 2013 (PubMed 23788249) for reporting incidental findings in 
exons.</Comment><Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations of 2016 (PubMed 27854360) for reporting incidental findings in exons.</Comment></MeasureRelationship><Citation Type=\"general\"><ID Source=\"PubMed\">19191301</ID></Citation><XRef ID=\"PCSK9 117\" DB=\"Iberoamerican FH Network\" /><XRef ID=\"1537\" DB=\"Illumina Clinical Services Laboratory,Illumina\" /><XRef ID=\"PCSK9 117\" DB=\"Laboratory of Genetics and Molecular Cardiology,University of São Paulo\" /><XRef ID=\"Q8NBP7#VAR_017201\" DB=\"UniProtKB\" /><XRef Type=\"rs\" ID=\"505151\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_174936.3(PCSK9):c.2009G&gt;A (p.Gly670Glu)</ElementValue></Name><XRef ID=\"CA023140\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"7435\"><Trait ID=\"15983\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Familial hypercholesterolemia</ElementValue><XRef ID=\"Familial+Hypercholesterolemia/2746\" DB=\"Genetic Alliance\" /><XRef ID=\"398036000\" DB=\"SNOMED CT\" /></Name><Name><ElementValue Type=\"Alternate\">LDL RECEPTOR DISORDER</ElementValue><XRef Type=\"MIM\" ID=\"143890\" DB=\"OMIM\" /></Name><Name><ElementValue Type=\"Alternate\">Hyperlipoproteinemia Type IIa</ElementValue></Name><Name><ElementValue Type=\"Alternate\">HYPER-LOW-DENSITY-LIPOPROTEINEMIA</ElementValue><XRef Type=\"MIM\" ID=\"143890\" DB=\"OMIM\" /></Name><Name><ElementValue Type=\"Alternate\">HYPERCHOLESTEROLEMIC XANTHOMATOSIS, FAMILIAL</ElementValue><XRef Type=\"MIM\" ID=\"143890\" DB=\"OMIM\" /><XRef Type=\"MIM\" ID=\"144400\" DB=\"OMIM\" /></Name><Name><ElementValue Type=\"Alternate\">Fredrickson type IIa hyperlipoproteinemia</ElementValue><XRef ID=\"397915002\" DB=\"SNOMED CT\" /></Name><Name><ElementValue Type=\"Alternate\">Hyperlipoproteinemia Type II</ElementValue></Name><Name><ElementValue Type=\"Alternate\">HYPERCHOLESTEROLEMIA, FAMILIAL, MODIFIER OF</ElementValue><XRef Type=\"Allelic variant\" 
ID=\"600946.0028\" DB=\"OMIM\" /></Name><Name><ElementValue Type=\"Alternate\">HYPERCHOLESTEROLEMIA, FAMILIAL, 1</ElementValue><XRef Type=\"MIM\" ID=\"143890\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"606945.0059\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"606945.0060\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"606945.0061\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"606945.0062\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"606945.0063\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"606945.0065\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"606945.0066\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"606945.0067\" DB=\"OMIM\" /></Name><Symbol><ElementValue Type=\"Preferred\">FHCL1</ElementValue><XRef Type=\"MIM\" ID=\"143890\" DB=\"OMIM\" /></Symbol><Symbol><ElementValue Type=\"Alternate\">FH</ElementValue><XRef Type=\"MIM\" ID=\"143890\" DB=\"OMIM\" /></Symbol><Symbol><ElementValue Type=\"Alternate\">FHC</ElementValue><XRef Type=\"MIM\" ID=\"143890\" DB=\"OMIM\" /></Symbol><AttributeSet><Attribute Type=\"public definition\">Familial hypercholesterolemia (FH) is characterized by severely elevated LDL cholesterol (LDL-C) levels that lead to atherosclerotic plaque deposition in the coronary arteries and proximal aorta at an early age, leading to an increased risk for cardiovascular disease. Xanthomas (patches of yellowish cholesterol buildup) may worsen with age as a result of extremely high cholesterol levels. Xanthomas can occur around the eyelids and within the tendons of the elbows, hands, knees, and feet. In FH, the more common cardiovascular disease is coronary artery disease (CAD), which may manifest as angina and myocardial infarction; stroke occurs more rarely. Untreated men are at a 50% risk for a fatal or nonfatal coronary event by age 50 years; untreated women are at a 30% risk by age 60 years. 
An estimated 70%-95% of FH results from a heterozygous pathogenic variant in one of three genes (APOB, LDLR, PCSK9). FH is the most common inherited cardiovascular disease, with a prevalence of 1:200-250. FH likely accounts for 2%-3% of myocardial infarctions in individuals younger than age 60 years. In contrast, homozygous FH (HoFH) results from biallelic (homozygous or compound heterozygous) pathogenic variants in one of these known genes (APOB, LDLR, PCSK9). Most individuals with HoFH experience severe CAD by their mid-20s and the rate of either death or coronary bypass surgery by the teenage years is high. Severe aortic stenosis is also common.</Attribute><XRef ID=\"NBK174884\" DB=\"GeneReviews\" /></AttributeSet><Citation Type=\"Recommendation\" Abbrev=\"ACMG, 2013\"><ID Source=\"PubMed\">23788249</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"NICE, 2008\"><URL>http://www.nice.org.uk/guidance/cg71</URL><CitationText>Identification and management of familial hypercholesterolaemia</CitationText></Citation><Citation Type=\"practice guideline\" Abbrev=\"NHFA, 2012\"><ID Source=\"PubMed\">22364837</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"IPMFH, 2004\"><ID Source=\"PubMed\">15177124</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"NLAEP, 2011\"><ID Source=\"PubMed\">21600525</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"Int'l FH Foundation, 2014\"><ID Source=\"PubMed\">24418289</ID></Citation><Citation Type=\"review\" Abbrev=\"GeneReviews\"><ID Source=\"PubMed\">24404629</ID><ID Source=\"BookShelf\">NBK174884</ID></Citation><Citation Type=\"Recommendation\" Abbrev=\"Feldman et al., 2015\"><ID Source=\"PubMed\">25404096</ID></Citation><Citation Type=\"Position Statement\" Abbrev=\"EAS, 2014\"><ID Source=\"PubMed\">25053660</ID></Citation><Citation Type=\"Position Statement\" Abbrev=\"PLEF, 2014\"><ID Source=\"PubMed\">24636176</ID></Citation><Citation Type=\"Position Statement\" Abbrev=\"PLEF, 
2013\"><ID Source=\"PubMed\">23725921</ID></Citation><Citation Type=\"Recommendation\" Abbrev=\"ACMG, 2015\"><ID Source=\"PubMed\">25356965</ID></Citation><Citation Type=\"Recommendation\" Abbrev=\"ACMG, 2016\"><ID Source=\"PubMed\">27854360</ID><ID Source=\"DOI\">10.1038/gim.2016.190</ID></Citation><XRef ID=\"C0745103\" DB=\"MedGen\" /><XRef ID=\"391665\" DB=\"Orphanet\" /><XRef Type=\"MIM\" ID=\"143890\" DB=\"OMIM\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"57166\"><ClinVarSubmissionID localKey=\"PCSK9_2009G_A_062411\" submitter=\"Integrated Genetics/Laboratory Corporation of America\" title=\"PCSK9:c.2009G&gt;A and Familial Hypercholesterolemia\" /><ClinVarAccession Acc=\"SCV000053016\" Version=\"1\" Type=\"SCV\" OrgID=\"500026\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2011-08-18\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>benign</Description><Comment Type=\"ConvertedByNCBI\">Converted during submission to Benign.</Comment></ClinicalSignificance><CustomAssertionScore Type=\"QOD\" Value=\"3\" /><CustomAssertionScore Type=\"aFXN\" Value=\"NA\" /><CustomAssertionScore Type=\"pFXN\" Value=\"-2\" /><CustomAssertionScore Type=\"pbGP\" Value=\"3\" /><CustomAssertionScore Type=\"tbGP\" Value=\"NA\" /><Assertion Type=\"variation to disease\" /><AttributeSet><Attribute Type=\"ModeOfInheritance\">autosomal unknown</Attribute></AttributeSet><AttributeSet><Attribute Type=\"AssertionMethod\">LabCorp Variant Classification Summary - May 2015</Attribute><Citation Type=\"general\"><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/pttb9itm/labcorp_variant_classification_method_-_may_2015.pdf</URL></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Tissue>Blood</Tissue><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><Purpose>assert 
pathogenicity</Purpose><MethodType>curation</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\">1</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"single nucleotide variant\"><AttributeSet><Attribute Type=\"HGVS\">NM_174936.3:c.2009G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Type=\"HGVS\">p.Gly670Glu</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense mutation</Attribute><XRef DB=\"Sequence Ontology\" ID=\"SO:0001783\" /></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">PCSK9</ElementValue></Symbol></MeasureRelationship><Citation Type=\"general\"><ID Source=\"PubMed\">19191301</ID></Citation></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Familial Hypercholesterolemia</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">FH</ElementValue></Symbol></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"619516\"><ClinVarSubmissionID localKey=\"g.28968G&gt;A\" localKeyIsSubmitted=\"1\" submittedAssembly=\"GRCh37\" submitter=\"Cardiovascular Research Group,Instituto Nacional de Saude Doutor Ricardo Jorge\" submitterDate=\"2016-10-14\" /><ClinVarAccession Acc=\"SCV000323075\" Version=\"1\" Type=\"SCV\" OrgID=\"505909\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2016-03-01\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Benign</Description><Comment Type=\"public\">MAF = 2% in 100 subjects with average plasma cholesterol; 291 hmz(AA) + 28 htz(GA) in 319 normolipidemic individuals; 98(AA)/100 normolipidemic individuals</Comment></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"Cardiovascular Research Group\" ID=\"PCSK9 117\" /><AttributeSet><Attribute 
Type=\"AssertionMethod\">ACMG Guidelines, 2015</Attribute><Citation><ID Source=\"PubMed\">25741868</ID></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>research</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData><Comment Type=\"public\">%MAF (ExAC):5.67</Comment></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><CellLine>HEK</CellLine><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>research</MethodType><ObsMethodAttribute><Attribute Type=\"MethodResult\">normal PCSK9 processing</Attribute></ObsMethodAttribute></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData><Citation><ID Source=\"PubMed\">15358785</ID></Citation><Citation><ID Source=\"PubMed\">15893176</ID></Citation><Citation><ID Source=\"PubMed\">17170371</ID></Citation><Comment Type=\"public\">Heterologous cells (HEK), pulse-chase [S35]Met/Cys assays</Comment></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue Type=\"Alternate\">NM_174936.3:c.2009G&gt;A</ElementValue></Name><AttributeSet><Attribute Type=\"HGVS\">NG_009061.1:g.28968G&gt;A</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">PCSK9</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><XRef DB=\"MedGen\" ID=\"C0020445\" Type=\"CUI\" /></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"671795\" SubmissionName=\"ICSL_2016Q4\"><ClinVarSubmissionID localKey=\"1537|Familial Hypercholesterolemia\" submittedAssembly=\"GRCh37\" submitter=\"Illumina Clinical Services Laboratory,Illumina\" submitterDate=\"2016-10-18\" /><ClinVarAccession Acc=\"SCV000358273\" Version=\"2\" Type=\"SCV\" 
OrgID=\"504895\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2016-06-14\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Likely benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"Illumina Clinical Services Laboratory\" ID=\"1537\" /><AttributeSet><Attribute Type=\"AssertionMethod\">ICSL Variant Classification 20161018</Attribute><Citation Type=\"general\"><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/4jQgNGYk/ICSL_Variant_Classification_20161018.pdf</URL><CitationText>ICSL_Variant_Classification_20161018.pdf</CitationText></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_174936.3:c.2009G&gt;A</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">PCSK9</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Familial Hypercholesterolemia</ElementValue></Name></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"1142333\" SubmissionName=\"SUB2605555\"><ClinVarSubmissionID localKey=\"g.55529187G&gt;A\" localKeyIsSubmitted=\"1\" submittedAssembly=\"GRCh37\" submitter=\"Laboratory of Genetics and Molecular Cardiology,University of São Paulo\" submitterDate=\"2017-08-04\" /><ClinVarAccession Acc=\"SCV000588691\" Version=\"1\" Type=\"SCV\" OrgID=\"505581\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" 
/><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2016-03-01\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"Laboratory of Genetics and Molecular Cardiology\" ID=\"PCSK9 117\" /><AttributeSet><Attribute Type=\"AssertionMethod\">ACMG Guidelines, 2015</Attribute><Citation><ID Source=\"PubMed\">25741868</ID></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>research</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData><Comment Type=\"public\">%MAF(ExAC):5.67</Comment></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><CellLine>HEK cells</CellLine><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>research</MethodType><ObsMethodAttribute><Attribute Type=\"MethodResult\">normal PCSK9 processing</Attribute></ObsMethodAttribute></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData><Citation><ID Source=\"PubMed\">15358785</ID></Citation><Comment Type=\"public\">Assay description:Heterologous cells (HEK), pulse-chase [S35]Met/Cys assays</Comment></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue Type=\"Alternate\">NM_174936.3:c.2009G&gt;A</ElementValue></Name><AttributeSet><Attribute Type=\"HGVS\">NG_009061.1:g.28968G&gt;A</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">PCSK9</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><XRef DB=\"MedGen\" ID=\"C0020445\" Type=\"CUI\" 
/></Trait></TraitSet><StudyName>HipercolBrasil</StudyName><StudyDescription>HipercolBrasil is a program that aims to describe the genetic data obtained from the cascade screening applied in a large FH Brazilian cohort since 2011.</StudyDescription></ClinVarAssertion><ClinVarAssertion ID=\"1173281\" SubmissionName=\"SUB2605339\"><ClinVarSubmissionID localKey=\"NM_174936.3:c.2009G&gt;A|MedGen:C0020445\" submittedAssembly=\"GRCh37\" submitter=\"Laboratorium voor Moleculaire Diagnostiek Experimentele Vasculaire Geneeskunde,Academisch Medisch Centrum\" submitterDate=\"2017-04-25\" /><ClinVarAccession Acc=\"SCV000606717\" Version=\"1\" Type=\"SCV\" OrgID=\"506186\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>research</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_174936.3:c.2009G&gt;A</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">PCSK9</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><XRef DB=\"MedGen\" ID=\"C0020445\" Type=\"CUI\" /></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"1348269\" SubmissionName=\"Color Genomics - ClinVar Submission - December 2017 - \"><ClinVarSubmissionID localKey=\"Db4KrQBSZeoE7-orUyypkRLTNpF8zhx0-5xXQqUGIbQ|MedGen:C0020445\" submittedAssembly=\"GRCh37\" submitter=\"Color\" submitterDate=\"2017-12-21\" 
/><ClinVarAccession Acc=\"SCV000690973\" Version=\"1\" Type=\"SCV\" OrgID=\"505849\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2017-06-22\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"Color Genomics, Inc.\" ID=\"Db4KrQBSZeoE7-orUyypkRLTNpF8zhx0-5xXQqUGIbQ\" /><AttributeSet><Attribute Type=\"AssertionMethod\">ACMG Guidelines, 2015</Attribute><Citation><ID Source=\"PubMed\">25741868</ID></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><TypePlatform>NGS</TypePlatform><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_174936.3:c.2009G&gt;A</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">PCSK9</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><XRef DB=\"MedGen\" ID=\"C0020445\" Type=\"CUI\" /></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"1456761\" SubmissionName=\"IBeroAmerican FH Network\"><ClinVarSubmissionID localKey=\"g.28968G&gt;A\" localKeyIsSubmitted=\"1\" submittedAssembly=\"GRCh37\" submitter=\"Iberoamerican FH Network\" submitterDate=\"2017-07-28\" /><ClinVarAccession Acc=\"SCV000748072\" Version=\"1\" Type=\"SCV\" OrgID=\"506189\" OrganizationCategory=\"consortium\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2016-03-01\"><ReviewStatus>criteria provided, single 
submitter</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"Iberoamerican FH Network\" ID=\"PCSK9 117\" /><AttributeSet><Attribute Type=\"AssertionMethod\">ACMG Guidelines, 2015</Attribute><Citation><ID Source=\"PubMed\">25741868</ID></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>research</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData><Comment Type=\"public\">%MAF(ExAC):5.67</Comment></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><CellLine>HEK cells</CellLine><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>research</MethodType><ObsMethodAttribute><Attribute Type=\"MethodResult\">normal PCSK9 processing</Attribute></ObsMethodAttribute></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData><Citation><ID Source=\"PubMed\">15358785</ID></Citation><Comment Type=\"public\">Assay Description:Heterologous cells (HEK), pulse-chase [S35]Met/Cys assays</Comment></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue Type=\"Alternate\">NM_174936.3:c.2009G&gt;A</ElementValue></Name><AttributeSet><Attribute Type=\"HGVS\">NG_009061.1:g.28968G&gt;A</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">PCSK9</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><XRef DB=\"MedGen\" ID=\"C0020445\" Type=\"CUI\" /></Trait></TraitSet><Comment Type=\"public\">Variant present in the database from Mexico</Comment></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000032548.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\"\n            xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n    <ClinVarSet ID=\"49273949\">\n        <RecordStatus>current</RecordStatus>\n        <Title>NM_207352.4(CYP4V2):c.802-8_810delinsGC AND Bietti crystalline corneoretinal dystrophy</Title>\n        <ReferenceClinVarAssertion DateCreated=\"2013-01-08\" DateLastUpdated=\"2019-12-31\" ID=\"93199\">\n            <ClinVarAccession Acc=\"RCV000032548\" Version=\"8\" Type=\"RCV\" DateUpdated=\"2019-12-31\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2017-02-06\">\n                <ReviewStatus>criteria provided, multiple submitters, no conflicts</ReviewStatus>\n                <Description>Pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <AttributeSet>\n                <Attribute Type=\"ModeOfInheritance\" integerValue=\"263\">Autosomal recessive inheritance</Attribute>\n                <XRef ID=\"73690472\" DB=\"Laboratory for Molecular Medicine,Partners HealthCare Personalized Medicine\"/>\n            </AttributeSet>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>not provided</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <Method>\n                    <MethodType>literature only</MethodType>\n                </Method>\n                <ObservedData ID=\"50064184\">\n                    <Attribute integerValue=\"1\" Type=\"VariantAlleles\"/>\n   
             </ObservedData>\n                <ObservedData ID=\"50064184\">\n                    <Attribute Type=\"Description\">In 7 Japanese patients with Bietti crystalline corneoretinal dystrophy\n                        (BCD; 210370), Lin et al. (2005) identified homozygosity for an insertion/deletion mutation\n                        (c.802-8_810del17insGC) at intron 6 of the CYP4V2 gene that resulted in the skipping of exon 7.\n                        All 7 patients also shared homozygosity for 6 closely linked intragenic polymorphic markers,\n                        consistent with a founder effect; however, the authors noted that the founder was probably a\n                        very distant ancestor because the region of the conserved linked markers was small (6.7-17.1\n                        kb). In a Chinese BCD patient, Lin et al. (2005) identified compound heterozygosity for this\n                        mutation and a c.992A-C transversion in the CYP4V2 gene, resulting in a his-331-to-pro (H331P;\n                        608614.0007) substitution.\n                    </Attribute>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">15937078</ID>\n                    </Citation>\n                </ObservedData>\n                <ObservedData ID=\"50064184\">\n                    <Attribute Type=\"Description\">Li et al. (2004) had previously reported the indel mutation as a 15-bp\n                        deletion, which they found in homozygosity in 6 Japanese and 2 Chinese families with BCD as well\n                        as in 1 Japanese and 2 Chinese sporadic BCD patients. In addition, they identified the indel\n                        mutation in compound heterozygous state with the IVS8-2A-G mutation (608614.0005) in a Chinese\n                        patient with sporadic BCD and with the H331P mutation in affected individuals from a Chinese\n                        family. 
Screening for these 3 mutations in 50 controls, including 12 of Chinese, 16 of Japanese,\n                        and 22 of European origin, detected only 1 heterozygous instance of the H331P mutation in 1\n                        Chinese control sample.\n                    </Attribute>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">15042513</ID>\n                    </Citation>\n                </ObservedData>\n                <ObservedData ID=\"50064184\">\n                    <Attribute Type=\"Description\">In 4 Chinese sisters with congenital cataract, high myopia, thin\n                        corneas, and a diagnosis of retinitis pigmentosa, Wang et al. (2012) identified compound\n                        heterozygosity for the c.802-8_810del17insGC mutation and the IVS8-2A-G mutation in the CYP4V2\n                        gene. The mutations segregated fully with disease in 22 examined members of this 4-generation\n                        family.\n                    </Attribute>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">22693542</ID>\n                    </Citation>\n                </ObservedData>\n                <ObservedData ID=\"50064184\">\n                    <Attribute Type=\"Description\">In the proband from a Chinese family diagnosed with autosomal\n                        recessive RP, Fu et al. (2013) identified compound heterozygosity for the CYP4V2\n                        c.802-8_810del17insGC (c.802-8_810del17insGC, NM_207352.3) and IVS8-2A-G mutations. Both\n                        mutations were also present in an affected sib, but mutation status was unknown for their\n                        unaffected deceased parents. 
Clinical reevaluation was not possible in this family, but\n                        reexamination of affected individuals in another Chinese RP family with mutations in CYP4V2\n                        resulted in a rediagnosis of their phenotype as BCD (see 608614.0009).\n                    </Attribute>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">23661369</ID>\n                    </Citation>\n                </ObservedData>\n                <ObservedData ID=\"50064184\">\n                    <Attribute integerValue=\"1\" Type=\"NumFamiliesWithVariant\"/>\n                </ObservedData>\n            </ObservedIn>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>unknown</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData ID=\"50064185\">\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <ObservedIn>\n                <Sample>\n                    <Origin>not provided</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>not provided</AffectedStatus>\n                    <NumberTested>1</NumberTested>\n                </Sample>\n                <Method>\n                    <MethodType>literature only</MethodType>\n                </Method>\n                <Method>\n                    <Purpose>assert pathogenicity</Purpose>\n                    <MethodType>curation</MethodType>\n                </Method>\n                <ObservedData ID=\"50064186\">\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n          
      <ObservedData ID=\"50064186\">\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\" ID=\"39271\" Acc=\"VCV000039271\" Version=\"1\">\n                <Measure Type=\"Indel\" ID=\"47877\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">NM_207352.4(CYP4V2):c.802-8_810delinsGC</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_207352\" Version=\"3\" Change=\"c.802-8_810del17insGC\" Type=\"HGVS, coding\">\n                            NM_207352.3:c.802-8_810del17insGC\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_207352\" Version=\"4\" Change=\"c.802-8_810delinsGC\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_207352.4:c.802-8_810delinsGC\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Change=\"g.14630_14646del17insGC\" Accession=\"NG_007965\" Version=\"1\"\n                                   Type=\"HGVS, genomic\">NG_007965.1:g.14630_14646del17insGC\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_007965\" Version=\"1\" Change=\"g.14630_14646del17insGC\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_007965.1:g.14630_14646del17insGC\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_007965\" Version=\"1\" Change=\"g.14630_14646delinsGC\"\n                                   Type=\"HGVS, genomic, 
RefSeqGene\">NG_007965.1:g.14630_14646delinsGC\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_007965\" Version=\"1\" Change=\"g.14630_14646delinsGC\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_007965.1:g.14630_14646delinsGC\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000004\" Version=\"12\" Change=\"g.186201149_186201165delinsGC\"\n                                   Type=\"HGVS, genomic, top level\" integerValue=\"38\">\n                            NC_000004.12:g.186201149_186201165delinsGC\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_207352\" Version=\"3\" Change=\"c.802-8_810delinsGC\" Type=\"HGVS, previous\">\n                            NM_207352.3:c.802-8_810delinsGC\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS, protein\">p.?</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"Location\">NM_207352.3:exon 7</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"Location\">NM_207352.3:intron 6</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">splice acceptor variant</Attribute>\n                        <XRef ID=\"SO:0001574\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_207352.4:c.802-8_810delinsGC\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                       
 <Attribute Type=\"nucleotide change\">IVS6-8 del/insGC</Attribute>\n                    </AttributeSet>\n                    <CytogeneticLocation>4q35.2</CytogeneticLocation>\n                    <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                      AssemblyStatus=\"current\" Chr=\"4\" Accession=\"NC_000004.12\" start=\"186201149\"\n                                      stop=\"186201165\" display_start=\"186201149\" display_stop=\"186201165\"\n                                      alternateAllele=\"GC\" positionVCF=\"186201149\"\n                                      referenceAlleleVCF=\"TCATACAGGTCATCGCT\" alternateAlleleVCF=\"GC\"/>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"4\" Accession=\"NC_000004.11\" start=\"187122303\"\n                                      stop=\"187122319\" display_start=\"187122303\" display_stop=\"187122319\"\n                                      alternateAllele=\"GC\" positionVCF=\"187122303\"\n                                      referenceAlleleVCF=\"TCATACAGGTCATCGCT\" alternateAlleleVCF=\"GC\"/>\n                    <MeasureRelationship Type=\"within single gene\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">cytochrome P450 family 4 subfamily V member 2</ElementValue>\n                        </Name>\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">CYP4V2</ElementValue>\n                        </Symbol>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"4\" Accession=\"NC_000004.12\" start=\"186191567\"\n                                          stop=\"186213463\" display_start=\"186191567\" 
display_stop=\"186213463\"\n                                          Strand=\"+\"/>\n                        <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                          AssemblyStatus=\"previous\" Chr=\"4\" Accession=\"NC_000004.11\" start=\"187112673\"\n                                          stop=\"187134616\" display_start=\"187112673\" display_stop=\"187134616\"\n                                          variantLength=\"21944\" Strand=\"+\"/>\n                        <XRef ID=\"285440\" DB=\"Gene\"/>\n                        <XRef Type=\"MIM\" ID=\"608614\" DB=\"OMIM\"/>\n                        <XRef ID=\"HGNC:23198\" DB=\"HGNC\"/>\n                    </MeasureRelationship>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">15860296</ID>\n                    </Citation>\n                    <XRef Type=\"Allelic variant\" ID=\"608614.0006\" DB=\"OMIM\"/>\n                    <XRef Type=\"rs\" ID=\"207482233\" DB=\"dbSNP\"/>\n                </Measure>\n                <Name>\n                    <ElementValue Type=\"Preferred\">NM_207352.4(CYP4V2):c.802-8_810delinsGC</ElementValue>\n                </Name>\n                <XRef ID=\"CA343740\" DB=\"ClinGen\"/>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\" ID=\"559\">\n                <Trait ID=\"4671\" Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Bietti crystalline corneoretinal dystrophy</ElementValue>\n                        <XRef ID=\"Bietti+Crystalline+Corneoretinal+Dystrophy/825\" DB=\"Genetic Alliance\"/>\n                        <XRef ID=\"10050\" DB=\"Office of Rare Diseases\"/>\n                    </Name>\n                    <Name>\n                        <ElementValue Type=\"Alternate\">Bietti Crystalline Dystrophy</ElementValue>\n                        <XRef ID=\"NBK91457\" DB=\"GeneReviews\"/>\n       
             </Name>\n                    <Symbol>\n                        <ElementValue Type=\"Preferred\">BCD</ElementValue>\n                        <XRef Type=\"MIM\" ID=\"210370\" DB=\"OMIM\"/>\n                        <XRef ID=\"10050\" DB=\"Office of Rare Diseases\"/>\n                    </Symbol>\n                    <AttributeSet>\n                        <Attribute Type=\"public definition\">Bietti crystalline dystrophy (BCD) is a chorioretinal\n                            degeneration characterized by the presence of yellow-white crystals and/or complex lipid\n                            deposits in the retina and (to a variable degree) the cornea. Progressive atrophy and\n                            degeneration of the retinal pigment epithelium (RPE) / choroid lead to symptoms similar to\n                            those of other forms of retinal degeneration that fall under the category of retinitis\n                            pigmentosa and allied disorders, namely: reduced visual acuity, poor night vision, abnormal\n                            retinal electrophysiology, visual field loss, and often impaired color vision. Marked\n                            asymmetry between eyes is not uncommon. Onset is typically during the second to third decade\n                            of life, but ranges from the early teenage years to beyond the third decade. 
With time, loss\n                            of peripheral visual field, central acuity, or both result in legal blindness in most if not\n                            all affected individuals.\n                        </Attribute>\n                        <XRef ID=\"NBK91457\" DB=\"GeneReviews\"/>\n                    </AttributeSet>\n                    <Citation Type=\"review\" Abbrev=\"GeneReviews\">\n                        <ID Source=\"PubMed\">22497028</ID>\n                        <ID Source=\"BookShelf\">NBK91457</ID>\n                    </Citation>\n                    <XRef ID=\"C1859486\" DB=\"MedGen\"/>\n                    <XRef Type=\"MIM\" ID=\"210370\" DB=\"OMIM\"/>\n                </Trait>\n            </TraitSet>\n        </ReferenceClinVarAssertion>\n        <ClinVarAssertion ID=\"92690\">\n            <ClinVarSubmissionID localKey=\"IVS6-8 del/insGC_NBK91457\" submitter=\"GeneReviews\" submitterDate=\"2013-03-26\"\n                                 title=\"IVS6-8 del/insGC and Bietti Crystalline Dystrophy\"/>\n            <ClinVarAccession Acc=\"SCV000056215\" Version=\"2\" Type=\"SCV\" OrgID=\"500062\" OrganizationCategory=\"resource\"\n                              OrgType=\"primary\" DateUpdated=\"2019-07-02\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2012-04-12\">\n                <ReviewStatus>no assertion criteria provided</ReviewStatus>\n                <Description>pathologic</Description>\n                <Comment Type=\"ConvertedByNCBI\">Converted during submission to Pathogenic.</Comment>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ExternalID DB=\"GeneReviews\" ID=\"NBK91457\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>not provided</Origin>\n                    <Species>human</Species>\n                    <AffectedStatus>not provided</AffectedStatus>\n                
</Sample>\n                <Method>\n                    <Purpose>Assert pathogenicity</Purpose>\n                    <MethodType>curation</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <AttributeSet>\n                        <Attribute Type=\"nucleotide change\">IVS6-8 del/insGC</Attribute>\n                    </AttributeSet>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Bietti Crystalline Dystrophy</ElementValue>\n                    </Name>\n                    <XRef DB=\"GeneReviews\" ID=\"NBK91457\"/>\n                    <XRef DB=\"OMIM\" ID=\"210370\" Type=\"MIM\"/>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n        <ClinVarAssertion ID=\"310094\" SubmissionName=\"targeted_exome_RP\">\n            <ClinVarSubmissionID localKey=\"NM_207352.3:c.802-8_810del17insGC|KYOTOUNIVOPHTHALMOL\"\n                                 submitter=\"Department of Ophthalmology and Visual Sciences Kyoto University\"\n                                 submitterDate=\"2014-07-29\"/>\n            <ClinVarAccession Acc=\"SCV000172674\" Version=\"1\" Type=\"SCV\" OrgID=\"505202\" OrganizationCategory=\"laboratory\"\n                              OrgType=\"primary\" DateUpdated=\"2019-05-02\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance>\n                <ReviewStatus>no assertion criteria provided</ReviewStatus>\n                <Description>pathogenic</Description>\n                <Comment Type=\"ConvertedByNCBI\">Converted during submission to 
Pathogenic.</Comment>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>not provided</Origin>\n                    <Species>human</Species>\n                    <AffectedStatus>not provided</AffectedStatus>\n                    <NumberTested>1</NumberTested>\n                </Sample>\n                <Method>\n                    <MethodType>not provided</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS\">NM_207352.3:c.802-8_810del17insGC</Attribute>\n                    </AttributeSet>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Bietti crystalline corneoretinal dystrophy</ElementValue>\n                    </Name>\n                    <XRef DB=\"OMIM\" ID=\"210370\" Type=\"MIM\"/>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n        <ClinVarAssertion ID=\"457710\">\n            <ClinVarSubmissionID localKey=\"608614.0006_BIETTI CRYSTALLINE CORNEORETINAL DYSTROPHY\" submitter=\"OMIM\"\n                                 submitterDate=\"2015-09-26\"\n                                 title=\"CYP4V2, IVS6AS, 17-BP DEL/2-BP INS_BIETTI CRYSTALLINE CORNEORETINAL DYSTROPHY\"/>\n            <ClinVarAccession Acc=\"SCV000246149\" Version=\"1\" Type=\"SCV\" OrgID=\"3\" OrganizationCategory=\"resource\"\n                              OrgType=\"primary\" DateUpdated=\"2019-05-02\"/>\n            
<RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2013-06-14\">\n                <ReviewStatus>no assertion criteria provided</ReviewStatus>\n                <Description>Pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ExternalID DB=\"OMIM\" ID=\"608614.0006\" Type=\"Allelic variant\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species>human</Species>\n                    <AffectedStatus>not provided</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>literature only</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"Description\">In 7 Japanese patients with Bietti crystalline corneoretinal dystrophy\n                        (BCD; 210370), Lin et al. (2005) identified homozygosity for an insertion/deletion mutation\n                        (c.802-8_810del17insGC) at intron 6 of the CYP4V2 gene that resulted in the skipping of exon 7.\n                        All 7 patients also shared homozygosity for 6 closely linked intragenic polymorphic markers,\n                        consistent with a founder effect; however, the authors noted that the founder was probably a\n                        very distant ancestor because the region of the conserved linked markers was small (6.7-17.1\n                        kb). In a Chinese BCD patient, Lin et al. 
(2005) identified compound heterozygosity for this\n                        mutation and a c.992A-C transversion in the CYP4V2 gene, resulting in a his-331-to-pro (H331P;\n                        608614.0007) substitution.\n                    </Attribute>\n                    <Citation>\n                        <ID Source=\"PubMed\">15937078</ID>\n                    </Citation>\n                    <XRef DB=\"OMIM\" ID=\"210370\" Type=\"MIM\"/>\n                </ObservedData>\n                <ObservedData>\n                    <Attribute Type=\"Description\">Li et al. (2004) had previously reported the indel mutation as a 15-bp\n                        deletion, which they found in homozygosity in 6 Japanese and 2 Chinese families with BCD as well\n                        as in 1 Japanese and 2 Chinese sporadic BCD patients. In addition, they identified the indel\n                        mutation in compound heterozygous state with the IVS8-2A-G mutation (608614.0005) in a Chinese\n                        patient with sporadic BCD and with the H331P mutation in affected individuals from a Chinese\n                        family. Screening for these 3 mutations in 50 controls, including 12 of Chinese, 16 of Japanese,\n                        and 22 of European origin, detected only 1 heterozygous instance of the H331P mutation in 1\n                        Chinese control sample.\n                    </Attribute>\n                    <Citation>\n                        <ID Source=\"PubMed\">15042513</ID>\n                    </Citation>\n                </ObservedData>\n                <ObservedData>\n                    <Attribute Type=\"Description\">In 4 Chinese sisters with congenital cataract, high myopia, thin\n                        corneas, and a diagnosis of retinitis pigmentosa, Wang et al. 
(2012) identified compound\n                        heterozygosity for the c.802-8_810del17insGC mutation and the IVS8-2A-G mutation in the CYP4V2\n                        gene. The mutations segregated fully with disease in 22 examined members of this 4-generation\n                        family.\n                    </Attribute>\n                    <Citation>\n                        <ID Source=\"PubMed\">22693542</ID>\n                    </Citation>\n                </ObservedData>\n                <ObservedData>\n                    <Attribute Type=\"Description\">In the proband from a Chinese family diagnosed with autosomal\n                        recessive RP, Fu et al. (2013) identified compound heterozygosity for the CYP4V2\n                        c.802-8_810del17insGC (c.802-8_810del17insGC, NM_207352.3) and IVS8-2A-G mutations. Both\n                        mutations were also present in an affected sib, but mutation status was unknown for their\n                        unaffected deceased parents. 
Clinical reevaluation was not possible in this family, but\n                        reexamination of affected individuals in another Chinese RP family with mutations in CYP4V2\n                        resulted in a rediagnosis of their phenotype as BCD (see 608614.0009).\n                    </Attribute>\n                    <Citation>\n                        <ID Source=\"PubMed\">23661369</ID>\n                    </Citation>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">CYP4V2, IVS6AS, 17-BP DEL/2-BP INS</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Type=\"NonHGVS\">IVS6AS, 17-BP DEL/2-BP INS</Attribute>\n                    </AttributeSet>\n                    <MeasureRelationship Type=\"variant in gene\">\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">CYP4V2</ElementValue>\n                        </Symbol>\n                    </MeasureRelationship>\n                    <XRef DB=\"OMIM\" ID=\"608614.0006\" Type=\"Allelic variant\"/>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">BIETTI CRYSTALLINE CORNEORETINAL DYSTROPHY</ElementValue>\n                    </Name>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n        <ClinVarAssertion ID=\"762364\" SubmissionName=\"ICSL_2016Q4\">\n            <ClinVarSubmissionID localKey=\"639241|Bietti Crystalline Dystrophy\" submittedAssembly=\"GRCh37\"\n                                 submitter=\"Illumina Clinical Services Laboratory,Illumina\" submitterDate=\"2016-10-18\"/>\n            <ClinVarAccession 
Acc=\"SCV000448843\" Version=\"2\" Type=\"SCV\" OrgID=\"504895\" OrganizationCategory=\"laboratory\"\n                              OrgType=\"primary\" DateUpdated=\"2019-05-02\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2016-06-14\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Pathogenic</Description>\n                <Citation>\n                    <ID Source=\"PubMed\">17962476</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">15860296</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">15042513</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">26085992</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">25593508</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">21565171</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">24739949</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">26865810</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">25629076</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">22693542</ID>\n                </Citation>\n                <Comment Type=\"public\">The c.802-8_810delTCATACAGGTCATCGCTinsGC variant, also described in the\n                    literature as c.802-8_810delinsG, occurs in a canonical splice site (acceptor) and is therefore\n                    predicted to disrupt or distort the normal gene product. 
The c.802-8_810delTCATACAGGTCATCGCTinsGC\n                    variant is the most common variant associated with Bietti crystalline dystrophy in the Japanese and\n                    Chinese populations, accounting for up to 83% of disease alleles (Park et al. 2016). The variant has\n                    been reported in at least nine studies in which it is found in at least 130 patients including 64 in\n                    a homozygous state, 65 in a compound heterozygous state and one individual in a heterozygous state\n                    in whom a second allele has not been detected (Li et al. 2004; Wada et al. 2006; Lai et al. 2007;\n                    Xiao et al. 2011; Yin et al. 2014; Meng et al. 2014; Tian et al. 2015; Park et al. 2016; Astuti et\n                    al. 2016). The variant was absent from 146 controls but is reported at a frequency of 0.00496 in the\n                    East Asian population of the 1000 Genomes Project. Due to the potential impact of splice acceptor\n                    variants and the supporting evidence from the literature, the c.802-8_810delTCATACAGGTCATCGCTinsGC\n                    variant is classified as pathogenic for Bietti crystalline dystrophy.\n                </Comment>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ExternalID DB=\"Illumina Clinical Services Laboratory\" ID=\"639241\"/>\n            <AttributeSet>\n                <Attribute Type=\"AssertionMethod\">ICSL Variant Classification 20161018</Attribute>\n                <Citation Type=\"general\">\n                    <URL>https://submit.ncbi.nlm.nih.gov/ft/byid/4jQgNGYk/ICSL_Variant_Classification_20161018.pdf</URL>\n                    <CitationText>ICSL_Variant_Classification_20161018.pdf</CitationText>\n                </Citation>\n            </AttributeSet>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species 
TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>unknown</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS\">NM_207352.3:c.802-8_810delTCATACAGGTCATCGCTinsGC</Attribute>\n                    </AttributeSet>\n                    <MeasureRelationship Type=\"variant in gene\">\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">CYP4V2</ElementValue>\n                        </Symbol>\n                    </MeasureRelationship>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Bietti Crystalline Dystrophy</ElementValue>\n                    </Name>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n        <ClinVarAssertion ID=\"1412652\" SubmissionName=\"SUB5348977\">\n            <ClinVarSubmissionID localKey=\"73690472|Orphanet:ORPHA41751\" submittedAssembly=\"GRCh37\"\n                                 submitter=\"Laboratory for Molecular Medicine,Partners HealthCare Personalized Medicine\"\n                                 submitterDate=\"2019-03-21\"/>\n            <ClinVarAccession Acc=\"SCV000731492\" Version=\"1\" Type=\"SCV\" OrgID=\"21766\" OrganizationCategory=\"laboratory\"\n                              OrgType=\"primary\" DateUpdated=\"2019-08-30\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance 
DateLastEvaluated=\"2017-02-06\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Pathogenic</Description>\n                <Citation>\n                    <ID Source=\"PubMed\">26085992</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">17962476</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">15860296</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">23793346</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">15937078</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">23661369</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">24739949</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">21565171</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">15042513</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">25629076</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">25593508</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">26865810</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">22693542</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">19508456</ID>\n                </Citation>\n                <Comment>The c.802-8_810delinsGC (NM_207352.3 c.802-8_810delinsGC) variant in CYP4V2 has been reported\n                    in over 60 homozygous and compound heterozygous individuals with B ietti crystalline 
dystrophy and\n                    related disorders and is the most common variant associated with this disease in East Asian\n                    populations (Wada 2005, Lin 2005, La i 2007, Yokoi 2010, Xiao 2011, Wang 2012, Chung 2013, Fu 2013,\n                    Yin 2014, Meng 20 14, Tian 2015, Astuti 2015, and Park 2016). This variant has also been reported as\n                    pathogenic in ClinVar (Variation ID#39271). This variant has been identified in 0.2% (16/8520) of\n                    East Asian chromosomes by chromosomes by the Exome Aggregat ion Consortium (ExAC,\n                    http://exac.broadinstitute.org). This variant alters the c anonical splice site, and therefore is\n                    expected to impact splicing and lead to a n absent or truncated protein. In summary, this variant\n                    meets criteria to be cla ssified as pathogenic for Bietti crystalline dystrophy and related\n                    disorders in an autosomal recessive manner based upon its biallelic occurrence in patients wi th\n                    this disease and predicted functional impact.\n                </Comment>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ExternalID DB=\"Laboratory for Molecular Medicine\" ID=\"73690472\"/>\n            <AttributeSet>\n                <Attribute Type=\"ModeOfInheritance\">Autosomal recessive inheritance</Attribute>\n            </AttributeSet>\n            <AttributeSet>\n                <Attribute Type=\"AssertionMethod\">LMM Criteria</Attribute>\n                <Citation>\n                    <ID Source=\"PubMed\">24033266</ID>\n                </Citation>\n            </AttributeSet>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>not provided</AffectedStatus>\n                    
<FamilyData NumFamiliesWithVariant=\"1\"/>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"VariantAlleles\" integerValue=\"1\"/>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <Name>\n                        <ElementValue Type=\"Alternate\">NM_207352.3:c.802-8_810delinsGC</ElementValue>\n                    </Name>\n                    <Name>\n                        <ElementValue Type=\"Alternate\">p.?</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Type=\"Location\">NM_207352.3:EXON 7</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS\">NC_000004.11:g.187122303_187122319delinsGC</Attribute>\n                    </AttributeSet>\n                    <MeasureRelationship Type=\"variant in gene\">\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">CYP4V2</ElementValue>\n                        </Symbol>\n                    </MeasureRelationship>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <XRef DB=\"Orphanet\" ID=\"ORPHA41751\"/>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n    </ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000032707.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\"\n            xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n    <ClinVarSet ID=\"47088290\">\n        <RecordStatus>current</RecordStatus>\n        <Title>NM_000168.6(GLI3):c.1616_1617del (p.Arg539fs) AND Postaxial polydactyly, type A1/B</Title>\n        <ReferenceClinVarAssertion DateCreated=\"2013-02-14\" DateLastUpdated=\"2019-11-02\" ID=\"94199\">\n            <ClinVarAccession Acc=\"RCV000032707\" Version=\"25\" Type=\"RCV\" DateUpdated=\"2019-11-02\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2012-11-01\">\n                <ReviewStatus>no assertion criteria provided</ReviewStatus>\n                <Description>Pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>not provided</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>literature only</MethodType>\n                </Method>\n                <ObservedData ID=\"47933964\">\n                    <Attribute Type=\"Description\">In affected members of a 3-generation nonconsanguineous Saudi Arabian\n                        family with postaxial polydactyly (174200), Al-Qattan (2012) identified heterozygosity for a\n                        2-bp deletion (1615delGA) in the GLI3 gene, predicted to cause a frameshift resulting in a\n                        premature termination codon (R539Tfs*12). 
Al-Qattan (2012) noted that although this frameshift\n                        predicts truncation in the N-terminal part of the gene and a GCPS phenotype would be expected,\n                        none of the family members had craniofacial features.\n                    </Attribute>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">22428873</ID>\n                    </Citation>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\" ID=\"39511\" Acc=\"VCV000039511\" Version=\"1\">\n                <Measure Type=\"Deletion\" ID=\"48110\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">NM_000168.6(GLI3):c.1616_1617del (p.Arg539fs)</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_000168\" Version=\"6\" Change=\"c.1616_1617del\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_000168.6:c.1616_1617del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_008434\" Version=\"1\" Change=\"g.263390_263391del\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_008434.1:g.263390_263391del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000007\" Version=\"14\" Change=\"g.41978630_41978631del\"\n                                   Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000007.14:g.41978630_41978631del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000007\" Version=\"13\" Change=\"g.42018229_42018230del\"\n                                   Type=\"HGVS, genomic, top level, 
previous\" integerValue=\"37\">\n                            NC_000007.13:g.42018229_42018230del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_000168\" Version=\"5\" Change=\"c.1616_1617del\" Type=\"HGVS, previous\">\n                            NM_000168.5:c.1616_1617del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_000159\" Version=\"3\" Change=\"p.Arg539fs\" Type=\"HGVS, protein, RefSeq\">\n                            NP_000159.3:p.Arg539fs\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">frameshift variant</Attribute>\n                        <XRef ID=\"SO:0001589\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_000168.6:c.1616_1617del\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"ProteinChange1LetterCode\">R539fs</Attribute>\n                    </AttributeSet>\n                    <CytogeneticLocation>7p14.1</CytogeneticLocation>\n                    <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                      AssemblyStatus=\"current\" Chr=\"7\" Accession=\"NC_000007.14\" start=\"41978629\"\n                                      stop=\"41978630\" display_start=\"41978629\" display_stop=\"41978630\" variantLength=\"2\"\n                                      positionVCF=\"41978628\" referenceAlleleVCF=\"GTC\" alternateAlleleVCF=\"G\"/>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"7\" Accession=\"NC_000007.13\" 
start=\"42018228\"\n                                      stop=\"42018229\" display_start=\"42018228\" display_stop=\"42018229\" variantLength=\"2\"\n                                      positionVCF=\"42018227\" referenceAlleleVCF=\"GTC\" alternateAlleleVCF=\"G\"/>\n                    <MeasureRelationship Type=\"within single gene\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">GLI family zinc finger 3</ElementValue>\n                        </Name>\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">GLI3</ElementValue>\n                        </Symbol>\n                        <AttributeSet>\n                            <Attribute dateValue=\"2012-03-22\" Type=\"Haploinsufficiency\">Sufficient evidence for dosage\n                                pathogenicity\n                            </Attribute>\n                            <Citation>\n                                <URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=GLI3</URL>\n                            </Citation>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute dateValue=\"2012-03-22\" Type=\"Triplosensitivity\">No evidence available</Attribute>\n                            <Citation>\n                                <URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=GLI3</URL>\n                            </Citation>\n                        </AttributeSet>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"7\" Accession=\"NC_000007.14\" start=\"41960949\"\n                                          stop=\"42237209\" display_start=\"41960949\" display_stop=\"42237209\" Strand=\"-\"/>\n                        <SequenceLocation Assembly=\"GRCh37\" 
AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                          AssemblyStatus=\"previous\" Chr=\"7\" Accession=\"NC_000007.13\" start=\"42000546\"\n                                          stop=\"42276617\" display_start=\"42000546\" display_stop=\"42276617\"\n                                          variantLength=\"276072\" Strand=\"-\"/>\n                        <XRef ID=\"2737\" DB=\"Gene\"/>\n                        <XRef Type=\"MIM\" ID=\"165240\" DB=\"OMIM\"/>\n                        <XRef ID=\"HGNC:4319\" DB=\"HGNC\"/>\n                    </MeasureRelationship>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">22428873</ID>\n                    </Citation>\n                    <XRef Type=\"Allelic variant\" ID=\"165240.0022\" DB=\"OMIM\"/>\n                    <XRef Type=\"rs\" ID=\"398122899\" DB=\"dbSNP\"/>\n                </Measure>\n                <Name>\n                    <ElementValue Type=\"Preferred\">NM_000168.6(GLI3):c.1616_1617del (p.Arg539fs)</ElementValue>\n                </Name>\n                <XRef ID=\"CA130348\" DB=\"ClinGen\"/>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\" ID=\"3817\">\n                <Trait ID=\"11262\" Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Postaxial polydactyly, type A1/B</ElementValue>\n                    </Name>\n                    <Name>\n                        <ElementValue Type=\"Alternate\">RECLASSIFIED - VARIANT OF UNKNOWN SIGNIFICANCE</ElementValue>\n                        <XRef Type=\"Allelic variant\" ID=\"165240.0009\" DB=\"OMIM\"/>\n                    </Name>\n                    <XRef ID=\"C4016298\" DB=\"MedGen\"/>\n                </Trait>\n            </TraitSet>\n        </ReferenceClinVarAssertion>\n        <ClinVarAssertion ID=\"93666\">\n            <ClinVarSubmissionID localKey=\"165240.0022_POSTAXIAL POLYDACTYLY, TYPE A1/B\" 
submitter=\"OMIM\"\n                                 submitterDate=\"2013-02-11\"\n                                 title=\"GLI3, 2-BP DEL, 1615GA_POSTAXIAL POLYDACTYLY, TYPE A1/B\"/>\n            <ClinVarAccession Acc=\"SCV000056470\" Version=\"1\" Type=\"SCV\" OrgID=\"3\" OrganizationCategory=\"resource\"\n                              OrgType=\"primary\" DateUpdated=\"2019-03-31\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2012-11-01\">\n                <ReviewStatus>no assertion criteria provided</ReviewStatus>\n                <Description>Pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ExternalID DB=\"OMIM\" ID=\"165240.0022\" Type=\"Allelic variant\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species>human</Species>\n                    <AffectedStatus>not provided</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>literature only</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"Description\">In affected members of a 3-generation nonconsanguineous Saudi Arabian\n                        family with postaxial polydactyly (174200), Al-Qattan (2012) identified heterozygosity for a\n                        2-bp deletion (1615delGA) in the GLI3 gene, predicted to cause a frameshift resulting in a\n                        premature termination codon (R539Tfs*12). 
Al-Qattan (2012) noted that although this frameshift\n                        predicts truncation in the N-terminal part of the gene and a GCPS phenotype would be expected,\n                        none of the family members had craniofacial features.\n                    </Attribute>\n                    <Citation>\n                        <ID Source=\"PubMed\">22428873</ID>\n                    </Citation>\n                    <XRef DB=\"OMIM\" ID=\"174200\" Type=\"MIM\"/>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">GLI3, 2-BP DEL, 1615GA</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Type=\"NonHGVS\">2-BP DEL, 1615GA</Attribute>\n                    </AttributeSet>\n                    <MeasureRelationship Type=\"variant in gene\">\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">GLI3</ElementValue>\n                        </Symbol>\n                    </MeasureRelationship>\n                    <XRef DB=\"OMIM\" ID=\"165240.0022\" Type=\"Allelic variant\"/>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">POSTAXIAL POLYDACTYLY, TYPE A1/B</ElementValue>\n                    </Name>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n    </ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000038438.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"47093208\"><RecordStatus>current</RecordStatus><Title>NM_005228.5(EGFR):c.2543C&gt;T (p.Pro848Leu) AND not specified</Title><ReferenceClinVarAssertion DateCreated=\"2013-05-02\" DateLastUpdated=\"2019-11-02\" ID=\"110285\"><ClinVarAccession Acc=\"RCV000038438\" Version=\"2\" Type=\"RCV\" DateUpdated=\"2019-11-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2013-07-11\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Likely benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>somatic</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"47941637\"><Attribute integerValue=\"3\" Type=\"VariantAlleles\" /></ObservedData><ObservedData ID=\"47941637\"><Attribute integerValue=\"3\" Type=\"NumFamiliesWithVariant\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"45282\" Acc=\"VCV000045282\" Version=\"1\"><Measure Type=\"single nucleotide variant\" ID=\"54449\"><Name><ElementValue Type=\"Preferred\">NM_005228.5(EGFR):c.2543C&gt;T (p.Pro848Leu)</ElementValue></Name><AttributeSet><Attribute Accession=\"LRG_304t1\" Change=\"c.2543C&gt;T\" Type=\"HGVS, coding, LRG\">LRG_304t1:c.2543C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001346941\" Version=\"2\" Change=\"c.1742C&gt;T\" Type=\"HGVS, coding, RefSeq\">NM_001346941.2:c.1742C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001346900\" Version=\"2\" Change=\"c.2384C&gt;T\" Type=\"HGVS, coding, 
RefSeq\">NM_001346900.2:c.2384C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001346897\" Version=\"2\" Change=\"c.2408C&gt;T\" Type=\"HGVS, coding, RefSeq\">NM_001346897.2:c.2408C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001346899\" Version=\"1\" Change=\"c.2408C&gt;T\" Type=\"HGVS, coding, RefSeq\">NM_001346899.1:c.2408C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001346898\" Version=\"2\" Change=\"c.2543C&gt;T\" Type=\"HGVS, coding, RefSeq\">NM_001346898.2:c.2543C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_005228\" Version=\"5\" Change=\"c.2543C&gt;T\" Type=\"HGVS, coding, RefSeq\">NM_005228.5:c.2543C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_304\" Change=\"g.177761C&gt;T\" Type=\"HGVS, genomic, LRG\">LRG_304:g.177761C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_007726\" Version=\"3\" Change=\"g.177761C&gt;T\" Type=\"HGVS, genomic, RefSeqGene\">NG_007726.3:g.177761C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000007\" Version=\"14\" Change=\"g.55191792C&gt;T\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000007.14:g.55191792C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000007\" Version=\"13\" Change=\"g.55259485C&gt;T\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000007.13:g.55259485C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Type=\"HGVS, incomplete\">c.2543C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_005228\" Version=\"3\" Change=\"c.2543C&gt;T\" Type=\"HGVS, previous\">NM_005228.3:c.2543C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001333870\" Version=\"1\" Change=\"p.Pro581Leu\" Type=\"HGVS, protein, RefSeq\">NP_001333870.1:p.Pro581Leu</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001333829\" Version=\"1\" 
Change=\"p.Pro795Leu\" Type=\"HGVS, protein, RefSeq\">NP_001333829.1:p.Pro795Leu</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001333826\" Version=\"1\" Change=\"p.Pro803Leu\" Type=\"HGVS, protein, RefSeq\">NP_001333826.1:p.Pro803Leu</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001333828\" Version=\"1\" Change=\"p.Pro803Leu\" Type=\"HGVS, protein, RefSeq\">NP_001333828.1:p.Pro803Leu</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001333827\" Version=\"1\" Change=\"p.Pro848Leu\" Type=\"HGVS, protein, RefSeq\">NP_001333827.1:p.Pro848Leu</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_005219\" Version=\"2\" Change=\"p.Pro848Leu\" Type=\"HGVS, protein, RefSeq\">NP_005219.2:p.Pro848Leu</Attribute></AttributeSet><AttributeSet><Attribute Type=\"Location\">NM_005228.3:exon 21</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001346897.2:c.2408C&gt;T\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001346898.2:c.2543C&gt;T\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001346899.1:c.2408C&gt;T\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001346900.2:c.2384C&gt;T\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001346941.2:c.1742C&gt;T\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense 
variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_005228.5:c.2543C&gt;T\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">P581L</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">P795L</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">P803L</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">P848L</Attribute></AttributeSet><AlleleFrequencyList><AlleleFrequency Value=\"0.00046\" Source=\"NHLBI Exome Sequencing Project (ESP) Exome Variant Server\" /><AlleleFrequency Value=\"0.00046\" Source=\"Exome Aggregation Consortium (ExAC)\" /><AlleleFrequency Value=\"0.00025\" Source=\"The Genome Aggregation Database (gnomAD)\" /><AlleleFrequency Value=\"0.00027\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\" /></AlleleFrequencyList><CytogeneticLocation>7p11.2</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"7\" Accession=\"NC_000007.14\" start=\"55191792\" stop=\"55191792\" display_start=\"55191792\" display_stop=\"55191792\" variantLength=\"1\" positionVCF=\"55191792\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"T\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"7\" Accession=\"NC_000007.13\" start=\"55259485\" stop=\"55259485\" display_start=\"55259485\" display_stop=\"55259485\" variantLength=\"1\" positionVCF=\"55259485\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"T\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">epidermal growth factor receptor</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">EGFR</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"7\" Accession=\"NC_000007.14\" 
start=\"55019017\" stop=\"55211628\" display_start=\"55019017\" display_stop=\"55211628\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"7\" Accession=\"NC_000007.13\" start=\"55086724\" stop=\"55275030\" display_start=\"55086724\" display_stop=\"55275030\" variantLength=\"188307\" Strand=\"+\" /><XRef ID=\"1956\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"131550\" DB=\"OMIM\" /><XRef ID=\"HGNC:3236\" DB=\"HGNC\" /></MeasureRelationship><XRef Type=\"rs\" ID=\"148934350\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_005228.5(EGFR):c.2543C&gt;T (p.Pro848Leu)</ElementValue></Name><XRef ID=\"CA135927\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"9590\"><Trait ID=\"16789\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not specified</ElementValue></Name><Name><ElementValue Type=\"Alternate\">AllHighlyPenetrant</ElementValue></Name><AttributeSet><Attribute Type=\"public definition\">The term 'not specified' was created for use in ClinVar so that submitters can convey the concept that a variant is benign, likely benign, or of uncertain significance for an unspecified set of disorders.  
This usage was introduced in 2014 to replace AllHighlyPenetrant.</Attribute></AttributeSet><XRef ID=\"CN169374\" DB=\"MedGen\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"103310\" SubmissionName=\"LMM_all.variants_NCBI_3.16.2013\"><ClinVarSubmissionID localKey=\"8729025|Not Specified\" submitter=\"Laboratory for Molecular Medicine,Partners HealthCare Personalized Medicine\" submitterDate=\"2015-01-29\" /><ClinVarAccession Acc=\"SCV000062110\" Version=\"4\" Type=\"SCV\" OrgID=\"21766\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2013-07-11\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Likely benign</Description><Citation><ID Source=\"PubMed\">17877814</ID></Citation><Citation><ID Source=\"PubMed\">17285735</ID></Citation><Citation><ID Source=\"PubMed\">22848293</ID></Citation><Comment Type=\"public\">Pro848Leu variant in Exon 21 of EGFR: This variant is not expected to have clinical significance because iIn vitro studies suggest that this variant does not activate EGFR activity and does not render the protein sensitive to tyrosine kinase inhibitors (TKIs) (De Gunst 2007, Han 2011). 
It has been previously identified in both tumor and normal tissue in an individuals with lung cancer (Sequist 2007), and it has been identified in 0.06% (6/8600) of European American chromosomes by the NHLBI Exome Sequencing Project (http://evs.gs.washington.edu/EVS/; dbSNP rs148934350).</Comment></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"Laboratory for Molecular Medicine (Partners HealthCare Personalized Medicine)\" ID=\"8729025\" /><AttributeSet><Attribute Type=\"AssertionMethod\">LMM Criteria</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">24033266</ID></Citation></AttributeSet><ObservedIn><Sample><Origin>somatic</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus><FamilyData NumFamiliesWithVariant=\"3\" /></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"3\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue Type=\"Alternate\">NM_005228.3:c.2543C&gt;T</ElementValue></Name><Name><ElementValue Type=\"Alternate\">p.Pro848Leu</ElementValue></Name><AttributeSet><Attribute Type=\"Location\">NM_005228.3:EXON 21</Attribute></AttributeSet><AttributeSet><Attribute Type=\"HGVS\">NC_000007.13:g.55259485C&gt;T</Attribute></AttributeSet><SequenceLocation Assembly=\"GRCh37\" Chr=\"7\" alternateAllele=\"T\" referenceAllele=\"C\" start=\"55259485\" stop=\"55259485\" variantLength=\"1\" /><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">EGFR</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Not Specified</ElementValue></Name></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000050055.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\"\n            xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n    <ClinVarSet ID=\"47101830\">\n        <RecordStatus>current</RecordStatus>\n        <Title>NM_017890.4(VPS13B):c.11825_11827dup (p.Asp3942dup) AND Cohen syndrome</Title>\n        <ReferenceClinVarAssertion DateCreated=\"2013-07-22\" DateLastUpdated=\"2019-11-02\" ID=\"132715\">\n            <ClinVarAccession Acc=\"RCV000050055\" Version=\"3\" Type=\"RCV\" DateUpdated=\"2019-11-02\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2018-09-26\">\n                <ReviewStatus>criteria provided, multiple submitters, no conflicts</ReviewStatus>\n                <Description>Uncertain significance</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>unknown</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData ID=\"47956212\">\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n                <ObservedData ID=\"47956212\">\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <ObservedIn>\n                <Sample>\n                    <Origin>not provided</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    
<AffectedStatus>not provided</AffectedStatus>\n                    <NumberTested>1</NumberTested>\n                </Sample>\n                <Method>\n                    <MethodType>literature only</MethodType>\n                </Method>\n                <ObservedData ID=\"47956213\">\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\" ID=\"56642\" Acc=\"VCV000056642\" Version=\"3\">\n                <Measure Type=\"Duplication\" ID=\"71281\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">NM_017890.4(VPS13B):c.11825_11827dup (p.Asp3942dup)\n                        </ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_017890\" Version=\"4\" Change=\"c.11825_11827dupATG\" Type=\"HGVS, coding\">\n                            NM_017890.4:c.11825_11827dupATG\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"LRG_351t2\" Change=\"c.11750_11752dup\" Type=\"HGVS, coding, LRG\">\n                            LRG_351t2:c.11750_11752dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"LRG_351t1\" Change=\"c.11825_11827dup\" Type=\"HGVS, coding, LRG\">\n                            LRG_351t1:c.11825_11827dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_152564\" Version=\"4\" Change=\"c.11750_11752dup\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_152564.4:c.11750_11752dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n   
                     <Attribute Accession=\"NM_017890\" Version=\"4\" Change=\"c.11825_11827dup\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_017890.4:c.11825_11827dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"LRG_351\" Change=\"g.867157_867159dup\" Type=\"HGVS, genomic, LRG\">\n                            LRG_351:g.867157_867159dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_007098\" Version=\"2\" Change=\"g.867157_867159dup\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_007098.2:g.867157_867159dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000008\" Version=\"11\" Change=\"g.99875422_99875424dup\"\n                                   Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000008.11:g.99875422_99875424dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000008\" Version=\"10\" Change=\"g.100887650_100887652dup\"\n                                   Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">\n                            NC_000008.10:g.100887650_100887652dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_017890\" Version=\"3\" Change=\"c.11825_11827dupATG\" Type=\"HGVS, previous\">\n                            NM_017890.3:c.11825_11827dupATG\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute 
Accession=\"LRG_351p2\" Change=\"p.Asp3917dup\" Type=\"HGVS, protein\">\n                            LRG_351p2:p.Asp3917dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"LRG_351p1\" Change=\"p.Asp3942dup\" Type=\"HGVS, protein\">\n                            LRG_351p1:p.Asp3942dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_689777\" Version=\"3\" Change=\"p.Asp3917dup\" Type=\"HGVS, protein, RefSeq\">\n                            NP_689777.3:p.Asp3917dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_060360\" Version=\"3\" Change=\"p.Asp3942dup\" Type=\"HGVS, protein, RefSeq\">\n                            NP_060360.3:p.Asp3942dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"Location\">NM_017890.4:exon 62</Attribute>\n                        <XRef ID=\"CI110618\" DB=\"HGMD\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">inframe_insertion</Attribute>\n                        <XRef ID=\"SO:0001821\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_017890.4:c.11825_11827dup\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">inframe_insertion</Attribute>\n                        <XRef ID=\"SO:0001821\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_152564.4:c.11750_11752dup\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <GlobalMinorAlleleFrequency 
Value=\"0.00220\" Source=\"1000 Genomes Project\" MinorAllele=\"GATGATG\"/>\n                    <CytogeneticLocation>8q22.2</CytogeneticLocation>\n                    <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                      AssemblyStatus=\"current\" Chr=\"8\" Accession=\"NC_000008.11\" start=\"99875422\"\n                                      stop=\"99875424\" display_start=\"99875422\" display_stop=\"99875424\" variantLength=\"3\"\n                                      positionVCF=\"99875420\" referenceAlleleVCF=\"A\" alternateAlleleVCF=\"AGAT\"/>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"8\" Accession=\"NC_000008.10\" start=\"100887650\"\n                                      stop=\"100887652\" display_start=\"100887650\" display_stop=\"100887652\"\n                                      variantLength=\"3\" positionVCF=\"100887648\" referenceAlleleVCF=\"A\"\n                                      alternateAlleleVCF=\"AGAT\"/>\n                    <MeasureRelationship Type=\"within single gene\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">vacuolar protein sorting 13 homolog B</ElementValue>\n                        </Name>\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">VPS13B</ElementValue>\n                        </Symbol>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"8\" Accession=\"NC_000008.11\" start=\"99013266\"\n                                          stop=\"99877586\" display_start=\"99013266\" display_stop=\"99877586\" Strand=\"+\"/>\n                        <SequenceLocation Assembly=\"GRCh37\" 
AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                          AssemblyStatus=\"previous\" Chr=\"8\" Accession=\"NC_000008.10\" start=\"100025493\"\n                                          stop=\"100889813\" display_start=\"100025493\" display_stop=\"100889813\"\n                                          variantLength=\"864321\" Strand=\"+\"/>\n                        <XRef ID=\"157680\" DB=\"Gene\"/>\n                        <XRef Type=\"MIM\" ID=\"607817\" DB=\"OMIM\"/>\n                        <XRef ID=\"HGNC:2183\" DB=\"HGNC\"/>\n                    </MeasureRelationship>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">20921020</ID>\n                    </Citation>\n                    <XRef Type=\"rs\" ID=\"386834068\" DB=\"dbSNP\"/>\n                </Measure>\n                <Name>\n                    <ElementValue Type=\"Preferred\">NM_017890.4(VPS13B):c.11825_11827dup (p.Asp3942dup)</ElementValue>\n                </Name>\n                <XRef ID=\"CA223354\" DB=\"ClinGen\"/>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\" ID=\"723\">\n                <Trait ID=\"825\" Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Cohen syndrome</ElementValue>\n                        <XRef ID=\"Cohen+Syndrome/1715\" DB=\"Genetic Alliance\"/>\n                        <XRef ID=\"cohen-syndrome\" DB=\"Genetics Home Reference\"/>\n                        <XRef ID=\"6126\" DB=\"Office of Rare Diseases\"/>\n                        <XRef ID=\"56604005\" DB=\"SNOMED CT\"/>\n                    </Name>\n                    <Name>\n                        <ElementValue Type=\"Alternate\">Cutis verticis gyrata, retinitis pigmentosa, and sensorineural\n                            deafness\n                        </ElementValue>\n                    </Name>\n                    <Symbol>\n                        <ElementValue 
Type=\"Preferred\">COH1</ElementValue>\n                        <XRef Type=\"MIM\" ID=\"216550\" DB=\"OMIM\"/>\n                        <XRef ID=\"6126\" DB=\"Office of Rare Diseases\"/>\n                    </Symbol>\n                    <Symbol>\n                        <ElementValue Type=\"Alternate\">CHS1</ElementValue>\n                        <XRef Type=\"MIM\" ID=\"216550\" DB=\"OMIM\"/>\n                    </Symbol>\n                    <Symbol>\n                        <ElementValue Type=\"Alternate\">COH</ElementValue>\n                        <XRef Type=\"MIM\" ID=\"216550\" DB=\"OMIM\"/>\n                    </Symbol>\n                    <AttributeSet>\n                        <Attribute Type=\"public definition\">Cohen syndrome is characterized by failure to thrive in\n                            infancy and childhood; truncal obesity in the teen years; early-onset hypotonia and\n                            developmental delays; microcephaly developing during the first year of life; moderate to\n                            profound psychomotor retardation; progressive retinochoroidal dystrophy and high myopia;\n                            neutropenia in many with recurrent infections and aphthous ulcers in some; a cheerful\n                            disposition; joint hypermobility; and characteristic facial features.\n                        </Attribute>\n                        <XRef ID=\"NBK1482\" DB=\"GeneReviews\"/>\n                    </AttributeSet>\n                    <Citation Type=\"review\" Abbrev=\"GeneReviews\">\n                        <ID Source=\"PubMed\">20301655</ID>\n                        <ID Source=\"BookShelf\">NBK1482</ID>\n                    </Citation>\n                    <XRef ID=\"C0265223\" DB=\"MedGen\"/>\n                    <XRef ID=\"193\" DB=\"Orphanet\"/>\n                    <XRef Type=\"MIM\" ID=\"216550\" DB=\"OMIM\"/>\n                </Trait>\n            </TraitSet>\n        
</ReferenceClinVarAssertion>\n        <ClinVarAssertion ID=\"131853\" SubmissionName=\"FinDis_mutations\">\n            <ClinVarSubmissionID localKey=\"FINDIS548\"\n                                 submitter=\"Juha Muilu Group; Institute for Molecular Medicine Finland (FIMM)\"\n                                 submitterDate=\"2013-05-19\"/>\n            <ClinVarAccession Acc=\"SCV000082464\" Version=\"1\" Type=\"SCV\" OrgID=\"500116\" OrganizationCategory=\"laboratory\"\n                              OrgType=\"primary\" DateUpdated=\"2019-03-31\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance>\n                <ReviewStatus>no assertion criteria provided</ReviewStatus>\n                <Description>probable-pathogenic</Description>\n                <Comment Type=\"ConvertedByNCBI\">Converted during submission to Likely pathogenic.</Comment>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>not provided</Origin>\n                    <Species>human</Species>\n                    <AffectedStatus>not provided</AffectedStatus>\n                    <NumberTested>1</NumberTested>\n                </Sample>\n                <Method>\n                    <MethodType>not provided</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS\">NM_017890.4:c.11825_11827dupATG</Attribute>\n                    </AttributeSet>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">20921020</ID>\n                    </Citation>\n                </Measure>\n         
   </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Cohen syndrome</ElementValue>\n                    </Name>\n                    <XRef DB=\"OMIM\" ID=\"216550\" Type=\"MIM\"/>\n                </Trait>\n            </TraitSet>\n            <Comment Type=\"public\">FinDis database variant: This variant was not found or characterized by our\n                laboratory, data were collected from public sources: see reference\n            </Comment>\n        </ClinVarAssertion>\n        <ClinVarAssertion ID=\"1762906\" SubmissionName=\"SUB4910108\">\n            <ClinVarSubmissionID localKey=\"626|OMIM:216550\" submittedAssembly=\"GRCh37\"\n                                 submitter=\"Center for Genomics, Ann and Robert H. Lurie Children's Hospital of Chicago\"\n                                 submitterDate=\"2018-12-12\"/>\n            <ClinVarAccession Acc=\"SCV000899088\" Version=\"1\" Type=\"SCV\" OrgID=\"506344\" OrganizationCategory=\"laboratory\"\n                              OrgType=\"primary\" DateUpdated=\"2019-04-24\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2018-02-02\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Uncertain significance</Description>\n                <Comment Type=\"public\">VPS13B NM_017890.4 exon 62 p.Asp3942_dup (c.11825_11827dupATG): This variant has\n                    been reported in the literature as a compound heterozygote (in trans with a multi-exon deletion of\n                    this gene) in 1 individual with a diagnosis of Cohen syndrome (Rivera-Brugues 2011 PMID:20921020,\n                    gene identified as alternate name COH1). 
However, this variant is present in 0.4% (100/24024) of\n                    African alleles in the Genome Aggregation Database (http://gnomad.broadinstitute.org/rs558633643).\n                    This variant is present in ClinVar (Variation ID:56642). Evolutionary conservation and computational\n                    predictive tools for this variant are limited or unavailable. This variant represents a duplication\n                    of 1 amino acid at position 3942 and is not predicted to alter the reading frame. However, the\n                    effect of this variant on the protein is unclear. In summary, data on this variant is insufficient\n                    for disease classification. Therefore, the clinical significance of this variant is uncertain.\n                </Comment>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ExternalID DB=\"Center for Genomics\" ID=\"626\"/>\n            <AttributeSet>\n                <Attribute Type=\"AssertionMethod\">ACMG Guidelines, 2015</Attribute>\n                <Citation>\n                    <ID Source=\"PubMed\">25741868</ID>\n                </Citation>\n            </AttributeSet>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>unknown</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <SequenceLocation Assembly=\"GRCh37\" Chr=\"8\" alternateAllele=\"GAT\" referenceAllele=\"-\"\n                                      
start=\"100887648\" stop=\"100887649\" variantLength=\"2\"/>\n                    <MeasureRelationship Type=\"variant in gene\">\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">VPS13B</ElementValue>\n                        </Symbol>\n                    </MeasureRelationship>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <XRef DB=\"OMIM\" ID=\"216550\" Type=\"MIM\"/>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n        <ClinVarAssertion ID=\"1858348\" SubmissionName=\"SUB5371970\">\n            <ClinVarSubmissionID localKey=\"940775|MedGen:C0265223\" submittedAssembly=\"GRCh37\" submitter=\"Invitae\"\n                                 submitterDate=\"2019-03-28\"/>\n            <ClinVarAccession Acc=\"SCV000963968\" Version=\"1\" Type=\"SCV\" OrgID=\"500031\" OrganizationCategory=\"laboratory\"\n                              OrgType=\"primary\" DateUpdated=\"2019-08-13\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2018-09-26\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Uncertain significance</Description>\n                <Comment>This variant, c.11825_11827dupATG, results in the insertion of 1 amino acid(s) to the VPS13B\n                    protein (p.Asp3942dup), but otherwise preserves the integrity of the reading frame. This variant is\n                    present in population databases (rs558633643, ExAC 0.4%). This variant has been observed in an\n                    individual affected with Cohen syndrome (PMID:Â¬â€ 20921020). ClinVar contains an entry for this\n                    variant (Variation ID: 56642). 
Experimental studies and prediction algorithms are not available for\n                    this variant, and the functional significance of the affected amino acid(s) is currently unknown. In\n                    summary, the available evidence is currently insufficient to determine the role of this variant in\n                    disease. Therefore, it has been classified as a Variant of Uncertain Significance.\n                </Comment>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ExternalID DB=\"Invitae\" ID=\"940775\"/>\n            <AttributeSet>\n                <Attribute Type=\"AssertionMethod\">Invitae Variant Classification Sherloc (09022015)</Attribute>\n                <Citation>\n                    <ID Source=\"PubMed\">28492532</ID>\n                </Citation>\n            </AttributeSet>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>unknown</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS\">NM_017890.4:c.11825_11827dupATG</Attribute>\n                    </AttributeSet>\n                    <MeasureRelationship Type=\"variant in gene\">\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">VPS13B</ElementValue>\n                        </Symbol>\n                    </MeasureRelationship>\n                </Measure>\n            </MeasureSet>\n     
       <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Cohen syndrome</ElementValue>\n                    </Name>\n                    <XRef DB=\"MedGen\" ID=\"C0265223\" Type=\"CUI\"/>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n    </ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000073701.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"48294456\"><RecordStatus>current</RecordStatus><Title>NM_001127511.3(APC):c.165+21247G&gt;C AND Familial colorectal cancer</Title><ReferenceClinVarAssertion DateCreated=\"2013-10-28\" DateLastUpdated=\"2019-12-15\" ID=\"182406\"><ClinVarAccession Acc=\"RCV000073701\" Version=\"1\" Type=\"RCV\" DateUpdated=\"2019-12-16\" /><RecordStatus>current</RecordStatus><ClinicalSignificance><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>other</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>unknown</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus><NumberTested>1</NumberTested></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData ID=\"49313773\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"82712\" Acc=\"VCV000082712\" Version=\"1\"><Measure Type=\"single nucleotide variant\" ID=\"93603\"><Name><ElementValue Type=\"Preferred\">NM_001127511.3(APC):c.165+21247G&gt;C</ElementValue></Name><AttributeSet><Attribute Accession=\"NM_001354895\" Version=\"2\" Change=\"c.-19+21247G&gt;C\" Type=\"HGVS, coding, RefSeq\">NM_001354895.2:c.-19+21247G&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001127511\" Version=\"3\" Change=\"c.165+21247G&gt;C\" Type=\"HGVS, coding, RefSeq\">NM_001127511.3:c.165+21247G&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001354897\" Version=\"2\" Change=\"c.165+21247G&gt;C\" Type=\"HGVS, coding, 
RefSeq\">NM_001354897.2:c.165+21247G&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001354902\" Version=\"2\" Change=\"c.165+21247G&gt;C\" Type=\"HGVS, coding, RefSeq\">NM_001354902.2:c.165+21247G&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_130\" Change=\"g.41609G&gt;C\" Type=\"HGVS, genomic, LRG\">LRG_130:g.41609G&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_008481\" Version=\"4\" Change=\"g.41609G&gt;C\" Type=\"HGVS, genomic, RefSeqGene\">NG_008481.4:g.41609G&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000005\" Version=\"10\" Change=\"g.112729129G&gt;C\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000005.10:g.112729129G&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000005\" Version=\"9\" Change=\"g.112064826G&gt;C\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000005.9:g.112064826G&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000005\" Version=\"8\" Change=\"g.112092725G&gt;C\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"36\">NC_000005.8:g.112092725G&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">intron variant</Attribute><XRef ID=\"SO:0001627\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001127511.3:c.165+21247G&gt;C\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">intron variant</Attribute><XRef ID=\"SO:0001627\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001354895.2:c.-19+21247G&gt;C\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">intron variant</Attribute><XRef ID=\"SO:0001627\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001354897.2:c.165+21247G&gt;C\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">intron variant</Attribute><XRef ID=\"SO:0001627\" DB=\"Sequence Ontology\" /><XRef 
ID=\"NM_001354902.2:c.165+21247G&gt;C\" DB=\"RefSeq\" /></AttributeSet><AlleleFrequencyList><AlleleFrequency Value=\"0.98742\" Source=\"1000 Genomes Project\" /><AlleleFrequency Value=\"0.98618\" Source=\"The Genome Aggregation Database (gnomAD)\" /><AlleleFrequency Value=\"0.98558\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\" /></AlleleFrequencyList><GlobalMinorAlleleFrequency Value=\"0.01258\" Source=\"1000 Genomes Project\" MinorAllele=\"G\" /><CytogeneticLocation>5q22.2</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"5\" Accession=\"NC_000005.10\" start=\"112729129\" stop=\"112729129\" display_start=\"112729129\" display_stop=\"112729129\" variantLength=\"1\" positionVCF=\"112729129\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"C\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"5\" Accession=\"NC_000005.9\" start=\"112064826\" stop=\"112064826\" display_start=\"112064826\" display_stop=\"112064826\" variantLength=\"1\" positionVCF=\"112064826\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"C\" /><SequenceLocation Assembly=\"NCBI36\" AssemblyAccessionVersion=\"GCF_000001405.12\" AssemblyStatus=\"previous\" Chr=\"5\" Accession=\"NC_000005.8\" start=\"112092725\" stop=\"112092725\" display_start=\"112092725\" display_stop=\"112092725\" variantLength=\"1\" positionVCF=\"112092725\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"C\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">APC regulator of WNT signaling pathway</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">APC</ElementValue></Symbol><AttributeSet><Attribute dateValue=\"2012-07-06\" Type=\"Haploinsufficiency\">Sufficient evidence for dosage 
pathogenicity</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=APC</URL></Citation></AttributeSet><AttributeSet><Attribute dateValue=\"2012-07-06\" Type=\"Triplosensitivity\">No evidence available</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=APC</URL></Citation></AttributeSet><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"5\" Accession=\"NC_000005.10\" start=\"112707498\" stop=\"112846239\" display_start=\"112707498\" display_stop=\"112846239\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"5\" Accession=\"NC_000005.9\" start=\"112043201\" stop=\"112181935\" display_start=\"112043201\" display_stop=\"112181935\" variantLength=\"138735\" Strand=\"+\" /><XRef ID=\"324\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"611731\" DB=\"OMIM\" /><XRef ID=\"HGNC:583\" DB=\"HGNC\" /><Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations of 2013 (PubMed 23788249) for reporting incidental findings in exons.</Comment><Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations of 2016 (PubMed 27854360) for reporting incidental findings in exons.</Comment></MeasureRelationship><XRef Type=\"rs\" ID=\"464338\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_001127511.3(APC):c.165+21247G&gt;C</ElementValue></Name><XRef ID=\"CA023527\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"1590\"><Trait ID=\"986\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Familial colorectal cancer</ElementValue><XRef ID=\"Familial+Colorectal+Cancer/2734\" DB=\"Genetic Alliance\" /><XRef ID=\"8533\" DB=\"Office of Rare Diseases\" /></Name><Name><ElementValue Type=\"Alternate\">COLORECTAL CANCER</ElementValue><XRef Type=\"MIM\" ID=\"114500\" 
DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"172411.0001\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"191170.0038\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"516004.0001\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"516020.0003\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"516030.0005\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"516030.0010\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"516030.0011\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"516040.0002\" DB=\"OMIM\" /></Name><Name><ElementValue Type=\"Alternate\">COLON CANCER</ElementValue><XRef Type=\"MIM\" ID=\"114500\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"191170.0021\" DB=\"OMIM\" /></Name><Symbol><ElementValue Type=\"Preferred\">CRC</ElementValue><XRef Type=\"MIM\" ID=\"114500\" DB=\"OMIM\" /></Symbol><AttributeSet><Attribute Type=\"disease mechanism\" integerValue=\"273\">loss of function</Attribute><XRef ID=\"GTR000321103\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000330058\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000333054\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000500363\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000501399\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000501400\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000501401\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000501402\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000501424\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000508787\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000508788\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000508789\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000508790\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000508791\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000508792\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000508793\" DB=\"Genetic Testing Registry (GTR)\" /><XRef 
ID=\"GTR000508794\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000508795\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000508796\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000508797\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000508798\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000508803\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509789\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509791\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000510423\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000512828\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000515534\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000515774\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000519254\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000519255\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000519256\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000519258\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000519259\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000519260\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000519489\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000519490\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520069\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520077\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520080\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520394\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520404\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520406\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000522383\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000522559\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000527981\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000527982\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000527983\" 
DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000527984\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000528534\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000528654\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000530028\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000530120\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000530202\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000552183\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000552245\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000552303\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000558503\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000562230\" DB=\"Genetic Testing Registry (GTR)\" /></AttributeSet><Citation Type=\"Position Statement\" Abbrev=\"EGAPP, 2009\"><ID Source=\"pmc\">2743612</ID></Citation><Citation Type=\"Suggested Reading\" Abbrev=\"AMA/NCHPEG, 2012\"><URL>http://www.nchpeg.org/documents/crc/11-0456%20Fact%20sheets%20(MSI%20and%20IHC%20testing).pdf</URL></Citation><XRef ID=\"CN029768\" DB=\"MedGen\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"167452\" SubmissionName=\"APC_CRC_solexa\"><ClinVarSubmissionID localKey=\"APC_CRC_112092725\" submitter=\"Systems Biology Platform Zhejiang California International NanoSystems Institute\" submitterDate=\"2009-02-04\" /><ClinVarAccession Acc=\"SCV000105292\" Version=\"1\" Type=\"SCV\" OrgID=\"500169\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-08-03\" /><RecordStatus>current</RecordStatus><ClinicalSignificance><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>cancer</Description><Comment>Converted during submission to other.</Comment></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>unknown</Origin><Species>human</Species><AffectedStatus>not 
provided</AffectedStatus><NumberTested>1</NumberTested></Sample><Method><MethodType>not provided</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NC_000005.8:g.112092725G&gt;C</Attribute></AttributeSet></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Familial colorectal cancer</ElementValue></Name><XRef DB=\"OMIM\" ID=\"114500\" Type=\"MIM\" /></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000077146.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\"\n            xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n    <ClinVarSet ID=\"48295638\">\n        <RecordStatus>current</RecordStatus>\n        <Title>NM_007294.3(BRCA1):c.4357+2T&gt;G AND Breast-ovarian cancer, familial 1</Title>\n        <ReferenceClinVarAssertion DateCreated=\"2013-12-23\" DateLastUpdated=\"2019-12-15\" ID=\"190195\">\n            <ClinVarAccession Acc=\"RCV000077146\" Version=\"3\" Type=\"RCV\" DateUpdated=\"2019-12-16\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2012-09-24\">\n                <ReviewStatus>no assertion criteria provided</ReviewStatus>\n                <Description>Pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>yes</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData ID=\"49314787\">\n                    <Attribute integerValue=\"1\" Type=\"VariantAlleles\"/>\n                </ObservedData>\n            </ObservedIn>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>not provided</AffectedStatus>\n                    <NumberTested>1</NumberTested>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n    
            </Method>\n                <ObservedData ID=\"49314987\">\n                    <Attribute integerValue=\"1\" Type=\"VariantAlleles\"/>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\" ID=\"91629\" Acc=\"VCV000091629\" Version=\"3\">\n                <Measure Type=\"single nucleotide variant\" ID=\"97106\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">NM_007294.3(BRCA1):c.4357+2T&gt;G</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Accession=\"LRG_292t1\" Change=\"c.4357+2T&gt;G\" Type=\"HGVS, coding, LRG\">\n                            LRG_292t1:c.4357+2T&gt;G\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_007298\" Version=\"3\" Change=\"c.1048+2T&gt;G\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_007298.3:c.1048+2T&gt;G\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_007299\" Version=\"4\" Change=\"c.1048+2T&gt;G\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_007299.4:c.1048+2T&gt;G\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_007297\" Version=\"4\" Change=\"c.4216+2T&gt;G\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_007297.4:c.4216+2T&gt;G\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_007294\" Version=\"3\" Change=\"c.4357+2T&gt;G\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_007294.3:c.4357+2T&gt;G\n                        </Attribute>\n    
                </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_007300\" Version=\"4\" Change=\"c.4357+2T&gt;G\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_007300.4:c.4357+2T&gt;G\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"LRG_292\" Change=\"g.135582T&gt;G\" Type=\"HGVS, genomic, LRG\">\n                            LRG_292:g.135582T&gt;G\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_005905\" Version=\"2\" Change=\"g.135582T&gt;G\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_005905.2:g.135582T&gt;G\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000017\" Version=\"11\" Change=\"g.43082402A&gt;C\"\n                                   Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000017.11:g.43082402A&gt;C\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000017\" Version=\"10\" Change=\"g.41234419A&gt;C\"\n                                   Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000017.10:g.41234419A&gt;C\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"U14680\" Version=\"1\" Change=\"n.4476+2T&gt;G\" Type=\"HGVS, non-coding\">\n                            U14680.1:n.4476+2T&gt;G\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"Location\">U14680.1:intron 
13</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">splice donor variant</Attribute>\n                        <XRef ID=\"SO:0001575\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_007294.3:c.4357+2T&gt;G\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">splice donor variant</Attribute>\n                        <XRef ID=\"SO:0001575\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_007297.4:c.4216+2T&gt;G\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">splice donor variant</Attribute>\n                        <XRef ID=\"SO:0001575\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_007298.3:c.1048+2T&gt;G\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">splice donor variant</Attribute>\n                        <XRef ID=\"SO:0001575\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_007299.4:c.1048+2T&gt;G\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">splice donor variant</Attribute>\n                        <XRef ID=\"SO:0001575\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_007300.4:c.4357+2T&gt;G\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"nucleotide change\">IVS13+2T&gt;G</Attribute>\n                    </AttributeSet>\n                    <CytogeneticLocation>17q21.31</CytogeneticLocation>\n                    <SequenceLocation Assembly=\"GRCh38\" 
AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                      AssemblyStatus=\"current\" Chr=\"17\" Accession=\"NC_000017.11\" start=\"43082402\"\n                                      stop=\"43082402\" display_start=\"43082402\" display_stop=\"43082402\" variantLength=\"1\"\n                                      positionVCF=\"43082402\" referenceAlleleVCF=\"A\" alternateAlleleVCF=\"C\"/>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"17\" Accession=\"NC_000017.10\" start=\"41234419\"\n                                      stop=\"41234419\" display_start=\"41234419\" display_stop=\"41234419\" variantLength=\"1\"\n                                      positionVCF=\"41234419\" referenceAlleleVCF=\"A\" alternateAlleleVCF=\"C\"/>\n                    <MeasureRelationship Type=\"within single gene\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">BRCA1 DNA repair associated</ElementValue>\n                        </Name>\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">BRCA1</ElementValue>\n                        </Symbol>\n                        <AttributeSet>\n                            <Attribute dateValue=\"2015-11-16\" Type=\"Haploinsufficiency\">Sufficient evidence for dosage\n                                pathogenicity\n                            </Attribute>\n                            <Citation>\n                                <URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=BRCA1</URL>\n                            </Citation>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute dateValue=\"2015-11-16\" Type=\"Triplosensitivity\">No evidence available</Attribute>\n                            <Citation>\n                  
              <URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=BRCA1</URL>\n                            </Citation>\n                        </AttributeSet>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"17\" Accession=\"NC_000017.11\" start=\"43044295\"\n                                          stop=\"43125364\" display_start=\"43044295\" display_stop=\"43125364\" Strand=\"-\"/>\n                        <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                          AssemblyStatus=\"previous\" Chr=\"17\" Accession=\"NC_000017.10\" start=\"41196311\"\n                                          stop=\"41277499\" display_start=\"41196311\" display_stop=\"41277499\"\n                                          variantLength=\"81189\" Strand=\"-\"/>\n                        <XRef ID=\"672\" DB=\"Gene\"/>\n                        <XRef Type=\"MIM\" ID=\"113705\" DB=\"OMIM\"/>\n                        <XRef ID=\"HGNC:1100\" DB=\"HGNC\"/>\n                        <Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations\n                            of 2013 (PubMed 23788249) for reporting incidental findings in exons.\n                        </Comment>\n                        <Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations\n                            of 2016 (PubMed 27854360) for reporting incidental findings in exons.\n                        </Comment>\n                    </MeasureRelationship>\n                    <XRef ID=\"4476+2&amp;base_change=T to G\" DB=\"Breast Cancer Information Core (BIC) (BRCA1)\"/>\n                    <XRef Type=\"rs\" ID=\"80358152\" DB=\"dbSNP\"/>\n                </Measure>\n                <Name>\n                    
<ElementValue Type=\"Preferred\">NM_007294.3(BRCA1):c.4357+2T&gt;G</ElementValue>\n                </Name>\n                <XRef ID=\"CA002792\" DB=\"ClinGen\"/>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\" ID=\"1917\">\n                <Trait ID=\"4711\" Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Breast-ovarian cancer, familial 1</ElementValue>\n                        <XRef ID=\"Breast-ovarian+cancer%2C+familial+1/7865\" DB=\"Genetic Alliance\"/>\n                    </Name>\n                    <Name>\n                        <ElementValue Type=\"Alternate\">BREAST-OVARIAN CANCER, FAMILIAL, SUSCEPTIBILITY TO, 1\n                        </ElementValue>\n                        <XRef Type=\"MIM\" ID=\"604370\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0001\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0002\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0003\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0004\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0005\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0006\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0007\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0008\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0009\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0010\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0012\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0013\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0014\" DB=\"OMIM\"/>\n                       
 <XRef Type=\"Allelic variant\" ID=\"113705.0015\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0016\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0017\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0018\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0019\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0020\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0021\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0023\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0024\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0025\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0026\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0027\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0028\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0029\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0030\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0031\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0032\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0033\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0034\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0035\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0036\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0037\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" 
ID=\"113705.0038\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0040\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0041\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"113705.0042\" DB=\"OMIM\"/>\n                    </Name>\n                    <Name>\n                        <ElementValue Type=\"Alternate\">OVARIAN CANCER, SUSCEPTIBILITY TO</ElementValue>\n                        <XRef Type=\"Allelic variant\" ID=\"602667.0001\" DB=\"OMIM\"/>\n                    </Name>\n                    <Name>\n                        <ElementValue Type=\"Alternate\">BREAST CANCER, FAMILIAL, SUSCEPTIBILITY TO, 1</ElementValue>\n                        <XRef ID=\"604370\" DB=\"OMIM\"/>\n                    </Name>\n                    <Name>\n                        <ElementValue Type=\"Alternate\">Breast cancer, familial 1</ElementValue>\n                    </Name>\n                    <Symbol>\n                        <ElementValue Type=\"Preferred\">BROVCA1</ElementValue>\n                        <XRef Type=\"MIM\" ID=\"604370\" DB=\"OMIM\"/>\n                    </Symbol>\n                    <Symbol>\n                        <ElementValue Type=\"Alternate\">HBOC</ElementValue>\n                        <XRef ID=\"GTR000501743\" DB=\"Genetic Testing Registry (GTR)\"/>\n                    </Symbol>\n                    <AttributeSet>\n                        <Attribute Type=\"public definition\">BRCA1- and BRCA2-associated hereditary breast and ovarian\n                            cancer syndrome (HBOC) is characterized by an increased risk for female and male breast\n                            cancer, ovarian cancer (includes fallopian tube and primary peritoneal cancers), and to a\n                            lesser extent other cancers such as prostate cancer, pancreatic cancer, and melanoma\n                            primarily in individuals with a BRCA2 
pathogenic variant. The exact cancer risks differ\n                            slightly depending on whether HBOC is caused by a BRCA1 or BRCA2 pathogenic variant.\n                        </Attribute>\n                        <XRef ID=\"NBK1247\" DB=\"GeneReviews\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"disease mechanism\" integerValue=\"273\">loss of function</Attribute>\n                        <XRef ID=\"GTR000017876\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000021517\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000320777\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000325409\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000330054\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000501743\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000501746\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000501817\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000505644\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000507653\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000507764\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000507864\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000507913\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000507930\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000509001\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000509002\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000509348\" DB=\"Genetic Testing Registry 
(GTR)\"/>\n                        <XRef ID=\"GTR000509349\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000509363\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000509450\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000509451\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000509692\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000509980\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000509982\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000509983\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000512320\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000512644\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000512645\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000512816\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000514601\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000519030\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000520069\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000520071\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000520072\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000520394\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000520410\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000520865\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000520866\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000520867\" DB=\"Genetic Testing 
Registry (GTR)\"/>\n                        <XRef ID=\"GTR000520869\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000520870\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000520871\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000520872\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000521908\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000522159\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000522160\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000522161\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000522162\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000522243\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000527942\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000528915\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000528916\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000530120\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000530202\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000531275\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000551440\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000552245\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000552304\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000562228\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000569406\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000569423\" DB=\"Genetic 
Testing Registry (GTR)\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"keyword\">Neoplasm</Attribute>\n                    </AttributeSet>\n                    <Citation Type=\"review\" Abbrev=\"GeneReviews\">\n                        <ID Source=\"PubMed\">20301425</ID>\n                        <ID Source=\"BookShelf\">NBK1247</ID>\n                    </Citation>\n                    <Citation Type=\"practice guideline\" Abbrev=\"ACS, 2007\">\n                        <ID Source=\"PubMed\">17392385</ID>\n                    </Citation>\n                    <Citation Type=\"Position Statement\" Abbrev=\"ASCO, 2010\">\n                        <ID Source=\"PubMed\">20065170</ID>\n                    </Citation>\n                    <Citation Type=\"practice guideline\" Abbrev=\"NSGC, 2004\">\n                        <ID Source=\"PubMed\">15604628</ID>\n                    </Citation>\n                    <Citation Type=\"practice guideline\" Abbrev=\"NSGC, 2007\">\n                        <ID Source=\"PubMed\">17508274</ID>\n                    </Citation>\n                    <Citation Type=\"practice guideline\" Abbrev=\"ACOG, 2009\">\n                        <ID Source=\"PubMed\">19305347</ID>\n                    </Citation>\n                    <Citation Type=\"Recommendation\" Abbrev=\"ACMG, 2013\">\n                        <ID Source=\"PubMed\">23788249</ID>\n                    </Citation>\n                    <Citation Type=\"practice guideline\" Abbrev=\"NCCN, 2013\">\n                        <URL>http://www.nccn.org/professionals/physician_gls/pdf/genetics_screening.pdf</URL>\n                        <CitationText>National Comprehensive Cancer Network practice guidelines in oncology.\n                            Genetic/Familial High-Risk Assessment: Breast and Ovarian\n                        </CitationText>\n                    </Citation>\n                    <Citation 
Type=\"Suggested Reading\" Abbrev=\"Phillips et al., 2013\">\n                        <ID Source=\"PubMed\">23918944</ID>\n                    </Citation>\n                    <Citation Type=\"Suggested Reading\" Abbrev=\"Domchek et al., 2010\">\n                        <ID Source=\"pmc\">2948529</ID>\n                    </Citation>\n                    <Citation Type=\"practice guideline\" Abbrev=\"ASCO, 2014\">\n                        <ID Source=\"PubMed\">24493721</ID>\n                    </Citation>\n                    <Citation Type=\"practice guideline\" Abbrev=\"USPSTF, 2014\">\n                        <ID Source=\"PubMed\">24366376</ID>\n                    </Citation>\n                    <Citation Type=\"practice guideline\" Abbrev=\"NSGC, 2013\">\n                        <ID Source=\"PubMed\">23188549</ID>\n                    </Citation>\n                    <Citation Type=\"practice guideline\" Abbrev=\"ACMG/NSGC, 2015\">\n                        <ID Source=\"PubMed\">25394175</ID>\n                    </Citation>\n                    <Citation Type=\"Recommendation\" Abbrev=\"USPSTF, 2014\">\n                        <ID Source=\"PubMed\">24366402</ID>\n                    </Citation>\n                    <Citation Type=\"Recommendation\" Abbrev=\"USPSTF, 2013\">\n                        <ID Source=\"PubMed\">24432435</ID>\n                    </Citation>\n                    <Citation Type=\"Recommendation\" Abbrev=\"ACMG, 2015\">\n                        <ID Source=\"PubMed\">25356965</ID>\n                    </Citation>\n                    <Citation Type=\"Recommendation\" Abbrev=\"ACMG, 2016\">\n                        <ID Source=\"PubMed\">27854360</ID>\n                        <ID Source=\"DOI\">10.1038/gim.2016.190</ID>\n                    </Citation>\n                    <XRef ID=\"C2676676\" DB=\"MedGen\"/>\n                    <XRef ID=\"145\" DB=\"Orphanet\"/>\n                    <XRef Type=\"MIM\" ID=\"604370\" DB=\"OMIM\"/>\n       
         </Trait>\n            </TraitSet>\n        </ReferenceClinVarAssertion>\n        <ClinVarAssertion ID=\"189345\">\n            <ClinVarSubmissionID localKey=\"SCRP_var_1220\" submitter=\"Sharing Clinical Reports Project (SCRP)\"\n                                 submitterDate=\"2013-08-08\"\n                                 title=\"NM_007294.3:c.4357+2T&gt;G AND Breast-ovarian cancer, familial 1\"/>\n            <ClinVarAccession Acc=\"SCV000108943\" Version=\"3\" Type=\"SCV\" OrgID=\"500037\" OrganizationCategory=\"consortium\"\n                              OrgType=\"primary\" DateUpdated=\"2019-08-03\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2012-09-24\">\n                <ReviewStatus>no assertion criteria provided</ReviewStatus>\n                <Description>Pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species>human</Species>\n                    <AffectedStatus>not provided</AffectedStatus>\n                    <NumberTested>1</NumberTested>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"VariantAlleles\">1</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <AttributeSet>\n                        <Attribute Type=\"nucleotide change\">IVS13+2T&gt;G</Attribute>\n                    </AttributeSet>\n                    <MeasureRelationship Type=\"variant in gene\">\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">BRCA1</ElementValue>\n                        
</Symbol>\n                    </MeasureRelationship>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Breast-ovarian cancer, familial 1</ElementValue>\n                    </Name>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n        <ClinVarAssertion ID=\"261969\">\n            <ClinVarSubmissionID localKey=\"U14680.1:n.4476+2T&gt;G|MedGen:C2676676\"\n                                 submitter=\"Breast Cancer Information Core (BIC) (BRCA1)\" submitterDate=\"2014-03-28\"/>\n            <ClinVarAccession Acc=\"SCV000145071\" Version=\"1\" Type=\"SCV\" OrgID=\"504196\"\n                              OrganizationCategory=\"locus-specific database (LSDB)\" OrgType=\"primary\"\n                              DateUpdated=\"2019-08-03\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"1999-06-22\">\n                <ReviewStatus>no assertion criteria provided</ReviewStatus>\n                <Description>Pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>yes</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"VariantAlleles\" integerValue=\"1\"/>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <Name>\n                        <ElementValue 
Type=\"Alternate\">IVS13+2T&gt;G</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Type=\"Location\">U14680.1:intron 13</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS\">U14680.1:n.4476+2T&gt;G</Attribute>\n                    </AttributeSet>\n                    <XRef DB=\"dbSNP\" ID=\"80358152\" Type=\"rsNumber\"/>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Breast-ovarian cancer, familial 1</ElementValue>\n                    </Name>\n                    <XRef DB=\"MedGen\" ID=\"C2676676\" Type=\"CUI\"/>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n    </ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000080071.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"49276297\"><RecordStatus>current</RecordStatus><Replaces>RCV000124228</Replaces><Title>NM_003159.2(CDKL5):c.2995G&gt;A (p.Val999Met) AND not specified</Title><ReferenceClinVarAssertion DateCreated=\"2014-01-17\" DateLastUpdated=\"2019-12-31\" ID=\"199236\"><ClinVarAccession Acc=\"RCV000080071\" Version=\"9\" Type=\"RCV\" DateUpdated=\"2019-12-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2014-12-05\"><ReviewStatus>criteria provided, multiple submitters, no conflicts</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><AttributeSet><Attribute Type=\"ModeOfInheritance\" integerValue=\"396\">X-linked inheritance</Attribute><XRef ID=\"NM_003159.2(CDKL5):c.2995G&gt;A\" DB=\"Genetic Services Laboratory, University of Chicago\" /></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"50065522\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"50065523\"><Attribute integerValue=\"5\" Type=\"VariantAlleles\" /></ObservedData><ObservedData ID=\"50065523\"><Attribute integerValue=\"1\" Type=\"Hemizygote\" /></ObservedData><ObservedData ID=\"50065523\"><Attribute integerValue=\"4\" Type=\"SingleHeterozygote\" /></ObservedData><ObservedData 
ID=\"50065523\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData><ObservedData ID=\"50065523\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData><ObservedData ID=\"50065523\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>maternal</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus><NumberTested>2</NumberTested></Sample><Method><Description>DHPLC, exons 2-21</Description><MethodType>curation</MethodType></Method><Method><Description>CSGE, MECP2 negative</Description><MethodType>curation</MethodType></Method><ObservedData ID=\"50065524\"><Attribute integerValue=\"2\" Type=\"VariantAlleles\" /></ObservedData></ObservedIn><ObservedIn><Sample><Origin>unknown</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus><NumberTested>4</NumberTested></Sample><Method><Description>DHPLC, exons 2-21</Description><MethodType>curation</MethodType></Method><Method><Description>CSGE, MECP2 negative</Description><MethodType>curation</MethodType></Method><Method><Description>direct, CDKL5 Exon 2-21</Description><MethodType>curation</MethodType></Method><ObservedData ID=\"50065525\"><Attribute integerValue=\"4\" Type=\"VariantAlleles\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"94110\" Acc=\"VCV000094110\" Version=\"1\"><Measure Type=\"single nucleotide variant\" ID=\"100010\"><Name><ElementValue Type=\"Preferred\">NM_003159.2(CDKL5):c.2995G&gt;A (p.Val999Met)</ElementValue></Name><Name><ElementValue Type=\"Alternate\">p.V999M:GTG&gt;ATG</ElementValue></Name><AttributeSet><Attribute Accession=\"LRG_702t1\" Change=\"c.184+3207C&gt;T\" Type=\"HGVS, coding, LRG\">LRG_702t1:c.184+3207C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_000330\" Version=\"3\" Change=\"c.184+3207C&gt;T\" Type=\"HGVS, coding, 
RefSeq\">NM_000330.3:c.184+3207C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001037343\" Version=\"1\" Change=\"c.2995G&gt;A\" Type=\"HGVS, coding, RefSeq\">NM_001037343.1:c.2995G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_003159\" Version=\"2\" Change=\"c.2995G&gt;A\" Type=\"HGVS, coding, RefSeq\">NM_003159.2:c.2995G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_702\" Change=\"g.29003C&gt;T\" Type=\"HGVS, genomic, LRG\">LRG_702:g.29003C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_008475\" Version=\"1\" Change=\"g.232842G&gt;A\" Type=\"HGVS, genomic, RefSeqGene\">NG_008475.1:g.232842G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_008659\" Version=\"3\" Change=\"g.29003C&gt;T\" Type=\"HGVS, genomic, RefSeqGene\">NG_008659.3:g.29003C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000023\" Version=\"11\" Change=\"g.18653446G&gt;A\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000023.11:g.18653446G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000023\" Version=\"10\" Change=\"g.18671566G&gt;A\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000023.10:g.18671566G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Type=\"HGVS, protein\">p.(Val999Met)</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"O76039\" Change=\"p.Val999Met\" Type=\"HGVS, protein\">O76039:p.Val999Met</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001032420\" Version=\"1\" Change=\"p.Val999Met\" Type=\"HGVS, protein, RefSeq\">NP_001032420.1:p.Val999Met</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_003150\" Version=\"1\" Change=\"p.Val999Met\" Type=\"HGVS, protein, RefSeq\">NP_003150.1:p.Val999Met</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_003150\" Version=\"1\" Change=\"p.Val999Met\" Type=\"HGVS, protein, 
RefSeq\">NP_003150.1:p.Val999Met</Attribute></AttributeSet><AttributeSet><Attribute Type=\"Location\">NM_003159.2:exon 21</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">intron variant</Attribute><XRef ID=\"SO:0001627\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_000330.3:c.184+3207C&gt;T\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001037343.1:c.2995G&gt;A\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_003159.2:c.2995G&gt;A\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">V999M</Attribute></AttributeSet><AlleleFrequencyList><AlleleFrequency Value=\"0.03881\" Source=\"NHLBI Exome Sequencing Project (ESP) Exome Variant Server\" /><AlleleFrequency Value=\"0.03391\" Source=\"1000 Genomes Project\" /><AlleleFrequency Value=\"0.02951\" Source=\"The Genome Aggregation Database (gnomAD)\" /><AlleleFrequency Value=\"0.03213\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\" /></AlleleFrequencyList><GlobalMinorAlleleFrequency Value=\"0.03391\" Source=\"1000 Genomes Project\" MinorAllele=\"A\" /><CytogeneticLocation>Xp22.13</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"X\" Accession=\"NC_000023.11\" start=\"18653446\" stop=\"18653446\" display_start=\"18653446\" display_stop=\"18653446\" variantLength=\"1\" positionVCF=\"18653446\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"A\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"X\" Accession=\"NC_000023.10\" start=\"18671566\" stop=\"18671566\" display_start=\"18671566\" display_stop=\"18671566\" variantLength=\"1\" 
positionVCF=\"18671566\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"A\" /><MeasureRelationship Type=\"within multiple genes by overlap\"><Name><ElementValue Type=\"Preferred\">cyclin dependent kinase like 5</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">CDKL5</ElementValue></Symbol><AttributeSet><Attribute dateValue=\"2012-07-12\" Type=\"Haploinsufficiency\">Sufficient evidence for dosage pathogenicity</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=CDKL5</URL></Citation></AttributeSet><AttributeSet><Attribute dateValue=\"2012-07-12\" Type=\"Triplosensitivity\">No evidence available</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=CDKL5</URL></Citation></AttributeSet><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"X\" Accession=\"NC_000023.11\" start=\"18425605\" stop=\"18653629\" display_start=\"18425605\" display_stop=\"18653629\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"X\" Accession=\"NC_000023.10\" start=\"18443724\" stop=\"18671748\" display_start=\"18443724\" display_stop=\"18671748\" variantLength=\"228025\" Strand=\"+\" /><XRef ID=\"6792\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"300203\" DB=\"OMIM\" /><XRef ID=\"HGNC:11411\" DB=\"HGNC\" /></MeasureRelationship><MeasureRelationship Type=\"within multiple genes by overlap\"><Name><ElementValue Type=\"Preferred\">retinoschisin 1</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">RS1</ElementValue></Symbol><AttributeSet><Attribute dateValue=\"2012-06-14\" Type=\"Haploinsufficiency\">Sufficient evidence for dosage pathogenicity</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=RS1</URL></Citation></AttributeSet><AttributeSet><Attribute dateValue=\"2012-06-14\" Type=\"Triplosensitivity\">No 
evidence available</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=RS1</URL></Citation></AttributeSet><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"X\" Accession=\"NC_000023.11\" start=\"18639688\" stop=\"18672108\" display_start=\"18639688\" display_stop=\"18672108\" Strand=\"-\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"X\" Accession=\"NC_000023.10\" start=\"18657807\" stop=\"18690222\" display_start=\"18657807\" display_stop=\"18690222\" variantLength=\"32416\" Strand=\"-\" /><XRef ID=\"6247\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"300839\" DB=\"OMIM\" /><XRef ID=\"HGNC:10457\" DB=\"HGNC\" /></MeasureRelationship><XRef ID=\"CDKL5-A8\" DB=\"Center for Human Genetics, Inc\" /><XRef ID=\"41\" DB=\"RettBASE (CDKL5)\" /><XRef ID=\"O76039#VAR_037637\" DB=\"UniProtKB\" /><XRef Type=\"rs\" ID=\"35693326\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_003159.2(CDKL5):c.2995G&gt;A (p.Val999Met)</ElementValue></Name><XRef ID=\"CA213360\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"9590\"><Trait ID=\"16789\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not specified</ElementValue></Name><Name><ElementValue Type=\"Alternate\">AllHighlyPenetrant</ElementValue></Name><AttributeSet><Attribute Type=\"public definition\">The term 'not specified' was created for use in ClinVar so that submitters can convey the concept that a variant is benign, likely benign, or of uncertain significance for an unspecified set of disorders.  
This usage was introduced in 2014 to replace AllHighlyPenetrant.</Attribute></AttributeSet><XRef ID=\"CN169374\" DB=\"MedGen\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"194072\" SubmissionName=\"SUB4534263\"><ClinVarSubmissionID localKey=\"3161_000000\" localKeyIsSubmitted=\"1\" submittedAssembly=\"GRCh37\" submitter=\"EGL Genetic Diagnostics,Eurofins Clinical Diagnostics\" submitterDate=\"2018-09-19\" /><ClinVarAccession Acc=\"SCV000111966\" Version=\"8\" Type=\"SCV\" OrgID=\"500060\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-08-03\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2014-12-05\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"EGL Genetic Diagnostics\" ID=\"3161\" /><AttributeSet><Attribute Type=\"AssertionMethod\">EGL_Classification_Definitions_2015</Attribute><Citation><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/dn5yhybg/egl_classification_definitions_2015.pdf</URL></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><Gender>mixed</Gender></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"5\" /></ObservedData><ObservedData><Attribute Type=\"Hemizygote\" integerValue=\"1\" /></ObservedData><ObservedData><Attribute Type=\"SingleHeterozygote\" integerValue=\"4\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"single nucleotide variant\"><AttributeSet><Attribute Type=\"Location\">NM_003159.2:Ex21</Attribute></AttributeSet><SequenceLocation Assembly=\"GRCh37\" Chr=\"X\" alternateAllele=\"A\" referenceAllele=\"G\" start=\"18671566\" stop=\"18671566\" variantLength=\"1\" /><MeasureRelationship Type=\"variant in 
gene\"><Symbol><ElementValue Type=\"Preferred\">CDKL5</ElementValue></Symbol></MeasureRelationship><XRef DB=\"dbSNP\" ID=\"35693326\" Type=\"rsNumber\" /></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not specified</ElementValue></Name></Trait></TraitSet><Citation><URL>http://www.egl-eurofins.com/emvclass/emvclass.php?approved_symbol=CDKL5</URL></Citation></ClinVarAssertion><ClinVarAssertion ID=\"289573\" SubmissionName=\"SUB3839901\"><ClinVarSubmissionID localKey=\"GDX:11870|Not Provided\" submittedAssembly=\"GRCh37\" submitter=\"GeneDx\" submitterDate=\"2018-03-26\" /><ClinVarAccession Acc=\"SCV000167649\" Version=\"10\" Type=\"SCV\" OrgID=\"26957\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-08-03\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2012-06-05\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Benign</Description><Comment>This variant is considered likely benign or benign based on one or more of the following criteria: it is a conservative change, it occurs at a poorly conserved position in the protein, it is predicted to be benign by multiple in silico algorithms, and/or has population frequency not consistent with disease.</Comment></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"GeneDx\" ID=\"GDX:11870\" /><AttributeSet><Attribute Type=\"AssertionMethod\">GeneDX Variant Classification (06012015)</Attribute><Citation><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/7oynscmk/mdi-5616_26957_genedx_interprules_final_061215.pdf</URL></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet 
Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_003159.2:c.2995G&gt;A</Attribute></AttributeSet><SequenceLocation Assembly=\"GRCh37\" Chr=\"X\" alternateAllele=\"A\" referenceAllele=\"G\" start=\"18671566\" stop=\"18671566\" variantLength=\"1\" /><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">CDKL5</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not specified</ElementValue></Name></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"311173\" SubmissionName=\"CHWRETT_2012_1_30\"><ClinVarSubmissionID localKey=\"41\" localKeyIsSubmitted=\"1\" submitter=\"RettBASE\" submitterDate=\"2014-11-21\" /><ClinVarAccession Acc=\"SCV000188373\" Version=\"2\" Type=\"SCV\" OrgID=\"504290\" OrganizationCategory=\"locus-specific database (LSDB)\" OrgType=\"primary\" DateUpdated=\"2019-08-03\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2014-05-09\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Benign</Description><Comment>Found in unaffected mother with apparent balanced X-chromosome inactivation; in exon 20, affecting only the transcript lowly expressed; In silico prediction: SIFT = tolerated, MutationTaster = polymorphism, PolyPhen2 = benign, AlignGVGD = benign (C0)</Comment></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"RettBASE\" ID=\"41\" URL=\"http://mecp2.chw.edu.au/cdkl5/cdkl5_variant?mut_id=41\" /><ObservedIn><Sample><Origin>maternal</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus><NumberTested>1</NumberTested><Gender>female</Gender></Sample><Method><Description>DHPLC, exons 2-21</Description><MethodType>curation</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"1\" /></ObservedData><Citation><ID 
Source=\"PubMed\">16813600</ID></Citation><Comment>Rett syndrome - early seizure</Comment></ObservedIn><ObservedIn><Sample><Origin>unknown</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus><NumberTested>1</NumberTested><Gender>female</Gender></Sample><Method><Description>DHPLC, exons 2-21</Description><MethodType>curation</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"1\" /></ObservedData><Citation><ID Source=\"PubMed\">16813600</ID></Citation><Comment>Unaffected - unaffected family member</Comment></ObservedIn><ObservedIn><Sample><Origin>unknown</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus><NumberTested>1</NumberTested><Gender>female</Gender></Sample><Method><Description>direct, CDKL5 Exon 2-21</Description><MethodType>curation</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"1\" /></ObservedData><Citation><ID Source=\"PubMed\">21775177</ID></Citation><Comment>Not Rett syndrome - infantile intractable epilepsy</Comment></ObservedIn><ObservedIn><Sample><Origin>maternal</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus><NumberTested>1</NumberTested><Gender>female</Gender></Sample><Method><Description>CSGE, MECP2 negative</Description><MethodType>curation</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"1\" /></ObservedData><Citation><ID Source=\"PubMed\">22867051</ID></Citation><Comment>Not Rett syndrome - epilepsy, Rett-like</Comment></ObservedIn><ObservedIn><Sample><Origin>unknown</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus><NumberTested>1</NumberTested><Gender>female</Gender></Sample><Method><Description>CSGE, MECP2 negative</Description><MethodType>curation</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"1\" 
/></ObservedData><Citation><ID Source=\"PubMed\">22867051</ID></Citation><Comment>Unaffected - unaffected family member</Comment></ObservedIn><ObservedIn><Sample><Origin>unknown</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus><NumberTested>1</NumberTested><Gender>female</Gender></Sample><Method><Description>CSGE, MECP2 negative</Description><MethodType>curation</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"1\" /></ObservedData><Citation><ID Source=\"PubMed\">22867051</ID></Citation><Comment>Unaffected - non-RTT control</Comment></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_003159.2:c.2995G&gt;A</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">CDKL5</ElementValue></Symbol></MeasureRelationship><XRef DB=\"dbSNP\" ID=\"35693326\" Type=\"rsNumber\" /></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Not specified</ElementValue></Name></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"340465\"><ClinVarSubmissionID localKey=\"NM_003159.2(CDKL5):c.2995G&gt;A\" submitter=\"Genetic Services Laboratory, University of Chicago\" submitterDate=\"2014-09-11\" /><ClinVarAccession Acc=\"SCV000192629\" Version=\"1\" Type=\"SCV\" OrgID=\"1238\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-12-06\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2013-02-08\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"Genetic Services Laboratory\" ID=\"NM_003159.2(CDKL5):c.2995G&gt;A\" /><AttributeSet><Attribute Type=\"ModeOfInheritance\">X-linked inheritance</Attribute></AttributeSet><AttributeSet><Attribute 
Type=\"AssertionMethod\">ACMG Guidelines, 2007</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">18414213</ID></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_003159.2:c.2995G&gt;A</Attribute></AttributeSet><SequenceLocation Assembly=\"GRCh37\" Chr=\"X\" start=\"18671566\" /><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">CDKL5</ElementValue></Symbol></MeasureRelationship><XRef DB=\"dbSNP\" ID=\"35693326\" Type=\"rsNumber\" /></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not specified</ElementValue></Name></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"477474\" SubmissionName=\"variant_submission_CDKL5_UBE3A_03202015\"><ClinVarSubmissionID localKey=\"CDKL5_8|not provided\" submitter=\"Division of Genomic Diagnostics,The Children's Hospital of Philadelphia\" submitterDate=\"2015-03-20\" /><ClinVarAccession Acc=\"SCV000256048\" Version=\"1\" Type=\"SCV\" OrgID=\"165021\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-08-03\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2015-02-13\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"Division of Genomic Diagnostics\" ID=\"CDKL5_8\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical 
testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue Type=\"Alternate\">p.(Val999Met)</ElementValue></Name><AttributeSet><Attribute Type=\"HGVS\">NM_003159.2:c.2995G&gt;A</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">CDKL5</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not specified</ElementValue></Name></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"591146\"><ClinVarSubmissionID localKey=\"NM_003159.2:c.2995G&gt;A|NOT SPECIFIED\" submittedAssembly=\"GRCh37\" submitter=\"PreventionGenetics,PreventionGenetics\" submitterDate=\"2016-04-28\" /><ClinVarAccession Acc=\"SCV000309464\" Version=\"1\" Type=\"SCV\" OrgID=\"239772\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-11-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><AttributeSet><Attribute Type=\"AssertionMethod\">ACMG Guidelines, 2015</Attribute><Citation><ID Source=\"PubMed\">25741868</ID></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_003159.2:c.2995G&gt;A</Attribute></AttributeSet></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">NOT 
SPECIFIED</ElementValue></Name></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000083638.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"47119341\"><RecordStatus>current</RecordStatus><Title>NM_000348.4(SRD5A2):c.89_90insT (p.Ser31fs) AND not provided</Title><ReferenceClinVarAssertion DateCreated=\"2014-02-19\" DateLastUpdated=\"2019-11-02\" ID=\"206845\"><ClinVarAccession Acc=\"RCV000083638\" Version=\"1\" Type=\"RCV\" DateUpdated=\"2019-11-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance><ReviewStatus>no assertion provided</ReviewStatus><Description>not provided</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>not provided</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus><NumberTested>1</NumberTested></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData ID=\"47972833\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"97389\" Acc=\"VCV000097389\" Version=\"1\"><Measure Type=\"Insertion\" ID=\"103281\"><Name><ElementValue Type=\"Preferred\">NM_000348.4(SRD5A2):c.89_90insT (p.Ser31fs)</ElementValue></Name><AttributeSet><Attribute Accession=\"NM_000348\" Version=\"4\" Change=\"c.89_90insT\" Type=\"HGVS, coding, RefSeq\">NM_000348.4:c.89_90insT</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_008365\" Version=\"1\" Change=\"g.5160_5161insT\" Type=\"HGVS, genomic, RefSeqGene\">NG_008365.1:g.5160_5161insT</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000002\" Version=\"12\" Change=\"g.31580811_31580812insA\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000002.12:g.31580811_31580812insA</Attribute></AttributeSet><AttributeSet><Attribute 
Accession=\"NC_000002\" Version=\"11\" Change=\"g.31805880dup\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000002.11:g.31805880dup</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_000339\" Version=\"2\" Change=\"p.Ser31fs\" Type=\"HGVS, protein, RefSeq\">NP_000339.2:p.Ser31fs</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">frameshift variant</Attribute><XRef ID=\"SO:0001589\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_000348.4:c.89_90insT\" DB=\"RefSeq\" /></AttributeSet><CytogeneticLocation>2p23.1</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"2\" Accession=\"NC_000002.12\" start=\"31580811\" stop=\"31580812\" display_start=\"31580811\" display_stop=\"31580812\" variantLength=\"1\" positionVCF=\"31580811\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"GA\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"2\" Accession=\"NC_000002.11\" start=\"31805879\" stop=\"31805880\" display_start=\"31805879\" display_stop=\"31805880\" variantLength=\"1\" positionVCF=\"31805879\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"GA\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">steroid 5 alpha-reductase 2</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">SRD5A2</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"2\" Accession=\"NC_000002.12\" start=\"31522480\" stop=\"31665651\" display_start=\"31522480\" display_stop=\"31665651\" Strand=\"-\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"2\" Accession=\"NC_000002.11\" start=\"31749655\" stop=\"31806039\" display_start=\"31749655\" display_stop=\"31806039\" variantLength=\"56385\" 
Strand=\"-\" /><XRef ID=\"6716\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"607306\" DB=\"OMIM\" /><XRef ID=\"HGNC:11285\" DB=\"HGNC\" /></MeasureRelationship><XRef Type=\"rs\" ID=\"1553329489\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_000348.4(SRD5A2):c.89_90insT (p.Ser31fs)</ElementValue></Name><XRef ID=\"CA45142063\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"9460\"><Trait ID=\"17556\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not provided</ElementValue><XRef ID=\"13DG0619\" DB=\"Developmental Genetics Unit,King Faisal Specialist Hospital &amp; Research Centre\" /></Name><AttributeSet><Attribute Type=\"public definition\">The term 'not provided' is registered in MedGen to support identification of submissions to ClinVar for which no condition was named when assessing the variant. 'not provided' differs from 'not specified', which is used when a variant is asserted to be benign, likely benign, or of uncertain significance for conditions that have not been specified.</Attribute></AttributeSet><XRef ID=\"CN517202\" DB=\"MedGen\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"203527\" SubmissionName=\"SRD5A2_2007_12_06\"><ClinVarSubmissionID localKey=\"reichardt_3\" submitter=\"University of Sydney Medical Foundation\" submitterDate=\"2012-05-02\" /><ClinVarAccession Acc=\"SCV000115724\" Version=\"1\" Type=\"SCV\" OrgID=\"500310\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance><ReviewStatus>no assertion provided</ReviewStatus><Description>not provided</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>not provided</Origin><Species>human</Species><AffectedStatus>not provided</AffectedStatus><NumberTested>1</NumberTested></Sample><Method><MethodType>not provided</MethodType></Method><ObservedData><Attribute Type=\"Description\">not 
provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NG_008365.1:g.5160_5161insT</Attribute></AttributeSet></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not provided</ElementValue></Name></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000087262.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"49276548\"><RecordStatus>current</RecordStatus><Title>NM_002296.4(LBR):c.1599_1605delinsCTAGAAG (p.Leu534_Leu535delinsTer) AND Pelger-Huët anomaly</Title><ReferenceClinVarAssertion DateCreated=\"2014-03-01\" DateLastUpdated=\"2019-12-31\" ID=\"211572\"><ClinVarAccession Acc=\"RCV000087262\" Version=\"3\" Type=\"RCV\" DateUpdated=\"2019-12-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2003-04-01\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData ID=\"50065713\"><Attribute Type=\"Description\">Waterham et al. (2003) described a fetus, the product of a consanguineous Turkish marriage, who presented with intrauterine growth retardation at 17 weeks' gestation and was found to have severe hydrops and short-limb skeletal dysplasia consistent with thanatophoric dysplasia. Intrauterine death occurred at 18 weeks, and delivery was induced. Fetal examination showed severe hydrops, extremely shortened edematous limbs, and postaxial polydactyly on both hands. Radiographic examination showed severe platyspondyly, short irregular ribs, a 'moth-eaten' aspect of scapular and pelvic bones, and very short tubular bones with angular diaphyses. Histopathology showed almost complete absence of ossification, severe disorganization of cartilage (with nodular calcification deposits), and defective or absent joint formation. 
On the basis of these findings, the diagnosis of Greenberg dysplasia (215140) was made. Elevated levels of cholesta-8,14-dien-3-beta-ol in cultured skin fibroblasts were consistent with deficiency of 3-beta-hydroxysterol delta(14)-reductase. Sequence analysis of the LBR gene identified a homozygous 7-bp substitution at nucleotide 1599 in exon 13, TCTTCTA-CTAGAAG, which resulted in a truncated protein. The mother showed classic Pelger-Huet anomaly (169400), which represents the heterozygous state of 3-beta-hydroxysterol delta(14)-reductase deficiency.</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">12618959</ID></Citation></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"9529\" Acc=\"VCV000009529\" Version=\"1\"><Measure Type=\"Indel\" ID=\"24568\"><Name><ElementValue Type=\"Preferred\">NM_002296.4(LBR):c.1599_1605delinsCTAGAAG (p.Leu534_Leu535delinsTer)</ElementValue></Name><AttributeSet><Attribute Accession=\"NM_194442\" Version=\"2\" Change=\"c.1599_1605delTCTTCTAinsCTAGAAG\" Type=\"HGVS, coding\">NM_194442.2:c.1599_1605delTCTTCTAinsCTAGAAG</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_002296\" Version=\"4\" Change=\"c.1599_1605delinsCTAGAAG\" Type=\"HGVS, coding, RefSeq\">NM_002296.4:c.1599_1605delinsCTAGAAG</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_194442\" Version=\"2\" Change=\"c.1599_1605delinsCTAGAAG\" Type=\"HGVS, coding, RefSeq\">NM_194442.2:c.1599_1605delinsCTAGAAG</Attribute></AttributeSet><AttributeSet><Attribute Change=\"g.29326_29332delTCTTCTAinsCTAGAAG\" Accession=\"NG_008099\" Version=\"1\" Type=\"HGVS, genomic\">NG_008099.1:g.29326_29332delTCTTCTAinsCTAGAAG</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">12618959</ID></Citation></AttributeSet><AttributeSet><Attribute Accession=\"NG_008099\" Version=\"1\" Change=\"g.29326_29332delinsCTAGAAG\" Type=\"HGVS, genomic, RefSeqGene\">NG_008099.1:g.29326_29332delinsCTAGAAG</Attribute></AttributeSet><AttributeSet><Attribute 
Accession=\"NG_008099\" Version=\"1\" Change=\"g.29326_29332delTCTTCTAinsCTAGAAG\" Type=\"HGVS, genomic, RefSeqGene\">NG_008099.1:g.29326_29332delTCTTCTAinsCTAGAAG</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000001\" Version=\"11\" Change=\"g.225404486_225404492delinsCTTCTAG\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000001.11:g.225404486_225404492delinsCTTCTAG</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000001\" Version=\"10\" Change=\"g.225592188_225592194delinsCTTCTAG\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000001.10:g.225592188_225592194delinsCTTCTAG</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_002287\" Version=\"2\" Change=\"p.Leu534_Leu535delinsTer\" Type=\"HGVS, protein, RefSeq\">NP_002287.2:p.Leu534_Leu535delinsTer</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_919424\" Version=\"1\" Change=\"p.Leu534_Leu535delinsTer\" Type=\"HGVS, protein, RefSeq\">NP_919424.1:p.Leu534_Leu535delinsTer</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">nonsense</Attribute><XRef ID=\"SO:0001587\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_002296.4:c.1599_1605delinsCTAGAAG\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">nonsense</Attribute><XRef ID=\"SO:0001587\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_194442.2:c.1599_1605delinsCTAGAAG\" DB=\"RefSeq\" /></AttributeSet><CytogeneticLocation>1q42.12</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"225404486\" stop=\"225404492\" display_start=\"225404486\" display_stop=\"225404492\" variantLength=\"7\" positionVCF=\"225404486\" referenceAlleleVCF=\"TAGAAGA\" alternateAlleleVCF=\"CTTCTAG\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"1\" 
Accession=\"NC_000001.10\" start=\"225592188\" stop=\"225592194\" display_start=\"225592188\" display_stop=\"225592194\" variantLength=\"7\" positionVCF=\"225592188\" referenceAlleleVCF=\"TAGAAGA\" alternateAlleleVCF=\"CTTCTAG\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">lamin B receptor</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">LBR</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"225401502\" stop=\"225428855\" display_start=\"225401502\" display_stop=\"225428855\" Strand=\"-\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"225589203\" stop=\"225616556\" display_start=\"225589203\" display_stop=\"225616556\" variantLength=\"27354\" Strand=\"-\" /><XRef ID=\"3930\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"600024\" DB=\"OMIM\" /><XRef ID=\"HGNC:6518\" DB=\"HGNC\" /></MeasureRelationship><XRef Type=\"Allelic variant\" ID=\"600024.0003\" DB=\"OMIM\" /><XRef Type=\"rs\" ID=\"387906416\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_002296.4(LBR):c.1599_1605delinsCTAGAAG (p.Leu534_Leu535delinsTer)</ElementValue></Name><XRef ID=\"CA120510\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"2613\"><Trait ID=\"3778\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Pelger-Huët anomaly</ElementValue><XRef ID=\"Pelger-Huet+anomaly/5642\" DB=\"Genetic Alliance\" /><XRef ID=\"85559002\" DB=\"SNOMED CT\" /></Name><Name><ElementValue Type=\"Alternate\">Pelger-Huet Anomaly</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">PHA</ElementValue><XRef Type=\"MIM\" ID=\"169400\" DB=\"OMIM\" /><XRef ID=\"9148\" DB=\"Office of Rare Diseases\" /></Symbol><XRef ID=\"C0030779\" DB=\"MedGen\" /><XRef Type=\"MIM\" ID=\"169400\" DB=\"OMIM\" 
/></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"211452\"><ClinVarSubmissionID localKey=\"600024.0003_PELGER-HUET ANOMALY\" submitter=\"OMIM\" submitterDate=\"2014-02-27\" title=\"LBR, 7-BP SUB, NT1599_PELGER-HUET ANOMALY\" /><ClinVarAccession Acc=\"SCV000120125\" Version=\"1\" Type=\"SCV\" OrgID=\"3\" OrganizationCategory=\"resource\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2003-04-01\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to included disease\" /><ExternalID DB=\"OMIM\" ID=\"600024.0003\" Type=\"Allelic variant\" /><ObservedIn><Sample><Origin>germline</Origin><Species>human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData><Attribute Type=\"Description\">Waterham et al. (2003) described a fetus, the product of a consanguineous Turkish marriage, who presented with intrauterine growth retardation at 17 weeks' gestation and was found to have severe hydrops and short-limb skeletal dysplasia consistent with thanatophoric dysplasia. Intrauterine death occurred at 18 weeks, and delivery was induced. Fetal examination showed severe hydrops, extremely shortened edematous limbs, and postaxial polydactyly on both hands. Radiographic examination showed severe platyspondyly, short irregular ribs, a 'moth-eaten' aspect of scapular and pelvic bones, and very short tubular bones with angular diaphyses. Histopathology showed almost complete absence of ossification, severe disorganization of cartilage (with nodular calcification deposits), and defective or absent joint formation. On the basis of these findings, the diagnosis of Greenberg dysplasia (215140) was made. 
Elevated levels of cholesta-8,14-dien-3-beta-ol in cultured skin fibroblasts were consistent with deficiency of 3-beta-hydroxysterol delta(14)-reductase. Sequence analysis of the LBR gene identified a homozygous 7-bp substitution at nucleotide 1599 in exon 13, TCTTCTA-CTAGAAG, which resulted in a truncated protein. The mother showed classic Pelger-Huet anomaly (169400), which represents the heterozygous state of 3-beta-hydroxysterol delta(14)-reductase deficiency.</Attribute><Citation><ID Source=\"PubMed\">12618959</ID></Citation><XRef DB=\"OMIM\" ID=\"215140\" Type=\"MIM\" /><XRef DB=\"OMIM\" ID=\"169400\" Type=\"MIM\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue Type=\"Preferred\">LBR, 7-BP SUB, NT1599</ElementValue></Name><AttributeSet><Attribute Type=\"NonHGVS\">7-BP SUB, NT1599</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">LBR</ElementValue></Symbol></MeasureRelationship><XRef DB=\"OMIM\" ID=\"600024.0003\" Type=\"Allelic variant\" /></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"included\">PELGER-HUET ANOMALY</ElementValue></Name></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000112977.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"49276716\"><RecordStatus>current</RecordStatus><Title>NM_000059.3(BRCA2):c.-26G&gt;A AND Breast-ovarian cancer, familial 2</Title><ReferenceClinVarAssertion DateCreated=\"2014-03-30\" DateLastUpdated=\"2019-12-31\" ID=\"266125\"><ClinVarAccession Acc=\"RCV000112977\" Version=\"3\" Type=\"RCV\" DateUpdated=\"2019-12-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2015-01-12\"><ReviewStatus>reviewed by expert panel</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"50066314\"><Attribute integerValue=\"200\" Type=\"VariantAlleles\" /></ObservedData><ObservedData ID=\"50066314\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData><ObservedData ID=\"50066314\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData><ObservedData ID=\"50066314\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>Caucasian</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"50066315\"><Attribute integerValue=\"5\" Type=\"VariantAlleles\" /></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>Caucasian Southern African</Ethnicity><Species 
TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"50066316\"><Attribute integerValue=\"6\" Type=\"VariantAlleles\" /></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>European, Asian, Oceanan</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"50066317\"><Attribute integerValue=\"1\" Type=\"VariantAlleles\" /></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>Sinhalese</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"50066318\"><Attribute integerValue=\"1\" Type=\"VariantAlleles\" /></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus><NumberTested>86</NumberTested></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"50066319\"><Attribute integerValue=\"86\" Type=\"VariantAlleles\" /></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>curation</MethodType></Method><ObservedData ID=\"50066320\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>not provided</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"50066321\"><Attribute integerValue=\"1\" Type=\"VariantAlleles\" /></ObservedData></ObservedIn><ObservedIn><Sample><Origin>unknown</Origin><Species 
TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"50066322\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"125965\" Acc=\"VCV000125965\" Version=\"1\"><Measure Type=\"single nucleotide variant\" ID=\"131503\"><Name><ElementValue Type=\"Preferred\">NM_000059.3(BRCA2):c.-26G&gt;A</ElementValue></Name><Name><ElementValue Type=\"Alternate\">203G/A</ElementValue></Name><Name><ElementValue Type=\"Alternate\">203G&gt;A</ElementValue></Name><Name><ElementValue Type=\"Alternate\">203 G&gt;A</ElementValue></Name><AttributeSet><Attribute Accession=\"LRG_293t1\" Change=\"c.-26G&gt;A\" Type=\"HGVS, coding, LRG\">LRG_293t1:c.-26G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_000059\" Version=\"3\" Change=\"c.-26G&gt;A\" Type=\"HGVS, coding, RefSeq\">NM_000059.3:c.-26G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_293\" Change=\"g.5956G&gt;A\" Type=\"HGVS, genomic, LRG\">LRG_293:g.5956G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_017006\" Version=\"2\" Change=\"g.3929C&gt;T\" Type=\"HGVS, genomic, RefSeqGene\">NG_017006.2:g.3929C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_017006\" Version=\"1\" Change=\"g.520C&gt;T\" Type=\"HGVS, genomic, RefSeqGene\">NG_017006.1:g.520C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_012772\" Version=\"3\" Change=\"g.5956G&gt;A\" Type=\"HGVS, genomic, RefSeqGene\">NG_012772.3:g.5956G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000013\" Version=\"11\" Change=\"g.32316435G&gt;A\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000013.11:g.32316435G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000013\" Version=\"10\" Change=\"g.32890572G&gt;A\" Type=\"HGVS, genomic, top level, 
previous\" integerValue=\"37\">NC_000013.10:g.32890572G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"U43746\" Version=\"1\" Change=\"n.203G&gt;A\" Type=\"HGVS, non-coding\">U43746.1:n.203G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Type=\"Location\">NM_000059.3:exon 2</Attribute></AttributeSet><AttributeSet><Attribute Type=\"Location\">U43746.1:exon 2</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">5 prime UTR variant</Attribute><XRef ID=\"SO:0001623\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_000059.3:c.-26G&gt;A\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"nucleotide change\">5'UTR203G&gt;A</Attribute></AttributeSet><AlleleFrequencyList><AlleleFrequency Value=\"0.20883\" Source=\"NHLBI Exome Sequencing Project (ESP) Exome Variant Server\" /><AlleleFrequency Value=\"0.20927\" Source=\"1000 Genomes Project\" /><AlleleFrequency Value=\"0.24652\" Source=\"Exome Aggregation Consortium (ExAC)\" /><AlleleFrequency Value=\"0.22032\" Source=\"The Genome Aggregation Database (gnomAD)\" /><AlleleFrequency Value=\"0.24553\" Source=\"The Genome Aggregation Database (gnomAD), exomes\" /><AlleleFrequency Value=\"0.21567\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\" /></AlleleFrequencyList><GlobalMinorAlleleFrequency Value=\"0.20927\" Source=\"1000 Genomes Project\" MinorAllele=\"A\" /><CytogeneticLocation>13q13.1</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"13\" Accession=\"NC_000013.11\" start=\"32316435\" stop=\"32316435\" display_start=\"32316435\" display_stop=\"32316435\" variantLength=\"1\" positionVCF=\"32316435\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"A\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"13\" Accession=\"NC_000013.10\" start=\"32890572\" stop=\"32890572\" display_start=\"32890572\" 
display_stop=\"32890572\" variantLength=\"1\" positionVCF=\"32890572\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"A\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">BRCA2 DNA repair associated</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">BRCA2</ElementValue></Symbol><AttributeSet><Attribute dateValue=\"2012-07-06\" Type=\"Haploinsufficiency\">Sufficient evidence for dosage pathogenicity</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=BRCA2</URL></Citation></AttributeSet><AttributeSet><Attribute dateValue=\"2012-07-06\" Type=\"Triplosensitivity\">No evidence available</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=BRCA2</URL></Citation></AttributeSet><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"13\" Accession=\"NC_000013.11\" start=\"32315480\" stop=\"32399672\" display_start=\"32315480\" display_stop=\"32399672\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"13\" Accession=\"NC_000013.10\" start=\"32889616\" stop=\"32973808\" display_start=\"32889616\" display_stop=\"32973808\" variantLength=\"84193\" Strand=\"+\" /><XRef ID=\"675\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"600185\" DB=\"OMIM\" /><XRef ID=\"HGNC:1101\" DB=\"HGNC\" /><Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations of 2013 (PubMed 23788249) for reporting incidental findings in exons.</Comment><Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations of 2016 (PubMed 27854360) for reporting incidental findings in exons.</Comment></MeasureRelationship><XRef ID=\"203&amp;base_change=G to A\" DB=\"Breast Cancer Information Core (BIC) (BRCA2)\" /><XRef ID=\"12084\" DB=\"Illumina Clinical Services 
Laboratory,Illumina\" /><XRef Type=\"rs\" ID=\"1799943\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_000059.3(BRCA2):c.-26G&gt;A</ElementValue></Name><XRef ID=\"CA016113\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"2560\"><Trait ID=\"636\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Breast-ovarian cancer, familial 2</ElementValue><XRef ID=\"Breast-ovarian+cancer%2C+familial+2/7866\" DB=\"Genetic Alliance\" /></Name><Name><ElementValue Type=\"Alternate\">BREAST-OVARIAN CANCER, FAMILIAL, SUSCEPTIBILITY TO, 2</ElementValue><XRef Type=\"MIM\" ID=\"612555\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0001\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0002\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0003\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0004\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0005\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0006\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0007\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0008\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0009\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0010\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0011\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0012\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0014\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0016\" DB=\"OMIM\" /></Name><Name><ElementValue Type=\"Alternate\">BREAST CANCER, FAMILIAL, SUSCEPTIBILITY TO, 2</ElementValue><XRef ID=\"612555\" DB=\"OMIM\" /></Name><Name><ElementValue Type=\"Alternate\">Breast cancer, familial 2</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">BROVCA2</ElementValue><XRef Type=\"MIM\" ID=\"612555\" DB=\"OMIM\" /></Symbol><Symbol><ElementValue Type=\"Alternate\">HBOC</ElementValue><XRef ID=\"GTR000501744\" DB=\"Genetic Testing Registry (GTR)\" 
/></Symbol><Symbol><ElementValue Type=\"Alternate\">BRCA2</ElementValue><XRef ID=\"GTR000501818\" DB=\"Genetic Testing Registry (GTR)\" /></Symbol><AttributeSet><Attribute Type=\"public definition\">BRCA1- and BRCA2-associated hereditary breast and ovarian cancer syndrome (HBOC) is characterized by an increased risk for female and male breast cancer, ovarian cancer (includes fallopian tube and primary peritoneal cancers), and to a lesser extent other cancers such as prostate cancer, pancreatic cancer, and melanoma primarily in individuals with a BRCA2 pathogenic variant. The exact cancer risks differ slightly depending on whether HBOC is caused by a BRCA1 or BRCA2 pathogenic variant.</Attribute><XRef ID=\"NBK1247\" DB=\"GeneReviews\" /></AttributeSet><AttributeSet><Attribute Type=\"disease mechanism\" integerValue=\"273\">loss of function</Attribute><XRef ID=\"GTR000017874\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000021468\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000320777\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000325401\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000326160\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000330054\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000501744\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000501748\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000501818\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000505644\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000507653\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000507764\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000507864\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509001\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509002\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509450\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509692\" DB=\"Genetic Testing Registry (GTR)\" /><XRef 
ID=\"GTR000509980\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509984\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509985\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000512644\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000512645\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000512816\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000519030\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520069\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520071\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520072\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520394\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520865\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520866\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520867\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520869\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520870\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520871\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520872\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000522159\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000522160\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000527942\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000528915\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000528916\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000530120\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000530202\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000530707\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000531275\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000531340\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000551447\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000552245\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000552304\" 
DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000562228\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000569406\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000569423\" DB=\"Genetic Testing Registry (GTR)\" /></AttributeSet><AttributeSet><Attribute Type=\"keyword\">Neoplasm</Attribute></AttributeSet><Citation Type=\"review\" Abbrev=\"GeneReviews\"><ID Source=\"PubMed\">20301425</ID><ID Source=\"BookShelf\">NBK1247</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"ACS, 2007\"><ID Source=\"PubMed\">17392385</ID></Citation><Citation Type=\"Position Statement\" Abbrev=\"ASCO, 2010\"><ID Source=\"PubMed\">20065170</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"NSGC, 2004\"><ID Source=\"PubMed\">15604628</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"NSGC, 2007\"><ID Source=\"PubMed\">17508274</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"ACOG, 2009\"><ID Source=\"PubMed\">19305347</ID></Citation><Citation Type=\"Recommendation\" Abbrev=\"ACMG, 2013\"><ID Source=\"PubMed\">23788249</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"NCCN, 2013\"><URL>http://www.nccn.org/professionals/physician_gls/pdf/genetics_screening.pdf</URL><CitationText>National Comprehensive Cancer Network practice guidelines in oncology. 
Genetic/Familial High-Risk Assessment: Breast and Ovarian</CitationText></Citation><Citation Type=\"Suggested Reading\" Abbrev=\"Phillips et al., 2013\"><ID Source=\"PubMed\">23918944</ID></Citation><Citation Type=\"Suggested Reading\" Abbrev=\"Domchek et al., 2010\"><ID Source=\"pmc\">2948529</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"ASCO, 2014\"><ID Source=\"PubMed\">24493721</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"USPSTF, 2014\"><ID Source=\"PubMed\">24366376</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"CAPS, 2013\"><ID Source=\"pmc\">3585492</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"NSGC, 2013\"><ID Source=\"PubMed\">23188549</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"ACMG/NSGC, 2015\"><ID Source=\"PubMed\">25394175</ID></Citation><Citation Type=\"Recommendation\" Abbrev=\"USPSTF, 2014\"><ID Source=\"PubMed\">24366402</ID></Citation><Citation Type=\"Recommendation\" Abbrev=\"USPSTF, 2013\"><ID Source=\"PubMed\">24432435</ID></Citation><Citation Type=\"Recommendation\" Abbrev=\"ACMG, 2015\"><ID Source=\"PubMed\">25356965</ID></Citation><Citation Type=\"Recommendation\" Abbrev=\"ACMG, 2016\"><ID Source=\"PubMed\">27854360</ID><ID Source=\"DOI\">10.1038/gim.2016.190</ID></Citation><XRef ID=\"C2675520\" DB=\"MedGen\" /><XRef ID=\"145\" DB=\"Orphanet\" /><XRef Type=\"MIM\" ID=\"612555\" DB=\"OMIM\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"262846\"><ClinVarSubmissionID localKey=\"U43746.1:n.203G&gt;A|MedGen:C2675520\" submitter=\"Breast Cancer Information Core (BIC) (BRCA2)\" submitterDate=\"2014-03-28\" /><ClinVarAccession Acc=\"SCV000145948\" Version=\"2\" Type=\"SCV\" OrgID=\"504197\" OrganizationCategory=\"locus-specific database (LSDB)\" OrgType=\"primary\" DateUpdated=\"2019-08-03\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2002-05-29\"><ReviewStatus>no assertion criteria 
provided</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"5\" /></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><GeographicOrigin>Austria</GeographicOrigin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"193\" /></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><GeographicOrigin>Belgium</GeographicOrigin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"1\" /></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><GeographicOrigin>Spain</GeographicOrigin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"1\" /></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>Caucasian</Ethnicity><GeographicOrigin>American</GeographicOrigin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"1\" /></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>Caucasian</Ethnicity><GeographicOrigin>Germany</GeographicOrigin><Species 
TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"4\" /></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>Caucasian Southern African</Ethnicity><GeographicOrigin>South Africa</GeographicOrigin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"6\" /></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>European, Asian, Oceanan</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"1\" /></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>Sinhalese</Ethnicity><GeographicOrigin>Sri Lanka</GeographicOrigin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"1\" /></ObservedData></ObservedIn><ObservedIn><Sample><Origin>unknown</Origin><GeographicOrigin>Brazil</GeographicOrigin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"1\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue Type=\"Alternate\">203G/A</ElementValue></Name><Name><ElementValue Type=\"Alternate\">203G&gt;A</ElementValue></Name><AttributeSet><Attribute Type=\"Location\">U43746.1:exon 2</Attribute></AttributeSet><AttributeSet><Attribute 
Type=\"HGVS\">U43746.1:n.203G&gt;A</Attribute></AttributeSet></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Breast-ovarian cancer, familial 2</ElementValue></Name><XRef DB=\"MedGen\" ID=\"C2675520\" Type=\"CUI\" /></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"336350\"><ClinVarSubmissionID localKey=\"SCRP_var_2130\" submitter=\"Sharing Clinical Reports Project (SCRP)\" submitterDate=\"2014-03-06\" title=\"NM_000059.3:c.-26G&gt;A AND Breast-ovarian cancer, familial 2\" /><ClinVarAccession Acc=\"SCV000189291\" Version=\"1\" Type=\"SCV\" OrgID=\"500037\" OrganizationCategory=\"consortium\" OrgType=\"primary\" DateUpdated=\"2019-08-03\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2011-03-17\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species>human</Species><AffectedStatus>not provided</AffectedStatus><NumberTested>86</NumberTested></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\">86</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"nucleotide change\">5'UTR203G&gt;A</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">BRCA2</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Breast-ovarian cancer, familial 2</ElementValue></Name></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"502757\"><ClinVarSubmissionID localKey=\"NM_000059.3:c.-26G&gt;A|OMIM:612555\" submittedAssembly=\"GRCh37\" submitter=\"Michigan Medical Genetics Laboratories,University of Michigan\" 
submitterDate=\"2016-04-21\" /><ClinVarAccession Acc=\"SCV000195942\" Version=\"1\" Type=\"SCV\" OrgID=\"308659\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-08-03\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2014-11-03\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><AttributeSet><Attribute Type=\"AssertionMethod\">ACMG Guidelines, 2015</Attribute><Citation><ID Source=\"PubMed\">25741868</ID></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Tissue>Blood</Tissue><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_000059.3:c.-26G&gt;A</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">BRCA2</ElementValue></Symbol></MeasureRelationship><XRef DB=\"dbSNP\" ID=\"1799943\" Type=\"rsNumber\" /></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><XRef DB=\"OMIM\" ID=\"612555\" Type=\"MIM\" /></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"456411\" SubmissionName=\"ENIGMA_2015-08-17\"><ClinVarSubmissionID localKey=\"NM_000059.3:c.-26G&gt;A|OMIM:612555\" submitter=\"Evidence-based Network for the Interpretation of Germline Mutant Alleles (ENIGMA)\" submitterDate=\"2015-08-17\" /><ClinVarAccession Acc=\"SCV000244917\" Version=\"1\" Type=\"SCV\" OrgID=\"504863\" OrganizationCategory=\"consortium\" OrgType=\"primary\" DateUpdated=\"2019-08-03\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2015-01-12\"><ReviewStatus>reviewed by expert 
panel</ReviewStatus><Description>Benign</Description><Comment>Class 1 not pathogenic based on frequency &gt;1% in an outbred sampleset. Frequency 0.3689 (Asian), 0.04878 (African), 0.2282 (European), derived from 1000 genomes (2012-04-30).</Comment></ClinicalSignificance><Assertion Type=\"variation to disease\" /><AttributeSet><Attribute Type=\"AssertionMethod\">ENIGMA BRCA1/2 Classification Criteria (2015)</Attribute><Citation Type=\"general\"><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/hxnfuuxx/enigma_rules_2015-03-26.pdf</URL><CitationText>ENIGMA BRCA1/2 Classification Criteria (2015)</CitationText></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>curation</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue Type=\"Alternate\">203 G&gt;A</ElementValue></Name><AttributeSet><Attribute Type=\"HGVS\">NM_000059.3:c.-26G&gt;A</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">BRCA2</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Breast-ovarian cancer, familial 2</ElementValue></Name><XRef DB=\"OMIM\" ID=\"612555\" Type=\"MIM\" /></Trait></TraitSet><StudyDescription>ENIGMA (Evidence-based Network for the Interpretation of Germline Mutant Alleles) is a consortium focused on determining the clinical significance of variants in BRCA1, BRCA2 and other known/suspected breast cancer genes. 
http://enigmaconsortium.org/</StudyDescription></ClinVarAssertion><ClinVarAssertion ID=\"1120397\" SubmissionName=\"SUB2591501\"><ClinVarSubmissionID localKey=\"NM_000059.3:c.-26G&gt;A|OMIM:612555\" submittedAssembly=\"GRCh37\" submitter=\"Fulgent Genetics,Fulgent Genetics\" submitterDate=\"2017-04-18\" /><ClinVarAccession Acc=\"SCV000575763\" Version=\"1\" Type=\"SCV\" OrgID=\"500105\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-08-03\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2016-02-08\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><AttributeSet><Attribute Type=\"AssertionMethod\">ACMG Guidelines, 2015</Attribute><Citation><ID Source=\"PubMed\">25741868</ID></Citation></AttributeSet><ObservedIn><Sample><Origin>unknown</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_000059.3:c.-26G&gt;A</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">BRCA2</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><XRef DB=\"OMIM\" ID=\"612555\" Type=\"MIM\" /></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"1449686\" SubmissionName=\"SUB3915625\"><ClinVarSubmissionID localKey=\"NM_000059.3:c.-26G&gt;A|OMIM:612555\" submittedAssembly=\"GRCh37\" submitter=\"Genome Diagnostics Laboratory,University Medical Center Utrecht\" submitterDate=\"2018-04-17\" /><ClinVarAccession Acc=\"SCV000743232\" Version=\"1\" Type=\"SCV\" OrgID=\"274978\" 
OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-08-03\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2014-10-09\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><AttributeSet><Attribute Type=\"AssertionMethod\">ACGS Guidelines, 2013</Attribute><Citation><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/yggjhwfz/evaluation_and_reporting_of_sequence_variants_bpgs_june_2013_-_finalpdf.pdf</URL></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_000059.3:c.-26G&gt;A</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">BRCA2</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><XRef DB=\"OMIM\" ID=\"612555\" Type=\"MIM\" /></Trait></TraitSet><StudyName>VKGL Data-share Consensus</StudyName></ClinVarAssertion><ClinVarAssertion ID=\"1450829\" SubmissionName=\"SUB3885264\"><ClinVarSubmissionID localKey=\"NM_000059.3:c.-26G&gt;A|OMIM:612555\" submittedAssembly=\"GRCh37\" submitter=\"DNA and Cytogenetics Diagnostics Unit,Erasmus Medical Center\" submitterDate=\"2018-04-09\" /><ClinVarAccession Acc=\"SCV000744375\" Version=\"1\" Type=\"SCV\" OrgID=\"506497\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-08-03\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2015-09-21\"><ReviewStatus>criteria provided, single 
submitter</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><AttributeSet><Attribute Type=\"AssertionMethod\">ACGS Guidelines, 2013</Attribute><Citation><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/3yyso7ro/evaluation_and_reporting_of_sequence_variants_bpgs_june_2013_-_finalpdf.pdf</URL></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_000059.3:c.-26G&gt;A</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">BRCA2</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><XRef DB=\"OMIM\" ID=\"612555\" Type=\"MIM\" /></Trait></TraitSet><StudyName>VKGL Data-share Consensus</StudyName></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000113363.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"48300487\"><RecordStatus>current</RecordStatus><Title>NM_000059.3(BRCA2):c.4965C&gt;R (p.Tyr1655Ter) AND Breast-ovarian cancer, familial 2</Title><ReferenceClinVarAssertion DateCreated=\"2014-03-30\" DateLastUpdated=\"2019-12-15\" ID=\"266511\"><ClinVarAccession Acc=\"RCV000113363\" Version=\"1\" Type=\"RCV\" DateUpdated=\"2019-12-16\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2013-05-21\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>Caucasian Non Hispanic</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"49325199\"><Attribute integerValue=\"1\" Type=\"VariantAlleles\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"126056\" Acc=\"VCV000126056\" Version=\"1\"><Measure Type=\"single nucleotide variant\" ID=\"131594\"><Name><ElementValue Type=\"Preferred\">NM_000059.3(BRCA2):c.4965C&gt;R (p.Tyr1655Ter)</ElementValue></Name><AttributeSet><Attribute Accession=\"LRG_293t1\" Change=\"c.4965C&gt;R\" Type=\"HGVS, coding, LRG\">LRG_293t1:c.4965C&gt;R</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_000059\" Version=\"3\" Change=\"c.4965C&gt;R\" Type=\"HGVS, coding, RefSeq\">NM_000059.3:c.4965C&gt;R</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_293\" Change=\"g.28841C&gt;R\" Type=\"HGVS, genomic, LRG\">LRG_293:g.28841C&gt;R</Attribute></AttributeSet><AttributeSet><Attribute 
Accession=\"NG_012772\" Version=\"3\" Change=\"g.28841C&gt;R\" Type=\"HGVS, genomic, RefSeqGene\">NG_012772.3:g.28841C&gt;R</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000013\" Version=\"11\" Change=\"g.32339320C&gt;R\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000013.11:g.32339320C&gt;R</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000013\" Version=\"10\" Change=\"g.32913457C&gt;R\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000013.10:g.32913457C&gt;R</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"U43746\" Version=\"1\" Change=\"n.5193CtoG/A\" Type=\"HGVS, non-validated\">U43746.1:n.5193CtoG/A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_293p1\" Change=\"p.Tyr1655Ter\" Type=\"HGVS, protein\">LRG_293p1:p.Tyr1655Ter</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_000050\" Version=\"2\" Change=\"p.Tyr1655Ter\" Type=\"HGVS, protein, RefSeq\">NP_000050.2:p.Tyr1655Ter</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_000050\" Version=\"2\" Change=\"p.Tyr1655Ter\" Type=\"HGVS, protein, RefSeq\">NP_000050.2:p.Tyr1655Ter</Attribute></AttributeSet><AttributeSet><Attribute Type=\"Location\">U43746.1:exon 11</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">nonsense</Attribute><XRef ID=\"SO:0001587\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_000059.3:c.4965C&gt;R\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">Y1655X</Attribute></AttributeSet><CytogeneticLocation>13q13.1</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"13\" Accession=\"NC_000013.11\" start=\"32339320\" stop=\"32339320\" display_start=\"32339320\" display_stop=\"32339320\" variantLength=\"1\" positionVCF=\"32339320\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"R\" /><SequenceLocation 
Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"13\" Accession=\"NC_000013.10\" start=\"32913457\" stop=\"32913457\" display_start=\"32913457\" display_stop=\"32913457\" variantLength=\"1\" positionVCF=\"32913457\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"R\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">BRCA2 DNA repair associated</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">BRCA2</ElementValue></Symbol><AttributeSet><Attribute dateValue=\"2012-07-06\" Type=\"Haploinsufficiency\">Sufficient evidence for dosage pathogenicity</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=BRCA2</URL></Citation></AttributeSet><AttributeSet><Attribute dateValue=\"2012-07-06\" Type=\"Triplosensitivity\">No evidence available</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=BRCA2</URL></Citation></AttributeSet><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"13\" Accession=\"NC_000013.11\" start=\"32315480\" stop=\"32399672\" display_start=\"32315480\" display_stop=\"32399672\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"13\" Accession=\"NC_000013.10\" start=\"32889616\" stop=\"32973808\" display_start=\"32889616\" display_stop=\"32973808\" variantLength=\"84193\" Strand=\"+\" /><XRef ID=\"675\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"600185\" DB=\"OMIM\" /><XRef ID=\"HGNC:1101\" DB=\"HGNC\" /><Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations of 2013 (PubMed 23788249) for reporting incidental findings in exons.</Comment><Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations of 2016 (PubMed 27854360) for reporting incidental findings in 
exons.</Comment></MeasureRelationship><XRef Type=\"rs\" ID=\"80358721\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_000059.3(BRCA2):c.4965C&gt;R (p.Tyr1655Ter)</ElementValue></Name></MeasureSet><TraitSet Type=\"Disease\" ID=\"2560\"><Trait ID=\"636\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Breast-ovarian cancer, familial 2</ElementValue><XRef ID=\"Breast-ovarian+cancer%2C+familial+2/7866\" DB=\"Genetic Alliance\" /></Name><Name><ElementValue Type=\"Alternate\">BREAST-OVARIAN CANCER, FAMILIAL, SUSCEPTIBILITY TO, 2</ElementValue><XRef Type=\"MIM\" ID=\"612555\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0001\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0002\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0003\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0004\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0005\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0006\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0007\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0008\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0009\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0010\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0011\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0012\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0014\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"600185.0016\" DB=\"OMIM\" /></Name><Name><ElementValue Type=\"Alternate\">BREAST CANCER, FAMILIAL, SUSCEPTIBILITY TO, 2</ElementValue><XRef ID=\"612555\" DB=\"OMIM\" /></Name><Name><ElementValue Type=\"Alternate\">Breast cancer, familial 2</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">BROVCA2</ElementValue><XRef Type=\"MIM\" ID=\"612555\" DB=\"OMIM\" /></Symbol><Symbol><ElementValue Type=\"Alternate\">HBOC</ElementValue><XRef ID=\"GTR000501744\" DB=\"Genetic Testing Registry (GTR)\" 
/></Symbol><Symbol><ElementValue Type=\"Alternate\">BRCA2</ElementValue><XRef ID=\"GTR000501818\" DB=\"Genetic Testing Registry (GTR)\" /></Symbol><AttributeSet><Attribute Type=\"public definition\">BRCA1- and BRCA2-associated hereditary breast and ovarian cancer syndrome (HBOC) is characterized by an increased risk for female and male breast cancer, ovarian cancer (includes fallopian tube and primary peritoneal cancers), and to a lesser extent other cancers such as prostate cancer, pancreatic cancer, and melanoma primarily in individuals with a BRCA2 pathogenic variant. The exact cancer risks differ slightly depending on whether HBOC is caused by a BRCA1 or BRCA2 pathogenic variant.</Attribute><XRef ID=\"NBK1247\" DB=\"GeneReviews\" /></AttributeSet><AttributeSet><Attribute Type=\"disease mechanism\" integerValue=\"273\">loss of function</Attribute><XRef ID=\"GTR000017874\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000021468\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000320777\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000325401\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000326160\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000330054\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000501744\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000501748\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000501818\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000505644\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000507653\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000507764\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000507864\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000507913\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000507930\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509001\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509002\" DB=\"Genetic Testing Registry (GTR)\" /><XRef 
ID=\"GTR000509348\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509349\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509450\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509451\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509692\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509980\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509984\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509985\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000512644\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000512645\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000512816\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000519030\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520069\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520071\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520072\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520394\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520410\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520865\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520866\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520867\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520869\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520870\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520871\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520872\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000522159\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000522160\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000522162\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000522243\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000527942\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000528915\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000528916\" 
DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000530120\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000530202\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000530707\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000531275\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000531340\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000551447\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000552245\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000552304\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000562228\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000569406\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000569423\" DB=\"Genetic Testing Registry (GTR)\" /></AttributeSet><AttributeSet><Attribute Type=\"keyword\">Neoplasm</Attribute></AttributeSet><Citation Type=\"review\" Abbrev=\"GeneReviews\"><ID Source=\"PubMed\">20301425</ID><ID Source=\"BookShelf\">NBK1247</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"ACS, 2007\"><ID Source=\"PubMed\">17392385</ID></Citation><Citation Type=\"Position Statement\" Abbrev=\"ASCO, 2010\"><ID Source=\"PubMed\">20065170</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"NSGC, 2004\"><ID Source=\"PubMed\">15604628</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"NSGC, 2007\"><ID Source=\"PubMed\">17508274</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"ACOG, 2009\"><ID Source=\"PubMed\">19305347</ID></Citation><Citation Type=\"Recommendation\" Abbrev=\"ACMG, 2013\"><ID Source=\"PubMed\">23788249</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"NCCN, 2013\"><URL>http://www.nccn.org/professionals/physician_gls/pdf/genetics_screening.pdf</URL><CitationText>National Comprehensive Cancer Network practice guidelines in oncology. 
Genetic/Familial High-Risk Assessment: Breast and Ovarian</CitationText></Citation><Citation Type=\"Suggested Reading\" Abbrev=\"Phillips et al., 2013\"><ID Source=\"PubMed\">23918944</ID></Citation><Citation Type=\"Suggested Reading\" Abbrev=\"Domchek et al., 2010\"><ID Source=\"pmc\">2948529</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"ASCO, 2014\"><ID Source=\"PubMed\">24493721</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"USPSTF, 2014\"><ID Source=\"PubMed\">24366376</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"CAPS, 2013\"><ID Source=\"pmc\">3585492</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"NSGC, 2013\"><ID Source=\"PubMed\">23188549</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"ACMG/NSGC, 2015\"><ID Source=\"PubMed\">25394175</ID></Citation><Citation Type=\"Recommendation\" Abbrev=\"USPSTF, 2014\"><ID Source=\"PubMed\">24366402</ID></Citation><Citation Type=\"Recommendation\" Abbrev=\"USPSTF, 2013\"><ID Source=\"PubMed\">24432435</ID></Citation><Citation Type=\"Recommendation\" Abbrev=\"ACMG, 2015\"><ID Source=\"PubMed\">25356965</ID></Citation><Citation Type=\"Recommendation\" Abbrev=\"ACMG, 2016\"><ID Source=\"PubMed\">27854360</ID><ID Source=\"DOI\">10.1038/gim.2016.190</ID></Citation><XRef ID=\"C2675520\" DB=\"MedGen\" /><XRef ID=\"145\" DB=\"Orphanet\" /><XRef Type=\"MIM\" ID=\"612555\" DB=\"OMIM\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"263413\"><ClinVarSubmissionID localKey=\"U43746.1:n.5193CtoG/A|MedGen:C2675520\" submitter=\"Breast Cancer Information Core (BIC) (BRCA2)\" submitterDate=\"2014-03-28\" /><ClinVarAccession Acc=\"SCV000146515\" Version=\"1\" Type=\"SCV\" OrgID=\"504197\" OrganizationCategory=\"locus-specific database (LSDB)\" OrgType=\"primary\" DateUpdated=\"2019-08-03\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2013-05-21\"><ReviewStatus>no assertion criteria 
provided</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>Caucasian Non Hispanic</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"1\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue Type=\"Alternate\">Y1655X</ElementValue></Name><AttributeSet><Attribute Type=\"Location\">U43746.1:exon 11</Attribute></AttributeSet><AttributeSet><Attribute Type=\"HGVS\">U43746.1:n.5193CtoG/A</Attribute></AttributeSet><XRef DB=\"dbSNP\" ID=\"80358721\" Type=\"rsNumber\" /></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Breast-ovarian cancer, familial 2</ElementValue></Name><XRef DB=\"MedGen\" ID=\"C2675520\" Type=\"CUI\" /></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000120902.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"47135312\"><RecordStatus>current</RecordStatus><Title>NM_017709.4(TENT5C):c.201C&gt;G (p.His67Gln) AND not specified</Title><ReferenceClinVarAssertion DateCreated=\"2014-06-09\" DateLastUpdated=\"2019-11-02\" ID=\"284727\"><ClinVarAccession Acc=\"RCV000120902\" Version=\"1\" Type=\"RCV\" DateUpdated=\"2019-11-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2013-09-19\"><ReviewStatus>no assertion provided</ReviewStatus><Description>not provided</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>European</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><NumberTested>331</NumberTested></Sample><Method><NamePlatform>Complete Genomics</NamePlatform><Purpose>Discovery</Purpose><MethodType>reference population</MethodType></Method><ObservedData ID=\"47997647\"><Attribute Type=\"AlleleFrequency\">0.0544</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>African</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><NumberTested>43</NumberTested></Sample><Method><NamePlatform>Complete Genomics</NamePlatform><Purpose>Discovery</Purpose><MethodType>reference population</MethodType></Method><ObservedData ID=\"48000450\"><Attribute Type=\"AlleleFrequency\">0.27910000000000001</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>East_Asian</Ethnicity><Species 
TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><NumberTested>62</NumberTested></Sample><Method><NamePlatform>Complete Genomics</NamePlatform><Purpose>Discovery</Purpose><MethodType>reference population</MethodType></Method><ObservedData ID=\"48000451\"><Attribute Type=\"AlleleFrequency\">0.1452</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>Hispanic</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><NumberTested>118</NumberTested></Sample><Method><NamePlatform>Complete Genomics</NamePlatform><Purpose>Discovery</Purpose><MethodType>reference population</MethodType></Method><ObservedData ID=\"48002222\"><Attribute Type=\"AlleleFrequency\">0.0932</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>Whole_cohort</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><NumberTested>681</NumberTested></Sample><Method><NamePlatform>Complete Genomics</NamePlatform><Purpose>Discovery</Purpose><MethodType>reference population</MethodType></Method><ObservedData ID=\"48002223\"><Attribute Type=\"AlleleFrequency\">0.0947</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>African_European</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><NumberTested>46</NumberTested></Sample><Method><NamePlatform>Complete Genomics</NamePlatform><Purpose>Discovery</Purpose><MethodType>reference population</MethodType></Method><ObservedData ID=\"48010273\"><Attribute Type=\"AlleleFrequency\">0.1739</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>Central_Asian</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><NumberTested>50</NumberTested></Sample><Method><NamePlatform>Complete 
Genomics</NamePlatform><Purpose>Discovery</Purpose><MethodType>reference population</MethodType></Method><ObservedData ID=\"48010274\"><Attribute Type=\"AlleleFrequency\">0.07</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"134229\" Acc=\"VCV000134229\" Version=\"1\"><Measure Type=\"single nucleotide variant\" ID=\"137968\"><Name><ElementValue Type=\"Preferred\">NM_017709.4(TENT5C):c.201C&gt;G (p.His67Gln)</ElementValue></Name><AttributeSet><Attribute Accession=\"NM_017709\" Version=\"4\" Change=\"c.201C&gt;G\" Type=\"HGVS, coding, RefSeq\">NM_017709.4:c.201C&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000001\" Version=\"11\" Change=\"g.117623069C&gt;G\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000001.11:g.117623069C&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000001\" Version=\"10\" Change=\"g.118165691C&gt;G\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000001.10:g.118165691C&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"Q5VWP2\" Change=\"p.His67Gln\" Type=\"HGVS, protein\">Q5VWP2:p.His67Gln</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_060179\" Version=\"2\" Change=\"p.His67Gln\" Type=\"HGVS, protein, RefSeq\">NP_060179.2:p.His67Gln</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_017709.4:c.201C&gt;G\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">H67Q</Attribute></AttributeSet><AlleleFrequencyList><AlleleFrequency Value=\"0.12680\" Source=\"1000 Genomes Project\" /></AlleleFrequencyList><GlobalMinorAlleleFrequency Value=\"0.20707\" Source=\"1000 Genomes Project\" MinorAllele=\"T\" /><CytogeneticLocation>1p12</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" 
Chr=\"1\" Accession=\"NC_000001.11\" start=\"117623069\" stop=\"117623069\" display_start=\"117623069\" display_stop=\"117623069\" variantLength=\"1\" positionVCF=\"117623069\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"G\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"118165691\" stop=\"118165691\" display_start=\"118165691\" display_stop=\"118165691\" variantLength=\"1\" positionVCF=\"118165691\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"G\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">terminal nucleotidyltransferase 5C</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">TENT5C</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"117606048\" stop=\"117628389\" display_start=\"117606048\" display_stop=\"117628389\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"118148603\" stop=\"118171010\" display_start=\"118148603\" display_stop=\"118171010\" variantLength=\"22408\" Strand=\"+\" /><XRef ID=\"54855\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"613952\" DB=\"OMIM\" /><XRef ID=\"HGNC:24712\" DB=\"HGNC\" /></MeasureRelationship><XRef ID=\"Q5VWP2#VAR_060132\" DB=\"UniProtKB\" /><XRef Type=\"rs\" ID=\"1630312\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_017709.4(TENT5C):c.201C&gt;G (p.His67Gln)</ElementValue></Name><XRef ID=\"CA159209\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"9590\"><Trait ID=\"16789\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not specified</ElementValue></Name><Name><ElementValue Type=\"Alternate\">AllHighlyPenetrant</ElementValue></Name><AttributeSet><Attribute Type=\"public 
definition\">The term 'not specified' was created for use in ClinVar so that submitters can convey the concept that a variant is benign, likely benign, or of uncertain significance for an unspecified set of disorders.  This usage was introduced in 2014 to replace AllHighlyPenetrant.</Attribute></AttributeSet><XRef ID=\"CN169374\" DB=\"MedGen\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"282039\" SubmissionName=\"Cancer_genes_01\"><ClinVarSubmissionID localKey=\"MedGen:CN169374\" submitter=\"ITMI\" submitterDate=\"2014-05-29\" /><ClinVarAccession Acc=\"SCV000085070\" Version=\"1\" Type=\"SCV\" OrgID=\"504914\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2013-09-19\"><ReviewStatus>no assertion provided</ReviewStatus><Description>not provided</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>Whole_cohort</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><NumberTested>681</NumberTested></Sample><Method><NamePlatform>Complete Genomics</NamePlatform><TypePlatform>next-gen sequencing</TypePlatform><Purpose>discovery</Purpose><MethodType>reference population</MethodType></Method><ObservedData><Attribute Type=\"AlleleFrequency\">0.0947</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>African</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><NumberTested>43</NumberTested></Sample><Method><NamePlatform>Complete Genomics</NamePlatform><TypePlatform>next-gen sequencing</TypePlatform><Purpose>discovery</Purpose><MethodType>reference population</MethodType></Method><ObservedData><Attribute 
Type=\"AlleleFrequency\">0.27910000000000001</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>African_European</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><NumberTested>46</NumberTested></Sample><Method><NamePlatform>Complete Genomics</NamePlatform><TypePlatform>next-gen sequencing</TypePlatform><Purpose>discovery</Purpose><MethodType>reference population</MethodType></Method><ObservedData><Attribute Type=\"AlleleFrequency\">0.1739</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>Central_Asian</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><NumberTested>50</NumberTested></Sample><Method><NamePlatform>Complete Genomics</NamePlatform><TypePlatform>next-gen sequencing</TypePlatform><Purpose>discovery</Purpose><MethodType>reference population</MethodType></Method><ObservedData><Attribute Type=\"AlleleFrequency\">0.07</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>East_Asian</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><NumberTested>62</NumberTested></Sample><Method><NamePlatform>Complete Genomics</NamePlatform><TypePlatform>next-gen sequencing</TypePlatform><Purpose>discovery</Purpose><MethodType>reference population</MethodType></Method><ObservedData><Attribute Type=\"AlleleFrequency\">0.1452</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>European</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><NumberTested>331</NumberTested></Sample><Method><NamePlatform>Complete Genomics</NamePlatform><TypePlatform>next-gen sequencing</TypePlatform><Purpose>discovery</Purpose><MethodType>reference population</MethodType></Method><ObservedData><Attribute 
Type=\"AlleleFrequency\">0.0544</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>Hispanic</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><NumberTested>118</NumberTested></Sample><Method><NamePlatform>Complete Genomics</NamePlatform><TypePlatform>next-gen sequencing</TypePlatform><Purpose>discovery</Purpose><MethodType>reference population</MethodType></Method><ObservedData><Attribute Type=\"AlleleFrequency\">0.0932</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><SequenceLocation Assembly=\"GRCh37\" Chr=\"1\" alternateAllele=\"G\" referenceAllele=\"C\" start=\"118165691\" stop=\"118165691\" variantLength=\"1\" /></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">AllHighlyPenetrant</ElementValue></Name><XRef DB=\"MedGen\" ID=\"CN169374\" Type=\"CUI\" /></Trait></TraitSet><Citation Type=\"general\"><ID Source=\"PubMed\">24728327</ID></Citation><Comment Type=\"public\">Please see associated publication for description of ethnicities</Comment></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000124712.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"48742437\"><RecordStatus>current</RecordStatus><Title>NM_004006.2(DMD):c.8810A&gt;G (p.Gln2937Arg) AND not specified</Title><ReferenceClinVarAssertion DateCreated=\"2014-06-19\" DateLastUpdated=\"2019-12-17\" ID=\"292154\"><ClinVarAccession Acc=\"RCV000124712\" Version=\"3\" Type=\"RCV\" DateUpdated=\"2019-12-17\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2018-07-19\"><ReviewStatus>criteria provided, multiple submitters, no conflicts</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"49340762\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"49343045\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"137106\" Acc=\"VCV000137106\" Version=\"3\"><Measure Type=\"single nucleotide variant\" ID=\"140809\"><Name><ElementValue Type=\"Preferred\">NM_004006.2(DMD):c.8810A&gt;G (p.Gln2937Arg)</ElementValue></Name><Name><ElementValue Type=\"Alternate\">p.Q2937R:CAG&gt;CGG</ElementValue></Name><AttributeSet><Attribute Accession=\"LRG_199t1\" Change=\"c.8810A&gt;G\" Type=\"HGVS, coding, 
LRG\">LRG_199t1:c.8810A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_004021\" Version=\"3\" Change=\"c.1430=\" Type=\"HGVS, coding, RefSeq\">NM_004021.3:c.1430=</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_004023\" Version=\"3\" Change=\"c.1430=\" Type=\"HGVS, coding, RefSeq\">NM_004023.3:c.1430=</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_004013\" Version=\"2\" Change=\"c.1430A&gt;G\" Type=\"HGVS, coding, RefSeq\">NM_004013.2:c.1430A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_004020\" Version=\"3\" Change=\"c.1430A&gt;G\" Type=\"HGVS, coding, RefSeq\">NM_004020.3:c.1430A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_004022\" Version=\"2\" Change=\"c.1430A&gt;G\" Type=\"HGVS, coding, RefSeq\">NM_004022.2:c.1430A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_004012\" Version=\"4\" Change=\"c.4778=\" Type=\"HGVS, coding, RefSeq\">NM_004012.4:c.4778=</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_004011\" Version=\"4\" Change=\"c.4787=\" Type=\"HGVS, coding, RefSeq\">NM_004011.4:c.4787=</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_004014\" Version=\"2\" Change=\"c.623A&gt;G\" Type=\"HGVS, coding, RefSeq\">NM_004014.2:c.623A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_004010\" Version=\"3\" Change=\"c.8441A&gt;G\" Type=\"HGVS, coding, RefSeq\">NM_004010.3:c.8441A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_000109\" Version=\"4\" Change=\"c.8786=\" Type=\"HGVS, coding, RefSeq\">NM_000109.4:c.8786=</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_004009\" Version=\"3\" Change=\"c.8798A&gt;G\" Type=\"HGVS, coding, RefSeq\">NM_004009.3:c.8798A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_004006\" Version=\"2\" Change=\"c.8810A&gt;G\" Type=\"HGVS, coding, 
RefSeq\">NM_004006.2:c.8810A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_199\" Change=\"g.1866377A&gt;G\" Type=\"HGVS, genomic, LRG\">LRG_199:g.1866377A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_012232\" Version=\"1\" Change=\"g.1866377A&gt;G\" Type=\"HGVS, genomic, RefSeqGene\">NG_012232.1:g.1866377A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000023\" Version=\"11\" Change=\"g.31478233=\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000023.11:g.31478233=</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000023\" Version=\"10\" Change=\"g.31496350=\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000023.10:g.31496350=</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_004018\" Version=\"1\" Change=\"c.-211404A&gt;G\" Type=\"HGVS, previous\">NM_004018.1:c.-211404A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_199p1\" Change=\"p.Gln2937Arg\" Type=\"HGVS, protein\">LRG_199p1:p.Gln2937Arg</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_004003\" Version=\"2\" Change=\"p.Arg1593=\" Type=\"HGVS, protein, RefSeq\">NP_004003.2:p.Arg1593=</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_004002\" Version=\"3\" Change=\"p.Arg1596=\" Type=\"HGVS, protein, RefSeq\">NP_004002.3:p.Arg1596=</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_000100\" Version=\"3\" Change=\"p.Arg2929=\" Type=\"HGVS, protein, RefSeq\">NP_000100.3:p.Arg2929=</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_004012\" Version=\"2\" Change=\"p.Arg477=\" Type=\"HGVS, protein, RefSeq\">NP_004012.2:p.Arg477=</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_004014\" Version=\"2\" Change=\"p.Arg477=\" Type=\"HGVS, protein, RefSeq\">NP_004014.2:p.Arg477=</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_004005\" Version=\"1\" 
Change=\"p.Gln208Arg\" Type=\"HGVS, protein, RefSeq\">NP_004005.1:p.Gln208Arg</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_004001\" Version=\"1\" Change=\"p.Gln2814Arg\" Type=\"HGVS, protein, RefSeq\">NP_004001.1:p.Gln2814Arg</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_004000\" Version=\"1\" Change=\"p.Gln2933Arg\" Type=\"HGVS, protein, RefSeq\">NP_004000.1:p.Gln2933Arg</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_003997\" Version=\"1\" Change=\"p.Gln2937Arg\" Type=\"HGVS, protein, RefSeq\">NP_003997.1:p.Gln2937Arg</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_004004\" Version=\"1\" Change=\"p.Gln477Arg\" Type=\"HGVS, protein, RefSeq\">NP_004004.1:p.Gln477Arg</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_004011\" Version=\"2\" Change=\"p.Gln477Arg\" Type=\"HGVS, protein, RefSeq\">NP_004011.2:p.Gln477Arg</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_004013\" Version=\"1\" Change=\"p.Gln477Arg\" Type=\"HGVS, protein, RefSeq\">NP_004013.1:p.Gln477Arg</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_004006.2:c.8810A&gt;G\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_004009.3:c.8798A&gt;G\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_004010.3:c.8441A&gt;G\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_004013.2:c.1430A&gt;G\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense 
variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_004014.2:c.623A&gt;G\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_004020.3:c.1430A&gt;G\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_004022.2:c.1430A&gt;G\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">no sequence alteration</Attribute><XRef ID=\"SO:0002073\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_000109.4:c.8786=\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">no sequence alteration</Attribute><XRef ID=\"SO:0002073\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_004011.4:c.4787=\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">no sequence alteration</Attribute><XRef ID=\"SO:0002073\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_004012.4:c.4778=\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">no sequence alteration</Attribute><XRef ID=\"SO:0002073\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_004021.3:c.1430=\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">no sequence alteration</Attribute><XRef ID=\"SO:0002073\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_004023.3:c.1430=\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">Q208R</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">Q2814R</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">Q2933R</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">Q2937R</Attribute></AttributeSet><AttributeSet><Attribute 
Type=\"ProteinChange1LetterCode\">Q477R</Attribute></AttributeSet><AlleleFrequencyList><AlleleFrequency Value=\"0.04318\" Source=\"NHLBI Exome Sequencing Project (ESP) Exome Variant Server\" /><AlleleFrequency Value=\"0.11815\" Source=\"1000 Genomes Project\" /><AlleleFrequency Value=\"0.09548\" Source=\"Exome Aggregation Consortium (ExAC)\" /><AlleleFrequency Value=\"0.04757\" Source=\"The Genome Aggregation Database (gnomAD)\" /><AlleleFrequency Value=\"0.10219\" Source=\"The Genome Aggregation Database (gnomAD), exomes\" /><AlleleFrequency Value=\"0.05797\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\" /></AlleleFrequencyList><GlobalMinorAlleleFrequency Value=\"0.11815\" Source=\"1000 Genomes Project\" MinorAllele=\"C\" /><CytogeneticLocation>Xp21.2</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"X\" Accession=\"NC_000023.11\" start=\"31478233\" stop=\"31478233\" display_start=\"31478233\" display_stop=\"31478233\" variantLength=\"1\" positionVCF=\"31478233\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"C\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"X\" Accession=\"NC_000023.10\" start=\"31496350\" stop=\"31496350\" display_start=\"31496350\" display_stop=\"31496350\" variantLength=\"1\" positionVCF=\"31496350\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"C\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">dystrophin</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">DMD</ElementValue></Symbol><AttributeSet><Attribute dateValue=\"2019-11-20\" Type=\"Haploinsufficiency\">Sufficient evidence for dosage pathogenicity</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=DMD</URL></Citation></AttributeSet><AttributeSet><Attribute dateValue=\"2019-11-20\" Type=\"Triplosensitivity\">No evidence 
available</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=DMD</URL></Citation></AttributeSet><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"X\" Accession=\"NC_000023.11\" start=\"31119219\" stop=\"33339460\" display_start=\"31119219\" display_stop=\"33339460\" Strand=\"-\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"X\" Accession=\"NC_000023.10\" start=\"31137344\" stop=\"33357725\" display_start=\"31137344\" display_stop=\"33357725\" variantLength=\"2220382\" Strand=\"-\" /><XRef ID=\"1756\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"300377\" DB=\"OMIM\" /><XRef ID=\"HGNC:2928\" DB=\"HGNC\" /></MeasureRelationship><XRef Type=\"rs\" ID=\"1800280\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_004006.2(DMD):c.8810A&gt;G (p.Gln2937Arg)</ElementValue></Name><XRef ID=\"CA290622\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"9590\"><Trait ID=\"16789\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not specified</ElementValue></Name><Name><ElementValue Type=\"Alternate\">AllHighlyPenetrant</ElementValue></Name><AttributeSet><Attribute Type=\"public definition\">The term 'not specified' was created for use in ClinVar so that submitters can convey the concept that a variant is benign, likely benign, or of uncertain significance for an unspecified set of disorders.  
This usage was introduced in 2014 to replace AllHighlyPenetrant.</Attribute></AttributeSet><XRef ID=\"CN169374\" DB=\"MedGen\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"290072\" SubmissionName=\"SUB3839901\"><ClinVarSubmissionID localKey=\"GDX:17557|Not Provided\" submittedAssembly=\"GRCh37\" submitter=\"GeneDx\" submitterDate=\"2018-03-26\" /><ClinVarAccession Acc=\"SCV000168148\" Version=\"11\" Type=\"SCV\" OrgID=\"26957\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2013-11-22\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Benign</Description><Comment Type=\"public\">This variant is considered likely benign or benign based on one or more of the following criteria: it is a conservative change, it occurs at a poorly conserved position in the protein, it is predicted to be benign by multiple in silico algorithms, and/or has population frequency not consistent with disease.</Comment></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"GeneDx\" ID=\"GDX:17557\" /><AttributeSet><Attribute Type=\"AssertionMethod\">GeneDX Variant Classification (06012015)</Attribute><Citation><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/7oynscmk/mdi-5616_26957_genedx_interprules_final_061215.pdf</URL></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_004006.2:c.8810A&gt;G</Attribute></AttributeSet><SequenceLocation Assembly=\"GRCh37\" Chr=\"X\" alternateAllele=\"C\" referenceAllele=\"C\" start=\"31496350\" stop=\"31496350\" 
variantLength=\"1\" /><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">DMD</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not specified</ElementValue></Name></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"1795888\" SubmissionName=\"SUB5494893\"><ClinVarSubmissionID localKey=\"NM_004006.2:c.8810A&gt;G|MedGen:CN169374\" submittedAssembly=\"GRCh37\" submitter=\"Integrated Genetics/Laboratory Corporation of America\" submitterDate=\"2019-04-24\" /><ClinVarAccession Acc=\"SCV000919267\" Version=\"1\" Type=\"SCV\" OrgID=\"500026\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-05-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2018-07-19\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><AttributeSet><Attribute Type=\"AssertionMethod\">LabCorp Variant Classification Summary - May 2015</Attribute><Citation><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/pttb9itm/labcorp_variant_classification_method_-_may_2015.pdf</URL></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_004006.2:c.8810A&gt;G</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">DMD</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not 
specified</ElementValue></Name><XRef DB=\"MedGen\" ID=\"CN169374\" Type=\"CUI\" /></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000144179.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"48746870\"><RecordStatus>current</RecordStatus><Title>NM_004387.4(NKX2-5):c.809G&gt;A (p.Cys270Tyr) AND multiple conditions</Title><ReferenceClinVarAssertion DateCreated=\"2014-09-26\" DateLastUpdated=\"2019-12-17\" ID=\"336420\"><ClinVarAccession Acc=\"RCV000144179\" Version=\"1\" Type=\"RCV\" DateUpdated=\"2019-12-17\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2014-01-01\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Likely benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>Causasians</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus><NumberTested>188</NumberTested><FamilyData NumFamilies=\"188\" /></Sample><Method><NamePlatform>HiSeq/ABI3500</NamePlatform><MethodType>research</MethodType></Method><ObservedData ID=\"49349938\"><Attribute integerValue=\"4\" Type=\"VariantAlleles\" /></ObservedData><ObservedData ID=\"49349938\"><Attribute Type=\"FamilyHistory\">No</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"156161\" Acc=\"VCV000156161\" Version=\"2\"><Measure Type=\"single nucleotide variant\" ID=\"165960\"><Name><ElementValue Type=\"Preferred\">NM_004387.4(NKX2-5):c.809G&gt;A (p.Cys270Tyr)</ElementValue></Name><AttributeSet><Attribute Accession=\"NM_001166176\" Version=\"2\" Change=\"c.*608G&gt;A\" Type=\"HGVS, coding, RefSeq\">NM_001166176.2:c.*608G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001166175\" Version=\"2\" Change=\"c.*762G&gt;A\" Type=\"HGVS, coding, 
RefSeq\">NM_001166175.2:c.*762G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_004387\" Version=\"4\" Change=\"c.809G&gt;A\" Type=\"HGVS, coding, RefSeq\">NM_004387.4:c.809G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_013340\" Version=\"1\" Change=\"g.7578G&gt;A\" Type=\"HGVS, genomic, RefSeqGene\">NG_013340.1:g.7578G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000005\" Version=\"10\" Change=\"g.173232735C&gt;T\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000005.10:g.173232735C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000005\" Version=\"9\" Change=\"g.172659738C&gt;T\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000005.9:g.172659738C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_004387\" Version=\"3\" Change=\"c.809G&gt;A\" Type=\"HGVS, previous\">NM_004387.3:c.809G&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_004378\" Version=\"1\" Change=\"p.Cys270Tyr\" Type=\"HGVS, protein, RefSeq\">NP_004378.1:p.Cys270Tyr</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">3 prime UTR variant</Attribute><XRef ID=\"SO:0001624\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001166175.2:c.*762G&gt;A\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">3 prime UTR variant</Attribute><XRef ID=\"SO:0001624\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001166176.2:c.*608G&gt;A\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_004387.4:c.809G&gt;A\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">C270Y</Attribute></AttributeSet><AlleleFrequencyList><AlleleFrequency Value=\"0.00007\" Source=\"Exome Aggregation Consortium (ExAC)\" /><AlleleFrequency Value=\"0.00003\" 
Source=\"The Genome Aggregation Database (gnomAD)\" /><AlleleFrequency Value=\"0.00009\" Source=\"The Genome Aggregation Database (gnomAD), exomes\" /><AlleleFrequency Value=\"0.00003\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\" /></AlleleFrequencyList><CytogeneticLocation>5q35.1</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"5\" Accession=\"NC_000005.10\" start=\"173232735\" stop=\"173232735\" display_start=\"173232735\" display_stop=\"173232735\" variantLength=\"1\" positionVCF=\"173232735\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"T\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"5\" Accession=\"NC_000005.9\" start=\"172659738\" stop=\"172659738\" display_start=\"172659738\" display_stop=\"172659738\" variantLength=\"1\" positionVCF=\"172659738\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"T\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">NK2 homeobox 5</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">NKX2-5</ElementValue></Symbol><AttributeSet><Attribute dateValue=\"2011-12-15\" Type=\"Haploinsufficiency\">Sufficient evidence for dosage pathogenicity</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=NKX2-5</URL></Citation></AttributeSet><AttributeSet><Attribute dateValue=\"2011-12-15\" Type=\"Triplosensitivity\">No evidence available</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=NKX2-5</URL></Citation></AttributeSet><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"5\" Accession=\"NC_000005.10\" start=\"173232109\" stop=\"173235321\" display_start=\"173232109\" display_stop=\"173235321\" Strand=\"-\" /><SequenceLocation Assembly=\"GRCh37\" 
AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"5\" Accession=\"NC_000005.9\" start=\"172659106\" stop=\"172662314\" display_start=\"172659106\" display_stop=\"172662314\" variantLength=\"3209\" Strand=\"-\" /><XRef ID=\"1482\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"600584\" DB=\"OMIM\" /><XRef ID=\"HGNC:2488\" DB=\"HGNC\" /></MeasureRelationship><XRef Type=\"rs\" ID=\"587782931\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_004387.4(NKX2-5):c.809G&gt;A (p.Cys270Tyr)</ElementValue></Name><XRef ID=\"CA170785\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Finding\" ID=\"19555\"><Trait ID=\"22586\" Type=\"Finding\"><Name><ElementValue Type=\"Preferred\">Single ventricle</ElementValue><XRef ID=\"HP:0001750\" DB=\"Human Phenotype Ontology\" /><XRef ID=\"4878\" DB=\"Office of Rare Diseases\" /></Name><Name><ElementValue Type=\"Alternate\">Common ventricle</ElementValue><XRef ID=\"HP:0001750\" DB=\"Human Phenotype Ontology\" /></Name><TraitRelationship Type=\"Finding member\" ID=\"890\" /><XRef ID=\"C0152424\" DB=\"MedGen\" /><XRef Type=\"primary\" ID=\"HP:0001750\" DB=\"Human Phenotype Ontology\" /></Trait><Trait ID=\"31881\" Type=\"Finding\"><Name><ElementValue Type=\"Preferred\">small Atrial septal defect</ElementValue></Name><TraitRelationship Type=\"Finding member\" ID=\"890\" /><XRef ID=\"CN219571\" DB=\"MedGen\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"336308\"><ClinVarSubmissionID localKey=\"NC_000005.9:g.172659738C&gt;T|Single Ventricle|small Atrial septal defect\" submitter=\"Congenital Heart Disease Genetic Program Lab,American University of Beirut\" submitterDate=\"2014-07-23\" /><ClinVarAccession Acc=\"SCV000188643\" Version=\"1\" Type=\"SCV\" OrgID=\"505233\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-12-16\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2014-01-01\"><ReviewStatus>no assertion criteria 
provided</ReviewStatus><Description>Likely benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>Causasians</Ethnicity><GeographicOrigin>Lebanon</GeographicOrigin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus><NumberTested>188</NumberTested><FamilyData NumFamilies=\"188\" NumFamiliesWithSegregationObserved=\"0\" NumFamiliesWithVariant=\"1\"><FamilyHistory>No</FamilyHistory></FamilyData></Sample><Method><NamePlatform>HiSeq/ABI3500</NamePlatform><TypePlatform>next-gen sequencing, Sanger sequencing</TypePlatform><MethodType>research</MethodType><MethodAttribute><Attribute Type=\"ControlsAppropriate\">yes</Attribute></MethodAttribute><ObsMethodAttribute><Attribute Type=\"MethodResult\">Lebanese Congenital heart disease population allele frequency 1/188 families and 0 MAF in reported databases</Attribute></ObsMethodAttribute></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"4\" /></ObservedData><ObservedData><Attribute Type=\"Homozygote\" integerValue=\"0\" /></ObservedData><ObservedData><Attribute Type=\"SingleHeterozygote\" integerValue=\"4\" /></ObservedData><Comment Type=\"public\">The number of individuals with the variant reflects the total number of individuals within the family.</Comment></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NC_000005.9:g.172659738C&gt;T</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">NKX2-5</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Finding\"><Trait Type=\"Finding\"><Name><ElementValue Type=\"Preferred\">Single Ventricle</ElementValue></Name></Trait><Trait Type=\"Finding\"><Name><ElementValue Type=\"Preferred\">small Atrial septal defect</ElementValue></Name></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000152657.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\"\n            xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n    <ClinVarSet ID=\"48748483\">\n        <RecordStatus>current</RecordStatus>\n        <Title>NM_000551.3(VHL):c.-75_-55del AND Von Hippel-Lindau syndrome</Title>\n        <ReferenceClinVarAssertion DateCreated=\"2015-01-29\" DateLastUpdated=\"2019-12-17\" ID=\"364387\">\n            <ClinVarAccession Acc=\"RCV000152657\" Version=\"1\" Type=\"RCV\" DateUpdated=\"2019-12-17\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2014-12-09\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Likely pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <AttributeSet>\n                <Attribute Type=\"ModeOfInheritance\" integerValue=\"262\">Autosomal dominant inheritance</Attribute>\n                <XRef ID=\"13905298\" DB=\"Laboratory for Molecular Medicine,Partners HealthCare Personalized Medicine\"/>\n            </AttributeSet>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>not provided</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData ID=\"49717141\">\n                    <Attribute integerValue=\"6\" Type=\"VariantAlleles\"/>\n                </ObservedData>\n                <ObservedData ID=\"49717141\">\n                    <Attribute integerValue=\"1\" 
Type=\"NumFamiliesWithVariant\"/>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\" ID=\"166561\" Acc=\"VCV000166561\" Version=\"2\">\n                <Measure Type=\"Deletion\" ID=\"173876\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">NM_000551.3(VHL):c.-75_-55del</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Accession=\"LRG_322t1\" Change=\"c.-75_-55del\" Type=\"HGVS, coding, LRG\">\n                            LRG_322t1:c.-75_-55del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_000551\" Version=\"3\" Change=\"c.-75_-55del\" Type=\"HGVS, coding, RefSeq\">\n                            NM_000551.3:c.-75_-55del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"LRG_322\" Change=\"g.5139_5159del\" Type=\"HGVS, genomic, LRG\">\n                            LRG_322:g.5139_5159del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_008212\" Version=\"3\" Change=\"g.5139_5159del\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_008212.3:g.5139_5159del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000003\" Version=\"12\" Change=\"g.10141773_10141793del\"\n                                   Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000003.12:g.10141773_10141793del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute 
Accession=\"NM_000551\" Version=\"2\" Change=\"c.-75_-55del\" Type=\"HGVS, previous\">\n                            NM_000551.2:c.-75_-55del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"Location\">NM_000551.2:exon 1</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"Location\">NM_000551.3:exon 1</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">5 prime UTR variant</Attribute>\n                        <XRef ID=\"SO:0001623\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_000551.3:c.-75_-55del\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <CytogeneticLocation>3p25.3</CytogeneticLocation>\n                    <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                      AssemblyStatus=\"current\" Chr=\"3\" Accession=\"NC_000003.12\" start=\"10141773\"\n                                      stop=\"10141793\" display_start=\"10141773\" display_stop=\"10141793\"\n                                      variantLength=\"21\" positionVCF=\"10141769\"\n                                      referenceAlleleVCF=\"AGCGCGCACGCAGCTCCGCCCC\" alternateAlleleVCF=\"A\"/>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"3\" Accession=\"NC_000003.11\" start=\"10183457\"\n                                      stop=\"10183477\" display_start=\"10183457\" display_stop=\"10183477\"\n                                      variantLength=\"21\" positionVCF=\"10183453\"\n                                      referenceAlleleVCF=\"AGCGCGCACGCAGCTCCGCCCC\" alternateAlleleVCF=\"A\"/>\n 
                   <MeasureRelationship Type=\"genes overlapped by variant\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">von Hippel-Lindau tumor suppressor</ElementValue>\n                        </Name>\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">VHL</ElementValue>\n                        </Symbol>\n                        <AttributeSet>\n                            <Attribute dateValue=\"2017-06-28\" Type=\"Haploinsufficiency\">Sufficient evidence for dosage\n                                pathogenicity\n                            </Attribute>\n                            <Citation>\n                                <URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=VHL</URL>\n                            </Citation>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute dateValue=\"2017-06-28\" Type=\"Triplosensitivity\">No evidence available</Attribute>\n                            <Citation>\n                                <URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=VHL</URL>\n                            </Citation>\n                        </AttributeSet>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"3\" Accession=\"NC_000003.12\" start=\"10141778\"\n                                          stop=\"10153667\" display_start=\"10141778\" display_stop=\"10153667\" Strand=\"+\"/>\n                        <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                          AssemblyStatus=\"previous\" Chr=\"3\" Accession=\"NC_000003.11\" start=\"10183318\"\n                                          stop=\"10195353\" display_start=\"10183318\" 
display_stop=\"10195353\"\n                                          variantLength=\"12036\" Strand=\"+\"/>\n                        <XRef ID=\"7428\" DB=\"Gene\"/>\n                        <XRef Type=\"MIM\" ID=\"608537\" DB=\"OMIM\"/>\n                        <XRef ID=\"HGNC:12687\" DB=\"HGNC\"/>\n                        <Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations\n                            of 2013 (PubMed 23788249) for reporting incidental findings in exons.\n                        </Comment>\n                        <Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations\n                            of 2016 (PubMed 27854360) for reporting incidental findings in exons.\n                        </Comment>\n                    </MeasureRelationship>\n                    <XRef Type=\"rs\" ID=\"727503744\" DB=\"dbSNP\"/>\n                </Measure>\n                <Name>\n                    <ElementValue Type=\"Preferred\">NM_000551.3(VHL):c.-75_-55del</ElementValue>\n                </Name>\n                <XRef ID=\"CA020542\" DB=\"ClinGen\"/>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\" ID=\"570\">\n                <Trait ID=\"6116\" Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Von Hippel-Lindau syndrome</ElementValue>\n                        <XRef ID=\"Von+Hippel-Lindau+syndrome/7416\" DB=\"Genetic Alliance\"/>\n                        <XRef ID=\"7855\" DB=\"Office of Rare Diseases\"/>\n                        <XRef ID=\"46659004\" DB=\"SNOMED CT\"/>\n                    </Name>\n                    <Symbol>\n                        <ElementValue Type=\"Preferred\">VHL</ElementValue>\n                        <XRef Type=\"MIM\" ID=\"193300\" DB=\"OMIM\"/>\n                        <XRef ID=\"7855\" DB=\"Office of Rare Diseases\"/>\n                    </Symbol>\n                
    <AttributeSet>\n                        <Attribute Type=\"public definition\">Von Hippel-Lindau (VHL) syndrome is characterized by\n                            hemangioblastomas of the brain, spinal cord, and retina; renal cysts and clear cell renal\n                            cell carcinoma; pheochromocytoma, pancreatic cysts, and neuroendocrine tumors; endolymphatic\n                            sac tumors; and epididymal and broad ligament cysts. Cerebellar hemangioblastomas may be\n                            associated with headache, vomiting, gait disturbances, or ataxia. Spinal hemangioblastomas\n                            and related syrinx usually present with pain. Sensory and motor loss may develop with cord\n                            compression. Retinal hemangioblastomas may be the initial manifestation of VHL syndrome and\n                            can cause vision loss. Renal cell carcinoma occurs in about 70% of individuals with VHL and\n                            is the leading cause of mortality. Pheochromocytomas can be asymptomatic but may cause\n                            sustained or episodic hypertension. Pancreatic lesions often remain asymptomatic and rarely\n                            cause endocrine or exocrine insufficiency. Endolymphatic sac tumors can cause hearing loss\n                            of varying severity, which can be a presenting symptom. Cystadenomas of the epididymis are\n                            relatively common. 
They rarely cause problems, unless bilateral, in which case they may\n                            result in infertility.\n                        </Attribute>\n                        <XRef ID=\"NBK1463\" DB=\"GeneReviews\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"disease mechanism\" integerValue=\"273\">loss of function</Attribute>\n                        <XRef ID=\"GTR000528276\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000552165\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000552183\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000552245\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000553527\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000553528\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000562500\" DB=\"Genetic Testing Registry (GTR)\"/>\n                        <XRef ID=\"GTR000569658\" DB=\"Genetic Testing Registry (GTR)\"/>\n                    </AttributeSet>\n                    <Citation Type=\"review\" Abbrev=\"GeneReviews\">\n                        <ID Source=\"PubMed\">20301636</ID>\n                        <ID Source=\"BookShelf\">NBK1463</ID>\n                    </Citation>\n                    <Citation Type=\"practice guideline\" Abbrev=\"NSGC, 2004\">\n                        <ID Source=\"PubMed\">15604628</ID>\n                    </Citation>\n                    <Citation Type=\"Recommendation\" Abbrev=\"ACMG, 2013\">\n                        <ID Source=\"PubMed\">23788249</ID>\n                    </Citation>\n                    <Citation Type=\"practice guideline\" Abbrev=\"ES, 2014\">\n                        <ID Source=\"PubMed\">24893135</ID>\n                    </Citation>\n                    <Citation Type=\"practice guideline\" 
Abbrev=\"NANETS, 2010\">\n                        <ID Source=\"pmc\">3419007</ID>\n                    </Citation>\n                    <Citation Type=\"practice guideline\" Abbrev=\"ACMG/NSGC, 2015\">\n                        <ID Source=\"PubMed\">25394175</ID>\n                    </Citation>\n                    <Citation Type=\"practice guideline\" Abbrev=\"KCRNC, 2013\">\n                        <ID Source=\"PubMed\">24319509</ID>\n                    </Citation>\n                    <Citation Type=\"Recommendation\" Abbrev=\"ACMG, 2015\">\n                        <ID Source=\"PubMed\">25356965</ID>\n                    </Citation>\n                    <Citation Type=\"Recommendation\" Abbrev=\"ACMG, 2016\">\n                        <ID Source=\"PubMed\">27854360</ID>\n                        <ID Source=\"DOI\">10.1038/gim.2016.190</ID>\n                    </Citation>\n                    <XRef ID=\"C0019562\" DB=\"MedGen\"/>\n                    <XRef ID=\"892\" DB=\"Orphanet\"/>\n                    <XRef Type=\"MIM\" ID=\"193300\" DB=\"OMIM\"/>\n                </Trait>\n            </TraitSet>\n        </ReferenceClinVarAssertion>\n        <ClinVarAssertion ID=\"359956\" SubmissionName=\"SUB5348979\">\n            <ClinVarSubmissionID localKey=\"13905298|Orphanet:ORPHA892\" submittedAssembly=\"GRCh37\"\n                                 submitter=\"Laboratory for Molecular Medicine,Partners HealthCare Personalized Medicine\"\n                                 submitterDate=\"2019-03-21\"/>\n            <ClinVarAccession Acc=\"SCV000202009\" Version=\"5\" Type=\"SCV\" OrgID=\"21766\" OrganizationCategory=\"laboratory\"\n                              OrgType=\"primary\" DateUpdated=\"2019-08-26\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2014-12-09\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Likely pathogenic</Description>\n  
              <Citation>\n                    <ID Source=\"PubMed\">22357542</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">12114475</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">18836774</ID>\n                </Citation>\n                <Comment>The c.-75_-55del variant in VHL has been identified by our laboratory in 1 Cauca sian adult\n                    with VHL and segregated with disease in at least 5 affected relatives including 1 obligate carrier.\n                    This variant is located in the 5' untranslated re gion (UTR), a regulatory region, and may have an\n                    effect on translational efficie ncy. The deleted sequence in this variant is highly conserved in\n                    evolutionarily distant species and in vitro studies have shown that a deletion of this region r\n                    emoves a transcription factor binding site which is predicted to alter VHL trans cription (Zatyka\n                    2002). 
In summary, although additional studies are required to fully establish its clinical\n                    significance, the c.-75_-55del variant is likely pa thogenic.\n                </Comment>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ExternalID DB=\"Laboratory for Molecular Medicine\" ID=\"13905298\"/>\n            <AttributeSet>\n                <Attribute Type=\"ModeOfInheritance\">Autosomal dominant inheritance</Attribute>\n            </AttributeSet>\n            <AttributeSet>\n                <Attribute Type=\"AssertionMethod\">LMM Criteria</Attribute>\n                <Citation>\n                    <ID Source=\"PubMed\">24033266</ID>\n                </Citation>\n            </AttributeSet>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>not provided</AffectedStatus>\n                    <FamilyData NumFamiliesWithVariant=\"1\"/>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"VariantAlleles\" integerValue=\"6\"/>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <Name>\n                        <ElementValue Type=\"Alternate\">NM_000551.3:c.-75_-55del</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Type=\"Location\">NM_000551.3:EXON 1</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS\">NC_000003.11:g.10183457_10183477del</Attribute>\n                    </AttributeSet>\n                    <MeasureRelationship 
Type=\"variant in gene\">\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">VHL</ElementValue>\n                        </Symbol>\n                    </MeasureRelationship>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <XRef DB=\"Orphanet\" ID=\"ORPHA892\"/>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n    </ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000153339.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"48314396\"><RecordStatus>current</RecordStatus><Title>NM_000828.4(GRIA3):c.-2G= AND not specified</Title><ReferenceClinVarAssertion DateCreated=\"2015-01-29\" DateLastUpdated=\"2019-12-15\" ID=\"365069\"><ClinVarAccession Acc=\"RCV000153339\" Version=\"2\" Type=\"RCV\" DateUpdated=\"2019-12-16\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2012-08-14\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"49353729\"><Attribute integerValue=\"18\" Type=\"VariantAlleles\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"167157\" Acc=\"VCV000167157\" Version=\"1\"><Measure Type=\"Duplication\" ID=\"177756\"><Name><ElementValue Type=\"Preferred\">NM_000828.4(GRIA3):c.-2G=</ElementValue></Name><AttributeSet><Attribute Accession=\"NM_000828\" Version=\"4\" Change=\"c.-2dup\" Type=\"HGVS, coding, RefSeq\">NM_000828.4:c.-2dup</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001256743\" Version=\"2\" Change=\"c.-2dup\" Type=\"HGVS, coding, RefSeq\">NM_001256743.2:c.-2dup</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_007325\" Version=\"5\" Change=\"c.-2dup\" Type=\"HGVS, coding, RefSeq\">NM_007325.5:c.-2dup</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_009377\" Version=\"2\" Change=\"g.5292dup\" Type=\"HGVS, genomic, 
RefSeqGene\">NG_009377.2:g.5292dup</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000023\" Version=\"11\" Change=\"g.123184534dup\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000023.11:g.123184534dup</Attribute></AttributeSet><AttributeSet><Attribute Type=\"Location\">NM_000828.4:5' UTR</Attribute></AttributeSet><AttributeSet><Attribute Type=\"Location\">NM_000828.4:exon 1</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">5 prime UTR variant</Attribute><XRef ID=\"SO:0001623\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_000828.4:c.-2dup\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">5 prime UTR variant</Attribute><XRef ID=\"SO:0001623\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001256743.2:c.-2dup\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">5 prime UTR variant</Attribute><XRef ID=\"SO:0001623\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_007325.5:c.-2dup\" DB=\"RefSeq\" /></AttributeSet><AlleleFrequencyList><AlleleFrequency Value=\"0.99999\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\" /></AlleleFrequencyList><GlobalMinorAlleleFrequency Value=\"1.00000\" Source=\"1000 Genomes Project\" MinorAllele=\"GG\" /><CytogeneticLocation>Xq25</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"X\" Accession=\"NC_000023.11\" start=\"123184533\" stop=\"123184534\" display_start=\"123184533\" display_stop=\"123184534\" variantLength=\"1\" positionVCF=\"123184533\" referenceAlleleVCF=\"A\" alternateAlleleVCF=\"AG\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"X\" Accession=\"NC_000023.10\" start=\"122318386\" stop=\"122318387\" display_start=\"122318386\" display_stop=\"122318387\" variantLength=\"1\" referenceAllele=\"-\" alternateAllele=\"G\" /><MeasureRelationship 
Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">glutamate ionotropic receptor AMPA type subunit 3</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">GRIA3</ElementValue></Symbol><AttributeSet><Attribute dateValue=\"2019-10-23\" Type=\"Haploinsufficiency\">Some evidence for dosage pathogenicity</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=GRIA3</URL></Citation></AttributeSet><AttributeSet><Attribute dateValue=\"2019-10-23\" Type=\"Triplosensitivity\">No evidence available</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=GRIA3</URL></Citation></AttributeSet><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"X\" Accession=\"NC_000023.11\" start=\"123184243\" stop=\"123490915\" display_start=\"123184243\" display_stop=\"123490915\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"X\" Accession=\"NC_000023.10\" start=\"122318095\" stop=\"122624765\" display_start=\"122318095\" display_stop=\"122624765\" variantLength=\"306671\" Strand=\"+\" /><XRef ID=\"2892\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"305915\" DB=\"OMIM\" /><XRef ID=\"HGNC:4573\" DB=\"HGNC\" /></MeasureRelationship><XRef Type=\"rs\" ID=\"1276003103\" DB=\"dbSNP\" /><XRef Type=\"rs\" ID=\"58044961\" DB=\"dbSNP\" /><Comment DataSource=\"NCBI curation\" Type=\"AssemblySpecificAlleleDefinition\">The G in position 5292 of NG_009377.2 is in a single base gap when aligned to NC_000023.10 in GRCh37 assembly.  
Thus what is asserted as an assertion relative to GRCh37 is no change on GRCh38 (NC_000023.11) and on the RefSeqGene.</Comment></Measure><Name><ElementValue Type=\"Preferred\">NM_000828.4(GRIA3):c.-2G=</ElementValue></Name><XRef ID=\"CA10575670\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"9590\"><Trait ID=\"16789\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not specified</ElementValue></Name><Name><ElementValue Type=\"Alternate\">AllHighlyPenetrant</ElementValue></Name><AttributeSet><Attribute Type=\"public definition\">The term 'not specified' was created for use in ClinVar so that submitters can convey the concept that a variant is benign, likely benign, or of uncertain significance for an unspecified set of disorders.  This usage was introduced in 2014 to replace AllHighlyPenetrant.</Attribute></AttributeSet><XRef ID=\"CN169374\" DB=\"MedGen\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"360794\" SubmissionName=\"SUB4534263\"><ClinVarSubmissionID localKey=\"6233_000000\" localKeyIsSubmitted=\"1\" submittedAssembly=\"GRCh37\" submitter=\"EGL Genetic Diagnostics,Eurofins Clinical Diagnostics\" submitterDate=\"2018-09-19\" /><ClinVarAccession Acc=\"SCV000202823\" Version=\"4\" Type=\"SCV\" OrgID=\"500060\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2012-08-14\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"EGL Genetic Diagnostics\" ID=\"6233\" /><AttributeSet><Attribute Type=\"AssertionMethod\">EGL_Classification_Definitions_2015</Attribute><Citation><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/dn5yhybg/egl_classification_definitions_2015.pdf</URL></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species 
TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><Gender>mixed</Gender></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"18\" /></ObservedData><ObservedData><Attribute Type=\"Hemizygote\" integerValue=\"18\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Insertion\"><AttributeSet><Attribute Type=\"Location\">NM_000828.4:Ex1</Attribute></AttributeSet><SequenceLocation Assembly=\"GRCh37\" Chr=\"X\" alternateAllele=\"G\" referenceAllele=\"-\" start=\"122318386\" stop=\"122318387\" variantLength=\"2\" /><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">GRIA3</ElementValue></Symbol></MeasureRelationship><XRef DB=\"dbSNP\" ID=\"58044961\" Type=\"rsNumber\" /></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not specified</ElementValue></Name></Trait></TraitSet><Citation><URL>http://www.egl-eurofins.com/emvclass/emvclass.php?approved_symbol=GRIA3</URL></Citation></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000167792.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"49280026\"><RecordStatus>current</RecordStatus><Title>NM_007294.3(BRCA1):c.135-18T&gt;G AND Hereditary breast and ovarian cancer syndrome</Title><ReferenceClinVarAssertion DateCreated=\"2015-03-28\" DateLastUpdated=\"2019-12-31\" ID=\"395864\"><ClinVarAccession Acc=\"RCV000167792\" Version=\"3\" Type=\"RCV\" DateUpdated=\"2019-12-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2017-11-06\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Uncertain significance</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>research</MethodType></Method><ObservedData ID=\"50067636\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"50067637\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"54214\" Acc=\"VCV000054214\" Version=\"5\"><Measure Type=\"single nucleotide variant\" ID=\"68881\"><Name><ElementValue Type=\"Preferred\">NM_007294.3(BRCA1):c.135-18T&gt;G</ElementValue></Name><Name><ElementValue Type=\"Alternate\">IVS4-18T&gt;G</ElementValue></Name><AttributeSet><Attribute Accession=\"LRG_292t1\" Change=\"c.135-18T&gt;G\" Type=\"HGVS, coding, 
LRG\">LRG_292t1:c.135-18T&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_007297\" Version=\"4\" Change=\"c.-7-18T&gt;G\" Type=\"HGVS, coding, RefSeq\">NM_007297.4:c.-7-18T&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_007294\" Version=\"3\" Change=\"c.135-18T&gt;G\" Type=\"HGVS, coding, RefSeq\">NM_007294.3:c.135-18T&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_007298\" Version=\"3\" Change=\"c.135-18T&gt;G\" Type=\"HGVS, coding, RefSeq\">NM_007298.3:c.135-18T&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_007299\" Version=\"4\" Change=\"c.135-18T&gt;G\" Type=\"HGVS, coding, RefSeq\">NM_007299.4:c.135-18T&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_007300\" Version=\"4\" Change=\"c.135-18T&gt;G\" Type=\"HGVS, coding, RefSeq\">NM_007300.4:c.135-18T&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_292\" Change=\"g.111433T&gt;G\" Type=\"HGVS, genomic, LRG\">LRG_292:g.111433T&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_005905\" Version=\"2\" Change=\"g.111433T&gt;G\" Type=\"HGVS, genomic, RefSeqGene\">NG_005905.2:g.111433T&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000017\" Version=\"11\" Change=\"g.43106551A&gt;C\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000017.11:g.43106551A&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000017\" Version=\"10\" Change=\"g.41258568A&gt;C\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000017.10:g.41258568A&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"U14680\" Version=\"1\" Change=\"n.254-18T&gt;G\" Type=\"HGVS, non-coding\">U14680.1:n.254-18T&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Type=\"Location\">NM_007294.3:intron 3</Attribute></AttributeSet><AttributeSet><Attribute Type=\"Location\">U14680.1:intron 
4</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">intron variant</Attribute><XRef ID=\"SO:0001627\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_007294.3:c.135-18T&gt;G\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">intron variant</Attribute><XRef ID=\"SO:0001627\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_007297.4:c.-7-18T&gt;G\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">intron variant</Attribute><XRef ID=\"SO:0001627\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_007298.3:c.135-18T&gt;G\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">intron variant</Attribute><XRef ID=\"SO:0001627\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_007299.4:c.135-18T&gt;G\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">intron variant</Attribute><XRef ID=\"SO:0001627\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_007300.4:c.135-18T&gt;G\" DB=\"RefSeq\" /></AttributeSet><AlleleFrequencyList><AlleleFrequency Value=\"0.00001\" Source=\"Exome Aggregation Consortium (ExAC)\" /><AlleleFrequency Value=\"0.00000\" Source=\"The Genome Aggregation Database (gnomAD), exomes\" /><AlleleFrequency Value=\"0.00002\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\" /></AlleleFrequencyList><CytogeneticLocation>17q21.31</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"17\" Accession=\"NC_000017.11\" start=\"43106551\" stop=\"43106551\" display_start=\"43106551\" display_stop=\"43106551\" variantLength=\"1\" positionVCF=\"43106551\" referenceAlleleVCF=\"A\" alternateAlleleVCF=\"C\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"17\" Accession=\"NC_000017.10\" start=\"41258568\" stop=\"41258568\" display_start=\"41258568\" display_stop=\"41258568\" 
variantLength=\"1\" positionVCF=\"41258568\" referenceAlleleVCF=\"A\" alternateAlleleVCF=\"C\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">BRCA1 DNA repair associated</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">BRCA1</ElementValue></Symbol><AttributeSet><Attribute dateValue=\"2015-11-16\" Type=\"Haploinsufficiency\">Sufficient evidence for dosage pathogenicity</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=BRCA1</URL></Citation></AttributeSet><AttributeSet><Attribute dateValue=\"2015-11-16\" Type=\"Triplosensitivity\">No evidence available</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=BRCA1</URL></Citation></AttributeSet><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"17\" Accession=\"NC_000017.11\" start=\"43044295\" stop=\"43125364\" display_start=\"43044295\" display_stop=\"43125364\" Strand=\"-\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"17\" Accession=\"NC_000017.10\" start=\"41196311\" stop=\"41277499\" display_start=\"41196311\" display_stop=\"41277499\" variantLength=\"81189\" Strand=\"-\" /><XRef ID=\"672\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"113705\" DB=\"OMIM\" /><XRef ID=\"HGNC:1100\" DB=\"HGNC\" /><Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations of 2013 (PubMed 23788249) for reporting incidental findings in exons.</Comment><Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations of 2016 (PubMed 27854360) for reporting incidental findings in exons.</Comment></MeasureRelationship><XRef ID=\"254-18&amp;base_change=T to G\" DB=\"Breast Cancer Information Core (BIC) (BRCA1)\" /><XRef Type=\"rs\" ID=\"80358085\" DB=\"dbSNP\" /></Measure><Name><ElementValue 
Type=\"Preferred\">NM_007294.3(BRCA1):c.135-18T&gt;G</ElementValue></Name><XRef ID=\"CA000892\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"7920\"><Trait ID=\"16761\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Hereditary breast and ovarian cancer syndrome</ElementValue><XRef ID=\"D061325\" DB=\"MeSH\" /><XRef ID=\"145\" DB=\"Orphanet\" /></Name><Name><ElementValue Type=\"Alternate\">Hereditary breast and ovarian cancer</ElementValue><XRef ID=\"UWMG_4506306\" DB=\"CSER_CC_NCGL; University of Washington Medical Center\" /></Name><Symbol><ElementValue Type=\"Preferred\">HBOC</ElementValue><XRef ID=\"GTR000500933\" DB=\"Genetic Testing Registry (GTR)\" /></Symbol><AttributeSet><Attribute Type=\"public definition\">BRCA1- and BRCA2-associated hereditary breast and ovarian cancer syndrome (HBOC) is characterized by an increased risk for female and male breast cancer, ovarian cancer (includes fallopian tube and primary peritoneal cancers), and to a lesser extent other cancers such as prostate cancer, pancreatic cancer, and melanoma primarily in individuals with a BRCA2 pathogenic variant. 
The exact cancer risks differ slightly depending on whether HBOC is caused by a BRCA1 or BRCA2 pathogenic variant.</Attribute><XRef ID=\"NBK1247\" DB=\"GeneReviews\" /></AttributeSet><AttributeSet><Attribute Type=\"disease mechanism\" integerValue=\"273\">loss of function</Attribute><XRef ID=\"GTR000017874\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000017876\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000021468\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000021517\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000325401\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000325409\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000330054\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000500933\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000501981\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000502103\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000503369\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000505644\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000507653\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000507764\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000508566\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000508948\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000508956\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509001\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509002\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509978\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509980\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509982\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509983\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509984\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000509985\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000512699\" DB=\"Genetic Testing Registry (GTR)\" 
/><XRef ID=\"GTR000512816\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000514601\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520071\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520072\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520394\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000520887\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000521499\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000521908\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000521909\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000522159\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000522160\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000522161\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000522278\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000522559\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000523320\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000527942\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000528915\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000528916\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000530028\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000530118\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000530120\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000530202\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000551440\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000551441\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000551448\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000552183\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000552245\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000552302\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000558503\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000558910\" DB=\"Genetic Testing Registry (GTR)\" /><XRef 
ID=\"GTR000561859\" DB=\"Genetic Testing Registry (GTR)\" /><XRef ID=\"GTR000562228\" DB=\"Genetic Testing Registry (GTR)\" /></AttributeSet><Citation Type=\"review\" Abbrev=\"GeneReviews\"><ID Source=\"PubMed\">20301425</ID><ID Source=\"BookShelf\">NBK1247</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"ACS, 2007\"><ID Source=\"PubMed\">17392385</ID></Citation><Citation Type=\"Position Statement\" Abbrev=\"ASCO, 2010\"><ID Source=\"PubMed\">20065170</ID></Citation><Citation Type=\"Position Statement\" Abbrev=\"ASCO, 2003\"><ID Source=\"PubMed\">12692171</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"NSGC, 2004\"><ID Source=\"PubMed\">15604628</ID></Citation><Citation Type=\"review\" Abbrev=\"Stratton and Rahman, 2008\"><ID Source=\"PubMed\">18163131</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"NSGC, 2007\"><ID Source=\"PubMed\">17508274</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"ACOG, 2009\"><ID Source=\"PubMed\">19305347</ID></Citation><Citation Type=\"Recommendation\" Abbrev=\"ACMG, 2013\"><ID Source=\"PubMed\">23788249</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"NCCN, 2013\"><URL>http://www.nccn.org/professionals/physician_gls/pdf/genetics_screening.pdf</URL><CitationText>National Comprehensive Cancer Network practice guidelines in oncology. 
Genetic/Familial High-Risk Assessment: Breast and Ovarian</CitationText></Citation><Citation Type=\"Suggested Reading\" Abbrev=\"Phillips et al., 2013\"><ID Source=\"PubMed\">23918944</ID></Citation><Citation Type=\"Suggested Reading\" Abbrev=\"Domchek et al., 2010\"><ID Source=\"pmc\">2948529</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"ASCO, 2014\"><ID Source=\"PubMed\">24493721</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"USPSTF, 2014\"><ID Source=\"PubMed\">24366376</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"NSGC, 2013\"><ID Source=\"PubMed\">23188549</ID></Citation><Citation Type=\"Recommendation\" Abbrev=\"USPSTF, 2014\"><ID Source=\"PubMed\">24366402</ID></Citation><Citation Type=\"Recommendation\" Abbrev=\"USPSTF, 2013\"><ID Source=\"PubMed\">24432435</ID></Citation><Citation Type=\"Recommendation\" Abbrev=\"ACMG, 2015\"><ID Source=\"PubMed\">25356965</ID></Citation><Citation Type=\"Recommendation\" Abbrev=\"ACMG, 2016\"><ID Source=\"PubMed\">27854360</ID><ID Source=\"DOI\">10.1038/gim.2016.190</ID></Citation><XRef ID=\"D061325\" DB=\"MeSH\" /><XRef ID=\"C0677776\" DB=\"MedGen\" /><XRef ID=\"145\" DB=\"Orphanet\" /><XRef Type=\"Phenotypic series\" ID=\"PS604370\" DB=\"OMIM\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"123376\" SubmissionName=\"SUB3864685\"><ClinVarSubmissionID localKey=\"1157271|MedGen:C0677776\" submittedAssembly=\"GRCh37\" submitter=\"Invitae\" submitterDate=\"2018-04-02\" /><ClinVarAccession Acc=\"SCV000075446\" Version=\"6\" Type=\"SCV\" OrgID=\"500031\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-08-03\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2017-11-06\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Uncertain significance</Description><Citation><ID Source=\"PubMed\">23239986</ID></Citation><Comment>This sequence change falls in intron 3 of the BRCA1 
gene. It does not directly change the encoded amino acid sequence of the BRCA1 protein. This variant is present in population databases (rs80358085, ExAC 0.002%). This variant has been reported in an individual with a personal and family history of breast cancer (PMID: 23239986). This variant is also known as IVS4-18T&gt;G in the literature. ClinVar contains an entry for this variant (Variation ID: 54214). Experimental studies have shown that this variant modestly increases skipping of exon 4 (also known as exon 5), but exon 4 skipping has also been observed in the wild-type controls at a low level (PMID: 23239986). In summary, the available evidence is currently insufficient to determine the role of this variant in disease. Therefore, it has been classified as a Variant of Uncertain Significance.</Comment></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"Invitae\" ID=\"1157271\" /><AttributeSet><Attribute Type=\"AssertionMethod\">Nykamp K et al. (Genet Med 2017)</Attribute><Citation><ID Source=\"PubMed\">28492532</ID></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_007294.3:c.135-18T&gt;G</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">BRCA1</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><XRef DB=\"MedGen\" ID=\"C0677776\" Type=\"CUI\" /></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"1793001\" SubmissionName=\"SUB5257857\"><ClinVarSubmissionID localKey=\"NM_007294.3:c.135-18T&gt;G|Hereditary breast and ovarian cancer 
syndrome\" submittedAssembly=\"GRCh37\" submitter=\"Hereditary Cancer Genetics group,Vall d'Hebron Institute of Oncology\" submitterDate=\"2019-03-01\" /><ClinVarAccession Acc=\"SCV000916380\" Version=\"1\" Type=\"SCV\" OrgID=\"506912\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-10-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2019-03-01\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Uncertain significance</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus><Gender>female</Gender></Sample><Method><MethodType>research</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"Location\">NM_007294.3:intron 3</Attribute></AttributeSet><AttributeSet><Attribute Type=\"HGVS\">NM_007294.3:c.135-18T&gt;G</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">BRCA1</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Hereditary breast and ovarian cancer syndrome</ElementValue></Name></Trait></TraitSet><Citation Type=\"general\"><ID Source=\"PubMed\">30472649</ID></Citation></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000169296.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"47180809\"><RecordStatus>current</RecordStatus><Title>NM_000520.6(HEXA):c.986+3A&gt;G AND Tay-Sachs disease</Title><ReferenceClinVarAssertion DateCreated=\"2015-03-29\" DateLastUpdated=\"2019-11-02\" ID=\"399359\"><ClinVarAccession Acc=\"RCV000169296\" Version=\"5\" Type=\"RCV\" DateUpdated=\"2019-11-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2018-09-19\"><ReviewStatus>criteria provided, multiple submitters, no conflicts</ReviewStatus><Description>Pathogenic/Likely pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><AttributeSet><Attribute Type=\"ModeOfInheritance\" integerValue=\"263\">Autosomal recessive inheritance</Attribute><XRef ID=\"28566\" DB=\"Counsyl\" /></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"48060286\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData><ObservedData ID=\"48060286\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData><ObservedData ID=\"48060286\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>maternal</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><NumberTested>1</NumberTested></Sample><Method><MethodType>research</MethodType></Method><ObservedData ID=\"48062139\"><Attribute integerValue=\"1\" Type=\"VariantAlleles\" /></ObservedData><ObservedData ID=\"48062139\"><Attribute 
Type=\"SampleLocalID\">15:72641417:T:00364C</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>unknown</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><NumberTested>1</NumberTested></Sample><Method><MethodType>research</MethodType></Method><Method><MethodType>literature only</MethodType></Method><ObservedData ID=\"48062140\"><Attribute integerValue=\"1\" Type=\"VariantAlleles\" /></ObservedData><ObservedData ID=\"48062140\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData><ObservedData ID=\"48062140\"><Attribute Type=\"SampleLocalID\">15:72641417:T:00364M</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"188929\" Acc=\"VCV000188929\" Version=\"4\"><Measure Type=\"single nucleotide variant\" ID=\"186936\"><Name><ElementValue Type=\"Preferred\">NM_000520.6(HEXA):c.986+3A&gt;G</ElementValue></Name><AttributeSet><Attribute Accession=\"NM_001318825\" Version=\"2\" Change=\"c.1019+3A&gt;G\" Type=\"HGVS, coding, RefSeq\">NM_001318825.2:c.1019+3A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_000520\" Version=\"6\" Change=\"c.986+3A&gt;G\" Type=\"HGVS, coding, RefSeq\">NM_000520.6:c.986+3A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_009017\" Version=\"1\" Change=\"g.32104A&gt;G\" Type=\"HGVS, genomic, RefSeqGene\">NG_009017.1:g.32104A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_009017\" Version=\"2\" Change=\"g.32104A&gt;G\" Type=\"HGVS, genomic, RefSeqGene\">NG_009017.2:g.32104A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000015\" Version=\"10\" Change=\"g.72349076T&gt;C\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000015.10:g.72349076T&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000015\" Version=\"9\" Change=\"g.72641417T&gt;C\" Type=\"HGVS, genomic, top level, previous\" 
integerValue=\"37\">NC_000015.9:g.72641417T&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_000520\" Version=\"4\" Change=\"c.986+3A&gt;G\" Type=\"HGVS, previous\">NM_000520.4:c.986+3A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">intron variant</Attribute><XRef ID=\"SO:0001627\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_000520.6:c.986+3A&gt;G\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">intron variant</Attribute><XRef ID=\"SO:0001627\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001318825.2:c.1019+3A&gt;G\" DB=\"RefSeq\" /></AttributeSet><AlleleFrequencyList><AlleleFrequency Value=\"0.00008\" Source=\"NHLBI Exome Sequencing Project (ESP) Exome Variant Server\" /><AlleleFrequency Value=\"0.00020\" Source=\"1000 Genomes Project\" /><AlleleFrequency Value=\"0.00002\" Source=\"Exome Aggregation Consortium (ExAC)\" /><AlleleFrequency Value=\"0.00002\" Source=\"The Genome Aggregation Database (gnomAD), exomes\" /><AlleleFrequency Value=\"0.00004\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\" /></AlleleFrequencyList><GlobalMinorAlleleFrequency Value=\"0.00020\" Source=\"1000 Genomes Project\" MinorAllele=\"C\" /><CytogeneticLocation>15q23</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"15\" Accession=\"NC_000015.10\" start=\"72349076\" stop=\"72349076\" display_start=\"72349076\" display_stop=\"72349076\" variantLength=\"1\" positionVCF=\"72349076\" referenceAlleleVCF=\"T\" alternateAlleleVCF=\"C\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"15\" Accession=\"NC_000015.9\" start=\"72641417\" stop=\"72641417\" display_start=\"72641417\" display_stop=\"72641417\" variantLength=\"1\" positionVCF=\"72641417\" referenceAlleleVCF=\"T\" alternateAlleleVCF=\"C\" /><MeasureRelationship Type=\"within single 
gene\"><Name><ElementValue Type=\"Preferred\">hexosaminidase subunit alpha</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">HEXA</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"15\" Accession=\"NC_000015.10\" start=\"72340924\" stop=\"72376014\" display_start=\"72340924\" display_stop=\"72376014\" Strand=\"-\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"15\" Accession=\"NC_000015.9\" start=\"72635777\" stop=\"72668519\" display_start=\"72635777\" display_stop=\"72668519\" variantLength=\"32743\" Strand=\"-\" /><XRef ID=\"3073\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"606869\" DB=\"OMIM\" /><XRef ID=\"HGNC:4878\" DB=\"HGNC\" /></MeasureRelationship><XRef ID=\"1360913\" DB=\"Illumina Clinical Services Laboratory,Illumina\" /><XRef Type=\"rs\" ID=\"200926928\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_000520.6(HEXA):c.986+3A&gt;G</ElementValue></Name><XRef ID=\"CA274144\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"1067\"><Trait ID=\"6027\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Tay-Sachs disease</ElementValue><XRef ID=\"Tay+Sachs+disease/7008\" DB=\"Genetic Alliance\" /><XRef ID=\"tay-sachs-disease\" DB=\"Genetics Home Reference\" /><XRef ID=\"111385000\" DB=\"SNOMED CT\" /></Name><Symbol><ElementValue Type=\"Preferred\">TSD</ElementValue><XRef Type=\"MIM\" ID=\"272800\" DB=\"OMIM\" /><XRef ID=\"7737\" DB=\"Office of Rare Diseases\" /></Symbol><AttributeSet><Attribute Type=\"public definition\">Hexosaminidase A deficiency results in a group of neurodegenerative disorders caused by intralysosomal storage of the specific glycosphingolipid, GM2 ganglioside. The prototype hexosaminidase A deficiency is Tay-Sachs disease, also known as the acute infantile variant. 
Tay-Sachs disease is characterized by progressive weakness, loss of motor skills, decreased attentiveness, and increased startle response beginning between ages three and six months with progressive evidence of neurodegeneration including: seizures, blindness, spasticity, eventual total incapacitation, and death, usually before age four years. The juvenile (subacute), chronic, and adult-onset variants of hexosaminidase A deficiency have later onsets, slower progression, and more variable neurologic findings, including: progressive dystonia, spinocerebellar degeneration, motor neuron disease, and, in some individuals with adult-onset disease, a bipolar form of psychosis.</Attribute><XRef ID=\"NBK1218\" DB=\"GeneReviews\" /></AttributeSet><Citation Type=\"review\" Abbrev=\"GeneReviews\"><ID Source=\"PubMed\">20301397</ID><ID Source=\"BookShelf\">NBK1218</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"ACMG, 2008\"><ID Source=\"PubMed\">18197057</ID></Citation><Citation Type=\"practice guideline\" Abbrev=\"ACOG, 2009\"><ID Source=\"PubMed\">19888064</ID></Citation><XRef ID=\"C0039373\" DB=\"MedGen\" /><XRef ID=\"845\" DB=\"Orphanet\" /><XRef Type=\"MIM\" ID=\"272800\" DB=\"OMIM\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"397371\" SubmissionName=\"Counsyl_2015_03_01\"><ClinVarSubmissionID localKey=\"NM_000520.4(HEXA):c.986+3A&gt;G\" localKeyIsSubmitted=\"1\" submitter=\"Counsyl\" submitterDate=\"2015-03-11\" /><ClinVarAccession Acc=\"SCV000220615\" Version=\"1\" Type=\"SCV\" OrgID=\"320494\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2014-08-21\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Likely pathogenic</Description><Citation><ID Source=\"PubMed\">23035047</ID></Citation><Citation><ID Source=\"PubMed\">7551830</ID></Citation><Citation><ID 
Source=\"PubMed\">24518553</ID></Citation></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"Counsyl\" ID=\"28566\" /><AttributeSet><Attribute Type=\"ModeOfInheritance\">Autosomal recessive inheritance</Attribute></AttributeSet><AttributeSet><Attribute Type=\"AssertionMethod\">Counsyl Autosomal and X-linked Recessive Disease Classification criteria (2015)</Attribute><Citation Type=\"general\"><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/4yisoce9/mdi-5618_320494_counsyl_autosomal_and_x-linked_recessive_disease_classification_criteria_(2015).pdf</URL><CitationText>Counsyl Autosomal and X-linked Recessive Disease Classification criteria (2015)</CitationText></Citation></AttributeSet><ObservedIn><Sample><Origin>unknown</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_000520.4:c.986+3A&gt;G</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">HEXA</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Tay-Sachs disease</ElementValue></Name><XRef DB=\"OMIM\" ID=\"272800\" Type=\"MIM\" /></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"1139226\" SubmissionName=\"SUB2844653\"><ClinVarSubmissionID localKey=\"15:72641417:T\" localKeyIsSubmitted=\"1\" submittedAssembly=\"GRCh37\" submitter=\"HudsonAlpha Institute for Biotechnology, HudsonAlpha Institute for Biotechnology\" submitterDate=\"2017-07-06\" /><ClinVarAccession Acc=\"SCV000584096\" Version=\"1\" Type=\"SCV\" OrgID=\"505530\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-08-30\" 
/><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2016-10-13\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"HudsonAlpha Institute for Biotechnology\" ID=\"15:72641417\" /><AttributeSet><Attribute Type=\"AssertionMethod\">HA_assertions_20150911</Attribute><Citation Type=\"general\"><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/suocabs5/ha_assertions_20150911.pdf</URL><CitationText>HA_assertions_20150911.pdf</CitationText></Citation></AttributeSet><ObservedIn><Sample><Origin>unknown</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><NumberTested>1</NumberTested></Sample><Method><MethodType>research</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"1\" /></ObservedData><ObservedData><Attribute Type=\"SampleLocalID\">15:72641417:T:00364M</Attribute></ObservedData></ObservedIn><ObservedIn><Sample><Origin>maternal</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><NumberTested>1</NumberTested></Sample><Method><MethodType>research</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"1\" /></ObservedData><ObservedData><Attribute Type=\"SampleLocalID\">15:72641417:T:00364C</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><SequenceLocation Assembly=\"GRCh37\" Chr=\"15\" alternateAllele=\"C\" referenceAllele=\"T\" start=\"72641417\" stop=\"72641417\" variantLength=\"1\" /><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">HEXA</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><XRef DB=\"OMIM\" ID=\"272800\" Type=\"MIM\" 
/></Trait></TraitSet><StudyName>CSER-HudsonAlpha</StudyName></ClinVarAssertion><ClinVarAssertion ID=\"1789761\" SubmissionName=\"SUB5118208\"><ClinVarSubmissionID localKey=\"1365825\" localKeyIsSubmitted=\"1\" submittedAssembly=\"GRCh37\" submitter=\"Illumina Clinical Services Laboratory,Illumina\" submitterDate=\"2019-02-01\" /><ClinVarAccession Acc=\"SCV000914693\" Version=\"1\" Type=\"SCV\" OrgID=\"504895\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-05-24\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2017-09-08\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Likely pathogenic</Description><Citation><ID Source=\"PubMed\">7551830</ID></Citation><Citation><ID Source=\"PubMed\">9150157</ID></Citation><Citation><ID Source=\"PubMed\">20100466</ID></Citation><Citation><ID Source=\"PubMed\">23035047</ID></Citation><Comment Type=\"public\">The HEXA c.986+3A&gt;G splice region variant has been reported in at least four studies in which it is found in a total of three patients in a compound heterozygous state and in a heterozygous state in one obligate carrier (Richard et al. 1995; Akerman et al. 1997; Giraud et al. 2010; Saunders et al. 2012). One compound heterozygote presented with the late-infantile onset form of Tay-Sachs disease while another compound heterozygote had onset in adulthood. Control data are unavailable for this variant which is reported at a frequency of 0.000025 in the total population from the Exome Aggregation Consortium. In one study, exons 7 to 9 of the c.986+3A&gt;G variant HEXA were amplified and the resulting mRNA product was shown to be lacking exon 8, suggesting this variant affects splicing (Richard et al. 1995). Based on the evidence, the c.986+3A&gt;G variant is classified as likely pathogenic for hexoaminidase A deficiency. 
This variant was observed by ICSL as part of a predisposition screen in an ostensibly healthy population.</Comment></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"Illumina Clinical Services Laboratory\" ID=\"1360913\" /><AttributeSet><Attribute Type=\"AssertionMethod\">ICSL Variant Classification Criteria 09 May 2019</Attribute><Citation><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/thsgk7t4/icsl_variant_classification_criteria_09_may_2019.pdf</URL></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_000520.4:c.986+3A&gt;G</Attribute></AttributeSet><SequenceLocation Assembly=\"GRCh37\" Chr=\"15\" alternateAllele=\"C\" referenceAllele=\"T\" start=\"72641417\" stop=\"72641417\" variantLength=\"1\" /><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">HEXA</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><XRef DB=\"OMIM\" ID=\"272800\" Type=\"MIM\" /></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"1796126\" SubmissionName=\"SUB5494893\"><ClinVarSubmissionID localKey=\"NM_000520.4:c.986+3A&gt;G|MedGen:C0039373\" submittedAssembly=\"GRCh37\" submitter=\"Integrated Genetics/Laboratory Corporation of America\" submitterDate=\"2019-04-24\" /><ClinVarAccession Acc=\"SCV000919505\" Version=\"1\" Type=\"SCV\" OrgID=\"500026\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-05-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2018-04-09\"><ReviewStatus>criteria provided, single 
submitter</ReviewStatus><Description>Pathogenic</Description><Citation><ID Source=\"PubMed\">24518553</ID></Citation><Citation><ID Source=\"PubMed\">20100466</ID></Citation><Citation><ID Source=\"PubMed\">9150157</ID></Citation><Citation><ID Source=\"PubMed\">7551830</ID></Citation><Citation><ID Source=\"PubMed\">23035047</ID></Citation><Comment Type=\"public\">Variant summary: HEXA c.986+3A&gt;G alters a conserved nucleotide located close to a canonical splice site and therefore could affect mRNA splicing, leading to a significantly altered protein sequence. Several computational tools predict a significant impact on normal splicing: Four predict the variant weakens a 5' donor site. A publication, Richard_1995, functionally assessed the variant and found it to cause exon 8 to be deleted, which is located in the Glycoside hydrolase family 20, catalytic domain (via InterPro). The variant was observed with an allele frequency of 8.1e-06 in 246182 control chromosomes (gnomAD). This frequency is not higher than expected for a pathogenic variant in HEXA causing Tay-Sachs Disease (8.1e-06 vs 0.0014), allowing no conclusion about variant significance. The variant, c.986+3A&gt;G, has been reported in the literature in individuals affected with Tay-Sachs Disease. These data indicate that the variant is likely to be associated with disease. 
A ClinVar submission from a clinical diagnostic laboratory (evaluation after 2014) cites the variant as \"likely pathogenic.\" Based on the evidence outlined above, the variant was classified as pathogenic.</Comment></ClinicalSignificance><Assertion Type=\"variation to disease\" /><AttributeSet><Attribute Type=\"AssertionMethod\">LabCorp Variant Classification Summary - May 2015</Attribute><Citation><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/pttb9itm/labcorp_variant_classification_method_-_may_2015.pdf</URL></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_000520.4:c.986+3A&gt;G</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">HEXA</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Tay-Sachs disease</ElementValue></Name><XRef DB=\"MedGen\" ID=\"C0039373\" Type=\"CUI\" /></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"1831704\" SubmissionName=\"SUB5371970\"><ClinVarSubmissionID localKey=\"26592|MedGen:C0039373\" submittedAssembly=\"GRCh37\" submitter=\"Invitae\" submitterDate=\"2019-03-28\" /><ClinVarAccession Acc=\"SCV000937827\" Version=\"1\" Type=\"SCV\" OrgID=\"500031\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-08-13\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2018-09-19\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Pathogenic</Description><Citation><ID Source=\"PubMed\">7551830</ID></Citation><Citation><ID 
Source=\"PubMed\">24518553</ID></Citation><Citation><ID Source=\"PubMed\">23035047</ID></Citation><Comment>This sequence change falls in intron 8 of the HEXA gene. It does not directly change the encoded amino acid sequence of the HEXA protein, but it affects a nucleotide within the consensus splice site of the intron. This variant is present in population databases (rs200926928, ExAC 0.02%). This variant has been observed in a patient with Tay-Sachs disease (PMID: 7551830) and on the opposite chromosome (in trans) from a pathogenic variant in an individual affected with Tay-Sachs disease (PMID: 23035047). This finding is consistent with autosomal recessive inheritance, and suggests that this variant contributes to disease. This variant was also identified in a individual with Tay-Sachs disease in whom a second variant was not identified (PMID: 24518553). ClinVar contains an entry for this variant (Variation ID: 188929). Experimental studies have shown that this splice site variant leads to the skipping of exon 8 (PMID: 7551830). Nucleotide substitutions within the consensus splice site are a relatively common cause of aberrant splicing (PMID: 17576681, 9536098). Algorithms developed to predict the effect of sequence changes on RNA splicing suggest that this variant may disrupt the consensus splice site, but this prediction has not been confirmed by published transcriptional studies. 
For these reasons, this variant has been classified as Pathogenic.</Comment></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"Invitae\" ID=\"26592\" /><AttributeSet><Attribute Type=\"AssertionMethod\">Invitae Variant Classification Sherloc (09022015)</Attribute><Citation><ID Source=\"PubMed\">28492532</ID></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_000520.4:c.986+3A&gt;G</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">HEXA</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Tay-Sachs disease</ElementValue></Name><XRef DB=\"MedGen\" ID=\"C0039373\" Type=\"CUI\" /></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000170338.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\"\n            xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n    <ClinVarSet ID=\"47181788\">\n        <RecordStatus>current</RecordStatus>\n        <Title>NM_024537.3(CARS2):c.[649_651delGAG];[752C&gt;T] AND Alpers encephalopathy</Title>\n        <ReferenceClinVarAssertion DateCreated=\"2015-05-06\" DateLastUpdated=\"2019-11-02\" ID=\"404986\">\n            <ClinVarAccession Acc=\"RCV000170338\" Version=\"1\" Type=\"RCV\" DateUpdated=\"2019-11-02\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2014-01-17\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <AttributeSet>\n                <Attribute Type=\"ModeOfInheritance\" integerValue=\"263\">Autosomal recessive inheritance</Attribute>\n            </AttributeSet>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>yes</AffectedStatus>\n                    <NumberTested>2</NumberTested>\n                    <FamilyData NumFamilies=\"2\"/>\n                </Sample>\n                <Method>\n                    <NamePlatform>HiSeq2000</NamePlatform>\n                    <Description>Trio-based Whole Exome Sequencing using proband and his unaffected parents\n                    </Description>\n                    <MethodType>research</MethodType>\n                </Method>\n                <ObservedData ID=\"48073638\">\n                    <Attribute integerValue=\"2\" 
Type=\"VariantAlleles\"/>\n                </ObservedData>\n                <ObservedData ID=\"48073638\">\n                    <Attribute integerValue=\"0\" Type=\"VariantChromosomes\"/>\n                </ObservedData>\n                <ObservedData ID=\"48073638\">\n                    <Attribute integerValue=\"0\" Type=\"NumberMosaic\"/>\n                </ObservedData>\n                <ObservedData ID=\"48073638\">\n                    <Attribute Type=\"FamilyHistory\">No</Attribute>\n                </ObservedData>\n                <ObservedData ID=\"48073638\">\n                    <Attribute integerValue=\"1\" Type=\"NumFamiliesWithVariant\"/>\n                </ObservedData>\n                <ObservedData ID=\"48073638\">\n                    <Attribute Type=\"FamilyHistory\">No</Attribute>\n                </ObservedData>\n                <ObservedData ID=\"48073638\">\n                    <Attribute integerValue=\"1\" Type=\"NumFamiliesWithVariant\"/>\n                </ObservedData>\n                <ObservedData ID=\"48073638\">\n                    <Attribute integerValue=\"1\" Type=\"NumFamiliesWithSegregationObserved\"/>\n                </ObservedData>\n                <ObservedData ID=\"48073638\">\n                    <Attribute integerValue=\"0\" Type=\"VariantChromosomes\"/>\n                </ObservedData>\n                <ObservedData ID=\"48073638\">\n                    <Attribute integerValue=\"0\" Type=\"NumberMosaic\"/>\n                </ObservedData>\n            </ObservedIn>\n            <GenotypeSet Type=\"CompoundHeterozygote\" ID=\"424709\" Acc=\"VCV000424709\" Version=\"1\">\n                <MeasureSet Type=\"Variant\" ID=\"218177\" Acc=\"VCV000218177\" Version=\"1\" NumberOfChromosomes=\"1\">\n                    <Measure Type=\"Deletion\" ID=\"167501\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">NM_024537.4(CARS2):c.649_651del (p.Glu217del)</ElementValue>\n                   
     </Name>\n                        <AttributeSet>\n                            <Attribute Accession=\"NM_001352252\" Version=\"1\" Change=\"c.-138_-136del\"\n                                       Type=\"HGVS, coding, RefSeq\">NM_001352252.1:c.-138_-136del\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NM_001352253\" Version=\"2\" Change=\"c.649_651del\"\n                                       Type=\"HGVS, coding, RefSeq\">NM_001352253.2:c.649_651del\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NM_024537\" Version=\"4\" Change=\"c.649_651del\"\n                                       Type=\"HGVS, coding, RefSeq\">NM_024537.4:c.649_651del\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NG_042045\" Version=\"1\" Change=\"g.28124_28126del\"\n                                       Type=\"HGVS, genomic, RefSeqGene\">NG_042045.1:g.28124_28126del\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NG_042045\" Version=\"2\" Change=\"g.35545_35547del\"\n                                       Type=\"HGVS, genomic, RefSeqGene\">NG_042045.2:g.35545_35547del\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NC_000013\" Version=\"11\" Change=\"g.110683055_110683057del\"\n                                       Type=\"HGVS, genomic, top level\" integerValue=\"38\">\n                                NC_000013.11:g.110683055_110683057del\n                       
     </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NC_000013\" Version=\"10\" Change=\"g.111335402_111335404del\"\n                                       Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">\n                                NC_000013.10:g.111335402_111335404del\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NR_147941\" Version=\"1\" Change=\"n.620_622del\" Type=\"HGVS, non-coding\">\n                                NR_147941.1:n.620_622del\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NM_024537\" Version=\"2\" Change=\"c.649_651delGAG\" Type=\"HGVS, previous\">\n                                NM_024537.2:c.649_651delGAG\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NR_147942\" Version=\"1\" Change=\"n.736_738del\" Type=\"HGVS, previous\">\n                                NR_147942.1:n.736_738del\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NP_001339182\" Version=\"1\" Change=\"p.Glu217del\"\n                                       Type=\"HGVS, protein, RefSeq\">NP_001339182.1:p.Glu217del\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NP_078813\" Version=\"1\" Change=\"p.Glu217del\"\n                                       Type=\"HGVS, protein, RefSeq\">NP_078813.1:p.Glu217del\n                            
</Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Type=\"MolecularConsequence\">5 prime UTR variant</Attribute>\n                            <XRef ID=\"SO:0001623\" DB=\"Sequence Ontology\"/>\n                            <XRef ID=\"NM_001352252.1:c.-138_-136del\" DB=\"RefSeq\"/>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Type=\"MolecularConsequence\">inframe_deletion</Attribute>\n                            <XRef ID=\"SO:0001822\" DB=\"Sequence Ontology\"/>\n                            <XRef ID=\"NM_001352253.2:c.649_651del\" DB=\"RefSeq\"/>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Type=\"MolecularConsequence\">inframe_deletion</Attribute>\n                            <XRef ID=\"SO:0001822\" DB=\"Sequence Ontology\"/>\n                            <XRef ID=\"NM_024537.4:c.649_651del\" DB=\"RefSeq\"/>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Type=\"MolecularConsequence\">non-coding transcript variant</Attribute>\n                            <XRef ID=\"SO:0001619\" DB=\"Sequence Ontology\"/>\n                            <XRef ID=\"NR_147941.1:n.620_622del\" DB=\"RefSeq\"/>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Type=\"ProteinChange1LetterCode\">E217del</Attribute>\n                        </AttributeSet>\n                        <AlleleFrequencyList>\n                            <AlleleFrequency Value=\"0.00002\" Source=\"Exome Aggregation Consortium (ExAC)\"/>\n                            <AlleleFrequency Value=\"0.00003\" Source=\"The Genome Aggregation Database (gnomAD), exomes\"/>\n                            <AlleleFrequency Value=\"0.00002\" Source=\"Trans-Omics for 
Precision Medicine (TOPMed)\"/>\n                        </AlleleFrequencyList>\n                        <CytogeneticLocation>13q34</CytogeneticLocation>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"13\" Accession=\"NC_000013.11\" start=\"110683055\"\n                                          stop=\"110683057\" display_start=\"110683055\" display_stop=\"110683057\"\n                                          variantLength=\"3\" positionVCF=\"110683054\" referenceAlleleVCF=\"GCTC\"\n                                          alternateAlleleVCF=\"G\"/>\n                        <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                          AssemblyStatus=\"previous\" Chr=\"13\" Accession=\"NC_000013.10\" start=\"111335402\"\n                                          stop=\"111335404\" display_start=\"111335402\" display_stop=\"111335404\"\n                                          variantLength=\"3\" positionVCF=\"111335401\" referenceAlleleVCF=\"GCTC\"\n                                          alternateAlleleVCF=\"G\"/>\n                        <MeasureRelationship Type=\"within single gene\">\n                            <Name>\n                                <ElementValue Type=\"Preferred\">cysteinyl-tRNA synthetase 2, mitochondrial</ElementValue>\n                            </Name>\n                            <Symbol>\n                                <ElementValue Type=\"Preferred\">CARS2</ElementValue>\n                            </Symbol>\n                            <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                              AssemblyStatus=\"current\" Chr=\"13\" Accession=\"NC_000013.11\"\n                                              start=\"110641410\" stop=\"110713601\" 
display_start=\"110641410\"\n                                              display_stop=\"110713601\" Strand=\"-\"/>\n                            <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                              AssemblyStatus=\"previous\" Chr=\"13\" Accession=\"NC_000013.10\"\n                                              start=\"111293756\" stop=\"111358479\" display_start=\"111293756\"\n                                              display_stop=\"111358479\" variantLength=\"64724\" Strand=\"-\"/>\n                            <XRef ID=\"79587\" DB=\"Gene\"/>\n                            <XRef Type=\"MIM\" ID=\"612800\" DB=\"OMIM\"/>\n                            <XRef ID=\"HGNC:25695\" DB=\"HGNC\"/>\n                        </MeasureRelationship>\n                        <Citation Type=\"general\">\n                            <ID Source=\"PubMed\">25787132</ID>\n                        </Citation>\n                        <XRef Type=\"Allelic variant\" ID=\"612800.0002\" DB=\"OMIM\"/>\n                        <XRef Type=\"rs\" ID=\"753472937\" DB=\"dbSNP\"/>\n                    </Measure>\n                    <Name>\n                        <ElementValue Type=\"Preferred\">NM_024537.2(CARS2):c.649_651delGAG</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS, coding, RefSeq\" Change=\"c.649_651delGAG\">NM_024537.2:c.649_651delGAG\n                        </Attribute>\n                    </AttributeSet>\n                    <XRef ID=\"CA210828\" DB=\"ClinGen\"/>\n                </MeasureSet>\n                <MeasureSet Type=\"Variant\" ID=\"218178\" Acc=\"VCV000218178\" Version=\"1\" NumberOfChromosomes=\"1\">\n                    <Measure Type=\"single nucleotide variant\" ID=\"167502\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">NM_024537.4(CARS2):c.752C&gt;T 
(p.Pro251Leu)</ElementValue>\n                        </Name>\n                        <AttributeSet>\n                            <Attribute Accession=\"NM_001352252\" Version=\"1\" Change=\"c.-35C&gt;T\"\n                                       Type=\"HGVS, coding, RefSeq\">NM_001352252.1:c.-35C&gt;T\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NM_001352253\" Version=\"2\" Change=\"c.752C&gt;T\"\n                                       Type=\"HGVS, coding, RefSeq\">NM_001352253.2:c.752C&gt;T\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NM_024537\" Version=\"4\" Change=\"c.752C&gt;T\"\n                                       Type=\"HGVS, coding, RefSeq\">NM_024537.4:c.752C&gt;T\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NG_042045\" Version=\"1\" Change=\"g.34174C&gt;T\"\n                                       Type=\"HGVS, genomic, RefSeqGene\">NG_042045.1:g.34174C&gt;T\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NG_042045\" Version=\"2\" Change=\"g.41595C&gt;T\"\n                                       Type=\"HGVS, genomic, RefSeqGene\">NG_042045.2:g.41595C&gt;T\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NC_000013\" Version=\"11\" Change=\"g.110677007G&gt;A\"\n                                       Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000013.11:g.110677007G&gt;A\n                            </Attribute>\n  
                      </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NC_000013\" Version=\"10\" Change=\"g.111329354G&gt;A\"\n                                       Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">\n                                NC_000013.10:g.111329354G&gt;A\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NR_147941\" Version=\"1\" Change=\"n.723C&gt;T\" Type=\"HGVS, non-coding\">\n                                NR_147941.1:n.723C&gt;T\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NM_024537\" Version=\"2\" Change=\"c.752C&gt;T\" Type=\"HGVS, previous\">\n                                NM_024537.2:c.752C&gt;T\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NR_147942\" Version=\"1\" Change=\"n.839C&gt;T\" Type=\"HGVS, previous\">\n                                NR_147942.1:n.839C&gt;T\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"Q9HA77\" Change=\"p.Pro251Leu\" Type=\"HGVS, protein\">Q9HA77:p.Pro251Leu\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Accession=\"NP_001339182\" Version=\"1\" Change=\"p.Pro251Leu\"\n                                       Type=\"HGVS, protein, RefSeq\">NP_001339182.1:p.Pro251Leu\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                           
 <Attribute Accession=\"NP_078813\" Version=\"1\" Change=\"p.Pro251Leu\"\n                                       Type=\"HGVS, protein, RefSeq\">NP_078813.1:p.Pro251Leu\n                            </Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Type=\"MolecularConsequence\">5 prime UTR variant</Attribute>\n                            <XRef ID=\"SO:0001623\" DB=\"Sequence Ontology\"/>\n                            <XRef ID=\"NM_001352252.1:c.-35C&gt;T\" DB=\"RefSeq\"/>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Type=\"MolecularConsequence\">missense variant</Attribute>\n                            <XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\"/>\n                            <XRef ID=\"NM_001352253.2:c.752C&gt;T\" DB=\"RefSeq\"/>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Type=\"MolecularConsequence\">missense variant</Attribute>\n                            <XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\"/>\n                            <XRef ID=\"NM_024537.4:c.752C&gt;T\" DB=\"RefSeq\"/>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Type=\"MolecularConsequence\">non-coding transcript variant</Attribute>\n                            <XRef ID=\"SO:0001619\" DB=\"Sequence Ontology\"/>\n                            <XRef ID=\"NR_147941.1:n.723C&gt;T\" DB=\"RefSeq\"/>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Type=\"ProteinChange1LetterCode\">P251L</Attribute>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute Type=\"ProteinChange3LetterCode\">PRO251LEU</Attribute>\n                        </AttributeSet>\n        
                <AlleleFrequencyList>\n                            <AlleleFrequency Value=\"0.00020\" Source=\"1000 Genomes Project\"/>\n                            <AlleleFrequency Value=\"0.00004\" Source=\"Exome Aggregation Consortium (ExAC)\"/>\n                            <AlleleFrequency Value=\"0.00002\" Source=\"The Genome Aggregation Database (gnomAD), exomes\"/>\n                            <AlleleFrequency Value=\"0.00001\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\"/>\n                        </AlleleFrequencyList>\n                        <GlobalMinorAlleleFrequency Value=\"0.00020\" Source=\"1000 Genomes Project\" MinorAllele=\"A\"/>\n                        <CytogeneticLocation>13q34</CytogeneticLocation>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"13\" Accession=\"NC_000013.11\" start=\"110677007\"\n                                          stop=\"110677007\" display_start=\"110677007\" display_stop=\"110677007\"\n                                          variantLength=\"1\" positionVCF=\"110677007\" referenceAlleleVCF=\"G\"\n                                          alternateAlleleVCF=\"A\"/>\n                        <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                          AssemblyStatus=\"previous\" Chr=\"13\" Accession=\"NC_000013.10\" start=\"111329354\"\n                                          stop=\"111329354\" display_start=\"111329354\" display_stop=\"111329354\"\n                                          variantLength=\"1\" positionVCF=\"111329354\" referenceAlleleVCF=\"G\"\n                                          alternateAlleleVCF=\"A\"/>\n                        <MeasureRelationship Type=\"within single gene\">\n                            <Name>\n                                <ElementValue 
Type=\"Preferred\">cysteinyl-tRNA synthetase 2, mitochondrial</ElementValue>\n                            </Name>\n                            <Symbol>\n                                <ElementValue Type=\"Preferred\">CARS2</ElementValue>\n                            </Symbol>\n                            <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                              AssemblyStatus=\"current\" Chr=\"13\" Accession=\"NC_000013.11\"\n                                              start=\"110641410\" stop=\"110713601\" display_start=\"110641410\"\n                                              display_stop=\"110713601\" Strand=\"-\"/>\n                            <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                              AssemblyStatus=\"previous\" Chr=\"13\" Accession=\"NC_000013.10\"\n                                              start=\"111293756\" stop=\"111358479\" display_start=\"111293756\"\n                                              display_stop=\"111358479\" variantLength=\"64724\" Strand=\"-\"/>\n                            <XRef ID=\"79587\" DB=\"Gene\"/>\n                            <XRef Type=\"MIM\" ID=\"612800\" DB=\"OMIM\"/>\n                            <XRef ID=\"HGNC:25695\" DB=\"HGNC\"/>\n                        </MeasureRelationship>\n                        <Citation Type=\"general\">\n                            <ID Source=\"PubMed\">25787132</ID>\n                        </Citation>\n                        <XRef ID=\"Q9HA77#VAR_075669\" DB=\"UniProtKB\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"612800.0003\" DB=\"OMIM\"/>\n                        <XRef Type=\"rs\" ID=\"557671802\" DB=\"dbSNP\"/>\n                    </Measure>\n                    <Name>\n                        <ElementValue Type=\"Preferred\">NM_024537.2(CARS2):c.752C&gt;T</ElementValue>\n                    
</Name>\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS, coding, RefSeq\" Change=\"c.752C&gt;T\">NM_024537.2:c.752C&gt;T</Attribute>\n                    </AttributeSet>\n                    <XRef ID=\"CA211164\" DB=\"ClinGen\"/>\n                </MeasureSet>\n                <Name>\n                    <ElementValue Type=\"Preferred\">NM_024537.3(CARS2):c.[649_651delGAG];[752C&gt;T]</ElementValue>\n                </Name>\n                <AttributeSet>\n                    <Attribute Type=\"HGVS, coding, RefSeq\" Change=\"c.[649_651delGAG];[752C&gt;T]\">\n                        NM_024537.3:c.[649_651delGAG];[752C&gt;T]\n                    </Attribute>\n                </AttributeSet>\n            </GenotypeSet>\n            <TraitSet Type=\"Disease\" ID=\"20059\">\n                <Trait ID=\"32242\" Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Alpers encephalopathy</ElementValue>\n                    </Name>\n                    <XRef ID=\"CN221139\" DB=\"MedGen\"/>\n                </Trait>\n            </TraitSet>\n        </ReferenceClinVarAssertion>\n        <ClinVarAssertion ID=\"339704\" SubmissionName=\"CARS2\">\n            <ClinVarSubmissionID localKey=\"NM_024537.2:c.[649_651delGAG];[752C&gt;T]|Alpers encephalopathy\"\n                                 submitter=\"Shaikh Laboratory, University of Colorado\" submitterDate=\"2014-10-23\"/>\n            <ClinVarAccession Acc=\"SCV000148105\" Version=\"2\" Type=\"SCV\" OrgID=\"505273\" OrganizationCategory=\"laboratory\"\n                              OrgType=\"primary\" DateUpdated=\"2019-08-03\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2014-01-17\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Pathogenic</Description>\n                <Citation Type=\"general\">\n                
    <ID Source=\"PubMed\">25787132</ID>\n                </Citation>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <AttributeSet>\n                <Attribute Type=\"ModeOfInheritance\">Autosomal recessive inheritance</Attribute>\n            </AttributeSet>\n            <AttributeSet>\n                <Attribute Type=\"AssertionMethod\">CARS2 assertion criteria</Attribute>\n                <Citation Type=\"general\">\n                    <ID Source=\"PubMed\">25787132</ID>\n                </Citation>\n            </AttributeSet>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>yes</AffectedStatus>\n                    <NumberTested>1</NumberTested>\n                    <Gender>male</Gender>\n                    <FamilyData NumFamilies=\"1\" NumFamiliesWithSegregationObserved=\"1\" NumFamiliesWithVariant=\"1\">\n                        <FamilyHistory>No</FamilyHistory>\n                    </FamilyData>\n                </Sample>\n                <Method>\n                    <NamePlatform>HiSeq2000</NamePlatform>\n                    <TypePlatform>Next-gen Whole Exome Sequencing</TypePlatform>\n                    <Description>Trio-based Whole Exome Sequencing using proband and his unaffected parents\n                    </Description>\n                    <Software name=\"BWA, Samtools, Galaxy\"/>\n                    <MethodType>research</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"VariantAlleles\" integerValue=\"1\"/>\n                </ObservedData>\n                <ObservedData>\n                    <Attribute Type=\"VariantChromosomes\" integerValue=\"0\"/>\n                </ObservedData>\n                <ObservedData>\n                    <Attribute Type=\"Homozygote\" 
integerValue=\"0\"/>\n                </ObservedData>\n                <ObservedData>\n                    <Attribute Type=\"SingleHeterozygote\" integerValue=\"0\"/>\n                </ObservedData>\n                <ObservedData>\n                    <Attribute Type=\"CompoundHeterozygote\" integerValue=\"1\"/>\n                </ObservedData>\n                <ObservedData>\n                    <Attribute Type=\"NumberMosaic\" integerValue=\"0\"/>\n                </ObservedData>\n                <TraitSet Type=\"Finding\">\n                    <Trait Type=\"Finding\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">combined mitochondrial respiratory chain deficiency|\n                                neurological regression| complex movement disorder| intractable seizures\n                            </ElementValue>\n                        </Name>\n                    </Trait>\n                </TraitSet>\n            </ObservedIn>\n            <GenotypeSet Type=\"CompoundHeterozygote\">\n                <MeasureSet Type=\"Variant\">\n                    <Measure Type=\"Variation\">\n                        <AttributeSet>\n                            <Attribute Type=\"HGVS\">NM_024537.2:c.649_651delGAG</Attribute>\n                        </AttributeSet>\n                        <SequenceLocation Assembly=\"GRCh37.p13\" Chr=\"13\" start=\"111335402\" stop=\"111335404\"\n                                          variantLength=\"3\"/>\n                        <MeasureRelationship Type=\"variant in gene\">\n                            <Symbol>\n                                <ElementValue Type=\"Preferred\">CARS2</ElementValue>\n                            </Symbol>\n                        </MeasureRelationship>\n                    </Measure>\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS\">NM_024537.2:c.649_651delGAG</Attribute>\n                    </AttributeSet>\n             
   </MeasureSet>\n                <MeasureSet Type=\"Variant\">\n                    <Measure Type=\"Variation\">\n                        <AttributeSet>\n                            <Attribute Type=\"HGVS\">NM_024537.2:c.752C&gt;T</Attribute>\n                        </AttributeSet>\n                        <SequenceLocation Assembly=\"GRCh37.p13\" Chr=\"13\" start=\"111329354\"/>\n                        <MeasureRelationship Type=\"variant in gene\">\n                            <Symbol>\n                                <ElementValue Type=\"Preferred\">CARS2</ElementValue>\n                            </Symbol>\n                        </MeasureRelationship>\n                    </Measure>\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS\">NM_024537.2:c.752C&gt;T</Attribute>\n                    </AttributeSet>\n                </MeasureSet>\n            </GenotypeSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Alpers encephalopathy</ElementValue>\n                    </Name>\n                </Trait>\n            </TraitSet>\n            <Citation Type=\"general\">\n                <ID Source=\"PubMed\">25787132</ID>\n            </Citation>\n            <Citation Type=\"general\">\n                <ID Source=\"PubMed\">25787132</ID>\n            </Citation>\n            <Comment>The patient has mitochondrial encephalopathy and a combined mitochondrial oxidative phosphorylation\n                deficiency. The child presented with neurological regression, complex movement disorder and intractable\n                seizures. A combined deficiency of mitochondrial complexes I, III, and IV was noted in liver tissue,\n                along with increased mitochondrial DNA content in skeletal muscle. 
Incomplete assembly of complex V was\n                noted on blue native polyacrylamide gel electrophoretic (BN-PAGE) analysis in skeletal muscle and skin\n                fibroblasts.\n            </Comment>\n        </ClinVarAssertion>\n    </ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000171474.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\"\n            xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n    <ClinVarSet ID=\"41090759\">\n        <RecordStatus>current</RecordStatus>\n        <Title>Single allele AND not provided</Title>\n        <ReferenceClinVarAssertion DateCreated=\"2015-05-29\" DateLastUpdated=\"2019-05-15\" ID=\"408375\">\n            <ClinVarAccession Acc=\"RCV000171474\" Version=\"1\" Type=\"RCV\" DateUpdated=\"2019-05-15\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance>\n                <ReviewStatus>no assertion criteria provided</ReviewStatus>\n                <Description>Likely pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>yes</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>research</MethodType>\n                </Method>\n                <ObservedData ID=\"39583245\">\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\" ID=\"191283\" Acc=\"VCV000191283\" Version=\"1\">\n                <Measure Type=\"single nucleotide variant\" ID=\"189086\">\n                    <CytogeneticLocation>13q13.3</CytogeneticLocation>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"13\" Accession=\"NC_000013.10\" start=\"36888396\"\n         
                             stop=\"36888396\" display_start=\"36888396\" display_stop=\"36888396\" variantLength=\"1\"\n                                      referenceAllele=\"-\" alternateAllele=\"T\"/>\n                    <MeasureRelationship Type=\"within single gene\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">spartin</ElementValue>\n                        </Name>\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">SPART</ElementValue>\n                        </Symbol>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"13\" Accession=\"NC_000013.11\" start=\"36301638\"\n                                          stop=\"36370180\" display_start=\"36301638\" display_stop=\"36370180\" Strand=\"-\"/>\n                        <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                          AssemblyStatus=\"previous\" Chr=\"13\" Accession=\"NC_000013.10\" start=\"36875774\"\n                                          stop=\"36944316\" display_start=\"36875774\" display_stop=\"36944316\"\n                                          variantLength=\"68543\" Strand=\"-\"/>\n                        <XRef ID=\"23111\" DB=\"Gene\"/>\n                        <XRef Type=\"MIM\" ID=\"607111\" DB=\"OMIM\"/>\n                        <XRef ID=\"HGNC:18514\" DB=\"HGNC\"/>\n                    </MeasureRelationship>\n                    <XRef Type=\"rs\" ID=\"730882198\" DB=\"dbSNP\"/>\n                </Measure>\n                <Name>\n                    <ElementValue Type=\"Preferred\">Single allele</ElementValue>\n                </Name>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\" ID=\"9460\">\n                <Trait ID=\"17556\" Type=\"Disease\">\n                    
<Name>\n                        <ElementValue Type=\"Preferred\">not provided</ElementValue>\n                        <XRef ID=\"13DG0619\"\n                              DB=\"Developmental Genetics Unit,King Faisal Specialist Hospital &amp; Research Centre\"/>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Type=\"public definition\">The term 'not provided' is registered in MedGen to support\n                            identification of submissions to ClinVar for which no condition was named when assessing the\n                            variant. 'not provided' differs from 'not specified', which is used when a variant is\n                            asserted to be benign, likely benign, or of uncertain significance for conditions that have\n                            not been specified.\n                        </Attribute>\n                    </AttributeSet>\n                    <XRef ID=\"CN517202\" DB=\"MedGen\"/>\n                </Trait>\n            </TraitSet>\n        </ReferenceClinVarAssertion>\n        <ClinVarAssertion ID=\"407940\">\n            <ClinVarSubmissionID localKey=\"SGP2014-1596-1|Not provided\"\n                                 submitter=\"Developmental Genetics Unit,King Faisal Specialist Hospital &amp; Research Centre\"\n                                 submitterDate=\"2015-04-14\"/>\n            <ClinVarAccession Acc=\"SCV000221673\" Version=\"1\" Type=\"SCV\" OrgID=\"500184\" OrganizationCategory=\"laboratory\"\n                              OrgType=\"primary\" DateUpdated=\"2019-03-31\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance>\n                <ReviewStatus>no assertion criteria provided</ReviewStatus>\n                <Description>Likely pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ExternalID DB=\"Developmental Genetics Unit\" 
ID=\"SGP2014-1596-1\"/>\n            <AttributeSet>\n                <Attribute Type=\"AssertionMethod\">ACMG Guidelines, 2015</Attribute>\n                <Citation>\n                    <ID Source=\"PubMed\">25741868</ID>\n                </Citation>\n            </AttributeSet>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>yes</AffectedStatus>\n                    <Indication Type=\"Indication\">\n                        <Trait Type=\"Finding\">\n                            <Name>\n                                <ElementValue Type=\"Preferred\">Neurological phenotype</ElementValue>\n                            </Name>\n                        </Trait>\n                    </Indication>\n                </Sample>\n                <Method>\n                    <MethodType>research</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <SequenceLocation Assembly=\"GRCh37\" Chr=\"13\" alternateAllele=\"T\" referenceAllele=\"-\"\n                                      start=\"36888396\" stop=\"36888396\" variantLength=\"1\"/>\n                    <MeasureRelationship Type=\"variant in gene\">\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">SPG20</ElementValue>\n                        </Symbol>\n                    </MeasureRelationship>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Not provided</ElementValue>\n                    
</Name>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n    </ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000179026.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"48753919\"><RecordStatus>current</RecordStatus><Title>NM_002617.3(PEX10):c.867_868insG (p.His290fs) AND not provided</Title><ReferenceClinVarAssertion DateCreated=\"2015-06-26\" DateLastUpdated=\"2019-12-17\" ID=\"427602\"><ClinVarAccession Acc=\"RCV000179026\" Version=\"1\" Type=\"RCV\" DateUpdated=\"2019-12-17\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2015-02-04\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"49721670\"><Attribute integerValue=\"1\" Type=\"VariantAlleles\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"197886\" Acc=\"VCV000197886\" Version=\"1\"><Measure Type=\"Insertion\" ID=\"195047\"><Name><ElementValue Type=\"Preferred\">NM_002617.3(PEX10):c.867_868insG (p.His290fs)</ElementValue></Name><AttributeSet><Attribute Accession=\"NM_002617\" Version=\"3\" Change=\"c.867_868insG\" Type=\"HGVS, coding, RefSeq\">NM_002617.3:c.867_868insG</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_153818\" Version=\"1\" Change=\"c.927_928insG\" Type=\"HGVS, coding, RefSeq\">NM_153818.1:c.927_928insG</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_008342\" Version=\"1\" Change=\"g.11043_11044insG\" Type=\"HGVS, genomic, RefSeqGene\">NG_008342.1:g.11043_11044insG</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_016128\" 
Version=\"1\" Change=\"g.19754_19755insC\" Type=\"HGVS, genomic, RefSeqGene\">NG_016128.1:g.19754_19755insC</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000001\" Version=\"11\" Change=\"g.2406528_2406529insC\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000001.11:g.2406528_2406529insC</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000001\" Version=\"10\" Change=\"g.2337967_2337968insC\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000001.10:g.2337967_2337968insC</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_002608\" Version=\"1\" Change=\"p.His290fs\" Type=\"HGVS, protein, RefSeq\">NP_002608.1:p.His290fs</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_722540\" Version=\"1\" Change=\"p.His310fs\" Type=\"HGVS, protein, RefSeq\">NP_722540.1:p.His310fs</Attribute></AttributeSet><AttributeSet><Attribute Type=\"Location\">NM_153818.1:exon 5</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">frameshift variant</Attribute><XRef ID=\"SO:0001589\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_002617.3:c.867_868insG\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">frameshift variant</Attribute><XRef ID=\"SO:0001589\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_153818.1:c.927_928insG\" DB=\"RefSeq\" /></AttributeSet><CytogeneticLocation>1p36.32</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"2406528\" stop=\"2406529\" display_start=\"2406528\" display_stop=\"2406529\" variantLength=\"1\" positionVCF=\"2406528\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"GC\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"2337967\" stop=\"2337968\" display_start=\"2337967\" 
display_stop=\"2337968\" variantLength=\"1\" positionVCF=\"2337967\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"GC\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">peroxisomal biogenesis factor 10</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">PEX10</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"2403974\" stop=\"2413827\" display_start=\"2403974\" display_stop=\"2413827\" Strand=\"-\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"2336240\" stop=\"2344009\" display_start=\"2336240\" display_stop=\"2344009\" variantLength=\"7770\" Strand=\"-\" /><XRef ID=\"5192\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"602859\" DB=\"OMIM\" /><XRef ID=\"HGNC:8851\" DB=\"HGNC\" /></MeasureRelationship><XRef Type=\"rs\" ID=\"797044762\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_002617.3(PEX10):c.867_868insG (p.His290fs)</ElementValue></Name><XRef ID=\"CA203141\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"9460\"><Trait ID=\"17556\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not provided</ElementValue><XRef ID=\"13DG0619\" DB=\"Developmental Genetics Unit,King Faisal Specialist Hospital &amp; Research Centre\" /></Name><AttributeSet><Attribute Type=\"public definition\">The term 'not provided' is registered in MedGen to support identification of submissions to ClinVar for which no condition was named when assessing the variant. 
'not provided' differs from 'not specified', which is used when a variant is asserted to be benign, likely benign, or of uncertain significance for conditions that have not been specified.</Attribute></AttributeSet><XRef ID=\"CN517202\" DB=\"MedGen\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"419538\" SubmissionName=\"SUB4534263\"><ClinVarSubmissionID localKey=\"26683_000000\" localKeyIsSubmitted=\"1\" submittedAssembly=\"GRCh37\" submitter=\"EGL Genetic Diagnostics,Eurofins Clinical Diagnostics\" submitterDate=\"2018-09-19\" /><ClinVarAccession Acc=\"SCV000231215\" Version=\"5\" Type=\"SCV\" OrgID=\"500060\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2015-02-04\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"EGL Genetic Diagnostics\" ID=\"26683\" /><AttributeSet><Attribute Type=\"AssertionMethod\">EGL_Classification_Definitions_2015</Attribute><Citation><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/dn5yhybg/egl_classification_definitions_2015.pdf</URL></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><Gender>mixed</Gender></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"1\" /></ObservedData><ObservedData><Attribute Type=\"SingleHeterozygote\" integerValue=\"1\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Insertion\"><AttributeSet><Attribute Type=\"Location\">NM_153818.1:Ex5</Attribute></AttributeSet><SequenceLocation Assembly=\"GRCh37\" Chr=\"1\" alternateAllele=\"C\" referenceAllele=\"-\" start=\"2337967\" stop=\"2337968\" variantLength=\"2\" /><MeasureRelationship 
Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">PEX10</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not provided</ElementValue></Name></Trait></TraitSet><Citation><URL>http://www.egl-eurofins.com/emvclass/emvclass.php?approved_symbol=PEX10</URL></Citation></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000194003.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\"\n            xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n    <ClinVarSet ID=\"49280875\">\n        <RecordStatus>current</RecordStatus>\n        <Title>NM_001080522.2(CC2D2A):c.4179+1del AND Joubert syndrome 9</Title>\n        <ReferenceClinVarAssertion DateCreated=\"2015-10-05\" DateLastUpdated=\"2019-12-31\" ID=\"463285\">\n            <ClinVarAccession Acc=\"RCV000194003\" Version=\"3\" Type=\"RCV\" DateUpdated=\"2019-12-31\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2015-02-23\">\n                <ReviewStatus>criteria provided, multiple submitters, no conflicts</ReviewStatus>\n                <Description>Pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>yes</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData ID=\"49083670\">\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <ObservedIn>\n                <Sample>\n                    <Origin>unknown</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>yes</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>research</MethodType>\n                </Method>\n                <ObservedData 
ID=\"49084009\">\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\" ID=\"56312\" Acc=\"VCV000056312\" Version=\"2\">\n                <Measure Type=\"Deletion\" ID=\"70951\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">NM_001080522.2(CC2D2A):c.4179+1del</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_001080522\" Version=\"2\" Change=\"c.4179+1delG\" Type=\"HGVS, coding\">\n                            NM_001080522.2:c.4179+1delG\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_001080522\" Version=\"2\" Change=\"c.4179delG\" Type=\"HGVS, coding\">\n                            NM_001080522.2:c.4179delG\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"LRG_697t1\" Change=\"c.4179+1del\" Type=\"HGVS, coding, LRG\">\n                            LRG_697t1:c.4179+1del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_013035\" Version=\"1\" Change=\"g.123065del\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_013035.1:g.123065del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000004\" Version=\"12\" Change=\"g.15587930del\"\n                                   Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000004.12:g.15587930del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n          
              <Attribute Accession=\"NC_000004\" Version=\"11\" Change=\"g.15589553del\"\n                                   Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">\n                            NC_000004.11:g.15589553del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"Location\">NM_001080522.2:intron 33</Attribute>\n                    </AttributeSet>\n                    <CytogeneticLocation>4p15.32</CytogeneticLocation>\n                    <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                      AssemblyStatus=\"current\" Chr=\"4\" Accession=\"NC_000004.12\" start=\"15587930\"\n                                      stop=\"15587930\" display_start=\"15587930\" display_stop=\"15587930\" variantLength=\"1\"\n                                      positionVCF=\"15587928\" referenceAlleleVCF=\"AG\" alternateAlleleVCF=\"A\"/>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"4\" Accession=\"NC_000004.11\" start=\"15589553\"\n                                      stop=\"15589553\" display_start=\"15589553\" display_stop=\"15589553\" variantLength=\"1\"\n                                      positionVCF=\"15589551\" referenceAlleleVCF=\"AG\" alternateAlleleVCF=\"A\"/>\n                    <MeasureRelationship Type=\"within single gene\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">coiled-coil and C2 domain containing 2A</ElementValue>\n                        </Name>\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">CC2D2A</ElementValue>\n                        </Symbol>\n                        <SequenceLocation Assembly=\"GRCh38\" 
AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"4\" Accession=\"NC_000004.12\" start=\"15468660\"\n                                          stop=\"15601971\" display_start=\"15468660\" display_stop=\"15601971\" Strand=\"+\"/>\n                        <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                          AssemblyStatus=\"previous\" Chr=\"4\" Accession=\"NC_000004.11\" start=\"15471488\"\n                                          stop=\"15603179\" display_start=\"15471488\" display_stop=\"15603179\"\n                                          variantLength=\"131692\" Strand=\"+\"/>\n                        <XRef ID=\"57545\" DB=\"Gene\"/>\n                        <XRef Type=\"MIM\" ID=\"612013\" DB=\"OMIM\"/>\n                        <XRef ID=\"HGNC:29253\" DB=\"HGNC\"/>\n                    </MeasureRelationship>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">19777577</ID>\n                    </Citation>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">19466712</ID>\n                    </Citation>\n                    <XRef ID=\"CMGVARID01146\" DB=\"Centre for Mendelian Genomics,University Medical Centre Ljubljana\"/>\n                    <XRef Type=\"rs\" ID=\"386833760\" DB=\"dbSNP\"/>\n                </Measure>\n                <Name>\n                    <ElementValue Type=\"Preferred\">NM_001080522.2(CC2D2A):c.4179+1del</ElementValue>\n                </Name>\n                <XRef ID=\"CA215089\" DB=\"ClinGen\"/>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\" ID=\"187\">\n                <Trait ID=\"5351\" Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Joubert syndrome 9</ElementValue>\n                        <XRef 
ID=\"Joubert+syndrome+9/8695\" DB=\"Genetic Alliance\"/>\n                    </Name>\n                    <Symbol>\n                        <ElementValue Type=\"Preferred\">JBTS9</ElementValue>\n                        <XRef Type=\"MIM\" ID=\"612285\" DB=\"OMIM\"/>\n                    </Symbol>\n                    <Symbol>\n                        <ElementValue Type=\"Alternate\">CC2D2A</ElementValue>\n                    </Symbol>\n                    <AttributeSet>\n                        <Attribute Type=\"public definition\">Classic Joubert syndrome (JS) is characterized by three\n                            primary findings: A distinctive cerebellar and brain stem malformation called the molar\n                            tooth sign (MTS). Hypotonia. Developmental delays. Often these findings are accompanied by\n                            episodic tachypnea or apnea and/or atypical eye movements. In general, the breathing\n                            abnormalities improve with age, truncal ataxia develops over time, and acquisition of gross\n                            motor milestones is delayed. Cognitive abilities are variable, ranging from severe\n                            intellectual disability to normal. Additional findings can include retinal dystrophy, renal\n                            disease, ocular colobomas, occipital encephalocele, hepatic fibrosis, polydactyly, oral\n                            hamartomas, and endocrine abnormalities. 
Both intra- and interfamilial variation are seen.\n                        </Attribute>\n                        <XRef ID=\"NBK1325\" DB=\"GeneReviews\"/>\n                    </AttributeSet>\n                    <Citation Type=\"review\" Abbrev=\"GeneReviews\">\n                        <ID Source=\"PubMed\">20301500</ID>\n                        <ID Source=\"BookShelf\">NBK1325</ID>\n                    </Citation>\n                    <XRef ID=\"C2676788\" DB=\"MedGen\"/>\n                    <XRef ID=\"2318\" DB=\"Orphanet\"/>\n                    <XRef Type=\"MIM\" ID=\"612285\" DB=\"OMIM\"/>\n                </Trait>\n            </TraitSet>\n        </ReferenceClinVarAssertion>\n        <ClinVarAssertion ID=\"458535\">\n            <ClinVarSubmissionID localKey=\"NM_001080522.2(CC2D2A):c.4179+1del|Joubert syndrome 9\"\n                                 submitter=\"Genetic Services Laboratory, University of Chicago\"\n                                 submitterDate=\"2015-09-15\"/>\n            <ClinVarAccession Acc=\"SCV000246901\" Version=\"1\" Type=\"SCV\" OrgID=\"1238\" OrganizationCategory=\"laboratory\"\n                              OrgType=\"primary\" DateUpdated=\"2019-12-06\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2014-09-22\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ExternalID DB=\"Genetic services laboratory\" ID=\"NM_001080522.2(CC2D2A):c.4179+1del\"/>\n            <AttributeSet>\n                <Attribute Type=\"AssertionMethod\">ACMG Guidelines, 2015</Attribute>\n                <Citation>\n                    <ID Source=\"PubMed\">25741868</ID>\n                </Citation>\n            </AttributeSet>\n            <ObservedIn>\n                <Sample>\n                    
<Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>yes</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS\">NM_001080522.2:c.4179+1del</Attribute>\n                    </AttributeSet>\n                    <MeasureRelationship Type=\"variant in gene\">\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">CC2D2A</ElementValue>\n                        </Symbol>\n                    </MeasureRelationship>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Joubert syndrome 9</ElementValue>\n                    </Name>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n        <ClinVarAssertion ID=\"478444\">\n            <ClinVarSubmissionID localKey=\"NM_001080522.2:c.4179+1delG|OMIM:612285\"\n                                 submitter=\"UW Hindbrain Malformation Research Program,University of Washington\"\n                                 submitterDate=\"2015-09-16\"/>\n            <ClinVarAccession Acc=\"SCV000256339\" Version=\"1\" Type=\"SCV\" OrgID=\"505588\" OrganizationCategory=\"laboratory\"\n                              OrgType=\"primary\" DateUpdated=\"2019-03-31\"/>\n            <AdditionalSubmitters>\n                <SubmitterDescription OrgID=\"505516\"\n                                      
SubmitterName=\"University of Washington Center for Mendelian Genomics,University of Washington\"\n                                      Type=\"secondary\" category=\"laboratory\"/>\n            </AdditionalSubmitters>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2015-02-23\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Pathogenic</Description>\n                <Citation>\n                    <ID Source=\"PubMed\">26092869</ID>\n                </Citation>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <AttributeSet>\n                <Attribute Type=\"AssertionMethod\">Submitter's publication</Attribute>\n                <Citation>\n                    <ID Source=\"PubMed\">26092869</ID>\n                </Citation>\n            </AttributeSet>\n            <ObservedIn>\n                <Sample>\n                    <Origin>unknown</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>yes</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>research</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS\">NM_001080522.2:c.4179+1delG</Attribute>\n                    </AttributeSet>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <XRef DB=\"OMIM\" ID=\"612285\" Type=\"MIM\"/>\n                </Trait>\n            </TraitSet>\n        
</ClinVarAssertion>\n    </ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000203290.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"48330931\"><RecordStatus>current</RecordStatus><Title>NM_001127500.3(MET):c.3029C&gt;T (p.Thr1010Ile) AND Congenital diaphragmatic hernia</Title><ReferenceClinVarAssertion DateCreated=\"2016-01-09\" DateLastUpdated=\"2019-12-15\" ID=\"483149\"><ClinVarAccession Acc=\"RCV000203290\" Version=\"1\" Type=\"RCV\" DateUpdated=\"2019-12-16\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2015-03-03\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Uncertain significance</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><AttributeSet><Attribute Type=\"ModeOfInheritance\" integerValue=\"262\">Autosomal dominant inheritance</Attribute><XRef ID=\"144-93-01_MET\" DB=\"Lupski Lab, Baylor-Hopkins CMG, Baylor College of Medicine\" /></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>Caucasian</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>research</MethodType></Method><ObservedData ID=\"49373838\"><Attribute integerValue=\"2\" Type=\"VariantAlleles\" /></ObservedData><ObservedData ID=\"49373838\"><Attribute Type=\"FamilyHistory\">yes</Attribute></ObservedData><ObservedData ID=\"49373838\"><Attribute integerValue=\"1\" Type=\"NumFamiliesWithVariant\" /></ObservedData><ObservedData ID=\"49373838\"><Attribute integerValue=\"2\" Type=\"SingleHeterozygote\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"41624\" Acc=\"VCV000041624\" Version=\"2\"><Measure Type=\"single nucleotide variant\" ID=\"50063\"><Name><ElementValue Type=\"Preferred\">NM_001127500.3(MET):c.3029C&gt;T 
(p.Thr1010Ile)</ElementValue></Name><AttributeSet><Attribute Accession=\"LRG_662t1\" Change=\"c.3029C&gt;T\" Type=\"HGVS, coding, LRG\">LRG_662t1:c.3029C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001324402\" Version=\"2\" Change=\"c.1685C&gt;T\" Type=\"HGVS, coding, RefSeq\">NM_001324402.2:c.1685C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_000245\" Version=\"4\" Change=\"c.2975C&gt;T\" Type=\"HGVS, coding, RefSeq\">NM_000245.4:c.2975C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001127500\" Version=\"3\" Change=\"c.3029C&gt;T\" Type=\"HGVS, coding, RefSeq\">NM_001127500.3:c.3029C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_662\" Change=\"g.104532C&gt;T\" Type=\"HGVS, genomic, LRG\">LRG_662:g.104532C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_008996\" Version=\"1\" Change=\"g.104532C&gt;T\" Type=\"HGVS, genomic, RefSeqGene\">NG_008996.1:g.104532C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000007\" Version=\"14\" Change=\"g.116771936C&gt;T\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000007.14:g.116771936C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000007\" Version=\"13\" Change=\"g.116411990C&gt;T\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000007.13:g.116411990C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_000245\" Version=\"2\" Change=\"c.2975C&gt;T\" Type=\"HGVS, previous\">NM_000245.2:c.2975C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001127500\" Version=\"1\" Change=\"c.3029C&gt;T\" Type=\"HGVS, previous\">NM_001127500.1:c.3029C&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Type=\"HGVS, protein\">p.T1010I</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_662p1\" Change=\"p.Thr1010Ile\" Type=\"HGVS, 
protein\">LRG_662p1:p.Thr1010Ile</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001120972\" Version=\"1\" Change=\"p.Thr1010Ile\" Type=\"HGVS, protein, RefSeq\">NP_001120972.1:p.Thr1010Ile</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001120972\" Version=\"1\" Change=\"p.Thr1010Ile\" Type=\"HGVS, protein, RefSeq\">NP_001120972.1:p.Thr1010Ile</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001120972\" Version=\"1\" Change=\"p.Thr1010Ile\" Type=\"HGVS, protein, RefSeq\">NP_001120972.1:p.Thr1010Ile</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001311331\" Version=\"1\" Change=\"p.Thr562Ile\" Type=\"HGVS, protein, RefSeq\">NP_001311331.1:p.Thr562Ile</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_000236\" Version=\"2\" Change=\"p.Thr992Ile\" Type=\"HGVS, protein, RefSeq\">NP_000236.2:p.Thr992Ile</Attribute></AttributeSet><AttributeSet><Attribute Type=\"Location\">NM_001127500.1:exon 14</Attribute><XRef ID=\"CM118113\" DB=\"HGMD\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_000245.4:c.2975C&gt;T\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001127500.3:c.3029C&gt;T\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001324402.2:c.1685C&gt;T\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">T1010I</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">T562I</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">T992I</Attribute></AttributeSet><AlleleFrequencyList><AlleleFrequency Value=\"0.00890\" 
Source=\"NHLBI Exome Sequencing Project (ESP) Exome Variant Server\" /><AlleleFrequency Value=\"0.00339\" Source=\"1000 Genomes Project\" /><AlleleFrequency Value=\"0.00793\" Source=\"Exome Aggregation Consortium (ExAC)\" /><AlleleFrequency Value=\"0.01108\" Source=\"The Genome Aggregation Database (gnomAD)\" /><AlleleFrequency Value=\"0.00789\" Source=\"The Genome Aggregation Database (gnomAD), exomes\" /><AlleleFrequency Value=\"0.00738\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\" /></AlleleFrequencyList><GlobalMinorAlleleFrequency Value=\"0.00339\" Source=\"1000 Genomes Project\" MinorAllele=\"T\" /><CytogeneticLocation>7q31.2</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"7\" Accession=\"NC_000007.14\" start=\"116771936\" stop=\"116771936\" display_start=\"116771936\" display_stop=\"116771936\" variantLength=\"1\" positionVCF=\"116771936\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"T\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"7\" Accession=\"NC_000007.13\" start=\"116411990\" stop=\"116411990\" display_start=\"116411990\" display_stop=\"116411990\" variantLength=\"1\" positionVCF=\"116411990\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"T\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">MET proto-oncogene, receptor tyrosine kinase</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">MET</ElementValue></Symbol><AttributeSet><Attribute dateValue=\"2012-09-06\" Type=\"Haploinsufficiency\">No evidence available</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=MET</URL></Citation></AttributeSet><AttributeSet><Attribute dateValue=\"2012-09-06\" Type=\"Triplosensitivity\">No evidence 
available</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=MET</URL></Citation></AttributeSet><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"7\" Accession=\"NC_000007.14\" start=\"116672196\" stop=\"116798386\" display_start=\"116672196\" display_stop=\"116798386\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"7\" Accession=\"NC_000007.13\" start=\"116312458\" stop=\"116438439\" display_start=\"116312458\" display_stop=\"116438439\" variantLength=\"125982\" Strand=\"+\" /><XRef ID=\"4233\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"164860\" DB=\"OMIM\" /><XRef ID=\"HGNC:7029\" DB=\"HGNC\" /></MeasureRelationship><XRef ID=\"CM118113\" DB=\"HGMD\" /><XRef ID=\"91469\" DB=\"Illumina Clinical Services Laboratory,Illumina\" /><XRef Type=\"rs\" ID=\"56391007\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_001127500.3(MET):c.3029C&gt;T (p.Thr1010Ile)</ElementValue></Name><XRef ID=\"CA160417\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Finding\" ID=\"16998\"><Trait ID=\"21917\" Type=\"Finding\"><Name><ElementValue Type=\"Preferred\">Congenital diaphragmatic hernia</ElementValue><XRef ID=\"252893\" DB=\"GeneTests\" /><XRef ID=\"Congenital+Diaphragmatic+Hernia/1803\" DB=\"Genetic Alliance\" /><XRef ID=\"HP:0000776\" DB=\"Human Phenotype Ontology\" /><XRef ID=\"1481\" DB=\"Office of Rare Diseases\" /></Name><Name><ElementValue Type=\"Alternate\">Diaphragmatic hernia</ElementValue><XRef ID=\"HP:0000776\" DB=\"Human Phenotype Ontology\" /></Name><Name><ElementValue Type=\"Alternate\">DIH</ElementValue><XRef Type=\"MIM\" ID=\"142340\" DB=\"OMIM\" /></Name><Symbol><ElementValue Type=\"Preferred\">DIH1</ElementValue><XRef ID=\"142340\" DB=\"OMIM\" /></Symbol><Symbol><ElementValue Type=\"Alternate\">CDH</ElementValue><XRef Type=\"MIM\" ID=\"142340\" DB=\"OMIM\" 
/><XRef ID=\"1481\" DB=\"Office of Rare Diseases\" /></Symbol><Symbol><ElementValue Type=\"Alternate\">HCD</ElementValue><XRef Type=\"MIM\" ID=\"142340\" DB=\"OMIM\" /></Symbol><Citation Type=\"review\" Abbrev=\"GeneReviews\"><ID Source=\"PubMed\">20301533</ID><ID Source=\"BookShelf\">NBK1359</ID></Citation><XRef ID=\"1732\" DB=\"Gene\" /><XRef ID=\"D065630\" DB=\"MeSH\" /><XRef ID=\"C0235833\" DB=\"MedGen\" /><XRef ID=\"2140\" DB=\"Orphanet\" /><XRef Type=\"MIM\" ID=\"142340\" DB=\"OMIM\" /><XRef Type=\"primary\" ID=\"HP:0000776\" DB=\"Human Phenotype Ontology\" /><XRef Type=\"secondary\" ID=\"HP:0006604\" DB=\"Human Phenotype Ontology\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"483132\" SubmissionName=\"Beck_AJMG_2015_FBN1\"><ClinVarSubmissionID localKey=\"144-93-01_MET|OMIM:142340\" submitter=\"Lupski Lab, Baylor-Hopkins CMG, Baylor College of Medicine\" submitterDate=\"2015-11-15\" /><ClinVarAccession Acc=\"SCV000258327\" Version=\"1\" Type=\"SCV\" OrgID=\"505572\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-09-27\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2015-03-03\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Uncertain significance</Description><Comment>It is unclear whether these changes, alone or in aggregate, are contributing to the development of CDH in this family.</Comment></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"Lupski Lab, Baylor-Hopkins CMG\" ID=\"144-93-01_MET\" /><AttributeSet><Attribute Type=\"ModeOfInheritance\">Autosomal dominant inheritance</Attribute></AttributeSet><AttributeSet><Attribute Type=\"AssertionMethod\">Beck et al. 
(Am J Med Genet A 2015)</Attribute><Citation><ID Source=\"PubMed\">25736269</ID></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Ethnicity>Caucasian</Ethnicity><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus><Gender>male</Gender><FamilyData NumFamiliesWithSegregationObserved=\"1\" NumFamiliesWithVariant=\"1\"><FamilyHistory>yes</FamilyHistory></FamilyData></Sample><Method><TypePlatform>next-generation sequencing</TypePlatform><Citation><ID Source=\"PubMed\">23806086</ID></Citation><Citation><ID Source=\"PubMed\">24088041</ID></Citation><MethodType>research</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"2\" /></ObservedData><ObservedData><Attribute Type=\"SingleHeterozygote\" integerValue=\"2\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_000245.2:c.2975C&gt;T</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">MET</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><XRef DB=\"OMIM\" ID=\"142340\" Type=\"MIM\" /></Trait></TraitSet><Citation Type=\"general\"><ID Source=\"PubMed\">25736269</ID></Citation><StudyDescription>This case report describes a pathogenic FBN1 variant in a family with recurrent congenital diaphragmatic hernia</StudyDescription></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000205418.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\"\n            xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n    <ClinVarSet ID=\"48759104\">\n        <RecordStatus>current</RecordStatus>\n        <Title>NM_000297.4(PKD2):c.290_292AGG[5] (p.Glu102del) AND not provided</Title>\n        <ReferenceClinVarAssertion DateCreated=\"2016-01-30\" DateLastUpdated=\"2019-12-17\" ID=\"489301\">\n            <ClinVarAccession Acc=\"RCV000205418\" Version=\"2\" Type=\"RCV\" DateUpdated=\"2019-12-17\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2015-09-04\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Benign</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>unknown</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData ID=\"49723084\">\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\" ID=\"219942\" Acc=\"VCV000219942\" Version=\"2\">\n                <Measure Type=\"Microsatellite\" ID=\"221475\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">NM_000297.4(PKD2):c.290_292AGG[5] (p.Glu102del)</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        
<Attribute Accession=\"NM_000297\" Version=\"4\" Change=\"c.290_292AGG[5]\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_000297.4:c.290_292AGG[5]\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_008604\" Version=\"1\" Change=\"g.5356_5358AGG[5]\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_008604.1:g.5356_5358AGG[5]\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000004\" Version=\"12\" Change=\"g.88008023_88008025AGG[5]\"\n                                   Type=\"HGVS, genomic, top level\" integerValue=\"38\">\n                            NC_000004.12:g.88008023_88008025AGG[5]\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000004\" Version=\"11\" Change=\"g.88929175_88929177AGG[5]\"\n                                   Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">\n                            NC_000004.11:g.88929175_88929177AGG[5]\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_000297\" Version=\"3\" Change=\"c.289_291delGAG\" Type=\"HGVS, previous\">\n                            NM_000297.3:c.289_291delGAG\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NR_156488\" Version=\"1\" Change=\"n.377_379AGG[5]\" Type=\"HGVS, previous\">\n                            NR_156488.1:n.377_379AGG[5]\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute 
Accession=\"NP_000288\" Version=\"1\" Change=\"p.Glu102del\" Type=\"HGVS, protein, RefSeq\">\n                            NP_000288.1:p.Glu102del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">inframe_deletion</Attribute>\n                        <XRef ID=\"SO:0001822\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_000297.4:c.290_292AGG[5]\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"ProteinChange1LetterCode\">E102del</Attribute>\n                    </AttributeSet>\n                    <GlobalMinorAlleleFrequency Value=\"0.00739\" Source=\"1000 Genomes Project\"\n                                                MinorAllele=\"GAGGAGGAGGAGGAGGAGGAGG\"/>\n                    <CytogeneticLocation>4q22.1</CytogeneticLocation>\n                    <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                      AssemblyStatus=\"current\" Chr=\"4\" Accession=\"NC_000004.12\" start=\"88008022\"\n                                      stop=\"88008024\" display_start=\"88008022\" display_stop=\"88008024\" variantLength=\"3\"\n                                      positionVCF=\"88008021\" referenceAlleleVCF=\"CGAG\" alternateAlleleVCF=\"C\"/>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"4\" Accession=\"NC_000004.11\" start=\"88929174\"\n                                      stop=\"88929176\" display_start=\"88929174\" display_stop=\"88929176\" variantLength=\"3\"\n                                      positionVCF=\"88929173\" referenceAlleleVCF=\"CGAG\" alternateAlleleVCF=\"C\"/>\n                    <MeasureRelationship Type=\"within single gene\">\n     
                   <Name>\n                            <ElementValue Type=\"Preferred\">polycystin 2, transient receptor potential cation channel\n                            </ElementValue>\n                        </Name>\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">PKD2</ElementValue>\n                        </Symbol>\n                        <AttributeSet>\n                            <Attribute dateValue=\"2012-07-06\" Type=\"Haploinsufficiency\">Sufficient evidence for dosage\n                                pathogenicity\n                            </Attribute>\n                            <Citation>\n                                <URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=PKD2</URL>\n                            </Citation>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute dateValue=\"2012-07-06\" Type=\"Triplosensitivity\">No evidence available</Attribute>\n                            <Citation>\n                                <URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=PKD2</URL>\n                            </Citation>\n                        </AttributeSet>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"4\" Accession=\"NC_000004.12\" start=\"88007635\"\n                                          stop=\"88077779\" display_start=\"88007635\" display_stop=\"88077779\" Strand=\"+\"/>\n                        <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                          AssemblyStatus=\"previous\" Chr=\"4\" Accession=\"NC_000004.11\" start=\"88928798\"\n                                          stop=\"88998930\" display_start=\"88928798\" display_stop=\"88998930\"\n               
                           variantLength=\"70133\" Strand=\"+\"/>\n                        <XRef ID=\"5311\" DB=\"Gene\"/>\n                        <XRef Type=\"MIM\" ID=\"173910\" DB=\"OMIM\"/>\n                        <XRef ID=\"HGNC:9009\" DB=\"HGNC\"/>\n                    </MeasureRelationship>\n                    <XRef Type=\"rs\" ID=\"750077647\" DB=\"dbSNP\"/>\n                </Measure>\n                <Name>\n                    <ElementValue Type=\"Preferred\">NM_000297.4(PKD2):c.290_292AGG[5] (p.Glu102del)</ElementValue>\n                </Name>\n                <XRef ID=\"CA349584\" DB=\"ClinGen\"/>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\" ID=\"9460\">\n                <Trait ID=\"17556\" Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">not provided</ElementValue>\n                        <XRef ID=\"13DG0619\"\n                              DB=\"Developmental Genetics Unit,King Faisal Specialist Hospital &amp; Research Centre\"/>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Type=\"public definition\">The term 'not provided' is registered in MedGen to support\n                            identification of submissions to ClinVar for which no condition was named when assessing the\n                            variant. 
'not provided' differs from 'not specified', which is used when a variant is\n                            asserted to be benign, likely benign, or of uncertain significance for conditions that have\n                            not been specified.\n                        </Attribute>\n                    </AttributeSet>\n                    <XRef ID=\"CN517202\" DB=\"MedGen\"/>\n                </Trait>\n            </TraitSet>\n        </ReferenceClinVarAssertion>\n        <ClinVarAssertion ID=\"485087\" SubmissionName=\"SUB5321749\">\n            <ClinVarSubmissionID localKey=\"29355|MedGen:CN517202\" submittedAssembly=\"GRCh37\" submitter=\"Invitae\"\n                                 submitterDate=\"2019-03-14\"/>\n            <ClinVarAccession Acc=\"SCV000260120\" Version=\"3\" Type=\"SCV\" OrgID=\"500031\" OrganizationCategory=\"laboratory\"\n                              OrgType=\"primary\" DateUpdated=\"2019-12-17\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2015-09-04\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Benign</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ExternalID DB=\"Invitae\" ID=\"29355\"/>\n            <AttributeSet>\n                <Attribute Type=\"AssertionMethod\">Nykamp K et al. 
(Genet Med 2017)</Attribute>\n                <Citation>\n                    <ID Source=\"PubMed\">28492532</ID>\n                </Citation>\n            </AttributeSet>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>unknown</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS\">NM_000297.3:c.289_291delGAG</Attribute>\n                    </AttributeSet>\n                    <MeasureRelationship Type=\"variant in gene\">\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">PKD2</ElementValue>\n                        </Symbol>\n                    </MeasureRelationship>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">not provided</ElementValue>\n                    </Name>\n                    <XRef DB=\"MedGen\" ID=\"CN517202\" Type=\"CUI\"/>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n    </ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000207071.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"47215070\"><RecordStatus>current</RecordStatus><Title>NM_014704.4(CEP104):c.1328_1329insT (p.Tyr444fs) AND Joubert syndrome 25</Title><ReferenceClinVarAssertion DateCreated=\"2016-02-08\" DateLastUpdated=\"2019-11-02\" ID=\"491407\"><ClinVarAccession Acc=\"RCV000207071\" Version=\"1\" Type=\"RCV\" DateUpdated=\"2019-11-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2016-02-02\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData ID=\"48093290\"><Attribute Type=\"Description\">In a 3.5-year-old boy (patient 842629), born of consanguineous Arab-Israeli parents, with Joubert syndrome-25 (JBTS25; 616781), Srour et al. (2015) identified a homozygous 1-bp insertion (c.1328_1329insT, NM_014704.3) in the CEP104 gene, resulting in a frameshift and premature termination (Tyr444fsTer3). The mutation, which was found by exome sequencing and confirmed by Sanger sequencing, segregated with the disorder in the family and was nor found in the dbSNP, 1000 Genomes Project, Exome Variant Server, or ExAC databases, or in 350 in-house ethnically matched exomes. 
Functional studies and studies on patient cells were not performed.</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">26477546</ID></Citation></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"221275\" Acc=\"VCV000221275\" Version=\"1\"><Measure Type=\"Insertion\" ID=\"222986\"><Name><ElementValue Type=\"Preferred\">NM_014704.4(CEP104):c.1328_1329insT (p.Tyr444fs)</ElementValue></Name><AttributeSet><Attribute Accession=\"NM_014704\" Version=\"4\" Change=\"c.1328_1329insT\" Type=\"HGVS, coding, RefSeq\">NM_014704.4:c.1328_1329insT</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_046726\" Version=\"1\" Change=\"g.27152_27153insT\" Type=\"HGVS, genomic, RefSeqGene\">NG_046726.1:g.27152_27153insT</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000001\" Version=\"11\" Change=\"g.3835081_3835082insA\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000001.11:g.3835081_3835082insA</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000001\" Version=\"10\" Change=\"g.3751645_3751646insA\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000001.10:g.3751645_3751646insA</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_014704\" Version=\"3\" Change=\"c.1328_1329insT\" Type=\"HGVS, previous\">NM_014704.3:c.1328_1329insT</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_055519\" Version=\"1\" Change=\"p.Tyr444fs\" Type=\"HGVS, protein, RefSeq\">NP_055519.1:p.Tyr444fs</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">frameshift variant</Attribute><XRef ID=\"SO:0001589\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_014704.4:c.1328_1329insT\" DB=\"RefSeq\" /></AttributeSet><CytogeneticLocation>1p36.32</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"3835081\" stop=\"3835082\" display_start=\"3835081\" 
display_stop=\"3835082\" variantLength=\"1\" positionVCF=\"3835081\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"GA\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"3751645\" stop=\"3751646\" display_start=\"3751645\" display_stop=\"3751646\" variantLength=\"1\" positionVCF=\"3751645\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"GA\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">centrosomal protein 104</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">CEP104</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"3812086\" stop=\"3857233\" display_start=\"3812086\" display_stop=\"3857233\" Strand=\"-\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"3728644\" stop=\"3773796\" display_start=\"3728644\" display_stop=\"3773796\" variantLength=\"45153\" Strand=\"-\" /><XRef ID=\"9731\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"616690\" DB=\"OMIM\" /><XRef ID=\"HGNC:24866\" DB=\"HGNC\" /></MeasureRelationship><XRef Type=\"Allelic variant\" ID=\"616690.0002\" DB=\"OMIM\" /><XRef Type=\"rs\" ID=\"869025277\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_014704.4(CEP104):c.1328_1329insT (p.Tyr444fs)</ElementValue></Name><XRef ID=\"CA351512\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"23754\"><Trait ID=\"34566\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Joubert syndrome 25</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">JBTS25</ElementValue><XRef Type=\"MIM\" ID=\"616781\" DB=\"OMIM\" /></Symbol><AttributeSet><Attribute Type=\"public definition\">Classic Joubert syndrome (JS) is characterized by three primary 
findings: A distinctive cerebellar and brain stem malformation called the molar tooth sign (MTS). Hypotonia. Developmental delays. Often these findings are accompanied by episodic tachypnea or apnea and/or atypical eye movements. In general, the breathing abnormalities improve with age, truncal ataxia develops over time, and acquisition of gross motor milestones is delayed. Cognitive abilities are variable, ranging from severe intellectual disability to normal. Additional findings can include retinal dystrophy, renal disease, ocular colobomas, occipital encephalocele, hepatic fibrosis, polydactyly, oral hamartomas, and endocrine abnormalities. Both intra- and interfamilial variation are seen.</Attribute><XRef ID=\"NBK1325\" DB=\"GeneReviews\" /></AttributeSet><Citation Type=\"review\" Abbrev=\"GeneReviews\"><ID Source=\"PubMed\">20301500</ID><ID Source=\"BookShelf\">NBK1325</ID></Citation><XRef ID=\"C4084842\" DB=\"MedGen\" /><XRef ID=\"475\" DB=\"Orphanet\" /><XRef Type=\"MIM\" ID=\"616781\" DB=\"OMIM\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"491037\"><ClinVarSubmissionID localKey=\"616690.0002_JOUBERT SYNDROME 25\" submitter=\"OMIM\" submitterDate=\"2016-02-02\" title=\"CEP104, 1-BP INS, 1328T   _JOUBERT SYNDROME 25\" /><ClinVarAccession Acc=\"SCV000262597\" Version=\"1\" Type=\"SCV\" OrgID=\"3\" OrganizationCategory=\"resource\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2016-02-02\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"OMIM\" ID=\"616690.0002\" Type=\"Allelic variant\" /><ObservedIn><Sample><Origin>germline</Origin><Species>human</Species><AffectedStatus>not provided</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData><Attribute Type=\"Description\">In a 
3.5-year-old boy (patient 842629), born of consanguineous Arab-Israeli parents, with Joubert syndrome-25 (JBTS25; 616781), Srour et al. (2015) identified a homozygous 1-bp insertion (c.1328_1329insT, NM_014704.3) in the CEP104 gene, resulting in a frameshift and premature termination (Tyr444fsTer3). The mutation, which was found by exome sequencing and confirmed by Sanger sequencing, segregated with the disorder in the family and was nor found in the dbSNP, 1000 Genomes Project, Exome Variant Server, or ExAC databases, or in 350 in-house ethnically matched exomes. Functional studies and studies on patient cells were not performed.</Attribute><Citation><ID Source=\"PubMed\">26477546</ID></Citation><XRef DB=\"OMIM\" ID=\"616781\" Type=\"MIM\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue Type=\"Preferred\">CEP104, 1-BP INS, 1328T</ElementValue></Name><AttributeSet><Attribute Type=\"NonHGVS\">1-BP INS, 1328T</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">CEP104</ElementValue></Symbol></MeasureRelationship><XRef DB=\"OMIM\" ID=\"616690.0002\" Type=\"Allelic variant\" /></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">JOUBERT SYNDROME 25</ElementValue></Name></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000207504.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"49281396\"><RecordStatus>current</RecordStatus><Title>NM_005343.4(HRAS):c.37G&gt;T (p.Gly13Cys) AND not provided</Title><ReferenceClinVarAssertion DateCreated=\"2016-02-20\" DateLastUpdated=\"2019-12-31\" ID=\"492539\"><ClinVarAccession Acc=\"RCV000207504\" Version=\"2\" Type=\"RCV\" DateUpdated=\"2019-12-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2018-05-04\"><ReviewStatus>criteria provided, multiple submitters, no conflicts</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>unknown</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"48101547\"><Attribute integerValue=\"1\" Type=\"VariantAlleles\" /></ObservedData><ObservedData ID=\"48101547\"><Attribute integerValue=\"1\" Type=\"VariantChromosomes\" /></ObservedData></ObservedIn><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"48111727\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"12606\" Acc=\"VCV000012606\" Version=\"5\"><Measure Type=\"single nucleotide variant\" ID=\"27645\"><Name><ElementValue Type=\"Preferred\">NM_005343.4(HRAS):c.37G&gt;T (p.Gly13Cys)</ElementValue></Name><Name><ElementValue Type=\"Alternate\">p.G13C:GGT&gt;TGT</ElementValue></Name><Name><ElementValue 
Type=\"Alternate\">NM_005343.3(HRAS):c.37G&gt;T</ElementValue><XRef ID=\"77fd6395-3146-46f0-86ca-08fb626eb660\" DB=\"ClinGen RASopathy Variant Curation Expert Panel\" /></Name><AttributeSet><Attribute Accession=\"NM_001318054\" Version=\"2\" Change=\"c.-283G&gt;T\" Type=\"HGVS, coding, RefSeq\">NM_001318054.2:c.-283G&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001130442\" Version=\"2\" Change=\"c.37G&gt;T\" Type=\"HGVS, coding, RefSeq\">NM_001130442.2:c.37G&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_005343\" Version=\"4\" Change=\"c.37G&gt;T\" Type=\"HGVS, coding, RefSeq\">NM_005343.4:c.37G&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_176795\" Version=\"4\" Change=\"c.37G&gt;T\" Type=\"HGVS, coding, RefSeq\">NM_176795.4:c.37G&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_007666\" Version=\"1\" Change=\"g.6265G&gt;T\" Type=\"HGVS, genomic, RefSeqGene\">NG_007666.1:g.6265G&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000011\" Version=\"10\" Change=\"g.534286C&gt;A\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000011.10:g.534286C&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000011\" Version=\"9\" Change=\"g.534286C&gt;A\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000011.9:g.534286C&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Type=\"HGVS, incomplete\">c.37G&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_005343\" Version=\"2\" Change=\"c.37G&gt;T\" Type=\"HGVS, previous\">NM_005343.2:c.37G&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_005343\" Version=\"3\" Change=\"c.37G&gt;T\" Type=\"HGVS, previous\">NM_005343.3:c.37G&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"P01112\" Change=\"p.Gly13Cys\" Type=\"HGVS, protein\">P01112:p.Gly13Cys</Attribute></AttributeSet><AttributeSet><Attribute 
Accession=\"NP_001123914\" Version=\"1\" Change=\"p.Gly13Cys\" Type=\"HGVS, protein, RefSeq\">NP_001123914.1:p.Gly13Cys</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_005334\" Version=\"1\" Change=\"p.Gly13Cys\" Type=\"HGVS, protein, RefSeq\">NP_005334.1:p.Gly13Cys</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_789765\" Version=\"1\" Change=\"p.Gly13Cys\" Type=\"HGVS, protein, RefSeq\">NP_789765.1:p.Gly13Cys</Attribute></AttributeSet><AttributeSet><Attribute Type=\"Location\">NM_005343.2:exon 2</Attribute></AttributeSet><AttributeSet><Attribute Type=\"Location\">NM_005343.3:exon 2</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">5 prime UTR variant</Attribute><XRef ID=\"SO:0001623\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001318054.2:c.-283G&gt;T\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001130442.2:c.37G&gt;T\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_005343.4:c.37G&gt;T\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_176795.4:c.37G&gt;T\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">G13C</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange3LetterCode\">GLY13CYS</Attribute></AttributeSet><CytogeneticLocation>11p15.5</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"11\" Accession=\"NC_000011.10\" start=\"534286\" stop=\"534286\" display_start=\"534286\" display_stop=\"534286\" variantLength=\"1\" positionVCF=\"534286\" referenceAlleleVCF=\"C\" 
alternateAlleleVCF=\"A\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"11\" Accession=\"NC_000011.9\" start=\"534286\" stop=\"534286\" display_start=\"534286\" display_stop=\"534286\" variantLength=\"1\" positionVCF=\"534286\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"A\" /><MeasureRelationship Type=\"within multiple genes by overlap\"><Name><ElementValue Type=\"Preferred\">HRas proto-oncogene, GTPase</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">HRAS</ElementValue></Symbol><AttributeSet><Attribute dateValue=\"2015-10-15\" Type=\"Haploinsufficiency\">No evidence available</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=HRAS</URL></Citation></AttributeSet><AttributeSet><Attribute dateValue=\"2015-10-15\" Type=\"Triplosensitivity\">No evidence available</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=HRAS</URL></Citation></AttributeSet><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"11\" Accession=\"NC_000011.10\" start=\"532242\" stop=\"535576\" display_start=\"532242\" display_stop=\"535576\" Strand=\"-\" /><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"11\" Accession=\"NT_187586.1\" start=\"61883\" stop=\"65217\" display_start=\"61883\" display_stop=\"65217\" Strand=\"-\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"11\" Accession=\"NC_000011.9\" start=\"532241\" stop=\"535549\" display_start=\"532241\" display_stop=\"535549\" variantLength=\"3309\" Strand=\"-\" /><XRef ID=\"3265\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"190020\" DB=\"OMIM\" /><XRef ID=\"HGNC:5173\" DB=\"HGNC\" /></MeasureRelationship><MeasureRelationship Type=\"within multiple genes by 
overlap\"><Name><ElementValue Type=\"Preferred\">leucine rich repeat containing 56</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">LRRC56</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"11\" Accession=\"NC_000011.10\" start=\"518967\" stop=\"554916\" display_start=\"518967\" display_stop=\"554916\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"11\" Accession=\"NT_187586.1\" start=\"67168\" stop=\"84552\" display_start=\"67168\" display_stop=\"84552\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"11\" Accession=\"NC_000011.9\" start=\"537521\" stop=\"554915\" display_start=\"537521\" display_stop=\"554915\" variantLength=\"17395\" Strand=\"+\" /><XRef ID=\"115399\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"618227\" DB=\"OMIM\" /><XRef ID=\"HGNC:25430\" DB=\"HGNC\" /></MeasureRelationship><XRef ID=\"77fd6395-3146-46f0-86ca-08fb626eb660\" DB=\"ClinGen RASopathy Variant Curation Expert Panel\" /><XRef ID=\"P01112#VAR_026107\" DB=\"UniProtKB\" /><XRef Type=\"Allelic variant\" ID=\"190020.0007\" DB=\"OMIM\" /><XRef Type=\"rs\" ID=\"104894228\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_005343.4(HRAS):c.37G&gt;T (p.Gly13Cys)</ElementValue></Name><XRef ID=\"CA295247\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"9460\"><Trait ID=\"17556\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not provided</ElementValue><XRef ID=\"13DG0619\" DB=\"Developmental Genetics Unit,King Faisal Specialist Hospital &amp; Research Centre\" /></Name><AttributeSet><Attribute Type=\"public definition\">The term 'not provided' is registered in MedGen to support identification of submissions to ClinVar for which no condition was named when assessing the variant. 
'not provided' differs from 'not specified', which is used when a variant is asserted to be benign, likely benign, or of uncertain significance for conditions that have not been specified.</Attribute></AttributeSet><XRef ID=\"CN517202\" DB=\"MedGen\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"373496\" SubmissionName=\"SUB5098196\"><ClinVarSubmissionID localKey=\"GDX:26317|Not Provided\" submittedAssembly=\"GRCh37\" submitter=\"GeneDx\" submitterDate=\"2019-01-29\" /><ClinVarAccession Acc=\"SCV000207847\" Version=\"11\" Type=\"SCV\" OrgID=\"26957\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-08-03\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2018-05-04\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Pathogenic</Description><Comment>The G13C variant in the HRAS gene has been reported previously in multiple unrelated individuals with Costello syndrome and is one of the common HRAS variants associated with this disorder (Estep et al., 2006; Gripp et al., 2006; Gripp et al., 2011). In addition to the classic Costello features, this variant has been described in patients with unique ectodermal findings such as sparse hair in early childhood and long eyelashes, and appears to have a low incidence of papillomata (Gripp et al., 2011). The G13C variant was not observed in approximately 6,500 individuals of European and African American ancestry in the NHLBI Exome Sequencing Project, indicating it is not a common benign variant in these populations. The G13C variant is a non-conservative amino acid substitution, which occurs at a conserved Glycine residue at codon 12; the majority of pathogenic variants in the HRAS gene (&gt;90%) alter the conserved glycine residues at positions 12 and 13 (Aoki et al., 2005; Gripp et al., 2006). 
Functional studies demonstrate that G13C alters GTP and GDP dissociation rates resulting in increased active GTP-bound HRAS, which upregulates the Ras/MAPK pathway (Wey et al., 2013). Therefore, we interpret G13C as a pathogenic variant.</Comment></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"GeneDx\" ID=\"GDX:26317\" /><AttributeSet><Attribute Type=\"AssertionMethod\">GeneDx Variant Classification (06012015)</Attribute><Citation><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/7oynscmk/mdi-5616_26957_genedx_interprules_final_061215.pdf</URL></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_005343.2:c.37G&gt;T</Attribute></AttributeSet><SequenceLocation Assembly=\"GRCh37\" Chr=\"11\" alternateAllele=\"A\" referenceAllele=\"C\" start=\"534286\" stop=\"534286\" variantLength=\"1\" /><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">HRAS</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Not Provided</ElementValue></Name></Trait></TraitSet></ClinVarAssertion><ClinVarAssertion ID=\"492524\"><ClinVarSubmissionID localKey=\"NM_005343.2:c.37G&gt;T|none provided\" submittedAssembly=\"GRCh37\" submitter=\"Molecular Diagnostics Lab,Nemours Alfred I. 
duPont Hospital for Children\" submitterDate=\"2015-12-22\" /><ClinVarAccession Acc=\"SCV000263057\" Version=\"1\" Type=\"SCV\" OrgID=\"28338\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-08-03\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2014-06-04\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Pathogenic</Description><Citation><ID Source=\"PubMed\">168335863</ID></Citation><Citation><ID Source=\"PubMed\">19213030</ID></Citation><Citation><ID Source=\"PubMed\">21438134</ID></Citation><Citation><ID Source=\"PubMed\">16372351</ID></Citation><Citation><ID Source=\"PubMed\">16329078</ID></Citation></ClinicalSignificance><Assertion Type=\"variation to disease\" /><AttributeSet><Attribute Type=\"AssertionMethod\">ACMG Guidelines, 2015</Attribute><Citation><ID Source=\"PubMed\">25741868</ID></Citation></AttributeSet><ObservedIn><Sample><Origin>unknown</Origin><Species TaxonomyId=\"9606\">human</Species><Age Type=\"minimum\" age_unit=\"years\">10</Age><Age Type=\"maximum\" age_unit=\"years\">19</Age><AffectedStatus>yes</AffectedStatus><Gender>female</Gender></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"VariantAlleles\" integerValue=\"1\" /></ObservedData><ObservedData><Attribute Type=\"VariantChromosomes\" integerValue=\"1\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue Type=\"Alternate\">p.Gly13Cys</ElementValue></Name><AttributeSet><Attribute Type=\"Location\">NM_005343.2:exon 2</Attribute></AttributeSet><AttributeSet><Attribute Type=\"HGVS\">NM_005343.2:c.37G&gt;T</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">HRAS</ElementValue></Symbol></MeasureRelationship><XRef DB=\"dbSNP\" ID=\"104894228\" Type=\"rsNumber\" /></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait 
Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">none provided</ElementValue></Name></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000235027.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\"\n            xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n    <ClinVarSet ID=\"49282424\">\n        <RecordStatus>current</RecordStatus>\n        <Replaces>RCV000610359</Replaces>\n        <Title>NM_173660.5(DOK7):c.1124_1127dup (p.Ala378fs) AND Congenital myasthenic syndrome</Title>\n        <ReferenceClinVarAssertion DateCreated=\"2016-07-19\" DateLastUpdated=\"2019-12-31\" ID=\"557938\">\n            <ClinVarAccession Acc=\"RCV000235027\" Version=\"1\" Type=\"RCV\" DateUpdated=\"2019-12-31\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2015-12-16\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <AttributeSet>\n                <Attribute Type=\"ModeOfInheritance\" integerValue=\"263\">Autosomal recessive inheritance</Attribute>\n                <XRef ID=\"63693434\" DB=\"Laboratory for Molecular Medicine,Partners HealthCare Personalized Medicine\"/>\n            </AttributeSet>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>yes</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>literature only</MethodType>\n                </Method>\n                <ObservedData ID=\"50068401\">\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <ObservedIn>\n                
<Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>not provided</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData ID=\"50068402\">\n                    <Attribute integerValue=\"1\" Type=\"VariantAlleles\"/>\n                </ObservedData>\n                <ObservedData ID=\"50068402\">\n                    <Attribute integerValue=\"1\" Type=\"NumFamiliesWithVariant\"/>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\" ID=\"1273\" Acc=\"VCV000001273\" Version=\"4\">\n                <Measure Type=\"Duplication\" ID=\"16312\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">NM_173660.5(DOK7):c.1124_1127dup (p.Ala378fs)</ElementValue>\n                    </Name>\n                    <Name>\n                        <ElementValue Type=\"Alternate\">p.Ala378SerfsX30</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_001164673\" Version=\"2\" Change=\"c.*345_*348dup\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_001164673.2:c.*345_*348dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_001301071\" Version=\"2\" Change=\"c.1124_1127dup\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_001301071.2:c.1124_1127dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_173660\" Version=\"5\" Change=\"c.1124_1127dup\"\n                                   Type=\"HGVS, coding, 
RefSeq\">NM_173660.5:c.1124_1127dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_001256896\" Version=\"1\" Change=\"c.194_197dup\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_001256896.1:c.194_197dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_001363811\" Version=\"2\" Change=\"c.692_695dup\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_001363811.2:c.692_695dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_013072\" Version=\"2\" Change=\"g.34805_34808dup\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_013072.2:g.34805_34808dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000004\" Version=\"12\" Change=\"g.3493110_3493113dup\"\n                                   Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000004.12:g.3493110_3493113dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000004\" Version=\"11\" Change=\"g.3494840_3494841insTGCC\"\n                                   Type=\"HGVS, genomic, top level, other\" integerValue=\"37\">\n                            NC_000004.11:g.3494840_3494841insTGCC\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000004\" Version=\"11\" Change=\"g.3494837_3494840dup\"\n                                   Type=\"HGVS, genomic, top level, previous\" 
integerValue=\"37\">\n                            NC_000004.11:g.3494837_3494840dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_173660\" Version=\"4\" Change=\"c.1124_1127dupTGCC\" Type=\"HGVS, previous\">\n                            NM_173660.4:c.1124_1127dupTGCC\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_775931\" Version=\"3\" Change=\"p.Ala378SerfsTer30\" Type=\"HGVS, protein\">\n                            NP_775931.3:p.Ala378SerfsTer30\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_001350740\" Version=\"1\" Change=\"p.Ala234fs\"\n                                   Type=\"HGVS, protein, RefSeq\">NP_001350740.1:p.Ala234fs\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_001288000\" Version=\"1\" Change=\"p.Ala378fs\"\n                                   Type=\"HGVS, protein, RefSeq\">NP_001288000.1:p.Ala378fs\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_775931\" Version=\"3\" Change=\"p.Ala378fs\" Type=\"HGVS, protein, RefSeq\">\n                            NP_775931.3:p.Ala378fs\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_001243825\" Version=\"1\" Change=\"p.Ala68fs\" Type=\"HGVS, protein, RefSeq\">\n                            NP_001243825.1:p.Ala68fs\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n             
           <Attribute Type=\"Location\">NM_173660.4:exon 7</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">3 prime UTR variant</Attribute>\n                        <XRef ID=\"SO:0001624\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_001164673.2:c.*345_*348dup\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">frameshift variant</Attribute>\n                        <XRef ID=\"SO:0001589\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_001256896.1:c.194_197dup\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">frameshift variant</Attribute>\n                        <XRef ID=\"SO:0001589\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_001301071.2:c.1124_1127dup\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">frameshift variant</Attribute>\n                        <XRef ID=\"SO:0001589\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_001363811.2:c.692_695dup\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">frameshift variant</Attribute>\n                        <XRef ID=\"SO:0001589\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_173660.5:c.1124_1127dup\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <CytogeneticLocation>4p16.3</CytogeneticLocation>\n                    <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                      AssemblyStatus=\"current\" 
Chr=\"4\" Accession=\"NC_000004.12\" start=\"3493110\"\n                                      stop=\"3493113\" display_start=\"3493110\" display_stop=\"3493113\" variantLength=\"4\"\n                                      positionVCF=\"3493106\" referenceAlleleVCF=\"A\" alternateAlleleVCF=\"AGCCT\"/>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"4\" Accession=\"NC_000004.11\" start=\"3494837\"\n                                      stop=\"3494840\" display_start=\"3494837\" display_stop=\"3494840\" variantLength=\"4\"\n                                      positionVCF=\"3494833\" referenceAlleleVCF=\"A\" alternateAlleleVCF=\"AGCCT\"/>\n                    <MeasureRelationship Type=\"within single gene\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">docking protein 7</ElementValue>\n                        </Name>\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">DOK7</ElementValue>\n                        </Symbol>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"4\" Accession=\"NC_000004.12\" start=\"3463306\"\n                                          stop=\"3501482\" display_start=\"3463306\" display_stop=\"3501482\" Strand=\"+\"/>\n                        <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                          AssemblyStatus=\"previous\" Chr=\"4\" Accession=\"NC_000004.11\" start=\"3465032\"\n                                          stop=\"3496208\" display_start=\"3465032\" display_stop=\"3496208\"\n                                          variantLength=\"31177\" Strand=\"+\"/>\n                        <XRef ID=\"285489\" 
DB=\"Gene\"/>\n                        <XRef Type=\"MIM\" ID=\"610285\" DB=\"OMIM\"/>\n                        <XRef ID=\"HGNC:26594\" DB=\"HGNC\"/>\n                    </MeasureRelationship>\n                    <XRef Type=\"Allelic variant\" ID=\"610285.0001\" DB=\"OMIM\"/>\n                    <XRef Type=\"rs\" ID=\"606231128\" DB=\"dbSNP\"/>\n                </Measure>\n                <Name>\n                    <ElementValue Type=\"Preferred\">NM_173660.5(DOK7):c.1124_1127dup (p.Ala378fs)</ElementValue>\n                </Name>\n                <XRef ID=\"CA249193\" DB=\"ClinGen\"/>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\" ID=\"9634\">\n                <Trait ID=\"9474\" Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Congenital myasthenic syndrome</ElementValue>\n                    </Name>\n                    <Symbol>\n                        <ElementValue Type=\"Preferred\">CMS</ElementValue>\n                    </Symbol>\n                    <AttributeSet>\n                        <Attribute Type=\"public definition\">Congenital myasthenic syndromes (designated as CMS\n                            throughout this entry) are characterized by fatigable weakness of skeletal muscle (e.g.,\n                            ocular, bulbar, limb muscles) with onset at or shortly after birth or in early childhood;\n                            rarely, symptoms may not manifest until later in childhood. Cardiac and smooth muscle are\n                            usually not involved. Severity and course of disease are highly variable, ranging from minor\n                            symptoms to progressive disabling weakness. 
In some subtypes of CMS, myasthenic symptoms may\n                            be mild, but sudden severe exacerbations of weakness or even sudden episodes of respiratory\n                            insufficiency may be precipitated by fever, infections, or excitement. Major findings of the\n                            neonatal-onset subtype include: respiratory insufficiency with sudden apnea and cyanosis;\n                            feeding difficulties; poor suck and cry; choking spells; eyelid ptosis; and facial, bulbar,\n                            and generalized weakness. Arthrogryposis multiplex congenita may also be present. Stridor in\n                            infancy may be an important clue to CMS. Later childhood-onset subtypes show abnormal muscle\n                            fatigability with difficulty in activities such as running or climbing stairs; motor\n                            milestones may be delayed; fluctuating eyelid ptosis and fixed or fluctuating extraocular\n                            muscle weakness are common presentations.\n                        </Attribute>\n                        <XRef ID=\"NBK1168\" DB=\"GeneReviews\"/>\n                    </AttributeSet>\n                    <Citation Type=\"review\" Abbrev=\"GeneReviews\">\n                        <ID Source=\"PubMed\">20301347</ID>\n                        <ID Source=\"BookShelf\">NBK1168</ID>\n                    </Citation>\n                    <XRef ID=\"C0751882\" DB=\"MedGen\"/>\n                    <XRef Type=\"Phenotypic series\" ID=\"PS601462\" DB=\"OMIM\"/>\n                </Trait>\n            </TraitSet>\n        </ReferenceClinVarAssertion>\n        <ClinVarAssertion ID=\"557913\">\n            <ClinVarSubmissionID localKey=\"NM_173660.4:c.1124_1127dupTGCC|MedGen:C0751882\"\n                                 submittedAssembly=\"not applicable\" submitter=\"GeneReviews\" submitterDate=\"2016-07-14\"/>\n            <ClinVarAccession Acc=\"SCV000292401\" 
Version=\"1\" Type=\"SCV\" OrgID=\"500062\" OrganizationCategory=\"resource\"\n                              OrgType=\"primary\" DateUpdated=\"2019-07-02\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2016-07-14\">\n                <ReviewStatus>no assertion criteria provided</ReviewStatus>\n                <Description>Pathogenic</Description>\n                <Citation>\n                    <ID Source=\"PubMed\">22230109</ID>\n                </Citation>\n                <Citation>\n                    <ID Source=\"PubMed\">16917026</ID>\n                </Citation>\n                <Citation>\n                    <URL>http://www.ncbi.nlm.nih.gov/books/NBK1168/</URL>\n                </Citation>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>yes</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>literature only</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <Name>\n                        <ElementValue Type=\"Alternate\">NP_775931.3:p.Ala378SerfsTer30</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS\">NM_173660.4:c.1124_1127dupTGCC</Attribute>\n                    </AttributeSet>\n                    <MeasureRelationship Type=\"variant in gene\">\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">DOK7</ElementValue>\n  
                      </Symbol>\n                    </MeasureRelationship>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <XRef DB=\"MedGen\" ID=\"C0751882\" Type=\"CUI\"/>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n        <ClinVarAssertion ID=\"1392845\" SubmissionName=\"SUB5348977\">\n            <ClinVarSubmissionID localKey=\"63693434|Orphanet:ORPHA590\" submittedAssembly=\"GRCh37\"\n                                 submitter=\"Laboratory for Molecular Medicine,Partners HealthCare Personalized Medicine\"\n                                 submitterDate=\"2019-03-21\"/>\n            <ClinVarAccession Acc=\"SCV000711740\" Version=\"1\" Type=\"SCV\" OrgID=\"21766\" OrganizationCategory=\"laboratory\"\n                              OrgType=\"primary\" DateUpdated=\"2019-08-30\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2015-12-16\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Pathogenic</Description>\n                <Citation>\n                    <ID Source=\"PubMed\">19261599</ID>\n                </Citation>\n                <Comment>The p.Ala378fs variant in DOK7 has been reported in 17 individuals with congenit al myasthenia\n                    syndrome (CMS) with limb-girdle pattern of muscle weakness who wer e either homozygous or compound\n                    heterozygous for this variant (Beeson 2006 and L orenzoni 2013). It has been identified in 0.1%\n                    (28/24230) of Eurpoean chromosome s by the Exome Aggregation Consortium (ExAC,\n                    http://exac.broadinstitute.org), wh ich is consistent with a carrier frequency for this disease. 
In\n                    vitro functional studies also provide some evidence that the Ala378fs variant may impact protein\n                    function. This variant is predicted to cause a frameshift, which alters the pro tein?s amino acid\n                    sequence beginning at position 378 and leads to a premature te rmination codon 30 amino acids\n                    downstream. Loss of function of the DOK7 gene is an established disease mechanism in individuals\n                    with CMS. In summary, this varia nt meets our criteria to be classified as pathogenic for CMS in an\n                    autosomal rec essive manner based upon its segregation in affected individuals and predicted i mpact\n                    on protein function.\n                </Comment>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ExternalID DB=\"Laboratory for Molecular Medicine\" ID=\"63693434\"/>\n            <AttributeSet>\n                <Attribute Type=\"ModeOfInheritance\">Autosomal recessive inheritance</Attribute>\n            </AttributeSet>\n            <AttributeSet>\n                <Attribute Type=\"AssertionMethod\">LMM Criteria</Attribute>\n                <Citation>\n                    <ID Source=\"PubMed\">24033266</ID>\n                </Citation>\n            </AttributeSet>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>not provided</AffectedStatus>\n                    <FamilyData NumFamiliesWithVariant=\"1\"/>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"VariantAlleles\" integerValue=\"1\"/>\n                </ObservedData>\n            </ObservedIn>\n            
<MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <Name>\n                        <ElementValue Type=\"Alternate\">NM_173660.4:c.1124_1127dupTGCC</ElementValue>\n                    </Name>\n                    <Name>\n                        <ElementValue Type=\"Alternate\">p.Ala378SerfsX30</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Type=\"Location\">NM_173660.4:EXON 7</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS\">NC_000004.11:g.3494840_3494841insTGCC</Attribute>\n                    </AttributeSet>\n                    <MeasureRelationship Type=\"variant in gene\">\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">DOK7</ElementValue>\n                        </Symbol>\n                    </MeasureRelationship>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <XRef DB=\"Orphanet\" ID=\"ORPHA590\"/>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n    </ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000267121.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"48774395\"><RecordStatus>current</RecordStatus><Title>NM_198156.3(VHL):c.164_171dup (p.Arg60fs) AND not provided</Title><ReferenceClinVarAssertion DateCreated=\"2016-12-05\" DateLastUpdated=\"2019-12-17\" ID=\"805869\"><ClinVarAccession Acc=\"RCV000267121\" Version=\"2\" Type=\"RCV\" DateUpdated=\"2019-12-17\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2018-03-30\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"49729956\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"280053\" Acc=\"VCV000280053\" Version=\"2\"><Measure Type=\"Duplication\" ID=\"264141\"><Name><ElementValue Type=\"Preferred\">NM_198156.3(VHL):c.164_171dup (p.Arg60fs)</ElementValue></Name><AttributeSet><Attribute Accession=\"NM_000551\" Version=\"3\" Change=\"c.164_171dupAGGCCGGG\" Type=\"HGVS, coding\">NM_000551.3:c.164_171dupAGGCCGGG</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_322t1\" Change=\"c.164_171dup\" Type=\"HGVS, coding, LRG\">LRG_322t1:c.164_171dup</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001354723\" Version=\"2\" Change=\"c.164_171dup\" Type=\"HGVS, coding, RefSeq\">NM_001354723.2:c.164_171dup</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_198156\" Version=\"3\" Change=\"c.164_171dup\" 
Type=\"HGVS, coding, RefSeq\">NM_198156.3:c.164_171dup</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_322\" Change=\"g.5377_5384dup\" Type=\"HGVS, genomic, LRG\">LRG_322:g.5377_5384dup</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_008212\" Version=\"3\" Change=\"g.5377_5384dup\" Type=\"HGVS, genomic, RefSeqGene\">NG_008212.3:g.5377_5384dup</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000003\" Version=\"12\" Change=\"g.10142011_10142018dup\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000003.12:g.10142011_10142018dup</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000003\" Version=\"11\" Change=\"g.10183695_10183702dup\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000003.11:g.10183695_10183702dup</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_322p1\" Change=\"p.Arg60fs\" Type=\"HGVS, protein\">LRG_322p1:p.Arg60fs</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001341652\" Version=\"1\" Change=\"p.Arg60fs\" Type=\"HGVS, protein, RefSeq\">NP_001341652.1:p.Arg60fs</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_937799\" Version=\"1\" Change=\"p.Arg60fs\" Type=\"HGVS, protein, RefSeq\">NP_937799.1:p.Arg60fs</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">frameshift variant</Attribute><XRef ID=\"SO:0001589\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001354723.2:c.164_171dup\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">frameshift variant</Attribute><XRef ID=\"SO:0001589\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_198156.3:c.164_171dup\" DB=\"RefSeq\" /></AttributeSet><CytogeneticLocation>3p25.3</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"3\" Accession=\"NC_000003.12\" start=\"10142011\" stop=\"10142018\" display_start=\"10142011\" 
display_stop=\"10142018\" variantLength=\"8\" positionVCF=\"10142008\" referenceAlleleVCF=\"T\" alternateAlleleVCF=\"TGGAGGCCG\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"3\" Accession=\"NC_000003.11\" start=\"10183695\" stop=\"10183702\" display_start=\"10183695\" display_stop=\"10183702\" variantLength=\"8\" positionVCF=\"10183692\" referenceAlleleVCF=\"T\" alternateAlleleVCF=\"TGGAGGCCG\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">von Hippel-Lindau tumor suppressor</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">VHL</ElementValue></Symbol><AttributeSet><Attribute dateValue=\"2017-06-28\" Type=\"Haploinsufficiency\">Sufficient evidence for dosage pathogenicity</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=VHL</URL></Citation></AttributeSet><AttributeSet><Attribute dateValue=\"2017-06-28\" Type=\"Triplosensitivity\">No evidence available</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=VHL</URL></Citation></AttributeSet><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"3\" Accession=\"NC_000003.12\" start=\"10141778\" stop=\"10153667\" display_start=\"10141778\" display_stop=\"10153667\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"3\" Accession=\"NC_000003.11\" start=\"10183318\" stop=\"10195353\" display_start=\"10183318\" display_stop=\"10195353\" variantLength=\"12036\" Strand=\"+\" /><XRef ID=\"7428\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"608537\" DB=\"OMIM\" /><XRef ID=\"HGNC:12687\" DB=\"HGNC\" /><Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations of 2013 (PubMed 23788249) for reporting incidental findings in 
exons.</Comment><Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations of 2016 (PubMed 27854360) for reporting incidental findings in exons.</Comment></MeasureRelationship><XRef Type=\"rs\" ID=\"886041345\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_198156.3(VHL):c.164_171dup (p.Arg60fs)</ElementValue></Name><XRef ID=\"CA10602889\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"9460\"><Trait ID=\"17556\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not provided</ElementValue><XRef ID=\"13DG0619\" DB=\"Developmental Genetics Unit,King Faisal Specialist Hospital &amp; Research Centre\" /></Name><AttributeSet><Attribute Type=\"public definition\">The term 'not provided' is registered in MedGen to support identification of submissions to ClinVar for which no condition was named when assessing the variant. 'not provided' differs from 'not specified', which is used when a variant is asserted to be benign, likely benign, or of uncertain significance for conditions that have not been specified.</Attribute></AttributeSet><XRef ID=\"CN517202\" DB=\"MedGen\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"643185\" SubmissionName=\"SUB5098196\"><ClinVarSubmissionID localKey=\"GDX:704969|Not Provided\" submittedAssembly=\"GRCh37\" submitter=\"GeneDx\" submitterDate=\"2019-01-29\" /><ClinVarAccession Acc=\"SCV000329835\" Version=\"4\" Type=\"SCV\" OrgID=\"26957\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-05-09\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2018-03-30\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Pathogenic</Description><Comment Type=\"public\">The c.164_171dupAGGCCGGG variant in the VHL gene has been reported previously in associationwith von Hippel-Lindau syndrome (Chacon-Camacho et al., 2014). 
The duplication causes aframeshift starting with codon Arginine 60, changes this amino acid to a Glycine residue and creates apremature Stop codon at position 10 of the new reading frame, denoted p.Arg60GlyfsX10. Thisvariant is predicted to cause loss of normal protein function either through protein truncation ornonsense-mediated mRNA decay. Based on the currently available information, we considerc.164_171dupAGGCCGGG to be pathogenic.</Comment></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"GeneDx\" ID=\"GDX:704969\" /><AttributeSet><Attribute Type=\"AssertionMethod\">GeneDx Variant Classification (06012015)</Attribute><Citation><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/7oynscmk/mdi-5616_26957_genedx_interprules_final_061215.pdf</URL></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>yes</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_000551.3:c.164_171dupAGGCCGGG</Attribute></AttributeSet><SequenceLocation Assembly=\"GRCh37\" Chr=\"3\" alternateAllele=\"AGGCCGGGAGGCCGGG\" referenceAllele=\"AGGCCGGG\" start=\"10183702\" stop=\"10183703\" variantLength=\"2\" /><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">VHL</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Not Provided</ElementValue></Name></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000342164.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\"\n            xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n    <ClinVarSet ID=\"47356189\">\n        <RecordStatus>current</RecordStatus>\n        <Title>NM_000368.4(TSC1):c.2075_2120dup (p.Phe707delinsLeuArgProValAlaPheThrAlaGlnProValThrLeuTer) AND not\n            provided\n        </Title>\n        <ReferenceClinVarAssertion DateCreated=\"2016-12-05\" DateLastUpdated=\"2019-11-02\" ID=\"880915\">\n            <ClinVarAccession Acc=\"RCV000342164\" Version=\"1\" Type=\"RCV\" DateUpdated=\"2019-11-02\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2016-09-20\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Pathogenic</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>yes</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData ID=\"48252430\">\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\" ID=\"280887\" Acc=\"VCV000280887\" Version=\"2\">\n                <Measure Type=\"Duplication\" ID=\"264318\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">NM_000368.4(TSC1):c.2075_2120dup\n                            
(p.Phe707delinsLeuArgProValAlaPheThrAlaGlnProValThrLeuTer)\n                        </ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_000368\" Version=\"4\"\n                                   Change=\"c.2075_2120dupGAGACCAGTTGCTTTTACTGCACAACCAGTTACTCTATGAGCGTTT\"\n                                   Type=\"HGVS, coding\">\n                            NM_000368.4:c.2075_2120dupGAGACCAGTTGCTTTTACTGCACAACCAGTTACTCTATGAGCGTTT\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"LRG_486t1\" Change=\"c.2075_2120dup\" Type=\"HGVS, coding, LRG\">\n                            LRG_486t1:c.2075_2120dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_001362177\" Version=\"2\" Change=\"c.1712_1757dup\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_001362177.2:c.1712_1757dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_001162427\" Version=\"2\" Change=\"c.1922_1967dup\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_001162427.2:c.1922_1967dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_001162426\" Version=\"2\" Change=\"c.2072_2117dup\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_001162426.2:c.2072_2117dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_000368\" Version=\"4\" Change=\"c.2075_2120dup\"\n                                   Type=\"HGVS, coding, 
RefSeq\">NM_000368.4:c.2075_2120dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"LRG_486\" Change=\"g.45850_45895dup\" Type=\"HGVS, genomic, LRG\">\n                            LRG_486:g.45850_45895dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_012386\" Version=\"1\" Change=\"g.45850_45895dup\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_012386.1:g.45850_45895dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000009\" Version=\"12\" Change=\"g.132903739_132903784dup\"\n                                   Type=\"HGVS, genomic, top level\" integerValue=\"38\">\n                            NC_000009.12:g.132903739_132903784dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000009\" Version=\"11\" Change=\"g.135779126_135779171dup\"\n                                   Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">\n                            NC_000009.11:g.135779126_135779171dup\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"LRG_486p1\"\n                                   Change=\"p.Phe707delinsLeuArgProValAlaPheThrAlaGlnProValThrLeuTer\"\n                                   Type=\"HGVS, protein\">\n                            LRG_486p1:p.Phe707delinsLeuArgProValAlaPheThrAlaGlnProValThrLeuTer\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute 
Accession=\"NP_001349106\" Version=\"1\"\n                                   Change=\"p.Phe586delinsLeuArgProValAlaPheThrAlaGlnProValThrLeuTer\"\n                                   Type=\"HGVS, protein, RefSeq\">\n                            NP_001349106.1:p.Phe586delinsLeuArgProValAlaPheThrAlaGlnProValThrLeuTer\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_001155899\" Version=\"1\"\n                                   Change=\"p.Phe656delinsLeuArgProValAlaPheThrAlaGlnProValThrLeuTer\"\n                                   Type=\"HGVS, protein, RefSeq\">\n                            NP_001155899.1:p.Phe656delinsLeuArgProValAlaPheThrAlaGlnProValThrLeuTer\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_001155898\" Version=\"1\"\n                                   Change=\"p.Phe706delinsLeuArgProValAlaPheThrAlaGlnProValThrLeuTer\"\n                                   Type=\"HGVS, protein, RefSeq\">\n                            NP_001155898.1:p.Phe706delinsLeuArgProValAlaPheThrAlaGlnProValThrLeuTer\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_000359\" Version=\"1\"\n                                   Change=\"p.Phe707delinsLeuArgProValAlaPheThrAlaGlnProValThrLeuTer\"\n                                   Type=\"HGVS, protein, RefSeq\">\n                            NP_000359.1:p.Phe707delinsLeuArgProValAlaPheThrAlaGlnProValThrLeuTer\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">nonsense</Attribute>\n                        <XRef ID=\"SO:0001587\" DB=\"Sequence Ontology\"/>\n                        <XRef 
ID=\"NM_000368.4:c.2075_2120dup\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">nonsense</Attribute>\n                        <XRef ID=\"SO:0001587\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_001162426.2:c.2072_2117dup\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">nonsense</Attribute>\n                        <XRef ID=\"SO:0001587\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_001162427.2:c.1922_1967dup\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">nonsense</Attribute>\n                        <XRef ID=\"SO:0001587\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_001362177.2:c.1712_1757dup\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <CytogeneticLocation>9q34.13</CytogeneticLocation>\n                    <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                      AssemblyStatus=\"current\" Chr=\"9\" Accession=\"NC_000009.12\" start=\"132903739\"\n                                      stop=\"132903784\" display_start=\"132903739\" display_stop=\"132903784\"\n                                      variantLength=\"46\" positionVCF=\"132903738\" referenceAlleleVCF=\"A\"\n                                      alternateAlleleVCF=\"AAAACGCTCATAGAGTAACTGGTTGTGCAGTAAAAGCAACTGGTCTC\"/>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"9\" Accession=\"NC_000009.11\" start=\"135779126\"\n                                      stop=\"135779171\" display_start=\"135779126\" 
display_stop=\"135779171\"\n                                      variantLength=\"1\" positionVCF=\"135779125\" referenceAlleleVCF=\"A\"\n                                      alternateAlleleVCF=\"AAAACGCTCATAGAGTAACTGGTTGTGCAGTAAAAGCAACTGGTCTC\"/>\n                    <MeasureRelationship Type=\"within single gene\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">TSC complex subunit 1</ElementValue>\n                        </Name>\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">TSC1</ElementValue>\n                        </Symbol>\n                        <AttributeSet>\n                            <Attribute dateValue=\"2012-04-19\" Type=\"Haploinsufficiency\">Sufficient evidence for dosage\n                                pathogenicity\n                            </Attribute>\n                            <Citation>\n                                <URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=TSC1</URL>\n                            </Citation>\n                        </AttributeSet>\n                        <AttributeSet>\n                            <Attribute dateValue=\"2012-04-19\" Type=\"Triplosensitivity\">No evidence available</Attribute>\n                            <Citation>\n                                <URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=TSC1</URL>\n                            </Citation>\n                        </AttributeSet>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"9\" Accession=\"NC_000009.12\" start=\"132891349\"\n                                          stop=\"132945269\" display_start=\"132891349\" display_stop=\"132945269\"\n                                          Strand=\"-\"/>\n                        <SequenceLocation 
Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                          AssemblyStatus=\"previous\" Chr=\"9\" Accession=\"NC_000009.11\" start=\"135766734\"\n                                          stop=\"135820019\" display_start=\"135766734\" display_stop=\"135820019\"\n                                          variantLength=\"53286\" Strand=\"-\"/>\n                        <XRef ID=\"7248\" DB=\"Gene\"/>\n                        <XRef Type=\"MIM\" ID=\"605284\" DB=\"OMIM\"/>\n                        <XRef ID=\"HGNC:12362\" DB=\"HGNC\"/>\n                        <Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations\n                            of 2013 (PubMed 23788249) for reporting incidental findings in exons.\n                        </Comment>\n                        <Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations\n                            of 2016 (PubMed 27854360) for reporting incidental findings in exons.\n                        </Comment>\n                    </MeasureRelationship>\n                    <XRef Type=\"rs\" ID=\"886042012\" DB=\"dbSNP\"/>\n                </Measure>\n                <Name>\n                    <ElementValue Type=\"Preferred\">NM_000368.4(TSC1):c.2075_2120dup\n                        (p.Phe707delinsLeuArgProValAlaPheThrAlaGlnProValThrLeuTer)\n                    </ElementValue>\n                </Name>\n                <XRef ID=\"CA10603025\" DB=\"ClinGen\"/>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\" ID=\"9460\">\n                <Trait ID=\"17556\" Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">not provided</ElementValue>\n                        <XRef ID=\"13DG0619\"\n                              DB=\"Developmental Genetics Unit,King Faisal Specialist Hospital &amp; Research Centre\"/>\n                
    </Name>\n                    <AttributeSet>\n                        <Attribute Type=\"public definition\">The term 'not provided' is registered in MedGen to support\n                            identification of submissions to ClinVar for which no condition was named when assessing the\n                            variant. 'not provided' differs from 'not specified', which is used when a variant is\n                            asserted to be benign, likely benign, or of uncertain significance for conditions that have\n                            not been specified.\n                        </Attribute>\n                    </AttributeSet>\n                    <XRef ID=\"CN517202\" DB=\"MedGen\"/>\n                </Trait>\n            </TraitSet>\n        </ReferenceClinVarAssertion>\n        <ClinVarAssertion ID=\"644191\" SubmissionName=\"SUB5098196\">\n            <ClinVarSubmissionID localKey=\"GDX:1281897|Not Provided\" submittedAssembly=\"GRCh37\" submitter=\"GeneDx\"\n                                 submitterDate=\"2019-01-29\"/>\n            <ClinVarAccession Acc=\"SCV000330841\" Version=\"4\" Type=\"SCV\" OrgID=\"26957\" OrganizationCategory=\"laboratory\"\n                              OrgType=\"primary\" DateUpdated=\"2019-05-09\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2016-09-20\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Pathogenic</Description>\n                <Comment Type=\"public\">The c.2075_2120dup46 pathogenic variant in the TSC1 gene causes a frameshift\n                    starting with codon Phenylalanine 707, changes this amino acid to a Leucine residue and creates a\n                    premature Stop codon at position 14 of the new reading frame, denoted p.Phe707LeufsX14. 
This\n                    pathogenic variant is predicted to cause loss of normal protein function either through protein\n                    truncation or nonsense-mediated mRNA decay. Furthermore, it was not observed in approximately 6,500\n                    individuals of European and African American ancestry in the NHLBI Exome Sequencing Project,\n                    indicating it is not a common benign variant in these populations.\n                </Comment>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ExternalID DB=\"GeneDx\" ID=\"GDX:1281897\"/>\n            <AttributeSet>\n                <Attribute Type=\"AssertionMethod\">GeneDx Variant Classification (06012015)</Attribute>\n                <Citation>\n                    <URL>\n                        https://submit.ncbi.nlm.nih.gov/ft/byid/7oynscmk/mdi-5616_26957_genedx_interprules_final_061215.pdf\n                    </URL>\n                </Citation>\n            </AttributeSet>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>yes</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS\">NM_000368.4:c.2075_2120dup46</Attribute>\n                    </AttributeSet>\n                    <SequenceLocation Assembly=\"GRCh37\" Chr=\"9\"\n                                      
alternateAllele=\"AAACGCTCATAGAGTAACTGGTTGTGCAGTAAAAGCAACTGGTCTCAAACGCTCATAGAGTAACTGGTTGTGCAGTAAAAGCAACTGGTCTC\"\n                                      referenceAllele=\"AAACGCTCATAGAGTAACTGGTTGTGCAGTAAAAGCAACTGGTCTC\" start=\"135779125\"\n                                      stop=\"135779126\" variantLength=\"2\"/>\n                    <MeasureRelationship Type=\"variant in gene\">\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">TSC1</ElementValue>\n                        </Symbol>\n                    </MeasureRelationship>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Not Provided</ElementValue>\n                    </Name>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n    </ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000373191.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"47386811\"><RecordStatus>current</RecordStatus><Title>NM_004208.4(AIFM1):c.273T&gt;C (p.Asp91=) AND Combined oxidative phosphorylation deficiency</Title><ReferenceClinVarAssertion DateCreated=\"2016-12-05\" DateLastUpdated=\"2019-11-02\" ID=\"911942\"><ClinVarAccession Acc=\"RCV000373191\" Version=\"1\" Type=\"RCV\" DateUpdated=\"2019-11-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2016-06-14\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"48283551\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"367892\" Acc=\"VCV000367892\" Version=\"1\"><Measure Type=\"single nucleotide variant\" ID=\"352059\"><Name><ElementValue Type=\"Preferred\">NM_004208.4(AIFM1):c.273T&gt;C (p.Asp91=)</ElementValue></Name><AttributeSet><Attribute Accession=\"NM_145812\" Version=\"2\" Change=\"c.261T&gt;C\" Type=\"HGVS, coding, RefSeq\">NM_145812.2:c.261T&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001130847\" Version=\"3\" Change=\"c.273T&gt;C\" Type=\"HGVS, coding, RefSeq\">NM_001130847.3:c.273T&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_004208\" Version=\"4\" Change=\"c.273T&gt;C\" Type=\"HGVS, coding, RefSeq\">NM_004208.4:c.273T&gt;C</Attribute></AttributeSet><AttributeSet><Attribute 
Accession=\"NG_013217\" Version=\"1\" Change=\"g.21289T&gt;C\" Type=\"HGVS, genomic, RefSeqGene\">NG_013217.1:g.21289T&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000023\" Version=\"11\" Change=\"g.130149545A&gt;G\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000023.11:g.130149545A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000023\" Version=\"10\" Change=\"g.129283520A&gt;G\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000023.10:g.129283520A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NR_132647\" Version=\"1\" Change=\"n.361T&gt;C\" Type=\"HGVS, non-coding\">NR_132647.1:n.361T&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_004208\" Version=\"3\" Change=\"c.273T&gt;C\" Type=\"HGVS, previous\">NM_004208.3:c.273T&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_665811\" Version=\"1\" Change=\"p.Asp87=\" Type=\"HGVS, protein, RefSeq\">NP_665811.1:p.Asp87=</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001124319\" Version=\"1\" Change=\"p.Asp91=\" Type=\"HGVS, protein, RefSeq\">NP_001124319.1:p.Asp91=</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_004199\" Version=\"1\" Change=\"p.Asp91=\" Type=\"HGVS, protein, RefSeq\">NP_004199.1:p.Asp91=</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">non-coding transcript variant</Attribute><XRef ID=\"SO:0001619\" DB=\"Sequence Ontology\" /><XRef ID=\"NR_132647.1:n.361T&gt;C\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">synonymous variant</Attribute><XRef ID=\"SO:0001819\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001130847.3:c.273T&gt;C\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">synonymous variant</Attribute><XRef ID=\"SO:0001819\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_004208.4:c.273T&gt;C\" DB=\"RefSeq\" 
/></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">synonymous variant</Attribute><XRef ID=\"SO:0001819\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_145812.2:c.261T&gt;C\" DB=\"RefSeq\" /></AttributeSet><AlleleFrequencyList><AlleleFrequency Value=\"0.50980\" Source=\"NHLBI Exome Sequencing Project (ESP) Exome Variant Server\" /><AlleleFrequency Value=\"0.49113\" Source=\"1000 Genomes Project\" /><AlleleFrequency Value=\"0.43294\" Source=\"Exome Aggregation Consortium (ExAC)\" /><AlleleFrequency Value=\"0.48595\" Source=\"The Genome Aggregation Database (gnomAD)\" /><AlleleFrequency Value=\"0.43142\" Source=\"The Genome Aggregation Database (gnomAD), exomes\" /><AlleleFrequency Value=\"0.49079\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\" /></AlleleFrequencyList><GlobalMinorAlleleFrequency Value=\"0.49113\" Source=\"1000 Genomes Project\" MinorAllele=\"G\" /><CytogeneticLocation>Xq26.1</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"X\" Accession=\"NC_000023.11\" start=\"130149545\" stop=\"130149545\" display_start=\"130149545\" display_stop=\"130149545\" variantLength=\"1\" positionVCF=\"130149545\" referenceAlleleVCF=\"A\" alternateAlleleVCF=\"G\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"X\" Accession=\"NC_000023.10\" start=\"129283520\" stop=\"129283520\" display_start=\"129283520\" display_stop=\"129283520\" variantLength=\"1\" positionVCF=\"129283520\" referenceAlleleVCF=\"A\" alternateAlleleVCF=\"G\" /><MeasureRelationship Type=\"within multiple genes by overlap\"><Name><ElementValue Type=\"Preferred\">RAB33A, member RAS oncogene family</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">RAB33A</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"X\" 
Accession=\"NC_000023.11\" start=\"130110633\" stop=\"130184873\" display_start=\"130110633\" display_stop=\"130184873\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"X\" Accession=\"NC_000023.10\" start=\"129305772\" stop=\"129318843\" display_start=\"129305772\" display_stop=\"129318843\" variantLength=\"13072\" Strand=\"+\" /><XRef ID=\"9363\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"300333\" DB=\"OMIM\" /><XRef ID=\"HGNC:9773\" DB=\"HGNC\" /></MeasureRelationship><MeasureRelationship Type=\"within multiple genes by overlap\"><Name><ElementValue Type=\"Preferred\">apoptosis inducing factor mitochondria associated 1</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">AIFM1</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"X\" Accession=\"NC_000023.11\" start=\"130129362\" stop=\"130165887\" display_start=\"130129362\" display_stop=\"130165887\" Strand=\"-\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"X\" Accession=\"NC_000023.10\" start=\"129263336\" stop=\"129299860\" display_start=\"129263336\" display_stop=\"129299860\" variantLength=\"36525\" Strand=\"-\" /><XRef ID=\"9131\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"300169\" DB=\"OMIM\" /><XRef Type=\"MIM\" ID=\"300614\" DB=\"OMIM\" /><XRef Type=\"MIM\" ID=\"310490\" DB=\"OMIM\" /><XRef ID=\"HGNC:8768\" DB=\"HGNC\" /></MeasureRelationship><XRef ID=\"560801\" DB=\"Illumina Clinical Services Laboratory,Illumina\" /><XRef Type=\"rs\" ID=\"1139851\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_004208.4(AIFM1):c.273T&gt;C (p.Asp91=)</ElementValue></Name><XRef ID=\"CA10515478\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"33450\"><Trait ID=\"33019\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Combined oxidative 
phosphorylation deficiency</ElementValue><XRef Type=\"Phenotypic series\" ID=\"PS609060 \" DB=\"OMIM\" /></Name><Name><ElementValue Type=\"Alternate\">Mitochondrial oxidative phosphorylation disorder due to nuclear DNA anomalies</ElementValue><XRef ID=\"2443\" DB=\"Orphanet\" /></Name><XRef ID=\"CN228601\" DB=\"MedGen\" /><XRef ID=\"2443\" DB=\"Orphanet\" /><XRef Type=\"Phenotypic series\" ID=\"PS609060\" DB=\"OMIM\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"795252\" SubmissionName=\"ICSL_2016Q4\"><ClinVarSubmissionID localKey=\"560801|Combined Oxidative Phosphorylation Deficiency\" submittedAssembly=\"GRCh37\" submitter=\"Illumina Clinical Services Laboratory,Illumina\" submitterDate=\"2016-10-18\" /><ClinVarAccession Acc=\"SCV000481731\" Version=\"2\" Type=\"SCV\" OrgID=\"504895\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2016-06-14\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"Illumina Clinical Services Laboratory\" ID=\"560801\" /><AttributeSet><Attribute Type=\"AssertionMethod\">ICSL Variant Classification 20161018</Attribute><Citation Type=\"general\"><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/4jQgNGYk/ICSL_Variant_Classification_20161018.pdf</URL><CitationText>ICSL_Variant_Classification_20161018.pdf</CitationText></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute 
Type=\"HGVS\">NM_004208.3:c.273T&gt;C</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">AIFM1</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Combined Oxidative Phosphorylation Deficiency</ElementValue></Name></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000401212.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"47414496\"><RecordStatus>current</RecordStatus><Title>NM_176824.3(BBS7):c.*690T&gt;C AND Bardet-Biedl syndrome</Title><ReferenceClinVarAssertion DateCreated=\"2016-12-05\" DateLastUpdated=\"2019-11-02\" ID=\"939963\"><ClinVarAccession Acc=\"RCV000401212\" Version=\"1\" Type=\"RCV\" DateUpdated=\"2019-11-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2016-06-14\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"48290764\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"347473\" Acc=\"VCV000347473\" Version=\"1\"><Measure Type=\"single nucleotide variant\" ID=\"293627\"><Name><ElementValue Type=\"Preferred\">NM_176824.3(BBS7):c.*690T&gt;C</ElementValue></Name><AttributeSet><Attribute Accession=\"NM_176824\" Version=\"3\" Change=\"c.*690T&gt;C\" Type=\"HGVS, coding, RefSeq\">NM_176824.3:c.*690T&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_052974\" Version=\"1\" Change=\"g.3832T&gt;C\" Type=\"HGVS, genomic, RefSeqGene\">NG_052974.1:g.3832T&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_009111\" Version=\"1\" Change=\"g.50318T&gt;C\" Type=\"HGVS, genomic, RefSeqGene\">NG_009111.1:g.50318T&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000004\" Version=\"12\" 
Change=\"g.121825170A&gt;G\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000004.12:g.121825170A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000004\" Version=\"11\" Change=\"g.122746325A&gt;G\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000004.11:g.122746325A&gt;G</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_176824\" Version=\"2\" Change=\"c.*690T&gt;C\" Type=\"HGVS, previous\">NM_176824.2:c.*690T&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">3 prime UTR variant</Attribute><XRef ID=\"SO:0001624\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_176824.3:c.*690T&gt;C\" DB=\"RefSeq\" /></AttributeSet><AlleleFrequencyList><AlleleFrequency Value=\"0.24940\" Source=\"1000 Genomes Project\" /><AlleleFrequency Value=\"0.28803\" Source=\"The Genome Aggregation Database (gnomAD)\" /><AlleleFrequency Value=\"0.25597\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\" /></AlleleFrequencyList><GlobalMinorAlleleFrequency Value=\"0.24940\" Source=\"1000 Genomes Project\" MinorAllele=\"G\" /><CytogeneticLocation>4q27</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"4\" Accession=\"NC_000004.12\" start=\"121825170\" stop=\"121825170\" display_start=\"121825170\" display_stop=\"121825170\" variantLength=\"1\" positionVCF=\"121825170\" referenceAlleleVCF=\"A\" alternateAlleleVCF=\"G\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"4\" Accession=\"NC_000004.11\" start=\"122746325\" stop=\"122746325\" display_start=\"122746325\" display_stop=\"122746325\" variantLength=\"1\" positionVCF=\"122746325\" referenceAlleleVCF=\"A\" alternateAlleleVCF=\"G\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">Bardet-Biedl syndrome 7</ElementValue></Name><Symbol><ElementValue 
Type=\"Preferred\">BBS7</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"4\" Accession=\"NC_000004.12\" start=\"121824329\" stop=\"121870497\" display_start=\"121824329\" display_stop=\"121870497\" Strand=\"-\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"4\" Accession=\"NC_000004.11\" start=\"122745483\" stop=\"122791651\" display_start=\"122745483\" display_stop=\"122791651\" variantLength=\"46169\" Strand=\"-\" /><XRef ID=\"55212\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"607590\" DB=\"OMIM\" /><XRef ID=\"HGNC:18758\" DB=\"HGNC\" /></MeasureRelationship><XRef ID=\"35481\" DB=\"Illumina Clinical Services Laboratory,Illumina\" /><XRef Type=\"rs\" ID=\"3217753\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_176824.3(BBS7):c.*690T&gt;C</ElementValue></Name><XRef ID=\"CA10617888\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"6293\"><Trait ID=\"376\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Bardet-Biedl syndrome</ElementValue><XRef ID=\"Bardet-Biedl+Syndrome/723\" DB=\"Genetic Alliance\" /><XRef ID=\"bardet-biedl-syndrome\" DB=\"Genetics Home Reference\" /><XRef ID=\"6866\" DB=\"Office of Rare Diseases\" /><XRef ID=\"5619004\" DB=\"SNOMED CT\" /></Name><Symbol><ElementValue Type=\"Preferred\">BBS</ElementValue><XRef ID=\"6866\" DB=\"Office of Rare Diseases\" /></Symbol><AttributeSet><Attribute Type=\"public definition\">Bardet-Biedl syndrome (BBS) is characterized by rod-cone dystrophy, truncal obesity, postaxial polydactyly, cognitive impairment, male hypogonadotropic hypogonadism, complex female genitourinary malformations, and renal abnormalities. The visual prognosis for children with BBS is poor. Night blindness is usually evident by age seven to eight years; the mean age of legal blindness is 15.5 years. 
Birth weight is usually normal, but significant weight gain begins within the first year and becomes a lifelong issue for most individuals. A majority of individuals have significant learning difficulties; a minority have severe impairment on IQ testing. Renal disease is a major cause of morbidity and mortality.</Attribute><XRef ID=\"NBK1363\" DB=\"GeneReviews\" /></AttributeSet><Citation Type=\"review\" Abbrev=\"GeneReviews\"><ID Source=\"PubMed\">20301537</ID><ID Source=\"BookShelf\">NBK1363</ID></Citation><Citation Type=\"Translational/Evidence-based\" Abbrev=\"EuroGentest, 2010\"><ID Source=\"pmc\">3061994</ID></Citation><XRef ID=\"C0752166\" DB=\"MedGen\" /><XRef ID=\"110\" DB=\"Orphanet\" /><XRef Type=\"Phenotypic series\" ID=\"PS209900\" DB=\"OMIM\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"760900\" SubmissionName=\"ICSL_2016Q4\"><ClinVarSubmissionID localKey=\"35481|Bardet-Biedl Syndrome\" submittedAssembly=\"GRCh37\" submitter=\"Illumina Clinical Services Laboratory,Illumina\" submitterDate=\"2016-10-18\" /><ClinVarAccession Acc=\"SCV000447379\" Version=\"2\" Type=\"SCV\" OrgID=\"504895\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-03-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2016-06-14\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"Illumina Clinical Services Laboratory\" ID=\"35481\" /><AttributeSet><Attribute Type=\"AssertionMethod\">ICSL Variant Classification 20161018</Attribute><Citation Type=\"general\"><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/4jQgNGYk/ICSL_Variant_Classification_20161018.pdf</URL><CitationText>ICSL_Variant_Classification_20161018.pdf</CitationText></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species 
TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_176824.2:c.*690T&gt;C</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">BBS7</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Bardet-Biedl Syndrome</ElementValue></Name></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000406351.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\"\n            xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n    <ClinVarSet ID=\"48803074\">\n        <RecordStatus>current</RecordStatus>\n        <Title>NM_001080522.2(CC2D2A):c.676_678GAA[3] (p.Glu229del) AND Joubert syndrome</Title>\n        <ReferenceClinVarAssertion DateCreated=\"2016-12-05\" DateLastUpdated=\"2019-12-17\" ID=\"945102\">\n            <ClinVarAccession Acc=\"RCV000406351\" Version=\"1\" Type=\"RCV\" DateUpdated=\"2019-12-17\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2016-06-14\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Likely benign</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>unknown</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData ID=\"49436860\">\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\" ID=\"56316\" Acc=\"VCV000056316\" Version=\"2\">\n                <Measure Type=\"Microsatellite\" ID=\"70955\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">NM_001080522.2(CC2D2A):c.676_678GAA[3] (p.Glu229del)\n                        </ElementValue>\n                    </Name>\n                    
<AttributeSet>\n                        <Attribute Accession=\"NM_001080522\" Version=\"2\" Change=\"c.676_678GAA[3]\" Type=\"HGVS, coding\">\n                            NM_001080522.2:c.676_678GAA[3]\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_001080522\" Version=\"2\" Change=\"c.685_687del\" Type=\"HGVS, coding\">\n                            NM_001080522.2:c.685_687del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_001080522\" Version=\"2\" Change=\"c.685_687delGAA\" Type=\"HGVS, coding\">\n                            NM_001080522.2:c.685_687delGAA\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"LRG_697t1\" Change=\"c.685_687del\" Type=\"HGVS, coding, LRG\">\n                            LRG_697t1:c.685_687del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"LRG_697\" Change=\"g.46517_46519GAA[3]\" Type=\"HGVS, genomic, LRG\">\n                            LRG_697:g.46517_46519GAA[3]\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_013035\" Version=\"1\" Change=\"g.46517_46519GAA[3]\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_013035.1:g.46517_46519GAA[3]\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000004\" Version=\"12\" Change=\"g.15511382_15511384GAA[3]\"\n                                   Type=\"HGVS, genomic, top level\" integerValue=\"38\">\n             
               NC_000004.12:g.15511382_15511384GAA[3]\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000004\" Version=\"11\" Change=\"g.15513005_15513007GAA[3]\"\n                                   Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">\n                            NC_000004.11:g.15513005_15513007GAA[3]\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"LRG_697p1\" Change=\"p.Glu229del\" Type=\"HGVS, protein\">\n                            LRG_697p1:p.Glu229del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_001073991\" Version=\"2\" Change=\"p.Glu229del\"\n                                   Type=\"HGVS, protein, RefSeq\">NP_001073991.2:p.Glu229del\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"Location\">NM_001080522.2:exon 9</Attribute>\n                        <XRef ID=\"CD110644\" DB=\"HGMD\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"ProteinChange1LetterCode\">E229del</Attribute>\n                    </AttributeSet>\n                    <GlobalMinorAlleleFrequency Value=\"0.04333\" Source=\"1000 Genomes Project\" MinorAllele=\"GAAGAAGAA\"/>\n                    <CytogeneticLocation>4p15.32</CytogeneticLocation>\n                    <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                      AssemblyStatus=\"current\" Chr=\"4\" Accession=\"NC_000004.12\" start=\"15511391\"\n                                      stop=\"15511393\" display_start=\"15511391\" 
display_stop=\"15511393\" variantLength=\"3\"\n                                      positionVCF=\"15511381\" referenceAlleleVCF=\"GGAA\" alternateAlleleVCF=\"G\"/>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"4\" Accession=\"NC_000004.11\" start=\"15513014\"\n                                      stop=\"15513016\" display_start=\"15513014\" display_stop=\"15513016\" variantLength=\"3\"\n                                      positionVCF=\"15513004\" referenceAlleleVCF=\"GGAA\" alternateAlleleVCF=\"G\"/>\n                    <MeasureRelationship Type=\"within single gene\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">coiled-coil and C2 domain containing 2A</ElementValue>\n                        </Name>\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">CC2D2A</ElementValue>\n                        </Symbol>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"4\" Accession=\"NC_000004.12\" start=\"15468660\"\n                                          stop=\"15601971\" display_start=\"15468660\" display_stop=\"15601971\" Strand=\"+\"/>\n                        <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                          AssemblyStatus=\"previous\" Chr=\"4\" Accession=\"NC_000004.11\" start=\"15471488\"\n                                          stop=\"15603179\" display_start=\"15471488\" display_stop=\"15603179\"\n                                          variantLength=\"131692\" Strand=\"+\"/>\n                        <XRef ID=\"57545\" DB=\"Gene\"/>\n                        <XRef Type=\"MIM\" ID=\"612013\" DB=\"OMIM\"/>\n                        
<XRef ID=\"HGNC:29253\" DB=\"HGNC\"/>\n                    </MeasureRelationship>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">21068128</ID>\n                    </Citation>\n                    <XRef ID=\"498850\" DB=\"Illumina Clinical Services Laboratory,Illumina\"/>\n                    <XRef Type=\"rs\" ID=\"386833764\" DB=\"dbSNP\"/>\n                </Measure>\n                <Name>\n                    <ElementValue Type=\"Preferred\">NM_001080522.2(CC2D2A):c.676_678GAA[3] (p.Glu229del)</ElementValue>\n                </Name>\n                <XRef ID=\"CA144242\" DB=\"ClinGen\"/>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\" ID=\"108\">\n                <Trait ID=\"1878\" Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Joubert syndrome</ElementValue>\n                        <XRef ID=\"joubert-syndrome\" DB=\"Genetics Home Reference\"/>\n                        <XRef ID=\"6802\" DB=\"Office of Rare Diseases\"/>\n                        <XRef ID=\"716997004\" DB=\"SNOMED CT\"/>\n                    </Name>\n                    <Name>\n                        <ElementValue Type=\"Alternate\">CEREBELLOPARENCHYMAL DISORDER IV</ElementValue>\n                        <XRef Type=\"MIM\" ID=\"213300\" DB=\"OMIM\"/>\n                    </Name>\n                    <Name>\n                        <ElementValue Type=\"Alternate\">Familial aplasia of the vermis</ElementValue>\n                        <XRef ID=\"Familial+aplasia+of+the+vermis/8373\" DB=\"Genetic Alliance\"/>\n                        <XRef ID=\"253175003\" DB=\"SNOMED CT\"/>\n                    </Name>\n                    <Name>\n                        <ElementValue Type=\"Alternate\">Agenesis of cerebellar vermis</ElementValue>\n                        <XRef ID=\"HP:0002335\" DB=\"Human Phenotype Ontology\"/>\n                    </Name>\n                    <Name>\n      
                  <ElementValue Type=\"Alternate\">Cerebellar vermis aplasia</ElementValue>\n                        <XRef ID=\"HP:0002335\" DB=\"Human Phenotype Ontology\"/>\n                    </Name>\n                    <Symbol>\n                        <ElementValue Type=\"Preferred\">JBTS</ElementValue>\n                        <XRef Type=\"MIM\" ID=\"213300\" DB=\"OMIM\"/>\n                    </Symbol>\n                    <Symbol>\n                        <ElementValue Type=\"Alternate\">CPD4</ElementValue>\n                        <XRef Type=\"MIM\" ID=\"213300\" DB=\"OMIM\"/>\n                        <XRef ID=\"6802\" DB=\"Office of Rare Diseases\"/>\n                    </Symbol>\n                    <AttributeSet>\n                        <Attribute Type=\"public definition\">Classic Joubert syndrome (JS) is characterized by three\n                            primary findings: A distinctive cerebellar and brain stem malformation called the molar\n                            tooth sign (MTS). Hypotonia. Developmental delays. Often these findings are accompanied by\n                            episodic tachypnea or apnea and/or atypical eye movements. In general, the breathing\n                            abnormalities improve with age, truncal ataxia develops over time, and acquisition of gross\n                            motor milestones is delayed. Cognitive abilities are variable, ranging from severe\n                            intellectual disability to normal. Additional findings can include retinal dystrophy, renal\n                            disease, ocular colobomas, occipital encephalocele, hepatic fibrosis, polydactyly, oral\n                            hamartomas, and endocrine abnormalities. 
Both intra- and interfamilial variation are seen.\n                        </Attribute>\n                        <XRef ID=\"NBK1325\" DB=\"GeneReviews\"/>\n                    </AttributeSet>\n                    <Citation Type=\"review\" Abbrev=\"GeneReviews\">\n                        <ID Source=\"PubMed\">20301500</ID>\n                        <ID Source=\"BookShelf\">NBK1325</ID>\n                    </Citation>\n                    <Citation Type=\"Translational/Evidence-based\" Abbrev=\"EuroGenetest, 2011\">\n                        <ID Source=\"PubMed\">21448235</ID>\n                    </Citation>\n                    <XRef ID=\"C0431399\" DB=\"MedGen\"/>\n                    <XRef ID=\"475\" DB=\"Orphanet\"/>\n                    <XRef Type=\"Phenotypic series\" ID=\"PS213300\" DB=\"OMIM\"/>\n                    <XRef Type=\"primary\" ID=\"HP:0002335\" DB=\"Human Phenotype Ontology\"/>\n                    <XRef Type=\"secondary\" ID=\"HP:0007125\" DB=\"Human Phenotype Ontology\"/>\n                </Trait>\n            </TraitSet>\n        </ReferenceClinVarAssertion>\n        <ClinVarAssertion ID=\"761232\" SubmissionName=\"ICSL_2016Q4\">\n            <ClinVarSubmissionID localKey=\"498850|Joubert Syndrome\" submittedAssembly=\"GRCh37\"\n                                 submitter=\"Illumina Clinical Services Laboratory,Illumina\" submitterDate=\"2016-10-18\"/>\n            <ClinVarAccession Acc=\"SCV000447711\" Version=\"2\" Type=\"SCV\" OrgID=\"504895\" OrganizationCategory=\"laboratory\"\n                              OrgType=\"primary\" DateUpdated=\"2019-03-31\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2016-06-14\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Likely benign</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ExternalID DB=\"Illumina 
Clinical Services Laboratory\" ID=\"498850\"/>\n            <AttributeSet>\n                <Attribute Type=\"AssertionMethod\">ICSL Variant Classification 20161018</Attribute>\n                <Citation Type=\"general\">\n                    <URL>https://submit.ncbi.nlm.nih.gov/ft/byid/4jQgNGYk/ICSL_Variant_Classification_20161018.pdf</URL>\n                    <CitationText>ICSL_Variant_Classification_20161018.pdf</CitationText>\n                </Citation>\n            </AttributeSet>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>unknown</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS\">NM_001080522.2:c.685_687delGAA</Attribute>\n                    </AttributeSet>\n                    <MeasureRelationship Type=\"variant in gene\">\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">CC2D2A</ElementValue>\n                        </Symbol>\n                    </MeasureRelationship>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Joubert Syndrome</ElementValue>\n                    </Name>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n    </ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000435546.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"47448202\"><RecordStatus>current</RecordStatus><Title>NM_021625.4(TRPV4):c.[2481_2484delCCGC;2486T&gt;A] AND Avascular necrosis of femoral head, primary, 2</Title><ReferenceClinVarAssertion DateCreated=\"2017-03-07\" DateLastUpdated=\"2019-11-02\" ID=\"1020239\"><ClinVarAccession Acc=\"RCV000435546\" Version=\"1\" Type=\"RCV\" DateUpdated=\"2019-11-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2017-03-03\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>not provided</AffectedStatus><FamilyData NumFamilies=\"1\" /></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData ID=\"48326724\"><Attribute Type=\"Description\">In 4 sibs from a Greek family with avascular necrosis of the femoral head (ANFH2; 617383), Mah et al. (2016) identified heterozygosity for a 4-bp deletion (c.2480_2483delCCCG, NM_021625.4) followed by a c.2486T-A transversion (c.2486T-A, NM_021625.4) in a highly conserved region of the TRPV4 gene, causing a frameshift that results in a premature termination codon (Val829TrpfsTer3). The mutation was not found in an unaffected brother, or in the 1000 Genomes or Exome Variant Server databases; parental DNA was unavailable, but the sibs' father reportedly had symptoms of joint pain that were never evaluated. 
Functional analysis in patient fibroblasts and transduced HEK293 cells indicated that the mutation results in a gain-of-function of TRPV4 channels by impeding channel closure.</Attribute><Citation Type=\"general\"><ID Source=\"PubMed\">27330106</ID></Citation></ObservedData></ObservedIn><MeasureSet Type=\"Haplotype\" ID=\"378057\" Acc=\"VCV000378057\" Version=\"1\"><Measure Type=\"Deletion\" ID=\"380146\"><Name><ElementValue Type=\"Preferred\">NM_021625.4(TRPV4):c.2481_2484del (p.Arg828fs)</ElementValue></Name><AttributeSet><Attribute Accession=\"LRG_372t1\" Change=\"c.2481_2484del\" Type=\"HGVS, coding, LRG\">LRG_372t1:c.2481_2484del</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001177433\" Version=\"1\" Change=\"c.2160_2163del\" Type=\"HGVS, coding, RefSeq\">NM_001177433.1:c.2160_2163del</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_147204\" Version=\"2\" Change=\"c.2301_2304del\" Type=\"HGVS, coding, RefSeq\">NM_147204.2:c.2301_2304del</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001177428\" Version=\"1\" Change=\"c.2340_2343del\" Type=\"HGVS, coding, RefSeq\">NM_001177428.1:c.2340_2343del</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001177431\" Version=\"1\" Change=\"c.2379_2382del\" Type=\"HGVS, coding, RefSeq\">NM_001177431.1:c.2379_2382del</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_021625\" Version=\"4\" Change=\"c.2481_2484del\" Type=\"HGVS, coding, RefSeq\">NM_021625.4:c.2481_2484del</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_372\" Change=\"g.54652_54655del\" Type=\"HGVS, genomic, LRG\">LRG_372:g.54652_54655del</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_017090\" Version=\"1\" Change=\"g.54652_54655del\" Type=\"HGVS, genomic, RefSeqGene\">NG_017090.1:g.54652_54655del</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000012\" Version=\"12\" Change=\"g.109783754_109783757del\" Type=\"HGVS, 
genomic, top level\" integerValue=\"38\">NC_000012.12:g.109783754_109783757del</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000012\" Version=\"11\" Change=\"g.110221559_110221562del\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000012.11:g.110221559_110221562del</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_372p1\" Change=\"p.Arg828fs\" Type=\"HGVS, protein\">LRG_372p1:p.Arg828fs</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001170904\" Version=\"1\" Change=\"p.Arg721fs\" Type=\"HGVS, protein, RefSeq\">NP_001170904.1:p.Arg721fs</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_671737\" Version=\"1\" Change=\"p.Arg768fs\" Type=\"HGVS, protein, RefSeq\">NP_671737.1:p.Arg768fs</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001170899\" Version=\"1\" Change=\"p.Arg781fs\" Type=\"HGVS, protein, RefSeq\">NP_001170899.1:p.Arg781fs</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001170902\" Version=\"1\" Change=\"p.Arg794fs\" Type=\"HGVS, protein, RefSeq\">NP_001170902.1:p.Arg794fs</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_067638\" Version=\"3\" Change=\"p.Arg828fs\" Type=\"HGVS, protein, RefSeq\">NP_067638.3:p.Arg828fs</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">frameshift variant</Attribute><XRef ID=\"SO:0001589\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001177428.1:c.2340_2343del\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">frameshift variant</Attribute><XRef ID=\"SO:0001589\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001177431.1:c.2379_2382del\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">frameshift variant</Attribute><XRef ID=\"SO:0001589\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001177433.1:c.2160_2163del\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute 
Type=\"MolecularConsequence\">frameshift variant</Attribute><XRef ID=\"SO:0001589\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_021625.4:c.2481_2484del\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">frameshift variant</Attribute><XRef ID=\"SO:0001589\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_147204.2:c.2301_2304del\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">R721fs</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">R768fs</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">R781fs</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">R794fs</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">R828fs</Attribute></AttributeSet><CytogeneticLocation>12q24.11</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"12\" Accession=\"NC_000012.12\" start=\"109783753\" stop=\"109783756\" display_start=\"109783753\" display_stop=\"109783756\" variantLength=\"4\" positionVCF=\"109783752\" referenceAlleleVCF=\"CGCGG\" alternateAlleleVCF=\"C\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"12\" Accession=\"NC_000012.11\" start=\"110221558\" stop=\"110221561\" display_start=\"110221558\" display_stop=\"110221561\" variantLength=\"4\" positionVCF=\"110221557\" referenceAlleleVCF=\"CGCGG\" alternateAlleleVCF=\"C\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">transient receptor potential cation channel subfamily V member 4</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">TRPV4</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"12\" Accession=\"NC_000012.12\" 
start=\"109783087\" stop=\"109833398\" display_start=\"109783087\" display_stop=\"109833398\" Strand=\"-\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"12\" Accession=\"NC_000012.11\" start=\"110220891\" stop=\"110271211\" display_start=\"110220891\" display_stop=\"110271211\" variantLength=\"50321\" Strand=\"-\" /><XRef ID=\"59341\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"605427\" DB=\"OMIM\" /><XRef ID=\"HGNC:18083\" DB=\"HGNC\" /></MeasureRelationship><XRef Type=\"Allelic variant\" ID=\"605427.0034\" DB=\"OMIM\" /><XRef Type=\"rs\" ID=\"1057520305\" DB=\"dbSNP\" /></Measure><Measure Type=\"single nucleotide variant\" ID=\"472257\"><Name><ElementValue Type=\"Preferred\">NM_021625.4(TRPV4):c.2486T&gt;A (p.Val829Glu)</ElementValue></Name><AttributeSet><Attribute Accession=\"LRG_372t1\" Change=\"c.2486T&gt;A\" Type=\"HGVS, coding, LRG\">LRG_372t1:c.2486T&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001177433\" Version=\"1\" Change=\"c.2165T&gt;A\" Type=\"HGVS, coding, RefSeq\">NM_001177433.1:c.2165T&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_147204\" Version=\"2\" Change=\"c.2306T&gt;A\" Type=\"HGVS, coding, RefSeq\">NM_147204.2:c.2306T&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001177428\" Version=\"1\" Change=\"c.2345T&gt;A\" Type=\"HGVS, coding, RefSeq\">NM_001177428.1:c.2345T&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001177431\" Version=\"1\" Change=\"c.2384T&gt;A\" Type=\"HGVS, coding, RefSeq\">NM_001177431.1:c.2384T&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_021625\" Version=\"4\" Change=\"c.2486T&gt;A\" Type=\"HGVS, coding, RefSeq\">NM_021625.4:c.2486T&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_372\" Change=\"g.54657T&gt;A\" Type=\"HGVS, genomic, 
LRG\">LRG_372:g.54657T&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_017090\" Version=\"1\" Change=\"g.54657T&gt;A\" Type=\"HGVS, genomic, RefSeqGene\">NG_017090.1:g.54657T&gt;A</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000012\" Version=\"12\" Change=\"g.109783751A&gt;T\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000012.12:g.109783751A&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000012\" Version=\"11\" Change=\"g.110221556A&gt;T\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000012.11:g.110221556A&gt;T</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_372p1\" Change=\"p.Val829Glu\" Type=\"HGVS, protein\">LRG_372p1:p.Val829Glu</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001170904\" Version=\"1\" Change=\"p.Val722Glu\" Type=\"HGVS, protein, RefSeq\">NP_001170904.1:p.Val722Glu</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_671737\" Version=\"1\" Change=\"p.Val769Glu\" Type=\"HGVS, protein, RefSeq\">NP_671737.1:p.Val769Glu</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001170899\" Version=\"1\" Change=\"p.Val782Glu\" Type=\"HGVS, protein, RefSeq\">NP_001170899.1:p.Val782Glu</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001170902\" Version=\"1\" Change=\"p.Val795Glu\" Type=\"HGVS, protein, RefSeq\">NP_001170902.1:p.Val795Glu</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_067638\" Version=\"3\" Change=\"p.Val829Glu\" Type=\"HGVS, protein, RefSeq\">NP_067638.3:p.Val829Glu</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001177428.1:c.2345T&gt;A\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef 
ID=\"NM_001177431.1:c.2384T&gt;A\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001177433.1:c.2165T&gt;A\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_021625.4:c.2486T&gt;A\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">missense variant</Attribute><XRef ID=\"SO:0001583\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_147204.2:c.2306T&gt;A\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">V722E</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">V769E</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">V782E</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">V795E</Attribute></AttributeSet><AttributeSet><Attribute Type=\"ProteinChange1LetterCode\">V829E</Attribute></AttributeSet><CytogeneticLocation>12q24.11</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"12\" Accession=\"NC_000012.12\" start=\"109783751\" stop=\"109783751\" display_start=\"109783751\" display_stop=\"109783751\" variantLength=\"1\" positionVCF=\"109783751\" referenceAlleleVCF=\"A\" alternateAlleleVCF=\"T\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"12\" Accession=\"NC_000012.11\" start=\"110221556\" stop=\"110221556\" display_start=\"110221556\" display_stop=\"110221556\" variantLength=\"1\" positionVCF=\"110221556\" referenceAlleleVCF=\"A\" alternateAlleleVCF=\"T\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">transient receptor potential cation channel 
subfamily V member 4</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">TRPV4</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"12\" Accession=\"NC_000012.12\" start=\"109783087\" stop=\"109833398\" display_start=\"109783087\" display_stop=\"109833398\" Strand=\"-\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"12\" Accession=\"NC_000012.11\" start=\"110220891\" stop=\"110271211\" display_start=\"110220891\" display_stop=\"110271211\" variantLength=\"50321\" Strand=\"-\" /><XRef ID=\"59341\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"605427\" DB=\"OMIM\" /><XRef ID=\"HGNC:18083\" DB=\"HGNC\" /></MeasureRelationship><XRef Type=\"rs\" ID=\"1555204446\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_021625.4(TRPV4):c.[2481_2484delCCGC;2486T&gt;A]</ElementValue></Name><Name><ElementValue Type=\"Alternate\">TRPV4, 4-BP DEL, 2480CCCG AND 2486T-A</ElementValue><XRef Type=\"Allelic variant\" ID=\"605427.0034\" DB=\"OMIM\" /></Name><AttributeSet><Attribute Type=\"HGVS, coding, RefSeq\" Change=\"c.[2481_2484delCCGC;2486T&gt;A]\">NM_021625.4:c.[2481_2484delCCGC;2486T&gt;A]</Attribute></AttributeSet><XRef ID=\"CA658682643\" DB=\"ClinGen\" /><XRef Type=\"Allelic variant\" ID=\"605427.0034\" DB=\"OMIM\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"35229\"><Trait ID=\"37036\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Avascular necrosis of femoral head, primary, 2</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">ANFH2</ElementValue><XRef Type=\"MIM\" ID=\"617383\" DB=\"OMIM\" /></Symbol><XRef ID=\"C4479260\" DB=\"MedGen\" /><XRef Type=\"MIM\" ID=\"617383\" DB=\"OMIM\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"978775\"><ClinVarSubmissionID localKey=\"605427.0034_AVASCULAR NECROSIS OF FEMORAL HEAD, PRIMARY, 2 (1 family)\" 
submitter=\"OMIM\" submitterDate=\"2017-03-03\" title=\"TRPV4, 4-BP DEL, 2480CCCG AND 2486T-A_AVASCULAR NECROSIS OF FEMORAL HEAD, PRIMARY, 2 (1 family)\" /><ClinVarAccession Acc=\"SCV000513422\" Version=\"1\" Type=\"SCV\" OrgID=\"3\" OrganizationCategory=\"resource\" OrgType=\"primary\" DateUpdated=\"2019-08-03\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2017-03-03\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"OMIM\" ID=\"605427.0034\" Type=\"Allelic variant\" /><ObservedIn><Sample><Origin>germline</Origin><Species>human</Species><AffectedStatus>not provided</AffectedStatus><FamilyData NumFamilies=\"1\" /></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData><Attribute Type=\"Description\">In 4 sibs from a Greek family with avascular necrosis of the femoral head (ANFH2; 617383), Mah et al. (2016) identified heterozygosity for a 4-bp deletion (c.2480_2483delCCCG, NM_021625.4) followed by a c.2486T-A transversion (c.2486T-A, NM_021625.4) in a highly conserved region of the TRPV4 gene, causing a frameshift that results in a premature termination codon (Val829TrpfsTer3). The mutation was not found in an unaffected brother, or in the 1000 Genomes or Exome Variant Server databases; parental DNA was unavailable, but the sibs' father reportedly had symptoms of joint pain that were never evaluated. 
Functional analysis in patient fibroblasts and transduced HEK293 cells indicated that the mutation results in a gain-of-function of TRPV4 channels by impeding channel closure.</Attribute><Citation><ID Source=\"PubMed\">27330106</ID></Citation><XRef DB=\"OMIM\" ID=\"617383\" Type=\"MIM\" /></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><Name><ElementValue Type=\"Preferred\">TRPV4, 4-BP DEL, 2480CCCG AND 2486T-A</ElementValue></Name><AttributeSet><Attribute Type=\"NonHGVS\">4-BP DEL, 2480CCCG AND 2486T-A</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">TRPV4</ElementValue></Symbol></MeasureRelationship><XRef DB=\"OMIM\" ID=\"605427.0034\" Type=\"Allelic variant\" /></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">AVASCULAR NECROSIS OF FEMORAL HEAD, PRIMARY, 2 (1 family)</ElementValue></Name></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000485802.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"49293189\"><RecordStatus>current</RecordStatus><Title>NM_000044.6(AR):c.171_173GCA[36] (p.Gln68_Gln80dup) AND Bulbo-spinal atrophy X-linked</Title><ReferenceClinVarAssertion DateCreated=\"2017-04-26\" DateLastUpdated=\"2019-12-31\" ID=\"1117378\"><ClinVarAccession Acc=\"RCV000485802\" Version=\"1\" Type=\"RCV\" DateUpdated=\"2019-12-31\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2017-01-26\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Uncertain significance</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData ID=\"49997207\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"417975\" Acc=\"VCV000417975\" Version=\"1\"><Measure Type=\"Microsatellite\" ID=\"404864\"><Name><ElementValue Type=\"Preferred\">NM_000044.6(AR):c.171_173GCA[36] (p.Gln68_Gln80dup)</ElementValue></Name><AttributeSet><Attribute Accession=\"NM_001011645\" Version=\"3\" Change=\"c.-1613_-1611GCA[36]\" Type=\"HGVS, coding, RefSeq\">NM_001011645.3:c.-1613_-1611GCA[36]</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_000044\" Version=\"6\" Change=\"c.171_173GCA[36]\" Type=\"HGVS, coding, RefSeq\">NM_000044.6:c.171_173GCA[36]</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001348061\" Version=\"1\" Change=\"c.171_173GCA[36]\" Type=\"HGVS, coding, 
RefSeq\">NM_001348061.1:c.171_173GCA[36]</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001348063\" Version=\"1\" Change=\"c.171_173GCA[36]\" Type=\"HGVS, coding, RefSeq\">NM_001348063.1:c.171_173GCA[36]</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001348064\" Version=\"1\" Change=\"c.171_173GCA[36]\" Type=\"HGVS, coding, RefSeq\">NM_001348064.1:c.171_173GCA[36]</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_052629\" Version=\"1\" Change=\"g.101_103GCA[36]\" Type=\"HGVS, genomic, RefSeqGene\">NG_052629.1:g.101_103GCA[36]</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_009014\" Version=\"2\" Change=\"g.6286_6288GCA[36]\" Type=\"HGVS, genomic, RefSeqGene\">NG_009014.2:g.6286_6288GCA[36]</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000023\" Version=\"11\" Change=\"g.67545317_67545319GCA[36]\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000023.11:g.67545317_67545319GCA[36]</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000023\" Version=\"10\" Change=\"g.66765159_66765161GCA[36]\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000023.10:g.66765159_66765161GCA[36]</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_000044\" Version=\"3\" Change=\"c.172_174CAG[35]\" Type=\"HGVS, previous\">NM_000044.3:c.172_174CAG[35]</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_000035\" Version=\"2\" Change=\"p.Gln68_Gln80dup\" Type=\"HGVS, protein, RefSeq\">NP_000035.2:p.Gln68_Gln80dup</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001334990\" Version=\"1\" Change=\"p.Gln68_Gln80dup\" Type=\"HGVS, protein, RefSeq\">NP_001334990.1:p.Gln68_Gln80dup</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001334992\" Version=\"1\" Change=\"p.Gln68_Gln80dup\" Type=\"HGVS, protein, RefSeq\">NP_001334992.1:p.Gln68_Gln80dup</Attribute></AttributeSet><AttributeSet><Attribute 
Accession=\"NP_001334993\" Version=\"1\" Change=\"p.Gln68_Gln80dup\" Type=\"HGVS, protein, RefSeq\">NP_001334993.1:p.Gln68_Gln80dup</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">5 prime UTR variant</Attribute><XRef ID=\"SO:0001623\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001011645.3:c.-1613_-1611GCA[36]\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">inframe_insertion</Attribute><XRef ID=\"SO:0001821\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_000044.6:c.171_173GCA[36]\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">inframe_insertion</Attribute><XRef ID=\"SO:0001821\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001348061.1:c.171_173GCA[36]\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">inframe_insertion</Attribute><XRef ID=\"SO:0001821\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001348063.1:c.171_173GCA[36]\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">inframe_insertion</Attribute><XRef ID=\"SO:0001821\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001348064.1:c.171_173GCA[36]\" DB=\"RefSeq\" /></AttributeSet><CytogeneticLocation>Xq12</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"X\" Accession=\"NC_000023.11\" start=\"67545318\" stop=\"67545320\" display_start=\"67545318\" display_stop=\"67545320\" positionVCF=\"67545316\" referenceAlleleVCF=\"T\" alternateAlleleVCF=\"TGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCA\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"X\" Accession=\"NC_000023.10\" start=\"66765160\" stop=\"66765162\" display_start=\"66765160\" display_stop=\"66765162\" positionVCF=\"66765158\" referenceAlleleVCF=\"T\" alternateAlleleVCF=\"TGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCA\" /><MeasureRelationship 
Type=\"within multiple genes by overlap\"><Name><ElementValue Type=\"Preferred\">androgen receptor repeat instability region</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">LOC109504725</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"X\" Accession=\"NC_000023.11\" start=\"67545317\" stop=\"67545419\" display_start=\"67545317\" display_stop=\"67545419\" Strand=\"+\" /><XRef ID=\"109504725\" DB=\"Gene\" /></MeasureRelationship><MeasureRelationship Type=\"within multiple genes by overlap\"><Name><ElementValue Type=\"Preferred\">androgen receptor</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">AR</ElementValue></Symbol><AttributeSet><Attribute dateValue=\"2012-03-22\" Type=\"Haploinsufficiency\">Sufficient evidence for dosage pathogenicity</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=AR</URL></Citation></AttributeSet><AttributeSet><Attribute dateValue=\"2012-03-22\" Type=\"Triplosensitivity\">No evidence available</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=AR</URL></Citation></AttributeSet><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"X\" Accession=\"NC_000023.11\" start=\"67544021\" stop=\"67730619\" display_start=\"67544021\" display_stop=\"67730619\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"X\" Accession=\"NC_000023.10\" start=\"66763873\" stop=\"66950460\" display_start=\"66763873\" display_stop=\"66950460\" variantLength=\"186588\" Strand=\"+\" /><XRef ID=\"367\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"313700\" DB=\"OMIM\" /><XRef ID=\"HGNC:644\" DB=\"HGNC\" /></MeasureRelationship></Measure><Name><ElementValue Type=\"Preferred\">NM_000044.6(AR):c.171_173GCA[36] 
(p.Gln68_Gln80dup)</ElementValue></Name></MeasureSet><TraitSet Type=\"Disease\" ID=\"2766\"><Trait ID=\"4172\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Bulbo-spinal atrophy X-linked</ElementValue><XRef ID=\"Kennedy+Disease/993\" DB=\"Genetic Alliance\" /><XRef ID=\"6818\" DB=\"Office of Rare Diseases\" /></Name><Name><ElementValue Type=\"Alternate\">SPINAL AND BULBAR MUSCULAR ATROPHY, X-LINKED 1</ElementValue><XRef Type=\"MIM\" ID=\"313200\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"313700.0014\" DB=\"OMIM\" /></Name><Symbol><ElementValue Type=\"Preferred\">SMAX1</ElementValue><XRef Type=\"MIM\" ID=\"313200\" DB=\"OMIM\" /></Symbol><Symbol><ElementValue Type=\"Alternate\">KD</ElementValue><XRef Type=\"MIM\" ID=\"313200\" DB=\"OMIM\" /></Symbol><Symbol><ElementValue Type=\"Alternate\">SBMA</ElementValue><XRef Type=\"MIM\" ID=\"313200\" DB=\"OMIM\" /></Symbol><Symbol><ElementValue Type=\"Alternate\">XBSN</ElementValue><XRef Type=\"MIM\" ID=\"313200\" DB=\"OMIM\" /></Symbol><AttributeSet><Attribute Type=\"public definition\">Spinal and bulbar muscular atrophy (SBMA) is a gradually progressive neuromuscular disorder in which degeneration of lower motor neurons results in muscle weakness, muscle atrophy, and fasciculations. SBMA occurs only in males. 
Affected individuals often show gynecomastia, testicular atrophy, and reduced fertility as a result of mild androgen insensitivity.</Attribute><XRef ID=\"NBK1333\" DB=\"GeneReviews\" /></AttributeSet><Citation Type=\"review\" Abbrev=\"GeneReviews\"><ID Source=\"PubMed\">20301508</ID><ID Source=\"BookShelf\">NBK1333</ID></Citation><XRef ID=\"C1839259\" DB=\"MedGen\" /><XRef ID=\"481\" DB=\"Orphanet\" /><XRef Type=\"MIM\" ID=\"313200\" DB=\"OMIM\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"1099617\"><ClinVarSubmissionID localKey=\"NM_000044.3:c.172_174CAG(35)|OMIM:313200\" submittedAssembly=\"not applicable\" submitter=\"GeneReviews\" submitterDate=\"2017-01-26\" /><ClinVarAccession Acc=\"SCV000564510\" Version=\"1\" Type=\"SCV\" OrgID=\"500062\" OrganizationCategory=\"resource\" OrgType=\"primary\" DateUpdated=\"2019-07-02\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2017-01-26\"><ReviewStatus>no assertion criteria provided</ReviewStatus><Description>Uncertain significance</Description><Citation><URL>https://www.ncbi.nlm.nih.gov/books/NBK1333/</URL></Citation></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>literature only</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_000044.3:c.172_174CAG(35)</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">AR</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><XRef DB=\"OMIM\" ID=\"313200\" Type=\"MIM\" /></Trait></TraitSet><Comment Type=\"public\">CAG 35 
repeats</Comment></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000537563.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"48439941\"><RecordStatus>current</RecordStatus><Title>NM_005214.5(CTLA4):c.75G&gt;C (p.Leu25=) AND not provided</Title><ReferenceClinVarAssertion DateCreated=\"2017-12-26\" DateLastUpdated=\"2019-12-15\" ID=\"1272309\"><ClinVarAccession Acc=\"RCV000537563\" Version=\"3\" Type=\"RCV\" DateUpdated=\"2019-12-16\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2019-02-28\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"49478082\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"475280\" Acc=\"VCV000475280\" Version=\"2\"><Measure Type=\"single nucleotide variant\" ID=\"450403\"><Name><ElementValue Type=\"Preferred\">NM_005214.5(CTLA4):c.75G&gt;C (p.Leu25=)</ElementValue></Name><AttributeSet><Attribute Accession=\"LRG_1220t1\" Change=\"c.75G&gt;C\" Type=\"HGVS, coding, LRG\">LRG_1220t1:c.75G&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_001037631\" Version=\"3\" Change=\"c.75G&gt;C\" Type=\"HGVS, coding, RefSeq\">NM_001037631.3:c.75G&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_005214\" Version=\"5\" Change=\"c.75G&gt;C\" Type=\"HGVS, coding, RefSeq\">NM_005214.5:c.75G&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_1220\" Change=\"g.5232G&gt;C\" Type=\"HGVS, 
genomic, LRG\">LRG_1220:g.5232G&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NG_011502\" Version=\"1\" Change=\"g.5232G&gt;C\" Type=\"HGVS, genomic, RefSeqGene\">NG_011502.1:g.5232G&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000002\" Version=\"12\" Change=\"g.203868017G&gt;C\" Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000002.12:g.203868017G&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NC_000002\" Version=\"11\" Change=\"g.204732740G&gt;C\" Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000002.11:g.204732740G&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NM_005214\" Version=\"4\" Change=\"c.75G&gt;C\" Type=\"HGVS, previous\">NM_005214.4:c.75G&gt;C</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"LRG_1220p1\" Change=\"p.Leu25=\" Type=\"HGVS, protein\">LRG_1220p1:p.Leu25=</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_001032720\" Version=\"1\" Change=\"p.Leu25=\" Type=\"HGVS, protein, RefSeq\">NP_001032720.1:p.Leu25=</Attribute></AttributeSet><AttributeSet><Attribute Accession=\"NP_005205\" Version=\"2\" Change=\"p.Leu25=\" Type=\"HGVS, protein, RefSeq\">NP_005205.2:p.Leu25=</Attribute></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">synonymous variant</Attribute><XRef ID=\"SO:0001819\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_001037631.3:c.75G&gt;C\" DB=\"RefSeq\" /></AttributeSet><AttributeSet><Attribute Type=\"MolecularConsequence\">synonymous variant</Attribute><XRef ID=\"SO:0001819\" DB=\"Sequence Ontology\" /><XRef ID=\"NM_005214.5:c.75G&gt;C\" DB=\"RefSeq\" /></AttributeSet><AlleleFrequencyList><AlleleFrequency Value=\"0.00938\" Source=\"NHLBI Exome Sequencing Project (ESP) Exome Variant Server\" /><AlleleFrequency Value=\"0.00639\" Source=\"1000 Genomes Project\" /><AlleleFrequency Value=\"0.00227\" Source=\"Exome Aggregation Consortium (ExAC)\" /><AlleleFrequency 
Value=\"0.00669\" Source=\"The Genome Aggregation Database (gnomAD)\" /><AlleleFrequency Value=\"0.00192\" Source=\"The Genome Aggregation Database (gnomAD), exomes\" /><AlleleFrequency Value=\"0.00762\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\" /></AlleleFrequencyList><GlobalMinorAlleleFrequency Value=\"0.00639\" Source=\"1000 Genomes Project\" MinorAllele=\"C\" /><CytogeneticLocation>2q33.2</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"2\" Accession=\"NC_000002.12\" start=\"203868017\" stop=\"203868017\" display_start=\"203868017\" display_stop=\"203868017\" variantLength=\"1\" positionVCF=\"203868017\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"C\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"2\" Accession=\"NC_000002.11\" start=\"204732740\" stop=\"204732740\" display_start=\"204732740\" display_stop=\"204732740\" variantLength=\"1\" positionVCF=\"204732740\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"C\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">cytotoxic T-lymphocyte associated protein 4</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">CTLA4</ElementValue></Symbol><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"2\" Accession=\"NC_000002.12\" start=\"203867771\" stop=\"203873965\" display_start=\"203867771\" display_stop=\"203873965\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"2\" Accession=\"NC_000002.11\" start=\"204732510\" stop=\"204738682\" display_start=\"204732510\" display_stop=\"204738682\" variantLength=\"6173\" Strand=\"+\" /><XRef ID=\"1493\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"123890\" DB=\"OMIM\" /><XRef ID=\"HGNC:2505\" DB=\"HGNC\" 
/></MeasureRelationship><XRef Type=\"rs\" ID=\"16840275\" DB=\"dbSNP\" /></Measure><Name><ElementValue Type=\"Preferred\">NM_005214.5(CTLA4):c.75G&gt;C (p.Leu25=)</ElementValue></Name><XRef ID=\"CA2067039\" DB=\"ClinGen\" /></MeasureSet><TraitSet Type=\"Disease\" ID=\"9460\"><Trait ID=\"17556\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not provided</ElementValue><XRef ID=\"13DG0619\" DB=\"Developmental Genetics Unit,King Faisal Specialist Hospital &amp; Research Centre\" /></Name><AttributeSet><Attribute Type=\"public definition\">The term 'not provided' is registered in MedGen to support identification of submissions to ClinVar for which no condition was named when assessing the variant. 'not provided' differs from 'not specified', which is used when a variant is asserted to be benign, likely benign, or of uncertain significance for conditions that have not been specified.</Attribute></AttributeSet><XRef ID=\"CN517202\" DB=\"MedGen\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"1253669\" SubmissionName=\"SUB5321749\"><ClinVarSubmissionID localKey=\"2012081|MedGen:CN517202\" submittedAssembly=\"GRCh37\" submitter=\"Invitae\" submitterDate=\"2019-03-14\" /><ClinVarAccession Acc=\"SCV000655460\" Version=\"3\" Type=\"SCV\" OrgID=\"500031\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-12-16\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2019-02-28\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Benign</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"Invitae\" ID=\"2012081\" /><AttributeSet><Attribute Type=\"AssertionMethod\">Nykamp K et al. 
(Genet Med 2017)</Attribute><Citation><ID Source=\"PubMed\">28492532</ID></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Variation\"><AttributeSet><Attribute Type=\"HGVS\">NM_005214.4:c.75G&gt;C</Attribute></AttributeSet><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">CTLA4</ElementValue></Symbol></MeasureRelationship></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not provided</ElementValue></Name><XRef DB=\"MedGen\" ID=\"CN517202\" Type=\"CUI\" /></Trait></TraitSet></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/RCV000724338.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet Dated=\"2016-07-04\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Type=\"full\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n<ClinVarSet ID=\"48508319\"><RecordStatus>current</RecordStatus><Title>NM_000059.3(BRCA2):c.156_157insAlu AND not provided</Title><ReferenceClinVarAssertion DateCreated=\"2018-12-11\" DateLastUpdated=\"2019-12-15\" ID=\"1668000\"><ClinVarAccession Acc=\"RCV000724338\" Version=\"1\" Type=\"RCV\" DateUpdated=\"2019-12-16\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2014-10-09\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData ID=\"49538326\"><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\" ID=\"126018\" Acc=\"VCV000126018\" Version=\"1\"><Measure Type=\"Insertion\" ID=\"131556\"><Name><ElementValue Type=\"Preferred\">NM_000059.3(BRCA2):c.156_157insAlu</ElementValue></Name><Name><ElementValue Type=\"Alternate\">384insAlu</ElementValue></Name><Name><ElementValue Type=\"Alternate\">U43746.1:n.384_385insAlu</ElementValue></Name><Name><ElementValue Type=\"Alternate\">NM_000059.3:c.156_157insALU</ElementValue></Name><AttributeSet><Attribute Type=\"Location\">NM_000059.3:exon 3</Attribute></AttributeSet><AttributeSet><Attribute Type=\"Location\">U43746.1:exon 3</Attribute></AttributeSet><CytogeneticLocation>13q13.1</CytogeneticLocation><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"13\" 
Accession=\"NC_000013.10\" start=\"32893302\" stop=\"32893303\" display_start=\"32893302\" display_stop=\"32893303\" referenceAllele=\"-\" alternateAllele=\"Alu\" /><MeasureRelationship Type=\"within single gene\"><Name><ElementValue Type=\"Preferred\">BRCA2 DNA repair associated</ElementValue></Name><Symbol><ElementValue Type=\"Preferred\">BRCA2</ElementValue></Symbol><AttributeSet><Attribute dateValue=\"2012-07-06\" Type=\"Haploinsufficiency\">Sufficient evidence for dosage pathogenicity</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=BRCA2</URL></Citation></AttributeSet><AttributeSet><Attribute dateValue=\"2012-07-06\" Type=\"Triplosensitivity\">No evidence available</Attribute><Citation><URL>https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=BRCA2</URL></Citation></AttributeSet><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"13\" Accession=\"NC_000013.11\" start=\"32315480\" stop=\"32399672\" display_start=\"32315480\" display_stop=\"32399672\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"13\" Accession=\"NC_000013.10\" start=\"32889616\" stop=\"32973808\" display_start=\"32889616\" display_stop=\"32973808\" variantLength=\"84193\" Strand=\"+\" /><XRef ID=\"675\" DB=\"Gene\" /><XRef Type=\"MIM\" ID=\"600185\" DB=\"OMIM\" /><XRef ID=\"HGNC:1101\" DB=\"HGNC\" /><Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations of 2013 (PubMed 23788249) for reporting incidental findings in exons.</Comment><Comment DataSource=\"NCBI curation\" Type=\"public\">This gene is cited in the ACMG recommendations of 2016 (PubMed 27854360) for reporting incidental findings in exons.</Comment></MeasureRelationship><XRef ID=\"384&amp;base_change=ins Alu\" DB=\"Breast Cancer Information Core (BIC) (BRCA2)\" 
/></Measure><Name><ElementValue Type=\"Preferred\">NM_000059.3(BRCA2):c.156_157insAlu</ElementValue></Name></MeasureSet><TraitSet Type=\"Disease\" ID=\"9460\"><Trait ID=\"17556\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not provided</ElementValue><XRef ID=\"13DG0619\" DB=\"Developmental Genetics Unit,King Faisal Specialist Hospital &amp; Research Centre\" /></Name><AttributeSet><Attribute Type=\"public definition\">The term 'not provided' is registered in MedGen to support identification of submissions to ClinVar for which no condition was named when assessing the variant. 'not provided' differs from 'not specified', which is used when a variant is asserted to be benign, likely benign, or of uncertain significance for conditions that have not been specified.</Attribute></AttributeSet><XRef ID=\"CN517202\" DB=\"MedGen\" /></Trait></TraitSet></ReferenceClinVarAssertion><ClinVarAssertion ID=\"417084\" SubmissionName=\"SUB4534263\"><ClinVarSubmissionID localKey=\"24838_000000\" localKeyIsSubmitted=\"1\" submittedAssembly=\"GRCh37\" submitter=\"EGL Genetic Diagnostics,Eurofins Clinical Diagnostics\" submitterDate=\"2018-09-19\" /><ClinVarAccession Acc=\"SCV000228766\" Version=\"5\" Type=\"SCV\" OrgID=\"500060\" OrganizationCategory=\"laboratory\" OrgType=\"primary\" DateUpdated=\"2019-08-03\" /><RecordStatus>current</RecordStatus><ClinicalSignificance DateLastEvaluated=\"2014-10-09\"><ReviewStatus>criteria provided, single submitter</ReviewStatus><Description>Pathogenic</Description></ClinicalSignificance><Assertion Type=\"variation to disease\" /><ExternalID DB=\"EGL Genetic Diagnostics\" ID=\"24838\" /><AttributeSet><Attribute Type=\"AssertionMethod\">EGL_Classification_Definitions_2015</Attribute><Citation><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/dn5yhybg/egl_classification_definitions_2015.pdf</URL></Citation></AttributeSet><ObservedIn><Sample><Origin>germline</Origin><Species 
TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus><Gender>mixed</Gender></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn><MeasureSet Type=\"Variant\"><Measure Type=\"Insertion\"><AttributeSet><Attribute Type=\"Location\">NM_000059.3:Ex3</Attribute></AttributeSet><SequenceLocation Assembly=\"GRCh37\" Chr=\"13\" alternateAllele=\"Alu\" referenceAllele=\"-\" start=\"32893302\" stop=\"32893303\" variantLength=\"2\" /><MeasureRelationship Type=\"variant in gene\"><Symbol><ElementValue Type=\"Preferred\">BRCA2</ElementValue></Symbol></MeasureRelationship><XRef DB=\"HGMD\" ID=\"CP1619475\" /></Measure></MeasureSet><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">not provided</ElementValue></Name></Trait></TraitSet><Citation><URL>http://www.egl-eurofins.com/emvclass/emvclass.php?approved_symbol=BRCA2</URL></Citation></ClinVarAssertion></ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/Two_RCVs.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<ReleaseSet xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" Dated=\"2016-07-04\" Type=\"full\"\n            xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.35.xsd\">\n\n    <ClinVarSet ID=\"92830100\">\n        <RecordStatus>current</RecordStatus>\n        <Title>NM_005026.5(PIK3CD):c.231G&gt;A (p.Ala77=) AND not provided</Title>\n        <ReferenceClinVarAssertion DateCreated=\"2019-02-17\" DateLastUpdated=\"2022-04-21\" ID=\"1735041\">\n            <ClinVarAccession Acc=\"RCV000757616\" Version=\"4\" Type=\"RCV\" DateUpdated=\"2022-04-23\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2017-07-03\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Likely benign</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>unknown</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData ID=\"96097922\">\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\" ID=\"618791\" Acc=\"VCV000618791\" Version=\"5\">\n                <Measure Type=\"single nucleotide variant\" ID=\"609439\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">NM_005026.5(PIK3CD):c.231G&gt;A (p.Ala77=)</ElementValue>\n                    </Name>\n                    
<CanonicalSPDI>NC_000001.11:9715629:G:A</CanonicalSPDI>\n                    <AttributeSet>\n                        <Attribute Accession=\"LRG_191t1\" Change=\"c.231G&gt;A\" Type=\"HGVS, coding, LRG\">LRG_191t1:c.231G&gt;A</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_001350234\" Version=\"2\" Change=\"c.231G&gt;A\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_001350234.2:c.231G&gt;A\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_001350235\" Version=\"1\" Change=\"c.231G&gt;A\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_001350235.1:c.231G&gt;A\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_005026\" Version=\"5\" Change=\"c.231G&gt;A\" Type=\"HGVS, coding, RefSeq\"\n                                   MANESelect=\"true\">NM_005026.5:c.231G&gt;A\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"LRG_191\" Change=\"g.68899G&gt;A\" Type=\"HGVS, genomic, LRG\">LRG_191:g.68899G&gt;A</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_023434\" Version=\"1\" Change=\"g.68899G&gt;A\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_023434.1:g.68899G&gt;A\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000001\" Version=\"11\" Change=\"g.9715630G&gt;A\"\n                                   Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000001.11:g.9715630G&gt;A\n                  
      </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000001\" Version=\"10\" Change=\"g.9775688G&gt;A\"\n                                   Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000001.10:g.9775688G&gt;A\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_005026\" Version=\"3\" Change=\"c.231G&gt;A\" Type=\"HGVS, previous\">\n                            NM_005026.3:c.231G&gt;A\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS, protein\">p.Ala77Ala</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_001337163\" Version=\"1\" Change=\"p.Ala77=\" Type=\"HGVS, protein, RefSeq\">\n                            NP_001337163.1:p.Ala77=\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_001337164\" Version=\"1\" Change=\"p.Ala77=\" Type=\"HGVS, protein, RefSeq\">\n                            NP_001337164.1:p.Ala77=\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_005017\" Version=\"3\" Change=\"p.Ala77=\" Type=\"HGVS, protein, RefSeq\">\n                            NP_005017.3:p.Ala77=\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">synonymous variant</Attribute>\n                        <XRef ID=\"SO:0001819\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_001350234.2:c.231G&gt;A\" 
DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">synonymous variant</Attribute>\n                        <XRef ID=\"SO:0001819\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_001350235.1:c.231G&gt;A\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">synonymous variant</Attribute>\n                        <XRef ID=\"SO:0001819\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_005026.5:c.231G&gt;A\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AlleleFrequencyList>\n                        <AlleleFrequency Value=\"0.00007\" Source=\"Exome Aggregation Consortium (ExAC)\"/>\n                        <AlleleFrequency Value=\"0.00006\" Source=\"The Genome Aggregation Database (gnomAD)\"/>\n                        <AlleleFrequency Value=\"0.00007\" Source=\"The Genome Aggregation Database (gnomAD)\"/>\n                        <AlleleFrequency Value=\"0.00006\" Source=\"The Genome Aggregation Database (gnomAD), exomes\"/>\n                        <AlleleFrequency Value=\"0.00008\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\"/>\n                        <AlleleFrequency Value=\"0.00006\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\"/>\n                    </AlleleFrequencyList>\n                    <CytogeneticLocation>1p36.22</CytogeneticLocation>\n                    <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                      AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"9715630\"\n                                      stop=\"9715630\" display_start=\"9715630\" display_stop=\"9715630\" variantLength=\"1\"\n                                      positionVCF=\"9715630\" referenceAlleleVCF=\"G\" 
alternateAlleleVCF=\"A\"/>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"9775688\"\n                                      stop=\"9775688\" display_start=\"9775688\" display_stop=\"9775688\" variantLength=\"1\"\n                                      positionVCF=\"9775688\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"A\"/>\n                    <MeasureRelationship Type=\"within single gene\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">phosphatidylinositol-4,5-bisphosphate 3-kinase catalytic\n                                subunit delta\n                            </ElementValue>\n                        </Name>\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">PIK3CD</ElementValue>\n                        </Symbol>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"9627258\"\n                                          stop=\"9729114\" display_start=\"9627258\" display_stop=\"9729114\" Strand=\"+\"/>\n                        <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                          AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"9711789\"\n                                          stop=\"9789171\" display_start=\"9711789\" display_stop=\"9789171\"\n                                          variantLength=\"77383\" Strand=\"+\"/>\n                        <XRef ID=\"5293\" DB=\"Gene\"/>\n                        <XRef Type=\"MIM\" ID=\"602839\" DB=\"OMIM\"/>\n                        <XRef ID=\"HGNC:8977\" DB=\"HGNC\"/>\n                    
</MeasureRelationship>\n                    <XRef ID=\"164534\" DB=\"ARUP Laboratories, Molecular Genetics and Genomics,ARUP Laboratories\"/>\n                    <XRef Type=\"rs\" ID=\"756139699\" DB=\"dbSNP\"/>\n                </Measure>\n                <Name>\n                    <ElementValue Type=\"Preferred\">NM_005026.5(PIK3CD):c.231G&gt;A (p.Ala77=)</ElementValue>\n                </Name>\n                <Name>\n                    <ElementValue Type=\"Preferred\">NM_005026.5(PIK3CD):c.231G&gt;A (p.Ala77=)</ElementValue>\n                </Name>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\" ID=\"9460\">\n                <Trait ID=\"17556\" Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">not provided</ElementValue>\n                        <XRef ID=\"13DG0619\"\n                              DB=\"Developmental Genetics Unit,King Faisal Specialist Hospital &amp; Research Centre\"/>\n                    </Name>\n                    <Name>\n                        <ElementValue Type=\"Alternate\">none provided</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Type=\"public definition\">The term 'not provided' is registered in MedGen to support\n                            identification of submissions to ClinVar for which no condition was named when assessing the\n                            variant. 
'not provided' differs from 'not specified', which is used when a variant is\n                            asserted to be benign, likely benign, or of uncertain significance for conditions that have\n                            not been specified.\n                        </Attribute>\n                    </AttributeSet>\n                    <XRef ID=\"CN517202\" DB=\"MedGen\"/>\n                </Trait>\n            </TraitSet>\n        </ReferenceClinVarAssertion>\n        <ClinVarAssertion ID=\"1732522\" SubmissionName=\"SUB4618058\">\n            <ClinVarSubmissionID localKey=\"164534|Not Provided\" submittedAssembly=\"GRCh37\"\n                                 submitter=\"ARUP Laboratories, Molecular Genetics and Genomics,ARUP Laboratories\"\n                                 submitterDate=\"2018-10-10\"/>\n            <ClinVarAccession Acc=\"SCV000885910\" Version=\"1\" Type=\"SCV\" OrgID=\"25969\" OrganizationCategory=\"laboratory\"\n                              OrgType=\"primary\" DateUpdated=\"2022-01-26\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2017-07-03\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Likely benign</Description>\n                <Comment>The c.231G&gt;A variant (rs756139699) does not alter the amino acid sequence of the PIK3CD\n                    protein and computational splice site prediction algorithms do not predict a change in the nearest\n                    splice site or creation of a cryptic splice site. This variant has not been reported in association\n                    with primary antibody deficiency in medical literature or in gene specific variation databases. This\n                    variant is listed in the genome Aggregation Database (gnomAD) with an overall population frequency\n                    of 0.006 percent (identified on 17 out of 276,374 chromosomes). 
Based on these observations, the\n                    c.231G&gt;A variant is likely to be benign.\n                </Comment>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ExternalID DB=\"Molecular Genetics\" ID=\"164534\"/>\n            <AttributeSet>\n                <Attribute Type=\"AssertionMethod\">ARUP Molecular Germline Variant Investigation Process</Attribute>\n                <Citation>\n                    <URL>\n                        https://submit.ncbi.nlm.nih.gov/ft/byid/w2yp3qyt/arup_molecular_germline_variant_investigation_process.pdf\n                    </URL>\n                </Citation>\n            </AttributeSet>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>unknown</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <Name>\n                        <ElementValue Type=\"Alternate\">p.Ala77Ala</ElementValue>\n                    </Name>\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS\">NM_005026.3:c.231G&gt;A</Attribute>\n                    </AttributeSet>\n                    <SequenceLocation Assembly=\"GRCh37\" Chr=\"1\" alternateAllele=\"A\" referenceAllele=\"G\" start=\"9775688\"\n                                      stop=\"9775688\" variantLength=\"1\"/>\n                    <XRef DB=\"dbSNP\" ID=\"756139699\" Type=\"rsNumber\"/>\n                </Measure>\n            </MeasureSet>\n            <TraitSet 
Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Not Provided</ElementValue>\n                    </Name>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n    </ClinVarSet>\n    <ClinVarSet ID=\"93549412\">\n        <RecordStatus>current</RecordStatus>\n        <Title>NM_005026.5(PIK3CD):c.231G&gt;A (p.Ala77=) AND Immunodeficiency 14</Title>\n        <ReferenceClinVarAssertion DateCreated=\"2021-06-06\" DateLastUpdated=\"2022-04-21\" ID=\"3353263\">\n            <ClinVarAccession Acc=\"RCV001488431\" Version=\"1\" Type=\"RCV\" DateUpdated=\"2022-04-23\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2020-08-22\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Likely benign</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    <AffectedStatus>unknown</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData ID=\"95936168\">\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\" ID=\"618791\" Acc=\"VCV000618791\" Version=\"5\">\n                <Measure Type=\"single nucleotide variant\" ID=\"609439\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">NM_005026.5(PIK3CD):c.231G&gt;A (p.Ala77=)</ElementValue>\n                    </Name>\n                    
<CanonicalSPDI>NC_000001.11:9715629:G:A</CanonicalSPDI>\n                    <AttributeSet>\n                        <Attribute Accession=\"LRG_191t1\" Change=\"c.231G&gt;A\" Type=\"HGVS, coding, LRG\">LRG_191t1:c.231G&gt;A</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_001350234\" Version=\"2\" Change=\"c.231G&gt;A\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_001350234.2:c.231G&gt;A\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_001350235\" Version=\"1\" Change=\"c.231G&gt;A\"\n                                   Type=\"HGVS, coding, RefSeq\">NM_001350235.1:c.231G&gt;A\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_005026\" Version=\"5\" Change=\"c.231G&gt;A\" Type=\"HGVS, coding, RefSeq\"\n                                   MANESelect=\"true\">NM_005026.5:c.231G&gt;A\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"LRG_191\" Change=\"g.68899G&gt;A\" Type=\"HGVS, genomic, LRG\">LRG_191:g.68899G&gt;A</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NG_023434\" Version=\"1\" Change=\"g.68899G&gt;A\"\n                                   Type=\"HGVS, genomic, RefSeqGene\">NG_023434.1:g.68899G&gt;A\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000001\" Version=\"11\" Change=\"g.9715630G&gt;A\"\n                                   Type=\"HGVS, genomic, top level\" integerValue=\"38\">NC_000001.11:g.9715630G&gt;A\n                  
      </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NC_000001\" Version=\"10\" Change=\"g.9775688G&gt;A\"\n                                   Type=\"HGVS, genomic, top level, previous\" integerValue=\"37\">NC_000001.10:g.9775688G&gt;A\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NM_005026\" Version=\"3\" Change=\"c.231G&gt;A\" Type=\"HGVS, previous\">\n                            NM_005026.3:c.231G&gt;A\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS, protein\">p.Ala77Ala</Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_001337163\" Version=\"1\" Change=\"p.Ala77=\" Type=\"HGVS, protein, RefSeq\">\n                            NP_001337163.1:p.Ala77=\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_001337164\" Version=\"1\" Change=\"p.Ala77=\" Type=\"HGVS, protein, RefSeq\">\n                            NP_001337164.1:p.Ala77=\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Accession=\"NP_005017\" Version=\"3\" Change=\"p.Ala77=\" Type=\"HGVS, protein, RefSeq\">\n                            NP_005017.3:p.Ala77=\n                        </Attribute>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">synonymous variant</Attribute>\n                        <XRef ID=\"SO:0001819\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_001350234.2:c.231G&gt;A\" 
DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">synonymous variant</Attribute>\n                        <XRef ID=\"SO:0001819\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_001350235.1:c.231G&gt;A\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AttributeSet>\n                        <Attribute Type=\"MolecularConsequence\">synonymous variant</Attribute>\n                        <XRef ID=\"SO:0001819\" DB=\"Sequence Ontology\"/>\n                        <XRef ID=\"NM_005026.5:c.231G&gt;A\" DB=\"RefSeq\"/>\n                    </AttributeSet>\n                    <AlleleFrequencyList>\n                        <AlleleFrequency Value=\"0.00007\" Source=\"Exome Aggregation Consortium (ExAC)\"/>\n                        <AlleleFrequency Value=\"0.00006\" Source=\"The Genome Aggregation Database (gnomAD)\"/>\n                        <AlleleFrequency Value=\"0.00007\" Source=\"The Genome Aggregation Database (gnomAD)\"/>\n                        <AlleleFrequency Value=\"0.00006\" Source=\"The Genome Aggregation Database (gnomAD), exomes\"/>\n                        <AlleleFrequency Value=\"0.00008\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\"/>\n                        <AlleleFrequency Value=\"0.00006\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\"/>\n                    </AlleleFrequencyList>\n                    <CytogeneticLocation>1p36.22</CytogeneticLocation>\n                    <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                      AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"9715630\"\n                                      stop=\"9715630\" display_start=\"9715630\" display_stop=\"9715630\" variantLength=\"1\"\n                                      positionVCF=\"9715630\" referenceAlleleVCF=\"G\" 
alternateAlleleVCF=\"A\"/>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"9775688\"\n                                      stop=\"9775688\" display_start=\"9775688\" display_stop=\"9775688\" variantLength=\"1\"\n                                      positionVCF=\"9775688\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"A\"/>\n                    <MeasureRelationship Type=\"within single gene\">\n                        <Name>\n                            <ElementValue Type=\"Preferred\">phosphatidylinositol-4,5-bisphosphate 3-kinase catalytic\n                                subunit delta\n                            </ElementValue>\n                        </Name>\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">PIK3CD</ElementValue>\n                        </Symbol>\n                        <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                          AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"9627258\"\n                                          stop=\"9729114\" display_start=\"9627258\" display_stop=\"9729114\" Strand=\"+\"/>\n                        <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                          AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"9711789\"\n                                          stop=\"9789171\" display_start=\"9711789\" display_stop=\"9789171\"\n                                          variantLength=\"77383\" Strand=\"+\"/>\n                        <XRef ID=\"5293\" DB=\"Gene\"/>\n                        <XRef Type=\"MIM\" ID=\"602839\" DB=\"OMIM\"/>\n                        <XRef ID=\"HGNC:8977\" DB=\"HGNC\"/>\n                    
</MeasureRelationship>\n                    <XRef ID=\"164534\" DB=\"ARUP Laboratories, Molecular Genetics and Genomics,ARUP Laboratories\"/>\n                    <XRef Type=\"rs\" ID=\"756139699\" DB=\"dbSNP\"/>\n                </Measure>\n                <Name>\n                    <ElementValue Type=\"Preferred\">NM_005026.5(PIK3CD):c.231G&gt;A (p.Ala77=)</ElementValue>\n                </Name>\n                <Name>\n                    <ElementValue Type=\"Preferred\">NM_005026.5(PIK3CD):c.231G&gt;A (p.Ala77=)</ElementValue>\n                </Name>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\" ID=\"12475\">\n                <Trait ID=\"18461\" Type=\"Disease\">\n                    <Name>\n                        <ElementValue Type=\"Preferred\">Immunodeficiency 14</ElementValue>\n                        <XRef ID=\"MONDO:0014222\" DB=\"MONDO\"/>\n                    </Name>\n                    <Name>\n                        <ElementValue Type=\"Alternate\">p110-DELTA-ACTIVATING MUTATION CAUSING SENESCENT T CELLS,\n                            LYMPHADENOPATHY, AND IMMUNODEFICIENCY\n                        </ElementValue>\n                        <XRef Type=\"MIM\" ID=\"615513\" DB=\"OMIM\"/>\n                    </Name>\n                    <Name>\n                        <ElementValue Type=\"Alternate\">IMMUNODEFICIENCY 14A, AUTOSOMAL DOMINANT</ElementValue>\n                        <XRef Type=\"MIM\" ID=\"615513\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"602839.0001\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"602839.0002\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"602839.0003\" DB=\"OMIM\"/>\n                        <XRef Type=\"Allelic variant\" ID=\"602839.0004\" DB=\"OMIM\"/>\n                    </Name>\n                    <Symbol>\n                        <ElementValue Type=\"Preferred\">IMD14A</ElementValue>\n                     
   <XRef Type=\"MIM\" ID=\"615513\" DB=\"OMIM\"/>\n                    </Symbol>\n                    <Symbol>\n                        <ElementValue Type=\"Alternate\">PASLI</ElementValue>\n                        <XRef Type=\"MIM\" ID=\"615513\" DB=\"OMIM\"/>\n                    </Symbol>\n                    <XRef ID=\"MONDO:0014222\" DB=\"MONDO\"/>\n                    <XRef ID=\"C3714976\" DB=\"MedGen\"/>\n                    <XRef ID=\"397596\" DB=\"Orphanet\"/>\n                    <XRef Type=\"MIM\" ID=\"615513\" DB=\"OMIM\"/>\n                </Trait>\n            </TraitSet>\n        </ReferenceClinVarAssertion>\n        <ClinVarAssertion ID=\"3295756\" SubmissionName=\"SUB8755776\">\n            <ClinVarSubmissionID localKey=\"3436669|MedGen:C3714976\" submittedAssembly=\"GRCh37\" submitter=\"Invitae\"\n                                 submitterDate=\"2021-01-07\"/>\n            <ClinVarAccession Acc=\"SCV001692948\" Version=\"1\" Type=\"SCV\" OrgID=\"500031\" OrganizationCategory=\"laboratory\"\n                              OrgType=\"primary\" DateUpdated=\"2021-06-08\"/>\n            <RecordStatus>current</RecordStatus>\n            <ClinicalSignificance DateLastEvaluated=\"2020-08-22\">\n                <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                <Description>Likely benign</Description>\n            </ClinicalSignificance>\n            <Assertion Type=\"variation to disease\"/>\n            <ExternalID DB=\"Invitae\" ID=\"3436669\"/>\n            <AttributeSet>\n                <Attribute Type=\"AssertionMethod\">Invitae Variant Classification Sherloc (09022015)</Attribute>\n                <Citation>\n                    <ID Source=\"PubMed\">28492532</ID>\n                </Citation>\n            </AttributeSet>\n            <ObservedIn>\n                <Sample>\n                    <Origin>germline</Origin>\n                    <Species TaxonomyId=\"9606\">human</Species>\n                    
<AffectedStatus>unknown</AffectedStatus>\n                </Sample>\n                <Method>\n                    <MethodType>clinical testing</MethodType>\n                </Method>\n                <ObservedData>\n                    <Attribute Type=\"Description\">not provided</Attribute>\n                </ObservedData>\n            </ObservedIn>\n            <MeasureSet Type=\"Variant\">\n                <Measure Type=\"Variation\">\n                    <AttributeSet>\n                        <Attribute Type=\"HGVS\">NC_000001.10:g.9775688G&gt;A</Attribute>\n                    </AttributeSet>\n                    <MeasureRelationship Type=\"variant in gene\">\n                        <Symbol>\n                            <ElementValue Type=\"Preferred\">PIK3CD</ElementValue>\n                        </Symbol>\n                    </MeasureRelationship>\n                </Measure>\n            </MeasureSet>\n            <TraitSet Type=\"Disease\">\n                <Trait Type=\"Disease\">\n                    <XRef DB=\"MedGen\" ID=\"C3714976\" Type=\"CUI\"/>\n                </Trait>\n            </TraitSet>\n        </ClinVarAssertion>\n    </ClinVarSet>\n\n</ReleaseSet>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/VCVs/TwoRecords.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n<ClinVarVariationRelease xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_variation/variation_archive_1.4.xsd\" ReleaseDate=\"2019-12-31\">\n<VariationArchive VariationID=\"79\" VariationName=\"LIPA, 934G-A\" VariationType=\"single nucleotide variant\" DateCreated=\"2010-12-01\" DateLastUpdated=\"2019-03-29\" Accession=\"VCV000000079\" Version=\"1\" RecordType=\"interpreted\" NumberOfSubmissions=\"1\" NumberOfSubmitters=\"1\">\n  <RecordStatus>current</RecordStatus>\n  <Species>Homo sapiens</Species>\n  <InterpretedRecord>\n    <SimpleAllele AlleleID=\"15118\" VariationID=\"79\">\n      <GeneList>\n        <Gene Symbol=\"LIPA\" FullName=\"lipase A, lysosomal acid type\" GeneID=\"3988\" HGNC_ID=\"HGNC:6617\" Source=\"submitted\" RelationshipType=\"asserted, but not computed\">\n          <Location>\n            <CytogeneticLocation>10q23.31</CytogeneticLocation>\n            <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"10\" Accession=\"NC_000010.11\" start=\"89213569\" stop=\"89252039\" display_start=\"89213569\" display_stop=\"89252039\" Strand=\"-\"/>\n            <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"10\" Accession=\"NC_000010.10\" start=\"90973325\" stop=\"91011659\" display_start=\"90973325\" display_stop=\"91011659\" Strand=\"-\"/>\n          </Location>\n          <OMIM>613497</OMIM>\n        </Gene>\n      </GeneList>\n      <Name>LIPA, 934G-A</Name>\n      <VariantType>single nucleotide variant</VariantType>\n      <Location>\n        <CytogeneticLocation>10q23.31</CytogeneticLocation>\n      </Location>\n      <OtherNameList>\n        <Name>934G-A</Name>\n      </OtherNameList>\n      <XRefList>\n        <XRef Type=\"Allelic 
variant\" ID=\"613497.0002\" DB=\"OMIM\"/>\n      </XRefList>\n    </SimpleAllele>\n    <ReviewStatus>no assertion criteria provided</ReviewStatus>\n    <RCVList>\n      <RCVAccession Title=\"LIPA, 934G-A AND Lysosomal acid lipase deficiency\" DateLastEvaluated=\"1996-04-01\" ReviewStatus=\"no assertion criteria provided\" Interpretation=\"Pathogenic\" SubmissionCount=\"1\" Accession=\"RCV000000098\" Version=\"3\">\n        <InterpretedConditionList>\n          <InterpretedCondition DB=\"MedGen\" ID=\"C0043208\">Lysosomal acid lipase deficiency</InterpretedCondition>\n        </InterpretedConditionList>\n      </RCVAccession>\n    </RCVList>\n    <Interpretations>\n      <Interpretation DateLastEvaluated=\"1996-04-01\" NumberOfSubmissions=\"1\" NumberOfSubmitters=\"1\" Type=\"Clinical significance\">\n        <Description>Pathogenic</Description>\n        <Citation Type=\"general\">\n          <CitationText>Maslen, C. L., Illingworth, D. R. Molecular genetics of cholesterol ester hydrolase deficiency. (Abstract) Am. J. Hum. Genet. 
53 (suppl.): A926, 1993.</CitationText>\n        </Citation>\n        <Citation Type=\"general\">\n          <ID Source=\"PubMed\">7759067</ID>\n        </Citation>\n        <Citation Type=\"general\">\n          <ID Source=\"PubMed\">8254026</ID>\n        </Citation>\n        <Citation Type=\"general\">\n          <ID Source=\"PubMed\">8598644</ID>\n        </Citation>\n        <Citation Type=\"general\">\n          <ID Source=\"PubMed\">8617513</ID>\n        </Citation>\n        <ConditionList>\n          <TraitSet ID=\"41\" Type=\"Disease\">\n            <Trait ID=\"2626\" Type=\"Disease\">\n              <Name>\n                <ElementValue Type=\"Alternate\">CHOLESTEROL ESTER HYDROLASE DEFICIENCY</ElementValue>\n                <XRef Type=\"MIM\" ID=\"278000\" DB=\"OMIM\"/>\n              </Name>\n              <Name>\n                <ElementValue Type=\"Preferred\">Lysosomal acid lipase deficiency</ElementValue>\n                <XRef ID=\"Wolman+disease/7523\" DB=\"Genetic Alliance\"/>\n              </Name>\n              <Name>\n                <ElementValue Type=\"Alternate\">LAL DEFICIENCY</ElementValue>\n                <XRef Type=\"MIM\" ID=\"278000\" DB=\"OMIM\"/>\n              </Name>\n              <AttributeSet>\n                <Attribute Type=\"public definition\">The phenotypic spectrum of lysosomal acid lipase (LAL) deficiency ranges from the infantile-onset form (Wolman disease) to later-onset forms collectively known as cholesterol ester storage disease (CESD). Wolman disease is characterized by infantile-onset malabsorption that results in malnutrition, storage of cholesterol esters and triglycerides in hepatic macrophages that results in hepatomegaly and liver disease, and adrenal gland calcification that results in adrenal cortical insufficiency. Unless successfully treated with hematopoietic stem cell transplantation (HSCT), infants with classic Wolman disease do not survive beyond age one year. 
CESD may present in childhood in a manner similar to Wolman disease or later in life with such findings as serum lipid abnormalities, hepatosplenomegaly, and/or elevated liver enzymes long before a diagnosis is made. The morbidity of late-onset CESD results from atherosclerosis (coronary artery disease, stroke), liver disease (e.g., altered liver function &#177; jaundice, steatosis, fibrosis, cirrhosis and related complications of esophageal varices, and/or liver failure), complications of secondary hypersplenism (i.e., anemia and/or thrombocytopenia), and/or malabsorption. Individuals with CESD may have a normal life span depending on the severity of disease manifestations.</Attribute>\n                <XRef ID=\"NBK305870\" DB=\"GeneReviews\"/>\n              </AttributeSet>\n              <Citation Type=\"Suggested Reading\" Abbrev=\"Shirley, 2015\">\n                <ID Source=\"PubMed\">26452566</ID>\n              </Citation>\n              <Citation Type=\"review\" Abbrev=\"GeneReviews\">\n                <ID Source=\"PubMed\">26225414</ID>\n                <ID Source=\"BookShelf\">NBK305870</ID>\n              </Citation>\n              <XRef ID=\"275761\" DB=\"Orphanet\"/>\n              <XRef ID=\"C0043208\" DB=\"MedGen\"/>\n              <XRef Type=\"MIM\" ID=\"278000\" DB=\"OMIM\"/>\n            </Trait>\n          </TraitSet>\n        </ConditionList>\n      </Interpretation>\n    </Interpretations>\n    <ClinicalAssertionList>\n      <ClinicalAssertion ID=\"20241\" DateCreated=\"2011-01-25\" DateLastUpdated=\"2019-03-31\" SubmissionDate=\"2010-12-30\">\n        <ClinVarSubmissionID localKey=\"613497.0002_CHOLESTERYL ESTER STORAGE DISEASE\" title=\"LIPA, 934G-A_CHOLESTERYL ESTER STORAGE DISEASE\"/>\n        <ClinVarAccession Accession=\"SCV000020241\" Type=\"SCV\" Version=\"2\" SubmitterName=\"OMIM\" OrgID=\"3\" OrganizationCategory=\"resource\"/>\n        <RecordStatus>current</RecordStatus>\n        <ReviewStatus>no assertion criteria 
provided</ReviewStatus>\n        <Interpretation DateLastEvaluated=\"1996-04-01\">\n          <Description>Pathogenic</Description>\n        </Interpretation>\n        <Assertion>variation to disease</Assertion>\n        <ObservedInList>\n          <ObservedIn>\n            <Sample>\n              <Origin>germline</Origin>\n              <Species>human</Species>\n              <AffectedStatus>not provided</AffectedStatus>\n            </Sample>\n            <Method>\n              <MethodType>literature only</MethodType>\n            </Method>\n            <ObservedData>\n              <Attribute Type=\"Description\">In a 12-year-old patient with cholesteryl ester storage disease (278000) from a nonconsanguineous Polish-German family, Klima et al. (1993) detected a 72-bp in-frame deletion resulting in the loss of amino acid codons 254 through 277. Analysis of genomic DNA revealed that the 72 bp represented an exon, indicating that the deletion in the mRNA was caused by defective splicing. Sequence analysis of the patient's genomic DNA revealed a G-to-A substitution in the last nucleotide of the 72-bp exon on 1 allele. No normal-sized mRNA was detectable in the propositus even though he was not homozygous for the splice site mutation. Klima et al. (1993) concluded that the patient was compound heterozygous for the splice site mutation and a null allele. The patient showed LIPA activity in cultured skin fibroblasts approximately 9% of normal. Hepatosplenomegaly had been present since age 5 years.</Attribute>\n              <Citation>\n                <ID Source=\"PubMed\">8254026</ID>\n              </Citation>\n              <XRef DB=\"OMIM\" ID=\"278000\" Type=\"MIM\"/>\n            </ObservedData>\n            <ObservedData>\n              <Attribute Type=\"Description\">Aslanidis et al. (1996) restudied the patient of Klima et al. 
(1993) and defined the splice site mutation as a G-to-A mutation at position -1 of the splice donor site following exon 8, resulting in incorrect splicing and the removal of the 72-bp exon 8 of the LIPA gene. They determined that the other allele of the patient carried a premature termination mutation (613497.0003) as well as the L179P mutation (613497.0001); the LIPA mRNA was rendered unstable by the premature stop codon. Aslanidis et al. (1996) demonstrated that the splice site mutation allowed the production of approximately 3 to 4% of correctly spliced mRNA relative to wildtype. Aslanidis et al. (1996) also identified a mutation at the same splice donor site, and also resulting in deletion of exon 8, in 2 sibs with Wolman disease; that mutation, at the +1 position, allowed no correct splicing, and patient fibroblasts were devoid of enzymatic activity. See 613497.0005.</Attribute>\n              <Citation>\n                <ID Source=\"PubMed\">8617513</ID>\n              </Citation>\n              <Citation>\n                <ID Source=\"PubMed\">8254026</ID>\n              </Citation>\n            </ObservedData>\n            <ObservedData>\n              <Attribute Type=\"Description\">In 2 sibs with CESD, Maslen and Illingworth (1993) and Maslen et al. (1995) identified compound heterozygosity for this splice site mutation in the LIPA gene, inherited from their father, and the L179P mutation (613497.0001). The affected children were a sister and brother who presented with idiopathic hepatomegaly at ages 6 and 8 years, respectively. Subsequent analyses indicated that they also had hypercholesterolemia and a severe reduction in cholesteryl ester hydrolase activity in cultured fibroblasts.</Attribute>\n              <Citation>\n                <CitationText>Maslen, C. L., Illingworth, D. R. Molecular genetics of cholesterol ester hydrolase deficiency. (Abstract) Am. J. Hum. Genet. 
53 (suppl.): A926, 1993.</CitationText>\n              </Citation>\n              <Citation>\n                <ID Source=\"PubMed\">8598644</ID>\n              </Citation>\n            </ObservedData>\n            <ObservedData>\n              <Attribute Type=\"Description\">Muntoni et al. (1995) observed homozygosity for the splice site mutation (Klima et al., 1993) in a Spanish kindred with cholesterol ester storage disease. Exon 8 of the LIPA gene was deleted.</Attribute>\n              <Citation>\n                <ID Source=\"PubMed\">7759067</ID>\n              </Citation>\n              <Citation>\n                <ID Source=\"PubMed\">8254026</ID>\n              </Citation>\n            </ObservedData>\n          </ObservedIn>\n        </ObservedInList>\n        <SimpleAllele>\n          <GeneList>\n            <Gene Symbol=\"LIPA\"/>\n          </GeneList>\n          <Name>LIPA, 934G-A</Name>\n          <VariantType>Variation</VariantType>\n          <OtherNameList>\n            <Name Type=\"NonHGVS\">934G-A</Name>\n          </OtherNameList>\n          <XRefList>\n            <XRef DB=\"OMIM\" ID=\"613497.0002\" Type=\"Allelic variant\"/>\n          </XRefList>\n        </SimpleAllele>\n        <TraitSet Type=\"Disease\">\n          <Trait Type=\"Disease\">\n            <Name>\n              <ElementValue Type=\"Preferred\">CHOLESTERYL ESTER STORAGE DISEASE</ElementValue>\n            </Name>\n          </Trait>\n        </TraitSet>\n      </ClinicalAssertion>\n    </ClinicalAssertionList>\n    <TraitMappingList>\n      <TraitMapping ClinicalAssertionID=\"20241\" TraitType=\"Disease\" MappingType=\"Name\" MappingValue=\"CHOLESTERYL ESTER STORAGE DISEASE\" MappingRef=\"Preferred\">\n        <MedGen CUI=\"C0008384\" Name=\"Cholesteryl ester storage disease\"/>\n      </TraitMapping>\n    </TraitMappingList>\n  </InterpretedRecord>\n</VariationArchive>\n\n<VariationArchive VariationID=\"86\" VariationName=\"HPSE2, EX8-9DEL\" VariationType=\"Deletion\" 
DateCreated=\"2010-12-01\" DateLastUpdated=\"2019-03-29\" Accession=\"VCV000000086\" Version=\"1\" RecordType=\"interpreted\" NumberOfSubmissions=\"1\" NumberOfSubmitters=\"1\">\n  <RecordStatus>current</RecordStatus>\n  <Species>Homo sapiens</Species>\n  <InterpretedRecord>\n    <SimpleAllele AlleleID=\"15125\" VariationID=\"86\">\n      <GeneList>\n        <Gene Symbol=\"HPSE2\" FullName=\"heparanase 2 (inactive)\" GeneID=\"60495\" HGNC_ID=\"HGNC:18374\" Source=\"submitted\" RelationshipType=\"asserted, but not computed\">\n          <Location>\n            <CytogeneticLocation>10q24.2</CytogeneticLocation>\n            <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"10\" Accession=\"NC_000010.11\" start=\"98457077\" stop=\"99235875\" display_start=\"98457077\" display_stop=\"99235875\" Strand=\"-\"/>\n            <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"10\" Accession=\"NC_000010.10\" start=\"100216833\" stop=\"100995631\" display_start=\"100216833\" display_stop=\"100995631\" Strand=\"-\"/>\n          </Location>\n          <OMIM>613469</OMIM>\n        </Gene>\n      </GeneList>\n      <Name>HPSE2, EX8-9DEL</Name>\n      <VariantType>Deletion</VariantType>\n      <Location>\n        <CytogeneticLocation>10q23-q24</CytogeneticLocation>\n      </Location>\n      <OtherNameList>\n        <Name>EX8-9DEL</Name>\n      </OtherNameList>\n      <XRefList>\n        <XRef Type=\"Allelic variant\" ID=\"613469.0004\" DB=\"OMIM\"/>\n      </XRefList>\n    </SimpleAllele>\n    <ReviewStatus>no assertion criteria provided</ReviewStatus>\n    <RCVList>\n      <RCVAccession Title=\"HPSE2, EX8-9DEL AND Ochoa syndrome\" DateLastEvaluated=\"2010-06-11\" ReviewStatus=\"no assertion criteria provided\" Interpretation=\"Pathogenic\" SubmissionCount=\"1\" Accession=\"RCV000000106\" Version=\"5\">\n        <InterpretedConditionList>\n         
 <InterpretedCondition DB=\"MedGen\" ID=\"C0403555\">Ochoa syndrome</InterpretedCondition>\n        </InterpretedConditionList>\n      </RCVAccession>\n    </RCVList>\n    <Interpretations>\n      <Interpretation DateLastEvaluated=\"2010-06-11\" NumberOfSubmissions=\"1\" NumberOfSubmitters=\"1\" Type=\"Clinical significance\">\n        <Description>Pathogenic</Description>\n        <Citation Type=\"general\">\n          <ID Source=\"PubMed\">20560210</ID>\n        </Citation>\n        <ConditionList>\n          <TraitSet ID=\"42\" Type=\"Disease\">\n            <Trait ID=\"5090\" Type=\"Disease\">\n              <Name>\n                <ElementValue Type=\"Preferred\">Ochoa syndrome</ElementValue>\n                <XRef ID=\"Ochoa+syndrome/5288\" DB=\"Genetic Alliance\"/>\n                <XRef ID=\"ochoa-syndrome\" DB=\"Genetics Home Reference\"/>\n                <XRef ID=\"104\" DB=\"Office of Rare Diseases\"/>\n                <XRef ID=\"236533008\" DB=\"SNOMED CT\"/>\n              </Name>\n              <Name>\n                <ElementValue Type=\"Alternate\">Urofacial syndrome 1</ElementValue>\n              </Name>\n              <Name>\n                <ElementValue Type=\"Alternate\">HPSE2-Releated Urofacial Syndrome</ElementValue>\n              </Name>\n              <Name>\n                <ElementValue Type=\"Alternate\">Urofacial Syndrome</ElementValue>\n              </Name>\n              <Symbol>\n                <ElementValue Type=\"Alternate\">UFS</ElementValue>\n                <XRef Type=\"MIM\" ID=\"236730\" DB=\"OMIM\"/>\n                <XRef ID=\"104\" DB=\"Office of Rare Diseases\"/>\n              </Symbol>\n              <Symbol>\n                <ElementValue Type=\"Preferred\">UFS1</ElementValue>\n                <XRef Type=\"MIM\" ID=\"236730\" DB=\"OMIM\"/>\n              </Symbol>\n              <AttributeSet>\n                <Attribute Type=\"public definition\">Urofacial syndrome (UFS) is characterized by prenatal or infantile 
onset of urinary bladder voiding dysfunction, abnormal facial movement with expression (resulting from abnormal co-contraction of the corners of the mouth and eyes), and often bowel dysfunction (constipation and/or encopresis). Bladder voiding dysfunction increases the risk for urinary incontinence, megacystis, vesicoureteric reflux, hydroureteronephrosis, urosepsis, and progressive renal impairment. In rare instances, an individual who has (a) a molecularly confirmed diagnosis and/or (b) an affected relative meeting clinical diagnostic criteria manifests only the characteristic facial features or only the urinary bladder voiding dysfunction (not both). Nocturnal lagophthalmos (incomplete closing of the eyes during sleep) appears to be a common and significant finding.</Attribute>\n                <XRef ID=\"NBK154138\" DB=\"GeneReviews\"/>\n              </AttributeSet>\n              <Citation Type=\"review\" Abbrev=\"GeneReviews\">\n                <ID Source=\"PubMed\">23967498</ID>\n                <ID Source=\"BookShelf\">NBK154138</ID>\n              </Citation>\n              <XRef ID=\"2704\" DB=\"Orphanet\"/>\n              <XRef ID=\"C0403555\" DB=\"MedGen\"/>\n              <XRef Type=\"MIM\" ID=\"236730\" DB=\"OMIM\"/>\n            </Trait>\n          </TraitSet>\n        </ConditionList>\n      </Interpretation>\n    </Interpretations>\n    <ClinicalAssertionList>\n      <ClinicalAssertion ID=\"20249\" DateCreated=\"2011-01-25\" DateLastUpdated=\"2019-03-31\" SubmissionDate=\"2010-12-30\">\n        <ClinVarSubmissionID localKey=\"613469.0004_UROFACIAL SYNDROME 1\" title=\"HPSE2, EX8-9DEL_UROFACIAL SYNDROME 1\"/>\n        <ClinVarAccession Accession=\"SCV000020249\" Type=\"SCV\" Version=\"3\" SubmitterName=\"OMIM\" OrgID=\"3\" OrganizationCategory=\"resource\"/>\n        <RecordStatus>current</RecordStatus>\n        <ReviewStatus>no assertion criteria provided</ReviewStatus>\n        <Interpretation DateLastEvaluated=\"2010-06-11\">\n          
<Description>Pathogenic</Description>\n        </Interpretation>\n        <Assertion>variation to disease</Assertion>\n        <ObservedInList>\n          <ObservedIn>\n            <Sample>\n              <Origin>germline</Origin>\n              <Species>human</Species>\n              <AffectedStatus>not provided</AffectedStatus>\n            </Sample>\n            <Method>\n              <MethodType>literature only</MethodType>\n            </Method>\n            <ObservedData>\n              <Attribute Type=\"Description\">By copy number analysis in a consanguineous British Pakistani family with urofacial syndrome (UFS1; 236730), Daly et al. (2010) identified homozygosity for an intragenic deletion encompassing exons 8 and 9 of the HPSE2 gene that segregated with the disease. PCR analysis and DNA sequencing across the breakpoints defined a 10.81-kb deletion and a 23-bp insertion at the breakpoints, predicted to cause an in-frame deletion of exons 8 and 9 and removal of 74 amino acids. 
The unaffected parents were heterozygous for the mutation, which was not found in 93 Pakistani controls.</Attribute>\n              <Citation>\n                <ID Source=\"PubMed\">20560210</ID>\n              </Citation>\n              <XRef DB=\"OMIM\" ID=\"236730\" Type=\"MIM\"/>\n            </ObservedData>\n          </ObservedIn>\n        </ObservedInList>\n        <SimpleAllele>\n          <GeneList>\n            <Gene Symbol=\"HPSE2\"/>\n          </GeneList>\n          <Name>HPSE2, EX8-9DEL</Name>\n          <VariantType>Variation</VariantType>\n          <OtherNameList>\n            <Name Type=\"NonHGVS\">EX8-9DEL</Name>\n          </OtherNameList>\n          <XRefList>\n            <XRef DB=\"OMIM\" ID=\"613469.0004\" Type=\"Allelic variant\"/>\n          </XRefList>\n        </SimpleAllele>\n        <TraitSet Type=\"Disease\">\n          <Trait Type=\"Disease\">\n            <Name>\n              <ElementValue Type=\"Preferred\">UROFACIAL SYNDROME 1</ElementValue>\n            </Name>\n          </Trait>\n        </TraitSet>\n      </ClinicalAssertion>\n    </ClinicalAssertionList>\n    <TraitMappingList>\n      <TraitMapping ClinicalAssertionID=\"20249\" TraitType=\"Disease\" MappingType=\"Name\" MappingValue=\"UROFACIAL SYNDROME 1\" MappingRef=\"Preferred\">\n        <MedGen CUI=\"C0403555\" Name=\"Ochoa syndrome\"/>\n      </TraitMapping>\n    </TraitMappingList>\n  </InterpretedRecord>\n</VariationArchive>\n</ClinVarVariationRelease>\n"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/VCVs/VCV000000081.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n<ClinVarVariationRelease xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n                         xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_variation/variation_archive_1.4.xsd\"\n                         ReleaseDate=\"2019-12-31\">\n    <VariationArchive VariationID=\"81\" VariationName=\"NM_000235.4(LIPA):c.894+1G&gt;A\"\n                      VariationType=\"single nucleotide variant\" DateCreated=\"2010-12-01\" DateLastUpdated=\"2019-10-27\"\n                      Accession=\"VCV000000081\" Version=\"1\" RecordType=\"interpreted\" NumberOfSubmissions=\"1\"\n                      NumberOfSubmitters=\"1\">\n        <RecordStatus>current</RecordStatus>\n        <Species>Homo sapiens</Species>\n        <InterpretedRecord>\n            <SimpleAllele AlleleID=\"15120\" VariationID=\"81\">\n                <GeneList>\n                    <Gene Symbol=\"LIPA\" FullName=\"lipase A, lysosomal acid type\" GeneID=\"3988\" HGNC_ID=\"HGNC:6617\"\n                          Source=\"submitted\" RelationshipType=\"within single gene\">\n                        <Location>\n                            <CytogeneticLocation>10q23.31</CytogeneticLocation>\n                            <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                              AssemblyStatus=\"current\" Chr=\"10\" Accession=\"NC_000010.11\"\n                                              start=\"89213569\" stop=\"89252039\" display_start=\"89213569\"\n                                              display_stop=\"89252039\" Strand=\"-\"/>\n                            <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                              AssemblyStatus=\"previous\" Chr=\"10\" Accession=\"NC_000010.10\"\n                                              
start=\"90973325\" stop=\"91011659\" display_start=\"90973325\"\n                                              display_stop=\"91011659\" Strand=\"-\"/>\n                        </Location>\n                        <OMIM>613497</OMIM>\n                    </Gene>\n                </GeneList>\n                <Name>NM_000235.4(LIPA):c.894+1G&gt;A</Name>\n                <VariantType>single nucleotide variant</VariantType>\n                <Location>\n                    <CytogeneticLocation>10q23.31</CytogeneticLocation>\n                    <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" forDisplay=\"true\"\n                                      AssemblyStatus=\"current\" Chr=\"10\" Accession=\"NC_000010.11\" start=\"89222510\"\n                                      stop=\"89222510\" display_start=\"89222510\" display_stop=\"89222510\" variantLength=\"1\"\n                                      positionVCF=\"89222510\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"T\"/>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"10\" Accession=\"NC_000010.10\" start=\"90982267\"\n                                      stop=\"90982267\" display_start=\"90982267\" display_stop=\"90982267\" variantLength=\"1\"\n                                      positionVCF=\"90982267\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"T\"/>\n                </Location>\n                <OtherNameList>\n                    <Name>IVS8, G-A, +1</Name>\n                </OtherNameList>\n                <HGVSlist>\n                    <HGVS Assembly=\"GRCh37\" Type=\"genomic, top-level\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NC_000010.10\" sequenceAccession=\"NC_000010\"\n                                              sequenceVersion=\"10\" change=\"g.90982267C&gt;T\" Assembly=\"GRCh37\">\n                 
           <Expression>NC_000010.10:g.90982267C&gt;T</Expression>\n                        </NucleotideExpression>\n                    </HGVS>\n                    <HGVS Assembly=\"GRCh38\" Type=\"genomic, top-level\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NC_000010.11\" sequenceAccession=\"NC_000010\"\n                                              sequenceVersion=\"11\" change=\"g.89222510C&gt;T\" Assembly=\"GRCh38\">\n                            <Expression>NC_000010.11:g.89222510C&gt;T</Expression>\n                        </NucleotideExpression>\n                    </HGVS>\n                    <HGVS Type=\"coding\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NM_000235.4\" sequenceAccession=\"NM_000235\"\n                                              sequenceVersion=\"4\" change=\"c.894+1G&gt;A\">\n                            <Expression>NM_000235.4:c.894+1G&gt;A</Expression>\n                        </NucleotideExpression>\n                        <MolecularConsequence ID=\"SO:0001575\" Type=\"splice donor variant\" DB=\"SO\"/>\n                    </HGVS>\n                    <HGVS Type=\"coding\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NM_001127605.3\" sequenceAccession=\"NM_001127605\"\n                                              sequenceVersion=\"3\" change=\"c.894+1G&gt;A\">\n                            <Expression>NM_001127605.3:c.894+1G&gt;A</Expression>\n                        </NucleotideExpression>\n                        <MolecularConsequence ID=\"SO:0001575\" Type=\"splice donor variant\" DB=\"SO\"/>\n                    </HGVS>\n                    <HGVS Type=\"coding\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NM_001288979.1\" sequenceAccession=\"NM_001288979\"\n                                              sequenceVersion=\"1\" change=\"c.546+1G&gt;A\">\n                            
<Expression>NM_001288979.1:c.546+1G&gt;A</Expression>\n                        </NucleotideExpression>\n                        <MolecularConsequence ID=\"SO:0001575\" Type=\"splice donor variant\" DB=\"SO\"/>\n                    </HGVS>\n                    <HGVS Type=\"genomic\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NG_008194.1\" sequenceAccession=\"NG_008194\"\n                                              sequenceVersion=\"1\" change=\"g.34394G&gt;A\">\n                            <Expression>NG_008194.1:g.34394G&gt;A</Expression>\n                        </NucleotideExpression>\n                    </HGVS>\n                </HGVSlist>\n                <XRefList>\n                    <XRef Type=\"Allelic variant\" ID=\"613497.0005\" DB=\"OMIM\"/>\n                    <XRef Type=\"rs\" ID=\"1564751995\" DB=\"dbSNP\"/>\n                </XRefList>\n                <Comment DataSource=\"ClinGen\" Type=\"public\">ClinGen staff contributed the HGVS expression for this\n                    variant.\n                </Comment>\n            </SimpleAllele>\n            <ReviewStatus>no assertion criteria provided</ReviewStatus>\n            <RCVList>\n                <RCVAccession Title=\"NM_000235.4(LIPA):c.894+1G&gt;A AND Wolman disease\" DateLastEvaluated=\"1996-04-01\"\n                              ReviewStatus=\"no assertion criteria provided\" Interpretation=\"Pathogenic\"\n                              SubmissionCount=\"1\" Accession=\"RCV000000101\" Version=\"3\">\n                    <InterpretedConditionList>\n                        <InterpretedCondition DB=\"MedGen\" ID=\"CN438428\">Wolman disease</InterpretedCondition>\n                    </InterpretedConditionList>\n                </RCVAccession>\n            </RCVList>\n            <Interpretations>\n                <Interpretation DateLastEvaluated=\"1996-04-01\" NumberOfSubmissions=\"1\" NumberOfSubmitters=\"1\"\n                                Type=\"Clinical 
significance\">\n                    <Description>Pathogenic</Description>\n                    <Citation Type=\"general\">\n                        <ID Source=\"PubMed\">8617513</ID>\n                    </Citation>\n                    <ConditionList>\n                        <TraitSet ID=\"40\" Type=\"Disease\">\n                            <Trait ID=\"9048\" Type=\"Disease\">\n                                <Name>\n                                    <ElementValue Type=\"Preferred\">Wolman disease</ElementValue>\n                                    <XRef ID=\"7899\" DB=\"Office of Rare Diseases\"/>\n                                </Name>\n                                <XRef ID=\"75233\" DB=\"Orphanet\"/>\n                                <XRef ID=\"CN438428\" DB=\"MedGen\"/>\n                            </Trait>\n                        </TraitSet>\n                    </ConditionList>\n                </Interpretation>\n            </Interpretations>\n            <ClinicalAssertionList>\n                <ClinicalAssertion ID=\"20244\" DateCreated=\"2011-01-25\" DateLastUpdated=\"2019-03-31\"\n                                   SubmissionDate=\"2010-12-30\">\n                    <ClinVarSubmissionID localKey=\"613497.0005_WOLMAN DISEASE\"\n                                         title=\"LIPA, IVS8, G-A, +1_WOLMAN DISEASE\"/>\n                    <ClinVarAccession Accession=\"SCV000020244\" Type=\"SCV\" Version=\"2\" SubmitterName=\"OMIM\" OrgID=\"3\"\n                                      OrganizationCategory=\"resource\"/>\n                    <RecordStatus>current</RecordStatus>\n                    <ReviewStatus>no assertion criteria provided</ReviewStatus>\n                    <Interpretation DateLastEvaluated=\"1996-04-01\">\n                        <Description>Pathogenic</Description>\n                    </Interpretation>\n                    <Assertion>variation to disease</Assertion>\n                    <ObservedInList>\n                        
<ObservedIn>\n                            <Sample>\n                                <Origin>germline</Origin>\n                                <Species>human</Species>\n                                <AffectedStatus>not provided</AffectedStatus>\n                            </Sample>\n                            <Method>\n                                <MethodType>literature only</MethodType>\n                            </Method>\n                            <ObservedData>\n                                <Attribute Type=\"Description\">In 2 sibs with Wolman disease (278000) from a\n                                    consanguineous family, Aslanidis et al. (1996) detected homozygosity for a G-to-A\n                                    mutation at position +1 of the splice donor site following exon 8 of the LIPA gene.\n                                    Both children died within the first year of life. The parents, who were heterozygous\n                                    for the mutation, had reduced enzymatic activity, while no enzymatic activity was\n                                    detectable in fibroblasts from the affected children. Although the same donor splice\n                                    site is involved as in the mutation reported in CESD (934G-A, 613497.0002), the\n                                    nucleotide at position +1 was changed in the Wolman disease mutation while the\n                                    nucleotide at position -1 was changed in the CESD mutation. 
Both mutations result in\n                                    deletion of the same 24 amino acids (exon 8), but the effects are dramatically\n                                    different: the -1 mutation allowed some correct splicing (3% of total LIPA RNA), but\n                                    the +1 splice site mutation, which affects one of the invariable nucleotides of the\n                                    splice consensus sequences, permits no correct splicing. Aslanidis et al. (1996)\n                                    suggested that the residual activity in CESD patients compared to Wolman patients\n                                    may result either from a partially active enzyme with the internal deletion of 24\n                                    amino acids (skipping of exon 8) or from the production of low amounts of the full\n                                    size of the protein due to inefficient exon exclusion from the mutated allele.\n                                </Attribute>\n                                <Citation>\n                                    <ID Source=\"PubMed\">8617513</ID>\n                                </Citation>\n                                <XRef DB=\"OMIM\" ID=\"278000\" Type=\"MIM\"/>\n                            </ObservedData>\n                        </ObservedIn>\n                    </ObservedInList>\n                    <SimpleAllele>\n                        <GeneList>\n                            <Gene Symbol=\"LIPA\"/>\n                        </GeneList>\n                        <Name>LIPA, IVS8, G-A, +1</Name>\n                        <VariantType>Variation</VariantType>\n                        <OtherNameList>\n                            <Name Type=\"NonHGVS\">IVS8, G-A, +1</Name>\n                        </OtherNameList>\n                        <XRefList>\n                            <XRef DB=\"OMIM\" ID=\"613497.0005\" Type=\"Allelic variant\"/>\n                        </XRefList>\n         
           </SimpleAllele>\n                    <TraitSet Type=\"Disease\">\n                        <Trait Type=\"Disease\">\n                            <Name>\n                                <ElementValue Type=\"Preferred\">WOLMAN DISEASE</ElementValue>\n                            </Name>\n                        </Trait>\n                    </TraitSet>\n                </ClinicalAssertion>\n            </ClinicalAssertionList>\n            <TraitMappingList>\n                <TraitMapping ClinicalAssertionID=\"20244\" TraitType=\"Disease\" MappingType=\"Name\"\n                              MappingValue=\"WOLMAN DISEASE\" MappingRef=\"Preferred\">\n                    <MedGen CUI=\"CN438428\" Name=\"Wolman disease\"/>\n                </TraitMapping>\n            </TraitMappingList>\n        </InterpretedRecord>\n    </VariationArchive>\n</ClinVarVariationRelease>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/VCVs/VCV000137106.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n<ClinVarVariationRelease xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n                         xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_variation/variation_archive_1.4.xsd\"\n                         ReleaseDate=\"2019-12-31\">\n    <VariationArchive VariationID=\"137106\" VariationName=\"NM_004006.2(DMD):c.8810A&gt;G (p.Gln2937Arg)\"\n                      VariationType=\"single nucleotide variant\" DateCreated=\"2014-06-18\" DateLastUpdated=\"2019-12-17\"\n                      Accession=\"VCV000137106\" Version=\"3\" RecordType=\"interpreted\" NumberOfSubmissions=\"4\"\n                      NumberOfSubmitters=\"4\">\n        <RecordStatus>current</RecordStatus>\n        <Species>Homo sapiens</Species>\n        <InterpretedRecord>\n            <SimpleAllele AlleleID=\"140809\" VariationID=\"137106\">\n                <GeneList>\n                    <Gene Symbol=\"DMD\" FullName=\"dystrophin\" GeneID=\"1756\" HGNC_ID=\"HGNC:2928\" Source=\"submitted\"\n                          RelationshipType=\"within single gene\">\n                        <Location>\n                            <CytogeneticLocation>Xp21.2-21.1</CytogeneticLocation>\n                            <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                              AssemblyStatus=\"current\" Chr=\"X\" Accession=\"NC_000023.11\" start=\"31119219\"\n                                              stop=\"33339460\" display_start=\"31119219\" display_stop=\"33339460\"\n                                              Strand=\"-\"/>\n                            <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                              AssemblyStatus=\"previous\" Chr=\"X\" Accession=\"NC_000023.10\"\n                                              
start=\"31137344\" stop=\"33357725\" display_start=\"31137344\"\n                                              display_stop=\"33357725\" Strand=\"-\"/>\n                        </Location>\n                        <OMIM>300377</OMIM>\n                        <Haploinsufficiency last_evaluated=\"2019-11-20\"\n                                            ClinGen=\"https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=DMD\">\n                            Sufficient evidence for dosage pathogenicity\n                        </Haploinsufficiency>\n                        <Triplosensitivity last_evaluated=\"2019-11-20\"\n                                           ClinGen=\"https://www.ncbi.nlm.nih.gov/projects/dbvar/ISCA/isca_gene.cgi?sym=DMD\">\n                            No evidence available\n                        </Triplosensitivity>\n                    </Gene>\n                </GeneList>\n                <Name>NM_004006.2(DMD):c.8810A&gt;G (p.Gln2937Arg)</Name>\n                <VariantType>single nucleotide variant</VariantType>\n                <Location>\n                    <CytogeneticLocation>Xp21.2</CytogeneticLocation>\n                    <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" forDisplay=\"true\"\n                                      AssemblyStatus=\"current\" Chr=\"X\" Accession=\"NC_000023.11\" start=\"31478233\"\n                                      stop=\"31478233\" display_start=\"31478233\" display_stop=\"31478233\" variantLength=\"1\"\n                                      positionVCF=\"31478233\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"C\"/>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"X\" Accession=\"NC_000023.10\" start=\"31496350\"\n                                      stop=\"31496350\" display_start=\"31496350\" display_stop=\"31496350\" 
variantLength=\"1\"\n                                      positionVCF=\"31496350\" referenceAlleleVCF=\"C\" alternateAlleleVCF=\"C\"/>\n                </Location>\n                <OtherNameList>\n                    <Name>p.Q2937R:CAG&gt;CGG</Name>\n                </OtherNameList>\n                <ProteinChange>Q2937R</ProteinChange>\n                <ProteinChange>Q2814R</ProteinChange>\n                <ProteinChange>Q477R</ProteinChange>\n                <ProteinChange>Q208R</ProteinChange>\n                <ProteinChange>Q2933R</ProteinChange>\n                <HGVSlist>\n                    <HGVS Assembly=\"GRCh37\" Type=\"genomic, top-level\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NC_000023.10\" sequenceAccession=\"NC_000023\"\n                                              sequenceVersion=\"10\" change=\"g.31496350=\" Assembly=\"GRCh37\">\n                            <Expression>NC_000023.10:g.31496350=</Expression>\n                        </NucleotideExpression>\n                    </HGVS>\n                    <HGVS Assembly=\"GRCh38\" Type=\"genomic, top-level\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NC_000023.11\" sequenceAccession=\"NC_000023\"\n                                              sequenceVersion=\"11\" change=\"g.31478233=\" Assembly=\"GRCh38\">\n                            <Expression>NC_000023.11:g.31478233=</Expression>\n                        </NucleotideExpression>\n                    </HGVS>\n                    <HGVS Type=\"coding\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NM_000109.4\" sequenceAccession=\"NM_000109\"\n                                              sequenceVersion=\"4\" change=\"c.8786=\">\n                            <Expression>NM_000109.4:c.8786=</Expression>\n                        </NucleotideExpression>\n                        <ProteinExpression sequenceAccessionVersion=\"NP_000100.3\" 
sequenceAccession=\"NP_000100\"\n                                           sequenceVersion=\"3\" change=\"p.Arg2929=\">\n                            <Expression>NP_000100.3:p.Arg2929=</Expression>\n                        </ProteinExpression>\n                        <MolecularConsequence ID=\"SO:0002073\" Type=\"no sequence alteration\" DB=\"SO\"/>\n                    </HGVS>\n                    <HGVS Type=\"coding\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NM_004006.2\" sequenceAccession=\"NM_004006\"\n                                              sequenceVersion=\"2\" change=\"c.8810A&gt;G\">\n                            <Expression>NM_004006.2:c.8810A&gt;G</Expression>\n                        </NucleotideExpression>\n                        <ProteinExpression sequenceAccessionVersion=\"NP_003997.1\" sequenceAccession=\"NP_003997\"\n                                           sequenceVersion=\"1\" change=\"p.Gln2937Arg\">\n                            <Expression>NP_003997.1:p.Gln2937Arg</Expression>\n                        </ProteinExpression>\n                        <MolecularConsequence ID=\"SO:0001583\" Type=\"missense variant\" DB=\"SO\"/>\n                    </HGVS>\n                    <HGVS Type=\"coding\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NM_004009.3\" sequenceAccession=\"NM_004009\"\n                                              sequenceVersion=\"3\" change=\"c.8798A&gt;G\">\n                            <Expression>NM_004009.3:c.8798A&gt;G</Expression>\n                        </NucleotideExpression>\n                        <ProteinExpression sequenceAccessionVersion=\"NP_004000.1\" sequenceAccession=\"NP_004000\"\n                                           sequenceVersion=\"1\" change=\"p.Gln2933Arg\">\n                            <Expression>NP_004000.1:p.Gln2933Arg</Expression>\n                        </ProteinExpression>\n                        <MolecularConsequence 
ID=\"SO:0001583\" Type=\"missense variant\" DB=\"SO\"/>\n                    </HGVS>\n                    <HGVS Type=\"coding\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NM_004010.3\" sequenceAccession=\"NM_004010\"\n                                              sequenceVersion=\"3\" change=\"c.8441A&gt;G\">\n                            <Expression>NM_004010.3:c.8441A&gt;G</Expression>\n                        </NucleotideExpression>\n                        <ProteinExpression sequenceAccessionVersion=\"NP_004001.1\" sequenceAccession=\"NP_004001\"\n                                           sequenceVersion=\"1\" change=\"p.Gln2814Arg\">\n                            <Expression>NP_004001.1:p.Gln2814Arg</Expression>\n                        </ProteinExpression>\n                        <MolecularConsequence ID=\"SO:0001583\" Type=\"missense variant\" DB=\"SO\"/>\n                    </HGVS>\n                    <HGVS Type=\"coding\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NM_004011.4\" sequenceAccession=\"NM_004011\"\n                                              sequenceVersion=\"4\" change=\"c.4787=\">\n                            <Expression>NM_004011.4:c.4787=</Expression>\n                        </NucleotideExpression>\n                        <ProteinExpression sequenceAccessionVersion=\"NP_004002.3\" sequenceAccession=\"NP_004002\"\n                                           sequenceVersion=\"3\" change=\"p.Arg1596=\">\n                            <Expression>NP_004002.3:p.Arg1596=</Expression>\n                        </ProteinExpression>\n                        <MolecularConsequence ID=\"SO:0002073\" Type=\"no sequence alteration\" DB=\"SO\"/>\n                    </HGVS>\n                    <HGVS Type=\"coding\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NM_004012.4\" sequenceAccession=\"NM_004012\"\n                                              
sequenceVersion=\"4\" change=\"c.4778=\">\n                            <Expression>NM_004012.4:c.4778=</Expression>\n                        </NucleotideExpression>\n                        <ProteinExpression sequenceAccessionVersion=\"NP_004003.2\" sequenceAccession=\"NP_004003\"\n                                           sequenceVersion=\"2\" change=\"p.Arg1593=\">\n                            <Expression>NP_004003.2:p.Arg1593=</Expression>\n                        </ProteinExpression>\n                        <MolecularConsequence ID=\"SO:0002073\" Type=\"no sequence alteration\" DB=\"SO\"/>\n                    </HGVS>\n                    <HGVS Type=\"coding\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NM_004013.2\" sequenceAccession=\"NM_004013\"\n                                              sequenceVersion=\"2\" change=\"c.1430A&gt;G\">\n                            <Expression>NM_004013.2:c.1430A&gt;G</Expression>\n                        </NucleotideExpression>\n                        <ProteinExpression sequenceAccessionVersion=\"NP_004004.1\" sequenceAccession=\"NP_004004\"\n                                           sequenceVersion=\"1\" change=\"p.Gln477Arg\">\n                            <Expression>NP_004004.1:p.Gln477Arg</Expression>\n                        </ProteinExpression>\n                        <MolecularConsequence ID=\"SO:0001583\" Type=\"missense variant\" DB=\"SO\"/>\n                    </HGVS>\n                    <HGVS Type=\"coding\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NM_004014.2\" sequenceAccession=\"NM_004014\"\n                                              sequenceVersion=\"2\" change=\"c.623A&gt;G\">\n                            <Expression>NM_004014.2:c.623A&gt;G</Expression>\n                        </NucleotideExpression>\n                        <ProteinExpression sequenceAccessionVersion=\"NP_004005.1\" sequenceAccession=\"NP_004005\"\n                 
                          sequenceVersion=\"1\" change=\"p.Gln208Arg\">\n                            <Expression>NP_004005.1:p.Gln208Arg</Expression>\n                        </ProteinExpression>\n                        <MolecularConsequence ID=\"SO:0001583\" Type=\"missense variant\" DB=\"SO\"/>\n                    </HGVS>\n                    <HGVS Type=\"coding\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NM_004020.3\" sequenceAccession=\"NM_004020\"\n                                              sequenceVersion=\"3\" change=\"c.1430A&gt;G\">\n                            <Expression>NM_004020.3:c.1430A&gt;G</Expression>\n                        </NucleotideExpression>\n                        <ProteinExpression sequenceAccessionVersion=\"NP_004011.2\" sequenceAccession=\"NP_004011\"\n                                           sequenceVersion=\"2\" change=\"p.Gln477Arg\">\n                            <Expression>NP_004011.2:p.Gln477Arg</Expression>\n                        </ProteinExpression>\n                        <MolecularConsequence ID=\"SO:0001583\" Type=\"missense variant\" DB=\"SO\"/>\n                    </HGVS>\n                    <HGVS Type=\"coding\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NM_004021.3\" sequenceAccession=\"NM_004021\"\n                                              sequenceVersion=\"3\" change=\"c.1430=\">\n                            <Expression>NM_004021.3:c.1430=</Expression>\n                        </NucleotideExpression>\n                        <ProteinExpression sequenceAccessionVersion=\"NP_004012.2\" sequenceAccession=\"NP_004012\"\n                                           sequenceVersion=\"2\" change=\"p.Arg477=\">\n                            <Expression>NP_004012.2:p.Arg477=</Expression>\n                        </ProteinExpression>\n                        <MolecularConsequence ID=\"SO:0002073\" Type=\"no sequence alteration\" DB=\"SO\"/>\n          
          </HGVS>\n                    <HGVS Type=\"coding\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NM_004022.2\" sequenceAccession=\"NM_004022\"\n                                              sequenceVersion=\"2\" change=\"c.1430A&gt;G\">\n                            <Expression>NM_004022.2:c.1430A&gt;G</Expression>\n                        </NucleotideExpression>\n                        <ProteinExpression sequenceAccessionVersion=\"NP_004013.1\" sequenceAccession=\"NP_004013\"\n                                           sequenceVersion=\"1\" change=\"p.Gln477Arg\">\n                            <Expression>NP_004013.1:p.Gln477Arg</Expression>\n                        </ProteinExpression>\n                        <MolecularConsequence ID=\"SO:0001583\" Type=\"missense variant\" DB=\"SO\"/>\n                    </HGVS>\n                    <HGVS Type=\"coding\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NM_004023.3\" sequenceAccession=\"NM_004023\"\n                                              sequenceVersion=\"3\" change=\"c.1430=\">\n                            <Expression>NM_004023.3:c.1430=</Expression>\n                        </NucleotideExpression>\n                        <ProteinExpression sequenceAccessionVersion=\"NP_004014.2\" sequenceAccession=\"NP_004014\"\n                                           sequenceVersion=\"2\" change=\"p.Arg477=\">\n                            <Expression>NP_004014.2:p.Arg477=</Expression>\n                        </ProteinExpression>\n                        <MolecularConsequence ID=\"SO:0002073\" Type=\"no sequence alteration\" DB=\"SO\"/>\n                    </HGVS>\n                    <HGVS Type=\"coding\">\n                        <NucleotideExpression sequenceAccessionVersion=\"LRG_199t1\" sequenceAccession=\"LRG_199t1\"\n                                              change=\"c.8810A&gt;G\">\n                            
<Expression>LRG_199t1:c.8810A&gt;G</Expression>\n                        </NucleotideExpression>\n                        <ProteinExpression sequenceAccessionVersion=\"LRG_199p1\" sequenceAccession=\"LRG_199p1\"\n                                           change=\"p.Gln2937Arg\">\n                            <Expression>LRG_199p1:p.Gln2937Arg</Expression>\n                        </ProteinExpression>\n                    </HGVS>\n                    <HGVS Type=\"genomic\">\n                        <NucleotideExpression sequenceAccessionVersion=\"LRG_199\" sequenceAccession=\"LRG_199\"\n                                              change=\"g.1866377A&gt;G\">\n                            <Expression>LRG_199:g.1866377A&gt;G</Expression>\n                        </NucleotideExpression>\n                    </HGVS>\n                    <HGVS Type=\"genomic\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NG_012232.1\" sequenceAccession=\"NG_012232\"\n                                              sequenceVersion=\"1\" change=\"g.1866377A&gt;G\">\n                            <Expression>NG_012232.1:g.1866377A&gt;G</Expression>\n                        </NucleotideExpression>\n                    </HGVS>\n                </HGVSlist>\n                <XRefList>\n                    <XRef ID=\"CA290622\" DB=\"ClinGen\"/>\n                    <XRef Type=\"rs\" ID=\"1800280\" DB=\"dbSNP\"/>\n                </XRefList>\n                <AlleleFrequencyList>\n                    <AlleleFrequency Value=\"0.09548\" Source=\"Exome Aggregation Consortium (ExAC)\"/>\n                    <AlleleFrequency Value=\"0.04757\" Source=\"The Genome Aggregation Database (gnomAD)\"/>\n                    <AlleleFrequency Value=\"0.05797\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\"/>\n                    <AlleleFrequency Value=\"0.10219\" Source=\"The Genome Aggregation Database (gnomAD), exomes\"/>\n                    <AlleleFrequency 
Value=\"0.04318\"\n                                     Source=\"NHLBI Exome Sequencing Project (ESP) Exome Variant Server\"/>\n                    <AlleleFrequency Value=\"0.11815\" Source=\"1000 Genomes Project\"/>\n                </AlleleFrequencyList>\n                <GlobalMinorAlleleFrequency Value=\"0.11815\" Source=\"1000 Genomes Project\" MinorAllele=\"C\"/>\n            </SimpleAllele>\n            <ReviewStatus>criteria provided, multiple submitters, no conflicts</ReviewStatus>\n            <RCVList>\n                <RCVAccession Title=\"NM_004006.2(DMD):c.8810A&gt;G (p.Gln2937Arg) AND not specified\"\n                              DateLastEvaluated=\"2018-07-19\"\n                              ReviewStatus=\"criteria provided, multiple submitters, no conflicts\"\n                              Interpretation=\"Benign\" SubmissionCount=\"2\" Accession=\"RCV000124712\" Version=\"3\">\n                    <InterpretedConditionList>\n                        <InterpretedCondition DB=\"MedGen\" ID=\"CN169374\">not specified</InterpretedCondition>\n                    </InterpretedConditionList>\n                </RCVAccession>\n                <RCVAccession Title=\"NM_004006.2(DMD):c.8810A&gt;G (p.Gln2937Arg) AND not provided\"\n                              DateLastEvaluated=\"2019-03-06\" ReviewStatus=\"criteria provided, single submitter\"\n                              Interpretation=\"Benign\" SubmissionCount=\"1\" Accession=\"RCV000206630\" Version=\"6\">\n                    <InterpretedConditionList>\n                        <InterpretedCondition DB=\"MedGen\" ID=\"CN517202\">not provided</InterpretedCondition>\n                    </InterpretedConditionList>\n                </RCVAccession>\n                <RCVAccession Title=\"NM_004006.2(DMD):c.8810A&gt;G (p.Gln2937Arg) AND multiple conditions\"\n                              DateLastEvaluated=\"2017-04-28\" ReviewStatus=\"criteria provided, single submitter\"\n                              
Interpretation=\"Benign\" SubmissionCount=\"1\" Accession=\"RCV000576762\" Version=\"1\">\n                    <InterpretedConditionList>\n                        <InterpretedCondition DB=\"MedGen\" ID=\"C0917713\">Becker muscular dystrophy</InterpretedCondition>\n                        <InterpretedCondition DB=\"MedGen\" ID=\"C0013264\">Duchenne muscular dystrophy\n                        </InterpretedCondition>\n                    </InterpretedConditionList>\n                </RCVAccession>\n            </RCVList>\n            <Interpretations>\n                <Interpretation DateLastEvaluated=\"2019-03-06\" NumberOfSubmissions=\"4\" NumberOfSubmitters=\"4\"\n                                Type=\"Clinical significance\">\n                    <Description>Benign</Description>\n                    <ConditionList>\n                        <TraitSet ID=\"9460\" Type=\"Disease\">\n                            <Trait ID=\"17556\" Type=\"Disease\">\n                                <Name>\n                                    <ElementValue Type=\"Preferred\">not provided</ElementValue>\n                                    <XRef ID=\"13DG0619\"\n                                          DB=\"Developmental Genetics Unit,King Faisal Specialist Hospital &amp; Research Centre\"/>\n                                </Name>\n                                <AttributeSet>\n                                    <Attribute Type=\"public definition\">The term 'not provided' is registered in MedGen\n                                        to support identification of submissions to ClinVar for which no condition was\n                                        named when assessing the variant. 
'not provided' differs from 'not specified',\n                                        which is used when a variant is asserted to be benign, likely benign, or of\n                                        uncertain significance for conditions that have not been specified.\n                                    </Attribute>\n                                </AttributeSet>\n                                <XRef ID=\"CN517202\" DB=\"MedGen\"/>\n                            </Trait>\n                        </TraitSet>\n                        <TraitSet ID=\"9590\" Type=\"Disease\">\n                            <Trait ID=\"16789\" Type=\"Disease\">\n                                <Name>\n                                    <ElementValue Type=\"Preferred\">not specified</ElementValue>\n                                </Name>\n                                <Name>\n                                    <ElementValue Type=\"Alternate\">AllHighlyPenetrant</ElementValue>\n                                </Name>\n                                <AttributeSet>\n                                    <Attribute Type=\"public definition\">The term 'not specified' was created for use in\n                                        ClinVar so that submitters can convey the concept that a variant is benign,\n                                        likely benign, or of uncertain significance for an unspecified set of disorders.\n                                        This usage was introduced in 2014 to replace AllHighlyPenetrant.\n                                    </Attribute>\n                                </AttributeSet>\n                                <XRef ID=\"CN169374\" DB=\"MedGen\"/>\n                            </Trait>\n                        </TraitSet>\n                        <TraitSet ID=\"9033\" Type=\"Disease\">\n                            <Trait ID=\"3056\" Type=\"Disease\">\n                                <Name>\n                                    <ElementValue 
Type=\"Preferred\">Becker muscular dystrophy</ElementValue>\n                                    <XRef ID=\"Becker+Muscular+Dystrophy/771\" DB=\"Genetic Alliance\"/>\n                                    <XRef ID=\"5900\" DB=\"Office of Rare Diseases\"/>\n                                    <XRef ID=\"387732009\" DB=\"SNOMED CT\"/>\n                                </Name>\n                                <Symbol>\n                                    <ElementValue Type=\"Preferred\">BMD</ElementValue>\n                                    <XRef Type=\"MIM\" ID=\"300376\" DB=\"OMIM\"/>\n                                </Symbol>\n                                <AttributeSet>\n                                    <Attribute Type=\"public definition\">The dystrophinopathies cover a spectrum of\n                                        X-linked muscle disease ranging from mild to severe that includes Duchenne\n                                        muscular dystrophy, Becker muscular dystrophy, and DMD-associated dilated\n                                        cardiomyopathy (DCM). The mild end of the spectrum includes the phenotypes of\n                                        asymptomatic increase in serum concentration of creatine phosphokinase (CK) and\n                                        muscle cramps with myoglobinuria. The severe end of the spectrum includes\n                                        progressive muscle diseases that are classified as Duchenne/Becker muscular\n                                        dystrophy when skeletal muscle is primarily affected and as DMD-associated\n                                        dilated cardiomyopathy (DCM) when the heart is primarily affected. 
Duchenne\n                                        muscular dystrophy (DMD) usually presents in early childhood with delayed motor\n                                        milestones including delays in walking independently and standing up from a\n                                        supine position. Proximal weakness causes a waddling gait and difficulty\n                                        climbing stairs, running, jumping, and standing up from a squatting position.\n                                        DMD is rapidly progressive, with affected children being wheelchair dependent by\n                                        age 12 years. Cardiomyopathy occurs in almost all individuals with DMD after age\n                                        18 years. Few survive beyond the third decade, with respiratory complications\n                                        and progressive cardiomyopathy being common causes of death. Becker muscular\n                                        dystrophy (BMD) is characterized by later-onset skeletal muscle weakness. With\n                                        improved diagnostic techniques, it has been recognized that the mild end of the\n                                        spectrum includes men with onset of symptoms after age 30 years who remain\n                                        ambulatory even into their 60s. Despite the milder skeletal muscle involvement,\n                                        heart failure from DCM is a common cause of morbidity and the most common cause\n                                        of death in BMD. Mean age of death is in the mid-40s. DMD-associated DCM is\n                                        characterized by left ventricular dilation and congestive heart failure. 
Females\n                                        heterozygous for a DMD pathogenic variant are at increased risk for DCM.\n                                    </Attribute>\n                                    <XRef ID=\"NBK1119\" DB=\"GeneReviews\"/>\n                                </AttributeSet>\n                                <Citation Type=\"Position Statement\" Abbrev=\"AAP, 2005\">\n                                    <ID Source=\"PubMed\">16322188</ID>\n                                </Citation>\n                                <Citation Type=\"review\" Abbrev=\"GeneReviews\">\n                                    <ID Source=\"PubMed\">20301298</ID>\n                                    <ID Source=\"BookShelf\">NBK1119</ID>\n                                </Citation>\n                                <Citation Type=\"practice guideline\" Abbrev=\"AAN, 2014\">\n                                    <ID Source=\"PubMed\">25313375</ID>\n                                </Citation>\n                                <XRef ID=\"98895\" DB=\"Orphanet\"/>\n                                <XRef ID=\"C0917713\" DB=\"MedGen\"/>\n                                <XRef Type=\"MIM\" ID=\"300376\" DB=\"OMIM\"/>\n                            </Trait>\n                            <Trait ID=\"10877\" Type=\"Disease\">\n                                <Name>\n                                    <ElementValue Type=\"Preferred\">Duchenne muscular dystrophy</ElementValue>\n                                    <XRef ID=\"Duchenne+Muscular+Dystrophy/2340\" DB=\"Genetic Alliance\"/>\n                                    <XRef ID=\"6291\" DB=\"Office of Rare Diseases\"/>\n                                    <XRef ID=\"76670001\" DB=\"SNOMED CT\"/>\n                                </Name>\n                                <Symbol>\n                                    <ElementValue Type=\"Preferred\">DMD</ElementValue>\n                                    <XRef Type=\"MIM\" ID=\"310200\" 
DB=\"OMIM\"/>\n                                    <XRef ID=\"6291\" DB=\"Office of Rare Diseases\"/>\n                                </Symbol>\n                                <AttributeSet>\n                                    <Attribute Type=\"public definition\">The dystrophinopathies cover a spectrum of\n                                        X-linked muscle disease ranging from mild to severe that includes Duchenne\n                                        muscular dystrophy, Becker muscular dystrophy, and DMD-associated dilated\n                                        cardiomyopathy (DCM). The mild end of the spectrum includes the phenotypes of\n                                        asymptomatic increase in serum concentration of creatine phosphokinase (CK) and\n                                        muscle cramps with myoglobinuria. The severe end of the spectrum includes\n                                        progressive muscle diseases that are classified as Duchenne/Becker muscular\n                                        dystrophy when skeletal muscle is primarily affected and as DMD-associated\n                                        dilated cardiomyopathy (DCM) when the heart is primarily affected. Duchenne\n                                        muscular dystrophy (DMD) usually presents in early childhood with delayed motor\n                                        milestones including delays in walking independently and standing up from a\n                                        supine position. Proximal weakness causes a waddling gait and difficulty\n                                        climbing stairs, running, jumping, and standing up from a squatting position.\n                                        DMD is rapidly progressive, with affected children being wheelchair dependent by\n                                        age 12 years. 
Cardiomyopathy occurs in almost all individuals with DMD after age\n                                        18 years. Few survive beyond the third decade, with respiratory complications\n                                        and progressive cardiomyopathy being common causes of death. Becker muscular\n                                        dystrophy (BMD) is characterized by later-onset skeletal muscle weakness. With\n                                        improved diagnostic techniques, it has been recognized that the mild end of the\n                                        spectrum includes men with onset of symptoms after age 30 years who remain\n                                        ambulatory even into their 60s. Despite the milder skeletal muscle involvement,\n                                        heart failure from DCM is a common cause of morbidity and the most common cause\n                                        of death in BMD. Mean age of death is in the mid-40s. DMD-associated DCM is\n                                        characterized by left ventricular dilation and congestive heart failure. 
Females\n                                        heterozygous for a DMD pathogenic variant are at increased risk for DCM.\n                                    </Attribute>\n                                    <XRef ID=\"NBK1119\" DB=\"GeneReviews\"/>\n                                </AttributeSet>\n                                <Citation Type=\"practice guideline\" Abbrev=\"AAN/CNS, 2005\">\n                                    <ID Source=\"PubMed\">15642897</ID>\n                                </Citation>\n                                <Citation Type=\"Position Statement\" Abbrev=\"AAP, 2005\">\n                                    <ID Source=\"PubMed\">16322188</ID>\n                                </Citation>\n                                <Citation Type=\"practice guideline\" Abbrev=\"DMD Management and Anesthesia - ACCP, 2007\">\n                                    <ID Source=\"PubMed\">18079231</ID>\n                                </Citation>\n                                <Citation Type=\"practice guideline\"\n                                          Abbrev=\"CDC DMD Care Considerations Working Group, 2010\">\n                                    <ID Source=\"PubMed\">19945913</ID>\n                                </Citation>\n                                <Citation Type=\"practice guideline\"\n                                          Abbrev=\"CDC DMD Care Considerations Working Group, 2010\">\n                                    <ID Source=\"PubMed\">19945914</ID>\n                                </Citation>\n                                <Citation Type=\"review\" Abbrev=\"GeneReviews\">\n                                    <ID Source=\"PubMed\">20301298</ID>\n                                    <ID Source=\"BookShelf\">NBK1119</ID>\n                                </Citation>\n                                <Citation Type=\"review\" Abbrev=\"GeneReviews\">\n                                    <ID Source=\"PubMed\">20301604</ID>\n             
                       <ID Source=\"BookShelf\">NBK1431</ID>\n                                </Citation>\n                                <Citation Type=\"practice guideline\" Abbrev=\"CDC Respiratory Panel, 2010\">\n                                    <ID Source=\"PubMed\">20597083</ID>\n                                </Citation>\n                                <Citation Type=\"practice guideline\" Abbrev=\"Orphanet, 2013\">\n                                    <URL>\n                                        https://www.orpha.net/data/patho/Pro/en/Emergency_DuchenneMuscularDystrophy-enPro13913.pdf\n                                    </URL>\n                                    <CitationText>Orphanet, Duchenne muscular dystrophy, 2013</CitationText>\n                                </Citation>\n                                <XRef ID=\"98896\" DB=\"Orphanet\"/>\n                                <XRef ID=\"C0013264\" DB=\"MedGen\"/>\n                                <XRef Type=\"MIM\" ID=\"310200\" DB=\"OMIM\"/>\n                            </Trait>\n                        </TraitSet>\n                    </ConditionList>\n                </Interpretation>\n            </Interpretations>\n            <ClinicalAssertionList>\n                <ClinicalAssertion ID=\"1328435\" DateCreated=\"2018-01-04\" DateLastUpdated=\"2019-03-31\"\n                                   SubmissionDate=\"2017-07-17\">\n                    <ClinVarSubmissionID localKey=\"6379|OMIM:300376;310200\" submittedAssembly=\"GRCh37\"/>\n                    <ClinVarAccession Accession=\"SCV000677280\" Type=\"SCV\" Version=\"1\"\n                                      SubmitterName=\"Athena Diagnostics Inc\" OrgID=\"1012\"\n                                      OrganizationCategory=\"laboratory\"/>\n                    <RecordStatus>current</RecordStatus>\n                    <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                    <Interpretation 
DateLastEvaluated=\"2017-04-28\">\n                        <Description>Benign</Description>\n                    </Interpretation>\n                    <Assertion>variation to disease</Assertion>\n                    <AttributeSet>\n                        <Attribute Type=\"AssertionMethod\">Athena Diagnostics Criteria</Attribute>\n                        <Citation>\n                            <ID Source=\"PubMed\">26467025</ID>\n                        </Citation>\n                    </AttributeSet>\n                    <ObservedInList>\n                        <ObservedIn>\n                            <Sample>\n                                <Origin>germline</Origin>\n                                <Species TaxonomyId=\"9606\">human</Species>\n                                <AffectedStatus>unknown</AffectedStatus>\n                            </Sample>\n                            <Method>\n                                <MethodType>clinical testing</MethodType>\n                            </Method>\n                            <ObservedData>\n                                <Attribute Type=\"Description\">not provided</Attribute>\n                            </ObservedData>\n                        </ObservedIn>\n                    </ObservedInList>\n                    <SimpleAllele>\n                        <GeneList>\n                            <Gene Symbol=\"DMD\"/>\n                        </GeneList>\n                        <VariantType>Variation</VariantType>\n                        <OtherNameList>\n                            <Name>p.Gln2937Arg</Name>\n                        </OtherNameList>\n                        <AttributeSet>\n                            <Attribute Type=\"HGVS\">NM_004006.2:c.8810A&gt;G</Attribute>\n                        </AttributeSet>\n                    </SimpleAllele>\n                    <TraitSet Type=\"Disease\">\n                        <Trait Type=\"Disease\">\n                            <XRef DB=\"OMIM\" 
ID=\"300376\" Type=\"MIM\"/>\n                        </Trait>\n                        <Trait Type=\"Disease\">\n                            <XRef DB=\"OMIM\" ID=\"310200\" Type=\"MIM\"/>\n                        </Trait>\n                    </TraitSet>\n                    <SubmissionNameList>\n                        <SubmissionName>Benign_2017</SubmissionName>\n                    </SubmissionNameList>\n                </ClinicalAssertion>\n                <ClinicalAssertion ID=\"290072\" DateCreated=\"2014-06-18\" DateLastUpdated=\"2019-03-31\"\n                                   SubmissionDate=\"2018-03-26\">\n                    <ClinVarSubmissionID localKey=\"GDX:17557|Not Provided\" submittedAssembly=\"GRCh37\"/>\n                    <ClinVarAccession Accession=\"SCV000168148\" Type=\"SCV\" Version=\"11\" SubmitterName=\"GeneDx\"\n                                      OrgID=\"26957\" OrganizationCategory=\"laboratory\"/>\n                    <RecordStatus>current</RecordStatus>\n                    <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                    <Interpretation DateLastEvaluated=\"2013-11-22\">\n                        <Description>Benign</Description>\n                        <Comment Type=\"public\">This variant is considered likely benign or benign based on one or more\n                            of the following criteria: it is a conservative change, it occurs at a poorly conserved\n                            position in the protein, it is predicted to be benign by multiple in silico algorithms,\n                            and/or has population frequency not consistent with disease.\n                        </Comment>\n                    </Interpretation>\n                    <Assertion>variation to disease</Assertion>\n                    <AttributeSet>\n                        <Attribute Type=\"AssertionMethod\">GeneDX Variant Classification (06012015)</Attribute>\n                        <Citation>\n        
                    <URL>\n                                https://submit.ncbi.nlm.nih.gov/ft/byid/7oynscmk/mdi-5616_26957_genedx_interprules_final_061215.pdf\n                            </URL>\n                        </Citation>\n                    </AttributeSet>\n                    <ObservedInList>\n                        <ObservedIn>\n                            <Sample>\n                                <Origin>germline</Origin>\n                                <Species TaxonomyId=\"9606\">human</Species>\n                                <AffectedStatus>yes</AffectedStatus>\n                            </Sample>\n                            <Method>\n                                <MethodType>clinical testing</MethodType>\n                            </Method>\n                            <ObservedData>\n                                <Attribute Type=\"Description\">not provided</Attribute>\n                            </ObservedData>\n                        </ObservedIn>\n                    </ObservedInList>\n                    <SimpleAllele>\n                        <GeneList>\n                            <Gene Symbol=\"DMD\"/>\n                        </GeneList>\n                        <VariantType>Variation</VariantType>\n                        <Location>\n                            <SequenceLocation Assembly=\"GRCh37\" Chr=\"X\" alternateAllele=\"C\" referenceAllele=\"C\"\n                                              start=\"31496350\" stop=\"31496350\" variantLength=\"1\"/>\n                        </Location>\n                        <AttributeSet>\n                            <Attribute Type=\"HGVS\">NM_004006.2:c.8810A&gt;G</Attribute>\n                        </AttributeSet>\n                    </SimpleAllele>\n                    <TraitSet Type=\"Disease\">\n                        <Trait Type=\"Disease\">\n                            <Name>\n                                <ElementValue Type=\"Preferred\">not 
specified</ElementValue>\n                            </Name>\n                        </Trait>\n                    </TraitSet>\n                    <SubmissionNameList>\n                        <SubmissionName>SUB3839901</SubmissionName>\n                    </SubmissionNameList>\n                </ClinicalAssertion>\n                <ClinicalAssertion ID=\"1795888\" DateCreated=\"2019-05-29\" DateLastUpdated=\"2019-06-03\"\n                                   SubmissionDate=\"2019-04-24\">\n                    <ClinVarSubmissionID localKey=\"NM_004006.2:c.8810A&gt;G|MedGen:CN169374\"\n                                         submittedAssembly=\"GRCh37\"/>\n                    <ClinVarAccession Accession=\"SCV000919267\" Type=\"SCV\" Version=\"1\"\n                                      SubmitterName=\"Integrated Genetics/Laboratory Corporation of America\"\n                                      OrgID=\"500026\" OrganizationCategory=\"laboratory\" OrgAbbreviation=\"IG\"/>\n                    <RecordStatus>current</RecordStatus>\n                    <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                    <Interpretation DateLastEvaluated=\"2018-07-19\">\n                        <Description>Benign</Description>\n                    </Interpretation>\n                    <Assertion>variation to disease</Assertion>\n                    <AttributeSet>\n                        <Attribute Type=\"AssertionMethod\">LabCorp Variant Classification Summary - May 2015</Attribute>\n                        <Citation>\n                            <URL>\n                                https://submit.ncbi.nlm.nih.gov/ft/byid/pttb9itm/labcorp_variant_classification_method_-_may_2015.pdf\n                            </URL>\n                        </Citation>\n                    </AttributeSet>\n                    <ObservedInList>\n                        <ObservedIn>\n                            <Sample>\n                                
<Origin>germline</Origin>\n                                <Species TaxonomyId=\"9606\">human</Species>\n                                <AffectedStatus>unknown</AffectedStatus>\n                            </Sample>\n                            <Method>\n                                <MethodType>clinical testing</MethodType>\n                            </Method>\n                            <ObservedData>\n                                <Attribute Type=\"Description\">not provided</Attribute>\n                            </ObservedData>\n                        </ObservedIn>\n                    </ObservedInList>\n                    <SimpleAllele>\n                        <GeneList>\n                            <Gene Symbol=\"DMD\"/>\n                        </GeneList>\n                        <VariantType>Variation</VariantType>\n                        <AttributeSet>\n                            <Attribute Type=\"HGVS\">NM_004006.2:c.8810A&gt;G</Attribute>\n                        </AttributeSet>\n                    </SimpleAllele>\n                    <TraitSet Type=\"Disease\">\n                        <Trait Type=\"Disease\">\n                            <Name>\n                                <ElementValue Type=\"Preferred\">not specified</ElementValue>\n                            </Name>\n                            <XRef DB=\"MedGen\" ID=\"CN169374\" Type=\"CUI\"/>\n                        </Trait>\n                    </TraitSet>\n                    <SubmissionNameList>\n                        <SubmissionName>SUB5494893</SubmissionName>\n                    </SubmissionNameList>\n                </ClinicalAssertion>\n                <ClinicalAssertion ID=\"487235\" DateCreated=\"2016-01-29\" DateLastUpdated=\"2019-12-17\"\n                                   SubmissionDate=\"2019-03-14\">\n                    <ClinVarSubmissionID localKey=\"99840|MedGen:CN517202\" submittedAssembly=\"GRCh37\"/>\n                    <ClinVarAccession 
Accession=\"SCV000262232\" Type=\"SCV\" Version=\"6\" SubmitterName=\"Invitae\"\n                                      OrgID=\"500031\" OrganizationCategory=\"laboratory\"/>\n                    <RecordStatus>current</RecordStatus>\n                    <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                    <Interpretation DateLastEvaluated=\"2019-03-06\">\n                        <Description>Benign</Description>\n                    </Interpretation>\n                    <Assertion>variation to disease</Assertion>\n                    <AttributeSet>\n                        <Attribute Type=\"AssertionMethod\">Nykamp K et al. (Genet Med 2017)</Attribute>\n                        <Citation>\n                            <ID Source=\"PubMed\">28492532</ID>\n                        </Citation>\n                    </AttributeSet>\n                    <ObservedInList>\n                        <ObservedIn>\n                            <Sample>\n                                <Origin>germline</Origin>\n                                <Species TaxonomyId=\"9606\">human</Species>\n                                <AffectedStatus>unknown</AffectedStatus>\n                            </Sample>\n                            <Method>\n                                <MethodType>clinical testing</MethodType>\n                            </Method>\n                            <ObservedData>\n                                <Attribute Type=\"Description\">not provided</Attribute>\n                            </ObservedData>\n                        </ObservedIn>\n                    </ObservedInList>\n                    <SimpleAllele>\n                        <GeneList>\n                            <Gene Symbol=\"DMD\"/>\n                        </GeneList>\n                        <VariantType>Variation</VariantType>\n                        <AttributeSet>\n                            <Attribute Type=\"HGVS\">NM_004006.2:c.8810A&gt;G</Attribute>\n   
                     </AttributeSet>\n                    </SimpleAllele>\n                    <TraitSet Type=\"Disease\">\n                        <Trait Type=\"Disease\">\n                            <Name>\n                                <ElementValue Type=\"Preferred\">not provided</ElementValue>\n                            </Name>\n                            <XRef DB=\"MedGen\" ID=\"CN517202\" Type=\"CUI\"/>\n                        </Trait>\n                    </TraitSet>\n                    <SubmissionNameList>\n                        <SubmissionName>SUB5321749</SubmissionName>\n                    </SubmissionNameList>\n                </ClinicalAssertion>\n            </ClinicalAssertionList>\n            <TraitMappingList>\n                <TraitMapping ClinicalAssertionID=\"1795888\" TraitType=\"Disease\" MappingType=\"Name\"\n                              MappingValue=\"not specified\" MappingRef=\"Preferred\">\n                    <MedGen CUI=\"CN169374\" Name=\"not specified\"/>\n                </TraitMapping>\n                <TraitMapping ClinicalAssertionID=\"290072\" TraitType=\"Disease\" MappingType=\"Name\"\n                              MappingValue=\"not specified\" MappingRef=\"Preferred\">\n                    <MedGen CUI=\"CN169374\" Name=\"not specified\"/>\n                </TraitMapping>\n                <TraitMapping ClinicalAssertionID=\"1328435\" TraitType=\"Disease\" MappingType=\"XRef\" MappingValue=\"310200\"\n                              MappingRef=\"OMIM\">\n                    <MedGen CUI=\"C0013264\" Name=\"Duchenne muscular dystrophy\"/>\n                </TraitMapping>\n                <TraitMapping ClinicalAssertionID=\"487235\" TraitType=\"Disease\" MappingType=\"Name\"\n                              MappingValue=\"not provided\" MappingRef=\"Preferred\">\n                    <MedGen CUI=\"CN517202\" Name=\"not provided\"/>\n                </TraitMapping>\n                <TraitMapping 
ClinicalAssertionID=\"1328435\" TraitType=\"Disease\" MappingType=\"XRef\" MappingValue=\"300376\"\n                              MappingRef=\"OMIM\">\n                    <MedGen CUI=\"C0917713\" Name=\"Becker muscular dystrophy\"/>\n                </TraitMapping>\n            </TraitMappingList>\n        </InterpretedRecord>\n    </VariationArchive>\n</ClinVarVariationRelease>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/VCVs/VCV000431749.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n<ClinVarVariationRelease xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_variation/variation_archive_1.4.xsd\" ReleaseDate=\"2019-12-31\">\n<VariationArchive VariationID=\"431749\" VariationName=\"GRCh37/hg19 1p36.31(chr1:6051187-6158763)\" VariationType=\"copy number gain\" DateCreated=\"2017-08-12\" DateLastUpdated=\"2019-09-10\" Accession=\"VCV000431749\" Version=\"1\" RecordType=\"included\" NumberOfSubmissions=\"0\" NumberOfSubmitters=\"0\">\n  <RecordStatus>current</RecordStatus>\n  <Species>Homo sapiens</Species>\n  <IncludedRecord>\n    <SimpleAllele AlleleID=\"425239\" VariationID=\"431749\">\n      <GeneList>\n        <Gene Symbol=\"KCNAB2\" FullName=\"potassium voltage-gated channel subfamily A regulatory beta subunit 2\" GeneID=\"8514\" HGNC_ID=\"HGNC:6229\" Source=\"calculated\" RelationshipType=\"genes overlapped by variant\">\n          <Location>\n            <CytogeneticLocation>1p36.31</CytogeneticLocation>\n            <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"5992639\" stop=\"6101186\" display_start=\"5992639\" display_stop=\"6101186\" Strand=\"+\"/>\n            <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"6052357\" stop=\"6161252\" display_start=\"6052357\" display_stop=\"6161252\" Strand=\"+\"/>\n          </Location>\n          <OMIM>601142</OMIM>\n        </Gene>\n        <Gene Symbol=\"NPHP4\" FullName=\"nephrocystin 4\" GeneID=\"261734\" HGNC_ID=\"HGNC:19104\" Source=\"calculated\" RelationshipType=\"genes overlapped by variant\">\n          <Location>\n            <CytogeneticLocation>1p36.31</CytogeneticLocation>\n            
<SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"5862810\" stop=\"5992425\" display_start=\"5862810\" display_stop=\"5992425\" Strand=\"-\"/>\n            <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"5922869\" stop=\"6052532\" display_start=\"5922869\" display_stop=\"6052532\" Strand=\"-\"/>\n          </Location>\n          <OMIM>607215</OMIM>\n        </Gene>\n      </GeneList>\n      <Name>GRCh37/hg19 1p36.31(chr1:6051187-6158763)</Name>\n      <VariantType>copy number gain</VariantType>\n      <Location>\n        <CytogeneticLocation>1p36.31</CytogeneticLocation>\n        <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" forDisplay=\"true\" AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"6051187\" stop=\"6158763\" display_start=\"6051187\" display_stop=\"6158763\"/>\n      </Location>\n      <Interpretations>\n        <Interpretation NumberOfSubmissions=\"0\" NumberOfSubmitters=\"0\" Type=\"Clinical significance\">\n          <Description>no interpretation for the single variant</Description>\n        </Interpretation>\n      </Interpretations>\n      <XRefList>\n        <XRef Type=\"Interpreted\" ID=\"431733\" DB=\"ClinVar\"/>\n      </XRefList>\n    </SimpleAllele>\n    <ReviewStatus>no interpretation for the single variant</ReviewStatus>\n    <Interpretations>\n      <Interpretation NumberOfSubmissions=\"0\" NumberOfSubmitters=\"0\" Type=\"Clinical significance\">\n        <Description>no interpretation for the single variant</Description>\n      </Interpretation>\n    </Interpretations>\n    <SubmittedInterpretationList>\n      <SCV Title=\"SUB1895145\" Accession=\"SCV000296057\" Version=\"1\"/>\n    </SubmittedInterpretationList>\n    <InterpretedVariationList>\n      
<InterpretedVariation VariationID=\"431733\" Accession=\"VCV000431733\" Version=\"1\"/>\n    </InterpretedVariationList>\n  </IncludedRecord>\n</VariationArchive>\n</ClinVarVariationRelease>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/VCVs/VCV000476472.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n<ClinVarVariationRelease xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n                         xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_variation/variation_archive_1.4.xsd\"\n                         ReleaseDate=\"2019-12-31\">\n    <VariationArchive VariationID=\"476472\" VariationName=\"NM_001849.3(COL6A2):c.2697G&gt;A (p.Thr899=)\"\n                      VariationType=\"single nucleotide variant\" DateCreated=\"2017-12-20\" DateLastUpdated=\"2020-06-05\"\n                      Accession=\"VCV000476472\" Version=\"4\" RecordType=\"interpreted\" NumberOfSubmissions=\"3\"\n                      NumberOfSubmitters=\"3\">\n        <RecordStatus>current</RecordStatus>\n        <Species>Homo sapiens</Species>\n        <InterpretedRecord>\n            <SimpleAllele AlleleID=\"470827\" VariationID=\"476472\">\n                <GeneList>\n                    <Gene Symbol=\"COL6A2\" FullName=\"collagen type VI alpha 2 chain\" GeneID=\"1292\" HGNC_ID=\"HGNC:2212\"\n                          Source=\"submitted\" RelationshipType=\"within single gene\">\n                        <Location>\n                            <CytogeneticLocation>21q22.3</CytogeneticLocation>\n                            <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\"\n                                              AssemblyStatus=\"current\" Chr=\"21\" Accession=\"NC_000021.9\" start=\"46098071\"\n                                              stop=\"46132849\" display_start=\"46098071\" display_stop=\"46132849\"\n                                              Strand=\"+\"/>\n                            <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                              AssemblyStatus=\"previous\" Chr=\"21\" Accession=\"NC_000021.8\"\n                                   
           start=\"47518032\" stop=\"47552762\" display_start=\"47518032\"\n                                              display_stop=\"47552762\" Strand=\"+\"/>\n                        </Location>\n                        <OMIM>120240</OMIM>\n                    </Gene>\n                </GeneList>\n                <Name>NM_001849.3(COL6A2):c.2697G&gt;A (p.Thr899=)</Name>\n                <CanonicalSPDI>NC_000021.9:46132188:G:A</CanonicalSPDI>\n                <VariantType>single nucleotide variant</VariantType>\n                <Location>\n                    <CytogeneticLocation>21q22.3</CytogeneticLocation>\n                    <SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" forDisplay=\"true\"\n                                      AssemblyStatus=\"current\" Chr=\"21\" Accession=\"NC_000021.9\" start=\"46132189\"\n                                      stop=\"46132189\" display_start=\"46132189\" display_stop=\"46132189\" variantLength=\"1\"\n                                      positionVCF=\"46132189\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"A\"/>\n                    <SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\"\n                                      AssemblyStatus=\"previous\" Chr=\"21\" Accession=\"NC_000021.8\" start=\"47552103\"\n                                      stop=\"47552103\" display_start=\"47552103\" display_stop=\"47552103\" variantLength=\"1\"\n                                      positionVCF=\"47552103\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"A\"/>\n                </Location>\n                <HGVSlist>\n                    <HGVS Assembly=\"GRCh37\" Type=\"genomic, top-level\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NC_000021.8\" sequenceAccession=\"NC_000021\"\n                                              sequenceVersion=\"8\" change=\"g.47552103G&gt;A\" Assembly=\"GRCh37\">\n                            
<Expression>NC_000021.8:g.47552103G&gt;A</Expression>\n                        </NucleotideExpression>\n                    </HGVS>\n                    <HGVS Assembly=\"GRCh38\" Type=\"genomic, top-level\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NC_000021.9\" sequenceAccession=\"NC_000021\"\n                                              sequenceVersion=\"9\" change=\"g.46132189G&gt;A\" Assembly=\"GRCh38\">\n                            <Expression>NC_000021.9:g.46132189G&gt;A</Expression>\n                        </NucleotideExpression>\n                    </HGVS>\n                    <HGVS Type=\"coding\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NM_001849.3\" sequenceAccession=\"NM_001849\"\n                                              sequenceVersion=\"3\" change=\"c.2697G&gt;A\">\n                            <Expression>NM_001849.3:c.2697G&gt;A</Expression>\n                        </NucleotideExpression>\n                        <ProteinExpression sequenceAccessionVersion=\"NP_001840.3\" sequenceAccession=\"NP_001840\"\n                                           sequenceVersion=\"3\" change=\"p.Thr899=\">\n                            <Expression>NP_001840.3:p.Thr899=</Expression>\n                        </ProteinExpression>\n                        <MolecularConsequence ID=\"SO:0001819\" Type=\"synonymous variant\" DB=\"SO\"/>\n                    </HGVS>\n                    <HGVS Type=\"coding\">\n                        <NucleotideExpression sequenceAccessionVersion=\"LRG_476t1\" sequenceAccession=\"LRG_476t1\"\n                                              change=\"c.2697G&gt;A\">\n                            <Expression>LRG_476t1:c.2697G&gt;A</Expression>\n                        </NucleotideExpression>\n                        <ProteinExpression sequenceAccessionVersion=\"LRG_476p1\" sequenceAccession=\"LRG_476p1\"\n                                           change=\"p.Thr899=\">\n    
                        <Expression>LRG_476p1:p.Thr899=</Expression>\n                        </ProteinExpression>\n                    </HGVS>\n                    <HGVS Type=\"genomic\">\n                        <NucleotideExpression sequenceAccessionVersion=\"NG_008675.1\" sequenceAccession=\"NG_008675\"\n                                              sequenceVersion=\"1\" change=\"g.39071G&gt;A\">\n                            <Expression>NG_008675.1:g.39071G&gt;A</Expression>\n                        </NucleotideExpression>\n                    </HGVS>\n                    <HGVS Type=\"genomic\">\n                        <NucleotideExpression sequenceAccessionVersion=\"LRG_476\" sequenceAccession=\"LRG_476\"\n                                              change=\"g.39071G&gt;A\">\n                            <Expression>LRG_476:g.39071G&gt;A</Expression>\n                        </NucleotideExpression>\n                    </HGVS>\n                </HGVSlist>\n                <XRefList>\n                    <XRef ID=\"CA10072979\" DB=\"ClinGen\"/>\n                    <XRef Type=\"rs\" ID=\"11554669\" DB=\"dbSNP\"/>\n                </XRefList>\n                <AlleleFrequencyList>\n                    <AlleleFrequency Value=\"0.00010\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\"/>\n                </AlleleFrequencyList>\n                <GlobalMinorAlleleFrequency Value=\"0.01697\" Source=\"1000 Genomes Project\" MinorAllele=\"T\"/>\n            </SimpleAllele>\n            <ReviewStatus>criteria provided, conflicting interpretations</ReviewStatus>\n            <RCVList>\n                <RCVAccession Title=\"NM_001849.3(COL6A2):c.2697G&gt;A (p.Thr899=) AND Bethlem myopathy 1\"\n                              DateLastEvaluated=\"2019-12-31\" ReviewStatus=\"criteria provided, single submitter\"\n                              Interpretation=\"Likely benign\" SubmissionCount=\"1\" Accession=\"RCV000546795\" Version=\"3\">\n                    
<InterpretedConditionList TraitSetID=\"5661\">\n                        <InterpretedCondition DB=\"MedGen\" ID=\"CN029274\">Bethlem myopathy 1</InterpretedCondition>\n                    </InterpretedConditionList>\n                </RCVAccession>\n                <RCVAccession Title=\"NM_001849.3(COL6A2):c.2697G&gt;A (p.Thr899=) AND not specified\"\n                              DateLastEvaluated=\"2017-09-11\" ReviewStatus=\"criteria provided, single submitter\"\n                              Interpretation=\"Likely benign\" SubmissionCount=\"1\" Accession=\"RCV000609723\" Version=\"1\">\n                    <InterpretedConditionList TraitSetID=\"9590\">\n                        <InterpretedCondition DB=\"MedGen\" ID=\"CN169374\">not specified</InterpretedCondition>\n                    </InterpretedConditionList>\n                </RCVAccession>\n                <RCVAccession Title=\"NM_001849.3(COL6A2):c.2697G&gt;A (p.Thr899=) AND Collagen VI-related myopathy\"\n                              DateLastEvaluated=\"2018-01-15\" ReviewStatus=\"criteria provided, single submitter\"\n                              Interpretation=\"Uncertain significance\" SubmissionCount=\"1\" Accession=\"RCV001143621\"\n                              Version=\"1\">\n                    <InterpretedConditionList TraitSetID=\"7429\">\n                        <InterpretedCondition DB=\"MedGen\" ID=\"CN117976\">Collagen VI-related myopathy\n                        </InterpretedCondition>\n                    </InterpretedConditionList>\n                </RCVAccession>\n            </RCVList>\n            <Interpretations>\n                <Interpretation DateLastEvaluated=\"2019-12-31\" NumberOfSubmissions=\"3\" NumberOfSubmitters=\"3\"\n                                Type=\"Clinical significance\">\n                    <Description>Conflicting interpretations of pathogenicity</Description>\n                    <Explanation DataSource=\"ClinVar\" Type=\"public\">Likely benign(2);Uncertain 
significance(1)\n                    </Explanation>\n                    <Comment DataSource=\"ClinVar\" Type=\"public\">Likely benign(2);Uncertain significance(1)</Comment>\n                    <DescriptionHistory Dated=\"2020-05-31\">\n                        <Description>Likely benign</Description>\n                    </DescriptionHistory>\n                    <ConditionList>\n                        <TraitSet ID=\"5661\" Type=\"Disease\">\n                            <Trait ID=\"492\" Type=\"Disease\">\n                                <Name>\n                                    <ElementValue Type=\"Alternate\">Myopathy, benign congenital, with contractures\n                                    </ElementValue>\n                                </Name>\n                                <Name>\n                                    <ElementValue Type=\"Alternate\">Muscular dystrophy, benign congenital</ElementValue>\n                                </Name>\n                                <Name>\n                                    <ElementValue Type=\"Preferred\">Bethlem myopathy 1</ElementValue>\n                                </Name>\n                                <Name>\n                                    <ElementValue Type=\"Alternate\">MUSCULAR DYSTROPHY, LIMB-GIRDLE, AUTOSOMAL DOMINANT\n                                        5\n                                    </ElementValue>\n                                    <XRef Type=\"MIM\" ID=\"158810\" DB=\"OMIM\"/>\n                                </Name>\n                                <Symbol>\n                                    <ElementValue Type=\"Preferred\">BTHLM1</ElementValue>\n                                    <XRef Type=\"MIM\" ID=\"158810\" DB=\"OMIM\"/>\n                                </Symbol>\n                                <Symbol>\n                                    <ElementValue Type=\"Alternate\">LGMDD5</ElementValue>\n                                    <XRef Type=\"MIM\" 
ID=\"158810\" DB=\"OMIM\"/>\n                                </Symbol>\n                                <AttributeSet>\n                                    <Attribute Type=\"public definition\">Collagen type VI-related disorders represent a\n                                        continuum of overlapping phenotypes with Bethlem myopathy at the mild end,\n                                        Ullrich congenital muscular dystrophy (CMD) at the severe end, and two rare,\n                                        less well-defined disorders – autosomal dominant limb-girdle muscular dystrophy\n                                        and autosomal recessive myosclerosis myopathy – in between. Although Bethlem\n                                        myopathy and Ullrich CMD were defined long before their molecular basis was\n                                        known, they remain useful for clarification of prognosis and management. Bethlem\n                                        myopathy, characterized by the combination of proximal muscle weakness and\n                                        variable contractures, affects most frequently the long finger flexors, elbows,\n                                        and ankles. Onset may be prenatal (characterized by decreased fetal movements),\n                                        neonatal (hypotonia or torticollis), in early childhood (delayed motor\n                                        milestones, muscle weakness, and contractures), or in adulthood (proximal\n                                        weakness and Achilles tendon or long finger flexor contractures). Because of\n                                        slow progression, more than two thirds of affected individuals over age 50 years\n                                        rely on supportive means for outdoor mobility. 
Respiratory involvement is rare\n                                        and appears to be related to more severe muscle weakness in later life. Ullrich\n                                        CMD is characterized by congenital weakness and hypotonia, proximal joint\n                                        contractures, and striking hyperlaxity of distal joints. Some affected children\n                                        acquire the ability to walk independently; however, progression of the disease\n                                        often results in later loss of ambulation. Early and severe respiratory\n                                        involvement may require ventilatory support in the first or second decade of\n                                        life.\n                                    </Attribute>\n                                    <XRef ID=\"NBK1503\" DB=\"GeneReviews\"/>\n                                </AttributeSet>\n                                <Citation Type=\"review\" Abbrev=\"GeneReviews\">\n                                    <ID Source=\"PubMed\">20301676</ID>\n                                    <ID Source=\"BookShelf\">NBK1503</ID>\n                                </Citation>\n                                <Citation Type=\"practice guideline\" Abbrev=\"Int'l SCC for CMD, 2010\">\n                                    <ID Source=\"PubMed\">21078917</ID>\n                                </Citation>\n                                <Citation Type=\"review\" Abbrev=\"GeneReviews\">\n                                    <ID Source=\"PubMed\">20301468</ID>\n                                    <ID Source=\"BookShelf\">NBK1291</ID>\n                                </Citation>\n                                <XRef ID=\"610\" DB=\"Orphanet\"/>\n                                <XRef ID=\"CN029274\" DB=\"MedGen\"/>\n                                <XRef ID=\"MONDO:0024530\" DB=\"MONDO\"/>\n                                <XRef 
Type=\"MIM\" ID=\"158810\" DB=\"OMIM\"/>\n                            </Trait>\n                        </TraitSet>\n                        <TraitSet ID=\"7429\" Type=\"Disease\">\n                            <Trait ID=\"16600\" Type=\"Disease\">\n                                <Name>\n                                    <ElementValue Type=\"Preferred\">Collagen VI-related myopathy</ElementValue>\n                                    <XRef ID=\"Collagen+VI-related+myopathy/8002\" DB=\"Genetic Alliance\"/>\n                                </Name>\n                                <Name>\n                                    <ElementValue Type=\"Alternate\">Collagen 6-related myopathy</ElementValue>\n                                </Name>\n                                <Symbol>\n                                    <ElementValue Type=\"Preferred\">COL6-RM</ElementValue>\n                                </Symbol>\n                                <AttributeSet>\n                                    <Attribute Type=\"public definition\">Collagen type VI-related disorders represent a\n                                        continuum of overlapping phenotypes with Bethlem myopathy at the mild end,\n                                        Ullrich congenital muscular dystrophy (CMD) at the severe end, and two rare,\n                                        less well-defined disorders – autosomal dominant limb-girdle muscular dystrophy\n                                        and autosomal recessive myosclerosis myopathy – in between. Although Bethlem\n                                        myopathy and Ullrich CMD were defined long before their molecular basis was\n                                        known, they remain useful for clarification of prognosis and management. 
Bethlem\n                                        myopathy, characterized by the combination of proximal muscle weakness and\n                                        variable contractures, affects most frequently the long finger flexors, elbows,\n                                        and ankles. Onset may be prenatal (characterized by decreased fetal movements),\n                                        neonatal (hypotonia or torticollis), in early childhood (delayed motor\n                                        milestones, muscle weakness, and contractures), or in adulthood (proximal\n                                        weakness and Achilles tendon or long finger flexor contractures). Because of\n                                        slow progression, more than two thirds of affected individuals over age 50 years\n                                        rely on supportive means for outdoor mobility. Respiratory involvement is rare\n                                        and appears to be related to more severe muscle weakness in later life. Ullrich\n                                        CMD is characterized by congenital weakness and hypotonia, proximal joint\n                                        contractures, and striking hyperlaxity of distal joints. Some affected children\n                                        acquire the ability to walk independently; however, progression of the disease\n                                        often results in later loss of ambulation. 
Early and severe respiratory\n                                        involvement may require ventilatory support in the first or second decade of\n                                        life.\n                                    </Attribute>\n                                    <XRef ID=\"NBK1503\" DB=\"GeneReviews\"/>\n                                </AttributeSet>\n                                <Citation Type=\"review\" Abbrev=\"GeneReviews\">\n                                    <ID Source=\"PubMed\">20301676</ID>\n                                    <ID Source=\"BookShelf\">NBK1503</ID>\n                                </Citation>\n                                <XRef ID=\"CN117976\" DB=\"MedGen\"/>\n                            </Trait>\n                        </TraitSet>\n                        <TraitSet ID=\"9590\" Type=\"Disease\">\n                            <Trait ID=\"16789\" Type=\"Disease\">\n                                <Name>\n                                    <ElementValue Type=\"Alternate\">AllHighlyPenetrant</ElementValue>\n                                </Name>\n                                <Name>\n                                    <ElementValue Type=\"Preferred\">not specified</ElementValue>\n                                </Name>\n                                <AttributeSet>\n                                    <Attribute Type=\"public definition\">The term 'not specified' was created for use in\n                                        ClinVar so that submitters can convey the concept that a variant is benign,\n                                        likely benign, or of uncertain significance for an unspecified set of disorders.\n                                        This usage was introduced in 2014 to replace AllHighlyPenetrant.\n                                    </Attribute>\n                                </AttributeSet>\n                                <XRef ID=\"CN169374\" DB=\"MedGen\"/>\n                      
      </Trait>\n                        </TraitSet>\n                    </ConditionList>\n                </Interpretation>\n            </Interpretations>\n            <ClinicalAssertionList>\n                <ClinicalAssertion ID=\"1403880\" DateCreated=\"2018-04-04\" DateLastUpdated=\"2019-03-31\"\n                                   SubmissionDate=\"2018-03-26\">\n                    <ClinVarSubmissionID localKey=\"GDX:1784188|Not Provided\" submittedAssembly=\"GRCh37\"/>\n                    <ClinVarAccession Accession=\"SCV000722725\" Type=\"SCV\" Version=\"1\" SubmitterName=\"GeneDx\"\n                                      OrgID=\"26957\" OrganizationCategory=\"laboratory\"/>\n                    <RecordStatus>current</RecordStatus>\n                    <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                    <Interpretation DateLastEvaluated=\"2017-09-11\">\n                        <Description>Likely benign</Description>\n                        <Comment Type=\"public\">This variant is considered likely benign or benign based on one or more\n                            of the following criteria: it is a conservative change, it occurs at a poorly conserved\n                            position in the protein, it is predicted to be benign by multiple in silico algorithms,\n                            and/or has population frequency not consistent with disease.\n                        </Comment>\n                    </Interpretation>\n                    <Assertion>variation to disease</Assertion>\n                    <AttributeSet>\n                        <Attribute Type=\"AssertionMethod\">GeneDX Variant Classification (06012015)</Attribute>\n                        <Citation>\n                            <URL>\n                                https://submit.ncbi.nlm.nih.gov/ft/byid/7oynscmk/mdi-5616_26957_genedx_interprules_final_061215.pdf\n                            </URL>\n                        </Citation>\n             
       </AttributeSet>\n                    <ObservedInList>\n                        <ObservedIn>\n                            <Sample>\n                                <Origin>germline</Origin>\n                                <Species TaxonomyId=\"9606\">human</Species>\n                                <AffectedStatus>yes</AffectedStatus>\n                            </Sample>\n                            <Method>\n                                <MethodType>clinical testing</MethodType>\n                            </Method>\n                            <ObservedData>\n                                <Attribute Type=\"Description\">not provided</Attribute>\n                            </ObservedData>\n                        </ObservedIn>\n                    </ObservedInList>\n                    <SimpleAllele>\n                        <GeneList>\n                            <Gene Symbol=\"COL6A2\"/>\n                        </GeneList>\n                        <VariantType>Variation</VariantType>\n                        <Location>\n                            <SequenceLocation Assembly=\"GRCh37\" Chr=\"21\" alternateAllele=\"A\" referenceAllele=\"G\"\n                                              start=\"47552103\" stop=\"47552103\" variantLength=\"1\"/>\n                        </Location>\n                        <AttributeSet>\n                            <Attribute Type=\"HGVS\">NM_001849.3:c.2697G&gt;A</Attribute>\n                        </AttributeSet>\n                    </SimpleAllele>\n                    <TraitSet Type=\"Disease\">\n                        <Trait Type=\"Disease\">\n                            <Name>\n                                <ElementValue Type=\"Preferred\">not specified</ElementValue>\n                            </Name>\n                        </Trait>\n                    </TraitSet>\n                    <SubmissionNameList>\n                        <SubmissionName>SUB3839901</SubmissionName>\n                    
</SubmissionNameList>\n                </ClinicalAssertion>\n                <ClinicalAssertion ID=\"1255376\" DateCreated=\"2017-12-20\" DateLastUpdated=\"2020-05-05\"\n                                   SubmissionDate=\"2020-01-29\">\n                    <ClinVarSubmissionID localKey=\"1519820|MedGen:CN029274\" submittedAssembly=\"GRCh37\"/>\n                    <ClinVarAccession Accession=\"SCV000657167\" Type=\"SCV\" Version=\"3\" SubmitterName=\"Invitae\"\n                                      OrgID=\"500031\" OrganizationCategory=\"laboratory\"/>\n                    <RecordStatus>current</RecordStatus>\n                    <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                    <Interpretation DateLastEvaluated=\"2019-12-31\">\n                        <Description>Likely benign</Description>\n                    </Interpretation>\n                    <Assertion>variation to disease</Assertion>\n                    <AttributeSet>\n                        <Attribute Type=\"AssertionMethod\">Invitae Variant Classification Sherloc (09022015)</Attribute>\n                        <Citation>\n                            <ID Source=\"PubMed\">28492532</ID>\n                        </Citation>\n                    </AttributeSet>\n                    <ObservedInList>\n                        <ObservedIn>\n                            <Sample>\n                                <Origin>germline</Origin>\n                                <Species TaxonomyId=\"9606\">human</Species>\n                                <AffectedStatus>unknown</AffectedStatus>\n                            </Sample>\n                            <Method>\n                                <MethodType>clinical testing</MethodType>\n                            </Method>\n                            <ObservedData>\n                                <Attribute Type=\"Description\">not provided</Attribute>\n                            </ObservedData>\n                        
</ObservedIn>\n                    </ObservedInList>\n                    <SimpleAllele>\n                        <GeneList>\n                            <Gene Symbol=\"COL6A2\"/>\n                        </GeneList>\n                        <VariantType>Variation</VariantType>\n                        <AttributeSet>\n                            <Attribute Type=\"HGVS\">NM_001849.3:c.2697G&gt;A</Attribute>\n                        </AttributeSet>\n                    </SimpleAllele>\n                    <TraitSet Type=\"Disease\">\n                        <Trait Type=\"Disease\">\n                            <Name>\n                                <ElementValue Type=\"Preferred\">Bethlem myopathy 1</ElementValue>\n                            </Name>\n                            <XRef DB=\"MedGen\" ID=\"CN029274\" Type=\"CUI\"/>\n                        </Trait>\n                    </TraitSet>\n                    <SubmissionNameList>\n                        <SubmissionName>SUB6897608</SubmissionName>\n                    </SubmissionNameList>\n                </ClinicalAssertion>\n                <ClinicalAssertion ID=\"2530780\" DateCreated=\"2020-05-26\" DateLastUpdated=\"2020-06-01\"\n                                   SubmissionDate=\"2020-02-20\">\n                    <ClinVarSubmissionID localKey=\"1414007|MedGen:CN117976\" submittedAssembly=\"GRCh37\"/>\n                    <ClinVarAccession Accession=\"SCV001304160\" Type=\"SCV\" Version=\"1\"\n                                      SubmitterName=\"Illumina Clinical Services Laboratory,Illumina\" OrgID=\"504895\"\n                                      OrganizationCategory=\"laboratory\"/>\n                    <RecordStatus>current</RecordStatus>\n                    <ReviewStatus>criteria provided, single submitter</ReviewStatus>\n                    <Interpretation DateLastEvaluated=\"2018-01-15\">\n                        <Description>Uncertain significance</Description>\n                        
<Comment>This variant was observed in the ICSL laboratory as part of a predisposition screen in\n                            an ostensibly healthy population. It had not been previously curated by ICSL or reported in\n                            the Human Gene Mutation Database (HGMD: prior to June 1st, 2018), and was therefore a\n                            candidate for classification through an automated scoring system. Utilizing variant allele\n                            frequency, disease prevalence and penetrance estimates, and inheritance mode, an automated\n                            score was calculated to assess if this variant is too frequent to cause the disease. Based\n                            on the score, this variant could not be ruled out of causing disease and therefore its\n                            association with disease required further investigation. A literature search was performed\n                            for the gene, cDNA change, and amino acid change (if applicable). No publications were found\n                            based on this search. 
This variant was therefore classified as a variant of unknown\n                            significance for this disease.\n                        </Comment>\n                    </Interpretation>\n                    <Assertion>variation to disease</Assertion>\n                    <AttributeSet>\n                        <Attribute Type=\"AssertionMethod\">ICSL Variant Classification Criteria 13 December 2019\n                        </Attribute>\n                        <Citation>\n                            <URL>\n                                https://submit.ncbi.nlm.nih.gov/ft/byid/r0x0xrmc/icsl_variant_classification_criteria_13_december_2019.pdf\n                            </URL>\n                        </Citation>\n                    </AttributeSet>\n                    <ObservedInList>\n                        <ObservedIn>\n                            <Sample>\n                                <Origin>germline</Origin>\n                                <Species TaxonomyId=\"9606\">human</Species>\n                                <AffectedStatus>unknown</AffectedStatus>\n                            </Sample>\n                            <Method>\n                                <MethodType>clinical testing</MethodType>\n                            </Method>\n                            <ObservedData>\n                                <Attribute Type=\"Description\">not provided</Attribute>\n                            </ObservedData>\n                        </ObservedIn>\n                    </ObservedInList>\n                    <SimpleAllele>\n                        <GeneList>\n                            <Gene Symbol=\"COL6A2\"/>\n                        </GeneList>\n                        <VariantType>Variation</VariantType>\n                        <Location>\n                            <SequenceLocation Assembly=\"GRCh37\" Chr=\"21\" alternateAllele=\"A\" referenceAllele=\"G\"\n                                              start=\"47552103\" 
stop=\"47552103\" variantLength=\"1\"/>\n                        </Location>\n                        <AttributeSet>\n                            <Attribute Type=\"HGVS\">NM_001849.3:c.2697G&gt;A</Attribute>\n                        </AttributeSet>\n                    </SimpleAllele>\n                    <TraitSet Type=\"Disease\">\n                        <Trait Type=\"Disease\">\n                            <Name>\n                                <ElementValue Type=\"Preferred\">Collagen VI-related myopathy</ElementValue>\n                            </Name>\n                            <XRef DB=\"MedGen\" ID=\"CN117976\" Type=\"CUI\"/>\n                        </Trait>\n                    </TraitSet>\n                    <SubmissionNameList>\n                        <SubmissionName>SUB6641900</SubmissionName>\n                    </SubmissionNameList>\n                </ClinicalAssertion>\n            </ClinicalAssertionList>\n            <TraitMappingList>\n                <TraitMapping ClinicalAssertionID=\"2530780\" TraitType=\"Disease\" MappingType=\"Name\"\n                              MappingValue=\"Collagen VI-related myopathy\" MappingRef=\"Preferred\">\n                    <MedGen CUI=\"CN117976\" Name=\"Collagen VI-related myopathy\"/>\n                </TraitMapping>\n                <TraitMapping ClinicalAssertionID=\"1255376\" TraitType=\"Disease\" MappingType=\"Name\"\n                              MappingValue=\"Bethlem myopathy 1\" MappingRef=\"Preferred\">\n                    <MedGen CUI=\"CN029274\" Name=\"Bethlem myopathy 1\"/>\n                </TraitMapping>\n                <TraitMapping ClinicalAssertionID=\"1403880\" TraitType=\"Disease\" MappingType=\"Name\"\n                              MappingValue=\"not specified\" MappingRef=\"Preferred\">\n                    <MedGen CUI=\"CN169374\" Name=\"not specified\"/>\n                </TraitMapping>\n            </TraitMappingList>\n        </InterpretedRecord>\n    
</VariationArchive>\n</ClinVarVariationRelease>"
  },
  {
    "path": "UnitTests/Resources/ClinVarXmlFiles/VCVs/VCV000618791.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n<ClinVarVariationRelease xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:noNamespaceSchemaLocation=\"http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_variation/variation_archive_1.4.xsd\" ReleaseDate=\"2019-12-31\">\n<VariationArchive VariationID=\"618791\" VariationName=\"NM_005026.5(PIK3CD):c.231G&gt;A (p.Ala77=)\" VariationType=\"single nucleotide variant\" DateCreated=\"2019-02-15\" DateLastUpdated=\"2022-04-21\" Accession=\"VCV000618791\" Version=\"5\" RecordType=\"interpreted\" NumberOfSubmissions=\"2\" NumberOfSubmitters=\"2\"><RecordStatus>current</RecordStatus><Species>Homo sapiens</Species><InterpretedRecord><SimpleAllele AlleleID=\"609439\" VariationID=\"618791\"><GeneList><Gene Symbol=\"PIK3CD\" FullName=\"phosphatidylinositol-4,5-bisphosphate 3-kinase catalytic subunit delta\" GeneID=\"5293\" HGNC_ID=\"HGNC:8977\" Source=\"submitted\" RelationshipType=\"within single gene\"><Location><CytogeneticLocation>1p36.22</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" start=\"9627258\" stop=\"9729114\" display_start=\"9627258\" display_stop=\"9729114\" Strand=\"+\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"9711789\" stop=\"9789171\" display_start=\"9711789\" display_stop=\"9789171\" Strand=\"+\" /></Location><OMIM>602839</OMIM></Gene></GeneList><Name>NM_005026.5(PIK3CD):c.231G&gt;A (p.Ala77=)</Name><CanonicalSPDI>NC_000001.11:9715629:G:A</CanonicalSPDI><VariantType>single nucleotide variant</VariantType><Location><CytogeneticLocation>1p36.22</CytogeneticLocation><SequenceLocation Assembly=\"GRCh38\" AssemblyAccessionVersion=\"GCF_000001405.38\" forDisplay=\"true\" AssemblyStatus=\"current\" Chr=\"1\" Accession=\"NC_000001.11\" 
start=\"9715630\" stop=\"9715630\" display_start=\"9715630\" display_stop=\"9715630\" variantLength=\"1\" positionVCF=\"9715630\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"A\" /><SequenceLocation Assembly=\"GRCh37\" AssemblyAccessionVersion=\"GCF_000001405.25\" AssemblyStatus=\"previous\" Chr=\"1\" Accession=\"NC_000001.10\" start=\"9775688\" stop=\"9775688\" display_start=\"9775688\" display_stop=\"9775688\" variantLength=\"1\" positionVCF=\"9775688\" referenceAlleleVCF=\"G\" alternateAlleleVCF=\"A\" /></Location><HGVSlist><HGVS Type=\"coding\"><NucleotideExpression sequenceAccessionVersion=\"LRG_191t1\" sequenceAccession=\"LRG_191t1\" change=\"c.231G&gt;A\"><Expression>LRG_191t1:c.231G&gt;A</Expression></NucleotideExpression></HGVS><HGVS Type=\"genomic\"><NucleotideExpression sequenceAccessionVersion=\"LRG_191\" sequenceAccession=\"LRG_191\" change=\"g.68899G&gt;A\"><Expression>LRG_191:g.68899G&gt;A</Expression></NucleotideExpression></HGVS><HGVS Assembly=\"GRCh38\" Type=\"genomic, top-level\"><NucleotideExpression sequenceAccessionVersion=\"NC_000001.11\" sequenceAccession=\"NC_000001\" sequenceVersion=\"11\" change=\"g.9715630G&gt;A\" Assembly=\"GRCh38\"><Expression>NC_000001.11:g.9715630G&gt;A</Expression></NucleotideExpression></HGVS><HGVS Type=\"coding\"><NucleotideExpression sequenceAccessionVersion=\"NM_001350234.2\" sequenceAccession=\"NM_001350234\" sequenceVersion=\"2\" change=\"c.231G&gt;A\"><Expression>NM_001350234.2:c.231G&gt;A</Expression></NucleotideExpression><ProteinExpression sequenceAccessionVersion=\"NP_001337163.1\" sequenceAccession=\"NP_001337163\" sequenceVersion=\"1\" change=\"p.Ala77=\"><Expression>NP_001337163.1:p.Ala77=</Expression></ProteinExpression><MolecularConsequence ID=\"SO:0001819\" Type=\"synonymous variant\" DB=\"SO\" /></HGVS><HGVS Type=\"coding\"><NucleotideExpression sequenceAccessionVersion=\"NM_001350235.1\" sequenceAccession=\"NM_001350235\" sequenceVersion=\"1\" 
change=\"c.231G&gt;A\"><Expression>NM_001350235.1:c.231G&gt;A</Expression></NucleotideExpression><ProteinExpression sequenceAccessionVersion=\"NP_001337164.1\" sequenceAccession=\"NP_001337164\" sequenceVersion=\"1\" change=\"p.Ala77=\"><Expression>NP_001337164.1:p.Ala77=</Expression></ProteinExpression><MolecularConsequence ID=\"SO:0001819\" Type=\"synonymous variant\" DB=\"SO\" /></HGVS><HGVS Type=\"coding\"><NucleotideExpression sequenceAccessionVersion=\"NM_005026.5\" sequenceAccession=\"NM_005026\" sequenceVersion=\"5\" change=\"c.231G&gt;A\" MANESelect=\"true\"><Expression>NM_005026.5:c.231G&gt;A</Expression></NucleotideExpression><ProteinExpression sequenceAccessionVersion=\"NP_005017.3\" sequenceAccession=\"NP_005017\" sequenceVersion=\"3\" change=\"p.Ala77=\"><Expression>NP_005017.3:p.Ala77=</Expression></ProteinExpression><MolecularConsequence ID=\"SO:0001819\" Type=\"synonymous variant\" DB=\"SO\" /></HGVS><HGVS Type=\"genomic\"><NucleotideExpression sequenceAccessionVersion=\"NG_023434.1\" sequenceAccession=\"NG_023434\" sequenceVersion=\"1\" change=\"g.68899G&gt;A\"><Expression>NG_023434.1:g.68899G&gt;A</Expression></NucleotideExpression></HGVS><HGVS Assembly=\"GRCh37\" Type=\"genomic, top-level\"><NucleotideExpression sequenceAccessionVersion=\"NC_000001.10\" sequenceAccession=\"NC_000001\" sequenceVersion=\"10\" change=\"g.9775688G&gt;A\" Assembly=\"GRCh37\"><Expression>NC_000001.10:g.9775688G&gt;A</Expression></NucleotideExpression></HGVS></HGVSlist><XRefList><XRef Type=\"rs\" ID=\"756139699\" DB=\"dbSNP\" /></XRefList><AlleleFrequencyList><AlleleFrequency Value=\"0.00006\" Source=\"The Genome Aggregation Database (gnomAD)\" /><AlleleFrequency Value=\"0.00006\" Source=\"The Genome Aggregation Database (gnomAD), exomes\" /><AlleleFrequency Value=\"0.00006\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\" /><AlleleFrequency Value=\"0.00007\" Source=\"Exome Aggregation Consortium (ExAC)\" /><AlleleFrequency Value=\"0.00007\" Source=\"The Genome 
Aggregation Database (gnomAD)\" /><AlleleFrequency Value=\"0.00008\" Source=\"Trans-Omics for Precision Medicine (TOPMed)\" /></AlleleFrequencyList></SimpleAllele><ReviewStatus>criteria provided, multiple submitters, no conflicts</ReviewStatus><RCVList><RCVAccession Title=\"NM_005026.5(PIK3CD):c.231G&gt;A (p.Ala77=) AND not provided\" DateLastEvaluated=\"2017-07-03\" ReviewStatus=\"criteria provided, single submitter\" Interpretation=\"Likely benign\" SubmissionCount=\"1\" Accession=\"RCV000757616\" Version=\"4\"><InterpretedConditionList TraitSetID=\"9460\"><InterpretedCondition DB=\"MedGen\" ID=\"CN517202\">not provided</InterpretedCondition></InterpretedConditionList></RCVAccession><RCVAccession Title=\"NM_005026.5(PIK3CD):c.231G&gt;A (p.Ala77=) AND Immunodeficiency 14\" DateLastEvaluated=\"2020-08-22\" ReviewStatus=\"criteria provided, single submitter\" Interpretation=\"Likely benign\" SubmissionCount=\"1\" Accession=\"RCV001488431\" Version=\"1\"><InterpretedConditionList TraitSetID=\"12475\"><InterpretedCondition DB=\"MedGen\" ID=\"C3714976\">Immunodeficiency 14</InterpretedCondition></InterpretedConditionList></RCVAccession></RCVList><Interpretations><Interpretation DateLastEvaluated=\"2020-08-22\" NumberOfSubmissions=\"2\" NumberOfSubmitters=\"2\" Type=\"Clinical significance\"><Description>Likely benign</Description><ConditionList><TraitSet ID=\"9460\" Type=\"Disease\"><Trait ID=\"17556\" Type=\"Disease\"><Name><ElementValue Type=\"Alternate\">none provided</ElementValue></Name><Name><ElementValue Type=\"Preferred\">not provided</ElementValue><XRef ID=\"13DG0619\" DB=\"Developmental Genetics Unit,King Faisal Specialist Hospital &amp; Research Centre\" /></Name><AttributeSet><Attribute Type=\"public definition\">The term 'not provided' is registered in MedGen to support identification of submissions to ClinVar for which no condition was named when assessing the variant. 
'not provided' differs from 'not specified', which is used when a variant is asserted to be benign, likely benign, or of uncertain significance for conditions that have not been specified.</Attribute></AttributeSet><XRef ID=\"CN517202\" DB=\"MedGen\" /></Trait></TraitSet><TraitSet ID=\"12475\" Type=\"Disease\"><Trait ID=\"18461\" Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Immunodeficiency 14</ElementValue><XRef ID=\"MONDO:0014222\" DB=\"MONDO\" /></Name><Name><ElementValue Type=\"Alternate\">p110-DELTA-ACTIVATING MUTATION CAUSING SENESCENT T CELLS, LYMPHADENOPATHY, AND IMMUNODEFICIENCY</ElementValue><XRef Type=\"MIM\" ID=\"615513\" DB=\"OMIM\" /></Name><Name><ElementValue Type=\"Alternate\">IMMUNODEFICIENCY 14A, AUTOSOMAL DOMINANT</ElementValue><XRef Type=\"MIM\" ID=\"615513\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"602839.0001\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"602839.0002\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"602839.0004\" DB=\"OMIM\" /><XRef Type=\"Allelic variant\" ID=\"602839.0003\" DB=\"OMIM\" /></Name><Symbol><ElementValue Type=\"Preferred\">IMD14A</ElementValue><XRef Type=\"MIM\" ID=\"615513\" DB=\"OMIM\" /></Symbol><Symbol><ElementValue Type=\"Alternate\">PASLI</ElementValue><XRef Type=\"MIM\" ID=\"615513\" DB=\"OMIM\" /></Symbol><XRef ID=\"397596\" DB=\"Orphanet\" /><XRef ID=\"C3714976\" DB=\"MedGen\" /><XRef ID=\"MONDO:0014222\" DB=\"MONDO\" /><XRef Type=\"MIM\" ID=\"615513\" DB=\"OMIM\" /></Trait></TraitSet></ConditionList></Interpretation></Interpretations><ClinicalAssertionList><ClinicalAssertion ID=\"1732522\" DateCreated=\"2019-02-15\" DateLastUpdated=\"2022-01-26\" SubmissionDate=\"2018-10-10\"><ClinVarSubmissionID localKey=\"164534|Not Provided\" submittedAssembly=\"GRCh37\" /><ClinVarAccession Accession=\"SCV000885910\" Type=\"SCV\" Version=\"1\" SubmitterName=\"ARUP Laboratories, Molecular Genetics and Genomics,ARUP Laboratories\" OrgID=\"25969\" OrganizationCategory=\"laboratory\" 
/><RecordStatus>current</RecordStatus><ReviewStatus>criteria provided, single submitter</ReviewStatus><Interpretation DateLastEvaluated=\"2017-07-03\"><Description>Likely benign</Description><Comment>The c.231G&gt;A variant (rs756139699) does not alter the amino acid sequence of the PIK3CD protein and computational splice site prediction algorithms do not predict a change in the nearest splice site or creation of a cryptic splice site. This variant has not been reported in association with primary antibody deficiency in medical literature or in gene specific variation databases. This variant is listed in the genome Aggregation Database (gnomAD) with an overall population frequency of 0.006 percent (identified on 17 out of 276,374 chromosomes). Based on these observations, the c.231G&gt;A variant is likely to be benign.</Comment></Interpretation><Assertion>variation to disease</Assertion><AttributeSet><Attribute Type=\"AssertionMethod\">ARUP Molecular Germline Variant Investigation Process</Attribute><Citation><URL>https://submit.ncbi.nlm.nih.gov/ft/byid/w2yp3qyt/arup_molecular_germline_variant_investigation_process.pdf</URL></Citation></AttributeSet><ObservedInList><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn></ObservedInList><SimpleAllele><VariantType>Variation</VariantType><Location><SequenceLocation Assembly=\"GRCh37\" Chr=\"1\" alternateAllele=\"A\" referenceAllele=\"G\" start=\"9775688\" stop=\"9775688\" variantLength=\"1\" /></Location><OtherNameList><Name>p.Ala77Ala</Name></OtherNameList><XRefList><XRef DB=\"dbSNP\" ID=\"756139699\" Type=\"rsNumber\" /></XRefList><AttributeSet><Attribute Type=\"HGVS\">NM_005026.3:c.231G&gt;A</Attribute></AttributeSet></SimpleAllele><TraitSet Type=\"Disease\"><Trait 
Type=\"Disease\"><Name><ElementValue Type=\"Preferred\">Not Provided</ElementValue></Name></Trait></TraitSet><SubmissionNameList><SubmissionName>SUB4618058</SubmissionName></SubmissionNameList></ClinicalAssertion><ClinicalAssertion ID=\"3295756\" DateCreated=\"2021-06-05\" DateLastUpdated=\"2021-06-09\" SubmissionDate=\"2021-01-07\"><ClinVarSubmissionID localKey=\"3436669|MedGen:C3714976\" submittedAssembly=\"GRCh37\" /><ClinVarAccession Accession=\"SCV001692948\" Type=\"SCV\" Version=\"1\" SubmitterName=\"Invitae\" OrgID=\"500031\" OrganizationCategory=\"laboratory\" /><RecordStatus>current</RecordStatus><ReviewStatus>criteria provided, single submitter</ReviewStatus><Interpretation DateLastEvaluated=\"2020-08-22\"><Description>Likely benign</Description></Interpretation><Assertion>variation to disease</Assertion><AttributeSet><Attribute Type=\"AssertionMethod\">Invitae Variant Classification Sherloc (09022015)</Attribute><Citation><ID Source=\"PubMed\">28492532</ID></Citation></AttributeSet><ObservedInList><ObservedIn><Sample><Origin>germline</Origin><Species TaxonomyId=\"9606\">human</Species><AffectedStatus>unknown</AffectedStatus></Sample><Method><MethodType>clinical testing</MethodType></Method><ObservedData><Attribute Type=\"Description\">not provided</Attribute></ObservedData></ObservedIn></ObservedInList><SimpleAllele><GeneList><Gene Symbol=\"PIK3CD\" /></GeneList><VariantType>Variation</VariantType><AttributeSet><Attribute Type=\"HGVS\">NC_000001.10:g.9775688G&gt;A</Attribute></AttributeSet></SimpleAllele><TraitSet Type=\"Disease\"><Trait Type=\"Disease\"><XRef DB=\"MedGen\" ID=\"C3714976\" Type=\"CUI\" /></Trait></TraitSet><SubmissionNameList><SubmissionName>SUB8755776</SubmissionName></SubmissionNameList></ClinicalAssertion></ClinicalAssertionList><TraitMappingList><TraitMapping ClinicalAssertionID=\"3295756\" TraitType=\"Disease\" MappingType=\"XRef\" MappingValue=\"C3714976\" MappingRef=\"MedGen\"><MedGen CUI=\"C3714976\" Name=\"Immunodeficiency 14\" 
/></TraitMapping><TraitMapping ClinicalAssertionID=\"1732522\" TraitType=\"Disease\" MappingType=\"Name\" MappingValue=\"Not Provided\" MappingRef=\"Preferred\"><MedGen CUI=\"CN517202\" Name=\"not provided\" /></TraitMapping></TraitMappingList></InterpretedRecord></VariationArchive>\n</ClinVarVariationRelease>"
  },
  {
    "path": "UnitTests/Resources/SA/CosmicCNV.tsv",
    "content": "CNV_ID\tID_GENE\tgene_name\tID_SAMPLE\tID_TUMOUR\tPrimary site\tSite subtype 1\tSite subtype 2\tSite subtype 3\tPrimary histology\tHistology subtype 1\tHistology subtype 2\tHistology subtype 3\tSAMPLE_NAME\tTOTAL_CN\tMINOR_ALLELE\tMUT_TYPE\tID_STUDY\tGRCh\tChromosome:G_Start..G_Stop\n6119374\t68055\tLGALS9C\t683665\t611825\thaematopoietic_and_lymphoid_tissue\tNS\tNS\tNS\tlymphoid_neoplasm\tplasma_cell_myeloma\tNS\tNS\tMC-CAR\t0\t0\tloss\t619\t37\t17:18358950..18464587\n6119374\t107031\tFAM106A\t683665\t611825\thaematopoietic_and_lymphoid_tissue\tNS\tNS\tNS\tlymphoid_neoplasm\tplasma_cell_myeloma\tNS\tNS\tMC-CAR\t0\t0\tloss\t619\t37\t17:18358950..18464587\n6128754\t69785\tDAZ2\t683665\t611825\thaematopoietic_and_lymphoid_tissue\tNS\tNS\tNS\tlymphoid_neoplasm\tplasma_cell_myeloma\tNS\tNS\tMC-CAR\t7\t0\tgain\t619\t37\tY:24624108..26404340\n6119398\t94344\tPCDH11Y_ENST00000215473\t683665\t611825\thaematopoietic_and_lymphoid_tissue\tNS\tNS\tNS\tlymphoid_neoplasm\tplasma_cell_myeloma\tNS\tNS\tMC-CAR\t0\t0\tloss\t619\t37\tY:5532303..5565780\n6128754\t103307\tDAZ1\t683665\t611825\thaematopoietic_and_lymphoid_tissue\tNS\tNS\tNS\tlymphoid_neoplasm\tplasma_cell_myeloma\tNS\tNS\tMC-CAR\t7\t0\tgain\t619\t37\tY:24624108..26404340\n6128754\t66769\tPRY\t683665\t611825\thaematopoietic_and_lymphoid_tissue\tNS\tNS\tNS\tlymphoid_neoplasm\tplasma_cell_myeloma\tNS\tNS\tMC-CAR\t7\t0\tgain\t619\t37\tY:24624108..26404340\n6128754\t106281\tDAZ1_ENST00000382510\t683665\t611825\thaematopoietic_and_lymphoid_tissue\tNS\tNS\tNS\tlymphoid_neoplasm\tplasma_cell_myeloma\tNS\tNS\tMC-CAR\t7\t0\tgain\t619\t37\tY:24624108..26404340\n6128756\t75765\tBPY2B\t683665\t611825\thaematopoietic_and_lymphoid_tissue\tNS\tNS\tNS\tlymphoid_neoplasm\tplasma_cell_myeloma\tNS\tNS\tMC-CAR\t10\t0\tgain\t619\t37\tY:26409790..27684355\n6128756\t69787\tDAZ3\t683665\t611825\thaematopoietic_and_lymphoid_tissue\tNS\tNS\tNS\tlymphoid_neoplasm\tplasma_cell_myeloma\tNS\tNS\tMC-CAR\t10\t0\tgain\t619\t37\tY:26409790
..27684355\n610835\t95782\tMT-CYB_ENST00000361789\t2384185\t2247017\tskin\tNS\tNS\tNS\tcarcinoma\tNS\tNS\tNS\tML_33_T_01\t\t\tgain\t656\t37\t25:2..15814"
  },
  {
    "path": "UnitTests/Resources/SA/MockSaFiles/not_sa.txt",
    "content": "﻿"
  },
  {
    "path": "UnitTests/Resources/SA/MockSaFiles/sa1.nsa",
    "content": "﻿"
  },
  {
    "path": "UnitTests/Resources/SA/MockSaFiles/sa1.nsa.idx",
    "content": "﻿"
  },
  {
    "path": "UnitTests/Resources/SA/MockSaFiles/sa2.nsa",
    "content": "﻿"
  },
  {
    "path": "UnitTests/Resources/SA/MockSaFiles/sa2.nsa.idx",
    "content": "﻿"
  },
  {
    "path": "UnitTests/Resources/SA/MockSaFiles/sa3.nsi",
    "content": "﻿"
  },
  {
    "path": "UnitTests/Resources/SA/MockSaFiles/sa4.nsi",
    "content": "﻿"
  },
  {
    "path": "UnitTests/Resources/SA/MockSaFiles/sa5.npd",
    "content": "﻿"
  },
  {
    "path": "UnitTests/Resources/SA/MockSaFiles/sa5.npd.idx",
    "content": "﻿"
  },
  {
    "path": "UnitTests/Resources/SA/MockSaFiles/sa6.nga",
    "content": "﻿"
  },
  {
    "path": "UnitTests/Resources/SA/MockSaFiles/sa7.nga",
    "content": "﻿"
  },
  {
    "path": "UnitTests/Resources/SA/MockSaFiles/sa8.rma",
    "content": "﻿"
  },
  {
    "path": "UnitTests/Resources/SA/MockSaFiles/sa8.rma.idx",
    "content": "﻿"
  },
  {
    "path": "UnitTests/Resources/TinyAnnotated.json",
    "content": "{\"header\":{\"annotator\":\"Illumina Annotation Engine 1.3.3.1633\",\"creationTime\":\"2016-12-09 09:49:24\",\"genomeAssembly\":\"GRCh37\",\"schemaVersion\":4,\"dataVersion\":\"84.22.36\",\"dataSources\":[{\"name\":\"VEP\",\"version\":\"84\",\"description\":\"Ensembl\",\"releaseDate\":\"2016-04-29\"},{\"name\":\"phyloP\",\"version\":\"hg19\",\"description\":\"46 way conservation score between humans and 45 other vertebrates\",\"releaseDate\":\"2009-11-10\"},{\"name\":\"OMIM\",\"version\":\"unknown\",\"description\":\"An Online Catalog of Human Genes and Genetic Disorders\",\"releaseDate\":\"2016-09-02\"},{\"name\":\"dbSNP\",\"version\":\"147\",\"description\":\"Identifiers for observed variants\",\"releaseDate\":\"2016-06-01\"},{\"name\":\"COSMIC\",\"version\":\"78\",\"description\":\"Somatic mutation and related details and information relating to human cancers\",\"releaseDate\":\"2016-09-05\"},{\"name\":\"1000 Genomes Project\",\"version\":\"Phase 3 v5a\",\"description\":\"A public catalogue of human variation and genotype data\",\"releaseDate\":\"2013-05-27\"},{\"name\":\"EVS\",\"version\":\"2\",\"releaseDate\":\"2013-11-13\"},{\"name\":\"ExAC\",\"version\":\"0.3.1\",\"description\":\"Allele frequency data from the ExAC project\",\"releaseDate\":\"2016-03-16\"},{\"name\":\"ClinVar\",\"version\":\"unknown\",\"description\":\"A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence\",\"releaseDate\":\"2016-09-01\"},{\"name\":\"DGV\",\"version\":\"unknown\",\"description\":\"Provides a comprehensive summary of structural variation in the human 
genome\",\"releaseDate\":\"2016-05-15\"},{\"name\":\"ClinGen\",\"version\":\"unknown\",\"releaseDate\":\"2016-04-14\"}]},\"positions\":[\n{\"chromosome\":\"chr1\",\"refAllele\":\"TCC\",\"position\":9775924,\"altAlleles\":[\"TTT\"],\"cytogeneticBand\":\"1p36.22\",\"variants\":[{\"altAllele\":\"TT\",\"refAllele\":\"CC\",\"begin\":9775925,\"chromosome\":\"chr1\",\"end\":9775926,\"variantType\":\"MNV\",\"vid\":\"1:9775925:9775926:TT\",\"cosmic\":[{\"id\":\"COSM4517654\",\"isAlleleSpecific\":true,\"refAllele\":\"CC\",\"altAllele\":\"TT\",\"gene\":\"PIK3CD\",\"sampleCount\":1,\"studies\":[{\"histology\":\"carcinoma\",\"primarySite\":\"skin\"}]},{\"id\":\"COSM4517655\",\"isAlleleSpecific\":true,\"refAllele\":\"CC\",\"altAllele\":\"TT\",\"gene\":\"PIK3CD_ENST00000536656\",\"sampleCount\":1,\"studies\":[{\"histology\":\"carcinoma\",\"primarySite\":\"skin\"}]}],\"transcripts\":{\"ensembl\":[{\"transcript\":\"ENST00000536656\",\"bioType\":\"protein_coding\",\"aminoAcids\":\"S/F\",\"cDnaPos\":\"597-598\",\"codons\":\"tCC/tTT\",\"cdsPos\":\"389-390\",\"exons\":\"5/24\",\"geneId\":\"ENSG00000171608\",\"hgnc\":\"PIK3CD\",\"consequence\":[\"missense_variant\"],\"hgvsc\":\"ENST00000536656.1:c.389_390delCCinsTT\",\"hgvsp\":\"ENSP00000446444.1:p.Ser130Phe\",\"polyPhenScore\":0.781,\"polyPhenPrediction\":\"possibly damaging\",\"proteinId\":\"ENSP00000446444\",\"proteinPos\":\"130\",\"siftScore\":0,\"siftPrediction\":\"deleterious\"},{\"transcript\":\"ENST00000377346\",\"bioType\":\"protein_coding\",\"aminoAcids\":\"S/F\",\"cDnaPos\":\"584-585\",\"codons\":\"tCC/tTT\",\"cdsPos\":\"389-390\",\"exons\":\"5/24\",\"geneId\":\"ENSG00000171608\",\"hgnc\":\"PIK3CD\",\"consequence\":[\"missense_variant\"],\"hgvsc\":\"ENST00000377346.4:c.389_390delCCinsTT\",\"hgvsp\":\"ENSP00000366563.4:p.Ser130Phe\",\"isCanonical\":true,\"polyPhenScore\":0.786,\"polyPhenPrediction\":\"possibly 
damaging\",\"proteinId\":\"ENSP00000366563\",\"proteinPos\":\"130\",\"siftScore\":0,\"siftPrediction\":\"deleterious\"},{\"transcript\":\"ENST00000361110\",\"bioType\":\"protein_coding\",\"aminoAcids\":\"S/F\",\"cDnaPos\":\"504-505\",\"codons\":\"tCC/tTT\",\"cdsPos\":\"389-390\",\"exons\":\"4/23\",\"geneId\":\"ENSG00000171608\",\"hgnc\":\"PIK3CD\",\"consequence\":[\"missense_variant\"],\"hgvsc\":\"ENST00000361110.2:c.389_390delCCinsTT\",\"hgvsp\":\"ENSP00000354410.2:p.Ser130Phe\",\"polyPhenScore\":0.781,\"polyPhenPrediction\":\"possibly damaging\",\"proteinId\":\"ENSP00000354410\",\"proteinPos\":\"130\",\"siftScore\":0,\"siftPrediction\":\"deleterious\"},{\"transcript\":\"ENST00000481137\",\"bioType\":\"retained_intron\",\"cDnaPos\":\"90-91\",\"exons\":\"2/2\",\"geneId\":\"ENSG00000171608\",\"hgnc\":\"PIK3CD\",\"consequence\":[\"non_coding_transcript_exon_variant\",\"non_coding_transcript_variant\"],\"hgvsc\":\"ENST00000481137.1:n.90_91delCCinsTT\"},{\"transcript\":\"ENST00000479223\",\"bioType\":\"retained_intron\",\"geneId\":\"ENSG00000171608\",\"hgnc\":\"PIK3CD\",\"consequence\":[\"upstream_gene_variant\"]},{\"transcript\":\"ENST00000543390\",\"bioType\":\"protein_coding\",\"geneId\":\"ENSG00000171608\",\"hgnc\":\"PIK3CD\",\"consequence\":[\"upstream_gene_variant\"],\"proteinId\":\"ENSP00000443811\"}]}}]},\n{\"chromosome\":\"chr1\",\"refAllele\":\"G\",\"position\":9777113,\"altAlleles\":[\"GCC\"],\"cytogeneticBand\":\"1p36.22\",\"variants\":[{\"altAllele\":\"CC\",\"refAllele\":\"-\",\"begin\":9777114,\"chromosome\":\"chr1\",\"end\":9777113,\"variantType\":\"insertion\",\"vid\":\"1:9777114:9777113:CC\",\"regulatoryRegions\":[{\"id\":\"ENSR00000530352\",\"consequence\":[\"regulatory_region_variant\"]}],\"cosmic\":[{\"id\":\"COSM1474274\",\"isAlleleSpecific\":true,\"refAllele\":\"-\",\"altAllele\":\"CC\",\"gene\":\"PIK3CD\",\"sampleCount\":1,\"studies\":[{\"id\":414,\"histology\":\"carcinoma\",\"primarySite\":\"breast\"}]},{\"id\":\"COSM5832706\",\"refAllele\":\"-\"
,\"altAllele\":\"NN\",\"gene\":\"PIK3CD_ENST00000536656\",\"sampleCount\":1,\"studies\":[{\"id\":414,\"histology\":\"carcinoma\",\"primarySite\":\"breast\"}]}],\"transcripts\":{\"ensembl\":[{\"transcript\":\"ENST00000536656\",\"bioType\":\"protein_coding\",\"introns\":\"6/23\",\"geneId\":\"ENSG00000171608\",\"hgnc\":\"PIK3CD\",\"consequence\":[\"intron_variant\"],\"hgvsc\":\"ENST00000536656.1:c.781-4_781-3dupCC\",\"proteinId\":\"ENSP00000446444\"},{\"transcript\":\"ENST00000377346\",\"aminoAcids\":\"A/AX\",\"cDnaPos\":\"1072-1073\",\"codons\":\"gcc/gCCcc\",\"cdsPos\":\"877-878\",\"exons\":\"7/24\",\"geneId\":\"ENSG00000171608\",\"hgnc\":\"PIK3CD\",\"consequence\":[\"frameshift_variant\"],\"hgvsc\":\"ENST00000377346.4:c.882_883dupCC\",\"hgvsp\":\"ENSP00000366563.4:p.Gln295ProfsTer40\",\"isCanonical\":true,\"proteinId\":\"ENSP00000366563\",\"proteinPos\":\"293\"},{\"transcript\":\"ENST00000361110\",\"bioType\":\"protein_coding\",\"introns\":\"5/22\",\"geneId\":\"ENSG00000171608\",\"hgnc\":\"PIK3CD\",\"consequence\":[\"intron_variant\"],\"hgvsc\":\"ENST00000361110.2:c.781-4_781-3dupCC\",\"proteinId\":\"ENSP00000354410\"},{\"transcript\":\"ENST00000481137\",\"bioType\":\"retained_intron\",\"geneId\":\"ENSG00000171608\",\"hgnc\":\"PIK3CD\",\"consequence\":[\"downstream_gene_variant\"]},{\"transcript\":\"ENST00000479223\",\"bioType\":\"retained_intron\",\"cDnaPos\":\"441-442\",\"exons\":\"2/3\",\"geneId\":\"ENSG00000171608\",\"hgnc\":\"PIK3CD\",\"consequence\":[\"non_coding_transcript_exon_variant\",\"non_coding_transcript_variant\"],\"hgvsc\":\"ENST00000479223.1:n.446_447dupCC\"},{\"transcript\":\"ENST00000543390\",\"bioType\":\"protein_coding\",\"geneId\":\"ENSG00000171608\",\"hgnc\":\"PIK3CD\",\"consequence\":[\"upstream_gene_variant\"],\"proteinId\":\"ENSP00000443811\"}]}}]}\n]}\n"
  },
  {
    "path": "UnitTests/Resources/cosm5428243.tsv",
    "content": "Gene name\tAccession Number\tGene CDS length\tHGNC ID\tSample name\tID_sample\tID_tumour\tPrimary site\tSite subtype 1\tSite subtype 2\tSite subtype 3\tPrimary histology\tHistology subtype 1\tHistology subtype 2\tHistology subtype 3\tGenome-wide screen\tMutation ID\tMutation CDS\tMutation AA\tMutation Description\tMutation zygosity\tLOH\tGRCh\tMutation genome position\tMutation strand\tSNP\tFATHMM prediction\tFATHMM score\tMutation somatic status\tPubmed_PMID\tID_STUDY\tSample source\tTumour origin\tAge\nFAM138A\tENST00000417324\t258\t\tCN-AML-CR-42-Dx\t2340530\t2205513\thaematopoietic_and_lymphoid_tissue\tNS\tNS\tNS\thaematopoietic_neoplasm\tacute_myeloid_leukaemia\tNS\tNS\ty\tCOSM5428243\tc.82T>C\tp.S28P\tSubstitution - Missense\t\tu\t37\t1:35416-35416\t-\tn\t\t\tConfirmed somatic variant\t\t544\tblood-bone marrow\tprimary\t69\nFAM138A\tENST00000417324\t258\t\tCN-AML-CR-42-Dx\t2340530\t2205513\thaematopoietic;lymphoid_tissue\tNS\tNS\tNS\thaematopoietic_neoplasm\tacute_myeloid_leukaemia\tNS\tNS\ty\tCOSM5428243\tc.82T>C\tp.S28P\tSubstitution - Missense\t\tu\t37\t1:35416-35416\t-\tn\t\t\tConfirmed somatic variant\t\t544\tt-bone marrow\tprimary\t81\n"
  },
  {
    "path": "UnitTests/Resources/cosm5428243.vcf",
    "content": "1\t35416\tCOSM5428243\tA\tG\t.\t.\tGENE=FAM138A;STRAND=-;CDS=c.82T>C;AA=p.S28P;CNT=1\n"
  },
  {
    "path": "UnitTests/Resources/dbSNP.version",
    "content": "NAME=dbSNP\nVERSION=147\nDATE=2016-04-08\nDESCRIPTION=\n"
  },
  {
    "path": "UnitTests/Resources/manifest.txt",
    "content": "﻿ClinGen_Dosage_Sensitivity_Map_20190507.nga\r\nnot_exist.nsa\r\ngnomAD_gene_scores_2.1.nga\r\nanother_fake_file.nsi\r\nOMIM_20190812.nga"
  },
  {
    "path": "UnitTests/Resources/mini.WigFix",
    "content": "fixedStep chrom=chr1 start=100 step=1\n0.064\n0.058\n0.064\n0.058\n0.064\n0.064\n0.064\n0.064\n0.064\n0.058\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.058\n0.064\n0.000\n0.000\n0.000\n0.000\n0.000\n0.000\n0.058\nfixedStep chrom=chr1 start=175 step=1\n0.064\n0.058\n0.064\n0.058\n0.064\n0.058\n0.058\n-2.088\n0.064\n0.058\n0.058\n0.064\n0.064\n0.064\n0.064\n0.064\n0.058\n0.064\n0.064\n-2.363\n0.064\n0.064\n0.064\n0.064\n0.000\n0.064\n0.064\n0.058\n0.064\n0.064\n-2.096\n0.064\n-2.039\n0.064\n0.064\n0.064\n0.064\n0.064\n-2.363\n0.064\n-2.381\n0.064\n0.064\n0.064\n-2.305\n0.064\n0.058\n0.064\nfixedStep chrom=chr1 start=250 step=1\n0.058\n0.064\n0.000\n0.064\n0.058\n-2.305\n0.064\n0.064\n0.064\n0.058\n0.058\n-2.096\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.058\n0.064\n0.058\n0.064\n0.058\n0.058\n-2.088\n0.064\n0.058\n0.058\n0.064\n0.064\n0.064\n0.064\n0.064\n0.058\n0.064\n0.064\n-2.363\n0.064\n0.064\n0.064\n0.064\n0.000\n0.064\n0.064\n0.058\n0.064\n0.064\n-2.096\n0.064\n-2.039\n0.064\n0.064\n0.064\n0.064\n0.064\n-2.363\n0.064\n-2.381\n0.064\n0.064\n0.064\n-2.305\n0.064\n0.058\n0.064\n0.064\n0.058\n0.064\n0.058\n0.064\n0.058\n0.058\n-2.088\n0.064\n0.058\n0.058\n0.064\n0.064\n0.064\n0.064\n0.064\n0.058\n0.064\n0.064\n-2.363\n0.064\n0.064\n0.064\n0.064\n0.000\n0.064\n0.064\n0.058\n0.064\n0.064\n-2.096\n0.064\n-2.039\n0.064\n0.064\n0.064\n0.064\n0.064\n-2.363\n0.064\n-2.381\n0.064\n0.064\n0.064\n-2.305\n0.064\n0.058\n0.064\n0.064\n0.058\n0.064\n0.058\n0.064\n0.058\n0.058\n-2.088\n0.064\n0.058\n0.058\n0.064\n0.064\n0.064\n0.064\n0.064\n0.058\n0.064\n0.064\n-2.363\n0.064\n0.064\n0.064\n0.064\n0.000\n0.064\n0.064\n0.058\n0.064\n0.064\n-2.096\n0.064\n-2.039\n0.064\n0.064\n0.064\n0.064\n0.064\n-2.363\n0.064\n-2.381\n0.064\n0.064\n0.064\n-2.305\n0.064\n0.058\n0.064\nfixedStep chrom=chr2 start=100 
step=1\n0.064\n0.058\n0.064\n0.058\n0.064\n0.064\n0.064\n0.064\n0.064\n0.058\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.064\n0.058\n0.064\n0.000\n0.000\n0.000\n0.000\n0.000\n0.000\n0.058\nfixedStep chrom=chr2 start=175 step=1\n0.064\n0.058\n0.064\n0.058\n0.064\n0.058\n0.058\n-2.088\n0.064\n0.058\n0.058\n0.064\n0.064\n0.064\n0.064\n0.064\n0.058\n0.064\n0.064\n-2.363\n0.064\n0.064\n0.064\n0.064\n0.000\n0.064\n0.064\n0.058\n0.064\n0.064\n-2.096\n0.064\n-2.039\n0.064\n0.064\n0.064\n0.064\n0.064\n-2.363\n0.064\n-2.381\n0.064\n0.064\n0.064\n-2.305\n0.064\n0.058\n0.064"
  },
  {
    "path": "UnitTests/Resources/testDgvParser.txt",
    "content": "variantaccession\tchr\tstart\tend\tvarianttype\tvariantsubtype\treference\tpubmedid\tmethod\tplatform\tmergedvariants\tsupportingvariants\tmergedorsample\tfrequency\tsamplesize\tobservedgains\tobservedlosses\tcohortdescription\tgenes\tsamples\nnsv945265\t1\t352306\t371739\tOTHER\tcomplex\tSudmant_et_al_2013\t23825009\tOligo aCGH,Sequencing\t\t\tnssv1679650,nssv1756446,nssv1677687,nssv1756463,nssv1677742,nssv1679594,nssv1677709,nssv1756442,nssv1677642,nssv1756473,nssv1686360,nssv1756477,nssv1683216,nssv1756482,nssv1679661,nssv1756449,nssv1684709,nssv1756454,nssv1677754,nssv1756483,nssv1679639,nssv1682067,nssv1679628,nssv1756453,nssv1679627,nssv1756467,nssv1679672,nssv1756476,nssv1756443,nssv1680921,nssv1677665,nssv1756451,nssv1677731,nssv1756478,nssv1682237,nssv1679775,nssv1677641,nssv1756468,nssv1677720,nssv1756447,nssv1677753,nssv1756484,nssv1684698,nssv1756440,nssv1679705,nssv1756455,nssv1679716,nssv1756481,nssv1677676,nssv1756437,nssv1756439,nssv1756464,nssv1677698,nssv1679492,nssv1679694,nssv1756457,nssv1679616,nssv1679073,nssv1756444,nssv1756458,nssv1677643,nssv1677927,nssv1677765,nssv1682178,nssv1679738,nssv1756445,nssv1679683,nssv1679761,nssv1677654,nssv1756469,nssv1756462,nssv1679727,nssv1677816,nssv1756460,nssv1756480,nssv1756470,nssv1756461,nssv1756471,nssv1681956,nssv1756474,nssv1756452,nssv1756448,nssv1679739,nssv1756450,nssv1756475,nssv1756459,nssv1756472,nssv1756479,nssv1756456,nssv1678962,nssv1756465,nssv1680810,nssv1756441,nssv1679605,nssv1679750,nssv1756438,nssv1756466\tM\t\t97\t10\t0\t\tOR4F16,OR4F29,OR4F3\tHGDP00456,HGDP00521,HGDP00542,HGDP00665,HGDP00778,HGDP00927,HGDP00998,HGDP01029,HGDP01284,HGDP01307\nnsv161172\t1\t88190\t89153\tCNV\tdeletion\tMills_et_al_2006\t16902084\tSequencing\t\t\tnssv179750\tM\t\t24\t\t\t\t\t\nnsv951399\t1\t46501\t71800\tCNV\tduplication\tDogan_et_al_2014\t24416366\tSequencing\t\t\tnssv2997203\tM\t\t1\t1\t0\t\tOR4F5\tBILGI_BIOE\nnsv471522\t1\t522139\t756783\tCNV\tgain\tAlkan_et_al_2009\t19718026\tOligo 
aCGH,Sequencing\t\t\tnssv547898,nssv547899,nssv547897\tM\t\t3\t3\t0\t\tFAM87B,LOC100133331,LOC100288069,MIR6723,OR4F16,OR4F29,OR4F3\tJDW,NA18507,YH\nnsv10161\t1\t712111\t1708649\tCNV\tgain+loss\tPerry_et_al_2008\t18304495\tOligo aCGH\t\t\tnssv24602,nssv24600,nssv18108,nssv28946,nssv28533,nssv26882,nssv21436,nssv21448,nssv21431,nssv28031,nssv28029,nssv26877,nssv24610,nssv28921,nssv28038,nssv26876,nssv24621,nssv28552,nssv28048,nssv28542,nssv28940,nssv26892,nssv21429,nssv26879,nssv28558,nssv26888\tM\t\t31\t11\t7\t\tACAP3,AGRN,ANKRD65,ATAD3A,ATAD3B,ATAD3C,AURKAIP1,B3GALT6,C1orf159,C1orf170,C1orf233,CCNL2,CDK11A,CDK11B,CPSF3L,DVL1,FAM132A,FAM41C,FAM87B,GLTPD1,HES4,ISG15,KLHL17,LINC00115,LINC01128,LOC100130417,LOC100288069,LOC148413,LOC254099,MIB2,MIR200A,MIR200B,MIR429,MIR6726,MIR6727,MIR6808,MMP23A,MMP23B,MRPL20,MXRA8,NADK,NOC2L,PLEKHN1,PUSL1,RNF223,SAMD11,SCNN1D,SDF4,SLC35E2,SLC35E2B,SSU72,TAS1R3,TMEM240,TMEM88B,TNFRSF18,TNFRSF4,TTLL10,UBE2J2,VWA1\tNA07048,NA10839,NA10863,NA12740,NA12872,NA18504,NA18537,NA18552,NA18564,NA18572,NA18972,NA19144,NA19173,NA19221\nesv3358119\t1\t822853\t822861\tCNV\tinsertion\t1000_Genomes_Consortium_Pilot_Project\t20981092\tDigital array,Oligo aCGH,PCR,Sequencing\t\t\tessv7863668,essv7863667\tM\t\t185\t2\t0\t\t\tNA12005,NA18953\nesv6890\t1\t17006189\t17052558\tOTHER\tinversion\tAhn_et_al_2009\t19470904\tSequencing\t\t\tessv29331\tM\t\t1\t0\t0\t\tESPNP,MIR3675\t\nesv6517\t1\t964760\t965579\tCNV\tloss\tAhn_et_al_2009\t19470904\tSequencing\t\t\tessv28958\tM\t\t1\t0\t0\t\tAGRN\t\nesv3310333\t1\t17441132\t17441133\tCNV\tmobile element insertion\t1000_Genomes_Consortium_Pilot_Project\t20981092\tDigital array,Oligo aCGH,PCR,Sequencing\t\t\tessv7837611,essv7836931,essv7838900\tM\t\t185\t3\t0\t\tPADI2\tNA19238,NA19239,NA19240\nnsv479682\t1\t3787207\t3787207\tCNV\tnovel sequence insertion\tKidd_et_al_2010\t20440878\tOligo aCGH,Sequencing\t\t\tnssv3012592\tM\t\t9\t0\t0\t\tDFFB\t\nnsv506926\t1\t34597680\t34603680\tOTHER\tsequence 
alteration\tTeague_et_al_2010\t20534489\tBAC aCGH,Oligo aCGH,Optical mapping,Sequencing\t\t\tnssv619231,nssv617529,nssv623267,nssv620650\tM\t\t4\t0\t0\t\tCSMD2\tCHM,NA10860,NA15510,NA18994\nesv3302766\t1\t38583768\t38583926\tCNV\ttandem duplication\t1000_Genomes_Consortium_Pilot_Project\t20981092\tDigital array,Oligo aCGH,PCR,Sequencing\t\t\tessv7736661,essv7732953,essv7735590\tM\t\t185\t0\t0\t\t\tNA18563,NA18577,NA18582\n"
  },
  {
    "path": "UnitTests/SAUtils/AnnotationItems/CosmicCnvItemTests.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing SAUtils.ExtractCosmicSvs;\nusing UnitTests.TestUtilities;\nusing Variants;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.AnnotationItems\n{\n    public sealed class CosmicCnvItemTests\n    {\n        [Fact]\n        public void Merge_add_new_items()\n        {\n            var item1 = new CosmicCnvItem(1, ChromosomeUtilities.Chr1, 100, 1000, VariantType.copy_number_gain, 3,\n                new Dictionary<string, int>\n                {\n                    {\"histology1\", 1},\n                    {\"histology2\", 2}\n                }, new Dictionary<string, int>\n                {\n                    { \"tissue1\", 2},\n                    { \"tissue2\", 1}\n                }, 1);\n\n            var item2 = new CosmicCnvItem(1, ChromosomeUtilities.Chr1, 100, 1000, VariantType.copy_number_gain, 3,\n                new Dictionary<string, int>\n                {\n                    {\"histology3\", 1},\n                    {\"histology4\", 2}\n                }, new Dictionary<string, int>\n                {\n                    { \"tissue3\", 2},\n                    { \"tissue4\", 1}\n                },2);\n\n            item1.Merge(item2);\n\n            Assert.Equal(4, item1.CancerTypeCount);\n            Assert.Equal(4, item1.TissueTypeCount);\n        }\n\n        [Fact]\n        public void GetJsonString()\n        {\n            var item1 = new CosmicCnvItem(1, ChromosomeUtilities.Chr1, 100, 1000, VariantType.copy_number_gain, 3,\n                new Dictionary<string, int>\n                {\n                    {\"histology1\", 1},\n                    {\"histology2\", 2}\n                }, new Dictionary<string, int>\n                {\n                    { \"tissue1\", 2},\n                    { \"tissue2\", 1}\n                },2);\n\n            \n            
Assert.Equal(\"\\\"id\\\":1,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"copyNumber\\\":3,\\\"cancerTypes\\\":[{\\\"histology1\\\":1},{\\\"histology2\\\":2}],\\\"tissueTypes\\\":[{\\\"tissue1\\\":2},{\\\"tissue2\\\":1}]\", item1.GetJsonString());\n        }\n\n        [Fact]\n        public void GetJsonString_unspecified_copy_number()\n        {\n            var item1 = new CosmicCnvItem(1, ChromosomeUtilities.Chr1, 100, 1000, VariantType.copy_number_gain, -1,\n                new Dictionary<string, int>\n                {\n                    {\"histology1\", 1},\n                    {\"histology2\", 2}\n                }, new Dictionary<string, int>\n                {\n                    { \"tissue1\", 2},\n                    { \"tissue2\", 1}\n                }, 2);\n\n\n            Assert.Equal(\"\\\"id\\\":1,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"cancerTypes\\\":[{\\\"histology1\\\":1},{\\\"histology2\\\":2}],\\\"tissueTypes\\\":[{\\\"tissue1\\\":2},{\\\"tissue2\\\":1}]\", item1.GetJsonString());\n        }\n\n\n        [Fact]\n        public void Merge_same_histology_site()\n        {\n            var item1 = new CosmicCnvItem(1, ChromosomeUtilities.Chr1, 100, 1000, VariantType.copy_number_gain, 3,\n                new Dictionary<string, int>\n                {\n                    {\"histology1\", 1},\n                    {\"histology2\", 2}\n                }, new Dictionary<string, int>\n                {\n                    { \"tissue1\", 2},\n                    { \"tissue2\", 1}\n                },1);\n\n            var item2 = new CosmicCnvItem(1, ChromosomeUtilities.Chr1, 100, 1000, VariantType.copy_number_gain, 3,\n                new Dictionary<string, int>\n                {\n                    {\"histology1\", 1},\n                    {\"histology2\", 2}\n                }, new Dictionary<string, int>\n                {\n                    { \"tissue1\", 2},\n                    { \"tissue2\", 1}\n                },2);\n\n       
     item1.Merge(item2);\n\n            Assert.Equal(2, item1.CancerTypeCount);\n            Assert.Equal(2, item1.TissueTypeCount);\n        }\n\n        [Fact]\n        public void Merge_avoid_double_counting()\n        {\n            var item1 = new CosmicCnvItem(1, ChromosomeUtilities.Chr1, 100, 1000, VariantType.copy_number_gain, 3,\n                new Dictionary<string, int>\n                {\n                    {\"histology1\", 1},\n                    {\"histology2\", 2}\n                }, new Dictionary<string, int>\n                {\n                    { \"tissue1\", 2},\n                    { \"tissue2\", 1}\n                }, 1);\n\n            var item2 = new CosmicCnvItem(1, ChromosomeUtilities.Chr1, 100, 1000, VariantType.copy_number_gain, 3,\n                new Dictionary<string, int>\n                {\n                    {\"histology1\", 1},\n                    {\"histology2\", 2}\n                }, new Dictionary<string, int>\n                {\n                    { \"tissue1\", 2},\n                    { \"tissue2\", 1}\n                }, 1);\n\n            item1.Merge(item2);\n\n            Assert.Equal(\"\\\"id\\\":1,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"copyNumber\\\":3,\\\"cancerTypes\\\":[{\\\"histology1\\\":1},{\\\"histology2\\\":2}],\\\"tissueTypes\\\":[{\\\"tissue1\\\":2},{\\\"tissue2\\\":1}]\", item1.GetJsonString());\n        }\n\n        [Fact]\n        public void Merge_check_adjust_counts()\n        {\n            var item1 = new CosmicCnvItem(1, ChromosomeUtilities.Chr1, 100, 1000, VariantType.copy_number_gain, 3,\n                new Dictionary<string, int>\n                {\n                    {\"histology1\", 1},\n                    {\"histology2\", 2}\n                }, new Dictionary<string, int>\n                {\n                    { \"tissue1\", 2},\n                    { \"tissue2\", 1}\n                },1);\n\n            var item2 = new CosmicCnvItem(1, ChromosomeUtilities.Chr1, 100, 1000, 
VariantType.copy_number_gain, 3,\n                new Dictionary<string, int>\n                {\n                    {\"histology1\", 1},\n                    {\"histology2\", 2}\n                }, new Dictionary<string, int>\n                {\n                    { \"tissue1\", 2},\n                    { \"tissue2\", 1}\n                },2);\n\n            item1.Merge(item2);\n\n            Assert.Equal(\"\\\"id\\\":1,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"copyNumber\\\":3,\\\"cancerTypes\\\":[{\\\"histology1\\\":2},{\\\"histology2\\\":4}],\\\"tissueTypes\\\":[{\\\"tissue1\\\":4},{\\\"tissue2\\\":2}]\", item1.GetJsonString());\n        }\n\n        [Fact]\n        public void Merge_throws_exception_if_cnvs_differ()\n        {\n            var item1 = new CosmicCnvItem(1, ChromosomeUtilities.Chr1, 100, 1000, VariantType.copy_number_loss, 0,\n                new Dictionary<string, int>\n                {\n                    {\"histology1\", 1},\n                    {\"histology2\", 2}\n                }, new Dictionary<string, int>\n                {\n                    { \"tissue1\", 2},\n                    { \"tissue2\", 1}\n                },1);\n\n            var item2 = new CosmicCnvItem(1, ChromosomeUtilities.Chr1, 100, 1000, VariantType.copy_number_gain, 3,\n                new Dictionary<string, int>\n                {\n                    {\"histology1\", 1},\n                    {\"histology2\", 2}\n                }, new Dictionary<string, int>\n                {\n                    { \"tissue1\", 2},\n                    { \"tissue2\", 1}\n                },1);\n\n\n            Assert.Throws<InvalidDataException>(()=>item1.Merge(item2));\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/ClinGen/GeneDiseaseValidityTests.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing SAUtils.ClinGen;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.ClinGen\n{\n    public sealed class GeneDiseaseValidityTests\n    {\n        private static Stream GetGeneValidityStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"CLINGEN GENE VALIDITY CURATIONS\\t\\t\\t\\t\");\n            writer.WriteLine(\"FILE CREATED: 2019-12-02\\t\\t\\t\\t\");\n            writer.WriteLine(\"WEBPAGE: https://search.clinicalgenome.org/kb/gene-validity \\t\\t\\t\\t\");\n            writer.WriteLine(\"+++++++++++\\t++++++++++++++\\t+++++++++++++\\t++++++++++++++++++\\t+++++++++\\t++++++++++++++\\t+++++++++++++\\t+++++++++++++++++++\");\n            writer.WriteLine(\"GENE SYMBOL\\tGENE ID (HGNC)\\tDISEASE LABEL\\tDISEASE ID (MONDO)\\tSOP\\tCLASSIFICATION\\tONLINE REPORT\\tCLASSIFICATION DATE\");\n            writer.WriteLine(\"+++++++++++\\t++++++++++++++\\t+++++++++++++\\t++++++++++++++++++\\t+++++++++\\t++++++++++++++\\t+++++++++++++\\t+++++++++++++++++++\");\n            writer.WriteLine(\"A2ML1\\tHGNC:23336\\tNoonan syndrome with multiple lentigines\\tMONDO_0007893\\tSOP5\\tNo Reported Evidence\\thttps://search.clinicalgenome.org/kb/gene-validity/59b87033-dd91-4f1e-aec1-c9b1f5124b16--2018-06-07T14:37:47\\t2018-06-07T14:37:47.175Z\");\n            writer.WriteLine(\"A2ML1\\tHGNC:23336\\tcardiofaciocutaneous syndrome\\tMONDO_0015280\\tSOP5\\tNo Reported Evidence\\thttps://search.clinicalgenome.org/kb/gene-validity/fc3c41d8-8497-489b-a350-c9e30016bc6a--2018-06-07T14:31:03\\t2018-06-07T14:31:03.696Z\");\n            writer.WriteLine(\"A2ML1\\tHGNC:23336\\tCostello syndrome\\tMONDO_0009026\\tSOP5\\tNo Reported Evidence\\thttps://search.clinicalgenome.org/kb/gene-validity/ea72ba8d-cf62-44bc-86be-da64e3848eba--2018-06-07T14:34:05\\t2018-06-07T14:34:05.324Z\");\n            
writer.WriteLine(\"AARS\\tHGNC:20\\tundetermined early-onset epileptic encephalopathy\\tMONDO_0018614\\tSOP6\\tLimited\\thttps://search.clinicalgenome.org/kb/gene-validity/ac62fe65-ee56-4146-9fe4-00dc1db2d958--2018-11-20T17:00:00\\t2018-11-20T17:00:00.000Z\");\n            writer.WriteLine(\"AASS\\tHGNC:17366\\thyperlysinemia (disease)\\tMONDO_0009388\\tSOP6\\tModerate\\thttps://search.clinicalgenome.org/kb/gene-validity/92e04f9e-f03e-4295-baac-e9fb6b48a258--2019-11-08T17:00:00\\t2019-11-08T17:00:00.000Z\");\n            writer.WriteLine(\"ABCC9\\tHGNC:60\\thypertrichotic osteochondrodysplasia Cantu type\\tMONDO_0009406\\tSOP4\\tDefinitive\\thttps://search.clinicalgenome.org/kb/gene-validity/10028\\t2017-09-27T00:00:00\");\n            //duplicate item\n            writer.WriteLine(\"ABCC9\\tHGNC:60\\thypertrichotic osteochondrodysplasia Cantu type\\tMONDO_0009406\\tSOP4\\tDefinitive\\thttps://search.clinicalgenome.org/kb/gene-validity/10028\\t2017-10-27T00:00:00\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n        \n        private static Dictionary<int, string> GetIdToSymbols()\n        {\n            return new Dictionary<int, string>\n            {\n                { 23336,\"A2ML1\" },\n                { 20, \"AARS\"},\n                { 60, \"ABCC9\" }\n            };\n        }\n\n        [Fact]\n        public void ParserTest()\n        {\n            var parser = new GeneDiseaseValidityParser(GetGeneValidityStream(), GetIdToSymbols());\n\n            var items = parser.GetItems();\n            Assert.Equal(3, items.Count);\n\n            var firstGene = items[\"A2ML1\"];\n            Assert.Equal(3, firstGene.Count);\n\n            Assert.Equal(\"{\\\"diseaseId\\\":\\\"MONDO_0007893\\\",\\\"disease\\\":\\\"Noonan syndrome with multiple lentigines\\\",\\\"classification\\\":\\\"no reported evidence\\\",\\\"classificationDate\\\":\\\"2018-06-07\\\"}\", firstGene[0].GetJsonString());\n\n         
   var thirdGene = items[\"ABCC9\"];\n            Assert.Single(thirdGene);\n            Assert.Equal(\"{\\\"diseaseId\\\":\\\"MONDO_0009406\\\",\\\"disease\\\":\\\"hypertrichotic osteochondrodysplasia Cantu type\\\",\\\"classification\\\":\\\"definitive\\\",\\\"classificationDate\\\":\\\"2017-10-27\\\"}\", thirdGene[0].GetJsonString());\n        }\n        \n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/CosmicGeneFusions/Cache/TranscriptCacheTests.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing Intervals;\nusing SAUtils.CosmicGeneFusions.Cache;\nusing UnitTests.MockedData;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.CosmicGeneFusions.Cache\n{\n    public sealed class TranscriptCacheTests\n    {\n        [Theory]\n        [InlineData(\"ENST00000646891.1\", \"ENSG00000157764\", \"BRAF\")]\n        [InlineData(\"ENST00000242365.4\", \"ENSG00000122778\", \"KIAA1549\")]\n        [InlineData(\"ENST00000311979.3\", \"ENSG00000172660\", \"TAF15\")]\n        [InlineData(\"ENST00000529193.1\", \"ENSG00000157613\", \"CREB3L1\")]\n        [InlineData(\"ENST00000312675.4\", \"ENSG00000145012\", \"LPP\")]\n        [InlineData(\"ENST00000556625.1\", \"ENSG00000258389\", \"DUX4\")]\n        public void HandleMissingTranscripts_ExpectedResults(string transcriptId, string expectedGeneId, string expectedGeneSymbol)\n        {\n            (string actualGeneId, string actualGeneSymbol) = TranscriptCache.HandleMissingTranscripts(transcriptId);\n            Assert.Equal(expectedGeneId,     actualGeneId);\n            Assert.Equal(expectedGeneSymbol, actualGeneSymbol);\n        }\n\n        [Fact]\n        public void HandleMissingTranscripts_UnknownTranscriptId_ThrowException()\n        {\n            Assert.Throws<InvalidDataException>(delegate { TranscriptCache.HandleMissingTranscripts(\"ABC\"); });\n        }\n\n        [Fact]\n        public void GetTranscriptIdToTranscript()\n        {\n            var chr1 = new IntervalArray<ITranscript>(new Interval<ITranscript>[]\n            {\n                new(Transcripts.ENST00000290663.Start, Transcripts.ENST00000290663.End, Transcripts.ENST00000290663),\n                new(Transcripts.ENST00000370673.Start, Transcripts.ENST00000370673.End, Transcripts.ENST00000370673),\n                new(Transcripts.ENST00000427819.Start, Transcripts.ENST00000427819.End, 
Transcripts.ENST00000427819)\n            });\n\n            var chr2 = new IntervalArray<ITranscript>(new Interval<ITranscript>[]\n            {\n                new(Transcripts.ENST00000615053.Start, Transcripts.ENST00000615053.End, Transcripts.ENST00000615053),\n                new(Transcripts.ENST00000347849.Start, Transcripts.ENST00000347849.End, Transcripts.ENST00000347849)\n            });\n\n            var transcriptIntervalArrays = new IntervalArray<ITranscript>[ChromosomeUtilities.RefIndexToChromosome.Count];\n            transcriptIntervalArrays[ChromosomeUtilities.Chr1.Index] = chr1;\n            transcriptIntervalArrays[ChromosomeUtilities.Chr2.Index] = chr2;\n\n            Dictionary<string, ITranscript> idToTranscript = TranscriptCache.GetTranscriptIdToTranscript(transcriptIntervalArrays);\n\n            Assert.Equal(10, idToTranscript.Count);\n            Assert.True(idToTranscript.ContainsKey(\"ENST00000290663\"));\n            Assert.True(idToTranscript.ContainsKey(\"ENST00000290663.10\"));\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/CosmicGeneFusions/Conversion/CosmicConverterTests.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing SAUtils.CosmicGeneFusions.Cache;\nusing SAUtils.CosmicGeneFusions.Conversion;\nusing VariantAnnotation.GeneFusions.Utilities;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.CosmicGeneFusions.Conversion\n{\n    public sealed class CosmicConverterTests\n    {\n        [Fact]\n        public void Convert_ExpectedResults()\n        {\n            (TranscriptCache transcriptCache, ITranscript transcript, ITranscript transcript2) = HgvsRnaParserTests.GetTranscriptCache();\n            Dictionary<int, HashSet<RawCosmicGeneFusion>> fusionIdToEntries = GetFusionIdToEntries(transcript, transcript2);\n\n            ulong expectedFusionKey = GeneFusionKey.Create(\n                GeneFusionKey.CreateGeneKey(transcript.Gene.EnsemblId.WithoutVersion),\n                GeneFusionKey.CreateGeneKey(transcript2.Gene.EnsemblId.WithoutVersion));\n\n            string[] expectedJsonEntries =\n            {\n                \"\\\"id\\\":\\\"COSF665\\\",\\\"numSamples\\\":1,\\\"geneSymbols\\\":[\\\"MED8\\\",\\\"PTPN18\\\"],\\\"hgvsr\\\":\\\"ENST00000290663.10(MED8):r.1_3555::ENST00000347849.7(PTPN18):r.2100_3452\\\",\\\"histologies\\\":[{\\\"name\\\":\\\"ductal carcinoma\\\",\\\"numSamples\\\":1}],\\\"sites\\\":[{\\\"name\\\":\\\"breast\\\",\\\"numSamples\\\":1}],\\\"pubMedIds\\\":[20033038]\",\n                \"\\\"id\\\":\\\"COSF667\\\",\\\"numSamples\\\":1,\\\"geneSymbols\\\":[\\\"MED8\\\",\\\"PTPN18\\\"],\\\"hgvsr\\\":\\\"ENST00000290663.10(MED8):r.1_1234::ENST00000347849.7(PTPN18):r.5678_6789\\\",\\\"histologies\\\":[{\\\"name\\\":\\\"ductal carcinoma\\\",\\\"numSamples\\\":1}],\\\"sites\\\":[{\\\"name\\\":\\\"breast\\\",\\\"numSamples\\\":1}],\\\"pubMedIds\\\":[20033038]\"\n            };\n\n            Dictionary<ulong, string[]> actualFusionKeyToJson = CosmicConverter.Convert(fusionIdToEntries, transcriptCache);\n            
Assert.Single(actualFusionKeyToJson);\n\n            string[] actualJsonEntries = actualFusionKeyToJson[expectedFusionKey];\n            Assert.NotNull(actualJsonEntries);\n            Assert.Equal(expectedJsonEntries, actualJsonEntries);\n        }\n\n        private static Dictionary<int, HashSet<RawCosmicGeneFusion>> GetFusionIdToEntries(ITranscript transcript, ITranscript transcript2)\n        {\n            string transcriptId5 = transcript.Id.WithVersion;\n            string geneSymbol5   = transcript.Gene.Symbol;\n            string transcriptId3 = transcript2.Id.WithVersion;\n            string geneSymbol3   = transcript2.Gene.Symbol;\n\n            var rawGeneFusion = new RawCosmicGeneFusion(749711, 665, \"breast\", \"NS\", \"carcinoma\", \"ductal carcinoma\",\n                $\"{transcriptId5}({geneSymbol5}):r.1_3555_{transcriptId3}({geneSymbol3}):r.2100_3452\", 20033038);\n\n            var rawGeneFusion2 = new RawCosmicGeneFusion(749712, 667, \"breast\", \"NS\", \"carcinoma\", \"ductal carcinoma\",\n                $\"{transcriptId5}({geneSymbol5}):r.1_1234_{transcriptId3}({geneSymbol3}):r.5678_6789\", 20033038);\n\n\n            return new Dictionary<int, HashSet<RawCosmicGeneFusion>>\n            {\n                [rawGeneFusion.FusionId]  = new() {rawGeneFusion},\n                [rawGeneFusion2.FusionId] = new() {rawGeneFusion2}\n            };\n        }\n\n        [Fact]\n        public void ToJsonArray_ExpectedResults()\n        {\n            var geneKeyToJsonList = new Dictionary<ulong, List<string>>\n            {\n                [123] = new() {\"A\", \"B\", \"C\"},\n                [456] = new() {\"A\"},\n                [789] = new()\n            };\n\n            Dictionary<ulong, string[]> actualResults = geneKeyToJsonList.ToJsonArray();\n            Assert.Equal(3, actualResults.Count);\n            Assert.Equal(3, actualResults[123].Length);\n            Assert.Single(actualResults[456]);\n            
Assert.Empty(actualResults[789]);\n        }\n\n        [Fact]\n        public void GetCosmicGeneFusion_NullHgvs_ReturnNull()\n        {\n            const string hgvsNotation = \"ENST00000283243.12(PLA2R1):r.1_2802\";\n\n            var fusionEntries = new HashSet<RawCosmicGeneFusion>\n            {\n                new(10, 0, null, null, null, null, hgvsNotation, 123)\n            };\n\n            const ulong expectedFusionKey = 0;\n\n            (ulong actualFusionKey, string actualJson) = CosmicConverter.GetCosmicGeneFusion(0, fusionEntries, null);\n            Assert.Equal(expectedFusionKey, actualFusionKey);\n            Assert.Null(actualJson);\n        }\n\n        [Fact]\n        public void AggregateRawCosmicGeneFusions_ExpectedResults()\n        {\n            const int    expectedNumSamples   = 4;\n            const int    expectedNumPubMedIds = 2;\n            const string hgvsNotation         = \"ENST00000000123.1(ABC):r.1_1000_ENST00000000456.2(DEF):r.300_2000\";\n            const string expectedHgvsNotation = \"ENST00000000123.1(ABC):r.1_1000::ENST00000000456.2(DEF):r.300_2000\";\n\n            var fusionEntries = new HashSet<RawCosmicGeneFusion>\n            {\n                new(10, 0, null, null, null, null, hgvsNotation, 123),\n                new(20, 0, null, null, null, null, hgvsNotation, 123),\n                new(30, 0, null, null, null, null, hgvsNotation, 200),\n                new(40, 0, null, null, null, null, hgvsNotation, 123)\n            };\n\n            (int[] actualPubMedIds, int actualNumSamples, string actualHgvsNotation) = CosmicConverter.AggregateRawCosmicGeneFusions(fusionEntries);\n\n            Assert.Equal(expectedNumSamples,   actualNumSamples);\n            Assert.Equal(expectedNumPubMedIds, actualPubMedIds.Length);\n            Assert.Equal(expectedHgvsNotation, actualHgvsNotation);\n        }\n\n        [Fact]\n        public void AggregateRawCosmicGeneFusions_MultipleHgvsStrings_ThrowException()\n        {\n       
     const string hgvsNotation  = \"ENST00000000123.1(ABC):r.1_1000_ENST00000000456.2(DEF):r.300_2000\";\n            const string hgvsNotation2 = \"ENST00000000789.3(GHI):r.1_1000_ENST00000000456.2(DEF):r.300_2000\";\n\n            var fusionEntries = new HashSet<RawCosmicGeneFusion>\n            {\n                new(10, 0, null, null, null, null, hgvsNotation, 123),\n                new(20, 0, null, null, null, null, hgvsNotation, 123),\n                new(30, 0, null, null, null, null, hgvsNotation2, 200),\n                new(40, 0, null, null, null, null, hgvsNotation, 123)\n            };\n\n            Assert.Throws<InvalidDataException>(delegate { CosmicConverter.AggregateRawCosmicGeneFusions(fusionEntries); });\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/CosmicGeneFusions/Conversion/HgvsRnaFixerTests.cs",
    "content": "﻿using SAUtils.CosmicGeneFusions.Conversion;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.CosmicGeneFusions.Conversion\n{\n    public sealed class HgvsRnaFixerTests\n    {\n        [Theory]\n        [InlineData(\"ENST00000415083.2(SS18):r.1_1286_ENST00000415083.2(SS18):r.1286+683_1286+701_ENST00000336777.5(SSX2):r.351_1410\",\n            \"ENST00000415083.2(SS18):r.1_1286::ENST00000415083.2(SS18):r.1286+683_1286+701::ENST00000336777.5(SSX2):r.351_1410\")]\n        [InlineData(\"ENST00000397938.2(EWSR1):r.1_1293_ENST00000397938.2(EWSR1):r.1332_1364_ENST00000527786.2(FLI1):r.1079_4127\",\n            \"ENST00000397938.2(EWSR1):r.1_1293::ENST00000397938.2(EWSR1):r.1332_1364::ENST00000527786.2(FLI1):r.1079_4127\")]\n        [InlineData(\"ENST00000397938.2(EWSR1):r.1_1293_ENST00000397938.2(EWSR1):r.1293+1627_1293+1656_insU_ENST00000310015.6(SP3):r.2389_6359\",\n            \"ENST00000397938.2(EWSR1):r.1_1293::ENST00000397938.2(EWSR1):r.1293+1627_1293+1656_insU::ENST00000310015.6(SP3):r.2389_6359\")]\n        [InlineData(\"ENST00000397938.2(EWSR1):r.1_545_insAAGGGACCAGUACAG_ENST00000397938.2(EWSR1):r.546_1112_ENST00000332351.3(WT1):r.1535_3122\",\n            \"ENST00000397938.2(EWSR1):r.1_545_insAAGGGACCAGUACAG::ENST00000397938.2(EWSR1):r.546_1112::ENST00000332351.3(WT1):r.1535_3122\")]\n        [InlineData(\"ENST00000254108.7(FUS):r.1_727_ENST00000254108.7(FUS):r.904+1322_904+1354inv_ENST00000330387.6(CREB3L2):r.936_7412\",\n            \"ENST00000254108.7(FUS):r.1_727::ENST00000254108.7(FUS):r.904+1322_904+1354inv::ENST00000330387.6(CREB3L2):r.936_7412\")]\n        [InlineData(\"ENST00000429538.3(PAX8):r.?_ENST00000287820.6(PPARG):r.?\", \"ENST00000429538.3(PAX8):r.?::ENST00000287820.6(PPARG):r.?\")]\n        public void CorrectHgvsNotationUsingRegex_ExpectedResults(string originalHgvs, string expectedHgvs)\n        {\n            string actualHgvs = HgvsRnaFixer.Fix(originalHgvs);\n            Assert.Equal(expectedHgvs, actualHgvs);\n        }\n    
}\n}"
  },
  {
    "path": "UnitTests/SAUtils/CosmicGeneFusions/Conversion/HgvsRnaParserTests.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing SAUtils.CosmicGeneFusions.Cache;\nusing SAUtils.CosmicGeneFusions.Conversion;\nusing VariantAnnotation.GeneFusions.Utilities;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.CosmicGeneFusions.Conversion\n{\n    public sealed class HgvsRnaParserTests\n    {\n        [Theory]\n        [InlineData(             \"ENST00000332149.5(TMPRSS2):r.1_79+?_ENST00000442448.1(ERG):r.312_5034\", \"ENST00000332149.5\", \"ENST00000442448.1\")]\n        [InlineData(             \"ENST00000415083.2(SS18):r.1_1286_ENST00000415083.2(SS18):r.1286+683_1286+701_ENST00000336777.5(SSX2):r.351_1410\",\n            \"ENST00000415083.2\", \"ENST00000336777.5\")]\n        [InlineData(\n            \"ENST00000397938.2(EWSR1):r.1_1112_ENST00000527786.2(FLI1):r.1079_1144_ENST00000527786.2(FLI1):r.1145-1478_1145-1410_ENST00000527786.2(FLI1):r.1145_4127\",\n            \"ENST00000397938.2\", \"ENST00000527786.2\")]\n        [InlineData(\"ENST00000305877.8(BCR):r.1_2866::ENST00000372348.2(ABL1):r.511-?_511-?::ENST00000318560.5(ABL1):r.461_5766\", \"ENST00000305877.8\",\n            \"ENST00000318560.5\")]\n        [InlineData(              \"ENST00000305877.12(BCR):r.1_2866::ENST00000372348.6(ABL1):r.511-?_511-?::ENST00000318560.5(ABL1):r.461_5766\",\n            \"ENST00000305877.12\", \"ENST00000318560.5\")]\n        public void Parse_ExpectedResults(string hgvsString, string expectedTranscriptId5, string expectedTranscriptId3)\n        {\n            (string actualTranscriptId5, string actualTranscriptId3) = HgvsRnaParser.Parse(hgvsString);\n            Assert.Equal(expectedTranscriptId5, actualTranscriptId5);\n            Assert.Equal(expectedTranscriptId3, actualTranscriptId3);\n        }\n\n        [Theory]\n        [InlineData(\"ENST00000305877.8(BCR):r.1_2866\")]\n        
[InlineData(\"ENST00000000123.1(ABC):r.1_2866::ENST00000000456.2(ABC):r.511-?_511-?::ENST00000000789.3(ABC):r.461_5766\")]\n        public void Parse_UnexpectedTranscriptCount_ThrowException(string hgvsString)\n        {\n            Assert.Throws<InvalidDataException>(delegate { HgvsRnaParser.Parse(hgvsString); });\n        }\n\n        [Fact]\n        public void GetTranscripts_ExpectedResults()\n        {\n            (TranscriptCache transcriptCache, ITranscript transcript, ITranscript transcript2) = GetTranscriptCache();\n            string[] expectedGeneSymbols = {transcript.Gene.Symbol, transcript2.Gene.Symbol};\n            ulong expectedFusionKey = GeneFusionKey.Create(\n                GeneFusionKey.CreateGeneKey(transcript.Gene.EnsemblId.WithoutVersion),\n                GeneFusionKey.CreateGeneKey(transcript2.Gene.EnsemblId.WithoutVersion));\n\n            (string[] actualGeneSymbols, ulong actualFusionKey) =\n                HgvsRnaParser.GetTranscripts(\"ENST00000290663.10(MED8):r.1_79+?_ENST00000347849.7(ERG):r.312_5034\", transcriptCache);\n\n            Assert.Equal(expectedGeneSymbols, actualGeneSymbols);\n            Assert.Equal(expectedFusionKey,   actualFusionKey);\n        }\n\n        public static (TranscriptCache TranscriptCache, ITranscript Transcript, ITranscript Transcript2) GetTranscriptCache()\n        {\n            ITranscript transcript  = MockedData.Transcripts.ENST00000290663;\n            ITranscript transcript2 = MockedData.Transcripts.ENST00000347849;\n\n            var idToTranscript = new Dictionary<string, ITranscript>\n            {\n                [transcript.Id.WithoutVersion]  = transcript,\n                [transcript2.Id.WithoutVersion] = transcript2\n            };\n\n            return (new TranscriptCache(idToTranscript), transcript, transcript2);\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/CosmicGeneFusions/Conversion/HistologyTests.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing SAUtils.CosmicGeneFusions.Conversion;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.CosmicGeneFusions.Conversion\n{\n    public sealed class HistologyTests\n    {\n        private readonly HashSet<RawCosmicGeneFusion> _fusionEntries = new()\n        {\n            new RawCosmicGeneFusion(10, 0, null, null, \"carcinoma\", \"ductal carcinoma\",           null, 0),\n            new RawCosmicGeneFusion(20, 0, null, null, \"carcinoma\", \"ductal carcinoma\",           null, 0),\n            new RawCosmicGeneFusion(30, 0, null, null, \"carcinoma\", \"NS\",                         null, 0),\n            new RawCosmicGeneFusion(40, 0, null, null, \"carcinoma\", \"signet ring adenocarcinoma\", null, 0)\n        };\n\n        [Fact]\n        public void GetCounts_ExpectedResults()\n        {\n            const int     numSamples   = 4;\n            CosmicCount[] actualCounts = Histology.GetCounts(_fusionEntries, numSamples);\n\n            Assert.Equal(3, actualCounts.Length);\n\n            CosmicCount actualCount = actualCounts[0];\n            Assert.Equal(\"ductal carcinoma\", actualCount.name);\n            Assert.Equal(2,                  actualCount.numSamples);\n\n            actualCount = actualCounts[1];\n            Assert.Equal(\"carcinoma\", actualCount.name);\n            Assert.Equal(1,           actualCount.numSamples);\n\n            actualCount = actualCounts[2];\n            Assert.Equal(\"signet ring adenocarcinoma\", actualCount.name);\n            Assert.Equal(1,                            actualCount.numSamples);\n        }\n\n        [Theory]\n        [InlineData(3)]\n        [InlineData(5)]\n        public void GetCounts_WrongSampleCount_ThrowException(int numSamples)\n        {\n            Assert.Throws<InvalidDataException>(delegate { Histology.GetCounts(_fusionEntries, numSamples); });\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/CosmicGeneFusions/Conversion/SiteTests.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing SAUtils.CosmicGeneFusions.Conversion;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.CosmicGeneFusions.Conversion\n{\n    public sealed class SiteTests\n    {\n        private readonly HashSet<RawCosmicGeneFusion> _fusionEntries = new()\n        {\n            new RawCosmicGeneFusion(10, 0, \"skin\",        \"ear\",          null, null, null, 0),\n            new RawCosmicGeneFusion(20, 0, \"skin\",        \"NS\",           null, null, null, 0),\n            new RawCosmicGeneFusion(30, 0, \"skin\",        \"ear\",          null, null, null, 0),\n            new RawCosmicGeneFusion(40, 0, \"soft tissue\", \"blood vessel\", null, null, null, 0)\n        };\n\n        [Fact]\n        public void GetCounts_ExpectedResults()\n        {\n            const int     numSamples   = 4;\n            CosmicCount[] actualCounts = Site.GetCounts(_fusionEntries, numSamples);\n\n            Assert.Equal(3, actualCounts.Length);\n\n            CosmicCount actualCount = actualCounts[0];\n            Assert.Equal(\"skin (ear)\", actualCount.name);\n            Assert.Equal(2,            actualCount.numSamples);\n\n            actualCount = actualCounts[1];\n            Assert.Equal(\"skin\", actualCount.name);\n            Assert.Equal(1,      actualCount.numSamples);\n\n            actualCount = actualCounts[2];\n            Assert.Equal(\"soft tissue (blood vessel)\", actualCount.name);\n            Assert.Equal(1,                            actualCount.numSamples);\n        }\n\n        [Fact]\n        public void GetCounts_TotalSampleCountTooHigh_ThrowException()\n        {\n            const int numSamples = 3;\n            Assert.Throws<InvalidDataException>(delegate { Site.GetCounts(_fusionEntries, numSamples); });\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/CosmicGeneFusions/CreateCosmicGeneFusionsTests.cs",
    "content": "﻿using System;\nusing SAUtils.CosmicGeneFusions;\nusing VariantAnnotation.Providers;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.CosmicGeneFusions\n{\n    public sealed class CreateCosmicGeneFusionsTests\n    {\n        [Fact]\n        public void CreateDataSourceVersion_ExpectedResults()\n        {\n            const string expectedName             = \"COSMIC gene fusions\";\n            const string expectedDescription      = \"manually curated somatic gene fusions\";\n            const string expectedVersion          = \"94\";\n            const string releaseDate              = \"2021-05-28\";\n            long         expectedReleaseDateTicks = DateTime.Parse(releaseDate).Ticks;\n\n            DataSourceVersion actualDataSourceVersion = CreateCosmicGeneFusions.CreateDataSourceVersion(expectedVersion, releaseDate);\n\n            Assert.Equal(expectedName,             actualDataSourceVersion.Name);\n            Assert.Equal(expectedDescription,      actualDataSourceVersion.Description);\n            Assert.Equal(expectedVersion,          actualDataSourceVersion.Version);\n            Assert.Equal(expectedReleaseDateTicks, actualDataSourceVersion.ReleaseDateTicks);\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/CosmicGeneFusions/IO/CosmicGeneFusionParserTests.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing System.Text;\nusing SAUtils.CosmicGeneFusions.Conversion;\nusing SAUtils.CosmicGeneFusions.IO;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.CosmicGeneFusions.IO\n{\n    public sealed class CosmicGeneFusionParserTests\n    {\n        [Fact]\n        public void Parse_ExpectedResults()\n        {\n            var lines = new List<string>\n            {\n                \"SAMPLE_ID\tSAMPLE_NAME\tPRIMARY_SITE\tSITE_SUBTYPE_1\tSITE_SUBTYPE_2\tSITE_SUBTYPE_3\tPRIMARY_HISTOLOGY\tHISTOLOGY_SUBTYPE_1\tHISTOLOGY_SUBTYPE_2\tHISTOLOGY_SUBTYPE_3\tFUSION_ID\tTRANSLOCATION_NAME\t5'_CHROMOSOME\t5'_STRAND\t5'_GENE_ID\t5'_GENE_NAME\t5'_LAST_OBSERVED_EXON\t5'_GENOME_START_FROM\t5'_GENOME_START_TO\t5'_GENOME_STOP_FROM\t5'_GENOME_STOP_TO\t3'_CHROMOSOME\t3'_STRAND\t3'_GENE_ID\t3'_GENE_NAME\t3'_FIRST_OBSERVED_EXON\t3'_GENOME_START_FROM\t3'_GENOME_START_TO\t3'_GENOME_STOP_FROM\t3'_GENOME_STOP_TO\tFUSION_TYPE\tPUBMED_PMID\",\n                \"749711\tHCC1187\tbreast\tNS\tNS\tNS\tcarcinoma\tductal_carcinoma\tNS\tNS\t665\tENST00000360863.10(RGS22):r.1_3555_ENST00000369518.1(SYCP1):r.2100_3452\t8\t-\t197199\tRGS22\t22\t99981937\t99981937\t100106116\t100106116\t1\t+\t212470\tSYCP1_ENST00000369518\t24\t114944339\t114944339\t114995367\t114995367\tInferred Breakpoint\t20033038\",\n                \"749711\tHCC1187\tbreast\tNS\tNS\tNS\tcarcinoma\tductal_carcinoma\tNS\tNS\t665\tENST00000360863.10(RGS22):r.1_3555_ENST00000369518.1(SYCP1):r.2100_3452\t8\t-\t197199\tRGS22\t22\t99981937\t99981937\t100106116\t100106116\t1\t+\t212470\tSYCP1_ENST00000369518\t24\t114944339\t114944339\t114995367\t114995367\tObserved mRNA\t20033038\",\n                
\"749712\tHCC1395\tbreast\tNS\tNS\tNS\tcarcinoma\tductal_carcinoma\tNS\tNS\t667\tENST00000395686.7(ERO1A):r.1_658_ENST00000395631.6(FERMT2):r.744_3369\t14\t-\t282967\tERO1A\t5\t52671795\t52671795\t52695705\t52695705\t14\t-\t268960\tFERMT2_ENST00000395631\t5\t52857268\t52857268\t52881469\t52881469\tInferred Breakpoint\t20033038\"\n            };\n\n            using var    ms     = new MemoryStream();\n            StreamReader reader = GetCosmicTestData(ms, lines);\n\n            Dictionary<int, HashSet<RawCosmicGeneFusion>> actualFusionIdToEntries = CosmicGeneFusionParser.Parse(reader);\n            Assert.Equal(2, actualFusionIdToEntries.Count);\n\n            HashSet<RawCosmicGeneFusion> geneFusions = actualFusionIdToEntries[665];\n            Assert.NotNull(geneFusions);\n            Assert.Single(geneFusions);\n\n            RawCosmicGeneFusion actualFusion = geneFusions.First();\n            Assert.Equal(749711,                                                                    actualFusion.SampleId);\n            Assert.Equal(665,                                                                       actualFusion.FusionId);\n            Assert.Equal(\"breast\",                                                                  actualFusion.PrimarySite);\n            Assert.Equal(\"NS\",                                                                      actualFusion.SiteSubtype1);\n            Assert.Equal(\"carcinoma\",                                                               actualFusion.PrimaryHistology);\n            Assert.Equal(\"ductal carcinoma\",                                                        actualFusion.HistologySubtype1);\n            Assert.Equal(\"ENST00000360863.10(RGS22):r.1_3555_ENST00000369518.1(SYCP1):r.2100_3452\", actualFusion.HgvsNotation);\n            Assert.Equal(20033038,                                                                  actualFusion.PubMedId);\n\n            geneFusions = actualFusionIdToEntries[667];\n     
       Assert.NotNull(geneFusions);\n            Assert.Single(geneFusions);\n\n            actualFusion = geneFusions.First();\n            Assert.Equal(749712,                                                                  actualFusion.SampleId);\n            Assert.Equal(667,                                                                     actualFusion.FusionId);\n            Assert.Equal(\"breast\",                                                                actualFusion.PrimarySite);\n            Assert.Equal(\"NS\",                                                                    actualFusion.SiteSubtype1);\n            Assert.Equal(\"carcinoma\",                                                             actualFusion.PrimaryHistology);\n            Assert.Equal(\"ductal carcinoma\",                                                      actualFusion.HistologySubtype1);\n            Assert.Equal(\"ENST00000395686.7(ERO1A):r.1_658_ENST00000395631.6(FERMT2):r.744_3369\", actualFusion.HgvsNotation);\n            Assert.Equal(20033038,                                                                actualFusion.PubMedId);\n        }\n\n        [Fact]\n        public void Parse_IncorrectColumnCount_ThrowException()\n        {\n            var lines = new List<string>\n            {\n                \"SAMPLE_ID\tSAMPLE_NAME\tPRIMARY_SITE\tSITE_SUBTYPE_1\tSITE_SUBTYPE_2\tSITE_SUBTYPE_3\tPRIMARY_HISTOLOGY\tHISTOLOGY_SUBTYPE_1\tHISTOLOGY_SUBTYPE_2\tHISTOLOGY_SUBTYPE_3\tFUSION_ID\tTRANSLOCATION_NAME\t5'_CHROMOSOME\t5'_STRAND\t5'_GENE_ID\t5'_GENE_NAME\t5'_LAST_OBSERVED_EXON\t5'_GENOME_START_FROM\t5'_GENOME_START_TO\t5'_GENOME_STOP_FROM\t5'_GENOME_STOP_TO\t3'_CHROMOSOME\t3'_STRAND\t3'_GENE_ID\t3'_GENE_NAME\t3'_FIRST_OBSERVED_EXON\t3'_GENOME_START_FROM\t3'_GENOME_START_TO\t3'_GENOME_STOP_FROM\t3'_GENOME_STOP_TO\tFUSION_TYPE\tPUBMED_PMID\",\n                \"749711\tHCC1187\"\n            };\n\n            using var    ms     = new MemoryStream();\n            
StreamReader reader = GetCosmicTestData(ms, lines);\n\n            Assert.Throws<InvalidDataException>(delegate { CosmicGeneFusionParser.Parse(reader); });\n        }\n\n        private static StreamReader GetCosmicTestData(Stream stream, List<string> lines)\n        {\n            using (var writer = new StreamWriter(stream, Encoding.UTF8, 1024, true))\n            {\n                foreach (string line in lines) writer.WriteLine(line);\n            }\n\n            stream.Position = 0;\n\n            return new StreamReader(stream);\n        }\n\n        [Fact]\n        public void RemoveUnderlines_ExpectedResults()\n        {\n            const string input          = \"spindle_epithelial_tumour_with_thymus_like_differentiation\";\n            const string expectedResult = \"spindle epithelial tumour with thymus like differentiation\";\n            string       actualResult   = CosmicGeneFusionParser.RemoveUnderlines(input);\n            Assert.Equal(expectedResult, actualResult);\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/CosmicGeneFusions/IO/GeneFusionJsonWriterTests.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing SAUtils.CosmicGeneFusions.IO;\nusing VariantAnnotation.GeneFusions.IO;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Providers;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.CosmicGeneFusions.IO\n{\n    public sealed class GeneFusionJsonWriterTests\n    {\n        [Fact]\n        public void GeneFusionJsonWriter_ExpectedResults()\n        {\n            Dictionary<ulong, string[]> expectedGeneKeyToJson = GetKeyToJson();\n            IDataSourceVersion          expectedVersion       = new DataSourceVersion(\"COSMIC Gene Fusions\", \"102\", DateTime.Now.Ticks, \"COSMIC\");\n\n            using var ms = new MemoryStream();\n            using (var writer = new GeneFusionJsonWriter(ms, \"cosmicGeneFusions\", expectedVersion, true))\n            {\n                writer.Write(expectedGeneKeyToJson);\n            }\n\n            ms.Position = 0;\n\n            Dictionary<ulong, string[]> actualGeneKeyToJson;\n            IDataSourceVersion          actualVersion;\n\n            using (var reader = new GeneFusionJsonReader(ms))\n            {\n                reader.LoadAnnotations();\n                actualGeneKeyToJson = reader.FusionKeyToFusions;\n                actualVersion       = reader.Version;\n            }\n\n            Assert.Equal(expectedVersion,             actualVersion, new DataSourceVersionComparer());\n            Assert.Equal(expectedGeneKeyToJson.Count, actualGeneKeyToJson.Count);\n            foreach (ulong geneKey in expectedGeneKeyToJson.Keys)\n            {\n                Assert.Equal(expectedGeneKeyToJson[geneKey], actualGeneKeyToJson[geneKey]);\n            }\n        }\n\n        private static Dictionary<ulong, string[]> GetKeyToJson()\n        {\n            var geneKeyToFusion = new Dictionary<ulong, string[]>();\n\n            var json =\n                
\"\\\"id\\\":\\\"COSF2245\\\",\\\"numSamples\\\":13,\\\"geneSymbols\\\":[\\\"ETV6\\\",\\\"RUNX1\\\"],\\\"hgvsr\\\":\\\"ENST00000396373.8(ETV6):r.1_1283_ENST00000300305.7(RUNX1):r.504_6222\\\",\\\"histologies\\\":[{\\\"histology\\\":\\\"lymphoid neoplasm\\\",\\\"numSamples\\\":14}],\\\"sites\\\":[{\\\"site\\\":\\\"haematopoietic and lymphoid tissue\\\",\\\"numSamples\\\":11}]\";\n            var json2 =\n                \"\\\"id\\\":\\\"COSF100\\\",\\\"numSamples\\\":2,\\\"geneSymbols\\\":[\\\"A\\\",\\\"B\\\"],\\\"hgvsr\\\":\\\"ENST00000396373.8(A):r.1_1283_ENST00000300305.7(B):r.504_6222\\\",\\\"histologies\\\":[{\\\"histology\\\":\\\"lymphoid neoplasm\\\",\\\"numSamples\\\":14}]\";\n            var json3 =\n                \"\\\"id\\\":\\\"COSF200\\\",\\\"numSamples\\\":7,\\\"geneSymbols\\\":[\\\"C\\\",\\\"D\\\"],\\\"hgvsr\\\":\\\"ENST00000396373.8(C):r.1_1283_ENST00000300305.7(D):r.504_6222\\\",\\\"sites\\\":[{\\\"site\\\":\\\"haematopoietic and lymphoid tissue\\\",\\\"numSamples\\\":11}]\";\n\n            geneKeyToFusion[1000] = new[] {json, json2};\n            geneKeyToFusion[2000] = new[] {json3};\n\n            return geneKeyToFusion;\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/CustomAnnotations/AllowedValuesTests.cs",
    "content": "﻿using ErrorHandling.Exceptions;\r\nusing Xunit;\r\nusing SAUtils.Custom;\r\n\r\nnamespace UnitTests.SAUtils.CustomAnnotations\r\n{\r\n    public sealed class AllowedValuesTests\r\n    {\r\n        [Fact]\r\n        public void IsEmptyValue_AsExpected()\r\n        {\r\n            Assert.True(AllowedValues.IsEmptyValue(\"\"));\r\n            Assert.True(AllowedValues.IsEmptyValue(\".\"));\r\n            Assert.False(AllowedValues.IsEmptyValue(\"-\"));\r\n        }\r\n\r\n        [Fact]\r\n        public void ValidatePredictionValue_Pass()\r\n        {\r\n            AllowedValues.ValidatePredictionValue(\"\", \"\");\r\n            AllowedValues.ValidatePredictionValue(\".\", \"\");\r\n            AllowedValues.ValidatePredictionValue(\"P\", \"\");\r\n            AllowedValues.ValidatePredictionValue(\"Likely Benign\", \"\");\r\n            AllowedValues.ValidatePredictionValue(\"Vus\", \"\");\r\n        }\r\n\r\n        [Fact]\r\n        public void ValidatePredictionValue_ThrowException()\r\n        {\r\n            Assert.Throws<UserErrorException>(() => AllowedValues.ValidatePredictionValue(\"LikelyBenign\", \"\"));\r\n            Assert.Throws<UserErrorException>(() => AllowedValues.ValidatePredictionValue(\"Likely Benign, LB\", \"\"));\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/SAUtils/CustomAnnotations/GeneAnnotationParserTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing ErrorHandling.Exceptions;\r\nusing SAUtils.Custom;\r\nusing SAUtils.Schema;\r\nusing VariantAnnotation.SA;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.SAUtils.CustomAnnotations\r\n{\r\n    public sealed class GeneAnnotationParserTests\r\n    {\r\n\r\n        private static readonly Dictionary<string, string> EntrezGeneIdToSymbol = new Dictionary<string, string>\r\n        {\r\n            {\"1\", \"Gene1\" },\r\n            {\"2\", \"Gene2\" }\r\n        };\r\n\r\n        private static readonly Dictionary<string, string> EnsemblIdToSymbol = new Dictionary<string, string>\r\n        {\r\n            {\"ENSG1\", \"Gene1\" },\r\n            {\"ENSG2\", \"Gene2\" }\r\n        };\r\n\r\n        private static StreamReader GetReadStream(string text)\r\n        {\r\n            byte[] data;\r\n            using (var memStream = new MemoryStream())\r\n            using (var writer = new StreamWriter(memStream))\r\n            {\r\n                writer.Write(text);\r\n                writer.Flush();\r\n                data = memStream.ToArray();\r\n            }\r\n\r\n            return new StreamReader(new MemoryStream(data));\r\n        }\r\n\r\n        [Fact]\r\n        public void ParseHeaderLines_AsExpected()\r\n        {\r\n            const string headerLines = \"#title=InternalGeneAnnotation\\n\" +\r\n                                      \"#geneSymbol\\tgeneId\\tOMIM Description\\tIs Oncogene\\tphenotype\\tmimNumber\\tnotes\\n\" +\r\n                                      \"#categories\\t.\\tDescription\\tFilter\\t\\tIdentifier\\t.\\n\" +\r\n                                      \"#descriptions\\t.\\tGene description from OMIM\\t\\tGene phenotype\\t\\tFree text\\n\" +\r\n                                      \"#type\\t\\tstring\\tbool\\tstring\\tnumber\\tstring\\n\";\r\n\r\n\r\n            using (var parser = new GeneAnnotationsParser(GetReadStream(headerLines), EntrezGeneIdToSymbol, 
EnsemblIdToSymbol))\r\n            {\r\n                parser.ParseHeaderLines();\r\n                var expectedJsonKeys = new[] {\"OMIM Description\", \"Is Oncogene\", \"phenotype\", \"mimNumber\", \"notes\"};\r\n\r\n                var expectedCategories = new[]\r\n                {\r\n                    CustomAnnotationCategories.Description, CustomAnnotationCategories.Filter,\r\n                    CustomAnnotationCategories.Unknown, CustomAnnotationCategories.Identifier,\r\n                    CustomAnnotationCategories.Unknown\r\n                };\r\n                var expectedDescriptions = new[] { \"Gene description from OMIM\", null, \"Gene phenotype\", null, \"Free text\" };\r\n                var expectedTypes = new[]\r\n                {\r\n                    SaJsonValueType.String,\r\n                    SaJsonValueType.Bool,\r\n                    SaJsonValueType.String,\r\n                    SaJsonValueType.Number,\r\n                    SaJsonValueType.String\r\n                };\r\n\r\n                Assert.Equal(\"InternalGeneAnnotation\", parser.JsonTag);\r\n                Assert.Equal(expectedJsonKeys, parser.JsonKeys);\r\n                Assert.Equal(expectedCategories, parser.Categories);\r\n                Assert.Equal(expectedDescriptions, parser.Descriptions);\r\n                Assert.Equal(expectedTypes, parser.ValueTypes);\r\n            }\r\n        }\r\n        \r\n        [Fact]\r\n        public void ParseHeaderLines_version_and_Description()\r\n        {\r\n            const string headerLines = \"#title=InternalGeneAnnotation\\n\" +\r\n                                       \"#version=v1.1\\n\" +\r\n                                       \"#description=Internal Gene Annotation\\n\" +\r\n                                      \"#geneSymbol\\tgeneId\\tOMIM Description\\tIs Oncogene\\tphenotype\\tmimNumber\\tnotes\\n\" +\r\n                                      \"#categories\\t.\\tDescription\\tFilter\\t\\tIdentifier\\t.\\n\" 
+\r\n                                      \"#descriptions\\t.\\tGene description from OMIM\\t\\tGene phenotype\\t\\tFree text\\n\" +\r\n                                      \"#type\\t\\tstring\\tbool\\tstring\\tnumber\\tstring\\n\";\r\n\r\n\r\n            using (var parser = new GeneAnnotationsParser(GetReadStream(headerLines), EntrezGeneIdToSymbol, EnsemblIdToSymbol))\r\n            {\r\n                parser.ParseHeaderLines();\r\n                Assert.Equal(\"v1.1\", parser.Version);\r\n                Assert.Equal(\"Internal Gene Annotation\", parser.DataSourceDescription);\r\n            }\r\n        }\r\n        \r\n        [Fact]\r\n        public void ParseHeaderLines_InconsistentFields()\r\n        {\r\n            const string invalidHeaderLines = \"#title=InternalGeneAnnotation\\n\" +\r\n                                              \"#geneSymbol\\tgeneId\\tphenotype\\tmimNumber\\tnotes\\n\" +\r\n                                              \"#categories\\t\\t\\tstring\\tnumber\\t.\\n\" +\r\n                                              \"#descriptions\\t.\\t.\\t.\\t.\\tSome\\tText\\tHere\\n\" +\r\n                                              \"#type\\t\\t\\tstring\\tnumber\\t.\\n\";\r\n\r\n            using (var parser = new GeneAnnotationsParser(GetReadStream(invalidHeaderLines), EntrezGeneIdToSymbol, EnsemblIdToSymbol))\r\n            {\r\n                Assert.Throws<UserErrorException>(() => parser.ParseHeaderLines());\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void GetItems_UnrecognizedGeneId_ThrowException()\r\n        {\r\n            const string lines = \"#title=InternalGeneAnnotation\\n\" +\r\n                                 \"#geneSymbol\\tgeneId\\tOMIM Description\\tIs Oncogene\\tphenotype\\tmimNumber\\tnotes\\n\" +\r\n                                 \"#categories\\t.\\tDescription\\tFilter\\t\\tIdentifier\\t.\\n\" +\r\n                                 \"#descriptions\\t.\\tGene description from 
OMIM\\t\\tGene phenotype\\t\\tFree text\\n\" +\r\n                                 \"#type\\t\\tstring\\tbool\\tstring\\tnumber\\tstring\\n\" +\r\n                                 \"Abc\\t3\\tsome text\\ttrue\\tgood\\t234\\ttest\\n\";\r\n                                 \r\n            using (var parser = GeneAnnotationsParser.Create(GetReadStream(lines), EntrezGeneIdToSymbol, EnsemblIdToSymbol))\r\n            {\r\n                Assert.Throws<UserErrorException>(() => parser.GetItems());\r\n            }\r\n        }\r\n\r\n\r\n        [Fact]\r\n        public void GetItems_SameGene_MultipleEntries_ThrowException()\r\n        {\r\n            const string lines = \"#title=InternalGeneAnnotation\\n\" +\r\n                                 \"#geneSymbol\\tgeneId\\tOMIM Description\\tIs Oncogene\\tphenotype\\tmimNumber\\tnotes\\n\" +\r\n                                 \"#categories\\t.\\tDescription\\tFilter\\t\\tIdentifier\\t.\\n\" +\r\n                                 \"#descriptions\\t.\\tGene description from OMIM\\t\\tGene phenotype\\t\\tFree text\\n\" +\r\n                                 \"#type\\t\\tstring\\tbool\\tstring\\tnumber\\tstring\\n\"+\r\n                                 \"Abc\\t1\\tsome text\\ttrue\\tgood\\t234\\ttest\\n\" + \r\n                                 \"123\\tENSG1\\tsome other text\\tfalse\\tbad\\t200\\ttest2\\n\";\r\n\r\n            using (var parser = GeneAnnotationsParser.Create(GetReadStream(lines), EntrezGeneIdToSymbol, EnsemblIdToSymbol))\r\n            {\r\n                Assert.Throws<UserErrorException>(() => parser.GetItems());\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void GetItems_EmptyAnnotation_ThrowException()\r\n        {\r\n            const string lines = \"#title=InternalGeneAnnotation\\n\" +\r\n                                 \"#geneSymbol\\tgeneId\\tOMIM Description\\tIs Oncogene\\tphenotype\\tmimNumber\\tnotes\\n\" +\r\n                                 
\"#categories\\t.\\tDescription\\tFilter\\t\\tIdentifier\\t.\\n\" +\r\n                                 \"#descriptions\\t.\\tGene description from OMIM\\t\\tGene phenotype\\t\\tFree text\\n\" +\r\n                                 \"#type\\t\\tstring\\tbool\\tstring\\tnumber\\tstring\\n\" +\r\n                                 \"Abc\\t1\\t\\t.\\t\\t.\\t\\n\" +\r\n                                 \"Abc\\tENSG2\\tsome other text\\tfalse\\tbad\\t200\\ttest2\\n\";\r\n\r\n            using (var parser = GeneAnnotationsParser.Create(GetReadStream(lines), EntrezGeneIdToSymbol, EnsemblIdToSymbol))\r\n            {\r\n                Assert.Throws<UserErrorException>(() => parser.GetItems());\r\n            }\r\n        }\r\n\r\n\r\n        [Fact]\r\n        public void GetItems_AsExpected()\r\n        {\r\n            const string lines = \"#title=InternalGeneAnnotation\\n\" +\r\n                                 \"#geneSymbol\\tgeneId\\tOMIM Description\\tIs Oncogene\\tphenotype\\tmimNumber\\tnotes\\n\" +\r\n                                 \"#categories\\t.\\tDescription\\tFilter\\t\\tIdentifier\\t.\\n\" +\r\n                                 \"#descriptions\\t.\\tGene description from OMIM\\t\\tGene phenotype\\t\\tFree text\\n\" +\r\n                                 \"#type\\t\\tstring\\tbool\\tstring\\tnumber\\tstring\\n\" +\r\n                                 \"Abc\\t1\\tsome text\\ttrue\\tgood\\t234\\ttest\\n\" +\r\n                                 \"Abc\\tENSG2\\tsome other text\\tfalse\\tbad\\t200\\ttest2\\n\";\r\n\r\n            using (var parser = GeneAnnotationsParser.Create(GetReadStream(lines), EntrezGeneIdToSymbol, EnsemblIdToSymbol))\r\n            {\r\n                var geneSymbol2Items = parser.GetItems();\r\n                Assert.Equal(2, geneSymbol2Items.Count);\r\n                Assert.Single(geneSymbol2Items[\"Gene1\"]);\r\n                Assert.Single(geneSymbol2Items[\"Gene2\"]);\r\n                Assert.Equal(\"{\\\"OMIM Description\\\":\\\"some 
text\\\",\\\"Is Oncogene\\\":true,\\\"phenotype\\\":\\\"good\\\",\\\"mimNumber\\\":234,\\\"notes\\\":\\\"test\\\"}\", geneSymbol2Items[\"Gene1\"][0].GetJsonString());\r\n                Assert.Equal(\"{\\\"OMIM Description\\\":\\\"some other text\\\",\\\"phenotype\\\":\\\"bad\\\",\\\"mimNumber\\\":200,\\\"notes\\\":\\\"test2\\\"}\", geneSymbol2Items[\"Gene2\"][0].GetJsonString());\r\n            }\r\n        }\r\n\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/SAUtils/CustomAnnotations/ParserUtilitiesTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing SAUtils.Custom;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.SAUtils.CustomAnnotations\r\n{\r\n    public sealed class ParserUtilitiesTests\r\n    {\r\n        private readonly HashSet<GenomeAssembly> _allowedGenomeAssemblies = new HashSet<GenomeAssembly> { GenomeAssembly.GRCh37, GenomeAssembly.GRCh38};\r\n        \r\n        [Fact]\r\n        public void CheckPrefix_InvalidPrefix_ThrowException()\r\n        {\r\n            Assert.Throws<UserErrorException>(() => ParserUtilities.CheckPrefix(\"invalidPrefix=someValue\", \"expectedPrefix\"));\r\n        }\r\n\r\n        [Fact]\r\n        public void ParseTags_LessThanRequiredColumns_ThrowException()\r\n        {\r\n            Assert.Throws<UserErrorException>(() => ParserUtilities.ParseTags(\"#CHROM\\tPOS\\tREF\", \"#CHROM\", 4));\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(\"String\")]\r\n        [InlineData(\"NUMBER\")]\r\n        [InlineData(\"Bool\")]\r\n        public void ParseTypes_ValidType_Pass(string type)\r\n        {\r\n            string typeLine = $\"#type\\t.\\t.\\t.\\t{type}\";\r\n            ParserUtilities.ParseTypes(typeLine, 4, 1);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(\"boolean\")]\r\n        [InlineData(\"double\")]\r\n        [InlineData(\"int\")]\r\n        public void ParseTypes_InvalidType_ThrowException(string type)\r\n        {\r\n            string typeLine = $\"#type\\t.\\t.\\t.\\t{type}\";\r\n            Assert.Throws<UserErrorException>(() => ParserUtilities.ParseTypes(typeLine, 4, 1));\r\n        }\r\n\r\n        [Fact]\r\n        public void ParseCategories_InvalidValue_ThrowException()\r\n        {\r\n            Assert.Throws<UserErrorException>(() => ParserUtilities.ParseCategories(\"#categories\\tWOW\", 1, 1, null));\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/SAUtils/CustomAnnotations/VariantAnnotationsParserTests.cs",
    "content": "﻿using System.IO;\r\nusing System.Linq;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing Moq;\r\nusing SAUtils.Custom;\r\nusing SAUtils.Schema;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.SA;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.SAUtils.CustomAnnotations\r\n{\r\n    public sealed class VariantAnnotationsParserTests\r\n    {\r\n        private static readonly ISequence Sequence = GetMockedSequence();\r\n\r\n        private static readonly ISequenceProvider SequenceProvider = GetMockedSequenceProvider();\r\n\r\n        private static StreamReader GetReadStream(string text)\r\n        {\r\n            byte[] data;\r\n            using (var memStream = new MemoryStream())\r\n            using (var writer = new StreamWriter(memStream))\r\n            {\r\n                writer.Write(text);\r\n                writer.Flush();\r\n                data = memStream.ToArray();\r\n            }\r\n\r\n            return new StreamReader(new MemoryStream(data));\r\n        }\r\n\r\n\r\n        [Fact]\r\n        public void CheckPosAndRefColumns_InvalidPosOrRef_ThrowException()\r\n        {\r\n            var caParser = new VariantAnnotationsParser(null, null) {Tags = new[] {\"#CHROM\", \"\", \"REF\", \"ALT\"}};\r\n\r\n            Assert.Throws<UserErrorException>(() => caParser.CheckPosAndRefColumns());\r\n\r\n            caParser.Tags = new[] { \"#CHROM\", \"POS\", \"REFERENCE\", \"ALT\" };\r\n            Assert.Throws<UserErrorException>(() => caParser.CheckPosAndRefColumns());\r\n        }\r\n\r\n        [Fact]\r\n        public void CheckAltAndEndColumns_NoAltAndEnd_ThrowException()\r\n        {\r\n            var caParser = new VariantAnnotationsParser(null, null) {Tags = new[] {\"#CHROM\", \"POS\", \"REF\", \"Note\"}};\r\n\r\n            Assert.Throws<UserErrorException>(() => caParser.CheckAltAndEndColumns());\r\n        }\r\n\r\n    
    [Fact]\r\n        public void ParseHeaderLines_AsExpected()\r\n        {\r\n            const string headerLines = \"#title=IcslAlleleFrequencies \\n\" +\r\n                                       \"#assembly=GRCh38\\t\\n\" +\r\n                                       \"#matchVariantsBy=allele\\n\" +\r\n                                       \"#CHROM\\tPOS\\tREF\\tALT\\tEND\\tallAc\\tallAn\\tallAf\\tfailedFilter\\tpathogenicity\\tdeNovoQual\\tnotes\\n\" +\r\n                                       \"#categories\\t.\\t.\\t.\\t.\\tAlleleCount\\tAlleleNumber\\tAlleleFrequency\\t.\\tPrediction\\tScore\\t.\\n\" +\r\n                                       \"#descriptions\\t.\\t.\\t.\\t.\\tALL\\tALL\\tALL\\t.\\t.\\t.\\t.\\n\" +\r\n                                       \"#type\\t.\\t.\\t.\\t.\\tnumber\\tnumber\\tnumber\\tbool\\tstring\\tnumber\\tstring\";\r\n\r\n\r\n            using (var custParser = new VariantAnnotationsParser(GetReadStream(headerLines), null))\r\n            {\r\n                custParser.ParseHeaderLines();\r\n                var expectedJsonKeys = new[]\r\n                    {\"refAllele\", \"altAllele\", \"allAc\", \"allAn\", \"allAf\", \"failedFilter\", \"pathogenicity\", \"deNovoQual\", \"notes\"};\r\n                var expectedIntervalJsonKeys = new[]\r\n                    {\"start\", \"end\", \"allAc\", \"allAn\", \"allAf\", \"failedFilter\", \"pathogenicity\", \"deNovoQual\", \"notes\"};\r\n                var expectedCategories = new[]\r\n                {\r\n                    CustomAnnotationCategories.AlleleCount, CustomAnnotationCategories.AlleleNumber,\r\n                    CustomAnnotationCategories.AlleleFrequency, CustomAnnotationCategories.Unknown,\r\n                    CustomAnnotationCategories.Prediction, CustomAnnotationCategories.Score,\r\n                    CustomAnnotationCategories.Unknown\r\n                };\r\n                var expectedDescriptions = new[] { \"ALL\", \"ALL\", \"ALL\", null, null, null, null 
};\r\n                var expectedTypes = new[]\r\n                {\r\n                    SaJsonValueType.Number,\r\n                    SaJsonValueType.Number,\r\n                    SaJsonValueType.Number,\r\n                    SaJsonValueType.Bool,\r\n                    SaJsonValueType.String,\r\n                    SaJsonValueType.Number, \r\n                    SaJsonValueType.String\r\n                };\r\n\r\n                Assert.Equal(\"IcslAlleleFrequencies\", custParser.JsonTag);\r\n                Assert.Equal(GenomeAssembly.GRCh38, custParser.Assembly);\r\n                Assert.Equal(expectedJsonKeys, custParser.JsonKeys);\r\n                Assert.Equal(expectedIntervalJsonKeys, custParser.IntervalJsonKeys);\r\n                Assert.Equal(expectedCategories, custParser.Categories);\r\n                Assert.Equal(expectedDescriptions, custParser.Descriptions);\r\n                Assert.Equal(expectedTypes, custParser.ValueTypes);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void ParseHeaderLines_matchBy_sv()\r\n        {\r\n            const string headerLines = \"#title=IcslAlleleFrequencies\\n\" +\r\n                                       \"#assembly=GRCh38\\n\" +\r\n                                       \"#matchVariantsBy=sv\\n\" +\r\n                                       \"#CHROM\\tPOS\\tREF\\tALT\\tEND\\tallAc\\tallAn\\tallAf\\tfailedFilter\\tpathogenicity\\tnotes\\n\" +\r\n                                       \"#categories\\t.\\t.\\t.\\t.\\tAlleleCount\\tAlleleNumber\\tAlleleFrequency\\t.\\tPrediction\\t.\\n\" +\r\n                                       \"#descriptions\\t.\\t.\\t.\\t.\\tALL\\tALL\\tALL\\t.\\t.\\t.\\n\" +\r\n                                       \"#type\\t.\\t.\\t.\\t.\\tnumber\\tnumber\\tnumber\\tbool\\tstring\\tstring\";\r\n\r\n\r\n            using (var custParser = new VariantAnnotationsParser(GetReadStream(headerLines), null))\r\n            {\r\n                
custParser.ParseHeaderLines();\r\n                Assert.Equal(ReportFor.StructuralVariants, custParser.ReportFor);\r\n            }\r\n        }\r\n        \r\n        [Fact]\r\n        public void ParseHeaderLines_InconsistentFields()\r\n        {\r\n            const string invalidHeaderLines = \"#title=IcslAlleleFrequencies\\n\" +\r\n                                       \"#assembly=GRCh38\\n\" +\r\n                                       \"#CHROM\\tPOS\\tREF\\tALT\\tEND\\tallAc\\tallAn\\tallAf\\tfailedFilter\\tpathogenicity\\n\" +\r\n                                       \"#categories\\t.\\t.\\t.\\t.\\tAlleleCount\\tAlleleNumber\\tAlleleFrequency\\t.\\tPrediction\\t.\\tMore\\n\" +\r\n                                       \"#descriptions\\t.\\t.\\t.\\t.\\tALL\\tALL\\tALL\\n\" +\r\n                                       \"#type\\t.\\t.\\t.\\t.\\tnumber\\tnumber\\tnumber\\tbool\\tstring\\tstring\";\r\n\r\n            using (var parser = new VariantAnnotationsParser(GetReadStream(invalidHeaderLines), null))\r\n            {\r\n                Assert.Throws<UserErrorException>(() => parser.ParseHeaderLines());\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void GetItems()\r\n        {\r\n            const string text = \"#title=IcslAlleleFrequencies\\n\" +\r\n                                \"#assembly=GRCh38\\n\" +\r\n                                \"#matchVariantsBy=allele\\n\" +\r\n                                \"#CHROM\\tPOS\\tREF\\tALT\\tEND\\tallAc\\tallAn\\tallAf\\tfailedFilter\\tpathogenicity\\tnotes\\n\" +\r\n                                \"#categories\\t.\\t.\\t.\\t.\\tAlleleCount\\tAlleleNumber\\tAlleleFrequency\\t.\\tPrediction\\t.\\n\" +\r\n                                \"#descriptions\\t.\\t.\\t.\\t.\\tALL\\tALL\\tALL\\t.\\t.\\t.\\n\" +\r\n                                \"#type\\t.\\t.\\t.\\t.\\tnumber\\tnumber\\tnumber\\tbool\\tstring\\tstring\\n\" +\r\n                                
\"chr1\\t14783\\tG\\tA\\t.\\t20\\t125568\\t0.000159\\ttrue\\tVUS\\t\\n\" +\r\n                                \"chr2\\t10302\\tC\\tA\\t.\\t53\\t8928\\t0.001421\\tfalse\\t.\\t\\n\" +\r\n                                \"chr2\\t46993\\tA\\t<DEL>\\t50879\\t50\\t250\\t0.001\\tfalse\\tbenign\\t\";\r\n            using (var custParser = VariantAnnotationsParser.Create(GetReadStream(text), SequenceProvider))\r\n            {\r\n                var items = custParser.GetItems().ToArray();\r\n                Assert.Equal(2, items.Length);\r\n                Assert.Equal(\"\\\"refAllele\\\":\\\"G\\\",\\\"altAllele\\\":\\\"A\\\",\\\"allAc\\\":20,\\\"allAn\\\":125568,\\\"allAf\\\":0.000159,\\\"failedFilter\\\":true,\\\"pathogenicity\\\":\\\"VUS\\\"\", items[0].GetJsonString());\r\n                Assert.Equal(\"\\\"refAllele\\\":\\\"C\\\",\\\"altAllele\\\":\\\"A\\\",\\\"allAc\\\":53,\\\"allAn\\\":8928,\\\"allAf\\\":0.001421\", items[1].GetJsonString());\r\n            }\r\n        }\r\n        \r\n        [Fact]\r\n        public void GetIntervals_noALT()\r\n        {\r\n            const string text = \"#title=IcslAlleleFrequencies\\n\"                                                          +\r\n                                \"#assembly=GRCh38\\n\"                                                                      +\r\n                                \"#matchVariantsBy=allele\\n\"                                                               +\r\n                                \"#CHROM\\tPOS\\tREF\\tEND\\tnotes\\n\"   +\r\n                                \"#categories\\t.\\t.\\t.\\t.\\n\" +\r\n                                \"#descriptions\\t.\\t.\\t.\\t.\\n\"                                     +\r\n                                \"#type\\t.\\t.\\t.\\tstring\\n\"                       +\r\n                                \"chr16\\t20000000\\tT\\t70000000\\tLots of false positives in this region\";\r\n            using (var custParser = 
VariantAnnotationsParser.Create(GetReadStream(text), SequenceProvider))\r\n            {\r\n                var items = custParser.GetItems().ToArray();\r\n                Assert.Empty(items);\r\n                var intervals = custParser.GetCustomIntervals();\r\n                Assert.Single(intervals);\r\n                Assert.Equal(\"\\\"start\\\":20000000,\\\"end\\\":70000000,\\\"notes\\\":\\\"Lots of false positives in this region\\\"\", intervals[0].GetJsonString());\r\n            }\r\n        }\r\n        \r\n        [Fact]\r\n        public void GetIntervals_start()\r\n        {\r\n            const string text = \"#title=IcslAlleleFrequencies\\n\" +\r\n                                \"#assembly=GRCh38\\n\"             +\r\n                                \"#matchVariantsBy=allele\\n\"      +\r\n                                \"#CHROM\\tPOS\\tREF\\tALT\\tEND\\tnotes\\n\" +\r\n                                \"#categories\\t.\\t.\\t.\\t.\\t.\\n\"      +\r\n                                \"#descriptions\\t.\\t.\\t.\\t.\\t.\\n\"    +\r\n                                \"#type\\t.\\t.\\t.\\t.\\tstring\\n\"       +\r\n                                \"chr21\\t10510818\\tT\\t.\\t10699435\\tinterval 1\\n\"+\r\n                                \"chr21\\t10510818\\tT\\t<DEL>\\t10699435\\tinterval 2\";\r\n            using (var custParser = VariantAnnotationsParser.Create(GetReadStream(text), SequenceProvider))\r\n            {\r\n                var items = custParser.GetItems().ToArray();\r\n                Assert.Empty(items);\r\n                var intervals = custParser.GetCustomIntervals();\r\n                Assert.Equal(2,intervals.Count);\r\n                Assert.Equal(\"\\\"start\\\":10510818,\\\"end\\\":10699435,\\\"notes\\\":\\\"interval 1\\\"\", intervals[0].GetJsonString());\r\n                Assert.Equal(\"\\\"start\\\":10510819,\\\"end\\\":10699435,\\\"notes\\\":\\\"interval 2\\\"\", intervals[1].GetJsonString());\r\n            }\r\n        
}\r\n\r\n        [Fact]\r\n        public void GetItems_OnlyAlleleFrequencyTreatedAsDouble_OtherNumbersPrintAsIs()\r\n        {\r\n            const string text = \"#title=IcslAlleleFrequencies\\n\" +\r\n                                \"#assembly=GRCh38\\n\" +\r\n                                \"#matchVariantsBy=allele\\n\" +\r\n                                \"#CHROM\\tPOS\\tREF\\tALT\\tEND\\tallAc\\tallAn\\tallAf\\tfailedFilter\\tpathogenicity\\tnotes\\tanyNumber\\n\" +\r\n                                \"#categories\\t.\\t.\\t.\\t.\\tAlleleCount\\tAlleleNumber\\tAlleleFrequency\\t.\\tPrediction\\t.\\tscore\\n\" +\r\n                                \"#descriptions\\t.\\t.\\t.\\t.\\tALL\\tALL\\tALL\\t.\\t.\\t.\\t.\\n\" +\r\n                                \"#type\\t.\\t.\\t.\\t.\\tnumber\\tnumber\\tnumber\\tbool\\tstring\\tstring\\tnumber\\n\" +\r\n                                \"chr1\\t12783\\tG\\tA\\t.\\t20\\t125568\\t0.000159\\ttrue\\tVUS\\t\\t1.000\\n\" +\r\n                                \"chr1\\t13302\\tC\\tA\\t.\\t53\\t8928\\t0.001421\\tfalse\\t.\\t\\t3\\n\" +\r\n                                \"chr1\\t18972\\tT\\tC\\t.\\t10\\t1000\\t0.01\\tfalse\\t.\\t\\t100.1234567\\n\" +\r\n                                \"chr1\\t46993\\tA\\t<DEL>\\t50879\\t50\\t250\\t0.001\\tfalse\\tbenign\\t\\t3.1415926\";\r\n            using (var custParser = VariantAnnotationsParser.Create(GetReadStream(text), SequenceProvider))\r\n            {\r\n                var items = custParser.GetItems().ToArray();\r\n                Assert.Equal(3, items.Length);\r\n                Assert.Equal(\"\\\"refAllele\\\":\\\"G\\\",\\\"altAllele\\\":\\\"A\\\",\\\"allAc\\\":20,\\\"allAn\\\":125568,\\\"allAf\\\":0.000159,\\\"failedFilter\\\":true,\\\"pathogenicity\\\":\\\"VUS\\\",\\\"anyNumber\\\":1.000\", items[0].GetJsonString());\r\n                
Assert.Equal(\"\\\"refAllele\\\":\\\"C\\\",\\\"altAllele\\\":\\\"A\\\",\\\"allAc\\\":53,\\\"allAn\\\":8928,\\\"allAf\\\":0.001421,\\\"anyNumber\\\":3\", items[1].GetJsonString());\r\n                Assert.Equal(\"\\\"refAllele\\\":\\\"T\\\",\\\"altAllele\\\":\\\"C\\\",\\\"allAc\\\":10,\\\"allAn\\\":1000,\\\"allAf\\\":0.01,\\\"anyNumber\\\":100.1234567\", items[2].GetJsonString());\r\n            }\r\n        }\r\n        \r\n        [Fact]\r\n        public void GetItems_invalid_scores()\r\n        {\r\n            const string text = \"#title=IcslAlleleFrequencies\\n\" +\r\n                                \"#assembly=GRCh38\\n\" +\r\n                                \"#matchVariantsBy=allele\\n\" +\r\n                                \"#CHROM\\tPOS\\tREF\\tALT\\tEND\\tallAc\\tallAn\\tallAf\\tfailedFilter\\tpathogenicity\\tnotes\\tanyNumber\\n\" +\r\n                                \"#categories\\t.\\t.\\t.\\t.\\tAlleleCount\\tAlleleNumber\\tAlleleFrequency\\t.\\tPrediction\\t.\\tscore\\n\" +\r\n                                \"#descriptions\\t.\\t.\\t.\\t.\\tALL\\tALL\\tALL\\t.\\t.\\t.\\t.\\n\" +\r\n                                \"#type\\t.\\t.\\t.\\t.\\tnumber\\tnumber\\tnumber\\tbool\\tstring\\tstring\\tnumber\\n\" +\r\n                                \"chr1\\t12783\\tG\\tA\\t.\\t20\\t125568\\t0.000159\\ttrue\\tVUS\\t\\t1.0\\n\" +\r\n                                \"chr1\\t13302\\tC\\tA\\t.\\t53\\t8928\\t0.001421\\tfalse\\t.\\t\\t3\\n\" +\r\n                                \"chr1\\t18972\\tT\\tC\\t.\\t10\\t1000\\t0.01\\tfalse\\t.\\t\\t100.1234567\\n\" +\r\n                                \"chr1\\t46993\\tA\\t<DEL>\\t50879\\t50\\t250\\t0.001\\tfalse\\tbenign\\t\\tthree\";\r\n            using (var parser = VariantAnnotationsParser.Create(GetReadStream(text), SequenceProvider))\r\n            {\r\n                Assert.Throws<UserErrorException>(()=> parser.GetItems().ToArray());\r\n            }\r\n        }\r\n        \r\n        [Fact]\r\n        public 
void GetItems_missing_scores()\r\n        {\r\n            const string text = \"#title=IcslAlleleFrequencies\\n\"                                                                   +\r\n                                \"#assembly=GRCh38\\n\"                                                                               +\r\n                                \"#matchVariantsBy=allele\\n\"                                                                        +\r\n                                \"#CHROM\\tPOS\\tREF\\tALT\\tEND\\tallAc\\tallAn\\tallAf\\tfailedFilter\\tpathogenicity\\tnotes\\tanyNumber\\n\" +\r\n                                \"#categories\\t.\\t.\\t.\\t.\\tAlleleCount\\tAlleleNumber\\tAlleleFrequency\\t.\\tPrediction\\t.\\tscore\\n\"   +\r\n                                \"#descriptions\\t.\\t.\\t.\\t.\\tALL\\tALL\\tALL\\t.\\t.\\t.\\t.\\n\"                                           +\r\n                                \"#type\\t.\\t.\\t.\\t.\\tnumber\\tnumber\\tnumber\\tbool\\tstring\\tstring\\tnumber\\n\"                        +\r\n                                \"chr1\\t12783\\tG\\tA\\t.\\t20\\t125568\\t0.000159\\ttrue\\tVUS\\t\\t.\\n\"                                     +\r\n                                \"chr1\\t13302\\tC\\tA\\t.\\t53\\t8928\\t0.001421\\tfalse\\t.\\t\\t3\\n\"                                        +\r\n                                \"chr1\\t18972\\tT\\tC\\t.\\t10\\t1000\\t0.01\\tfalse\\t.\\t\\t100.1234567\\n\";\r\n                                \r\n            using (var parser = VariantAnnotationsParser.Create(GetReadStream(text), SequenceProvider))\r\n            {\r\n                var items = parser.GetItems().ToArray();\r\n                \r\n                Assert.DoesNotContain(\"anyNumber\", items[0].GetJsonString());\r\n                Assert.Contains(\"anyNumber\", items[1].GetJsonString());\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void GetItems_ExtractCustomFilters()\r\n        
{\r\n            const string text = \"#title=IcslAlleleFrequencies\\n\" +\r\n                                \"#assembly=GRCh38\\n\" +\r\n                                \"#matchVariantsBy=allele\\n\" +\r\n                                \"#CHROM\\tPOS\\tREF\\tALT\\tEND\\tallAc\\tallAn\\tallAf\\tfailedFilter\\tpathogenicity\\tnotes\\tanyNumber\\tcustomFilter\\n\" +\r\n                                \"#categories\\t.\\t.\\t.\\t.\\tAlleleCount\\tAlleleNumber\\tAlleleFrequency\\t.\\tPrediction\\t.\\t.\\tFilter\\n\" +\r\n                                \"#descriptions\\t.\\t.\\t.\\t.\\tALL\\tALL\\tALL\\t.\\t.\\t.\\t.\\t.\\n\" +\r\n                                \"#type\\t.\\t.\\t.\\t.\\tnumber\\tnumber\\tnumber\\tbool\\tstring\\tstring\\tnumber\\tstring\\n\" +\r\n                                \"chr1\\t12783\\tG\\tA\\t.\\t20\\t125568\\t0.000159\\ttrue\\tVUS\\t\\t1.000\\tgood variant\\n\" +\r\n                                \"chr1\\t13302\\tC\\tA\\t.\\t53\\t8928\\t0.001421\\tfalse\\t.\\t\\t3\\tbad variant\\n\" +\r\n                                \"chr1\\t18972\\tT\\tC\\t.\\t10\\t1000\\t0.01\\tfalse\\t.\\t\\t100.1234567\\tugly variant\\n\" +\r\n                                \"chr1\\t46993\\tA\\t<DEL>\\t50879\\t50\\t250\\t0.001\\tfalse\\tbenign\\t\\t3.1415926\\tvery ugly variant\";\r\n            using (var custParser = VariantAnnotationsParser.Create(GetReadStream(text), SequenceProvider))\r\n            {\r\n                var items = custParser.GetItems().ToArray();\r\n                Assert.Equal(3, items.Length);\r\n                Assert.Equal(\"\\\"refAllele\\\":\\\"G\\\",\\\"altAllele\\\":\\\"A\\\",\\\"allAc\\\":20,\\\"allAn\\\":125568,\\\"allAf\\\":0.000159,\\\"failedFilter\\\":true,\\\"pathogenicity\\\":\\\"VUS\\\",\\\"anyNumber\\\":1.000,\\\"customFilter\\\":\\\"good variant\\\"\", items[0].GetJsonString());\r\n                
Assert.Equal(\"\\\"refAllele\\\":\\\"C\\\",\\\"altAllele\\\":\\\"A\\\",\\\"allAc\\\":53,\\\"allAn\\\":8928,\\\"allAf\\\":0.001421,\\\"anyNumber\\\":3,\\\"customFilter\\\":\\\"bad variant\\\"\", items[1].GetJsonString());\r\n                Assert.Equal(\"\\\"refAllele\\\":\\\"T\\\",\\\"altAllele\\\":\\\"C\\\",\\\"allAc\\\":10,\\\"allAn\\\":1000,\\\"allAf\\\":0.01,\\\"anyNumber\\\":100.1234567,\\\"customFilter\\\":\\\"ugly variant\\\"\", items[2].GetJsonString());\r\n            }\r\n        }\r\n        \r\n        [Fact]\r\n        public void GetItems_missing_filter()\r\n        {\r\n            const string text = \"#title=IcslAlleleFrequencies\\n\" +\r\n                                \"#assembly=GRCh38\\n\" +\r\n                                \"#matchVariantsBy=allele\\n\" +\r\n                                \"#CHROM\\tPOS\\tREF\\tALT\\tEND\\tallAc\\tallAn\\tallAf\\tfailedFilter\\tpathogenicity\\tnotes\\tanyNumber\\tcustomFilter\\n\" +\r\n                                \"#categories\\t.\\t.\\t.\\t.\\tAlleleCount\\tAlleleNumber\\tAlleleFrequency\\t.\\tPrediction\\t.\\t.\\tFilter\\n\" +\r\n                                \"#descriptions\\t.\\t.\\t.\\t.\\tALL\\tALL\\tALL\\t.\\t.\\t.\\t.\\t.\\n\" +\r\n                                \"#type\\t.\\t.\\t.\\t.\\tnumber\\tnumber\\tnumber\\tbool\\tstring\\tstring\\tnumber\\tstring\\n\" +\r\n                                \"chr1\\t12783\\tG\\tA\\t.\\t20\\t125568\\t0.000159\\ttrue\\tVUS\\t\\t1.000\\tgood variant\\n\" +\r\n                                \"chr1\\t13302\\tC\\tA\\t.\\t53\\t8928\\t0.001421\\tfalse\\t.\\t\\t3\\tbad variant\\n\" +\r\n                                \"chr1\\t18972\\tT\\tC\\t.\\t10\\t1000\\t0.01\\tfalse\\t.\\t\\t100.1234567\\tugly variant\\n\" +\r\n                                \"chr1\\t46993\\tA\\tG\\t.\\t50\\t250\\t0.001\\tfalse\\tbenign\\t\\t3.1415926\\t.\";\r\n            using (var custParser = VariantAnnotationsParser.Create(GetReadStream(text), SequenceProvider))\r\n            {\r\n 
               var items = custParser.GetItems().ToArray();\r\n                Assert.Equal(4, items.Length);\r\n                Assert.Equal(\"\\\"refAllele\\\":\\\"G\\\",\\\"altAllele\\\":\\\"A\\\",\\\"allAc\\\":20,\\\"allAn\\\":125568,\\\"allAf\\\":0.000159,\\\"failedFilter\\\":true,\\\"pathogenicity\\\":\\\"VUS\\\",\\\"anyNumber\\\":1.000,\\\"customFilter\\\":\\\"good variant\\\"\", items[0].GetJsonString());\r\n                Assert.Equal(\"\\\"refAllele\\\":\\\"C\\\",\\\"altAllele\\\":\\\"A\\\",\\\"allAc\\\":53,\\\"allAn\\\":8928,\\\"allAf\\\":0.001421,\\\"anyNumber\\\":3,\\\"customFilter\\\":\\\"bad variant\\\"\", items[1].GetJsonString());\r\n                Assert.Equal(\"\\\"refAllele\\\":\\\"T\\\",\\\"altAllele\\\":\\\"C\\\",\\\"allAc\\\":10,\\\"allAn\\\":1000,\\\"allAf\\\":0.01,\\\"anyNumber\\\":100.1234567,\\\"customFilter\\\":\\\"ugly variant\\\"\", items[2].GetJsonString());\r\n                Assert.DoesNotContain(\"customFilter\",items[3].GetJsonString());\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void GetItems_ExtractCustomFilters_failsOnLargeText()\r\n        {\r\n            const string text = \"#title=IcslAlleleFrequencies\\n\" +\r\n                                \"#assembly=GRCh38\\n\" +\r\n                                \"#matchVariantsBy=allele\\n\" +\r\n                                \"#CHROM\\tPOS\\tREF\\tALT\\tEND\\tallAc\\tallAn\\tallAf\\tfailedFilter\\tpathogenicity\\tnotes\\tanyNumber\\tcustomFilter\\n\" +\r\n                                \"#categories\\t.\\t.\\t.\\t.\\tAlleleCount\\tAlleleNumber\\tAlleleFrequency\\t.\\tPrediction\\t.\\t.\\tFilter\\n\" +\r\n                                \"#descriptions\\t.\\t.\\t.\\t.\\tALL\\tALL\\tALL\\t.\\t.\\t.\\t.\\t.\\n\" +\r\n                                \"#type\\t.\\t.\\t.\\t.\\tnumber\\tnumber\\tnumber\\tbool\\tstring\\tstring\\tnumber\\tstring\\n\" +\r\n                                
\"chr1\\t12783\\tG\\tA\\t.\\t20\\t125568\\t0.000159\\ttrue\\tVUS\\t\\t1.000\\tthe good variant, the bad variant and the ugly variant\\n\";\r\n\r\n            using (var custParser = VariantAnnotationsParser.Create(GetReadStream(text), SequenceProvider))\r\n            {\r\n                Assert.Throws<UserErrorException>(() => custParser.GetItems().ToArray());\r\n\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void GetItems_UnsortedData_ThrowException()\r\n        {\r\n            const string text = \"#title=IcslAlleleFrequencies\\n\" +\r\n                                \"#assembly=GRCh38\\n\" +\r\n                                \"#matchVariantsBy=allele\\n\" +\r\n                                \"#CHROM\\tPOS\\tREF\\tALT\\tEND\\tallAc\\tallAn\\tallAf\\tfailedFilter\\tpathogenicity\\tnotes\\tanyNumber\\n\" +\r\n                                \"#categories\\t.\\t.\\t.\\t.\\tAlleleCount\\tAlleleNumber\\tAlleleFrequency\\t.\\tPrediction\\t.\\t.\\n\" +\r\n                                \"#descriptions\\t.\\t.\\t.\\t.\\tALL\\tALL\\tALL\\t.\\t.\\t.\\t.\\n\" +\r\n                                \"#type\\t.\\t.\\t.\\t.\\tnumber\\tnumber\\tnumber\\tbool\\tstring\\tstring\\tnumber\\n\" +\r\n                                \"chr1\\t12783\\tG\\tA\\t.\\t20\\t125568\\t0.000159\\ttrue\\tVUS\\t\\t1.000\\n\" +\r\n                                \"chr1\\t3302\\tC\\tA\\t.\\t53\\t8928\\t0.001421\\tfalse\\t.\\t\\t3\\n\" +\r\n                                \"chr1\\t18972\\tT\\tC\\t.\\t10\\t1000\\t0.01\\tfalse\\t.\\t\\t100.1234567\\n\" +\r\n                                \"chr1\\t46993\\tA\\t<DEL>\\t50879\\t50\\t250\\t0.001\\tfalse\\tbenign\\t\\t3.1415926\";\r\n            using (var caParser = VariantAnnotationsParser.Create(GetReadStream(text), SequenceProvider))\r\n            {\r\n                Assert.Throws<UserErrorException>(() => caParser.GetItems().ToArray());\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void 
GetIntervals()\r\n        {\r\n            const string text = \"#title=IcslAlleleFrequencies\\n\" +\r\n                                \"#assembly=GRCh38\\n\" +\r\n                                \"#matchVariantsBy=sv\\n\" +\r\n                                \"#CHROM\\tPOS\\tREF\\tALT\\tEND\\tallAc\\tallAn\\tallAf\\tfailedFilter\\tpathogenicity\\tnotes\\n\" +\r\n                                \"#categories\\t.\\t.\\t.\\t.\\tAlleleCount\\tAlleleNumber\\tAlleleFrequency\\t.\\tPrediction\\t.\\n\" +\r\n                                \"#descriptions\\t.\\t.\\t.\\t.\\tALL\\tALL\\tALL\\t.\\t.\\t.\\n\" +\r\n                                \"#type\\t.\\t.\\t.\\t.\\tnumber\\tnumber\\tnumber\\tbool\\tstring\\tstring\\n\" +\r\n                                \"chr1\\t12783\\tG\\tA\\t.\\t20\\t125568\\t0.000159\\ttrue\\tVUS\\t\\n\" +\r\n                                \"chr1\\t13302\\tC\\tA\\t.\\t53\\t8928\\t0.001421\\tfalse\\t.\\t\\n\" +\r\n                                \"chr1\\t46993\\tA\\t<DEL>\\t50879\\t50\\t250\\t0.001\\tfalse\\tbenign\\t\";\r\n\r\n            using (var custParser = VariantAnnotationsParser.Create(GetReadStream(text), SequenceProvider))\r\n            {\r\n                var items = custParser.GetItems().ToArray();\r\n                Assert.Equal(ReportFor.StructuralVariants, custParser.ReportFor);\r\n                Assert.Equal(2, items.Length);\r\n\r\n                var intervals = custParser.GetCustomIntervals();\r\n                Assert.Single(intervals);\r\n                Assert.Equal(\"\\\"start\\\":46994,\\\"end\\\":50879,\\\"allAc\\\":50,\\\"allAn\\\":250,\\\"allAf\\\":0.001,\\\"pathogenicity\\\":\\\"benign\\\"\", intervals[0].GetJsonString());\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void IsValidNucleotideSequence_IsValidSequence_Pass()\r\n        {\r\n            Assert.True(VariantAnnotationsParser.IsValidAltAllele(\"actgnACTGN\"));\r\n            
Assert.True(VariantAnnotationsParser.IsValidAltAllele(\"AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[\"));\r\n            Assert.False(VariantAnnotationsParser.IsValidAltAllele(\"AC-GT\"));\r\n        }\r\n\r\n        [Fact]\r\n        public void ExtractItems_TrimmedAndLeftShifted()\r\n        {\r\n            const string text = \"#title=IcslAlleleFrequencies\\n\" +\r\n                                \"#assembly=GRCh38\\n\" +\r\n                                \"#matchVariantsBy=allele\\n\" +\r\n                                \"#CHROM\\tPOS\\tREF\\tALT\\tEND\\tallAc\\tallAn\\tallAf\\tfailedFilter\\tpathogenicity\\tnotes\\n\" +\r\n                                \"#categories\\t.\\t.\\t.\\t.\\tAlleleCount\\tAlleleNumber\\tAlleleFrequency\\t.\\tPrediction\\t.\\n\" +\r\n                                \"#descriptions\\t.\\t.\\t.\\t.\\tALL\\tALL\\tALL\\t.\\t.\\t.\\n\" +\r\n                                \"#type\\t.\\t.\\t.\\t.\\tnumber\\tnumber\\tnumber\\tbool\\tstring\\tstring\\n\";\r\n\r\n            using (var parser = VariantAnnotationsParser.Create(GetReadStream(text), SequenceProvider))\r\n            {\r\n                var item = parser.ExtractItems(\"chr1\\t12783\\tA\\tATA\\t.\\t20\\t125568\\t0.000159\\ttrue\\tVUS\\t\");\r\n                Assert.Equal(12782, item.Position);\r\n                Assert.Equal(\"\", item.RefAllele);\r\n                Assert.Equal(\"TA\", item.AltAllele);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void Extract_symbolic_alleles()\r\n        {\r\n            const string text = \"#title=IcslAlleleFrequencies\\n\"                                                          +\r\n                                \"#assembly=GRCh38\\n\"                                                                      +\r\n                                \"#matchVariantsBy=allele\\n\"                                                               +\r\n                                
\"#CHROM\\tPOS\\tREF\\tALT\\tEND\\tallAc\\tallAn\\tallAf\\tfailedFilter\\tpathogenicity\\tnotes\\n\"   +\r\n                                \"#categories\\t.\\t.\\t.\\t.\\tAlleleCount\\tAlleleNumber\\tAlleleFrequency\\t.\\tPrediction\\t.\\n\" +\r\n                                \"#descriptions\\t.\\t.\\t.\\t.\\tALL\\tALL\\tALL\\t.\\t.\\t.\\n\"                                     +\r\n                                \"#type\\t.\\t.\\t.\\t.\\tnumber\\tnumber\\tnumber\\tbool\\tstring\\tstring\\n\";\r\n\r\n            using (var parser = VariantAnnotationsParser.Create(GetReadStream(text), SequenceProvider))\r\n            {\r\n                parser.ExtractItems(\"chr1\\t12783\\tA\\t<DEL>\\t24486\\t20\\t125568\\t0.000159\\ttrue\\tVUS\\t\");\r\n                var intervals = parser.GetCustomIntervals();\r\n                Assert.Single(intervals);\r\n                Assert.Equal(12784, intervals[0].Start);\r\n                Assert.Equal(24486, intervals[0].End);\r\n\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void ParseTitle_Conflict_JsonTag()\r\n        {\r\n            const string text = \"#title=topmed\\n\"                                                          +\r\n                                \"#assembly=GRCh38\\n\"                                                                      +\r\n                                \"#matchVariantsBy=allele\\n\"                                                               +\r\n                                \"#CHROM\\tPOS\\tREF\\tALT\\tEND\\tallAc\\tallAn\\tallAf\\tfailedFilter\\tpathogenicity\\tnotes\\n\"   +\r\n                                \"#categories\\t.\\t.\\t.\\t.\\tAlleleCount\\tAlleleNumber\\tAlleleFrequency\\t.\\tPrediction\\t.\\n\" +\r\n                                \"#descriptions\\t.\\t.\\t.\\t.\\tALL\\tALL\\tALL\\t.\\t.\\t.\\n\"                                     +\r\n                                
\"#type\\t.\\t.\\t.\\t.\\tnumber\\tnumber\\tnumber\\tbool\\tstring\\tstring\\n\";\r\n\r\n            Assert.Throws<UserErrorException>(() => VariantAnnotationsParser.Create(GetReadStream(text), SequenceProvider));\r\n            \r\n        }\r\n\r\n        [Fact]\r\n        public void ParseTitle_IncorrectFormat()\r\n        {\r\n            const string text = \"#title:IcslAlleleFrequencies\\n\"                                                          +\r\n                                \"#assembly=GRCh38\\n\"                                                                      +\r\n                                \"#matchVariantsBy=allele\\n\"                                                               +\r\n                                \"#CHROM\\tPOS\\tREF\\tALT\\tEND\\tallAc\\tallAn\\tallAf\\tfailedFilter\\tpathogenicity\\tnotes\\n\"   +\r\n                                \"#categories\\t.\\t.\\t.\\t.\\tAlleleCount\\tAlleleNumber\\tAlleleFrequency\\t.\\tPrediction\\t.\\n\" +\r\n                                \"#descriptions\\t.\\t.\\t.\\t.\\tALL\\tALL\\tALL\\t.\\t.\\t.\\n\"                                     +\r\n                                \"#type\\t.\\t.\\t.\\t.\\tnumber\\tnumber\\tnumber\\tbool\\tstring\\tstring\\n\";\r\n\r\n            Assert.Throws<UserErrorException>(() => VariantAnnotationsParser.Create(GetReadStream(text), SequenceProvider));\r\n        }\r\n\r\n        [Fact]\r\n        public void ParseGenomeAssembly_UnsupportedAssembly_ThrowException()\r\n        {\r\n            const string text = \"#title=IcslAlleleFrequencies\\n\"                                                          +\r\n                                \"#assembly=hg20\\n\"                                                                      +\r\n                                \"#matchVariantsBy=allele\\n\"                                                               +\r\n                                
\"#CHROM\\tPOS\\tREF\\tALT\\tEND\\tallAc\\tallAn\\tallAf\\tfailedFilter\\tpathogenicity\\tnotes\\n\"   +\r\n                                \"#categories\\t.\\t.\\t.\\t.\\tAlleleCount\\tAlleleNumber\\tAlleleFrequency\\t.\\tPrediction\\t.\\n\" +\r\n                                \"#descriptions\\t.\\t.\\t.\\t.\\tALL\\tALL\\tALL\\t.\\t.\\t.\\n\"                                     +\r\n                                \"#type\\t.\\t.\\t.\\t.\\tnumber\\tnumber\\tnumber\\tbool\\tstring\\tstring\\n\";\r\n\r\n            Assert.Throws<UserErrorException>(() => VariantAnnotationsParser.Create(GetReadStream(text), SequenceProvider));\r\n        }\r\n\r\n        [Fact]\r\n        public void ParseGenomeAssembly_IncorrectFormat_ThrowException()\r\n        {\r\n            const string text = \"#title=IcslAlleleFrequencies\\n\"                                                          +\r\n                                \"#assembly-hg20\\n\"                                                                      +\r\n                                \"#matchVariantsBy=allele\\n\"                                                               +\r\n                                \"#CHROM\\tPOS\\tREF\\tALT\\tEND\\tallAc\\tallAn\\tallAf\\tfailedFilter\\tpathogenicity\\tnotes\\n\"   +\r\n                                \"#categories\\t.\\t.\\t.\\t.\\tAlleleCount\\tAlleleNumber\\tAlleleFrequency\\t.\\tPrediction\\t.\\n\" +\r\n                                \"#descriptions\\t.\\t.\\t.\\t.\\tALL\\tALL\\tALL\\t.\\t.\\t.\\n\"                                     +\r\n                                \"#type\\t.\\t.\\t.\\t.\\tnumber\\tnumber\\tnumber\\tbool\\tstring\\tstring\\n\";\r\n\r\n            Assert.Throws<UserErrorException>(() => VariantAnnotationsParser.Create(GetReadStream(text), SequenceProvider));\r\n        }\r\n\r\n\r\n        [Fact]\r\n        public void ParseHeader_version_and_description()\r\n        {\r\n            const string text = \"#title=IcslAlleleFrequencies\\n\" 
+\r\n                                \"#assembly=GRCh38\\n\" +\r\n                                \"#version=v4.5\\t\\n\"+\r\n                                \"#description=Internal allele frequencies\\t\\n\" +\r\n                                \"#matchVariantsBy=allele\\n\" +\r\n                                \"#CHROM\\tPOS\\tREF\\tALT\\tEND\\tallAc\\tallAn\\tallAf\\tfailedFilter\\tpathogenicity\\tnotes\\n\" +\r\n                                \"#categories\\t.\\t.\\t.\\t.\\tAlleleCount\\tAlleleNumber\\tAlleleFrequency\\t.\\tPrediction\\t.\\n\" +\r\n                                \"#descriptions\\t.\\t.\\t.\\t.\\tALL\\tALL\\tALL\\t.\\t.\\t.\\n\" +\r\n                                \"#type\\t.\\t.\\t.\\t.\\tnumber\\tnumber\\tnumber\\tbool\\tstring\\tstring\\n\";\r\n\r\n            using (var parser = VariantAnnotationsParser.Create(GetReadStream(text), SequenceProvider))\r\n            {\r\n                Assert.Equal(\"v4.5\", parser.Version);\r\n                Assert.Equal(\"Internal allele frequencies\", parser.DataSourceDescription);\r\n            }\r\n        }\r\n\r\n        private static ISequenceProvider GetMockedSequenceProvider()\r\n        {\r\n            var seqProviderMock = new Mock<ISequenceProvider>();\r\n            seqProviderMock.SetupGet(x => x.RefNameToChromosome).Returns(ChromosomeUtilities.RefNameToChromosome);\r\n            seqProviderMock.SetupGet(x => x.Sequence).Returns(Sequence);\r\n\r\n            return seqProviderMock.Object;\r\n        }\r\n\r\n        private static ISequence GetMockedSequence()\r\n        {\r\n            var sequenceMock = new Mock<ISequence>();\r\n            sequenceMock.Setup(x => x.Substring(12783, 0)).Returns(\"\");\r\n            sequenceMock.Setup(x => x.Substring(12733, 50)).Returns(\"ACGTA\");\r\n            sequenceMock.Setup(x => x.Substring(12283, 500)).Returns(\"ACGTA\");\r\n            return sequenceMock.Object;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/SAUtils/Dann/DannParserTests.cs",
    "content": "﻿using System.IO;\nusing System.Linq;\nusing SAUtils.GenericScore;\nusing SAUtils.GenericScore.GenericScoreParser;\nusing UnitTests.TestUtilities;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.Dann\n{\n    public sealed class DannParserTests\n    {\n        private static Stream GetStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##DANN\");\n            writer.WriteLine(\"#chr\\tpos\\tref\\talt\\tscore\");\n            writer.WriteLine(\"1\\t10001\\t10001\\tT\\tC\\t0.4396994049749739\");\n            writer.WriteLine(\"1\\t10001\\t10001\\tT\\tG\\t0.38108629377072734\");\n            writer.WriteLine(\"1\\t10002\\t10002\\tA\\tC\\t0.36182020272810128\");\n            writer.WriteLine(\"1\\t10002\\t10002\\tA\\tG\\t0.44413258111779291\");\n            writer.WriteLine(\"1\\t10002\\t10002\\tA\\tT\\t0.16812846819989813\");\n            writer.WriteLine(\"1\\t10003\\t10003\\tA\\tC\\t0.36516159615040267\");\n            writer.WriteLine(\"1\\t10003\\t10003\\tA\\tG\\t0.4480978029675266\");\n            writer.WriteLine(\"1\\t10003\\t10003\\tA\\tG\\taskdlj\");\n            writer.WriteLine(\"asd\\t10003\\t10003\\tA\\tG\\taskdlj\");\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void DannReader_GetItems_AsExpected()\n        {\n            var nucleotides = new[] {\"A\", \"C\", \"G\", \"T\"};\n\n            var dannParserSettings = new ParserSettings(\n                new ColumnIndex(0, 2, 3, 4, 5, null),\n                nucleotides,\n                GenericScoreParser.MaxRepresentativeScores\n            );\n\n            using (var streamReader = new StreamReader(GetStream()))\n            using (var scoreParser = new GenericScoreParser(dannParserSettings, streamReader, ChromosomeUtilities.RefNameToChromosome))\n            {\n                GenericScoreItem[] 
dannItems = scoreParser.GetItems().ToArray();\n                Assert.Equal(7, dannItems.Length);\n\n                Assert.Equal(10001,              dannItems[0].Position);\n                Assert.Equal(\"T\",                dannItems[0].RefAllele);\n                Assert.Equal(\"C\",                dannItems[0].AltAllele);\n                Assert.Equal(0.4396994049749739, dannItems[0].Score);\n\n                Assert.Equal(10001,               dannItems[1].Position);\n                Assert.Equal(\"T\",                 dannItems[1].RefAllele);\n                Assert.Equal(\"G\",                 dannItems[1].AltAllele);\n                Assert.Equal(0.38108629377072734, dannItems[1].Score);\n\n                Assert.Equal(10002,               dannItems[4].Position);\n                Assert.Equal(\"A\",                 dannItems[4].RefAllele);\n                Assert.Equal(\"T\",                 dannItems[4].AltAllele);\n                Assert.Equal(0.16812846819989813, dannItems[4].Score);\n            }\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/DataStructures/CounterDictionaryTests.cs",
    "content": "using System.Collections.Generic;\nusing OptimizedCore;\nusing SAUtils.DataStructures;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.DataStructures;\n\npublic sealed class CounterDictionaryTests\n{\n    [Fact]\n    public void TestCounterDictionary()\n    {\n        var inputData = new[]\n        {\n            \"A\", \"B\", \"A\", \"A\", \"C\", \"B\"\n        };\n\n        var counterDict = new CounterDictionary<string>();\n        foreach (string keys in inputData)\n        {\n            counterDict.Add(keys);\n        }\n        \n        Assert.Equal<uint>(6, counterDict.Total);\n        \n        Assert.Equal<uint>(3, counterDict[\"A\"]);\n        Assert.Equal<uint>(2, counterDict[\"B\"]);\n        Assert.Equal<uint>(1, counterDict[\"C\"]);\n        Assert.Equal<uint>(0, counterDict.GetValueOrDefault<string, uint>(\"NOT THERE\", 0));\n\n        var sb = StringBuilderPool.Get();\n        counterDict.SerializeJson(sb);\n        \n        Assert.Equal(\"{\\\"count\\\":6,\\\"A\\\":3,\\\"B\\\":2,\\\"C\\\":1}\", sb.ToString());\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/DbVar/DosageMapRegionParserTests.cs",
    "content": "using System.IO;\nusing System.Linq;\nusing SAUtils.ClinGen;\nusing UnitTests.TestUtilities;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.DbVar\n{\n    public sealed class DosageMapRegionParserTests\n    { \n        private static Stream GetStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"#ClinGen Region Curation Results\");\n            writer.WriteLine(\"#07 May,2019\");\n            writer.WriteLine(\"#Genomic Locations are reported on GRCh37 (hg19): GCF_000001405.13\");\n            writer.WriteLine(\"#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen\");\n            writer.WriteLine(\"#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_region.cgi?id=key\");\n            writer.WriteLine(\"#ISCA ID\\tISCA Region Name\\tcytoBand\\tGenomic Location\\tHaploinsufficiency Score\\tHaploinsufficiency Description\\tHaploinsufficiency PMID1\\tHaploinsufficiency PMID2\\tHaploinsufficiency PMID3\\tTriplosensitivity Score\\tTriplosensitivity Description\\tTriplosensitivity PMID1\\tTriplosensitivity PMID2\\tTriplosensitivity PMID3\\tDate Last Evaluated\\tLoss phenotype OMIM ID\\tTriplosensitive phenotype OMIM ID\");\n            writer.WriteLine(\"ISCA-46299\\tXp11.22 region (includes HUWE1)\\tXp11.22\\tchrX:53363456-53793054\\t0\\tNo evidence available\\t\\t\\t\\t3\\tSufficient evidence for dosage pathogenicity\\t22840365\\t20655035\\t26692240\\t2018-11-19\");\n            writer.WriteLine(\"ISCA-46295\\t15q13.3 recurrent region (D-CHRNA7 to BP5) (includes CHRNA7 and OTUD7A)\\t15q13.3\\tchr15:32019621-32445405\\t3\\tSufficient evidence for dosage pathogenicity\\t19898479\\t20236110\\t22775350\\t40\\tDosage sensitivity unlikely\\t26968334\\t22420048\\t\\t2018-05-10\");\n            writer.WriteLine(\"ISCA-46291\\t7q11.23 recurrent distal region (includes HIP1, YWHAG)\\t7q11.23\\tchr7:75158048-76063176\\t2\\tSome evidence for 
dosage pathogenicity\\t21109226\\t16971481\\t\\t1\\tLittle evidence for dosage pathogenicity\\t21109226\\t27867344\\t\\t2018-12-31\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n        \n        [Fact]\n        public void StandardParsing()\n        {\n            using (var dosageMapRegionParser = new DosageMapRegionParser(GetStream(), ChromosomeUtilities.RefNameToChromosome))\n            {\n                var items = dosageMapRegionParser.GetItems().OrderBy(x => x.Chromosome.Index).ToArray();\n\n                Assert.Equal(3, items.Length);\n                Assert.Equal(\"\\\"chromosome\\\":\\\"7\\\",\\\"begin\\\":75158048,\\\"end\\\":76063176,\\\"haploinsufficiency\\\":\\\"emerging evidence suggesting dosage sensitivity is associated with clinical phenotype\\\",\\\"triplosensitivity\\\":\\\"little evidence suggesting dosage sensitivity is associated with clinical phenotype\\\"\", items[0].GetJsonString());\n                Assert.Equal(\"\\\"chromosome\\\":\\\"15\\\",\\\"begin\\\":32019621,\\\"end\\\":32445405,\\\"haploinsufficiency\\\":\\\"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype\\\",\\\"triplosensitivity\\\":\\\"dosage sensitivity unlikely\\\"\", items[1].GetJsonString());\n                Assert.Equal(\"\\\"chromosome\\\":\\\"X\\\",\\\"begin\\\":53363456,\\\"end\\\":53793054,\\\"haploinsufficiency\\\":\\\"no evidence to suggest that dosage sensitivity is associated with clinical phenotype\\\",\\\"triplosensitivity\\\":\\\"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype\\\"\", items[2].GetJsonString());\n            }\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/DbVar/DosageSensitivityParserTests.cs",
    "content": "﻿using System.IO;\nusing SAUtils.ClinGen;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.DbVar\n{\n    public sealed class DosageSensitivityParserTests\n    {\n        private static Stream GetStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"#ClinGen Gene Curation Results\");\n            writer.WriteLine(\"#07 May,2019\");\n            writer.WriteLine(\"#Genomic Locations are reported on GRCh37 (hg19): GCF_000001405.13\");\n            writer.WriteLine(\"#Gene Symbol\\tGene ID\\tcytoBand\\tGenomic Location\\tHaploinsufficiency Score\\tHaploinsufficiency Description\\tHaploinsufficiency PMID1\\tHaploinsufficiency PMID2\\tHaploinsufficiency PMID3\\tTriplosensitivity Score\\tTriplosensitivity Description\\tTriplosensitivity PMID1\\tTriplosensitivity PMID2\\tTriplosensitivity PMID3\\tDate Last Evaluated\\tLoss phenotype OMIM ID\\tTriplosensitive phenotype OMIM ID\");\n            \n            writer.WriteLine(\"A4GALT\\t53947\\t22q13.2\\tchr22:43088121-43117307\\t30\\tGene associated with autosomal recessive phenotype\\t\\t\\t\\t0\\tNo evidence available\\t\\t\\t\\t2014-12-11\\t111400\\t\");\n            writer.WriteLine(\"AAGAB\\t79719\\t15q23\\tchr15:67493013-67547536\\t3\\tSufficient evidence for dosage pathogenicity\\t23064416\\t23000146\\t\\t0\\tNo evidence available\\t\\t\\t\\t2013-02-28\\t148600\\t\");\n            writer.WriteLine(\"AARS\\t16\\t16q22.1\\tchr16:70286297-70323412\\t0\\tNo evidence available\\t\\t\\t\\t0\\tNo evidence available\\t\\t\\t\\t2018-01-11\\t\\t\");\n            writer.WriteLine(\"AARS2\\t57505\\t6p21.1\\tchr6:44266463-44281063\\t30\\tGene associated with autosomal recessive phenotype\\t\\t\\t\\tNot yet evaluated\\tNot yet evaluated\\t\\t\\t\\t2016-08-22\\t\\t\");\n            writer.WriteLine(\"RSPH1\\t89765\\t21q22.3\\tchr21:42472486-42496246\\t30\\tGene associated with autosomal recessive 
phenotype\\t\\t\\t\\t\\t\\t\\tNot yet evaluated\\tNot yet evaluated\\t\\t\\t\\t\\t\\t\\t2016-08-22\\t615481\\t\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void StandardParsing()\n        {\n            using (var dbVarReader = new DosageSensitivityParser(GetStream()))\n            {\n                var items = dbVarReader.GetItems();\n\n                Assert.Equal(5, items.Count);\n                Assert.Equal(\"{\\\"haploinsufficiency\\\":\\\"gene associated with autosomal recessive phenotype\\\",\\\"triplosensitivity\\\":\\\"no evidence to suggest that dosage sensitivity is associated with clinical phenotype\\\"}\", items[\"A4GALT\"][0].GetJsonString());\n                Assert.Equal(\"{\\\"haploinsufficiency\\\":\\\"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype\\\",\\\"triplosensitivity\\\":\\\"no evidence to suggest that dosage sensitivity is associated with clinical phenotype\\\"}\", items[\"AAGAB\"][0].GetJsonString());\n                Assert.Equal(\"{\\\"haploinsufficiency\\\":\\\"no evidence to suggest that dosage sensitivity is associated with clinical phenotype\\\",\\\"triplosensitivity\\\":\\\"no evidence to suggest that dosage sensitivity is associated with clinical phenotype\\\"}\", items[\"AARS\"][0].GetJsonString());\n                Assert.Equal(\"{\\\"haploinsufficiency\\\":\\\"gene associated with autosomal recessive phenotype\\\",\\\"triplosensitivity\\\":\\\"Not yet evaluated\\\"}\", items[\"AARS2\"][0].GetJsonString());\n                Assert.Equal(\"{\\\"haploinsufficiency\\\":\\\"gene associated with autosomal recessive phenotype\\\",\\\"triplosensitivity\\\":\\\"Not yet evaluated\\\"}\", items[\"RSPH1\"][0].GetJsonString());\n            }\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/FusionCatcher/FusionCatcherDataSourceTests.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing System.Text;\nusing SAUtils.FusionCatcher;\nusing VariantAnnotation.GeneFusions.SA;\nusing VariantAnnotation.GeneFusions.Utilities;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.FusionCatcher\n{\n    public sealed class FusionCatcherDataSourceTests\n    {\n        [Fact]\n        public void Parse_ExpectedResults()\n        {\n            var geneKeyToFusion = new Dictionary<ulong, GeneFusionSourceBuilder>();\n            var knownEnsemblGenes = new HashSet<string>\n            {\n                \"ENSG00000035499\",\n                \"ENSG00000155959\"\n            };\n\n            using var ms = new MemoryStream();\n            using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n            {\n                writer.WriteLine(\"ENSG00000006210\\tENSG00000102962\");\n                writer.WriteLine(\"ENSG00000006652\\tENSG00000181016\");\n                writer.WriteLine(\"ENSG00000014138\\tENSG00000149798\");\n                writer.WriteLine(\"ENSG00000026297\\tENSG00000071242\");\n                writer.WriteLine(\"ENSG00000035499\\tENSG00000155959\");\n                writer.WriteLine(\"ENSG00000055211\\tENSG00000131013\");\n                writer.WriteLine(\"ENSG00000055332\\tENSG00000179915\");\n                writer.WriteLine(\"ENSG00000062485\\tENSG00000257727\");\n                writer.WriteLine(\"ENSG00000065978\\tENSG00000166501\");\n                writer.WriteLine(\"ENSG00000066044\\tENSG00000104980\");\n            }\n\n            ms.Position = 0;\n\n            FusionCatcherDataSource.Parse(ms, GeneFusionSource.OneK_Genomes_Project, CollectionType.Germline, geneKeyToFusion, knownEnsemblGenes);\n            Assert.Single(geneKeyToFusion);\n\n            ulong fusionKey = GeneFusionKey.Create(GeneFusionKey.CreateGeneKey(\"ENSG00000035499\"), GeneFusionKey.CreateGeneKey(\"ENSG00000155959\"));\n\n            bool hasEntry = 
geneKeyToFusion.TryGetValue(fusionKey, out GeneFusionSourceBuilder actualBuilder);\n            Assert.True(hasEntry);\n            Assert.False(actualBuilder.IsPseudogenePair);\n            Assert.False(actualBuilder.IsParalogPair);\n            Assert.False(actualBuilder.IsReadthrough);\n            Assert.Single(actualBuilder.GermlineSources);\n            Assert.Empty(actualBuilder.SomaticSources);\n            Assert.Equal(GeneFusionSource.OneK_Genomes_Project, actualBuilder.GermlineSources[0]);\n        }\n\n        [Fact]\n        public void Parse_IncorrectFileFormat_ThrowException()\n        {\n            var geneKeyToFusion   = new Dictionary<ulong, GeneFusionSourceBuilder>();\n            var knownEnsemblGenes = new HashSet<string>();\n\n            using var ms = new MemoryStream();\n            using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n            {\n                writer.WriteLine(\"ENSG00000006210\\tENSG00000102962\\tENSG00000181016\");\n            }\n\n            ms.Position = 0;\n\n            Assert.Throws<InvalidDataException>(delegate\n            {\n                FusionCatcherDataSource.Parse(ms, GeneFusionSource.OneK_Genomes_Project, CollectionType.Germline, geneKeyToFusion, knownEnsemblGenes);\n            });\n        }\n        \n        [Fact]\n        public void Parse_MultipleCollections_ExpectedResults()\n        {\n            var geneKeyToFusion = new Dictionary<ulong, GeneFusionSourceBuilder>();\n            var knownEnsemblGenes = new HashSet<string>\n            {\n                \"ENSG00000035499\",\n                \"ENSG00000155959\"\n            };\n\n            using var ms = new MemoryStream();\n            AddData(ms);\n            FusionCatcherDataSource.Parse(ms, GeneFusionSource.Bao_gliomas, CollectionType.Somatic, geneKeyToFusion, knownEnsemblGenes);\n            \n            using var ms2 = new MemoryStream();\n            AddData(ms2);\n            FusionCatcherDataSource.Parse(ms2, 
GeneFusionSource.Readthrough, CollectionType.Relationships, geneKeyToFusion, knownEnsemblGenes);\n            \n            Assert.Single(geneKeyToFusion);\n\n            ulong fusionKey = GeneFusionKey.Create(GeneFusionKey.CreateGeneKey(\"ENSG00000035499\"), GeneFusionKey.CreateGeneKey(\"ENSG00000155959\"));\n\n            bool hasEntry = geneKeyToFusion.TryGetValue(fusionKey, out GeneFusionSourceBuilder actualBuilder);\n            Assert.True(hasEntry);\n            Assert.False(actualBuilder.IsPseudogenePair);\n            Assert.False(actualBuilder.IsParalogPair);\n            Assert.True(actualBuilder.IsReadthrough);\n            Assert.Empty(actualBuilder.GermlineSources);\n            Assert.Single(actualBuilder.SomaticSources);\n        }\n\n        private static void AddData(MemoryStream ms)\n        {\n            using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\n            {\n                writer.WriteLine(\"ENSG00000035499\\tENSG00000155959\");\n            }\n\n            ms.Position = 0;\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/FusionCatcher/GeneFusionSourceWriterTests.cs",
    "content": "﻿using System;\nusing System.IO;\nusing Genome;\nusing SAUtils.FusionCatcher;\nusing VariantAnnotation.GeneFusions.IO;\nusing VariantAnnotation.GeneFusions.SA;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Providers;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.FusionCatcher\n{\n    public sealed class GeneFusionSourceWriterTests\n    {\n        [Fact]\n        public void GeneFusionSourceWriter_ExpectedResults()\n        {\n            (uint[] expectedOncogeneKeys, GeneFusionSourceCollection[] expectedIndex, GeneFusionIndexEntry[] expectedIndexEntries) =\n                GetKeyToGeneFusion();\n\n            IDataSourceVersion expectedVersion = new DataSourceVersion(\"FusionCatcher\", \"1.33\", DateTime.Now.Ticks, \"gene fusions\");\n            const string       expectedJsonKey = \"fusionCatcher\";\n\n            using var ms = new MemoryStream();\n            using (var writer = new GeneFusionSourceWriter(ms, expectedJsonKey, expectedVersion, true))\n            {\n                writer.Write(expectedOncogeneKeys, expectedIndex, expectedIndexEntries);\n            }\n\n            ms.Position = 0;\n\n            uint[]                       actualOncogeneKeys;\n            GeneFusionSourceCollection[] actualIndex;\n            GeneFusionIndexEntry[]       actualIndexEntries;\n            IDataSourceVersion           actualVersion;\n            string                       actualJsonKey;\n            GenomeAssembly               actualAssembly;\n\n            using (var reader = new GeneFusionSourceReader(ms))\n            {\n                reader.LoadAnnotations();\n                actualOncogeneKeys = reader.OncogeneKeys;\n                actualIndex        = reader.Index;\n                actualIndexEntries = reader.IndexEntries;\n                actualVersion      = reader.Version;\n                actualJsonKey      = reader.JsonKey;\n                actualAssembly     = reader.Assembly;\n            }\n\n           
 Assert.Equal(expectedVersion,             actualVersion, new DataSourceVersionComparer());\n            Assert.Equal(expectedJsonKey,             actualJsonKey);\n            Assert.Equal(expectedOncogeneKeys,        actualOncogeneKeys);\n            Assert.Equal(expectedIndex.Length,        actualIndex.Length);\n            Assert.Equal(expectedIndex,               actualIndex);\n            Assert.Equal(expectedIndexEntries.Length, actualIndexEntries.Length);\n            Assert.Equal(expectedIndexEntries,        actualIndexEntries);\n            Assert.Equal(GenomeAssembly.Unknown,      actualAssembly);\n        }\n\n        internal static (uint[] OncogeneKeys, GeneFusionSourceCollection[] Index, GeneFusionIndexEntry[] IndexEntries) GetKeyToGeneFusion()\n        {\n            uint[] oncogeneKeys = {123};\n            var    index        = new GeneFusionSourceCollection[3];\n\n            var fusionsWithBothSources = new GeneFusionSourceBuilder {IsParalogPair = true};\n            fusionsWithBothSources.GermlineSources.Add(GeneFusionSource.OneK_Genomes_Project);\n            fusionsWithBothSources.GermlineSources.Add(GeneFusionSource.Healthy_strong_support);\n            fusionsWithBothSources.GermlineSources.Add(GeneFusionSource.Illumina_BodyMap2);\n            fusionsWithBothSources.SomaticSources.Add(GeneFusionSource.Alaei_Mahabadi_18_Cancers);\n            fusionsWithBothSources.SomaticSources.Add(GeneFusionSource.CCLE);\n            index[0] = fusionsWithBothSources.Create();\n\n            var germlineFusions = new GeneFusionSourceBuilder {IsPseudogenePair = true, IsReadthrough = true};\n            germlineFusions.GermlineSources.Add(GeneFusionSource.CACG);\n            germlineFusions.GermlineSources.Add(GeneFusionSource.ConjoinG);\n            germlineFusions.GermlineSources.Add(GeneFusionSource.Healthy_prefrontal_cortex);\n            germlineFusions.GermlineSources.Add(GeneFusionSource.Duplicated_Genes_Database);\n            index[1] = 
germlineFusions.Create();\n\n            var somaticFusions = new GeneFusionSourceBuilder();\n            somaticFusions.SomaticSources.Add(GeneFusionSource.CCLE_Vellichirammal);\n            somaticFusions.SomaticSources.Add(GeneFusionSource.Cancer_Genome_Project);\n            index[2] = somaticFusions.Create();\n\n            var indexEntries = new GeneFusionIndexEntry[]\n            {\n                new(1000, 0),\n                new(2000, 1),\n                new(3000, 2)\n            };\n\n            return (oncogeneKeys, index, indexEntries);\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/FusionCatcher/IndexBuilderTests.cs",
    "content": "﻿using System.Collections.Generic;\nusing SAUtils.FusionCatcher;\nusing VariantAnnotation.GeneFusions.IO;\nusing VariantAnnotation.GeneFusions.SA;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.FusionCatcher\n{\n    public sealed class IndexBuilderTests\n    {\n        [Fact]\n        public void Convert_ExpectedResults()\n        {\n            var expectedSourceCollection = new GeneFusionSourceCollection(false, true, false,\n                new[] {GeneFusionSource.OneK_Genomes_Project, GeneFusionSource.Healthy}, new[] {GeneFusionSource.Alaei_Mahabadi_18_Cancers});\n\n            var expectedSourceCollection2 = new GeneFusionSourceCollection(false, true, false, null, null);\n\n            var expectedIndexEntries = new GeneFusionIndexEntry[]\n            {\n                new(1000, 0),\n                new(2000, 0),\n                new(3000, 0),\n                new(4000, 1),\n            };\n\n            Dictionary<ulong, GeneFusionSourceBuilder> geneKeyToSourceBuilder = GetGeneKeyToSourceBuilder();\n\n            (GeneFusionSourceCollection[] actualIndex, GeneFusionIndexEntry[] actualIndexEntries) = IndexBuilder.Convert(geneKeyToSourceBuilder);\n\n            Assert.Equal(2,                         actualIndex.Length);\n            Assert.Equal(expectedSourceCollection,  actualIndex[0]); // most common entry first\n            Assert.Equal(expectedSourceCollection2, actualIndex[1]);\n\n            Assert.Equal(4,                    actualIndexEntries.Length);\n            Assert.Equal(expectedIndexEntries, actualIndexEntries);\n        }\n\n        private static Dictionary<ulong, GeneFusionSourceBuilder> GetGeneKeyToSourceBuilder()\n        {\n            var builder = new GeneFusionSourceBuilder\n            {\n                IsParalogPair = true,\n                GermlineSources = {GeneFusionSource.OneK_Genomes_Project, GeneFusionSource.Healthy},\n                SomaticSources  = {GeneFusionSource.Alaei_Mahabadi_18_Cancers}\n            
};\n            \n            var builder2 = new GeneFusionSourceBuilder\n            {\n                IsParalogPair   = true,\n                GermlineSources = {GeneFusionSource.OneK_Genomes_Project, GeneFusionSource.Healthy},\n                SomaticSources  = {GeneFusionSource.Alaei_Mahabadi_18_Cancers}\n            };\n            \n            var builder3 = new GeneFusionSourceBuilder\n            {\n                IsParalogPair   = true,\n                GermlineSources = {GeneFusionSource.OneK_Genomes_Project, GeneFusionSource.Healthy},\n                SomaticSources  = {GeneFusionSource.Alaei_Mahabadi_18_Cancers}\n            };\n            \n            var builder4 = new GeneFusionSourceBuilder\n            {\n                IsParalogPair = true\n            };\n\n            return new Dictionary<ulong, GeneFusionSourceBuilder>\n            {\n                [1000] = builder,\n                [2000] = builder2,\n                [3000] = builder3,\n                [4000] = builder4\n            };\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/GERP/GerpParserTests.cs",
    "content": "using System.IO;\nusing System.Linq;\nusing SAUtils.GenericScore;\nusing SAUtils.GenericScore.GenericScoreParser;\nusing UnitTests.TestUtilities;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.GERP\n{\n    public sealed class GerpParserTests\n    {\n        private static Stream GetGerpWigStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"#bedGraph section 1:12646-13697\\n\" +\n                             \"1\\t12646\\t12647\\t0.298\\n\"          +\n                             \"1\\t12647\\t12648\\t2.63\\n\"           +\n                             \"1\\t12648\\t12649\\t1.87\\n\"           +\n                             \"1\\t12649\\t12650\\t0.252\\n\"          +\n                             \"1\\t12650\\t12651\\t-2.06\\n\"          +\n                             \"1\\t12651\\t12652\\t2.61\\n\"           +\n                             \"1\\t12652\\t12653\\t3.97\\n\"           +\n                             \"1\\t12653\\t12654\\t4.9\\n\"            +\n                             \"1\\t12654\\t12655\\t1.98\\n\"           +\n                             \"1\\t12655\\t12656\\t4.72\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        private static Stream GetGerpTsvStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"#chrom\\tpos\\tGERP\\n\" +\n                             \"1\\t10000\\t0\\n\"       +\n                             \"1\\t12596\\t-0.159\\n\"  +\n                             \"1\\t12597\\t0.848\\n\"   +\n                             \"1\\t12598\\t0.848\\n\"   +\n                             \"1\\t12599\\t-1.13\\n\"   +\n                             \"1\\t12600\\t-0.649\\n\"  +\n                             \"1\\t12601\\t0.698\\n\"   +\n                             
\"1\\t12602\\t-0.194\\n\"  +\n                             \"1\\t12603\\t0.848\\n\"   +\n                             \"1\\t12604\\t-0.479\\n\"  +\n                             \"1\\t12605\\t0.848\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void ReadWigItems()\n        {\n            var nucleotides = new[] {\"N\"};\n\n            var parserSettings = new ParserSettings(\n                new ColumnIndex(0, 2, null, null, 3, null),\n                nucleotides,\n                GenericScoreParser.NonConflictingScore\n            );\n\n            using (var streamReader = new StreamReader(GetGerpWigStream()))\n            using (var scoreParser = new GenericScoreParser(parserSettings, streamReader, ChromosomeUtilities.RefNameToChromosome))\n            {\n                GenericScoreItem[] items = scoreParser.GetItems().ToArray();\n                Assert.Equal(10, items.Length);\n            }\n        }\n\n        [Fact]\n        public void ReadTsvItems()\n        {\n            var nucleotides = new[] {\"N\"};\n\n            var parserSettings = new ParserSettings(\n                new ColumnIndex(0, 1, null, null, 2, null),\n                nucleotides,\n                GenericScoreParser.NonConflictingScore\n            );\n\n            using (var streamReader = new StreamReader(GetGerpTsvStream()))\n            using (var scoreParser = new GenericScoreParser(parserSettings, streamReader, ChromosomeUtilities.RefNameToChromosome))\n            {\n                GenericScoreItem[] items = scoreParser.GetItems().ToArray();\n                Assert.Equal(11, items.Length);\n            }\n        }\n        \n        [Fact]\n        public void TestScientificNotationScore()\n        {\n            var writer = new StreamWriter(new MemoryStream());\n            writer.WriteLine(\"#chr\\tpos\\tscore\");\n            writer.WriteLine(\"21\\t21757144\\t-2.57\");\n       
     writer.WriteLine(\"21\\t21757145\\t3.7e-5\");\n            writer.Flush();\n            writer.BaseStream.Position = 0;\n\n            var parserSettings = new ParserSettings(\n                new ColumnIndex(0, 1, null, null, 2, null),\n                new[] {\"N\"},\n                GenericScoreParser.NonConflictingScore\n            );\n            using (var streamReader = new StreamReader(writer.BaseStream))\n            using (var reader = new GenericScoreParser(parserSettings, streamReader, ChromosomeUtilities.RefNameToChromosome))\n            {\n                GenericScoreItem[] genericScoreItems = reader.GetItems().ToArray();\n                Assert.Equal(2,        genericScoreItems.Length);\n                Assert.Equal(-2.57,      genericScoreItems[0].Score);\n                Assert.Equal(0.000037, genericScoreItems[1].Score);\n\n            }\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/GERP/GerpReaderTests.cs",
    "content": "using System.IO;\nusing VariantAnnotation.GenericScore;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.GERP;\n\npublic sealed class GerpReaderTests\n{\n    /// <summary>\n    /// This test is used to test backward compatibility with reader and writer.\n    /// We do use schema versions to keep them in sync, but if one forgets to update\n    /// the schema version, then the reader will fail.\n    /// There are other tests that consider writing and reading in the same loop,\n    /// however, in that case, a new code tests the writer and reader.\n    /// Contrasting with this case, it will test the backward compatibility of the reader\n    /// in case the reader code has a breaking change that prevents it from reading\n    /// the old score files.\n    /// </summary>\n    [Fact]\n    public void TestReadGerpData()\n    {\n        // This is the raw data from the files as byte array generated using wig file with one position\n        // 1\t12646\t12647\t0.298\n        var indexStreamRaw = new byte[]\n        {\n            137, 78, 73, 82, 13, 10, 26, 10, 100, 25, 1, 0, 202, 250, 153, 145, 3, 135, 195, 225, 240, 2, 4, 71, 101, 114, 112, 8, 49, 49, 49, 49, 49,\n            49, 49, 49, 128, 128, 188, 209, 129, 179, 218, 238, 4, 59, 80, 97, 116, 104, 111, 103, 101, 110, 105, 99, 105, 116, 121, 32, 115, 99, 111,\n            114, 101, 115, 32, 111, 102, 32, 109, 105, 115, 115, 101, 110, 115, 101, 32, 118, 97, 114, 105, 97, 110, 116, 115, 32, 112, 114, 101, 100,\n            105, 99, 116, 101, 100, 32, 98, 121, 32, 71, 101, 114, 112, 22, 1, 0, 1, 231, 98, 21, 83, 1, 0, 1, 0, 0, 223, 79, 141, 151, 110, 18,\n            211, 63, 4, 103, 101, 114, 112, 5, 115, 99, 111, 114, 101, 1, 1, 78, 192, 132, 61\n        };\n\n        var dataStreamRaw = new byte[]\n        {\n            137, 78, 73, 82, 13, 10, 26, 10, 112, 23, 1, 0, 202, 250, 153, 145, 3, 135, 195, 225, 240, 40, 181, 47, 253, 160, 128, 132, 30, 0, 92, 0,\n            0, 24, 0, 0, 255, 1, 0, 250, 255, 57, 
24, 2, 2, 0, 16, 255, 2, 0, 16, 255, 2, 0, 16, 255, 2, 0, 16, 255, 2, 0, 16, 255, 2, 0, 16, 255, 2,\n            0, 16, 255, 2, 0, 16, 255, 2, 0, 16, 255, 2, 0, 16, 255, 2, 0, 16, 255, 2, 0, 16, 255, 2, 0, 16, 255, 2, 0, 16, 255, 3, 36, 4, 255, 78,\n            73, 82, 255\n        };\n\n        using (var dataStream = new MemoryStream(dataStreamRaw))\n        using (var indexStream = new MemoryStream(indexStreamRaw))\n        {\n            var scoreReader = ScoreReader.Read(dataStream, indexStream);\n            Assert.Equal(0.298, scoreReader.GetScore(0, 12647, \"A\"));\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/GeneAnnotationsTest.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing SAUtils;\r\nusing SAUtils.DataStructures;\r\nusing SAUtils.Omim;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.NSA;\r\nusing VariantAnnotation.Providers;\r\nusing VariantAnnotation.SA;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.SAUtils\r\n{\r\n    public sealed class GeneAnnotationsTest\r\n    {\r\n        private static Dictionary<string, List<ISuppGeneItem>> GetGeneAnnotations()\r\n        {\r\n            var omimJsonSchema = OmimSchema.Get();\r\n            return new Dictionary<string, List<ISuppGeneItem>>\r\n            {\r\n                { \"gene1\", new List<ISuppGeneItem>\r\n                    {\r\n                        new OmimItem(\"gene1\", \"gene name 1 (\\'minibrain\\', Drosophila, homolog of)\", \"describing gene 1\\n\\\"some citation\\\"\", 123,\r\n                            new List<OmimItem.Phenotype>\r\n                            {\r\n                                new OmimItem.Phenotype(1, \"disease 1\", \"This is disease 1\", OmimItem.Mapping.mapping_of_the_wildtype_gene, new [] {OmimItem.Comment.unconfirmed_or_possibly_spurious_mapping}, new HashSet<string> {\"autosomal recessive\"}, omimJsonSchema.GetSubSchema(\"phenotypes\"))\r\n                            }, omimJsonSchema) \r\n                    }\r\n                },\r\n                {\r\n                    \"gene2\", new List<ISuppGeneItem>\r\n                    {\r\n                        new OmimItem(\"gene2\", \"gene name 2\",\"\", 124,\r\n                            new List<OmimItem.Phenotype>\r\n                            {\r\n                                new OmimItem.Phenotype( 2, \"disease 2\", \"COVID-19\", OmimItem.Mapping.chromosome_deletion_or_duplication_syndrome, new [] {OmimItem.Comment.nondiseases}, new HashSet<string> {\"whatever\", \"never-ever\"}, omimJsonSchema.GetSubSchema(\"phenotypes\"))\r\n                            }, 
omimJsonSchema)\r\n                    }\r\n                }\r\n\r\n            };\r\n        }\r\n\r\n        [Fact]\r\n        public void ReadBackGeneAnnotations()\r\n        {\r\n            NgaReader reader;\r\n            var version          = new DataSourceVersion(\"source1\", \"v1\", DateTime.Now.Ticks);\r\n            const string jsonKey = \"mimo\";\r\n            const bool isArray   = true;\r\n\r\n            using (var ms = new MemoryStream())\r\n            {\r\n                using (var writer = new NgaWriter(ms, version, jsonKey, SaCommon.SchemaVersion, isArray, true))\r\n                {\r\n                    writer.Write(GetGeneAnnotations());\r\n                }\r\n\r\n                ms.Position = 0;\r\n                reader = NgaReader.Read(ms);\r\n            }\r\n\r\n            Assert.NotNull(reader);\r\n            Assert.Null(reader.GetAnnotation(\"gene3\"));\r\n            Assert.Equal(\"[{\\\"mimNumber\\\":123,\\\"geneName\\\":\\\"gene name 1 ('minibrain', Drosophila, homolog of)\\\",\\\"description\\\":\\\"describing gene 1\\\\n\\\\\\\"some citation\\\\\\\"\\\",\\\"phenotypes\\\":[{\\\"phenotype\\\":\\\"disease 1\\\",\\\"description\\\":\\\"This is disease 1\\\",\\\"mapping\\\":\\\"mapping of the wildtype gene\\\",\\\"inheritances\\\":[\\\"autosomal recessive\\\"],\\\"comments\\\":[\\\"unconfirmed or possibly spurious mapping\\\"]}]}]\", reader.GetAnnotation(\"gene1\"));\r\n            Assert.Equal(\"[{\\\"mimNumber\\\":124,\\\"geneName\\\":\\\"gene name 2\\\",\\\"phenotypes\\\":[{\\\"phenotype\\\":\\\"disease 2\\\",\\\"description\\\":\\\"COVID-19\\\",\\\"mapping\\\":\\\"chromosome deletion or duplication syndrome\\\",\\\"inheritances\\\":[\\\"whatever\\\",\\\"never-ever\\\"],\\\"comments\\\":[\\\"nondiseases\\\"]}]}]\", reader.GetAnnotation(\"gene2\"));\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/SAUtils/GenericScoreParserTests/GenericScoreParserTests.cs",
    "content": "using System.IO;\nusing System.Linq;\nusing ErrorHandling.Exceptions;\nusing SAUtils.GenericScore;\nusing SAUtils.GenericScore.GenericScoreParser;\nusing UnitTests.TestUtilities;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.GenericScoreParserTests\n{\n    public sealed class GenericScoreParserTests\n    {\n        private ParserSettings _parserSettings = new(\n            new ColumnIndex(0, 2, 3, 4, 5, null),\n            new[] {\"A\", \"C\", \"G\", \"T\"},\n            GenericScoreParser.MaxRepresentativeScores\n        );\n\n        [Fact]\n        public void TestParserNonNumericValues()\n        {\n            var writer = new StreamWriter(new MemoryStream());\n            writer.WriteLine(\"#chr\\tpos\\tref\\talt\\tscore\");\n            writer.WriteLine(\"1\\t10003\\t10003\\tA\\tG\\taskdlj\");\n            writer.WriteLine(\"asd\\t10003\\t10003\\tA\\tG\\taskdlj\");\n            writer.Flush();\n            writer.BaseStream.Position = 0;\n\n            using (var streamReader = new StreamReader(writer.BaseStream))\n            using (var reader = new GenericScoreParser(_parserSettings, streamReader, ChromosomeUtilities.RefNameToChromosome))\n            {\n                GenericScoreItem[] genericScoreItems = reader.GetItems().ToArray();\n                Assert.Empty(genericScoreItems);\n            }\n        }\n\n        [Fact]\n        public void TestMaxScore()\n        {\n            var writer = new StreamWriter(new MemoryStream());\n            writer.WriteLine(\"#chr\\tpos\\tref\\talt\\tscore\");\n            writer.WriteLine(\"1\\t10003\\t10003\\tA\\tG\\t0.1\");\n            writer.WriteLine(\"1\\t10003\\t10003\\tA\\tG\\t0.5\");\n            writer.Flush();\n\n            writer.BaseStream.Position = 0;\n            _parserSettings = new ParserSettings(\n                new ColumnIndex(0, 2, 3, 4, 5, null),\n                new[] {\"A\", \"C\", \"G\", \"T\"},\n                GenericScoreParser.MaxRepresentativeScores\n            
);\n\n            using (var streamReader = new StreamReader(writer.BaseStream))\n            using (var reader = new GenericScoreParser(_parserSettings, streamReader, ChromosomeUtilities.RefNameToChromosome))\n            {\n                GenericScoreItem[] genericScoreItems = reader.GetItems().ToArray();\n                Assert.Single(genericScoreItems);\n                Assert.Equal(0.5, genericScoreItems[0].Score);\n            }\n        }\n\n        [Fact]\n        public void TestMinScore()\n        {\n            var writer = new StreamWriter(new MemoryStream());\n            writer.WriteLine(\"#chr\\tpos\\tref\\talt\\tscore\");\n            writer.WriteLine(\"1\\t10003\\t10003\\tA\\tG\\t0.1\");\n            writer.WriteLine(\"1\\t10003\\t10003\\tA\\tG\\t0.5\");\n            writer.Flush();\n\n            writer.BaseStream.Position = 0;\n            _parserSettings = new ParserSettings(\n                new ColumnIndex(0, 2, 3, 4, 5, null),\n                new[] {\"A\", \"C\", \"G\", \"T\"},\n                GenericScoreParser.MinRepresentativeScores\n            );\n            using (var streamReader = new StreamReader(writer.BaseStream))\n            using (var reader = new GenericScoreParser(_parserSettings, streamReader, ChromosomeUtilities.RefNameToChromosome))\n            {\n                GenericScoreItem[] genericScoreItems = reader.GetItems().ToArray();\n                Assert.Single(genericScoreItems);\n                Assert.Equal(0.1, genericScoreItems[0].Score);\n            }\n        }\n\n        [Fact]\n        public void TestNonConflictingScores()\n        {\n            var writer = new StreamWriter(new MemoryStream());\n            writer.WriteLine(\"#chr\\tpos\\tref\\talt\\tscore\");\n            writer.WriteLine(\"1\\t10003\\t10003\\tA\\tG\\t0.1\");\n            writer.WriteLine(\"1\\t10003\\t10003\\tA\\tG\\t0.5\");\n            writer.Flush();\n\n            writer.BaseStream.Position = 0;\n            _parserSettings = new 
ParserSettings(\n                new ColumnIndex(0, 2, 3, 4, 5, null),\n                new[] {\"A\", \"C\", \"G\", \"T\"},\n                GenericScoreParser.NonConflictingScore\n            );\n            using (var streamReader = new StreamReader(writer.BaseStream))\n            using (var reader = new GenericScoreParser(_parserSettings, streamReader, ChromosomeUtilities.RefNameToChromosome))\n            {\n                Assert.Throws<UserErrorException>(() => reader.GetItems().ToArray());\n            }\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/InputFileParsers/AlleleReaderTests.cs",
    "content": "﻿using System.IO;\nusing System.Linq;\nusing Genome;\nusing SAUtils.InputFileParsers;\nusing UnitTests.TestDataStructures;\nusing UnitTests.TestUtilities;\nusing Variants;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.InputFileParsers\n{\n    public sealed class AlleleReaderTests\n    {\n        private static Stream GetAncestralAlleleStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##AncestralAllele\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\"1\\t13284\\trs548333521\\tG\\tA\\t100\\tPASS\\tAC=7;AF=0.00139776;AN=5008;NS=2504;DP=26384;EAS_AF=0.001;AMR_AF=0;AFR_AF=0.0045;EUR_AF=0;SAS_AF=0;AA=g|||;VT=SNP;EAS_AN=1008;EAS_AC=1;EUR_AN=1006;EUR_AC=0;AFR_AN=1322;AFR_AC=6;AMR_AN=694;AMR_AC=0;SAS_AN=978;SAS_AC=0\");\n            writer.WriteLine(\"1\\t13289\\trs568318295\\tC\\tT\\t100\\tPASS\\tAC=3;AF=0.000599042;AN=5008;NS=2504;DP=25361;EAS_AF=0.003;AMR_AF=0;AFR_AF=0;EUR_AF=0;SAS_AF=0;AA=c|||;VT=SNP;EAS_AN=1008;EAS_AC=3;EUR_AN=1006;EUR_AC=0;AFR_AN=1322;AFR_AC=0;AMR_AN=694;AMR_AC=0;SAS_AN=978;SAS_AC=0\");\n            writer.WriteLine(\"1\\t13313\\trs527952245\\tT\\tG\\t100\\tPASS\\tAC=1;AF=0.000199681;AN=5008;NS=2504;DP=20943;EAS_AF=0;AMR_AF=0;AFR_AF=0;EUR_AF=0.001;SAS_AF=0;AA=t|||;VT=SNP;EAS_AN=1008;EAS_AC=0;EUR_AN=1006;EUR_AC=1;AFR_AN=1322;AFR_AC=0;AMR_AN=694;AMR_AC=0;SAS_AN=978;SAS_AC=0\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void GetItems_test()\n        {\n            var sequence = new SimpleSequence(new string('T', VariantUtils.MaxUpstreamLength) + \"G\" + new string('T', 13289 - 13284) + \"C\" + new string('T', 13313 - 13289) + \"T\", 13284 - 1 - VariantUtils.MaxUpstreamLength);\n\n            var seqProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh37, sequence, 
ChromosomeUtilities.RefNameToChromosome);\n            var reader = new AncestralAlleleReader(new StreamReader(GetAncestralAlleleStream()), seqProvider);\n\n            var items = reader.GetItems().ToList();\n\n            Assert.Equal(3, items.Count);\n            Assert.Equal(\"\\\"g\\\"\", items[0].GetJsonString());\n        }\n\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/InputFileParsers/ClinGenTests.cs",
    "content": "﻿using System.IO;\nusing System.Linq;\nusing SAUtils.InputFileParsers.ClinGen;\nusing UnitTests.TestUtilities;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.InputFileParsers\n{\n    public sealed class ClinGenTests\n    {\n        private static Stream GetStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"nsv530705\\t1\\t564405\\t8597804\\t0\\t1\\tcopy_number_loss\\tpathogenic\\tFalse\\tDevelopmental delay AND/OR other significant developmental or morphological phenotypes\\t\");\n            writer.WriteLine(\"nsv530706\\t1\\t564424\\t3262790\\t0\\t1\\tcopy_number_loss\\tpathogenic\\tFalse\\tAbnormal facial shape,Abnormality of cardiac morphology,Global developmental delay,Muscular hypotonia\\tHP:0001252,HP:0001263,HP:0001627,HP:0001999,MedGen:CN001147,MedGen:CN001157,MedGen:CN001482,MedGen:CN001810\");\n            writer.WriteLine(\"nsv530300\\t1\\t728138\\t5066371\\t1\\t0\\tcopy_number_gain\\tpathogenic\\tFalse\\tAbnormality of cardiac morphology,Cleft palate,Global developmental delay\\tHP:0000175,HP:0001263,HP:0001627,MedGen:C2240378,MedGen:CN001157,MedGen:CN001482\");\n            writer.WriteLine(\"nsv530780\\t1\\t807685\\t2574042\\t1\\t1\\tcopy_number_variation\\tpathogenic\\tFalse\\tDevelopmental delay AND/OR other significant developmental or morphological phenotypes,Global developmental delay,Hirsutism,Obesity,Seizure,Short stature\\tHP:0001007,HP:0001250,HP:0001263,HP:0001513,HP:0004322,MedGen:C0019572,MedGen:C0349588,MedGen:C1959629,MedGen:C1963185,MedGen:CN001157\");\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void GetItems()\n        {\n            using (var reader = new ClinGenReader(new StreamReader(GetStream()), ChromosomeUtilities.RefNameToChromosome))\n            {\n                var items = reader.GetItems().ToList();\n\n              
  Assert.Equal(4, items.Count);\n                Assert.Equal(\"\\\"chromosome\\\":\\\"1\\\",\\\"begin\\\":564405,\\\"end\\\":8597804,\\\"variantType\\\":\\\"copy_number_loss\\\",\\\"id\\\":\\\"nsv530705\\\",\\\"clinicalInterpretation\\\":\\\"pathogenic\\\",\\\"phenotypes\\\":[\\\"Developmental delay AND/OR other significant developmental or morphological phenotypes\\\"],\\\"observedLosses\\\":1\", items[0].GetJsonString());\n            }\n\n        }\n\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/InputFileParsers/ClinVarXmlReaderTests.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing Genome;\r\nusing IO;\r\nusing Moq;\r\nusing SAUtils.DataStructures;\r\nusing SAUtils.InputFileParsers.ClinVar;\r\nusing UnitTests.TestDataStructures;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.SAUtils.InputFileParsers\r\n{\r\n    public sealed class ClinVarXmlReaderTests\r\n    {\r\n        private static ISequenceProvider GetSequenceProvider(GenomeAssembly assembly, int start, string refSequence)\r\n        {\r\n            var seqProvider = new Mock<ISequenceProvider>();\r\n            seqProvider.Setup(x => x.RefNameToChromosome).Returns(ChromosomeUtilities.RefNameToChromosome);\r\n            seqProvider.Setup(x => x.Assembly).Returns(assembly);\r\n            seqProvider.Setup(x => x.Sequence).Returns(new SimpleSequence(refSequence, start - 1));\r\n            return seqProvider.Object;\r\n        }\r\n\r\n        [Fact]\r\n        public void BasicReadTest()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 41234419, \"A\");\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000077146.xml\")),Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            \r\n            var clinVarItem = items.First();\r\n            Assert.Equal(\"RCV000077146.3\", clinVarItem.Id);\r\n            Assert.Equal(\"17\", clinVarItem.Chromosome.EnsemblName);\r\n            Assert.Equal(41234419, clinVarItem.Position);\r\n            Assert.Equal(\"A\", clinVarItem.RefAllele);\r\n            Assert.Equal(\"C\", clinVarItem.AltAllele);\r\n            Assert.Equal(\"2019-12-15\", new DateTime(clinVarItem.LastUpdatedDate).ToString(\"yyyy-MM-dd\"));\r\n            Assert.Equal(clinVarItem.AlleleOrigins, new 
List<string> { \"germline\" });\r\n            Assert.Equal(\"C2676676\", clinVarItem.MedGenIds.First());\r\n            Assert.Equal(\"145\", clinVarItem.OrphanetIds.First());\r\n            Assert.Equal(\"604370\", clinVarItem.OmimIds.First());\r\n            Assert.Equal(\"Breast-ovarian cancer, familial 1\", clinVarItem.Phenotypes.First());\r\n        }\r\n\r\n        [Fact]\r\n        public void RCV000001373_NoExtraOmimId()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 3209662, \"AGCAGACGGGCA\");\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000001373.xml\")), Stream.Null, sequenceProvider);\r\n            var clinVarItems = reader.GetRcvItems().ToArray();\r\n            Assert.Single(clinVarItems);\r\n\r\n            var clinVarItem = clinVarItems[0];\r\n            Assert.Equal(\"RCV000001373.3\", clinVarItem.Id);\r\n\r\n            var omimIds = clinVarItem.OmimIds;\r\n            Assert.Single(omimIds);\r\n            Assert.Equal(\"610206.0007\", omimIds.First());\r\n        }\r\n\r\n        [Fact]\r\n        public void RCV000435546_NotMissing()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 110221557, \"CGCGG\");\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000435546.xml\")), Stream.Null, sequenceProvider);\r\n            var clinVarItems = reader.GetRcvItems();\r\n            Assert.True(clinVarItems.Any());\r\n        }\r\n\r\n\r\n        [Fact]\r\n        public void MissingAltAllele()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 118165691, \"C\");\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000120902.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            
Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.Equal(\"C\", clinVarItem.RefAllele);\r\n                Assert.Equal(\"G\", clinVarItem.AltAllele);\r\n            }\r\n        }\r\n\r\n        \r\n        [Fact]\r\n        public void NonEnglishChars()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 225592188, \"TAGAAGA\");\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000087262.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.Equal(\"Pelger-Huët anomaly\", clinVarItem.Phenotypes.First());\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void WrongPosition()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 112064826, \"G\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000073701.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                switch (clinVarItem.Position)\r\n                {\r\n                    case 112064826:\r\n                        Assert.Equal(\"G\", clinVarItem.RefAllele);\r\n                        Assert.Equal(\"C\", clinVarItem.AltAllele);\r\n                        break;\r\n                    default:\r\n                        throw new InvalidDataException($\"Unexpected clinvar item start point : {clinVarItem.Position}\");\r\n                }\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void PubmedTest1()\r\n        {\r\n            var sequenceProvider = 
GetSequenceProvider(GenomeAssembly.GRCh37, 10183453, \"AGCGCGCACGCAGCTCCGCCCC\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000152657.xml\")),Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.Equal( new List<long> { 12114475, 18836774, 22357542, 24033266 }, clinVarItem.PubmedIds);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void PubmedTest2()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 5247992, \"CAAAG\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000016673.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.Equal(new List<long> { 6826539, 9113933, 9845707, 12000828, 12383672 }, clinVarItem.PubmedIds);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void PubmedTest3()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 55259485, \"C\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000038438.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.Equal( new List<long> { 17285735, 17877814, 22848293, 24033266 }, clinVarItem.PubmedIds);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void PubmedTest4()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 
43609944, \"GCTGT\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000021819.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.Equal( new List<long> { 7595167, 8099202, 8612479 }, clinVarItem.PubmedIds);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void PubmedTest5()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 88907409, \"A\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000000734.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.Equal(\"699\", clinVarItem.VariationId);\r\n                Assert.Null(clinVarItem.PubmedIds);\r\n                Assert.Contains(\"\\\"variationId\\\":\\\"699\\\"\", clinVarItem.GetJsonString());\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void PubmedTest6()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 118165691, \"C\");\r\n\r\n            //extracting from SCV record\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000120902.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.Equal(clinVarItem.PubmedIds, new List<long> { 24728327 });\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void PubmedTest7_comma_trimming()\r\n        {\r\n            var 
sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 41258568, \"A\");\r\n\r\n            //extracting from SCV record\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000167792.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.Equal(clinVarItem.PubmedIds, new List<long> { 23239986, 28492532, 30472649 });\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void MultiScvPubmed()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 15589551, \"AG\");\r\n\r\n            //extracting from SCV record\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000194003.xml\")) , Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.Equal(clinVarItem.PubmedIds, new List<long> {25741868, 26092869});\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void NoClinVarItem_due_to_ref_mismatch()\r\n        {\r\n            var sequenceProvider =\r\n                GetSequenceProvider(GenomeAssembly.GRCh37, 90982267, \"A\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000000101.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            Assert.False(reader.GetRcvItems().Any());\r\n        }\r\n\r\n        [Fact]\r\n        public void ClinVarForRef()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 31496350, \"C\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000124712.xml\")), 
Stream.Null, sequenceProvider);\r\n\r\n            var clinVarList = new List<ClinVarItem>();\r\n            foreach (var clinVarItem in reader.GetRcvItems())\r\n            {\r\n                clinVarList.Add(clinVarItem);\r\n                Assert.Equal(clinVarItem.RefAllele, clinVarItem.AltAllele);\r\n            }\r\n\r\n            Assert.Single(clinVarList);\r\n        }\r\n\r\n        [Fact]\r\n        public void MultiplePhenotypes()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 172659738, \"C\");\r\n\r\n            //no citations show up for this RCV in the website. But the XML has these pubmed ids under fields that we parse pubmed ids from\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000144179.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                var expectedPhenotypes = new List<string> { \"Single ventricle\", \"small Atrial septal defect\" };\r\n                Assert.Equal(expectedPhenotypes, clinVarItem.Phenotypes);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void MultipleOrigins()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 18671566, \"G\");\r\n            //no citations show up for this RCV in the website. 
But the XML has these pubmed ids under fields that we parse pubmed ids from\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000080071.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                var expectedOrigins = new List<string> { \"germline\", \"maternal\", \"unknown\" };\r\n                Assert.Equal(expectedOrigins, clinVarItem.AlleleOrigins);\r\n            }\r\n        }\r\n\r\n\r\n        [Fact]\r\n        public void SkipGeneralCitations()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 67705958, \"G\");\r\n            //no citations show up for this RCV in the website. But the XML has these pubmed ids under fields that we parse pubmed ids from\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000003254.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            foreach (var clinVarItem in reader.GetRcvItems())\r\n            {\r\n                Assert.Equal(clinVarItem.PubmedIds, new List<long>\r\n                {\r\n                    12023369,\r\n                    17068223,\r\n                    17447842,\r\n                    17587057,\r\n                    17786191,\r\n                    17804789,\r\n                    18438406,\r\n                    19122664,\r\n                    20228799\r\n                });\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void IndelTest()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 187122303, \"TCATACAGGTCATCGCT\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000032548.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var items = 
reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.Equal(\"RCV000032548.8\", clinVarItem.Id);\r\n\r\n                switch (clinVarItem.Id)\r\n                {\r\n                    case \"RCV000032548.8\":\r\n                        Assert.Equal(\"4\", clinVarItem.Chromosome.EnsemblName);\r\n                        Assert.Equal(187122303, clinVarItem.Position);\r\n                        Assert.Equal(17, clinVarItem.RefAllele.Length);\r\n                        Assert.Equal(\"GC\", clinVarItem.AltAllele);\r\n                        break;\r\n                }\r\n            }\r\n        }\r\n\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2034\")]\r\n        public void MultiScvPubmeds()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 116411990, \"C\");\r\n\r\n            //extracting from SCV record\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000203290.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.Equal(clinVarItem.PubmedIds, new List<long> { 23806086, 24088041, 25736269 });\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2034\")]\r\n        public void MultipleAlleleOrigins()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 32890572, \"G\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000112977.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n            
    Assert.Equal(2, clinVarItem.AlleleOrigins.Count());\r\n                Assert.NotEqual(clinVarItem.AlleleOrigins.First(), clinVarItem.AlleleOrigins.Last());\r\n\r\n                foreach (var origin in clinVarItem.AlleleOrigins)\r\n                {\r\n                    Assert.True(origin == \"unknown\" || origin == \"germline\");\r\n                }\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2748\")]\r\n        public void Discard_entries_with_unknown_variant_type()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 66765160, \"CAG\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000485802.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            Assert.False(reader.GetRcvItems().Any());\r\n        }\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2035\")]\r\n        public void EmptyRefAndAlt()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 31805881, \"G\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000083638.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            Assert.Empty(reader.GetRcvItems());\r\n        }\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2036\")]\r\n        public void SkipMicrosattelite()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 87637894, \"CTG\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000005426.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            Assert.Empty(reader.GetRcvItems());\r\n        }\r\n\r\n        [Fact]\r\n        public void SkipAlus()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 32893302, \"TAAA\");\r\n\r\n            var reader = new 
ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000724338.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            Assert.Empty(reader.GetRcvItems());\r\n        }\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2072\")]\r\n        public void MissingClinvarInsertion()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 2337967, \"G\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000179026.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.Equal(2337968, clinVarItem.Position);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2072\")]\r\n        public void MissingClinvarInsertionShift()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 3751645, \"G\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000207071.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.Equal(3751646, clinVarItem.Position);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2072\")]\r\n        public void MissingClinvarInsertionShift2()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 9324412, \"C\");\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000017510.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var 
clinVarItem in items)\r\n            {\r\n                Assert.Equal(9324413, clinVarItem.Position);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2045\")]\r\n        public void AlternatePhenotype()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 42018227, \"GTC\");\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000032707.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.NotNull(clinVarItem.Phenotypes);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2072\")]\r\n        public void IupacBases()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh38, 32339320, \"C\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000113363.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            var altAlleles = new List<string>();\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                altAlleles.Add(clinVarItem.AltAllele);\r\n                Assert.Equal(new[] {\"pathogenic\"}, clinVarItem.Significances);\r\n            }\r\n            \r\n            Assert.Equal(2, altAlleles.Count);\r\n        }\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2072\")]\r\n        public void OmitOmimFromAltPhenotypes()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 55529187, \"G\");\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000030349.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            
var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.Single(clinVarItem.OmimIds);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2072\")]\r\n        public void TrimSpaceFromOmimIds()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 129283520, \"A\");\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000373191.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.Single(clinVarItem.OmimIds);\r\n                Assert.Equal(\"609060\", clinVarItem.OmimIds.FirstOrDefault());\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2099\")]\r\n        public void ClinvarInsertion()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 122318386, \"A\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000153339.xml\")), Stream.Null, sequenceProvider);\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.Equal(122318387, clinVarItem.Position);\r\n            }\r\n        }\r\n\r\n\r\n        [Fact]\r\n        public void Remove9DigitsPubmedId()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 534286, \"C\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000207504.xml\")), Stream.Null, sequenceProvider);\r\n            var items = reader.GetRcvItems();\r\n            
Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.Equal(clinVarItem.PubmedIds, new List<long> { 16329078, 16372351, 19213030, 21438134, 25741868 });\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void CaptureGeneOmimId()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 3494833, \"A\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000235027.xml\")), Stream.Null, sequenceProvider);\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.Equal(clinVarItem.OmimIds, new List<string> { \"601462\", \"610285.0001\" });\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void CapturePhenotypicSeriesOmimIDandUniq()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 122746325, \"A\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000401212.xml\")), Stream.Null, sequenceProvider);\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var clinVarItem in items)\r\n            {\r\n                Assert.Equal(clinVarItem.OmimIds, new List<string> { \"209900\" });\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void CapturePhenotypeSeriesOmimId()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 15513004, \"GGAA\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000406351.xml\")), Stream.Null, sequenceProvider);\r\n            var items = reader.GetRcvItems();\r\n            Assert.True(items.Any());\r\n\r\n            foreach (var 
clinVarItem in items)\r\n            {\r\n                Assert.Equal(clinVarItem.OmimIds, new List<string> { \"213300\" });\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void RemoveDuplicationWithWrongRefSequence()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 10183702, \"GCGGCCGCGGCCCG\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000267121.xml\")), Stream.Null, sequenceProvider);\r\n            Assert.False(reader.GetRcvItems().Any());\r\n        }\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2372\")]\r\n        public void AllelicOmimIdsForSnvs()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 111329354, \"G\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000170338.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var clinvarItems = reader.GetRcvItems().ToList();\r\n            Assert.Single(clinvarItems);\r\n\r\n            var clinvarItem = clinvarItems[0];\r\n            Assert.Single(clinvarItem.OmimIds);\r\n            Assert.Equal(\"612800.0003\", clinvarItem.OmimIds.First());\r\n        }\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2372\")]\r\n        public void AllelicOmimIdsForDeletions()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 111335401, \"GCTC\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000170338.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var clinvarItems = reader.GetRcvItems().ToList();\r\n            Assert.Single(clinvarItems);\r\n\r\n            var clinvarItem = clinvarItems[0];\r\n            Assert.Single(clinvarItem.OmimIds);\r\n            Assert.Equal(\"612800.0002\", clinvarItem.OmimIds.First());\r\n        }\r\n\r\n       
 [Fact]\r\n        [Trait(\"jira\", \"NIR-2372\")]\r\n        public void ExcludeAllelicOmimIdsFromTraits()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 100887648, \"AGAT\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000050055.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var clinvarItems = reader.GetRcvItems().ToList();\r\n            Assert.Single(clinvarItems);\r\n\r\n            var clinvarItem = clinvarItems[0];\r\n            Assert.Single(clinvarItem.OmimIds);\r\n            Assert.Equal(\"216550\", clinvarItem.OmimIds.First());\r\n        }\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2372\")]\r\n        public void AllelicOmimIdsFromAttributeSetChrX()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 595469, \"C\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000010551.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var clinvarItems = reader.GetRcvItems().ToList();\r\n\r\n            Assert.Single(clinvarItems);\r\n\r\n            foreach (var clinVarItem in clinvarItems)\r\n            {\r\n                Assert.Equal(2, clinVarItem.OmimIds.Count());\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2372\")]\r\n        public void AllelicOmimIdsFromAttributeSetChrY()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 545469, \"C\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000010551.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var clinvarItems = reader.GetRcvItems().ToList();\r\n\r\n            Assert.Single(clinvarItems);\r\n\r\n            foreach (var clinVarItem in clinvarItems)\r\n            {\r\n                Assert.Equal(2, 
clinVarItem.OmimIds.Count());\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2372\")]\r\n        public void MultipleEntryRecordVariant1()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 8045031, \"G\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000007484.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var clinvarItems = reader.GetRcvItems().ToList();\r\n\r\n            Assert.Single(clinvarItems);\r\n        }\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2372\")]\r\n        public void MultipleEntryRecordVariant2()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 8021910, \"G\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000007484.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var clinvarItems = reader.GetRcvItems().ToList();\r\n\r\n            Assert.Single(clinvarItems);\r\n        }\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2372\")]\r\n        public void SkipMicrosatellitesWithoutAltAllele()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 46191240, \"ATTCT\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000001054.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            Assert.False(reader.GetRcvItems().Any());\r\n        }\r\n\r\n        [Fact]\r\n        [Trait(\"jira\", \"NIR-2029\")]\r\n        public void MissingClinvarInsertion2()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh38, 132903738, \"A\");\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000342164.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var clinvarItems = 
reader.GetRcvItems().ToList();\r\n            Assert.Single(clinvarItems);\r\n        }\r\n\r\n        [Fact]\r\n        public void Skip_entries_with_inconsistant_start_end()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 132903739, \"AAACGCTCATAGAGTAACTGGTTGTGCAGTAAAAGCAACTGGTCTCAAACGCTCATAGAGTAACTGGTTGTGCAGTAAAAGCAACTGGTCTC\");\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000342164.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            Assert.False(reader.GetRcvItems().Any());\r\n        }\r\n\r\n        [Fact]\r\n        public void Alternate_phenotypes()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 204732740, \"G\");\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000537563.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var clinvarItems = reader.GetRcvItems().ToList();\r\n\r\n            Assert.Single(clinvarItems[0].Phenotypes);\r\n        }\r\n\r\n        [Fact]\r\n        public void Mising_entry()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 36888396, \"C\");\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000171474.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var clinvarItems = reader.GetRcvItems().ToList();\r\n\r\n            Assert.Equal(\"\",clinvarItems[0].RefAllele);\r\n        }\r\n\r\n        [Fact]\r\n        public void Multiple_significance()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh38, 72349076, \"T\");\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000169296.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var clinvarItems = reader.GetRcvItems().ToList();\r\n\r\n            
Assert.Equal(new[]{ \"pathogenic\", \"likely pathogenic\" }, clinvarItems[0].Significances);\r\n        }\r\n\r\n        [Fact]\r\n        public void Multiple_significance_from_explanation()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh38, 12665750, \"T\");\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000001752.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var clinvarItems = reader.GetRcvItems().ToList();\r\n\r\n            Assert.Equal(new[] { \"pathogenic\", \"uncertain significance\" }, clinvarItems[0].Significances);\r\n        }\r\n\r\n        [Fact]\r\n        public void Override_microsatellite_type()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 88929173, \"CGAG\");\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000205418.xml\")), Stream.Null, sequenceProvider);\r\n\r\n            var clinvarItems = reader.GetRcvItems();\r\n\r\n            Assert.Single(clinvarItems);\r\n        }\r\n\r\n        [Fact]\r\n        public void OneRcv_oneVcv()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 31496350, \"C\");\r\n\r\n            var reader = new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"RCV000124712.xml\")),\r\n                FileUtilities.GetReadStream(Resources.VcvXmlFiles(\"VCV000137106.xml\")), sequenceProvider);\r\n            \r\n            var items = reader.GetItems().ToArray();\r\n            Assert.Equal(2,items.Length);\r\n            var rcvJson = items[1].GetJsonString();\r\n            Assert.Contains(\"VCV000137106.3\", rcvJson);\r\n        }\r\n        \r\n        [Fact]\r\n        public void TwoRcv_oneVcv()\r\n        {\r\n            var sequenceProvider = GetSequenceProvider(GenomeAssembly.GRCh37, 9775688, \"G\");\r\n\r\n            var reader = 
new ClinVarParser(FileUtilities.GetReadStream(Resources.ClinvarXmlFiles(\"Two_RCVs.xml\")),\r\n                FileUtilities.GetReadStream(Resources.VcvXmlFiles(\"VCV000618791.xml\")), sequenceProvider);\r\n            \r\n            var items = reader.GetItems().ToArray();\r\n            Assert.Equal(3, items.Length);\r\n            var rcvJson = items[1].GetJsonString();\r\n            Assert.Contains(\"VCV000618791.5\", rcvJson);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/SAUtils/InputFileParsers/ClinvarVariationParserTests.cs",
    "content": "﻿using System.Linq;\nusing IO;\nusing SAUtils.InputFileParsers.ClinVar;\nusing UnitTests.TestUtilities;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.InputFileParsers\n{\n    public sealed class ClinvarVariationParserTests\n    {\n        [Fact]\n        public void InterpretedRecordsTest()\n        {\n            using (var reader = new ClinVarVariationReader(FileUtilities.GetReadStream(Resources.VcvXmlFiles(\"TwoRecords.xml\"))))\n            {\n                var items = reader.GetItems().ToArray();\n                Assert.Equal(2, items.Length);\n                Assert.Equal(79, items[0].VariantId);\n                Assert.Equal(ClinVarCommon.ReviewStatus.no_criteria, items[0].ReviewStatus);\n                Assert.Equal(new []{\"pathogenic\"}, items[0].Significances);\n                \n                Assert.Equal(86, items[1].VariantId);\n            }\n        }\n        \n        [Fact]\n        public void IncludedRecordTest()\n        {\n            using (var reader = new ClinVarVariationReader(FileUtilities.GetReadStream(Resources.VcvXmlFiles(\"VCV000431749.xml\"))))\n            {\n                var items = reader.GetItems().ToArray();\n                Assert.Equal(ClinVarCommon.ReviewStatus.no_interpretation_single, items[0].ReviewStatus);\n                Assert.Equal(new []{\"no interpretation for the single variant\"}, items[0].Significances);\n\n            }\n        }\n        \n        [Fact]\n        public void SignificanceTest()\n        {\n            using (var reader = new ClinVarVariationReader(FileUtilities.GetReadStream(Resources.VcvXmlFiles(\"VCV000476472.xml\"))))\n            {\n                var items = reader.GetItems().ToArray();\n                Assert.Equal(new []{\"likely benign\",\"uncertain significance\"}, items[0].Significances);\n                \n            }\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/InputFileParsers/CosmicCnvReaderTests.cs",
    "content": "﻿using System.IO;\nusing System.Linq;\nusing Genome;\nusing SAUtils.ExtractCosmicSvs;\nusing UnitTests.TestUtilities;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.InputFileParsers\n{\n    public sealed class CosmicCnvReaderTests\n    {\n        [Fact]\n        public void GetColumnIndices_valid_header()\n        {\n            const string header = @\"CNV_ID\tID_GENE\tgene_name\tID_SAMPLE\tID_TUMOUR\tPrimary site\tSite subtype 1\tSite subtype 2\tSite subtype 3\tPrimary histology\tHistology subtype 1\tHistology subtype 2\tHistology subtype 3\tSAMPLE_NAME\tTOTAL_CN\tMINOR_ALLELE\tMUT_TYPE\tID_STUDY\tGRCh\tChromosome:G_Start..G_Stop\";\n\n            var readStream = ResourceUtilities.GetReadStream(Resources.SaPath(\"CosmicCNV.tsv\"));\n\n            var cnvReader = new CosmicCnvReader(readStream,\n                ChromosomeUtilities.RefNameToChromosome,\n                GenomeAssembly.GRCh37);\n\n            cnvReader.GetColumnIndices(header);\n            // No assert is needed: the test passes as long as the GetColumnIndices call above does not throw.\n        }\n\n        [Fact]\n        public void GetColumnIndices_missing_column()\n        {\n            const string header = @\"CNV_ID\tID_GENE\tgene_name\tID_SAMPLE\tID_TUMOUR\tPrimary site\tSite subtype 1\tSite subtype 2\tSite subtype 3\tPrimary histology\tHistology subtype 1\tHistology subtype 2\tHistology subtype 3\tSAMPLE_NAME\tTOTAL_CN\tMINOR_ALLELE\tMUT_TYPE\tID_STUDY\tChromosome:G_Start..G_Stop\";\n\n            var readStream = ResourceUtilities.GetReadStream(Resources.SaPath(\"CosmicCNV.tsv\"));\n\n            var cnvReader = new CosmicCnvReader(readStream,\n                ChromosomeUtilities.RefNameToChromosome,\n                GenomeAssembly.GRCh37);\n\n            Assert.Throws<InvalidDataException>(()=>cnvReader.GetColumnIndices(header));\n        }\n\n        [Fact]\n        public void GetEntries()\n        {\n            var readStream = 
ResourceUtilities.GetReadStream(Resources.SaPath(\"CosmicCNV.tsv\"));\n\n            var cnvReader = new CosmicCnvReader(readStream,\n                ChromosomeUtilities.RefNameToChromosome,\n                GenomeAssembly.GRCh37);\n\n            var cnvItems = cnvReader.GetEntries();\n\n            Assert.Equal(5, cnvItems.Count());\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/InputFileParsers/CosmicItemTests.cs",
    "content": "﻿using System.Collections.Generic;\nusing SAUtils.DataStructures;\nusing UnitTests.TestUtilities;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.InputFileParsers\n{\n    public sealed class CosmicItemTests\n    {\n        [Fact]\n        public void GetCancerSiteCount_same_study()\n        {\n            var cosmicItem = new CosmicItem(ChromosomeUtilities.Chr1, 100, \"rs101\", \"A\", \"C\", \"GENE0\", new HashSet<CosmicItem.CosmicStudy>\n            {\n                new CosmicItem.CosmicStudy(\"100\", new []{\"primary histology 0\", \"histology subtype 1\"}, new []{\"primarySite 0\", \"site subtype 1\"}),\n                new CosmicItem.CosmicStudy(\"100\", new []{\"primary histology 0\", \"histology subtype 1\"}, new []{\"primarySite 0\", \"site subtype 1\"})\n            }, 1);\n\n            var counts = cosmicItem.GetTissueCounts();\n            Assert.Equal(2, counts.Count);\n            Assert.Equal(1, counts[\"primarySite 0\"]);\n            Assert.Equal(1, counts[\"site subtype 1\"]);\n        }\n\n        [Fact]\n        public void GetTissueCount_different_studies()\n        {\n            var cosmicItem = new CosmicItem(ChromosomeUtilities.Chr1, 100, \"rs101\", \"A\", \"C\", \"GENE0\", new HashSet<CosmicItem.CosmicStudy>\n            {\n                new CosmicItem.CosmicStudy(\"100\", new []{\"primary histology 0\", \"histology subtype 1\"}, new []{\"primarySite 0\", \"site subtype 2\"}),\n                new CosmicItem.CosmicStudy(\"110\", new []{\"primary histology 0\", \"histology subtype 1\"}, new []{\"primarySite 0\", \"site subtype 1\"})\n            }, 1);\n\n            var counts = cosmicItem.GetTissueCounts();\n            Assert.Equal(3, counts.Count);\n            Assert.Equal(2, counts[\"primarySite 0\"]);\n            Assert.Equal(1, counts[\"site subtype 1\"]);\n            Assert.Equal(1, counts[\"site subtype 2\"]);\n        }\n\n        [Fact]\n        public void GetCancerTypeCount_same_study()\n        {\n           
 var cosmicItem = new CosmicItem(ChromosomeUtilities.Chr1, 100, \"rs101\", \"A\", \"C\", \"GENE0\", new HashSet<CosmicItem.CosmicStudy>\n            {\n                new CosmicItem.CosmicStudy(\"100\", new []{\"primary histology 0\", \"histology subtype 1\"}, new []{\"primarySite 0\", \"site subtype 1\"}),\n                new CosmicItem.CosmicStudy(\"100\", new []{\"primary histology 0\", \"histology subtype 1\"}, new []{\"primarySite 0\", \"site subtype 1\"})\n            }, 1);\n\n            var cancerTypeCounts = cosmicItem.GetCancerTypeCounts();\n            Assert.Equal(2, cancerTypeCounts.Count);\n            Assert.Equal(1, cancerTypeCounts[\"primary histology 0\"]);\n            Assert.Equal(1, cancerTypeCounts[\"histology subtype 1\"]);\n        }\n\n\n        [Fact]\n        public void GetCancerTypeCount_different_studies()\n        {\n            var cosmicItem = new CosmicItem(ChromosomeUtilities.Chr1, 100, \"rs101\", \"A\", \"C\", \"GENE0\", new HashSet<CosmicItem.CosmicStudy>\n            {\n                new CosmicItem.CosmicStudy(\"100\", new []{\"primary histology 0\", \"histology subtype 1\"}, new []{\"primarySite 0\", \"site subtype 1\"}),\n                new CosmicItem.CosmicStudy(\"101\", new []{\"primary histology 0\", \"histology subtype 2\"}, new []{\"primarySite 0\", \"site subtype 1\"})\n            }, 1);\n\n            var cancerTypeCounts = cosmicItem.GetCancerTypeCounts();\n            Assert.Equal(3, cancerTypeCounts.Count);\n            Assert.Equal(2, cancerTypeCounts[\"primary histology 0\"]);\n            Assert.Equal(1, cancerTypeCounts[\"histology subtype 1\"]);\n            Assert.Equal(1, cancerTypeCounts[\"histology subtype 2\"]);\n        }\n\n        [Fact]\n        public void GetJsonString()\n        {\n            var cosmicItem = new CosmicItem(ChromosomeUtilities.Chr1, 100, \"rs101\", \"A\", \"C\", \"GENE0\", new HashSet<CosmicItem.CosmicStudy>\n            {\n                new CosmicItem.CosmicStudy(\"100\", 
new []{\"primary histology 0\", \"histology subtype 1\"}, new []{\"primarySite 0\", \"site subtype 1\"}),\n                new CosmicItem.CosmicStudy(\"101\", new []{\"primary histology 0\", \"histology subtype 2\"}, new []{\"primarySite 0\", \"site subtype 1\"})\n            }, 1);\n\n            Assert.Equal(\"\\\"id\\\":\\\"rs101\\\",\\\"refAllele\\\":\\\"A\\\",\\\"altAllele\\\":\\\"C\\\",\\\"gene\\\":\\\"GENE0\\\",\\\"sampleCount\\\":1,\\\"cancerTypesAndCounts\\\":[{\\\"cancerType\\\":\\\"primary histology 0\\\",\\\"count\\\":2},{\\\"cancerType\\\":\\\"histology subtype 1\\\",\\\"count\\\":1},{\\\"cancerType\\\":\\\"histology subtype 2\\\",\\\"count\\\":1}],\\\"cancerSitesAndCounts\\\":[{\\\"cancerSite\\\":\\\"primarySite 0\\\",\\\"count\\\":2},{\\\"cancerSite\\\":\\\"site subtype 1\\\",\\\"count\\\":2}]\", cosmicItem.GetJsonString());\n        }\n\n    }\n}\n"
  },
  {
    "path": "UnitTests/SAUtils/InputFileParsers/DataSourceVersionTests.cs",
    "content": "﻿using System;\nusing IO;\nusing OptimizedCore;\nusing SAUtils.InputFileParsers;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.Providers;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.InputFileParsers\n{\n\tpublic sealed class DataSourceVersionTests\n\t{\n\t\t[Fact]\n\t\tpublic void ReadDataVersionFromFile()\n\t\t{\n            DataSourceVersion version;\n            using (var reader = new DataSourceVersionReader(FileUtilities.GetReadStream(Resources.TopPath(\"dbSNP.version\"))))\n            {\n                version = reader.GetVersion();\n            }\n\n\t\t\tAssert.Equal(\"dbSNP\", version.Name);\n\t\t\tAssert.Equal(\"147\", version.Version);\n\t\t\tAssert.Equal(DateTime.Parse(\"2016-04-08\").Ticks, version.ReleaseDateTicks);\n\t\t\tAssert.True(string.IsNullOrEmpty(version.Description));\n\t\t\tAssert.Contains(\"dataSource=dbSNP\", version.ToString());//vcf output\n\n\t\t\tvar sb = StringBuilderPool.Get();\n\t\t\tversion.SerializeJson(sb);\n\t\t\t\n\t\t\tAssert.Contains(\"name\\\":\\\"dbSNP\", StringBuilderPool.GetStringAndReturn(sb));//json output\n\t\t}\n\n\t\t[Fact]\n\t\tpublic void GetSourceVersionTest()\n\t\t{\n\t\t\tvar versionPath = Resources.TopPath(\"dbSNP.version\");\n\n\t\t\tvar version = DataSourceVersionReader.GetSourceVersion(versionPath);\n\n\t\t\tAssert.Equal(\"dbSNP\", version.Name);\n\t\t\tAssert.Equal(\"147\", version.Version);\n\t\t\tAssert.Equal(DateTime.Parse(\"2016-04-08\").Ticks, version.ReleaseDateTicks);\n\t\t}\n\t}\n}\n"
  },
  {
    "path": "UnitTests/SAUtils/InputFileParsers/DbSnpReaderTests.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing Genome;\r\nusing SAUtils.InputFileParsers.DbSnp;\r\nusing UnitTests.TestDataStructures;\r\nusing UnitTests.TestUtilities;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.SAUtils.InputFileParsers\r\n{\r\n    public sealed class DbSnpReaderTests\r\n    {\r\n        [Fact]\r\n        public void MissingEntry()\r\n        {\r\n            const string vcfLine =\r\n                \"1\t241369\trs11490246\tC\tT\t.\t.\tRS=11490246;RSPOS=241369;dbSNPBuildID=120;SSR=0;SAO=0;VP=0x050000000005000126000100;WGT=1;VC=SNV;ASP;GNO;KGPhase3;CAF=0,1;COMMON=0\";\r\n\r\n            var sequenceProvider = ParserTestUtils.GetSequenceProvider(241369, \"C\", 'A', ChromosomeUtilities.RefNameToChromosome);\r\n            var dbsnpReader = new DbSnpReader(null, sequenceProvider);\r\n            var dbSnpEntry = dbsnpReader.ExtractItem(vcfLine).First();\r\n\r\n            Assert.Equal(11490246, dbSnpEntry.RsId);\r\n        }\r\n\r\n        [Fact]\r\n        public void MissingEntry2()\r\n        {\r\n            const string vcfLine =\r\n                \"17\t828\trs62053745\tT\tC\t.\t.\tRS=62053745;RSPOS=828;dbSNPBuildID=129;SSR=0;SAO=0;VP=0x050100080005140136000100;WGT=1;VC=SNV;SLO;INT;ASP;VLD;GNO;KGPhase1;KGPhase3;CAF=0.2576,0.7424;COMMON=1\";\r\n\r\n            var sequenceProvider = ParserTestUtils.GetSequenceProvider(828, \"T\", 'A', ChromosomeUtilities.RefNameToChromosome);\r\n            var dbsnpReader = new DbSnpReader(null, sequenceProvider);\r\n            var dbSnpEntry = dbsnpReader.ExtractItem(vcfLine).First();\r\n\r\n            Assert.Equal(62053745, dbSnpEntry.RsId);\r\n        }\r\n\r\n        [Fact]\r\n        public void MissingDbsnpId()\r\n        {\r\n            const string vcfLine =\r\n                
\"X\t21505833\trs12395602\tG\tA,C,T\t.\t.\tRS=12395602;RSPOS=21505833;dbSNPBuildID=120;SSR=0;SAO=0;VP=0x05010008000505051f000101;WGT=1;VC=SNV;SLO;INT;ASP;VLD;G5;HD;GNO;KGPhase1\";\r\n\r\n            var sequenceProvider = ParserTestUtils.GetSequenceProvider(21505833, \"G\", 'G', ChromosomeUtilities.RefNameToChromosome);\r\n            var dbsnpReader = new DbSnpReader(null, sequenceProvider);\r\n\r\n            var dbSnpEntries = dbsnpReader.ExtractItem(vcfLine).ToList();\r\n\r\n            Assert.Equal(3, dbSnpEntries.Count);\r\n            Assert.Equal(\"A\", dbSnpEntries[0].AltAllele);\r\n            Assert.Equal(12395602, dbSnpEntries[0].RsId);\r\n            Assert.Equal(\"C\", dbSnpEntries[1].AltAllele);\r\n            Assert.Equal(12395602, dbSnpEntries[1].RsId);\r\n            Assert.Equal(\"T\", dbSnpEntries[2].AltAllele);\r\n            Assert.Equal(12395602, dbSnpEntries[2].RsId);\r\n        }\r\n\r\n        [Obsolete(\"We should not have skipped unit tests.\")]\r\n        [Fact(Skip = \"redo test with AlleleFrequency object\")]\r\n        public void NoMinorAllele()\r\n        {\r\n            const string vcfLine = \"17\t828\trs62053745\tT\tC\t.\t.\tRS=62053745;RSPOS=828;dbSNPBuildID=129;SSR=0;SAO=0;VP=0x050100080005140136000100;WGT=1;VC=SNV;SLO;INT;ASP;VLD;GNO;KGPhase1;KGPhase3;CAF=.,0.7424;COMMON=1\";\r\n            var sequenceProvider = ParserTestUtils.GetSequenceProvider(828, \"T\", 'G', ChromosomeUtilities.RefNameToChromosome);\r\n            var dbsnpReader      = new DbSnpReader(null, sequenceProvider);\r\n            var dbSnpEntry       = dbsnpReader.ExtractItem(vcfLine).First();\r\n\r\n            Assert.Equal(\"C\", dbSnpEntry.AltAllele);            \r\n        }\r\n\r\n        [Obsolete(\"We should not have skipped unit tests.\")]\r\n        [Fact(Skip = \"redo test with AlleleFrequency object\")]\r\n        public void DisregardZeroFreq()\r\n        {\r\n            const string vcfLine = 
\"1\t241369\trs11490246\tC\tT\t.\t.\tRS=11490246;RSPOS=241369;dbSNPBuildID=120;SSR=0;SAO=0;VP=0x050100000005000126000100;WGT=1;VC=SNV;SLO;ASP;GNO;KGPhase3;CAF=0,1;COMMON=0\";\r\n            var sequenceProvider = ParserTestUtils.GetSequenceProvider(241369, \"C\", 'G', ChromosomeUtilities.RefNameToChromosome);\r\n            var dbsnpReader      = new DbSnpReader(null, sequenceProvider);\r\n            var dbSnpEntry       = dbsnpReader.ExtractItem(vcfLine).First();\r\n\r\n            Assert.Equal(\"T\", dbSnpEntry.AltAllele);            \r\n        }\r\n\r\n        private static Stream GetStream()\r\n        {\r\n            var stream = new MemoryStream();\r\n            var writer = new StreamWriter(stream);\r\n\r\n            writer.WriteLine(\"##dbSNP\");\r\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\r\n            writer.WriteLine(\"1\\t10285\\trs866375379\\tT\\tA,C\\t.\\t.\\tRS=866375379;RSPOS=10285;dbSNPBuildID=147;SSR=0;SAO=0;VP=0x050100020005000002000100;GENEINFO=DDX11L1:100287102;WGT=1;VC=SNV;SLO;R5;ASP\");\r\n            writer.WriteLine(\"1\\t10329\\trs150969722\\tAC\\tA\\t.\\t.\\tRS=150969722;RSPOS=10330;dbSNPBuildID=134;SSR=0;SAO=0;VP=0x050000020005000002000200;GENEINFO=DDX11L1:100287102;WGT=1;VC=DIV;R5;ASP\");\r\n\r\n            writer.Flush();\r\n\r\n            stream.Position = 0;\r\n            return stream;\r\n        }\r\n\r\n        [Fact]\r\n        public void GetItems_test()\r\n        {\r\n            var sequence = new SimpleSequence(new string('A', VariantUtils.MaxUpstreamLength) + \"T\" + new string('G', 10329 - 10285) + \"AC\", 10284 - VariantUtils.MaxUpstreamLength);\r\n\r\n            var sequenceProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh37, sequence, ChromosomeUtilities.RefNameToChromosome);\r\n\r\n            var reader = new DbSnpReader(GetStream(), sequenceProvider);\r\n\r\n            var items = reader.GetItems().ToList();\r\n\r\n            Assert.Equal(3, 
items.Count);\r\n            Assert.Equal(\"\\\"rs866375379\\\"\", items[0].GetJsonString());\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/SAUtils/InputFileParsers/DecipherReaderTest.cs",
    "content": "using System.IO;\nusing System.Linq;\nusing SAUtils.InputFileParsers.Decipher;\nusing UnitTests.TestUtilities;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.InputFileParsers\n{\n    public sealed class DecipherTests\n    {\n        private static Stream GetStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n            // file has been modified to 7 columns (NOTE(review): the rows written below have 16 tab-separated columns; confirm this comment is still accurate)\n            writer.WriteLine(\"#population_cnv_id\\tchr\\tstart\\tend\\tdeletion_observations\\tdeletion_frequency\\tdeletion_standard_error\\tduplication_observations\\tduplication_frequency\\tduplication_standard_error\\tobservations\\tfrequency\\tstandard_error\\ttype\\tsample_size\\tstudy\");\n            writer.WriteLine(\"1\\t1\\t10529\\t177368\\t0\\t0\\t1\\t3\\t0.075\\t0.555277708\\t3\\t0.075\\t0.555277708\\t1\\t40\\t42M calls\");\n            writer.WriteLine(\"2\\t1\\t13516\\t91073\\t0\\t0\\t1\\t27\\t0.675\\t0.109713431\\t27\\t0.675\\t0.109713431\\t1\\t40\\t42M call\");\n            writer.WriteLine(\"3\\t1\\t18888\\t35451\\t0\\t0\\t1\\t2\\t0.002366864\\t0.706269473\\t2\\t0.002366864\\t0.706269473\\t1\\t845\\tDDD\");\n            writer.WriteLine(\"4\\t1\\t23946\\t88271\\t27\\t0.031952663\\t0.189350482\\t21\\t0.024852071\\t0.215489247\\t48\\t0.056804734\\t0.140178106\\t0\\t845\\tDDD\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void GetItemsTest()\n        {\n            var decipherReader = new DecipherParser(new StreamReader(GetStream()), ChromosomeUtilities.RefNameToChromosome);\n            var items = decipherReader.GetItems().ToList();\n\n            Assert.Equal(4, items.Count);\n            Assert.Equal(\"\\\"chromosome\\\":\\\"1\\\",\\\"begin\\\":10529,\\\"end\\\":177368,\\\"numDeletions\\\":0,\\\"deletionFrequency\\\":0,\\\"numDuplications\\\":3,\\\"duplicationFrequency\\\":0.075,\\\"sampleSize\\\":40\", 
items[0].GetJsonString());\n            Assert.Equal(\"\\\"chromosome\\\":\\\"1\\\",\\\"begin\\\":13516,\\\"end\\\":91073,\\\"numDeletions\\\":0,\\\"deletionFrequency\\\":0,\\\"numDuplications\\\":27,\\\"duplicationFrequency\\\":0.675,\\\"sampleSize\\\":40\", items[1].GetJsonString());\n            Assert.Equal(\"\\\"chromosome\\\":\\\"1\\\",\\\"begin\\\":18888,\\\"end\\\":35451,\\\"numDeletions\\\":0,\\\"deletionFrequency\\\":0,\\\"numDuplications\\\":2,\\\"duplicationFrequency\\\":0.002367,\\\"sampleSize\\\":845\", items[2].GetJsonString());\n            Assert.Equal(\"\\\"chromosome\\\":\\\"1\\\",\\\"begin\\\":23946,\\\"end\\\":88271,\\\"numDeletions\\\":27,\\\"deletionFrequency\\\":0.031953,\\\"numDuplications\\\":21,\\\"duplicationFrequency\\\":0.024852,\\\"sampleSize\\\":845\", items[3].GetJsonString());\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/InputFileParsers/DgvReaderTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Linq;\r\nusing Compression.Utilities;\r\nusing SAUtils.DataStructures;\r\nusing SAUtils.InputFileParsers.DGV;\r\nusing UnitTests.TestUtilities;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.SAUtils.InputFileParsers\r\n{\r\n    public sealed class DgvReaderTests\r\n    {\r\n        private static readonly string TestDgvFile = Resources.TopPath(\"testDgvParser.txt\");\r\n\r\n        private static IEnumerable<DgvItem> CreateTruthDgvItemSequence()\r\n        {\r\n            yield return new DgvItem(\"nsv945265\", ChromosomeUtilities.Chr1, 352306, 371739, 97, 10, 0, VariantType.complex_structural_alteration);\r\n            yield return new DgvItem(\"nsv161172\", ChromosomeUtilities.Chr1, 88190, 89153, 24, 0, 0, VariantType.copy_number_loss);\r\n            yield return new DgvItem(\"nsv951399\", ChromosomeUtilities.Chr1, 46501, 71800, 1, 1, 0, VariantType.copy_number_gain);\r\n            yield return new DgvItem(\"nsv471522\", ChromosomeUtilities.Chr1, 522139, 756783, 3, 3, 0, VariantType.copy_number_gain);\r\n            yield return new DgvItem(\"nsv10161\", ChromosomeUtilities.Chr1, 712111, 1708649, 31, 11, 7, VariantType.copy_number_variation);\r\n            yield return new DgvItem(\"esv3358119\", ChromosomeUtilities.Chr1, 822853, 822861, 185, 2, 0, VariantType.insertion);\r\n            yield return new DgvItem(\"esv6890\", ChromosomeUtilities.Chr1, 17006189, 17052558, 1, 0, 0, VariantType.inversion);\r\n            yield return new DgvItem(\"esv6517\", ChromosomeUtilities.Chr1, 964760, 965579, 1, 0, 0, VariantType.copy_number_loss);\r\n            yield return new DgvItem(\"esv3310333\", ChromosomeUtilities.Chr1, 17441132, 17441133, 185, 3, 0, VariantType.mobile_element_insertion);\r\n            yield return new DgvItem(\"nsv479682\", ChromosomeUtilities.Chr1, 3787207, 3787207, 9, 0, 0, VariantType.novel_sequence_insertion);\r\n            yield return new 
DgvItem(\"nsv506926\", ChromosomeUtilities.Chr1, 34597680, 34603680, 4, 0, 0, VariantType.structural_alteration);\r\n            yield return new DgvItem(\"esv3302766\", ChromosomeUtilities.Chr1, 38583768, 38583926, 185, 0, 0, VariantType.tandem_duplication);\r\n        }\r\n\r\n        [Fact]\r\n        public void TestDbSnpReader()\r\n        {\r\n            using (var dgvReader = new DgvReader(GZipUtilities.GetAppropriateStreamReader(TestDgvFile), ChromosomeUtilities.RefNameToChromosome))\r\n            {\r\n                Assert.True(dgvReader.GetItems().SequenceEqual(CreateTruthDgvItemSequence()));\r\n            }\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/SAUtils/InputFileParsers/DgvTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing SAUtils.DataStructures;\r\nusing SAUtils.InputFileParsers.DGV;\r\nusing UnitTests.TestUtilities;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.SAUtils.InputFileParsers\r\n{\r\n    public sealed class DgvTests\r\n    {\r\n        [Fact]\r\n        public void ExtractDgvCnv()\r\n        {\r\n            const string dgvLine = \"nsv482937\t1\t1\t2300000\tCNV\tloss\tIafrate_et_al_2004\t15286789\tBAC aCGH,FISH\t\t\tnssv2995976\tM\t\t39\t0\t1\t\tACAP3,AGRN,WASH7P\t\";\r\n\r\n            var dgvItem = DgvReader.ExtractDgvItem(dgvLine, ChromosomeUtilities.RefNameToChromosome);\r\n            var jsonString = dgvItem.GetJsonString();\r\n            Assert.Equal(\"\\\"chromosome\\\":\\\"1\\\",\\\"begin\\\":1,\\\"end\\\":2300000,\\\"variantType\\\":\\\"copy_number_loss\\\",\\\"id\\\":\\\"nsv482937\\\",\\\"sampleSize\\\":39,\\\"observedLosses\\\":1,\\\"variantFreqAll\\\":0.02564\", jsonString );\r\n        }\r\n\r\n        [Fact]\r\n        public void ExtractDgvComplex()\r\n        {\r\n            const string dgvLine = \"esv2421662\t1\t12841928\t12971833\tOTHER\tcomplex\tAltshuler_et_al_2010\t20811451\tSNP array\t\t\tessv5038349,essv5012238\tM\t\t1184\t20\t70\t\tHNRNPCL1,LOC649330,PRAMEF1,PRAMEF10,PRAMEF11,PRAMEF2,PRAMEF4\tNA10838,NA10847\";\r\n\r\n            var dgvItem = DgvReader.ExtractDgvItem(dgvLine, ChromosomeUtilities.RefNameToChromosome);\r\n            var jsonString = dgvItem.GetJsonString();\r\n            Assert.Equal(\"\\\"chromosome\\\":\\\"1\\\",\\\"begin\\\":12841928,\\\"end\\\":12971833,\\\"variantType\\\":\\\"complex_structural_alteration\\\",\\\"id\\\":\\\"esv2421662\\\",\\\"sampleSize\\\":1184,\\\"observedGains\\\":20,\\\"observedLosses\\\":70,\\\"variantFreqAll\\\":0.07601\", jsonString);\r\n            \r\n        }\r\n\r\n        [Fact]\r\n        public void EmptyObservedLossesAndGains()\r\n        {\r\n            const string dgvLine = 
\"nsv161172\t1\t88190\t89153\tCNV\tdeletion\tMills_et_al_2006\t16902084\tSequencing\t\t\tnssv179750\tM\t\t24\t\t\t\t\t\";\r\n\r\n            var dgvItem = DgvReader.ExtractDgvItem(dgvLine, ChromosomeUtilities.RefNameToChromosome);\r\n            var jsonString = dgvItem.GetJsonString();\r\n            Assert.Equal(\"\\\"chromosome\\\":\\\"1\\\",\\\"begin\\\":88190,\\\"end\\\":89153,\\\"variantType\\\":\\\"copy_number_loss\\\",\\\"id\\\":\\\"nsv161172\\\",\\\"sampleSize\\\":24\", jsonString);\r\n            //Assert.Equal(\"1\", dgvInterval.Chromosome.EnsemblName);\r\n            //Assert.Equal(88190, dgvInterval.Start);\r\n            //Assert.Equal(89153, dgvInterval.End);\r\n            //Assert.Equal(\"copy_number_loss\", dgvInterval.VariantType.ToString());\r\n            //Assert.Equal(\"dgv\", dgvInterval.Source);\r\n            //Assert.Equal(\"nsv161172\", dgvInterval.StringValues[\"id\"]);\r\n            //Assert.Equal(24, dgvInterval.IntValues[\"sampleSize\"]);\r\n            //Assert.False(dgvInterval.IntValues.ContainsKey(\"observedGains\"));\r\n            //Assert.False(dgvInterval.IntValues.ContainsKey(\"observedLosses\"));\r\n            //Assert.False(dgvInterval.PopulationFrequencies.ContainsKey(\"variantFreqAll\"));\r\n\r\n        }\r\n\r\n        [Fact]\r\n        public void EqualityAndHash()\r\n        {\r\n            var dgvItem = new DgvItem(\"dgv101\", ChromosomeUtilities.Chr1, 100, 200, 123, 34, 32, VariantType.complex_structural_alteration);\r\n\r\n            var dgvHash = new HashSet<DgvItem> { dgvItem };\r\n\r\n            Assert.Single(dgvHash);\r\n            Assert.Contains(dgvItem, dgvHash);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/SAUtils/InputFileParsers/GlobalMinorReaderTests.cs",
    "content": "﻿using System.IO;\nusing System.Linq;\nusing SAUtils.DataStructures;\nusing SAUtils.InputFileParsers.DbSnp;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.Interface.SA;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.InputFileParsers\n{\n    public sealed class GlobalMinorReaderTests\n    {\n        private static Stream GetStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##dbSNP\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\"1\\t15274\\trs2758118\\tA\\tG,T\\t.\\t.\\tRS=2758118;RSPOS=15274;RV;dbSNPBuildID=111;SSR=0;SAO=0;VP=0x050000080005000126000100;GENEINFO=WASH7P:653635;WGT=1;VC=SNV;INT;ASP;GNO;KGPhase3;CAF=0.01178,0.3472,0.641;COMMON=1\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void GetItems_test()\n        {\n            var reader = new GlobalMinorReader(GetStream(), ChromosomeUtilities.RefNameToChromosome);\n\n            var items = reader.GetItems().Cast<ISupplementaryDataItem>().ToList();\n\n            var globalMinor = SuppDataUtilities.GetPositionalAnnotation(items);\n\n            Assert.Equal(\"{\\\"globalMinorAllele\\\":\\\"G\\\",\\\"globalMinorAlleleFrequency\\\":0.3472}\", globalMinor.GetJsonString());\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/InputFileParsers/GmeReaderTests.cs",
    "content": "using System.IO;\nusing System.Linq;\nusing Genome;\nusing SAUtils.InputFileParsers.Gme;\nusing UnitTests.TestDataStructures;\nusing UnitTests.TestUtilities;\nusing Variants;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.InputFileParsers\n{\n    public sealed class GmeTests\n    {\n        private static Stream GetStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n            // file has been modified to 7 columns\n            writer.WriteLine(\"#chrom\\tpos\\tref\\talt\\tfilter\\tGME_GC\\tGME_AC\\tGME_AF\");\n            writer.WriteLine(\"1\\t69134\\tA\\tG\\tVQSRTrancheSNP99.90to100.00\\t10,192\\t0.04950495049504951\");\n            writer.WriteLine(\"1\\t69270\\tA\\tG\\tPASS\\t518,224\\t0.6981132075471698\");\n            writer.WriteLine(\"1\\t69428\\tT\\tG\\tPASS\\t74,1396\\t0.050340136054421766\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void GetItems_test()\n        {\n            var sequence = new SimpleSequence(new string('T', VariantUtils.MaxUpstreamLength) + \"A\" +new string('T', 69270- 69134) + \"A\" +new string('T', 69428- 69270-1)+ \"T\", 69134 - 1 - VariantUtils.MaxUpstreamLength);\n\n            var seqProvider  = new SimpleSequenceProvider(GenomeAssembly.GRCh37, sequence, ChromosomeUtilities.RefNameToChromosome);\n            var gmeReader = new GmeParser(new StreamReader(GetStream()), seqProvider);\n\n            var items = gmeReader.GetItems().ToList();\n\n            Assert.Equal(3,                                                                   items.Count);\n            Assert.Equal(\"\\\"allAc\\\":10,\\\"allAn\\\":202,\\\"allAf\\\":0.0495,\\\"failedFilter\\\":true\", items[0].GetJsonString());\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/InputFileParsers/MergedCosmicReaderTests.cs",
    "content": "﻿using System.Collections;\r\nusing System.Linq;\r\nusing SAUtils.InputFileParsers.Cosmic;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.SAUtils.InputFileParsers\r\n{\r\n    public sealed class MergedCosmicReaderTests\r\n    {\r\n        [Fact]\r\n        public void TwoStudyCosmicCoding()\r\n        {\r\n            var seqProvider = ParserTestUtils.GetSequenceProvider(35416, \"A\", 'C', ChromosomeUtilities.RefNameToChromosome);\r\n            var cosmicReader = new MergedCosmicReader(Resources.TopPath(\"cosm5428243.vcf\"), Resources.TopPath(\"cosm5428243.tsv\"), seqProvider);\r\n\r\n            var cosmicItem = cosmicReader.GetItems().ToList()[0];\r\n\r\n            var studies = cosmicItem.Studies.ToList();\r\n\r\n            Assert.Equal(\"544\", studies[0].Id);\r\n            Assert.Equal(new[] { \"haematopoietic and lymphoid tissue\" }, studies[0].Sites);\r\n            Assert.Equal(new[] { \"haematopoietic neoplasm\" }, studies[0].Histologies);\r\n            //Assert.Equal(new [] { \"haematopoietic neoplasm\", \"acute myeloid leukaemia\" }, study.Histologies);\r\n\r\n            Assert.Equal(\"544\", studies[1].Id);\r\n            Assert.Equal(new[] { \"haematopoietic;lymphoid tissue\" }, studies[1].Sites);\r\n            Assert.Equal(new[] { \"haematopoietic neoplasm\" }, studies[1].Histologies);\r\n            //Assert.Equal(new[] { \"haematopoietic_neoplasm\", \"acute_myeloid_leukaemia\" }, study.Histologies);\r\n        }\r\n\r\n        [Fact]\r\n        public void IndelWithNoLeadingBase()\r\n        {\r\n            var seqProvider = ParserTestUtils.GetSequenceProvider(10188320, \"GGTACTGAC\", 'A', ChromosomeUtilities.RefNameToChromosome);\r\n            //the files provided are just for the sake of construction. 
The main aim is to test the VCF line parsing capabilities\r\n            var cosmicReader = new MergedCosmicReader(Resources.TopPath(\"cosm5428243.vcf\"), Resources.TopPath(\"cosm5428243.tsv\"), seqProvider);\r\n\r\n            const string vcfLine1 = \"3\t10188320\tCOSM14426\tGGTACTGAC\tA\t.\t.\tGENE=VHL;STRAND=+;CDS=c.463G>A;AA=p.?;CNT=2\";\r\n            const string vcfLine2 = \"3\t10188320\tCOSM18152\tG\tA\t.\t.\tGENE=VHL;STRAND=+;CDS=c.463G>A;AA=p.V155M;CNT=7\";\r\n\r\n            var items = cosmicReader.ExtractCosmicItems(vcfLine1);\r\n            Assert.Equal(\"GGTACTGAC\", items[0].RefAllele);\r\n            Assert.Equal(\"A\", items[0].AltAllele);\r\n            Assert.Equal(10188320, items[0].Position);\r\n\r\n            var items2 = cosmicReader.ExtractCosmicItems(vcfLine2);\r\n            Assert.Equal(\"G\", items2[0].RefAllele);\r\n            Assert.Equal(\"A\", items2[0].AltAllele);\r\n            Assert.Equal(10188320, items2[0].Position);\r\n        }\r\n\r\n        /// <summary>\r\n        /// testing if cosmic alternate allele is correctly output\r\n        /// </summary>\r\n        [Fact]\r\n        public void CosmicAltAllele()\r\n        {\r\n            var seqProvider = ParserTestUtils.GetSequenceProvider(6928019, \"C\", 'A', ChromosomeUtilities.RefNameToChromosome);\r\n            var cosmicReader = new MergedCosmicReader(Resources.TopPath(\"COSM983708.vcf\"), Resources.TopPath(\"COSM983708.tsv\"), seqProvider);\r\n            var items = cosmicReader.GetItems().ToList();\r\n\r\n            Assert.Single((IEnumerable) items);\r\n            Assert.Contains(\"\\\"refAllele\\\":\\\"-\\\"\", items[0].GetJsonString());\r\n        }\r\n\r\n        [Fact]\r\n        public void CosmicAlleleSpecificIndel()\r\n        {\r\n            //10188320\r\n            var seqProvider = ParserTestUtils.GetSequenceProvider(10188320, \"G\", 'A', ChromosomeUtilities.RefNameToChromosome);\r\n            var cosmicReader = new 
MergedCosmicReader(Resources.TopPath(\"COSM18152.vcf\"), Resources.TopPath(\"COSM18152.tsv\"), seqProvider);\r\n            var items = cosmicReader.GetItems();\r\n\r\n            Assert.Single(items);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/SAUtils/InputFileParsers/OneKGenTests.cs",
    "content": "﻿using System.IO;\r\nusing System.Linq;\r\nusing System.Text.RegularExpressions;\r\nusing SAUtils.InputFileParsers.OneKGen;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.SAUtils.InputFileParsers\r\n{\r\n    public sealed class OneKGenTests\r\n    {\r\n        private static string GetAlleleFrequency(string jsonString, string description)\r\n        {\r\n            var regexMatch = Regex.Match(jsonString, $\"\\\"{description}\\\":([0|1]\\\\.?\\\\d+)?\");\r\n            return regexMatch.Success ? regexMatch.Groups[1].ToString() : null;\r\n        }\r\n\r\n        [Fact]\r\n        public void AlleleFrequencyTest()\r\n        {\r\n            const string vcfLine =\r\n                \"1\t10352\trs555500075\tT\tTA\t100\tPAS\tAC=2191;AF=0.4375;AN=5008;NS=2504;DP=88915;EAS_AF=0.4306;AMR_AF=0.4107;AFR_AF=0.4788;EUR_AF=0.4264;SAS_AF=0.4192;AA=|||unknown(NO_COVERAGE); VT=INDEL;EAS_AN=1008;EAS_AC=434;EUR_AN=1006;EUR_AC=429;AFR_AN=1322;AFR_AC=633;AMR_AN=694;AMR_AC=285;SAS_AN=978;SAS_AC=410\";\r\n            var oneKGenReader = new OneKGenReader(null, ParserTestUtils.GetSequenceProvider(10352,\"T\",'C', ChromosomeUtilities.RefNameToChromosome));\r\n            var oneKItem = oneKGenReader.ExtractItems(vcfLine).First().GetJsonString();\r\n\r\n            Assert.Equal(\"0.4375\", GetAlleleFrequency(oneKItem, \"allAf\"));\r\n            Assert.Equal(\"0.47882\", GetAlleleFrequency(oneKItem, \"afrAf\"));\r\n            Assert.Equal(\"0.410663\", GetAlleleFrequency(oneKItem, \"amrAf\"));\r\n            Assert.Equal(\"0.430556\", GetAlleleFrequency(oneKItem, \"easAf\"));\r\n            Assert.Equal(\"0.426441\", GetAlleleFrequency(oneKItem, \"eurAf\"));\r\n            Assert.Equal(\"0.419223\", GetAlleleFrequency(oneKItem, \"sasAf\"));\r\n            Assert.DoesNotContain(\"ancestralAllele\", oneKItem);\r\n        }\r\n\r\n        [Fact]\r\n        public void MultiAltAlleleTest()\r\n        {\r\n            const string vcfLine 
=\r\n                \"1\t15274\trs62636497\tA\tG,T\t100\tPASS\tAC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC;EAS_AN=1008;EAS_AC=485,523;EUR_AN=1006;EUR_AC=294,712;AFR_AN=1322;AFR_AC=427,842;AMR_AN=694;AMR_AC=191,500;SAS_AN=978;SAS_AC=342,633\";\r\n\r\n            var oneKGenReader = new OneKGenReader(null, ParserTestUtils.GetSequenceProvider(15274, \"A\", 'C', ChromosomeUtilities.RefNameToChromosome));\r\n            var oneKGenItems = oneKGenReader.ExtractItems(vcfLine).ToList();\r\n\r\n            Assert.Equal(2, oneKGenItems.Count);\r\n\r\n            var json1 = oneKGenItems[0].GetJsonString();\r\n            var json2 = oneKGenItems[1].GetJsonString();\r\n\r\n            Assert.Equal(\"0.347244\", GetAlleleFrequency(json1, \"allAf\"));\r\n            Assert.Equal(\"0.322995\", GetAlleleFrequency(json1, \"afrAf\"));\r\n            Assert.Equal(\"0.275216\", GetAlleleFrequency(json1, \"amrAf\"));\r\n            Assert.Equal(\"0.481151\", GetAlleleFrequency(json1, \"easAf\"));\r\n            Assert.Equal(\"0.292247\", GetAlleleFrequency(json1, \"eurAf\"));\r\n            Assert.Equal(\"0.349693\", GetAlleleFrequency(json1, \"sasAf\"));\r\n\r\n            Assert.Equal(\"0.640974\", GetAlleleFrequency(json2, \"allAf\"));\r\n            Assert.Equal(\"0.636914\", GetAlleleFrequency(json2, \"afrAf\"));\r\n            Assert.Equal(\"0.720461\", GetAlleleFrequency(json2, \"amrAf\"));\r\n            Assert.Equal(\"0.518849\", GetAlleleFrequency(json2, \"easAf\"));\r\n            Assert.Equal(\"0.707753\", GetAlleleFrequency(json2, \"eurAf\")); //double check this one: 0.7077535\r\n            Assert.Equal(\"0.647239\", GetAlleleFrequency(json2, \"sasAf\"));\r\n        }\r\n\r\n        [Fact]\r\n        public void PrioritizingSymbolicAllele4Svs()\r\n        {\r\n            const string vcfLine =\r\n                
\"X\t101155257\trs373174489\tGTGCAAAAGCTCTTTAGTTTAATTAGGTCTCAGCTATTTATCTTTGTTCTTAT\tG\t100\tPASS\tAN=3775;AC=1723;AF=0.456424;AA=;EAS_AN=764;EAS_AC=90;EAS_AF=0.1178;EUR_AN=766;EUR_AC=439;EUR_AF=0.5731;AFR_AN=1003;AFR_AC=839;AFR_AF=0.8365;AMR_AN=524;AMR_AC=180;AMR_AF=0.3435;SAS_AN=718;SAS_AC=175;SAS_AF=0.2437\";\r\n\r\n            var oneKGenReader = new OneKGenReader(null, ParserTestUtils.GetSequenceProvider(101155257, \"GTGCAAAAGCTCTTTAGTTTAATTAGGTCTCAGCTATTTATCTTTGTTCTTAT\", 'C', ChromosomeUtilities.RefNameToChromosome));\r\n            var oneKItems = oneKGenReader.ExtractItems(vcfLine);\r\n            var json1 = oneKItems.First().GetJsonString();\r\n            Assert.Equal(\"0.456424\", GetAlleleFrequency(json1, \"allAf\"));\r\n            Assert.Equal(\"0.836491\", GetAlleleFrequency(json1, \"afrAf\"));\r\n            Assert.Equal(\"0.343511\", GetAlleleFrequency(json1, \"amrAf\"));\r\n            Assert.Equal(\"0.117801\", GetAlleleFrequency(json1, \"easAf\"));\r\n            Assert.Equal(\"0.573107\", GetAlleleFrequency(json1, \"eurAf\"));\r\n            Assert.Equal(\"0.243733\", GetAlleleFrequency(json1, \"sasAf\"));\r\n\r\n        }\r\n\r\n        [Fact]\r\n        public void MissingSubPopulationFrequencies()\r\n        {\r\n            const string vcfLine = \"1\\t10616\\trs376342519\\tCCGCCGTTGCAAAGGCGCGCCG\\tC\\t100\\tPASS\\tAN=5008;AC=4973;AF=0.993011;AA=;EAS_AN=1008;EAS_AC=999;EAS_AF=0.9911;EUR_AN=1006;EUR_AC=1000;EUR_AF=0.994;AFR_AN=1322;AFR_AC=1308;AFR_AF=0.9894;AMR_AN=694;AMR_AC=691;AMR_AF=0.9957;SAS_AN=978;SAS_AC=975;SAS_AF=0.9969\";\r\n\r\n            var oneKGenReader = new OneKGenReader(null, ParserTestUtils.GetSequenceProvider(10616, \"CCGCCGTTGCAAAGGCGCGCCG\", 'C', ChromosomeUtilities.RefNameToChromosome));\r\n            var items = oneKGenReader.ExtractItems(vcfLine).ToList();\r\n\r\n            Assert.Single(items);\r\n            
Assert.Equal(\"\\\"allAf\\\":0.993011,\\\"afrAf\\\":0.98941,\\\"amrAf\\\":0.995677,\\\"easAf\\\":0.991071,\\\"eurAf\\\":0.994036,\\\"sasAf\\\":0.996933,\\\"allAn\\\":5008,\\\"afrAn\\\":1322,\\\"amrAn\\\":694,\\\"easAn\\\":1008,\\\"eurAn\\\":1006,\\\"sasAn\\\":978,\\\"allAc\\\":4973,\\\"afrAc\\\":1308,\\\"amrAc\\\":691,\\\"easAc\\\":999,\\\"eurAc\\\":1000,\\\"sasAc\\\":975\", items[0].GetJsonString());\r\n\r\n        }\r\n\r\n        private static Stream GetOneKgSvStream()\r\n        {\r\n            var stream = new MemoryStream();\r\n            var writer = new StreamWriter(stream);\r\n\r\n            writer.WriteLine(\"1\\t668630\\t850204\\tesv3584976\\t<CN2>\\tAC=64;AF=0.0127796;AN=5008;CIEND=-150,150;CIPOS=-150,150;CS=DUP_delly;END=850204;NS=2504;SVTYPE=DUP;IMPRECISE;DP=22135;EAS_AF=0.0595;AMR_AF=0;AFR_AF=0.0015;EUR_AF=0.001;SAS_AF=0.001;VT=SV;EX_TARGET\");\r\n            writer.WriteLine(\"1\\t713044\\t755966\\tesv3584977;esv3584978\\t<CN0>,<CN2>\\tAC=3,206;AF=0.000599042,0.0411342;AN=5008;CS=DUP_gs;END=755966;NS=2504;SVTYPE=CNV;DP=20698;EAS_AF=0.001,0.0615;AMR_AF=0.0014,0.0259;AFR_AF=0,0.0303;EUR_AF=0.001,0.0417;SAS_AF=0,0.045;VT=SV;EX_TARGET\");\r\n            writer.WriteLine(\"1\\t738570\\t742020\\tesv3584979\\t<CN0>\\tAC=1;AF=0.000199681;AN=5008;CIEND=0,354;CIPOS=-348,0;CS=DEL_union;END=742020;NS=2504;SVTYPE=DEL;DP=19859;EAS_AF=0.001;AMR_AF=0;AFR_AF=0;EUR_AF=0;SAS_AF=0;VT=SV;EX_TARGET\");\r\n            writer.WriteLine(\"1\\t645710\\t699999\\tesv3584975\\t<INS:ME:ALU>\\tAC=35;AF=0.00698882;AN=5008;CS=ALU_umary;MEINFO=AluYa4_5,1,223,-;NS=2504;SVLEN=222;SVTYPE=ALU;TSD=null;DP=12290;EAS_AF=0.0069;AMR_AF=0.0072;AFR_AF=0;EUR_AF=0.0189;SAS_AF=0.0041;VT=SV\");\r\n            writer.WriteLine(\"1\\t812283\\t876543\\tesv3584985\\t<INS:ME:LINE1>\\tAC=58;AF=0.0115815;AN=5008;CS=L1_umary;MEINFO=LINE1,2926,3363,+;NS=2504;SVLEN=437;SVTYPE=LINE1;TSD=null;DP=19016;EAS_AF=0.0109;AMR_AF=0.0187;AFR_AF=0.0098;EUR_AF=0.0179;SAS_AF=0.0031;VT=SV\");\r\n            
writer.WriteLine(\"1\\t2397655\\t2401469\\t.;esv3585028\\t<CN0>,<CN2>\\tAC=0,96;AF=0,0.0191693;AN=5008;CS=DUP_gs;END=2401469;NS=2504;SVTYPE=DUP;DP=16784;EAS_AF=0,0.0248;AMR_AF=0,0.0216;AFR_AF=0,0.0287;EUR_AF=0,0.0119;SAS_AF=0,0.0061;VT=SV\");\r\n            writer.Flush();\r\n\r\n            stream.Position = 0;\r\n            return stream;\r\n        }\r\n\r\n        [Fact]\r\n        public void OnekGenSvReader()\r\n        {\r\n            using (var reader = new StreamReader(GetOneKgSvStream()))\r\n            {\r\n                var svReader = new OneKGenSvReader(reader, ChromosomeUtilities.RefNameToChromosome);\r\n\r\n                var svItemList = svReader.GetItems().ToList();\r\n\r\n                Assert.Equal(4, svItemList.Count);\r\n\r\n                Assert.Equal(\"\\\"chromosome\\\":\\\"1\\\",\\\"begin\\\":668631,\\\"end\\\":850204,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"esv3584976\\\",\\\"allAn\\\":5008,\\\"allAc\\\":64,\\\"allAf\\\":0.01278,\\\"afrAf\\\":0.0015,\\\"amrAf\\\":0,\\\"eurAf\\\":0.001,\\\"easAf\\\":0.0595,\\\"sasAf\\\":0.001\", svItemList[0].GetJsonString());\r\n\r\n                Assert.Equal(\"\\\"chromosome\\\":\\\"1\\\",\\\"begin\\\":713045,\\\"end\\\":755966,\\\"variantType\\\":\\\"copy_number_variation\\\",\\\"id\\\":\\\"esv3584977;esv3584978\\\",\\\"allAn\\\":5008,\\\"allAc\\\":209,\\\"allAf\\\":0.041733,\\\"afrAf\\\":0.0303,\\\"amrAf\\\":0.0273,\\\"eurAf\\\":0.0427,\\\"easAf\\\":0.0625,\\\"sasAf\\\":0.045\", svItemList[1].GetJsonString());\r\n\r\n                Assert.Equal(\"\\\"chromosome\\\":\\\"1\\\",\\\"begin\\\":738571,\\\"end\\\":742020,\\\"variantType\\\":\\\"copy_number_loss\\\",\\\"id\\\":\\\"esv3584979\\\",\\\"allAn\\\":5008,\\\"allAc\\\":1,\\\"allAf\\\":0.0002,\\\"afrAf\\\":0,\\\"amrAf\\\":0,\\\"eurAf\\\":0,\\\"easAf\\\":0.001,\\\"sasAf\\\":0\", svItemList[2].GetJsonString());\r\n\r\n                
Assert.Equal(\"\\\"chromosome\\\":\\\"1\\\",\\\"begin\\\":2397656,\\\"end\\\":2401469,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"esv3585028\\\",\\\"allAn\\\":5008,\\\"allAc\\\":96,\\\"allAf\\\":0.019169,\\\"afrAf\\\":0.0287,\\\"amrAf\\\":0.0216,\\\"eurAf\\\":0.0119,\\\"easAf\\\":0.0248,\\\"sasAf\\\":0.0061\", svItemList[3].GetJsonString());\r\n\r\n            }\r\n\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/SAUtils/InputFileParsers/ParserTestUtils.cs",
    "content": "﻿using System.Collections.Generic;\nusing Genome;\nusing Moq;\nusing UnitTests.TestDataStructures;\nusing VariantAnnotation.Interface.Providers;\nusing Variants;\n\nnamespace UnitTests.SAUtils.InputFileParsers\n{\n    public static class ParserTestUtils\n    {\n        public static ISequenceProvider GetSequenceProvider(int position, string refAllele, char upstreamBase, Dictionary<string, Chromosome> refChromDict)\n        {\n            var sequence = new SimpleSequence(new string(upstreamBase, VariantUtils.MaxUpstreamLength) + refAllele, position - 1 - VariantUtils.MaxUpstreamLength);\n\n            return new SimpleSequenceProvider(GenomeAssembly.GRCh37, sequence, refChromDict);\n\n        }\n\n        public static IRefMinorProvider GetRefMinorProvider(List<(Chromosome chrom, int position, string globalMinor)> refMinors)\n        {\n            var refMinorProvider = new Mock<IRefMinorProvider>();\n            foreach (var (chrom, position, globalMinor) in refMinors)\n            {\n                refMinorProvider.Setup(x => x.GetGlobalMajorAllele(chrom, position)).Returns(globalMinor);\n            }\n\n            return refMinorProvider.Object;\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/InputFileParsers/RefMinorTests.cs",
    "content": "﻿using System;\nusing System.IO;\nusing System.Linq;\nusing Genome;\nusing IO;\nusing Moq;\nusing SAUtils.InputFileParsers.OneKGen;\nusing SAUtils.RefMinorDb;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.NSA;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.InputFileParsers\n{\n    public sealed class RefMinorTests\n    {\n        private static Stream GetStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##1000Genomes\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\"1\\t15274\\trs62636497\\tA\\tG,T\\t100\\tPASS\\tAC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC;EAS_AN=1008;EAS_AC=485,523;EUR_AN=1006;EUR_AC=294,712;AFR_AN=1322;AFR_AC=427,842;AMR_AN=694;AMR_AC=191,500;SAS_AN=978;SAS_AC=342,633\");\n            writer.WriteLine(\"1\\t241369\\trs11490246\\tC\\tT\\t100\\tPASS\\tAC=5008;AF=1;AN=5008;NS=2504;DP=8951;EAS_AF=1;AMR_AF=1;AFR_AF=1;EUR_AF=1;SAS_AF=1;AA=.|||;VT=SNP;EAS_AN=1008;EAS_AC=1008;EUR_AN=1006;EUR_AC=1006;AFR_AN=1322;AFR_AC=1322;AMR_AN=694;AMR_AC=694;SAS_AN=978;SAS_AC=978\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void GetItems()\n        {\n            using (var reader = new RefMinorReader(new StreamReader(GetStream()), GetSequenceProvider()))\n            {\n                var items = reader.GetItems().ToList();\n\n                Assert.Equal(3, items.Count);\n            }\n            \n        }\n\n        private static ISequenceProvider GetSequenceProvider()\n        {\n            var seqProvider = 
new Mock<ISequenceProvider>();\n            seqProvider.SetupGet(x => x.Assembly).Returns(GenomeAssembly.GRCh37);\n            seqProvider.SetupGet(x => x.RefNameToChromosome).Returns(ChromosomeUtilities.RefNameToChromosome);\n            seqProvider.Setup(x => x.Sequence.Substring(15274 -1, 1)).Returns(\"A\");\n            seqProvider.Setup(x => x.Sequence.Substring(241369-1, 1)).Returns(\"C\");\n\n            return seqProvider.Object;\n        }\n\n\n        [Fact]\n        public void LoopBack()\n        {\n            var version = new DataSourceVersion(\"onekgen\", \"v0.3\", DateTime.Now.Ticks);\n            using (var reader = new RefMinorReader(new StreamReader(GetStream()), GetSequenceProvider()))\n            using (var stream = new MemoryStream())\n            using (var indexStream = new MemoryStream())\n            using (var writer = new RefMinorDbWriter(new ExtendedBinaryWriter(stream), new ExtendedBinaryWriter(indexStream), version, GetSequenceProvider(), SaCommon.SchemaVersion))\n            {\n                writer.Write(reader.GetItems());\n\n                stream.Position = 0;\n                indexStream.Position = 0;\n\n                using (var dbReader = new RefMinorDbReader(stream, indexStream))\n                {\n                    Assert.Equal(\"T\", dbReader.GetGlobalMajorAllele(ChromosomeUtilities.Chr1, 15274));\n                    Assert.Null(dbReader.GetGlobalMajorAllele(ChromosomeUtilities.Chr1, 1524));\n                }\n            }\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/InputFileParsers/TopMedReaderTests.cs",
    "content": "﻿using System.IO;\nusing System.Linq;\nusing Genome;\nusing SAUtils.InputFileParsers.TOPMed;\nusing UnitTests.TestDataStructures;\nusing UnitTests.TestUtilities;\nusing Variants;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.InputFileParsers\n{\n    public sealed class TopMedReaderTests\n    {\n        private static Stream GetStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##TopMED\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\"chr1\\t10128\\trs796688738\\tA\\tAC\\t255\\tSVM;DISC\\tVRT=2;NS=62784;AN=125568;AC=334;AF=0.00265991;Het=334;Hom=0\\tNA:FRQ\\t125568:0.00265991\");\n            writer.WriteLine(\"chr1\\t10146\\trs779258992\\tAC\\tA\\t255\\tSVM;DISC;EXHET\\tVRT=2;NS=62784;AN=125568;AC=2897;AF=0.0230712;Het=2897;Hom=0\\tNA:FRQ\\t125568:0.0230712\");\n            writer.WriteLine(\"chr1\\t10177\\trs201752861\\tA\\tC\\t255\\tSVM;DISC\\tVRT=1;NS=62784;AN=125568;AC=488;AF=0.00388634;Het=488;Hom=0\\tNA:FRQ\\t125568:0.00388634\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void GetItems_test()\n        {\n            var sequence = new SimpleSequence(new string('T', VariantUtils.MaxUpstreamLength) + \"A\" +new string('T', 10146- 10128) + \"AC\" +new string('T', 10177- 10146-1)+\"A\", 10128 - 1 - VariantUtils.MaxUpstreamLength);\n\n            var seqProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh37, sequence, ChromosomeUtilities.RefNameToChromosome);\n            var gnomadReader = new TopMedReader(new StreamReader(GetStream()), seqProvider);\n\n            var items = gnomadReader.GetItems().ToList();\n\n            Assert.Equal(3, items.Count);\n            
Assert.Equal(\"\\\"allAf\\\":0.00266,\\\"allAn\\\":125568,\\\"allAc\\\":334,\\\"allHc\\\":0,\\\"failedFilter\\\":true\", items[0].GetJsonString());\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/MitoHeteroplasmy/MitoHeteroplasmyTests.cs",
    "content": "﻿using System.IO;\nusing System.Linq;\nusing Moq;\nusing SAUtils.MitoHeteroplasmy;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.Interface.Providers;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.MitoHeteroplasmy\n{\n    public sealed class MitoHeteroplasmyTests\n    {\n        private static ISequenceProvider GetSequenceProvider()\n        {\n            var mockProvider = new Mock<ISequenceProvider>();\n            mockProvider.SetupGet(x => x.RefNameToChromosome).Returns(ChromosomeUtilities.RefNameToChromosome);\n            mockProvider.SetupGet(x => x.RefIndexToChromosome).Returns(ChromosomeUtilities.RefIndexToChromosome);\n            return mockProvider.Object;\n        }\n        private static Stream GetStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"## num_samples=246\");\n            writer.WriteLine(\"MT\\t4\\t5\\t{}\");// 0 items\n            writer.WriteLine(\"MT\\t5\\t6\\t{\\\"C:A\\\":{\\\"ad\\\":[1],\\\"allele_type\\\":\\\"alt\\\",\\\"vrf\\\":[0.006329113924050633],\\\"vrf_stats\\\":{\\\"kurtosis\\\":241.00408163265314,\\\"max\\\":0.0063291139240506328,\\\"mean\\\":2.5728105382319646e-05,\\\"min\\\":0.0,\\\"nobs\\\":246,\\\"skewness\\\":15.588588185998534,\\\"stdev\\\":0.00040352956522996095,\\\"variance\\\":1.6283611001468132e-07}}}\");// 1 item\n            
writer.WriteLine(\"MT\\t7\\t8\\t{\\\"G:A\\\":{\\\"ad\\\":[1,1,1,1],\\\"allele_type\\\":\\\"alt\\\",\\\"vrf\\\":[0.003205128205128205,0.002232142857142857,0.0037593984962406013,0.00273224043715847],\\\"vrf_stats\\\":{\\\"kurtosis\\\":64.96245848503843,\\\"max\\\":0.0037593984962406013,\\\"mean\\\":4.849150404743957e-05,\\\"min\\\":0.0,\\\"nobs\\\":246,\\\"skewness\\\":8.05974448165666,\\\"stdev\\\":0.00038478763089843624,\\\"variance\\\":1.4806152089243121e-07}},\\\"G:C\\\":{\\\"ad\\\":[1,1],\\\"allele_type\\\":\\\"alt\\\",\\\"vrf\\\":[0.0024813895781637717,0.004291845493562232],\\\"vrf_stats\\\":{\\\"kurtosis\\\":148.72822661048482,\\\"max\\\":0.0042918454935622317,\\\"mean\\\":2.7533475901325216e-05,\\\"min\\\":0.0,\\\"nobs\\\":246,\\\"skewness\\\":12.019856436922753,\\\"stdev\\\":0.00031552186298069995,\\\"variance\\\":9.9554046018811583e-08}},\\\"G:T\\\":{\\\"ad\\\":[1,1,1,1],\\\"allele_type\\\":\\\"alt\\\",\\\"vrf\\\":[0.0027624309392265192,0.002680965147453083,0.003236245954692557,0.0030211480362537764],\\\"vrf_stats\\\":{\\\"kurtosis\\\":57.92357810503749,\\\"max\\\":0.0032362459546925568,\\\"mean\\\":4.7564187307422503e-05,\\\"min\\\":0.0,\\\"nobs\\\":246,\\\"skewness\\\":7.717570354191911,\\\"stdev\\\":0.0003717728271743761,\\\"variance\\\":1.3821503502522855e-07}}}\");//3 items\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void ParseItems()\n        {\n            using var parser = new MitoHeteroplasmyParser(GetStream());\n            var items = parser.GetOutputLines().ToList();\n\n            Assert.Equal(4, items.Count);\n\n            Assert.Equal(\"6\\tC\\tA\\t0.006\\t1\", items[0]);\n            Assert.Equal(\"8\\tG\\tA\\t0.002,0.003,0.004\\t1,2,1\", items[1]);\n        }\n\n        [Fact]\n        public void DeserializeStats()\n        {\n            const string input = 
\"{\\\"G:A\\\":{\\\"ad\\\":[1,1,1,1],\\\"allele_type\\\":\\\"alt\\\",\\\"vrf\\\":[0.003205128205128205,0.002232142857142857,0.0037593984962406013,0.00273224043715847],\\\"vrf_stats\\\":{\\\"kurtosis\\\":64.96245848503843,\\\"max\\\":0.0037593984962406013,\\\"mean\\\":4.849150404743957e-05,\\\"min\\\":0.0,\\\"nobs\\\":246,\\\"skewness\\\":8.05974448165666,\\\"stdev\\\":0.00038478763089843624,\\\"variance\\\":1.4806152089243121e-07}},\\\"G:C\\\":{\\\"ad\\\":[1,1],\\\"allele_type\\\":\\\"alt\\\",\\\"vrf\\\":[0.0024813895781637717,0.004291845493562232],\\\"vrf_stats\\\":{\\\"kurtosis\\\":148.72822661048482,\\\"max\\\":0.0042918454935622317,\\\"mean\\\":2.7533475901325216e-05,\\\"min\\\":0.0,\\\"nobs\\\":246,\\\"skewness\\\":12.019856436922753,\\\"stdev\\\":0.00031552186298069995,\\\"variance\\\":9.9554046018811583e-08}},\\\"G:T\\\":{\\\"ad\\\":[1,1,1,1],\\\"allele_type\\\":\\\"alt\\\",\\\"vrf\\\":[0.0027624309392265192,0.002680965147453083,0.003236245954692557,0.0030211480362537764],\\\"vrf_stats\\\":{\\\"kurtosis\\\":57.92357810503749,\\\"max\\\":0.0032362459546925568,\\\"mean\\\":4.7564187307422503e-05,\\\"min\\\":0.0,\\\"nobs\\\":246,\\\"skewness\\\":7.717570354191911,\\\"stdev\\\":0.0003717728271743761,\\\"variance\\\":1.3821503502522855e-07}}}\";\n\n            var stats = MitoHeteroplasmyParser.DeserializeStats(input);\n\n            Assert.NotNull(stats.G_A);\n            Assert.Equal(0.003205128205128205, stats.G_A.vrf[0]);\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/MitoMap/MitoMapSvReaderTests.cs",
    "content": "using System.IO;\nusing System.Linq;\nusing Genome;\nusing SAUtils.MitoMap;\nusing UnitTests.TestDataStructures;\nusing UnitTests.TestUtilities;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.MitoMap\n{\n    public sealed class MitoMapSvReaderTests\n    {\n        private static readonly string    RawSequence = \"ABC\" + new string('N', 200);\n        private static readonly ISequence Sequence    = new SimpleSequence(RawSequence);\n\n        private readonly SimpleSequenceProvider _sequenceProvider =\n            new SimpleSequenceProvider(GenomeAssembly.GRCh37, Sequence, ChromosomeUtilities.RefNameToChromosome);\n        \n        [Theory]\n        [InlineData(\"[\\\"5:105\\\",\\\"-101\\\",\\\"1837-1840/5447-5451\\\",\\\"D, 4/4\\\",\\\"<a href='/cgi-bin/print_ref_list?refs=253&title=mtDNA+Deletion%3A+1836%3A5447+-3610+D%2C+4%2F4+1837-1840%2F5447-5451+8' target='_blank'>1</a>\\\"],\", \n             \"DeletionsSingle\", \"\\\"chromosome\\\":\\\"MT\\\",\\\"begin\\\":4,\\\"end\\\":104,\\\"variantType\\\":\\\"deletion\\\"\")]\n        [InlineData(\"[\\\"2:122\\\",\\\"-121\\\",\\\"7439/13476\\\",\\\"D, 1/1\\\",\\\"<a href='/cgi-bin/print_ref_list?refs=149&title=mtDNA+Deletion%3A+7438%3A13476+-6037+D%2C+1%2F1+7439%2F13476+1' target='_blank'>1</a>\\\"],\", \n             \"DeletionsSingle\", \"\\\"chromosome\\\":\\\"MT\\\",\\\"begin\\\":3,\\\"end\\\":123,\\\"variantType\\\":\\\"deletion\\\"\")]\n        [InlineData(\"[\\\"Complete (16.5 kb)\\\",\\\"+266\\\",\\\"7-27 D-Loop region\\\",\\\"573 D-Loop region\\\",\\\"D, 7/7\\\",\\\"25\\\",\\\"<a href='/cgi-bin/print_ref_list?refs=39,556,945,952&title=Simple+Insertion+%2B266+308-573+D-Loop+region+573+D-Loop+region+D%2C+7%2F7+25' target='_blank'>4</a>\\\"],\", \n            \"InsertionsSimple\", \"\\\"chromosome\\\":\\\"MT\\\",\\\"begin\\\":16030,\\\"end\\\":16050,\\\"variantType\\\":\\\"duplication\\\"\")]\n        public void ParseLine_AsExpected(string line, string fileName, string expectedJsonString)\n        
{\n            var  reader = new MitoMapSvReader(new FileInfo(fileName), _sequenceProvider);\n            var jsonString = reader.ParseLine(line).FirstOrDefault().GetJsonString();\n            Assert.Equal(expectedJsonString, jsonString);\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/MitoMap/MitoMapVariantReaderTests.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.Linq;\nusing CacheUtils.TranscriptCache;\nusing Genome;\nusing SAUtils.InputFileParsers.ClinVar;\nusing SAUtils.MitoMap;\nusing UnitTests.TestDataStructures;\nusing UnitTests.TestUtilities;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.MitoMap\n{\n    public sealed class MitoMapVariantReaderTests\n    {\n        private static readonly ISequence  Sequence    = new NSequence();\n        private static readonly SimpleSequenceProvider SequenceProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh37, Sequence, \n            ChromosomeUtilities.RefNameToChromosome);\n        private static readonly VariantAligner VariantAligner = new VariantAligner(SequenceProvider?.Sequence);\n        private static readonly MitoMapInputDb MitoMapInputDb = new MitoMapInputDb(\n            new Dictionary<string, string> {{\"7616\", \"17616\"},{\"3510\", \"13510\"},{\"90282\",\"190282\"},{\"99016\",\"199016\"}});\n\n        [Fact]\n        public void GetAltAllelesTests()\n        {\n            const string altAlleleString1 = \"ACT\";\n            const string altAlleleString2 = \"ACT;AGT\";\n            const string altAlleleString3 = \"AKY\";\n            const string altAlleleString4 = \"ACT;AKY\";\n            const string altAlleleString5 = \"CNT;AKY\";\n            Assert.Equal(new[] { \"ACT\" }, MitoMapVariantReader.GetAltAlleles(altAlleleString1));\n            Assert.Equal(new[] { \"ACT\", \"AGT\" }, MitoMapVariantReader.GetAltAlleles(altAlleleString2));\n            Assert.Equal(new[] { \"AGC\", \"AGT\", \"ATC\", \"ATT\" }, MitoMapVariantReader.GetAltAlleles(altAlleleString3));\n            Assert.Equal(new[] { \"ACT\", \"AGC\", \"AGT\", \"ATC\", \"ATT\" }, MitoMapVariantReader.GetAltAlleles(altAlleleString4));\n            Assert.Equal(new[] { \"CNT\", \"AGC\", \"AGT\", \"ATC\", \"ATT\" }, MitoMapVariantReader.GetAltAlleles(altAlleleString5));\n        }\n\n        [Theory]\n        [InlineData(\"0 (0)\", 
MitoMapDataTypes.MitoMapMutationsRNA, 0)]\n        [InlineData(\"<a href='/cgi-bin/index_mitomap.cgi?title=RNA+Mutation+A+at+750&pos=750&ref=A&alt=A' target=_blank>858 (0)</a>\\\"\", MitoMapDataTypes.MitoMapMutationsRNA, 858)]\n        [InlineData(\"<a href='/cgi-bin/index_mitomap.cgi?title=Coding+Control+Mutation+T-C+at+16217&pos=16217&ref=T&alt=C' target=_blank>3657 (4688)</a>\", MitoMapDataTypes.MitoMapMutationsCodingControl, 3657)]\n        [InlineData(\"<a href='/cgi-bin/index_mitomap.cgi?title=Coding+Polymorphism+T-C+at+rCRS+position+650&pos=650&ref=T&alt=C&purge_type=' target='_blank'>36</a>\", MitoMapDataTypes.MitoMapPolymorphismsCoding, 36)]\n        [InlineData(\"0\", MitoMapDataTypes.MitoMapPolymorphismsCoding, 0)]\n        [InlineData(\"0\", MitoMapDataTypes.MitoMapPolymorphismsControl, 0)]\n        [InlineData(\"<a href='/cgi-bin/index_mitomap.cgi?title=Control+Polymorphism+T-C+at+rCRS+position+14&pos=14&ref=T&alt=C' target='_blank'>5 (3/2)</a>\", MitoMapDataTypes.MitoMapPolymorphismsControl, 3)]\n        [InlineData(\"<a href='/cgi-bin/index_mitomap.cgi?title=Control+Polymorphism+T-A+at+rCRS+position+14&pos=14&ref=T&alt=A' target='_blank'>38 (0/38)</a>\", MitoMapDataTypes.MitoMapPolymorphismsControl, 0)]\n        public void GetNumFullLengthSequences_AsExpected(string field, string dataType, int numFullLengthSequences)\n        {\n            Assert.Equal(numFullLengthSequences, MitoMapVariantReader.GetNumFullLengthSequences(field, dataType));\n        }\n\n        [Theory]\n        [InlineData(\"[\\\"618\\\",\\\"<a href='/MITOMAP/GenomeLoci#MTTF'>MT-TF</a>\\\",\\\"Ptosis CPEO MM & EXIT\\\",\\\"T618G\\\",\\\"tRNA Phe\\\",\\\"-\\\",\\\"+\\\",\\\"Reported\\\",\\\"<span style='display:inline-block;white-space:nowrap;'><a href='/cgi-bin/mitotip?pos=618&alt=G&quart=1'><u>77.50%</u></a> <i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i><i class='fa fa-arrow-up' style='color:red' 
aria-hidden='true'></i></span>\\\",\\\"0.0%<br>(0.0%)\\\",\\\"0 (0)\\\",\\\"<a href='/cgi-bin/print_ref_list?refs=7616&title=RNA+Mutation+T618G' target='_blank'>1</a>\\\"],\", \n            \"MutationsRNA\", \"\\\"refAllele\\\":\\\"T\\\",\\\"altAllele\\\":\\\"G\\\",\\\"diseases\\\":[\\\"Ptosis CPEO MM & EXIT\\\"],\\\"hasHomoplasmy\\\":false,\\\"hasHeteroplasmy\\\":true,\\\"status\\\":\\\"Reported\\\",\\\"clinicalSignificance\\\":\\\"likely pathogenic\\\",\\\"scorePercentile\\\":77.50,\\\"numGenBankFullLengthSeqs\\\":0,\\\"pubMedIds\\\":[\\\"17616\\\"]\")]\n        [InlineData(\"[\\\"3308\\\",\\\"<a href='/MITOMAP/GenomeLoci#MTND1'>MT-ND1</a>\\\",\\\"Sudden Infant Death\\\",\\\"T3308G\\\",\\\"T-G\\\",\\\"M-Term\\\",\\\"+\\\",\\\"+\\\",\\\"Reported\\\",\\\"0.0%<br>(0.0%)\\\",\\\"<a href='/cgi-bin/index_mitomap.cgi?title=Coding+Control+Mutation+T-G+at+3308&pos=3308&ref=T&alt=G' target=_blank>6 (0)</a>\\\",\\\"<a href='/cgi-bin/print_ref_list?refs=3510&title=Mutation+T-G+at+3308' target='_blank'>1</a>\\\"],\", \n             \"MutationsCodingControl\", \"\\\"refAllele\\\":\\\"T\\\",\\\"altAllele\\\":\\\"G\\\",\\\"diseases\\\":[\\\"Sudden Infant Death\\\"],\\\"hasHomoplasmy\\\":true,\\\"hasHeteroplasmy\\\":true,\\\"status\\\":\\\"Reported\\\",\\\"numGenBankFullLengthSeqs\\\":6,\\\"pubMedIds\\\":[\\\"13510\\\"]\")]\n        [InlineData(\"[\\\"606\\\",\\\"<a href='/MITOMAP/GenomeLoci#MTTF'>MT-TF</a>\\\",\\\"A-G\\\",\\\"-\\\",\\\"-\\\",\\\"tRNA\\\",\\\"0.0%\\\",\\\"<a href='/cgi-bin/index_mitomap.cgi?title=Coding+Polymorphism+A-G+at+rCRS+position+606&pos=606&ref=A&alt=G&purge_type=' target='_blank'>15</a>\\\",\\\"<a href='/cgi-bin/print_ref_list?refs=90282,99016&title=Coding+Polymorphism+A-G+at+606' target='_blank'>2</a>\\\"],\", \n             \"PolymorphismsCoding\", \"\\\"refAllele\\\":\\\"A\\\",\\\"altAllele\\\":\\\"G\\\",\\\"numGenBankFullLengthSeqs\\\":15,\\\"pubMedIds\\\":[\\\"190282\\\",\\\"199016\\\"]\")]\n        public void ParseLine_AsExpected(string line, 
string fileName, string expectedJsonString)\n        {\n            string jsonString = MitoMapVariantReader.ParseLine(line, fileName, SequenceProvider, VariantAligner, ChromosomeUtilities.ChrM, MitoMapInputDb)\n                                                    .FirstOrDefault()\n                                                    ?.GetJsonString();\n            Assert.Equal(expectedJsonString, jsonString);\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/MitoMap/ParsingUtilitiesTests.cs",
    "content": "﻿using System.Collections.Generic;\nusing SAUtils.MitoMap;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.MitoMap\n{\n    public sealed class ParsingUtilitiesTests\n    {\n        private static readonly MitoMapInputDb MitoMapInputDb = new MitoMapInputDb(new Dictionary<string, string>\n        {\n            {\"1\", \"101\"},\n            {\"2\", \"102\"},\n            {\"13\", \"103\"},\n            {\"4100\", \"104\"},\n            {\"5678\", \"105\"},\n            {\"23202\", \"105\"}\n        });\n\n        [Theory]\n        [InlineData(\"<a href='/cgi-bin/print_ref_list?refs=4100&title=Control+Polymorphism+T-A+at+14' target='_blank'>1</a>\", \"104\")]\n        [InlineData(\"<a href='/cgi-bin/print_ref_list?refs=1,13,5678,23202&title=Mutation+T-C+at+15784' target='_blank'>3</a>\", \"101,103,105\")]\n        [InlineData(\"<a href='/cgi-bin/print_ref_list?refs=1,2,13,4100&title=Simple+Insertion+%2B266+308-573+D-Loop+region+573+D-Loop+region+D%2C+7%2F7+25' target='_blank'>4</a>\", \"101,102,103,104\")]\n        public void GetPubMedIds_AsExpected(string field, string pubmedIds)\n        {\n            Assert.Equal(string.Join(',', ParsingUtilities.GetPubMedIds(field, MitoMapInputDb)), pubmedIds);\n        }\n\n        [Theory]\n        [InlineData(\"<a href='/cgi-bin/print_ref_list?refs=1,2,13,4100&title=Simple+Insertion+%2B266+308-573+D-Loop+region+573+D-Loop+region+D%2C+7%2F7+25' target='_blank'>4</a>\", \"1,2,13,4100\")]\n        [InlineData(\"<a 
href='/cgi-bin/print_ref_list?refs=45,247,280,303,312,330,332,394,396,541,3311,3370,3427,3569,3584,3732,3943,4287,4946,5113,5329,5348,5451,5452,5628,6169,6221,6228,6421,6490,6531,6603,6623,7647,7695,8405,8492,20372,20373,90237,90296,90302,90311,90331,90333,90365,90377,90453,90468,90477,90483,90534,90572,90627,90764,90809,90812,90835,90903,90904,90941,90963,91015,91020,91030,91041,91056,91068,91077,91081,91094,91099,91206,91214,91215,91221,91227,91228,91256,91269,91311,91318,91394,91420,91431,91514,91687,91737,91775,91789,91830,91958,91999,92063,92082,92100,99016&title=Coding+Polymorphism+G-A+at+3010' target='_blank'>97</a>\", \"45,247,280,303,312,330,332,394,396,541,3311,3370,3427,3569,3584,3732,3943,4287,4946,5113,5329,5348,5451,5452,5628,6169,6221,6228,6421,6490,6531,6603,6623,7647,7695,8405,8492,20372,20373,90237,90296,90302,90311,90331,90333,90365,90377,90453,90468,90477,90483,90534,90572,90627,90764,90809,90812,90835,90903,90904,90941,90963,91015,91020,91030,91041,91056,91068,91077,91081,91094,91099,91206,91214,91215,91221,91227,91228,91256,91269,91311,91318,91394,91420,91431,91514,91687,91737,91775,91789,91830,91958,91999,92063,92082,92100,99016\")]\n        public void ExtractInternalIds_AsExpected(string field, string internalIds)\n        {\n            Assert.Equal(string.Join(',', ParsingUtilities.ExtractInternalIds(field)), internalIds);\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/NsaWriters/IntervalWriterReaderTests.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing Genome;\r\nusing SAUtils.DataStructures;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.NSA;\r\nusing VariantAnnotation.Pools;\r\nusing VariantAnnotation.Providers;\r\nusing VariantAnnotation.SA;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.SAUtils.NsaWriters\r\n{\r\n    public sealed class IntervalWriterReaderTests\r\n    {\r\n        private static IEnumerable<ClinGenItem> GetClinGenItems()\r\n        {\r\n            return new[]\r\n            {\r\n                new ClinGenItem(\"cg1\", ChromosomeUtilities.Chr1, 145, 2743, VariantType.copy_number_gain, 3, 0, ClinicalInterpretation.likely_benign,true, new HashSet<string> {\"phenotype1\", \"phenotype2\"}, new HashSet<string> {\"pid1\", \"pid2\"} ),\r\n                new ClinGenItem(\"cg2\", ChromosomeUtilities.Chr1, 14585, 5872743, VariantType.copy_number_loss, 0, 5, ClinicalInterpretation.likely_pathogenic,true, new HashSet<string> {\"phenotype3\", \"phenotype5\"}, new HashSet<string> {\"pid3\", \"pid5\"} ),\r\n                new ClinGenItem(\"cg3\", ChromosomeUtilities.Chr2, 45759, 8792743, VariantType.deletion, 3, 0, ClinicalInterpretation.pathogenic,true, new HashSet<string> {\"phenotype1\", \"phenotype4\"}, new HashSet<string> {\"pid1\", \"pid4\"} ),\r\n                new ClinGenItem(\"cg4\", ChromosomeUtilities.Chr2, 5589745, 7987923, VariantType.insertion, 3, 0, ClinicalInterpretation.uncertain_significance, true, new HashSet<string> {\"phenotype10\", \"phenotype14\"}, new HashSet<string> {\"pid10\", \"pid14\"} )\r\n            };\r\n        }\r\n\r\n        [Fact]\r\n        public void Readback_clingen()\r\n        {\r\n            var version = new DataSourceVersion(\"source1\", \"v1\", DateTime.Now.Ticks, \"description\");\r\n\r\n            using (var saStream = new MemoryStream())\r\n            {\r\n        
        using(var siWriter = new NsiWriter(saStream, version, GenomeAssembly.GRCh37, \"clingen\",\r\n                    ReportFor.StructuralVariants, SaCommon.SchemaVersion, true))\r\n                {\r\n                    siWriter.Write(GetClinGenItems());\r\n                }\r\n                saStream.Position = 0;\r\n\r\n                var siReader = NsiReader.Read(saStream);\r\n                var variant = VariantPool.Get(ChromosomeUtilities.Chr1, 100, 14590, \"\", \"<DEL>\", VariantType.deletion, \"1:100:14590:del\", false, false,\r\n                    false, null, null, true);\r\n                var annotations = siReader.GetAnnotation(variant).ToArray();\r\n\r\n                string[] expected = {\r\n                    \"\\\"chromosome\\\":\\\"1\\\",\\\"begin\\\":145,\\\"end\\\":2743,\\\"variantType\\\":\\\"copy_number_gain\\\",\\\"id\\\":\\\"cg1\\\",\\\"clinicalInterpretation\\\":\\\"likely benign\\\",\\\"phenotypes\\\":[\\\"phenotype1\\\",\\\"phenotype2\\\"],\\\"phenotypeIds\\\":[\\\"pid1\\\",\\\"pid2\\\"],\\\"observedGains\\\":3,\\\"validated\\\":true,\\\"reciprocalOverlap\\\":0.17935,\\\"annotationOverlap\\\":1\",\r\n                    \"\\\"chromosome\\\":\\\"1\\\",\\\"begin\\\":14585,\\\"end\\\":5872743,\\\"variantType\\\":\\\"copy_number_loss\\\",\\\"id\\\":\\\"cg2\\\",\\\"clinicalInterpretation\\\":\\\"likely pathogenic\\\",\\\"phenotypes\\\":[\\\"phenotype3\\\",\\\"phenotype5\\\"],\\\"phenotypeIds\\\":[\\\"pid3\\\",\\\"pid5\\\"],\\\"observedLosses\\\":5,\\\"validated\\\":true,\\\"reciprocalOverlap\\\":0,\\\"annotationOverlap\\\":0\"\r\n                };\r\n\r\n                Assert.Equal(2, annotations.Length);\r\n                Assert.Equal(expected, annotations);\r\n                VariantPool.Return(variant);\r\n            }\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/SAUtils/NsaWriters/NsaUtilitiesTests.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing SAUtils.DataStructures;\nusing SAUtils.InputFileParsers.TOPMed;\nusing UnitTests.SAUtils.InputFileParsers;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.Interface.SA;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.NsaWriters\n{\n    public sealed class NsaUtilitiesTests\n    {\n        private static Stream GetDupItemsStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##TopMED\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\"chr5\\t70220313\\trs377439976;rs372466088\\tTGCC\\tT\\t155\\tSVM;DISCVRT=2;NS=62784;AN=125568;AC=43904;AF=0.349643;Het=12194;Hom=15855\\tNA:FRQ 125568:0.349643\");\n            writer.WriteLine(\"chr5\\t70220313\\trs377439976;rs372466088\\tTGCC\\tT\\t155\\tSVM;DISCVRT=2;NS=62784;AN=125568;AC=43904;AF=0.349643;Het=12194;Hom=15855\\tNA:FRQ 125568:0.349643\");\n            \n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n        [Fact]\n        public void RemoveConflictingAlleles_does_not_remove_duplicates()\n        {\n            var seqProvider = ParserTestUtils.GetSequenceProvider(70220313, \"TGCC\", 'A', ChromosomeUtilities.RefNameToChromosome);\n            var topMedReader = new TopMedReader(new StreamReader(GetDupItemsStream()), seqProvider);\n\n            var items = topMedReader.GetItems().ToList();\n            var saItems = new List<ISupplementaryDataItem>(items);\n            saItems = SuppDataUtilities.RemoveConflictingAlleles(saItems, false);\n            Assert.Single(saItems);\n\n        }\n        \n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/NsaWriters/WriterReaderTests.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing Moq;\r\nusing SAUtils;\r\nusing SAUtils.DataStructures;\r\nusing SAUtils.gnomAD;\r\nusing SAUtils.InputFileParsers.ClinVar;\r\nusing UnitTests.TestDataStructures;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.NSA;\r\nusing VariantAnnotation.Providers;\r\nusing VariantAnnotation.SA;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.SAUtils.NsaWriters\r\n{\r\n    public sealed class WriterReaderTests\r\n    {\r\n        private static IEnumerable<ClinVarItem> GetClinvarItems()\r\n        {\r\n            var clinvarItems = new List<ClinVarItem>\r\n            {\r\n                new ClinVarItem(ChromosomeUtilities.Chr1, 100, 100, \"T\", \"A\", ClinVarSchema.Get(), new[] {\"origin1\"}, \"SNV\", \"RCV0001\",\r\n                    null, ClinVarCommon.ReviewStatus.no_assertion, new[] {\"medgen1\"}, new[] {\"omim1\"}, new[] {\"orpha1\"},\r\n                    new[] {\"phenotype1\"}, new[] {\"significance\"}, new[] {10024875684920}, 658794146787),\r\n\r\n                new ClinVarItem(ChromosomeUtilities.Chr1, 101, 101, \"A\", \"\", ClinVarSchema.Get(), new[] {\"origin1\"}, \"del\", \"RCV00011\",\r\n                    \"101\", ClinVarCommon.ReviewStatus.no_assertion, new[] {\"medgen1\"}, new[] {\"omim1\"}, new[] {\"orpha1\"},\r\n                    new[] {\"phenotype1\"}, new[] {\"significance\"}, new[] {10024875684920}, 658794146787),\r\n\r\n                new ClinVarItem(ChromosomeUtilities.Chr1, 106, 106, \"C\", \"\", ClinVarSchema.Get(), new[] {\"origin5\"}, \"del\", \"RCV0005\",\r\n                    null, ClinVarCommon.ReviewStatus.multiple_submitters, new[] {\"medgen5\"}, new[] {\"omim5\"}, new[] {\"orpha5\"},\r\n                    new[] {\"phenotype5\"}, new[] {\"significance5\"}, new[] 
{10024255684920}, 658794187787),\r\n\r\n                new ClinVarItem(ChromosomeUtilities.Chr2, 200, 200, \"G\", \"A\", ClinVarSchema.Get(),\r\n                    new[] {\"origin21\"}, \"SNV\", \"RCV20001\", null, ClinVarCommon.ReviewStatus.multiple_submitters_no_conflict,\r\n                    new[] {\"medgen20\"}, new[] {\"omim20\"}, new[] {\"orpha20\"}, new[] {\"phenotype20\"},\r\n                    new[] {\"significance20\"}, new[] {10024875684480}, 669794146787),\r\n\r\n                new ClinVarItem(ChromosomeUtilities.Chr2, 205, 205, \"T\", \"C\", ClinVarSchema.Get(), new[] {\"origin25\"}, \"ins\", \"RCV20005\",\r\n                    null, ClinVarCommon.ReviewStatus.expert_panel, new[] {\"medgen25\"}, new[] {\"omim25\"}, new[] {\"orpha25\"},\r\n                    new[] {\"phenotype25\"}, new[] {\"significance25\"}, new[] {10024255684925}, 658794187287)\r\n            };\r\n\r\n            return clinvarItems;\r\n        }\r\n\r\n        private static ISequenceProvider GetSequenceProvider()\r\n        {\r\n            var sequence = new SimpleSequence(new string('A', 99) + \"TAGTCGGTTAA\" + new string('A', 89) + \"GCCCAT\");\r\n            return new SimpleSequenceProvider(GenomeAssembly.GRCh37, sequence, ChromosomeUtilities.RefNameToChromosome);\r\n        }\r\n\r\n\r\n        [Fact]\r\n        public void Write_clinvar_basic()\r\n        {\r\n            var version = new DataSourceVersion(\"source1\", \"v1\", DateTime.Now.Ticks, \"description\");\r\n\r\n            using (var saStream = new MemoryStream())\r\n            using (var indexStream = new MemoryStream())\r\n            {\r\n                using (var saWriter = new NsaWriter(saStream, indexStream, version, GetSequenceProvider(), \"clinvar\",\r\n                    false, true, SaCommon.SchemaVersion, false, true, false, 1024, GenomeAssembly.GRCh37, true))\r\n                {\r\n                    saWriter.Write(GetClinvarItems());\r\n                }\r\n\r\n                
saStream.Position = 0;\r\n                indexStream.Position = 0;\r\n\r\n                using (var saReader = new NsaReader(saStream, indexStream, 1024))\r\n                {\r\n                    Assert.Equal(GenomeAssembly.GRCh37, saReader.Assembly);\r\n                    Assert.Equal(version.ToString(), saReader.Version.ToString());\r\n                    saReader.PreLoad(ChromosomeUtilities.Chr1, new List<int> {100, 101, 106});\r\n                    var annotations = new List<(string refAllele, string altAllele, string annotation)>();\r\n                    saReader.GetAnnotation(100, annotations);\r\n\r\n                    Assert.Equal(\"T\", annotations[0].refAllele);\r\n                    Assert.Equal(\"A\", annotations[0].altAllele);\r\n                    Assert.Equal(\r\n                        \"\\\"id\\\":\\\"RCV0001\\\",\\\"reviewStatus\\\":\\\"no assertion provided\\\",\\\"alleleOrigins\\\":[\\\"origin1\\\"],\\\"refAllele\\\":\\\"T\\\",\\\"altAllele\\\":\\\"A\\\",\\\"phenotypes\\\":[\\\"phenotype1\\\"],\\\"medGenIds\\\":[\\\"medgen1\\\"],\\\"omimIds\\\":[\\\"omim1\\\"],\\\"orphanetIds\\\":[\\\"orpha1\\\"],\\\"significance\\\":[\\\"significance\\\"],\\\"lastUpdatedDate\\\":\\\"0001-01-01\\\",\\\"pubMedIds\\\":[\\\"10024875684920\\\"]\",\r\n                        annotations[0].annotation);\r\n\r\n                    saReader.GetAnnotation(101, annotations);\r\n                    Assert.Equal(\"A\", annotations[0].refAllele);\r\n                    Assert.Equal(\"\", annotations[0].altAllele);\r\n                    Assert.Equal(\r\n                        \"\\\"id\\\":\\\"RCV00011\\\",\\\"variationId\\\":\\\"101\\\",\\\"reviewStatus\\\":\\\"no assertion 
provided\\\",\\\"alleleOrigins\\\":[\\\"origin1\\\"],\\\"refAllele\\\":\\\"A\\\",\\\"altAllele\\\":\\\"-\\\",\\\"phenotypes\\\":[\\\"phenotype1\\\"],\\\"medGenIds\\\":[\\\"medgen1\\\"],\\\"omimIds\\\":[\\\"omim1\\\"],\\\"orphanetIds\\\":[\\\"orpha1\\\"],\\\"significance\\\":[\\\"significance\\\"],\\\"lastUpdatedDate\\\":\\\"0001-01-01\\\",\\\"pubMedIds\\\":[\\\"10024875684920\\\"]\",\r\n                        annotations[0].annotation);\r\n\r\n                    saReader.PreLoad(ChromosomeUtilities.Chr2, new List<int> {200, 205});\r\n                    saReader.GetAnnotation(200,annotations);\r\n                    var (refAllele, altAllele, annotation) = annotations[0];\r\n                    Assert.Equal(\"G\", refAllele);\r\n                    Assert.Equal(\"A\", altAllele);\r\n                    Assert.NotNull(annotation);\r\n                }\r\n            }\r\n\r\n        }\r\n\r\n\r\n        private static IEnumerable<DbSnpItem> GetDbsnpItems(int count)\r\n        {\r\n            var items = new List<DbSnpItem>();\r\n            var position = 100;\r\n            for (int i = 0; i < count; i++, position += 5)\r\n            {\r\n                items.Add(new DbSnpItem(ChromosomeUtilities.Chr1, position, position, \"A\", \"C\", null));\r\n            }\r\n\r\n            return items;\r\n        }\r\n        \r\n        private static IEnumerable<DbSnpItem> GetParRegionItems(int count)\r\n        {\r\n            var items    = new List<DbSnpItem>();\r\n            var position = 10_010;\r\n            for (int i = 0; i < count; i++, position += 2)\r\n            {\r\n                items.Add(new DbSnpItem(ChromosomeUtilities.ChrY, position, position, \"N\", \"C\", null));\r\n            }\r\n\r\n            return items;\r\n        }\r\n\r\n        private static ISequenceProvider GetAllASequenceProvider()\r\n        {\r\n            var seqProvider = new Mock<ISequenceProvider>();\r\n            seqProvider.SetupGet(x => 
x.Assembly).Returns(GenomeAssembly.GRCh37);\r\n            seqProvider.Setup(x => x.Sequence.Substring(It.IsAny<int>(), 1)).Returns(\"A\");\r\n\r\n            return seqProvider.Object;\r\n        }\r\n\r\n        [Fact]\r\n        public void Preload()\r\n        {\r\n            var version = new DataSourceVersion(\"source1\", \"v1\", DateTime.Now.Ticks, \"description\");\r\n\r\n            using (var saStream = new MemoryStream())\r\n            using (var indexStream = new MemoryStream())\r\n            {\r\n                using (var saWriter = new NsaWriter(saStream, indexStream, version, GetAllASequenceProvider(), \"dbsnp\",\r\n                    true, true, SaCommon.SchemaVersion, false, true, false, 1024, GenomeAssembly.GRCh37, true)) {\r\n                    saWriter.Write(GetDbsnpItems(1000));\r\n                }\r\n                saStream.Position = 0;\r\n                indexStream.Position = 0;\r\n\r\n                using (var saReader = new NsaReader(saStream, indexStream, 1024))\r\n                {\r\n                    saReader.PreLoad(ChromosomeUtilities.Chr1, GetAlternatePositions(50, 1000));\r\n                    var annotations = new List<(string refAllele, string altAllele, string annotation)>();\r\n                    \r\n                    saReader.GetAnnotation(90, annotations);\r\n                    Assert.True(annotations.Count==0); //before any SA existed\r\n\r\n                    saReader.GetAnnotation(100, annotations);\r\n                    Assert.True(annotations.Count > 0); //first entry of first block\r\n\r\n                    saReader.GetAnnotation(480, annotations);\r\n                    Assert.True(annotations.Count > 0); //last query of first block\r\n\r\n                    saReader.GetAnnotation(488, annotations);\r\n                    Assert.True(annotations.Count ==0);//between first and second block\r\n\r\n                    saReader.GetAnnotation(490, annotations);\r\n                    
Assert.True(annotations.Count > 0);//first entry of second block\r\n                }\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void WriteParRegion()\r\n        {\r\n            var version = new DataSourceVersion(\"source1\", \"v1\", DateTime.Now.Ticks, \"description\");\r\n\r\n            var count = 1000;\r\n            using (var saStream = new MemoryStream())\r\n            using (var indexStream = new MemoryStream())\r\n            {\r\n                using (var saWriter = new NsaWriter(saStream, indexStream, version, GetAllASequenceProvider(), \"dbsnp\",\r\n                    true, true, SaCommon.SchemaVersion, false, true, false, 1024, GenomeAssembly.GRCh37, true)) {\r\n                    saWriter.Write(GetParRegionItems(count));\r\n                }\r\n                saStream.Position    = 0;\r\n                indexStream.Position = 0;\r\n\r\n                using (var saReader = new NsaReader(saStream, indexStream, 1024))\r\n                {\r\n                    saReader.PreLoad(ChromosomeUtilities.ChrY, GetAlternatePositions(10_010, 1000));\r\n                    var annotations = new List<(string refAllele, string altAllele, string annotation)>();\r\n                    \r\n                    var position = 10_010;\r\n                    for (int i = 0; i < count; i++, position += 2)\r\n                    {\r\n                        saReader.GetAnnotation(position, annotations);\r\n                        Assert.True(annotations.Count == 1); //before any SA existed\r\n                    }\r\n                }\r\n            }\r\n        }\r\n\r\n        private static List<int> GetAlternatePositions(int start, int count)\r\n        {\r\n            var positions = new List<int>();\r\n            for (var i = 0; i < count; i++, start += 2)\r\n            {\r\n                positions.Add(start);\r\n            }\r\n\r\n            return positions;\r\n        }\r\n\r\n        [Fact]\r\n        public void 
WrongRefAllele_ThrowUserException()\r\n        {\r\n            var customItem = new CustomItem(ChromosomeUtilities.Chr1, 100, \"A\", \"T\", null, null, null);\r\n\r\n            Assert.Throws<UserErrorException>(() => WriteCustomSaItem(customItem));\r\n        }\r\n\r\n        private static void WriteCustomSaItem(CustomItem customItem)\r\n        {\r\n            using (var saStream = new MemoryStream())\r\n            using (var indexStream = new MemoryStream())\r\n            using (var saWriter = new NsaWriter(\r\n                saStream,\r\n                indexStream,\r\n                new DataSourceVersion(\"customeSa\", \"test\", DateTime.Now.Ticks),\r\n                GetSequenceProvider(),\r\n                \"customeSa\", false, true, SaCommon.SchemaVersion, false, false))\r\n            {\r\n                saWriter.Write(new[] {customItem});\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void RemoveConflictinItems()\r\n        {\r\n            var version = new DataSourceVersion(\"source1\", \"v1\", DateTime.Now.Ticks, \"description\");\r\n\r\n            using (var saStream = new MemoryStream())\r\n            using (var indexStream = new MemoryStream())\r\n            using (var saWriter = new NsaWriter(saStream, indexStream, version, GetAllASequenceProvider(), \"gnomad\",\r\n                        true, true, SaCommon.SchemaVersion, false, true, false, 1024))\r\n            {\r\n                Assert.Equal(0, saWriter.Write(GetConflictingGnomadItems()));\r\n            }\r\n        }\r\n\r\n        private static Stream GetChr22_17467787_17467799_genome()\r\n        {\r\n            var stream = new MemoryStream();\r\n            var writer = new StreamWriter(stream);\r\n\r\n            writer.WriteLine(\"##gnomAD\");\r\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\r\n            
writer.WriteLine(\"22\\t17467787\\trs1013532764\\tAAAAG\\tA\\t5607.38\\tPASS\\tAC=9;AN=7342;AF=0.00122582;rf_tp_probability=0.526938;FS=1.835;InbreedingCoeff=-0.0586;MQ=60.31;MQRankSum=-0.363;QD=12.01;ReadPosRankSum=0.416;SOR=0.869;BaseQRankSum=0.067;ClippingRankSum=0.263;DP=659925;VQSLOD=-0.9495;VQSR_culprit=FS;variant_type=indel;allele_type=del;n_alt_alleles=1;pab_max=0.864166;gq_hist_alt_bin_freq=0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|17;gq_hist_all_bin_freq=2625|6415|2399|2552|894|245|475|590|299|567|573|228|560|58|171|68|135|8|78|194;dp_hist_alt_bin_freq=0|0|0|2|4|6|2|2|0|1|0|0|0|0|0|0|0|0|0|0;dp_hist_alt_n_larger=0;dp_hist_all_bin_freq=4|18|221|1132|2818|4248|4392|3451|2107|976|414|186|95|56|40|33|32|20|18|17;dp_hist_all_n_larger=32;ab_hist_alt_bin_freq=0|0|0|0|0|0|2|1|4|1|2|5|2|0|0|0|0|0|0|0;AC_nfe_seu=0;AN_nfe_seu=38;AF_nfe_seu=0;nhomalt_nfe_seu=0;controls_AC_afr_male=1;controls_AN_afr_male=132;controls_AF_afr_male=0.00757576;controls_nhomalt_afr_male=0;non_topmed_AC_amr=1;non_topmed_AN_amr=168;non_topmed_AF_amr=0.00595238;non_topmed_nhomalt_amr=0;AC_raw=9;AN_raw=29502;AF_raw=0.000305064;nhomalt_raw=0;AC_fin_female=0;AN_fin_female=598;AF_fin_female=0;nhomalt_fin_female=0;non_neuro_AC_asj_female=0;non_neuro_AN_asj_female=12;non_neuro_AF_asj_female=0;non_neuro_nhomalt_asj_female=0;non_neuro_AC_afr_male=1;non_neuro_AN_afr_male=154;non_neuro_AF_afr_male=0.00649351;non_neuro_nhomalt_afr_male=0;AC_afr_male=1;AN_afr_male=446;AF_afr_male=0.00224215;nhomalt_afr_male=0;AC_afr=2;AN_afr=756;AF_afr=0.0026455;nhomalt_afr=0;non_neuro_AC_afr_female=1;non_neuro_AN_afr_female=164;non_neuro_AF_afr_female=0.00609756;non_neuro_nhomalt_afr_female=0;non_topmed_AC_amr_female=1;non_topmed_AN_amr_female=72;non_topmed_AF_amr_female=0.0138889;non_topmed_nhomalt_amr_female=0;non_topmed_AC_oth_female=2;non_topmed_AN_oth_female=110;non_topmed_AF_oth_female=0.0181818;non_topmed_nhomalt_oth_female=0;AC_eas_female=0;AN_eas_female=12;AF_eas_female=0;nhomalt_eas_female=0;AC_afr_female=1;AN_afr_
female=310;AF_afr_female=0.00322581;nhomalt_afr_female=0;non_neuro_AC_female=2;non_neuro_AN_female=2324;non_neuro_AF_female=0.000860585;non_neuro_nhomalt_female=0;controls_AC_afr=1;controls_AN_afr=228;controls_AF_afr=0.00438596;controls_nhomalt_afr=0;AC_nfe_onf=1;AN_nfe_onf=628;AF_nfe_onf=0.00159236;nhomalt_nfe_onf=0;controls_AC_fin_male=0;controls_AN_fin_male=200;controls_AF_fin_male=0;controls_nhomalt_fin_male=0;non_neuro_AC_nfe_nwe=2;non_neuro_AN_nfe_nwe=2582;non_neuro_AF_nfe_nwe=0.000774593;non_neuro_nhomalt_nfe_nwe=0;AC_fin_male=0;AN_fin_male=526;AF_fin_male=0;nhomalt_fin_male=0;AC_nfe_female=0;AN_nfe_female=2104;AF_nfe_female=0;nhomalt_nfe_female=0;AC_amr=1;AN_amr=178;AF_amr=0.00561798;nhomalt_amr=0;non_topmed_AC_nfe_male=3;non_topmed_AN_nfe_male=1778;non_topmed_AF_nfe_male=0.00168729;non_topmed_nhomalt_nfe_male=0;AC_eas=0;AN_eas=48;AF_eas=0;nhomalt_eas=0;nhomalt=0;non_neuro_AC_nfe_female=0;non_neuro_AN_nfe_female=1840;non_neuro_AF_nfe_female=0;non_neuro_nhomalt_nfe_female=0;non_neuro_AC_afr=2;non_neuro_AN_afr=318;non_neuro_AF_afr=0.00628931;non_neuro_nhomalt_afr=0;controls_AC_raw=2;controls_AN_raw=10110;controls_AF_raw=0.000197824;controls_nhomalt_raw=0;controls_AC_male=2;controls_AN_male=1340;controls_AF_male=0.00149254;controls_nhomalt_male=0;non_topmed_AC_male=5;non_topmed_AN_male=3004;non_topmed_AF_male=0.00166445;non_topmed_nhomalt_male=0;controls_AC_nfe_female=0;controls_AN_nfe_female=740;controls_AF_nfe_female=0;controls_nhomalt_nfe_female=0;non_neuro_AC_amr=0;non_neuro_AN_amr=114;non_neuro_AF_amr=0;non_neuro_nhomalt_amr=0;non_neuro_AC_eas_female=0;non_neuro_AN_eas_female=12;non_neuro_AF_eas_female=0;non_neuro_nhomalt_eas_female=0;AC_asj_male=1;AN_asj_male=50;AF_asj_male=0.02;nhomalt_asj_male=0;controls_AC_nfe_male=1;controls_AN_nfe_male=908;controls_AF_nfe_male=0.00110132;controls_nhomalt_nfe_male=0;non_neuro_AC_fin=0;non_neuro_AN_fin=378;non_neuro_AF_fin=0;non_neuro_nhomalt_fin=0;AC_oth_female=2;AN_oth_female=112;AF_oth_female=0.0178571;nhomalt_oth_f
emale=0;controls_AC_nfe=1;controls_AN_nfe=1648;controls_AF_nfe=0.000606796;controls_nhomalt_nfe=0;controls_AC_oth_female=0;controls_AN_oth_female=48;controls_AF_oth_female=0;controls_nhomalt_oth_female=0;controls_AC_asj=0;controls_AN_asj=8;controls_AF_asj=0;controls_nhomalt_asj=0;non_neuro_AC_amr_male=0;non_neuro_AN_amr_male=58;non_neuro_AF_amr_male=0;non_neuro_nhomalt_amr_male=0;controls_AC_nfe_nwe=1;controls_AN_nfe_nwe=308;controls_AF_nfe_nwe=0.00324675;controls_nhomalt_nfe_nwe=0;AC_nfe_nwe=2;AN_nfe_nwe=2906;AF_nfe_nwe=0.000688231;nhomalt_nfe_nwe=0;controls_AC_nfe_seu=0;controls_AN_nfe_seu=16;controls_AF_nfe_seu=0;controls_nhomalt_nfe_seu=0;non_neuro_AC_amr_female=0;non_neuro_AN_amr_female=56;non_neuro_AF_amr_female=0;non_neuro_nhomalt_amr_female=0;non_neuro_AC_nfe_onf=1;non_neuro_AN_nfe_onf=464;non_neuro_AF_nfe_onf=0.00215517;non_neuro_nhomalt_nfe_onf=0;non_topmed_AC_eas_male=0;non_topmed_AN_eas_male=34;non_topmed_AF_eas_male=0;non_topmed_nhomalt_eas_male=0;controls_AC_amr_female=0;controls_AN_amr_female=16;controls_AF_amr_female=0;controls_nhomalt_amr_female=0;non_neuro_AC_fin_male=0;non_neuro_AN_fin_male=200;non_neuro_AF_fin_male=0;non_neuro_nhomalt_fin_male=0;AC_female=4;AN_female=3236;AF_female=0.00123609;nhomalt_female=0;non_neuro_AC_oth_male=0;non_neuro_AN_oth_male=84;non_neuro_AF_oth_male=0;non_neuro_nhomalt_oth_male=0;non_topmed_AC_nfe_est=0;non_topmed_AN_nfe_est=1352;non_topmed_AF_nfe_est=0;non_topmed_nhomalt_nfe_est=0;non_topmed_AC_nfe_nwe=2;non_topmed_AN_nfe_nwe=1632;non_topmed_AF_nfe_nwe=0.00122549;non_topmed_nhomalt_nfe_nwe=0;non_topmed_AC_amr_male=0;non_topmed_AN_amr_male=96;non_topmed_AF_amr_male=0;non_topmed_nhomalt_amr_male=0;non_topmed_AC_nfe_onf=1;non_topmed_AN_nfe_onf=448;non_topmed_AF_nfe_onf=0.00223214;non_topmed_nhomalt_nfe_onf=0;controls_AC_eas_male=0;controls_AN_eas_male=16;controls_AF_eas_male=0;controls_nhomalt_eas_male=0;controls_AC_oth_male=0;controls_AN_oth_male=52;controls_AF_oth_male=0;controls_nhomalt_oth_male=0;non_topmed_AC=9;no
n_topmed_AN=5806;non_topmed_AF=0.00155012;non_topmed_nhomalt=0;controls_AC_fin=0;controls_AN_fin=378;controls_AF_fin=0;controls_nhomalt_fin=0;non_neuro_AC_nfe=3;non_neuro_AN_nfe=4272;non_neuro_AF_nfe=0.000702247;non_neuro_nhomalt_nfe=0;non_neuro_AC_fin_female=0;non_neuro_AN_fin_female=178;non_neuro_AF_fin_female=0;non_neuro_nhomalt_fin_female=0;non_topmed_AC_nfe_seu=0;non_topmed_AN_nfe_seu=38;non_topmed_AF_nfe_seu=0;non_topmed_nhomalt_nfe_seu=0;controls_AC_eas_female=0;controls_AN_eas_female=12;controls_AF_eas_female=0;controls_nhomalt_eas_female=0;non_topmed_AC_asj=1;non_topmed_AN_asj=38;non_topmed_AF_asj=0.0263158;non_topmed_nhomalt_asj=0;controls_AC_nfe_onf=0;controls_AN_nfe_onf=124;controls_AF_nfe_onf=0;controls_nhomalt_nfe_onf=0;non_neuro_AC=7;non_neuro_AN=5332;non_neuro_AF=0.00131283;non_neuro_nhomalt=0;non_topmed_AC_nfe=3;non_topmed_AN_nfe=3470;non_topmed_AF_nfe=0.000864553;non_topmed_nhomalt_nfe=0;non_topmed_AC_raw=9;non_topmed_AN_raw=24832;non_topmed_AF_raw=0.000362436;non_topmed_nhomalt_raw=0;non_neuro_AC_nfe_est=0;non_neuro_AN_nfe_est=1212;non_neuro_AF_nfe_est=0;non_neuro_nhomalt_nfe_est=0;non_topmed_AC_oth_male=0;non_topmed_AN_oth_male=114;non_topmed_AF_oth_male=0;non_topmed_nhomalt_oth_male=0;AC_nfe_est=0;AN_nfe_est=1356;AF_nfe_est=0;nhomalt_nfe_est=0;non_topmed_AC_afr_male=1;non_topmed_AN_afr_male=434;non_topmed_AF_afr_male=0.00230415;non_topmed_nhomalt_afr_male=0;AC_eas_male=0;AN_eas_male=36;AF_eas_male=0;nhomalt_eas_male=0;controls_AC_eas=0;controls_AN_eas=28;controls_AF_eas=0;controls_nhomalt_eas=0;non_neuro_AC_eas_male=0;non_neuro_AN_eas_male=36;non_neuro_AF_eas_male=0;non_neuro_nhomalt_eas_male=0;non_neuro_AC_asj_male=1;non_neuro_AN_asj_male=44;non_neuro_AF_asj_male=0.0227273;non_neuro_nhomalt_asj_male=0;controls_AC_oth=0;controls_AN_oth=100;controls_AF_oth=0;controls_nhomalt_oth=0;AC_nfe=3;AN_nfe=4928;AF_nfe=0.000608766;nhomalt_nfe=0;non_topmed_AC_female=4;non_topmed_AN_female=2802;non_topmed_AF_female=0.00142755;non_topmed_nhomalt_female=0;non_n
euro_AC_asj=1;non_neuro_AN_asj=56;non_neuro_AF_asj=0.0178571;non_neuro_nhomalt_asj=0;non_topmed_AC_eas_female=0;non_topmed_AN_eas_female=10;non_topmed_AF_eas_female=0;non_topmed_nhomalt_eas_female=0;non_neuro_AC_raw=7;non_neuro_AN_raw=20066;non_neuro_AF_raw=0.000348849;non_neuro_nhomalt_raw=0;non_topmed_AC_eas=0;non_topmed_AN_eas=44;non_topmed_AF_eas=0;non_topmed_nhomalt_eas=0;non_topmed_AC_fin_male=0;non_topmed_AN_fin_male=526;non_topmed_AF_fin_male=0;non_topmed_nhomalt_fin_male=0;AC_fin=0;AN_fin=1124;AF_fin=0;nhomalt_fin=0;AC_nfe_male=3;AN_nfe_male=2824;AF_nfe_male=0.00106232;nhomalt_nfe_male=0;controls_AC_amr_male=0;controls_AN_amr_male=30;controls_AF_amr_male=0;controls_nhomalt_amr_male=0;controls_AC_afr_female=0;controls_AN_afr_female=96;controls_AF_afr_female=0;controls_nhomalt_afr_female=0;controls_AC_amr=0;controls_AN_amr=46;controls_AF_amr=0;controls_nhomalt_amr=0;AC_asj_female=0;AN_asj_female=22;AF_asj_female=0;nhomalt_asj_female=0;non_neuro_AC_eas=0;non_neuro_AN_eas=48;non_neuro_AF_eas=0;non_neuro_nhomalt_eas=0;non_neuro_AC_male=5;non_neuro_AN_male=3008;non_neuro_AF_male=0.00166223;non_neuro_nhomalt_male=0;AC_asj=1;AN_asj=72;AF_asj=0.0138889;nhomalt_asj=0;controls_AC_nfe_est=0;controls_AN_nfe_est=1200;controls_AF_nfe_est=0;controls_nhomalt_nfe_est=0;non_topmed_AC_asj_female=0;non_topmed_AN_asj_female=16;non_topmed_AF_asj_female=0;non_topmed_nhomalt_asj_female=0;non_topmed_AC_oth=2;non_topmed_AN_oth=224;non_topmed_AF_oth=0.00892857;non_topmed_nhomalt_oth=0;non_topmed_AC_fin_female=0;non_topmed_AN_fin_female=598;non_topmed_AF_fin_female=0;non_topmed_nhomalt_fin_female=0;AC_oth=2;AN_oth=236;AF_oth=0.00847458;nhomalt_oth=0;non_neuro_AC_nfe_male=3;non_neuro_AN_nfe_male=2432;non_neuro_AF_nfe_male=0.00123355;non_neuro_nhomalt_nfe_male=0;controls_AC_female=0;controls_AN_female=1096;controls_AF_female=0;controls_nhomalt_female=0;non_topmed_AC_fin=0;non_topmed_AN_fin=1124;non_topmed_AF_fin=0;non_topmed_nhomalt_fin=0;non_topmed_AC_nfe_female=0;non_topmed_AN_nfe_fema
le=1692;non_topmed_AF_nfe_female=0;non_topmed_nhomalt_nfe_female=0;controls_AC_asj_male=0;controls_AN_asj_male=2;controls_AF_asj_male=0;controls_nhomalt_asj_male=0;non_topmed_AC_asj_male=1;non_topmed_AN_asj_male=22;non_topmed_AF_asj_male=0.0454545;non_topmed_nhomalt_asj_male=0;non_neuro_AC_oth=1;non_neuro_AN_oth=146;non_neuro_AF_oth=0.00684932;non_neuro_nhomalt_oth=0;AC_male=5;AN_male=4106;AF_male=0.00121773;nhomalt_male=0;controls_AC_fin_female=0;controls_AN_fin_female=178;controls_AF_fin_female=0;controls_nhomalt_fin_female=0;controls_AC_asj_female=0;controls_AN_asj_female=6;controls_AF_asj_female=0;controls_nhomalt_asj_female=0;AC_amr_male=0;AN_amr_male=100;AF_amr_male=0;nhomalt_amr_male=0;AC_amr_female=1;AN_amr_female=78;AF_amr_female=0.0128205;nhomalt_amr_female=0;AC_oth_male=0;AN_oth_male=124;AF_oth_male=0;nhomalt_oth_male=0;non_neuro_AC_nfe_seu=0;non_neuro_AN_nfe_seu=14;non_neuro_AF_nfe_seu=0;non_neuro_nhomalt_nfe_seu=0;non_topmed_AC_afr_female=1;non_topmed_AN_afr_female=304;non_topmed_AF_afr_female=0.00328947;non_topmed_nhomalt_afr_female=0;non_topmed_AC_afr=2;non_topmed_AN_afr=738;non_topmed_AF_afr=0.00271003;non_topmed_nhomalt_afr=0;controls_AC=2;controls_AN=2436;controls_AF=0.000821018;controls_nhomalt=0;non_neuro_AC_oth_female=1;non_neuro_AN_oth_female=62;non_neuro_AF_oth_female=0.016129;non_neuro_nhomalt_oth_female=0;non_topmed_faf95_amr=0.000305;non_topmed_faf99_amr=0.000305;faf95_afr=0.00047001;faf99_afr=0.00046996;controls_faf95_afr=0.000224;controls_faf99_afr=0.000224;faf95_amr=0.000288;faf99_amr=0.000288;faf95_eas=0;faf99_eas=0;faf95=0.00063865;faf99=0.0006395;non_neuro_faf95_afr=0.00111728;non_neuro_faf99_afr=0.00111671;non_neuro_faf95_amr=0;non_neuro_faf99_amr=0;controls_faf95_nfe=3.1e-05;controls_faf99_nfe=3.1e-05;non_topmed_faf95=0.00080814;non_topmed_faf99=0.00080791;non_neuro_faf95_nfe=0.000191;non_neuro_faf99_nfe=0.00019047;non_neuro_faf95=0.00061599;non_neuro_faf99=0.00061588;non_topmed_faf95_nfe=0.0002353;non_topmed_faf99_nfe=0.00023558;co
ntrols_faf95_eas=0;controls_faf99_eas=0;faf95_nfe=0.0001658;faf99_nfe=0.00016511;non_topmed_faf95_eas=0;non_topmed_faf99_eas=0;controls_faf95_amr=0;controls_faf99_amr=0;non_neuro_faf95_eas=0;non_neuro_faf99_eas=0;non_topmed_faf95_afr=0.00048118;non_topmed_faf99_afr=0.00048064;controls_faf95=0.00014568;controls_faf99=0.00014565;controls_popmax=afr;controls_AC_popmax=1;controls_AN_popmax=228;controls_AF_popmax=0.00438596;controls_nhomalt_popmax=0;popmax=amr;AC_popmax=1;AN_popmax=178;AF_popmax=0.00561798;nhomalt_popmax=0;age_hist_het_bin_freq=1|0|1|1|0|2|0|0|0|0;age_hist_het_n_smaller=1;age_hist_het_n_larger=0;age_hist_hom_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_hom_n_smaller=0;age_hist_hom_n_larger=0;non_neuro_popmax=afr;non_neuro_AC_popmax=2;non_neuro_AN_popmax=318;non_neuro_AF_popmax=0.00628931;non_neuro_nhomalt_popmax=0;non_topmed_popmax=amr;non_topmed_AC_popmax=1;non_topmed_AN_popmax=168;non_topmed_AF_popmax=0.00595238;non_topmed_nhomalt_popmax=0\");\r\n            writer.WriteLine(\"22\\t17467793\\trs200526150\\tAAGAA\\tA\\t2.96178e+06\\tPASS\\tAC=25;AN=13820;AF=0.00180897;rf_tp_probability=0.6944;FS=0;InbreedingCoeff=-0.0226;MQ=61.07;MQRankSum=0.061;QD=19.6;ReadPosRankSum=0.177;SOR=0.694;BaseQRankSum=-0.031;ClippingRankSum=-0.053;DP=657153;VQSLOD=5.11;VQSR_culprit=FS;variant_type=multi-indel;allele_type=del;n_alt_alleles=2;pab_max=1;gq_hist_alt_bin_freq=0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|1|0|36;gq_hist_all_bin_freq=2892|4902|1140|827|277|141|343|478|268|556|481|207|525|87|178|89|169|40|119|5100;dp_hist_alt_bin_freq=0|0|0|1|5|8|10|5|4|1|0|0|1|1|0|0|0|0|1|0;dp_hist_alt_n_larger=0;dp_hist_all_bin_freq=3|25|286|1366|3137|4439|4355|3211|1821|851|331|175|79|53|32|42|22|27|18|12;dp_hist_all_n_larger=25;ab_hist_alt_bin_freq=0|0|0|0|0|0|2|2|6|8|3|6|7|2|0|0|0|0|0|0;AC_nfe_seu=0;AN_nfe_seu=60;AF_nfe_seu=0;nhomalt_nfe_seu=0;controls_AC_afr_male=0;controls_AN_afr_male=654;controls_AF_afr_male=0;controls_nhomalt_afr_male=0;non_topmed_AC_amr=17;non_topmed_AN_amr=272;non_topmed_AF
_amr=0.0625;non_topmed_nhomalt_amr=1;AC_raw=25;AN_raw=28996;AF_raw=0.000862188;nhomalt_raw=1;AC_fin_female=0;AN_fin_female=834;AF_fin_female=0;nhomalt_fin_female=0;non_neuro_AC_asj_female=0;non_neuro_AN_asj_female=38;non_neuro_AF_asj_female=0;non_neuro_nhomalt_asj_female=0;non_neuro_AC_afr_male=0;non_neuro_AN_afr_male=730;non_neuro_AF_afr_male=0;non_neuro_nhomalt_afr_male=0;AC_afr_male=2;AN_afr_male=2172;AF_afr_male=0.00092081;nhomalt_afr_male=0;AC_afr=2;AN_afr=3678;AF_afr=0.000543774;nhomalt_afr=0;non_neuro_AC_afr_female=0;non_neuro_AN_afr_female=754;non_neuro_AF_afr_female=0;non_neuro_nhomalt_afr_female=0;non_topmed_AC_amr_female=9;non_topmed_AN_amr_female=132;non_topmed_AF_amr_female=0.0681818;non_topmed_nhomalt_amr_female=1;non_topmed_AC_oth_female=2;non_topmed_AN_oth_female=190;non_topmed_AF_oth_female=0.0105263;non_topmed_nhomalt_oth_female=0;AC_eas_female=0;AN_eas_female=248;AF_eas_female=0;nhomalt_eas_female=0;AC_afr_female=0;AN_afr_female=1506;AF_afr_female=0;nhomalt_afr_female=0;non_neuro_AC_female=7;non_neuro_AN_female=4262;non_neuro_AF_female=0.00164242;non_neuro_nhomalt_female=0;controls_AC_afr=0;controls_AN_afr=1120;controls_AF_afr=0;controls_nhomalt_afr=0;AC_nfe_onf=0;AN_nfe_onf=904;AF_nfe_onf=0;nhomalt_nfe_onf=0;controls_AC_fin_male=0;controls_AN_fin_male=276;controls_AF_fin_male=0;controls_nhomalt_fin_male=0;non_neuro_AC_nfe_nwe=1;non_neuro_AN_nfe_nwe=3534;non_neuro_AF_nfe_nwe=0.000282965;non_neuro_nhomalt_nfe_nwe=0;AC_fin_male=0;AN_fin_male=708;AF_fin_male=0;nhomalt_fin_male=0;AC_nfe_female=1;AN_nfe_female=3128;AF_nfe_female=0.000319693;nhomalt_nfe_female=0;AC_amr=18;AN_amr=286;AF_amr=0.0629371;nhomalt_amr=1;non_topmed_AC_nfe_male=1;non_topmed_AN_nfe_male=2566;non_topmed_AF_nfe_male=0.000389712;non_topmed_nhomalt_nfe_male=0;AC_eas=0;AN_eas=656;AF_eas=0;nhomalt_eas=0;nhomalt=1;non_neuro_AC_nfe_female=1;non_neuro_AN_nfe_female=2732;non_neuro_AF_nfe_female=0.000366032;non_neuro_nhomalt_nfe_female=0;non_neuro_AC_afr=0;non_neuro_AN_afr=1484;non_neuro_AF
_afr=0;non_neuro_nhomalt_afr=0;controls_AC_raw=4;controls_AN_raw=9932;controls_AF_raw=0.000402739;controls_nhomalt_raw=0;controls_AC_male=3;controls_AN_male=2680;controls_AF_male=0.0011194;controls_nhomalt_male=0;non_topmed_AC_male=11;non_topmed_AN_male=6164;non_topmed_AF_male=0.00178456;non_topmed_nhomalt_male=0;controls_AC_nfe_female=0;controls_AN_nfe_female=1186;controls_AF_nfe_female=0;controls_nhomalt_nfe_female=0;non_neuro_AC_amr=9;non_neuro_AN_amr=184;non_neuro_AF_amr=0.048913;non_neuro_nhomalt_amr=0;non_neuro_AC_eas_female=0;non_neuro_AN_eas_female=248;non_neuro_AF_eas_female=0;non_neuro_nhomalt_eas_female=0;AC_asj_male=0;AN_asj_male=92;AF_asj_male=0;nhomalt_asj_male=0;controls_AC_nfe_male=0;controls_AN_nfe_male=1378;controls_AF_nfe_male=0;controls_nhomalt_nfe_male=0;non_neuro_AC_fin=0;non_neuro_AN_fin=532;non_neuro_AF_fin=0;non_neuro_nhomalt_fin=0;AC_oth_female=2;AN_oth_female=194;AF_oth_female=0.0103093;nhomalt_oth_female=0;controls_AC_nfe=0;controls_AN_nfe=2564;controls_AF_nfe=0;controls_nhomalt_nfe=0;controls_AC_oth_female=0;controls_AN_oth_female=76;controls_AF_oth_female=0;controls_nhomalt_oth_female=0;controls_AC_asj=0;controls_AN_asj=20;controls_AF_asj=0;controls_nhomalt_asj=0;non_neuro_AC_amr_male=4;non_neuro_AN_amr_male=74;non_neuro_AF_amr_male=0.0540541;non_neuro_nhomalt_amr_male=0;controls_AC_nfe_nwe=0;controls_AN_nfe_nwe=426;controls_AF_nfe_nwe=0;controls_nhomalt_nfe_nwe=0;AC_nfe_nwe=2;AN_nfe_nwe=3958;AF_nfe_nwe=0.000505306;nhomalt_nfe_nwe=0;controls_AC_nfe_seu=0;controls_AN_nfe_seu=26;controls_AF_nfe_seu=0;controls_nhomalt_nfe_seu=0;non_neuro_AC_amr_female=5;non_neuro_AN_amr_female=110;non_neuro_AF_amr_female=0.0454545;non_neuro_nhomalt_amr_female=0;non_neuro_AC_nfe_onf=0;non_neuro_AN_nfe_onf=704;non_neuro_AF_nfe_onf=0;non_neuro_nhomalt_nfe_onf=0;non_topmed_AC_eas_male=0;non_topmed_AN_eas_male=400;non_topmed_AF_eas_male=0;non_topmed_nhomalt_eas_male=0;controls_AC_amr_female=1;controls_AN_amr_female=46;controls_AF_amr_female=0.0217391;controls_n
homalt_amr_female=0;non_neuro_AC_fin_male=0;non_neuro_AN_fin_male=276;non_neuro_AF_fin_male=0;non_neuro_nhomalt_fin_male=0;AC_female=13;AN_female=6098;AF_female=0.00213185;nhomalt_female=1;non_neuro_AC_oth_male=1;non_neuro_AN_oth_male=156;non_neuro_AF_oth_male=0.00641026;non_neuro_nhomalt_oth_male=0;non_topmed_AC_nfe_est=0;non_topmed_AN_nfe_est=2184;non_topmed_AF_nfe_est=0;non_topmed_nhomalt_nfe_est=0;non_topmed_AC_nfe_nwe=2;non_topmed_AN_nfe_nwe=2250;non_topmed_AF_nfe_nwe=0.000888889;non_topmed_nhomalt_nfe_nwe=0;non_topmed_AC_amr_male=8;non_topmed_AN_amr_male=140;non_topmed_AF_amr_male=0.0571429;non_topmed_nhomalt_amr_male=0;non_topmed_AC_nfe_onf=0;non_topmed_AN_nfe_onf=646;non_topmed_AF_nfe_onf=0;non_topmed_nhomalt_nfe_onf=0;controls_AC_eas_male=0;controls_AN_eas_male=244;controls_AF_eas_male=0;controls_nhomalt_eas_male=0;controls_AC_oth_male=0;controls_AN_oth_male=84;controls_AF_oth_male=0;controls_nhomalt_oth_male=0;non_topmed_AC=23;non_topmed_AN=11642;non_topmed_AF=0.00197561;non_topmed_nhomalt=1;controls_AC_fin=0;controls_AN_fin=532;controls_AF_fin=0;controls_nhomalt_fin=0;non_neuro_AC_nfe=1;non_neuro_AN_nfe=6226;non_neuro_AF_nfe=0.000160617;non_neuro_nhomalt_nfe=0;non_neuro_AC_fin_female=0;non_neuro_AN_fin_female=256;non_neuro_AF_fin_female=0;non_neuro_nhomalt_fin_female=0;non_topmed_AC_nfe_seu=0;non_topmed_AN_nfe_seu=60;non_topmed_AF_nfe_seu=0;non_topmed_nhomalt_nfe_seu=0;controls_AC_eas_female=0;controls_AN_eas_female=172;controls_AF_eas_female=0;controls_nhomalt_eas_female=0;non_topmed_AC_asj=0;non_topmed_AN_asj=68;non_topmed_AF_asj=0;non_topmed_nhomalt_asj=0;controls_AC_nfe_onf=0;controls_AN_nfe_onf=168;controls_AF_nfe_onf=0;controls_nhomalt_nfe_onf=0;non_neuro_AC=12;non_neuro_AN=9480;non_neuro_AF=0.00126582;non_neuro_nhomalt=0;non_topmed_AC_nfe=2;non_topmed_AN_nfe=5140;non_topmed_AF_nfe=0.000389105;non_topmed_nhomalt_nfe=0;non_topmed_AC_raw=23;non_topmed_AN_raw=24482;non_topmed_AF_raw=0.000939466;non_topmed_nhomalt_raw=1;non_neuro_AC_nfe_est=0;non_neuro_
AN_nfe_est=1962;non_neuro_AF_nfe_est=0;non_neuro_nhomalt_nfe_est=0;non_topmed_AC_oth_male=1;non_topmed_AN_oth_male=184;non_topmed_AF_oth_male=0.00543478;non_topmed_nhomalt_oth_male=0;AC_nfe_est=0;AN_nfe_est=2192;AF_nfe_est=0;nhomalt_nfe_est=0;non_topmed_AC_afr_male=1;non_topmed_AN_afr_male=2132;non_topmed_AF_afr_male=0.000469043;non_topmed_nhomalt_afr_male=0;AC_eas_male=0;AN_eas_male=408;AF_eas_male=0;nhomalt_eas_male=0;controls_AC_eas=0;controls_AN_eas=416;controls_AF_eas=0;controls_nhomalt_eas=0;non_neuro_AC_eas_male=0;non_neuro_AN_eas_male=408;non_neuro_AF_eas_male=0;non_neuro_nhomalt_eas_male=0;non_neuro_AC_asj_male=0;non_neuro_AN_asj_male=80;non_neuro_AF_asj_male=0;non_neuro_nhomalt_asj_male=0;controls_AC_oth=0;controls_AN_oth=160;controls_AF_oth=0;controls_nhomalt_oth=0;AC_nfe=2;AN_nfe=7114;AF_nfe=0.000281136;nhomalt_nfe=0;non_topmed_AC_female=12;non_topmed_AN_female=5478;non_topmed_AF_female=0.00219058;non_topmed_nhomalt_female=1;non_neuro_AC_asj=0;non_neuro_AN_asj=118;non_neuro_AF_asj=0;non_neuro_nhomalt_asj=0;non_topmed_AC_eas_female=0;non_topmed_AN_eas_female=240;non_topmed_AF_eas_female=0;non_topmed_nhomalt_eas_female=0;non_neuro_AC_raw=12;non_neuro_AN_raw=19660;non_neuro_AF_raw=0.000610376;non_neuro_nhomalt_raw=0;non_topmed_AC_eas=0;non_topmed_AN_eas=640;non_topmed_AF_eas=0;non_topmed_nhomalt_eas=0;non_topmed_AC_fin_male=0;non_topmed_AN_fin_male=708;non_topmed_AF_fin_male=0;non_topmed_nhomalt_fin_male=0;AC_fin=0;AN_fin=1542;AF_fin=0;nhomalt_fin=0;AC_nfe_male=1;AN_nfe_male=3986;AF_nfe_male=0.000250878;nhomalt_nfe_male=0;controls_AC_amr_male=3;controls_AN_amr_male=38;controls_AF_amr_male=0.0789474;controls_nhomalt_amr_male=0;controls_AC_afr_female=0;controls_AN_afr_female=466;controls_AF_afr_female=0;controls_nhomalt_afr_female=0;controls_AC_amr=4;controls_AN_amr=84;controls_AF_amr=0.047619;controls_nhomalt_amr=0;AC_asj_female=0;AN_asj_female=46;AF_asj_female=0;nhomalt_asj_female=0;non_neuro_AC_eas=0;non_neuro_AN_eas=656;non_neuro_AF_eas=0;non_neuro_nhomal
t_eas=0;non_neuro_AC_male=5;non_neuro_AN_male=5218;non_neuro_AF_male=0.000958222;non_neuro_nhomalt_male=0;AC_asj=0;AN_asj=138;AF_asj=0;nhomalt_asj=0;controls_AC_nfe_est=0;controls_AN_nfe_est=1944;controls_AF_nfe_est=0;controls_nhomalt_nfe_est=0;non_topmed_AC_asj_female=0;non_topmed_AN_asj_female=34;non_topmed_AF_asj_female=0;non_topmed_nhomalt_asj_female=0;non_topmed_AC_oth=3;non_topmed_AN_oth=374;non_topmed_AF_oth=0.00802139;non_topmed_nhomalt_oth=0;non_topmed_AC_fin_female=0;non_topmed_AN_fin_female=834;non_topmed_AF_fin_female=0;non_topmed_nhomalt_fin_female=0;AC_oth=3;AN_oth=406;AF_oth=0.00738916;nhomalt_oth=0;non_neuro_AC_nfe_male=0;non_neuro_AN_nfe_male=3494;non_neuro_AF_nfe_male=0;non_neuro_nhomalt_nfe_male=0;controls_AC_female=1;controls_AN_female=2216;controls_AF_female=0.000451264;controls_nhomalt_female=0;non_topmed_AC_fin=0;non_topmed_AN_fin=1542;non_topmed_AF_fin=0;non_topmed_nhomalt_fin=0;non_topmed_AC_nfe_female=1;non_topmed_AN_nfe_female=2574;non_topmed_AF_nfe_female=0.0003885;non_topmed_nhomalt_nfe_female=0;controls_AC_asj_male=0;controls_AN_asj_male=6;controls_AF_asj_male=0;controls_nhomalt_asj_male=0;non_topmed_AC_asj_male=0;non_topmed_AN_asj_male=34;non_topmed_AF_asj_male=0;non_topmed_nhomalt_asj_male=0;non_neuro_AC_oth=2;non_neuro_AN_oth=280;non_neuro_AF_oth=0.00714286;non_neuro_nhomalt_oth=0;AC_male=12;AN_male=7722;AF_male=0.001554;nhomalt_male=0;controls_AC_fin_female=0;controls_AN_fin_female=256;controls_AF_fin_female=0;controls_nhomalt_fin_female=0;controls_AC_asj_female=0;controls_AN_asj_female=14;controls_AF_asj_female=0;controls_nhomalt_asj_female=0;AC_amr_male=8;AN_amr_male=144;AF_amr_male=0.0555556;nhomalt_amr_male=0;AC_amr_female=10;AN_amr_female=142;AF_amr_female=0.0704225;nhomalt_amr_female=1;AC_oth_male=1;AN_oth_male=212;AF_oth_male=0.00471698;nhomalt_oth_male=0;non_neuro_AC_nfe_seu=0;non_neuro_AN_nfe_seu=26;non_neuro_AF_nfe_seu=0;non_neuro_nhomalt_nfe_seu=0;non_topmed_AC_afr_female=0;non_topmed_AN_afr_female=1474;non_topmed_AF_afr_
female=0;non_topmed_nhomalt_afr_female=0;non_topmed_AC_afr=1;non_topmed_AN_afr=3606;non_topmed_AF_afr=0.000277316;non_topmed_nhomalt_afr=0;controls_AC=4;controls_AN=4896;controls_AF=0.000816993;controls_nhomalt=0;non_neuro_AC_oth_female=1;non_neuro_AN_oth_female=124;non_neuro_AF_oth_female=0.00806452;non_neuro_nhomalt_oth_female=0;non_topmed_faf95_amr=0.0398231;non_topmed_faf99_amr=0.0398236;faf95_afr=9.592e-05;faf99_afr=9.609e-05;controls_faf95_afr=0;controls_faf99_afr=0;faf95_amr=0.0406793;faf99_amr=0.0406792;faf95_eas=0;faf99_eas=0;faf95=0.00125772;faf99=0.00125736;non_neuro_faf95_afr=0;non_neuro_faf99_afr=0;non_neuro_faf95_amr=0.0255171;non_neuro_faf99_amr=0.0255167;controls_faf95_nfe=0;controls_faf99_nfe=0;non_topmed_faf95=0.00134988;non_topmed_faf99=0.00134945;non_neuro_faf95_nfe=8e-06;non_neuro_faf99_nfe=8e-06;non_neuro_faf95=0.00072973;non_neuro_faf99=0.00073008;non_topmed_faf95_nfe=6.881e-05;non_topmed_faf99_nfe=6.877e-05;controls_faf95_eas=0;controls_faf99_eas=0;faf95_nfe=4.922e-05;faf99_nfe=4.923e-05;non_topmed_faf95_eas=0;non_topmed_faf99_eas=0;controls_faf95_amr=0.0162655;controls_faf99_amr=0.0162653;non_neuro_faf95_eas=0;non_neuro_faf99_eas=0;non_topmed_faf95_afr=1.4e-05;non_topmed_faf99_afr=1.4e-05;controls_faf95=0.00027835;controls_faf99=0.00027827;controls_popmax=amr;controls_AC_popmax=4;controls_AN_popmax=84;controls_AF_popmax=0.047619;controls_nhomalt_popmax=0;popmax=amr;AC_popmax=18;AN_popmax=286;AF_popmax=0.0629371;nhomalt_popmax=1;age_hist_het_bin_freq=0|0|2|1|1|1|0|0|0|0;age_hist_het_n_smaller=4;age_hist_het_n_larger=0;age_hist_hom_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_hom_n_smaller=0;age_hist_hom_n_larger=0;non_neuro_popmax=amr;non_neuro_AC_popmax=9;non_neuro_AN_popmax=184;non_neuro_AF_popmax=0.048913;non_neuro_nhomalt_popmax=0;non_topmed_popmax=amr;non_topmed_AC_popmax=17;non_topmed_AN_popmax=272;non_topmed_AF_popmax=0.0625;non_topmed_nhomalt_popmax=1\");\r\n            
writer.WriteLine(\"22\\t17467793\\trs200526150\\tAAGAA\\tA\\t2.96178e+06\\tPASS\\tAC=4501;AN=13820;AF=0.325687;rf_tp_probability=0.6944;FS=0;InbreedingCoeff=-0.0226;MQ=61.07;MQRankSum=0.061;QD=19.6;ReadPosRankSum=0.177;SOR=0.694;BaseQRankSum=-0.031;ClippingRankSum=-0.053;DP=657153;VQSLOD=5.11;VQSR_culprit=FS;variant_type=multi-indel;allele_type=del;n_alt_alleles=2;pab_max=1;gq_hist_alt_bin_freq=3|3|4|4|5|3|4|6|8|10|21|14|36|33|27|47|34|35|43|4884;gq_hist_all_bin_freq=2897|4907|1144|830|282|143|344|482|273|559|484|208|528|92|176|87|149|45|119|5070;dp_hist_alt_bin_freq=0|6|126|551|1133|1285|1033|600|260|102|40|27|13|13|3|11|1|6|7|2;dp_hist_alt_n_larger=5;dp_hist_all_bin_freq=3|25|286|1366|3137|4439|4355|3211|1821|851|331|175|79|53|32|42|22|27|18|12;dp_hist_all_n_larger=25;ab_hist_alt_bin_freq=0|7|1|7|36|124|277|456|835|741|1055|616|404|155|42|25|5|6|5|0;AC_nfe_seu=19;AN_nfe_seu=60;AF_nfe_seu=0.316667;nhomalt_nfe_seu=1;controls_AC_afr_male=325;controls_AN_afr_male=654;controls_AF_afr_male=0.496942;controls_nhomalt_afr_male=35;non_topmed_AC_amr=77;non_topmed_AN_amr=272;non_topmed_AF_amr=0.283088;non_topmed_nhomalt_amr=2;AC_raw=4527;AN_raw=28996;AF_raw=0.156125;nhomalt_raw=356;AC_fin_female=187;AN_fin_female=834;AF_fin_female=0.224221;nhomalt_fin_female=6;non_neuro_AC_asj_female=15;non_neuro_AN_asj_female=38;non_neuro_AF_asj_female=0.394737;non_neuro_nhomalt_asj_female=0;non_neuro_AC_afr_male=358;non_neuro_AN_afr_male=730;non_neuro_AF_afr_male=0.490411;non_neuro_nhomalt_afr_male=37;AC_afr_male=1071;AN_afr_male=2172;AF_afr_male=0.493094;nhomalt_afr_male=113;AC_afr=1825;AN_afr=3678;AF_afr=0.496194;nhomalt_afr=196;non_neuro_AC_afr_female=376;non_neuro_AN_afr_female=754;non_neuro_AF_afr_female=0.498674;non_neuro_nhomalt_afr_female=42;non_topmed_AC_amr_female=35;non_topmed_AN_amr_female=132;non_topmed_AF_amr_female=0.265152;non_topmed_nhomalt_amr_female=0;non_topmed_AC_oth_female=58;non_topmed_AN_oth_female=190;non_topmed_AF_oth_female=0.305263;non_topmed_nhomalt_oth_female=6
;AC_eas_female=135;AN_eas_female=248;AF_eas_female=0.544355;nhomalt_eas_female=14;AC_afr_female=754;AN_afr_female=1506;AF_afr_female=0.500664;nhomalt_afr_female=83;non_neuro_AC_female=1325;non_neuro_AN_female=4262;non_neuro_AF_female=0.310887;non_neuro_nhomalt_female=93;controls_AC_afr=566;controls_AN_afr=1120;controls_AF_afr=0.505357;controls_nhomalt_afr=67;AC_nfe_onf=233;AN_nfe_onf=904;AF_nfe_onf=0.257743;nhomalt_nfe_onf=13;controls_AC_fin_male=58;controls_AN_fin_male=276;controls_AF_fin_male=0.210145;controls_nhomalt_fin_male=2;non_neuro_AC_nfe_nwe=797;non_neuro_AN_nfe_nwe=3534;non_neuro_AF_nfe_nwe=0.225523;non_neuro_nhomalt_nfe_nwe=38;AC_fin_male=146;AN_fin_male=708;AF_fin_male=0.206215;nhomalt_fin_male=4;AC_nfe_female=774;AN_nfe_female=3128;AF_nfe_female=0.247442;nhomalt_nfe_female=42;AC_amr=79;AN_amr=286;AF_amr=0.276224;nhomalt_amr=2;non_topmed_AC_nfe_male=636;non_topmed_AN_nfe_male=2566;non_topmed_AF_nfe_male=0.247857;non_topmed_nhomalt_nfe_male=33;AC_eas=359;AN_eas=656;AF_eas=0.547256;nhomalt_eas=35;nhomalt=352;non_neuro_AC_nfe_female=666;non_neuro_AN_nfe_female=2732;non_neuro_AF_nfe_female=0.243777;non_neuro_nhomalt_nfe_female=30;non_neuro_AC_afr=734;non_neuro_AN_afr=1484;non_neuro_AF_afr=0.494609;non_neuro_nhomalt_afr=79;controls_AC_raw=1673;controls_AN_raw=9932;controls_AF_raw=0.168445;controls_nhomalt_raw=138;controls_AC_male=920;controls_AN_male=2680;controls_AF_male=0.343284;controls_nhomalt_male=78;non_topmed_AC_male=2163;non_topmed_AN_male=6164;non_topmed_AF_male=0.350909;non_topmed_nhomalt_male=179;controls_AC_nfe_female=300;controls_AN_nfe_female=1186;controls_AF_nfe_female=0.252951;controls_nhomalt_nfe_female=11;non_neuro_AC_amr=55;non_neuro_AN_amr=184;non_neuro_AF_amr=0.298913;non_neuro_nhomalt_amr=1;non_neuro_AC_eas_female=135;non_neuro_AN_eas_female=248;non_neuro_AF_eas_female=0.544355;non_neuro_nhomalt_eas_female=14;AC_asj_male=34;AN_asj_male=92;AF_asj_male=0.369565;nhomalt_asj_male=5;controls_AC_nfe_male=360;controls_AN_nfe_male=1378;controls
_AF_nfe_male=0.261248;controls_nhomalt_nfe_male=21;non_neuro_AC_fin=118;non_neuro_AN_fin=532;non_neuro_AF_fin=0.221805;non_neuro_nhomalt_fin=3;AC_oth_female=60;AN_oth_female=194;AF_oth_female=0.309278;nhomalt_oth_female=7;controls_AC_nfe=660;controls_AN_nfe=2564;controls_AF_nfe=0.25741;controls_nhomalt_nfe=32;controls_AC_oth_female=19;controls_AN_oth_female=76;controls_AF_oth_female=0.25;controls_nhomalt_oth_female=1;controls_AC_asj=9;controls_AN_asj=20;controls_AF_asj=0.45;controls_nhomalt_asj=1;non_neuro_AC_amr_male=24;non_neuro_AN_amr_male=74;non_neuro_AF_amr_male=0.324324;non_neuro_nhomalt_amr_male=1;controls_AC_nfe_nwe=99;controls_AN_nfe_nwe=426;controls_AF_nfe_nwe=0.232394;controls_nhomalt_nfe_nwe=5;AC_nfe_nwe=894;AN_nfe_nwe=3958;AF_nfe_nwe=0.225872;nhomalt_nfe_nwe=44;controls_AC_nfe_seu=10;controls_AN_nfe_seu=26;controls_AF_nfe_seu=0.384615;controls_nhomalt_nfe_seu=0;non_neuro_AC_amr_female=31;non_neuro_AN_amr_female=110;non_neuro_AF_amr_female=0.281818;non_neuro_nhomalt_amr_female=0;non_neuro_AC_nfe_onf=190;non_neuro_AN_nfe_onf=704;non_neuro_AF_nfe_onf=0.269886;non_neuro_nhomalt_nfe_onf=12;non_topmed_AC_eas_male=219;non_topmed_AN_eas_male=400;non_topmed_AF_eas_male=0.5475;non_topmed_nhomalt_eas_male=20;controls_AC_amr_female=18;controls_AN_amr_female=46;controls_AF_amr_female=0.391304;controls_nhomalt_amr_female=0;non_neuro_AC_fin_male=58;non_neuro_AN_fin_male=276;non_neuro_AF_fin_male=0.210145;non_neuro_nhomalt_fin_male=2;AC_female=1965;AN_female=6098;AF_female=0.322237;nhomalt_female=152;non_neuro_AC_oth_male=49;non_neuro_AN_oth_male=156;non_neuro_AF_oth_male=0.314103;non_neuro_nhomalt_oth_male=5;non_topmed_AC_nfe_est=577;non_topmed_AN_nfe_est=2184;non_topmed_AF_nfe_est=0.264194;non_topmed_nhomalt_nfe_est=32;non_topmed_AC_nfe_nwe=515;non_topmed_AN_nfe_nwe=2250;non_topmed_AF_nfe_nwe=0.228889;non_topmed_nhomalt_nfe_nwe=28;non_topmed_AC_amr_male=42;non_topmed_AN_amr_male=140;non_topmed_AF_amr_male=0.3;non_topmed_nhomalt_amr_male=2;non_topmed_AC_nfe_onf=169;no
n_topmed_AN_nfe_onf=646;non_topmed_AF_nfe_onf=0.26161;non_topmed_nhomalt_nfe_onf=8;controls_AC_eas_male=136;controls_AN_eas_male=244;controls_AF_eas_male=0.557377;controls_nhomalt_eas_male=15;controls_AC_oth_male=25;controls_AN_oth_male=84;controls_AF_oth_male=0.297619;controls_nhomalt_oth_male=4;non_topmed_AC=3972;non_topmed_AN=11642;non_topmed_AF=0.341178;non_topmed_nhomalt=324;controls_AC_fin=118;controls_AN_fin=532;controls_AF_fin=0.221805;controls_nhomalt_fin=3;non_neuro_AC_nfe=1506;non_neuro_AN_nfe=6226;non_neuro_AF_nfe=0.241889;non_neuro_nhomalt_nfe=73;non_neuro_AC_fin_female=60;non_neuro_AN_fin_female=256;non_neuro_AF_fin_female=0.234375;non_neuro_nhomalt_fin_female=1;non_topmed_AC_nfe_seu=19;non_topmed_AN_nfe_seu=60;non_topmed_AF_nfe_seu=0.316667;non_topmed_nhomalt_nfe_seu=1;controls_AC_eas_female=95;controls_AN_eas_female=172;controls_AF_eas_female=0.552326;controls_nhomalt_eas_female=12;non_topmed_AC_asj=24;non_topmed_AN_asj=68;non_topmed_AF_asj=0.352941;non_topmed_nhomalt_asj=1;controls_AC_nfe_onf=46;controls_AN_nfe_onf=168;controls_AF_nfe_onf=0.27381;controls_nhomalt_nfe_onf=4;non_neuro_AC=2909;non_neuro_AN=9480;non_neuro_AF=0.306857;non_neuro_nhomalt=207;non_topmed_AC_nfe=1280;non_topmed_AN_nfe=5140;non_topmed_AF_nfe=0.249027;non_topmed_nhomalt_nfe=69;non_topmed_AC_raw=3996;non_topmed_AN_raw=24482;non_topmed_AF_raw=0.163222;non_topmed_nhomalt_raw=327;non_neuro_AC_nfe_est=509;non_neuro_AN_nfe_est=1962;non_neuro_AF_nfe_est=0.259429;non_neuro_nhomalt_nfe_est=23;non_topmed_AC_oth_male=56;non_topmed_AN_oth_male=184;non_topmed_AF_oth_male=0.304348;non_topmed_nhomalt_oth_male=6;AC_nfe_est=579;AN_nfe_est=2192;AF_nfe_est=0.264142;nhomalt_nfe_est=32;non_topmed_AC_afr_male=1054;non_topmed_AN_afr_male=2132;non_topmed_AF_afr_male=0.494371;non_topmed_nhomalt_afr_male=113;AC_eas_male=224;AN_eas_male=408;AF_eas_male=0.54902;nhomalt_eas_male=21;controls_AC_eas=231;controls_AN_eas=416;controls_AF_eas=0.555288;controls_nhomalt_eas=27;non_neuro_AC_eas_male=224;non_neuro_A
N_eas_male=408;non_neuro_AF_eas_male=0.54902;non_neuro_nhomalt_eas_male=21;non_neuro_AC_asj_male=31;non_neuro_AN_asj_male=80;non_neuro_AF_asj_male=0.3875;non_neuro_nhomalt_asj_male=5;controls_AC_oth=44;controls_AN_oth=160;controls_AF_oth=0.275;controls_nhomalt_oth=5;AC_nfe=1725;AN_nfe=7114;AF_nfe=0.24248;nhomalt_nfe=90;non_topmed_AC_female=1809;non_topmed_AN_female=5478;non_topmed_AF_female=0.33023;non_topmed_nhomalt_female=145;non_neuro_AC_asj=46;non_neuro_AN_asj=118;non_neuro_AF_asj=0.389831;non_neuro_nhomalt_asj=5;non_topmed_AC_eas_female=132;non_topmed_AN_eas_female=240;non_topmed_AF_eas_female=0.55;non_topmed_nhomalt_eas_female=14;non_neuro_AC_raw=2928;non_neuro_AN_raw=19660;non_neuro_AF_raw=0.148932;non_neuro_nhomalt_raw=211;non_topmed_AC_eas=351;non_topmed_AN_eas=640;non_topmed_AF_eas=0.548438;non_topmed_nhomalt_eas=34;non_topmed_AC_fin_male=146;non_topmed_AN_fin_male=708;non_topmed_AF_fin_male=0.206215;non_topmed_nhomalt_fin_male=4;AC_fin=333;AN_fin=1542;AF_fin=0.215953;nhomalt_fin=10;AC_nfe_male=951;AN_nfe_male=3986;AF_nfe_male=0.238585;nhomalt_nfe_male=48;controls_AC_amr_male=12;controls_AN_amr_male=38;controls_AF_amr_male=0.315789;controls_nhomalt_amr_male=0;controls_AC_afr_female=241;controls_AN_afr_female=466;controls_AF_afr_female=0.517167;controls_nhomalt_afr_female=32;controls_AC_amr=30;controls_AN_amr=84;controls_AF_amr=0.357143;controls_nhomalt_amr=0;AC_asj_female=18;AN_asj_female=46;AF_asj_female=0.391304;nhomalt_asj_female=0;non_neuro_AC_eas=359;non_neuro_AN_eas=656;non_neuro_AF_eas=0.547256;non_neuro_nhomalt_eas=35;non_neuro_AC_male=1584;non_neuro_AN_male=5218;non_neuro_AF_male=0.303565;non_neuro_nhomalt_male=114;AC_asj=52;AN_asj=138;AF_asj=0.376812;nhomalt_asj=5;controls_AC_nfe_est=505;controls_AN_nfe_est=1944;controls_AF_nfe_est=0.259774;controls_nhomalt_nfe_est=23;non_topmed_AC_asj_female=14;non_topmed_AN_asj_female=34;non_topmed_AF_asj_female=0.411765;non_topmed_nhomalt_asj_female=0;non_topmed_AC_oth=114;non_topmed_AN_oth=374;non_topmed_AF_o
th=0.304813;non_topmed_nhomalt_oth=12;non_topmed_AC_fin_female=187;non_topmed_AN_fin_female=834;non_topmed_AF_fin_female=0.224221;non_topmed_nhomalt_fin_female=6;AC_oth=128;AN_oth=406;AF_oth=0.315271;nhomalt_oth=14;non_neuro_AC_nfe_male=840;non_neuro_AN_nfe_male=3494;non_neuro_AF_nfe_male=0.240412;non_neuro_nhomalt_nfe_male=43;controls_AC_female=738;controls_AN_female=2216;controls_AF_female=0.333032;controls_nhomalt_female=57;non_topmed_AC_fin=333;non_topmed_AN_fin=1542;non_topmed_AF_fin=0.215953;non_topmed_nhomalt_fin=10;non_topmed_AC_nfe_female=644;non_topmed_AN_nfe_female=2574;non_topmed_AF_nfe_female=0.250194;non_topmed_nhomalt_nfe_female=36;controls_AC_asj_male=4;controls_AN_asj_male=6;controls_AF_asj_male=0.666667;controls_nhomalt_asj_male=1;non_topmed_AC_asj_male=10;non_topmed_AN_asj_male=34;non_topmed_AF_asj_male=0.294118;non_topmed_nhomalt_asj_male=1;non_neuro_AC_oth=91;non_neuro_AN_oth=280;non_neuro_AF_oth=0.325;non_neuro_nhomalt_oth=11;AC_male=2536;AN_male=7722;AF_male=0.328412;nhomalt_male=200;controls_AC_fin_female=60;controls_AN_fin_female=256;controls_AF_fin_female=0.234375;controls_nhomalt_fin_female=1;controls_AC_asj_female=5;controls_AN_asj_female=14;controls_AF_asj_female=0.357143;controls_nhomalt_asj_female=0;AC_amr_male=42;AN_amr_male=144;AF_amr_male=0.291667;nhomalt_amr_male=2;AC_amr_female=37;AN_amr_female=142;AF_amr_female=0.260563;nhomalt_amr_female=0;AC_oth_male=68;AN_oth_male=212;AF_oth_male=0.320755;nhomalt_oth_male=7;non_neuro_AC_nfe_seu=10;non_neuro_AN_nfe_seu=26;non_neuro_AF_nfe_seu=0.384615;non_neuro_nhomalt_nfe_seu=0;non_topmed_AC_afr_female=739;non_topmed_AN_afr_female=1474;non_topmed_AF_afr_female=0.501357;non_topmed_nhomalt_afr_female=83;non_topmed_AC_afr=1793;non_topmed_AN_afr=3606;non_topmed_AF_afr=0.497227;non_topmed_nhomalt_afr=196;controls_AC=1658;controls_AN=4896;controls_AF=0.338644;controls_nhomalt=135;non_neuro_AC_oth_female=42;non_neuro_AN_oth_female=124;non_neuro_AF_oth_female=0.33871;non_neuro_nhomalt_oth_female=6;non
_topmed_faf95_amr=0.232194;non_topmed_faf99_amr=0.232194;faf95_afr=0.477244;faf99_afr=0.477244;controls_faf95_afr=0.470932;controls_faf99_afr=0.470932;faf95_amr=0.227168;faf99_amr=0.227169;faf95_eas=0.500629;faf99_eas=0.500629;faf95=0.317744;faf99=0.317744;non_neuro_faf95_afr=0.464967;non_neuro_faf99_afr=0.464967;non_neuro_faf95_amr=0.235846;non_neuro_faf99_amr=0.235846;controls_faf95_nfe=0.241154;controls_faf99_nfe=0.241154;non_topmed_faf95=0.332322;non_topmed_faf99=0.332323;non_neuro_faf95_nfe=0.231727;non_neuro_faf99_nfe=0.231728;non_neuro_faf95=0.297558;non_neuro_faf99=0.297559;non_topmed_faf95_nfe=0.237689;non_topmed_faf99_nfe=0.23769;controls_faf95_eas=0.49659;controls_faf99_eas=0.49659;faf95_nfe=0.232957;faf99_nfe=0.232956;non_topmed_faf95_eas=0.501191;non_topmed_faf99_eas=0.501191;controls_faf95_amr=0.257071;controls_faf99_amr=0.257071;non_neuro_faf95_eas=0.500629;non_neuro_faf99_eas=0.500629;non_topmed_faf95_afr=0.47807;non_topmed_faf99_afr=0.47807;controls_faf95=0.32508;controls_faf99=0.325081;controls_popmax=eas;controls_AC_popmax=231;controls_AN_popmax=416;controls_AF_popmax=0.555288;controls_nhomalt_popmax=27;popmax=eas;AC_popmax=359;AN_popmax=656;AF_popmax=0.547256;nhomalt_popmax=35;age_hist_het_bin_freq=128|162|214|283|349|260|234|152|93|46;age_hist_het_n_smaller=717;age_hist_het_n_larger=23;age_hist_hom_bin_freq=9|11|18|24|26|15|20|8|12|6;age_hist_hom_n_smaller=82;age_hist_hom_n_larger=4;non_neuro_popmax=eas;non_neuro_AC_popmax=359;non_neuro_AN_popmax=656;non_neuro_AF_popmax=0.547256;non_neuro_nhomalt_popmax=35;non_topmed_popmax=eas;non_topmed_AC_popmax=351;non_topmed_AN_popmax=640;non_topmed_AF_popmax=0.548438;non_topmed_nhomalt_popmax=34\");\r\n\r\n            writer.Flush();\r\n\r\n            stream.Position = 0;\r\n            return stream;\r\n        }\r\n\r\n        private static IEnumerable<ISupplementaryDataItem> GetConflictingGnomadItems()\r\n        {\r\n            var sequence = new SimpleSequence(new string('T', 
VariantUtils.MaxUpstreamLength) + \"AAAGAAAGAAAG\", 17467787 - 1 - VariantUtils.MaxUpstreamLength);\r\n            var sequenceProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh38, sequence, ChromosomeUtilities.RefNameToChromosome);\r\n\r\n            var gnomadReader = new GnomadSnvReader(new StreamReader(GetChr22_17467787_17467799_genome()), null, sequenceProvider);\r\n\r\n            return gnomadReader.GetCombinedItems();\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/SAUtils/Omim/OmimUtilitiesTests.cs",
    "content": "﻿using System.Linq;\r\nusing Newtonsoft.Json;\r\nusing SAUtils.DataStructures;\r\nusing SAUtils.Omim;\r\nusing SAUtils.Omim.EntryApiResponse;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.SAUtils.Omim\r\n{\r\n    public sealed class OmimUtilitiesTests\r\n    {\r\n        [Theory]\r\n        [InlineData(\"In unstressed cells, p53 (not removed) is {kept} inactive essentially through the actions of the ubiquitin ligase MDM2 ({164785}) and a 28-kD beta subunits (ETFB; {130410}), which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 ({305:Toledo and Wahl, 2006}; {30:Bourdon, 2007}; {324:Vousden and Lane, 2007}).\", \"In unstressed cells, p53 (not removed) is kept inactive essentially through the actions of the ubiquitin ligase MDM2 and a 28-kD beta subunits (ETFB), which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007).\")]\r\n        [InlineData(\"macules (summary by {2:Baas et al., 2013}).\", \"macules (summary by Baas et al., 2013).\")]\r\n        [InlineData(\"(MMR) ({18,17:Fishel et al., 1993, 1994}).\", \"(MMR) (Fishel et al., 1993, 1994).\")]\r\n        [InlineData(\"({516030}, {516040}, and {516050})\", \"\")]\r\n        [InlineData(\"(e.g., D1, {168461}; D2, {123833}; D3, {123834})\", \"(e.g., D1; D2; D3)\")]\r\n        [InlineData(\"(desmocollins; see DSC2, {125645})\", \"(desmocollins; see DSC2)\")]\r\n        [InlineData(\"(e.g., see {102700}, {300755})\", \"\")]\r\n        [InlineData(\"(ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650})\", \"(ADH). 
See also liver mitochondrial ALDH2\")]\r\n        [InlineData(\"(see, e.g., CACNA1A; {601011})\", \"(see, e.g., CACNA1A)\")]\r\n        [InlineData(\"(e.g., GSTA1; {138359}), mu (e.g., {138350})\", \"(e.g., GSTA1), mu\")]\r\n        [InlineData(\"(NFKB; see {164011})\", \"(NFKB)\")]\r\n        [InlineData(\"(see ISGF3G, {147574})\", \"(see ISGF3G)\")]\r\n        [InlineData(\"(DCK; {EC 2.7.1.74}; {125450})\", \"(DCK; EC 2.7.1.74)\")]\r\n        [InlineData(\"chromosome 13q21 (see {603680.0001} and {613289.0001}).\", \"chromosome 13q21.\")]\r\n        [InlineData(\"common genetic haptoglobin types, Hp1 ({140100.0001}), Hp2 ({140100.0002}), and the heterozygous phenotype Hp2-1.\", \"common genetic haptoglobin types, Hp1, Hp2, and the heterozygous phenotype Hp2-1.\")]\r\n        [InlineData(\"and RBBP7/4 ({300825}/{602923}).\", \"and RBBP7/4.\")]\r\n        [InlineData(\"ultimately to formation of fibrin ({134570}/{134580}).\", \"ultimately to formation of fibrin.\")]\r\n        public void RemoveLinks_AsExpected(string input, string output)\r\n        {\r\n            Assert.Equal(output, input.RemoveLinks());\r\n        }\r\n        \r\n        [Theory]\r\n        [InlineData(\"<Subhead> UGT1A Gene Complex\", \" UGT1A Gene Complex\")]\r\n        public void RemoveFormatControl_AsExpected(string input, string output)\r\n        {\r\n            Assert.Equal(output, input.RemoveFormatControl());\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(\"[Beta-glycopyranoside tasting], (3) {Alcohol dependence, susceptibility to}\", \"[Beta-glycopyranoside tasting], {Alcohol dependence, susceptibility to}\", \"2,3\")]\r\n        [InlineData(\"?Proteasome-associated autoinflammatory syndrome 3, digenic\", \"?Proteasome-associated autoinflammatory syndrome 3, digenic\", \"1\")]\r\n        [InlineData(\"{?Thyroid cancer, nonmedullary, 5}\", \"{?Thyroid cancer, nonmedullary, 5}\", \"3,1\")]\r\n        [InlineData(\"Methylmalonic aciduria, mut(0) type\", \"Methylmalonic 
aciduria, mut(0) type\", \"0\")]\r\n        [InlineData(\"?{Diabetes, susceptibility to},\", \"?{Diabetes, susceptibility to}\", \"1,3\")]\r\n        public void ExtractPhenotypeAndComments_AsExpected(string input, string expectedPhenotype, string commentsEnumString)\r\n        {\r\n            (string phenotype, var comments) = OmimUtilities.ExtractPhenotypeAndComments(input);\r\n\r\n            var expectedComments = commentsEnumString.Split(',').Select(x => (OmimItem.Comment) byte.Parse(x)).Where(x => x != OmimItem.Comment.unknown).ToArray();\r\n            \r\n            Assert.Equal(expectedPhenotype, phenotype);\r\n            Assert.Equal(expectedComments, comments);\r\n        }\r\n\r\n\r\n        [Fact]\r\n        public void ExtractAndProcessItemDescription_AsExpected()\r\n        {\r\n            const string textSectionJson = \"{\\\"textSection\\\":{\\\"textSectionName\\\": \\\"description\\\",\\\"textSectionTitle\\\": \\\"Description\\\",\\\"textSectionContent\\\": \\\"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome with 4 main tumor types: hematologic malignancies, brain/central nervous system tumors, colorectal tumors and multiple intestinal polyps, and other malignancies including embryonic tumors and rhabdomyosarcoma. 
Many patients show signs reminiscent of neurofibromatosis type I (NF1; {162200}), particularly multiple cafe-au-lait macules (summary by {2:Baas et al., 2013}).\\n\\n'Turcot syndrome' classically refers to the combination of colorectal polyposis and primary tumors of the central nervous system ({13:Hamilton et al., 1995}).\\\"}}\";\r\n            var textSection = JsonConvert.DeserializeObject<TextSection>(textSectionJson);\r\n            var entryItem = new EntryItem{textSectionList = new []{textSection}};\r\n            var description = OmimUtilities.ExtractAndProcessItemDescription(entryItem);\r\n\r\n            const string expected = \"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome with 4 main tumor types: hematologic malignancies, brain/central nervous system tumors, colorectal tumors and multiple intestinal polyps, and other malignancies including embryonic tumors and rhabdomyosarcoma. Many patients show signs reminiscent of neurofibromatosis type I (NF1), particularly multiple cafe-au-lait macules (summary by Baas et al., 2013).\\n\\n'Turcot syndrome' classically refers to the combination of colorectal polyposis and primary tumors of the central nervous system (Hamilton et al., 1995).\";\r\n\r\n            Assert.Equal(expected, description);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/SAUtils/ParseUtils/SplitLineTests.cs",
    "content": "using SAUtils.ParseUtils;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.ParseUtils;\n\npublic sealed class SplitLineTests\n{\n    [Theory]\n    [InlineData(\"SomeString\\tAnotherString\", 0, \"SomeString\")]\n    [InlineData(\"SomeString\\tAnotherString\", 1, \"AnotherString\")]\n    [InlineData(\"\\tAnotherString\",           0, \"\")]\n    [InlineData(\"\\tAnotherString\",           1, \"AnotherString\")]\n    [InlineData(\"SomeString\\t\",              1, \"\")]\n    [InlineData(\"SomeString\\t\",              0, \"SomeString\")]\n    [InlineData(\"\\t\",                        0, \"\")]\n    [InlineData(\"\",                          0, \"\")]\n    public void TestGetString(string inputLine, int index, string expectedString)\n    {\n        var splitLine = new SplitLine(inputLine, '\\t');\n        Assert.Equal(expectedString, splitLine.GetString(index));\n    }\n\n    [Theory]\n    [InlineData(\"SomeString\\t1\",   0, null)]\n    [InlineData(\"SomeString\\t1\",   1, 1)]\n    [InlineData(\"SomeString\\t2.0\", 1, 2)]\n    [InlineData(\"\\t1\",             0, null)]\n    [InlineData(\"\\t1\",             1, 1)]\n    [InlineData(\"SomeString\\t\",    1, null)]\n    [InlineData(\"SomeString\\t\",    0, null)]\n    [InlineData(\"\\t\",              0, null)]\n    [InlineData(\"\",                0, null)]\n    [InlineData(\"A1\",              0, null)]\n    [InlineData(\"-1\",              0, -1)]\n    public void TestParseInteger(string inputLine, int index, int? 
expectedInt)\n    {\n        var splitLine = new SplitLine(inputLine, '\\t');\n        Assert.Equal(expectedInt, splitLine.ParseInteger(index));\n    }\n    \n    [Theory]\n    [InlineData(\"SomeString\\t1\",   0, null)]\n    [InlineData(\"SomeString\\t1\",   1, 1.0)]\n    [InlineData(\"SomeString\\t2.0\", 1, 2.0)]\n    [InlineData(\"\\t1\",             0, null)]\n    [InlineData(\"\\t1\",             1, 1.0)]\n    [InlineData(\"SomeString\\t\",    1, null)]\n    [InlineData(\"SomeString\\t\",    0, null)]\n    [InlineData(\"\\t\",              0, null)]\n    [InlineData(\"\",                0, null)]\n    [InlineData(\"A1\",              0, null)]\n    [InlineData(\"-1\",              0, -1.0)]\n    public void TestParseDouble(string inputLine, int index, double? expectedDouble)\n    {\n        var splitLine = new SplitLine(inputLine, '\\t');\n        Assert.Equal(expectedDouble, splitLine.ParseDouble(index));\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/ParseUtils/TsvIndicesTests.cs",
    "content": "using SAUtils.ParseUtils;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.ParseUtils;\n\npublic class TsvIndicesTests\n{\n    [Theory]\n    [InlineData(0, 1)]\n    public void TestTsvIndices(ushort chromosomeIndex, ushort startIndex)\n    {\n        var tsvIndices = new TsvIndices()\n        {\n            Chromosome = chromosomeIndex,\n            Start = startIndex\n        };\n        \n        Assert.Equal(tsvIndices.Chromosome, chromosomeIndex);\n        Assert.Equal(tsvIndices.Start, startIndex);\n        Assert.Equal(tsvIndices.SvType, ushort.MaxValue);\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/PhylopTests.cs",
    "content": "﻿using System;\nusing System.IO;\nusing Genome;\nusing IO;\nusing SAUtils.InputFileParsers;\nusing SAUtils.PhyloP;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.PhyloP;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\nusing Xunit;\n\nnamespace UnitTests.SAUtils\n{\n    public sealed class PhylopTests\n    {\n        [Fact]\n        public void LoopbackTest()\n        {\n            var wigFixFile = Resources.TopPath(\"mini.WigFix\");\n            var version = new DataSourceVersion(\"phylop\", \"0\", DateTime.Now.Ticks, \"unit test\");\n            \n            using(var reader      = new PhylopParser(FileUtilities.GetReadStream(wigFixFile),GenomeAssembly.GRCh37, ChromosomeUtilities.RefNameToChromosome))\n            using (var npdStream  = new MemoryStream())\n            using(var indexStream = new MemoryStream())\n            using (var npdWriter  = new NpdWriter(npdStream, indexStream, version, GenomeAssembly.GRCh37, SaCommon.PhylopTag, SaCommon.SchemaVersion))\n            {\n                npdWriter.Write(reader.GetItems());\n\n                npdStream.Position = 0;\n                indexStream.Position = 0;\n\n                using (var phylopReader = new NpdReader(npdStream, indexStream))\n                {\n                    Assert.Equal(0.1, phylopReader.GetAnnotation(ChromosomeUtilities.Chr1, 100));//first position of first block\n                    Assert.Equal(0.1, phylopReader.GetAnnotation(ChromosomeUtilities.Chr1, 101));// second position\n                    Assert.Equal(0.1, phylopReader.GetAnnotation(ChromosomeUtilities.Chr1, 120));// some internal position\n                    Assert.Equal(0.1, phylopReader.GetAnnotation(ChromosomeUtilities.Chr1, 130));//last position of first block\n\n                    //moving on to the next block: should cause reloading from file\n                    Assert.Equal(0.1, phylopReader.GetAnnotation(ChromosomeUtilities.Chr1, 175));//first position of second 
block\n                    Assert.Equal(-2.1, phylopReader.GetAnnotation(ChromosomeUtilities.Chr1, 182));// some negative value\n\n                    //chrom 2\n                    Assert.Null(phylopReader.GetAnnotation(ChromosomeUtilities.Chr2, 400));//values past the last phylop positions should return null\n                }\n            }\n            \n        }\n\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/PrimateAi/PrimateAiTests.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing System.Text;\nusing Moq;\nusing SAUtils.DataStructures;\nusing SAUtils.PrimateAi;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Interface.SA;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.PrimateAi\n{\n    public sealed class PrimateAiTests\n    {\n        private static ISequenceProvider GetSequenceProvider()\n        {\n            var mockProvider = new Mock<ISequenceProvider>();\n            mockProvider.SetupGet(x => x.RefNameToChromosome).Returns(ChromosomeUtilities.RefNameToChromosome);\n            mockProvider.SetupGet(x => x.RefIndexToChromosome).Returns(ChromosomeUtilities.RefIndexToChromosome);\n            return mockProvider.Object;\n        }\n        private static Stream GetStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"#CHROM\\tPOS\\tREF\\tALT\\tGeneId\\tScorePercentile\");\n            writer.WriteLine(\"1\\t69094\\tG\\tA\\t79501\\t0.79\");\n            writer.WriteLine(\"1\\t69094\\tG\\tC\\t79501\\t0.75\");\n            writer.WriteLine(\"1\\t69094\\tG\\tT\\t79501\\t0.75\");\n\n            writer.WriteLine(\"1\\t69097\\tA\\tG\\t79501\\t0.56\");\n            writer.WriteLine(\"1\\t69097\\tA\\tC\\t79501\\t0.57\");\n            writer.WriteLine(\"1\\t69097\\tA\\tT\\t79501\\t0.54\");\n\n            writer.WriteLine(\"1\\t56197104\\tA\\tG\\tENSG00000234810\\t0.80\");\n            writer.WriteLine(\"1\\t56197443\\tC\\tT\\tENSG00000234810\\t0.20\");\n            writer.WriteLine(\"1\\t56197476\\tC\\tT\\tENSG00000234810\\t0.40\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void ExtractEntries()\n        {\n            var entrezToHgnc = new Dictionary<string, string>\n            {\n                { 
\"79501\", \"Gene1\" }\n            };\n\n            var ensemblToHgnc = new Dictionary<string, string>\n            {\n                {\"ENSG00000234810\", \"Gene2\" }\n            };\n            var primateParser = new PrimateAiParser(GetStream(), GetSequenceProvider(), entrezToHgnc, ensemblToHgnc);\n\n            var items = primateParser.GetItems().ToList();\n\n            Assert.Equal(9, items.Count);\n            Assert.Equal(\"\\\"hgnc\\\":\\\"Gene1\\\",\\\"scorePercentile\\\":0.79\", items[0].GetJsonString());\n            Assert.Equal(\"\\\"hgnc\\\":\\\"Gene2\\\",\\\"scorePercentile\\\":0.2\", items[7].GetJsonString());\n\n        }\n\n        private static Stream GetDuplicateItemStream()\n        {\n            var stream = new MemoryStream();\n            using (var writer = new StreamWriter(stream, Encoding.Default, 1024, true))\n            {\n                writer.WriteLine(\"#CHROM\\tPOS\\tREF\\tALT\\tGeneId\\tScorePercentile\");\n                writer.WriteLine(\"4\\t155713\\tA\\tG\\t255403\\t0.03\");\n                writer.WriteLine(\"4\\t155713\\tA\\tG\\t255403\\t0.93\");\n            }\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void ResolveDuplicates()\n        {\n            var entrezToHgnc = new Dictionary<string, string>\n            {\n                { \"255403\", \"Gene1\"}\n            };\n\n            var ensemblToHgnc = new Dictionary<string, string>\n            {\n                {\"ENSG00000234810\", \"Gene2\" }\n            };\n            var primateParser = new PrimateAiParser(GetDuplicateItemStream(), GetSequenceProvider(), entrezToHgnc, ensemblToHgnc);\n\n            var items = primateParser.GetItems().Cast<ISupplementaryDataItem>().ToList();\n\n            var deDupItems = SuppDataUtilities.DeDuplicatePrimateAiItems(items);\n\n            Assert.Single(deDupItems);\n            Assert.Equal(\"\\\"hgnc\\\":\\\"Gene1\\\",\\\"scorePercentile\\\":0.93\", 
deDupItems[0].GetJsonString());\n\n        }\n\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/ProteinConservation/ParserTests.cs",
    "content": "﻿using System.IO;\nusing System.Linq;\nusing SAUtils.AAConservation;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.ProteinConservation\n{\n    public sealed class ParserTests\n    {\n        private static Stream GetStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"#Ensembl\\tChromosome\\tProteinSequence\\tPercent Conservation at each AA residue\");\n            writer.WriteLine(\"ENST00000641515\\tchr1\\tMKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLLHFFGGSEMVILIAMGFDRYIAICKPLHYTTIMCGNACVGIMAVTWGIGFLHSVSQLAFAVHLLFCGPNEVDSFYCDLPRVIKLACTDTYRLDIMVIANSGVLTVCSFVLLIISYTIILMTIQHRPLDKSSKALSTLTAHITVVLLFFGPCVFIYAWPFPIKSLDKFLAVFYSVITPLLNPIIYTLRNKDMKTAIRQLRKWDAHSSVKFZ\\t40,38,11,34,37,11,31,17,25,31,38,39,37,38,12,35,33,33,52,9,47,20,66,56,61,73,50,40,57,71,62,55,28,47,34,55,51,61,6,44,58,61,5,29,66,38,42,43,71,10,41,55,46,35,65,79,54,49,56,44,32,47,54,17,34,63,40,49,79,67,54,81,78,77,63,52,74,46,65,72,61,37,40,71,55,23,41,50,57,55,57,44,79,65,53,40,18,60,45,43,46,16,27,60,40,73,70,57,35,61,81,46,15,80,41,62,37,43,60,45,50,58,51,44,69,20,42,41,79,37,48,78,9,49,81,81,75,17,77,72,76,46,80,79,37,80,44,49,64,70,24,30,29,8,66,31,41,30,8,40,15,5,73,38,23,63,54,50,57,56,41,41,57,39,41,49,40,39,9,73,1,67,75,62,58,63,36,40,53,44,54,48,76,71,42,58,44,56,47,52,74,60,77,43,62,57,48,36,43,37,49,41,47,41,48,51,56,60,44,44,39,45,36,37,58,33,43,55,44,50,70,73,43,31,66,61,20,45,48,36,18,27,43,5,25,10,42,41,81,72,52,61,79,43,39,44,76,49,52,67,66,42,63,64,57,52,55,48,11,56,49,67,43,40,63,50,43,35,35,47,45,58,58,49,41,49,58,47,53,39,56,22,55,72,46,62,80,76,43,78,80,56,70,61,65,61,43,52,64,30,57,19,39,4,50,35,31,28,10,8,8,30,8,19,33,7,39\");\n            
writer.WriteLine(\"ENST00000335137\\tchr1\\tMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLLHFFGGSEMVILIAMGFDRYIAICKPLHYTTIMCGNACVGIMAVTWGIGFLHSVSQLAFAVHLLFCGPNEVDSFYCDLPRVIKLACTDTYRLDIMVIANSGVLTVCSFVLLIISYTIILMTIQHRPLDKSSKALSTLTAHITVVLLFFGPCVFIYAWPFPIKSLDKFLAVFYSVITPLLNPIIYTLRNKDMKTAIRQLRKWDAHSSVKFZ\\t20,66,56,61,73,50,40,57,71,62,55,28,47,34,55,51,61,6,44,58,61,5,29,66,38,42,43,71,10,41,55,46,35,65,79,54,49,56,44,32,47,54,17,34,63,40,49,79,67,54,81,78,77,63,52,74,46,65,72,61,37,40,71,55,23,41,50,57,55,57,44,79,65,53,40,18,60,45,43,46,16,27,60,40,73,70,57,35,61,81,46,15,80,41,62,37,43,60,45,50,58,51,44,69,20,42,41,79,37,48,78,9,49,81,81,75,17,77,72,76,46,80,79,37,80,44,49,64,70,24,30,29,8,66,31,41,30,8,40,15,5,73,38,23,63,54,50,57,56,41,41,57,39,41,49,40,39,9,73,1,67,75,62,58,63,36,40,53,44,54,48,76,71,42,58,44,56,47,52,74,60,77,43,62,57,48,36,43,37,49,41,47,41,48,51,56,60,44,44,39,45,36,37,58,33,43,55,44,50,70,73,43,31,66,61,20,45,48,36,18,27,43,5,25,10,42,41,81,72,52,61,79,43,39,44,76,49,52,67,66,42,63,64,57,52,55,48,11,56,49,67,43,40,63,50,43,35,35,47,45,58,58,49,41,49,58,47,53,39,56,22,55,72,46,62,80,76,43,78,80,56,70,61,65,61,43,52,64,30,57,19,39,4,50,35,31,28,10,8,8,30,8,19,33,7,39\");\n            
writer.WriteLine(\"ENST00000379407\\tchr1\\tMGNSHCVPQAPRRLRASFSRKPSLKGNREDSARMSAGLPGPEAARSGDAAANKLFHYIPGTDILDLENQRENLEQPFLSVFKKGRRRVPVRNLGKVVHYAKVQLRFQHSQDVSDCYLELFPAHLYFQAHGSEGLTFQGLLPLTELSVCPLEGSREHAFQITGPLPAPLLVLCPSRAELDRWLYHLEKQTALLGGPRRCHSAPPQGSCGDELPWTLQRRLTRLRTASGHEPGGSAVCASRVKLQHLPAQEQWDRLLVLYPTSLAIFSEELDGLCFKGELPLRAVHINLEEKEKQIRSFLIEGPLINTIRVVCASYEDYGHWLLCLRAVTHREGAPPLPGAESFPGSQVMGSGRGSLSSGGQTSWDSGCLAPPSTRTSHSLPESSVPSTVGCSSQHTPLHRLSLESSPDAPDHTSETSHSPLYADPYTPPATSHRRVTDVRGLEEFLSAMQSARGPTPSSPLPSVPVSVPASDPRSCSSGPAGPYLLSKKGALQSRAAQRHRGSAKDGGPQPPDAPQLVSSAREGSPEPWLPLTDGRSPRRSRDPGYDHLWDETLSSSHQKCPQLGGPEASGGLVQWIZ\\t67,70,66,54,41,68,60,69,67,60,66,55,63,54,71,61,68,66,59,68,67,63,66,67,68,51,40,56,55,53,20,35,57,9,3,50,52,55,1,64,12,52,47,12,7,11,47,58,28,39,53,15,50,15,49,25,56,53,61,56,45,48,60,15,13,50,60,31,65,31,72,52,67,49,73,61,67,60,71,59,72,71,62,68,65,71,49,42,51,80,73,60,73,79,66,58,48,60,74,48,76,78,53,60,59,75,72,77,60,76,56,46,57,69,74,64,76,73,75,73,53,15,57,80,69,72,72,60,54,70,58,40,77,73,73,53,75,67,61,59,61,66,25,56,63,52,42,52,46,41,54,53,48,46,59,55,57,59,57,59,61,56,70,68,58,51,70,69,54,68,52,69,57,51,11,53,68,62,14,40,70,73,65,66,70,68,71,69,11,57,72,16,61,68,9,22,40,52,48,39,38,46,48,71,51,12,7,27,47,46,50,52,48,39,44,41,30,33,44,40,36,53,41,33,36,47,50,22,28,12,12,53,38,47,12,61,54,58,55,59,55,61,52,52,53,52,9,52,59,53,42,54,59,52,64,56,61,57,60,39,52,59,52,59,52,59,57,55,8,46,56,59,42,57,57,66,64,66,62,62,13,60,18,47,49,54,54,54,57,39,47,57,53,55,54,58,63,63,62,65,64,18,65,68,66,54,69,70,69,41,70,57,65,66,62,68,66,13,43,70,65,54,70,67,33,17,38,12,18,51,13,45,29,29,27,52,41,54,11,50,44,45,43,65,15,29,41,8,54,33,54,52,63,60,58,50,60,10,61,13,54,55,56,40,51,53,47,17,45,41,36,47,42,25,55,64,40,56,57,61,51,44,42,39,45,37,44,14,47,45,9,13,36,8,11,46,35,9,38,30,39,55,45,13,26,40,9,46,51,41,35,6,12,48,36,11,53,61,64,59,57,60,58,60,57,59,59,64,56,54,57,49,25,15,15,52,57,51,33,47,60,14,56,56,57,46,52,44,47,46,5,3,55,59,2,48,37,60,44,11,51,34,48,55,5
2,59,54,68,12,51,46,45,3,23,2,19,13,42,27,5,35,46,5,49,47,43,41,57,46,39,36,49,31,56,51,6,50,53,51,52,51,60,16,54,10,8,44,43,23,54,43,46,3,53,53,44,53,50,19,31,44,50,31,43,55,13,49,3,28,52,15,13,44,44,15,22,43,28,17,9,4,6,18,13,33,20,10,12,47,42,16,24,21,45,17,56,36,39,34,11,50,19,23,38,18,47,49,44,10,36,52,49,18,58,59,59,58\");\n            writer.WriteLine(\"ENST00000379319\\tchr1\\tMALRHLALLAGLLVGVASKSMENTAQLPECCVDVVGVNASCPGASLCGPGCYRRWNADGSASCVRCGNGTLPAYNGSECRSFAGPGAPFPMNRSSGTPGRPHPGAPRVAASLFLGTFFISSGLILSVAGFFYLKRSSKLPRACYRRNKAPALQPGEAAAMIPPPQSSVRKPRYVRRERPLDRATDPAAFPGEARISNVZ\\t57,48,53,12,18,25,12,54,62,51,49,52,51,45,23,51,44,56,41,52,8,44,9,4,26,52,10,55,53,61,61,46,52,47,33,11,26,59,39,19,64,55,53,10,33,54,64,53,62,60,62,59,53,24,38,39,24,55,63,52,21,57,61,38,40,61,13,60,45,59,17,29,22,21,40,32,59,52,65,55,31,11,34,49,2,49,33,18,41,52,31,54,51,55,18,55,36,55,56,30,49,25,17,49,14,73,19,75,68,77,77,76,56,78,78,78,46,50,66,78,44,55,80,80,79,68,77,77,37,76,78,78,78,76,78,47,57,63,80,76,23,3,13,75,57,71,55,64,52,50,42,54,54,54,46,56,47,57,49,58,52,59,45,63,47,48,58,52,65,64,67,66,58,52,65,64,58,60,50,34,46,39,20,18,23,44,26,45,25,12,3,47,50,59,21,64,52,64,59\");\n            \n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void ReadItemsTest()\n        {\n            using (var stream = GetStream())\n            using (var parser = new ProteinConservationParser(stream))\n            {\n                var items = parser.GetItems().ToArray();\n                \n                Assert.Equal(4, items.Length);\n                \n                Assert.Equal(\"ENST00000641515\", items[0].TranscriptId);\n                Assert.Equal(new 
byte[]{40,38,11,34,37,11,31,17,25,31,38,39,37,38,12,35,33,33,52,9,47,20,66,56,61,73,50,40,57,71,62,55,28,47,34,55,51,61,6,44,58,61,5,29,66,38,42,43,71,10,41,55,46,35,65,79,54,49,56,44,32,47,54,17,34,63,40,49,79,67,54,81,78,77,63,52,74,46,65,72,61,37,40,71,55,23,41,50,57,55,57,44,79,65,53,40,18,60,45,43,46,16,27,60,40,73,70,57,35,61,81,46,15,80,41,62,37,43,60,45,50,58,51,44,69,20,42,41,79,37,48,78,9,49,81,81,75,17,77,72,76,46,80,79,37,80,44,49,64,70,24,30,29,8,66,31,41,30,8,40,15,5,73,38,23,63,54,50,57,56,41,41,57,39,41,49,40,39,9,73,1,67,75,62,58,63,36,40,53,44,54,48,76,71,42,58,44,56,47,52,74,60,77,43,62,57,48,36,43,37,49,41,47,41,48,51,56,60,44,44,39,45,36,37,58,33,43,55,44,50,70,73,43,31,66,61,20,45,48,36,18,27,43,5,25,10,42,41,81,72,52,61,79,43,39,44,76,49,52,67,66,42,63,64,57,52,55,48,11,56,49,67,43,40,63,50,43,35,35,47,45,58,58,49,41,49,58,47,53,39,56,22,55,72,46,62,80,76,43,78,80,56,70,61,65,61,43,52,64,30,57,19,39,4,50,35,31,28,10,8,8,30,8,19,33,7,39} , items[0].Scores);\n                \n                Assert.Equal(\"ENST00000379319\", items[3].TranscriptId);\n            }\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/Revel/RevelParserTests.cs",
    "content": "﻿using System.IO;\nusing System.Linq;\nusing SAUtils.GenericScore;\nusing SAUtils.GenericScore.GenericScoreParser;\nusing UnitTests.TestUtilities;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.Revel\n{\n    public sealed class RevelParserTests\n    {\n        private static Stream GetStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##REVEL\");\n            writer.WriteLine(\"#chr\\tpos\\tref\\talt\\trefAA\\taltAA\\tscore\");\n            writer.WriteLine(\"1\\t35290\\tG\\tA\\tP\\tD\\t0.035\");\n            writer.WriteLine(\"1\\t35290\\tG\\tA\\tP\\tS\\t0.031\");\n            writer.WriteLine(\"1\\t35290\\tG\\tC\\tP\\tA\\t0.040\");\n            writer.WriteLine(\"1\\t35290\\tG\\tT\\tP\\tT\\t0.035\");\n            writer.WriteLine(\"1\\t35290\\tG\\tC\\tP\\tA\\t0.063\");\n            writer.WriteLine(\"1\\t35291\\tG\\tC\\tF\\tL\\t0.022\");\n            writer.WriteLine(\"1\\t35291\\tG\\tT\\tF\\tL\\t0.022\");\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void RevelReader_GetItems_AsExpected()\n        {\n            var nucleotides = new[] {\"A\", \"C\", \"G\", \"T\"};\n\n            var revelParserSettings = new ParserSettings(\n                new ColumnIndex(0, 1, 2, 3, 6, null),\n                nucleotides,\n                GenericScoreParser.MaxRepresentativeScores\n            );\n\n            using (var streamReader = new StreamReader(GetStream()))\n            using (var reader = new GenericScoreParser(revelParserSettings, streamReader, ChromosomeUtilities.RefNameToChromosome))\n            {\n                var revelItems = reader.GetItems().ToArray();\n                Assert.Equal(5,                 revelItems.Length);\n                Assert.Equal(35290,             revelItems[0].Position);\n                Assert.Equal(\"G\",               
revelItems[0].RefAllele);\n                Assert.Equal(\"A\",               revelItems[0].AltAllele);\n                Assert.Equal(\"\\\"score\\\":0.035\", revelItems[0].GetJsonString());\n                Assert.Equal(35290,             revelItems[1].Position);\n                Assert.Equal(\"G\",               revelItems[1].RefAllele);\n                Assert.Equal(\"C\",               revelItems[1].AltAllele);\n                Assert.Equal(\"\\\"score\\\":0.063\", revelItems[1].GetJsonString());\n                Assert.Equal(35291,             revelItems[4].Position);\n                Assert.Equal(\"G\",               revelItems[4].RefAllele);\n                Assert.Equal(\"T\",               revelItems[4].AltAllele);\n                Assert.Equal(\"\\\"score\\\":0.022\", revelItems[4].GetJsonString());\n            }\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/SaJsonSchemaTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Text;\r\nusing ErrorHandling.Exceptions;\r\nusing SAUtils.Schema;\r\nusing VariantAnnotation.SA;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.SAUtils\r\n{\r\n    public sealed class SaJsonSchemaTests\r\n    {\r\n        private const string SchemaVersion = \"http://json-schema.org/draft-06/schema#\";\r\n\r\n        [Fact]\r\n        public void Create_InitialJsonObject_AsExpected()\r\n        {\r\n            var sb = new StringBuilder();\r\n            SaJsonSchema.Create(sb, \"test\", SaJsonValueType.ObjectArray, new List<string>());\r\n            const string expectedJsonString = \"{\\\"$schema\\\":\\\"\" + SchemaVersion + \"\\\",\\\"type\\\":\\\"object\\\",\\\"properties\\\":{\\\"test\\\":{\\\"type\\\":\\\"array\\\",\\\"items\\\":{\\\"type\\\":\\\"object\\\",\\\"properties\\\":{\";\r\n            Assert.Equal(expectedJsonString, sb.ToString());\r\n        }\r\n\r\n        [Fact]\r\n        public void OutputKeyAnnotation_AsExpected()\r\n        {\r\n            var sb = new StringBuilder();\r\n            var jsonSchema = new SaJsonSchema(sb);\r\n            jsonSchema.AddAnnotation(\"name\", SaJsonKeyAnnotation.CreateFromProperties(SaJsonValueType.String, 0, null));\r\n            jsonSchema.OutputKeyAnnotation(\"name\");\r\n            Assert.Equal(\"\\\"name\\\":{\\\"type\\\":\\\"string\\\"}\", sb.ToString());\r\n        }\r\n\r\n        [Fact]\r\n        public void ToString_AsExpected()\r\n        {\r\n            var jsonSchema = SaJsonSchema.Create(new StringBuilder(), \"test\", SaJsonValueType.ObjectArray, new List<string> { \"name\", \"phone\", \"employed\" });\r\n            jsonSchema.AddAnnotation(\"name\", SaJsonKeyAnnotation.CreateFromProperties(SaJsonValueType.String, 0, null));\r\n            jsonSchema.AddAnnotation(\"phone\", SaJsonKeyAnnotation.CreateFromProperties(SaJsonValueType.Number, 0, \"phone number\"));\r\n            jsonSchema.AddAnnotation(\"employed\", 
SaJsonKeyAnnotation.CreateFromProperties(SaJsonValueType.Bool, 0, null));\r\n            jsonSchema.TotalItems = 100;\r\n            jsonSchema.KeyCounts[\"name\"] = 100;\r\n            jsonSchema.KeyCounts[\"phone\"] = 50;\r\n            jsonSchema.KeyCounts[\"employed\"] = 0;\r\n\r\n            const string expectedJsonSchemaString = \"{\\\"$schema\\\":\\\"\" + SchemaVersion + \"\\\",\\\"type\\\":\\\"object\\\",\\\"properties\\\":{\\\"test\\\":{\\\"type\\\":\\\"array\\\",\\\"items\\\":{\\\"type\\\":\\\"object\\\",\\\"properties\\\":{\"\r\n                                                  + \"\\\"name\\\":{\\\"type\\\":\\\"string\\\"},\\\"phone\\\":{\\\"type\\\":\\\"number\\\",\\\"description\\\":\\\"phone number\\\"}},\"\r\n                                                  + \"\\\"required\\\":[\\\"name\\\"],\\\"additionalProperties\\\":false}}}}\";\r\n\r\n            Assert.Equal(expectedJsonSchemaString, jsonSchema.ToString());\r\n            // make sure the returned string is the same when ToString method is called more than once\r\n            Assert.Equal(expectedJsonSchemaString, jsonSchema.ToString());\r\n        }\r\n\r\n        [Fact]\r\n        public void GetJsonString_AsExpected()\r\n        {\r\n            var jsonSchema = SaJsonSchema.Create(new StringBuilder(), \"test\", SaJsonValueType.ObjectArray, new List<string> { \"name\", \"phone\", \"employed\" });\r\n            jsonSchema.AddAnnotation(\"name\", SaJsonKeyAnnotation.CreateFromProperties(SaJsonValueType.String, 0, null));\r\n            jsonSchema.AddAnnotation(\"phone\", SaJsonKeyAnnotation.CreateFromProperties(SaJsonValueType.Number, 0, \"phone number\"));\r\n            jsonSchema.AddAnnotation(\"employed\", SaJsonKeyAnnotation.CreateFromProperties(SaJsonValueType.Bool, 0, null));\r\n            var jsonString = jsonSchema.GetJsonString(new List<string[]> { new[] { \"Ada\" }, new[] { \"123456\" }, new[] { \"true\" } });\r\n\r\n            
Assert.Equal(\"\\\"name\\\":\\\"Ada\\\",\\\"phone\\\":123456,\\\"employed\\\":true\", jsonString);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetJsonString_DoubleValueHandling_AsExpected()\r\n        {\r\n            var jsonSchema = SaJsonSchema.Create(new StringBuilder(), \"test\", SaJsonValueType.ObjectArray, new List<string> { \"allAf\", \"doubleValue1\", \"doubleValue2\" });\r\n            jsonSchema.AddAnnotation(\"allAf\", SaJsonKeyAnnotation.CreateFromProperties(SaJsonValueType.Number, CustomAnnotationCategories.AlleleFrequency, null));\r\n            jsonSchema.AddAnnotation(\"doubleValue1\", SaJsonKeyAnnotation.CreateFromProperties(SaJsonValueType.Number, 0, \"A double value\"));\r\n            jsonSchema.AddAnnotation(\"doubleValue2\", SaJsonKeyAnnotation.CreateFromProperties(SaJsonValueType.Number, 0, \"Another double value\"));\r\n            var jsonString = jsonSchema.GetJsonString(new List<string[]> { new[] { \"0.12345678\" }, new[] { \"0.12\" }, new[] { \"0.12345678\" } });\r\n\r\n            Assert.Equal(\"\\\"allAf\\\":0.123457,\\\"doubleValue1\\\":0.12,\\\"doubleValue2\\\":0.12345678\", jsonString);\r\n        }\r\n\r\n        [Fact]\r\n        public void CheckAndGetBoolFromString_AsExpected()\r\n        {\r\n            Assert.True(SaJsonSchema.CheckAndGetBoolFromString(\"true\"));\r\n            Assert.True(SaJsonSchema.CheckAndGetBoolFromString(\"TRUE\"));\r\n            Assert.False(SaJsonSchema.CheckAndGetBoolFromString(\"false\"));\r\n            Assert.False(SaJsonSchema.CheckAndGetBoolFromString(\"False\"));\r\n            Assert.False(SaJsonSchema.CheckAndGetBoolFromString(\"\"));\r\n            Assert.False(SaJsonSchema.CheckAndGetBoolFromString(\".\"));\r\n        }\r\n\r\n        [Fact]\r\n        public void CheckAndGetBoolFromString_InvalidValue_ThrowException()\r\n        {\r\n            Assert.Throws<UserErrorException>(() => SaJsonSchema.CheckAndGetBoolFromString(\"T\"));\r\n            
Assert.Throws<UserErrorException>(() => SaJsonSchema.CheckAndGetBoolFromString(\"F\"));\r\n            Assert.Throws<UserErrorException>(() => SaJsonSchema.CheckAndGetBoolFromString(\"0\"));\r\n            Assert.Throws<UserErrorException>(() => SaJsonSchema.CheckAndGetBoolFromString(\"-\"));\r\n        }\r\n\r\n        [Fact]\r\n        public void CheckAndGetNullableDoubleFromString_GetNull_AsExpected()\r\n        {\r\n            Assert.Null(SaJsonSchema.CheckAndGetNullableDoubleFromString(\"\"));\r\n            Assert.Null(SaJsonSchema.CheckAndGetNullableDoubleFromString(\".\"));\r\n        }\r\n\r\n        [Fact]\r\n        public void CheckAndGetNullableDoubleFromString_NotANum_ThrowException()\r\n        {\r\n            Assert.Throws<UserErrorException>(() => SaJsonSchema.CheckAndGetNullableDoubleFromString(\"Bob\"));\r\n            Assert.Throws<UserErrorException>(() => SaJsonSchema.CheckAndGetNullableDoubleFromString(\"1+1\"));\r\n            Assert.Throws<UserErrorException>(() => SaJsonSchema.CheckAndGetNullableDoubleFromString(\"bool\"));\r\n        }\r\n\r\n        [Fact]\r\n        public void GetJsonString__AsExpected()\r\n        {\r\n            var jsonSchema = SaJsonSchema.Create(new StringBuilder(), \"test\", SaJsonValueType.ObjectArray, new List<string> { \"allAf\", \"doubleValue1\", \"doubleValue2\" });\r\n            jsonSchema.AddAnnotation(\"allAf\", SaJsonKeyAnnotation.CreateFromProperties(SaJsonValueType.Number, CustomAnnotationCategories.AlleleFrequency, null ));\r\n            jsonSchema.AddAnnotation(\"doubleValue1\", SaJsonKeyAnnotation.CreateFromProperties(SaJsonValueType.Number, 0, \"A double value\" ));\r\n            jsonSchema.AddAnnotation(\"doubleValue2\", SaJsonKeyAnnotation.CreateFromProperties(SaJsonValueType.Number, 0, \"Another double value\" ));\r\n            var jsonString = jsonSchema.GetJsonString(new List<string[]> { new[] { \"0.12345678\" }, new[] { \"0.12\" }, new[] { \"0.12345678\" } });\r\n\r\n            
Assert.Equal(\"\\\"allAf\\\":0.123457,\\\"doubleValue1\\\":0.12,\\\"doubleValue2\\\":0.12345678\", jsonString);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/SAUtils/SpliceAi/SpliceAiTests.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing CacheUtils.TranscriptCache;\nusing Genome;\nusing Intervals;\nusing Moq;\nusing SAUtils.SpliceAi;\nusing UnitTests.TestDataStructures;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.AnnotatedPositions.Transcript;\nusing VariantAnnotation.Caches;\nusing VariantAnnotation.Caches.DataStructures;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.Interface.Caches;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.IO.Caches;\nusing Variants;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.SpliceAi\n{\n    public sealed class SpliceAiTests\n    {\n        private static Dictionary<string, string> GetSpliceToNirvanaGenes()\n        {\n            return new Dictionary<string, string>\n            {\n                {\"TUBB8\", \"TUBB8\"},\n                {\"CDK11B\", \"CDK11B\" },\n                {\"MMP23B\", \"MMP23B\" },\n                {\"KRTAP19-3\", \"KRTAP19-3\" },\n                {\"KRTAP19-2\", \"KRTAP19-2\" },\n                { \"CECR5\", \"CECR5\"},\n                { \"SPLICE\", \"NIR91\"}\n            };\n        }\n\n        private static ISequenceProvider GetSequenceProvider()\n        {\n            var mockProvider = new Mock<ISequenceProvider>();\n            mockProvider.SetupGet(x => x.RefNameToChromosome).Returns(ChromosomeUtilities.RefNameToChromosome);\n            mockProvider.SetupGet(x => x.RefIndexToChromosome).Returns(ChromosomeUtilities.RefIndexToChromosome);\n            //only for unit tests that uses variants at 17148654 \n            mockProvider.SetupGet(x => x.Sequence).Returns(new SimpleSequence(new string('T', VariantUtils.MaxUpstreamLength) + \"GAAAAA\", 17148654 - 1 - VariantUtils.MaxUpstreamLength));\n            return mockProvider.Object;\n        }\n\n        private static ISequenceProvider GetCacheSequenceProvider()\n        {\n            var mockProvider = new 
Mock<ISequenceProvider>();\n            mockProvider.SetupGet(x => x.RefNameToChromosome).Returns(ChromosomeUtilities.RefNameToChromosome);\n            mockProvider.SetupGet(x => x.RefIndexToChromosome).Returns(ChromosomeUtilities.RefIndexToChromosome);\n            return mockProvider.Object;\n        }\n\n        private static Dictionary<ushort, IntervalArray<byte>> GetSpliceIntervals()\n        {\n            var intervals10 = new[]\n            {\n                new Interval<byte>(92946 - SpliceUtilities.SpliceFlankLength, 92946 + SpliceUtilities.SpliceFlankLength, 0),\n                new Interval<byte>(93816 - SpliceUtilities.SpliceFlankLength, 93816 + SpliceUtilities.SpliceFlankLength, 0)\n            };\n\n            var intervals1 = new[]\n            {\n                new Interval<byte>(1577180 - SpliceUtilities.SpliceFlankLength, 1577180 + SpliceUtilities.SpliceFlankLength, 0)\n            };\n\n            var intervals21 = new[]\n            {\n                new Interval<byte>(31859677 - SpliceUtilities.SpliceFlankLength, 31859677 + SpliceUtilities.SpliceFlankLength, 0),\n                new Interval<byte>(35275955 - SpliceUtilities.SpliceFlankLength, 35275955 + SpliceUtilities.SpliceFlankLength, 0)\n            };\n\n            var intervals22 = new[]\n            {\n                new Interval<byte>(17148600 - SpliceUtilities.SpliceFlankLength, 17148600 + SpliceUtilities.SpliceFlankLength, 0)\n            };\n\n            return new Dictionary<ushort, IntervalArray<byte>>\n            {\n                {ChromosomeUtilities.Chr1.Index, new IntervalArray<byte>(intervals1)},\n                {ChromosomeUtilities.Chr10.Index, new IntervalArray<byte>(intervals10)},\n                {ChromosomeUtilities.Chr21.Index, new IntervalArray<byte>(intervals21)},\n                {ChromosomeUtilities.Chr22.Index, new IntervalArray<byte>(intervals22)}\n            };  \n        }\n        private static Stream GetStream()\n        {\n            var 
stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##fileformat=VCFv4.0\");\n            writer.WriteLine(\"##INFO=<ID=SpliceAI,Number=.,Type=String,Description=\\\"SpliceAIv1.3 variant annotation.These include delta scores(DS) and delta positions(DP) for acceptor gain (AG), acceptor loss(AL), donor gain(DG), and donor loss(DL).Format:ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL\\\">\") ;\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            //this line should not produce any objects since all values are <0.10 and its far from splice sites\n            writer.WriteLine(\"10\\t92900\\t.\\tC\\tT\\t.\\t.\\tSpliceAI=A|TUBB8|0.00|0.00|0.00|0.00|-4|-2|-12|25\");\n            // values are small but it is close to a splice site. So we report all of it\n            writer.WriteLine(\"10\\t92946\\t.\\tC\\tT\\t.\\t.\\tSpliceAI=T|TUBB8|0.00|0.00|0.00|0.00|-26|-10|3|35\");\n            // not around a splice site but has higher than 0.1 value. So, we report the one that is significant \n            writer.WriteLine(\"10\\t93389\\t.\\tC\\tA\\t.\\t.\\tSpliceAI=A|TUBB8|0.11|0.00|0.00|0.00|-11|-29|-11|-32\");\n            //should be reported back with 4 object since it is within splice interval;\n            writer.WriteLine(\"10\\t93816\\t.\\tC\\tG\\t.\\t.\\tSpliceAI=G|TUBB8|0.19|0.00|0.00|0.00|-7|-50|-7|-6\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        private static Stream GetPositionCachingStream()\n        {\n            //testing the position caching using minHeap. 
All entries have significant entries, so all of them should be reported\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##fileformat=VCFv4.0\");\n            writer.WriteLine(\"##INFO=<ID=SpliceAI,Number=.,Type=String,Description=\\\"SpliceAIv1.3 variant annotation.These include delta scores(DS) and delta positions(DP) for acceptor gain (AG), acceptor loss(AL), donor gain(DG), and donor loss(DL).Format:ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL\\\">\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\"10\\t92900\\t.\\tC\\tT\\t.\\t.\\tSpliceAI=A|TUBB8|0.80|0.00|0.00|0.00|-4|-2|-12|25\");\n            writer.WriteLine(\"10\\t92946\\t.\\tC\\tT\\t.\\t.\\tSpliceAI=T|TUBB8|0.00|0.00|0.00|0.00|-26|-10|3|35\");\n            writer.WriteLine(\"10\\t92946\\t.\\tC\\tA\\t.\\t.\\tSpliceAI=A|TUBB8|0.00|0.00|0.00|0.00|-10|-48|35|-21\");\n            writer.WriteLine(\"10\\t93389\\t.\\tC\\tA\\t.\\t.\\tSpliceAI=A|TUBB8|0.11|0.00|0.00|0.00|-11|-29|-11|-32\");\n            writer.WriteLine(\"10\\t93816\\t.\\tC\\tG\\t.\\t.\\tSpliceAI=G|TUBB8|0.19|0.00|0.00|0.00|-7|-50|-7|-6\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        private static Stream GetMultiGeneStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##fileformat=VCFv4.0\");\n            writer.WriteLine(\"##INFO=<ID=SpliceAI,Number=.,Type=String,Description=\\\"SpliceAIv1.3 variant annotation.These include delta scores(DS) and delta positions(DP) for acceptor gain (AG), acceptor loss(AL), donor gain(DG), and donor loss(DL).Format:ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL\\\">\");\n            
writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\"1\\t1577180\\t.\\tC\\tT\\t.\\t.\\tSpliceAI=T|MMP23B|0.00|0.00|0.00|0.00|8|-16|-16|26\");\n            writer.WriteLine(\"1\\t1577180\\t.\\tC\\tT\\t.\\t.\\tSpliceAI=T|CDK11B|0.92|0.00|0.00|0.00|-2|-8|33|-13\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        private static Stream GetMissingEntryStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##fileformat=VCFv4.0\");\n            writer.WriteLine(\"##INFO=<ID=SpliceAI,Number=.,Type=String,Description=\\\"SpliceAIv1.3 variant annotation.These include delta scores(DS) and delta positions(DP) for acceptor gain (AG), acceptor loss(AL), donor gain(DG), and donor loss(DL).Format:ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL\\\">\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\"21\\t35275955\\t.\\tG\\tA\\t.\\t.\\tSpliceAI=A|AP000304.12|0.14|0.00|0.00|0.00|-12|24|-41|5\");\n            writer.WriteLine(\"21\\t35275955\\t.\\tG\\tA\\t.\\t.\\tSpliceAI=A|ATP5O|0.00|0.00|0.00|0.00|-12|24|-41|-12\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n        private static Stream GetMultChromosomeStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##fileformat=VCFv4.0\");\n            writer.WriteLine(\"##INFO=<ID=SpliceAI,Number=.,Type=String,Description=\\\"SpliceAIv1.3 variant annotation.These include delta scores(DS) and delta positions(DP) for acceptor gain (AG), acceptor loss(AL), donor gain(DG), and donor loss(DL).Format:ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL\\\">\");\n            
writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            //having two gene symbols at the same position should avoid updating gene symbol\n            writer.WriteLine(\"10\\t92900\\t.\\tC\\tT\\t.\\t.\\tSpliceAI=A|TUBB8|0.00|0.50|0.00|0.00|-4|-2|-12|25\");\n            writer.WriteLine(\"10\\t92900\\t.\\tC\\tT\\t.\\t.\\tSpliceAI=A|SPLICE|0.00|0.00|0.00|0.20|-4|-2|-12|25\");\n            //The previous entries should be flushed since we changed chromosome\n            writer.WriteLine(\"1\\t92900\\t.\\tC\\tT\\t.\\t.\\tSpliceAI=A|TUBB8|0.30|0.00|0.00|0.00|-4|-2|-12|25\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void Check_multi_chromosome_gene_update()\n        {\n            using (var spliceParser = new SpliceAiParser(GetMultChromosomeStream(), GetSequenceProvider(), GetSpliceIntervals(), GetSpliceToNirvanaGenes()))\n            {\n                var spliceItems = spliceParser.GetItems().ToList();\n\n                // the third item will be skipped since hgnc is null\n                Assert.Equal(3, spliceItems.Count);\n                Assert.Equal(\"TUBB8\", spliceItems[0].Hgnc);\n                //checking a case where the splice AI gene is different from Nirvana\n                Assert.Equal(\"NIR91\", spliceItems[1].Hgnc);\n\n            }\n        }\n\n        [Fact]\n        public void Parse_standard_lines()\n        {\n            using (var spliceParser = new SpliceAiParser(GetStream(), GetSequenceProvider(), GetSpliceIntervals(), GetSpliceToNirvanaGenes()))\n            {\n                var spliceItems = spliceParser.GetItems().ToList();\n\n                Assert.Equal(3,spliceItems.Count);\n                
Assert.Equal(\"\\\"hgnc\\\":\\\"TUBB8\\\",\\\"acceptorGainScore\\\":0,\\\"acceptorGainDistance\\\":-26,\\\"acceptorLossScore\\\":0,\\\"acceptorLossDistance\\\":-10,\\\"donorGainScore\\\":0,\\\"donorGainDistance\\\":3,\\\"donorLossScore\\\":0,\\\"donorLossDistance\\\":35\", spliceItems[0].GetJsonString());\n                Assert.Equal(\"\\\"hgnc\\\":\\\"TUBB8\\\",\\\"acceptorGainScore\\\":0.1,\\\"acceptorGainDistance\\\":-11\", spliceItems[1].GetJsonString());\n                Assert.Equal(\"\\\"hgnc\\\":\\\"TUBB8\\\",\\\"acceptorGainScore\\\":0.2,\\\"acceptorGainDistance\\\":-7,\\\"acceptorLossScore\\\":0,\\\"acceptorLossDistance\\\":-50,\\\"donorGainScore\\\":0,\\\"donorGainDistance\\\":-7,\\\"donorLossScore\\\":0,\\\"donorLossDistance\\\":-6\", spliceItems[2].GetJsonString());\n            }\n        }\n\n        [Fact]\n        public void MissingEntry()\n        {\n            using (var spliceParser = new SpliceAiParser(GetMissingEntryStream(), GetSequenceProvider(), GetSpliceIntervals(), GetSpliceToNirvanaGenes()))\n            {\n                var spliceItems = spliceParser.GetItems().ToList();\n\n                Assert.Equal(2,spliceItems.Count);\n                Assert.Equal(\"\\\"hgnc\\\":\\\"AP000304.12\\\",\\\"acceptorGainScore\\\":0.1,\\\"acceptorGainDistance\\\":-12,\\\"acceptorLossScore\\\":0,\\\"acceptorLossDistance\\\":24,\\\"donorGainScore\\\":0,\\\"donorGainDistance\\\":-41,\\\"donorLossScore\\\":0,\\\"donorLossDistance\\\":5\", spliceItems[0].GetJsonString());\n            }\n        }\n\n        private static Stream GetMultiScoreStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##fileformat=VCFv4.0\");\n            writer.WriteLine(\"##INFO=<ID=SpliceAI,Number=.,Type=String,Description=\\\"SpliceAIv1.3 variant annotation.These include delta scores(DS) and delta positions(DP) for acceptor gain (AG), acceptor loss(AL), donor gain(DG), and 
donor loss(DL).Format:ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL\\\">\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\"10\\t93816\\t.\\tC\\tG\\t.\\t.\\tSpliceAI=G|TUBB8|0.19|0.40|0.00|0.20|-7|-50|-7|-6\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void Parse_multiScore_entry()\n        {\n            using (var spliceParser = new SpliceAiParser(GetMultiScoreStream(), GetSequenceProvider(), GetSpliceIntervals(), GetSpliceToNirvanaGenes()))\n            {\n                var spliceItems = spliceParser.GetItems().ToList();\n\n                Assert.Single(spliceItems);\n                Assert.Equal(\"\\\"hgnc\\\":\\\"TUBB8\\\",\\\"acceptorGainScore\\\":0.2,\\\"acceptorGainDistance\\\":-7,\\\"acceptorLossScore\\\":0.4,\\\"acceptorLossDistance\\\":-50,\\\"donorGainScore\\\":0,\\\"donorGainDistance\\\":-7,\\\"donorLossScore\\\":0.2,\\\"donorLossDistance\\\":-6\", spliceItems[0].GetJsonString());\n            }\n        }\n\n        [Fact]\n        public void Parse_multiGene_entry()\n        {\n            using (var spliceParser = new SpliceAiParser(GetMultiGeneStream(), GetSequenceProvider(), GetSpliceIntervals(), GetSpliceToNirvanaGenes()))\n            {\n                var spliceItems = spliceParser.GetItems().ToList();\n\n                Assert.Equal(2,spliceItems.Count);\n                Assert.Equal(\"\\\"hgnc\\\":\\\"MMP23B\\\",\\\"acceptorGainScore\\\":0,\\\"acceptorGainDistance\\\":8,\\\"acceptorLossScore\\\":0,\\\"acceptorLossDistance\\\":-16,\\\"donorGainScore\\\":0,\\\"donorGainDistance\\\":-16,\\\"donorLossScore\\\":0,\\\"donorLossDistance\\\":26\", spliceItems[0].GetJsonString());\n                
Assert.Equal(\"\\\"hgnc\\\":\\\"CDK11B\\\",\\\"acceptorGainScore\\\":0.9,\\\"acceptorGainDistance\\\":-2,\\\"acceptorLossScore\\\":0,\\\"acceptorLossDistance\\\":-8,\\\"donorGainScore\\\":0,\\\"donorGainDistance\\\":33,\\\"donorLossScore\\\":0,\\\"donorLossDistance\\\":-13\", spliceItems[1].GetJsonString());\n            }\n        }\n\n        private static Stream GetShiftableInsertionStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##fileformat=VCFv4.0\");\n            writer.WriteLine(\"##INFO=<ID=SpliceAI,Number=.,Type=String,Description=\\\"SpliceAIv1.3 variant annotation.These include delta scores(DS) and delta positions(DP) for acceptor gain (AG), acceptor loss(AL), donor gain(DG), and donor loss(DL).Format:ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL\\\">\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\"22\\t17148654\\t.\\tG\\tGA\\t.\\t.\\tSpliceAI=GA|CECR5|0.10|0.00|0.00|0.00|-10|10|-10|-15\");\n            writer.WriteLine(\"22\\t17148655\\t.\\tA\\tAA\\t.\\t.\\tSpliceAI=AA|CECR5|0.10|0.00|0.00|0.00|-11|9|-11|-16\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void Skip_shiftable_indels()\n        {\n            using (var spliceParser = new SpliceAiParser(GetShiftableInsertionStream(), GetSequenceProvider(), GetSpliceIntervals(), GetSpliceToNirvanaGenes()))\n            {\n                var spliceItems = spliceParser.GetItems().ToList();\n\n                Assert.Single(spliceItems);\n                Assert.Equal(\"\\\"hgnc\\\":\\\"CECR5\\\",\\\"acceptorGainScore\\\":0.1,\\\"acceptorGainDistance\\\":-10\", spliceItems[0].GetJsonString());\n                \n            }\n        }\n\n        [Fact]\n        public void Check_position_caching()\n        {\n          
  using (var spliceParser = new SpliceAiParser(GetPositionCachingStream(), GetSequenceProvider(), GetSpliceIntervals(), GetSpliceToNirvanaGenes()))\n            {\n                var spliceItems = spliceParser.GetItems().ToList();\n\n                Assert.Equal(5, spliceItems.Count);\n            }\n        }\n        \n        private static Stream GetCacheStream()\n        {\n            const GenomeAssembly genomeAssembly = GenomeAssembly.GRCh38;\n\n            var baseHeader = new Header(\"test\", 2, 3, Source.BothRefSeqAndEnsembl, 4, genomeAssembly);\n            var customHeader = new TranscriptCacheCustomHeader(1, 2);\n            var expectedHeader = new CacheHeader(baseHeader, customHeader);\n\n            var transcriptRegions = new ITranscriptRegion[]\n            {\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 100, 199, 300, 399),\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 200, 299, 399, 400),\n                new TranscriptRegion(TranscriptRegionType.Exon, 2, 300, 399, 400, 499)\n            };\n\n            var mirnas = new IInterval[2];\n            mirnas[0] = new Interval(100, 200);\n            mirnas[1] = new Interval(300, 400);\n\n            var peptideSeqs = new[] { \"MASE*\" };\n\n            var genes = new IGene[1];\n            genes[0] = new Gene(ChromosomeUtilities.Chr3, 100, 200, true, \"TP53\", 300, CompactId.Convert(\"7157\"),\n                CompactId.Convert(\"ENSG00000141510\"));\n\n            var regulatoryRegions = new IRegulatoryRegion[2];\n            regulatoryRegions[0] = new RegulatoryRegion(ChromosomeUtilities.Chr3, 1200, 1300, CompactId.Convert(\"123\"), RegulatoryRegionType.enhancer);\n            regulatoryRegions[1] = new RegulatoryRegion(ChromosomeUtilities.Chr3, 1250, 1450, CompactId.Convert(\"456\"), RegulatoryRegionType.enhancer);\n            var regulatoryRegionIntervalArrays = regulatoryRegions.ToIntervalArrays(3);\n\n            var transcripts = 
GetTranscripts(ChromosomeUtilities.Chr3, genes, transcriptRegions, mirnas);\n            var transcriptIntervalArrays = transcripts.ToIntervalArrays(3);\n\n            var expectedCacheData = new TranscriptCacheData(expectedHeader, genes, transcriptRegions, mirnas, peptideSeqs,\n                transcriptIntervalArrays, regulatoryRegionIntervalArrays);\n\n            var ms = new MemoryStream();\n            using (var writer = new TranscriptCacheWriter(ms, expectedHeader, true))\n            {\n                writer.Write(expectedCacheData);\n            }\n\n            ms.Position = 0;\n\n            return ms;\n        }\n\n        private static ITranscript[] GetTranscripts(Chromosome chromosome, IGene[] genes, ITranscriptRegion[] regions,\n            IInterval[] mirnas)\n        {\n            return new ITranscript[]\n            {\n                new Transcript(chromosome, 120, 180, CompactId.Convert(\"789\"), null, BioType.IG_D_gene, genes[0], 0, 0,\n                    false, regions, 0, mirnas, -1, -1, Source.None, false, false, null, null)\n            };\n        }\n\n        [Fact]\n        public void GetSpliceIntervals_standard()\n        {\n            using (var transcriptCacheReader = new TranscriptCacheReader(GetCacheStream()))\n            {\n                var seqProvider     = GetCacheSequenceProvider();\n                var transcriptData  = transcriptCacheReader.Read(seqProvider.RefIndexToChromosome);\n                var spliceIntervals = SpliceUtilities.GetSpliceIntervals(seqProvider, transcriptData);\n\n                Assert.Single(spliceIntervals);\n                //given 2 exons, there should be 2 splice intervals\n                Assert.Equal(2, spliceIntervals[2].Array.Length);\n            }\n        }\n\n        private static Stream GetMultiGeneAtSameLocationStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            
writer.WriteLine(\"##fileformat=VCFv4.0\");\n            writer.WriteLine(\"##INFO=<ID=SpliceAI,Number=.,Type=String,Description=\\\"SpliceAIv1.3 variant annotation.These include delta scores(DS) and delta positions(DP) for acceptor gain (AG), acceptor loss(AL), donor gain(DG), and donor loss(DL).Format:ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL\\\">\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\"21\\t31859677\\t.\\tG\\tA\\t.\\t.\\tSpliceAI=A|KRTAP19-3|0.00|0.00|0.00|0.00|-42|38|23|38\");\n            writer.WriteLine(\"21\\t31859677\\t.\\tG\\tA\\t.\\t.\\tSpliceAI=A|KRTAP19-2|0.01|0.00|0.0262|0.00|-42|38|23|-11\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void Two_symbols_in_spliceAi()\n        {\n            using (var spliceParser = new SpliceAiParser(GetMultiGeneAtSameLocationStream(), GetSequenceProvider(), GetSpliceIntervals(), GetSpliceToNirvanaGenes()))\n            {\n                var spliceItems = spliceParser.GetItems().ToList();\n\n                Assert.Equal(2, spliceItems.Count);\n                Assert.Equal(\"\\\"hgnc\\\":\\\"KRTAP19-3\\\",\\\"acceptorGainScore\\\":0,\\\"acceptorGainDistance\\\":-42,\\\"acceptorLossScore\\\":0,\\\"acceptorLossDistance\\\":38,\\\"donorGainScore\\\":0,\\\"donorGainDistance\\\":23,\\\"donorLossScore\\\":0,\\\"donorLossDistance\\\":38\", spliceItems[0].GetJsonString());\n                Assert.Equal(\"\\\"hgnc\\\":\\\"KRTAP19-2\\\",\\\"acceptorGainScore\\\":0,\\\"acceptorGainDistance\\\":-42,\\\"acceptorLossScore\\\":0,\\\"acceptorLossDistance\\\":38,\\\"donorGainScore\\\":0,\\\"donorGainDistance\\\":23,\\\"donorLossScore\\\":0,\\\"donorLossDistance\\\":-11\", spliceItems[1].GetJsonString());\n            }\n        }\n\n        private static Stream GetInsertionStream()\n        {\n            var stream = new 
MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##fileformat=VCFv4.0\");\n            writer.WriteLine(\"##INFO=<ID=SpliceAI,Number=.,Type=String,Description=\\\"SpliceAIv1.3 variant annotation.These include delta scores(DS) and delta positions(DP) for acceptor gain (AG), acceptor loss(AL), donor gain(DG), and donor loss(DL).Format:ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL\\\">\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            // insertions at the boundary of splice intervals\n            // splice boundary is [92931- 92961]\n\n            //insertion just before the interval should be skipped\n            writer.WriteLine(\"10\\t92930\\t.\\tC\\tCT\\t.\\t.\\tSpliceAI=A|TUBB8|0.00|0.00|0.00|0.00|-4|-2|-12|25\");\n            // insertion right on the boundary should be kept\n            writer.WriteLine(\"10\\t92931\\t.\\tC\\tCT\\t.\\t.\\tSpliceAI=A|TUBB8|0.00|0.00|0.00|0.00|-4|-2|-12|25\");\n            // insertion just after the interval should be skipped\n            writer.WriteLine(\"10\\t92961\\t.\\tC\\tCT\\t.\\t.\\tSpliceAI=A|TUBB8|0.00|0.00|0.00|0.00|-4|-2|-12|25\");\n            // insertion right on the interval boundary should be kept\n            writer.WriteLine(\"10\\t92960\\t.\\tC\\tCT\\t.\\t.\\tSpliceAI=A|TUBB8|0.00|0.00|0.00|0.00|-4|-2|-12|25\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n        [Fact]\n        public void Parse_insertions()\n        {\n            using (var spliceParser = new SpliceAiParser(GetInsertionStream(), GetSequenceProvider(), GetSpliceIntervals(), GetSpliceToNirvanaGenes()))\n            {\n                var spliceItems = spliceParser.GetItems().ToList();\n\n                Assert.Equal(2, spliceItems.Count);\n                \n            }\n        }\n\n        private static Stream GetDeletionStream()\n        {\n            
var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##fileformat=VCFv4.0\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\"##INFO=<ID=SpliceAI,Number=.,Type=String,Description=\\\"SpliceAIv1.3 variant annotation.These include delta scores(DS) and delta positions(DP) for acceptor gain (AG), acceptor loss(AL), donor gain(DG), and donor loss(DL).Format:ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL\\\">\");\n\n            // deletions at the boundary of splice intervals\n            // splice boundary is [92931- 92961]\n\n            // deletion just before the interval should be skipped\n            writer.WriteLine(\"10\\t92929\\t.\\tCT\\tC\\t.\\t.\\tSpliceAI=A|TUBB8|0.00|0.00|0.00|0.00|-4|-2|-12|25\");\n            // deletion that starts before the interval but is long enough to go into the interval is kept\n            writer.WriteLine(\"10\\t92929\\t.\\tCTA\\tC\\t.\\t.\\tSpliceAI=A|TUBB8|0.00|0.00|0.00|0.00|-4|-2|-12|25\");\n            // deletion right on the boundary should be kept\n            writer.WriteLine(\"10\\t92930\\t.\\tCT\\tC\\t.\\t.\\tSpliceAI=A|TUBB8|0.00|0.00|0.00|0.00|-4|-2|-12|25\");\n            // deletion just after the interval should be skipped\n            writer.WriteLine(\"10\\t92961\\t.\\tCT\\tC\\t.\\t.\\tSpliceAI=A|TUBB8|0.00|0.00|0.00|0.00|-4|-2|-12|25\");\n            // deletion right on the interval boundary should be kept\n            writer.WriteLine(\"10\\t92959\\t.\\tCT\\tC\\t.\\t.\\tSpliceAI=A|TUBB8|0.00|0.00|0.00|0.00|-4|-2|-12|25\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n        [Fact]\n        public void Parse_deletions()\n        {\n            using (var spliceParser = new SpliceAiParser(GetDeletionStream(), GetSequenceProvider(), GetSpliceIntervals(), GetSpliceToNirvanaGenes()))\n            {\n   
             var spliceItems = spliceParser.GetItems().ToList();\n\n                Assert.Equal(3, spliceItems.Count);\n                \n            }\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/gnomAD/GnomadGeneScoreTests.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing SAUtils.GnomadGeneScores;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.gnomAD\n{\n    public sealed class GnomadGeneScoreTests\n    {\n        private static Stream GetStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"gene\\ttranscript\\tobs_mis\\texp_mis\\toe_mis\\tmu_mis\\tpossible_mis\\tobs_mis_pphen\\texp_mis_pphen\\toe_mis_pphen\\tpossible_mis_pphen\\tobs_syn\\texp_syn\\toe_syn\\tmu_syn\\tpossible_syn\\tobs_lof\\tmu_lof\\tpossible_lof\\texp_lof\\tpLI\\tpNull\\tpRec\\toe_lof\\toe_syn_lower\\toe_syn_upper\\toe_mis_lower\\toe_mis_upper\\toe_lof_lower\\toe_lof_upper\\tconstraint_flag\\tsyn_z\\tmis_z\\tlof_z\\toe_lof_upper_rank\\toe_lof_upper_bin\\toe_lof_upper_bin_6\\tn_sites\\tclassic_caf\\tmax_af\\tno_lofs\\tobs_het_lof\\tobs_hom_lof\\tdefined\\tp\\texp_hom_lof\\tclassic_caf_afr\\tclassic_caf_amr\\tclassic_caf_asj\\tclassic_caf_eas\\tclassic_caf_fin\\tclassic_caf_nfe\\tclassic_caf_oth\\tclassic_caf_sas\\tp_afr\\tp_amr\\tp_asj\\tp_eas\\tp_fin\\tp_nfe\\tp_oth\\tp_sas\\ttranscript_type\\tgene_id\\ttranscript_level\\tcds_length\\tnum_coding_exons\\tgene_type\\tgene_length\\texac_pLI\\texac_obs_lof\\texac_exp_lof\\texac_oe_lof\\tbrain_expression\\tchromosome\\tstart_position\\tend_position\");\n            
writer.WriteLine(\"MED13\\tENST00000397786\\t871\\t1.1178e+03\\t7.7921e-01\\t5.5598e-05\\t14195\\t314\\t5.2975e+02\\t5.9273e-01\\t6708\\t422\\t3.8753e+02\\t1.0890e+00\\t1.9097e-05\\t4248\\t0\\t4.9203e-06\\t1257\\t9.8429e+01\\t1.0000e+00\\t8.9436e-40\\t1.8383e-16\\t0.0000e+00\\t1.0050e+00\\t1.1800e+00\\t7.3600e-01\\t8.2400e-01\\t0.0000e+00\\t3.0000e-02\\t\\t-1.3765e+00\\t2.6232e+00\\t9.1935e+00\\t0\\t0\\t0\\t2\\t1.2058e-05\\t8.0492e-06\\t124782\\t3\\t0\\t124785\\t1.2021e-05\\t1.8031e-05\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t9.2812e-05\\t8.8571e-06\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t9.2760e-05\\t8.8276e-06\\t0.0000e+00\\t0.0000e+00\\tprotein_coding\\tENSG00000108510\\t2\\t6522\\t30\\tprotein_coding\\t122678\\t1.0000e+00\\t0\\t6.4393e+01\\t0.0000e+00\\tNA\\t17\\t60019966\\t60142643\");\n            writer.WriteLine(\"NIPBL\\tENST00000282516\\t846\\t1.4415e+03\\t5.8688e-01\\t7.3808e-05\\t18540\\t158\\t5.4310e+02\\t2.9092e-01\\t7135\\t496\\t4.9501e+02\\t1.0020e+00\\t2.4942e-05\\t5211\\t1\\t9.4214e-06\\t1781\\t1.5032e+02\\t1.0000e+00\\t2.9773e-59\\t3.5724e-24\\t6.6527e-03\\t9.3000e-01\\t1.0790e+00\\t5.5400e-01\\t6.2100e-01\\t1.0000e-03\\t3.2000e-02\\t\\t-3.5119e-02\\t5.5737e+00\\t1.1286e+01\\t1\\t0\\t0\\t2\\t1.1943e-05\\t7.9636e-06\\t125693\\t3\\t0\\t125696\\t1.1934e-05\\t1.7901e-05\\t0.0000e+00\\t0.0000e+00\\t9.9246e-05\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t6.5338e-05\\t0.0000e+00\\t0.0000e+00\\t9.9231e-05\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t6.5327e-05\\tprotein_coding\\tENSG00000164190\\t2\\t8412\\t46\\tprotein_coding\\t189655\\t1.0000e+00\\t1\\t1.1057e+02\\t9.0443e-03\\tNA\\t5\\t36876861\\t37066515\");\n            
writer.WriteLine(\"RPS17\\tENST00000330339\\t0\\t0.0000e+00\\tNaN\\t0.0000e+00\\t879\\t0\\t3.4388e-02\\t0.0000e+00\\t75\\t0\\t0.0000e+00\\tNaN\\t0.0000e+00\\t254\\t0\\t1.7630e-08\\t85\\t4.1103e-01\\t3.2566e-01\\t1.7556e-01\\t4.9878e-01\\t0.0000e+00\\t0.0000e+00\\t1.9000e+00\\t0.0000e+00\\t1.9000e+00\\t0.0000e+00\\t1.8490e+00\\tno_exp_mis|no_exp_syn|no_variants\\tNaN\\tNaN\\t5.9410e-01\\t17933\\t9\\t5\\t0\\t0.0000e+00\\t0.0000e+00\\t0\\t0\\t0\\t0\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\tprotein_coding\\tENSG00000184779\\t2\\t405\\t5\\tprotein_coding\\t3815\\tNA\\tNA\\tNA\\tNA\\tNA\\t15\\t82821158\\t82824972\");\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void GetItems()\n        {\n            var geneIdToSymbols = new Dictionary<string, string>\n            {\n                {\"ENSG00000108510\", \"MED13\"},\n                {\"ENSG00000164190\", \"NIPBL\"},\n                {\"tENSG00000184779\", \"RPS17\" }\n            };\n            using (var reader = new GnomadGeneParser(new StreamReader(GetStream()), geneIdToSymbols))\n            {\n                var items = reader.GetItems().ToList();\n\n                Assert.Equal(3, items.Count);\n                Assert.Equal(\"{\\\"pLi\\\":1.00e0,\\\"pRec\\\":1.84e-16,\\\"pNull\\\":8.94e-40,\\\"synZ\\\":-1.38e0,\\\"misZ\\\":2.62e0,\\\"loeuf\\\":3.00e-2}\", items[0].Value[0].GetJsonString());\n                Assert.Equal(\"{\\\"pLi\\\":3.26e-1,\\\"pRec\\\":4.99e-1,\\\"pNull\\\":1.76e-1,\\\"loeuf\\\":1.85e0}\", items[2].Value[0].GetJsonString());\n            }\n        }\n\n        private static Stream GetStream_with_duplicate_gene_entries()\n        {\n            var stream = new MemoryStream();\n            var writer = new 
StreamWriter(stream);\n\n            writer.WriteLine(\"gene\\ttranscript\\tobs_mis\\texp_mis\\toe_mis\\tmu_mis\\tpossible_mis\\tobs_mis_pphen\\texp_mis_pphen\\toe_mis_pphen\\tpossible_mis_pphen\\tobs_syn\\texp_syn\\toe_syn\\tmu_syn\\tpossible_syn\\tobs_lof\\tmu_lof\\tpossible_lof\\texp_lof\\tpLI\\tpNull\\tpRec\\toe_lof\\toe_syn_lower\\toe_syn_upper\\toe_mis_lower\\toe_mis_upper\\toe_lof_lower\\toe_lof_upper\\tconstraint_flag\\tsyn_z\\tmis_z\\tlof_z\\toe_lof_upper_rank\\toe_lof_upper_bin\\toe_lof_upper_bin_6\\tn_sites\\tclassic_caf\\tmax_af\\tno_lofs\\tobs_het_lof\\tobs_hom_lof\\tdefined\\tp\\texp_hom_lof\\tclassic_caf_afr\\tclassic_caf_amr\\tclassic_caf_asj\\tclassic_caf_eas\\tclassic_caf_fin\\tclassic_caf_nfe\\tclassic_caf_oth\\tclassic_caf_sas\\tp_afr\\tp_amr\\tp_asj\\tp_eas\\tp_fin\\tp_nfe\\tp_oth\\tp_sas\\ttranscript_type\\tgene_id\\ttranscript_level\\tcds_length\\tnum_coding_exons\\tgene_type\\tgene_length\\texac_pLI\\texac_obs_lof\\texac_exp_lof\\texac_oe_lof\\tbrain_expression\\tchromosome\\tstart_position\\tend_position\");\n            writer.WriteLine(\"MDGA2\\tENST00000426342\\t306\\t4.0043e+02\\t7.6419e-01\\t2.1096e-05\\t4724\\t78\\t1.6525e+02\\t4.7202e-01\\t1923\\t125\\t1.3737e+02\\t9.0993e-01\\t7.1973e-06\\t1413\\t4\\t2.0926e-06\\t453\\t3.8316e+01\\t9.9922e-01\\t8.6490e-12\\t7.8128e-04\\t1.0440e-01\\t7.8600e-01\\t1.0560e+00\\t6.9500e-01\\t8.4000e-01\\t5.0000e-02\\t2.3900e-01\\t\\t8.2988e-01\\t1.6769e+00\\t5.1372e+00\\t1529\\t0\\t0\\t7\\t2.8103e-05\\t4.0317e-06\\t124784\\t7\\t0\\t124791\\t2.8047e-05\\t9.8167e-05\\t0.0000e+00\\t2.8962e-05\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t3.5391e-05\\t1.6672e-04\\t3.2680e-05\\t0.0000e+00\\t2.8962e-05\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t3.5308e-05\\t1.6492e-04\\t3.2678e-05\\tprotein_coding\\tENSG00000139915\\t2\\t2181\\t13\\tprotein_coding\\t835332\\t9.9322e-01\\t3\\t2.7833e+01\\t1.0779e-01\\tNA\\t14\\t47308826\\t48144157\");\n            
writer.WriteLine(\"MDGA2\\tENST00000439988\\t438\\t5.5311e+02\\t7.9189e-01\\t2.9490e-05\\t6608\\t105\\t2.0496e+02\\t5.1228e-01\\t2386\\t180\\t1.9491e+02\\t9.2351e-01\\t9.8371e-06\\t2048\\t11\\t2.8074e-06\\t627\\t5.1882e+01\\t6.6457e-01\\t5.5841e-10\\t3.3543e-01\\t2.1202e-01\\t8.1700e-01\\t1.0450e+00\\t7.3100e-01\\t8.5700e-01\\t1.3200e-01\\t3.5100e-01\\t\\t8.3940e-01\\t1.7393e+00\\t5.2595e+00\\t2989\\t1\\t0\\t9\\t3.6173e-05\\t4.0463e-06\\t124782\\t9\\t0\\t124791\\t3.6061e-05\\t1.6228e-04\\t6.4986e-05\\t2.8962e-05\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t4.4275e-05\\t1.6672e-04\\t3.2680e-05\\t6.4577e-05\\t2.8962e-05\\t0.0000e+00\\t0.0000e+00\\t0.0000e+00\\t4.4135e-05\\t1.6492e-04\\t3.2678e-05\\tprotein_coding\\tENSG00000272781\\t3\\t3075\\t17\\tprotein_coding\\t832866\\tNA\\tNA\\tNA\\tNA\\tNA\\t14\\t47311134\\t48143999\");\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void GetNonDuplicateItems()\n        {\n            var geneIdToSymbols = new Dictionary<string, string>\n            {\n                {\"ENST00000426342\", \"MDGA2\"},\n                {\"ENST00000439988\", \"MDGA2\"}\n            };\n            using (var reader = new GnomadGeneParser(new StreamReader(GetStream_with_duplicate_gene_entries()), geneIdToSymbols))\n            {\n                var items = reader.GetItems().ToList();\n\n                Assert.Single(items);\n                Assert.Equal(\"{\\\"pLi\\\":9.99e-1,\\\"pRec\\\":7.81e-4,\\\"pNull\\\":8.65e-12,\\\"synZ\\\":8.30e-1,\\\"misZ\\\":1.68e0,\\\"loeuf\\\":2.39e-1}\", items[0].Value[0].GetJsonString());\n            }\n        }\n        private static Stream GetStream_resolve_without_loeuf()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            
writer.WriteLine(\"gene\\ttranscript\\tobs_mis\\texp_mis\\toe_mis\\tmu_mis\\tpossible_mis\\tobs_mis_pphen\\texp_mis_pphen\\toe_mis_pphen\\tpossible_mis_pphen\\tobs_syn\\texp_syn\\toe_syn\\tmu_syn\\tpossible_syn\\tobs_lof\\tmu_lof\\tpossible_lof\\texp_lof\\tpLI\\tpNull\\tpRec\\toe_lof\\toe_syn_lower\\toe_syn_upper\\toe_mis_lower\\toe_mis_upper\\toe_lof_lower\\toe_lof_upper\\tconstraint_flag\\tsyn_z\\tmis_z\\tlof_z\\toe_lof_upper_rank\\toe_lof_upper_bin\\toe_lof_upper_bin_6\\tn_sites\\tclassic_caf\\tmax_af\\tno_lofs\\tobs_het_lof\\tobs_hom_lof\\tdefined\\tp\\texp_hom_lof\\tclassic_caf_afr\\tclassic_caf_amr\\tclassic_caf_asj\\tclassic_caf_eas\\tclassic_caf_fin\\tclassic_caf_nfe\\tclassic_caf_oth\\tclassic_caf_sas\\tp_afr\\tp_amr\\tp_asj\\tp_eas\\tp_fin\\tp_nfe\\tp_oth\\tp_sas\\ttranscript_type\\tgene_id\\ttranscript_level\\tcds_length\\tnum_coding_exons\\tgene_type\\tgene_length\\texac_pLI\\texac_obs_lof\\texac_exp_lof\\texac_oe_lof\\tbrain_expression\\tchromosome\\tstart_position\\tend_position\");\n            writer.WriteLine(\"NBPF8\\tENST00000369365\\t75\\t3.0353e+01\\t2.4709e+00\\t1.4000e-06\\t24299\\t60\\t2.5873e+01\\t2.3190e+00\\t14469\\t22\\t1.3347e+01\\t1.6483e+00\\t5.9757e-07\\t6838\\t8\\t1.5346e-07\\t3145\\t3.7051e+00\\t1.4181e-07\\t9.6601e-01\\t3.3989e-02\\t2.1592e+00\\t1.1460e+00\\t1.9490e+00\\t1.7400e+00\\t1.9940e+00\\t1.0280e+00\\t1.9670e+00\\tmis_too_many\\t-1.8618e+00\\t-2.8797e+00\\t-2.0676e+00\\t19130\\t9\\t5\\t11\\t8.5551e-05\\t2.3640e-05\\t120533\\t19\\t0\\t120552\\t7.8807e-05\\t7.4870e-04\\t2.1519e-04\\t3.0820e-05\\t0.0000e+00\\t1.6820e-04\\t4.8035e-05\\t1.1691e-04\\t0.0000e+00\\t0.0000e+00\\t2.1446e-04\\t2.9290e-05\\t0.0000e+00\\t1.6411e-04\\t4.7779e-05\\t1.0166e-04\\t0.0000e+00\\t0.0000e+00\\tprotein_coding\\tENSG00000162825\\t2\\t11420\\t91\\tprotein_coding\\t77674\\tNA\\tNA\\tNA\\tNA\\tNA\\t1\\t144146808\\t144224481\");\n            
writer.WriteLine(\"NBPF20\\tENST00000369202\\t450\\t1.6927e+02\\t2.6584e+00\\t8.8521e-06\\t29674\\t110\\t2.3246e+01\\t4.7320e+00\\t17017\\t169\\t6.7362e+01\\t2.5089e+00\\t3.9243e-06\\t8313\\t26\\t6.7292e-07\\t3810\\t1.3886e+01\\t1.9221e-22\\t9.9999e-01\\t7.9649e-06\\t1.8724e+00\\t1.8630e+00\\t1.9970e+00\\t1.9490e+00\\t1.9990e+00\\t1.2990e+00\\t1.9710e+00\\tmis_too_many|syn_outlier\\t-9.7346e+00\\t-7.6675e+00\\t-3.0124e+00\\t19150\\t9\\t5\\t22\\t3.0519e-04\\t1.0143e-04\\t125629\\t75\\t0\\t125704\\t2.9836e-04\\t1.1190e-02\\t1.5454e-04\\t2.3190e-04\\t1.0022e-04\\t5.6199e-05\\t7.5773e-04\\t3.3476e-04\\t3.2954e-04\\t2.6141e-04\\t1.2342e-04\\t2.3129e-04\\t9.9211e-05\\t5.4367e-05\\t7.3937e-04\\t3.2537e-04\\t3.2600e-04\\t2.6134e-04\\tprotein_coding\\tENSG00000203832\\t2\\t13863\\t110\\tprotein_coding\\t97258\\t4.4592e-04\\t7\\t1.1898e+01\\t5.8834e-01\\tNA\\t1\\t148250249\\t148347506\");\n            writer.WriteLine(\"FAM231B\\tENST00000601199\\t100\\t6.6880e+01\\t1.4952e+00\\t3.5319e-06\\t1044\\t29\\t1.9078e+01\\t1.5201e+00\\t297\\t40\\t2.6953e+01\\t1.4840e+00\\t1.4161e-06\\t357\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\t1.1490e+00\\t1.8690e+00\\t1.2710e+00\\t1.7630e+00\\tNA\\tNA\\tno_exp_lof\\t-1.9754e+00\\t-1.4391e+00\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tprotein_coding\\tENSG00000268674\\t3\\t507\\t1\\tprotein_coding\\t510\\tNA\\tNA\\tNA\\tNA\\tNA\\t1\\t16865561\\t16866070\");\n            
writer.WriteLine(\"FAM231D\\tENST00000369173\\t67\\t7.4600e+01\\t8.9812e-01\\t3.7640e-06\\t963\\t18\\t1.9611e+01\\t9.1786e-01\\t240\\t20\\t2.7120e+01\\t7.3747e-01\\t1.1812e-06\\t318\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\t5.1800e-01\\t1.0720e+00\\t7.3700e-01\\t1.1010e+00\\tNA\\tNA\\tno_exp_lof\\t1.0747e+00\\t3.1268e-01\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tprotein_coding\\tENSG00000203815\\t3\\t507\\t1\\tprotein_coding\\t632\\tNA\\tNA\\tNA\\tNA\\tNA\\t1\\t149675978\\t149676609\");\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n        [Fact]\n        public void ResolveDuplicateWithout_loeuf()\n        {\n            var geneIdToSymbols = new Dictionary<string, string>\n            {\n                {\"ENSG00000162825\", \"NBPF20\"},\n                {\"ENSG00000203832\", \"NBPF20\"},\n                {\"ENSG00000268674\", \"FAM231D\"},\n                {\"ENSG00000203815\", \"FAM231D\"}\n            };\n            using (var reader = new GnomadGeneParser(new StreamReader(GetStream_resolve_without_loeuf()), geneIdToSymbols))\n            {\n                var items = reader.GetItems().ToList();\n\n                Assert.Equal(2, items.Count);\n                Assert.Equal(\"{\\\"pLi\\\":1.42e-7,\\\"pRec\\\":3.40e-2,\\\"pNull\\\":9.66e-1,\\\"synZ\\\":-1.86e0,\\\"misZ\\\":-2.88e0,\\\"loeuf\\\":1.97e0}\", items[0].Value[0].GetJsonString());\n                Assert.Equal(\"{\\\"synZ\\\":-1.98e0,\\\"misZ\\\":-1.44e0}\", items[1].Value[0].GetJsonString());\n            }\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/gnomAD/GnomadReaderTests.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing Genome;\nusing SAUtils;\nusing SAUtils.DataStructures;\nusing SAUtils.gnomAD;\nusing SAUtils.ParseUtils;\nusing UnitTests.TestDataStructures;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.Interface.SA;\nusing Variants;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.gnomAD\n{\n    public sealed class GnomadReaderTests\n    {\n        private static Stream GetGenomeStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##gnomAD\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\"chr1\\t10031\\t.\\tT\\tC\\t.\\tAC0;AS_VQSR\\tAC=0;AN=56642;AC_oth=0;AN_oth=782;nhomalt_oth=0;AC_sas=0;AN_sas=1120;nhomalt_sas=0;AC_XX=0;AN_XX=29308;nhomalt_XX=0;AC_fin=0;AN_fin=4326;nhomalt_fin=0;AC_XY=0;AN_XY=27334;nhomalt_XY=0;AC_eas=0;AN_eas=1712;nhomalt_eas=0;AC_amr=0;AN_amr=6420;nhomalt_amr=0;AC_afr=0;AN_afr=14642;nhomalt_afr=0;nhomalt=0;AC_asj=0;AN_asj=1550;nhomalt_asj=0;AC_controls_and_biobanks=0;AN_controls_and_biobanks=11608;AC_nfe=0;AN_nfe=25546;nhomalt_nfe=0;VarDP=35\");\n            writer.WriteLine(\"chr1\\t10114\\trs1570391787\\tT\\tC\\t.\\tAS_VQSR\\tAC=5;AN=22208;AC_oth=0;AN_oth=368;nhomalt_oth=0;AC_sas=0;AN_sas=518;nhomalt_sas=0;AC_XX=4;AN_XX=12336;nhomalt_XX=0;AC_fin=1;AN_fin=888;nhomalt_fin=0;AC_XY=1;AN_XY=9872;nhomalt_XY=0;AC_eas=0;AN_eas=560;nhomalt_eas=0;AC_amr=0;AN_amr=1580;nhomalt_amr=0;AC_afr=1;AN_afr=5362;nhomalt_afr=0;nhomalt=0;AC_asj=0;AN_asj=702;nhomalt_asj=0;AC_controls_and_biobanks=1;AN_controls_and_biobanks=3484;AC_nfe=3;AN_nfe=12078;nhomalt_nfe=0;VarDP=1597\");\n            
writer.WriteLine(\"chr1\\t10120\\trs1390810297\\tT\\tC\\t.\\tAC0;AS_VQSR\\tAC=0;AN=34082;AC_oth=0;AN_oth=468;nhomalt_oth=0;AC_sas=0;AN_sas=716;nhomalt_sas=0;AC_XX=0;AN_XX=18954;nhomalt_XX=0;AC_fin=0;AN_fin=1110;nhomalt_fin=0;AC_XY=0;AN_XY=15128;nhomalt_XY=0;AC_eas=0;AN_eas=872;nhomalt_eas=0;AC_amr=0;AN_amr=2280;nhomalt_amr=0;AC_afr=0;AN_afr=8666;nhomalt_afr=0;nhomalt=0;AC_asj=0;AN_asj=1076;nhomalt_asj=0;AC_controls_and_biobanks=0;AN_controls_and_biobanks=4984;AC_nfe=0;AN_nfe=18672;nhomalt_nfe=0;VarDP=1035\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n\n        [Fact]\n        public void GetItems_test()\n        {\n            var sequence = new SimpleSequence(new string('A', VariantUtils.MaxUpstreamLength) + \"TGTGTTGTTATTCTGTGTGCAT\", 10114 - VariantUtils.MaxUpstreamLength);\n\n            var sequenceProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh38, sequence, ChromosomeUtilities.RefNameToChromosome);\n            var reader           = new StreamReader(GetGenomeStream());\n            var gnomadReader     = new GnomadSnvReader(reader, null, sequenceProvider);\n\n            var items = gnomadReader.GetCombinedItems().ToList();\n\n            Assert.Equal(3, items.Count);\n            
Assert.Equal(\"\\\"coverage\\\":0,\\\"failedFilter\\\":true,\\\"allAf\\\":0,\\\"allAn\\\":56642,\\\"allAc\\\":0,\\\"allHc\\\":0,\\\"afrAf\\\":0,\\\"afrAn\\\":14642,\\\"afrAc\\\":0,\\\"afrHc\\\":0,\\\"amrAf\\\":0,\\\"amrAn\\\":6420,\\\"amrAc\\\":0,\\\"amrHc\\\":0,\\\"easAf\\\":0,\\\"easAn\\\":1712,\\\"easAc\\\":0,\\\"easHc\\\":0,\\\"finAf\\\":0,\\\"finAn\\\":4326,\\\"finAc\\\":0,\\\"finHc\\\":0,\\\"nfeAf\\\":0,\\\"nfeAn\\\":25546,\\\"nfeAc\\\":0,\\\"nfeHc\\\":0,\\\"asjAf\\\":0,\\\"asjAn\\\":1550,\\\"asjAc\\\":0,\\\"asjHc\\\":0,\\\"sasAf\\\":0,\\\"sasAn\\\":1120,\\\"sasAc\\\":0,\\\"sasHc\\\":0,\\\"othAf\\\":0,\\\"othAn\\\":782,\\\"othAc\\\":0,\\\"othHc\\\":0,\\\"maleAf\\\":0,\\\"maleAn\\\":27334,\\\"maleAc\\\":0,\\\"maleHc\\\":0,\\\"femaleAf\\\":0,\\\"femaleAn\\\":29308,\\\"femaleAc\\\":0,\\\"femaleHc\\\":0,\\\"controlsAllAf\\\":0,\\\"controlsAllAn\\\":11608,\\\"controlsAllAc\\\":0\", items[0].GetJsonString());\n            Assert.Equal(\"\\\"coverage\\\":0,\\\"failedFilter\\\":true,\\\"allAf\\\":0.000225,\\\"allAn\\\":22208,\\\"allAc\\\":5,\\\"allHc\\\":0,\\\"afrAf\\\":0.000186,\\\"afrAn\\\":5362,\\\"afrAc\\\":1,\\\"afrHc\\\":0,\\\"amrAf\\\":0,\\\"amrAn\\\":1580,\\\"amrAc\\\":0,\\\"amrHc\\\":0,\\\"easAf\\\":0,\\\"easAn\\\":560,\\\"easAc\\\":0,\\\"easHc\\\":0,\\\"finAf\\\":0.001126,\\\"finAn\\\":888,\\\"finAc\\\":1,\\\"finHc\\\":0,\\\"nfeAf\\\":0.000248,\\\"nfeAn\\\":12078,\\\"nfeAc\\\":3,\\\"nfeHc\\\":0,\\\"asjAf\\\":0,\\\"asjAn\\\":702,\\\"asjAc\\\":0,\\\"asjHc\\\":0,\\\"sasAf\\\":0,\\\"sasAn\\\":518,\\\"sasAc\\\":0,\\\"sasHc\\\":0,\\\"othAf\\\":0,\\\"othAn\\\":368,\\\"othAc\\\":0,\\\"othHc\\\":0,\\\"maleAf\\\":0.000101,\\\"maleAn\\\":9872,\\\"maleAc\\\":1,\\\"maleHc\\\":0,\\\"femaleAf\\\":0.000324,\\\"femaleAn\\\":12336,\\\"femaleAc\\\":4,\\\"femaleHc\\\":0,\\\"controlsAllAf\\\":0.000287,\\\"controlsAllAn\\\":3484,\\\"controlsAllAc\\\":1\", items[1].GetJsonString());\n        }\n\n        private static Stream GetConflictingItemsStream()\n        {\n            
var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##gnomAD\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\"22\\t16558315\\trs369787349\\tT\\tC,G,T,ACTGGCTGCCTGGCTTG\\t818363\\tAC0;LCR;RF;SEGDUP\\tAC=87,7,0,2;AF=5.30488e-01,4.26829e-02,0.00000e+00,1.21951e-02;AN=164;AC_AFR=31,1,0,2;AC_AMR=3,0,0,0;AC_ASJ=0,0,0,0;AC_EAS=4,0,0,0;AC_FIN=33,5,0,0;AC_NFE=13,1,0,0;AC_OTH=3,0,0,0;AC_Male=40,1,0,0;AC_Female=47,6,0,2;AN_AFR=56;AN_AMR=4;AN_ASJ=0;AN_EAS=6;AN_FIN=64;AN_NFE=28;AN_OTH=6;AN_Male=78;AN_Female=86;AF_AFR=5.53571e-01,1.78571e-02,0.00000e+00,3.57143e-02;AF_AMR=7.50000e-01,0.00000e+00,0.00000e+00,0.00000e+00;AF_ASJ=.,.,.,.;AF_EAS=6.66667e-01,0.00000e+00,0.00000e+00,0.00000e+00;AF_FIN=5.15625e-01,7.81250e-02,0.00000e+00,0.00000e+00;AF_NFE=4.64286e-01,3.57143e-02,0.00000e+00,0.00000e+00;AF_OTH=5.00000e-01,0.00000e+00,0.00000e+00,0.00000e+00;AF_Male=5.12821e-01,1.28205e-02,0.00000e+00,0.00000e+00;AF_Female=5.46512e-01,6.97674e-02,0.00000e+00,2.32558e-02;GC_AFR=3,16,7,0,1,0,0,0,0,0,0,0,0,0,1;GC_AMR=0,1,1,0,0,0,0,0,0,0,0,0,0,0,0;GC_ASJ=0,0,0,0,0,0,0,0,0,0,0,0,0,0,0;GC_EAS=0,2,1,0,0,0,0,0,0,0,0,0,0,0,0;GC_FIN=2,18,6,0,3,1,0,0,0,0,0,0,0,0,0;GC_NFE=3,8,2,0,1,0,0,0,0,0,0,0,0,0,0;GC_OTH=1,1,1,0,0,0,0,0,0,0,0,0,0,0,0;GC_Male=6,23,8,0,1,0,0,0,0,0,0,0,0,0,0;GC_Female=3,23,10,0,4,1,0,0,0,0,0,0,0,0,1;AC_raw=7179,402,23,4;AN_raw=13956;AF_raw=5.14402e-01,2.88048e-02,1.64804e-03,2.86615e-04;GC_raw=2158,1885,2598,68,90,122,3,8,0,6,0,0,0,0,2;GC=9,46,18,0,5,1,0,0,0,0,0,0,0,0,1;AC_POPMAX=3,5,.,2;AN_POPMAX=4,64,.,56;AF_POPMAX=7.50000e-01,7.81250e-02,.,3.57143e-02\");\n            
writer.WriteLine(\"22\\t16558315\\trs376808508\\tTAAGCCAGCCAGCCAGCCAAGCTGGCCAAGCCAGACAGGCAGCCAAGCCAACCAAGACACCCAGGCAGCCAAGCCAGC\\tCAAGCCAGCCAGCCAGCCAAGCTGGCCAAGCCAGACAGGCAGCCAAGCCAACCAAGACACCCAGGCAGCCAAGCCAGC,T\\t3.62825e+06\\tLCR;RF;SEGDUP\\tAC=155,1;AF=9.63451e-03,6.21581e-05;AN=16088;AC_AFR=46,1;AC_AMR=6,0;AC_ASJ=1,0;AC_EAS=3,0;AC_FIN=27,0;AC_NFE=67,0;AC_OTH=5,0;AC_Male=83,1;AC_Female=72,0;AN_AFR=3744;AN_AMR=534;AN_ASJ=186;AN_EAS=986;AN_FIN=1770;AN_NFE=8370;AN_OTH=498;AN_Male=8994;AN_Female=7094;AF_AFR=1.22863e-02,2.67094e-04;AF_AMR=1.12360e-02,0.00000e+00;AF_ASJ=5.37634e-03,0.00000e+00;AF_EAS=3.04260e-03,0.00000e+00;AF_FIN=1.52542e-02,0.00000e+00;AF_NFE=8.00478e-03,0.00000e+00;AF_OTH=1.00402e-02,0.00000e+00;AF_Male=9.22837e-03,1.11185e-04;AF_Female=1.01494e-02,0.00000e+00;GC_AFR=602,46,0,1,0,0;GC_AMR=64,6,0,0,0,0;GC_ASJ=20,1,0,0,0,0;GC_EAS=204,3,0,0,0,0;GC_FIN=255,23,2,0,0,0;GC_NFE=1083,51,8,0,0,0;GC_OTH=59,5,0,0,0,0;GC_Male=1304,71,6,1,0,0;GC_Female=983,64,4,0,0,0;AC_raw=413,1;AN_raw=28686;AF_raw=1.43973e-02,3.48602e-05;GC_raw=7802,349,30,1,0,0;GC=2287,135,10,1,0,0;AC_POPMAX=27,1;AN_POPMAX=1770,3744;AF_POPMAX=1.52542e-02,2.67094e-04\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void IdentifyConflictingItems()\n        {\n            var sequence = new SimpleSequence(new string('A', VariantUtils.MaxUpstreamLength) + \"TAAGCCAGCCAGCCAGCCAAGCTGGCCAAGCCAGACAGGCAGCCAAGCCAACCAAGACACCCAGGCAGCCAAGCCAGC\", 16558315 - VariantUtils.MaxUpstreamLength);\n\n            var sequenceProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh38, sequence, ChromosomeUtilities.RefNameToChromosome);\n            var reader           = new StreamReader(GetConflictingItemsStream());\n            var gnomadReader     = new GnomadSnvReader(reader, null, sequenceProvider);\n\n            var items = new List<ISupplementaryDataItem>();\n            foreach (GnomadItem item in 
gnomadReader.GetCombinedItems())\n            {\n                //item.Trim();\n                if (item.Position == 16558315)\n                    items.Add(item);\n            }\n\n            items = SuppDataUtilities.RemoveConflictingAlleles(items, false);\n\n            //two of the items were removed as conflicting items\n            Assert.Equal(3, items.Count);\n        }\n\n        private static Stream GetShiftingItemsStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##gnomAD\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\"6\\t157100396\\trs572236007\\tGCGC\\tGCGCCGC,G,GCGCCGCCGC\\t584951.32\\tPASS\\tAC=1218,2,16;AF=5.55505e-02,9.12159e-05,7.29727e-04;AN=21926;BaseQRankSum=-1.09000e-01;ClippingRankSum=0.00000e+00;DP=300864;FS=0.00000e+00;InbreedingCoeff=1.17500e-01;MQ=6.00000e+01;MQRankSum=5.00000e-02;QD=2.04400e+01;ReadPosRankSum=-3.20000e-02;SOR=6.96000e-01;VQSLOD=1.36000e+00;VQSR_culprit=FS;GQ_HIST_ALT=2|51|6|22|28|15|24|50|9|10|13|14|14|30|30|40|21|2|7|1303,0|0|0|0|0|0|0|1|0|0|2|0|2|0|1|1|0|0|0|0,0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|1|0|0|0|18;DP_HIST_ALT=118|405|463|353|224|88|27|9|4|0|0|0|0|0|0|0|0|0|0|0,1|3|3|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0,0|3|3|5|4|3|0|1|0|0|0|0|0|0|0|0|0|0|0|0;AB_HIST_ALT=0|0|2|2|15|66|110|105|196|124|306|173|171|111|49|51|27|13|4|0,0|0|0|1|3|0|1|0|1|0|0|0|0|1|0|0|0|0|0|0,0|0|0|0|0|0|1|0|2|3|3|3|5|2|0|0|0|0|0|0;GQ_HIST_ALL=884|807|422|735|789|518|941|1060|612|1106|1091|578|1371|377|750|327|591|92|328|1739;DP_HIST_ALL=1331|2305|2891|3184|2842|1490|781|244|45|4|1|0|0|0|0|0|0|0|0|0;AB_HIST_ALL=0|0|2|3|18|66|112|105|199|127|309|175|176|115|49|51|27|13|4|0;AC_Male=661,1,6;AC_Female=557,1,10;AN_Male=12450;AN_Female=9476;AF_Male=5.30924e-02,8.03213e-05,4.81928e-04;AF_Female=5.87801e-02,1.05530e-04,1.05530e-03;GC_Male=5587,601,30,1,0,0,6,0,0,0;GC_Female=4205,487,35,
1,0,0,10,0,0,0;GC_raw=13401,1525,166,7,0,0,19,0,0,0;AC_raw=1857,7,19;AN_raw=30236;GC=9792,1088,65,2,0,0,16,0,0,0;AF_raw=6.14169e-02,2.31512e-04,6.28390e-04;Hom_AFR=0,0,0;Hom_AMR=1,0,0;Hom_ASJ=0,0,0;Hom_EAS=0,0,0;Hom_FIN=5,0,0;Hom_NFE=56,0,0;Hom_OTH=3,0,0;Hom=65,0,0;Hom_raw=166,0,0;AC_AFR=123,0,15;AC_AMR=8,0,0;AC_ASJ=6,0,0;AC_EAS=0,0,0;AC_FIN=69,0,0;AC_NFE=979,2,1;AC_OTH=33,0,0;AN_AFR=7512;AN_AMR=376;AN_ASJ=206;AN_EAS=1556;AN_FIN=694;AN_NFE=11022;AN_OTH=560;AF_AFR=1.63738e-02,0.00000e+00,1.99681e-03;AF_AMR=2.12766e-02,0.00000e+00,0.00000e+00;AF_ASJ=2.91262e-02,0.00000e+00,0.00000e+00;AF_EAS=0.00000e+00,0.00000e+00,0.00000e+00;AF_FIN=9.94236e-02,0.00000e+00,0.00000e+00;AF_NFE=8.88224e-02,1.81455e-04,9.07276e-05;AF_OTH=5.89286e-02,0.00000e+00,0.00000e+00;POPMAX=FIN,NFE,AFR;AC_POPMAX=69,2,15;AN_POPMAX=694,11022,7512;AF_POPMAX=9.94236e-02,1.81455e-04,1.99681e-03;DP_MEDIAN=13,7,17;DREF_MEDIAN=1.00000e-25,5.31547e-07,3.16228e-39;GQ_MEDIAN=99,60,99;AB_MEDIAN=5.00000e-01,2.77778e-01,5.71429e-01;AS_RF=7.69554e-01,3.84245e-02,8.79158e-01;AS_FilterStatus=PASS,RF,PASS;CSQ=CGCCGC|inframe_insertion|MODERATE|ARID1B|ENSG00000049618|Transcript|ENST00000275248|protein_coding|1/20||ENST00000275248.4:c.1171_1176dupCCGCCG|ENSP00000275248.4:p.Pro391_Pro392dup|1314-1315|1162-1163|388|P/PPP|ccg/cCGCCGCcg|rs766249098|3||1||insertion|1|HGNC|18040|||||ENSP00000275248||G3XAA0|UPI0000231CAD|1|||Low_complexity_(Seg):seg&hmmpanther:PTHR12656&hmmpanther:PTHR12656:SF11|14|||||||||||||||||||||||||||||,CGCCGC|inframe_insertion|MODERATE|ARID1B|ENSG00000049618|Transcript|ENST00000346085|protein_coding|1/20||ENST00000346085.5:c.1345_1350dupCCGCCG|ENSP00000344546.4:p.Pro449_Pro450dup|1337-1338|1336-1337|446|P/PPP|ccg/cCGCCGCcg|rs766249098|3||1||insertion|1|HGNC|18040|YES|||CCDS55072.1|ENSP00000344546|Q8NFD5||UPI000058E4B2|1|||Low_complexity_(Seg):seg&hmmpanther:PTHR12656:SF11&hmmpanther:PTHR12656|14|||||||||||||||||||||||||||||,CGCCGC|inframe_insertion|MODERATE|ARID1B|ENSG00000049618|Transcript|ENST000003
50026|protein_coding|1/19||ENST00000350026.5:c.1345_1350dupCCGCCG|ENSP00000055163.7:p.Pro449_Pro450dup|1337-1338|1336-1337|446|P/PPP|ccg/cCGCCGCcg|rs766249098|3||1||insertion|1|HGNC|18040||||CCDS5251.2|ENSP00000055163|Q8NFD5||UPI000058E2EA|1|||Low_complexity_(Seg):seg&hmmpanther:PTHR12656&hmmpanther:PTHR12656:SF11|14|||||||||||||||||||||||||||||,CGCCGC|inframe_insertion|MODERATE|ARID1B|ENSG00000049618|Transcript|ENST00000367148|protein_coding|1/20||ENST00000367148.1:c.1345_1350dupCCGCCG|ENSP00000356116.1:p.Pro449_Pro450dup|1336-1337|1336-1337|446|P/PPP|ccg/cCGCCGCcg|rs766249098|3||1||insertion|1|HGNC|18040|||||ENSP00000356116|Q8NFD5|G3XAA0|UPI000058E4B3|1|||Low_complexity_(Seg):seg&hmmpanther:PTHR12656:SF11&hmmpanther:PTHR12656|14|||||||||||||||||||||||||||||,CGCCGC|upstream_gene_variant|MODIFIER|ARID1B|ENSG00000049618|Transcript|ENST00000414678|protein_coding||||||||||rs766249098|3|167|1|cds_start_NF|insertion|1|HGNC|18040|||||ENSP00000412835||H0Y7H8|UPI0001D3BCFD|1|||||||||||||||||||||||||||||||||,CGCCGC|upstream_gene_variant|MODIFIER|RP11-230C9.2|ENSG00000271551|Transcript|ENST00000603191|lincRNA||||||||||rs766249098|3|2188|-1||insertion|1|Clone_based_vega_gene||YES|||||||||||||||||||||||||||||||||||||||||,CGCCGC|upstream_gene_variant|MODIFIER|RP11-230C9.4|ENSG00000271265|Transcript|ENST00000604082|lincRNA||||||||||rs766249098|3|4603|-1||insertion|1|Clone_based_vega_gene||YES|||||||||||||||||||||||||||||||||||||||||,CGCCGC|downstream_gene_variant|MODIFIER|RP11-230C9.3|ENSG00000270487|Transcript|ENST00000604792|antisense||||||||||rs766249098|3|1061|-1||insertion|1|Clone_based_vega_gene||YES|||||||||||||||||||||||||||||||||||||||||,CGCCGC|downstream_gene_variant|MODIFIER|MIR4466|ENSG00000271899|Transcript|ENST00000606121|miRNA||||||||||rs766249098|3|412|-1||insertion|1|HGNC|41726|YES|||||||||||||||||||||||||||||||||||||||||,-|inframe_deletion|MODERATE|ARID1B|ENSG00000049618|Transcript|ENST00000275248|protein_coding|1/20||ENST00000275248.4:c.1174_1176delCCG|ENSP0000
0275248.4:p.Pro392del|1312-1314|1160-1162|387-388|AP/A|gCGCcg/gcg|rs766249098|2||1||insertion|1|HGNC|18040|||||ENSP00000275248||G3XAA0|UPI0000231CAD|1|||Low_complexity_(Seg):seg&hmmpanther:PTHR12656&hmmpanther:PTHR12656:SF11|14|||||||||||||||||||||||||||||,-|inframe_deletion|MODERATE|ARID1B|ENSG00000049618|Transcript|ENST00000346085|protein_coding|1/20||ENST00000346085.5:c.1348_1350delCCG|ENSP00000344546.4:p.Pro450del|1335-1337|1334-1336|445-446|AP/A|gCGCcg/gcg|rs766249098|2||1||insertion|1|HGNC|18040|YES|||CCDS55072.1|ENSP00000344546|Q8NFD5||UPI000058E4B2|1|||Low_complexity_(Seg):seg&hmmpanther:PTHR12656:SF11&hmmpanther:PTHR12656|14|||||||||||||||||||||||||||||,-|inframe_deletion|MODERATE|ARID1B|ENSG00000049618|Transcript|ENST00000350026|protein_coding|1/19||ENST00000350026.5:c.1348_1350delCCG|ENSP00000055163.7:p.Pro450del|1335-1337|1334-1336|445-446|AP/A|gCGCcg/gcg|rs766249098|2||1||insertion|1|HGNC|18040||||CCDS5251.2|ENSP00000055163|Q8NFD5||UPI000058E2EA|1|||Low_complexity_(Seg):seg&hmmpanther:PTHR12656&hmmpanther:PTHR12656:SF11|14|||||||||||||||||||||||||||||,-|inframe_deletion|MODERATE|ARID1B|ENSG00000049618|Transcript|ENST00000367148|protein_coding|1/20||ENST00000367148.1:c.1348_1350delCCG|ENSP00000356116.1:p.Pro450del|1334-1336|1334-1336|445-446|AP/A|gCGCcg/gcg|rs766249098|2||1||insertion|1|HGNC|18040|||||ENSP00000356116|Q8NFD5|G3XAA0|UPI000058E4B3|1|||Low_complexity_(Seg):seg&hmmpanther:PTHR12656:SF11&hmmpanther:PTHR12656|14|||||||||||||||||||||||||||||,-|upstream_gene_variant|MODIFIER|ARID1B|ENSG00000049618|Transcript|ENST00000414678|protein_coding||||||||||rs766249098|2|168|1|cds_start_NF|insertion|1|HGNC|18040|||||ENSP00000412835||H0Y7H8|UPI0001D3BCFD|1|||||||||||||||||||||||||||||||||,-|upstream_gene_variant|MODIFIER|RP11-230C9.2|ENSG00000271551|Transcript|ENST00000603191|lincRNA||||||||||rs766249098|2|2186|-1||insertion|1|Clone_based_vega_gene||YES|||||||||||||||||||||||||||||||||||||||||,-|upstream_gene_variant|MODIFIER|RP11-230C9.4|ENSG00000271265|Tr
anscript|ENST00000604082|lincRNA||||||||||rs766249098|2|4601|-1||insertion|1|Clone_based_vega_gene||YES|||||||||||||||||||||||||||||||||||||||||,-|downstream_gene_variant|MODIFIER|RP11-230C9.3|ENSG00000270487|Transcript|ENST00000604792|antisense||||||||||rs766249098|2|1062|-1||insertion|1|Clone_based_vega_gene||YES|||||||||||||||||||||||||||||||||||||||||,-|downstream_gene_variant|MODIFIER|MIR4466|ENSG00000271899|Transcript|ENST00000606121|miRNA||||||||||rs766249098|2|413|-1||insertion|1|HGNC|41726|YES|||||||||||||||||||||||||||||||||||||||||,CGC|inframe_insertion|MODERATE|ARID1B|ENSG00000049618|Transcript|ENST00000275248|protein_coding|1/20||ENST00000275248.4:c.1174_1176dupCCG|ENSP00000275248.4:p.Pro392dup|1314-1315|1162-1163|388|P/PP|ccg/cCGCcg|rs766249098|1||1||insertion|1|HGNC|18040|||||ENSP00000275248||G3XAA0|UPI0000231CAD|1|||Low_complexity_(Seg):seg&hmmpanther:PTHR12656&hmmpanther:PTHR12656:SF11|14|||||||||||||||||||||||||||||,CGC|inframe_insertion|MODERATE|ARID1B|ENSG00000049618|Transcript|ENST00000346085|protein_coding|1/20||ENST00000346085.5:c.1348_1350dupCCG|ENSP00000344546.4:p.Pro450dup|1337-1338|1336-1337|446|P/PP|ccg/cCGCcg|rs766249098|1||1||insertion|1|HGNC|18040|YES|||CCDS55072.1|ENSP00000344546|Q8NFD5||UPI000058E4B2|1|||Low_complexity_(Seg):seg&hmmpanther:PTHR12656:SF11&hmmpanther:PTHR12656|14|||||||||||||||||||||||||||||,CGC|inframe_insertion|MODERATE|ARID1B|ENSG00000049618|Transcript|ENST00000350026|protein_coding|1/19||ENST00000350026.5:c.1348_1350dupCCG|ENSP00000055163.7:p.Pro450dup|1337-1338|1336-1337|446|P/PP|ccg/cCGCcg|rs766249098|1||1||insertion|1|HGNC|18040||||CCDS5251.2|ENSP00000055163|Q8NFD5||UPI000058E2EA|1|||Low_complexity_(Seg):seg&hmmpanther:PTHR12656&hmmpanther:PTHR12656:SF11|14|||||||||||||||||||||||||||||,CGC|inframe_insertion|MODERATE|ARID1B|ENSG00000049618|Transcript|ENST00000367148|protein_coding|1/20||ENST00000367148.1:c.1348_1350dupCCG|ENSP00000356116.1:p.Pro450dup|1336-1337|1336-1337|446|P/PP|ccg/cCGCcg|rs766249098|1||1||inse
rtion|1|HGNC|18040|||||ENSP00000356116|Q8NFD5|G3XAA0|UPI000058E4B3|1|||Low_complexity_(Seg):seg&hmmpanther:PTHR12656:SF11&hmmpanther:PTHR12656|14|||||||||||||||||||||||||||||,CGC|upstream_gene_variant|MODIFIER|ARID1B|ENSG00000049618|Transcript|ENST00000414678|protein_coding||||||||||rs766249098|1|167|1|cds_start_NF|insertion|1|HGNC|18040|||||ENSP00000412835||H0Y7H8|UPI0001D3BCFD|1|||||||||||||||||||||||||||||||||,CGC|upstream_gene_variant|MODIFIER|RP11-230C9.2|ENSG00000271551|Transcript|ENST00000603191|lincRNA||||||||||rs766249098|1|2188|-1||insertion|1|Clone_based_vega_gene||YES|||||||||||||||||||||||||||||||||||||||||,CGC|upstream_gene_variant|MODIFIER|RP11-230C9.4|ENSG00000271265|Transcript|ENST00000604082|lincRNA||||||||||rs766249098|1|4603|-1||insertion|1|Clone_based_vega_gene||YES|||||||||||||||||||||||||||||||||||||||||,CGC|downstream_gene_variant|MODIFIER|RP11-230C9.3|ENSG00000270487|Transcript|ENST00000604792|antisense||||||||||rs766249098|1|1061|-1||insertion|1|Clone_based_vega_gene||YES|||||||||||||||||||||||||||||||||||||||||,CGC|downstream_gene_variant|MODIFIER|MIR4466|ENSG00000271899|Transcript|ENST00000606121|miRNA||||||||||rs766249098|1|412|-1||insertion|1|HGNC|41726|YES|||||||||||||||||||||||||||||||||||||||||,CGCCGC|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00001231649|promoter||||||||||rs766249098|3||||insertion|1||||||||||||||||||||||||||||||||||||||||||||,-|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00001231649|promoter||||||||||rs766249098|2||||insertion|1||||||||||||||||||||||||||||||||||||||||||||,CGC|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00001231649|promoter||||||||||rs766249098|1||||insertion|1||||||||||||||||||||||||||||||||||||||||||||,CGCCGC|TF_binding_site_variant|MODIFIER|||MotifFeature|MA0162.2|||||||||||rs766249098|3||1||insertion|1|||||||||||||||||||||||||||||||||||||Egr1:MA0162.2|5|N|||||,CGCCGC|TF_binding_site_variant|MODIFIER|||MotifFeature|MA0162.2|||||||||||rs766249098|3||1|
|insertion|1|||||||||||||||||||||||||||||||||||||Egr1:MA0162.2|2|N|||||,CGC|TF_binding_site_variant|MODIFIER|||MotifFeature|MA0162.2|||||||||||rs766249098|1||1||insertion|1|||||||||||||||||||||||||||||||||||||Egr1:MA0162.2|5|N|||||,CGC|TF_binding_site_variant|MODIFIER|||MotifFeature|MA0162.2|||||||||||rs766249098|1||1||insertion|1|||||||||||||||||||||||||||||||||||||Egr1:MA0162.2|2|N|||||,-|TF_binding_site_variant|MODIFIER|||MotifFeature|MA0162.2|||||||||||rs766249098|2||1||insertion|1|||||||||||||||||||||||||||||||||||||Egr1:MA0162.2|2|N|||||,-|TF_binding_site_variant|MODIFIER|||MotifFeature|MA0162.2|||||||||||rs766249098|2||1||insertion|1|||||||||||||||||||||||||||||||||||||Egr1:MA0162.2|-1|N|||||;GC_AFR=3618,123,0,0,0,0,15,0,0,0;GC_AMR=181,6,1,0,0,0,0,0,0,0;GC_ASJ=97,6,0,0,0,0,0,0,0,0;GC_EAS=778,0,0,0,0,0,0,0,0,0;GC_FIN=283,59,5,0,0,0,0,0,0,0;GC_NFE=4585,867,56,2,0,0,1,0,0,0;GC_OTH=250,27,3,0,0,0,0,0,0,0;Hom_Male=30,0,0;Hom_Female=35,0,0\\n\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void LeftShiftingItems()\n        {\n            var sequence = new SimpleSequence(new string('A', VariantUtils.MaxUpstreamLength) + \"GCGCGC\", 157100394 -1 - VariantUtils.MaxUpstreamLength);\n\n            var sequenceProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh38, sequence, ChromosomeUtilities.RefNameToChromosome);\n            var reader           = new StreamReader(GetShiftingItemsStream());\n            var gnomadReader     = new GnomadSnvReader(reader, null, sequenceProvider);\n\n            var items = gnomadReader.GetCombinedItems().ToList();\n            Assert.Equal(3,         items.Count);\n            Assert.Equal(157100397, items[0].Position);\n            Assert.Equal(157100397, items[1].Position);\n            Assert.Equal(157100397, items[2].Position);\n        }\n\n        private static Stream GetChr22ExomeStream()\n        {\n            var stream = 
new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##gnomAD\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\"22\\t15528107\\trs1231414491\\tT\\tC\\t434.31\\tPASS\\tAC=2;AN=57352;AF=3.48724e-05;rf_tp_probability=0.205705;FS=0;InbreedingCoeff=0.0527;MQ=21.67;MQRankSum=0.289;QD=1.65;ReadPosRankSum=0.413;SOR=0.085;BaseQRankSum=0.881;ClippingRankSum=-0.175;DP=1563548;VQSLOD=2.41;VQSR_culprit=QD;segdup;rf_negative_label;rf_label=FP;rf_train;variant_type=multi-snv;allele_type=snv;n_alt_alleles=2;pab_max=0.387695;gq_hist_alt_bin_freq=0|0|0|0|0|1|0|0|0|0|0|0|0|1|0|1|0|0|0|0;gq_hist_all_bin_freq=35633|21017|2883|2706|1410|581|1244|1433|923|2307|2802|1588|3776|2801|1906|2362|2569|822|1708|3862;dp_hist_alt_bin_freq=0|0|1|0|0|1|1|0|0|0|0|0|0|0|0|0|0|0|0|0;dp_hist_alt_n_larger=0;dp_hist_all_bin_freq=126884|6290|2994|4863|7505|6812|4743|2599|957|376|174|63|25|20|15|5|2|2|1|1;dp_hist_all_n_larger=0;ab_hist_alt_bin_freq=0|0|0|1|1|0|1|0|0|0|0|0|0|0|0|0|0|0|0|0;AC_nfe_seu=0;AN_nfe_seu=120;AF_nfe_seu=0;nhomalt_nfe_seu=0;controls_AC_afr_male=0;controls_AN_afr_male=958;controls_AF_afr_male=0;controls_nhomalt_afr_male=0;non_neuro_AC_eas_kor=0;non_neuro_AN_eas_kor=1844;non_neuro_AF_eas_kor=0;non_neuro_nhomalt_eas_kor=0;non_topmed_AC_amr=0;non_topmed_AN_amr=10422;non_topmed_AF_amr=0;non_topmed_nhomalt_amr=0;non_cancer_AC_asj_female=0;non_cancer_AN_asj_female=828;non_cancer_AF_asj_female=0;non_cancer_nhomalt_asj_female=0;AC_raw=3;AN_raw=151718;AF_raw=1.97735e-05;nhomalt_raw=0;AC_fin_female=0;AN_fin_female=840;AF_fin_female=0;nhomalt_fin_female=0;non_cancer_AC_oth_female=0;non_cancer_AN_oth_female=944;non_cancer_AF_oth_female=0;non_cancer_nhomalt_oth_female=0;AC_nfe_bgr=0;AN_nfe_bgr=28;AF_nfe_bgr=0;nhomalt_nfe_bgr=0;non_neuro_AC_asj_female=0;non_neuro_AN_asj_female=324;non_neuro_AF_asj_female=0;non_neuro_nhomalt_asj_female=0;AC_sas_male=0;AN_sas_male=3672;AF_sas_m
ale=0;nhomalt_sas_male=0;non_neuro_AC_afr_male=0;non_neuro_AN_afr_male=2218;non_neuro_AF_afr_male=0;non_neuro_nhomalt_afr_male=0;AC_afr_male=0;AN_afr_male=2234;AF_afr_male=0;nhomalt_afr_male=0;AC_afr=0;AN_afr=5236;AF_afr=0;nhomalt_afr=0;controls_AC_nfe_swe=0;controls_AN_nfe_swe=6;controls_AF_nfe_swe=0;controls_nhomalt_nfe_swe=0;non_neuro_AC_afr_female=0;non_neuro_AN_afr_female=2988;non_neuro_AF_afr_female=0;non_neuro_nhomalt_afr_female=0;non_topmed_AC_amr_female=0;non_topmed_AN_amr_female=5636;non_topmed_AF_amr_female=0;non_topmed_nhomalt_amr_female=0;non_cancer_AC_female=1;non_cancer_AN_female=27774;non_cancer_AF_female=3.60049e-05;non_cancer_nhomalt_female=0;non_cancer_AC_nfe_onf=0;non_cancer_AN_nfe_onf=4264;non_cancer_AF_nfe_onf=0;non_cancer_nhomalt_nfe_onf=0;non_cancer_AC_male=1;non_cancer_AN_male=28730;non_cancer_AF_male=3.48068e-05;non_cancer_nhomalt_male=0;non_topmed_AC_oth_female=0;non_topmed_AN_oth_female=940;non_topmed_AF_oth_female=0;non_topmed_nhomalt_oth_female=0;AC_eas_female=0;AN_eas_female=3794;AF_eas_female=0;nhomalt_eas_female=0;non_cancer_AC_sas_female=0;non_cancer_AN_sas_female=2380;non_cancer_AF_sas_female=0;non_cancer_nhomalt_sas_female=0;AC_afr_female=0;AN_afr_female=3002;AF_afr_female=0;nhomalt_afr_female=0;AC_sas=0;AN_sas=6056;AF_sas=0;nhomalt_sas=0;non_neuro_AC_female=1;non_neuro_AN_female=24066;non_neuro_AF_female=4.15524e-05;non_neuro_nhomalt_female=0;controls_AC_afr=0;controls_AN_afr=2244;controls_AF_afr=0;controls_nhomalt_afr=0;non_neuro_AC_eas_jpn=0;non_neuro_AN_eas_jpn=12;non_neuro_AF_eas_jpn=0;non_neuro_nhomalt_eas_jpn=0;AC_nfe_onf=0;AN_nfe_onf=4602;AF_nfe_onf=0;nhomalt_nfe_onf=0;non_cancer_AC_amr_male=0;non_cancer_AN_amr_male=4802;non_cancer_AF_amr_male=0;non_cancer_nhomalt_amr_male=0;controls_AC_fin_male=0;controls_AN_fin_male=718;controls_AF_fin_male=0;controls_nhomalt_fin_male=0;non_neuro_AC_nfe_nwe=2;non_neuro_AN_nfe_nwe=13330;non_neuro_AF_nfe_nwe=0.000150038;non_neuro_nhomalt_nfe_nwe=0;AC_fin_male=0;AN_fin_male=1076;AF_fin_male
=0;nhomalt_fin_male=0;AC_nfe_female=1;AN_nfe_female=10978;AF_nfe_female=9.10913e-05;nhomalt_nfe_female=0;AC_amr=0;AN_amr=10460;AF_amr=0;nhomalt_amr=0;non_topmed_AC_nfe_male=1;non_topmed_AN_nfe_male=10888;non_topmed_AF_nfe_male=9.18442e-05;non_topmed_nhomalt_nfe_male=0;non_neuro_AC_sas=0;non_neuro_AN_sas=6050;non_neuro_AF_sas=0;non_neuro_nhomalt_sas=0;non_cancer_AC_fin_male=0;non_cancer_AN_fin_male=1076;non_cancer_AF_fin_male=0;non_cancer_nhomalt_fin_male=0;non_cancer_AC_nfe_seu=0;non_cancer_AN_nfe_seu=58;non_cancer_AF_nfe_seu=0;non_cancer_nhomalt_nfe_seu=0;AC_eas=0;AN_eas=7828;AF_eas=0;nhomalt_eas=0;nhomalt=0;non_neuro_AC_nfe_female=1;non_neuro_AN_nfe_female=9114;non_neuro_AF_nfe_female=0.000109721;non_neuro_nhomalt_nfe_female=0;non_neuro_AC_afr=0;non_neuro_AN_afr=5206;non_neuro_AF_afr=0;non_neuro_nhomalt_afr=0;controls_AC_raw=0;controls_AN_raw=68088;controls_AF_raw=0;controls_nhomalt_raw=0;non_cancer_AC_eas=0;non_cancer_AN_eas=7814;non_cancer_AF_eas=0;non_cancer_nhomalt_eas=0;non_cancer_AC_amr_female=0;non_cancer_AN_amr_female=5648;non_cancer_AF_amr_female=0;non_cancer_nhomalt_amr_female=0;non_neuro_AC_nfe_swe=0;non_neuro_AN_nfe_swe=66;non_neuro_AF_nfe_swe=0;non_neuro_nhomalt_nfe_swe=0;controls_AC_male=0;controls_AN_male=11462;controls_AF_male=0;controls_nhomalt_male=0;non_topmed_AC_male=1;non_topmed_AN_male=28306;non_topmed_AF_male=3.53282e-05;non_topmed_nhomalt_male=0;controls_AC_eas_jpn=0;controls_AN_eas_jpn=12;controls_AF_eas_jpn=0;controls_nhomalt_eas_jpn=0;controls_AC_nfe_female=0;controls_AN_nfe_female=3742;controls_AF_nfe_female=0;controls_nhomalt_nfe_female=0;non_neuro_AC_amr=0;non_neuro_AN_amr=7252;non_neuro_AF_amr=0;non_neuro_nhomalt_amr=0;non_neuro_AC_eas_female=0;non_neuro_AN_eas_female=3788;non_neuro_AF_eas_female=0;non_neuro_nhomalt_eas_female=0;AC_asj_male=0;AN_asj_male=694;AF_asj_male=0;nhomalt_asj_male=0;controls_AC_nfe_male=0;controls_AN_nfe_male=3332;controls_AF_nfe_male=0;controls_nhomalt_nfe_male=0;non_neuro_AC_fin=0;non_neuro_AN_fin=1690;non_
neuro_AF_fin=0;non_neuro_nhomalt_fin=0;non_topmed_AC_sas=0;non_topmed_AN_sas=6056;non_topmed_AF_sas=0;non_topmed_nhomalt_sas=0;non_cancer_AC_nfe_female=1;non_cancer_AN_nfe_female=10408;non_cancer_AF_nfe_female=9.60799e-05;non_cancer_nhomalt_nfe_female=0;AC_oth_female=0;AN_oth_female=962;AF_oth_female=0;nhomalt_oth_female=0;non_cancer_AC_asj=0;non_cancer_AN_asj=1506;non_cancer_AF_asj=0;non_cancer_nhomalt_asj=0;AC_nfe_swe=0;AN_nfe_swe=78;AF_nfe_swe=0;nhomalt_nfe_swe=0;controls_AC_nfe=0;controls_AN_nfe=7074;controls_AF_nfe=0;controls_nhomalt_nfe=0;controls_AC_oth_female=0;controls_AN_oth_female=358;controls_AF_oth_female=0;controls_nhomalt_oth_female=0;controls_AC_asj=0;controls_AN_asj=276;controls_AF_asj=0;controls_nhomalt_asj=0;non_neuro_AC_amr_male=0;non_neuro_AN_amr_male=3272;non_neuro_AF_amr_male=0;non_neuro_nhomalt_amr_male=0;controls_AC_nfe_nwe=0;controls_AN_nfe_nwe=5804;controls_AF_nfe_nwe=0;controls_nhomalt_nfe_nwe=0;AC_nfe_nwe=2;AN_nfe_nwe=17428;AF_nfe_nwe=0.000114758;nhomalt_nfe_nwe=0;controls_AC_nfe_seu=0;controls_AN_nfe_seu=8;controls_AF_nfe_seu=0;controls_nhomalt_nfe_seu=0;controls_AC_sas_female=0;controls_AN_sas_female=1110;controls_AF_sas_female=0;controls_nhomalt_sas_female=0;non_neuro_AC_amr_female=0;non_neuro_AN_amr_female=3980;non_neuro_AF_amr_female=0;non_neuro_nhomalt_amr_female=0;non_cancer_AC_eas_jpn=0;non_cancer_AN_eas_jpn=12;non_cancer_AF_eas_jpn=0;non_cancer_nhomalt_eas_jpn=0;non_neuro_AC_nfe_onf=0;non_neuro_AN_nfe_onf=3664;non_neuro_AF_nfe_onf=0;non_neuro_nhomalt_nfe_onf=0;non_topmed_AC_eas_male=0;non_topmed_AN_eas_male=4032;non_topmed_AF_eas_male=0;non_topmed_nhomalt_eas_male=0;AC_eas_jpn=0;AN_eas_jpn=12;AF_eas_jpn=0;nhomalt_eas_jpn=0;non_cancer_AC_afr_male=0;non_cancer_AN_afr_male=2224;non_cancer_AF_afr_male=0;non_cancer_nhomalt_afr_male=0;non_cancer_AC_afr=0;non_cancer_AN_afr=5168;non_cancer_AF_afr=0;non_cancer_nhomalt_afr=0;controls_AC_amr_female=0;controls_AN_amr_female=3004;controls_AF_amr_female=0;controls_nhomalt_amr_female=0;non_neu
ro_AC_fin_male=0;non_neuro_AN_fin_male=1004;non_neuro_AF_fin_male=0;non_neuro_nhomalt_fin_male=0;AC_female=1;AN_female=28482;AF_female=3.51099e-05;nhomalt_female=0;non_neuro_AC_nfe_bgr=0;non_neuro_AN_nfe_bgr=12;non_neuro_AF_nfe_bgr=0;non_neuro_nhomalt_nfe_bgr=0;non_neuro_AC_oth_male=0;non_neuro_AN_oth_male=744;non_neuro_AF_oth_male=0;non_neuro_nhomalt_oth_male=0;non_topmed_AC_nfe_est=0;non_topmed_AN_nfe_est=66;non_topmed_AF_nfe_est=0;non_topmed_nhomalt_nfe_est=0;non_topmed_AC_nfe_nwe=2;non_topmed_AN_nfe_nwe=16722;non_topmed_AF_nfe_nwe=0.000119603;non_topmed_nhomalt_nfe_nwe=0;non_topmed_AC_amr_male=0;non_topmed_AN_amr_male=4786;non_topmed_AF_amr_male=0;non_topmed_nhomalt_amr_male=0;non_cancer_AC_amr=0;non_cancer_AN_amr=10450;non_cancer_AF_amr=0;non_cancer_nhomalt_amr=0;non_topmed_AC_nfe_swe=0;non_topmed_AN_nfe_swe=78;non_topmed_AF_nfe_swe=0;non_topmed_nhomalt_nfe_swe=0;non_topmed_AC_nfe_onf=0;non_topmed_AN_nfe_onf=4310;non_topmed_AF_nfe_onf=0;non_topmed_nhomalt_nfe_onf=0;controls_AC_eas_kor=0;controls_AN_eas_kor=918;controls_AF_eas_kor=0;controls_nhomalt_eas_kor=0;non_topmed_AC_eas_oea=0;non_topmed_AN_eas_oea=5968;non_topmed_AF_eas_oea=0;non_topmed_nhomalt_eas_oea=0;controls_AC_eas_male=0;controls_AN_eas_male=1922;controls_AF_eas_male=0;controls_nhomalt_eas_male=0;controls_AC_oth_male=0;controls_AN_oth_male=340;controls_AF_oth_male=0;controls_nhomalt_oth_male=0;non_topmed_AC=2;non_topmed_AN=56086;non_topmed_AF=3.56595e-05;non_topmed_nhomalt=0;controls_AC_fin=0;controls_AN_fin=1248;controls_AF_fin=0;controls_nhomalt_fin=0;AC_eas_kor=0;AN_eas_kor=1846;AF_eas_kor=0;nhomalt_eas_kor=0;non_neuro_AC_nfe=2;non_neuro_AN_nfe=17204;non_neuro_AF_nfe=0.000116252;non_neuro_nhomalt_nfe=0;non_neuro_AC_fin_female=0;non_neuro_AN_fin_female=686;non_neuro_AF_fin_female=0;non_neuro_nhomalt_fin_female=0;non_cancer_AC_nfe_male=1;non_cancer_AN_nfe_male=11238;non_cancer_AF_nfe_male=8.89838e-05;non_cancer_nhomalt_nfe_male=0;controls_AC_eas_oea=0;controls_AN_eas_oea=2942;controls_AF_eas_oea=0;
controls_nhomalt_eas_oea=0;non_topmed_AC_nfe_seu=0;non_topmed_AN_nfe_seu=120;non_topmed_AF_nfe_seu=0;non_topmed_nhomalt_nfe_seu=0;controls_AC_eas_female=0;controls_AN_eas_female=1950;controls_AF_eas_female=0;controls_nhomalt_eas_female=0;non_topmed_AC_asj=0;non_topmed_AN_asj=1528;non_topmed_AF_asj=0;non_topmed_nhomalt_asj=0;controls_AC_nfe_onf=0;controls_AN_nfe_onf=1234;controls_AF_nfe_onf=0;controls_nhomalt_nfe_onf=0;non_neuro_AC=2;non_neuro_AN=47470;non_neuro_AF=4.21319e-05;non_neuro_nhomalt=0;AC_eas_oea=0;AN_eas_oea=5970;AF_eas_oea=0;nhomalt_eas_oea=0;non_topmed_AC_nfe=2;non_topmed_AN_nfe=21324;non_topmed_AF_nfe=9.3791e-05;non_topmed_nhomalt_nfe=0;non_cancer_AC_oth=0;non_cancer_AN_oth=1952;non_cancer_AF_oth=0;non_cancer_nhomalt_oth=0;non_topmed_AC_raw=3;non_topmed_AN_raw=148374;non_topmed_AF_raw=2.02192e-05;non_topmed_nhomalt_raw=0;non_neuro_AC_nfe_est=0;non_neuro_AN_nfe_est=60;non_neuro_AF_nfe_est=0;non_neuro_nhomalt_nfe_est=0;non_topmed_AC_oth_male=0;non_topmed_AN_oth_male=990;non_topmed_AF_oth_male=0;non_topmed_nhomalt_oth_male=0;non_cancer_AC_oth_male=0;non_cancer_AN_oth_male=1008;non_cancer_AF_oth_male=0;non_cancer_nhomalt_oth_male=0;AC_nfe_est=0;AN_nfe_est=70;AF_nfe_est=0;nhomalt_nfe_est=0;non_cancer_AC_afr_female=0;non_cancer_AN_afr_female=2944;non_cancer_AF_afr_female=0;non_cancer_nhomalt_afr_female=0;non_topmed_AC_afr_male=0;non_topmed_AN_afr_male=2182;non_topmed_AF_afr_male=0;non_topmed_nhomalt_afr_male=0;AC_eas_male=0;AN_eas_male=4034;AF_eas_male=0;nhomalt_eas_male=0;controls_AC_eas=0;controls_AN_eas=3872;controls_AF_eas=0;controls_nhomalt_eas=0;non_neuro_AC_eas_male=0;non_neuro_AN_eas_male=4034;non_neuro_AF_eas_male=0;non_neuro_nhomalt_eas_male=0;non_cancer_AC_nfe_nwe=2;non_cancer_AN_nfe_nwe=17224;non_cancer_AF_nfe_nwe=0.000116117;non_cancer_nhomalt_nfe_nwe=0;controls_AC_sas=0;controls_AN_sas=2804;controls_AF_sas=0;controls_nhomalt_sas=0;non_neuro_AC_sas_male=0;non_neuro_AN_sas_male=3666;non_neuro_AF_sas_male=0;non_neuro_nhomalt_sas_male=0;non_neuro_A
C_asj_male=0;non_neuro_AN_asj_male=376;non_neuro_AF_asj_male=0;non_neuro_nhomalt_asj_male=0;non_cancer_AC_nfe_bgr=0;non_cancer_AN_nfe_bgr=20;non_cancer_AF_nfe_bgr=0;non_cancer_nhomalt_nfe_bgr=0;controls_AC_oth=0;controls_AN_oth=698;controls_AF_oth=0;controls_nhomalt_oth=0;non_cancer_AC_eas_female=0;non_cancer_AN_eas_female=3782;non_cancer_AF_eas_female=0;non_cancer_nhomalt_eas_female=0;AC_nfe=2;AN_nfe=22326;AF_nfe=8.95817e-05;nhomalt_nfe=0;non_topmed_AC_female=1;non_topmed_AN_female=27780;non_topmed_AF_female=3.59971e-05;non_topmed_nhomalt_female=0;non_neuro_AC_asj=0;non_neuro_AN_asj=700;non_neuro_AF_asj=0;non_neuro_nhomalt_asj=0;non_topmed_AC_eas_female=0;non_topmed_AN_eas_female=3794;non_topmed_AF_eas_female=0;non_topmed_nhomalt_eas_female=0;non_neuro_AC_raw=2;non_neuro_AN_raw=125778;non_neuro_AF_raw=1.5901e-05;non_neuro_nhomalt_raw=0;non_topmed_AC_eas=0;non_topmed_AN_eas=7826;non_topmed_AF_eas=0;non_topmed_nhomalt_eas=0;non_topmed_AC_fin_male=0;non_topmed_AN_fin_male=1076;non_topmed_AF_fin_male=0;non_topmed_nhomalt_fin_male=0;non_cancer_AC_asj_male=0;non_cancer_AN_asj_male=678;non_cancer_AF_asj_male=0;non_cancer_nhomalt_asj_male=0;AC_fin=0;AN_fin=1916;AF_fin=0;nhomalt_fin=0;AC_nfe_male=1;AN_nfe_male=11348;AF_nfe_male=8.81213e-05;nhomalt_nfe_male=0;non_topmed_AC_eas_kor=0;non_topmed_AN_eas_kor=1846;non_topmed_AF_eas_kor=0;non_topmed_nhomalt_eas_kor=0;controls_AC_amr_male=0;controls_AN_amr_male=2346;controls_AF_amr_male=0;controls_nhomalt_amr_male=0;non_neuro_AC_eas_oea=0;non_neuro_AN_eas_oea=5966;non_neuro_AF_eas_oea=0;non_neuro_nhomalt_eas_oea=0;AC_sas_female=0;AN_sas_female=2384;AF_sas_female=0;nhomalt_sas_female=0;controls_AC_afr_female=0;controls_AN_afr_female=1286;controls_AF_afr_female=0;controls_nhomalt_afr_female=0;controls_AC_amr=0;controls_AN_amr=5350;controls_AF_amr=0;controls_nhomalt_amr=0;non_topmed_AC_eas_jpn=0;non_topmed_AN_eas_jpn=12;non_topmed_AF_eas_jpn=0;non_topmed_nhomalt_eas_jpn=0;AC_asj_female=0;AN_asj_female=864;AF_asj_female=0;nhomalt_asj_f
emale=0;non_topmed_AC_nfe_bgr=0;non_topmed_AN_nfe_bgr=28;non_topmed_AF_nfe_bgr=0;non_topmed_nhomalt_nfe_bgr=0;non_cancer_AC_nfe_est=0;non_cancer_AN_nfe_est=62;non_cancer_AF_nfe_est=0;non_cancer_nhomalt_nfe_est=0;non_neuro_AC_eas=0;non_neuro_AN_eas=7822;non_neuro_AF_eas=0;non_neuro_nhomalt_eas=0;non_cancer_AC_nfe=2;non_cancer_AN_nfe=21646;non_cancer_AF_nfe=9.23958e-05;non_cancer_nhomalt_nfe=0;non_neuro_AC_male=1;non_neuro_AN_male=23404;non_neuro_AF_male=4.27277e-05;non_neuro_nhomalt_male=0;non_neuro_AC_sas_female=0;non_neuro_AN_sas_female=2384;non_neuro_AF_sas_female=0;non_neuro_nhomalt_sas_female=0;AC_asj=0;AN_asj=1558;AF_asj=0;nhomalt_asj=0;controls_AC_nfe_est=0;controls_AN_nfe_est=22;controls_AF_nfe_est=0;controls_nhomalt_nfe_est=0;non_topmed_AC_asj_female=0;non_topmed_AN_asj_female=848;non_topmed_AF_asj_female=0;non_topmed_nhomalt_asj_female=0;non_cancer_AC_nfe_swe=0;non_cancer_AN_nfe_swe=18;non_cancer_AF_nfe_swe=0;non_cancer_nhomalt_nfe_swe=0;non_cancer_AC=2;non_cancer_AN=56504;non_cancer_AF=3.53957e-05;non_cancer_nhomalt=0;non_topmed_AC_oth=0;non_topmed_AN_oth=1930;non_topmed_AF_oth=0;non_topmed_nhomalt_oth=0;non_topmed_AC_fin_female=0;non_topmed_AN_fin_female=838;non_topmed_AF_fin_female=0;non_topmed_nhomalt_fin_female=0;non_cancer_AC_fin_female=0;non_cancer_AN_fin_female=840;non_cancer_AF_fin_female=0;non_cancer_nhomalt_fin_female=0;AC_oth=0;AN_oth=1972;AF_oth=0;nhomalt_oth=0;non_neuro_AC_nfe_male=1;non_neuro_AN_nfe_male=8090;non_neuro_AF_nfe_male=0.000123609;non_neuro_nhomalt_nfe_male=0;controls_AC_female=0;controls_AN_female=12104;controls_AF_female=0;controls_nhomalt_female=0;non_cancer_AC_fin=0;non_cancer_AN_fin=1916;non_cancer_AF_fin=0;non_cancer_nhomalt_fin=0;non_topmed_AC_fin=0;non_topmed_AN_fin=1914;non_topmed_AF_fin=0;non_topmed_nhomalt_fin=0;non_cancer_AC_eas_oea=0;non_cancer_AN_eas_oea=5956;non_cancer_AF_eas_oea=0;non_cancer_nhomalt_eas_oea=0;non_topmed_AC_nfe_female=1;non_topmed_AN_nfe_female=10436;non_topmed_AF_nfe_female=9.58222e-05;non_topmed_n
homalt_nfe_female=0;non_cancer_AC_sas_male=0;non_cancer_AN_sas_male=3672;non_cancer_AF_sas_male=0;non_cancer_nhomalt_sas_male=0;controls_AC_asj_male=0;controls_AN_asj_male=152;controls_AF_asj_male=0;controls_nhomalt_asj_male=0;non_cancer_AC_raw=3;non_cancer_AN_raw=142294;non_cancer_AF_raw=2.10831e-05;non_cancer_nhomalt_raw=0;non_cancer_AC_eas_male=0;non_cancer_AN_eas_male=4032;non_cancer_AF_eas_male=0;non_cancer_nhomalt_eas_male=0;non_topmed_AC_asj_male=0;non_topmed_AN_asj_male=680;non_topmed_AF_asj_male=0;non_topmed_nhomalt_asj_male=0;non_neuro_AC_oth=0;non_neuro_AN_oth=1546;non_neuro_AF_oth=0;non_neuro_nhomalt_oth=0;AC_male=1;AN_male=28870;AF_male=3.4638e-05;nhomalt_male=0;controls_AC_fin_female=0;controls_AN_fin_female=530;controls_AF_fin_female=0;controls_nhomalt_fin_female=0;controls_AC_nfe_bgr=0;controls_AN_nfe_bgr=0;controls_nhomalt_nfe_bgr=0;controls_AC_asj_female=0;controls_AN_asj_female=124;controls_AF_asj_female=0;controls_nhomalt_asj_female=0;AC_amr_male=0;AN_amr_male=4802;AF_amr_male=0;nhomalt_amr_male=0;AC_amr_female=0;AN_amr_female=5658;AF_amr_female=0;nhomalt_amr_female=0;non_topmed_AC_sas_male=0;non_topmed_AN_sas_male=3672;non_topmed_AF_sas_male=0;non_topmed_nhomalt_sas_male=0;AC_oth_male=0;AN_oth_male=1010;AF_oth_male=0;nhomalt_oth_male=0;non_cancer_AC_sas=0;non_cancer_AN_sas=6052;non_cancer_AF_sas=0;non_cancer_nhomalt_sas=0;non_neuro_AC_nfe_seu=0;non_neuro_AN_nfe_seu=72;non_neuro_AF_nfe_seu=0;non_neuro_nhomalt_nfe_seu=0;non_cancer_AC_eas_kor=0;non_cancer_AN_eas_kor=1846;non_cancer_AF_eas_kor=0;non_cancer_nhomalt_eas_kor=0;non_topmed_AC_afr_female=0;non_topmed_AN_afr_female=2904;non_topmed_AF_afr_female=0;non_topmed_nhomalt_afr_female=0;controls_AC_sas_male=0;controls_AN_sas_male=1694;controls_AF_sas_male=0;controls_nhomalt_sas_male=0;non_topmed_AC_sas_female=0;non_topmed_AN_sas_female=2384;non_topmed_AF_sas_female=0;non_topmed_nhomalt_sas_female=0;non_topmed_AC_afr=0;non_topmed_AN_afr=5086;non_topmed_AF_afr=0;non_topmed_nhomalt_afr=0;controls_AC=0
;controls_AN=23566;controls_AF=0;controls_nhomalt=0;non_neuro_AC_oth_female=0;non_neuro_AN_oth_female=802;non_neuro_AF_oth_female=0;non_neuro_nhomalt_oth_female=0;non_topmed_faf95_amr=0;non_topmed_faf99_amr=0;faf95_afr=0;faf99_afr=0;faf95_sas=0;faf99_sas=0;controls_faf95_afr=0;controls_faf99_afr=0;faf95_amr=0;faf99_amr=0;non_neuro_faf95_sas=0;non_neuro_faf99_sas=0;faf95_eas=0;faf99_eas=0;faf95=5.78e-06;faf99=6.16e-06;non_neuro_faf95_afr=0;non_neuro_faf99_afr=0;non_cancer_faf95_eas=0;non_cancer_faf99_eas=0;non_neuro_faf95_amr=0;non_neuro_faf99_amr=0;non_topmed_faf95_sas=0;non_topmed_faf99_sas=0;controls_faf95_nfe=0;controls_faf99_nfe=0;non_cancer_faf95_afr=0;non_cancer_faf99_afr=0;non_cancer_faf95_amr=0;non_cancer_faf99_amr=0;non_topmed_faf95=5.91e-06;non_topmed_faf99=6.21e-06;non_neuro_faf95_nfe=2.021e-05;non_neuro_faf99_nfe=2.018e-05;non_neuro_faf95=6.98e-06;non_neuro_faf99=6.61e-06;non_topmed_faf95_nfe=1.651e-05;non_topmed_faf99_nfe=1.58e-05;controls_faf95_eas=0;controls_faf99_eas=0;controls_faf95_sas=0;controls_faf99_sas=0;faf95_nfe=1.581e-05;faf99_nfe=1.554e-05;non_topmed_faf95_eas=0;non_topmed_faf99_eas=0;controls_faf95_amr=0;controls_faf99_amr=0;non_neuro_faf95_eas=0;non_neuro_faf99_eas=0;non_cancer_faf95_nfe=1.628e-05;non_cancer_faf99_nfe=1.571e-05;non_cancer_faf95=5.87e-06;non_cancer_faf99=6.2e-06;non_cancer_faf95_sas=0;non_cancer_faf99_sas=0;non_topmed_faf95_afr=0;non_topmed_faf99_afr=0;controls_faf95=0;controls_faf99=0;popmax=nfe;AC_popmax=2;AN_popmax=22326;AF_popmax=8.95817e-05;nhomalt_popmax=0;age_hist_het_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_het_n_smaller=0;age_hist_het_n_larger=0;age_hist_hom_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_hom_n_smaller=0;age_hist_hom_n_larger=0;non_topmed_popmax=nfe;non_topmed_AC_popmax=2;non_topmed_AN_popmax=21324;non_topmed_AF_popmax=9.3791e-05;non_topmed_nhomalt_popmax=0;non_neuro_popmax=nfe;non_neuro_AC_popmax=2;non_neuro_AN_popmax=17204;non_neuro_AF_popmax=0.000116252;non_neuro_nhomalt_popmax=0;non_cancer_popmax=nfe;non_c
ancer_AC_popmax=2;non_cancer_AN_popmax=21646;non_cancer_AF_popmax=9.23958e-05;non_cancer_nhomalt_popmax=0\");\n            writer.WriteLine(\"22\\t15528109\\trs755148717\\tT\\tG\\t137.61\\tRF\\tAC=7;AN=57430;AF=0.000121888;rf_tp_probability=0.0196743;FS=0;InbreedingCoeff=-0.0742;MQ=21.11;MQRankSum=-0.938;QD=0.39;ReadPosRankSum=0.505;SOR=0.108;BaseQRankSum=-1.754;ClippingRankSum=0.331;DP=1566925;VQSLOD=3.54;VQSR_culprit=QD;segdup;rf_negative_label;rf_label=FP;rf_train;variant_type=snv;allele_type=snv;n_alt_alleles=1;pab_max=1;gq_hist_alt_bin_freq=0|5|3|5|1|5|2|2|1|1|2|1|0|0|1|0|0|0|0|0;gq_hist_all_bin_freq=36157|22748|3213|2957|1501|619|1254|1305|872|2125|2649|1489|4471|1984|2560|1805|3116|433|2157|4076;dp_hist_alt_bin_freq=8|8|3|4|4|0|2|0|0|0|0|0|0|0|0|0|0|0|0|0;dp_hist_alt_n_larger=0;dp_hist_all_bin_freq=126796|6316|2994|4736|7636|6775|4792|2642|957|379|174|63|25|20|15|5|2|2|1|1;dp_hist_all_n_larger=0;ab_hist_alt_bin_freq=0|0|2|4|5|4|0|0|3|0|2|1|0|3|0|2|0|0|0|0;AC_nfe_seu=0;AN_nfe_seu=124;AF_nfe_seu=0;nhomalt_nfe_seu=0;controls_AC_afr_male=0;controls_AN_afr_male=940;controls_AF_afr_male=0;controls_nhomalt_afr_male=0;non_neuro_AC_eas_kor=0;non_neuro_AN_eas_kor=1846;non_neuro_AF_eas_kor=0;non_neuro_nhomalt_eas_kor=0;non_topmed_AC_amr=0;non_topmed_AN_amr=10454;non_topmed_AF_amr=0;non_topmed_nhomalt_amr=0;non_cancer_AC_asj_female=0;non_cancer_AN_asj_female=830;non_cancer_AF_asj_female=0;non_cancer_nhomalt_asj_female=0;AC_raw=29;AN_raw=156464;AF_raw=0.000185346;nhomalt_raw=2;AC_fin_female=0;AN_fin_female=862;AF_fin_female=0;nhomalt_fin_female=0;non_cancer_AC_oth_female=0;non_cancer_AN_oth_female=946;non_cancer_AF_oth_female=0;non_cancer_nhomalt_oth_female=0;AC_nfe_bgr=0;AN_nfe_bgr=28;AF_nfe_bgr=0;nhomalt_nfe_bgr=0;non_neuro_AC_asj_female=0;non_neuro_AN_asj_female=328;non_neuro_AF_asj_female=0;non_neuro_nhomalt_asj_female=0;AC_sas_male=0;AN_sas_male=3676;AF_sas_male=0;nhomalt_sas_male=0;non_neuro_AC_afr_male=1;non_neuro_AN_afr_male=2182;non_neuro_AF_afr_male=0.000458295;
non_neuro_nhomalt_afr_male=0;AC_afr_male=1;AN_afr_male=2198;AF_afr_male=0.000454959;nhomalt_afr_male=0;AC_afr=7;AN_afr=5128;AF_afr=0.00136505;nhomalt_afr=0;controls_AC_nfe_swe=0;controls_AN_nfe_swe=6;controls_AF_nfe_swe=0;controls_nhomalt_nfe_swe=0;non_neuro_AC_afr_female=6;non_neuro_AN_afr_female=2916;non_neuro_AF_afr_female=0.00205761;non_neuro_nhomalt_afr_female=0;non_topmed_AC_amr_female=0;non_topmed_AN_amr_female=5658;non_topmed_AF_amr_female=0;non_topmed_nhomalt_amr_female=0;non_cancer_AC_female=6;non_cancer_AN_female=27772;non_cancer_AF_female=0.000216045;non_cancer_nhomalt_female=0;non_cancer_AC_nfe_onf=0;non_cancer_AN_nfe_onf=4272;non_cancer_AF_nfe_onf=0;non_cancer_nhomalt_nfe_onf=0;non_cancer_AC_male=1;non_cancer_AN_male=28776;non_cancer_AF_male=3.47512e-05;non_cancer_nhomalt_male=0;non_topmed_AC_oth_female=0;non_topmed_AN_oth_female=942;non_topmed_AF_oth_female=0;non_topmed_nhomalt_oth_female=0;AC_eas_female=0;AN_eas_female=3796;AF_eas_female=0;nhomalt_eas_female=0;non_cancer_AC_sas_female=0;non_cancer_AN_sas_female=2380;non_cancer_AF_sas_female=0;non_cancer_nhomalt_sas_female=0;AC_afr_female=6;AN_afr_female=2930;AF_afr_female=0.00204778;nhomalt_afr_female=0;AC_sas=0;AN_sas=6060;AF_sas=0;nhomalt_sas=0;non_neuro_AC_female=6;non_neuro_AN_female=24070;non_neuro_AF_female=0.000249273;non_neuro_nhomalt_female=0;controls_AC_afr=3;controls_AN_afr=2194;controls_AF_afr=0.00136737;controls_nhomalt_afr=0;non_neuro_AC_eas_jpn=0;non_neuro_AN_eas_jpn=12;non_neuro_AF_eas_jpn=0;non_neuro_nhomalt_eas_jpn=0;AC_nfe_onf=0;AN_nfe_onf=4622;AF_nfe_onf=0;nhomalt_nfe_onf=0;non_cancer_AC_amr_male=0;non_cancer_AN_amr_male=4812;non_cancer_AF_amr_male=0;non_cancer_nhomalt_amr_male=0;controls_AC_fin_male=0;controls_AN_fin_male=748;controls_AF_fin_male=0;controls_nhomalt_fin_male=0;non_neuro_AC_nfe_nwe=0;non_neuro_AN_nfe_nwe=13348;non_neuro_AF_nfe_nwe=0;non_neuro_nhomalt_nfe_nwe=0;AC_fin_male=0;AN_fin_male=1106;AF_fin_male=0;nhomalt_fin_male=0;AC_nfe_female=0;AN_nfe_female=11018;AF_nfe
_female=0;nhomalt_nfe_female=0;AC_amr=0;AN_amr=10492;AF_amr=0;nhomalt_amr=0;non_topmed_AC_nfe_male=0;non_topmed_AN_nfe_male=10926;non_topmed_AF_nfe_male=0;non_topmed_nhomalt_nfe_male=0;non_neuro_AC_sas=0;non_neuro_AN_sas=6054;non_neuro_AF_sas=0;non_neuro_nhomalt_sas=0;non_cancer_AC_fin_male=0;non_cancer_AN_fin_male=1106;non_cancer_AF_fin_male=0;non_cancer_nhomalt_fin_male=0;non_cancer_AC_nfe_seu=0;non_cancer_AN_nfe_seu=60;non_cancer_AF_nfe_seu=0;non_cancer_nhomalt_nfe_seu=0;AC_eas=0;AN_eas=7834;AF_eas=0;nhomalt_eas=0;nhomalt=0;non_neuro_AC_nfe_female=0;non_neuro_AN_nfe_female=9140;non_neuro_AF_nfe_female=0;non_neuro_nhomalt_nfe_female=0;non_neuro_AC_afr=7;non_neuro_AN_afr=5098;non_neuro_AF_afr=0.00137309;non_neuro_nhomalt_afr=0;controls_AC_raw=14;controls_AN_raw=70130;controls_AF_raw=0.000199629;controls_nhomalt_raw=1;non_cancer_AC_eas=0;non_cancer_AN_eas=7820;non_cancer_AF_eas=0;non_cancer_nhomalt_eas=0;non_cancer_AC_amr_female=0;non_cancer_AN_amr_female=5670;non_cancer_AF_amr_female=0;non_cancer_nhomalt_amr_female=0;non_neuro_AC_nfe_swe=0;non_neuro_AN_nfe_swe=68;non_neuro_AF_nfe_swe=0;non_neuro_nhomalt_nfe_swe=0;controls_AC_male=0;controls_AN_male=11500;controls_AF_male=0;controls_nhomalt_male=0;non_topmed_AC_male=1;non_topmed_AN_male=28366;non_topmed_AF_male=3.52535e-05;non_topmed_nhomalt_male=0;controls_AC_eas_jpn=0;controls_AN_eas_jpn=12;controls_AF_eas_jpn=0;controls_nhomalt_eas_jpn=0;controls_AC_nfe_female=0;controls_AN_nfe_female=3742;controls_AF_nfe_female=0;controls_nhomalt_nfe_female=0;non_neuro_AC_amr=0;non_neuro_AN_amr=7284;non_neuro_AF_amr=0;non_neuro_nhomalt_amr=0;non_neuro_AC_eas_female=0;non_neuro_AN_eas_female=3790;non_neuro_AF_eas_female=0;non_neuro_nhomalt_eas_female=0;AC_asj_male=0;AN_asj_male=698;AF_asj_male=0;nhomalt_asj_male=0;controls_AC_nfe_male=0;controls_AN_nfe_male=3336;controls_AF_nfe_male=0;controls_nhomalt_nfe_male=0;non_neuro_AC_fin=0;non_neuro_AN_fin=1742;non_neuro_AF_fin=0;non_neuro_nhomalt_fin=0;non_topmed_AC_sas=0;non_topmed_AN_s
as=6060;non_topmed_AF_sas=0;non_topmed_nhomalt_sas=0;non_cancer_AC_nfe_female=0;non_cancer_AN_nfe_female=10426;non_cancer_AF_nfe_female=0;non_cancer_nhomalt_nfe_female=0;AC_oth_female=0;AN_oth_female=964;AF_oth_female=0;nhomalt_oth_female=0;non_cancer_AC_asj=0;non_cancer_AN_asj=1512;non_cancer_AF_asj=0;non_cancer_nhomalt_asj=0;AC_nfe_swe=0;AN_nfe_swe=82;AF_nfe_swe=0;nhomalt_nfe_swe=0;controls_AC_nfe=0;controls_AN_nfe=7078;controls_AF_nfe=0;controls_nhomalt_nfe=0;controls_AC_oth_female=0;controls_AN_oth_female=360;controls_AF_oth_female=0;controls_nhomalt_oth_female=0;controls_AC_asj=0;controls_AN_asj=276;controls_AF_asj=0;controls_nhomalt_asj=0;non_neuro_AC_amr_male=0;non_neuro_AN_amr_male=3284;non_neuro_AF_amr_male=0;non_neuro_nhomalt_amr_male=0;controls_AC_nfe_nwe=0;controls_AN_nfe_nwe=5808;controls_AF_nfe_nwe=0;controls_nhomalt_nfe_nwe=0;AC_nfe_nwe=0;AN_nfe_nwe=17476;AF_nfe_nwe=0;nhomalt_nfe_nwe=0;controls_AC_nfe_seu=0;controls_AN_nfe_seu=8;controls_AF_nfe_seu=0;controls_nhomalt_nfe_seu=0;controls_AC_sas_female=0;controls_AN_sas_female=1110;controls_AF_sas_female=0;controls_nhomalt_sas_female=0;non_neuro_AC_amr_female=0;non_neuro_AN_amr_female=4000;non_neuro_AF_amr_female=0;non_neuro_nhomalt_amr_female=0;non_cancer_AC_eas_jpn=0;non_cancer_AN_eas_jpn=12;non_cancer_AF_eas_jpn=0;non_cancer_nhomalt_eas_jpn=0;non_neuro_AC_nfe_onf=0;non_neuro_AN_nfe_onf=3678;non_neuro_AF_nfe_onf=0;non_neuro_nhomalt_nfe_onf=0;non_topmed_AC_eas_male=0;non_topmed_AN_eas_male=4036;non_topmed_AF_eas_male=0;non_topmed_nhomalt_eas_male=0;AC_eas_jpn=0;AN_eas_jpn=12;AF_eas_jpn=0;nhomalt_eas_jpn=0;non_cancer_AC_afr_male=1;non_cancer_AN_afr_male=2186;non_cancer_AF_afr_male=0.000457457;non_cancer_nhomalt_afr_male=0;non_cancer_AC_afr=7;non_cancer_AN_afr=5060;non_cancer_AF_afr=0.0013834;non_cancer_nhomalt_afr=0;controls_AC_amr_female=0;controls_AN_amr_female=3012;controls_AF_amr_female=0;controls_nhomalt_amr_female=0;non_neuro_AC_fin_male=0;non_neuro_AN_fin_male=1034;non_neuro_AF_fin_male=0;non_neur
o_nhomalt_fin_male=0;AC_female=6;AN_female=28502;AF_female=0.000210512;nhomalt_female=0;non_neuro_AC_nfe_bgr=0;non_neuro_AN_nfe_bgr=12;non_neuro_AF_nfe_bgr=0;non_neuro_nhomalt_nfe_bgr=0;non_neuro_AC_oth_male=0;non_neuro_AN_oth_male=748;non_neuro_AF_oth_male=0;non_neuro_nhomalt_oth_male=0;non_topmed_AC_nfe_est=0;non_topmed_AN_nfe_est=66;non_topmed_AF_nfe_est=0;non_topmed_nhomalt_nfe_est=0;non_topmed_AC_nfe_nwe=0;non_topmed_AN_nfe_nwe=16772;non_topmed_AF_nfe_nwe=0;non_topmed_nhomalt_nfe_nwe=0;non_topmed_AC_amr_male=0;non_topmed_AN_amr_male=4796;non_topmed_AF_amr_male=0;non_topmed_nhomalt_amr_male=0;non_cancer_AC_amr=0;non_cancer_AN_amr=10482;non_cancer_AF_amr=0;non_cancer_nhomalt_amr=0;non_topmed_AC_nfe_swe=0;non_topmed_AN_nfe_swe=82;non_topmed_AF_nfe_swe=0;non_topmed_nhomalt_nfe_swe=0;non_topmed_AC_nfe_onf=0;non_topmed_AN_nfe_onf=4330;non_topmed_AF_nfe_onf=0;non_topmed_nhomalt_nfe_onf=0;controls_AC_eas_kor=0;controls_AN_eas_kor=920;controls_AF_eas_kor=0;controls_nhomalt_eas_kor=0;non_topmed_AC_eas_oea=0;non_topmed_AN_eas_oea=5972;non_topmed_AF_eas_oea=0;non_topmed_nhomalt_eas_oea=0;controls_AC_eas_male=0;controls_AN_eas_male=1924;controls_AF_eas_male=0;controls_nhomalt_eas_male=0;controls_AC_oth_male=0;controls_AN_oth_male=342;controls_AF_oth_male=0;controls_nhomalt_oth_male=0;non_topmed_AC=7;non_topmed_AN=56168;non_topmed_AF=0.000124626;non_topmed_nhomalt=0;controls_AC_fin=0;controls_AN_fin=1300;controls_AF_fin=0;controls_nhomalt_fin=0;AC_eas_kor=0;AN_eas_kor=1848;AF_eas_kor=0;nhomalt_eas_kor=0;non_neuro_AC_nfe=0;non_neuro_AN_nfe=17240;non_neuro_AF_nfe=0;non_neuro_nhomalt_nfe=0;non_neuro_AC_fin_female=0;non_neuro_AN_fin_female=708;non_neuro_AF_fin_female=0;non_neuro_nhomalt_fin_female=0;non_cancer_AC_nfe_male=0;non_cancer_AN_nfe_male=11268;non_cancer_AF_nfe_male=0;non_cancer_nhomalt_nfe_male=0;controls_AC_eas_oea=0;controls_AN_eas_oea=2942;controls_AF_eas_oea=0;controls_nhomalt_eas_oea=0;non_topmed_AC_nfe_seu=0;non_topmed_AN_nfe_seu=124;non_topmed_AF_nfe_seu=0;non_t
opmed_nhomalt_nfe_seu=0;controls_AC_eas_female=0;controls_AN_eas_female=1950;controls_AF_eas_female=0;controls_nhomalt_eas_female=0;non_topmed_AC_asj=0;non_topmed_AN_asj=1536;non_topmed_AF_asj=0;non_topmed_nhomalt_asj=0;controls_AC_nfe_onf=0;controls_AN_nfe_onf=1234;controls_AF_nfe_onf=0;controls_nhomalt_nfe_onf=0;non_neuro_AC=7;non_neuro_AN=47502;non_neuro_AF=0.000147362;non_neuro_nhomalt=0;AC_eas_oea=0;AN_eas_oea=5974;AF_eas_oea=0;nhomalt_eas_oea=0;non_topmed_AC_nfe=0;non_topmed_AN_nfe=21402;non_topmed_AF_nfe=0;non_topmed_nhomalt_nfe=0;non_cancer_AC_oth=0;non_cancer_AN_oth=1956;non_cancer_AF_oth=0;non_cancer_nhomalt_oth=0;non_topmed_AC_raw=25;non_topmed_AN_raw=153024;non_topmed_AF_raw=0.000163373;non_topmed_nhomalt_raw=2;non_neuro_AC_nfe_est=0;non_neuro_AN_nfe_est=60;non_neuro_AF_nfe_est=0;non_neuro_nhomalt_nfe_est=0;non_topmed_AC_oth_male=0;non_topmed_AN_oth_male=996;non_topmed_AF_oth_male=0;non_topmed_nhomalt_oth_male=0;non_cancer_AC_oth_male=0;non_cancer_AN_oth_male=1010;non_cancer_AF_oth_male=0;non_cancer_nhomalt_oth_male=0;AC_nfe_est=0;AN_nfe_est=70;AF_nfe_est=0;nhomalt_nfe_est=0;non_cancer_AC_afr_female=6;non_cancer_AN_afr_female=2874;non_cancer_AF_afr_female=0.00208768;non_cancer_nhomalt_afr_female=0;non_topmed_AC_afr_male=1;non_topmed_AN_afr_male=2146;non_topmed_AF_afr_male=0.000465983;non_topmed_nhomalt_afr_male=0;AC_eas_male=0;AN_eas_male=4038;AF_eas_male=0;nhomalt_eas_male=0;controls_AC_eas=0;controls_AN_eas=3874;controls_AF_eas=0;controls_nhomalt_eas=0;non_neuro_AC_eas_male=0;non_neuro_AN_eas_male=4038;non_neuro_AF_eas_male=0;non_neuro_nhomalt_eas_male=0;non_cancer_AC_nfe_nwe=0;non_cancer_AN_nfe_nwe=17260;non_cancer_AF_nfe_nwe=0;non_cancer_nhomalt_nfe_nwe=0;controls_AC_sas=0;controls_AN_sas=2808;controls_AF_sas=0;controls_nhomalt_sas=0;non_neuro_AC_sas_male=0;non_neuro_AN_sas_male=3670;non_neuro_AF_sas_male=0;non_neuro_nhomalt_sas_male=0;non_neuro_AC_asj_male=0;non_neuro_AN_asj_male=376;non_neuro_AF_asj_male=0;non_neuro_nhomalt_asj_male=0;non_cancer_AC
_nfe_bgr=0;non_cancer_AN_nfe_bgr=20;non_cancer_AF_nfe_bgr=0;non_cancer_nhomalt_nfe_bgr=0;controls_AC_oth=0;controls_AN_oth=702;controls_AF_oth=0;controls_nhomalt_oth=0;non_cancer_AC_eas_female=0;non_cancer_AN_eas_female=3784;non_cancer_AF_eas_female=0;non_cancer_nhomalt_eas_female=0;AC_nfe=0;AN_nfe=22402;AF_nfe=0;nhomalt_nfe=0;non_topmed_AC_female=6;non_topmed_AN_female=27802;non_topmed_AF_female=0.000215812;non_topmed_nhomalt_female=0;non_neuro_AC_asj=0;non_neuro_AN_asj=704;non_neuro_AF_asj=0;non_neuro_nhomalt_asj=0;non_topmed_AC_eas_female=0;non_topmed_AN_eas_female=3796;non_topmed_AF_eas_female=0;non_topmed_nhomalt_eas_female=0;non_neuro_AC_raw=28;non_neuro_AN_raw=129652;non_neuro_AF_raw=0.000215963;non_neuro_nhomalt_raw=2;non_topmed_AC_eas=0;non_topmed_AN_eas=7832;non_topmed_AF_eas=0;non_topmed_nhomalt_eas=0;non_topmed_AC_fin_male=0;non_topmed_AN_fin_male=1106;non_topmed_AF_fin_male=0;non_topmed_nhomalt_fin_male=0;non_cancer_AC_asj_male=0;non_cancer_AN_asj_male=682;non_cancer_AF_asj_male=0;non_cancer_nhomalt_asj_male=0;AC_fin=0;AN_fin=1968;AF_fin=0;nhomalt_fin=0;AC_nfe_male=0;AN_nfe_male=11384;AF_nfe_male=0;nhomalt_nfe_male=0;non_topmed_AC_eas_kor=0;non_topmed_AN_eas_kor=1848;non_topmed_AF_eas_kor=0;non_topmed_nhomalt_eas_kor=0;controls_AC_amr_male=0;controls_AN_amr_male=2360;controls_AF_amr_male=0;controls_nhomalt_amr_male=0;non_neuro_AC_eas_oea=0;non_neuro_AN_eas_oea=5970;non_neuro_AF_eas_oea=0;non_neuro_nhomalt_eas_oea=0;AC_sas_female=0;AN_sas_female=2384;AF_sas_female=0;nhomalt_sas_female=0;controls_AC_afr_female=3;controls_AN_afr_female=1254;controls_AF_afr_female=0.00239234;controls_nhomalt_afr_female=0;controls_AC_amr=0;controls_AN_amr=5372;controls_AF_amr=0;controls_nhomalt_amr=0;non_topmed_AC_eas_jpn=0;non_topmed_AN_eas_jpn=12;non_topmed_AF_eas_jpn=0;non_topmed_nhomalt_eas_jpn=0;AC_asj_female=0;AN_asj_female=868;AF_asj_female=0;nhomalt_asj_female=0;non_topmed_AC_nfe_bgr=0;non_topmed_AN_nfe_bgr=28;non_topmed_AF_nfe_bgr=0;non_topmed_nhomalt_nfe_bgr=0;non_
cancer_AC_nfe_est=0;non_cancer_AN_nfe_est=62;non_cancer_AF_nfe_est=0;non_cancer_nhomalt_nfe_est=0;non_neuro_AC_eas=0;non_neuro_AN_eas=7828;non_neuro_AF_eas=0;non_neuro_nhomalt_eas=0;non_cancer_AC_nfe=0;non_cancer_AN_nfe=21694;non_cancer_AF_nfe=0;non_cancer_nhomalt_nfe=0;non_neuro_AC_male=1;non_neuro_AN_male=23432;non_neuro_AF_male=4.26767e-05;non_neuro_nhomalt_male=0;non_neuro_AC_sas_female=0;non_neuro_AN_sas_female=2384;non_neuro_AF_sas_female=0;non_neuro_nhomalt_sas_female=0;AC_asj=0;AN_asj=1566;AF_asj=0;nhomalt_asj=0;controls_AC_nfe_est=0;controls_AN_nfe_est=22;controls_AF_nfe_est=0;controls_nhomalt_nfe_est=0;non_topmed_AC_asj_female=0;non_topmed_AN_asj_female=852;non_topmed_AF_asj_female=0;non_topmed_nhomalt_asj_female=0;non_cancer_AC_nfe_swe=0;non_cancer_AN_nfe_swe=20;non_cancer_AF_nfe_swe=0;non_cancer_nhomalt_nfe_swe=0;non_cancer_AC=7;non_cancer_AN=56548;non_cancer_AF=0.000123789;non_cancer_nhomalt=0;non_topmed_AC_oth=0;non_topmed_AN_oth=1938;non_topmed_AF_oth=0;non_topmed_nhomalt_oth=0;non_topmed_AC_fin_female=0;non_topmed_AN_fin_female=860;non_topmed_AF_fin_female=0;non_topmed_nhomalt_fin_female=0;non_cancer_AC_fin_female=0;non_cancer_AN_fin_female=862;non_cancer_AF_fin_female=0;non_cancer_nhomalt_fin_female=0;AC_oth=0;AN_oth=1980;AF_oth=0;nhomalt_oth=0;non_neuro_AC_nfe_male=0;non_neuro_AN_nfe_male=8100;non_neuro_AF_nfe_male=0;non_neuro_nhomalt_nfe_male=0;controls_AC_female=3;controls_AN_female=12104;controls_AF_female=0.000247852;controls_nhomalt_female=0;non_cancer_AC_fin=0;non_cancer_AN_fin=1968;non_cancer_AF_fin=0;non_cancer_nhomalt_fin=0;non_topmed_AC_fin=0;non_topmed_AN_fin=1966;non_topmed_AF_fin=0;non_topmed_nhomalt_fin=0;non_cancer_AC_eas_oea=0;non_cancer_AN_eas_oea=5960;non_cancer_AF_eas_oea=0;non_cancer_nhomalt_eas_oea=0;non_topmed_AC_nfe_female=0;non_topmed_AN_nfe_female=10476;non_topmed_AF_nfe_female=0;non_topmed_nhomalt_nfe_female=0;non_cancer_AC_sas_male=0;non_cancer_AN_sas_male=3676;non_cancer_AF_sas_male=0;non_cancer_nhomalt_sas_male=0;contro
ls_AC_asj_male=0;controls_AN_asj_male=152;controls_AF_asj_male=0;controls_nhomalt_asj_male=0;non_cancer_AC_raw=26;non_cancer_AN_raw=146718;non_cancer_AF_raw=0.000177211;non_cancer_nhomalt_raw=2;non_cancer_AC_eas_male=0;non_cancer_AN_eas_male=4036;non_cancer_AF_eas_male=0;non_cancer_nhomalt_eas_male=0;non_topmed_AC_asj_male=0;non_topmed_AN_asj_male=684;non_topmed_AF_asj_male=0;non_topmed_nhomalt_asj_male=0;non_neuro_AC_oth=0;non_neuro_AN_oth=1552;non_neuro_AF_oth=0;non_neuro_nhomalt_oth=0;AC_male=1;AN_male=28928;AF_male=3.45686e-05;nhomalt_male=0;controls_AC_fin_female=0;controls_AN_fin_female=552;controls_AF_fin_female=0;controls_nhomalt_fin_female=0;controls_AC_nfe_bgr=0;controls_AN_nfe_bgr=0;controls_nhomalt_nfe_bgr=0;controls_AC_asj_female=0;controls_AN_asj_female=124;controls_AF_asj_female=0;controls_nhomalt_asj_female=0;AC_amr_male=0;AN_amr_male=4812;AF_amr_male=0;nhomalt_amr_male=0;AC_amr_female=0;AN_amr_female=5680;AF_amr_female=0;nhomalt_amr_female=0;non_topmed_AC_sas_male=0;non_topmed_AN_sas_male=3676;non_topmed_AF_sas_male=0;non_topmed_nhomalt_sas_male=0;AC_oth_male=0;AN_oth_male=1016;AF_oth_male=0;nhomalt_oth_male=0;non_cancer_AC_sas=0;non_cancer_AN_sas=6056;non_cancer_AF_sas=0;non_cancer_nhomalt_sas=0;non_neuro_AC_nfe_seu=0;non_neuro_AN_nfe_seu=74;non_neuro_AF_nfe_seu=0;non_neuro_nhomalt_nfe_seu=0;non_cancer_AC_eas_kor=0;non_cancer_AN_eas_kor=1848;non_cancer_AF_eas_kor=0;non_cancer_nhomalt_eas_kor=0;non_topmed_AC_afr_female=6;non_topmed_AN_afr_female=2834;non_topmed_AF_afr_female=0.00211715;non_topmed_nhomalt_afr_female=0;controls_AC_sas_male=0;controls_AN_sas_male=1698;controls_AF_sas_male=0;controls_nhomalt_sas_male=0;non_topmed_AC_sas_female=0;non_topmed_AN_sas_female=2384;non_topmed_AF_sas_female=0;non_topmed_nhomalt_sas_female=0;non_topmed_AC_afr=7;non_topmed_AN_afr=4980;non_topmed_AF_afr=0.00140562;non_topmed_nhomalt_afr=0;controls_AC=3;controls_AN=23604;controls_AF=0.000127097;controls_nhomalt=0;non_neuro_AC_oth_female=0;non_neuro_AN_oth_female=80
4;non_neuro_AF_oth_female=0;non_neuro_nhomalt_oth_female=0;non_topmed_faf95_amr=0;non_topmed_faf99_amr=0;faf95_afr=0.0006402;faf99_afr=0.00064042;faf95_sas=0;faf99_sas=0;controls_faf95_afr=0.00037237;controls_faf99_afr=0.00037266;faf95_amr=0;faf99_amr=0;non_neuro_faf95_sas=0;non_neuro_faf99_sas=0;faf95_eas=0;faf99_eas=0;faf95=5.65e-05;faf99=5.643e-05;non_neuro_faf95_afr=0.00064349;non_neuro_faf99_afr=0.00064347;non_cancer_faf95_eas=0;non_cancer_faf99_eas=0;non_neuro_faf95_amr=0;non_neuro_faf99_amr=0;non_topmed_faf95_sas=0;non_topmed_faf99_sas=0;controls_faf95_nfe=0;controls_faf99_nfe=0;non_cancer_faf95_afr=0.00064865;non_cancer_faf99_afr=0.00064914;non_cancer_faf95_amr=0;non_cancer_faf99_amr=0;non_topmed_faf95=5.763e-05;non_topmed_faf99=5.815e-05;non_neuro_faf95_nfe=0;non_neuro_faf99_nfe=0;non_neuro_faf95=6.903e-05;non_neuro_faf99=6.818e-05;non_topmed_faf95_nfe=0;non_topmed_faf99_nfe=0;controls_faf95_eas=0;controls_faf99_eas=0;controls_faf95_sas=0;controls_faf99_sas=0;faf95_nfe=0;faf99_nfe=0;non_topmed_faf95_eas=0;non_topmed_faf99_eas=0;controls_faf95_amr=0;controls_faf99_amr=0;non_neuro_faf95_eas=0;non_neuro_faf99_eas=0;non_cancer_faf95_nfe=0;non_cancer_faf99_nfe=0;non_cancer_faf95=5.729e-05;non_cancer_faf99=5.793e-05;non_cancer_faf95_sas=0;non_cancer_faf99_sas=0;non_topmed_faf95_afr=0.00065872;non_topmed_faf99_afr=0.0006589;controls_faf95=3.464e-05;controls_faf99=3.431e-05;popmax=afr;AC_popmax=7;AN_popmax=5128;AF_popmax=0.00136505;nhomalt_popmax=0;age_hist_het_bin_freq=0|0|0|1|1|0|1|1|1|2;age_hist_het_n_smaller=0;age_hist_het_n_larger=0;age_hist_hom_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_hom_n_smaller=0;age_hist_hom_n_larger=0;non_topmed_popmax=afr;non_topmed_AC_popmax=7;non_topmed_AN_popmax=4980;non_topmed_AF_popmax=0.00140562;non_topmed_nhomalt_popmax=0;non_neuro_popmax=afr;non_neuro_AC_popmax=7;non_neuro_AN_popmax=5098;non_neuro_AF_popmax=0.00137309;non_neuro_nhomalt_popmax=0;non_cancer_popmax=afr;non_cancer_AC_popmax=7;non_cancer_AN_popmax=5060;non_cancer_AF_po
pmax=0.0013834;non_cancer_nhomalt_popmax=0;controls_popmax=afr;controls_AC_popmax=3;controls_AN_popmax=2194;controls_AF_popmax=0.00136737;controls_nhomalt_popmax=0\");\n            writer.WriteLine(\"22\\t15528127\\trs1458314445\\tGA\\tA\\t274.09\\tRF\\tAC=2;AN=60120;AF=3.32668e-05;rf_tp_probability=0.178222;FS=0;InbreedingCoeff=-0.0715;MQ=21.62;MQRankSum=1.31;QD=6.53;ReadPosRankSum=-0.261;SOR=0.237;BaseQRankSum=1.6;ClippingRankSum=0.933;DP=1659589;VQSLOD=-0.4257;VQSR_culprit=FS;segdup;rf_negative_label;rf_label=FP;rf_train;variant_type=indel;allele_type=del;n_alt_alleles=1;pab_max=0.189247;gq_hist_alt_bin_freq=0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|2;gq_hist_all_bin_freq=36581|32253|5753|5266|2344|885|1349|1351|748|1737|2154|1284|4459|1376|2844|1696|3341|644|2633|6374;dp_hist_alt_bin_freq=0|0|0|0|2|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;dp_hist_alt_n_larger=0;dp_hist_all_bin_freq=121880|9604|3553|4752|7628|6874|5091|3164|1075|401|173|65|25|20|15|5|2|2|1|1;dp_hist_all_n_larger=0;ab_hist_alt_bin_freq=0|0|0|0|0|0|2|0|0|0|0|0|0|0|0|0|0|0|0|0;AC_nfe_seu=0;AN_nfe_seu=198;AF_nfe_seu=0;nhomalt_nfe_seu=0;controls_AC_afr_male=0;controls_AN_afr_male=980;controls_AF_afr_male=0;controls_nhomalt_afr_male=0;non_neuro_AC_eas_kor=0;non_neuro_AN_eas_kor=1852;non_neuro_AF_eas_kor=0;non_neuro_nhomalt_eas_kor=0;non_topmed_AC_amr=0;non_topmed_AN_amr=10508;non_topmed_AF_amr=0;non_topmed_nhomalt_amr=0;non_cancer_AC_asj_female=0;non_cancer_AN_asj_female=838;non_cancer_AF_asj_female=0;non_cancer_nhomalt_asj_female=0;AC_raw=2;AN_raw=182702;AF_raw=1.09468e-05;nhomalt_raw=0;AC_fin_female=0;AN_fin_female=1502;AF_fin_female=0;nhomalt_fin_female=0;non_cancer_AC_oth_female=0;non_cancer_AN_oth_female=950;non_cancer_AF_oth_female=0;non_cancer_nhomalt_oth_female=0;AC_nfe_bgr=0;AN_nfe_bgr=68;AF_nfe_bgr=0;nhomalt_nfe_bgr=0;non_neuro_AC_asj_female=0;non_neuro_AN_asj_female=340;non_neuro_AF_asj_female=0;non_neuro_nhomalt_asj_female=0;AC_sas_male=0;AN_sas_male=3712;AF_sas_male=0;nhomalt_sas_male=0;non_neuro_AC_afr_
male=0;non_neuro_AN_afr_male=2252;non_neuro_AF_afr_male=0;non_neuro_nhomalt_afr_male=0;AC_afr_male=0;AN_afr_male=2270;AF_afr_male=0;nhomalt_afr_male=0;AC_afr=0;AN_afr=5334;AF_afr=0;nhomalt_afr=0;controls_AC_nfe_swe=0;controls_AN_nfe_swe=8;controls_AF_nfe_swe=0;controls_nhomalt_nfe_swe=0;non_neuro_AC_afr_female=0;non_neuro_AN_afr_female=3050;non_neuro_AF_afr_female=0;non_neuro_nhomalt_afr_female=0;non_topmed_AC_amr_female=0;non_topmed_AN_amr_female=5682;non_topmed_AF_amr_female=0;non_topmed_nhomalt_amr_female=0;non_cancer_AC_female=1;non_cancer_AN_female=28894;non_cancer_AF_female=3.46093e-05;non_cancer_nhomalt_female=0;non_cancer_AC_nfe_onf=0;non_cancer_AN_nfe_onf=4430;non_cancer_AF_nfe_onf=0;non_cancer_nhomalt_nfe_onf=0;non_cancer_AC_male=1;non_cancer_AN_male=29890;non_cancer_AF_male=3.3456e-05;non_cancer_nhomalt_male=0;non_topmed_AC_oth_female=0;non_topmed_AN_oth_female=960;non_topmed_AF_oth_female=0;non_topmed_nhomalt_oth_female=0;AC_eas_female=1;AN_eas_female=3838;AF_eas_female=0.000260552;nhomalt_eas_female=0;non_cancer_AC_sas_female=0;non_cancer_AN_sas_female=2396;non_cancer_AF_sas_female=0;non_cancer_nhomalt_sas_female=0;AC_afr_female=0;AN_afr_female=3064;AF_afr_female=0;nhomalt_afr_female=0;AC_sas=0;AN_sas=6114;AF_sas=0;nhomalt_sas=0;non_neuro_AC_female=1;non_neuro_AN_female=25264;non_neuro_AF_female=3.9582e-05;non_neuro_nhomalt_female=0;controls_AC_afr=0;controls_AN_afr=2304;controls_AF_afr=0;controls_nhomalt_afr=0;non_neuro_AC_eas_jpn=0;non_neuro_AN_eas_jpn=34;non_neuro_AF_eas_jpn=0;non_neuro_nhomalt_eas_jpn=0;AC_nfe_onf=0;AN_nfe_onf=4940;AF_nfe_onf=0;nhomalt_nfe_onf=0;non_cancer_AC_amr_male=0;non_cancer_AN_amr_male=4840;non_cancer_AF_amr_male=0;non_cancer_nhomalt_amr_male=0;controls_AC_fin_male=0;controls_AN_fin_male=1312;controls_AF_fin_male=0;controls_nhomalt_fin_male=0;non_neuro_AC_nfe_nwe=0;non_neuro_AN_nfe_nwe=13618;non_neuro_AF_nfe_nwe=0;non_neuro_nhomalt_nfe_nwe=0;AC_fin_male=0;AN_fin_male=1686;AF_fin_male=0;nhomalt_fin_male=0;AC_nfe_female=0;AN_nf
e_female=11558;AF_nfe_female=0;nhomalt_nfe_female=0;AC_amr=0;AN_amr=10546;AF_amr=0;nhomalt_amr=0;non_topmed_AC_nfe_male=0;non_topmed_AN_nfe_male=11396;non_topmed_AF_nfe_male=0;non_topmed_nhomalt_nfe_male=0;non_neuro_AC_sas=0;non_neuro_AN_sas=6108;non_neuro_AF_sas=0;non_neuro_nhomalt_sas=0;non_cancer_AC_fin_male=0;non_cancer_AN_fin_male=1686;non_cancer_AF_fin_male=0;non_cancer_nhomalt_fin_male=0;non_cancer_AC_nfe_seu=0;non_cancer_AN_nfe_seu=98;non_cancer_AF_nfe_seu=0;non_cancer_nhomalt_nfe_seu=0;AC_eas=2;AN_eas=7908;AF_eas=0.000252908;nhomalt_eas=0;nhomalt=0;non_neuro_AC_nfe_female=0;non_neuro_AN_nfe_female=9464;non_neuro_AF_nfe_female=0;non_neuro_nhomalt_nfe_female=0;non_neuro_AC_afr=0;non_neuro_AN_afr=5302;non_neuro_AF_afr=0;non_neuro_nhomalt_afr=0;controls_AC_raw=1;controls_AN_raw=81800;controls_AF_raw=1.22249e-05;controls_nhomalt_raw=0;non_cancer_AC_eas=2;non_cancer_AN_eas=7872;non_cancer_AF_eas=0.000254065;non_cancer_nhomalt_eas=0;non_cancer_AC_amr_female=0;non_cancer_AN_amr_female=5692;non_cancer_AF_amr_female=0;non_cancer_nhomalt_amr_female=0;non_neuro_AC_nfe_swe=0;non_neuro_AN_nfe_swe=82;non_neuro_AF_nfe_swe=0;non_neuro_nhomalt_nfe_swe=0;controls_AC_male=0;controls_AN_male=12202;controls_AF_male=0;controls_nhomalt_male=0;non_topmed_AC_male=1;non_topmed_AN_male=29614;non_topmed_AF_male=3.37678e-05;non_topmed_nhomalt_male=0;controls_AC_eas_jpn=0;controls_AN_eas_jpn=34;controls_AF_eas_jpn=0;controls_nhomalt_eas_jpn=0;controls_AC_nfe_female=0;controls_AN_nfe_female=3772;controls_AF_nfe_female=0;controls_nhomalt_nfe_female=0;non_neuro_AC_amr=0;non_neuro_AN_amr=7334;non_neuro_AF_amr=0;non_neuro_nhomalt_amr=0;non_neuro_AC_eas_female=1;non_neuro_AN_eas_female=3832;non_neuro_AF_eas_female=0.00026096;non_neuro_nhomalt_eas_female=0;AC_asj_male=0;AN_asj_male=714;AF_asj_male=0;nhomalt_asj_male=0;controls_AC_nfe_male=0;controls_AN_nfe_male=3362;controls_AF_nfe_male=0;controls_nhomalt_nfe_male=0;non_neuro_AC_fin=0;non_neuro_AN_fin=2934;non_neuro_AF_fin=0;non_neuro_nhomalt_f
in=0;non_topmed_AC_sas=0;non_topmed_AN_sas=6114;non_topmed_AF_sas=0;non_topmed_nhomalt_sas=0;non_cancer_AC_nfe_female=0;non_cancer_AN_nfe_female=10722;non_cancer_AF_nfe_female=0;non_cancer_nhomalt_nfe_female=0;AC_oth_female=0;AN_oth_female=982;AF_oth_female=0;nhomalt_oth_female=0;non_cancer_AC_asj=0;non_cancer_AN_asj=1530;non_cancer_AF_asj=0;non_cancer_nhomalt_asj=0;AC_nfe_swe=0;AN_nfe_swe=114;AF_nfe_swe=0;nhomalt_nfe_swe=0;controls_AC_nfe=0;controls_AN_nfe=7134;controls_AF_nfe=0;controls_nhomalt_nfe=0;controls_AC_oth_female=0;controls_AN_oth_female=362;controls_AF_oth_female=0;controls_nhomalt_oth_female=0;controls_AC_asj=0;controls_AN_asj=276;controls_AF_asj=0;controls_nhomalt_asj=0;non_neuro_AC_amr_male=0;non_neuro_AN_amr_male=3312;non_neuro_AF_amr_male=0;non_neuro_nhomalt_amr_male=0;controls_AC_nfe_nwe=0;controls_AN_nfe_nwe=5834;controls_AF_nfe_nwe=0;controls_nhomalt_nfe_nwe=0;AC_nfe_nwe=0;AN_nfe_nwe=18022;AF_nfe_nwe=0;nhomalt_nfe_nwe=0;controls_AC_nfe_seu=0;controls_AN_nfe_seu=20;controls_AF_nfe_seu=0;controls_nhomalt_nfe_seu=0;controls_AC_sas_female=0;controls_AN_sas_female=1122;controls_AF_sas_female=0;controls_nhomalt_sas_female=0;non_neuro_AC_amr_female=0;non_neuro_AN_amr_female=4022;non_neuro_AF_amr_female=0;non_neuro_nhomalt_amr_female=0;non_cancer_AC_eas_jpn=0;non_cancer_AN_eas_jpn=34;non_cancer_AF_eas_jpn=0;non_cancer_nhomalt_eas_jpn=0;non_neuro_AC_nfe_onf=0;non_neuro_AN_nfe_onf=3866;non_neuro_AF_nfe_onf=0;non_neuro_nhomalt_nfe_onf=0;non_topmed_AC_eas_male=1;non_topmed_AN_eas_male=4068;non_topmed_AF_eas_male=0.000245821;non_topmed_nhomalt_eas_male=0;AC_eas_jpn=0;AN_eas_jpn=34;AF_eas_jpn=0;nhomalt_eas_jpn=0;non_cancer_AC_afr_male=0;non_cancer_AN_afr_male=2254;non_cancer_AF_afr_male=0;non_cancer_nhomalt_afr_male=0;non_cancer_AC_afr=0;non_cancer_AN_afr=5234;non_cancer_AF_afr=0;non_cancer_nhomalt_afr=0;controls_AC_amr_female=0;controls_AN_amr_female=3018;controls_AF_amr_female=0;controls_nhomalt_amr_female=0;non_neuro_AC_fin_male=0;non_neuro_AN_fin_male=160
0;non_neuro_AF_fin_male=0;non_neuro_nhomalt_fin_male=0;AC_female=1;AN_female=29938;AF_female=3.34024e-05;nhomalt_female=0;non_neuro_AC_nfe_bgr=0;non_neuro_AN_nfe_bgr=16;non_neuro_AF_nfe_bgr=0;non_neuro_nhomalt_nfe_bgr=0;non_neuro_AC_oth_male=0;non_neuro_AN_oth_male=758;non_neuro_AF_oth_male=0;non_neuro_nhomalt_oth_male=0;non_topmed_AC_nfe_est=0;non_topmed_AN_nfe_est=72;non_topmed_AF_nfe_est=0;non_topmed_nhomalt_nfe_est=0;non_topmed_AC_nfe_nwe=0;non_topmed_AN_nfe_nwe=17314;non_topmed_AF_nfe_nwe=0;non_topmed_nhomalt_nfe_nwe=0;non_topmed_AC_amr_male=0;non_topmed_AN_amr_male=4826;non_topmed_AF_amr_male=0;non_topmed_nhomalt_amr_male=0;non_cancer_AC_amr=0;non_cancer_AN_amr=10532;non_cancer_AF_amr=0;non_cancer_nhomalt_amr=0;non_topmed_AC_nfe_swe=0;non_topmed_AN_nfe_swe=114;non_topmed_AF_nfe_swe=0;non_topmed_nhomalt_nfe_swe=0;non_topmed_AC_nfe_onf=0;non_topmed_AN_nfe_onf=4648;non_topmed_AF_nfe_onf=0;non_topmed_nhomalt_nfe_onf=0;controls_AC_eas_kor=0;controls_AN_eas_kor=922;controls_AF_eas_kor=0;controls_nhomalt_eas_kor=0;non_topmed_AC_eas_oea=2;non_topmed_AN_eas_oea=6018;non_topmed_AF_eas_oea=0.000332336;non_topmed_nhomalt_eas_oea=0;controls_AC_eas_male=0;controls_AN_eas_male=1940;controls_AF_eas_male=0;controls_nhomalt_eas_male=0;controls_AC_oth_male=0;controls_AN_oth_male=346;controls_AF_oth_male=0;controls_nhomalt_oth_male=0;non_topmed_AC=2;non_topmed_AN=58850;non_topmed_AF=3.39847e-05;non_topmed_nhomalt=0;controls_AC_fin=0;controls_AN_fin=2490;controls_AF_fin=0;controls_nhomalt_fin=0;AC_eas_kor=0;AN_eas_kor=1854;AF_eas_kor=0;nhomalt_eas_kor=0;non_neuro_AC_nfe=0;non_neuro_AN_nfe=17768;non_neuro_AF_nfe=0;non_neuro_nhomalt_nfe=0;non_neuro_AC_fin_female=0;non_neuro_AN_fin_female=1334;non_neuro_AF_fin_female=0;non_neuro_nhomalt_fin_female=0;non_cancer_AC_nfe_male=0;non_cancer_AN_nfe_male=11634;non_cancer_AF_nfe_male=0;non_cancer_nhomalt_nfe_male=0;controls_AC_eas_oea=1;controls_AN_eas_oea=2964;controls_AF_eas_oea=0.000337382;controls_nhomalt_eas_oea=0;non_topmed_AC_nfe_seu=0
;non_topmed_AN_nfe_seu=198;non_topmed_AF_nfe_seu=0;non_topmed_nhomalt_nfe_seu=0;controls_AC_eas_female=1;controls_AN_eas_female=1980;controls_AF_eas_female=0.000505051;controls_nhomalt_eas_female=0;non_topmed_AC_asj=0;non_topmed_AN_asj=1572;non_topmed_AF_asj=0;non_topmed_nhomalt_asj=0;controls_AC_nfe_onf=0;controls_AN_nfe_onf=1250;controls_AF_nfe_onf=0;controls_nhomalt_nfe_onf=0;non_neuro_AC=2;non_neuro_AN=49650;non_neuro_AF=4.0282e-05;non_neuro_nhomalt=0;AC_eas_oea=2;AN_eas_oea=6020;AF_eas_oea=0.000332226;nhomalt_eas_oea=0;non_topmed_AC_nfe=0;non_topmed_AN_nfe=22414;non_topmed_AF_nfe=0;non_topmed_nhomalt_nfe=0;non_cancer_AC_oth=0;non_cancer_AN_oth=1966;non_cancer_AF_oth=0;non_cancer_nhomalt_oth=0;non_topmed_AC_raw=2;non_topmed_AN_raw=178626;non_topmed_AF_raw=1.11966e-05;non_topmed_nhomalt_raw=0;non_neuro_AC_nfe_est=0;non_neuro_AN_nfe_est=64;non_neuro_AF_nfe_est=0;non_neuro_nhomalt_nfe_est=0;non_topmed_AC_oth_male=0;non_topmed_AN_oth_male=1008;non_topmed_AF_oth_male=0;non_topmed_nhomalt_oth_male=0;non_cancer_AC_oth_male=0;non_cancer_AN_oth_male=1016;non_cancer_AF_oth_male=0;non_cancer_nhomalt_oth_male=0;AC_nfe_est=0;AN_nfe_est=76;AF_nfe_est=0;nhomalt_nfe_est=0;non_cancer_AC_afr_female=0;non_cancer_AN_afr_female=2980;non_cancer_AF_afr_female=0;non_cancer_nhomalt_afr_female=0;non_topmed_AC_afr_male=0;non_topmed_AN_afr_male=2218;non_topmed_AF_afr_male=0;non_topmed_nhomalt_afr_male=0;AC_eas_male=1;AN_eas_male=4070;AF_eas_male=0.0002457;nhomalt_eas_male=0;controls_AC_eas=1;controls_AN_eas=3920;controls_AF_eas=0.000255102;controls_nhomalt_eas=0;non_neuro_AC_eas_male=1;non_neuro_AN_eas_male=4070;non_neuro_AF_eas_male=0.0002457;non_neuro_nhomalt_eas_male=0;non_cancer_AC_nfe_nwe=0;non_cancer_AN_nfe_nwe=17670;non_cancer_AF_nfe_nwe=0;non_cancer_nhomalt_nfe_nwe=0;controls_AC_sas=0;controls_AN_sas=2854;controls_AF_sas=0;controls_nhomalt_sas=0;non_neuro_AC_sas_male=0;non_neuro_AN_sas_male=3706;non_neuro_AF_sas_male=0;non_neuro_nhomalt_sas_male=0;non_neuro_AC_asj_male=0;non_neuro_
AN_asj_male=384;non_neuro_AF_asj_male=0;non_neuro_nhomalt_asj_male=0;non_cancer_AC_nfe_bgr=0;non_cancer_AN_nfe_bgr=56;non_cancer_AF_nfe_bgr=0;non_cancer_nhomalt_nfe_bgr=0;controls_AC_oth=0;controls_AN_oth=708;controls_AF_oth=0;controls_nhomalt_oth=0;non_cancer_AC_eas_female=1;non_cancer_AN_eas_female=3816;non_cancer_AF_eas_female=0.000262055;non_cancer_nhomalt_eas_female=0;AC_nfe=0;AN_nfe=23418;AF_nfe=0;nhomalt_nfe=0;non_topmed_AC_female=1;non_topmed_AN_female=29236;non_topmed_AF_female=3.42044e-05;non_topmed_nhomalt_female=0;non_neuro_AC_asj=0;non_neuro_AN_asj=724;non_neuro_AF_asj=0;non_neuro_nhomalt_asj=0;non_topmed_AC_eas_female=1;non_topmed_AN_eas_female=3838;non_topmed_AF_eas_female=0.000260552;non_topmed_nhomalt_eas_female=0;non_neuro_AC_raw=2;non_neuro_AN_raw=151340;non_neuro_AF_raw=1.32153e-05;non_neuro_nhomalt_raw=0;non_topmed_AC_eas=2;non_topmed_AN_eas=7906;non_topmed_AF_eas=0.000252972;non_topmed_nhomalt_eas=0;non_topmed_AC_fin_male=0;non_topmed_AN_fin_male=1686;non_topmed_AF_fin_male=0;non_topmed_nhomalt_fin_male=0;non_cancer_AC_asj_male=0;non_cancer_AN_asj_male=692;non_cancer_AF_asj_male=0;non_cancer_nhomalt_asj_male=0;AC_fin=0;AN_fin=3188;AF_fin=0;nhomalt_fin=0;AC_nfe_male=0;AN_nfe_male=11860;AF_nfe_male=0;nhomalt_nfe_male=0;non_topmed_AC_eas_kor=0;non_topmed_AN_eas_kor=1854;non_topmed_AF_eas_kor=0;non_topmed_nhomalt_eas_kor=0;controls_AC_amr_male=0;controls_AN_amr_male=2378;controls_AF_amr_male=0;controls_nhomalt_amr_male=0;non_neuro_AC_eas_oea=2;non_neuro_AN_eas_oea=6016;non_neuro_AF_eas_oea=0.000332447;non_neuro_nhomalt_eas_oea=0;AC_sas_female=0;AN_sas_female=2402;AF_sas_female=0;nhomalt_sas_female=0;controls_AC_afr_female=0;controls_AN_afr_female=1324;controls_AF_afr_female=0;controls_nhomalt_afr_female=0;controls_AC_amr=0;controls_AN_amr=5396;controls_AF_amr=0;controls_nhomalt_amr=0;non_topmed_AC_eas_jpn=0;non_topmed_AN_eas_jpn=34;non_topmed_AF_eas_jpn=0;non_topmed_nhomalt_eas_jpn=0;AC_asj_female=0;AN_asj_female=888;AF_asj_female=0;nhomalt_asj_fem
ale=0;non_topmed_AC_nfe_bgr=0;non_topmed_AN_nfe_bgr=68;non_topmed_AF_nfe_bgr=0;non_topmed_nhomalt_nfe_bgr=0;non_cancer_AC_nfe_est=0;non_cancer_AN_nfe_est=64;non_cancer_AF_nfe_est=0;non_cancer_nhomalt_nfe_est=0;non_neuro_AC_eas=2;non_neuro_AN_eas=7902;non_neuro_AF_eas=0.0002531;non_neuro_nhomalt_eas=0;non_cancer_AC_nfe=0;non_cancer_AN_nfe=22356;non_cancer_AF_nfe=0;non_cancer_nhomalt_nfe=0;non_neuro_AC_male=1;non_neuro_AN_male=24386;non_neuro_AF_male=4.10071e-05;non_neuro_nhomalt_male=0;non_neuro_AC_sas_female=0;non_neuro_AN_sas_female=2402;non_neuro_AF_sas_female=0;non_neuro_nhomalt_sas_female=0;AC_asj=0;AN_asj=1602;AF_asj=0;nhomalt_asj=0;controls_AC_nfe_est=0;controls_AN_nfe_est=22;controls_AF_nfe_est=0;controls_nhomalt_nfe_est=0;non_topmed_AC_asj_female=0;non_topmed_AN_asj_female=872;non_topmed_AF_asj_female=0;non_topmed_nhomalt_asj_female=0;non_cancer_AC_nfe_swe=0;non_cancer_AN_nfe_swe=38;non_cancer_AF_nfe_swe=0;non_cancer_nhomalt_nfe_swe=0;non_cancer_AC=2;non_cancer_AN=58784;non_cancer_AF=3.40229e-05;non_cancer_nhomalt=0;non_topmed_AC_oth=0;non_topmed_AN_oth=1968;non_topmed_AF_oth=0;non_topmed_nhomalt_oth=0;non_topmed_AC_fin_female=0;non_topmed_AN_fin_female=1500;non_topmed_AF_fin_female=0;non_topmed_nhomalt_fin_female=0;non_cancer_AC_fin_female=0;non_cancer_AN_fin_female=1500;non_cancer_AF_fin_female=0;non_cancer_nhomalt_fin_female=0;AC_oth=0;AN_oth=2010;AF_oth=0;nhomalt_oth=0;non_neuro_AC_nfe_male=0;non_neuro_AN_nfe_male=8304;non_neuro_AF_nfe_male=0;non_neuro_nhomalt_nfe_male=0;controls_AC_female=1;controls_AN_female=12880;controls_AF_female=7.76398e-05;controls_nhomalt_female=0;non_cancer_AC_fin=0;non_cancer_AN_fin=3186;non_cancer_AF_fin=0;non_cancer_nhomalt_fin=0;non_topmed_AC_fin=0;non_topmed_AN_fin=3186;non_topmed_AF_fin=0;non_topmed_nhomalt_fin=0;non_cancer_AC_eas_oea=2;non_cancer_AN_eas_oea=5984;non_cancer_AF_eas_oea=0.000334225;non_cancer_nhomalt_eas_oea=0;non_topmed_AC_nfe_female=0;non_topmed_AN_nfe_female=11018;non_topmed_AF_nfe_female=0;non_topmed_nho
malt_nfe_female=0;non_cancer_AC_sas_male=0;non_cancer_AN_sas_male=3712;non_cancer_AF_sas_male=0;non_cancer_nhomalt_sas_male=0;controls_AC_asj_male=0;controls_AN_asj_male=152;controls_AF_asj_male=0;controls_nhomalt_asj_male=0;non_cancer_AC_raw=2;non_cancer_AN_raw=171340;non_cancer_AF_raw=1.16727e-05;non_cancer_nhomalt_raw=0;non_cancer_AC_eas_male=1;non_cancer_AN_eas_male=4056;non_cancer_AF_eas_male=0.000246548;non_cancer_nhomalt_eas_male=0;non_topmed_AC_asj_male=0;non_topmed_AN_asj_male=700;non_topmed_AF_asj_male=0;non_topmed_nhomalt_asj_male=0;non_neuro_AC_oth=0;non_neuro_AN_oth=1578;non_neuro_AF_oth=0;non_neuro_nhomalt_oth=0;AC_male=1;AN_male=30182;AF_male=3.31323e-05;nhomalt_male=0;controls_AC_fin_female=0;controls_AN_fin_female=1178;controls_AF_fin_female=0;controls_nhomalt_fin_female=0;controls_AC_nfe_bgr=0;controls_AN_nfe_bgr=0;controls_nhomalt_nfe_bgr=0;controls_AC_asj_female=0;controls_AN_asj_female=124;controls_AF_asj_female=0;controls_nhomalt_asj_female=0;AC_amr_male=0;AN_amr_male=4842;AF_amr_male=0;nhomalt_amr_male=0;AC_amr_female=0;AN_amr_female=5704;AF_amr_female=0;nhomalt_amr_female=0;non_topmed_AC_sas_male=0;non_topmed_AN_sas_male=3712;non_topmed_AF_sas_male=0;non_topmed_nhomalt_sas_male=0;AC_oth_male=0;AN_oth_male=1028;AF_oth_male=0;nhomalt_oth_male=0;non_cancer_AC_sas=0;non_cancer_AN_sas=6108;non_cancer_AF_sas=0;non_cancer_nhomalt_sas=0;non_neuro_AC_nfe_seu=0;non_neuro_AN_nfe_seu=122;non_neuro_AF_nfe_seu=0;non_neuro_nhomalt_nfe_seu=0;non_cancer_AC_eas_kor=0;non_cancer_AN_eas_kor=1854;non_cancer_AF_eas_kor=0;non_cancer_nhomalt_eas_kor=0;non_topmed_AC_afr_female=0;non_topmed_AN_afr_female=2964;non_topmed_AF_afr_female=0;non_topmed_nhomalt_afr_female=0;controls_AC_sas_male=0;controls_AN_sas_male=1732;controls_AF_sas_male=0;controls_nhomalt_sas_male=0;non_topmed_AC_sas_female=0;non_topmed_AN_sas_female=2402;non_topmed_AF_sas_female=0;non_topmed_nhomalt_sas_female=0;non_topmed_AC_afr=0;non_topmed_AN_afr=5182;non_topmed_AF_afr=0;non_topmed_nhomalt_afr=0;co
ntrols_AC=1;controls_AN=25082;controls_AF=3.98692e-05;controls_nhomalt=0;non_neuro_AC_oth_female=0;non_neuro_AN_oth_female=820;non_neuro_AF_oth_female=0;non_neuro_nhomalt_oth_female=0;non_topmed_faf95_amr=0;non_topmed_faf99_amr=0;faf95_afr=0;faf99_afr=0;faf95_sas=0;faf99_sas=0;controls_faf95_afr=0;controls_faf99_afr=0;faf95_amr=0;faf99_amr=0;non_neuro_faf95_sas=0;non_neuro_faf99_sas=0;faf95_eas=4.461e-05;faf99_eas=4.452e-05;faf95=5.52e-06;faf99=5.07e-06;non_neuro_faf95_afr=0;non_neuro_faf99_afr=0;non_cancer_faf95_eas=4.479e-05;non_cancer_faf99_eas=4.459e-05;non_neuro_faf95_amr=0;non_neuro_faf99_amr=0;non_topmed_faf95_sas=0;non_topmed_faf99_sas=0;controls_faf95_nfe=0;controls_faf99_nfe=0;non_cancer_faf95_afr=0;non_cancer_faf99_afr=0;non_cancer_faf95_amr=0;non_cancer_faf99_amr=0;non_topmed_faf95=5.64e-06;non_topmed_faf99=5.11e-06;non_neuro_faf95_nfe=0;non_neuro_faf99_nfe=0;non_neuro_faf95=6.68e-06;non_neuro_faf99=6.5e-06;non_topmed_faf95_nfe=0;non_topmed_faf99_nfe=0;controls_faf95_eas=1.3e-05;controls_faf99_eas=1.3e-05;controls_faf95_sas=0;controls_faf99_sas=0;faf95_nfe=0;faf99_nfe=0;non_topmed_faf95_eas=4.462e-05;non_topmed_faf99_eas=4.452e-05;controls_faf95_amr=0;controls_faf99_amr=0;non_neuro_faf95_eas=4.464e-05;non_neuro_faf99_eas=4.453e-05;non_cancer_faf95_nfe=0;non_cancer_faf99_nfe=0;non_cancer_faf95=5.64e-06;non_cancer_faf99=5.11e-06;non_cancer_faf95_sas=0;non_cancer_faf99_sas=0;non_topmed_faf95_afr=0;non_topmed_faf99_afr=0;controls_faf95=2e-06;controls_faf99=2e-06;popmax=eas;AC_popmax=2;AN_popmax=7908;AF_popmax=0.000252908;nhomalt_popmax=0;age_hist_het_bin_freq=0|1|0|0|1|0|0|0|0|0;age_hist_het_n_smaller=0;age_hist_het_n_larger=0;age_hist_hom_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_hom_n_smaller=0;age_hist_hom_n_larger=0;non_topmed_popmax=eas;non_topmed_AC_popmax=2;non_topmed_AN_popmax=7906;non_topmed_AF_popmax=0.000252972;non_topmed_nhomalt_popmax=0;non_neuro_popmax=eas;non_neuro_AC_popmax=2;non_neuro_AN_popmax=7902;non_neuro_AF_popmax=0.0002531;non_neuro_nhomal
t_popmax=0;non_cancer_popmax=eas;non_cancer_AC_popmax=2;non_cancer_AN_popmax=7872;non_cancer_AF_popmax=0.000254065;non_cancer_nhomalt_popmax=0;controls_popmax=eas;controls_AC_popmax=1;controls_AN_popmax=3920;controls_AF_popmax=0.000255102;controls_nhomalt_popmax=0\");\n            writer.WriteLine(\"22\\t15528135\\trs1260541090\\tT\\tC\\t38.68\\tAC0;RF\\tAC=0;AN=60606;AF=0;rf_tp_probability=0.0116862;FS=0;InbreedingCoeff=-0.0683;MQ=21.65;MQRankSum=0.922;QD=0.99;ReadPosRankSum=0.198;SOR=0.126;BaseQRankSum=0.198;ClippingRankSum=1.04;DP=1695127;VQSLOD=2.04;VQSR_culprit=QD;segdup;rf_negative_label;rf_label=FP;rf_train;variant_type=snv;allele_type=snv;n_alt_alleles=1;pab_max=0.000113074;gq_hist_alt_bin_freq=0|0|0|0|0|0|0|1|0|0|0|0|0|0|0|0|0|0|0|0;gq_hist_all_bin_freq=35156|35322|7058|6597|2982|985|1608|1457|808|1793|2156|1278|4500|1271|3081|1464|3504|580|2731|6670;dp_hist_alt_bin_freq=0|0|0|0|0|0|1|0|0|0|0|0|0|0|0|0|0|0|0|0;dp_hist_alt_n_larger=0;dp_hist_all_bin_freq=120746|10478|3527|4643|7607|6825|5314|3379|1100|402|174|65|25|20|15|5|2|2|1|1;dp_hist_all_n_larger=0;ab_hist_alt_bin_freq=0|0|0|1|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;AC_nfe_seu=0;AN_nfe_seu=202;AF_nfe_seu=0;nhomalt_nfe_seu=0;controls_AC_afr_male=0;controls_AN_afr_male=994;controls_AF_afr_male=0;controls_nhomalt_afr_male=0;non_neuro_AC_eas_kor=0;non_neuro_AN_eas_kor=1852;non_neuro_AF_eas_kor=0;non_neuro_nhomalt_eas_kor=0;non_topmed_AC_amr=0;non_topmed_AN_amr=10546;non_topmed_AF_amr=0;non_topmed_nhomalt_amr=0;non_cancer_AC_asj_female=0;non_cancer_AN_asj_female=838;non_cancer_AF_asj_female=0;non_cancer_nhomalt_asj_female=0;AC_raw=1;AN_raw=191364;AF_raw=5.22564e-06;nhomalt_raw=0;AC_fin_female=0;AN_fin_female=1632;AF_fin_female=0;nhomalt_fin_female=0;non_cancer_AC_oth_female=0;non_cancer_AN_oth_female=950;non_cancer_AF_oth_female=0;non_cancer_nhomalt_oth_female=0;AC_nfe_bgr=0;AN_nfe_bgr=66;AF_nfe_bgr=0;nhomalt_nfe_bgr=0;non_neuro_AC_asj_female=0;non_neuro_AN_asj_female=342;non_neuro_AF_asj_female=0;non_neuro_nhomalt_
asj_female=0;AC_sas_male=0;AN_sas_male=3722;AF_sas_male=0;nhomalt_sas_male=0;non_neuro_AC_afr_male=0;non_neuro_AN_afr_male=2270;non_neuro_AF_afr_male=0;non_neuro_nhomalt_afr_male=0;AC_afr_male=0;AN_afr_male=2288;AF_afr_male=0;nhomalt_afr_male=0;AC_afr=0;AN_afr=5362;AF_afr=0;nhomalt_afr=0;controls_AC_nfe_swe=0;controls_AN_nfe_swe=8;controls_AF_nfe_swe=0;controls_nhomalt_nfe_swe=0;non_neuro_AC_afr_female=0;non_neuro_AN_afr_female=3060;non_neuro_AF_afr_female=0;non_neuro_nhomalt_afr_female=0;non_topmed_AC_amr_female=0;non_topmed_AN_amr_female=5704;non_topmed_AF_amr_female=0;non_topmed_nhomalt_amr_female=0;non_cancer_AC_female=0;non_cancer_AN_female=29084;non_cancer_AF_female=0;non_cancer_nhomalt_female=0;non_cancer_AC_nfe_onf=0;non_cancer_AN_nfe_onf=4460;non_cancer_AF_nfe_onf=0;non_cancer_nhomalt_nfe_onf=0;non_cancer_AC_male=0;non_cancer_AN_male=30116;non_cancer_AF_male=0;non_cancer_nhomalt_male=0;non_topmed_AC_oth_female=0;non_topmed_AN_oth_female=964;non_topmed_AF_oth_female=0;non_topmed_nhomalt_oth_female=0;AC_eas_female=0;AN_eas_female=3842;AF_eas_female=0;nhomalt_eas_female=0;non_cancer_AC_sas_female=0;non_cancer_AN_sas_female=2398;non_cancer_AF_sas_female=0;non_cancer_nhomalt_sas_female=0;AC_afr_female=0;AN_afr_female=3074;AF_afr_female=0;nhomalt_afr_female=0;AC_sas=0;AN_sas=6126;AF_sas=0;nhomalt_sas=0;non_neuro_AC_female=0;non_neuro_AN_female=25486;non_neuro_AF_female=0;non_neuro_nhomalt_female=0;controls_AC_afr=0;controls_AN_afr=2324;controls_AF_afr=0;controls_nhomalt_afr=0;non_neuro_AC_eas_jpn=0;non_neuro_AN_eas_jpn=34;non_neuro_AF_eas_jpn=0;non_neuro_nhomalt_eas_jpn=0;AC_nfe_onf=0;AN_nfe_onf=5004;AF_nfe_onf=0;nhomalt_nfe_onf=0;non_cancer_AC_amr_male=0;non_cancer_AN_amr_male=4856;non_cancer_AF_amr_male=0;non_cancer_nhomalt_amr_male=0;controls_AC_fin_male=0;controls_AN_fin_male=1434;controls_AF_fin_male=0;controls_nhomalt_fin_male=0;non_neuro_AC_nfe_nwe=0;non_neuro_AN_nfe_nwe=13672;non_neuro_AF_nfe_nwe=0;non_neuro_nhomalt_nfe_nwe=0;AC_fin_male=0;AN_fin_male=181
2;AF_fin_male=0;nhomalt_fin_male=0;AC_nfe_female=0;AN_nfe_female=11620;AF_nfe_female=0;nhomalt_nfe_female=0;AC_amr=0;AN_amr=10584;AF_amr=0;nhomalt_amr=0;non_topmed_AC_nfe_male=0;non_topmed_AN_nfe_male=11470;non_topmed_AF_nfe_male=0;non_topmed_nhomalt_nfe_male=0;non_neuro_AC_sas=0;non_neuro_AN_sas=6120;non_neuro_AF_sas=0;non_neuro_nhomalt_sas=0;non_cancer_AC_fin_male=0;non_cancer_AN_fin_male=1812;non_cancer_AF_fin_male=0;non_cancer_nhomalt_fin_male=0;non_cancer_AC_nfe_seu=0;non_cancer_AN_nfe_seu=98;non_cancer_AF_nfe_seu=0;non_cancer_nhomalt_nfe_seu=0;AC_eas=0;AN_eas=7914;AF_eas=0;nhomalt_eas=0;nhomalt=0;non_neuro_AC_nfe_female=0;non_neuro_AN_nfe_female=9518;non_neuro_AF_nfe_female=0;non_neuro_nhomalt_nfe_female=0;non_neuro_AC_afr=0;non_neuro_AN_afr=5330;non_neuro_AF_afr=0;non_neuro_nhomalt_afr=0;controls_AC_raw=1;controls_AN_raw=85756;controls_AF_raw=1.1661e-05;controls_nhomalt_raw=0;non_cancer_AC_eas=0;non_cancer_AN_eas=7874;non_cancer_AF_eas=0;non_cancer_nhomalt_eas=0;non_cancer_AC_amr_female=0;non_cancer_AN_amr_female=5714;non_cancer_AF_amr_female=0;non_cancer_nhomalt_amr_female=0;non_neuro_AC_nfe_swe=0;non_neuro_AN_nfe_swe=80;non_neuro_AF_nfe_swe=0;non_neuro_nhomalt_nfe_swe=0;controls_AC_male=0;controls_AN_male=12354;controls_AF_male=0;controls_nhomalt_male=0;non_topmed_AC_male=0;non_topmed_AN_male=29860;non_topmed_AF_male=0;non_topmed_nhomalt_male=0;controls_AC_eas_jpn=0;controls_AN_eas_jpn=34;controls_AF_eas_jpn=0;controls_nhomalt_eas_jpn=0;controls_AC_nfe_female=0;controls_AN_nfe_female=3774;controls_AF_nfe_female=0;controls_nhomalt_nfe_female=0;non_neuro_AC_amr=0;non_neuro_AN_amr=7370;non_neuro_AF_amr=0;non_neuro_nhomalt_amr=0;non_neuro_AC_eas_female=0;non_neuro_AN_eas_female=3836;non_neuro_AF_eas_female=0;non_neuro_nhomalt_eas_female=0;AC_asj_male=0;AN_asj_male=718;AF_asj_male=0;nhomalt_asj_male=0;controls_AC_nfe_male=0;controls_AN_nfe_male=3370;controls_AF_nfe_male=0;controls_nhomalt_nfe_male=0;non_neuro_AC_fin=0;non_neuro_AN_fin=3180;non_neuro_AF_fin=0;non
_neuro_nhomalt_fin=0;non_topmed_AC_sas=0;non_topmed_AN_sas=6126;non_topmed_AF_sas=0;non_topmed_nhomalt_sas=0;non_cancer_AC_nfe_female=0;non_cancer_AN_nfe_female=10750;non_cancer_AF_nfe_female=0;non_cancer_nhomalt_nfe_female=0;AC_oth_female=0;AN_oth_female=986;AF_oth_female=0;nhomalt_oth_female=0;non_cancer_AC_asj=0;non_cancer_AN_asj=1532;non_cancer_AF_asj=0;non_cancer_nhomalt_asj=0;AC_nfe_swe=0;AN_nfe_swe=110;AF_nfe_swe=0;nhomalt_nfe_swe=0;controls_AC_nfe=0;controls_AN_nfe=7144;controls_AF_nfe=0;controls_nhomalt_nfe=0;controls_AC_oth_female=0;controls_AN_oth_female=362;controls_AF_oth_female=0;controls_nhomalt_oth_female=0;controls_AC_asj=0;controls_AN_asj=276;controls_AF_asj=0;controls_nhomalt_asj=0;non_neuro_AC_amr_male=0;non_neuro_AN_amr_male=3326;non_neuro_AF_amr_male=0;non_neuro_nhomalt_amr_male=0;controls_AC_nfe_nwe=0;controls_AN_nfe_nwe=5838;controls_AF_nfe_nwe=0;controls_nhomalt_nfe_nwe=0;AC_nfe_nwe=0;AN_nfe_nwe=18098;AF_nfe_nwe=0;nhomalt_nfe_nwe=0;controls_AC_nfe_seu=0;controls_AN_nfe_seu=20;controls_AF_nfe_seu=0;controls_nhomalt_nfe_seu=0;controls_AC_sas_female=0;controls_AN_sas_female=1124;controls_AF_sas_female=0;controls_nhomalt_sas_female=0;non_neuro_AC_amr_female=0;non_neuro_AN_amr_female=4044;non_neuro_AF_amr_female=0;non_neuro_nhomalt_amr_female=0;non_cancer_AC_eas_jpn=0;non_cancer_AN_eas_jpn=34;non_cancer_AF_eas_jpn=0;non_cancer_nhomalt_eas_jpn=0;non_neuro_AC_nfe_onf=0;non_neuro_AN_nfe_onf=3914;non_neuro_AF_nfe_onf=0;non_neuro_nhomalt_nfe_onf=0;non_topmed_AC_eas_male=0;non_topmed_AN_eas_male=4070;non_topmed_AF_eas_male=0;non_topmed_nhomalt_eas_male=0;AC_eas_jpn=0;AN_eas_jpn=34;AF_eas_jpn=0;nhomalt_eas_jpn=0;non_cancer_AC_afr_male=0;non_cancer_AN_afr_male=2270;non_cancer_AF_afr_male=0;non_cancer_nhomalt_afr_male=0;non_cancer_AC_afr=0;non_cancer_AN_afr=5256;non_cancer_AF_afr=0;non_cancer_nhomalt_afr=0;controls_AC_amr_female=0;controls_AN_amr_female=3036;controls_AF_amr_female=0;controls_nhomalt_amr_female=0;non_neuro_AC_fin_male=0;non_neuro_AN_fin_ma
le=1722;non_neuro_AF_fin_male=0;non_neuro_nhomalt_fin_male=0;AC_female=0;AN_female=30174;AF_female=0;nhomalt_female=0;non_neuro_AC_nfe_bgr=0;non_neuro_AN_nfe_bgr=16;non_neuro_AF_nfe_bgr=0;non_neuro_nhomalt_nfe_bgr=0;non_neuro_AC_oth_male=0;non_neuro_AN_oth_male=756;non_neuro_AF_oth_male=0;non_neuro_nhomalt_oth_male=0;non_topmed_AC_nfe_est=0;non_topmed_AN_nfe_est=72;non_topmed_AF_nfe_est=0;non_topmed_nhomalt_nfe_est=0;non_topmed_AC_nfe_nwe=0;non_topmed_AN_nfe_nwe=17390;non_topmed_AF_nfe_nwe=0;non_topmed_nhomalt_nfe_nwe=0;non_topmed_AC_amr_male=0;non_topmed_AN_amr_male=4842;non_topmed_AF_amr_male=0;non_topmed_nhomalt_amr_male=0;non_cancer_AC_amr=0;non_cancer_AN_amr=10570;non_cancer_AF_amr=0;non_cancer_nhomalt_amr=0;non_topmed_AC_nfe_swe=0;non_topmed_AN_nfe_swe=110;non_topmed_AF_nfe_swe=0;non_topmed_nhomalt_nfe_swe=0;non_topmed_AC_nfe_onf=0;non_topmed_AN_nfe_onf=4712;non_topmed_AF_nfe_onf=0;non_topmed_nhomalt_nfe_onf=0;controls_AC_eas_kor=0;controls_AN_eas_kor=924;controls_AF_eas_kor=0;controls_nhomalt_eas_kor=0;non_topmed_AC_eas_oea=0;non_topmed_AN_eas_oea=6024;non_topmed_AF_eas_oea=0;non_topmed_nhomalt_eas_oea=0;controls_AC_eas_male=0;controls_AN_eas_male=1940;controls_AF_eas_male=0;controls_nhomalt_eas_male=0;controls_AC_oth_male=0;controls_AN_oth_male=344;controls_AF_oth_male=0;controls_nhomalt_oth_male=0;non_topmed_AC=0;non_topmed_AN=59334;non_topmed_AF=0;non_topmed_nhomalt=0;controls_AC_fin=0;controls_AN_fin=2736;controls_AF_fin=0;controls_nhomalt_fin=0;AC_eas_kor=0;AN_eas_kor=1854;AF_eas_kor=0;nhomalt_eas_kor=0;non_neuro_AC_nfe=0;non_neuro_AN_nfe=17872;non_neuro_AF_nfe=0;non_neuro_nhomalt_nfe=0;non_neuro_AC_fin_female=0;non_neuro_AN_fin_female=1458;non_neuro_AF_fin_female=0;non_neuro_nhomalt_fin_female=0;non_cancer_AC_nfe_male=0;non_cancer_AN_nfe_male=11692;non_cancer_AF_nfe_male=0;non_cancer_nhomalt_nfe_male=0;controls_AC_eas_oea=0;controls_AN_eas_oea=2964;controls_AF_eas_oea=0;controls_nhomalt_eas_oea=0;non_topmed_AC_nfe_seu=0;non_topmed_AN_nfe_seu=202;non_top
med_AF_nfe_seu=0;non_topmed_nhomalt_nfe_seu=0;controls_AC_eas_female=0;controls_AN_eas_female=1982;controls_AF_eas_female=0;controls_nhomalt_eas_female=0;non_topmed_AC_asj=0;non_topmed_AN_asj=1578;non_topmed_AF_asj=0;non_topmed_nhomalt_asj=0;controls_AC_nfe_onf=0;controls_AN_nfe_onf=1256;controls_AF_nfe_onf=0;controls_nhomalt_nfe_onf=0;non_neuro_AC=0;non_neuro_AN=50088;non_neuro_AF=0;non_neuro_nhomalt=0;AC_eas_oea=0;AN_eas_oea=6026;AF_eas_oea=0;nhomalt_eas_oea=0;non_topmed_AC_nfe=0;non_topmed_AN_nfe=22552;non_topmed_AF_nfe=0;non_topmed_nhomalt_nfe=0;non_cancer_AC_oth=0;non_cancer_AN_oth=1964;non_cancer_AF_oth=0;non_cancer_nhomalt_oth=0;non_topmed_AC_raw=1;non_topmed_AN_raw=187054;non_topmed_AF_raw=5.34605e-06;non_topmed_nhomalt_raw=0;non_neuro_AC_nfe_est=0;non_neuro_AN_nfe_est=64;non_neuro_AF_nfe_est=0;non_neuro_nhomalt_nfe_est=0;non_topmed_AC_oth_male=0;non_topmed_AN_oth_male=1006;non_topmed_AF_oth_male=0;non_topmed_nhomalt_oth_male=0;non_cancer_AC_oth_male=0;non_cancer_AN_oth_male=1014;non_cancer_AF_oth_male=0;non_cancer_nhomalt_oth_male=0;AC_nfe_est=0;AN_nfe_est=76;AF_nfe_est=0;nhomalt_nfe_est=0;non_cancer_AC_afr_female=0;non_cancer_AN_afr_female=2986;non_cancer_AF_afr_female=0;non_cancer_nhomalt_afr_female=0;non_topmed_AC_afr_male=0;non_topmed_AN_afr_male=2234;non_topmed_AF_afr_male=0;non_topmed_nhomalt_afr_male=0;AC_eas_male=0;AN_eas_male=4072;AF_eas_male=0;nhomalt_eas_male=0;controls_AC_eas=0;controls_AN_eas=3922;controls_AF_eas=0;controls_nhomalt_eas=0;non_neuro_AC_eas_male=0;non_neuro_AN_eas_male=4072;non_neuro_AF_eas_male=0;non_neuro_nhomalt_eas_male=0;non_cancer_AC_nfe_nwe=0;non_cancer_AN_nfe_nwe=17730;non_cancer_AF_nfe_nwe=0;non_cancer_nhomalt_nfe_nwe=0;controls_AC_sas=0;controls_AN_sas=2864;controls_AF_sas=0;controls_nhomalt_sas=0;non_neuro_AC_sas_male=0;non_neuro_AN_sas_male=3716;non_neuro_AF_sas_male=0;non_neuro_nhomalt_sas_male=0;non_neuro_AC_asj_male=0;non_neuro_AN_asj_male=386;non_neuro_AF_asj_male=0;non_neuro_nhomalt_asj_male=0;non_cancer_AC_nfe_bg
r=0;non_cancer_AN_nfe_bgr=54;non_cancer_AF_nfe_bgr=0;non_cancer_nhomalt_nfe_bgr=0;controls_AC_oth=0;controls_AN_oth=706;controls_AF_oth=0;controls_nhomalt_oth=0;non_cancer_AC_eas_female=0;non_cancer_AN_eas_female=3818;non_cancer_AF_eas_female=0;non_cancer_nhomalt_eas_female=0;AC_nfe=0;AN_nfe=23556;AF_nfe=0;nhomalt_nfe=0;non_topmed_AC_female=0;non_topmed_AN_female=29474;non_topmed_AF_female=0;non_topmed_nhomalt_female=0;non_neuro_AC_asj=0;non_neuro_AN_asj=728;non_neuro_AF_asj=0;non_neuro_nhomalt_asj=0;non_topmed_AC_eas_female=0;non_topmed_AN_eas_female=3842;non_topmed_AF_eas_female=0;non_topmed_nhomalt_eas_female=0;non_neuro_AC_raw=1;non_neuro_AN_raw=158316;non_neuro_AF_raw=6.31648e-06;non_neuro_nhomalt_raw=0;non_topmed_AC_eas=0;non_topmed_AN_eas=7912;non_topmed_AF_eas=0;non_topmed_nhomalt_eas=0;non_topmed_AC_fin_male=0;non_topmed_AN_fin_male=1812;non_topmed_AF_fin_male=0;non_topmed_nhomalt_fin_male=0;non_cancer_AC_asj_male=0;non_cancer_AN_asj_male=694;non_cancer_AF_asj_male=0;non_cancer_nhomalt_asj_male=0;AC_fin=0;AN_fin=3444;AF_fin=0;nhomalt_fin=0;AC_nfe_male=0;AN_nfe_male=11936;AF_nfe_male=0;nhomalt_nfe_male=0;non_topmed_AC_eas_kor=0;non_topmed_AN_eas_kor=1854;non_topmed_AF_eas_kor=0;non_topmed_nhomalt_eas_kor=0;controls_AC_amr_male=0;controls_AN_amr_male=2380;controls_AF_amr_male=0;controls_nhomalt_amr_male=0;non_neuro_AC_eas_oea=0;non_neuro_AN_eas_oea=6022;non_neuro_AF_eas_oea=0;non_neuro_nhomalt_eas_oea=0;AC_sas_female=0;AN_sas_female=2404;AF_sas_female=0;nhomalt_sas_female=0;controls_AC_afr_female=0;controls_AN_afr_female=1330;controls_AF_afr_female=0;controls_nhomalt_afr_female=0;controls_AC_amr=0;controls_AN_amr=5416;controls_AF_amr=0;controls_nhomalt_amr=0;non_topmed_AC_eas_jpn=0;non_topmed_AN_eas_jpn=34;non_topmed_AF_eas_jpn=0;non_topmed_nhomalt_eas_jpn=0;AC_asj_female=0;AN_asj_female=890;AF_asj_female=0;nhomalt_asj_female=0;non_topmed_AC_nfe_bgr=0;non_topmed_AN_nfe_bgr=66;non_topmed_AF_nfe_bgr=0;non_topmed_nhomalt_nfe_bgr=0;non_cancer_AC_nfe_est=0;non_can
cer_AN_nfe_est=64;non_cancer_AF_nfe_est=0;non_cancer_nhomalt_nfe_est=0;non_neuro_AC_eas=0;non\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        private static Stream GetChr22GenomeStream()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##gnomAD\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\"chr22\\t15528101\\trs1340431007\\tT\\tC\\t.\\tAS_VQSR\\tAC=2;AN=147274;AF=1.35801e-05;popmax=afr;faf95_popmax=8.52000e-06;AC-non_v2-XX=1;AN-non_v2-XX=58332;AF-non_v2-XX=1.71432e-05;nhomalt-non_v2-XX=0;AC-non_cancer-fin-XX=0;AN-non_cancer-fin-XX=2470;AF-non_cancer-fin-XX=0.00000;nhomalt-non_cancer-fin-XX=0;AC-non_neuro-nfe=0;AN-non_neuro-nfe=63168;AF-non_neuro-nfe=0.00000;nhomalt-non_neuro-nfe=0;AC-non_neuro-afr-XY=0;AN-non_neuro-afr-XY=12982;AF-non_neuro-afr-XY=0.00000;nhomalt-non_neuro-afr-XY=0;AC-non_neuro-nfe-XY=0;AN-non_neuro-nfe-XY=25918;AF-non_neuro-nfe-XY=0.00000;nhomalt-non_neuro-nfe-XY=0;AC-controls_and_biobanks-eas-XY=0;AN-controls_and_biobanks-eas-XY=1362;AF-controls_and_biobanks-eas-XY=0.00000;nhomalt-controls_and_biobanks-eas-XY=0;AC-non_neuro-sas-XX=0;AN-non_neuro-sas-XX=1126;AF-non_neuro-sas-XX=0.00000;nhomalt-non_neuro-sas-XX=0;AC-non_v2=1;AN-non_v2=110588;AF-non_v2=9.04257e-06;nhomalt-non_v2=0;AC-non_topmed-nfe-XX=0;AN-non_topmed-nfe-XX=9102;AF-non_topmed-nfe-XX=0.00000;nhomalt-non_topmed-nfe-XX=0;AC-non_v2-mid=0;AN-non_v2-mid=308;AF-non_v2-mid=0.00000;nhomalt-non_v2-mid=0;AC-non_topmed-sas=0;AN-non_topmed-sas=4616;AF-non_topmed-sas=0.00000;nhomalt-non_topmed-sas=0;AC-non_cancer-eas-XX=0;AN-non_cancer-eas-XX=2106;AF-non_cancer-eas-XX=0.00000;nhomalt-non_cancer-eas-XX=0;AC-amr-XY=0;AN-amr-XY=8204;AF-amr-XY=0.00000;nhomalt-amr-XY=0;AC-non_v2-nfe-XX=0;AN-non_v2-nfe-XX=31470;AF-non_v2-nfe-XX=0.00000;nhomalt-non_v2-nfe-XX=0;AC-
controls_and_biobanks-XY=0;AN-controls_and_biobanks-XY=18696;AF-controls_and_biobanks-XY=0.00000;nhomalt-controls_and_biobanks-XY=0;AC-non_neuro-asj-XY=0;AN-non_neuro-asj-XY=1530;AF-non_neuro-asj-XY=0.00000;nhomalt-non_neuro-asj-XY=0;AC-oth=0;AN-oth=1992;AF-oth=0.00000;nhomalt-oth=0;AC-non_topmed-mid-XY=0;AN-non_topmed-mid-XY=130;AF-non_topmed-mid-XY=0.00000;nhomalt-non_topmed-mid-XY=0;AC-non_cancer-asj-XX=0;AN-non_cancer-asj-XX=1730;AF-non_cancer-asj-XX=0.00000;nhomalt-non_cancer-asj-XX=0;AC-sas-XY=0;AN-sas-XY=3518;AF-sas-XY=0.00000;nhomalt-sas-XY=0;AC-non_neuro-fin=0;AN-non_neuro-fin=6798;AF-non_neuro-fin=0.00000;nhomalt-non_neuro-fin=0;AC-non_topmed-amr-XY=0;AN-non_topmed-amr-XY=7318;AF-non_topmed-amr-XY=0.00000;nhomalt-non_topmed-amr-XY=0;AC-non_neuro-XX=1;AN-non_neuro-XX=68726;AF-non_neuro-XX=1.45505e-05;nhomalt-non_neuro-XX=0;AC-fin-XX=0;AN-fin-XX=2470;AF-fin-XX=0.00000;nhomalt-fin-XX=0;AC-controls_and_biobanks-asj-XX=0;AN-controls_and_biobanks-asj-XX=84;AF-controls_and_biobanks-asj-XX=0.00000;nhomalt-controls_and_biobanks-asj-XX=0;AC-non_v2-raw=1;AN-non_v2-raw=114266;AF-non_v2-raw=8.75151e-06;nhomalt-non_v2-raw=0;AC-non_v2-asj=0;AN-non_v2-asj=3036;AF-non_v2-asj=0.00000;nhomalt-non_v2-asj=0;AC-nfe-XX=0;AN-nfe-XX=38906;AF-nfe-XX=0.00000;nhomalt-nfe-XX=0;AC-controls_and_biobanks-raw=0;AN-controls_and_biobanks-raw=32822;AF-controls_and_biobanks-raw=0.00000;nhomalt-controls_and_biobanks-raw=0;AC-controls_and_biobanks-ami=0;AN-controls_and_biobanks-ami=58;AF-controls_and_biobanks-ami=0.00000;nhomalt-controls_and_biobanks-ami=0;AC-non_topmed-eas=0;AN-non_topmed-eas=3516;AF-non_topmed-eas=0.00000;nhomalt-non_topmed-eas=0;AC-non_v2-amr=0;AN-non_v2-amr=13232;AF-non_v2-amr=0.00000;nhomalt-non_v2-amr=0;AC-non_neuro-sas=0;AN-non_neuro-sas=4642;AF-non_neuro-sas=0.00000;nhomalt-non_neuro-sas=0;AC-non_cancer-fin-XY=0;AN-non_cancer-fin-XY=7820;AF-non_cancer-fin-XY=0.00000;nhomalt-non_cancer-fin-XY=0;AC-non_cancer-nfe-XY=0;AN-non_cancer-nfe-XY=26344;AF-non_cancer-nfe-XY=0.0000
0;nhomalt-non_cancer-nfe-XY=0;AC-non_v2-oth=0;AN-non_v2-oth=1762;AF-non_v2-oth=0.00000;nhomalt-non_v2-oth=0;AC-ami=0;AN-ami=898;AF-ami=0.00000;nhomalt-ami=0;AC-non_cancer-XY=1;AN-non_cancer-XY=69278;AF-non_cancer-XY=1.44346e-05;nhomalt-non_cancer-XY=0;AC-non_v2-sas=0;AN-non_v2-sas=3718;AF-non_v2-sas=0.00000;nhomalt-non_v2-sas=0;AC-non_topmed-afr-XX=0;AN-non_topmed-afr-XX=10838;AF-non_topmed-afr-XX=0.00000;nhomalt-non_topmed-afr-XX=0;AC-sas=0;AN-sas=4644;AF-sas=0.00000;nhomalt-sas=0;AC-non_neuro-nfe-XX=0;AN-non_neuro-nfe-XX=37250;AF-non_neuro-nfe-XX=0.00000;nhomalt-non_neuro-nfe-XX=0;AC-non_topmed-ami-XX=0;AN-non_topmed-ami-XX=64;AF-non_topmed-ami-XX=0.00000;nhomalt-non_topmed-ami-XX=0;AC-ami-XY=0;AN-ami-XY=438;AF-ami-XY=0.00000;nhomalt-ami-XY=0;AC-oth-XX=0;AN-oth-XX=986;AF-oth-XX=0.00000;nhomalt-oth-XX=0;AC-non_cancer-eas=0;AN-non_cancer-eas=4718;AF-non_cancer-eas=0.00000;nhomalt-non_cancer-eas=0;AC-non_topmed-XY=1;AN-non_topmed-XY=45584;AF-non_topmed-XY=2.19375e-05;nhomalt-non_topmed-XY=0;AC-non_v2-ami=0;AN-non_v2-ami=896;AF-non_v2-ami=0.00000;nhomalt-non_v2-ami=0;AC-non_neuro=1;AN-non_neuro=131044;AF-non_neuro=7.63102e-06;nhomalt-non_neuro=0;AC-amr-XX=0;AN-amr-XX=6532;AF-amr-XX=0.00000;nhomalt-amr-XX=0;AC-controls_and_biobanks-nfe-XY=0;AN-controls_and_biobanks-nfe-XY=3508;AF-controls_and_biobanks-nfe-XY=0.00000;nhomalt-controls_and_biobanks-nfe-XY=0;AC-controls_and_biobanks-eas=0;AN-controls_and_biobanks-eas=2378;AF-controls_and_biobanks-eas=0.00000;nhomalt-controls_and_biobanks-eas=0;AC-XX=1;AN-XX=75568;AF-XX=1.32331e-05;nhomalt-XX=0;AC-non_cancer-oth-XY=0;AN-non_cancer-oth-XY=962;AF-non_cancer-oth-XY=0.00000;nhomalt-non_cancer-oth-XY=0;AC-non_v2-XY=0;AN-non_v2-XY=52256;AF-non_v2-XY=0.00000;nhomalt-non_v2-XY=0;AC-non_topmed-amr-XX=0;AN-non_topmed-amr-XX=5084;AF-non_topmed-amr-XX=0.00000;nhomalt-non_topmed-amr-XX=0;AC-fin=0;AN-fin=10290;AF-fin=0.00000;nhomalt-fin=0;AC-controls_and_biobanks-nfe-XX=0;AN-controls_and_biobanks-nfe-XX=3210;AF-controls_and_biobanks-nfe-
XX=0.00000;nhomalt-controls_and_biobanks-nfe-XX=0;AC-controls_and_biobanks-afr=0;AN-controls_and_biobanks-afr=8390;AF-controls_and_biobanks-afr=0.00000;nhomalt-controls_and_biobanks-afr=0;AC-asj-XX=0;AN-asj-XX=1850;AF-asj-XX=0.00000;nhomalt-asj-XX=0;AC-non_topmed-mid=0;AN-non_topmed-mid=270;AF-non_topmed-mid=0.00000;nhomalt-non_topmed-mid=0;AC-non_cancer-sas-XY=0;AN-non_cancer-sas-XY=3504;AF-non_cancer-sas-XY=0.00000;nhomalt-non_cancer-sas-XY=0;AC-sas-XX=0;AN-sas-XX=1126;AF-sas-XX=0.00000;nhomalt-sas-XX=0;AC-non_topmed=1;AN-non_topmed=76604;AF-non_topmed=1.30541e-05;nhomalt-non_topmed=0;AC-non_v2-oth-XX=0;AN-non_v2-oth-XX=886;AF-non_v2-oth-XX=0.00000;nhomalt-non_v2-oth-XX=0;AC-non_neuro-ami-XY=0;AN-non_neuro-ami-XY=428;AF-non_neuro-ami-XY=0.00000;nhomalt-non_neuro-ami-XY=0;AC-controls_and_biobanks-afr-XY=0;AN-controls_and_biobanks-afr-XY=4022;AF-controls_and_biobanks-afr-XY=0.00000;nhomalt-controls_and_biobanks-afr-XY=0;AC-controls_and_biobanks-amr-XX=0;AN-controls_and_biobanks-amr-XX=2404;AF-controls_and_biobanks-amr-XX=0.00000;nhomalt-controls_and_biobanks-amr-XX=0;AC-non_topmed-amr=0;AN-non_topmed-amr=12402;AF-non_topmed-amr=0.00000;nhomalt-non_topmed-amr=0;AC-controls_and_biobanks-sas-XX=0;AN-controls_and_biobanks-sas-XX=828;AF-controls_and_biobanks-sas-XX=0.00000;nhomalt-controls_and_biobanks-sas-XX=0;AC-controls_and_biobanks-amr=0;AN-controls_and_biobanks-amr=4556;AF-controls_and_biobanks-amr=0.00000;nhomalt-controls_and_biobanks-amr=0;AC-non_neuro-fin-XX=0;AN-non_neuro-fin-XX=644;AF-non_neuro-fin-XX=0.00000;nhomalt-non_neuro-fin-XX=0;AC-non_cancer-raw=2;AN-non_cancer-raw=147644;AF-non_cancer-raw=1.35461e-05;nhomalt-non_cancer-raw=0;AC-non_neuro-mid=0;AN-non_neuro-mid=308;AF-non_neuro-mid=0.00000;nhomalt-non_neuro-mid=0;AC-non_v2-asj-XY=0;AN-non_v2-asj-XY=1364;AF-non_v2-asj-XY=0.00000;nhomalt-non_v2-asj-XY=0;AC-non_v2-afr=1;AN-non_v2-afr=26636;AF-non_v2-afr=3.75432e-05;nhomalt-non_v2-afr=0;AC-non_neuro-fin-XY=0;AN-non_neuro-fin-XY=6154;AF-non_neuro-fin-XY=0.00
000;nhomalt-non_neuro-fin-XY=0;AC-non_cancer-afr=2;AN-non_cancer-afr=38572;AF-non_cancer-afr=5.18511e-05;nhomalt-non_cancer-afr=0;AC-non_topmed-sas-XY=0;AN-non_topmed-sas-XY=3496;AF-non_topmed-sas-XY=0.00000;nhomalt-non_topmed-sas-XY=0;AC-mid-XY=0;AN-mid-XY=152;AF-mid-XY=0.00000;nhomalt-mid-XY=0;AC-non_v2-oth-XY=0;AN-non_v2-oth-XY=876;AF-non_v2-oth-XY=0.00000;nhomalt-non_v2-oth-XY=0;AC-controls_and_biobanks-fin=0;AN-controls_and_biobanks-fin=5356;AF-controls_and_biobanks-fin=0.00000;nhomalt-controls_and_biobanks-fin=0;AC-non_neuro-eas-XY=0;AN-non_neuro-eas-XY=2740;AF-non_neuro-eas-XY=0.00000;nhomalt-non_neuro-eas-XY=0;AC-non_topmed-eas-XX=0;AN-non_topmed-eas-XX=1344;AF-non_topmed-eas-XX=0.00000;nhomalt-non_topmed-eas-XX=0;AC-non_v2-afr-XX=1;AN-non_v2-afr-XX=14752;AF-non_v2-afr-XX=6.77874e-05;nhomalt-non_v2-afr-XX=0;AC-non_neuro-amr-XX=0;AN-non_neuro-amr-XX=6348;AF-non_neuro-amr-XX=0.00000;nhomalt-non_neuro-amr-XX=0;AC-non_cancer-ami=0;AN-non_cancer-ami=898;AF-non_cancer-ami=0.00000;nhomalt-non_cancer-ami=0;AC-XY=1;AN-XY=71706;AF-XY=1.39458e-05;nhomalt-XY=0;AC-non_topmed-asj-XX=0;AN-non_topmed-asj-XX=272;AF-non_topmed-asj-XX=0.00000;nhomalt-non_topmed-asj-XX=0;AC-non_topmed-eas-XY=0;AN-non_topmed-eas-XY=2172;AF-non_topmed-eas-XY=0.00000;nhomalt-non_topmed-eas-XY=0;AC-non_v2-eas-XY=0;AN-non_v2-eas-XY=1362;AF-non_v2-eas-XY=0.00000;nhomalt-non_v2-eas-XY=0;AC-eas=0;AN-eas=4944;AF-eas=0.00000;nhomalt-eas=0;AC-asj-XY=0;AN-asj-XY=1576;AF-asj-XY=0.00000;nhomalt-asj-XY=0;AC-non_v2-eas-XX=0;AN-non_v2-eas-XX=1258;AF-non_v2-eas-XX=0.00000;nhomalt-non_v2-eas-XX=0;AC-controls_and_biobanks-mid-XY=0;AN-controls_and_biobanks-mid-XY=112;AF-controls_and_biobanks-mid-XY=0.00000;nhomalt-controls_and_biobanks-mid-XY=0;AC-fin-XY=0;AN-fin-XY=7820;AF-fin-XY=0.00000;nhomalt-fin-XY=0;AC-non_topmed-nfe=0;AN-non_topmed-nfe=20552;AF-non_topmed-nfe=0.00000;nhomalt-non_topmed-nfe=0;AC-amr=0;AN-amr=14736;AF-amr=0.00000;nhomalt-amr=0;AC-non_neuro-ami=0;AN-non_neuro-ami=854;AF-non_neuro-ami=0.00000;nh
omalt-non_neuro-ami=0;AC-non_cancer-nfe-XX=0;AN-non_cancer-nfe-XX=37672;AF-non_cancer-nfe-XX=0.00000;nhomalt-non_cancer-nfe-XX=0;AC-non_cancer-mid=0;AN-non_cancer-mid=304;AF-non_cancer-mid=0.00000;nhomalt-non_cancer-mid=0;AC-non_v2-mid-XY=0;AN-non_v2-mid-XY=146;AF-non_v2-mid-XY=0.00000;nhomalt-non_v2-mid-XY=0;AC-controls_and_biobanks-amr-XY=0;AN-controls_and_biobanks-amr-XY=2152;AF-controls_and_biobanks-amr-XY=0.00000;nhomalt-controls_and_biobanks-amr-XY=0;AC-non_cancer-ami-XY=0;AN-non_cancer-ami-XY=438;AF-non_cancer-ami-XY=0.00000;nhomalt-non_cancer-ami-XY=0;AC-non_neuro-asj-XX=0;AN-non_neuro-asj-XX=1814;AF-non_neuro-asj-XX=0.00000;nhomalt-non_neuro-asj-XX=0;AC-afr=2;AN-afr=38874;AF-afr=5.14483e-05;nhomalt-afr=0;AC-non_v2-sas-XX=0;AN-non_v2-sas-XX=758;AF-non_v2-sas-XX=0.00000;nhomalt-non_v2-sas-XX=0;AC-non_neuro-afr-XX=1;AN-non_neuro-afr-XX=17802;AF-non_neuro-afr-XX=5.61735e-05;nhomalt-non_neuro-afr-XX=0;AC-non_cancer-sas=0;AN-non_cancer-sas=4612;AF-non_cancer-sas=0.00000;nhomalt-non_cancer-sas=0;AC-non_topmed-fin=0;AN-non_topmed-fin=10198;AF-non_topmed-fin=0.00000;nhomalt-non_topmed-fin=0;AC-non_cancer-asj-XY=0;AN-non_cancer-asj-XY=1532;AF-non_cancer-asj-XY=0.00000;nhomalt-non_cancer-asj-XY=0;AC-non_cancer-mid-XY=0;AN-non_cancer-mid-XY=144;AF-non_cancer-mid-XY=0.00000;nhomalt-non_cancer-mid-XY=0;AC-raw=2;AN-raw=151834;AF-raw=1.31723e-05;nhomalt-raw=0;AC-non_topmed-XX=0;AN-non_topmed-XX=31020;AF-non_topmed-XX=0.00000;nhomalt-non_topmed-XX=0;AC-ami-XX=0;AN-ami-XX=460;AF-ami-XX=0.00000;nhomalt-ami-XX=0;AC-eas-XY=0;AN-eas-XY=2740;AF-eas-XY=0.00000;nhomalt-eas-XY=0;AC-controls_and_biobanks-mid=0;AN-controls_and_biobanks-mid=246;AF-controls_and_biobanks-mid=0.00000;nhomalt-controls_and_biobanks-mid=0;AC-non_v2-nfe-XY=0;AN-non_v2-nfe-XY=19880;AF-non_v2-nfe-XY=0.00000;nhomalt-non_v2-nfe-XY=0;AC-controls_and_biobanks-sas=0;AN-controls_and_biobanks-sas=3022;AF-controls_and_biobanks-sas=0.00000;nhomalt-controls_and_biobanks-sas=0;AC-non_v2-eas=0;AN-non_v2-eas=2620;AF-non_v2-
eas=0.00000;nhomalt-non_v2-eas=0;AC-mid=0;AN-mid=314;AF-mid=0.00000;nhomalt-mid=0;AC-oth-XY=0;AN-oth-XY=1006;AF-oth-XY=0.00000;nhomalt-oth-XY=0;AC-non_cancer-nfe=0;AN-non_cancer-nfe=64016;AF-non_cancer-nfe=0.00000;nhomalt-non_cancer-nfe=0;AC-non_neuro-eas-XX=0;AN-non_neuro-eas-XX=2204;AF-non_neuro-eas-XX=0.00000;nhomalt-non_neuro-eas-XX=0;AC-non_neuro-sas-XY=0;AN-non_neuro-sas-XY=3516;AF-non_neuro-sas-XY=0.00000;nhomalt-non_neuro-sas-XY=0;AC-non_cancer-ami-XX=0;AN-non_cancer-ami-XX=460;AF-non_cancer-ami-XX=0.00000;nhomalt-non_cancer-ami-XX=0;AC-mid-XX=0;AN-mid-XX=162;AF-mid-XX=0.00000;nhomalt-mid-XX=0;AC-non_topmed-asj=0;AN-non_topmed-asj=978;AF-non_topmed-asj=0.00000;nhomalt-non_topmed-asj=0;AC-non_v2-asj-XX=0;AN-non_v2-asj-XX=1672;AF-non_v2-asj-XX=0.00000;nhomalt-non_v2-asj-XX=0;nhomalt=0;AC-non_v2-amr-XY=0;AN-non_v2-amr-XY=7436;AF-non_v2-amr-XY=0.00000;nhomalt-non_v2-amr-XY=0;AC-non_cancer-amr-XX=0;AN-non_cancer-amr-XX=6482;AF-non_cancer-amr-XX=0.00000;nhomalt-non_cancer-amr-XX=0;AC-controls_and_biobanks-afr-XX=0;AN-controls_and_biobanks-afr-XX=4368;AF-controls_and_biobanks-afr-XX=0.00000;nhomalt-controls_and_biobanks-afr-XX=0;AC-asj=0;AN-asj=3426;AF-asj=0.00000;nhomalt-asj=0;AC-non_topmed-asj-XY=0;AN-non_topmed-asj-XY=706;AF-non_topmed-asj-XY=0.00000;nhomalt-non_topmed-asj-XY=0;AC-non_v2-fin-XX=0;AN-non_v2-fin-XX=1118;AF-non_v2-fin-XX=0.00000;nhomalt-non_v2-fin-XX=0;AC-non_topmed-ami=0;AN-non_topmed-ami=104;AF-non_topmed-ami=0.00000;nhomalt-non_topmed-ami=0;AC-controls_and_biobanks-eas-XX=0;AN-controls_and_biobanks-eas-XX=1016;AF-controls_and_biobanks-eas-XX=0.00000;nhomalt-controls_and_biobanks-eas-XX=0;AC-controls_and_biobanks-fin-XX=0;AN-controls_and_biobanks-fin-XX=486;AF-controls_and_biobanks-fin-XX=0.00000;nhomalt-controls_and_biobanks-fin-XX=0;AC-non_topmed-raw=1;AN-non_topmed-raw=80440;AF-non_topmed-raw=1.24316e-05;nhomalt-non_topmed-raw=0;AC-non_cancer-eas-XY=0;AN-non_cancer-eas-XY=2612;AF-non_cancer-eas-XY=0.00000;nhomalt-non_cancer-eas-XY=0;AC-non_can
cer=2;AN-non_cancer=143152;AF-non_cancer=1.39712e-05;nhomalt-non_cancer=0;AC-controls_and_biobanks-ami-XY=0;AN-controls_and_biobanks-ami-XY=28;AF-controls_and_biobanks-ami-XY=0.00000;nhomalt-controls_and_biobanks-ami-XY=0;AC-controls_and_biobanks-mid-XX=0;AN-controls_and_biobanks-mid-XX=134;AF-controls_and_biobanks-mid-XX=0.00000;nhomalt-controls_and_biobanks-mid-XX=0;AC-non_v2-afr-XY=0;AN-non_v2-afr-XY=11884;AF-non_v2-afr-XY=0.00000;nhomalt-non_v2-afr-XY=0;AC-non_v2-sas-XY=0;AN-non_v2-sas-XY=2960;AF-non_v2-sas-XY=0.00000;nhomalt-non_v2-sas-XY=0;AC-non_v2-fin=0;AN-non_v2-fin=7030;AF-non_v2-fin=0.00000;nhomalt-non_v2-fin=0;AC-non_neuro-oth=0;AN-non_neuro-oth=1902;AF-non_neuro-oth=0.00000;nhomalt-non_neuro-oth=0;AC-non_cancer-sas-XX=0;AN-non_cancer-sas-XX=1108;AF-non_cancer-sas-XX=0.00000;nhomalt-non_cancer-sas-XX=0;AC-non_neuro-asj=0;AN-non_neuro-asj=3344;AF-non_neuro-asj=0.00000;nhomalt-non_neuro-asj=0;AC-non_topmed-afr=1;AN-non_topmed-afr=22548;AF-non_topmed-afr=4.43498e-05;nhomalt-non_topmed-afr=0;AC-non_topmed-afr-XY=1;AN-non_topmed-afr-XY=11710;AF-non_topmed-afr-XY=8.53971e-05;nhomalt-non_topmed-afr-XY=0;AC-non_neuro-eas=0;AN-non_neuro-eas=4944;AF-non_neuro-eas=0.00000;nhomalt-non_neuro-eas=0;AC-afr-XX=1;AN-afr-XX=20872;AF-afr-XX=4.79111e-05;nhomalt-afr-XX=0;AC-non_neuro-mid-XY=0;AN-non_neuro-mid-XY=146;AF-non_neuro-mid-XY=0.00000;nhomalt-non_neuro-mid-XY=0;AC-non_topmed-fin-XX=0;AN-non_topmed-fin-XX=2412;AF-non_topmed-fin-XX=0.00000;nhomalt-non_topmed-fin-XX=0;AC-non_cancer-amr=0;AN-non_cancer-amr=14556;AF-non_cancer-amr=0.00000;nhomalt-non_cancer-amr=0;AC-non_v2-ami-XX=0;AN-non_v2-ami-XX=460;AF-non_v2-ami-XX=0.00000;nhomalt-non_v2-ami-XX=0;AC-afr-XY=1;AN-afr-XY=18002;AF-afr-XY=5.55494e-05;nhomalt-afr-XY=0;AC-non_v2-mid-XX=0;AN-non_v2-mid-XX=162;AF-non_v2-mid-XX=0.00000;nhomalt-non_v2-mid-XX=0;AC-non_topmed-fin-XY=0;AN-non_topmed-fin-XY=7786;AF-non_topmed-fin-XY=0.00000;nhomalt-non_topmed-fin-XY=0;AC-non_neuro-amr-XY=0;AN-non_neuro-amr-XY=7952;AF-non_neuro-amr-
XY=0.00000;nhomalt-non_neuro-amr-XY=0;AC-non_topmed-mid-XX=0;AN-non_topmed-mid-XX=140;AF-non_topmed-mid-XX=0.00000;nhomalt-non_topmed-mid-XX=0;AC-controls_and_biobanks-asj-XY=0;AN-controls_and_biobanks-asj-XY=50;AF-controls_and_biobanks-asj-XY=0.00000;nhomalt-controls_and_biobanks-asj-XY=0;AC-non_v2-fin-XY=0;AN-non_v2-fin-XY=5912;AF-non_v2-fin-XY=0.00000;nhomalt-non_v2-fin-XY=0;AC-controls_and_biobanks-ami-XX=0;AN-controls_and_biobanks-ami-XX=30;AF-controls_and_biobanks-ami-XX=0.00000;nhomalt-controls_and_biobanks-ami-XX=0;AC-eas-XX=0;AN-eas-XX=2204;AF-eas-XX=0.00000;nhomalt-eas-XX=0;AC-non_cancer-amr-XY=0;AN-non_cancer-amr-XY=8074;AF-non_cancer-amr-XY=0.00000;nhomalt-non_cancer-amr-XY=0;AC-non_neuro-ami-XX=0;AN-non_neuro-ami-XX=426;AF-non_neuro-ami-XX=0.00000;nhomalt-non_neuro-ami-XX=0;AC-controls_and_biobanks=0;AN-controls_and_biobanks=31606;AF-controls_and_biobanks=0.00000;nhomalt-controls_and_biobanks=0;AC-controls_and_biobanks-oth=0;AN-controls_and_biobanks-oth=748;AF-controls_and_biobanks-oth=0.00000;nhomalt-controls_and_biobanks-oth=0;AC-nfe-XY=0;AN-nfe-XY=28250;AF-nfe-XY=0.00000;nhomalt-nfe-XY=0;AC-non_cancer-afr-XX=1;AN-non_cancer-afr-XX=20724;AF-non_cancer-afr-XX=4.82532e-05;nhomalt-non_cancer-afr-XX=0;AC-controls_and_biobanks-sas-XY=0;AN-controls_and_biobanks-sas-XY=2194;AF-controls_and_biobanks-sas-XY=0.00000;nhomalt-controls_and_biobanks-sas-XY=0;AC-non_cancer-oth=0;AN-non_cancer-oth=1924;AF-non_cancer-oth=0.00000;nhomalt-non_cancer-oth=0;AC-non_topmed-oth=0;AN-non_topmed-oth=1420;AF-non_topmed-oth=0.00000;nhomalt-non_topmed-oth=0;AC-non_topmed-nfe-XY=0;AN-non_topmed-nfe-XY=11450;AF-non_topmed-nfe-XY=0.00000;nhomalt-non_topmed-nfe-XY=0;AC-non_topmed-sas-XX=0;AN-non_topmed-sas-XX=1120;AF-non_topmed-sas-XX=0.00000;nhomalt-non_topmed-sas-XX=0;AC-non_v2-nfe=0;AN-non_v2-nfe=51350;AF-non_v2-nfe=0.00000;nhomalt-non_v2-nfe=0;AC-non_topmed-oth-XX=0;AN-non_topmed-oth-XX=644;AF-non_topmed-oth-XX=0.00000;nhomalt-non_topmed-oth-XX=0;AC-non_cancer-mid-XX=0;AN-non_can
cer-mid-XX=160;AF-non_cancer-mid-XX=0.00000;nhomalt-non_cancer-mid-XX=0;AC-controls_and_biobanks-nfe=0;AN-controls_and_biobanks-nfe=6718;AF-controls_and_biobanks-nfe=0.00000;nhomalt-controls_and_biobanks-nfe=0;AC-controls_and_biobanks-oth-XY=0;AN-controls_and_biobanks-oth-XY=398;AF-controls_and_biobanks-oth-XY=0.00000;nhomalt-controls_and_biobanks-oth-XY=0;AC-controls_and_biobanks-fin-XY=0;AN-controls_and_biobanks-fin-XY=4870;AF-controls_and_biobanks-fin-XY=0.00000;nhomalt-controls_and_biobanks-fin-XY=0;AC-non_v2-amr-XX=0;AN-non_v2-amr-XX=5796;AF-non_v2-amr-XX=0.00000;nhomalt-non_v2-amr-XX=0;AC-non_cancer-asj=0;AN-non_cancer-asj=3262;AF-non_cancer-asj=0.00000;nhomalt-non_cancer-asj=0;AC-non_cancer-oth-XX=0;AN-non_cancer-oth-XX=962;AF-non_cancer-oth-XX=0.00000;nhomalt-non_cancer-oth-XX=0;AC-non_neuro-amr=0;AN-non_neuro-amr=14300;AF-non_neuro-amr=0.00000;nhomalt-non_neuro-amr=0;AC-non_cancer-XX=1;AN-non_cancer-XX=73874;AF-non_cancer-XX=1.35366e-05;nhomalt-non_cancer-XX=0;AC-non_v2-ami-XY=0;AN-non_v2-ami-XY=436;AF-non_v2-ami-XY=0.00000;nhomalt-non_v2-ami-XY=0;AC-non_neuro-raw=1;AN-non_neuro-raw=134494;AF-non_neuro-raw=7.43528e-06;nhomalt-non_neuro-raw=0;AC-non_neuro-afr=1;AN-non_neuro-afr=30784;AF-non_neuro-afr=3.24844e-05;nhomalt-non_neuro-afr=0;AC-non_topmed-ami-XY=0;AN-non_topmed-ami-XY=40;AF-non_topmed-ami-XY=0.00000;nhomalt-non_topmed-ami-XY=0;AC-non_neuro-oth-XY=0;AN-non_neuro-oth-XY=952;AF-non_neuro-oth-XY=0.00000;nhomalt-non_neuro-oth-XY=0;AC-non_neuro-oth-XX=0;AN-non_neuro-oth-XX=950;AF-non_neuro-oth-XX=0.00000;nhomalt-non_neuro-oth-XX=0;AC-controls_and_biobanks-XX=0;AN-controls_and_biobanks-XX=12910;AF-controls_and_biobanks-XX=0.00000;nhomalt-controls_and_biobanks-XX=0;AC-non_cancer-afr-XY=1;AN-non_cancer-afr-XY=17848;AF-non_cancer-afr-XY=5.60287e-05;nhomalt-non_cancer-afr-XY=0;AC-non_cancer-fin=0;AN-non_cancer-fin=10290;AF-non_cancer-fin=0.00000;nhomalt-non_cancer-fin=0;AC-controls_and_biobanks-asj=0;AN-controls_and_biobanks-asj=134;AF-controls_and_biobanks-
asj=0.00000;nhomalt-controls_and_biobanks-asj=0;AC-non_topmed-oth-XY=0;AN-non_topmed-oth-XY=776;AF-non_topmed-oth-XY=0.00000;nhomalt-non_topmed-oth-XY=0;AC-non_neuro-mid-XX=0;AN-non_neuro-mid-XX=162;AF-non_neuro-mid-XX=0.00000;nhomalt-non_neuro-mid-XX=0;AC-controls_and_biobanks-oth-XX=0;AN-controls_and_biobanks-oth-XX=350;AF-controls_and_biobanks-oth-XX=0.00000;nhomalt-controls_and_biobanks-oth-XX=0;AC-non_neuro-XY=0;AN-non_neuro-XY=62318;AF-non_neuro-XY=0.00000;nhomalt-non_neuro-XY=0;AC-nfe=0;AN-nfe=67156;AF-nfe=0.00000;nhomalt-nfe=0;AC_popmax=2;AN_popmax=38874;AF_popmax=5.14483e-05;nhomalt_popmax=0;faf95-sas=0.00000;faf99-sas=0.00000;faf95-eas=0.00000;faf99-eas=0.00000;faf95-amr=0.00000;faf99-amr=0.00000;faf95-afr=8.52000e-06;faf99-afr=3.19000e-06;faf95=2.26000e-06;faf99=8.50000e-07;faf95-nfe=0.00000;faf99-nfe=0.00000;age_hist_het_bin_freq=0|0|0|0|0|0|1|0|0|0;age_hist_het_n_smaller=1;age_hist_het_n_larger=0;age_hist_hom_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_hom_n_smaller=0;age_hist_hom_n_larger=0;FS=.;MQ=30.0616;MQRankSum=0.369000;QD=8.54054;ReadPosRankSum=0.791000;VarDP=37;QUALapprox=316;AS_FS=.;AS_MQ=30.0616;AS_MQRankSum=0.369000;AS_pab_max=0.823803;AS_QD=8.54054;AS_ReadPosRankSum=0.791000;AS_SOR=0.329753;InbreedingCoeff=-1.32322e-05;AS_VQSLOD=-3.36890;AS_culprit=AS_MQ;allele_type=snv;n_alt_alleles=1;variant_type=snv;segdup;gq_hist_alt_bin_freq=0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|2;gq_hist_all_bin_freq=0|0|0|0|40527|14808|9280|4429|2130|1202|599|208|149|95|57|53|35|17|12|36;dp_hist_alt_bin_freq=0|0|0|1|1|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;dp_hist_alt_n_larger=0;dp_hist_all_bin_freq=0|0|7908|30432|19007|11405|4364|412|62|15|14|4|6|4|2|1|0|0|1|0;dp_hist_all_n_smaller=0;dp_hist_all_n_larger=0;ab_hist_alt_bin_freq=0|0|0|0|0|1|0|0|1|0|0|0|0|0|0|0|0|0|0|0;cadd_raw_score=0.738478;cadd_phred=8.80700;vep=C|upstream_gene_variant|MODIFIER|OR11H1|ENSG00000130538|Transcript|ENST00000252835|protein_coding||||||||||1|58|1|SNV||HGNC|HGNC:15404|YES||P2|CCDS74807.1|ENSP00000252835||
||||||||||||||,C|upstream_gene_variant|MODIFIER|OR11H1|ENSG00000130538|Transcript|ENST00000643195|protein_coding||||||||||1|91|1|SNV||HGNC|HGNC:15404|||A2||ENSP00000495403||||||||||||||||,C|upstream_gene_variant|MODIFIER|OR11H1|81061|Transcript|NM_001005239.1|protein_coding||||||||||1|58|1|SNV||EntrezGene|HGNC:15404|YES||||NP_001005239.1||||||||||||||||\");\n            writer.WriteLine(\"chr22\\t15528106\\trs1285219414\\tT\\tC\\t.\\tAS_VQSR\\tAC=2;AN=148296;AF=1.34865e-05;popmax=sas;faf95_popmax=0.00000;AC-non_v2-XX=1;AN-non_v2-XX=58720;AF-non_v2-XX=1.70300e-05;nhomalt-non_v2-XX=0;AC-non_cancer-fin-XX=0;AN-non_cancer-fin-XX=2506;AF-non_cancer-fin-XX=0.00000;nhomalt-non_cancer-fin-XX=0;AC-non_neuro-nfe=0;AN-non_neuro-nfe=63364;AF-non_neuro-nfe=0.00000;nhomalt-non_neuro-nfe=0;AC-non_neuro-afr-XY=0;AN-non_neuro-afr-XY=13138;AF-non_neuro-afr-XY=0.00000;nhomalt-non_neuro-afr-XY=0;AC-non_neuro-nfe-XY=0;AN-non_neuro-nfe-XY=26012;AF-non_neuro-nfe-XY=0.00000;nhomalt-non_neuro-nfe-XY=0;AC-controls_and_biobanks-eas-XY=0;AN-controls_and_biobanks-eas-XY=1376;AF-controls_and_biobanks-eas-XY=0.00000;nhomalt-controls_and_biobanks-eas-XY=0;AC-non_neuro-sas-XX=1;AN-non_neuro-sas-XX=1132;AF-non_neuro-sas-XX=0.000883392;nhomalt-non_neuro-sas-XX=0;AC-non_v2=1;AN-non_v2=111410;AF-non_v2=8.97585e-06;nhomalt-non_v2=0;AC-non_topmed-nfe-XX=0;AN-non_topmed-nfe-XX=9164;AF-non_topmed-nfe-XX=0.00000;nhomalt-non_topmed-nfe-XX=0;AC-non_v2-mid=0;AN-non_v2-mid=308;AF-non_v2-mid=0.00000;nhomalt-non_v2-mid=0;AC-non_topmed-sas=1;AN-non_topmed-sas=4650;AF-non_topmed-sas=0.000215054;nhomalt-non_topmed-sas=0;AC-non_cancer-eas-XX=0;AN-non_cancer-eas-XX=2120;AF-non_cancer-eas-XX=0.00000;nhomalt-non_cancer-eas-XX=0;AC-amr-XY=0;AN-amr-XY=8258;AF-amr-XY=0.00000;nhomalt-amr-XY=0;AC-non_v2-nfe-XX=0;AN-non_v2-nfe-XX=31546;AF-non_v2-nfe-XX=0.00000;nhomalt-non_v2-nfe-XX=0;AC-controls_and_biobanks-XY=0;AN-controls_and_biobanks-XY=18868;AF-controls_and_biobanks-XY=0.00000;nhomalt-controls_and_biobanks-XY=0;AC-non_ne
uro-asj-XY=0;AN-non_neuro-asj-XY=1536;AF-non_neuro-asj-XY=0.00000;nhomalt-non_neuro-asj-XY=0;AC-oth=0;AN-oth=2008;AF-oth=0.00000;nhomalt-oth=0;AC-non_topmed-mid-XY=0;AN-non_topmed-mid-XY=130;AF-non_topmed-mid-XY=0.00000;nhomalt-non_topmed-mid-XY=0;AC-non_cancer-asj-XX=0;AN-non_cancer-asj-XX=1730;AF-non_cancer-asj-XX=0.00000;nhomalt-non_cancer-asj-XX=0;AC-sas-XY=0;AN-sas-XY=3546;AF-sas-XY=0.00000;nhomalt-sas-XY=0;AC-non_neuro-fin=0;AN-non_neuro-fin=6858;AF-non_neuro-fin=0.00000;nhomalt-non_neuro-fin=0;AC-non_topmed-amr-XY=0;AN-non_topmed-amr-XY=7364;AF-non_topmed-amr-XY=0.00000;nhomalt-non_topmed-amr-XY=0;AC-non_neuro-XX=1;AN-non_neuro-XX=69114;AF-non_neuro-XX=1.44688e-05;nhomalt-non_neuro-XX=0;AC-fin-XX=0;AN-fin-XX=2506;AF-fin-XX=0.00000;nhomalt-fin-XX=0;AC-controls_and_biobanks-asj-XX=0;AN-controls_and_biobanks-asj-XX=84;AF-controls_and_biobanks-asj-XX=0.00000;nhomalt-controls_and_biobanks-asj-XX=0;AC-non_v2-raw=1;AN-non_v2-raw=114440;AF-non_v2-raw=8.73820e-06;nhomalt-non_v2-raw=0;AC-non_v2-asj=0;AN-non_v2-asj=3044;AF-non_v2-asj=0.00000;nhomalt-non_v2-asj=0;AC-nfe-XX=0;AN-nfe-XX=39022;AF-nfe-XX=0.00000;nhomalt-nfe-XX=0;AC-controls_and_biobanks-raw=0;AN-controls_and_biobanks-raw=32864;AF-controls_and_biobanks-raw=0.00000;nhomalt-controls_and_biobanks-raw=0;AC-controls_and_biobanks-ami=0;AN-controls_and_biobanks-ami=58;AF-controls_and_biobanks-ami=0.00000;nhomalt-controls_and_biobanks-ami=0;AC-non_topmed-eas=0;AN-non_topmed-eas=3540;AF-non_topmed-eas=0.00000;nhomalt-non_topmed-eas=0;AC-non_v2-amr=0;AN-non_v2-amr=13332;AF-non_v2-amr=0.00000;nhomalt-non_v2-amr=0;AC-non_neuro-sas=1;AN-non_neuro-sas=4676;AF-non_neuro-sas=0.000213858;nhomalt-non_neuro-sas=0;AC-non_cancer-fin-XY=0;AN-non_cancer-fin-XY=7900;AF-non_cancer-fin-XY=0.00000;nhomalt-non_cancer-fin-XY=0;AC-non_cancer-nfe-XY=0;AN-non_cancer-nfe-XY=26438;AF-non_cancer-nfe-XY=0.00000;nhomalt-non_cancer-nfe-XY=0;AC-non_v2-oth=0;AN-non_v2-oth=1780;AF-non_v2-oth=0.00000;nhomalt-non_v2-oth=0;AC-ami=0;AN-ami=902;AF-ami=0.
00000;nhomalt-ami=0;AC-non_cancer-XY=0;AN-non_cancer-XY=69802;AF-non_cancer-XY=0.00000;nhomalt-non_cancer-XY=0;AC-non_v2-sas=0;AN-non_v2-sas=3746;AF-non_v2-sas=0.00000;nhomalt-non_v2-sas=0;AC-non_topmed-afr-XX=1;AN-non_topmed-afr-XX=11042;AF-non_topmed-afr-XX=9.05633e-05;nhomalt-non_topmed-afr-XX=0;AC-sas=1;AN-sas=4678;AF-sas=0.000213767;nhomalt-sas=0;AC-non_neuro-nfe-XX=0;AN-non_neuro-nfe-XX=37352;AF-non_neuro-nfe-XX=0.00000;nhomalt-non_neuro-nfe-XX=0;AC-non_topmed-ami-XX=0;AN-non_topmed-ami-XX=64;AF-non_topmed-ami-XX=0.00000;nhomalt-non_topmed-ami-XX=0;AC-ami-XY=0;AN-ami-XY=436;AF-ami-XY=0.00000;nhomalt-ami-XY=0;AC-oth-XX=0;AN-oth-XX=996;AF-oth-XX=0.00000;nhomalt-oth-XX=0;AC-non_cancer-eas=0;AN-non_cancer-eas=4744;AF-non_cancer-eas=0.00000;nhomalt-non_cancer-eas=0;AC-non_topmed-XY=0;AN-non_topmed-XY=46034;AF-non_topmed-XY=0.00000;nhomalt-non_topmed-XY=0;AC-non_v2-ami=0;AN-non_v2-ami=900;AF-non_v2-ami=0.00000;nhomalt-non_v2-ami=0;AC-non_neuro=1;AN-non_neuro=131840;AF-non_neuro=7.58495e-06;nhomalt-non_neuro=0;AC-amr-XX=0;AN-amr-XX=6584;AF-amr-XX=0.00000;nhomalt-amr-XX=0;AC-controls_and_biobanks-nfe-XY=0;AN-controls_and_biobanks-nfe-XY=3528;AF-controls_and_biobanks-nfe-XY=0.00000;nhomalt-controls_and_biobanks-nfe-XY=0;AC-controls_and_biobanks-eas=0;AN-controls_and_biobanks-eas=2396;AF-controls_and_biobanks-eas=0.00000;nhomalt-controls_and_biobanks-eas=0;AC-XX=2;AN-XX=76046;AF-XX=2.62999e-05;nhomalt-XX=0;AC-non_cancer-oth-XY=0;AN-non_cancer-oth-XY=968;AF-non_cancer-oth-XY=0.00000;nhomalt-non_cancer-oth-XY=0;AC-non_v2-XY=0;AN-non_v2-XY=52690;AF-non_v2-XY=0.00000;nhomalt-non_v2-XY=0;AC-non_topmed-amr-XX=0;AN-non_topmed-amr-XX=5130;AF-non_topmed-amr-XX=0.00000;nhomalt-non_topmed-amr-XX=0;AC-fin=0;AN-fin=10406;AF-fin=0.00000;nhomalt-fin=0;AC-controls_and_biobanks-nfe-XX=0;AN-controls_and_biobanks-nfe-XX=3224;AF-controls_and_biobanks-nfe-XX=0.00000;nhomalt-controls_and_biobanks-nfe-XX=0;AC-controls_and_biobanks-afr=0;AN-controls_and_biobanks-afr=8532;AF-controls_and_bioban
ks-afr=0.00000;nhomalt-controls_and_biobanks-afr=0;AC-asj-XX=0;AN-asj-XX=1852;AF-asj-XX=0.00000;nhomalt-asj-XX=0;AC-non_topmed-mid=0;AN-non_topmed-mid=270;AF-non_topmed-mid=0.00000;nhomalt-non_topmed-mid=0;AC-non_cancer-sas-XY=0;AN-non_cancer-sas-XY=3532;AF-non_cancer-sas-XY=0.00000;nhomalt-non_cancer-sas-XY=0;AC-sas-XX=1;AN-sas-XX=1132;AF-sas-XX=0.000883392;nhomalt-sas-XX=0;AC-non_topmed=2;AN-non_topmed=77434;AF-non_topmed=2.58284e-05;nhomalt-non_topmed=0;AC-non_v2-oth-XX=0;AN-non_v2-oth-XX=898;AF-non_v2-oth-XX=0.00000;nhomalt-non_v2-oth-XX=0;AC-non_neuro-ami-XY=0;AN-non_neuro-ami-XY=426;AF-non_neuro-ami-XY=0.00000;nhomalt-non_neuro-ami-XY=0;AC-controls_and_biobanks-afr-XY=0;AN-controls_and_biobanks-afr-XY=4086;AF-controls_and_biobanks-afr-XY=0.00000;nhomalt-controls_and_biobanks-afr-XY=0;AC-controls_and_biobanks-amr-XX=0;AN-controls_and_biobanks-amr-XX=2432;AF-controls_and_biobanks-amr-XX=0.00000;nhomalt-controls_and_biobanks-amr-XX=0;AC-non_topmed-amr=0;AN-non_topmed-amr=12494;AF-non_topmed-amr=0.00000;nhomalt-non_topmed-amr=0;AC-controls_and_biobanks-sas-XX=0;AN-controls_and_biobanks-sas-XX=832;AF-controls_and_biobanks-sas-XX=0.00000;nhomalt-controls_and_biobanks-sas-XX=0;AC-controls_and_biobanks-amr=0;AN-controls_and_biobanks-amr=4590;AF-controls_and_biobanks-amr=0.00000;nhomalt-controls_and_biobanks-amr=0;AC-non_neuro-fin-XX=0;AN-non_neuro-fin-XX=648;AF-non_neuro-fin-XX=0.00000;nhomalt-non_neuro-fin-XX=0;AC-non_cancer-raw=2;AN-non_cancer-raw=147812;AF-non_cancer-raw=1.35307e-05;nhomalt-non_cancer-raw=0;AC-non_neuro-mid=0;AN-non_neuro-mid=308;AF-non_neuro-mid=0.00000;nhomalt-non_neuro-mid=0;AC-non_v2-asj-XY=0;AN-non_v2-asj-XY=1370;AF-non_v2-asj-XY=0.00000;nhomalt-non_v2-asj-XY=0;AC-non_v2-afr=1;AN-non_v2-afr=27044;AF-non_v2-afr=3.69768e-05;nhomalt-non_v2-afr=0;AC-non_neuro-fin-XY=0;AN-non_neuro-fin-XY=6210;AF-non_neuro-fin-XY=0.00000;nhomalt-non_neuro-fin-XY=0;AC-non_cancer-afr=1;AN-non_cancer-afr=39046;AF-non_cancer-afr=2.56108e-05;nhomalt-non_cancer-afr=0;AC-
non_topmed-sas-XY=0;AN-non_topmed-sas-XY=3524;AF-non_topmed-sas-XY=0.00000;nhomalt-non_topmed-sas-XY=0;AC-mid-XY=0;AN-mid-XY=152;AF-mid-XY=0.00000;nhomalt-mid-XY=0;AC-non_v2-oth-XY=0;AN-non_v2-oth-XY=882;AF-non_v2-oth-XY=0.00000;nhomalt-non_v2-oth-XY=0;AC-controls_and_biobanks-fin=0;AN-controls_and_biobanks-fin=5400;AF-controls_and_biobanks-fin=0.00000;nhomalt-controls_and_biobanks-fin=0;AC-non_neuro-eas-XY=0;AN-non_neuro-eas-XY=2752;AF-non_neuro-eas-XY=0.00000;nhomalt-non_neuro-eas-XY=0;AC-non_topmed-eas-XX=0;AN-non_topmed-eas-XX=1356;AF-non_topmed-eas-XX=0.00000;nhomalt-non_topmed-eas-XX=0;AC-non_v2-afr-XX=1;AN-non_v2-afr-XX=14962;AF-non_v2-afr-XX=6.68360e-05;nhomalt-non_v2-afr-XX=0;AC-non_neuro-amr-XX=0;AN-non_neuro-amr-XX=6398;AF-non_neuro-amr-XX=0.00000;nhomalt-non_neuro-amr-XX=0;AC-non_cancer-ami=0;AN-non_cancer-ami=902;AF-non_cancer-ami=0.00000;nhomalt-non_cancer-ami=0;AC-XY=0;AN-XY=72250;AF-XY=0.00000;nhomalt-XY=0;AC-non_topmed-asj-XX=0;AN-non_topmed-asj-XX=276;AF-non_topmed-asj-XX=0.00000;nhomalt-non_topmed-asj-XX=0;AC-non_topmed-eas-XY=0;AN-non_topmed-eas-XY=2184;AF-non_topmed-eas-XY=0.00000;nhomalt-non_topmed-eas-XY=0;AC-non_v2-eas-XY=0;AN-non_v2-eas-XY=1364;AF-non_v2-eas-XY=0.00000;nhomalt-non_v2-eas-XY=0;AC-eas=0;AN-eas=4974;AF-eas=0.00000;nhomalt-eas=0;AC-asj-XY=0;AN-asj-XY=1584;AF-asj-XY=0.00000;nhomalt-asj-XY=0;AC-non_v2-eas-XX=0;AN-non_v2-eas-XX=1262;AF-non_v2-eas-XX=0.00000;nhomalt-non_v2-eas-XX=0;AC-controls_and_biobanks-mid-XY=0;AN-controls_and_biobanks-mid-XY=112;AF-controls_and_biobanks-mid-XY=0.00000;nhomalt-controls_and_biobanks-mid-XY=0;AC-fin-XY=0;AN-fin-XY=7900;AF-fin-XY=0.00000;nhomalt-fin-XY=0;AC-non_topmed-nfe=0;AN-non_topmed-nfe=20666;AF-non_topmed-nfe=0.00000;nhomalt-non_topmed-nfe=0;AC-amr=0;AN-amr=14842;AF-amr=0.00000;nhomalt-amr=0;AC-non_neuro-ami=0;AN-non_neuro-ami=858;AF-non_neuro-ami=0.00000;nhomalt-non_neuro-ami=0;AC-non_cancer-nfe-XX=0;AN-non_cancer-nfe-XX=37772;AF-non_cancer-nfe-XX=0.00000;nhomalt-non_cancer-nfe-XX=0;AC-non_c
ancer-mid=0;AN-non_cancer-mid=304;AF-non_cancer-mid=0.00000;nhomalt-non_cancer-mid=0;AC-non_v2-mid-XY=0;AN-non_v2-mid-XY=146;AF-non_v2-mid-XY=0.00000;nhomalt-non_v2-mid-XY=0;AC-controls_and_biobanks-amr-XY=0;AN-controls_and_biobanks-amr-XY=2158;AF-controls_and_biobanks-amr-XY=0.00000;nhomalt-controls_and_biobanks-amr-XY=0;AC-non_cancer-ami-XY=0;AN-non_cancer-ami-XY=436;AF-non_cancer-ami-XY=0.00000;nhomalt-non_cancer-ami-XY=0;AC-non_neuro-asj-XX=0;AN-non_neuro-asj-XX=1816;AF-non_neuro-asj-XX=0.00000;nhomalt-non_neuro-asj-XX=0;AC-afr=1;AN-afr=39358;AF-afr=2.54078e-05;nhomalt-afr=0;AC-non_v2-sas-XX=0;AN-non_v2-sas-XX=760;AF-non_v2-sas-XX=0.00000;nhomalt-non_v2-sas-XX=0;AC-non_neuro-afr-XX=0;AN-non_neuro-afr-XX=17992;AF-non_neuro-afr-XX=0.00000;nhomalt-non_neuro-afr-XX=0;AC-non_cancer-sas=1;AN-non_cancer-sas=4646;AF-non_cancer-sas=0.000215239;nhomalt-non_cancer-sas=0;AC-non_topmed-fin=0;AN-non_topmed-fin=10314;AF-non_topmed-fin=0.00000;nhomalt-non_topmed-fin=0;AC-non_cancer-asj-XY=0;AN-non_cancer-asj-XY=1538;AF-non_cancer-asj-XY=0.00000;nhomalt-non_cancer-asj-XY=0;AC-non_cancer-mid-XY=0;AN-non_cancer-mid-XY=144;AF-non_cancer-mid-XY=0.00000;nhomalt-non_cancer-mid-XY=0;AC-raw=2;AN-raw=152032;AF-raw=1.31551e-05;nhomalt-raw=0;AC-non_topmed-XX=2;AN-non_topmed-XX=31400;AF-non_topmed-XX=6.36943e-05;nhomalt-non_topmed-XX=0;AC-ami-XX=0;AN-ami-XX=466;AF-ami-XX=0.00000;nhomalt-ami-XX=0;AC-eas-XY=0;AN-eas-XY=2752;AF-eas-XY=0.00000;nhomalt-eas-XY=0;AC-controls_and_biobanks-mid=0;AN-controls_and_biobanks-mid=246;AF-controls_and_biobanks-mid=0.00000;nhomalt-controls_and_biobanks-mid=0;AC-non_v2-nfe-XY=0;AN-non_v2-nfe-XY=19952;AF-non_v2-nfe-XY=0.00000;nhomalt-non_v2-nfe-XY=0;AC-controls_and_biobanks-sas=0;AN-controls_and_biobanks-sas=3046;AF-controls_and_biobanks-sas=0.00000;nhomalt-controls_and_biobanks-sas=0;AC-non_v2-eas=0;AN-non_v2-eas=2626;AF-non_v2-eas=0.00000;nhomalt-non_v2-eas=0;AC-mid=0;AN-mid=314;AF-mid=0.00000;nhomalt-mid=0;AC-oth-XY=0;AN-oth-XY=1012;AF-oth-XY=0.00000;nhomal
t-oth-XY=0;AC-non_cancer-nfe=0;AN-non_cancer-nfe=64210;AF-non_cancer-nfe=0.00000;nhomalt-non_cancer-nfe=0;AC-non_neuro-eas-XX=0;AN-non_neuro-eas-XX=2222;AF-non_neuro-eas-XX=0.00000;nhomalt-non_neuro-eas-XX=0;AC-non_neuro-sas-XY=0;AN-non_neuro-sas-XY=3544;AF-non_neuro-sas-XY=0.00000;nhomalt-non_neuro-sas-XY=0;AC-non_cancer-ami-XX=0;AN-non_cancer-ami-XX=466;AF-non_cancer-ami-XX=0.00000;nhomalt-non_cancer-ami-XX=0;AC-mid-XX=0;AN-mid-XX=162;AF-mid-XX=0.00000;nhomalt-mid-XX=0;AC-non_topmed-asj=0;AN-non_topmed-asj=990;AF-non_topmed-asj=0.00000;nhomalt-non_topmed-asj=0;AC-non_v2-asj-XX=0;AN-non_v2-asj-XX=1674;AF-non_v2-asj-XX=0.00000;nhomalt-non_v2-asj-XX=0;nhomalt=0;AC-non_v2-amr-XY=0;AN-non_v2-amr-XY=7490;AF-non_v2-amr-XY=0.00000;nhomalt-non_v2-amr-XY=0;AC-non_cancer-amr-XX=0;AN-non_cancer-amr-XX=6534;AF-non_cancer-amr-XX=0.00000;nhomalt-non_cancer-amr-XX=0;AC-controls_and_biobanks-afr-XX=0;AN-controls_and_biobanks-afr-XX=4446;AF-controls_and_biobanks-afr-XX=0.00000;nhomalt-controls_and_biobanks-afr-XX=0;AC-asj=0;AN-asj=3436;AF-asj=0.00000;nhomalt-asj=0;AC-non_topmed-asj-XY=0;AN-non_topmed-asj-XY=714;AF-non_topmed-asj-XY=0.00000;nhomalt-non_topmed-asj-XY=0;AC-non_v2-fin-XX=0;AN-non_v2-fin-XX=1148;AF-non_v2-fin-XX=0.00000;nhomalt-non_v2-fin-XX=0;AC-non_topmed-ami=0;AN-non_topmed-ami=104;AF-non_topmed-ami=0.00000;nhomalt-non_topmed-ami=0;AC-controls_and_biobanks-eas-XX=0;AN-controls_and_biobanks-eas-XX=1020;AF-controls_and_biobanks-eas-XX=0.00000;nhomalt-controls_and_biobanks-eas-XX=0;AC-controls_and_biobanks-fin-XX=0;AN-controls_and_biobanks-fin-XX=486;AF-controls_and_biobanks-fin-XX=0.00000;nhomalt-controls_and_biobanks-fin-XX=0;AC-non_topmed-raw=2;AN-non_topmed-raw=80614;AF-non_topmed-raw=2.48096e-05;nhomalt-non_topmed-raw=0;AC-non_cancer-eas-XY=0;AN-non_cancer-eas-XY=2624;AF-non_cancer-eas-XY=0.00000;nhomalt-non_cancer-eas-XY=0;AC-non_cancer=2;AN-non_cancer=144126;AF-non_cancer=1.38767e-05;nhomalt-non_cancer=0;AC-controls_and_biobanks-ami-XY=0;AN-controls_and_biobanks-
ami-XY=28;AF-controls_and_biobanks-ami-XY=0.00000;nhomalt-controls_and_biobanks-ami-XY=0;AC-controls_and_biobanks-mid-XX=0;AN-controls_and_biobanks-mid-XX=134;AF-controls_and_biobanks-mid-XX=0.00000;nhomalt-controls_and_biobanks-mid-XX=0;AC-non_v2-afr-XY=0;AN-non_v2-afr-XY=12082;AF-non_v2-afr-XY=0.00000;nhomalt-non_v2-afr-XY=0;AC-non_v2-sas-XY=0;AN-non_v2-sas-XY=2986;AF-non_v2-sas-XY=0.00000;nhomalt-non_v2-sas-XY=0;AC-non_v2-fin=0;AN-non_v2-fin=7132;AF-non_v2-fin=0.00000;nhomalt-non_v2-fin=0;AC-non_neuro-oth=0;AN-non_neuro-oth=1918;AF-non_neuro-oth=0.00000;nhomalt-non_neuro-oth=0;AC-non_cancer-sas-XX=1;AN-non_cancer-sas-XX=1114;AF-non_cancer-sas-XX=0.000897666;nhomalt-non_cancer-sas-XX=0;AC-non_neuro-asj=0;AN-non_neuro-asj=3352;AF-non_neuro-asj=0.00000;nhomalt-non_neuro-asj=0;AC-non_topmed-afr=1;AN-non_topmed-afr=22970;AF-non_topmed-afr=4.35350e-05;nhomalt-non_topmed-afr=0;AC-non_topmed-afr-XY=0;AN-non_topmed-afr-XY=11928;AF-non_topmed-afr-XY=0.00000;nhomalt-non_topmed-afr-XY=0;AC-non_neuro-eas=0;AN-non_neuro-eas=4974;AF-non_neuro-eas=0.00000;nhomalt-non_neuro-eas=0;AC-afr-XX=1;AN-afr-XX=21104;AF-afr-XX=4.73844e-05;nhomalt-afr-XX=0;AC-non_neuro-mid-XY=0;AN-non_neuro-mid-XY=146;AF-non_neuro-mid-XY=0.00000;nhomalt-non_neuro-mid-XY=0;AC-non_topmed-fin-XX=0;AN-non_topmed-fin-XX=2448;AF-non_topmed-fin-XX=0.00000;nhomalt-non_topmed-fin-XX=0;AC-non_cancer-amr=0;AN-non_cancer-amr=14660;AF-non_cancer-amr=0.00000;nhomalt-non_cancer-amr=0;AC-non_v2-ami-XX=0;AN-non_v2-ami-XX=466;AF-non_v2-ami-XX=0.00000;nhomalt-non_v2-ami-XX=0;AC-afr-XY=0;AN-afr-XY=18254;AF-afr-XY=0.00000;nhomalt-afr-XY=0;AC-non_v2-mid-XX=0;AN-non_v2-mid-XX=162;AF-non_v2-mid-XX=0.00000;nhomalt-non_v2-mid-XX=0;AC-non_topmed-fin-XY=0;AN-non_topmed-fin-XY=7866;AF-non_topmed-fin-XY=0.00000;nhomalt-non_topmed-fin-XY=0;AC-non_neuro-amr-XY=0;AN-non_neuro-amr-XY=8004;AF-non_neuro-amr-XY=0.00000;nhomalt-non_neuro-amr-XY=0;AC-non_topmed-mid-XX=0;AN-non_topmed-mid-XX=140;AF-non_topmed-mid-XX=0.00000;nhomalt-non_topmed-mid
-XX=0;AC-controls_and_biobanks-asj-XY=0;AN-controls_and_biobanks-asj-XY=50;AF-controls_and_biobanks-asj-XY=0.00000;nhomalt-controls_and_biobanks-asj-XY=0;AC-non_v2-fin-XY=0;AN-non_v2-fin-XY=5984;AF-non_v2-fin-XY=0.00000;nhomalt-non_v2-fin-XY=0;AC-controls_and_biobanks-ami-XX=0;AN-controls_and_biobanks-ami-XX=30;AF-controls_and_biobanks-ami-XX=0.00000;nhomalt-controls_and_biobanks-ami-XX=0;AC-eas-XX=0;AN-eas-XX=2222;AF-eas-XX=0.00000;nhomalt-eas-XX=0;AC-non_cancer-amr-XY=0;AN-non_cancer-amr-XY=8126;AF-non_cancer-amr-XY=0.00000;nhomalt-non_cancer-amr-XY=0;AC-non_neuro-ami-XX=0;AN-non_neuro-ami-XX=432;AF-non_neuro-ami-XX=0.00000;nhomalt-non_neuro-ami-XX=0;AC-controls_and_biobanks=0;AN-controls_and_biobanks=31908;AF-controls_and_biobanks=0.00000;nhomalt-controls_and_biobanks=0;AC-controls_and_biobanks-oth=0;AN-controls_and_biobanks-oth=754;AF-controls_and_biobanks-oth=0.00000;nhomalt-controls_and_biobanks-oth=0;AC-nfe-XY=0;AN-nfe-XY=28356;AF-nfe-XY=0.00000;nhomalt-nfe-XY=0;AC-non_cancer-afr-XX=1;AN-non_cancer-afr-XX=20950;AF-non_cancer-afr-XX=4.77327e-05;nhomalt-non_cancer-afr-XX=0;AC-controls_and_biobanks-sas-XY=0;AN-controls_and_biobanks-sas-XY=2214;AF-controls_and_biobanks-sas-XY=0.00000;nhomalt-controls_and_biobanks-sas-XY=0;AC-non_cancer-oth=0;AN-non_cancer-oth=1940;AF-non_cancer-oth=0.00000;nhomalt-non_cancer-oth=0;AC-non_topmed-oth=0;AN-non_topmed-oth=1436;AF-non_topmed-oth=0.00000;nhomalt-non_topmed-oth=0;AC-non_topmed-nfe-XY=0;AN-non_topmed-nfe-XY=11502;AF-non_topmed-nfe-XY=0.00000;nhomalt-non_topmed-nfe-XY=0;AC-non_topmed-sas-XX=1;AN-non_topmed-sas-XX=1126;AF-non_topmed-sas-XX=0.000888099;nhomalt-non_topmed-sas-XX=0;AC-non_v2-nfe=0;AN-non_v2-nfe=51498;AF-non_v2-nfe=0.00000;nhomalt-non_v2-nfe=0;AC-non_topmed-oth-XX=0;AN-non_topmed-oth-XX=654;AF-non_topmed-oth-XX=0.00000;nhomalt-non_topmed-oth-XX=0;AC-non_cancer-mid-XX=0;AN-non_cancer-mid-XX=160;AF-non_cancer-mid-XX=0.00000;nhomalt-non_cancer-mid-XX=0;AC-controls_and_biobanks-nfe=0;AN-controls_and_biobanks-nfe=6
752;AF-controls_and_biobanks-nfe=0.00000;nhomalt-controls_and_biobanks-nfe=0;AC-controls_and_biobanks-oth-XY=0;AN-controls_and_biobanks-oth-XY=402;AF-controls_and_biobanks-oth-XY=0.00000;nhomalt-controls_and_biobanks-oth-XY=0;AC-controls_and_biobanks-fin-XY=0;AN-controls_and_biobanks-fin-XY=4914;AF-controls_and_biobanks-fin-XY=0.00000;nhomalt-controls_and_biobanks-fin-XY=0;AC-non_v2-amr-XX=0;AN-non_v2-amr-XX=5842;AF-non_v2-amr-XX=0.00000;nhomalt-non_v2-amr-XX=0;AC-non_cancer-asj=0;AN-non_cancer-asj=3268;AF-non_cancer-asj=0.00000;nhomalt-non_cancer-asj=0;AC-non_cancer-oth-XX=0;AN-non_cancer-oth-XX=972;AF-non_cancer-oth-XX=0.00000;nhomalt-non_cancer-oth-XX=0;AC-non_neuro-amr=0;AN-non_neuro-amr=14402;AF-non_neuro-amr=0.00000;nhomalt-non_neuro-amr=0;AC-non_cancer-XX=2;AN-non_cancer-XX=74324;AF-non_cancer-XX=2.69092e-05;nhomalt-non_cancer-XX=0;AC-non_v2-ami-XY=0;AN-non_v2-ami-XY=434;AF-non_v2-ami-XY=0.00000;nhomalt-non_v2-ami-XY=0;AC-non_neuro-raw=1;AN-non_neuro-raw=134658;AF-non_neuro-raw=7.42622e-06;nhomalt-non_neuro-raw=0;AC-non_neuro-afr=0;AN-non_neuro-afr=31130;AF-non_neuro-afr=0.00000;nhomalt-non_neuro-afr=0;AC-non_topmed-ami-XY=0;AN-non_topmed-ami-XY=40;AF-non_topmed-ami-XY=0.00000;nhomalt-non_topmed-ami-XY=0;AC-non_neuro-oth-XY=0;AN-non_neuro-oth-XY=958;AF-non_neuro-oth-XY=0.00000;nhomalt-non_neuro-oth-XY=0;AC-non_neuro-oth-XX=0;AN-non_neuro-oth-XX=960;AF-non_neuro-oth-XX=0.00000;nhomalt-non_neuro-oth-XX=0;AC-controls_and_biobanks-XX=0;AN-controls_and_biobanks-XX=13040;AF-controls_and_biobanks-XX=0.00000;nhomalt-controls_and_biobanks-XX=0;AC-non_cancer-afr-XY=0;AN-non_cancer-afr-XY=18096;AF-non_cancer-afr-XY=0.00000;nhomalt-non_cancer-afr-XY=0;AC-non_cancer-fin=0;AN-non_cancer-fin=10406;AF-non_cancer-fin=0.00000;nhomalt-non_cancer-fin=0;AC-controls_and_biobanks-asj=0;AN-controls_and_biobanks-asj=134;AF-controls_and_biobanks-asj=0.00000;nhomalt-controls_and_biobanks-asj=0;AC-non_topmed-oth-XY=0;AN-non_topmed-oth-XY=782;AF-non_topmed-oth-XY=0.00000;nhomalt-non_topm
ed-oth-XY=0;AC-non_neuro-mid-XX=0;AN-non_neuro-mid-XX=162;AF-non_neuro-mid-XX=0.00000;nhomalt-non_neuro-mid-XX=0;AC-controls_and_biobanks-oth-XX=0;AN-controls_and_biobanks-oth-XX=352;AF-controls_and_biobanks-oth-XX=0.00000;nhomalt-controls_and_biobanks-oth-XX=0;AC-non_neuro-XY=0;AN-non_neuro-XY=62726;AF-non_neuro-XY=0.00000;nhomalt-non_neuro-XY=0;AC-nfe=0;AN-nfe=67378;AF-nfe=0.00000;nhomalt-nfe=0;AC_popmax=1;AN_popmax=4678;AF_popmax=0.000213767;nhomalt_popmax=0;faf95-sas=0.00000;faf99-sas=0.00000;faf95-eas=0.00000;faf99-eas=0.00000;faf95-amr=0.00000;faf99-amr=0.00000;faf95-afr=0.00000;faf99-afr=0.00000;faf95=2.24000e-06;faf99=8.40000e-07;faf95-nfe=0.00000;faf99-nfe=0.00000;age_hist_het_bin_freq=0|0|0|0|0|1|0|0|0|0;age_hist_het_n_smaller=0;age_hist_het_n_larger=0;age_hist_hom_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_hom_n_smaller=0;age_hist_hom_n_larger=0;FS=.;MQ=29.1577;MQRankSum=1.04300;QD=6.02439;ReadPosRankSum=0.231000;VarDP=41;QUALapprox=247;AS_FS=.;AS_MQ=29.1577;AS_MQRankSum=1.04300;AS_pab_max=0.266846;AS_QD=6.02439;AS_ReadPosRankSum=0.231000;AS_SOR=0.172084;InbreedingCoeff=-1.32322e-05;AS_VQSLOD=-3.89780;AS_culprit=AS_MQ;allele_type=snv;n_alt_alleles=1;variant_type=snv;segdup;gq_hist_alt_bin_freq=0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|1|0|1;gq_hist_all_bin_freq=0|0|0|0|40506|14946|9410|4551|2182|1256|619|214|153|96|57|56|36|18|12|36;dp_hist_alt_bin_freq=0|0|1|0|0|1|0|0|0|0|0|0|0|0|0|0|0|0|0|0;dp_hist_alt_n_larger=0;dp_hist_all_bin_freq=0|0|7574|30642|19321|11617|4452|425|69|16|14|4|6|4|2|1|0|0|1|0;dp_hist_all_n_smaller=0;dp_hist_all_n_larger=0;ab_hist_alt_bin_freq=0|0|0|0|0|1|1|0|0|0|0|0|0|0|0|0|0|0|0|0;cadd_raw_score=0.476082;cadd_phred=6.25300;vep=C|upstream_gene_variant|MODIFIER|OR11H1|ENSG00000130538|Transcript|ENST00000252835|protein_coding||||||||||1|53|1|SNV||HGNC|HGNC:15404|YES||P2|CCDS74807.1|ENSP00000252835||||||||||||||||,C|upstream_gene_variant|MODIFIER|OR11H1|ENSG00000130538|Transcript|ENST00000643195|protein_coding||||||||||1|86|1|SNV||HGNC|HGNC:15404|||
A2||ENSP00000495403||||||||||||||||,C|upstream_gene_variant|MODIFIER|OR11H1|81061|Transcript|NM_001005239.1|protein_coding||||||||||1|53|1|SNV||EntrezGene|HGNC:15404|YES||||NP_001005239.1||||||||||||||||\");\n            writer.WriteLine(\"chr22\\t15528109\\trs755148717\\tT\\tG\\t.\\tAS_VQSR\\tAC=222;AN=147506;AF=0.00150502;popmax=afr;faf95_popmax=0.00503537;AC-non_v2-XX=88;AN-non_v2-XX=58422;AF-non_v2-XX=0.00150628;nhomalt-non_v2-XX=0;AC-non_cancer-fin-XX=0;AN-non_cancer-fin-XX=2524;AF-non_cancer-fin-XX=0.00000;nhomalt-non_cancer-fin-XX=0;AC-non_neuro-nfe=0;AN-non_neuro-nfe=63442;AF-non_neuro-nfe=0.00000;nhomalt-non_neuro-nfe=0;AC-non_neuro-afr-XY=63;AN-non_neuro-afr-XY=12798;AF-non_neuro-afr-XY=0.00492264;nhomalt-non_neuro-afr-XY=0;AC-non_neuro-nfe-XY=0;AN-non_neuro-nfe-XY=26044;AF-non_neuro-nfe-XY=0.00000;nhomalt-non_neuro-nfe-XY=0;AC-controls_and_biobanks-eas-XY=0;AN-controls_and_biobanks-eas-XY=1376;AF-controls_and_biobanks-eas-XY=0.00000;nhomalt-controls_and_biobanks-eas-XY=0;AC-non_neuro-sas-XX=0;AN-non_neuro-sas-XX=1138;AF-non_neuro-sas-XX=0.00000;nhomalt-non_neuro-sas-XX=0;AC-non_v2=138;AN-non_v2=110904;AF-non_v2=0.00124432;nhomalt-non_v2=0;AC-non_topmed-nfe-XX=0;AN-non_topmed-nfe-XX=9198;AF-non_topmed-nfe-XX=0.00000;nhomalt-non_topmed-nfe-XX=0;AC-non_v2-mid=0;AN-non_v2-mid=308;AF-non_v2-mid=0.00000;nhomalt-non_v2-mid=0;AC-non_topmed-sas=0;AN-non_topmed-sas=4672;AF-non_topmed-sas=0.00000;nhomalt-non_topmed-sas=0;AC-non_cancer-eas-XX=0;AN-non_cancer-eas-XX=2124;AF-non_cancer-eas-XX=0.00000;nhomalt-non_cancer-eas-XX=0;AC-amr-XY=2;AN-amr-XY=8270;AF-amr-XY=0.000241838;nhomalt-amr-XY=0;AC-non_v2-nfe-XX=0;AN-non_v2-nfe-XX=31582;AF-non_v2-nfe-XX=0.00000;nhomalt-non_v2-nfe-XX=0;AC-controls_and_biobanks-XY=23;AN-controls_and_biobanks-XY=18818;AF-controls_and_biobanks-XY=0.00122223;nhomalt-controls_and_biobanks-XY=0;AC-non_neuro-asj-XY=0;AN-non_neuro-asj-XY=1534;AF-non_neuro-asj-XY=0.00000;nhomalt-non_neuro-asj-XY=0;AC-oth=1;AN-oth=2004;AF-oth=0.000499002;nhomalt-oth
=0;AC-non_topmed-mid-XY=0;AN-non_topmed-mid-XY=130;AF-non_topmed-mid-XY=0.00000;nhomalt-non_topmed-mid-XY=0;AC-non_cancer-asj-XX=0;AN-non_cancer-asj-XX=1732;AF-non_cancer-asj-XX=0.00000;nhomalt-non_cancer-asj-XX=0;AC-sas-XY=0;AN-sas-XY=3562;AF-sas-XY=0.00000;nhomalt-sas-XY=0;AC-non_neuro-fin=0;AN-non_neuro-fin=6870;AF-non_neuro-fin=0.00000;nhomalt-non_neuro-fin=0;AC-non_topmed-amr-XY=2;AN-non_topmed-amr-XY=7376;AF-non_topmed-amr-XY=0.000271150;nhomalt-non_topmed-amr-XY=0;AC-non_neuro-XX=122;AN-non_neuro-XX=68724;AF-non_neuro-XX=0.00177522;nhomalt-non_neuro-XX=0;AC-fin-XX=0;AN-fin-XX=2524;AF-fin-XX=0.00000;nhomalt-fin-XX=0;AC-controls_and_biobanks-asj-XX=0;AN-controls_and_biobanks-asj-XX=84;AF-controls_and_biobanks-asj-XX=0.00000;nhomalt-controls_and_biobanks-asj-XX=0;AC-non_v2-raw=266;AN-non_v2-raw=113936;AF-non_v2-raw=0.00233464;nhomalt-non_v2-raw=2;AC-non_v2-asj=0;AN-non_v2-asj=3044;AF-non_v2-asj=0.00000;nhomalt-non_v2-asj=0;AC-nfe-XX=0;AN-nfe-XX=39080;AF-nfe-XX=0.00000;nhomalt-nfe-XX=0;AC-controls_and_biobanks-raw=109;AN-controls_and_biobanks-raw=32720;AF-controls_and_biobanks-raw=0.00333130;nhomalt-controls_and_biobanks-raw=0;AC-controls_and_biobanks-ami=0;AN-controls_and_biobanks-ami=58;AF-controls_and_biobanks-ami=0.00000;nhomalt-controls_and_biobanks-ami=0;AC-non_topmed-eas=0;AN-non_topmed-eas=3550;AF-non_topmed-eas=0.00000;nhomalt-non_topmed-eas=0;AC-non_v2-amr=4;AN-non_v2-amr=13344;AF-non_v2-amr=0.000299760;nhomalt-non_v2-amr=0;AC-non_neuro-sas=0;AN-non_neuro-sas=4698;AF-non_neuro-sas=0.00000;nhomalt-non_neuro-sas=0;AC-non_cancer-fin-XY=0;AN-non_cancer-fin-XY=7912;AF-non_cancer-fin-XY=0.00000;nhomalt-non_cancer-fin-XY=0;AC-non_cancer-nfe-XY=0;AN-non_cancer-nfe-XY=26472;AF-non_cancer-nfe-XY=0.00000;nhomalt-non_cancer-nfe-XY=0;AC-non_v2-oth=1;AN-non_v2-oth=1778;AF-non_v2-oth=0.000562430;nhomalt-non_v2-oth=0;AC-ami=0;AN-ami=904;AF-ami=0.00000;nhomalt-ami=0;AC-non_cancer-XY=85;AN-non_cancer-XY=69406;AF-non_cancer-XY=0.00122468;nhomalt-non_cancer-XY=0;AC-non_v2-
sas=0;AN-non_v2-sas=3766;AF-non_v2-sas=0.00000;nhomalt-non_v2-sas=0;AC-non_topmed-afr-XX=75;AN-non_topmed-afr-XX=10846;AF-non_topmed-afr-XX=0.00691499;nhomalt-non_topmed-afr-XX=0;AC-sas=0;AN-sas=4700;AF-sas=0.00000;nhomalt-sas=0;AC-non_neuro-nfe-XX=0;AN-non_neuro-nfe-XX=37398;AF-non_neuro-nfe-XX=0.00000;nhomalt-non_neuro-nfe-XX=0;AC-non_topmed-ami-XX=0;AN-non_topmed-ami-XX=64;AF-non_topmed-ami-XX=0.00000;nhomalt-non_topmed-ami-XX=0;AC-ami-XY=0;AN-ami-XY=438;AF-ami-XY=0.00000;nhomalt-ami-XY=0;AC-oth-XX=1;AN-oth-XX=990;AF-oth-XX=0.00101010;nhomalt-oth-XX=0;AC-non_cancer-eas=0;AN-non_cancer-eas=4762;AF-non_cancer-eas=0.00000;nhomalt-non_cancer-eas=0;AC-non_topmed-XY=59;AN-non_topmed-XY=45836;AF-non_topmed-XY=0.00128720;nhomalt-non_topmed-XY=0;AC-non_v2-ami=0;AN-non_v2-ami=902;AF-non_v2-ami=0.00000;nhomalt-non_v2-ami=0;AC-non_neuro=187;AN-non_neuro=131196;AF-non_neuro=0.00142535;nhomalt-non_neuro=0;AC-amr-XX=2;AN-amr-XX=6588;AF-amr-XX=0.000303582;nhomalt-amr-XX=0;AC-controls_and_biobanks-nfe-XY=0;AN-controls_and_biobanks-nfe-XY=3532;AF-controls_and_biobanks-nfe-XY=0.00000;nhomalt-controls_and_biobanks-nfe-XY=0;AC-controls_and_biobanks-eas=0;AN-controls_and_biobanks-eas=2398;AF-controls_and_biobanks-eas=0.00000;nhomalt-controls_and_biobanks-eas=0;AC-XX=137;AN-XX=75644;AF-XX=0.00181112;nhomalt-XX=0;AC-non_cancer-oth-XY=0;AN-non_cancer-oth-XY=968;AF-non_cancer-oth-XY=0.00000;nhomalt-non_cancer-oth-XY=0;AC-non_v2-XY=50;AN-non_v2-XY=52482;AF-non_v2-XY=0.000952708;nhomalt-non_v2-XY=0;AC-non_topmed-amr-XX=2;AN-non_topmed-amr-XX=5134;AF-non_topmed-amr-XX=0.000389560;nhomalt-non_topmed-amr-XX=0;AC-fin=0;AN-fin=10436;AF-fin=0.00000;nhomalt-fin=0;AC-controls_and_biobanks-nfe-XX=0;AN-controls_and_biobanks-nfe-XX=3228;AF-controls_and_biobanks-nfe-XX=0.00000;nhomalt-controls_and_biobanks-nfe-XX=0;AC-controls_and_biobanks-afr=69;AN-controls_and_biobanks-afr=8388;AF-controls_and_biobanks-afr=0.00822604;nhomalt-controls_and_biobanks-afr=0;AC-asj-XX=0;AN-asj-XX=1854;AF-asj-XX=0.00000;nho
malt-asj-XX=0;AC-non_topmed-mid=0;AN-non_topmed-mid=270;AF-non_topmed-mid=0.00000;nhomalt-non_topmed-mid=0;AC-non_cancer-sas-XY=0;AN-non_cancer-sas-XY=3548;AF-non_cancer-sas-XY=0.00000;nhomalt-non_cancer-sas-XY=0;AC-sas-XX=0;AN-sas-XX=1138;AF-sas-XX=0.00000;nhomalt-sas-XX=0;AC-non_topmed=137;AN-non_topmed=77092;AF-non_topmed=0.00177710;nhomalt-non_topmed=0;AC-non_v2-oth-XX=1;AN-non_v2-oth-XX=894;AF-non_v2-oth-XX=0.00111857;nhomalt-non_v2-oth-XX=0;AC-non_neuro-ami-XY=0;AN-non_neuro-ami-XY=426;AF-non_neuro-ami-XY=0.00000;nhomalt-non_neuro-ami-XY=0;AC-controls_and_biobanks-afr-XY=21;AN-controls_and_biobanks-afr-XY=4012;AF-controls_and_biobanks-afr-XY=0.00523430;nhomalt-controls_and_biobanks-afr-XY=0;AC-controls_and_biobanks-amr-XX=2;AN-controls_and_biobanks-amr-XX=2424;AF-controls_and_biobanks-amr-XX=0.000825083;nhomalt-controls_and_biobanks-amr-XX=0;AC-non_topmed-amr=4;AN-non_topmed-amr=12510;AF-non_topmed-amr=0.000319744;nhomalt-non_topmed-amr=0;AC-controls_and_biobanks-sas-XX=0;AN-controls_and_biobanks-sas-XX=836;AF-controls_and_biobanks-sas-XX=0.00000;nhomalt-controls_and_biobanks-sas-XX=0;AC-controls_and_biobanks-amr=4;AN-controls_and_biobanks-amr=4594;AF-controls_and_biobanks-amr=0.000870701;nhomalt-controls_and_biobanks-amr=0;AC-non_neuro-fin-XX=0;AN-non_neuro-fin-XX=650;AF-non_neuro-fin-XX=0.00000;nhomalt-non_neuro-fin-XX=0;AC-non_cancer-raw=418;AN-non_cancer-raw=147154;AF-non_cancer-raw=0.00284056;nhomalt-non_cancer-raw=2;AC-non_neuro-mid=0;AN-non_neuro-mid=308;AF-non_neuro-mid=0.00000;nhomalt-non_neuro-mid=0;AC-non_v2-asj-XY=0;AN-non_v2-asj-XY=1368;AF-non_v2-asj-XY=0.00000;nhomalt-non_v2-asj-XY=0;AC-non_v2-afr=133;AN-non_v2-afr=26392;AF-non_v2-afr=0.00503941;nhomalt-non_v2-afr=0;AC-non_neuro-fin-XY=0;AN-non_neuro-fin-XY=6220;AF-non_neuro-fin-XY=0.00000;nhomalt-non_neuro-fin-XY=0;AC-non_cancer-afr=217;AN-non_cancer-afr=38086;AF-non_cancer-afr=0.00569763;nhomalt-non_cancer-afr=0;AC-non_topmed-sas-XY=0;AN-non_topmed-sas-XY=3540;AF-non_topmed-sas-XY=0.00000;nhoma
lt-non_topmed-sas-XY=0;AC-mid-XY=0;AN-mid-XY=152;AF-mid-XY=0.00000;nhomalt-mid-XY=0;AC-non_v2-oth-XY=0;AN-non_v2-oth-XY=884;AF-non_v2-oth-XY=0.00000;nhomalt-non_v2-oth-XY=0;AC-controls_and_biobanks-fin=0;AN-controls_and_biobanks-fin=5404;AF-controls_and_biobanks-fin=0.00000;nhomalt-controls_and_biobanks-fin=0;AC-non_neuro-eas-XY=0;AN-non_neuro-eas-XY=2766;AF-non_neuro-eas-XY=0.00000;nhomalt-non_neuro-eas-XY=0;AC-non_topmed-eas-XX=0;AN-non_topmed-eas-XX=1352;AF-non_topmed-eas-XX=0.00000;nhomalt-non_topmed-eas-XX=0;AC-non_v2-afr-XX=85;AN-non_v2-afr-XX=14598;AF-non_v2-afr-XX=0.00582272;nhomalt-non_v2-afr-XX=0;AC-non_neuro-amr-XX=2;AN-non_neuro-amr-XX=6402;AF-non_neuro-amr-XX=0.000312402;nhomalt-non_neuro-amr-XX=0;AC-non_cancer-ami=0;AN-non_cancer-ami=904;AF-non_cancer-ami=0.00000;nhomalt-non_cancer-ami=0;AC-XY=85;AN-XY=71862;AF-XY=0.00118282;nhomalt-XY=0;AC-non_topmed-asj-XX=0;AN-non_topmed-asj-XX=276;AF-non_topmed-asj-XX=0.00000;nhomalt-non_topmed-asj-XX=0;AC-non_topmed-eas-XY=0;AN-non_topmed-eas-XY=2198;AF-non_topmed-eas-XY=0.00000;nhomalt-non_topmed-eas-XY=0;AC-non_v2-eas-XY=0;AN-non_v2-eas-XY=1378;AF-non_v2-eas-XY=0.00000;nhomalt-non_v2-eas-XY=0;AC-eas=0;AN-eas=4986;AF-eas=0.00000;nhomalt-eas=0;AC-asj-XY=0;AN-asj-XY=1582;AF-asj-XY=0.00000;nhomalt-asj-XY=0;AC-non_v2-eas-XX=0;AN-non_v2-eas-XX=1262;AF-non_v2-eas-XX=0.00000;nhomalt-non_v2-eas-XX=0;AC-controls_and_biobanks-mid-XY=0;AN-controls_and_biobanks-mid-XY=112;AF-controls_and_biobanks-mid-XY=0.00000;nhomalt-controls_and_biobanks-mid-XY=0;AC-fin-XY=0;AN-fin-XY=7912;AF-fin-XY=0.00000;nhomalt-fin-XY=0;AC-non_topmed-nfe=0;AN-non_topmed-nfe=20720;AF-non_topmed-nfe=0.00000;nhomalt-non_topmed-nfe=0;AC-amr=4;AN-amr=14858;AF-amr=0.000269215;nhomalt-amr=0;AC-non_neuro-ami=0;AN-non_neuro-ami=858;AF-non_neuro-ami=0.00000;nhomalt-non_neuro-ami=0;AC-non_cancer-nfe-XX=0;AN-non_cancer-nfe-XX=37828;AF-non_cancer-nfe-XX=0.00000;nhomalt-non_cancer-nfe-XX=0;AC-non_cancer-mid=0;AN-non_cancer-mid=304;AF-non_cancer-mid=0.00000;nhomalt-
non_cancer-mid=0;AC-non_v2-mid-XY=0;AN-non_v2-mid-XY=146;AF-non_v2-mid-XY=0.00000;nhomalt-non_v2-mid-XY=0;AC-controls_and_biobanks-amr-XY=2;AN-controls_and_biobanks-amr-XY=2170;AF-controls_and_biobanks-amr-XY=0.000921659;nhomalt-controls_and_biobanks-amr-XY=0;AC-non_cancer-ami-XY=0;AN-non_cancer-ami-XY=438;AF-non_cancer-ami-XY=0.00000;nhomalt-non_cancer-ami-XY=0;AC-non_neuro-asj-XX=0;AN-non_neuro-asj-XX=1818;AF-non_neuro-asj-XX=0.00000;nhomalt-non_neuro-asj-XX=0;AC-afr=217;AN-afr=38392;AF-afr=0.00565222;nhomalt-afr=0;AC-non_v2-sas-XX=0;AN-non_v2-sas-XX=766;AF-non_v2-sas-XX=0.00000;nhomalt-non_v2-sas-XX=0;AC-non_neuro-afr-XX=119;AN-non_neuro-afr-XX=17550;AF-non_neuro-afr-XX=0.00678063;nhomalt-non_neuro-afr-XX=0;AC-non_cancer-sas=0;AN-non_cancer-sas=4668;AF-non_cancer-sas=0.00000;nhomalt-non_cancer-sas=0;AC-non_topmed-fin=0;AN-non_topmed-fin=10344;AF-non_topmed-fin=0.00000;nhomalt-non_topmed-fin=0;AC-non_cancer-asj-XY=0;AN-non_cancer-asj-XY=1536;AF-non_cancer-asj-XY=0.00000;nhomalt-non_cancer-asj-XY=0;AC-non_cancer-mid-XY=0;AN-non_cancer-mid-XY=144;AF-non_cancer-mid-XY=0.00000;nhomalt-non_cancer-mid-XY=0;AC-raw=421;AN-raw=151380;AF-raw=0.00278108;nhomalt-raw=2;AC-non_topmed-XX=78;AN-non_topmed-XX=31256;AF-non_topmed-XX=0.00249552;nhomalt-non_topmed-XX=0;AC-ami-XX=0;AN-ami-XX=466;AF-ami-XX=0.00000;nhomalt-ami-XX=0;AC-eas-XY=0;AN-eas-XY=2766;AF-eas-XY=0.00000;nhomalt-eas-XY=0;AC-controls_and_biobanks-mid=0;AN-controls_and_biobanks-mid=246;AF-controls_and_biobanks-mid=0.00000;nhomalt-controls_and_biobanks-mid=0;AC-non_v2-nfe-XY=0;AN-non_v2-nfe-XY=19988;AF-non_v2-nfe-XY=0.00000;nhomalt-non_v2-nfe-XY=0;AC-controls_and_biobanks-sas=0;AN-controls_and_biobanks-sas=3056;AF-controls_and_biobanks-sas=0.00000;nhomalt-controls_and_biobanks-sas=0;AC-non_v2-eas=0;AN-non_v2-eas=2640;AF-non_v2-eas=0.00000;nhomalt-non_v2-eas=0;AC-mid=0;AN-mid=314;AF-mid=0.00000;nhomalt-mid=0;AC-oth-XY=0;AN-oth-XY=1014;AF-oth-XY=0.00000;nhomalt-oth-XY=0;AC-non_cancer-nfe=0;AN-non_cancer-nfe=64300;AF-non
_cancer-nfe=0.00000;nhomalt-non_cancer-nfe=0;AC-non_neuro-eas-XX=0;AN-non_neuro-eas-XX=2220;AF-non_neuro-eas-XX=0.00000;nhomalt-non_neuro-eas-XX=0;AC-non_neuro-sas-XY=0;AN-non_neuro-sas-XY=3560;AF-non_neuro-sas-XY=0.00000;nhomalt-non_neuro-sas-XY=0;AC-non_cancer-ami-XX=0;AN-non_cancer-ami-XX=466;AF-non_cancer-ami-XX=0.00000;nhomalt-non_cancer-ami-XX=0;AC-mid-XX=0;AN-mid-XX=162;AF-mid-XX=0.00000;nhomalt-mid-XX=0;AC-non_topmed-asj=0;AN-non_topmed-asj=988;AF-non_topmed-asj=0.00000;nhomalt-non_topmed-asj=0;AC-non_v2-asj-XX=0;AN-non_v2-asj-XX=1676;AF-non_v2-asj-XX=0.00000;nhomalt-non_v2-asj-XX=0;nhomalt=0;AC-non_v2-amr-XY=2;AN-non_v2-amr-XY=7496;AF-non_v2-amr-XY=0.000266809;nhomalt-non_v2-amr-XY=0;AC-non_cancer-amr-XX=2;AN-non_cancer-amr-XX=6538;AF-non_cancer-amr-XX=0.000305904;nhomalt-non_cancer-amr-XX=0;AC-controls_and_biobanks-afr-XX=48;AN-controls_and_biobanks-afr-XX=4376;AF-controls_and_biobanks-afr-XX=0.0109689;nhomalt-controls_and_biobanks-afr-XX=0;AC-asj=0;AN-asj=3436;AF-asj=0.00000;nhomalt-asj=0;AC-non_topmed-asj-XY=0;AN-non_topmed-asj-XY=712;AF-non_topmed-asj-XY=0.00000;nhomalt-non_topmed-asj-XY=0;AC-non_v2-fin-XX=0;AN-non_v2-fin-XX=1168;AF-non_v2-fin-XX=0.00000;nhomalt-non_v2-fin-XX=0;AC-non_topmed-ami=0;AN-non_topmed-ami=106;AF-non_topmed-ami=0.00000;nhomalt-non_topmed-ami=0;AC-controls_and_biobanks-eas-XX=0;AN-controls_and_biobanks-eas-XX=1022;AF-controls_and_biobanks-eas-XX=0.00000;nhomalt-controls_and_biobanks-eas-XX=0;AC-controls_and_biobanks-fin-XX=0;AN-controls_and_biobanks-fin-XX=486;AF-controls_and_biobanks-fin-XX=0.00000;nhomalt-controls_and_biobanks-fin-XX=0;AC-non_topmed-raw=228;AN-non_topmed-raw=80170;AF-non_topmed-raw=0.00284396;nhomalt-non_topmed-raw=1;AC-non_cancer-eas-XY=0;AN-non_cancer-eas-XY=2638;AF-non_cancer-eas-XY=0.00000;nhomalt-non_cancer-eas-XY=0;AC-non_cancer=222;AN-non_cancer=143336;AF-non_cancer=0.00154881;nhomalt-non_cancer=0;AC-controls_and_biobanks-ami-XY=0;AN-controls_and_biobanks-ami-XY=28;AF-controls_and_biobanks-ami-XY=0.0000
0;nhomalt-controls_and_biobanks-ami-XY=0;AC-controls_and_biobanks-mid-XX=0;AN-controls_and_biobanks-mid-XX=134;AF-controls_and_biobanks-mid-XX=0.00000;nhomalt-controls_and_biobanks-mid-XX=0;AC-non_v2-afr-XY=48;AN-non_v2-afr-XY=11794;AF-non_v2-afr-XY=0.00406987;nhomalt-non_v2-afr-XY=0;AC-non_v2-sas-XY=0;AN-non_v2-sas-XY=3000;AF-non_v2-sas-XY=0.00000;nhomalt-non_v2-sas-XY=0;AC-non_v2-fin=0;AN-non_v2-fin=7160;AF-non_v2-fin=0.00000;nhomalt-non_v2-fin=0;AC-non_neuro-oth=1;AN-non_neuro-oth=1914;AF-non_neuro-oth=0.000522466;nhomalt-non_neuro-oth=0;AC-non_cancer-sas-XX=0;AN-non_cancer-sas-XX=1120;AF-non_cancer-sas-XX=0.00000;nhomalt-non_cancer-sas-XX=0;AC-non_neuro-asj=0;AN-non_neuro-asj=3352;AF-non_neuro-asj=0.00000;nhomalt-non_neuro-asj=0;AC-non_topmed-afr=132;AN-non_topmed-afr=22500;AF-non_topmed-afr=0.00586667;nhomalt-non_topmed-afr=0;AC-non_topmed-afr-XY=57;AN-non_topmed-afr-XY=11654;AF-non_topmed-afr-XY=0.00489102;nhomalt-non_topmed-afr-XY=0;AC-non_neuro-eas=0;AN-non_neuro-eas=4986;AF-non_neuro-eas=0.00000;nhomalt-non_neuro-eas=0;AC-afr-XX=134;AN-afr-XX=20622;AF-afr-XX=0.00649791;nhomalt-afr-XX=0;AC-non_neuro-mid-XY=0;AN-non_neuro-mid-XY=146;AF-non_neuro-mid-XY=0.00000;nhomalt-non_neuro-mid-XY=0;AC-non_topmed-fin-XX=0;AN-non_topmed-fin-XX=2466;AF-non_topmed-fin-XX=0.00000;nhomalt-non_topmed-fin-XX=0;AC-non_cancer-amr=4;AN-non_cancer-amr=14672;AF-non_cancer-amr=0.000272628;nhomalt-non_cancer-amr=0;AC-non_v2-ami-XX=0;AN-non_v2-ami-XX=466;AF-non_v2-ami-XX=0.00000;nhomalt-non_v2-ami-XX=0;AC-afr-XY=83;AN-afr-XY=17770;AF-afr-XY=0.00467079;nhomalt-afr-XY=0;AC-non_v2-mid-XX=0;AN-non_v2-mid-XX=162;AF-non_v2-mid-XX=0.00000;nhomalt-non_v2-mid-XX=0;AC-non_topmed-fin-XY=0;AN-non_topmed-fin-XY=7878;AF-non_topmed-fin-XY=0.00000;nhomalt-non_topmed-fin-XY=0;AC-non_neuro-amr-XY=2;AN-non_neuro-amr-XY=8018;AF-non_neuro-amr-XY=0.000249439;nhomalt-non_neuro-amr-XY=0;AC-non_topmed-mid-XX=0;AN-non_topmed-mid-XX=140;AF-non_topmed-mid-XX=0.00000;nhomalt-non_topmed-mid-XX=0;AC-controls_and_biob
anks-asj-XY=0;AN-controls_and_biobanks-asj-XY=50;AF-controls_and_biobanks-asj-XY=0.00000;nhomalt-controls_and_biobanks-asj-XY=0;AC-non_v2-fin-XY=0;AN-non_v2-fin-XY=5992;AF-non_v2-fin-XY=0.00000;nhomalt-non_v2-fin-XY=0;AC-controls_and_biobanks-ami-XX=0;AN-controls_and_biobanks-ami-XX=30;AF-controls_and_biobanks-ami-XX=0.00000;nhomalt-controls_and_biobanks-ami-XX=0;AC-eas-XX=0;AN-eas-XX=2220;AF-eas-XX=0.00000;nhomalt-eas-XX=0;AC-non_cancer-amr-XY=2;AN-non_cancer-amr-XY=8134;AF-non_cancer-amr-XY=0.000245881;nhomalt-non_cancer-amr-XY=0;AC-non_neuro-ami-XX=0;AN-non_neuro-ami-XX=432;AF-non_neuro-ami-XX=0.00000;nhomalt-non_neuro-ami-XX=0;AC-controls_and_biobanks=74;AN-controls_and_biobanks=31786;AF-controls_and_biobanks=0.00232807;nhomalt-controls_and_biobanks=0;AC-controls_and_biobanks-oth=1;AN-controls_and_biobanks-oth=748;AF-controls_and_biobanks-oth=0.00133690;nhomalt-controls_and_biobanks-oth=0;AC-nfe-XY=0;AN-nfe-XY=28396;AF-nfe-XY=0.00000;nhomalt-nfe-XY=0;AC-non_cancer-afr-XX=134;AN-non_cancer-afr-XX=20470;AF-non_cancer-afr-XX=0.00654617;nhomalt-non_cancer-afr-XX=0;AC-controls_and_biobanks-sas-XY=0;AN-controls_and_biobanks-sas-XY=2220;AF-controls_and_biobanks-sas-XY=0.00000;nhomalt-controls_and_biobanks-sas-XY=0;AC-non_cancer-oth=1;AN-non_cancer-oth=1936;AF-non_cancer-oth=0.000516529;nhomalt-non_cancer-oth=0;AC-non_topmed-oth=1;AN-non_topmed-oth=1432;AF-non_topmed-oth=0.000698324;nhomalt-non_topmed-oth=0;AC-non_topmed-nfe-XY=0;AN-non_topmed-nfe-XY=11522;AF-non_topmed-nfe-XY=0.00000;nhomalt-non_topmed-nfe-XY=0;AC-non_topmed-sas-XX=0;AN-non_topmed-sas-XX=1132;AF-non_topmed-sas-XX=0.00000;nhomalt-non_topmed-sas-XX=0;AC-non_v2-nfe=0;AN-non_v2-nfe=51570;AF-non_v2-nfe=0.00000;nhomalt-non_v2-nfe=0;AC-non_topmed-oth-XX=1;AN-non_topmed-oth-XX=648;AF-non_topmed-oth-XX=0.00154321;nhomalt-non_topmed-oth-XX=0;AC-non_cancer-mid-XX=0;AN-non_cancer-mid-XX=160;AF-non_cancer-mid-XX=0.00000;nhomalt-non_cancer-mid-XX=0;AC-controls_and_biobanks-nfe=0;AN-controls_and_biobanks-nfe=6760;AF-
controls_and_biobanks-nfe=0.00000;nhomalt-controls_and_biobanks-nfe=0;AC-controls_and_biobanks-oth-XY=0;AN-controls_and_biobanks-oth-XY=400;AF-controls_and_biobanks-oth-XY=0.00000;nhomalt-controls_and_biobanks-oth-XY=0;AC-controls_and_biobanks-fin-XY=0;AN-controls_and_biobanks-fin-XY=4918;AF-controls_and_biobanks-fin-XY=0.00000;nhomalt-controls_and_biobanks-fin-XY=0;AC-non_v2-amr-XX=2;AN-non_v2-amr-XX=5848;AF-non_v2-amr-XX=0.000341997;nhomalt-non_v2-amr-XX=0;AC-non_cancer-asj=0;AN-non_cancer-asj=3268;AF-non_cancer-asj=0.00000;nhomalt-non_cancer-asj=0;AC-non_cancer-oth-XX=1;AN-non_cancer-oth-XX=968;AF-non_cancer-oth-XX=0.00103306;nhomalt-non_cancer-oth-XX=0;AC-non_neuro-amr=4;AN-non_neuro-amr=14420;AF-non_neuro-amr=0.000277393;nhomalt-non_neuro-amr=0;AC-non_cancer-XX=137;AN-non_cancer-XX=73930;AF-non_cancer-XX=0.00185310;nhomalt-non_cancer-XX=0;AC-non_v2-ami-XY=0;AN-non_v2-ami-XY=436;AF-non_v2-ami-XY=0.00000;nhomalt-non_v2-ami-XY=0;AC-non_neuro-raw=358;AN-non_neuro-raw=134154;AF-non_neuro-raw=0.00266857;nhomalt-non_neuro-raw=2;AC-non_neuro-afr=182;AN-non_neuro-afr=30348;AF-non_neuro-afr=0.00599710;nhomalt-non_neuro-afr=0;AC-non_topmed-ami-XY=0;AN-non_topmed-ami-XY=42;AF-non_topmed-ami-XY=0.00000;nhomalt-non_topmed-ami-XY=0;AC-non_neuro-oth-XY=0;AN-non_neuro-oth-XY=960;AF-non_neuro-oth-XY=0.00000;nhomalt-non_neuro-oth-XY=0;AC-non_neuro-oth-XX=1;AN-non_neuro-oth-XX=954;AF-non_neuro-oth-XX=0.00104822;nhomalt-non_neuro-oth-XX=0;AC-controls_and_biobanks-XX=51;AN-controls_and_biobanks-XX=12968;AF-controls_and_biobanks-XX=0.00393276;nhomalt-controls_and_biobanks-XX=0;AC-non_cancer-afr-XY=83;AN-non_cancer-afr-XY=17616;AF-non_cancer-afr-XY=0.00471163;nhomalt-non_cancer-afr-XY=0;AC-non_cancer-fin=0;AN-non_cancer-fin=10436;AF-non_cancer-fin=0.00000;nhomalt-non_cancer-fin=0;AC-controls_and_biobanks-asj=0;AN-controls_and_biobanks-asj=134;AF-controls_and_biobanks-asj=0.00000;nhomalt-controls_and_biobanks-asj=0;AC-non_topmed-oth-XY=0;AN-non_topmed-oth-XY=784;AF-non_topmed-oth-XY=0.
00000;nhomalt-non_topmed-oth-XY=0;AC-non_neuro-mid-XX=0;AN-non_neuro-mid-XX=162;AF-non_neuro-mid-XX=0.00000;nhomalt-non_neuro-mid-XX=0;AC-controls_and_biobanks-oth-XX=1;AN-controls_and_biobanks-oth-XX=348;AF-controls_and_biobanks-oth-XX=0.00287356;nhomalt-controls_and_biobanks-oth-XX=0;AC-non_neuro-XY=65;AN-non_neuro-XY=62472;AF-non_neuro-XY=0.00104047;nhomalt-non_neuro-XY=0;AC-nfe=0;AN-nfe=67476;AF-nfe=0.00000;nhomalt-nfe=0;AC_popmax=217;AN_popmax=38392;AF_popmax=0.00565222;nhomalt_popmax=0;faf95-sas=0.00000;faf99-sas=0.00000;faf95-eas=0.00000;faf99-eas=0.00000;faf95-amr=9.15000e-05;faf99-amr=5.50200e-05;faf95-afr=0.00503537;faf99-afr=0.00479761;faf95=0.00134206;faf99=0.00127995;faf95-nfe=0.00000;faf99-nfe=0.00000;age_hist_het_bin_freq=1|7|10|9|17|9|19|6|4|5;age_hist_het_n_smaller=14;age_hist_het_n_larger=1;age_hist_hom_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_hom_n_smaller=0;age_hist_hom_n_larger=0;FS=4.82164e-16;MQ=29.2276;MQRankSum=-0.221000;QD=3.15747;ReadPosRankSum=0.267000;VarDP=17667;QUALapprox=55783;AS_FS=4.82164e-16;AS_MQ=29.2276;AS_MQRankSum=-0.230000;AS_pab_max=1.00000;AS_QD=3.15747;AS_ReadPosRankSum=0.265000;AS_SOR=6.79842;InbreedingCoeff=0.00673884;AS_VQSLOD=-40.1202;AS_culprit=AS_SOR;allele_type=snv;n_alt_alleles=1;variant_type=snv;segdup;gq_hist_alt_bin_freq=0|0|0|0|0|2|6|12|13|15|16|11|21|4|10|15|8|14|12|63;gq_hist_all_bin_freq=0|0|0|0|40100|14870|9325|4541|2186|1270|637|224|171|97|66|72|43|31|24|96;dp_hist_alt_bin_freq=0|0|72|72|44|11|16|5|1|0|1|0|0|0|0|0|0|0|0|0;dp_hist_alt_n_larger=0;dp_hist_all_bin_freq=0|0|7226|30432|19384|11668|4490|433|70|17|15|4|6|4|2|1|0|0|1|0;dp_hist_all_n_smaller=0;dp_hist_all_n_larger=0;ab_hist_alt_bin_freq=0|0|0|0|115|53|40|8|3|1|2|0|0|0|0|0|0|0|0|0;cadd_raw_score=0.439889;cadd_phred=5.86400;vep=G|upstream_gene_variant|MODIFIER|OR11H1|ENSG00000130538|Transcript|ENST00000252835|protein_coding||||||||||1|50|1|SNV||HGNC|HGNC:15404|YES||P2|CCDS74807.1|ENSP00000252835||||||||||||||||,G|upstream_gene_variant|MODIFIER|OR11H1|ENSG
00000130538|Transcript|ENST00000643195|protein_coding||||||||||1|83|1|SNV||HGNC|HGNC:15404|||A2||ENSP00000495403||||||||||||||||,G|upstream_gene_variant|MODIFIER|OR11H1|81061|Transcript|NM_001005239.1|protein_coding||||||||||1|50|1|SNV||EntrezGene|HGNC:15404|YES||||NP_001005239.1||||||||||||||||\");\n            writer.WriteLine(\"chr22\\t15528137\\t.\\tT\\tA\\t.\\tAS_VQSR\\tAC=3;AN=150706;AF=1.99063e-05;popmax=afr;faf95_popmax=1.95500e-05;AC-non_v2-XX=1;AN-non_v2-XX=59624;AF-non_v2-XX=1.67718e-05;nhomalt-non_v2-XX=0;AC-non_cancer-fin-XX=0;AN-non_cancer-fin-XX=2558;AF-non_cancer-fin-XX=0.00000;nhomalt-non_cancer-fin-XX=0;AC-non_neuro-nfe=0;AN-non_neuro-nfe=63704;AF-non_neuro-nfe=0.00000;nhomalt-non_neuro-nfe=0;AC-non_neuro-afr-XY=1;AN-non_neuro-afr-XY=13522;AF-non_neuro-afr-XY=7.39536e-05;nhomalt-non_neuro-afr-XY=0;AC-non_neuro-nfe-XY=0;AN-non_neuro-nfe-XY=26178;AF-non_neuro-nfe-XY=0.00000;nhomalt-non_neuro-nfe-XY=0;AC-controls_and_biobanks-eas-XY=0;AN-controls_and_biobanks-eas-XY=1388;AF-controls_and_biobanks-eas-XY=0.00000;nhomalt-controls_and_biobanks-eas-XY=0;AC-non_neuro-sas-XX=0;AN-non_neuro-sas-XX=1158;AF-non_neuro-sas-XX=0.00000;nhomalt-non_neuro-sas-XX=0;AC-non_v2=2;AN-non_v2=113394;AF-non_v2=1.76376e-05;nhomalt-non_v2=0;AC-non_topmed-nfe-XX=0;AN-non_topmed-nfe-XX=9250;AF-non_topmed-nfe-XX=0.00000;nhomalt-non_topmed-nfe-XX=0;AC-non_v2-mid=0;AN-non_v2-mid=308;AF-non_v2-mid=0.00000;nhomalt-non_v2-mid=0;AC-non_topmed-sas=0;AN-non_topmed-sas=4746;AF-non_topmed-sas=0.00000;nhomalt-non_topmed-sas=0;AC-non_cancer-eas-XX=0;AN-non_cancer-eas-XX=2152;AF-non_cancer-eas-XX=0.00000;nhomalt-non_cancer-eas-XX=0;AC-amr-XY=0;AN-amr-XY=8402;AF-amr-XY=0.00000;nhomalt-amr-XY=0;AC-non_v2-nfe-XX=0;AN-non_v2-nfe-XX=31698;AF-non_v2-nfe-XX=0.00000;nhomalt-non_v2-nfe-XX=0;AC-controls_and_biobanks-XY=0;AN-controls_and_biobanks-XY=19242;AF-controls_and_biobanks-XY=0.00000;nhomalt-controls_and_biobanks-XY=0;AC-non_neuro-asj-XY=0;AN-non_neuro-asj-XY=1548;AF-non_neuro-asj-XY=0.00000;nhom
alt-non_neuro-asj-XY=0;AC-oth=0;AN-oth=2058;AF-oth=0.00000;nhomalt-oth=0;AC-non_topmed-mid-XY=0;AN-non_topmed-mid-XY=130;AF-non_topmed-mid-XY=0.00000;nhomalt-non_topmed-mid-XY=0;AC-non_cancer-asj-XX=0;AN-non_cancer-asj-XX=1736;AF-non_cancer-asj-XX=0.00000;nhomalt-non_cancer-asj-XX=0;AC-sas-XY=0;AN-sas-XY=3616;AF-sas-XY=0.00000;nhomalt-sas-XY=0;AC-non_neuro-fin=0;AN-non_neuro-fin=6944;AF-non_neuro-fin=0.00000;nhomalt-non_neuro-fin=0;AC-non_topmed-amr-XY=0;AN-non_topmed-amr-XY=7502;AF-non_topmed-amr-XY=0.00000;nhomalt-non_topmed-amr-XY=0;AC-non_neuro-XX=1;AN-non_neuro-XX=69968;AF-non_neuro-XX=1.42922e-05;nhomalt-non_neuro-XX=0;AC-fin-XX=0;AN-fin-XX=2558;AF-fin-XX=0.00000;nhomalt-fin-XX=0;AC-controls_and_biobanks-asj-XX=0;AN-controls_and_biobanks-asj-XX=82;AF-controls_and_biobanks-asj-XX=0.00000;nhomalt-controls_and_biobanks-asj-XX=0;AC-non_v2-raw=2;AN-non_v2-raw=114570;AF-non_v2-raw=1.74566e-05;nhomalt-non_v2-raw=0;AC-non_v2-asj=0;AN-non_v2-asj=3066;AF-non_v2-asj=0.00000;nhomalt-non_v2-asj=0;AC-nfe-XX=0;AN-nfe-XX=39218;AF-nfe-XX=0.00000;nhomalt-nfe-XX=0;AC-controls_and_biobanks-raw=1;AN-controls_and_biobanks-raw=32894;AF-controls_and_biobanks-raw=3.04007e-05;nhomalt-controls_and_biobanks-raw=0;AC-controls_and_biobanks-ami=0;AN-controls_and_biobanks-ami=60;AF-controls_and_biobanks-ami=0.00000;nhomalt-controls_and_biobanks-ami=0;AC-non_topmed-eas=0;AN-non_topmed-eas=3652;AF-non_topmed-eas=0.00000;nhomalt-non_topmed-eas=0;AC-non_v2-amr=0;AN-non_v2-amr=13588;AF-non_v2-amr=0.00000;nhomalt-non_v2-amr=0;AC-non_neuro-sas=0;AN-non_neuro-sas=4772;AF-non_neuro-sas=0.00000;nhomalt-non_neuro-sas=0;AC-non_cancer-fin-XY=0;AN-non_cancer-fin-XY=7992;AF-non_cancer-fin-XY=0.00000;nhomalt-non_cancer-fin-XY=0;AC-non_cancer-nfe-XY=0;AN-non_cancer-nfe-XY=26600;AF-non_cancer-nfe-XY=0.00000;nhomalt-non_cancer-nfe-XY=0;AC-non_v2-oth=0;AN-non_v2-oth=1832;AF-non_v2-oth=0.00000;nhomalt-non_v2-oth=0;AC-ami=0;AN-ami=910;AF-ami=0.00000;nhomalt-ami=0;AC-non_cancer-XY=1;AN-non_cancer-XY=71068;AF-non_c
ancer-XY=1.40710e-05;nhomalt-non_cancer-XY=0;AC-non_v2-sas=0;AN-non_v2-sas=3836;AF-non_v2-sas=0.00000;nhomalt-non_v2-sas=0;AC-non_topmed-afr-XX=1;AN-non_topmed-afr-XX=11566;AF-non_topmed-afr-XX=8.64603e-05;nhomalt-non_topmed-afr-XX=0;AC-sas=0;AN-sas=4774;AF-sas=0.00000;nhomalt-sas=0;AC-non_neuro-nfe-XX=0;AN-non_neuro-nfe-XX=37526;AF-non_neuro-nfe-XX=0.00000;nhomalt-non_neuro-nfe-XX=0;AC-non_topmed-ami-XX=0;AN-non_topmed-ami-XX=66;AF-non_topmed-ami-XX=0.00000;nhomalt-non_topmed-ami-XX=0;AC-ami-XY=0;AN-ami-XY=442;AF-ami-XY=0.00000;nhomalt-ami-XY=0;AC-oth-XX=0;AN-oth-XX=1016;AF-oth-XX=0.00000;nhomalt-oth-XX=0;AC-non_cancer-eas=0;AN-non_cancer-eas=4862;AF-non_cancer-eas=0.00000;nhomalt-non_cancer-eas=0;AC-non_topmed-XY=1;AN-non_topmed-XY=47186;AF-non_topmed-XY=2.11927e-05;nhomalt-non_topmed-XY=0;AC-non_v2-ami=0;AN-non_v2-ami=908;AF-non_v2-ami=0.00000;nhomalt-non_v2-ami=0;AC-non_neuro=2;AN-non_neuro=133672;AF-non_neuro=1.49620e-05;nhomalt-non_neuro=0;AC-amr-XX=0;AN-amr-XX=6720;AF-amr-XX=0.00000;nhomalt-amr-XX=0;AC-controls_and_biobanks-nfe-XY=0;AN-controls_and_biobanks-nfe-XY=3564;AF-controls_and_biobanks-nfe-XY=0.00000;nhomalt-controls_and_biobanks-nfe-XY=0;AC-controls_and_biobanks-eas=0;AN-controls_and_biobanks-eas=2416;AF-controls_and_biobanks-eas=0.00000;nhomalt-controls_and_biobanks-eas=0;AC-XX=2;AN-XX=77162;AF-XX=2.59195e-05;nhomalt-XX=0;AC-non_cancer-oth-XY=0;AN-non_cancer-oth-XY=996;AF-non_cancer-oth-XY=0.00000;nhomalt-non_cancer-oth-XY=0;AC-non_v2-XY=1;AN-non_v2-XY=53770;AF-non_v2-XY=1.85977e-05;nhomalt-non_v2-XY=0;AC-non_topmed-amr-XX=0;AN-non_topmed-amr-XX=5256;AF-non_topmed-amr-XX=0.00000;nhomalt-non_topmed-amr-XX=0;AC-fin=0;AN-fin=10550;AF-fin=0.00000;nhomalt-fin=0;AC-controls_and_biobanks-nfe-XX=0;AN-controls_and_biobanks-nfe-XX=3234;AF-controls_and_biobanks-nfe-XX=0.00000;nhomalt-controls_and_biobanks-nfe-XX=0;AC-controls_and_biobanks-afr=0;AN-controls_and_biobanks-afr=8878;AF-controls_and_biobanks-afr=0.00000;nhomalt-controls_and_biobanks-afr=0;AC-asj-XX=
0;AN-asj-XX=1856;AF-asj-XX=0.00000;nhomalt-asj-XX=0;AC-non_topmed-mid=0;AN-non_topmed-mid=272;AF-non_topmed-mid=0.00000;nhomalt-non_topmed-mid=0;AC-non_cancer-sas-XY=0;AN-non_cancer-sas-XY=3602;AF-non_cancer-sas-XY=0.00000;nhomalt-non_cancer-sas-XY=0;AC-sas-XX=0;AN-sas-XX=1158;AF-sas-XX=0.00000;nhomalt-sas-XX=0;AC-non_topmed=2;AN-non_topmed=79442;AF-non_topmed=2.51756e-05;nhomalt-non_topmed=0;AC-non_v2-oth-XX=0;AN-non_v2-oth-XX=918;AF-non_v2-oth-XX=0.00000;nhomalt-non_v2-oth-XX=0;AC-non_neuro-ami-XY=0;AN-non_neuro-ami-XY=430;AF-non_neuro-ami-XY=0.00000;nhomalt-non_neuro-ami-XY=0;AC-controls_and_biobanks-afr-XY=0;AN-controls_and_biobanks-afr-XY=4272;AF-controls_and_biobanks-afr-XY=0.00000;nhomalt-controls_and_biobanks-afr-XY=0;AC-controls_and_biobanks-amr-XX=0;AN-controls_and_biobanks-amr-XX=2464;AF-controls_and_biobanks-amr-XX=0.00000;nhomalt-controls_and_biobanks-amr-XX=0;AC-non_topmed-amr=0;AN-non_topmed-amr=12758;AF-non_topmed-amr=0.00000;nhomalt-non_topmed-amr=0;AC-controls_and_biobanks-sas-XX=0;AN-controls_and_biobanks-sas-XX=840;AF-controls_and_biobanks-sas-XX=0.00000;nhomalt-controls_and_biobanks-sas-XX=0;AC-controls_and_biobanks-amr=0;AN-controls_and_biobanks-amr=4654;AF-controls_and_biobanks-amr=0.00000;nhomalt-controls_and_biobanks-amr=0;AC-non_neuro-fin-XX=0;AN-non_neuro-fin-XX=654;AF-non_neuro-fin-XX=0.00000;nhomalt-non_neuro-fin-XX=0;AC-non_cancer-raw=4;AN-non_cancer-raw=147934;AF-non_cancer-raw=2.70391e-05;nhomalt-non_cancer-raw=0;AC-non_neuro-mid=0;AN-non_neuro-mid=310;AF-non_neuro-mid=0.00000;nhomalt-non_neuro-mid=0;AC-non_v2-asj-XY=0;AN-non_v2-asj-XY=1382;AF-non_v2-asj-XY=0.00000;nhomalt-non_v2-asj-XY=0;AC-non_v2-afr=2;AN-non_v2-afr=28078;AF-non_v2-afr=7.12301e-05;nhomalt-non_v2-afr=0;AC-non_neuro-fin-XY=0;AN-non_neuro-fin-XY=6290;AF-non_neuro-fin-XY=0.00000;nhomalt-non_neuro-fin-XY=0;AC-non_cancer-afr=3;AN-non_cancer-afr=40360;AF-non_cancer-afr=7.43310e-05;nhomalt-non_cancer-afr=0;AC-non_topmed-sas-XY=0;AN-non_topmed-sas-XY=3594;AF-non_topmed-sas-X
Y=0.00000;nhomalt-non_topmed-sas-XY=0;AC-mid-XY=0;AN-mid-XY=152;AF-mid-XY=0.00000;nhomalt-mid-XY=0;AC-non_v2-oth-XY=0;AN-non_v2-oth-XY=914;AF-non_v2-oth-XY=0.00000;nhomalt-non_v2-oth-XY=0;AC-controls_and_biobanks-fin=0;AN-controls_and_biobanks-fin=5460;AF-controls_and_biobanks-fin=0.00000;nhomalt-controls_and_biobanks-fin=0;AC-non_neuro-eas-XY=0;AN-non_neuro-eas-XY=2838;AF-non_neuro-eas-XY=0.00000;nhomalt-non_neuro-eas-XY=0;AC-non_topmed-eas-XX=0;AN-non_topmed-eas-XX=1382;AF-non_topmed-eas-XX=0.00000;nhomalt-non_topmed-eas-XX=0;AC-non_v2-afr-XX=1;AN-non_v2-afr-XX=15472;AF-non_v2-afr-XX=6.46329e-05;nhomalt-non_v2-afr-XX=0;AC-non_neuro-amr-XX=0;AN-non_neuro-amr-XX=6534;AF-non_neuro-amr-XX=0.00000;nhomalt-non_neuro-amr-XX=0;AC-non_cancer-ami=0;AN-non_cancer-ami=910;AF-non_cancer-ami=0.00000;nhomalt-non_cancer-ami=0;AC-XY=1;AN-XY=73544;AF-XY=1.35973e-05;nhomalt-XY=0;AC-non_topmed-asj-XX=0;AN-non_topmed-asj-XX=270;AF-non_topmed-asj-XX=0.00000;nhomalt-non_topmed-asj-XX=0;AC-non_topmed-eas-XY=0;AN-non_topmed-eas-XY=2270;AF-non_topmed-eas-XY=0.00000;nhomalt-non_topmed-eas-XY=0;AC-non_v2-eas-XY=0;AN-non_v2-eas-XY=1438;AF-non_v2-eas-XY=0.00000;nhomalt-non_v2-eas-XY=0;AC-eas=0;AN-eas=5092;AF-eas=0.00000;nhomalt-eas=0;AC-asj-XY=0;AN-asj-XY=1594;AF-asj-XY=0.00000;nhomalt-asj-XY=0;AC-non_v2-eas-XX=0;AN-non_v2-eas-XX=1286;AF-non_v2-eas-XX=0.00000;nhomalt-non_v2-eas-XX=0;AC-controls_and_biobanks-mid-XY=0;AN-controls_and_biobanks-mid-XY=112;AF-controls_and_biobanks-mid-XY=0.00000;nhomalt-controls_and_biobanks-mid-XY=0;AC-fin-XY=0;AN-fin-XY=7992;AF-fin-XY=0.00000;nhomalt-fin-XY=0;AC-non_topmed-nfe=0;AN-non_topmed-nfe=20870;AF-non_topmed-nfe=0.00000;nhomalt-non_topmed-nfe=0;AC-amr=0;AN-amr=15122;AF-amr=0.00000;nhomalt-amr=0;AC-non_neuro-ami=0;AN-non_neuro-ami=862;AF-non_neuro-ami=0.00000;nhomalt-non_neuro-ami=0;AC-non_cancer-nfe-XX=0;AN-non_cancer-nfe-XX=37954;AF-non_cancer-nfe-XX=0.00000;nhomalt-non_cancer-nfe-XX=0;AC-non_cancer-mid=0;AN-non_cancer-mid=304;AF-non_cancer-mid=0.00000;n
homalt-non_cancer-mid=0;AC-non_v2-mid-XY=0;AN-non_v2-mid-XY=146;AF-non_v2-mid-XY=0.00000;nhomalt-non_v2-mid-XY=0;AC-controls_and_biobanks-amr-XY=0;AN-controls_and_biobanks-amr-XY=2190;AF-controls_and_biobanks-amr-XY=0.00000;nhomalt-controls_and_biobanks-amr-XY=0;AC-non_cancer-ami-XY=0;AN-non_cancer-ami-XY=442;AF-non_cancer-ami-XY=0.00000;nhomalt-non_cancer-ami-XY=0;AC-non_neuro-asj-XX=0;AN-non_neuro-asj-XX=1822;AF-non_neuro-asj-XX=0.00000;nhomalt-non_neuro-asj-XX=0;AC-afr=3;AN-afr=40676;AF-afr=7.37536e-05;nhomalt-afr=0;AC-non_v2-sas-XX=0;AN-non_v2-sas-XX=784;AF-non_v2-sas-XX=0.00000;nhomalt-non_v2-sas-XX=0;AC-non_neuro-afr-XX=1;AN-non_neuro-afr-XX=18444;AF-non_neuro-afr-XX=5.42182e-05;nhomalt-non_neuro-afr-XX=0;AC-non_cancer-sas=0;AN-non_cancer-sas=4742;AF-non_cancer-sas=0.00000;nhomalt-non_cancer-sas=0;AC-non_topmed-fin=0;AN-non_topmed-fin=10458;AF-non_topmed-fin=0.00000;nhomalt-non_topmed-fin=0;AC-non_cancer-asj-XY=0;AN-non_cancer-asj-XY=1550;AF-non_cancer-asj-XY=0.00000;nhomalt-non_cancer-asj-XY=0;AC-non_cancer-mid-XY=0;AN-non_cancer-mid-XY=144;AF-non_cancer-mid-XY=0.00000;nhomalt-non_cancer-mid-XY=0;AC-raw=4;AN-raw=152178;AF-raw=2.62850e-05;nhomalt-raw=0;AC-non_topmed-XX=1;AN-non_topmed-XX=32256;AF-non_topmed-XX=3.10020e-05;nhomalt-non_topmed-XX=0;AC-ami-XX=0;AN-ami-XX=468;AF-ami-XX=0.00000;nhomalt-ami-XX=0;AC-eas-XY=0;AN-eas-XY=2838;AF-eas-XY=0.00000;nhomalt-eas-XY=0;AC-controls_and_biobanks-mid=0;AN-controls_and_biobanks-mid=246;AF-controls_and_biobanks-mid=0.00000;nhomalt-controls_and_biobanks-mid=0;AC-non_v2-nfe-XY=0;AN-non_v2-nfe-XY=20096;AF-non_v2-nfe-XY=0.00000;nhomalt-non_v2-nfe-XY=0;AC-controls_and_biobanks-sas=0;AN-controls_and_biobanks-sas=3084;AF-controls_and_biobanks-sas=0.00000;nhomalt-controls_and_biobanks-sas=0;AC-non_v2-eas=0;AN-non_v2-eas=2724;AF-non_v2-eas=0.00000;nhomalt-non_v2-eas=0;AC-mid=0;AN-mid=316;AF-mid=0.00000;nhomalt-mid=0;AC-oth-XY=0;AN-oth-XY=1042;AF-oth-XY=0.00000;nhomalt-oth-XY=0;AC-non_cancer-nfe=0;AN-non_cancer-nfe=64554;AF-non
_cancer-nfe=0.00000;nhomalt-non_cancer-nfe=0;AC-non_neuro-eas-XX=0;AN-non_neuro-eas-XX=2254;AF-non_neuro-eas-XX=0.00000;nhomalt-non_neuro-eas-XX=0;AC-non_neuro-sas-XY=0;AN-non_neuro-sas-XY=3614;AF-non_neuro-sas-XY=0.00000;nhomalt-non_neuro-sas-XY=0;AC-non_cancer-ami-XX=0;AN-non_cancer-ami-XX=468;AF-non_cancer-ami-XX=0.00000;nhomalt-non_cancer-ami-XX=0;AC-mid-XX=0;AN-mid-XX=164;AF-mid-XX=0.00000;nhomalt-mid-XX=0;AC-non_topmed-asj=0;AN-non_topmed-asj=982;AF-non_topmed-asj=0.00000;nhomalt-non_topmed-asj=0;AC-non_v2-asj-XX=0;AN-non_v2-asj-XX=1684;AF-non_v2-asj-XX=0.00000;nhomalt-non_v2-asj-XX=0;nhomalt=0;AC-non_v2-amr-XY=0;AN-non_v2-amr-XY=7628;AF-non_v2-amr-XY=0.00000;nhomalt-non_v2-amr-XY=0;AC-non_cancer-amr-XX=0;AN-non_cancer-amr-XX=6668;AF-non_cancer-amr-XX=0.00000;nhomalt-non_cancer-amr-XX=0;AC-controls_and_biobanks-afr-XX=0;AN-controls_and_biobanks-afr-XX=4606;AF-controls_and_biobanks-afr-XX=0.00000;nhomalt-controls_and_biobanks-afr-XX=0;AC-asj=0;AN-asj=3450;AF-asj=0.00000;nhomalt-asj=0;AC-non_topmed-asj-XY=0;AN-non_topmed-asj-XY=712;AF-non_topmed-asj-XY=0.00000;nhomalt-non_topmed-asj-XY=0;AC-non_v2-fin-XX=0;AN-non_v2-fin-XX=1192;AF-non_v2-fin-XX=0.00000;nhomalt-non_v2-fin-XX=0;AC-non_topmed-ami=0;AN-non_topmed-ami=110;AF-non_topmed-ami=0.00000;nhomalt-non_topmed-ami=0;AC-controls_and_biobanks-eas-XX=0;AN-controls_and_biobanks-eas-XX=1028;AF-controls_and_biobanks-eas-XX=0.00000;nhomalt-controls_and_biobanks-eas-XX=0;AC-controls_and_biobanks-fin-XX=0;AN-controls_and_biobanks-fin-XX=486;AF-controls_and_biobanks-fin-XX=0.00000;nhomalt-controls_and_biobanks-fin-XX=0;AC-non_topmed-raw=3;AN-non_topmed-raw=80736;AF-non_topmed-raw=3.71581e-05;nhomalt-non_topmed-raw=0;AC-non_cancer-eas-XY=0;AN-non_cancer-eas-XY=2710;AF-non_cancer-eas-XY=0.00000;nhomalt-non_cancer-eas-XY=0;AC-non_cancer=3;AN-non_cancer=146490;AF-non_cancer=2.04792e-05;nhomalt-non_cancer=0;AC-controls_and_biobanks-ami-XY=0;AN-controls_and_biobanks-ami-XY=30;AF-controls_and_biobanks-ami-XY=0.00000;nhomalt-con
trols_and_biobanks-ami-XY=0;AC-controls_and_biobanks-mid-XX=0;AN-controls_and_biobanks-mid-XX=134;AF-controls_and_biobanks-mid-XX=0.00000;nhomalt-controls_and_biobanks-mid-XX=0;AC-non_v2-afr-XY=1;AN-non_v2-afr-XY=12606;AF-non_v2-afr-XY=7.93273e-05;nhomalt-non_v2-afr-XY=0;AC-non_v2-sas-XY=0;AN-non_v2-sas-XY=3052;AF-non_v2-sas-XY=0.00000;nhomalt-non_v2-sas-XY=0;AC-non_v2-fin=0;AN-non_v2-fin=7260;AF-non_v2-fin=0.00000;nhomalt-non_v2-fin=0;AC-non_neuro-oth=0;AN-non_neuro-oth=1970;AF-non_neuro-oth=0.00000;nhomalt-non_neuro-oth=0;AC-non_cancer-sas-XX=0;AN-non_cancer-sas-XX=1140;AF-non_cancer-sas-XX=0.00000;nhomalt-non_cancer-sas-XX=0;AC-non_neuro-asj=0;AN-non_neuro-asj=3370;AF-non_neuro-asj=0.00000;nhomalt-non_neuro-asj=0;AC-non_topmed-afr=2;AN-non_topmed-afr=24110;AF-non_topmed-afr=8.29531e-05;nhomalt-non_topmed-afr=0;AC-non_topmed-afr-XY=1;AN-non_topmed-afr-XY=12544;AF-non_topmed-afr-XY=7.97194e-05;nhomalt-non_topmed-afr-XY=0;AC-non_neuro-eas=0;AN-non_neuro-eas=5092;AF-non_neuro-eas=0.00000;nhomalt-non_neuro-eas=0;AC-afr-XX=2;AN-afr-XX=21750;AF-afr-XX=9.19540e-05;nhomalt-afr-XX=0;AC-non_neuro-mid-XY=0;AN-non_neuro-mid-XY=146;AF-non_neuro-mid-XY=0.00000;nhomalt-non_neuro-mid-XY=0;AC-non_topmed-fin-XX=0;AN-non_topmed-fin-XX=2500;AF-non_topmed-fin-XX=0.00000;nhomalt-non_topmed-fin-XX=0;AC-non_cancer-amr=0;AN-non_cancer-amr=14934;AF-non_cancer-amr=0.00000;nhomalt-non_cancer-amr=0;AC-non_v2-ami-XX=0;AN-non_v2-ami-XX=468;AF-non_v2-ami-XX=0.00000;nhomalt-non_v2-ami-XX=0;AC-afr-XY=1;AN-afr-XY=18926;AF-afr-XY=5.28374e-05;nhomalt-afr-XY=0;AC-non_v2-mid-XX=0;AN-non_v2-mid-XX=162;AF-non_v2-mid-XX=0.00000;nhomalt-non_v2-mid-XX=0;AC-non_topmed-fin-XY=0;AN-non_topmed-fin-XY=7958;AF-non_topmed-fin-XY=0.00000;nhomalt-non_topmed-fin-XY=0;AC-non_neuro-amr-XY=0;AN-non_neuro-amr-XY=8148;AF-non_neuro-amr-XY=0.00000;nhomalt-non_neuro-amr-XY=0;AC-non_topmed-mid-XX=0;AN-non_topmed-mid-XX=142;AF-non_topmed-mid-XX=0.00000;nhomalt-non_topmed-mid-XX=0;AC-controls_and_biobanks-asj-XY=0;AN-controls_a
nd_biobanks-asj-XY=52;AF-controls_and_biobanks-asj-XY=0.00000;nhomalt-controls_and_biobanks-asj-XY=0;AC-non_v2-fin-XY=0;AN-non_v2-fin-XY=6068;AF-non_v2-fin-XY=0.00000;nhomalt-non_v2-fin-XY=0;AC-controls_and_biobanks-ami-XX=0;AN-controls_and_biobanks-ami-XX=30;AF-controls_and_biobanks-ami-XX=0.00000;nhomalt-controls_and_biobanks-ami-XX=0;AC-eas-XX=0;AN-eas-XX=2254;AF-eas-XX=0.00000;nhomalt-eas-XX=0;AC-non_cancer-amr-XY=0;AN-non_cancer-amr-XY=8266;AF-non_cancer-amr-XY=0.00000;nhomalt-non_cancer-amr-XY=0;AC-non_neuro-ami-XX=0;AN-non_neuro-ami-XX=432;AF-non_neuro-ami-XX=0.00000;nhomalt-non_neuro-ami-XX=0;AC-controls_and_biobanks=0;AN-controls_and_biobanks=32504;AF-controls_and_biobanks=0.00000;nhomalt-controls_and_biobanks=0;AC-controls_and_biobanks-oth=0;AN-controls_and_biobanks-oth=774;AF-controls_and_biobanks-oth=0.00000;nhomalt-controls_and_biobanks-oth=0;AC-nfe-XY=0;AN-nfe-XY=28540;AF-nfe-XY=0.00000;nhomalt-nfe-XY=0;AC-non_cancer-afr-XX=2;AN-non_cancer-afr-XX=21594;AF-non_cancer-afr-XX=9.26183e-05;nhomalt-non_cancer-afr-XX=0;AC-controls_and_biobanks-sas-XY=0;AN-controls_and_biobanks-sas-XY=2244;AF-controls_and_biobanks-sas-XY=0.00000;nhomalt-controls_and_biobanks-sas-XY=0;AC-non_cancer-oth=0;AN-non_cancer-oth=1988;AF-non_cancer-oth=0.00000;nhomalt-non_cancer-oth=0;AC-non_topmed-oth=0;AN-non_topmed-oth=1484;AF-non_topmed-oth=0.00000;nhomalt-non_topmed-oth=0;AC-non_topmed-nfe-XY=0;AN-non_topmed-nfe-XY=11620;AF-non_topmed-nfe-XY=0.00000;nhomalt-non_topmed-nfe-XY=0;AC-non_topmed-sas-XX=0;AN-non_topmed-sas-XX=1152;AF-non_topmed-sas-XX=0.00000;nhomalt-non_topmed-sas-XX=0;AC-non_v2-nfe=0;AN-non_v2-nfe=51794;AF-non_v2-nfe=0.00000;nhomalt-non_v2-nfe=0;AC-non_topmed-oth-XX=0;AN-non_topmed-oth-XX=672;AF-non_topmed-oth-XX=0.00000;nhomalt-non_topmed-oth-XX=0;AC-non_cancer-mid-XX=0;AN-non_cancer-mid-XX=160;AF-non_cancer-mid-XX=0.00000;nhomalt-non_cancer-mid-XX=0;AC-controls_and_biobanks-nfe=0;AN-controls_and_biobanks-nfe=6798;AF-controls_and_biobanks-nfe=0.00000;nhomalt-controls
_and_biobanks-nfe=0;AC-controls_and_biobanks-oth-XY=0;AN-controls_and_biobanks-oth-XY=416;AF-controls_and_biobanks-oth-XY=0.00000;nhomalt-controls_and_biobanks-oth-XY=0;AC-controls_and_biobanks-fin-XY=0;AN-controls_and_biobanks-fin-XY=4974;AF-controls_and_biobanks-fin-XY=0.00000;nhomalt-controls_and_biobanks-fin-XY=0;AC-non_v2-amr-XX=0;AN-non_v2-amr-XX=5960;AF-non_v2-amr-XX=0.00000;nhomalt-non_v2-amr-XX=0;AC-non_cancer-asj=0;AN-non_cancer-asj=3286;AF-non_cancer-asj=0.00000;nhomalt-non_cancer-asj=0;AC-non_cancer-oth-XX=0;AN-non_cancer-oth-XX=992;AF-non_cancer-oth-XX=0.00000;nhomalt-non_cancer-oth-XX=0;AC-non_neuro-amr=0;AN-non_neuro-amr=14682;AF-non_neuro-amr=0.00000;nhomalt-non_neuro-amr=0;AC-non_cancer-XX=2;AN-non_cancer-XX=75422;AF-non_cancer-XX=2.65175e-05;nhomalt-non_cancer-XX=0;AC-non_v2-ami-XY=0;AN-non_v2-ami-XY=440;AF-non_v2-ami-XY=0.00000;nhomalt-non_v2-ami-XY=0;AC-non_neuro-raw=3;AN-non_neuro-raw=134764;AF-non_neuro-raw=2.22611e-05;nhomalt-non_neuro-raw=0;AC-non_neuro-afr=2;AN-non_neuro-afr=31966;AF-non_neuro-afr=6.25665e-05;nhomalt-non_neuro-afr=0;AC-non_topmed-ami-XY=0;AN-non_topmed-ami-XY=44;AF-non_topmed-ami-XY=0.00000;nhomalt-non_topmed-ami-XY=0;AC-non_neuro-oth-XY=0;AN-non_neuro-oth-XY=990;AF-non_neuro-oth-XY=0.00000;nhomalt-non_neuro-oth-XY=0;AC-non_neuro-oth-XX=0;AN-non_neuro-oth-XX=980;AF-non_neuro-oth-XX=0.00000;nhomalt-non_neuro-oth-XX=0;AC-controls_and_biobanks-XX=0;AN-controls_and_biobanks-XX=13262;AF-controls_and_biobanks-XX=0.00000;nhomalt-controls_and_biobanks-XX=0;AC-non_cancer-afr-XY=1;AN-non_cancer-afr-XY=18766;AF-non_cancer-afr-XY=5.32879e-05;nhomalt-non_cancer-afr-XY=0;AC-non_cancer-fin=0;AN-non_cancer-fin=10550;AF-non_cancer-fin=0.00000;nhomalt-non_cancer-fin=0;AC-controls_and_biobanks-asj=0;AN-controls_and_biobanks-asj=134;AF-controls_and_biobanks-asj=0.00000;nhomalt-controls_and_biobanks-asj=0;AC-non_topmed-oth-XY=0;AN-non_topmed-oth-XY=812;AF-non_topmed-oth-XY=0.00000;nhomalt-non_topmed-oth-XY=0;AC-non_neuro-mid-XX=0;AN-non_neuro-mi
d-XX=164;AF-non_neuro-mid-XX=0.00000;nhomalt-non_neuro-mid-XX=0;AC-controls_and_biobanks-oth-XX=0;AN-controls_and_biobanks-oth-XX=358;AF-controls_and_biobanks-oth-XX=0.00000;nhomalt-controls_and_biobanks-oth-XX=0;AC-non_neuro-XY=1;AN-non_neuro-XY=63704;AF-non_neuro-XY=1.56976e-05;nhomalt-non_neuro-XY=0;AC-nfe=0;AN-nfe=67758;AF-nfe=0.00000;nhomalt-nfe=0;AC_popmax=3;AN_popmax=40676;AF_popmax=7.37536e-05;nhomalt_popmax=0;faf95-sas=0.00000;faf99-sas=0.00000;faf95-eas=0.00000;faf99-eas=0.00000;faf95-amr=0.00000;faf99-amr=0.00000;faf95-afr=1.95500e-05;faf99-afr=1.04200e-05;faf95=5.29000e-06;faf99=2.47000e-06;faf95-nfe=0.00000;faf99-nfe=0.00000;age_hist_het_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_het_n_smaller=1;age_hist_het_n_larger=0;age_hist_hom_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_hom_n_smaller=0;age_hist_hom_n_larger=0;FS=2.82574;MQ=29.6952;MQRankSum=0.0770000;QD=5.56039;ReadPosRankSum=0.405000;VarDP=207;QUALapprox=1151;AS_FS=4.28972;AS_MQ=29.6880;AS_MQRankSum=-0.429000;AS_pab_max=0.332306;AS_QD=5.44792;AS_ReadPosRankSum=1.14700;AS_SOR=1.84939;InbreedingCoeff=-2.63453e-05;AS_VQSLOD=-3.01260;AS_culprit=AS_MQ;allele_type=snv;n_alt_alleles=2;variant_type=multi-snv;segdup;gq_hist_alt_bin_freq=0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|3;gq_hist_all_bin_freq=0|0|0|0|39783|15257|9716|4889|2468|1467|768|285|210|143|89|82|54|34|32|76;dp_hist_alt_bin_freq=0|0|0|1|0|1|1|0|0|0|0|0|0|0|0|0|0|0|0|0;dp_hist_alt_n_larger=0;dp_hist_all_bin_freq=0|0|5915|31045|20271|12374|4976|552|128|35|24|9|9|5|3|2|2|0|3|0;dp_hist_all_n_smaller=0;dp_hist_all_n_larger=0;ab_hist_alt_bin_freq=0|0|0|0|0|2|0|1|0|0|0|0|0|0|0|0|0|0|0|0;cadd_raw_score=0.495494;cadd_phred=6.45700;vep=A|upstream_gene_variant|MODIFIER|OR11H1|ENSG00000130538|Transcript|ENST00000252835|protein_coding||||||||||1|22|1|SNV||HGNC|HGNC:15404|YES||P2|CCDS74807.1|ENSP00000252835||||||||||||||||,A|upstream_gene_variant|MODIFIER|OR11H1|ENSG00000130538|Transcript|ENST00000643195|protein_coding||||||||||1|55|1|SNV||HGNC|HGNC:15404|||A2||ENSP0000
0495403||||||||||||||||,A|upstream_gene_variant|MODIFIER|OR11H1|81061|Transcript|NM_001005239.1|protein_coding||||||||||1|22|1|SNV||EntrezGene|HGNC:15404|YES||||NP_001005239.1||||||||||||||||\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        /*[Fact]\n        public void GetMergedItems()\n        {\n            var sequence = new SimpleSequence(new string('A', VariantUtils.MaxUpstreamLength) + \"TTATCTTCTCTTCATTCTTAAAAAAGGAACACATTTTATA\", 15528100 - VariantUtils.MaxUpstreamLength);\n            var sequenceProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh38, sequence, ChromosomeUtilities.RefNameToChromosome);\n            var gnomadReader = new GnomadSnvReader(new StreamReader(GetChr22GenomeStream()), new StreamReader(GetChr22ExomeStream()), sequenceProvider);\n\n            var items = gnomadReader.GetCombinedItems().ToList();\n\n            Assert.Equal(7, items.Count);\n            Assert.Equal(15528101, items[0].Position);\n            Assert.Equal(15528106, items[1].Position);\n            Assert.Equal(15528107, items[2].Position);\n            Assert.Equal(15528109, items[3].Position);//merged item\n            Assert.Equal(15528126, items[4].Position);\n            Assert.Equal(15528135, items[5].Position);\n            Assert.Equal(15528137, items[6].Position);\n\n            //this is the merged item.\n            
Assert.Equal(\"\\\"coverage\\\":23,\\\"failedFilter\\\":true,\\\"allAf\\\":0.000476,\\\"allAn\\\":86114,\\\"allAc\\\":41,\\\"allHc\\\":0,\\\"afrAf\\\":0.003149,\\\"afrAn\\\":13018,\\\"afrAc\\\":41,\\\"afrHc\\\":0,\\\"amrAf\\\":0,\\\"amrAn\\\":11274,\\\"amrAc\\\":0,\\\"amrHc\\\":0,\\\"easAf\\\":0,\\\"easAn\\\":9354,\\\"easAc\\\":0,\\\"easHc\\\":0,\\\"finAf\\\":0,\\\"finAn\\\":5344,\\\"finAc\\\":0,\\\"finHc\\\":0,\\\"nfeAf\\\":0,\\\"nfeAn\\\":36372,\\\"nfeAc\\\":0,\\\"nfeHc\\\":0,\\\"asjAf\\\":0,\\\"asjAn\\\":1780,\\\"asjAc\\\":0,\\\"asjHc\\\":0,\\\"sasAf\\\":0,\\\"sasAn\\\":6060,\\\"sasAc\\\":0,\\\"sasHc\\\":0,\\\"othAf\\\":0,\\\"othAn\\\":2912,\\\"othAc\\\":0,\\\"othHc\\\":0,\\\"maleAf\\\":0.000335,\\\"maleAn\\\":44796,\\\"maleAc\\\":15,\\\"maleHc\\\":0,\\\"femaleAf\\\":0.000629,\\\"femaleAn\\\":41318,\\\"femaleAc\\\":26,\\\"femaleHc\\\":0,\\\"controlsAllAf\\\":0.000476,\\\"controlsAllAn\\\":33612,\\\"controlsAllAc\\\":16\", items[3].GetJsonString());\n        }\n        */\n\n        private static Stream GetChr22_21006257_genome()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##gnomAD\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            
writer.WriteLine(\"22\\t21006258\\trs992970331\\tTAAA\\tT\\t8.74487e+06\\tPASS\\tAC=8785;AN=23616;AF=0.371994;rf_tp_probability=0.780993;FS=3.613;InbreedingCoeff=-0.0098;MQ=59.37;MQRankSum=0.038;QD=27.94;ReadPosRankSum=0.328;SOR=0.737;VQSR_POSITIVE_TRAIN_SITE;BaseQRankSum=0.204;ClippingRankSum=0;DP=452644;VQSLOD=1.05;VQSR_culprit=FS;lcr;rf_positive_label;rf_label=TP;rf_train;variant_type=mixed;allele_type=del;n_alt_alleles=6;was_mixed;pab_max=1;gq_hist_alt_bin_freq=222|154|167|171|189|203|185|250|180|248|248|215|290|269|209|244|214|136|184|6868;gq_hist_all_bin_freq=1339|303|316|362|449|460|480|587|494|564|572|457|589|544|414|425|443|260|300|10317;dp_hist_alt_bin_freq=40|850|2172|2746|2354|1452|750|287|129|38|8|8|5|5|0|1|0|1|0|0;dp_hist_alt_n_larger=0;dp_hist_all_bin_freq=279|2039|4133|4783|4115|2626|1400|572|236|74|18|11|9|7|1|4|0|2|0|1;dp_hist_all_n_larger=0;ab_hist_alt_bin_freq=85|7|20|47|139|324|609|709|1183|809|1365|834|713|425|229|268|236|171|65|63;AC_nfe_seu=27;AN_nfe_seu=74;AF_nfe_seu=0.364865;nhomalt_nfe_seu=3;controls_AC_afr_male=236;controls_AN_afr_male=1098;controls_AF_afr_male=0.214936;controls_nhomalt_afr_male=27;non_topmed_AC_amr=281;non_topmed_AN_amr=548;non_topmed_AF_amr=0.512774;non_topmed_nhomalt_amr=65;AC_raw=10257;AN_raw=30508;AF_raw=0.336207;nhomalt_raw=1994;AC_fin_female=250;AN_fin_female=780;AF_fin_female=0.320513;nhomalt_fin_female=38;non_neuro_AC_asj_female=17;non_neuro_AN_asj_female=50;non_neuro_AF_asj_female=0.34;non_neuro_nhomalt_asj_female=3;non_neuro_AC_afr_male=266;non_neuro_AN_afr_male=1272;non_neuro_AF_afr_male=0.209119;non_neuro_nhomalt_afr_male=29;AC_afr_male=947;AN_afr_male=4094;AF_afr_male=0.231314;nhomalt_afr_male=114;AC_afr=1641;AN_afr=7126;AF_afr=0.230283;nhomalt_afr=183;non_neuro_AC_afr_female=333;non_neuro_AN_afr_female=1468;non_neuro_AF_afr_female=0.226839;non_neuro_nhomalt_afr_female=34;non_topmed_AC_amr_female=128;non_topmed_AN_amr_female=270;non_topmed_AF_amr_female=0.474074;non_topmed_nhomalt_amr_female=24;non_topmed_AC
_oth_female=144;non_topmed_AN_oth_female=354;non_topmed_AF_oth_female=0.40678;non_topmed_nhomalt_oth_female=29;AC_eas_female=75;AN_eas_female=464;AF_eas_female=0.161638;nhomalt_eas_female=6;AC_afr_female=694;AN_afr_female=3032;AF_afr_female=0.228892;nhomalt_afr_female=69;non_neuro_AC_female=2837;non_neuro_AN_female=7250;non_neuro_AF_female=0.39131;non_neuro_nhomalt_female=587;controls_AC_afr=459;controls_AN_afr=2006;controls_AF_afr=0.228814;controls_nhomalt_afr=51;AC_nfe_onf=831;AN_nfe_onf=1714;AF_nfe_onf=0.484831;nhomalt_nfe_onf=205;controls_AC_fin_male=76;controls_AN_fin_male=236;controls_AF_fin_male=0.322034;controls_nhomalt_fin_male=8;non_neuro_AC_nfe_nwe=3319;non_neuro_AN_nfe_nwe=6584;non_neuro_AF_nfe_nwe=0.504101;non_neuro_nhomalt_nfe_nwe=795;AC_fin_male=243;AN_fin_male=724;AF_fin_male=0.335635;nhomalt_fin_male=32;AC_nfe_female=2452;AN_nfe_female=5278;AF_nfe_female=0.46457;nhomalt_nfe_female=552;AC_amr=302;AN_amr=582;AF_amr=0.5189;nhomalt_amr=71;non_topmed_AC_nfe_male=1863;non_topmed_AN_nfe_male=3966;non_topmed_AF_nfe_male=0.469743;non_topmed_nhomalt_nfe_male=437;AC_eas=206;AN_eas=1372;AF_eas=0.150146;nhomalt_eas=17;nhomalt=1732;non_neuro_AC_nfe_female=2132;non_neuro_AN_nfe_female=4586;non_neuro_AF_nfe_female=0.464893;non_neuro_nhomalt_nfe_female=489;non_neuro_AC_afr=599;non_neuro_AN_afr=2740;non_neuro_AF_afr=0.218613;non_neuro_nhomalt_afr=63;controls_AC_raw=3012;controls_AN_raw=10336;controls_AF_raw=0.291409;controls_nhomalt_raw=537;controls_AC_male=1253;controls_AN_male=3856;controls_AF_male=0.324948;controls_nhomalt_male=225;non_topmed_AC_male=3484;non_topmed_AN_male=10244;non_topmed_AF_male=0.340102;non_topmed_nhomalt_male=665;controls_AC_nfe_female=642;controls_AN_nfe_female=1538;controls_AF_nfe_female=0.417425;controls_nhomalt_nfe_female=131;non_neuro_AC_amr=220;non_neuro_AN_amr=418;non_neuro_AF_amr=0.526316;non_neuro_nhomalt_amr=53;non_neuro_AC_eas_female=75;non_neuro_AN_eas_female=464;non_neuro_AF_eas_female=0.161638;non_neuro_nhomalt_eas_female=6;AC_a
sj_male=90;AN_asj_male=182;AF_asj_male=0.494505;nhomalt_asj_male=20;controls_AC_nfe_male=766;controls_AN_nfe_male=1798;controls_AF_nfe_male=0.426029;controls_nhomalt_nfe_male=160;non_neuro_AC_fin=139;non_neuro_AN_fin=438;non_neuro_AF_fin=0.317352;non_neuro_nhomalt_fin=18;AC_oth_female=149;AN_oth_female=366;AF_oth_female=0.407104;nhomalt_oth_female=30;controls_AC_nfe=1408;controls_AN_nfe=3336;controls_AF_nfe=0.422062;controls_nhomalt_nfe=291;controls_AC_oth_female=42;controls_AN_oth_female=122;controls_AF_oth_female=0.344262;controls_nhomalt_oth_female=6;controls_AC_asj=8;controls_AN_asj=28;controls_AF_asj=0.285714;controls_nhomalt_asj=0;non_neuro_AC_amr_male=102;non_neuro_AN_amr_male=186;non_neuro_AF_amr_male=0.548387;non_neuro_nhomalt_amr_male=27;controls_AC_nfe_nwe=239;controls_AN_nfe_nwe=510;controls_AF_nfe_nwe=0.468627;controls_nhomalt_nfe_nwe=54;AC_nfe_nwe=3663;AN_nfe_nwe=7306;AF_nfe_nwe=0.501369;nhomalt_nfe_nwe=862;controls_AC_nfe_seu=15;controls_AN_nfe_seu=40;controls_AF_nfe_seu=0.375;controls_nhomalt_nfe_seu=3;non_neuro_AC_amr_female=118;non_neuro_AN_amr_female=232;non_neuro_AF_amr_female=0.508621;non_neuro_nhomalt_amr_female=26;non_neuro_AC_nfe_onf=673;non_neuro_AN_nfe_onf=1414;non_neuro_AF_nfe_onf=0.475955;non_neuro_nhomalt_nfe_onf=155;non_topmed_AC_eas_male=130;non_topmed_AN_eas_male=892;non_topmed_AF_eas_male=0.14574;non_topmed_nhomalt_eas_male=11;controls_AC_amr_female=28;controls_AN_amr_female=60;controls_AF_amr_female=0.466667;controls_nhomalt_amr_female=5;non_neuro_AC_fin_male=76;non_neuro_AN_fin_male=236;non_neuro_AF_fin_male=0.322034;non_neuro_nhomalt_fin_male=8;AC_female=3780;AN_female=10272;AF_female=0.367991;nhomalt_female=726;non_neuro_AC_oth_male=108;non_neuro_AN_oth_male=274;non_neuro_AF_oth_male=0.394161;non_neuro_nhomalt_oth_male=21;non_topmed_AC_nfe_est=1206;non_topmed_AN_nfe_est=2938;non_topmed_AF_nfe_est=0.410483;non_topmed_nhomalt_nfe_est=240;non_topmed_AC_nfe_nwe=2010;non_topmed_AN_nfe_nwe=4018;non_topmed_AF_nfe_nwe=0.500249;non_topmed
_nhomalt_nfe_nwe=488;non_topmed_AC_amr_male=153;non_topmed_AN_amr_male=278;non_topmed_AF_amr_male=0.55036;non_topmed_nhomalt_amr_male=41;non_topmed_AC_nfe_onf=581;non_topmed_AN_nfe_onf=1172;non_topmed_AF_nfe_onf=0.495734;non_topmed_nhomalt_nfe_onf=152;controls_AC_eas_male=74;controls_AN_eas_male=514;controls_AF_eas_male=0.143969;controls_nhomalt_eas_male=6;controls_AC_oth_male=55;controls_AN_oth_male=124;controls_AF_oth_male=0.443548;controls_nhomalt_oth_male=12;non_topmed_AC=6738;non_topmed_AN=19350;non_topmed_AF=0.348217;non_topmed_nhomalt=1276;controls_AC_fin=139;controls_AN_fin=436;controls_AF_fin=0.318807;controls_nhomalt_fin=18;non_neuro_AC_nfe=5059;non_neuro_AN_nfe=10588;non_neuro_AF_nfe=0.477805;non_neuro_nhomalt_nfe=1167;non_neuro_AC_fin_female=63;non_neuro_AN_fin_female=202;non_neuro_AF_fin_female=0.311881;non_neuro_nhomalt_fin_female=10;non_topmed_AC_nfe_seu=27;non_topmed_AN_nfe_seu=74;non_topmed_AF_nfe_seu=0.364865;non_topmed_nhomalt_nfe_seu=3;controls_AC_eas_female=49;controls_AN_eas_female=298;controls_AF_eas_female=0.16443;controls_nhomalt_eas_female=3;non_topmed_AC_asj=50;non_topmed_AN_asj=110;non_topmed_AF_asj=0.454545;non_topmed_nhomalt_asj=9;controls_AC_nfe_onf=118;controls_AN_nfe_onf=266;controls_AF_nfe_onf=0.443609;controls_nhomalt_nfe_onf=23;non_neuro_AC=6526;non_neuro_AN=16292;non_neuro_AF=0.400565;non_neuro_nhomalt=1378;non_topmed_AC_nfe=3824;non_topmed_AN_nfe=8202;non_topmed_AF_nfe=0.466228;non_topmed_nhomalt_nfe=883;non_topmed_AC_raw=8100;non_topmed_AN_raw=25734;non_topmed_AF_raw=0.314759;non_topmed_nhomalt_raw=1515;non_neuro_AC_nfe_est=1052;non_neuro_AN_nfe_est=2552;non_neuro_AF_nfe_est=0.412226;non_neuro_nhomalt_nfe_est=214;non_topmed_AC_oth_male=130;non_topmed_AN_oth_male=314;non_topmed_AF_oth_male=0.414013;non_topmed_nhomalt_oth_male=25;AC_nfe_est=1215;AN_nfe_est=2958;AF_nfe_est=0.410751;nhomalt_nfe_est=241;non_topmed_AC_afr_male=930;non_topmed_AN_afr_male=4008;non_topmed_AF_afr_male=0.232036;non_topmed_nhomalt_afr_male=111;AC_eas_male=
131;AN_eas_male=908;AF_eas_male=0.144273;nhomalt_eas_male=11;controls_AC_eas=123;controls_AN_eas=812;controls_AF_eas=0.151478;controls_nhomalt_eas=9;non_neuro_AC_eas_male=131;non_neuro_AN_eas_male=908;non_neuro_AF_eas_male=0.144273;non_neuro_nhomalt_eas_male=11;non_neuro_AC_asj_male=79;non_neuro_AN_asj_male=164;non_neuro_AF_asj_male=0.481707;non_neuro_nhomalt_asj_male=17;controls_AC_oth=97;controls_AN_oth=246;controls_AF_oth=0.394309;controls_nhomalt_oth=18;AC_nfe=5736;AN_nfe=12052;AF_nfe=0.475938;nhomalt_nfe=1311;non_topmed_AC_female=3254;non_topmed_AN_female=9106;non_topmed_AF_female=0.357347;non_topmed_nhomalt_female=611;non_neuro_AC_asj=96;non_neuro_AN_asj=214;non_neuro_AF_asj=0.448598;non_neuro_nhomalt_asj=20;non_topmed_AC_eas_female=74;non_topmed_AN_eas_female=448;non_topmed_AF_eas_female=0.165179;non_topmed_nhomalt_eas_female=6;non_neuro_AC_raw=7450;non_neuro_AN_raw=20680;non_neuro_AF_raw=0.360251;non_neuro_nhomalt_raw=1557;non_topmed_AC_eas=204;non_topmed_AN_eas=1340;non_topmed_AF_eas=0.152239;non_topmed_nhomalt_eas=17;non_topmed_AC_fin_male=243;non_topmed_AN_fin_male=724;non_topmed_AF_fin_male=0.335635;non_topmed_nhomalt_fin_male=32;AC_fin=493;AN_fin=1504;AF_fin=0.327793;nhomalt_fin=70;AC_nfe_male=3284;AN_nfe_male=6774;AF_nfe_male=0.484795;nhomalt_nfe_male=759;controls_AC_amr_male=42;controls_AN_amr_male=76;controls_AF_amr_male=0.552632;controls_nhomalt_amr_male=12;controls_AC_afr_female=223;controls_AN_afr_female=908;controls_AF_afr_female=0.245595;controls_nhomalt_afr_female=24;controls_AC_amr=70;controls_AN_amr=136;controls_AF_amr=0.514706;controls_nhomalt_amr=17;AC_asj_female=21;AN_asj_female=64;AF_asj_female=0.328125;nhomalt_asj_female=3;non_neuro_AC_eas=206;non_neuro_AN_eas=1372;non_neuro_AF_eas=0.150146;non_neuro_nhomalt_eas=17;non_neuro_AC_male=3689;non_neuro_AN_male=9042;non_neuro_AF_male=0.407985;non_neuro_nhomalt_male=791;AC_asj=111;AN_asj=246;AF_asj=0.45122;nhomalt_asj=23;controls_AC_nfe_est=1036;controls_AN_nfe_est=2520;controls_AF_nfe_est=0.41
1111;controls_nhomalt_nfe_est=211;non_topmed_AC_asj_female=15;non_topmed_AN_asj_female=48;non_topmed_AF_asj_female=0.3125;non_topmed_nhomalt_asj_female=1;non_topmed_AC_oth=274;non_topmed_AN_oth=668;non_topmed_AF_oth=0.41018;non_topmed_nhomalt_oth=54;non_topmed_AC_fin_female=250;non_topmed_AN_fin_female=780;non_topmed_AF_fin_female=0.320513;non_topmed_nhomalt_fin_female=38;AC_oth=296;AN_oth=734;AF_oth=0.40327;nhomalt_oth=57;non_neuro_AC_nfe_male=2927;non_neuro_AN_nfe_male=6002;non_neuro_AF_nfe_male=0.487671;non_neuro_nhomalt_nfe_male=678;controls_AC_female=1051;controls_AN_female=3144;controls_AF_female=0.334288;controls_nhomalt_female=179;non_topmed_AC_fin=493;non_topmed_AN_fin=1504;non_topmed_AF_fin=0.327793;non_topmed_nhomalt_fin=70;non_topmed_AC_nfe_female=1961;non_topmed_AN_nfe_female=4236;non_topmed_AF_nfe_female=0.462937;non_topmed_nhomalt_nfe_female=446;controls_AC_asj_male=4;controls_AN_asj_male=10;controls_AF_asj_male=0.4;controls_nhomalt_asj_male=0;non_topmed_AC_asj_male=35;non_topmed_AN_asj_male=62;non_topmed_AF_asj_male=0.564516;non_topmed_nhomalt_asj_male=8;non_neuro_AC_oth=207;non_neuro_AN_oth=522;non_neuro_AF_oth=0.396552;non_neuro_nhomalt_oth=40;AC_male=5005;AN_male=13344;AF_male=0.375075;nhomalt_male=1006;controls_AC_fin_female=63;controls_AN_fin_female=200;controls_AF_fin_female=0.315;controls_nhomalt_fin_female=10;controls_AC_asj_female=4;controls_AN_asj_female=18;controls_AF_asj_female=0.222222;controls_nhomalt_asj_female=0;AC_amr_male=163;AN_amr_male=294;AF_amr_male=0.554422;nhomalt_amr_male=43;AC_amr_female=139;AN_amr_female=288;AF_amr_female=0.482639;nhomalt_amr_female=28;AC_oth_male=147;AN_oth_male=368;AF_oth_male=0.399457;nhomalt_oth_male=27;non_neuro_AC_nfe_seu=15;non_neuro_AN_nfe_seu=38;non_neuro_AF_nfe_seu=0.394737;non_neuro_nhomalt_nfe_seu=3;non_topmed_AC_afr_female=682;non_topmed_AN_afr_female=2970;non_topmed_AF_afr_female=0.22963;non_topmed_nhomalt_afr_female=67;non_topmed_AC_afr=1612;non_topmed_AN_afr=6978;non_topmed_AF_afr=0.231012;n
on_topmed_nhomalt_afr=178;controls_AC=2304;controls_AN=7000;controls_AF=0.329143;controls_nhomalt=404;non_neuro_AC_oth_female=99;non_neuro_AN_oth_female=248;non_neuro_AF_oth_female=0.399194;non_neuro_nhomalt_oth_female=19;non_topmed_faf95_amr=0.463516;non_topmed_faf99_amr=0.463517;faf95_afr=0.221013;faf99_afr=0.221013;controls_faf95_afr=0.211534;controls_faf99_afr=0.211534;faf95_amr=0.470781;faf99_amr=0.470782;faf95_eas=0.133362;faf99_eas=0.133363;faf95=0.36549;faf99=0.365489;non_neuro_faf95_afr=0.204131;non_neuro_faf99_afr=0.20413;non_neuro_faf95_amr=0.46934;non_neuro_faf99_amr=0.469341;controls_faf95_nfe=0.403732;controls_faf99_nfe=0.403733;non_topmed_faf95=0.341269;non_topmed_faf99=0.341269;non_neuro_faf95_nfe=0.466809;non_neuro_faf99_nfe=0.466809;non_neuro_faf95=0.392443;non_neuro_faf99=0.392443;non_topmed_faf95_nfe=0.453896;non_topmed_faf99_nfe=0.453895;controls_faf95_eas=0.129733;controls_faf99_eas=0.129733;faf95_nfe=0.465648;faf99_nfe=0.465648;non_topmed_faf95_eas=0.13514;non_topmed_faf99_eas=0.135141;controls_faf95_amr=0.417865;controls_faf99_amr=0.417865;non_neuro_faf95_eas=0.133362;non_neuro_faf99_eas=0.133363;non_topmed_faf95_afr=0.221629;non_topmed_faf99_afr=0.221629;controls_faf95=0.317945;controls_faf99=0.317945;controls_popmax=amr;controls_AC_popmax=70;controls_AN_popmax=136;controls_AF_popmax=0.514706;controls_nhomalt_popmax=17;popmax=amr;AC_popmax=302;AN_popmax=582;AF_popmax=0.5189;nhomalt_popmax=71;age_hist_het_bin_freq=187|208|358|521|663|483|366|190|104|64;age_hist_het_n_smaller=786;age_hist_het_n_larger=21;age_hist_hom_bin_freq=59|80|122|225|270|191|128|86|31|9;age_hist_hom_n_smaller=184;age_hist_hom_n_larger=5;non_neuro_popmax=amr;non_neuro_AC_popmax=220;non_neuro_AN_popmax=418;non_neuro_AF_popmax=0.526316;non_neuro_nhomalt_popmax=53;non_topmed_popmax=amr;non_topmed_AC_popmax=281;non_topmed_AN_popmax=548;non_topmed_AF_popmax=0.512774;non_topmed_nhomalt_popmax=65\");\n            writer.Flush();\n\n            stream.Position = 0;\n            
return stream;\n        }\n\n        private static Stream GetChr22_21006257_exome()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##gnomAD\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\"22\\t21006258\\trs992970331\\tTAAA\\tT\\t134845\\tAC0\\tAC=0;AN=0;rf_tp_probability=0.858094;FS=1.002;InbreedingCoeff=0.7272;MQ=60;MQRankSum=0.727;QD=37.76;ReadPosRankSum=0.736;SOR=1.029;VQSR_POSITIVE_TRAIN_SITE;BaseQRankSum=0.727;ClippingRankSum=0.731;DP=33139;VQSLOD=2.99;VQSR_culprit=MQRankSum;lcr;rf_positive_label;rf_label=TP;rf_train;variant_type=mixed;allele_type=del;n_alt_alleles=6;was_mixed;pab_max=1;gq_hist_alt_bin_freq=23|313|25|5|4|2|1|1|2|1|0|1|1|0|1|0|0|0|0|1;gq_hist_all_bin_freq=4777|1217|53|11|6|11|6|1|4|1|0|1|2|2|4|0|1|0|0|1;dp_hist_alt_bin_freq=373|8|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;dp_hist_alt_n_larger=0;dp_hist_all_bin_freq=164272|59|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;dp_hist_all_n_larger=0;ab_hist_alt_bin_freq=0|0|0|0|0|0|0|0|0|0|3|0|1|15|0|3|1|0|0|1;AC_nfe_seu=0;AN_nfe_seu=0;nhomalt_nfe_seu=0;controls_AC_afr_male=0;controls_AN_afr_male=0;controls_nhomalt_afr_male=0;non_neuro_AC_eas_kor=0;non_neuro_AN_eas_kor=0;non_neuro_nhomalt_eas_kor=0;non_topmed_AC_amr=0;non_topmed_AN_amr=0;non_topmed_nhomalt_amr=0;non_cancer_AC_asj_female=0;non_cancer_AN_asj_female=0;non_cancer_nhomalt_asj_female=0;AC_raw=578;AN_raw=9596;AF_raw=0.0602334;nhomalt_raw=279;AC_fin_female=0;AN_fin_female=0;nhomalt_fin_female=0;non_cancer_AC_oth_female=0;non_cancer_AN_oth_female=0;non_cancer_nhomalt_oth_female=0;AC_nfe_bgr=0;AN_nfe_bgr=0;nhomalt_nfe_bgr=0;non_neuro_AC_asj_female=0;non_neuro_AN_asj_female=0;non_neuro_nhomalt_asj_female=0;AC_sas_male=0;AN_sas_male=0;nhomalt_sas_male=0;non_neuro_AC_afr_male=0;non_neuro_AN_afr_male=0;non_neuro_nhomalt_afr_male=0;AC_afr_male=0;AN_afr_male=0;nhomalt_afr_male=0;AC_afr=0;AN_afr=0;
nhomalt_afr=0;controls_AC_nfe_swe=0;controls_AN_nfe_swe=0;controls_nhomalt_nfe_swe=0;non_neuro_AC_afr_female=0;non_neuro_AN_afr_female=0;non_neuro_nhomalt_afr_female=0;non_topmed_AC_amr_female=0;non_topmed_AN_amr_female=0;non_topmed_nhomalt_amr_female=0;non_cancer_AC_female=0;non_cancer_AN_female=0;non_cancer_nhomalt_female=0;non_cancer_AC_nfe_onf=0;non_cancer_AN_nfe_onf=0;non_cancer_nhomalt_nfe_onf=0;non_cancer_AC_male=0;non_cancer_AN_male=0;non_cancer_nhomalt_male=0;non_topmed_AC_oth_female=0;non_topmed_AN_oth_female=0;non_topmed_nhomalt_oth_female=0;AC_eas_female=0;AN_eas_female=0;nhomalt_eas_female=0;non_cancer_AC_sas_female=0;non_cancer_AN_sas_female=0;non_cancer_nhomalt_sas_female=0;AC_afr_female=0;AN_afr_female=0;nhomalt_afr_female=0;AC_sas=0;AN_sas=0;nhomalt_sas=0;non_neuro_AC_female=0;non_neuro_AN_female=0;non_neuro_nhomalt_female=0;controls_AC_afr=0;controls_AN_afr=0;controls_nhomalt_afr=0;non_neuro_AC_eas_jpn=0;non_neuro_AN_eas_jpn=0;non_neuro_nhomalt_eas_jpn=0;AC_nfe_onf=0;AN_nfe_onf=0;nhomalt_nfe_onf=0;non_cancer_AC_amr_male=0;non_cancer_AN_amr_male=0;non_cancer_nhomalt_amr_male=0;controls_AC_fin_male=0;controls_AN_fin_male=0;controls_nhomalt_fin_male=0;non_neuro_AC_nfe_nwe=0;non_neuro_AN_nfe_nwe=0;non_neuro_nhomalt_nfe_nwe=0;AC_fin_male=0;AN_fin_male=0;nhomalt_fin_male=0;AC_nfe_female=0;AN_nfe_female=0;nhomalt_nfe_female=0;AC_amr=0;AN_amr=0;nhomalt_amr=0;non_topmed_AC_nfe_male=0;non_topmed_AN_nfe_male=0;non_topmed_nhomalt_nfe_male=0;non_neuro_AC_sas=0;non_neuro_AN_sas=0;non_neuro_nhomalt_sas=0;non_cancer_AC_fin_male=0;non_cancer_AN_fin_male=0;non_cancer_nhomalt_fin_male=0;non_cancer_AC_nfe_seu=0;non_cancer_AN_nfe_seu=0;non_cancer_nhomalt_nfe_seu=0;AC_eas=0;AN_eas=0;nhomalt_eas=0;nhomalt=0;non_neuro_AC_nfe_female=0;non_neuro_AN_nfe_female=0;non_neuro_nhomalt_nfe_female=0;non_neuro_AC_afr=0;non_neuro_AN_afr=0;non_neuro_nhomalt_afr=0;controls_AC_raw=246;controls_AN_raw=4364;controls_AF_raw=0.0563703;controls_nhomalt_raw=119;non_cancer_AC_eas=0;non_cancer_
AN_eas=0;non_cancer_nhomalt_eas=0;non_cancer_AC_amr_female=0;non_cancer_AN_amr_female=0;non_cancer_nhomalt_amr_female=0;non_neuro_AC_nfe_swe=0;non_neuro_AN_nfe_swe=0;non_neuro_nhomalt_nfe_swe=0;controls_AC_male=0;controls_AN_male=0;controls_nhomalt_male=0;non_topmed_AC_male=0;non_topmed_AN_male=0;non_topmed_nhomalt_male=0;controls_AC_eas_jpn=0;controls_AN_eas_jpn=0;controls_nhomalt_eas_jpn=0;controls_AC_nfe_female=0;controls_AN_nfe_female=0;controls_nhomalt_nfe_female=0;non_neuro_AC_amr=0;non_neuro_AN_amr=0;non_neuro_nhomalt_amr=0;non_neuro_AC_eas_female=0;non_neuro_AN_eas_female=0;non_neuro_nhomalt_eas_female=0;AC_asj_male=0;AN_asj_male=0;nhomalt_asj_male=0;controls_AC_nfe_male=0;controls_AN_nfe_male=0;controls_nhomalt_nfe_male=0;non_neuro_AC_fin=0;non_neuro_AN_fin=0;non_neuro_nhomalt_fin=0;non_topmed_AC_sas=0;non_topmed_AN_sas=0;non_topmed_nhomalt_sas=0;non_cancer_AC_nfe_female=0;non_cancer_AN_nfe_female=0;non_cancer_nhomalt_nfe_female=0;AC_oth_female=0;AN_oth_female=0;nhomalt_oth_female=0;non_cancer_AC_asj=0;non_cancer_AN_asj=0;non_cancer_nhomalt_asj=0;AC_nfe_swe=0;AN_nfe_swe=0;nhomalt_nfe_swe=0;controls_AC_nfe=0;controls_AN_nfe=0;controls_nhomalt_nfe=0;controls_AC_oth_female=0;controls_AN_oth_female=0;controls_nhomalt_oth_female=0;controls_AC_asj=0;controls_AN_asj=0;controls_nhomalt_asj=0;non_neuro_AC_amr_male=0;non_neuro_AN_amr_male=0;non_neuro_nhomalt_amr_male=0;controls_AC_nfe_nwe=0;controls_AN_nfe_nwe=0;controls_nhomalt_nfe_nwe=0;AC_nfe_nwe=0;AN_nfe_nwe=0;nhomalt_nfe_nwe=0;controls_AC_nfe_seu=0;controls_AN_nfe_seu=0;controls_nhomalt_nfe_seu=0;controls_AC_sas_female=0;controls_AN_sas_female=0;controls_nhomalt_sas_female=0;non_neuro_AC_amr_female=0;non_neuro_AN_amr_female=0;non_neuro_nhomalt_amr_female=0;non_cancer_AC_eas_jpn=0;non_cancer_AN_eas_jpn=0;non_cancer_nhomalt_eas_jpn=0;non_neuro_AC_nfe_onf=0;non_neuro_AN_nfe_onf=0;non_neuro_nhomalt_nfe_onf=0;non_topmed_AC_eas_male=0;non_topmed_AN_eas_male=0;non_topmed_nhomalt_eas_male=0;AC_eas_jpn=0;AN_eas_jpn=0;nho
malt_eas_jpn=0;non_cancer_AC_afr_male=0;non_cancer_AN_afr_male=0;non_cancer_nhomalt_afr_male=0;non_cancer_AC_afr=0;non_cancer_AN_afr=0;non_cancer_nhomalt_afr=0;controls_AC_amr_female=0;controls_AN_amr_female=0;controls_nhomalt_amr_female=0;non_neuro_AC_fin_male=0;non_neuro_AN_fin_male=0;non_neuro_nhomalt_fin_male=0;AC_female=0;AN_female=0;nhomalt_female=0;non_neuro_AC_nfe_bgr=0;non_neuro_AN_nfe_bgr=0;non_neuro_nhomalt_nfe_bgr=0;non_neuro_AC_oth_male=0;non_neuro_AN_oth_male=0;non_neuro_nhomalt_oth_male=0;non_topmed_AC_nfe_est=0;non_topmed_AN_nfe_est=0;non_topmed_nhomalt_nfe_est=0;non_topmed_AC_nfe_nwe=0;non_topmed_AN_nfe_nwe=0;non_topmed_nhomalt_nfe_nwe=0;non_topmed_AC_amr_male=0;non_topmed_AN_amr_male=0;non_topmed_nhomalt_amr_male=0;non_cancer_AC_amr=0;non_cancer_AN_amr=0;non_cancer_nhomalt_amr=0;non_topmed_AC_nfe_swe=0;non_topmed_AN_nfe_swe=0;non_topmed_nhomalt_nfe_swe=0;non_topmed_AC_nfe_onf=0;non_topmed_AN_nfe_onf=0;non_topmed_nhomalt_nfe_onf=0;controls_AC_eas_kor=0;controls_AN_eas_kor=0;controls_nhomalt_eas_kor=0;non_topmed_AC_eas_oea=0;non_topmed_AN_eas_oea=0;non_topmed_nhomalt_eas_oea=0;controls_AC_eas_male=0;controls_AN_eas_male=0;controls_nhomalt_eas_male=0;controls_AC_oth_male=0;controls_AN_oth_male=0;controls_nhomalt_oth_male=0;non_topmed_AC=0;non_topmed_AN=0;non_topmed_nhomalt=0;controls_AC_fin=0;controls_AN_fin=0;controls_nhomalt_fin=0;AC_eas_kor=0;AN_eas_kor=0;nhomalt_eas_kor=0;non_neuro_AC_nfe=0;non_neuro_AN_nfe=0;non_neuro_nhomalt_nfe=0;non_neuro_AC_fin_female=0;non_neuro_AN_fin_female=0;non_neuro_nhomalt_fin_female=0;non_cancer_AC_nfe_male=0;non_cancer_AN_nfe_male=0;non_cancer_nhomalt_nfe_male=0;controls_AC_eas_oea=0;controls_AN_eas_oea=0;controls_nhomalt_eas_oea=0;non_topmed_AC_nfe_seu=0;non_topmed_AN_nfe_seu=0;non_topmed_nhomalt_nfe_seu=0;controls_AC_eas_female=0;controls_AN_eas_female=0;controls_nhomalt_eas_female=0;non_topmed_AC_asj=0;non_topmed_AN_asj=0;non_topmed_nhomalt_asj=0;controls_AC_nfe_onf=0;controls_AN_nfe_onf=0;controls_nhomalt_nfe_onf
=0;non_neuro_AC=0;non_neuro_AN=0;non_neuro_nhomalt=0;AC_eas_oea=0;AN_eas_oea=0;nhomalt_eas_oea=0;non_topmed_AC_nfe=0;non_topmed_AN_nfe=0;non_topmed_nhomalt_nfe=0;non_cancer_AC_oth=0;non_cancer_AN_oth=0;non_cancer_nhomalt_oth=0;non_topmed_AC_raw=574;non_topmed_AN_raw=9488;non_topmed_AF_raw=0.0604975;non_topmed_nhomalt_raw=277;non_neuro_AC_nfe_est=0;non_neuro_AN_nfe_est=0;non_neuro_nhomalt_nfe_est=0;non_topmed_AC_oth_male=0;non_topmed_AN_oth_male=0;non_topmed_nhomalt_oth_male=0;non_cancer_AC_oth_male=0;non_cancer_AN_oth_male=0;non_cancer_nhomalt_oth_male=0;AC_nfe_est=0;AN_nfe_est=0;nhomalt_nfe_est=0;non_cancer_AC_afr_female=0;non_cancer_AN_afr_female=0;non_cancer_nhomalt_afr_female=0;non_topmed_AC_afr_male=0;non_topmed_AN_afr_male=0;non_topmed_nhomalt_afr_male=0;AC_eas_male=0;AN_eas_male=0;nhomalt_eas_male=0;controls_AC_eas=0;controls_AN_eas=0;controls_nhomalt_eas=0;non_neuro_AC_eas_male=0;non_neuro_AN_eas_male=0;non_neuro_nhomalt_eas_male=0;non_cancer_AC_nfe_nwe=0;non_cancer_AN_nfe_nwe=0;non_cancer_nhomalt_nfe_nwe=0;controls_AC_sas=0;controls_AN_sas=0;controls_nhomalt_sas=0;non_neuro_AC_sas_male=0;non_neuro_AN_sas_male=0;non_neuro_nhomalt_sas_male=0;non_neuro_AC_asj_male=0;non_neuro_AN_asj_male=0;non_neuro_nhomalt_asj_male=0;non_cancer_AC_nfe_bgr=0;non_cancer_AN_nfe_bgr=0;non_cancer_nhomalt_nfe_bgr=0;controls_AC_oth=0;controls_AN_oth=0;controls_nhomalt_oth=0;non_cancer_AC_eas_female=0;non_cancer_AN_eas_female=0;non_cancer_nhomalt_eas_female=0;AC_nfe=0;AN_nfe=0;nhomalt_nfe=0;non_topmed_AC_female=0;non_topmed_AN_female=0;non_topmed_nhomalt_female=0;non_neuro_AC_asj=0;non_neuro_AN_asj=0;non_neuro_nhomalt_asj=0;non_topmed_AC_eas_female=0;non_topmed_AN_eas_female=0;non_topmed_nhomalt_eas_female=0;non_neuro_AC_raw=457;non_neuro_AN_raw=8156;non_neuro_AF_raw=0.0560324;non_neuro_nhomalt_raw=220;non_topmed_AC_eas=0;non_topmed_AN_eas=0;non_topmed_nhomalt_eas=0;non_topmed_AC_fin_male=0;non_topmed_AN_fin_male=0;non_topmed_nhomalt_fin_male=0;non_cancer_AC_asj_male=0;non_cancer_AN_
asj_male=0;non_cancer_nhomalt_asj_male=0;AC_fin=0;AN_fin=0;nhomalt_fin=0;AC_nfe_male=0;AN_nfe_male=0;nhomalt_nfe_male=0;non_topmed_AC_eas_kor=0;non_topmed_AN_eas_kor=0;non_topmed_nhomalt_eas_kor=0;controls_AC_amr_male=0;controls_AN_amr_male=0;controls_nhomalt_amr_male=0;non_neuro_AC_eas_oea=0;non_neuro_AN_eas_oea=0;non_neuro_nhomalt_eas_oea=0;AC_sas_female=0;AN_sas_female=0;nhomalt_sas_female=0;controls_AC_afr_female=0;controls_AN_afr_female=0;controls_nhomalt_afr_female=0;controls_AC_amr=0;controls_AN_amr=0;controls_nhomalt_amr=0;non_topmed_AC_eas_jpn=0;non_topmed_AN_eas_jpn=0;non_topmed_nhomalt_eas_jpn=0;AC_asj_female=0;AN_asj_female=0;nhomalt_asj_female=0;non_topmed_AC_nfe_bgr=0;non_topmed_AN_nfe_bgr=0;non_topmed_nhomalt_nfe_bgr=0;non_cancer_AC_nfe_est=0;non_cancer_AN_nfe_est=0;non_cancer_nhomalt_nfe_est=0;non_neuro_AC_eas=0;non_neuro_AN_eas=0;non_neuro_nhomalt_eas=0;non_cancer_AC_nfe=0;non_cancer_AN_nfe=0;non_cancer_nhomalt_nfe=0;non_neuro_AC_male=0;non_neuro_AN_male=0;non_neuro_nhomalt_male=0;non_neuro_AC_sas_female=0;non_neuro_AN_sas_female=0;non_neuro_nhomalt_sas_female=0;AC_asj=0;AN_asj=0;nhomalt_asj=0;controls_AC_nfe_est=0;controls_AN_nfe_est=0;controls_nhomalt_nfe_est=0;non_topmed_AC_asj_female=0;non_topmed_AN_asj_female=0;non_topmed_nhomalt_asj_female=0;non_cancer_AC_nfe_swe=0;non_cancer_AN_nfe_swe=0;non_cancer_nhomalt_nfe_swe=0;non_cancer_AC=0;non_cancer_AN=0;non_cancer_nhomalt=0;non_topmed_AC_oth=0;non_topmed_AN_oth=0;non_topmed_nhomalt_oth=0;non_topmed_AC_fin_female=0;non_topmed_AN_fin_female=0;non_topmed_nhomalt_fin_female=0;non_cancer_AC_fin_female=0;non_cancer_AN_fin_female=0;non_cancer_nhomalt_fin_female=0;AC_oth=0;AN_oth=0;nhomalt_oth=0;non_neuro_AC_nfe_male=0;non_neuro_AN_nfe_male=0;non_neuro_nhomalt_nfe_male=0;controls_AC_female=0;controls_AN_female=0;controls_nhomalt_female=0;non_cancer_AC_fin=0;non_cancer_AN_fin=0;non_cancer_nhomalt_fin=0;non_topmed_AC_fin=0;non_topmed_AN_fin=0;non_topmed_nhomalt_fin=0;non_cancer_AC_eas_oea=0;non_cancer_AN_eas
_oea=0;non_cancer_nhomalt_eas_oea=0;non_topmed_AC_nfe_female=0;non_topmed_AN_nfe_female=0;non_topmed_nhomalt_nfe_female=0;non_cancer_AC_sas_male=0;non_cancer_AN_sas_male=0;non_cancer_nhomalt_sas_male=0;controls_AC_asj_male=0;controls_AN_asj_male=0;controls_nhomalt_asj_male=0;non_cancer_AC_raw=562;non_cancer_AN_raw=9344;non_cancer_AF_raw=0.0601455;non_cancer_nhomalt_raw=272;non_cancer_AC_eas_male=0;non_cancer_AN_eas_male=0;non_cancer_nhomalt_eas_male=0;non_topmed_AC_asj_male=0;non_topmed_AN_asj_male=0;non_topmed_nhomalt_asj_male=0;non_neuro_AC_oth=0;non_neuro_AN_oth=0;non_neuro_nhomalt_oth=0;AC_male=0;AN_male=0;nhomalt_male=0;controls_AC_fin_female=0;controls_AN_fin_female=0;controls_nhomalt_fin_female=0;controls_AC_nfe_bgr=0;controls_AN_nfe_bgr=0;controls_nhomalt_nfe_bgr=0;controls_AC_asj_female=0;controls_AN_asj_female=0;controls_nhomalt_asj_female=0;AC_amr_male=0;AN_amr_male=0;nhomalt_amr_male=0;AC_amr_female=0;AN_amr_female=0;nhomalt_amr_female=0;non_topmed_AC_sas_male=0;non_topmed_AN_sas_male=0;non_topmed_nhomalt_sas_male=0;AC_oth_male=0;AN_oth_male=0;nhomalt_oth_male=0;non_cancer_AC_sas=0;non_cancer_AN_sas=0;non_cancer_nhomalt_sas=0;non_neuro_AC_nfe_seu=0;non_neuro_AN_nfe_seu=0;non_neuro_nhomalt_nfe_seu=0;non_cancer_AC_eas_kor=0;non_cancer_AN_eas_kor=0;non_cancer_nhomalt_eas_kor=0;non_topmed_AC_afr_female=0;non_topmed_AN_afr_female=0;non_topmed_nhomalt_afr_female=0;controls_AC_sas_male=0;controls_AN_sas_male=0;controls_nhomalt_sas_male=0;non_topmed_AC_sas_female=0;non_topmed_AN_sas_female=0;non_topmed_nhomalt_sas_female=0;non_topmed_AC_afr=0;non_topmed_AN_afr=0;non_topmed_nhomalt_afr=0;controls_AC=0;controls_AN=0;controls_nhomalt=0;non_neuro_AC_oth_female=0;non_neuro_AN_oth_female=0;non_neuro_nhomalt_oth_female=0;non_topmed_faf95_amr=0;non_topmed_faf99_amr=0;faf95_afr=0;faf99_afr=0;faf95_sas=0;faf99_sas=0;controls_faf95_afr=0;controls_faf99_afr=0;faf95_amr=0;faf99_amr=0;non_neuro_faf95_sas=0;non_neuro_faf99_sas=0;faf95_eas=0;faf99_eas=0;faf95=0;faf99=0;non_neur
o_faf95_afr=0;non_neuro_faf99_afr=0;non_cancer_faf95_eas=0;non_cancer_faf99_eas=0;non_neuro_faf95_amr=0;non_neuro_faf99_amr=0;non_topmed_faf95_sas=0;non_topmed_faf99_sas=0;controls_faf95_nfe=0;controls_faf99_nfe=0;non_cancer_faf95_afr=0;non_cancer_faf99_afr=0;non_cancer_faf95_amr=0;non_cancer_faf99_amr=0;non_topmed_faf95=0;non_topmed_faf99=0;non_neuro_faf95_nfe=0;non_neuro_faf99_nfe=0;non_neuro_faf95=0;non_neuro_faf99=0;non_topmed_faf95_nfe=0;non_topmed_faf99_nfe=0;controls_faf95_eas=0;controls_faf99_eas=0;controls_faf95_sas=0;controls_faf99_sas=0;faf95_nfe=0;faf99_nfe=0;non_topmed_faf95_eas=0;non_topmed_faf99_eas=0;controls_faf95_amr=0;controls_faf99_amr=0;non_neuro_faf95_eas=0;non_neuro_faf99_eas=0;non_cancer_faf95_nfe=0;non_cancer_faf99_nfe=0;non_cancer_faf95=0;non_cancer_faf99=0;non_cancer_faf95_sas=0;non_cancer_faf99_sas=0;non_topmed_faf95_afr=0;non_topmed_faf99_afr=0;controls_faf95=0;controls_faf99=0;age_hist_het_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_het_n_smaller=0;age_hist_het_n_larger=0;age_hist_hom_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_hom_n_smaller=0;age_hist_hom_n_larger=0\");\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        /*[Fact]\n        public void CombineCoverage_when_one_AN_is_zero()\n        {\n            var sequence = new SimpleSequence(new string('G', VariantUtils.MaxUpstreamLength) + \"TAAA\", 21006257 - VariantUtils.MaxUpstreamLength);\n            var sequenceProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh38, sequence, ChromosomeUtilities.RefNameToChromosome);\n\n            var gnomadReader = new GnomadSnvReader(new StreamReader(GetChr22_21006257_genome()), new StreamReader(GetChr22_21006257_exome()), sequenceProvider);\n\n            var items = gnomadReader.GetCombinedItems().ToList();\n\n            Assert.Single(items);\n            Assert.Equal(21, items[0].Coverage);\n        }*/\n\n        private static Stream GetChr22_22055876_genome()\n        {\n      
      var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##gnomAD\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\"22\\t22055876\\trs113132860\\tG\\tT\\t2.28838e+06\\tPASS\\tAC=228;AN=18686;AF=0.0122016;rf_tp_probability=0.374872;FS=2.447;InbreedingCoeff=0.0575;MQ=59.76;MQRankSum=-0.152;QD=15.22;ReadPosRankSum=-0.259;SOR=0.701;BaseQRankSum=-0.771;ClippingRankSum=0;DP=490478;VQSLOD=0.36;VQSR_culprit=FS;lcr;variant_type=mixed;allele_type=snv;n_alt_alleles=5;was_mixed;has_star;pab_max=1;gq_hist_alt_bin_freq=79|95|60|65|65|46|37|41|28|38|24|19|20|18|14|11|15|8|4|103;gq_hist_all_bin_freq=2799|865|616|841|825|614|753|768|561|775|568|463|553|397|310|247|235|179|221|5310;dp_hist_alt_bin_freq=18|144|272|199|78|56|18|5|0|0|0|0|0|0|0|0|0|0|0|0;dp_hist_alt_n_larger=0;dp_hist_all_bin_freq=95|956|2823|4449|4634|3537|2105|978|396|166|82|35|24|11|5|4|2|1|0|1;dp_hist_all_n_larger=6;ab_hist_alt_bin_freq=0|18|83|129|138|140|103|45|48|13|28|14|13|9|0|3|0|0|0|0;AC_nfe_seu=2;AN_nfe_seu=52;AF_nfe_seu=0.0384615;nhomalt_nfe_seu=0;controls_AC_afr_male=5;controls_AN_afr_male=806;controls_AF_afr_male=0.00620347;controls_nhomalt_afr_male=0;non_topmed_AC_amr=1;non_topmed_AN_amr=426;non_topmed_AF_amr=0.00234742;non_topmed_nhomalt_amr=0;AC_raw=559;AN_raw=27704;AF_raw=0.0201776;nhomalt_raw=4;AC_fin_female=32;AN_fin_female=618;AF_fin_female=0.0517799;nhomalt_fin_female=0;non_neuro_AC_asj_female=0;non_neuro_AN_asj_female=48;non_neuro_AF_asj_female=0;non_neuro_nhomalt_asj_female=0;non_neuro_AC_afr_male=5;non_neuro_AN_afr_male=914;non_neuro_AF_afr_male=0.00547046;non_neuro_nhomalt_afr_male=0;AC_afr_male=15;AN_afr_male=2982;AF_afr_male=0.00503018;nhomalt_afr_male=0;AC_afr=25;AN_afr=5118;AF_afr=0.00488472;nhomalt_afr=0;non_neuro_AC_afr_female=3;non_neuro_AN_afr_female=1086;non_neuro_AF_afr_female=0.00276243;non_neuro_nhomalt_afr_female=0;non_topmed_AC_amr_female=
1;non_topmed_AN_amr_female=200;non_topmed_AF_amr_female=0.005;non_topmed_nhomalt_amr_female=0;non_topmed_AC_oth_female=7;non_topmed_AN_oth_female=260;non_topmed_AF_oth_female=0.0269231;non_topmed_nhomalt_oth_female=0;AC_eas_female=2;AN_eas_female=486;AF_eas_female=0.00411523;nhomalt_eas_female=0;AC_afr_female=10;AN_afr_female=2136;AF_afr_female=0.00468165;nhomalt_afr_female=0;non_neuro_AC_female=70;non_neuro_AN_female=5902;non_neuro_AF_female=0.0118604;non_neuro_nhomalt_female=0;controls_AC_afr=8;controls_AN_afr=1498;controls_AF_afr=0.00534045;controls_nhomalt_afr=0;AC_nfe_onf=11;AN_nfe_onf=1346;AF_nfe_onf=0.00817236;nhomalt_nfe_onf=0;controls_AC_fin_male=10;controls_AN_fin_male=180;controls_AF_fin_male=0.0555556;controls_nhomalt_fin_male=0;non_neuro_AC_nfe_nwe=39;non_neuro_AN_nfe_nwe=5416;non_neuro_AF_nfe_nwe=0.00720089;non_neuro_nhomalt_nfe_nwe=0;AC_fin_male=26;AN_fin_male=508;AF_fin_male=0.0511811;nhomalt_fin_male=0;AC_nfe_female=54;AN_nfe_female=4252;AF_nfe_female=0.0126999;nhomalt_nfe_female=0;AC_amr=1;AN_amr=450;AF_amr=0.00222222;nhomalt_amr=0;non_topmed_AC_nfe_male=52;non_topmed_AN_nfe_male=3174;non_topmed_AF_nfe_male=0.0163831;non_topmed_nhomalt_nfe_male=0;AC_eas=4;AN_eas=1398;AF_eas=0.00286123;nhomalt_eas=0;nhomalt=0;non_neuro_AC_nfe_female=52;non_neuro_AN_nfe_female=3740;non_neuro_AF_nfe_female=0.0139037;non_neuro_nhomalt_nfe_female=0;non_neuro_AC_afr=8;non_neuro_AN_afr=2000;non_neuro_AF_afr=0.004;non_neuro_nhomalt_afr=0;controls_AC_raw=303;controls_AN_raw=9668;controls_AF_raw=0.0313405;controls_nhomalt_raw=0;controls_AC_male=65;controls_AN_male=3126;controls_AF_male=0.0207933;controls_nhomalt_male=0;non_topmed_AC_male=107;non_topmed_AN_male=8010;non_topmed_AF_male=0.0133583;non_topmed_nhomalt_male=0;controls_AC_nfe_female=40;controls_AN_nfe_female=1234;controls_AF_nfe_female=0.0324149;controls_nhomalt_nfe_female=0;non_neuro_AC_amr=0;non_neuro_AN_amr=320;non_neuro_AF_amr=0;non_neuro_nhomalt_amr=0;non_neuro_AC_eas_female=2;non_neuro_AN_eas_female=486;non_ne
uro_AF_eas_female=0.00411523;non_neuro_nhomalt_eas_female=0;AC_asj_male=3;AN_asj_male=156;AF_asj_male=0.0192308;nhomalt_asj_male=0;controls_AC_nfe_male=41;controls_AN_nfe_male=1460;controls_AF_nfe_male=0.0280822;controls_nhomalt_nfe_male=0;non_neuro_AC_fin=21;non_neuro_AN_fin=350;non_neuro_AF_fin=0.06;non_neuro_nhomalt_fin=0;AC_oth_female=7;AN_oth_female=274;AF_oth_female=0.0255474;nhomalt_oth_female=0;controls_AC_nfe=81;controls_AN_nfe=2694;controls_AF_nfe=0.0300668;controls_nhomalt_nfe=0;controls_AC_oth_female=2;controls_AN_oth_female=102;controls_AF_oth_female=0.0196078;controls_nhomalt_oth_female=0;controls_AC_asj=1;controls_AN_asj=18;controls_AF_asj=0.0555556;controls_nhomalt_asj=0;non_neuro_AC_amr_male=0;non_neuro_AN_amr_male=146;non_neuro_AF_amr_male=0;non_neuro_nhomalt_amr_male=0;controls_AC_nfe_nwe=13;controls_AN_nfe_nwe=342;controls_AF_nfe_nwe=0.0380117;controls_nhomalt_nfe_nwe=0;AC_nfe_nwe=47;AN_nfe_nwe=5898;AF_nfe_nwe=0.0079688;nhomalt_nfe_nwe=0;controls_AC_nfe_seu=2;controls_AN_nfe_seu=28;controls_AF_nfe_seu=0.0714286;controls_nhomalt_nfe_seu=0;non_neuro_AC_amr_female=0;non_neuro_AN_amr_female=174;non_neuro_AF_amr_female=0;non_neuro_nhomalt_amr_female=0;non_neuro_AC_nfe_onf=8;non_neuro_AN_nfe_onf=1138;non_neuro_AF_nfe_onf=0.00702988;non_neuro_nhomalt_nfe_onf=0;non_topmed_AC_eas_male=2;non_topmed_AN_eas_male=894;non_topmed_AF_eas_male=0.00223714;non_topmed_nhomalt_eas_male=0;controls_AC_amr_female=0;controls_AN_amr_female=48;controls_AF_amr_female=0;controls_nhomalt_amr_female=0;non_neuro_AC_fin_male=10;non_neuro_AN_fin_male=180;non_neuro_AF_fin_male=0.0555556;non_neuro_nhomalt_fin_male=0;AC_female=106;AN_female=8036;AF_female=0.0131906;nhomalt_female=0;non_neuro_AC_oth_male=6;non_neuro_AN_oth_male=222;non_neuro_AF_oth_male=0.027027;non_neuro_nhomalt_oth_male=0;non_topmed_AC_nfe_est=61;non_topmed_AN_nfe_est=2506;non_topmed_AF_nfe_est=0.0243416;non_topmed_nhomalt_nfe_est=0;non_topmed_AC_nfe_nwe=28;non_topmed_AN_nfe_nwe=3100;non_topmed_AF_nfe_nwe=0.0090322
6;non_topmed_nhomalt_nfe_nwe=0;non_topmed_AC_amr_male=0;non_topmed_AN_amr_male=226;non_topmed_AF_amr_male=0;non_topmed_nhomalt_amr_male=0;non_topmed_AC_nfe_onf=9;non_topmed_AN_nfe_onf=898;non_topmed_AF_nfe_onf=0.0100223;non_topmed_nhomalt_nfe_onf=0;controls_AC_eas_male=2;controls_AN_eas_male=514;controls_AF_eas_male=0.00389105;controls_nhomalt_eas_male=0;controls_AC_oth_male=6;controls_AN_oth_male=94;controls_AF_oth_male=0.0638298;controls_nhomalt_oth_male=0;non_topmed_AC=206;non_topmed_AN=15064;non_topmed_AF=0.013675;non_topmed_nhomalt=0;controls_AC_fin=21;controls_AN_fin=348;controls_AF_fin=0.0603448;controls_nhomalt_fin=0;non_neuro_AC_nfe=110;non_neuro_AN_nfe=8768;non_neuro_AF_nfe=0.0125456;non_neuro_nhomalt_nfe=0;non_neuro_AC_fin_female=11;non_neuro_AN_fin_female=170;non_neuro_AF_fin_female=0.0647059;non_neuro_nhomalt_fin_female=0;non_topmed_AC_nfe_seu=2;non_topmed_AN_nfe_seu=52;non_topmed_AF_nfe_seu=0.0384615;non_topmed_nhomalt_nfe_seu=0;controls_AC_eas_female=2;controls_AN_eas_female=314;controls_AF_eas_female=0.00636943;controls_nhomalt_eas_female=0;non_topmed_AC_asj=3;non_topmed_AN_asj=78;non_topmed_AF_asj=0.0384615;non_topmed_nhomalt_asj=0;controls_AC_nfe_onf=5;controls_AN_nfe_onf=164;controls_AF_nfe_onf=0.0304878;controls_nhomalt_nfe_onf=0;non_neuro_AC=152;non_neuro_AN=13452;non_neuro_AF=0.0112994;non_neuro_nhomalt=0;non_topmed_AC_nfe=100;non_topmed_AN_nfe=6556;non_topmed_AF_nfe=0.0152532;non_topmed_nhomalt_nfe=0;non_topmed_AC_raw=512;non_topmed_AN_raw=23304;non_topmed_AF_raw=0.0219705;non_topmed_nhomalt_raw=4;non_neuro_AC_nfe_est=61;non_neuro_AN_nfe_est=2188;non_neuro_AF_nfe_est=0.0278793;non_neuro_nhomalt_nfe_est=0;non_topmed_AC_oth_male=9;non_topmed_AN_oth_male=248;non_topmed_AF_oth_male=0.0362903;non_topmed_nhomalt_oth_male=0;AC_nfe_est=61;AN_nfe_est=2522;AF_nfe_est=0.0241872;nhomalt_nfe_est=0;non_topmed_AC_afr_male=15;non_topmed_AN_afr_male=2918;non_topmed_AF_afr_male=0.00514051;non_topmed_nhomalt_afr_male=0;AC_eas_male=2;AN_eas_male=912;AF_eas_male=0
.00219298;nhomalt_eas_male=0;controls_AC_eas=4;controls_AN_eas=828;controls_AF_eas=0.00483092;controls_nhomalt_eas=0;non_neuro_AC_eas_male=2;non_neuro_AN_eas_male=912;non_neuro_AF_eas_male=0.00219298;non_neuro_nhomalt_eas_male=0;non_neuro_AC_asj_male=1;non_neuro_AN_asj_male=148;non_neuro_AF_asj_male=0.00675676;non_neuro_nhomalt_asj_male=0;controls_AC_oth=8;controls_AN_oth=196;controls_AF_oth=0.0408163;controls_nhomalt_oth=0;AC_nfe=121;AN_nfe=9818;AF_nfe=0.0123243;nhomalt_nfe=0;non_topmed_AC_female=99;non_topmed_AN_female=7054;non_topmed_AF_female=0.0140346;non_topmed_nhomalt_female=0;non_neuro_AC_asj=1;non_neuro_AN_asj=196;non_neuro_AF_asj=0.00510204;non_neuro_nhomalt_asj=0;non_topmed_AC_eas_female=2;non_topmed_AN_eas_female=470;non_topmed_AF_eas_female=0.00425532;non_topmed_nhomalt_eas_female=0;non_neuro_AC_raw=362;non_neuro_AN_raw=19100;non_neuro_AF_raw=0.0189529;non_neuro_nhomalt_raw=0;non_topmed_AC_eas=4;non_topmed_AN_eas=1364;non_topmed_AF_eas=0.00293255;non_topmed_nhomalt_eas=0;non_topmed_AC_fin_male=26;non_topmed_AN_fin_male=508;non_topmed_AF_fin_male=0.0511811;non_topmed_nhomalt_fin_male=0;AC_fin=58;AN_fin=1126;AF_fin=0.0515098;nhomalt_fin=0;AC_nfe_male=67;AN_nfe_male=5566;AF_nfe_male=0.0120374;nhomalt_nfe_male=0;controls_AC_amr_male=0;controls_AN_amr_male=68;controls_AF_amr_male=0;controls_nhomalt_amr_male=0;controls_AC_afr_female=3;controls_AN_afr_female=692;controls_AF_afr_female=0.00433526;controls_nhomalt_afr_female=0;controls_AC_amr=0;controls_AN_amr=116;controls_AF_amr=0;controls_nhomalt_amr=0;AC_asj_female=0;AN_asj_female=54;AF_asj_female=0;nhomalt_asj_female=0;non_neuro_AC_eas=4;non_neuro_AN_eas=1398;non_neuro_AF_eas=0.00286123;non_neuro_nhomalt_eas=0;non_neuro_AC_male=82;non_neuro_AN_male=7550;non_neuro_AF_male=0.0108609;non_neuro_nhomalt_male=0;AC_asj=3;AN_asj=210;AF_asj=0.0142857;nhomalt_asj=0;controls_AC_nfe_est=61;controls_AN_nfe_est=2160;controls_AF_nfe_est=0.0282407;controls_nhomalt_nfe_est=0;non_topmed_AC_asj_female=0;non_topmed_AN_asj_femal
e=36;non_topmed_AF_asj_female=0;non_topmed_nhomalt_asj_female=0;non_topmed_AC_oth=16;non_topmed_AN_oth=508;non_topmed_AF_oth=0.0314961;non_topmed_nhomalt_oth=0;non_topmed_AC_fin_female=32;non_topmed_AN_fin_female=618;non_topmed_AF_fin_female=0.0517799;non_topmed_nhomalt_fin_female=0;AC_oth=16;AN_oth=566;AF_oth=0.0282686;nhomalt_oth=0;non_neuro_AC_nfe_male=58;non_neuro_AN_nfe_male=5028;non_neuro_AF_nfe_male=0.0115354;non_neuro_nhomalt_nfe_male=0;controls_AC_female=58;controls_AN_female=2572;controls_AF_female=0.0225505;controls_nhomalt_female=0;non_topmed_AC_fin=58;non_topmed_AN_fin=1126;non_topmed_AF_fin=0.0515098;non_topmed_nhomalt_fin=0;non_topmed_AC_nfe_female=48;non_topmed_AN_nfe_female=3382;non_topmed_AF_nfe_female=0.0141928;non_topmed_nhomalt_nfe_female=0;controls_AC_asj_male=1;controls_AN_asj_male=4;controls_AF_asj_male=0.25;controls_nhomalt_asj_male=0;non_topmed_AC_asj_male=3;non_topmed_AN_asj_male=42;non_topmed_AF_asj_male=0.0714286;non_topmed_nhomalt_asj_male=0;non_neuro_AC_oth=8;non_neuro_AN_oth=420;non_neuro_AF_oth=0.0190476;non_neuro_nhomalt_oth=0;AC_male=122;AN_male=10650;AF_male=0.0114554;nhomalt_male=0;controls_AC_fin_female=11;controls_AN_fin_female=168;controls_AF_fin_female=0.0654762;controls_nhomalt_fin_female=0;controls_AC_asj_female=0;controls_AN_asj_female=14;controls_AF_asj_female=0;controls_nhomalt_asj_female=0;AC_amr_male=0;AN_amr_male=234;AF_amr_male=0;nhomalt_amr_male=0;AC_amr_female=1;AN_amr_female=216;AF_amr_female=0.00462963;nhomalt_amr_female=0;AC_oth_male=9;AN_oth_male=292;AF_oth_male=0.0308219;nhomalt_oth_male=0;non_neuro_AC_nfe_seu=2;non_neuro_AN_nfe_seu=26;non_neuro_AF_nfe_seu=0.0769231;non_neuro_nhomalt_nfe_seu=0;non_topmed_AC_afr_female=9;non_topmed_AN_afr_female=2088;non_topmed_AF_afr_female=0.00431034;non_topmed_nhomalt_afr_female=0;non_topmed_AC_afr=24;non_topmed_AN_afr=5006;non_topmed_AF_afr=0.00479425;non_topmed_nhomalt_afr=0;controls_AC=123;controls_AN=5698;controls_AF=0.0215865;controls_nhomalt=0;non_neuro_AC_oth_female=2
;non_neuro_AN_oth_female=198;non_neuro_AF_oth_female=0.010101;non_neuro_nhomalt_oth_female=0;non_topmed_faf95_amr=0.00012;non_topmed_faf99_amr=0.00012;faf95_afr=0.00339534;faf99_afr=0.00339557;controls_faf95_afr=0.00265678;controls_faf99_afr=0.00265672;faf95_amr=0.000113;faf99_amr=0.000113;faf95_eas=0.00097636;faf99_eas=0.00097723;faf95=0.0109033;faf99=0.0109026;non_neuro_faf95_afr=0.00199003;non_neuro_faf99_afr=0.00198958;non_neuro_faf95_amr=0;non_neuro_faf99_amr=0;controls_faf95_nfe=0.0247904;controls_faf99_nfe=0.0247903;non_topmed_faf95=0.0121462;non_topmed_faf99=0.0121458;non_neuro_faf95_nfe=0.0106444;non_neuro_faf99_nfe=0.0106446;non_neuro_faf95=0.00983491;non_neuro_faf99=0.00983455;non_topmed_faf95_nfe=0.0128339;non_topmed_faf99_nfe=0.0128338;controls_faf95_eas=0.00164922;controls_faf99_eas=0.00164971;faf95_nfe=0.0105404;faf99_nfe=0.0105404;non_topmed_faf95_eas=0.00100131;non_topmed_faf99_eas=0.00100135;controls_faf95_amr=0;controls_faf99_amr=0;non_neuro_faf95_eas=0.00097636;non_neuro_faf99_eas=0.00097723;non_topmed_faf95_afr=0.00330485;non_topmed_faf99_afr=0.0033051;controls_faf95=0.018487;controls_faf99=0.0184878;controls_popmax=nfe;controls_AC_popmax=81;controls_AN_popmax=2694;controls_AF_popmax=0.0300668;controls_nhomalt_popmax=0;popmax=nfe;AC_popmax=121;AN_popmax=9818;AF_popmax=0.0123243;nhomalt_popmax=0;age_hist_het_bin_freq=12|8|16|15|13|24|18|13|9|2;age_hist_het_n_smaller=33;age_hist_het_n_larger=0;age_hist_hom_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_hom_n_smaller=0;age_hist_hom_n_larger=0;non_neuro_popmax=nfe;non_neuro_AC_popmax=110;non_neuro_AN_popmax=8768;non_neuro_AF_popmax=0.0125456;non_neuro_nhomalt_popmax=0;non_topmed_popmax=nfe;non_topmed_AC_popmax=100;non_topmed_AN_popmax=6556;non_topmed_AF_popmax=0.0152532;non_topmed_nhomalt_popmax=0\");\n            
writer.WriteLine(\"22\\t22055876\\trs78003688\\tG\\tT\\t4.27261e+06\\tRF\\tAC=195;AN=21390;AF=0.00911641;rf_tp_probability=0.0468734;FS=4.051;InbreedingCoeff=-0.0363;MQ=59.94;MQRankSum=-0.258;QD=21.64;ReadPosRankSum=-1.148;SOR=0.757;VQSR_NEGATIVE_TRAIN_SITE;BaseQRankSum=-1.472;ClippingRankSum=-0.047;DP=494695;VQSLOD=-1.473;VQSR_culprit=MQ;lcr;variant_type=snv;allele_type=snv;n_alt_alleles=1;has_star;pab_max=1;gq_hist_alt_bin_freq=88|80|83|69|65|58|49|34|24|24|28|14|24|9|14|6|7|6|5|40;gq_hist_all_bin_freq=4274|544|589|606|608|617|697|698|573|702|686|478|749|320|385|273|334|114|204|6729;dp_hist_alt_bin_freq=8|90|181|166|124|81|50|15|7|2|2|0|0|0|0|0|1|0|0|0;dp_hist_alt_n_larger=0;dp_hist_all_bin_freq=52|524|2070|3958|4924|4228|2550|1134|487|196|78|51|20|11|13|5|3|1|2|1;dp_hist_all_n_larger=2;ab_hist_alt_bin_freq=1|17|92|144|168|106|89|39|25|10|23|6|1|3|1|1|1|0|0|0;AC_nfe_seu=0;AN_nfe_seu=80;AF_nfe_seu=0;nhomalt_nfe_seu=0;controls_AC_afr_male=2;controls_AN_afr_male=1042;controls_AF_afr_male=0.00191939;controls_nhomalt_afr_male=0;non_topmed_AC_amr=3;non_topmed_AN_amr=558;non_topmed_AF_amr=0.00537634;non_topmed_nhomalt_amr=0;AC_raw=553;AN_raw=31242;AF_raw=0.0177005;nhomalt_raw=0;AC_fin_female=11;AN_fin_female=954;AF_fin_female=0.0115304;nhomalt_fin_female=0;non_neuro_AC_asj_female=0;non_neuro_AN_asj_female=42;non_neuro_AF_asj_female=0;non_neuro_nhomalt_asj_female=0;non_neuro_AC_afr_male=2;non_neuro_AN_afr_male=1210;non_neuro_AF_afr_male=0.00165289;non_neuro_nhomalt_afr_male=0;AC_afr_male=18;AN_afr_male=3792;AF_afr_male=0.00474684;nhomalt_afr_male=0;AC_afr=40;AN_afr=6628;AF_afr=0.006035;nhomalt_afr=0;non_neuro_AC_afr_female=10;non_neuro_AN_afr_female=1392;non_neuro_AF_afr_female=0.00718391;non_neuro_nhomalt_afr_female=0;non_topmed_AC_amr_female=0;non_topmed_AN_amr_female=278;non_topmed_AF_amr_female=0;non_topmed_nhomalt_amr_female=0;non_topmed_AC_oth_female=1;non_topmed_AN_oth_female=334;non_topmed_AF_oth_female=0.00299401;non_topmed_nhomalt_oth_female=0;AC_eas_female=3;AN
_eas_female=340;AF_eas_female=0.00882353;nhomalt_eas_female=0;AC_afr_female=22;AN_afr_female=2836;AF_afr_female=0.0077574;nhomalt_afr_female=0;non_neuro_AC_female=66;non_neuro_AN_female=6530;non_neuro_AF_female=0.0101072;non_neuro_nhomalt_female=0;controls_AC_afr=11;controls_AN_afr=1910;controls_AF_afr=0.00575916;controls_nhomalt_afr=0;AC_nfe_onf=13;AN_nfe_onf=1572;AF_nfe_onf=0.00826972;nhomalt_nfe_onf=0;controls_AC_fin_male=4;controls_AN_fin_male=328;controls_AF_fin_male=0.0121951;controls_nhomalt_fin_male=0;non_neuro_AC_nfe_nwe=49;non_neuro_AN_nfe_nwe=5624;non_neuro_AF_nfe_nwe=0.00871266;non_neuro_nhomalt_nfe_nwe=0;AC_fin_male=12;AN_fin_male=808;AF_fin_male=0.0148515;nhomalt_fin_male=0;AC_nfe_female=50;AN_nfe_female=4632;AF_nfe_female=0.0107945;nhomalt_nfe_female=0;AC_amr=3;AN_amr=590;AF_amr=0.00508475;nhomalt_amr=0;non_topmed_AC_nfe_male=42;non_topmed_AN_nfe_male=3640;non_topmed_AF_nfe_male=0.0115385;non_topmed_nhomalt_nfe_male=0;AC_eas=8;AN_eas=916;AF_eas=0.00873362;nhomalt_eas=0;nhomalt=0;non_neuro_AC_nfe_female=49;non_neuro_AN_nfe_female=4026;non_neuro_AF_nfe_female=0.0121709;non_neuro_nhomalt_nfe_female=0;non_neuro_AC_afr=12;non_neuro_AN_afr=2602;non_neuro_AF_afr=0.00461184;non_neuro_nhomalt_afr=0;controls_AC_raw=230;controls_AN_raw=10790;controls_AF_raw=0.021316;controls_nhomalt_raw=0;controls_AC_male=44;controls_AN_male=3586;controls_AF_male=0.0122699;controls_nhomalt_male=0;non_topmed_AC_male=84;non_topmed_AN_male=9378;non_topmed_AF_male=0.00895713;non_topmed_nhomalt_male=0;controls_AC_nfe_female=25;controls_AN_nfe_female=1444;controls_AF_nfe_female=0.017313;controls_nhomalt_nfe_female=0;non_neuro_AC_amr=2;non_neuro_AN_amr=390;non_neuro_AF_amr=0.00512821;non_neuro_nhomalt_amr=0;non_neuro_AC_eas_female=3;non_neuro_AN_eas_female=340;non_neuro_AF_eas_female=0.00882353;non_neuro_nhomalt_eas_female=0;AC_asj_male=0;AN_asj_male=154;AF_asj_male=0;nhomalt_asj_male=0;controls_AC_nfe_male=33;controls_AN_nfe_male=1688;controls_AF_nfe_male=0.0195498;controls_nhomalt_nf
e_male=0;non_neuro_AC_fin=7;non_neuro_AN_fin=602;non_neuro_AF_fin=0.0116279;non_neuro_nhomalt_fin=0;AC_oth_female=1;AN_oth_female=344;AF_oth_female=0.00290698;nhomalt_oth_female=0;controls_AC_nfe=58;controls_AN_nfe=3132;controls_AF_nfe=0.0185185;controls_nhomalt_nfe=0;controls_AC_oth_female=0;controls_AN_oth_female=122;controls_AF_oth_female=0;controls_nhomalt_oth_female=0;controls_AC_asj=0;controls_AN_asj=28;controls_AF_asj=0;controls_nhomalt_asj=0;non_neuro_AC_amr_male=2;non_neuro_AN_amr_male=162;non_neuro_AF_amr_male=0.0123457;non_neuro_nhomalt_amr_male=0;controls_AC_nfe_nwe=5;controls_AN_nfe_nwe=578;controls_AF_nfe_nwe=0.00865052;controls_nhomalt_nfe_nwe=0;AC_nfe_nwe=51;AN_nfe_nwe=6324;AF_nfe_nwe=0.00806452;nhomalt_nfe_nwe=0;controls_AC_nfe_seu=0;controls_AN_nfe_seu=44;controls_AF_nfe_seu=0;controls_nhomalt_nfe_seu=0;non_neuro_AC_amr_female=0;non_neuro_AN_amr_female=228;non_neuro_AF_amr_female=0;non_neuro_nhomalt_amr_female=0;non_neuro_AC_nfe_onf=12;non_neuro_AN_nfe_onf=1254;non_neuro_AF_nfe_onf=0.00956938;non_neuro_nhomalt_nfe_onf=0;non_topmed_AC_eas_male=5;non_topmed_AN_eas_male=564;non_topmed_AF_eas_male=0.00886525;non_topmed_nhomalt_eas_male=0;controls_AC_amr_female=0;controls_AN_amr_female=80;controls_AF_amr_female=0;controls_nhomalt_amr_female=0;non_neuro_AC_fin_male=4;non_neuro_AN_fin_male=328;non_neuro_AF_fin_male=0.0121951;non_neuro_nhomalt_fin_male=0;AC_female=87;AN_female=9456;AF_female=0.00920051;nhomalt_female=0;non_neuro_AC_oth_male=3;non_neuro_AN_oth_male=250;non_neuro_AF_oth_male=0.012;non_neuro_nhomalt_oth_male=0;non_topmed_AC_nfe_est=52;non_topmed_AN_nfe_est=2578;non_topmed_AF_nfe_est=0.0201707;non_topmed_nhomalt_nfe_est=0;non_topmed_AC_nfe_nwe=26;non_topmed_AN_nfe_nwe=3644;non_topmed_AF_nfe_nwe=0.00713502;non_topmed_nhomalt_nfe_nwe=0;non_topmed_AC_amr_male=3;non_topmed_AN_amr_male=280;non_topmed_AF_amr_male=0.0107143;non_topmed_nhomalt_amr_male=0;non_topmed_AC_nfe_onf=7;non_topmed_AN_nfe_onf=1098;non_topmed_AF_nfe_onf=0.00637523;non_topmed_nho
malt_nfe_onf=0;controls_AC_eas_male=2;controls_AN_eas_male=320;controls_AF_eas_male=0.00625;controls_nhomalt_eas_male=0;controls_AC_oth_male=1;controls_AN_oth_male=126;controls_AF_oth_male=0.00793651;controls_nhomalt_oth_male=0;non_topmed_AC=164;non_topmed_AN=17866;non_topmed_AF=0.00917945;non_topmed_nhomalt=0;controls_AC_fin=7;controls_AN_fin=600;controls_AF_fin=0.0116667;controls_nhomalt_fin=0;non_neuro_AC_nfe=112;non_neuro_AN_nfe=9188;non_neuro_AF_nfe=0.0121898;non_neuro_nhomalt_nfe=0;non_neuro_AC_fin_female=3;non_neuro_AN_fin_female=274;non_neuro_AF_fin_female=0.0109489;non_neuro_nhomalt_fin_female=0;non_topmed_AC_nfe_seu=0;non_topmed_AN_nfe_seu=80;non_topmed_AF_nfe_seu=0;non_topmed_nhomalt_nfe_seu=0;controls_AC_eas_female=1;controls_AN_eas_female=212;controls_AF_eas_female=0.00471698;controls_nhomalt_eas_female=0;non_topmed_AC_asj=0;non_topmed_AN_asj=100;non_topmed_AF_asj=0;non_topmed_nhomalt_asj=0;controls_AC_nfe_onf=2;controls_AN_nfe_onf=274;controls_AF_nfe_onf=0.00729927;controls_nhomalt_nfe_onf=0;non_neuro_AC=145;non_neuro_AN=14356;non_neuro_AF=0.0101003;non_neuro_nhomalt=0;non_topmed_AC_nfe=85;non_topmed_AN_nfe=7400;non_topmed_AF_nfe=0.0114865;non_topmed_nhomalt_nfe=0;non_topmed_AC_raw=461;non_topmed_AN_raw=26444;non_topmed_AF_raw=0.0174331;non_topmed_nhomalt_raw=0;non_neuro_AC_nfe_est=51;non_neuro_AN_nfe_est=2268;non_neuro_AF_nfe_est=0.0224868;non_neuro_nhomalt_nfe_est=0;non_topmed_AC_oth_male=4;non_topmed_AN_oth_male=320;non_topmed_AF_oth_male=0.0125;non_topmed_nhomalt_oth_male=0;AC_nfe_est=52;AN_nfe_est=2596;AF_nfe_est=0.0200308;nhomalt_nfe_est=0;non_topmed_AC_afr_male=18;non_topmed_AN_afr_male=3710;non_topmed_AF_afr_male=0.00485175;non_topmed_nhomalt_afr_male=0;AC_eas_male=5;AN_eas_male=576;AF_eas_male=0.00868056;nhomalt_eas_male=0;controls_AC_eas=3;controls_AN_eas=532;controls_AF_eas=0.0056391;controls_nhomalt_eas=0;non_neuro_AC_eas_male=5;non_neuro_AN_eas_male=576;non_neuro_AF_eas_male=0.00868056;non_neuro_nhomalt_eas_male=0;non_neuro_AC_asj_male=0;n
on_neuro_AN_asj_male=138;non_neuro_AF_asj_male=0;non_neuro_nhomalt_asj_male=0;controls_AC_oth=1;controls_AN_oth=248;controls_AF_oth=0.00403226;controls_nhomalt_oth=0;AC_nfe=116;AN_nfe=10572;AF_nfe=0.0109724;nhomalt_nfe=0;non_topmed_AC_female=80;non_topmed_AN_female=8488;non_topmed_AF_female=0.00942507;non_topmed_nhomalt_female=0;non_neuro_AC_asj=0;non_neuro_AN_asj=180;non_neuro_AF_asj=0;non_neuro_nhomalt_asj=0;non_topmed_AC_eas_female=3;non_topmed_AN_eas_female=332;non_topmed_AF_eas_female=0.00903614;non_topmed_nhomalt_eas_female=0;non_neuro_AC_raw=396;non_neuro_AN_raw=21158;non_neuro_AF_raw=0.0187163;non_neuro_nhomalt_raw=0;non_topmed_AC_eas=8;non_topmed_AN_eas=896;non_topmed_AF_eas=0.00892857;non_topmed_nhomalt_eas=0;non_topmed_AC_fin_male=12;non_topmed_AN_fin_male=808;non_topmed_AF_fin_male=0.0148515;non_topmed_nhomalt_fin_male=0;AC_fin=23;AN_fin=1762;AF_fin=0.0130533;nhomalt_fin=0;AC_nfe_male=66;AN_nfe_male=5940;AF_nfe_male=0.0111111;nhomalt_nfe_male=0;controls_AC_amr_male=2;controls_AN_amr_male=70;controls_AF_amr_male=0.0285714;controls_nhomalt_amr_male=0;controls_AC_afr_female=9;controls_AN_afr_female=868;controls_AF_afr_female=0.0103687;controls_nhomalt_afr_female=0;controls_AC_amr=2;controls_AN_amr=150;controls_AF_amr=0.0133333;controls_nhomalt_amr=0;AC_asj_female=0;AN_asj_female=56;AF_asj_female=0;nhomalt_asj_female=0;non_neuro_AC_eas=8;non_neuro_AN_eas=916;non_neuro_AF_eas=0.00873362;non_neuro_nhomalt_eas=0;non_neuro_AC_male=79;non_neuro_AN_male=7826;non_neuro_AF_male=0.0100946;non_neuro_nhomalt_male=0;AC_asj=0;AN_asj=210;AF_asj=0;nhomalt_asj=0;controls_AC_nfe_est=51;controls_AN_nfe_est=2236;controls_AF_nfe_est=0.0228086;controls_nhomalt_nfe_est=0;non_topmed_AC_asj_female=0;non_topmed_AN_asj_female=44;non_topmed_AF_asj_female=0;non_topmed_nhomalt_asj_female=0;non_topmed_AC_oth=5;non_topmed_AN_oth=654;non_topmed_AF_oth=0.00764526;non_topmed_nhomalt_oth=0;non_topmed_AC_fin_female=11;non_topmed_AN_fin_female=954;non_topmed_AF_fin_female=0.0115304;non_topmed_n
homalt_fin_female=0;AC_oth=5;AN_oth=712;AF_oth=0.00702247;nhomalt_oth=0;non_neuro_AC_nfe_male=63;non_neuro_AN_nfe_male=5162;non_neuro_AF_nfe_male=0.0122046;non_neuro_nhomalt_nfe_male=0;controls_AC_female=38;controls_AN_female=3014;controls_AF_female=0.0126078;controls_nhomalt_female=0;non_topmed_AC_fin=23;non_topmed_AN_fin=1762;non_topmed_AF_fin=0.0130533;non_topmed_nhomalt_fin=0;non_topmed_AC_nfe_female=43;non_topmed_AN_nfe_female=3760;non_topmed_AF_nfe_female=0.0114362;non_topmed_nhomalt_nfe_female=0;controls_AC_asj_male=0;controls_AN_asj_male=12;controls_AF_asj_male=0;controls_nhomalt_asj_male=0;non_topmed_AC_asj_male=0;non_topmed_AN_asj_male=56;non_topmed_AF_asj_male=0;non_topmed_nhomalt_asj_male=0;non_neuro_AC_oth=4;non_neuro_AN_oth=478;non_neuro_AF_oth=0.0083682;non_neuro_nhomalt_oth=0;AC_male=108;AN_male=11934;AF_male=0.00904977;nhomalt_male=0;controls_AC_fin_female=3;controls_AN_fin_female=272;controls_AF_fin_female=0.0110294;controls_nhomalt_fin_female=0;controls_AC_asj_female=0;controls_AN_asj_female=16;controls_AF_asj_female=0;controls_nhomalt_asj_female=0;AC_amr_male=3;AN_amr_male=296;AF_amr_male=0.0101351;nhomalt_amr_male=0;AC_amr_female=0;AN_amr_female=294;AF_amr_female=0;nhomalt_amr_female=0;AC_oth_male=4;AN_oth_male=368;AF_oth_male=0.0108696;nhomalt_oth_male=0;non_neuro_AC_nfe_seu=0;non_neuro_AN_nfe_seu=42;non_neuro_AF_nfe_seu=0;non_neuro_nhomalt_nfe_seu=0;non_topmed_AC_afr_female=22;non_topmed_AN_afr_female=2786;non_topmed_AF_afr_female=0.00789663;non_topmed_nhomalt_afr_female=0;non_topmed_AC_afr=40;non_topmed_AN_afr=6496;non_topmed_AF_afr=0.00615764;non_topmed_nhomalt_afr=0;controls_AC=82;controls_AN=6600;controls_AF=0.0124242;controls_nhomalt=0;non_neuro_AC_oth_female=1;non_neuro_AN_oth_female=228;non_neuro_AF_oth_female=0.00438596;non_neuro_nhomalt_oth_female=0;non_topmed_faf95_amr=0.00146497;non_topmed_faf99_amr=0.00146446;faf95_afr=0.00455485;faf99_afr=0.0045552;controls_faf95_afr=0.0032293;controls_faf99_afr=0.00322926;faf95_amr=0.00138541;faf
99_amr=0.00138591;faf95_eas=0.00434534;faf99_eas=0.00434526;faf95=0.00806925;faf99=0.00806959;non_neuro_faf95_afr=0.00266046;non_neuro_faf99_afr=0.00266027;non_neuro_faf95_amr=0.0009111;non_neuro_faf99_amr=0.00091051;controls_faf95_nfe=0.0147076;controls_faf99_nfe=0.014708;non_topmed_faf95=0.00803255;non_topmed_faf99=0.00803289;non_neuro_faf95_nfe=0.010359;non_neuro_faf99_nfe=0.0103585;non_neuro_faf95=0.00876127;non_neuro_faf99=0.00876123;non_topmed_faf95_nfe=0.00951637;non_topmed_faf99_nfe=0.00951633;controls_faf95_eas=0.00153645;controls_faf99_eas=0.00153604;faf95_nfe=0.00935211;faf99_nfe=0.00935142;non_topmed_faf95_eas=0.00444224;non_topmed_faf99_eas=0.00444266;controls_faf95_amr=0.00236834;controls_faf99_amr=0.00236814;non_neuro_faf95_eas=0.00434534;non_neuro_faf99_eas=0.00434526;non_topmed_faf95_afr=0.00464745;non_topmed_faf99_afr=0.0046478;controls_faf95=0.0102559;controls_faf99=0.0102563;controls_popmax=nfe;controls_AC_popmax=58;controls_AN_popmax=3132;controls_AF_popmax=0.0185185;controls_nhomalt_popmax=0;popmax=nfe;AC_popmax=116;AN_popmax=10572;AF_popmax=0.0109724;nhomalt_popmax=0;age_hist_het_bin_freq=14|10|14|12|23|18|16|7|5|4;age_hist_het_n_smaller=30;age_hist_het_n_larger=0;age_hist_hom_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_hom_n_smaller=0;age_hist_hom_n_larger=0;non_neuro_popmax=nfe;non_neuro_AC_popmax=112;non_neuro_AN_popmax=9188;non_neuro_AF_popmax=0.0121898;non_neuro_nhomalt_popmax=0;non_topmed_popmax=nfe;non_topmed_AC_popmax=85;non_topmed_AN_popmax=7400;non_topmed_AF_popmax=0.0114865;non_topmed_nhomalt_popmax=0\");\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        private static Stream GetChr22_22055876_exome()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##gnomAD\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            
writer.WriteLine(\"22\\t22055876\\trs113132860\\tG\\tT\\t2.28838e+06\\tPASS\\tAC=228;AN=18686;AF=0.0122016;rf_tp_probability=0.374872;FS=2.447;InbreedingCoeff=0.0575;MQ=59.76;MQRankSum=-0.152;QD=15.22;ReadPosRankSum=-0.259;SOR=0.701;BaseQRankSum=-0.771;ClippingRankSum=0;DP=490478;VQSLOD=0.36;VQSR_culprit=FS;lcr;variant_type=mixed;allele_type=snv;n_alt_alleles=5;was_mixed;has_star;pab_max=1;gq_hist_alt_bin_freq=79|95|60|65|65|46|37|41|28|38|24|19|20|18|14|11|15|8|4|103;gq_hist_all_bin_freq=2799|865|616|841|825|614|753|768|561|775|568|463|553|397|310|247|235|179|221|5310;dp_hist_alt_bin_freq=18|144|272|199|78|56|18|5|0|0|0|0|0|0|0|0|0|0|0|0;dp_hist_alt_n_larger=0;dp_hist_all_bin_freq=95|956|2823|4449|4634|3537|2105|978|396|166|82|35|24|11|5|4|2|1|0|1;dp_hist_all_n_larger=6;ab_hist_alt_bin_freq=0|18|83|129|138|140|103|45|48|13|28|14|13|9|0|3|0|0|0|0;AC_nfe_seu=2;AN_nfe_seu=52;AF_nfe_seu=0.0384615;nhomalt_nfe_seu=0;controls_AC_afr_male=5;controls_AN_afr_male=806;controls_AF_afr_male=0.00620347;controls_nhomalt_afr_male=0;non_topmed_AC_amr=1;non_topmed_AN_amr=426;non_topmed_AF_amr=0.00234742;non_topmed_nhomalt_amr=0;AC_raw=559;AN_raw=27704;AF_raw=0.0201776;nhomalt_raw=4;AC_fin_female=32;AN_fin_female=618;AF_fin_female=0.0517799;nhomalt_fin_female=0;non_neuro_AC_asj_female=0;non_neuro_AN_asj_female=48;non_neuro_AF_asj_female=0;non_neuro_nhomalt_asj_female=0;non_neuro_AC_afr_male=5;non_neuro_AN_afr_male=914;non_neuro_AF_afr_male=0.00547046;non_neuro_nhomalt_afr_male=0;AC_afr_male=15;AN_afr_male=2982;AF_afr_male=0.00503018;nhomalt_afr_male=0;AC_afr=25;AN_afr=5118;AF_afr=0.00488472;nhomalt_afr=0;non_neuro_AC_afr_female=3;non_neuro_AN_afr_female=1086;non_neuro_AF_afr_female=0.00276243;non_neuro_nhomalt_afr_female=0;non_topmed_AC_amr_female=1;non_topmed_AN_amr_female=200;non_topmed_AF_amr_female=0.005;non_topmed_nhomalt_amr_female=0;non_topmed_AC_oth_female=7;non_topmed_AN_oth_female=260;non_topmed_AF_oth_female=0.0269231;non_topmed_nhomalt_oth_female=0;AC_eas_female=2;AN_eas_
female=486;AF_eas_female=0.00411523;nhomalt_eas_female=0;AC_afr_female=10;AN_afr_female=2136;AF_afr_female=0.00468165;nhomalt_afr_female=0;non_neuro_AC_female=70;non_neuro_AN_female=5902;non_neuro_AF_female=0.0118604;non_neuro_nhomalt_female=0;controls_AC_afr=8;controls_AN_afr=1498;controls_AF_afr=0.00534045;controls_nhomalt_afr=0;AC_nfe_onf=11;AN_nfe_onf=1346;AF_nfe_onf=0.00817236;nhomalt_nfe_onf=0;controls_AC_fin_male=10;controls_AN_fin_male=180;controls_AF_fin_male=0.0555556;controls_nhomalt_fin_male=0;non_neuro_AC_nfe_nwe=39;non_neuro_AN_nfe_nwe=5416;non_neuro_AF_nfe_nwe=0.00720089;non_neuro_nhomalt_nfe_nwe=0;AC_fin_male=26;AN_fin_male=508;AF_fin_male=0.0511811;nhomalt_fin_male=0;AC_nfe_female=54;AN_nfe_female=4252;AF_nfe_female=0.0126999;nhomalt_nfe_female=0;AC_amr=1;AN_amr=450;AF_amr=0.00222222;nhomalt_amr=0;non_topmed_AC_nfe_male=52;non_topmed_AN_nfe_male=3174;non_topmed_AF_nfe_male=0.0163831;non_topmed_nhomalt_nfe_male=0;AC_eas=4;AN_eas=1398;AF_eas=0.00286123;nhomalt_eas=0;nhomalt=0;non_neuro_AC_nfe_female=52;non_neuro_AN_nfe_female=3740;non_neuro_AF_nfe_female=0.0139037;non_neuro_nhomalt_nfe_female=0;non_neuro_AC_afr=8;non_neuro_AN_afr=2000;non_neuro_AF_afr=0.004;non_neuro_nhomalt_afr=0;controls_AC_raw=303;controls_AN_raw=9668;controls_AF_raw=0.0313405;controls_nhomalt_raw=0;controls_AC_male=65;controls_AN_male=3126;controls_AF_male=0.0207933;controls_nhomalt_male=0;non_topmed_AC_male=107;non_topmed_AN_male=8010;non_topmed_AF_male=0.0133583;non_topmed_nhomalt_male=0;controls_AC_nfe_female=40;controls_AN_nfe_female=1234;controls_AF_nfe_female=0.0324149;controls_nhomalt_nfe_female=0;non_neuro_AC_amr=0;non_neuro_AN_amr=320;non_neuro_AF_amr=0;non_neuro_nhomalt_amr=0;non_neuro_AC_eas_female=2;non_neuro_AN_eas_female=486;non_neuro_AF_eas_female=0.00411523;non_neuro_nhomalt_eas_female=0;AC_asj_male=3;AN_asj_male=156;AF_asj_male=0.0192308;nhomalt_asj_male=0;controls_AC_nfe_male=41;controls_AN_nfe_male=1460;controls_AF_nfe_male=0.0280822;controls_nhomalt_nfe_male=0;
non_neuro_AC_fin=21;non_neuro_AN_fin=350;non_neuro_AF_fin=0.06;non_neuro_nhomalt_fin=0;AC_oth_female=7;AN_oth_female=274;AF_oth_female=0.0255474;nhomalt_oth_female=0;controls_AC_nfe=81;controls_AN_nfe=2694;controls_AF_nfe=0.0300668;controls_nhomalt_nfe=0;controls_AC_oth_female=2;controls_AN_oth_female=102;controls_AF_oth_female=0.0196078;controls_nhomalt_oth_female=0;controls_AC_asj=1;controls_AN_asj=18;controls_AF_asj=0.0555556;controls_nhomalt_asj=0;non_neuro_AC_amr_male=0;non_neuro_AN_amr_male=146;non_neuro_AF_amr_male=0;non_neuro_nhomalt_amr_male=0;controls_AC_nfe_nwe=13;controls_AN_nfe_nwe=342;controls_AF_nfe_nwe=0.0380117;controls_nhomalt_nfe_nwe=0;AC_nfe_nwe=47;AN_nfe_nwe=5898;AF_nfe_nwe=0.0079688;nhomalt_nfe_nwe=0;controls_AC_nfe_seu=2;controls_AN_nfe_seu=28;controls_AF_nfe_seu=0.0714286;controls_nhomalt_nfe_seu=0;non_neuro_AC_amr_female=0;non_neuro_AN_amr_female=174;non_neuro_AF_amr_female=0;non_neuro_nhomalt_amr_female=0;non_neuro_AC_nfe_onf=8;non_neuro_AN_nfe_onf=1138;non_neuro_AF_nfe_onf=0.00702988;non_neuro_nhomalt_nfe_onf=0;non_topmed_AC_eas_male=2;non_topmed_AN_eas_male=894;non_topmed_AF_eas_male=0.00223714;non_topmed_nhomalt_eas_male=0;controls_AC_amr_female=0;controls_AN_amr_female=48;controls_AF_amr_female=0;controls_nhomalt_amr_female=0;non_neuro_AC_fin_male=10;non_neuro_AN_fin_male=180;non_neuro_AF_fin_male=0.0555556;non_neuro_nhomalt_fin_male=0;AC_female=106;AN_female=8036;AF_female=0.0131906;nhomalt_female=0;non_neuro_AC_oth_male=6;non_neuro_AN_oth_male=222;non_neuro_AF_oth_male=0.027027;non_neuro_nhomalt_oth_male=0;non_topmed_AC_nfe_est=61;non_topmed_AN_nfe_est=2506;non_topmed_AF_nfe_est=0.0243416;non_topmed_nhomalt_nfe_est=0;non_topmed_AC_nfe_nwe=28;non_topmed_AN_nfe_nwe=3100;non_topmed_AF_nfe_nwe=0.00903226;non_topmed_nhomalt_nfe_nwe=0;non_topmed_AC_amr_male=0;non_topmed_AN_amr_male=226;non_topmed_AF_amr_male=0;non_topmed_nhomalt_amr_male=0;non_topmed_AC_nfe_onf=9;non_topmed_AN_nfe_onf=898;non_topmed_AF_nfe_onf=0.0100223;non_topmed_nhomalt_n
fe_onf=0;controls_AC_eas_male=2;controls_AN_eas_male=514;controls_AF_eas_male=0.00389105;controls_nhomalt_eas_male=0;controls_AC_oth_male=6;controls_AN_oth_male=94;controls_AF_oth_male=0.0638298;controls_nhomalt_oth_male=0;non_topmed_AC=206;non_topmed_AN=15064;non_topmed_AF=0.013675;non_topmed_nhomalt=0;controls_AC_fin=21;controls_AN_fin=348;controls_AF_fin=0.0603448;controls_nhomalt_fin=0;non_neuro_AC_nfe=110;non_neuro_AN_nfe=8768;non_neuro_AF_nfe=0.0125456;non_neuro_nhomalt_nfe=0;non_neuro_AC_fin_female=11;non_neuro_AN_fin_female=170;non_neuro_AF_fin_female=0.0647059;non_neuro_nhomalt_fin_female=0;non_topmed_AC_nfe_seu=2;non_topmed_AN_nfe_seu=52;non_topmed_AF_nfe_seu=0.0384615;non_topmed_nhomalt_nfe_seu=0;controls_AC_eas_female=2;controls_AN_eas_female=314;controls_AF_eas_female=0.00636943;controls_nhomalt_eas_female=0;non_topmed_AC_asj=3;non_topmed_AN_asj=78;non_topmed_AF_asj=0.0384615;non_topmed_nhomalt_asj=0;controls_AC_nfe_onf=5;controls_AN_nfe_onf=164;controls_AF_nfe_onf=0.0304878;controls_nhomalt_nfe_onf=0;non_neuro_AC=152;non_neuro_AN=13452;non_neuro_AF=0.0112994;non_neuro_nhomalt=0;non_topmed_AC_nfe=100;non_topmed_AN_nfe=6556;non_topmed_AF_nfe=0.0152532;non_topmed_nhomalt_nfe=0;non_topmed_AC_raw=512;non_topmed_AN_raw=23304;non_topmed_AF_raw=0.0219705;non_topmed_nhomalt_raw=4;non_neuro_AC_nfe_est=61;non_neuro_AN_nfe_est=2188;non_neuro_AF_nfe_est=0.0278793;non_neuro_nhomalt_nfe_est=0;non_topmed_AC_oth_male=9;non_topmed_AN_oth_male=248;non_topmed_AF_oth_male=0.0362903;non_topmed_nhomalt_oth_male=0;AC_nfe_est=61;AN_nfe_est=2522;AF_nfe_est=0.0241872;nhomalt_nfe_est=0;non_topmed_AC_afr_male=15;non_topmed_AN_afr_male=2918;non_topmed_AF_afr_male=0.00514051;non_topmed_nhomalt_afr_male=0;AC_eas_male=2;AN_eas_male=912;AF_eas_male=0.00219298;nhomalt_eas_male=0;controls_AC_eas=4;controls_AN_eas=828;controls_AF_eas=0.00483092;controls_nhomalt_eas=0;non_neuro_AC_eas_male=2;non_neuro_AN_eas_male=912;non_neuro_AF_eas_male=0.00219298;non_neuro_nhomalt_eas_male=0;non_neuro_A
C_asj_male=1;non_neuro_AN_asj_male=148;non_neuro_AF_asj_male=0.00675676;non_neuro_nhomalt_asj_male=0;controls_AC_oth=8;controls_AN_oth=196;controls_AF_oth=0.0408163;controls_nhomalt_oth=0;AC_nfe=121;AN_nfe=9818;AF_nfe=0.0123243;nhomalt_nfe=0;non_topmed_AC_female=99;non_topmed_AN_female=7054;non_topmed_AF_female=0.0140346;non_topmed_nhomalt_female=0;non_neuro_AC_asj=1;non_neuro_AN_asj=196;non_neuro_AF_asj=0.00510204;non_neuro_nhomalt_asj=0;non_topmed_AC_eas_female=2;non_topmed_AN_eas_female=470;non_topmed_AF_eas_female=0.00425532;non_topmed_nhomalt_eas_female=0;non_neuro_AC_raw=362;non_neuro_AN_raw=19100;non_neuro_AF_raw=0.0189529;non_neuro_nhomalt_raw=0;non_topmed_AC_eas=4;non_topmed_AN_eas=1364;non_topmed_AF_eas=0.00293255;non_topmed_nhomalt_eas=0;non_topmed_AC_fin_male=26;non_topmed_AN_fin_male=508;non_topmed_AF_fin_male=0.0511811;non_topmed_nhomalt_fin_male=0;AC_fin=58;AN_fin=1126;AF_fin=0.0515098;nhomalt_fin=0;AC_nfe_male=67;AN_nfe_male=5566;AF_nfe_male=0.0120374;nhomalt_nfe_male=0;controls_AC_amr_male=0;controls_AN_amr_male=68;controls_AF_amr_male=0;controls_nhomalt_amr_male=0;controls_AC_afr_female=3;controls_AN_afr_female=692;controls_AF_afr_female=0.00433526;controls_nhomalt_afr_female=0;controls_AC_amr=0;controls_AN_amr=116;controls_AF_amr=0;controls_nhomalt_amr=0;AC_asj_female=0;AN_asj_female=54;AF_asj_female=0;nhomalt_asj_female=0;non_neuro_AC_eas=4;non_neuro_AN_eas=1398;non_neuro_AF_eas=0.00286123;non_neuro_nhomalt_eas=0;non_neuro_AC_male=82;non_neuro_AN_male=7550;non_neuro_AF_male=0.0108609;non_neuro_nhomalt_male=0;AC_asj=3;AN_asj=210;AF_asj=0.0142857;nhomalt_asj=0;controls_AC_nfe_est=61;controls_AN_nfe_est=2160;controls_AF_nfe_est=0.0282407;controls_nhomalt_nfe_est=0;non_topmed_AC_asj_female=0;non_topmed_AN_asj_female=36;non_topmed_AF_asj_female=0;non_topmed_nhomalt_asj_female=0;non_topmed_AC_oth=16;non_topmed_AN_oth=508;non_topmed_AF_oth=0.0314961;non_topmed_nhomalt_oth=0;non_topmed_AC_fin_female=32;non_topmed_AN_fin_female=618;non_topmed_AF_fin_femal
e=0.0517799;non_topmed_nhomalt_fin_female=0;AC_oth=16;AN_oth=566;AF_oth=0.0282686;nhomalt_oth=0;non_neuro_AC_nfe_male=58;non_neuro_AN_nfe_male=5028;non_neuro_AF_nfe_male=0.0115354;non_neuro_nhomalt_nfe_male=0;controls_AC_female=58;controls_AN_female=2572;controls_AF_female=0.0225505;controls_nhomalt_female=0;non_topmed_AC_fin=58;non_topmed_AN_fin=1126;non_topmed_AF_fin=0.0515098;non_topmed_nhomalt_fin=0;non_topmed_AC_nfe_female=48;non_topmed_AN_nfe_female=3382;non_topmed_AF_nfe_female=0.0141928;non_topmed_nhomalt_nfe_female=0;controls_AC_asj_male=1;controls_AN_asj_male=4;controls_AF_asj_male=0.25;controls_nhomalt_asj_male=0;non_topmed_AC_asj_male=3;non_topmed_AN_asj_male=42;non_topmed_AF_asj_male=0.0714286;non_topmed_nhomalt_asj_male=0;non_neuro_AC_oth=8;non_neuro_AN_oth=420;non_neuro_AF_oth=0.0190476;non_neuro_nhomalt_oth=0;AC_male=122;AN_male=10650;AF_male=0.0114554;nhomalt_male=0;controls_AC_fin_female=11;controls_AN_fin_female=168;controls_AF_fin_female=0.0654762;controls_nhomalt_fin_female=0;controls_AC_asj_female=0;controls_AN_asj_female=14;controls_AF_asj_female=0;controls_nhomalt_asj_female=0;AC_amr_male=0;AN_amr_male=234;AF_amr_male=0;nhomalt_amr_male=0;AC_amr_female=1;AN_amr_female=216;AF_amr_female=0.00462963;nhomalt_amr_female=0;AC_oth_male=9;AN_oth_male=292;AF_oth_male=0.0308219;nhomalt_oth_male=0;non_neuro_AC_nfe_seu=2;non_neuro_AN_nfe_seu=26;non_neuro_AF_nfe_seu=0.0769231;non_neuro_nhomalt_nfe_seu=0;non_topmed_AC_afr_female=9;non_topmed_AN_afr_female=2088;non_topmed_AF_afr_female=0.00431034;non_topmed_nhomalt_afr_female=0;non_topmed_AC_afr=24;non_topmed_AN_afr=5006;non_topmed_AF_afr=0.00479425;non_topmed_nhomalt_afr=0;controls_AC=123;controls_AN=5698;controls_AF=0.0215865;controls_nhomalt=0;non_neuro_AC_oth_female=2;non_neuro_AN_oth_female=198;non_neuro_AF_oth_female=0.010101;non_neuro_nhomalt_oth_female=0;non_topmed_faf95_amr=0.00012;non_topmed_faf99_amr=0.00012;faf95_afr=0.00339534;faf99_afr=0.00339557;controls_faf95_afr=0.00265678;controls_faf99_af
r=0.00265672;faf95_amr=0.000113;faf99_amr=0.000113;faf95_eas=0.00097636;faf99_eas=0.00097723;faf95=0.0109033;faf99=0.0109026;non_neuro_faf95_afr=0.00199003;non_neuro_faf99_afr=0.00198958;non_neuro_faf95_amr=0;non_neuro_faf99_amr=0;controls_faf95_nfe=0.0247904;controls_faf99_nfe=0.0247903;non_topmed_faf95=0.0121462;non_topmed_faf99=0.0121458;non_neuro_faf95_nfe=0.0106444;non_neuro_faf99_nfe=0.0106446;non_neuro_faf95=0.00983491;non_neuro_faf99=0.00983455;non_topmed_faf95_nfe=0.0128339;non_topmed_faf99_nfe=0.0128338;controls_faf95_eas=0.00164922;controls_faf99_eas=0.00164971;faf95_nfe=0.0105404;faf99_nfe=0.0105404;non_topmed_faf95_eas=0.00100131;non_topmed_faf99_eas=0.00100135;controls_faf95_amr=0;controls_faf99_amr=0;non_neuro_faf95_eas=0.00097636;non_neuro_faf99_eas=0.00097723;non_topmed_faf95_afr=0.00330485;non_topmed_faf99_afr=0.0033051;controls_faf95=0.018487;controls_faf99=0.0184878;controls_popmax=nfe;controls_AC_popmax=81;controls_AN_popmax=2694;controls_AF_popmax=0.0300668;controls_nhomalt_popmax=0;popmax=nfe;AC_popmax=121;AN_popmax=9818;AF_popmax=0.0123243;nhomalt_popmax=0;age_hist_het_bin_freq=12|8|16|15|13|24|18|13|9|2;age_hist_het_n_smaller=33;age_hist_het_n_larger=0;age_hist_hom_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_hom_n_smaller=0;age_hist_hom_n_larger=0;non_neuro_popmax=nfe;non_neuro_AC_popmax=110;non_neuro_AN_popmax=8768;non_neuro_AF_popmax=0.0125456;non_neuro_nhomalt_popmax=0;non_topmed_popmax=nfe;non_topmed_AC_popmax=100;non_topmed_AN_popmax=6556;non_topmed_AF_popmax=0.0152532;non_topmed_nhomalt_popmax=0\");\n            \n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void RemoveConflictingItems()\n        {\n            var sequence = new SimpleSequence(new string('A', VariantUtils.MaxUpstreamLength) + \"G\", 22055875 - VariantUtils.MaxUpstreamLength);\n            var sequenceProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh38, sequence, 
ChromosomeUtilities.RefNameToChromosome);\n\n            var gnomadReader = new GnomadSnvReader(new StreamReader(GetChr22_22055876_genome()), new StreamReader(GetChr22_22055876_exome()), sequenceProvider);\n\n            var items = gnomadReader.GetCombinedItems().ToList();\n\n            Assert.Single(items);\n            //the genome items result in a conflict. Only the exome item should be reported back\n            Assert.Equal(18686, items[0].AllAlleleNumber);\n        }\n\n        private static Stream GetChr22_16689800_16689902_genome()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##gnomAD\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\"22\\t16689820\\trs1302088526\\tC\\tG\\t30.01\\tAC0;RF\\tAC=0;AN=0;rf_tp_probability=0.0655868;FS=0;InbreedingCoeff=-0.1286;MQ=27;QD=10;SOR=2.833;DP=19893;VQSLOD=-82.37;VQSR_culprit=MQ;segdup;rf_negative_label;rf_label=FP;rf_train;variant_type=snv;allele_type=snv;n_alt_alleles=1;gq_hist_alt_bin_freq=0|1|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;gq_hist_all_bin_freq=4957|4490|585|258|29|1|2|0|0|0|0|0|0|0|0|0|0|0|0|0;dp_hist_alt_bin_freq=1|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;dp_hist_alt_n_larger=0;dp_hist_all_bin_freq=19964|344|2|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;dp_hist_all_n_larger=0;ab_hist_alt_bin_freq=0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;AC_nfe_seu=0;AN_nfe_seu=0;nhomalt_nfe_seu=0;controls_AC_afr_male=0;controls_AN_afr_male=0;controls_nhomalt_afr_male=0;non_topmed_AC_amr=0;non_topmed_AN_amr=0;non_topmed_nhomalt_amr=0;AC_raw=2;AN_raw=17984;AF_raw=0.00011121;nhomalt_raw=1;AC_fin_female=0;AN_fin_female=0;nhomalt_fin_female=0;non_neuro_AC_asj_female=0;non_neuro_AN_asj_female=0;non_neuro_nhomalt_asj_female=0;non_neuro_AC_afr_male=0;non_neuro_AN_afr_male=0;non_neuro_nhomalt_afr_male=0;AC_afr_male=0;AN_afr_male=0;nhomalt_afr_male=0;AC_afr=0;AN_afr=0;nhomalt_afr
=0;non_neuro_AC_afr_female=0;non_neuro_AN_afr_female=0;non_neuro_nhomalt_afr_female=0;non_topmed_AC_amr_female=0;non_topmed_AN_amr_female=0;non_topmed_nhomalt_amr_female=0;non_topmed_AC_oth_female=0;non_topmed_AN_oth_female=0;non_topmed_nhomalt_oth_female=0;AC_eas_female=0;AN_eas_female=0;nhomalt_eas_female=0;AC_afr_female=0;AN_afr_female=0;nhomalt_afr_female=0;non_neuro_AC_female=0;non_neuro_AN_female=0;non_neuro_nhomalt_female=0;controls_AC_afr=0;controls_AN_afr=0;controls_nhomalt_afr=0;AC_nfe_onf=0;AN_nfe_onf=0;nhomalt_nfe_onf=0;controls_AC_fin_male=0;controls_AN_fin_male=0;controls_nhomalt_fin_male=0;non_neuro_AC_nfe_nwe=0;non_neuro_AN_nfe_nwe=0;non_neuro_nhomalt_nfe_nwe=0;AC_fin_male=0;AN_fin_male=0;nhomalt_fin_male=0;AC_nfe_female=0;AN_nfe_female=0;nhomalt_nfe_female=0;AC_amr=0;AN_amr=0;nhomalt_amr=0;non_topmed_AC_nfe_male=0;non_topmed_AN_nfe_male=0;non_topmed_nhomalt_nfe_male=0;AC_eas=0;AN_eas=0;nhomalt_eas=0;nhomalt=0;non_neuro_AC_nfe_female=0;non_neuro_AN_nfe_female=0;non_neuro_nhomalt_nfe_female=0;non_neuro_AC_afr=0;non_neuro_AN_afr=0;non_neuro_nhomalt_afr=0;controls_AC_raw=0;controls_AN_raw=5542;controls_AF_raw=0;controls_nhomalt_raw=0;controls_AC_male=0;controls_AN_male=0;controls_nhomalt_male=0;non_topmed_AC_male=0;non_topmed_AN_male=0;non_topmed_nhomalt_male=0;controls_AC_nfe_female=0;controls_AN_nfe_female=0;controls_nhomalt_nfe_female=0;non_neuro_AC_amr=0;non_neuro_AN_amr=0;non_neuro_nhomalt_amr=0;non_neuro_AC_eas_female=0;non_neuro_AN_eas_female=0;non_neuro_nhomalt_eas_female=0;AC_asj_male=0;AN_asj_male=0;nhomalt_asj_male=0;controls_AC_nfe_male=0;controls_AN_nfe_male=0;controls_nhomalt_nfe_male=0;non_neuro_AC_fin=0;non_neuro_AN_fin=0;non_neuro_nhomalt_fin=0;AC_oth_female=0;AN_oth_female=0;nhomalt_oth_female=0;controls_AC_nfe=0;controls_AN_nfe=0;controls_nhomalt_nfe=0;controls_AC_oth_female=0;controls_AN_oth_female=0;controls_nhomalt_oth_female=0;controls_AC_asj=0;controls_AN_asj=0;controls_nhomalt_asj=0;non_neuro_AC_amr_male=0;non_neuro_AN_amr_male=
0;non_neuro_nhomalt_amr_male=0;controls_AC_nfe_nwe=0;controls_AN_nfe_nwe=0;controls_nhomalt_nfe_nwe=0;AC_nfe_nwe=0;AN_nfe_nwe=0;nhomalt_nfe_nwe=0;controls_AC_nfe_seu=0;controls_AN_nfe_seu=0;controls_nhomalt_nfe_seu=0;non_neuro_AC_amr_female=0;non_neuro_AN_amr_female=0;non_neuro_nhomalt_amr_female=0;non_neuro_AC_nfe_onf=0;non_neuro_AN_nfe_onf=0;non_neuro_nhomalt_nfe_onf=0;non_topmed_AC_eas_male=0;non_topmed_AN_eas_male=0;non_topmed_nhomalt_eas_male=0;controls_AC_amr_female=0;controls_AN_amr_female=0;controls_nhomalt_amr_female=0;non_neuro_AC_fin_male=0;non_neuro_AN_fin_male=0;non_neuro_nhomalt_fin_male=0;AC_female=0;AN_female=0;nhomalt_female=0;non_neuro_AC_oth_male=0;non_neuro_AN_oth_male=0;non_neuro_nhomalt_oth_male=0;non_topmed_AC_nfe_est=0;non_topmed_AN_nfe_est=0;non_topmed_nhomalt_nfe_est=0;non_topmed_AC_nfe_nwe=0;non_topmed_AN_nfe_nwe=0;non_topmed_nhomalt_nfe_nwe=0;non_topmed_AC_amr_male=0;non_topmed_AN_amr_male=0;non_topmed_nhomalt_amr_male=0;non_topmed_AC_nfe_onf=0;non_topmed_AN_nfe_onf=0;non_topmed_nhomalt_nfe_onf=0;controls_AC_eas_male=0;controls_AN_eas_male=0;controls_nhomalt_eas_male=0;controls_AC_oth_male=0;controls_AN_oth_male=0;controls_nhomalt_oth_male=0;non_topmed_AC=0;non_topmed_AN=0;non_topmed_nhomalt=0;controls_AC_fin=0;controls_AN_fin=0;controls_nhomalt_fin=0;non_neuro_AC_nfe=0;non_neuro_AN_nfe=0;non_neuro_nhomalt_nfe=0;non_neuro_AC_fin_female=0;non_neuro_AN_fin_female=0;non_neuro_nhomalt_fin_female=0;non_topmed_AC_nfe_seu=0;non_topmed_AN_nfe_seu=0;non_topmed_nhomalt_nfe_seu=0;controls_AC_eas_female=0;controls_AN_eas_female=0;controls_nhomalt_eas_female=0;non_topmed_AC_asj=0;non_topmed_AN_asj=0;non_topmed_nhomalt_asj=0;controls_AC_nfe_onf=0;controls_AN_nfe_onf=0;controls_nhomalt_nfe_onf=0;non_neuro_AC=0;non_neuro_AN=0;non_neuro_nhomalt=0;non_topmed_AC_nfe=0;non_topmed_AN_nfe=0;non_topmed_nhomalt_nfe=0;non_topmed_AC_raw=2;non_topmed_AN_raw=14534;non_topmed_AF_raw=0.000137608;non_topmed_nhomalt_raw=1;non_neuro_AC_nfe_est=0;non_neuro_AN_nfe_est=0;no
n_neuro_nhomalt_nfe_est=0;non_topmed_AC_oth_male=0;non_topmed_AN_oth_male=0;non_topmed_nhomalt_oth_male=0;AC_nfe_est=0;AN_nfe_est=0;nhomalt_nfe_est=0;non_topmed_AC_afr_male=0;non_topmed_AN_afr_male=0;non_topmed_nhomalt_afr_male=0;AC_eas_male=0;AN_eas_male=0;nhomalt_eas_male=0;controls_AC_eas=0;controls_AN_eas=0;controls_nhomalt_eas=0;non_neuro_AC_eas_male=0;non_neuro_AN_eas_male=0;non_neuro_nhomalt_eas_male=0;non_neuro_AC_asj_male=0;non_neuro_AN_asj_male=0;non_neuro_nhomalt_asj_male=0;controls_AC_oth=0;controls_AN_oth=0;controls_nhomalt_oth=0;AC_nfe=0;AN_nfe=0;nhomalt_nfe=0;non_topmed_AC_female=0;non_topmed_AN_female=0;non_topmed_nhomalt_female=0;non_neuro_AC_asj=0;non_neuro_AN_asj=0;non_neuro_nhomalt_asj=0;non_topmed_AC_eas_female=0;non_topmed_AN_eas_female=0;non_topmed_nhomalt_eas_female=0;non_neuro_AC_raw=0;non_neuro_AN_raw=13036;non_neuro_AF_raw=0;non_neuro_nhomalt_raw=0;non_topmed_AC_eas=0;non_topmed_AN_eas=0;non_topmed_nhomalt_eas=0;non_topmed_AC_fin_male=0;non_topmed_AN_fin_male=0;non_topmed_nhomalt_fin_male=0;AC_fin=0;AN_fin=0;nhomalt_fin=0;AC_nfe_male=0;AN_nfe_male=0;nhomalt_nfe_male=0;controls_AC_amr_male=0;controls_AN_amr_male=0;controls_nhomalt_amr_male=0;controls_AC_afr_female=0;controls_AN_afr_female=0;controls_nhomalt_afr_female=0;controls_AC_amr=0;controls_AN_amr=0;controls_nhomalt_amr=0;AC_asj_female=0;AN_asj_female=0;nhomalt_asj_female=0;non_neuro_AC_eas=0;non_neuro_AN_eas=0;non_neuro_nhomalt_eas=0;non_neuro_AC_male=0;non_neuro_AN_male=0;non_neuro_nhomalt_male=0;AC_asj=0;AN_asj=0;nhomalt_asj=0;controls_AC_nfe_est=0;controls_AN_nfe_est=0;controls_nhomalt_nfe_est=0;non_topmed_AC_asj_female=0;non_topmed_AN_asj_female=0;non_topmed_nhomalt_asj_female=0;non_topmed_AC_oth=0;non_topmed_AN_oth=0;non_topmed_nhomalt_oth=0;non_topmed_AC_fin_female=0;non_topmed_AN_fin_female=0;non_topmed_nhomalt_fin_female=0;AC_oth=0;AN_oth=0;nhomalt_oth=0;non_neuro_AC_nfe_male=0;non_neuro_AN_nfe_male=0;non_neuro_nhomalt_nfe_male=0;controls_AC_female=0;controls_AN_female=0;cont
rols_nhomalt_female=0;non_topmed_AC_fin=0;non_topmed_AN_fin=0;non_topmed_nhomalt_fin=0;non_topmed_AC_nfe_female=0;non_topmed_AN_nfe_female=0;non_topmed_nhomalt_nfe_female=0;controls_AC_asj_male=0;controls_AN_asj_male=0;controls_nhomalt_asj_male=0;non_topmed_AC_asj_male=0;non_topmed_AN_asj_male=0;non_topmed_nhomalt_asj_male=0;non_neuro_AC_oth=0;non_neuro_AN_oth=0;non_neuro_nhomalt_oth=0;AC_male=0;AN_male=0;nhomalt_male=0;controls_AC_fin_female=0;controls_AN_fin_female=0;controls_nhomalt_fin_female=0;controls_AC_asj_female=0;controls_AN_asj_female=0;controls_nhomalt_asj_female=0;AC_amr_male=0;AN_amr_male=0;nhomalt_amr_male=0;AC_amr_female=0;AN_amr_female=0;nhomalt_amr_female=0;AC_oth_male=0;AN_oth_male=0;nhomalt_oth_male=0;non_neuro_AC_nfe_seu=0;non_neuro_AN_nfe_seu=0;non_neuro_nhomalt_nfe_seu=0;non_topmed_AC_afr_female=0;non_topmed_AN_afr_female=0;non_topmed_nhomalt_afr_female=0;non_topmed_AC_afr=0;non_topmed_AN_afr=0;non_topmed_nhomalt_afr=0;controls_AC=0;controls_AN=0;controls_nhomalt=0;non_neuro_AC_oth_female=0;non_neuro_AN_oth_female=0;non_neuro_nhomalt_oth_female=0;non_topmed_faf95_amr=0;non_topmed_faf99_amr=0;faf95_afr=0;faf99_afr=0;controls_faf95_afr=0;controls_faf99_afr=0;faf95_amr=0;faf99_amr=0;faf95_eas=0;faf99_eas=0;faf95=0;faf99=0;non_neuro_faf95_afr=0;non_neuro_faf99_afr=0;non_neuro_faf95_amr=0;non_neuro_faf99_amr=0;controls_faf95_nfe=0;controls_faf99_nfe=0;non_topmed_faf95=0;non_topmed_faf99=0;non_neuro_faf95_nfe=0;non_neuro_faf99_nfe=0;non_neuro_faf95=0;non_neuro_faf99=0;non_topmed_faf95_nfe=0;non_topmed_faf99_nfe=0;controls_faf95_eas=0;controls_faf99_eas=0;faf95_nfe=0;faf99_nfe=0;non_topmed_faf95_eas=0;non_topmed_faf99_eas=0;controls_faf95_amr=0;controls_faf99_amr=0;non_neuro_faf95_eas=0;non_neuro_faf99_eas=0;non_topmed_faf95_afr=0;non_topmed_faf99_afr=0;controls_faf95=0;controls_faf99=0;age_hist_het_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_het_n_smaller=0;age_hist_het_n_larger=0;age_hist_hom_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_hom_n_smaller=0;age_hist
_hom_n_larger=0\");\n            writer.WriteLine(\"22\\t16689902\\trs7289731\\tG\\tA\\t3096.01\\tAC0;RF\\tAC=0;AN=0;rf_tp_probability=0.0665037;FS=0;InbreedingCoeff=-0.0964;MQ=27;MQRankSum=0.736;QD=15.8;ReadPosRankSum=0.736;SOR=3.525;BaseQRankSum=0.736;ClippingRankSum=0.736;DP=7972;VQSLOD=-105;VQSR_culprit=MQ;segdup;rf_negative_label;rf_label=FP;rf_train;variant_type=snv;allele_type=snv;n_alt_alleles=1;pab_max=1;gq_hist_alt_bin_freq=7|59|2|2|14|0|0|0|0|3|1|0|0|0|0|0|0|0|0|0;gq_hist_all_bin_freq=4019|1333|47|9|15|0|0|0|0|3|1|0|0|0|0|0|0|0|0|0;dp_hist_alt_bin_freq=86|2|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;dp_hist_alt_n_larger=0;dp_hist_all_bin_freq=20297|13|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0;dp_hist_all_n_larger=0;ab_hist_alt_bin_freq=0|0|0|0|0|1|0|0|1|0|5|0|0|13|0|1|0|0|0|0;AC_nfe_seu=0;AN_nfe_seu=0;nhomalt_nfe_seu=0;controls_AC_afr_male=0;controls_AN_afr_male=0;controls_nhomalt_afr_male=0;non_topmed_AC_amr=0;non_topmed_AN_amr=0;non_topmed_nhomalt_amr=0;AC_raw=133;AN_raw=9506;AF_raw=0.0139912;nhomalt_raw=59;AC_fin_female=0;AN_fin_female=0;nhomalt_fin_female=0;non_neuro_AC_asj_female=0;non_neuro_AN_asj_female=0;non_neuro_nhomalt_asj_female=0;non_neuro_AC_afr_male=0;non_neuro_AN_afr_male=0;non_neuro_nhomalt_afr_male=0;AC_afr_male=0;AN_afr_male=0;nhomalt_afr_male=0;AC_afr=0;AN_afr=0;nhomalt_afr=0;non_neuro_AC_afr_female=0;non_neuro_AN_afr_female=0;non_neuro_nhomalt_afr_female=0;non_topmed_AC_amr_female=0;non_topmed_AN_amr_female=0;non_topmed_nhomalt_amr_female=0;non_topmed_AC_oth_female=0;non_topmed_AN_oth_female=0;non_topmed_nhomalt_oth_female=0;AC_eas_female=0;AN_eas_female=0;nhomalt_eas_female=0;AC_afr_female=0;AN_afr_female=0;nhomalt_afr_female=0;non_neuro_AC_female=0;non_neuro_AN_female=0;non_neuro_nhomalt_female=0;controls_AC_afr=0;controls_AN_afr=0;controls_nhomalt_afr=0;AC_nfe_onf=0;AN_nfe_onf=0;nhomalt_nfe_onf=0;controls_AC_fin_male=0;controls_AN_fin_male=0;controls_nhomalt_fin_male=0;non_neuro_AC_nfe_nwe=0;non_neuro_AN_nfe_nwe=0;non_neuro_nhomalt_nfe_nwe=0;AC
_fin_male=0;AN_fin_male=0;nhomalt_fin_male=0;AC_nfe_female=0;AN_nfe_female=0;nhomalt_nfe_female=0;AC_amr=0;AN_amr=0;nhomalt_amr=0;non_topmed_AC_nfe_male=0;non_topmed_AN_nfe_male=0;non_topmed_nhomalt_nfe_male=0;AC_eas=0;AN_eas=0;nhomalt_eas=0;nhomalt=0;non_neuro_AC_nfe_female=0;non_neuro_AN_nfe_female=0;non_neuro_nhomalt_nfe_female=0;non_neuro_AC_afr=0;non_neuro_AN_afr=0;non_neuro_nhomalt_afr=0;controls_AC_raw=28;controls_AN_raw=2820;controls_AF_raw=0.00992908;controls_nhomalt_raw=12;controls_AC_male=0;controls_AN_male=0;controls_nhomalt_male=0;non_topmed_AC_male=0;non_topmed_AN_male=0;non_topmed_nhomalt_male=0;controls_AC_nfe_female=0;controls_AN_nfe_female=0;controls_nhomalt_nfe_female=0;non_neuro_AC_amr=0;non_neuro_AN_amr=0;non_neuro_nhomalt_amr=0;non_neuro_AC_eas_female=0;non_neuro_AN_eas_female=0;non_neuro_nhomalt_eas_female=0;AC_asj_male=0;AN_asj_male=0;nhomalt_asj_male=0;controls_AC_nfe_male=0;controls_AN_nfe_male=0;controls_nhomalt_nfe_male=0;non_neuro_AC_fin=0;non_neuro_AN_fin=0;non_neuro_nhomalt_fin=0;AC_oth_female=0;AN_oth_female=0;nhomalt_oth_female=0;controls_AC_nfe=0;controls_AN_nfe=0;controls_nhomalt_nfe=0;controls_AC_oth_female=0;controls_AN_oth_female=0;controls_nhomalt_oth_female=0;controls_AC_asj=0;controls_AN_asj=0;controls_nhomalt_asj=0;non_neuro_AC_amr_male=0;non_neuro_AN_amr_male=0;non_neuro_nhomalt_amr_male=0;controls_AC_nfe_nwe=0;controls_AN_nfe_nwe=0;controls_nhomalt_nfe_nwe=0;AC_nfe_nwe=0;AN_nfe_nwe=0;nhomalt_nfe_nwe=0;controls_AC_nfe_seu=0;controls_AN_nfe_seu=0;controls_nhomalt_nfe_seu=0;non_neuro_AC_amr_female=0;non_neuro_AN_amr_female=0;non_neuro_nhomalt_amr_female=0;non_neuro_AC_nfe_onf=0;non_neuro_AN_nfe_onf=0;non_neuro_nhomalt_nfe_onf=0;non_topmed_AC_eas_male=0;non_topmed_AN_eas_male=0;non_topmed_nhomalt_eas_male=0;controls_AC_amr_female=0;controls_AN_amr_female=0;controls_nhomalt_amr_female=0;non_neuro_AC_fin_male=0;non_neuro_AN_fin_male=0;non_neuro_nhomalt_fin_male=0;AC_female=0;AN_female=0;nhomalt_female=0;non_neuro_AC_oth_male=0;n
on_neuro_AN_oth_male=0;non_neuro_nhomalt_oth_male=0;non_topmed_AC_nfe_est=0;non_topmed_AN_nfe_est=0;non_topmed_nhomalt_nfe_est=0;non_topmed_AC_nfe_nwe=0;non_topmed_AN_nfe_nwe=0;non_topmed_nhomalt_nfe_nwe=0;non_topmed_AC_amr_male=0;non_topmed_AN_amr_male=0;non_topmed_nhomalt_amr_male=0;non_topmed_AC_nfe_onf=0;non_topmed_AN_nfe_onf=0;non_topmed_nhomalt_nfe_onf=0;controls_AC_eas_male=0;controls_AN_eas_male=0;controls_nhomalt_eas_male=0;controls_AC_oth_male=0;controls_AN_oth_male=0;controls_nhomalt_oth_male=0;non_topmed_AC=0;non_topmed_AN=0;non_topmed_nhomalt=0;controls_AC_fin=0;controls_AN_fin=0;controls_nhomalt_fin=0;non_neuro_AC_nfe=0;non_neuro_AN_nfe=0;non_neuro_nhomalt_nfe=0;non_neuro_AC_fin_female=0;non_neuro_AN_fin_female=0;non_neuro_nhomalt_fin_female=0;non_topmed_AC_nfe_seu=0;non_topmed_AN_nfe_seu=0;non_topmed_nhomalt_nfe_seu=0;controls_AC_eas_female=0;controls_AN_eas_female=0;controls_nhomalt_eas_female=0;non_topmed_AC_asj=0;non_topmed_AN_asj=0;non_topmed_nhomalt_asj=0;controls_AC_nfe_onf=0;controls_AN_nfe_onf=0;controls_nhomalt_nfe_onf=0;non_neuro_AC=0;non_neuro_AN=0;non_neuro_nhomalt=0;non_topmed_AC_nfe=0;non_topmed_AN_nfe=0;non_topmed_nhomalt_nfe=0;non_topmed_AC_raw=86;non_topmed_AN_raw=7812;non_topmed_AF_raw=0.0110087;non_topmed_nhomalt_raw=38;non_neuro_AC_nfe_est=0;non_neuro_AN_nfe_est=0;non_neuro_nhomalt_nfe_est=0;non_topmed_AC_oth_male=0;non_topmed_AN_oth_male=0;non_topmed_nhomalt_oth_male=0;AC_nfe_est=0;AN_nfe_est=0;nhomalt_nfe_est=0;non_topmed_AC_afr_male=0;non_topmed_AN_afr_male=0;non_topmed_nhomalt_afr_male=0;AC_eas_male=0;AN_eas_male=0;nhomalt_eas_male=0;controls_AC_eas=0;controls_AN_eas=0;controls_nhomalt_eas=0;non_neuro_AC_eas_male=0;non_neuro_AN_eas_male=0;non_neuro_nhomalt_eas_male=0;non_neuro_AC_asj_male=0;non_neuro_AN_asj_male=0;non_neuro_nhomalt_asj_male=0;controls_AC_oth=0;controls_AN_oth=0;controls_nhomalt_oth=0;AC_nfe=0;AN_nfe=0;nhomalt_nfe=0;non_topmed_AC_female=0;non_topmed_AN_female=0;non_topmed_nhomalt_female=0;non_neuro_AC_asj=0;non_
neuro_AN_asj=0;non_neuro_nhomalt_asj=0;non_topmed_AC_eas_female=0;non_topmed_AN_eas_female=0;non_topmed_nhomalt_eas_female=0;non_neuro_AC_raw=106;non_neuro_AN_raw=6790;non_neuro_AF_raw=0.0156112;non_neuro_nhomalt_raw=47;non_topmed_AC_eas=0;non_topmed_AN_eas=0;non_topmed_nhomalt_eas=0;non_topmed_AC_fin_male=0;non_topmed_AN_fin_male=0;non_topmed_nhomalt_fin_male=0;AC_fin=0;AN_fin=0;nhomalt_fin=0;AC_nfe_male=0;AN_nfe_male=0;nhomalt_nfe_male=0;controls_AC_amr_male=0;controls_AN_amr_male=0;controls_nhomalt_amr_male=0;controls_AC_afr_female=0;controls_AN_afr_female=0;controls_nhomalt_afr_female=0;controls_AC_amr=0;controls_AN_amr=0;controls_nhomalt_amr=0;AC_asj_female=0;AN_asj_female=0;nhomalt_asj_female=0;non_neuro_AC_eas=0;non_neuro_AN_eas=0;non_neuro_nhomalt_eas=0;non_neuro_AC_male=0;non_neuro_AN_male=0;non_neuro_nhomalt_male=0;AC_asj=0;AN_asj=0;nhomalt_asj=0;controls_AC_nfe_est=0;controls_AN_nfe_est=0;controls_nhomalt_nfe_est=0;non_topmed_AC_asj_female=0;non_topmed_AN_asj_female=0;non_topmed_nhomalt_asj_female=0;non_topmed_AC_oth=0;non_topmed_AN_oth=0;non_topmed_nhomalt_oth=0;non_topmed_AC_fin_female=0;non_topmed_AN_fin_female=0;non_topmed_nhomalt_fin_female=0;AC_oth=0;AN_oth=0;nhomalt_oth=0;non_neuro_AC_nfe_male=0;non_neuro_AN_nfe_male=0;non_neuro_nhomalt_nfe_male=0;controls_AC_female=0;controls_AN_female=0;controls_nhomalt_female=0;non_topmed_AC_fin=0;non_topmed_AN_fin=0;non_topmed_nhomalt_fin=0;non_topmed_AC_nfe_female=0;non_topmed_AN_nfe_female=0;non_topmed_nhomalt_nfe_female=0;controls_AC_asj_male=0;controls_AN_asj_male=0;controls_nhomalt_asj_male=0;non_topmed_AC_asj_male=0;non_topmed_AN_asj_male=0;non_topmed_nhomalt_asj_male=0;non_neuro_AC_oth=0;non_neuro_AN_oth=0;non_neuro_nhomalt_oth=0;AC_male=0;AN_male=0;nhomalt_male=0;controls_AC_fin_female=0;controls_AN_fin_female=0;controls_nhomalt_fin_female=0;controls_AC_asj_female=0;controls_AN_asj_female=0;controls_nhomalt_asj_female=0;AC_amr_male=0;AN_amr_male=0;nhomalt_amr_male=0;AC_amr_female=0;AN_amr_female=0;nhoma
lt_amr_female=0;AC_oth_male=0;AN_oth_male=0;nhomalt_oth_male=0;non_neuro_AC_nfe_seu=0;non_neuro_AN_nfe_seu=0;non_neuro_nhomalt_nfe_seu=0;non_topmed_AC_afr_female=0;non_topmed_AN_afr_female=0;non_topmed_nhomalt_afr_female=0;non_topmed_AC_afr=0;non_topmed_AN_afr=0;non_topmed_nhomalt_afr=0;controls_AC=0;controls_AN=0;controls_nhomalt=0;non_neuro_AC_oth_female=0;non_neuro_AN_oth_female=0;non_neuro_nhomalt_oth_female=0;non_topmed_faf95_amr=0;non_topmed_faf99_amr=0;faf95_afr=0;faf99_afr=0;controls_faf95_afr=0;controls_faf99_afr=0;faf95_amr=0;faf99_amr=0;faf95_eas=0;faf99_eas=0;faf95=0;faf99=0;non_neuro_faf95_afr=0;non_neuro_faf99_afr=0;non_neuro_faf95_amr=0;non_neuro_faf99_amr=0;controls_faf95_nfe=0;controls_faf99_nfe=0;non_topmed_faf95=0;non_topmed_faf99=0;non_neuro_faf95_nfe=0;non_neuro_faf99_nfe=0;non_neuro_faf95=0;non_neuro_faf99=0;non_topmed_faf95_nfe=0;non_topmed_faf99_nfe=0;controls_faf95_eas=0;controls_faf99_eas=0;faf95_nfe=0;faf99_nfe=0;non_topmed_faf95_eas=0;non_topmed_faf99_eas=0;controls_faf95_amr=0;controls_faf99_amr=0;non_neuro_faf95_eas=0;non_neuro_faf99_eas=0;non_topmed_faf95_afr=0;non_topmed_faf99_afr=0;controls_faf95=0;controls_faf99=0;age_hist_het_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_het_n_smaller=0;age_hist_het_n_larger=0;age_hist_hom_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_hom_n_smaller=0;age_hist_hom_n_larger=0\");\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        [Fact]\n        public void DoNotReportCoverage_whenAnZero()\n        {\n            var sequence = new SimpleSequence(new string('A', VariantUtils.MaxUpstreamLength) + \"C\" + new string('G', 16689902 - 16689820) + \"GGGGA\", 16689820 - 1 - VariantUtils.MaxUpstreamLength);\n            var sequenceProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh38, sequence, ChromosomeUtilities.RefNameToChromosome);\n\n            var gnomadReader = new GnomadSnvReader(new StreamReader(GetChr22_16689800_16689902_genome()), null, 
sequenceProvider);\n\n            var items = gnomadReader.GetCombinedItems().ToList();\n\n            Assert.Empty(items);\n        }\n\n\n        [Theory]\n        [InlineData(null, null, null)]\n        [InlineData(null, 1,    1)]\n        [InlineData(1,    null, 1)]\n        [InlineData(1,    1,    2)]\n        public void AddNullableInts(int? x, int? y, int? sum)\n        {\n            // I was not expecting null + 1 == null by default!!\n            //var z = x + y;\n            //Assert.Equal(sum, z);\n            Assert.Equal(sum, SaParseUtilities.Add(x, y));\n        }\n        //chr1    76226858  G   GCTAGAATGAGTTA  Sas_An=30614    Sas_An=\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/gnomAD/GnomadSvBedParserTests.cs",
    "content": "using System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing SAUtils.DataStructures;\nusing SAUtils.gnomAD;\nusing UnitTests.TestUtilities;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.gnomAD;\n\npublic sealed class GnomadSvBedParserTests\n{\n    private static Stream GetStreamData(string dataString)\n    {\n        var stream = new MemoryStream();\n        var writer = new StreamWriter(stream);\n        writer.Write(dataString);\n        writer.Flush();\n        stream.Position = 0;\n        return stream;\n    }\n\n    [Fact]\n    public void TestGnomadSvBedParser()\n    {\n        const string bedData =\n            \"#chrom\\tstart\\tend\\tname\\tsvtype\\tALGORITHMS\\tBOTHSIDES_SUPPORT\\tCHR2\\tCPX_INTERVALS\\tCPX_TYPE\\tEND2\\tEND\\tEVIDENCE\\tHIGH_SR_BACKGROUND\\tPCRPLUS_DEPLETED\\tPESR_GT_OVERDISPERSION\\tPOS2\\tPROTEIN_CODING__COPY_GAIN\\tPROTEIN_CODING__DUP_LOF\\tPROTEIN_CODING__DUP_PARTIAL\\tPROTEIN_CODING__INTERGENIC\\tPROTEIN_CODING__INTRONIC\\tPROTEIN_CODING__INV_SPAN\\tPROTEIN_CODING__LOF\\tPROTEIN_CODING__MSV_EXON_OVR\\tPROTEIN_CODING__NEAREST_TSS\\tPROTEIN_CODING__PROMOTER\\tPROTEIN_CODING__UTR\\tSOURCE\\tSTRANDS\\tSVLEN\\tSVTYPE\\tUNRESOLVED_TYPE\\tUNSTABLE_AF_PCRPLUS\\tVARIABLE_ACROSS_BATCHES\\tAN\\tAC\\tAF\\tN_BI_GENOS\\tN_HOMREF\\tN_HET\\tN_HOMALT\\tFREQ_HOMREF\\tFREQ_HET\\tFREQ_HOMALT\\tMALE_AN\\tMALE_AC\\tMALE_AF\\tMALE_N_BI_GENOS\\tMALE_N_HOMREF\\tMALE_N_HET\\tMALE_N_HOMALT\\tMALE_FREQ_HOMREF\\tMALE_FREQ_HET\\tMALE_FREQ_HOMALT\\tMALE_N_HEMIREF\\tMALE_N_HEMIALT\\tMALE_FREQ_HEMIREF\\tMALE_FREQ_HEMIALT\\tPAR\\tFEMALE_AN\\tFEMALE_AC\\tFEMALE_AF\\tFEMALE_N_BI_GENOS\\tFEMALE_N_HOMREF\\tFEMALE_N_HET\\tFEMALE_N_HOMALT\\tFEMALE_FREQ_HOMREF\\tFEMALE_FREQ_HET\\tFEMALE_FREQ_HOMALT\\tPOPMAX_AF\\tAFR_AN\\tAFR_AC\\tAFR_AF\\tAFR_N_BI_GENOS\\tAFR_N_HOMREF\\tAFR_N_HET\\tAFR_N_HOMALT\\tAFR_FREQ_HOMREF\\tAFR_FREQ_HET\\tAFR_FREQ_HOMALT\\tAFR_MALE_AN\\tAFR_MALE_AC\\tAFR_MALE_AF\\tAFR_MALE_N_BI_GENOS\\tAFR_MALE_N_HOMREF\\tAFR_MALE_N
_HET\\tAFR_MALE_N_HOMALT\\tAFR_MALE_FREQ_HOMREF\\tAFR_MALE_FREQ_HET\\tAFR_MALE_FREQ_HOMALT\\tAFR_MALE_N_HEMIREF\\tAFR_MALE_N_HEMIALT\\tAFR_MALE_FREQ_HEMIREF\\tAFR_MALE_FREQ_HEMIALT\\tAFR_FEMALE_AN\\tAFR_FEMALE_AC\\tAFR_FEMALE_AF\\tAFR_FEMALE_N_BI_GENOS\\tAFR_FEMALE_N_HOMREF\\tAFR_FEMALE_N_HET\\tAFR_FEMALE_N_HOMALT\\tAFR_FEMALE_FREQ_HOMREF\\tAFR_FEMALE_FREQ_HET\\tAFR_FEMALE_FREQ_HOMALT\\tAMR_AN\\tAMR_AC\\tAMR_AF\\tAMR_N_BI_GENOS\\tAMR_N_HOMREF\\tAMR_N_HET\\tAMR_N_HOMALT\\tAMR_FREQ_HOMREF\\tAMR_FREQ_HET\\tAMR_FREQ_HOMALT\\tAMR_MALE_AN\\tAMR_MALE_AC\\tAMR_MALE_AF\\tAMR_MALE_N_BI_GENOS\\tAMR_MALE_N_HOMREF\\tAMR_MALE_N_HET\\tAMR_MALE_N_HOMALT\\tAMR_MALE_FREQ_HOMREF\\tAMR_MALE_FREQ_HET\\tAMR_MALE_FREQ_HOMALT\\tAMR_MALE_N_HEMIREF\\tAMR_MALE_N_HEMIALT\\tAMR_MALE_FREQ_HEMIREF\\tAMR_MALE_FREQ_HEMIALT\\tAMR_FEMALE_AN\\tAMR_FEMALE_AC\\tAMR_FEMALE_AF\\tAMR_FEMALE_N_BI_GENOS\\tAMR_FEMALE_N_HOMREF\\tAMR_FEMALE_N_HET\\tAMR_FEMALE_N_HOMALT\\tAMR_FEMALE_FREQ_HOMREF\\tAMR_FEMALE_FREQ_HET\\tAMR_FEMALE_FREQ_HOMALT\\tEAS_AN\\tEAS_AC\\tEAS_AF\\tEAS_N_BI_GENOS\\tEAS_N_HOMREF\\tEAS_N_HET\\tEAS_N_HOMALT\\tEAS_FREQ_HOMREF\\tEAS_FREQ_HET\\tEAS_FREQ_HOMALT\\tEAS_MALE_AN\\tEAS_MALE_AC\\tEAS_MALE_AF\\tEAS_MALE_N_BI_GENOS\\tEAS_MALE_N_HOMREF\\tEAS_MALE_N_HET\\tEAS_MALE_N_HOMALT\\tEAS_MALE_FREQ_HOMREF\\tEAS_MALE_FREQ_HET\\tEAS_MALE_FREQ_HOMALT\\tEAS_MALE_N_HEMIREF\\tEAS_MALE_N_HEMIALT\\tEAS_MALE_FREQ_HEMIREF\\tEAS_MALE_FREQ_HEMIALT\\tEAS_FEMALE_AN\\tEAS_FEMALE_AC\\tEAS_FEMALE_AF\\tEAS_FEMALE_N_BI_GENOS\\tEAS_FEMALE_N_HOMREF\\tEAS_FEMALE_N_HET\\tEAS_FEMALE_N_HOMALT\\tEAS_FEMALE_FREQ_HOMREF\\tEAS_FEMALE_FREQ_HET\\tEAS_FEMALE_FREQ_HOMALT\\tEUR_AN\\tEUR_AC\\tEUR_AF\\tEUR_N_BI_GENOS\\tEUR_N_HOMREF\\tEUR_N_HET\\tEUR_N_HOMALT\\tEUR_FREQ_HOMREF\\tEUR_FREQ_HET\\tEUR_FREQ_HOMALT\\tEUR_MALE_AN\\tEUR_MALE_AC\\tEUR_MALE_AF\\tEUR_MALE_N_BI_GENOS\\tEUR_MALE_N_HOMREF\\tEUR_MALE_N_HET\\tEUR_MALE_N_HOMALT\\tEUR_MALE_FREQ_HOMREF\\tEUR_MALE_FREQ_HET\\tEUR_MALE_FREQ_HOMALT\\tEUR_MALE_N_HEMIREF\\tEUR_MALE_N_HEMIALT\\tEU
R_MALE_FREQ_HEMIREF\\tEUR_MALE_FREQ_HEMIALT\\tEUR_FEMALE_AN\\tEUR_FEMALE_AC\\tEUR_FEMALE_AF\\tEUR_FEMALE_N_BI_GENOS\\tEUR_FEMALE_N_HOMREF\\tEUR_FEMALE_N_HET\\tEUR_FEMALE_N_HOMALT\\tEUR_FEMALE_FREQ_HOMREF\\tEUR_FEMALE_FREQ_HET\\tEUR_FEMALE_FREQ_HOMALT\\tOTH_AN\\tOTH_AC\\tOTH_AF\\tOTH_N_BI_GENOS\\tOTH_N_HOMREF\\tOTH_N_HET\\tOTH_N_HOMALT\\tOTH_FREQ_HOMREF\\tOTH_FREQ_HET\\tOTH_FREQ_HOMALT\\tOTH_MALE_AN\\tOTH_MALE_AC\\tOTH_MALE_AF\\tOTH_MALE_N_BI_GENOS\\tOTH_MALE_N_HOMREF\\tOTH_MALE_N_HET\\tOTH_MALE_N_HOMALT\\tOTH_MALE_FREQ_HOMREF\\tOTH_MALE_FREQ_HET\\tOTH_MALE_FREQ_HOMALT\\tOTH_MALE_N_HEMIREF\\tOTH_MALE_N_HEMIALT\\tOTH_MALE_FREQ_HEMIREF\\tOTH_MALE_FREQ_HEMIALT\\tOTH_FEMALE_AN\\tOTH_FEMALE_AC\\tOTH_FEMALE_AF\\tOTH_FEMALE_N_BI_GENOS\\tOTH_FEMALE_N_HOMREF\\tOTH_FEMALE_N_HET\\tOTH_FEMALE_N_HOMALT\\tOTH_FEMALE_FREQ_HOMREF\\tOTH_FEMALE_FREQ_HET\\tOTH_FEMALE_FREQ_HOMALT\\tFILTER\\n\" +\n            \"1\\t10641\\t10642\\tgnomAD-SV_v2.1_BND_1_1\\tBND\\tmanta\\tFalse\\t15\\tNA\\tNA\\t10643\\t10643\\tPE,SR\\tFalse\\tFalse\\tTrue\\t10642\\tNA\\tNA\\tNA\\tFalse\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\t-1\\tBND\\tSINGLE_ENDER_--\\tFalse\\tFalse\\t21366\\t145\\t0.006785999983549118\\t10683\\t10543\\t135\\t5\\t0.9868950247764587\\t0.012636899948120117\\t0.00046803298755548894\\t10866\\t69\\t0.00634999992325902\\t5433\\t5366\\t65\\t2\\t0.987667977809906\\t0.011963900178670883\\t0.000368120992789045\\tNA\\tNA\\tNA\\tNA\\tFalse\\t10454\\t76\\t0.007269999943673611\\t5227\\t5154\\t70\\t3\\t0.9860339760780334\\t0.013392000459134579\\t0.0005739430198445916\\t0.015956999734044075\\t9398\\t72\\t0.007660999894142151\\t4699\\t4629\\t68\\t2\\t0.9851030111312866\\t0.014471200294792652\\t0.0004256220126990229\\t5154\\t33\\t0.006403000093996525\\t2577\\t2544\\t33\\t0\\t0.9871940016746521\\t0.012805599719285965\\t0.0\\tNA\\tNA\\tNA\\tNA\\t4232\\t39\\t0.009216000325977802\\t2116\\t2079\\t35\\t2\\t0.9825140237808228\\t0.01654059998691082\\t0.0009451800142414868\\t1910\\t7\\t0.003664999967440963\\t
955\\t949\\t5\\t1\\t0.9937170147895813\\t0.00523559981957078\\t0.001047119963914156\\t950\\t4\\t0.004211000166833401\\t475\\t472\\t2\\t1\\t0.9936839938163757\\t0.00421052984893322\\t0.0021052600350230932\\tNA\\tNA\\tNA\\tNA\\t952\\t3\\t0.0031510000117123127\\t476\\t473\\t3\\t0\\t0.9936969876289368\\t0.006302520167082548\\t0.0\\t2296\\t31\\t0.013501999899744987\\t1148\\t1117\\t31\\t0\\t0.9729970097541809\\t0.02700350061058998\\t0.0\\t1312\\t13\\t0.009909000247716904\\t656\\t643\\t13\\t0\\t0.9801830053329468\\t0.01981710083782673\\t0.0\\tNA\\tNA\\tNA\\tNA\\t976\\t18\\t0.018442999571561813\\t488\\t470\\t18\\t0\\t0.9631149768829346\\t0.03688519820570946\\t0.0\\t7574\\t32\\t0.004224999807775021\\t3787\\t3757\\t28\\t2\\t0.9920780062675476\\t0.007393720094114542\\t0.0005281229969114065\\t3374\\t17\\t0.005038999952375889\\t1687\\t1671\\t15\\t1\\t0.9905160069465637\\t0.008891520090401173\\t0.000592768017668277\\tNA\\tNA\\tNA\\tNA\\t4182\\t15\\t0.003587000072002411\\t2091\\t2077\\t13\\t1\\t0.9933050274848938\\t0.006217120215296745\\t0.00047823999193497\\t188\\t3\\t0.015956999734044075\\t94\\t91\\t3\\t0\\t0.968084990978241\\t0.03191490098834038\\t0.0\\t76\\t2\\t0.026316000148653984\\t38\\t36\\t2\\t0\\t0.9473680257797241\\t0.05263160169124603\\t0.0\\tNA\\tNA\\tNA\\tNA\\t112\\t1\\t0.008929000236093998\\t56\\t55\\t1\\t0\\t0.982142984867096\\t0.017857100814580917\\t0.0\\tUNRESOLVED\\n\" +\n            
\"1\\t20999\\t26000\\tgnomAD-SV_v2.1_DEL_1_1\\tDEL\\tdepth\\tFalse\\tNA\\tNA\\tNA\\tNA\\t26000\\tRD\\tFalse\\tFalse\\tFalse\\tNA\\tNA\\tNA\\tNA\\tTrue\\tNA\\tNA\\tNA\\tNA\\tOR4F5\\tNA\\tNA\\tNA\\tNA\\t5000\\tDEL\\tNA\\tFalse\\tFalse\\t8586\\t138\\t0.01607299968600273\\t4293\\t4155\\t138\\t0\\t0.9678549766540527\\t0.03214539960026741\\t0.0\\t4302\\t69\\t0.01603900082409382\\t2151\\t2082\\t69\\t0\\t0.9679219722747803\\t0.0320780985057354\\t0.0\\tNA\\tNA\\tNA\\tNA\\tFalse\\t4272\\t68\\t0.015917999669909477\\t2136\\t2068\\t68\\t0\\t0.9681649804115295\\t0.031835198402404785\\t0.0\\t0.07199999690055847\\t3718\\t27\\t0.007261999882757664\\t1859\\t1832\\t27\\t0\\t0.985476016998291\\t0.014523900113999844\\t0.0\\t2016\\t16\\t0.007937000133097172\\t1008\\t992\\t16\\t0\\t0.9841269850730896\\t0.015873000025749207\\t0.0\\tNA\\tNA\\tNA\\tNA\\t1702\\t11\\t0.00646300008520484\\t851\\t840\\t11\\t0\\t0.9870740175247192\\t0.01292600017040968\\t0.0\\t684\\t8\\t0.011695999652147293\\t342\\t334\\t8\\t0\\t0.9766079783439636\\t0.02339180000126362\\t0.0\\t326\\t2\\t0.006134999915957451\\t163\\t161\\t2\\t0\\t0.9877300262451172\\t0.012269900180399418\\t0.0\\tNA\\tNA\\tNA\\tNA\\t358\\t6\\t0.016759999096393585\\t179\\t173\\t6\\t0\\t0.966480016708374\\t0.033519599586725235\\t0.0\\t750\\t54\\t0.07199999690055847\\t375\\t321\\t54\\t0\\t0.8560000061988831\\t0.14399999380111694\\t0.0\\t418\\t28\\t0.06698600202798843\\t209\\t181\\t28\\t0\\t0.8660290241241455\\t0.13397100567817688\\t0.0\\tNA\\tNA\\tNA\\tNA\\t328\\t25\\t0.07621999830007553\\t164\\t139\\t25\\t0\\t0.8475610017776489\\t0.15243899822235107\\t0.0\\t3346\\t48\\t0.014344999566674232\\t1673\\t1625\\t48\\t0\\t0.9713090062141418\\t0.028690999373793602\\t0.0\\t1498\\t22\\t0.014685999602079391\\t749\\t727\\t22\\t0\\t0.9706270098686218\\t0.029372500255703926\\t0.0\\tNA\\tNA\\tNA\\tNA\\t1840\\t26\\t0.01413000002503395\\t920\\t894\\t26\\t0\\t0.9717389941215515\\t0.02826089970767498\\t0.0\\t88\\t1\\t0.011363999918103218\\t44\\t43\\t1\\t0\\t0.9772729873
657227\\t0.022727299481630325\\t0.0\\t44\\t1\\t0.0227269995957613\\t22\\t21\\t1\\t0\\t0.9545450210571289\\t0.04545449838042259\\t0.0\\tNA\\tNA\\tNA\\tNA\\t44\\t0\\t0.0\\t22\\t22\\t0\\t0\\t1.0\\t0.0\\t0.0\\tUNSTABLE_AF_PCRMINUS,LOW_CALL_RATE\\n\" +\n            \"1\\t39999\\t47200\\tgnomAD-SV_v2.1_DUP_1_1\\tDUP\\tdepth\\tFalse\\tNA\\tNA\\tNA\\tNA\\t47200\\tRD\\tFalse\\tFalse\\tFalse\\tNA\\tNA\\tNA\\tNA\\tTrue\\tNA\\tNA\\tNA\\tNA\\tOR4F5\\tNA\\tNA\\tNA\\tNA\\t7200\\tDUP\\tNA\\tFalse\\tFalse\\t13674\\t943\\t0.06896299868822098\\t6837\\t5985\\t761\\t91\\t0.8753839731216431\\t0.11130599677562714\\t0.013309899717569351\\t6878\\t499\\t0.07254999876022339\\t3439\\t2987\\t405\\t47\\t0.8685659766197205\\t0.11776699870824814\\t0.01366680022329092\\tNA\\tNA\\tNA\\tNA\\tFalse\\t6770\\t442\\t0.0652879998087883\\t3385\\t2987\\t354\\t44\\t0.8824219703674316\\t0.10457900166511536\\t0.012998499907553196\\t0.1356939971446991\\t6382\\t866\\t0.1356939971446991\\t3191\\t2415\\t686\\t90\\t0.756816029548645\\t0.21498000621795654\\t0.0282042995095253\\t3470\\t460\\t0.13256500661373138\\t1735\\t1322\\t366\\t47\\t0.7619600296020508\\t0.21095100045204163\\t0.027089299634099007\\tNA\\tNA\\tNA\\tNA\\t2904\\t404\\t0.1391180008649826\\t1452\\t1091\\t318\\t43\\t0.7513769865036011\\t0.21900799870491028\\t0.029614299535751343\\t918\\t21\\t0.02287600003182888\\t459\\t439\\t19\\t1\\t0.9564269781112671\\t0.041394300758838654\\t0.0021786498837172985\\t426\\t12\\t0.028169000521302223\\t213\\t201\\t12\\t0\\t0.9436619877815247\\t0.056338001042604446\\t0.0\\tNA\\tNA\\tNA\\tNA\\t490\\t9\\t0.018366999924182892\\t245\\t237\\t7\\t1\\t0.9673470258712769\\t0.02857140079140663\\t0.004081630147993565\\t1544\\t17\\t0.011009999550879002\\t772\\t755\\t17\\t0\\t0.9779790043830872\\t0.022020699456334114\\t0.0\\t902\\t11\\t0.012195000424981117\\t451\\t440\\t11\\t0\\t0.9756100177764893\\t0.024390200152993202\\t0.0\\tNA\\tNA\\tNA\\tNA\\t638\\t6\\t0.009403999894857407\\t319\\t313\\t6\\t0\\t0.9811909794807434\\t0.018808800727
128983\\t0.0\\t4716\\t37\\t0.007845999673008919\\t2358\\t2321\\t37\\t0\\t0.9843090176582336\\t0.015691300854086876\\t0.0\\t2034\\t15\\t0.007375000044703484\\t1017\\t1002\\t15\\t0\\t0.9852510094642639\\t0.014749299734830856\\t0.0\\tNA\\tNA\\tNA\\tNA\\t2670\\t22\\t0.00824000034481287\\t1335\\t1313\\t22\\t0\\t0.9835209846496582\\t0.016479400917887688\\t0.0\\t114\\t2\\t0.01754399947822094\\t57\\t55\\t2\\t0\\t0.9649119973182678\\t0.035087700933218\\t0.0\\t46\\t1\\t0.02173900045454502\\t23\\t22\\t1\\t0\\t0.9565219879150391\\t0.04347829893231392\\t0.0\\tNA\\tNA\\tNA\\tNA\\t68\\t1\\t0.014705999754369259\\t34\\t33\\t1\\t0\\t0.9705880284309387\\t0.02941180020570755\\t0.0\\tUNSTABLE_AF_PCRMINUS,LOW_CALL_RATE\\n\" +\n            \"1\\t54664\\t54666\\tgnomAD-SV_v2.1_INS_1_1\\tINS\\tmanta\\tFalse\\t1\\tNA\\tNA\\t54717\\t54666\\tSR\\tTrue\\tFalse\\tFalse\\t54716\\tNA\\tNA\\tNA\\tTrue\\tNA\\tNA\\tNA\\tNA\\tOR4F5\\tNA\\tNA\\tNA\\tNA\\t52\\tINS\\tNA\\tFalse\\tFalse\\t21306\\t2\\t9.40000027185306e-05\\t10653\\t10651\\t2\\t0\\t0.9998120069503784\\t0.0001877409958979115\\t0.0\\t10870\\t1\\t9.200000204145908e-05\\t5435\\t5434\\t1\\t0\\t0.9998160004615784\\t0.00018399300461169332\\t0.0\\tNA\\tNA\\tNA\\tNA\\tFalse\\t10390\\t1\\t9.600000339560211e-05\\t5195\\t5194\\t1\\t0\\t0.9998080134391785\\t0.00019249299657531083\\t0.0\\t0.0001340000017080456\\t9380\\t1\\t0.00010699999984353781\\t4690\\t4689\\t1\\t0\\t0.9997869729995728\\t0.00021322000247891992\\t0.0\\t5148\\t1\\t0.0001939999929163605\\t2574\\t2573\\t1\\t0\\t0.9996110200881958\\t0.0003884999896399677\\t0.0\\tNA\\tNA\\tNA\\tNA\\t4220\\t0\\t0.0\\t2110\\t2110\\t0\\t0\\t1.0\\t0.0\\t0.0\\t1908\\t0\\t0.0\\t954\\t954\\t0\\t0\\t1.0\\t0.0\\t0.0\\t952\\t0\\t0.0\\t476\\t476\\t0\\t0\\t1.0\\t0.0\\t0.0\\tNA\\tNA\\tNA\\tNA\\t948\\t0\\t0.0\\t474\\t474\\t0\\t0\\t1.0\\t0.0\\t0.0\\t2366\\t0\\t0.0\\t1183\\t1183\\t0\\t0\\t1.0\\t0.0\\t0.0\\t1366\\t0\\t0.0\\t683\\t683\\t0\\t0\\t1.0\\t0.0\\t0.0\\tNA\\tNA\\tNA\\tNA\\t992\\t0\\t0.0\\t496\\t496\\t0\\t0\\t1.0\\t0.
0\\t0.0\\t7462\\t1\\t0.0001340000017080456\\t3731\\t3730\\t1\\t0\\t0.9997320175170898\\t0.00026802500360645354\\t0.0\\t3328\\t0\\t0.0\\t1664\\t1664\\t0\\t0\\t1.0\\t0.0\\t0.0\\tNA\\tNA\\tNA\\tNA\\t4116\\t1\\t0.00024300000222865492\\t2058\\t2057\\t1\\t0\\t0.9995139837265015\\t0.00048590899677947164\\t0.0\\t190\\t0\\t0.0\\t95\\t95\\t0\\t0\\t1.0\\t0.0\\t0.0\\t76\\t0\\t0.0\\t38\\t38\\t0\\t0\\t1.0\\t0.0\\t0.0\\tNA\\tNA\\tNA\\tNA\\t114\\t0\\t0.0\\t57\\t57\\t0\\t0\\t1.0\\t0.0\\t0.0\\tPASS\\n\";\n\n        using var reader         = new StreamReader(GetStreamData(bedData));\n        using var gnomadSvParser = new GnomadSvBedParser(reader, ChromosomeUtilities.RefNameToChromosome);\n\n        List<GnomadSvItem> svItemList = gnomadSvParser.GetItems().ToList();\n\n        // Count is 3 because breakends are skipped\n        Assert.Equal(3, svItemList.Count);\n\n        Assert.Equal(\n            \"\\\"chromosome\\\":\\\"1\\\",\\\"begin\\\":21001,\\\"end\\\":26000,\\\"variantId\\\":\\\"gnomAD-SV_v2.1_DEL_1_1\\\",\\\"variantType\\\":\\\"deletion\\\",\\\"failedFilter\\\":true,\\\"allAf\\\":0.016073,\\\"afrAf\\\":0.007262,\\\"amrAf\\\":0.011696,\\\"easAf\\\":0.072,\\\"eurAf\\\":0.014345,\\\"othAf\\\":0.011364,\\\"femaleAf\\\":0.015918,\\\"maleAf\\\":0.016039,\\\"allAc\\\":138,\\\"afrAc\\\":27,\\\"amrAc\\\":8,\\\"easAc\\\":54,\\\"eurAc\\\":48,\\\"othAc\\\":1,\\\"femaleAc\\\":68,\\\"maleAc\\\":69,\\\"allAn\\\":8586,\\\"afrAn\\\":3718,\\\"amrAn\\\":684,\\\"easAn\\\":750,\\\"eurAn\\\":3346,\\\"othAn\\\":88,\\\"femaleAn\\\":4272,\\\"maleAn\\\":4302,\\\"allHc\\\":0,\\\"afrHc\\\":0,\\\"amrHc\\\":0,\\\"easHc\\\":0,\\\"eurHc\\\":0,\\\"othHc\\\":43,\\\"femaleHc\\\":0,\\\"maleHc\\\":0\",\n            svItemList[0].GetJsonString()\n        );\n        Assert.Equal(\n            
\"\\\"chromosome\\\":\\\"1\\\",\\\"begin\\\":40001,\\\"end\\\":47200,\\\"variantId\\\":\\\"gnomAD-SV_v2.1_DUP_1_1\\\",\\\"variantType\\\":\\\"duplication\\\",\\\"failedFilter\\\":true,\\\"allAf\\\":0.068963,\\\"afrAf\\\":0.135694,\\\"amrAf\\\":0.022876,\\\"easAf\\\":0.01101,\\\"eurAf\\\":0.007846,\\\"othAf\\\":0.017544,\\\"femaleAf\\\":0.065288,\\\"maleAf\\\":0.07255,\\\"allAc\\\":943,\\\"afrAc\\\":866,\\\"amrAc\\\":21,\\\"easAc\\\":17,\\\"eurAc\\\":37,\\\"othAc\\\":2,\\\"femaleAc\\\":442,\\\"maleAc\\\":499,\\\"allAn\\\":13674,\\\"afrAn\\\":6382,\\\"amrAn\\\":918,\\\"easAn\\\":1544,\\\"eurAn\\\":4716,\\\"othAn\\\":114,\\\"femaleAn\\\":6770,\\\"maleAn\\\":6878,\\\"allHc\\\":91,\\\"afrHc\\\":90,\\\"amrHc\\\":1,\\\"easHc\\\":0,\\\"eurHc\\\":0,\\\"othHc\\\":55,\\\"femaleHc\\\":44,\\\"maleHc\\\":47\",\n            svItemList[1].GetJsonString()\n        );\n        Assert.Equal(\n            \"\\\"chromosome\\\":\\\"1\\\",\\\"begin\\\":54666,\\\"end\\\":54665,\\\"variantId\\\":\\\"gnomAD-SV_v2.1_INS_1_1\\\",\\\"variantType\\\":\\\"insertion\\\",\\\"allAf\\\":0.000094,\\\"afrAf\\\":0.000107,\\\"amrAf\\\":0,\\\"easAf\\\":0,\\\"eurAf\\\":0.000134,\\\"othAf\\\":0,\\\"femaleAf\\\":0.000096,\\\"maleAf\\\":0.000092,\\\"allAc\\\":2,\\\"afrAc\\\":1,\\\"amrAc\\\":0,\\\"easAc\\\":0,\\\"eurAc\\\":1,\\\"othAc\\\":0,\\\"femaleAc\\\":1,\\\"maleAc\\\":1,\\\"allAn\\\":21306,\\\"afrAn\\\":9380,\\\"amrAn\\\":1908,\\\"easAn\\\":2366,\\\"eurAn\\\":7462,\\\"othAn\\\":190,\\\"femaleAn\\\":10390,\\\"maleAn\\\":10870,\\\"allHc\\\":0,\\\"afrHc\\\":0,\\\"amrHc\\\":0,\\\"easHc\\\":0,\\\"eurHc\\\":0,\\\"othHc\\\":95,\\\"femaleHc\\\":0,\\\"maleHc\\\":0\",\n            svItemList[2].GetJsonString()\n        );\n    }\n\n    [Fact]\n    public void TestUnknownChromosome()\n    {\n        const string bedData =\n            
\"#chrom\\tstart\\tend\\tname\\tsvtype\\tALGORITHMS\\tBOTHSIDES_SUPPORT\\tCHR2\\tCPX_INTERVALS\\tCPX_TYPE\\tEND2\\tEND\\tEVIDENCE\\tHIGH_SR_BACKGROUND\\tPCRPLUS_DEPLETED\\tPESR_GT_OVERDISPERSION\\tPOS2\\tPROTEIN_CODING__COPY_GAIN\\tPROTEIN_CODING__DUP_LOF\\tPROTEIN_CODING__DUP_PARTIAL\\tPROTEIN_CODING__INTERGENIC\\tPROTEIN_CODING__INTRONIC\\tPROTEIN_CODING__INV_SPAN\\tPROTEIN_CODING__LOF\\tPROTEIN_CODING__MSV_EXON_OVR\\tPROTEIN_CODING__NEAREST_TSS\\tPROTEIN_CODING__PROMOTER\\tPROTEIN_CODING__UTR\\tSOURCE\\tSTRANDS\\tSVLEN\\tSVTYPE\\tUNRESOLVED_TYPE\\tUNSTABLE_AF_PCRPLUS\\tVARIABLE_ACROSS_BATCHES\\tAN\\tAC\\tAF\\tN_BI_GENOS\\tN_HOMREF\\tN_HET\\tN_HOMALT\\tFREQ_HOMREF\\tFREQ_HET\\tFREQ_HOMALT\\tMALE_AN\\tMALE_AC\\tMALE_AF\\tMALE_N_BI_GENOS\\tMALE_N_HOMREF\\tMALE_N_HET\\tMALE_N_HOMALT\\tMALE_FREQ_HOMREF\\tMALE_FREQ_HET\\tMALE_FREQ_HOMALT\\tMALE_N_HEMIREF\\tMALE_N_HEMIALT\\tMALE_FREQ_HEMIREF\\tMALE_FREQ_HEMIALT\\tPAR\\tFEMALE_AN\\tFEMALE_AC\\tFEMALE_AF\\tFEMALE_N_BI_GENOS\\tFEMALE_N_HOMREF\\tFEMALE_N_HET\\tFEMALE_N_HOMALT\\tFEMALE_FREQ_HOMREF\\tFEMALE_FREQ_HET\\tFEMALE_FREQ_HOMALT\\tPOPMAX_AF\\tAFR_AN\\tAFR_AC\\tAFR_AF\\tAFR_N_BI_GENOS\\tAFR_N_HOMREF\\tAFR_N_HET\\tAFR_N_HOMALT\\tAFR_FREQ_HOMREF\\tAFR_FREQ_HET\\tAFR_FREQ_HOMALT\\tAFR_MALE_AN\\tAFR_MALE_AC\\tAFR_MALE_AF\\tAFR_MALE_N_BI_GENOS\\tAFR_MALE_N_HOMREF\\tAFR_MALE_N_HET\\tAFR_MALE_N_HOMALT\\tAFR_MALE_FREQ_HOMREF\\tAFR_MALE_FREQ_HET\\tAFR_MALE_FREQ_HOMALT\\tAFR_MALE_N_HEMIREF\\tAFR_MALE_N_HEMIALT\\tAFR_MALE_FREQ_HEMIREF\\tAFR_MALE_FREQ_HEMIALT\\tAFR_FEMALE_AN\\tAFR_FEMALE_AC\\tAFR_FEMALE_AF\\tAFR_FEMALE_N_BI_GENOS\\tAFR_FEMALE_N_HOMREF\\tAFR_FEMALE_N_HET\\tAFR_FEMALE_N_HOMALT\\tAFR_FEMALE_FREQ_HOMREF\\tAFR_FEMALE_FREQ_HET\\tAFR_FEMALE_FREQ_HOMALT\\tAMR_AN\\tAMR_AC\\tAMR_AF\\tAMR_N_BI_GENOS\\tAMR_N_HOMREF\\tAMR_N_HET\\tAMR_N_HOMALT\\tAMR_FREQ_HOMREF\\tAMR_FREQ_HET\\tAMR_FREQ_HOMALT\\tAMR_MALE_AN\\tAMR_MALE_AC\\tAMR_MALE_AF\\tAMR_MALE_N_BI_GENOS\\tAMR_MALE_N_HOMREF\\tAMR_MALE_N_HET\\tAMR_MALE_N_HOMALT\\tAMR_MALE_FREQ
_HOMREF\\tAMR_MALE_FREQ_HET\\tAMR_MALE_FREQ_HOMALT\\tAMR_MALE_N_HEMIREF\\tAMR_MALE_N_HEMIALT\\tAMR_MALE_FREQ_HEMIREF\\tAMR_MALE_FREQ_HEMIALT\\tAMR_FEMALE_AN\\tAMR_FEMALE_AC\\tAMR_FEMALE_AF\\tAMR_FEMALE_N_BI_GENOS\\tAMR_FEMALE_N_HOMREF\\tAMR_FEMALE_N_HET\\tAMR_FEMALE_N_HOMALT\\tAMR_FEMALE_FREQ_HOMREF\\tAMR_FEMALE_FREQ_HET\\tAMR_FEMALE_FREQ_HOMALT\\tEAS_AN\\tEAS_AC\\tEAS_AF\\tEAS_N_BI_GENOS\\tEAS_N_HOMREF\\tEAS_N_HET\\tEAS_N_HOMALT\\tEAS_FREQ_HOMREF\\tEAS_FREQ_HET\\tEAS_FREQ_HOMALT\\tEAS_MALE_AN\\tEAS_MALE_AC\\tEAS_MALE_AF\\tEAS_MALE_N_BI_GENOS\\tEAS_MALE_N_HOMREF\\tEAS_MALE_N_HET\\tEAS_MALE_N_HOMALT\\tEAS_MALE_FREQ_HOMREF\\tEAS_MALE_FREQ_HET\\tEAS_MALE_FREQ_HOMALT\\tEAS_MALE_N_HEMIREF\\tEAS_MALE_N_HEMIALT\\tEAS_MALE_FREQ_HEMIREF\\tEAS_MALE_FREQ_HEMIALT\\tEAS_FEMALE_AN\\tEAS_FEMALE_AC\\tEAS_FEMALE_AF\\tEAS_FEMALE_N_BI_GENOS\\tEAS_FEMALE_N_HOMREF\\tEAS_FEMALE_N_HET\\tEAS_FEMALE_N_HOMALT\\tEAS_FEMALE_FREQ_HOMREF\\tEAS_FEMALE_FREQ_HET\\tEAS_FEMALE_FREQ_HOMALT\\tEUR_AN\\tEUR_AC\\tEUR_AF\\tEUR_N_BI_GENOS\\tEUR_N_HOMREF\\tEUR_N_HET\\tEUR_N_HOMALT\\tEUR_FREQ_HOMREF\\tEUR_FREQ_HET\\tEUR_FREQ_HOMALT\\tEUR_MALE_AN\\tEUR_MALE_AC\\tEUR_MALE_AF\\tEUR_MALE_N_BI_GENOS\\tEUR_MALE_N_HOMREF\\tEUR_MALE_N_HET\\tEUR_MALE_N_HOMALT\\tEUR_MALE_FREQ_HOMREF\\tEUR_MALE_FREQ_HET\\tEUR_MALE_FREQ_HOMALT\\tEUR_MALE_N_HEMIREF\\tEUR_MALE_N_HEMIALT\\tEUR_MALE_FREQ_HEMIREF\\tEUR_MALE_FREQ_HEMIALT\\tEUR_FEMALE_AN\\tEUR_FEMALE_AC\\tEUR_FEMALE_AF\\tEUR_FEMALE_N_BI_GENOS\\tEUR_FEMALE_N_HOMREF\\tEUR_FEMALE_N_HET\\tEUR_FEMALE_N_HOMALT\\tEUR_FEMALE_FREQ_HOMREF\\tEUR_FEMALE_FREQ_HET\\tEUR_FEMALE_FREQ_HOMALT\\tOTH_AN\\tOTH_AC\\tOTH_AF\\tOTH_N_BI_GENOS\\tOTH_N_HOMREF\\tOTH_N_HET\\tOTH_N_HOMALT\\tOTH_FREQ_HOMREF\\tOTH_FREQ_HET\\tOTH_FREQ_HOMALT\\tOTH_MALE_AN\\tOTH_MALE_AC\\tOTH_MALE_AF\\tOTH_MALE_N_BI_GENOS\\tOTH_MALE_N_HOMREF\\tOTH_MALE_N_HET\\tOTH_MALE_N_HOMALT\\tOTH_MALE_FREQ_HOMREF\\tOTH_MALE_FREQ_HET\\tOTH_MALE_FREQ_HOMALT\\tOTH_MALE_N_HEMIREF\\tOTH_MALE_N_HEMIALT\\tOTH_MALE_FREQ_HEMIREF\\tOTH_MALE_FREQ_HEMI
ALT\\tOTH_FEMALE_AN\\tOTH_FEMALE_AC\\tOTH_FEMALE_AF\\tOTH_FEMALE_N_BI_GENOS\\tOTH_FEMALE_N_HOMREF\\tOTH_FEMALE_N_HET\\tOTH_FEMALE_N_HOMALT\\tOTH_FEMALE_FREQ_HOMREF\\tOTH_FEMALE_FREQ_HET\\tOTH_FEMALE_FREQ_HOMALT\\tFILTER\\n\" +\n            \"InvalidNumber\\t10641\\t10642\\tgnomAD-SV_v2.1_BND_1_1\\tBND\\tmanta\\tFalse\\t15\\tNA\\tNA\\t10643\\t10643\\tPE,SR\\tFalse\\tFalse\\tTrue\\t10642\\tNA\\tNA\\tNA\\tFalse\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\t-1\\tBND\\tSINGLE_ENDER_--\\tFalse\\tFalse\\t21366\\t145\\t0.006785999983549118\\t10683\\t10543\\t135\\t5\\t0.9868950247764587\\t0.012636899948120117\\t0.00046803298755548894\\t10866\\t69\\t0.00634999992325902\\t5433\\t5366\\t65\\t2\\t0.987667977809906\\t0.011963900178670883\\t0.000368120992789045\\tNA\\tNA\\tNA\\tNA\\tFalse\\t10454\\t76\\t0.007269999943673611\\t5227\\t5154\\t70\\t3\\t0.9860339760780334\\t0.013392000459134579\\t0.0005739430198445916\\t0.015956999734044075\\t9398\\t72\\t0.007660999894142151\\t4699\\t4629\\t68\\t2\\t0.9851030111312866\\t0.014471200294792652\\t0.0004256220126990229\\t5154\\t33\\t0.006403000093996525\\t2577\\t2544\\t33\\t0\\t0.9871940016746521\\t0.012805599719285965\\t0.0\\tNA\\tNA\\tNA\\tNA\\t4232\\t39\\t0.009216000325977802\\t2116\\t2079\\t35\\t2\\t0.9825140237808228\\t0.01654059998691082\\t0.0009451800142414868\\t1910\\t7\\t0.003664999967440963\\t955\\t949\\t5\\t1\\t0.9937170147895813\\t0.00523559981957078\\t0.001047119963914156\\t950\\t4\\t0.004211000166833401\\t475\\t472\\t2\\t1\\t0.9936839938163757\\t0.00421052984893322\\t0.0021052600350230932\\tNA\\tNA\\tNA\\tNA\\t952\\t3\\t0.0031510000117123127\\t476\\t473\\t3\\t0\\t0.9936969876289368\\t0.006302520167082548\\t0.0\\t2296\\t31\\t0.013501999899744987\\t1148\\t1117\\t31\\t0\\t0.9729970097541809\\t0.02700350061058998\\t0.0\\t1312\\t13\\t0.009909000247716904\\t656\\t643\\t13\\t0\\t0.9801830053329468\\t0.01981710083782673\\t0.0\\tNA\\tNA\\tNA\\tNA\\t976\\t18\\t0.018442999571561813\\t488\\t470\\t18\\t0\\t0.9631149768829346\\t0.036885198
20570946\\t0.0\\t7574\\t32\\t0.004224999807775021\\t3787\\t3757\\t28\\t2\\t0.9920780062675476\\t0.007393720094114542\\t0.0005281229969114065\\t3374\\t17\\t0.005038999952375889\\t1687\\t1671\\t15\\t1\\t0.9905160069465637\\t0.008891520090401173\\t0.000592768017668277\\tNA\\tNA\\tNA\\tNA\\t4182\\t15\\t0.003587000072002411\\t2091\\t2077\\t13\\t1\\t0.9933050274848938\\t0.006217120215296745\\t0.00047823999193497\\t188\\t3\\t0.015956999734044075\\t94\\t91\\t3\\t0\\t0.968084990978241\\t0.03191490098834038\\t0.0\\t76\\t2\\t0.026316000148653984\\t38\\t36\\t2\\t0\\t0.9473680257797241\\t0.05263160169124603\\t0.0\\tNA\\tNA\\tNA\\tNA\\t112\\t1\\t0.008929000236093998\\t56\\t55\\t1\\t0\\t0.982142984867096\\t0.017857100814580917\\t0.0\\tUNRESOLVED\\n\" +\n            \"1\\t20999\\t26000\\tgnomAD-SV_v2.1_DEL_1_1\\tDEL\\tdepth\\tFalse\\tNA\\tNA\\tNA\\tNA\\t26000\\tRD\\tFalse\\tFalse\\tFalse\\tNA\\tNA\\tNA\\tNA\\tTrue\\tNA\\tNA\\tNA\\tNA\\tOR4F5\\tNA\\tNA\\tNA\\tNA\\t5000\\tDEL\\tNA\\tFalse\\tFalse\\t8586\\t138\\t0.01607299968600273\\t4293\\t4155\\t138\\t0\\t0.9678549766540527\\t0.03214539960026741\\t0.0\\t4302\\t69\\t0.01603900082409382\\t2151\\t2082\\t69\\t0\\t0.9679219722747803\\t0.0320780985057354\\t0.0\\tNA\\tNA\\tNA\\tNA\\tFalse\\t4272\\t68\\t0.015917999669909477\\t2136\\t2068\\t68\\t0\\t0.9681649804115295\\t0.031835198402404785\\t0.0\\t0.07199999690055847\\t3718\\t27\\t0.007261999882757664\\t1859\\t1832\\t27\\t0\\t0.985476016998291\\t0.014523900113999844\\t0.0\\t2016\\t16\\t0.007937000133097172\\t1008\\t992\\t16\\t0\\t0.9841269850730896\\t0.015873000025749207\\t0.0\\tNA\\tNA\\tNA\\tNA\\t1702\\t11\\t0.00646300008520484\\t851\\t840\\t11\\t0\\t0.9870740175247192\\t0.01292600017040968\\t0.0\\t684\\t8\\t0.011695999652147293\\t342\\t334\\t8\\t0\\t0.9766079783439636\\t0.02339180000126362\\t0.0\\t326\\t2\\t0.006134999915957451\\t163\\t161\\t2\\t0\\t0.9877300262451172\\t0.012269900180399418\\t0.0\\tNA\\tNA\\tNA\\tNA\\t358\\t6\\t0.016759999096393585\\t179\\t173\\t6\\t0\\t0.966480016708374
\\t0.033519599586725235\\t0.0\\t750\\t54\\t0.07199999690055847\\t375\\t321\\t54\\t0\\t0.8560000061988831\\t0.14399999380111694\\t0.0\\t418\\t28\\t0.06698600202798843\\t209\\t181\\t28\\t0\\t0.8660290241241455\\t0.13397100567817688\\t0.0\\tNA\\tNA\\tNA\\tNA\\t328\\t25\\t0.07621999830007553\\t164\\t139\\t25\\t0\\t0.8475610017776489\\t0.15243899822235107\\t0.0\\t3346\\t48\\t0.014344999566674232\\t1673\\t1625\\t48\\t0\\t0.9713090062141418\\t0.028690999373793602\\t0.0\\t1498\\t22\\t0.014685999602079391\\t749\\t727\\t22\\t0\\t0.9706270098686218\\t0.029372500255703926\\t0.0\\tNA\\tNA\\tNA\\tNA\\t1840\\t26\\t0.01413000002503395\\t920\\t894\\t26\\t0\\t0.9717389941215515\\t0.02826089970767498\\t0.0\\t88\\t1\\t0.011363999918103218\\t44\\t43\\t1\\t0\\t0.9772729873657227\\t0.022727299481630325\\t0.0\\t44\\t1\\t0.0227269995957613\\t22\\t21\\t1\\t0\\t0.9545450210571289\\t0.04545449838042259\\t0.0\\tNA\\tNA\\tNA\\tNA\\t44\\t0\\t0.0\\t22\\t22\\t0\\t0\\t1.0\\t0.0\\t0.0\\tUNSTABLE_AF_PCRMINUS,LOW_CALL_RATE\\n\";\n\n        using var reader         = new StreamReader(GetStreamData(bedData));\n        using var gnomadSvParser = new GnomadSvBedParser(reader, ChromosomeUtilities.RefNameToChromosome);\n\n        List<GnomadSvItem> svItemList = gnomadSvParser.GetItems().ToList();\n\n        Assert.Single(svItemList);\n\n\n        Assert.Equal(\n            
\"\\\"chromosome\\\":\\\"1\\\",\\\"begin\\\":21001,\\\"end\\\":26000,\\\"variantId\\\":\\\"gnomAD-SV_v2.1_DEL_1_1\\\",\\\"variantType\\\":\\\"deletion\\\",\\\"failedFilter\\\":true,\\\"allAf\\\":0.016073,\\\"afrAf\\\":0.007262,\\\"amrAf\\\":0.011696,\\\"easAf\\\":0.072,\\\"eurAf\\\":0.014345,\\\"othAf\\\":0.011364,\\\"femaleAf\\\":0.015918,\\\"maleAf\\\":0.016039,\\\"allAc\\\":138,\\\"afrAc\\\":27,\\\"amrAc\\\":8,\\\"easAc\\\":54,\\\"eurAc\\\":48,\\\"othAc\\\":1,\\\"femaleAc\\\":68,\\\"maleAc\\\":69,\\\"allAn\\\":8586,\\\"afrAn\\\":3718,\\\"amrAn\\\":684,\\\"easAn\\\":750,\\\"eurAn\\\":3346,\\\"othAn\\\":88,\\\"femaleAn\\\":4272,\\\"maleAn\\\":4302,\\\"allHc\\\":0,\\\"afrHc\\\":0,\\\"amrHc\\\":0,\\\"easHc\\\":0,\\\"eurHc\\\":0,\\\"othHc\\\":43,\\\"femaleHc\\\":0,\\\"maleHc\\\":0\",\n            svItemList[0].GetJsonString()\n        );\n    }\n\n    [Fact]\n    public void TestInvalidStart()\n    {\n        const string bedData =\n            \"#chrom\\tstart\\tend\\tname\\tsvtype\\tALGORITHMS\\tBOTHSIDES_SUPPORT\\tCHR2\\tCPX_INTERVALS\\tCPX_TYPE\\tEND2\\tEND\\tEVIDENCE\\tHIGH_SR_BACKGROUND\\tPCRPLUS_DEPLETED\\tPESR_GT_OVERDISPERSION\\tPOS2\\tPROTEIN_CODING__COPY_GAIN\\tPROTEIN_CODING__DUP_LOF\\tPROTEIN_CODING__DUP_PARTIAL\\tPROTEIN_CODING__INTERGENIC\\tPROTEIN_CODING__INTRONIC\\tPROTEIN_CODING__INV_SPAN\\tPROTEIN_CODING__LOF\\tPROTEIN_CODING__MSV_EXON_OVR\\tPROTEIN_CODING__NEAREST_TSS\\tPROTEIN_CODING__PROMOTER\\tPROTEIN_CODING__UTR\\tSOURCE\\tSTRANDS\\tSVLEN\\tSVTYPE\\tUNRESOLVED_TYPE\\tUNSTABLE_AF_PCRPLUS\\tVARIABLE_ACROSS_BATCHES\\tAN\\tAC\\tAF\\tN_BI_GENOS\\tN_HOMREF\\tN_HET\\tN_HOMALT\\tFREQ_HOMREF\\tFREQ_HET\\tFREQ_HOMALT\\tMALE_AN\\tMALE_AC\\tMALE_AF\\tMALE_N_BI_GENOS\\tMALE_N_HOMREF\\tMALE_N_HET\\tMALE_N_HOMALT\\tMALE_FREQ_HOMREF\\tMALE_FREQ_HET\\tMALE_FREQ_HOMALT\\tMALE_N_HEMIREF\\tMALE_N_HEMIALT\\tMALE_FREQ_HEMIREF\\tMALE_FREQ_HEMIALT\\tPAR\\tFEMALE_AN\\tFEMALE_AC\\tFEMALE_AF\\tFEMALE_N_BI_GENOS\\tFEMALE_N_HOMREF\\tFEMALE_N_HET\\tFEMALE_N_HOMALT\\tFEMALE_
FREQ_HOMREF\\tFEMALE_FREQ_HET\\tFEMALE_FREQ_HOMALT\\tPOPMAX_AF\\tAFR_AN\\tAFR_AC\\tAFR_AF\\tAFR_N_BI_GENOS\\tAFR_N_HOMREF\\tAFR_N_HET\\tAFR_N_HOMALT\\tAFR_FREQ_HOMREF\\tAFR_FREQ_HET\\tAFR_FREQ_HOMALT\\tAFR_MALE_AN\\tAFR_MALE_AC\\tAFR_MALE_AF\\tAFR_MALE_N_BI_GENOS\\tAFR_MALE_N_HOMREF\\tAFR_MALE_N_HET\\tAFR_MALE_N_HOMALT\\tAFR_MALE_FREQ_HOMREF\\tAFR_MALE_FREQ_HET\\tAFR_MALE_FREQ_HOMALT\\tAFR_MALE_N_HEMIREF\\tAFR_MALE_N_HEMIALT\\tAFR_MALE_FREQ_HEMIREF\\tAFR_MALE_FREQ_HEMIALT\\tAFR_FEMALE_AN\\tAFR_FEMALE_AC\\tAFR_FEMALE_AF\\tAFR_FEMALE_N_BI_GENOS\\tAFR_FEMALE_N_HOMREF\\tAFR_FEMALE_N_HET\\tAFR_FEMALE_N_HOMALT\\tAFR_FEMALE_FREQ_HOMREF\\tAFR_FEMALE_FREQ_HET\\tAFR_FEMALE_FREQ_HOMALT\\tAMR_AN\\tAMR_AC\\tAMR_AF\\tAMR_N_BI_GENOS\\tAMR_N_HOMREF\\tAMR_N_HET\\tAMR_N_HOMALT\\tAMR_FREQ_HOMREF\\tAMR_FREQ_HET\\tAMR_FREQ_HOMALT\\tAMR_MALE_AN\\tAMR_MALE_AC\\tAMR_MALE_AF\\tAMR_MALE_N_BI_GENOS\\tAMR_MALE_N_HOMREF\\tAMR_MALE_N_HET\\tAMR_MALE_N_HOMALT\\tAMR_MALE_FREQ_HOMREF\\tAMR_MALE_FREQ_HET\\tAMR_MALE_FREQ_HOMALT\\tAMR_MALE_N_HEMIREF\\tAMR_MALE_N_HEMIALT\\tAMR_MALE_FREQ_HEMIREF\\tAMR_MALE_FREQ_HEMIALT\\tAMR_FEMALE_AN\\tAMR_FEMALE_AC\\tAMR_FEMALE_AF\\tAMR_FEMALE_N_BI_GENOS\\tAMR_FEMALE_N_HOMREF\\tAMR_FEMALE_N_HET\\tAMR_FEMALE_N_HOMALT\\tAMR_FEMALE_FREQ_HOMREF\\tAMR_FEMALE_FREQ_HET\\tAMR_FEMALE_FREQ_HOMALT\\tEAS_AN\\tEAS_AC\\tEAS_AF\\tEAS_N_BI_GENOS\\tEAS_N_HOMREF\\tEAS_N_HET\\tEAS_N_HOMALT\\tEAS_FREQ_HOMREF\\tEAS_FREQ_HET\\tEAS_FREQ_HOMALT\\tEAS_MALE_AN\\tEAS_MALE_AC\\tEAS_MALE_AF\\tEAS_MALE_N_BI_GENOS\\tEAS_MALE_N_HOMREF\\tEAS_MALE_N_HET\\tEAS_MALE_N_HOMALT\\tEAS_MALE_FREQ_HOMREF\\tEAS_MALE_FREQ_HET\\tEAS_MALE_FREQ_HOMALT\\tEAS_MALE_N_HEMIREF\\tEAS_MALE_N_HEMIALT\\tEAS_MALE_FREQ_HEMIREF\\tEAS_MALE_FREQ_HEMIALT\\tEAS_FEMALE_AN\\tEAS_FEMALE_AC\\tEAS_FEMALE_AF\\tEAS_FEMALE_N_BI_GENOS\\tEAS_FEMALE_N_HOMREF\\tEAS_FEMALE_N_HET\\tEAS_FEMALE_N_HOMALT\\tEAS_FEMALE_FREQ_HOMREF\\tEAS_FEMALE_FREQ_HET\\tEAS_FEMALE_FREQ_HOMALT\\tEUR_AN\\tEUR_AC\\tEUR_AF\\tEUR_N_BI_GENOS\\tEUR_N_HOMREF\\tEUR_N_HET\\tE
UR_N_HOMALT\\tEUR_FREQ_HOMREF\\tEUR_FREQ_HET\\tEUR_FREQ_HOMALT\\tEUR_MALE_AN\\tEUR_MALE_AC\\tEUR_MALE_AF\\tEUR_MALE_N_BI_GENOS\\tEUR_MALE_N_HOMREF\\tEUR_MALE_N_HET\\tEUR_MALE_N_HOMALT\\tEUR_MALE_FREQ_HOMREF\\tEUR_MALE_FREQ_HET\\tEUR_MALE_FREQ_HOMALT\\tEUR_MALE_N_HEMIREF\\tEUR_MALE_N_HEMIALT\\tEUR_MALE_FREQ_HEMIREF\\tEUR_MALE_FREQ_HEMIALT\\tEUR_FEMALE_AN\\tEUR_FEMALE_AC\\tEUR_FEMALE_AF\\tEUR_FEMALE_N_BI_GENOS\\tEUR_FEMALE_N_HOMREF\\tEUR_FEMALE_N_HET\\tEUR_FEMALE_N_HOMALT\\tEUR_FEMALE_FREQ_HOMREF\\tEUR_FEMALE_FREQ_HET\\tEUR_FEMALE_FREQ_HOMALT\\tOTH_AN\\tOTH_AC\\tOTH_AF\\tOTH_N_BI_GENOS\\tOTH_N_HOMREF\\tOTH_N_HET\\tOTH_N_HOMALT\\tOTH_FREQ_HOMREF\\tOTH_FREQ_HET\\tOTH_FREQ_HOMALT\\tOTH_MALE_AN\\tOTH_MALE_AC\\tOTH_MALE_AF\\tOTH_MALE_N_BI_GENOS\\tOTH_MALE_N_HOMREF\\tOTH_MALE_N_HET\\tOTH_MALE_N_HOMALT\\tOTH_MALE_FREQ_HOMREF\\tOTH_MALE_FREQ_HET\\tOTH_MALE_FREQ_HOMALT\\tOTH_MALE_N_HEMIREF\\tOTH_MALE_N_HEMIALT\\tOTH_MALE_FREQ_HEMIREF\\tOTH_MALE_FREQ_HEMIALT\\tOTH_FEMALE_AN\\tOTH_FEMALE_AC\\tOTH_FEMALE_AF\\tOTH_FEMALE_N_BI_GENOS\\tOTH_FEMALE_N_HOMREF\\tOTH_FEMALE_N_HET\\tOTH_FEMALE_N_HOMALT\\tOTH_FEMALE_FREQ_HOMREF\\tOTH_FEMALE_FREQ_HET\\tOTH_FEMALE_FREQ_HOMALT\\tFILTER\\n\" +\n            
\"1\\tInvalid-10641\\t10642\\tgnomAD-SV_v2.1_BND_1_1\\tBND\\tmanta\\tFalse\\t15\\tNA\\tNA\\t10643\\t10643\\tPE,SR\\tFalse\\tFalse\\tTrue\\t10642\\tNA\\tNA\\tNA\\tFalse\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\tNA\\t-1\\tBND\\tSINGLE_ENDER_--\\tFalse\\tFalse\\t21366\\t145\\t0.006785999983549118\\t10683\\t10543\\t135\\t5\\t0.9868950247764587\\t0.012636899948120117\\t0.00046803298755548894\\t10866\\t69\\t0.00634999992325902\\t5433\\t5366\\t65\\t2\\t0.987667977809906\\t0.011963900178670883\\t0.000368120992789045\\tNA\\tNA\\tNA\\tNA\\tFalse\\t10454\\t76\\t0.007269999943673611\\t5227\\t5154\\t70\\t3\\t0.9860339760780334\\t0.013392000459134579\\t0.0005739430198445916\\t0.015956999734044075\\t9398\\t72\\t0.007660999894142151\\t4699\\t4629\\t68\\t2\\t0.9851030111312866\\t0.014471200294792652\\t0.0004256220126990229\\t5154\\t33\\t0.006403000093996525\\t2577\\t2544\\t33\\t0\\t0.9871940016746521\\t0.012805599719285965\\t0.0\\tNA\\tNA\\tNA\\tNA\\t4232\\t39\\t0.009216000325977802\\t2116\\t2079\\t35\\t2\\t0.9825140237808228\\t0.01654059998691082\\t0.0009451800142414868\\t1910\\t7\\t0.003664999967440963\\t955\\t949\\t5\\t1\\t0.9937170147895813\\t0.00523559981957078\\t0.001047119963914156\\t950\\t4\\t0.004211000166833401\\t475\\t472\\t2\\t1\\t0.9936839938163757\\t0.00421052984893322\\t0.0021052600350230932\\tNA\\tNA\\tNA\\tNA\\t952\\t3\\t0.0031510000117123127\\t476\\t473\\t3\\t0\\t0.9936969876289368\\t0.006302520167082548\\t0.0\\t2296\\t31\\t0.013501999899744987\\t1148\\t1117\\t31\\t0\\t0.9729970097541809\\t0.02700350061058998\\t0.0\\t1312\\t13\\t0.009909000247716904\\t656\\t643\\t13\\t0\\t0.9801830053329468\\t0.01981710083782673\\t0.0\\tNA\\tNA\\tNA\\tNA\\t976\\t18\\t0.018442999571561813\\t488\\t470\\t18\\t0\\t0.9631149768829346\\t0.03688519820570946\\t0.0\\t7574\\t32\\t0.004224999807775021\\t3787\\t3757\\t28\\t2\\t0.9920780062675476\\t0.007393720094114542\\t0.0005281229969114065\\t3374\\t17\\t0.005038999952375889\\t1687\\t1671\\t15\\t1\\t0.9905160069465637\\t0.008891520090401173\\
t0.000592768017668277\\tNA\\tNA\\tNA\\tNA\\t4182\\t15\\t0.003587000072002411\\t2091\\t2077\\t13\\t1\\t0.9933050274848938\\t0.006217120215296745\\t0.00047823999193497\\t188\\t3\\t0.015956999734044075\\t94\\t91\\t3\\t0\\t0.968084990978241\\t0.03191490098834038\\t0.0\\t76\\t2\\t0.026316000148653984\\t38\\t36\\t2\\t0\\t0.9473680257797241\\t0.05263160169124603\\t0.0\\tNA\\tNA\\tNA\\tNA\\t112\\t1\\t0.008929000236093998\\t56\\t55\\t1\\t0\\t0.982142984867096\\t0.017857100814580917\\t0.0\\tUNRESOLVED\\n\";\n\n        using var reader         = new StreamReader(GetStreamData(bedData));\n        using var gnomadSvParser = new GnomadSvBedParser(reader, ChromosomeUtilities.RefNameToChromosome);\n\n        Assert.Throws<InvalidDataException>(() => gnomadSvParser.GetItems().ToList());\n    }\n\n    [Fact]\n    public void TestInvalidEnd()\n    {\n        const string bedData =\n            \"#chrom\\tstart\\tend\\tname\\tsvtype\\tALGORITHMS\\tBOTHSIDES_SUPPORT\\tCHR2\\tCPX_INTERVALS\\tCPX_TYPE\\tEND2\\tEND\\tEVIDENCE\\tHIGH_SR_BACKGROUND\\tPCRPLUS_DEPLETED\\tPESR_GT_OVERDISPERSION\\tPOS2\\tPROTEIN_CODING__COPY_GAIN\\tPROTEIN_CODING__DUP_LOF\\tPROTEIN_CODING__DUP_PARTIAL\\tPROTEIN_CODING__INTERGENIC\\tPROTEIN_CODING__INTRONIC\\tPROTEIN_CODING__INV_SPAN\\tPROTEIN_CODING__LOF\\tPROTEIN_CODING__MSV_EXON_OVR\\tPROTEIN_CODING__NEAREST_TSS\\tPROTEIN_CODING__PROMOTER\\tPROTEIN_CODING__UTR\\tSOURCE\\tSTRANDS\\tSVLEN\\tSVTYPE\\tUNRESOLVED_TYPE\\tUNSTABLE_AF_PCRPLUS\\tVARIABLE_ACROSS_BATCHES\\tAN\\tAC\\tAF\\tN_BI_GENOS\\tN_HOMREF\\tN_HET\\tN_HOMALT\\tFREQ_HOMREF\\tFREQ_HET\\tFREQ_HOMALT\\tMALE_AN\\tMALE_AC\\tMALE_AF\\tMALE_N_BI_GENOS\\tMALE_N_HOMREF\\tMALE_N_HET\\tMALE_N_HOMALT\\tMALE_FREQ_HOMREF\\tMALE_FREQ_HET\\tMALE_FREQ_HOMALT\\tMALE_N_HEMIREF\\tMALE_N_HEMIALT\\tMALE_FREQ_HEMIREF\\tMALE_FREQ_HEMIALT\\tPAR\\tFEMALE_AN\\tFEMALE_AC\\tFEMALE_AF\\tFEMALE_N_BI_GENOS\\tFEMALE_N_HOMREF\\tFEMALE_N_HET\\tFEMALE_N_HOMALT\\tFEMALE_FREQ_HOMREF\\tFEMALE_FREQ_HET\\tFEMALE_FREQ_HOMALT\\tPOPMAX_AF\\tAFR_
AN\\tAFR_AC\\tAFR_AF\\tAFR_N_BI_GENOS\\tAFR_N_HOMREF\\tAFR_N_HET\\tAFR_N_HOMALT\\tAFR_FREQ_HOMREF\\tAFR_FREQ_HET\\tAFR_FREQ_HOMALT\\tAFR_MALE_AN\\tAFR_MALE_AC\\tAFR_MALE_AF\\tAFR_MALE_N_BI_GENOS\\tAFR_MALE_N_HOMREF\\tAFR_MALE_N_HET\\tAFR_MALE_N_HOMALT\\tAFR_MALE_FREQ_HOMREF\\tAFR_MALE_FREQ_HET\\tAFR_MALE_FREQ_HOMALT\\tAFR_MALE_N_HEMIREF\\tAFR_MALE_N_HEMIALT\\tAFR_MALE_FREQ_HEMIREF\\tAFR_MALE_FREQ_HEMIALT\\tAFR_FEMALE_AN\\tAFR_FEMALE_AC\\tAFR_FEMALE_AF\\tAFR_FEMALE_N_BI_GENOS\\tAFR_FEMALE_N_HOMREF\\tAFR_FEMALE_N_HET\\tAFR_FEMALE_N_HOMALT\\tAFR_FEMALE_FREQ_HOMREF\\tAFR_FEMALE_FREQ_HET\\tAFR_FEMALE_FREQ_HOMALT\\tAMR_AN\\tAMR_AC\\tAMR_AF\\tAMR_N_BI_GENOS\\tAMR_N_HOMREF\\tAMR_N_HET\\tAMR_N_HOMALT\\tAMR_FREQ_HOMREF\\tAMR_FREQ_HET\\tAMR_FREQ_HOMALT\\tAMR_MALE_AN\\tAMR_MALE_AC\\tAMR_MALE_AF\\tAMR_MALE_N_BI_GENOS\\tAMR_MALE_N_HOMREF\\tAMR_MALE_N_HET\\tAMR_MALE_N_HOMALT\\tAMR_MALE_FREQ_HOMREF\\tAMR_MALE_FREQ_HET\\tAMR_MALE_FREQ_HOMALT\\tAMR_MALE_N_HEMIREF\\tAMR_MALE_N_HEMIALT\\tAMR_MALE_FREQ_HEMIREF\\tAMR_MALE_FREQ_HEMIALT\\tAMR_FEMALE_AN\\tAMR_FEMALE_AC\\tAMR_FEMALE_AF\\tAMR_FEMALE_N_BI_GENOS\\tAMR_FEMALE_N_HOMREF\\tAMR_FEMALE_N_HET\\tAMR_FEMALE_N_HOMALT\\tAMR_FEMALE_FREQ_HOMREF\\tAMR_FEMALE_FREQ_HET\\tAMR_FEMALE_FREQ_HOMALT\\tEAS_AN\\tEAS_AC\\tEAS_AF\\tEAS_N_BI_GENOS\\tEAS_N_HOMREF\\tEAS_N_HET\\tEAS_N_HOMALT\\tEAS_FREQ_HOMREF\\tEAS_FREQ_HET\\tEAS_FREQ_HOMALT\\tEAS_MALE_AN\\tEAS_MALE_AC\\tEAS_MALE_AF\\tEAS_MALE_N_BI_GENOS\\tEAS_MALE_N_HOMREF\\tEAS_MALE_N_HET\\tEAS_MALE_N_HOMALT\\tEAS_MALE_FREQ_HOMREF\\tEAS_MALE_FREQ_HET\\tEAS_MALE_FREQ_HOMALT\\tEAS_MALE_N_HEMIREF\\tEAS_MALE_N_HEMIALT\\tEAS_MALE_FREQ_HEMIREF\\tEAS_MALE_FREQ_HEMIALT\\tEAS_FEMALE_AN\\tEAS_FEMALE_AC\\tEAS_FEMALE_AF\\tEAS_FEMALE_N_BI_GENOS\\tEAS_FEMALE_N_HOMREF\\tEAS_FEMALE_N_HET\\tEAS_FEMALE_N_HOMALT\\tEAS_FEMALE_FREQ_HOMREF\\tEAS_FEMALE_FREQ_HET\\tEAS_FEMALE_FREQ_HOMALT\\tEUR_AN\\tEUR_AC\\tEUR_AF\\tEUR_N_BI_GENOS\\tEUR_N_HOMREF\\tEUR_N_HET\\tEUR_N_HOMALT\\tEUR_FREQ_HOMREF\\tEUR_FREQ_HET\\tEUR_FREQ_HOMALT\\tEUR_
MALE_AN\\tEUR_MALE_AC\\tEUR_MALE_AF\\tEUR_MALE_N_BI_GENOS\\tEUR_MALE_N_HOMREF\\tEUR_MALE_N_HET\\tEUR_MALE_N_HOMALT\\tEUR_MALE_FREQ_HOMREF\\tEUR_MALE_FREQ_HET\\tEUR_MALE_FREQ_HOMALT\\tEUR_MALE_N_HEMIREF\\tEUR_MALE_N_HEMIALT\\tEUR_MALE_FREQ_HEMIREF\\tEUR_MALE_FREQ_HEMIALT\\tEUR_FEMALE_AN\\tEUR_FEMALE_AC\\tEUR_FEMALE_AF\\tEUR_FEMALE_N_BI_GENOS\\tEUR_FEMALE_N_HOMREF\\tEUR_FEMALE_N_HET\\tEUR_FEMALE_N_HOMALT\\tEUR_FEMALE_FREQ_HOMREF\\tEUR_FEMALE_FREQ_HET\\tEUR_FEMALE_FREQ_HOMALT\\tOTH_AN\\tOTH_AC\\tOTH_AF\\tOTH_N_BI_GENOS\\tOTH_N_HOMREF\\tOTH_N_HET\\tOTH_N_HOMALT\\tOTH_FREQ_HOMREF\\tOTH_FREQ_HET\\tOTH_FREQ_HOMALT\\tOTH_MALE_AN\\tOTH_MALE_AC\\tOTH_MALE_AF\\tOTH_MALE_N_BI_GENOS\\tOTH_MALE_N_HOMREF\\tOTH_MALE_N_HET\\tOTH_MALE_N_HOMALT\\tOTH_MALE_FREQ_HOMREF\\tOTH_MALE_FREQ_HET\\tOTH_MALE_FREQ_HOMALT\\tOTH_MALE_N_HEMIREF\\tOTH_MALE_N_HEMIALT\\tOTH_MALE_FREQ_HEMIREF\\tOTH_MALE_FREQ_HEMIALT\\tOTH_FEMALE_AN\\tOTH_FEMALE_AC\\tOTH_FEMALE_AF\\tOTH_FEMALE_N_BI_GENOS\\tOTH_FEMALE_N_HOMREF\\tOTH_FEMALE_N_HET\\tOTH_FEMALE_N_HOMALT\\tOTH_FEMALE_FREQ_HOMREF\\tOTH_FEMALE_FREQ_HET\\tOTH_FEMALE_FREQ_HOMALT\\tFILTER\\n\" +\n            
\"1\\t20999\\tInvalid-26000\\tgnomAD-SV_v2.1_DEL_1_1\\tDEL\\tdepth\\tFalse\\tNA\\tNA\\tNA\\tNA\\t26000\\tRD\\tFalse\\tFalse\\tFalse\\tNA\\tNA\\tNA\\tNA\\tTrue\\tNA\\tNA\\tNA\\tNA\\tOR4F5\\tNA\\tNA\\tNA\\tNA\\t5000\\tDEL\\tNA\\tFalse\\tFalse\\t8586\\t138\\t0.01607299968600273\\t4293\\t4155\\t138\\t0\\t0.9678549766540527\\t0.03214539960026741\\t0.0\\t4302\\t69\\t0.01603900082409382\\t2151\\t2082\\t69\\t0\\t0.9679219722747803\\t0.0320780985057354\\t0.0\\tNA\\tNA\\tNA\\tNA\\tFalse\\t4272\\t68\\t0.015917999669909477\\t2136\\t2068\\t68\\t0\\t0.9681649804115295\\t0.031835198402404785\\t0.0\\t0.07199999690055847\\t3718\\t27\\t0.007261999882757664\\t1859\\t1832\\t27\\t0\\t0.985476016998291\\t0.014523900113999844\\t0.0\\t2016\\t16\\t0.007937000133097172\\t1008\\t992\\t16\\t0\\t0.9841269850730896\\t0.015873000025749207\\t0.0\\tNA\\tNA\\tNA\\tNA\\t1702\\t11\\t0.00646300008520484\\t851\\t840\\t11\\t0\\t0.9870740175247192\\t0.01292600017040968\\t0.0\\t684\\t8\\t0.011695999652147293\\t342\\t334\\t8\\t0\\t0.9766079783439636\\t0.02339180000126362\\t0.0\\t326\\t2\\t0.006134999915957451\\t163\\t161\\t2\\t0\\t0.9877300262451172\\t0.012269900180399418\\t0.0\\tNA\\tNA\\tNA\\tNA\\t358\\t6\\t0.016759999096393585\\t179\\t173\\t6\\t0\\t0.966480016708374\\t0.033519599586725235\\t0.0\\t750\\t54\\t0.07199999690055847\\t375\\t321\\t54\\t0\\t0.8560000061988831\\t0.14399999380111694\\t0.0\\t418\\t28\\t0.06698600202798843\\t209\\t181\\t28\\t0\\t0.8660290241241455\\t0.13397100567817688\\t0.0\\tNA\\tNA\\tNA\\tNA\\t328\\t25\\t0.07621999830007553\\t164\\t139\\t25\\t0\\t0.8475610017776489\\t0.15243899822235107\\t0.0\\t3346\\t48\\t0.014344999566674232\\t1673\\t1625\\t48\\t0\\t0.9713090062141418\\t0.028690999373793602\\t0.0\\t1498\\t22\\t0.014685999602079391\\t749\\t727\\t22\\t0\\t0.9706270098686218\\t0.029372500255703926\\t0.0\\tNA\\tNA\\tNA\\tNA\\t1840\\t26\\t0.01413000002503395\\t920\\t894\\t26\\t0\\t0.9717389941215515\\t0.02826089970767498\\t0.0\\t88\\t1\\t0.011363999918103218\\t44\\t43\\t1\\t0\\t0.97
72729873657227\\t0.022727299481630325\\t0.0\\t44\\t1\\t0.0227269995957613\\t22\\t21\\t1\\t0\\t0.9545450210571289\\t0.04545449838042259\\t0.0\\tNA\\tNA\\tNA\\tNA\\t44\\t0\\t0.0\\t22\\t22\\t0\\t0\\t1.0\\t0.0\\t0.0\\tUNSTABLE_AF_PCRMINUS,LOW_CALL_RATE\\n\";\n\n        using var reader         = new StreamReader(GetStreamData(bedData));\n        using var gnomadSvParser = new GnomadSvBedParser(reader, ChromosomeUtilities.RefNameToChromosome);\n\n        Assert.Throws<InvalidDataException>(() => gnomadSvParser.GetItems().ToList());\n    }\n\n    [Fact]\n    public void TestInvalidSvType()\n    {\n        const string bedData =\n            \"#chrom\\tstart\\tend\\tname\\tsvtype\\tALGORITHMS\\tBOTHSIDES_SUPPORT\\tCHR2\\tCPX_INTERVALS\\tCPX_TYPE\\tEND2\\tEND\\tEVIDENCE\\tHIGH_SR_BACKGROUND\\tPCRPLUS_DEPLETED\\tPESR_GT_OVERDISPERSION\\tPOS2\\tPROTEIN_CODING__COPY_GAIN\\tPROTEIN_CODING__DUP_LOF\\tPROTEIN_CODING__DUP_PARTIAL\\tPROTEIN_CODING__INTERGENIC\\tPROTEIN_CODING__INTRONIC\\tPROTEIN_CODING__INV_SPAN\\tPROTEIN_CODING__LOF\\tPROTEIN_CODING__MSV_EXON_OVR\\tPROTEIN_CODING__NEAREST_TSS\\tPROTEIN_CODING__PROMOTER\\tPROTEIN_CODING__UTR\\tSOURCE\\tSTRANDS\\tSVLEN\\tSVTYPE\\tUNRESOLVED_TYPE\\tUNSTABLE_AF_PCRPLUS\\tVARIABLE_ACROSS_BATCHES\\tAN\\tAC\\tAF\\tN_BI_GENOS\\tN_HOMREF\\tN_HET\\tN_HOMALT\\tFREQ_HOMREF\\tFREQ_HET\\tFREQ_HOMALT\\tMALE_AN\\tMALE_AC\\tMALE_AF\\tMALE_N_BI_GENOS\\tMALE_N_HOMREF\\tMALE_N_HET\\tMALE_N_HOMALT\\tMALE_FREQ_HOMREF\\tMALE_FREQ_HET\\tMALE_FREQ_HOMALT\\tMALE_N_HEMIREF\\tMALE_N_HEMIALT\\tMALE_FREQ_HEMIREF\\tMALE_FREQ_HEMIALT\\tPAR\\tFEMALE_AN\\tFEMALE_AC\\tFEMALE_AF\\tFEMALE_N_BI_GENOS\\tFEMALE_N_HOMREF\\tFEMALE_N_HET\\tFEMALE_N_HOMALT\\tFEMALE_FREQ_HOMREF\\tFEMALE_FREQ_HET\\tFEMALE_FREQ_HOMALT\\tPOPMAX_AF\\tAFR_AN\\tAFR_AC\\tAFR_AF\\tAFR_N_BI_GENOS\\tAFR_N_HOMREF\\tAFR_N_HET\\tAFR_N_HOMALT\\tAFR_FREQ_HOMREF\\tAFR_FREQ_HET\\tAFR_FREQ_HOMALT\\tAFR_MALE_AN\\tAFR_MALE_AC\\tAFR_MALE_AF\\tAFR_MALE_N_BI_GENOS\\tAFR_MALE_N_HOMREF\\tAFR_MALE_N_HET\\tAFR_MALE_N_
HOMALT\\tAFR_MALE_FREQ_HOMREF\\tAFR_MALE_FREQ_HET\\tAFR_MALE_FREQ_HOMALT\\tAFR_MALE_N_HEMIREF\\tAFR_MALE_N_HEMIALT\\tAFR_MALE_FREQ_HEMIREF\\tAFR_MALE_FREQ_HEMIALT\\tAFR_FEMALE_AN\\tAFR_FEMALE_AC\\tAFR_FEMALE_AF\\tAFR_FEMALE_N_BI_GENOS\\tAFR_FEMALE_N_HOMREF\\tAFR_FEMALE_N_HET\\tAFR_FEMALE_N_HOMALT\\tAFR_FEMALE_FREQ_HOMREF\\tAFR_FEMALE_FREQ_HET\\tAFR_FEMALE_FREQ_HOMALT\\tAMR_AN\\tAMR_AC\\tAMR_AF\\tAMR_N_BI_GENOS\\tAMR_N_HOMREF\\tAMR_N_HET\\tAMR_N_HOMALT\\tAMR_FREQ_HOMREF\\tAMR_FREQ_HET\\tAMR_FREQ_HOMALT\\tAMR_MALE_AN\\tAMR_MALE_AC\\tAMR_MALE_AF\\tAMR_MALE_N_BI_GENOS\\tAMR_MALE_N_HOMREF\\tAMR_MALE_N_HET\\tAMR_MALE_N_HOMALT\\tAMR_MALE_FREQ_HOMREF\\tAMR_MALE_FREQ_HET\\tAMR_MALE_FREQ_HOMALT\\tAMR_MALE_N_HEMIREF\\tAMR_MALE_N_HEMIALT\\tAMR_MALE_FREQ_HEMIREF\\tAMR_MALE_FREQ_HEMIALT\\tAMR_FEMALE_AN\\tAMR_FEMALE_AC\\tAMR_FEMALE_AF\\tAMR_FEMALE_N_BI_GENOS\\tAMR_FEMALE_N_HOMREF\\tAMR_FEMALE_N_HET\\tAMR_FEMALE_N_HOMALT\\tAMR_FEMALE_FREQ_HOMREF\\tAMR_FEMALE_FREQ_HET\\tAMR_FEMALE_FREQ_HOMALT\\tEAS_AN\\tEAS_AC\\tEAS_AF\\tEAS_N_BI_GENOS\\tEAS_N_HOMREF\\tEAS_N_HET\\tEAS_N_HOMALT\\tEAS_FREQ_HOMREF\\tEAS_FREQ_HET\\tEAS_FREQ_HOMALT\\tEAS_MALE_AN\\tEAS_MALE_AC\\tEAS_MALE_AF\\tEAS_MALE_N_BI_GENOS\\tEAS_MALE_N_HOMREF\\tEAS_MALE_N_HET\\tEAS_MALE_N_HOMALT\\tEAS_MALE_FREQ_HOMREF\\tEAS_MALE_FREQ_HET\\tEAS_MALE_FREQ_HOMALT\\tEAS_MALE_N_HEMIREF\\tEAS_MALE_N_HEMIALT\\tEAS_MALE_FREQ_HEMIREF\\tEAS_MALE_FREQ_HEMIALT\\tEAS_FEMALE_AN\\tEAS_FEMALE_AC\\tEAS_FEMALE_AF\\tEAS_FEMALE_N_BI_GENOS\\tEAS_FEMALE_N_HOMREF\\tEAS_FEMALE_N_HET\\tEAS_FEMALE_N_HOMALT\\tEAS_FEMALE_FREQ_HOMREF\\tEAS_FEMALE_FREQ_HET\\tEAS_FEMALE_FREQ_HOMALT\\tEUR_AN\\tEUR_AC\\tEUR_AF\\tEUR_N_BI_GENOS\\tEUR_N_HOMREF\\tEUR_N_HET\\tEUR_N_HOMALT\\tEUR_FREQ_HOMREF\\tEUR_FREQ_HET\\tEUR_FREQ_HOMALT\\tEUR_MALE_AN\\tEUR_MALE_AC\\tEUR_MALE_AF\\tEUR_MALE_N_BI_GENOS\\tEUR_MALE_N_HOMREF\\tEUR_MALE_N_HET\\tEUR_MALE_N_HOMALT\\tEUR_MALE_FREQ_HOMREF\\tEUR_MALE_FREQ_HET\\tEUR_MALE_FREQ_HOMALT\\tEUR_MALE_N_HEMIREF\\tEUR_MALE_N_HEMIALT\\tEUR_MALE_FREQ_HEMIRE
F\\tEUR_MALE_FREQ_HEMIALT\\tEUR_FEMALE_AN\\tEUR_FEMALE_AC\\tEUR_FEMALE_AF\\tEUR_FEMALE_N_BI_GENOS\\tEUR_FEMALE_N_HOMREF\\tEUR_FEMALE_N_HET\\tEUR_FEMALE_N_HOMALT\\tEUR_FEMALE_FREQ_HOMREF\\tEUR_FEMALE_FREQ_HET\\tEUR_FEMALE_FREQ_HOMALT\\tOTH_AN\\tOTH_AC\\tOTH_AF\\tOTH_N_BI_GENOS\\tOTH_N_HOMREF\\tOTH_N_HET\\tOTH_N_HOMALT\\tOTH_FREQ_HOMREF\\tOTH_FREQ_HET\\tOTH_FREQ_HOMALT\\tOTH_MALE_AN\\tOTH_MALE_AC\\tOTH_MALE_AF\\tOTH_MALE_N_BI_GENOS\\tOTH_MALE_N_HOMREF\\tOTH_MALE_N_HET\\tOTH_MALE_N_HOMALT\\tOTH_MALE_FREQ_HOMREF\\tOTH_MALE_FREQ_HET\\tOTH_MALE_FREQ_HOMALT\\tOTH_MALE_N_HEMIREF\\tOTH_MALE_N_HEMIALT\\tOTH_MALE_FREQ_HEMIREF\\tOTH_MALE_FREQ_HEMIALT\\tOTH_FEMALE_AN\\tOTH_FEMALE_AC\\tOTH_FEMALE_AF\\tOTH_FEMALE_N_BI_GENOS\\tOTH_FEMALE_N_HOMREF\\tOTH_FEMALE_N_HET\\tOTH_FEMALE_N_HOMALT\\tOTH_FEMALE_FREQ_HOMREF\\tOTH_FEMALE_FREQ_HET\\tOTH_FEMALE_FREQ_HOMALT\\tFILTER\\n\" +\n            \"1\\t20999\\t26000\\tgnomAD-SV_v2.1_DEL_1_1\\tINVALID-DEL\\tdepth\\tFalse\\tNA\\tNA\\tNA\\tNA\\t26000\\tRD\\tFalse\\tFalse\\tFalse\\tNA\\tNA\\tNA\\tNA\\tTrue\\tNA\\tNA\\tNA\\tNA\\tOR4F5\\tNA\\tNA\\tNA\\tNA\\t5000\\tDEL\\tNA\\tFalse\\tFalse\\t8586\\t138\\t0.01607299968600273\\t4293\\t4155\\t138\\t0\\t0.9678549766540527\\t0.03214539960026741\\t0.0\\t4302\\t69\\t0.01603900082409382\\t2151\\t2082\\t69\\t0\\t0.9679219722747803\\t0.0320780985057354\\t0.0\\tNA\\tNA\\tNA\\tNA\\tFalse\\t4272\\t68\\t0.015917999669909477\\t2136\\t2068\\t68\\t0\\t0.9681649804115295\\t0.031835198402404785\\t0.0\\t0.07199999690055847\\t3718\\t27\\t0.007261999882757664\\t1859\\t1832\\t27\\t0\\t0.985476016998291\\t0.014523900113999844\\t0.0\\t2016\\t16\\t0.007937000133097172\\t1008\\t992\\t16\\t0\\t0.9841269850730896\\t0.015873000025749207\\t0.0\\tNA\\tNA\\tNA\\tNA\\t1702\\t11\\t0.00646300008520484\\t851\\t840\\t11\\t0\\t0.9870740175247192\\t0.01292600017040968\\t0.0\\t684\\t8\\t0.011695999652147293\\t342\\t334\\t8\\t0\\t0.9766079783439636\\t0.02339180000126362\\t0.0\\t326\\t2\\t0.006134999915957451\\t163\\t161\\t2\\t0\\t0.98773002
62451172\\t0.012269900180399418\\t0.0\\tNA\\tNA\\tNA\\tNA\\t358\\t6\\t0.016759999096393585\\t179\\t173\\t6\\t0\\t0.966480016708374\\t0.033519599586725235\\t0.0\\t750\\t54\\t0.07199999690055847\\t375\\t321\\t54\\t0\\t0.8560000061988831\\t0.14399999380111694\\t0.0\\t418\\t28\\t0.06698600202798843\\t209\\t181\\t28\\t0\\t0.8660290241241455\\t0.13397100567817688\\t0.0\\tNA\\tNA\\tNA\\tNA\\t328\\t25\\t0.07621999830007553\\t164\\t139\\t25\\t0\\t0.8475610017776489\\t0.15243899822235107\\t0.0\\t3346\\t48\\t0.014344999566674232\\t1673\\t1625\\t48\\t0\\t0.9713090062141418\\t0.028690999373793602\\t0.0\\t1498\\t22\\t0.014685999602079391\\t749\\t727\\t22\\t0\\t0.9706270098686218\\t0.029372500255703926\\t0.0\\tNA\\tNA\\tNA\\tNA\\t1840\\t26\\t0.01413000002503395\\t920\\t894\\t26\\t0\\t0.9717389941215515\\t0.02826089970767498\\t0.0\\t88\\t1\\t0.011363999918103218\\t44\\t43\\t1\\t0\\t0.9772729873657227\\t0.022727299481630325\\t0.0\\t44\\t1\\t0.0227269995957613\\t22\\t21\\t1\\t0\\t0.9545450210571289\\t0.04545449838042259\\t0.0\\tNA\\tNA\\tNA\\tNA\\t44\\t0\\t0.0\\t22\\t22\\t0\\t0\\t1.0\\t0.0\\t0.0\\tUNSTABLE_AF_PCRMINUS,LOW_CALL_RATE\\n\";\n\n        using var reader         = new StreamReader(GetStreamData(bedData));\n        using var gnomadSvParser = new GnomadSvBedParser(reader, ChromosomeUtilities.RefNameToChromosome);\n\n        Assert.Throws<InvalidDataException>(() => gnomadSvParser.GetItems().ToList());\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/gnomAD/GnomadSvItemTests.cs",
    "content": "using SAUtils.DataStructures;\nusing UnitTests.TestUtilities;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.gnomAD;\n\npublic sealed class GnomadSvItemTests\n{\n    [Fact]\n    public void TestGnomadSvItem()\n    {\n        var gnomadSvItem = new GnomadSvItem(ChromosomeUtilities.Chr1, \"\");\n\n        Assert.Equal(\"\",                                                                       gnomadSvItem.InputLine);\n        Assert.Equal(\"\\\"chromosome\\\":\\\"1\\\",\\\"begin\\\":0,\\\"end\\\":0,\\\"variantType\\\":\\\"unknown\\\"\", gnomadSvItem.GetJsonString());\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/gnomAD/GnomadSvTsvParserTests.cs",
    "content": "using System.Collections.Generic;\nusing System.IO;\nusing System.Linq;\nusing SAUtils.DataStructures;\nusing SAUtils.gnomAD;\nusing UnitTests.TestUtilities;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.gnomAD;\n\npublic sealed class GnomadSvTsvParserTests\n{\n    private static Stream GetStreamData(string dataString)\n    {\n        var stream = new MemoryStream();\n        var writer = new StreamWriter(stream);\n        writer.Write(dataString);\n        writer.Flush();\n        stream.Position = 0;\n        return stream;\n    }\n\n    [Fact]\n    public void TestGnomadSvTsvParser()\n    {\n        const string tsvData =\n            \"#variant_call_accession\\tvariant_call_id\\tvariant_call_type\\texperiment_id\\tsample_id\\tsampleset_id\\tassembly\\tchrcontig\\touter_start\\tstart\\tinner_start\\tinner_stop\\tstop\\touter_stop\\tinsertion_length\\tvariant_region_acc\\tvariant_region_id\\tcopy_number\\tdescription\\tvalidation\\tzygosity\\torigin\\tphenotype\\thgvs_name\\tplacement_method\\tplacement_rank\\tplacements_per_assembly\\tremap_alignment\\tremap_best_within_cluster\\tremap_coverage\\tremap_diff_chr\\tremap_failure_code\\tallele_count\\tallele_frequency\\tallele_number\\n\" +\n            \"nssv15777856\\tgnomAD-SV_v2.1_CNV_10_564_alt_1\\tcopy number variation\\t1\\t\\t1\\tGRCh38.p12\\t10\\t\\t\\t736806\\t\\t\\t738184\\t\\t\\tnsv4039284\\t10__782746___784124______GRCh37.p13_copy_number_variation\\t0\\t\\t\\t\\t\\t\\t\\tRemapped\\tBestAvailable\\tSingle\\tFirst Pass\\t0\\t1\\t\\t\\tAC=21,AFR_AC=10,AMR_AC=9,EAS_AC=0,EUR_AC=2,OTH_AC=0\\tAF=0.038889,AFR_AF=0.044643,AMR_AF=0.03913,EAS_AF=0,EUR_AF=0.023256,OTH_AF=0\\tAN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0\\n\" +\n            \"nssv15777857\\tgnomAD-SV_v2.1_CNV_10_564_alt_10\\talu 
insertion\\t1\\t\\t1\\tGRCh38.p12\\t10\\t\\t\\t736806\\t\\t\\t738184\\t\\t\\tnsv4039284\\t10__782746___784124______GRCh37.p13_copy_number_variation\\t9\\t\\t\\t\\t\\t\\t\\tRemapped\\tBestAvailable\\tSingle\\tFirst Pass\\t0\\t1\\t\\t\\tAC=0,AFR_AC=0,AMR_AC=0,EAS_AC=0,EUR_AC=0,OTH_AC=0\\tAF=0,AFR_AF=0,AMR_AF=0,EAS_AF=0,EUR_AF=0,OTH_AF=0\\tAN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0\\n\" +\n            \"nssv15777858\\tgnomAD-SV_v2.1_CNV_10_564_alt_11\\tdeletion\\t1\\t\\t1\\tGRCh38.p12\\t10\\t\\t\\t736806\\t\\t\\t738184\\t\\t\\tnsv4039284\\t10__782746___784124______GRCh37.p13_copy_number_variation\\t10\\t\\t\\t\\t\\t\\t\\tRemapped\\tBestAvailable\\tSingle\\tFirst Pass\\t0\\t1\\t\\t\\tAC=0,AFR_AC=0,AMR_AC=0,EAS_AC=0,EUR_AC=0,OTH_AC=0\\tAF=0,AFR_AF=0,AMR_AF=0,EAS_AF=0,EUR_AF=0,OTH_AF=0\\tAN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0\\n\" +\n            \"nssv15982321\\tgnomAD-SV_v2.1_INS_11_75807\\tinsertion\\t1\\t\\t1\\tGRCh38.p12\\t11\\t\\t\\t11946244\\t\\t\\t11946244\\t\\t58\\tnsv4549918\\t11__11967791___11967792______GRCh37.p13_insertion\\t\\t\\t\\t\\t\\t\\tNC_000011.10:g.11946244_11946245ins58\\tRemapped\\tBestAvailable\\tSingle\\tFirst Pass\\t0\\t1\\t\\t\\tAC=1,AFR_AC=0,AMR_AC=1,EAS_AC=0,EUR_AC=0,OTH_AC=0\\tAF=4.6e-05,AFR_AF=0,AMR_AF=0.000518,EAS_AF=0,EUR_AF=0,OTH_AF=0\\tAN=21694,AFR_AN=9534,AMR_AN=1930,EAS_AN=2416,EUR_AN=7624,OTH_AN=190\\n\";\n        \n        using var reader         = new StreamReader(GetStreamData(tsvData));\n        using var gnomadSvParser = new GnomadSvTsvParser(reader, ChromosomeUtilities.RefNameToChromosome);\n\n        List<GnomadSvItem> svItemList = gnomadSvParser.GetItems().ToList();\n\n        Assert.Equal(4, svItemList.Count);\n\n        Assert.Equal(\n            
\"\\\"chromosome\\\":\\\"10\\\",\\\"begin\\\":736807,\\\"end\\\":738184,\\\"variantId\\\":\\\"gnomAD-SV_v2.1_CNV_10_564_alt_1\\\",\\\"variantType\\\":\\\"copy_number_variation\\\",\\\"allAf\\\":0.038889,\\\"afrAf\\\":0.044643,\\\"amrAf\\\":0.03913,\\\"easAf\\\":0,\\\"eurAf\\\":0.023256,\\\"othAf\\\":0,\\\"allAc\\\":21,\\\"afrAc\\\":10,\\\"amrAc\\\":9,\\\"easAc\\\":0,\\\"eurAc\\\":2,\\\"othAc\\\":0,\\\"allAn\\\":540,\\\"afrAn\\\":224,\\\"amrAn\\\":230,\\\"easAn\\\":0,\\\"eurAn\\\":86,\\\"othAn\\\":0\",\n            svItemList[0].GetJsonString()\n        );\n        Assert.Equal(\n            \"\\\"chromosome\\\":\\\"10\\\",\\\"begin\\\":736807,\\\"end\\\":738184,\\\"variantId\\\":\\\"gnomAD-SV_v2.1_CNV_10_564_alt_10\\\",\\\"variantType\\\":\\\"mobile_element_insertion\\\",\\\"allAf\\\":0,\\\"afrAf\\\":0,\\\"amrAf\\\":0,\\\"easAf\\\":0,\\\"eurAf\\\":0,\\\"othAf\\\":0,\\\"allAc\\\":0,\\\"afrAc\\\":0,\\\"amrAc\\\":0,\\\"easAc\\\":0,\\\"eurAc\\\":0,\\\"othAc\\\":0,\\\"allAn\\\":540,\\\"afrAn\\\":224,\\\"amrAn\\\":230,\\\"easAn\\\":0,\\\"eurAn\\\":86,\\\"othAn\\\":0\",\n            svItemList[1].GetJsonString()\n        );\n        Assert.Equal(\n            \"\\\"chromosome\\\":\\\"10\\\",\\\"begin\\\":736807,\\\"end\\\":738184,\\\"variantId\\\":\\\"gnomAD-SV_v2.1_CNV_10_564_alt_11\\\",\\\"variantType\\\":\\\"deletion\\\",\\\"allAf\\\":0,\\\"afrAf\\\":0,\\\"amrAf\\\":0,\\\"easAf\\\":0,\\\"eurAf\\\":0,\\\"othAf\\\":0,\\\"allAc\\\":0,\\\"afrAc\\\":0,\\\"amrAc\\\":0,\\\"easAc\\\":0,\\\"eurAc\\\":0,\\\"othAc\\\":0,\\\"allAn\\\":540,\\\"afrAn\\\":224,\\\"amrAn\\\":230,\\\"easAn\\\":0,\\\"eurAn\\\":86,\\\"othAn\\\":0\",\n            svItemList[2].GetJsonString()\n        );\n        Assert.Equal(\n            
\"\\\"chromosome\\\":\\\"11\\\",\\\"begin\\\":11946245,\\\"end\\\":11946244,\\\"variantId\\\":\\\"gnomAD-SV_v2.1_INS_11_75807\\\",\\\"variantType\\\":\\\"insertion\\\",\\\"allAf\\\":0.000046,\\\"afrAf\\\":0,\\\"amrAf\\\":0.000518,\\\"easAf\\\":0,\\\"eurAf\\\":0,\\\"othAf\\\":0,\\\"allAc\\\":1,\\\"afrAc\\\":0,\\\"amrAc\\\":1,\\\"easAc\\\":0,\\\"eurAc\\\":0,\\\"othAc\\\":0,\\\"allAn\\\":21694,\\\"afrAn\\\":9534,\\\"amrAn\\\":1930,\\\"easAn\\\":2416,\\\"eurAn\\\":7624,\\\"othAn\\\":190\",\n            svItemList[3].GetJsonString()\n        );\n    }\n\n    [Fact]\n    public void TestUnknownChromosome()\n    {\n        const string tsvData =\n            \"#variant_call_accession\\tvariant_call_id\\tvariant_call_type\\texperiment_id\\tsample_id\\tsampleset_id\\tassembly\\tchrcontig\\touter_start\\tstart\\tinner_start\\tinner_stop\\tstop\\touter_stop\\tinsertion_length\\tvariant_region_acc\\tvariant_region_id\\tcopy_number\\tdescription\\tvalidation\\tzygosity\\torigin\\tphenotype\\thgvs_name\\tplacement_method\\tplacement_rank\\tplacements_per_assembly\\tremap_alignment\\tremap_best_within_cluster\\tremap_coverage\\tremap_diff_chr\\tremap_failure_code\\tallele_count\\tallele_frequency\\tallele_number\\n\" +\n            \"nssv15777856\\tgnomAD-SV_v2.1_CNV_10_564_alt_1\\tcopy number variation\\t1\\t\\t1\\tGRCh38.p12\\tINVALID-1\\t\\t\\t736806\\t\\t\\t738184\\t\\t\\tnsv4039284\\t10__782746___784124______GRCh37.p13_copy_number_variation\\t0\\t\\t\\t\\t\\t\\t\\tRemapped\\tBestAvailable\\tSingle\\tFirst Pass\\t0\\t1\\t\\t\\tAC=21,AFR_AC=10,AMR_AC=9,EAS_AC=0,EUR_AC=2,OTH_AC=0\\tAF=0.038889,AFR_AF=0.044643,AMR_AF=0.03913,EAS_AF=0,EUR_AF=0.023256,OTH_AF=0\\tAN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0\\n\" +\n            
\"nssv15777857\\tgnomAD-SV_v2.1_CNV_10_564_alt_10\\tduplication\\t1\\t\\t1\\tGRCh38.p12\\t10\\t\\t\\t736806\\t\\t\\t738184\\t\\t\\tnsv4039284\\t10__782746___784124______GRCh37.p13_copy_number_variation\\t9\\t\\t\\t\\t\\t\\t\\tRemapped\\tBestAvailable\\tSingle\\tFirst Pass\\t0\\t1\\t\\t\\tAC=0,AFR_AC=0,AMR_AC=0,EAS_AC=0,EUR_AC=0,OTH_AC=0\\tAF=0,AFR_AF=0,AMR_AF=0,EAS_AF=0,EUR_AF=0,OTH_AF=0\\tAN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0\\n\";\n\n        using var reader         = new StreamReader(GetStreamData(tsvData));\n        using var gnomadSvParser = new GnomadSvTsvParser(reader, ChromosomeUtilities.RefNameToChromosome);\n\n        List<GnomadSvItem> svItemList = gnomadSvParser.GetItems().ToList();\n\n        Assert.Single(svItemList);\n\n\n        Assert.Equal(\n            \"\\\"chromosome\\\":\\\"10\\\",\\\"begin\\\":736807,\\\"end\\\":738184,\\\"variantId\\\":\\\"gnomAD-SV_v2.1_CNV_10_564_alt_10\\\",\\\"variantType\\\":\\\"duplication\\\",\\\"allAf\\\":0,\\\"afrAf\\\":0,\\\"amrAf\\\":0,\\\"easAf\\\":0,\\\"eurAf\\\":0,\\\"othAf\\\":0,\\\"allAc\\\":0,\\\"afrAc\\\":0,\\\"amrAc\\\":0,\\\"easAc\\\":0,\\\"eurAc\\\":0,\\\"othAc\\\":0,\\\"allAn\\\":540,\\\"afrAn\\\":224,\\\"amrAn\\\":230,\\\"easAn\\\":0,\\\"eurAn\\\":86,\\\"othAn\\\":0\",\n            svItemList[0].GetJsonString()\n        );\n    }\n\n    [Fact]\n    public void TestInvalidStart()\n    {\n        const string tsvData =\n            \"#variant_call_accession\\tvariant_call_id\\tvariant_call_type\\texperiment_id\\tsample_id\\tsampleset_id\\tassembly\\tchrcontig\\touter_start\\tstart\\tinner_start\\tinner_stop\\tstop\\touter_stop\\tinsertion_length\\tvariant_region_acc\\tvariant_region_id\\tcopy_number\\tdescription\\tvalidation\\tzygosity\\torigin\\tphenotype\\thgvs_name\\tplacement_method\\tplacement_rank\\tplacements_per_assembly\\tremap_alignment\\tremap_best_within_cluster\\tremap_coverage\\tremap_diff_chr\\tremap_failure_code\\tallele_count\\tallele_frequency\\tallele_number\\n\" +   
         \"nssv15777856\\tgnomAD-SV_v2.1_CNV_10_564_alt_1\\tcopy number variation\\t1\\t\\t1\\tGRCh38.p12\\t10\\t\\t\\tInvalid-736806\\t\\t\\t738184\\t\\t\\tnsv4039284\\t10__782746___784124______GRCh37.p13_copy_number_variation\\t0\\t\\t\\t\\t\\t\\t\\tRemapped\\tBestAvailable\\tSingle\\tFirst Pass\\t0\\t1\\t\\t\\tAC=21,AFR_AC=10,AMR_AC=9,EAS_AC=0,EUR_AC=2,OTH_AC=0\\tAF=0.038889,AFR_AF=0.044643,AMR_AF=0.03913,EAS_AF=0,EUR_AF=0.023256,OTH_AF=0\\tAN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0\\n\";\n\n        using var reader         = new StreamReader(GetStreamData(tsvData));\n        using var gnomadSvParser = new GnomadSvTsvParser(reader, ChromosomeUtilities.RefNameToChromosome);\n\n        Assert.Throws<InvalidDataException>(() => gnomadSvParser.GetItems().ToList());\n    }\n\n    [Fact]\n    public void TestInvalidEnd()\n    {\n        const string tsvData =\n            \"#variant_call_accession\\tvariant_call_id\\tvariant_call_type\\texperiment_id\\tsample_id\\tsampleset_id\\tassembly\\tchrcontig\\touter_start\\tstart\\tinner_start\\tinner_stop\\tstop\\touter_stop\\tinsertion_length\\tvariant_region_acc\\tvariant_region_id\\tcopy_number\\tdescription\\tvalidation\\tzygosity\\torigin\\tphenotype\\thgvs_name\\tplacement_method\\tplacement_rank\\tplacements_per_assembly\\tremap_alignment\\tremap_best_within_cluster\\tremap_coverage\\tremap_diff_chr\\tremap_failure_code\\tallele_count\\tallele_frequency\\tallele_number\\n\" +            \"nssv15777856\\tgnomAD-SV_v2.1_CNV_10_564_alt_1\\tcopy number variation\\t1\\t\\t1\\tGRCh38.p12\\t10\\t\\t\\t736806\\t\\t\\tInvalid-738184\\t\\t\\tnsv4039284\\t10__782746___784124______GRCh37.p13_copy_number_variation\\t0\\t\\t\\t\\t\\t\\t\\tRemapped\\tBestAvailable\\tSingle\\tFirst Pass\\t0\\t1\\t\\t\\tAC=21,AFR_AC=10,AMR_AC=9,EAS_AC=0,EUR_AC=2,OTH_AC=0\\tAF=0.038889,AFR_AF=0.044643,AMR_AF=0.03913,EAS_AF=0,EUR_AF=0.023256,OTH_AF=0\\tAN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0\\n\";\n\n        using var 
reader         = new StreamReader(GetStreamData(tsvData));\n        using var gnomadSvParser = new GnomadSvTsvParser(reader, ChromosomeUtilities.RefNameToChromosome);\n\n        Assert.Throws<InvalidDataException>(() => gnomadSvParser.GetItems().ToList());\n    }\n\n    [Fact]\n    public void TestInvalidSvType()\n    {\n        const string tsvData =\n            \"#variant_call_accession\\tvariant_call_id\\tvariant_call_type\\texperiment_id\\tsample_id\\tsampleset_id\\tassembly\\tchrcontig\\touter_start\\tstart\\tinner_start\\tinner_stop\\tstop\\touter_stop\\tinsertion_length\\tvariant_region_acc\\tvariant_region_id\\tcopy_number\\tdescription\\tvalidation\\tzygosity\\torigin\\tphenotype\\thgvs_name\\tplacement_method\\tplacement_rank\\tplacements_per_assembly\\tremap_alignment\\tremap_best_within_cluster\\tremap_coverage\\tremap_diff_chr\\tremap_failure_code\\tallele_count\\tallele_frequency\\tallele_number\\n\" +\n            \"nssv15777856\\tgnomAD-SV_v2.1_CNV_10_564_alt_1\\tINVALID copy number variation\\t1\\t\\t1\\tGRCh38.p12\\t10\\t\\t\\t736806\\t\\t\\t738184\\t\\t\\tnsv4039284\\t10__782746___784124______GRCh37.p13_copy_number_variation\\t0\\t\\t\\t\\t\\t\\t\\tRemapped\\tBestAvailable\\tSingle\\tFirst Pass\\t0\\t1\\t\\t\\tAC=21,AFR_AC=10,AMR_AC=9,EAS_AC=0,EUR_AC=2,OTH_AC=0\\tAF=0.038889,AFR_AF=0.044643,AMR_AF=0.03913,EAS_AF=0,EUR_AF=0.023256,OTH_AF=0\\tAN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0\\n\";\n\n        using var reader         = new StreamReader(GetStreamData(tsvData));\n        using var gnomadSvParser = new GnomadSvTsvParser(reader, ChromosomeUtilities.RefNameToChromosome);\n\n        Assert.Throws<InvalidDataException>(() => gnomadSvParser.GetItems().ToList());\n    }\n}"
  },
  {
    "path": "UnitTests/SAUtils/gnomAD/LcrParserTests.cs",
    "content": "using System.IO;\nusing System.Linq;\nusing System.Text;\nusing Moq;\nusing SAUtils.gnomAD;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.Interface.Providers;\nusing Xunit;\n\nnamespace UnitTests.SAUtils.gnomAD\n{\n    public class LcrParserTests\n    {\n        private Stream GetGRCh37Stream()\n        {\n            var stream = new MemoryStream();\n            using (var writer = new StreamWriter(stream, Encoding.Default, 512*1024, true))\n            {\n                writer.WriteLine(\"1:1-10000\");\n                writer.WriteLine(\"1:40637-40658\");\n                writer.WriteLine(\"1:77172-77195\");\n            }\n\n            stream.Position = 0;\n            return stream;\n        }\n        \n        private ISequenceProvider GetGRCh37()\n        {\n            var seqProvider = new Mock<ISequenceProvider>();\n\n            seqProvider.Setup(x => x.Sequence.Substring(0, It.IsAny<int>())).\n                Returns(new string('n',500)+new string ('N',500));\n            seqProvider.Setup(x => x.Sequence.Substring(40637-1, It.IsAny<int>())).\n                Returns(new string('A',50) +new string ('C',50));\n            seqProvider.Setup(x => x.Sequence.Substring(77172 -1, It.IsAny<int>())).\n                Returns(new string('T',50) +new string ('G',50));\n\n            seqProvider.SetupGet(x => x.RefNameToChromosome).Returns(\n                ChromosomeUtilities.RefNameToChromosome);\n            return seqProvider.Object;\n        }\n        \n        private Stream GetGRCh38Stream()\n        {\n            var stream = new MemoryStream();\n            using (var writer = new StreamWriter(stream, Encoding.Default, 512 *1024, true))\n            {\n                writer.WriteLine(\"chr1\\t9999\\t10468\");\n                writer.WriteLine(\"chr1\\t30853\\t30959\");\n                writer.WriteLine(\"chr1\\t47317\\t47328\");\n            }\n\n            stream.Position = 0;\n            return stream;\n        }\n\n    
    private ISequenceProvider GetGRCh38()\n        {\n            var seqProvider = new Mock<ISequenceProvider>();\n\n            seqProvider.Setup(x => x.Sequence.Substring(9999 -1, It.IsAny<int>())).\n                Returns(new string('G',50) +new string ('C',50));\n            seqProvider.Setup(x => x.Sequence.Substring(30853 -1, It.IsAny<int>())).\n                Returns(new string('A',50) +new string ('C',50));\n            seqProvider.Setup(x => x.Sequence.Substring(47317 -1, It.IsAny<int>())).\n                Returns(new string('T',50) +new string ('G',50));\n\n            seqProvider.SetupGet(x => x.RefNameToChromosome).Returns(\n                ChromosomeUtilities.RefNameToChromosome);\n            \n            return seqProvider.Object;\n        }\n\n        [Fact]\n        public void GetGRCh37Lcrs()\n        {\n            var parser = new LcrRegionParser(new StreamReader(GetGRCh37Stream()), GetGRCh37());\n\n            var items = parser.GetItems().ToList();\n            \n            Assert.Equal(2, items.Count);\n            \n        }\n        \n        [Fact]\n        public void GetGRCh38Lcrs()\n        {\n            var parser = new LcrRegionParser(new StreamReader(GetGRCh38Stream()), GetGRCh38());\n\n            var items = parser.GetItems().ToList();\n            \n            Assert.Equal(3, items.Count);\n            \n        }\n    }\n}"
  },
  {
    "path": "UnitTests/SingleAnnotationLambda/SingleConfigTests.cs",
    "content": "﻿using System;\r\nusing Cloud.Messages.Single;\r\nusing ErrorHandling.Exceptions;\r\nusing SingleAnnotationLambda;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.SingleAnnotationLambda\r\n{\r\n    public sealed class SingleConfigTests\r\n    {\r\n        [Fact]\r\n        public void Validate_Success()\r\n        {\r\n            SingleConfig config = GetConfig();\r\n            Exception ex = Record.Exception(() => { config.Validate(); });\r\n            Assert.Null(ex);\r\n        }\r\n\r\n        [Fact]\r\n        public void Validate_NullId_ThrowException()\r\n        {\r\n            SingleConfig config = GetConfig();\r\n            config.id = null;\r\n            Assert.Throws<UserErrorException>(() => config.Validate());\r\n        }\r\n\r\n        [Fact]\r\n        public void Validate_NullGenomeAssembly_ThrowException()\r\n        {\r\n            SingleConfig config = GetConfig();\r\n            config.genomeAssembly = null;\r\n            Assert.Throws<UserErrorException>(() => config.Validate());\r\n        }\r\n\r\n        [Fact]\r\n        public void Validate_NullVariant_ThrowException()\r\n        {\r\n            SingleConfig config = GetConfig();\r\n            config.variant = null;\r\n            Assert.Throws<UserErrorException>(() => config.Validate());\r\n        }\r\n\r\n        private static SingleConfig GetConfig() => new SingleConfig\r\n        {\r\n            id             = \"Test\",\r\n            genomeAssembly = \"Assembly\",\r\n            variant        = new SingleVariant\r\n            {\r\n                chromosome = \"1\",\r\n                position   = 100,\r\n                refAllele  = \"A\",\r\n                altAlleles = new[] { \"T\", \"C\"}\r\n            }\r\n        };\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/SingleAnnotationLambda/SingleVariantTests.cs",
    "content": "﻿using System;\r\nusing System.Linq;\r\nusing Cloud.Messages.Single;\r\nusing ErrorHandling.Exceptions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.SingleAnnotationLambda\r\n{\r\n    public sealed class SingleVariantTests\r\n    {\r\n        [Fact]\r\n        public void GetVcfFields_AsExpected()\r\n        {\r\n            var variant = new SingleVariant\r\n            {\r\n                chromosome   = \"1\",\r\n                position     = 100,\r\n                refAllele    = \"A\",\r\n                altAlleles   = new[] { \"C\", \"AC\" },\r\n                filters      = new[] { \"LowGQX\", \"NoPassedVariantGTs\" },\r\n                infoField    = \"SNVHPOL=2;MQ=34\",\r\n                formatField  = \"GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL:ME:DQ\",\r\n                sampleFields = new[]\r\n                {\r\n                    \"0|0:15:15:6:4:6,0:4,0:2,0:0:PASS:0,18,170:0:.\", \"0|1:13:0:7:6:6,1:3,0:3,1:0:LowGQX:15,0,147:.:.\",\r\n                    \"0|1:18:0:9:8:8,1:2,0:6,1:0:LowGQX:20,0,156:.:.\"\r\n                },\r\n                sampleNames  = new[] { \"NA12878\", \"NA12891\", \"NA12892\" }\r\n            };\r\n\r\n            string[] vcfFields = variant.GetVcfFields();\r\n            Assert.Equal(12, vcfFields.Length);\r\n            Assert.Equal(\"1\", vcfFields[0]);\r\n            Assert.Equal(\"100\", vcfFields[1]);\r\n            Assert.True(vcfFields.SequenceEqual(new[]\r\n            {\r\n                \"1\", \"100\", \".\", \"A\", \"C,AC\", \".\", \"LowGQX;NoPassedVariantGTs\", \"SNVHPOL=2;MQ=34\",\r\n                \"GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL:ME:DQ\",\r\n                \"0|0:15:15:6:4:6,0:4,0:2,0:0:PASS:0,18,170:0:.\", \"0|1:13:0:7:6:6,1:3,0:3,1:0:LowGQX:15,0,147:.:.\",\r\n                \"0|1:18:0:9:8:8,1:2,0:6,1:0:LowGQX:20,0,156:.:.\"\r\n            }));\r\n        }\r\n\r\n        [Fact]\r\n        public void Validate_Success()\r\n        {\r\n            SingleVariant variant = 
GetConfig();\r\n            Exception ex = Record.Exception(() => { variant.Validate(); });\r\n            Assert.Null(ex);\r\n        }\r\n\r\n        [Fact]\r\n        public void Validate_NullChromosome_ThrowException()\r\n        {\r\n            SingleVariant variant = GetConfig();\r\n            variant.chromosome = null;\r\n            Assert.Throws<UserErrorException>(() => variant.Validate());\r\n        }\r\n\r\n        [Fact]\r\n        public void Validate_NullPosition_ThrowException()\r\n        {\r\n            SingleVariant variant = GetConfig();\r\n            variant.position = null;\r\n            Assert.Throws<UserErrorException>(() => variant.Validate());\r\n        }\r\n\r\n        [Fact]\r\n        public void Validate_NullReferenceAllele_ThrowException()\r\n        {\r\n            SingleVariant variant = GetConfig();\r\n            variant.refAllele = null;\r\n            Assert.Throws<UserErrorException>(() => variant.Validate());\r\n        }\r\n\r\n        [Fact]\r\n        public void Validate_NullAlternateAlleles_ThrowException()\r\n        {\r\n            SingleVariant variant = GetConfig();\r\n            variant.altAlleles = null;\r\n            Assert.Throws<UserErrorException>(() => variant.Validate());\r\n        }\r\n\r\n        [Fact]\r\n        public void Validate_ZeroAlternateAlleles_ThrowException()\r\n        {\r\n            SingleVariant variant = GetConfig();\r\n            variant.altAlleles = new string[0];\r\n            Assert.Throws<UserErrorException>(() => variant.Validate());\r\n        }\r\n\r\n        [Fact]\r\n        public void Validate_SampleNamesAndSampleFields_NoFormatField_ThrowException()\r\n        {\r\n            SingleVariant variant = GetConfig();\r\n            variant.sampleNames   = new[] {\"Bob\"};\r\n            variant.sampleFields  = new[] { \"0/1\" };\r\n            Assert.Throws<UserErrorException>(() => variant.Validate());\r\n        }\r\n\r\n        [Fact]\r\n        public void 
Validate_FormatField_NoSampleNamesAndSampleFields_ThrowException()\r\n        {\r\n            SingleVariant variant = GetConfig();\r\n            variant.formatField   = \"GT\";\r\n            Assert.Throws<UserErrorException>(() => variant.Validate());\r\n        }\r\n\r\n        private static SingleVariant GetConfig() => new SingleVariant\r\n        {\r\n            chromosome = \"1\",\r\n            position   = 100,\r\n            refAllele  = \"A\",\r\n            altAlleles = new[] { \"T\", \"C\" }\r\n        };\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/Tabix/BgzfBlockVcfReaderTests.cs",
    "content": "﻿using Compression.FileHandling;\r\nusing IO;\r\nusing Tabix;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Tabix\r\n{\r\n    public sealed class BgzfBlockVcfReaderTests\r\n    {\r\n        private const    long      FileOffset = 61413;\r\n        private readonly BgzfBlock _block     = new BgzfBlock();\r\n\r\n        [Fact]\r\n        public void FindVariantsInBlock_NoVariants_ReturnFalse()\r\n        {\r\n            using (var stream = FileUtilities.GetReadStream(Resources.TopPath(\"miniHEXA_minimal.vcf.gz\")))\r\n            {\r\n                bool observedResults = BgzfBlockVcfReader.FindVariantsInBlocks(stream, FileOffset, FileOffset, _block,\r\n                    ChromosomeUtilities.Chr15, 1, 71589359);\r\n                Assert.False(observedResults);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void FindVariantsInBlock_ReturnTrue()\r\n        {\r\n            using (var stream = FileUtilities.GetReadStream(Resources.TopPath(\"miniHEXA_minimal.vcf.gz\")))\r\n            {\r\n                bool observedResults = BgzfBlockVcfReader.FindVariantsInBlocks(stream, FileOffset, FileOffset, _block,\r\n                    ChromosomeUtilities.Chr15, 71589360, 71589361);\r\n                Assert.False(observedResults);\r\n            }\r\n        }\r\n\r\n        private const string MixedLineEndingsInput = \"C\\t39\\t.\\t.\\tGT\\t0/1\\t.\\t1/1\\n1\\t100\\t.\\tT\\tC\\t39\\t.\\t.\\tGT\\t0/1\\t.\\t1/1\\r\\n2\\t55927\\t.\\tT\\tC\\t39\\t.\\t.\\tGT\\t0/1\\t.\\t1/1\\n2\\t55928\\t.\\tT\\tC\\t39\\t.\\t.\\tGT\\t0/1\\t.\\t1/1\\r\\n2\\t55929\\t.\\tT\\tC\\t39\\t.\\t.\\tGT\\t0/1\\t.\\t1/1\\n3\\t200\\t.\\tT\\tC\\t39\\t.\\t.\\tGT\\t0/1\\t.\\t1/1\\r\\n1\\t\";\r\n        \r\n        [Fact]\r\n        public void GetVcfPositions_MixedLineEndings_PartialEntries_MultipleChromosomes_ReturnTrue()\r\n        {\r\n            bool observedResults = BgzfBlockVcfReader.HasVcfPositionsOnInterval(MixedLineEndingsInput, 
ChromosomeUtilities.Chr2, 55927, 55928);\r\n            Assert.True(observedResults);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetVcfPositions_MixedLineEndings_PartialEntries_MultipleChromosomes_ReturnFalse()\r\n        {\r\n            bool observedResults = BgzfBlockVcfReader.HasVcfPositionsOnInterval(MixedLineEndingsInput, ChromosomeUtilities.Chr2, 55930, 55940);\r\n            Assert.False(observedResults);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetVcfPositions_SkipCorruptPositions()\r\n        {\r\n            const string input = \"2\\t55927i\\t.\\tT\\tC\\t39\\t.\\t.\\tGT\\t0/1\\t.\\t1/1\\n2\\t55928\\t.\\tT\\tC\\t39\\t.\\t.\\tGT\\t0/1\\t.\\t1/1\";\r\n            bool observedResults = BgzfBlockVcfReader.HasVcfPositionsOnInterval(input, ChromosomeUtilities.Chr2, 55927, 55927);\r\n            Assert.False(observedResults);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Tabix/BinUtilitiesTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Linq;\r\nusing Tabix;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Tabix\r\n{\r\n    public sealed class BinUtilitiesTests\r\n    {\r\n        [Fact]\r\n        public void BottomBin_Nominal()\r\n        {\r\n            int observedResults = BinUtilities.BottomBin(12517);\r\n            Assert.Equal(7836, observedResults);\r\n        }\r\n\r\n        [Fact]\r\n        public void ConvertPositionToBin_Nominal()\r\n        {\r\n            int observedResults = BinUtilities.ConvertPositionToBin(26699126);\r\n            Assert.Equal(6310, observedResults);\r\n        }\r\n\r\n        [Fact]\r\n        public void OverlappingBinsWithVariants_EndBeforeBegin_ReturnEmptyList()\r\n        {\r\n            IEnumerable<int> results = BinUtilities.OverlappingBinsWithVariants(20, 10, null);\r\n            Assert.Empty(results);\r\n        }\r\n\r\n        [Fact]\r\n        public void OverlappingBinsWithVariants_EndBeyondMaxRefLen_CorrectEnd()\r\n        {\r\n            const int expectedBinId = 6310;\r\n\r\n            var idToChunks = new Dictionary<int, Interval[]>\r\n            {\r\n                [expectedBinId] = new[] { new Interval(1, 1) }\r\n            };\r\n\r\n            List<int> results = BinUtilities.OverlappingBinsWithVariants(10, int.MaxValue, idToChunks).ToList();\r\n            Assert.Single(results);\r\n            Assert.Equal(expectedBinId, results[0]);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Tabix/ReaderTests.cs",
    "content": "﻿using System.IO;\r\nusing System.Text;\r\nusing IO;\r\nusing Tabix;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Tabix\r\n{\r\n    public sealed class ReaderTests\r\n    {\r\n        [Fact]\r\n        public void Read_Nominal()\r\n        {\r\n            using (var stream = FileUtilities.GetReadStream(Resources.TopPath(\"miniHEXA_minimal.vcf.gz.tbi\")))\r\n            {\r\n                Index index = Reader.GetTabixIndex(stream, ChromosomeUtilities.RefNameToChromosome);\r\n\r\n                Assert.Equal(1, index.BeginIndex);\r\n                Assert.Equal('#', index.CommentChar);\r\n                Assert.Equal(-1, index.EndIndex);\r\n                Assert.Equal(Constants.VcfFormat, index.Format);\r\n                Assert.Equal(0, index.NumLinesToSkip);\r\n                Assert.Equal(0, index.SequenceNameIndex);\r\n\r\n                Assert.Single(index.ReferenceSequences);\r\n\r\n                var refSeq = index.ReferenceSequences[0];\r\n                Assert.Equal(\"chr15\", refSeq.Chromosome.UcscName);\r\n                Assert.Equal(4675, refSeq.LinearFileOffsets.Length);\r\n                Assert.Equal((ulong)4587, refSeq.LinearFileOffsets[4370]);\r\n\r\n                Assert.Equal(306, refSeq.IdToChunks.Count);\r\n\r\n                var chunks = refSeq.IdToChunks[9062];\r\n                Assert.NotNull(chunks);\r\n                Assert.Single(chunks);\r\n\r\n                var chunk = chunks[0];\r\n                Assert.Equal((ulong)61269, chunk.Begin);\r\n                Assert.Equal((ulong)991626923, chunk.End);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void Read_NotTabixFormat()\r\n        {\r\n            using (var ms = new MemoryStream())\r\n            {\r\n                using (var writer = new BinaryWriter(ms, Encoding.UTF8, true))\r\n                {\r\n                    writer.Write(\"The quick brown fox jumped over the lazy dog.\");\r\n              
  }\r\n\r\n                ms.Position = 0;\r\n\r\n                using (var reader = new BinaryReader(ms))\r\n                {\r\n                    Assert.Throws<InvalidDataException>(delegate\r\n                    {\r\n                        Reader.Read(reader, null);\r\n                    });\r\n                }\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Tabix/SearchTests.cs",
    "content": "﻿using IO;\r\nusing Tabix;\r\nusing Xunit;\r\nusing UnitTests.TestUtilities;\r\n\r\nnamespace UnitTests.Tabix\r\n{\r\n    public sealed class SearchTests\r\n    {\r\n        private readonly Search _search;\r\n        private const string ChromosomeName = \"chr15\";\r\n\r\n        public SearchTests()\r\n        {\r\n            Index index;\r\n            using (var stream = FileUtilities.GetReadStream(Resources.TopPath(\"miniHEXA_minimal.vcf.gz.tbi\")))\r\n            {\r\n                index = Reader.GetTabixIndex(stream, ChromosomeUtilities.RefNameToChromosome);\r\n            }\r\n\r\n            var vcfStream = FileUtilities.GetReadStream(Resources.TopPath(\"miniHEXA_minimal.vcf.gz\"));\r\n            _search       = new Search(index, vcfStream);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasVariants_IntervalBeforeReads_ReturnsFalse()\r\n        {\r\n            bool observedResult = _search.HasVariants(ChromosomeName, 1, 71589359);\r\n            Assert.False(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasVariants_IntervalOverlapsReads_HasVcfPositionsOnIntervalTrue_ReturnsTrue()\r\n        {\r\n            bool observedResult = _search.HasVariants(ChromosomeName, 1, 71589360);\r\n            Assert.True(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasVariants_IntervalOverlapsReads_ReturnsTrue()\r\n        {\r\n            bool observedResult = _search.HasVariants(ChromosomeName, 71589360, 76592131);\r\n            Assert.True(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasVariants_NoOverlap_ReturnsFalse()\r\n        {\r\n            bool observedResult = _search.HasVariants(ChromosomeName, 76591006, 76592130);\r\n            Assert.False(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasVariants_IntervalAfterReads_ReturnsFalse()\r\n        {\r\n            bool observedResult = 
_search.HasVariants(ChromosomeName, 76592132, 101991189);\r\n            Assert.False(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasVariants_NullRefSeq_ReturnsFalse()\r\n        {\r\n            bool observedResult = _search.HasVariants(\"chr18\", 71589360, 76592131);\r\n            Assert.False(observedResult);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Tabix/SearchTestsLocalMother.cs",
    "content": "﻿#if EXPANDED_TESTS\r\n\r\nusing Genome;\r\nusing System.Collections.Generic;\r\nusing IO;\r\nusing Tabix;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Tabix\r\n{\r\n    public sealed class SearchTestsLocalMother\r\n    {\r\n        private readonly Search _search;\r\n        private const string ChromosomeName = \"chr2\";\r\n\r\n        public SearchTestsLocalMother()\r\n        {\r\n            var chr2 = new Chromosome(\"chr2\", \"2\", 1);\r\n\r\n            var refNameToChromosome = new Dictionary<string, Chromosome>\r\n            {\r\n                [chr2.EnsemblName] = chr2,\r\n                [chr2.UcscName]    = chr2\r\n            };\r\n\r\n            Index index;\r\n            using (var stream = FileUtilities.GetReadStream(@\"E:\\Data\\Nirvana\\Data\\Mother\\Mother.vcf.gz.tbi\"))\r\n            {\r\n                index = Reader.GetTabixIndex(stream, refNameToChromosome);\r\n            }\r\n\r\n            var vcfStream = FileUtilities.GetReadStream(@\"E:\\Data\\Nirvana\\Data\\Mother\\Mother.vcf.gz\");\r\n            _search = new Search(index, vcfStream);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasVariants_IntervalBeforeReads_ReturnsFalse()\r\n        {\r\n            bool observedResult = _search.HasVariants(ChromosomeName, 1, 11319);\r\n            Assert.False(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasVariants_IntervalOverlapsReads_HasVcfPositionsOnIntervalTrue_ReturnsTrue()\r\n        {\r\n            bool observedResult = _search.HasVariants(ChromosomeName, 1, 11320);\r\n            Assert.True(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasVariants_IntervalOverlapsReads_ReturnsTrue()\r\n        {\r\n            bool observedResult = _search.HasVariants(ChromosomeName, 217826, 435772);\r\n            Assert.True(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasVariants_NoOverlap_ReturnsFalse()\r\n        {\r\n        
    bool observedResult = _search.HasVariants(ChromosomeName, 431200, 434667);\r\n            Assert.False(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasVariants_IntervalAfterReads_ReturnsTrue()\r\n        {\r\n            bool observedResult = _search.HasVariants(ChromosomeName, 243172390, 243199373);\r\n            Assert.True(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasVariants_IntervalAfterReads_ReturnsFalse()\r\n        {\r\n            bool observedResult = _search.HasVariants(ChromosomeName, 243172391, 243199373);\r\n            Assert.False(observedResult);\r\n        }\r\n    }\r\n}\r\n\r\n#endif\r\n"
  },
  {
    "path": "UnitTests/Tabix/SearchTestsRemoteMother.cs",
    "content": "﻿#if EXPANDED_TESTS\r\n\r\nusing Genome;\r\nusing System.Collections.Generic;\r\nusing IO;\r\nusing Tabix;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Tabix\r\n{\r\n    public sealed class SearchTestsRemoteMother\r\n    {\r\n        private readonly Search _search;\r\n        private const string ChromosomeName = \"chr2\";\r\n\r\n        public SearchTestsRemoteMother()\r\n        {\r\n            var chr2 = new Chromosome(\"chr2\", \"2\", 1);\r\n\r\n            var refNameToChromosome = new Dictionary<string, Chromosome>\r\n            {\r\n                [chr2.EnsemblName] = chr2,\r\n                [chr2.UcscName]    = chr2\r\n            };\r\n\r\n            Index index;\r\n            using (var stream = PersistentStreamUtils.GetReadStream(\"https://illumina-annotation.s3.amazonaws.com/Test/Mother.vcf.gz.tbi\"))\r\n            {\r\n                index = Reader.GetTabixIndex(stream, refNameToChromosome);\r\n            }\r\n\r\n            var vcfStream = PersistentStreamUtils.GetReadStream(\"https://illumina-annotation.s3.amazonaws.com/Test/Mother.vcf.gz\");\r\n            _search = new Search(index, vcfStream);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasVariants_IntervalBeforeReads_ReturnsFalse()\r\n        {\r\n            bool observedResult = _search.HasVariants(ChromosomeName, 1, 11319);\r\n            Assert.False(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasVariants_IntervalOverlapsReads_HasVcfPositionsOnIntervalTrue_ReturnsTrue()\r\n        {\r\n            bool observedResult = _search.HasVariants(ChromosomeName, 1, 11320);\r\n            Assert.True(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasVariants_IntervalOverlapsReads_ReturnsTrue()\r\n        {\r\n            bool observedResult = _search.HasVariants(ChromosomeName, 217826, 435772);\r\n            Assert.True(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void 
HasVariants_NoOverlap_ReturnsFalse()\r\n        {\r\n            bool observedResult = _search.HasVariants(ChromosomeName, 431200, 434667);\r\n            Assert.False(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasVariants_IntervalAfterReads_ReturnsTrue()\r\n        {\r\n            bool observedResult = _search.HasVariants(ChromosomeName, 243172390, 243199373);\r\n            Assert.True(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void HasVariants_IntervalAfterReads_ReturnsFalse()\r\n        {\r\n            bool observedResult = _search.HasVariants(ChromosomeName, 243172391, 243199373);\r\n            Assert.False(observedResult);\r\n        }\r\n    }\r\n}\r\n\r\n#endif"
  },
  {
    "path": "UnitTests/Tabix/SearchUtilitiesTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Tabix;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Tabix\r\n{\r\n    public sealed class SearchUtilitiesTests\r\n    {\r\n        private readonly Dictionary<string, ushort> _refNameToTabixIndex;\r\n\r\n        public SearchUtilitiesTests()\r\n        {\r\n            _refNameToTabixIndex = new Dictionary<string, ushort>\r\n            {\r\n                [\"chr1\"]  = 0,\r\n                [\"1\"]     = 0,\r\n                [\"chr2\"]  = 1,\r\n                [\"2\"]     = 1,\r\n                [\"chr15\"] = 14,\r\n                [\"15\"]    = 14\r\n            };\r\n        }\r\n\r\n        [Fact]\r\n        public void GetMinOffset_Nominal()\r\n        {\r\n            const ulong expectedResults = 3591443256775;\r\n            var linearFileOffsets = new ulong[1630];\r\n            linearFileOffsets[1629] = expectedResults;\r\n\r\n            var idToChunks = new Dictionary<int, Interval[]>\r\n            {\r\n                [6310] = new[] { new Interval(1, 1) }\r\n            };\r\n\r\n            var refSeq = new ReferenceIndex(ChromosomeUtilities.Chr2, idToChunks, linearFileOffsets);\r\n            ulong observedResults = SearchUtilities.GetMinOffset(refSeq, 26699125);\r\n\r\n            Assert.Equal(expectedResults, observedResults);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetMinOffset_MissingBin()\r\n        {\r\n            const ulong expectedResults = 3723191187417;\r\n            var linearFileOffsets = new ulong[2196];\r\n            linearFileOffsets[2195] = expectedResults;\r\n\r\n            var idToChunks = new Dictionary<int, Interval[]>\r\n            {\r\n                [6876] = new[] { new Interval(1, 1) }\r\n            };\r\n\r\n            var refSeq = new ReferenceIndex(ChromosomeUtilities.Chr2, idToChunks, linearFileOffsets);\r\n            ulong observedResults = SearchUtilities.GetMinOffset(refSeq, 35979265);\r\n\r\n       
     Assert.Equal(expectedResults, observedResults);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetMinOffset_MissingFirstBin()\r\n        {\r\n            const ulong expectedResults = 4351134646660;\r\n            var linearFileOffsets = new ulong[5353];\r\n            linearFileOffsets[5352] = expectedResults;\r\n\r\n            var idToChunks = new Dictionary<int, Interval[]>\r\n            {\r\n                [1254] = new[] { new Interval(1, 1) }\r\n            };\r\n\r\n            var refSeq = new ReferenceIndex(ChromosomeUtilities.Chr2, idToChunks, linearFileOffsets);\r\n            ulong observedResults = SearchUtilities.GetMinOffset(refSeq, 87687168);\r\n\r\n            Assert.Equal(expectedResults, observedResults);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetMaxOffset_Nominal()\r\n        {\r\n            const ulong expectedResults = 3591443312067;\r\n\r\n            var idToChunks = new Dictionary<int, Interval[]>\r\n            {\r\n                [6311] = new[] { new Interval(3591443312067, 3592132724129) }\r\n            };\r\n\r\n            var refSeq = new ReferenceIndex(ChromosomeUtilities.Chr2, idToChunks, null);\r\n            ulong observedResults = SearchUtilities.GetMaxOffset(refSeq, 26699126);\r\n\r\n            Assert.Equal(expectedResults, observedResults);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetMaxOffset_MissingBin()\r\n        {\r\n            const ulong expectedResults = 3724057593420;\r\n\r\n            var idToChunks = new Dictionary<int, Interval[]>\r\n            {\r\n                [6878] = new[] { new Interval(3724057593420, 3724057615020) }\r\n            };\r\n\r\n            var refSeq = new ReferenceIndex(ChromosomeUtilities.Chr2, idToChunks, null);\r\n            ulong observedResults = SearchUtilities.GetMaxOffset(refSeq, 35962881);\r\n\r\n            Assert.Equal(expectedResults, observedResults);\r\n        }\r\n\r\n        [Fact]\r\n        public void 
GetMaxOffset_MissingFirstBin()\r\n        {\r\n            const ulong expectedResults = 3724908138137;\r\n\r\n            var idToChunks = new Dictionary<int, Interval[]>\r\n            {\r\n                [860] = new[] { new Interval(3724908138137, 3724908155075) }\r\n            };\r\n\r\n            var refSeq = new ReferenceIndex(ChromosomeUtilities.Chr2, idToChunks, null);\r\n            ulong observedResults = SearchUtilities.GetMaxOffset(refSeq, 36028417);\r\n\r\n            Assert.Equal(expectedResults, observedResults);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetMaxOffset_MissingAllOverlappingBins_ReturnMaxOffset()\r\n        {\r\n            const ulong expectedResults = ulong.MaxValue;\r\n\r\n            var idToChunks = new Dictionary<int, Interval[]>();\r\n\r\n            var refSeq = new ReferenceIndex(ChromosomeUtilities.Chr2, idToChunks, null);\r\n            ulong observedResults = SearchUtilities.GetMaxOffset(refSeq, 243171329);\r\n\r\n            Assert.Equal(expectedResults, observedResults);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetMinOverlapOffset_SingleBin()\r\n        {\r\n            const long expectedResults = 3591443256857;\r\n            const ulong minOffset = 3591443256775;\r\n            const ulong maxOffset = 3591443312067;\r\n\r\n            var chunks = new[] { new Interval(3591443256857, 3591443311984) };\r\n\r\n            long observedResults = SearchUtilities.GetMinOverlapOffset(chunks, minOffset, maxOffset);\r\n\r\n            Assert.Equal(expectedResults, observedResults);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetMinOverlapOffset_SingleBin_NullChunks()\r\n        {\r\n            const ulong minOffset = 3591443256775;\r\n            const ulong maxOffset = 3591443312067;\r\n\r\n            long observedResults = SearchUtilities.GetMinOverlapOffset(null, minOffset, maxOffset);\r\n\r\n            Assert.Equal(0, observedResults);\r\n        }\r\n\r\n        
[Fact]\r\n        public void GetOffset_Nominal()\r\n        {\r\n            var linearFileOffsets = new ulong[1630];\r\n            linearFileOffsets[1629] = 3591443256775;\r\n\r\n            var idToChunks = GetIdToChunks();\r\n\r\n            var refSeqs = new ReferenceIndex[2];\r\n            refSeqs[1] = new ReferenceIndex(ChromosomeUtilities.Chr2, idToChunks, linearFileOffsets);\r\n\r\n            var index = new Index(Constants.VcfFormat, 0, 0, 0, '#', 0, refSeqs, _refNameToTabixIndex);\r\n            long observedResult = index.GetOffset(\"chr2\", 26699126);\r\n\r\n            Assert.Equal(3591443256857, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetOffset_HandleDiff_TabixIndex_And_RefIndex()\r\n        {\r\n            var linearFileOffsets = new ulong[1630];\r\n            linearFileOffsets[1629] = 3591443256775;\r\n\r\n            var idToChunks = GetIdToChunks();\r\n\r\n            // tabix index 10 = chr2 = ref index 1\r\n            var refSeqs = new ReferenceIndex[11];\r\n            refSeqs[10] = new ReferenceIndex(ChromosomeUtilities.Chr2, idToChunks, linearFileOffsets);\r\n\r\n            var refNameToTabixIndex = new Dictionary<string, ushort> { [\"chr2\"] = 10 };\r\n            var index = new Index(Constants.VcfFormat, 0, 0, 0, '#', 0, refSeqs, refNameToTabixIndex);\r\n\r\n            long observedResult = index.GetOffset(\"chr2\", 26699126);\r\n\r\n            Assert.Equal(3591443256857, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetOffset_UnknownChromosome_ReturnMinusOne()\r\n        {\r\n            var index = new Index(Constants.VcfFormat, 0, 0, 0, '#', 0, null, _refNameToTabixIndex);\r\n            long observedResult = index.GetOffset(\"chrUn\", 26699126);\r\n            Assert.Equal(-1, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetOffset_FixNegativeBeginCoordinate()\r\n        {\r\n            var linearFileOffsets = new ulong[1];\r\n       
     linearFileOffsets[0] = 3213608733669;\r\n\r\n            var idToChunks = new Dictionary<int, Interval[]>\r\n            {\r\n                [585] = new[] { new Interval(3213608740412, 3213608740487) },\r\n                [4681] = new[] { new Interval(3213608733669, 3213608740412) },\r\n                [4682] = new[] { new Interval(3213608740487, 3214303562687) }\r\n            };\r\n\r\n            var refSeqs = new ReferenceIndex[2];\r\n            refSeqs[1] = new ReferenceIndex(ChromosomeUtilities.Chr2, idToChunks, linearFileOffsets);\r\n\r\n            var index = new Index(Constants.VcfFormat, 0, 0, 0, '#', 0, refSeqs, _refNameToTabixIndex);\r\n            long observedResult = index.GetOffset(\"chr2\", 0);\r\n\r\n            Assert.Equal(3213608733669, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetOffset_NoOverlappingBins_UseLinearIndex()\r\n        {\r\n            const long expectedOffset = 11418;\r\n\r\n            var linearFileOffsets = new ulong[7];\r\n            linearFileOffsets[6] = expectedOffset;\r\n\r\n            var idToChunks = new Dictionary<int, Interval[]>();\r\n\r\n            var refSeqs = new ReferenceIndex[2];\r\n            refSeqs[0] = new ReferenceIndex(ChromosomeUtilities.Chr1, idToChunks, linearFileOffsets);\r\n\r\n            var index = new Index(Constants.VcfFormat, 0, 0, 0, '#', 0, refSeqs, _refNameToTabixIndex);\r\n            long observedResult = index.GetOffset(\"chr1\", 100_000);\r\n\r\n            Assert.Equal(expectedOffset, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetOffset_NoOverlappingBins_UseLinearIndex_WithTruncatedIndex_ReturnMinusOne()\r\n        {\r\n            var linearFileOffsets = new ulong[1];\r\n            linearFileOffsets[0] = 11418;\r\n\r\n            var idToChunks = new Dictionary<int, Interval[]>();\r\n\r\n            var refSeqs = new ReferenceIndex[2];\r\n            refSeqs[0] = new ReferenceIndex(ChromosomeUtilities.Chr1, 
idToChunks, linearFileOffsets);\r\n\r\n            var index = new Index(Constants.VcfFormat, 0, 0, 0, '#', 0, refSeqs, _refNameToTabixIndex);\r\n\r\n            long observedResult = index.GetOffset(\"chr1\", 100_000);\r\n\r\n            Assert.Equal(-1, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetFirstNonZeroValue_WithoutZeros()\r\n        {\r\n            var offsets = new ulong[10];\r\n            for (var i = 0; i < offsets.Length; i++) offsets[i] = (ulong)i + 1;\r\n\r\n            long observedResult = offsets.FirstNonZeroValue();\r\n            Assert.Equal(1, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetFirstNonZeroValue_WithLeadingZeros()\r\n        {\r\n            var offsets = new ulong[10];\r\n            for (var i = 5; i < offsets.Length; i++) offsets[i] = (ulong)i + 1;\r\n\r\n            long observedResult = offsets.FirstNonZeroValue();\r\n            Assert.Equal(6, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetFirstNonZeroValue_AllZeros_ReturnMinusOne()\r\n        {\r\n            var offsets = new ulong[10];\r\n\r\n            long observedResult = offsets.FirstNonZeroValue();\r\n            Assert.Equal(-1, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetTabixReferenceSequence_NullChromosome_ReturnNull()\r\n        {\r\n            var linearFileOffsets = new ulong[1630];\r\n            linearFileOffsets[1629] = 3591443256775;\r\n\r\n            var idToChunks = GetIdToChunks();\r\n\r\n            var refSeqs = new ReferenceIndex[2];\r\n            refSeqs[1] = new ReferenceIndex(ChromosomeUtilities.Chr2, idToChunks, linearFileOffsets);\r\n\r\n            var index = new Index(Constants.VcfFormat, 0, 0, 0, '#', 0, refSeqs, _refNameToTabixIndex);\r\n\r\n            var refSeq = index.GetTabixReferenceSequence(null);\r\n            Assert.Null(refSeq);\r\n        }\r\n\r\n        [Fact]\r\n        public void 
GetTabixReferenceSequence_Nominal()\r\n        {\r\n            var linearFileOffsets = new ulong[1630];\r\n            linearFileOffsets[1629] = 3591443256775;\r\n\r\n            var idToChunks = GetIdToChunks();\r\n\r\n            var refSeqs = new ReferenceIndex[2];\r\n            refSeqs[1] = new ReferenceIndex(ChromosomeUtilities.Chr2, idToChunks, linearFileOffsets);\r\n\r\n            var index = new Index(Constants.VcfFormat, 0, 0, 0, '#', 0, refSeqs, _refNameToTabixIndex);\r\n\r\n            var refSeq = index.GetTabixReferenceSequence(\"chr2\");\r\n            Assert.Equal(\"chr2\", refSeq.Chromosome.UcscName);\r\n        }\r\n\r\n        [Fact]\r\n        public void AdjustBegin_Nominal()\r\n        {\r\n            int observedResult = SearchUtilities.AdjustBegin(5);\r\n            Assert.Equal(4, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void AdjustBegin_CorrectNegativeNumbers()\r\n        {\r\n            int observedResult = SearchUtilities.AdjustBegin(0);\r\n            Assert.Equal(0, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetMinMaxFileOffset_Nominal()\r\n        {\r\n            var intervals = new []\r\n            {\r\n                new Interval(3, 3),\r\n                new Interval(2, 2),\r\n                new Interval(1, 5),\r\n                new Interval(5, 10),\r\n                new Interval(2, 6),\r\n                new Interval(8, 9)\r\n            };\r\n\r\n            (long observedMinOffset, long observedMaxOffset) = SearchUtilities.GetMinMaxVirtualFileOffset(intervals);\r\n            Assert.Equal(1, observedMinOffset);\r\n            Assert.Equal(10, observedMaxOffset);\r\n        }\r\n\r\n        private static Dictionary<int, Interval[]> GetIdToChunks()\r\n        {\r\n            return new Dictionary<int, Interval[]>\r\n            {\r\n                [0]    = new[] { new Interval(4099908124223, 4099908124304), new Interval(4951477375210, 4951477375293), new 
Interval(5624484975997, 5624484976080) },\r\n                [1]    = new[] { new Interval(3340253330084, 3340253330164), new Interval(3465184408915, 3465184408994), new Interval(3568724955460, 3568724955542), new Interval(3691147500084, 3691147500165), new Interval(3795841311087, 3795841311169), new Interval(3910417270243, 3910417270325), new Interval(4000555183327, 4000555183408) },\r\n                [12]   = new[] { new Interval(3584204706120, 3584204706202), new Interval(3603789121700, 3603789121782), new Interval(3618810913033, 3618810913115), new Interval(3636616069222, 3636616069304), new Interval(3651735457673, 3651735457755), new Interval(3666758669972, 3666758670054), new Interval(3678665150304, 3678665150385) },\r\n                [98]   = new[] { new Interval(3586357202663, 3586357202745), new Interval(3587723007951, 3587723008032), new Interval(3589980566127, 3589980566208), new Interval(3592834453845, 3592834453927), new Interval(3595721982714, 3595721982795), new Interval(3598606802778, 3598606802860), new Interval(3600879093088, 3600879093169) },\r\n                [788]  = new[] { new Interval(3589980579562, 3589980605258), new Interval(3590735269728, 3590735292546), new Interval(3591443256775, 3591443312067), new Interval(3592132724129, 3592132724210) },\r\n                [6310] = new[] { new Interval(3591443256857, 3591443311984) },\r\n                [6311] = new[] { new Interval(3591443312067, 3592132724129) }\r\n            };\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Tabix/VirtualPositionTests.cs",
    "content": "﻿using Tabix;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Tabix\r\n{\r\n    public sealed class VirtualPositionTests\r\n    {\r\n        [Fact]\r\n        public void VirtualPosition_LoopBack()\r\n        {\r\n            const long expectedVirtualPosition = 3591443256775;\r\n\r\n            (long fileOffset, int blockOffset) = VirtualPosition.From(expectedVirtualPosition);\r\n            long observedVirtualPosition = VirtualPosition.To(fileOffset, blockOffset);\r\n\r\n            Assert.Equal(expectedVirtualPosition, observedVirtualPosition);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/TestDataStructures/SimpleSequence.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Genome;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Providers;\r\n\r\nnamespace UnitTests.TestDataStructures\r\n{\r\n    public sealed class SimpleSequence : ISequence\r\n    {\r\n        private readonly string _sequence;\r\n        private readonly int _zeroBasedStartOffset;\r\n        public int Length => _zeroBasedStartOffset + _sequence.Length;\r\n        public Band[] CytogeneticBands => null;\r\n\r\n        public SimpleSequence(string s, int zeroBasedStartOffset = 0)\r\n        {\r\n            _zeroBasedStartOffset = zeroBasedStartOffset;\r\n            _sequence             = s;\r\n        }\r\n\r\n        public string Substring(int offset, int length)\r\n        {\r\n            if (offset - _zeroBasedStartOffset + length > _sequence.Length || \r\n                offset < _zeroBasedStartOffset) return \"\";\r\n            return _sequence.Substring(offset - _zeroBasedStartOffset, length);\r\n        }\r\n    }\r\n\r\n    public sealed class SimpleSequenceProvider : ISequenceProvider\r\n    {\r\n        public string Name { get; }\r\n        public GenomeAssembly Assembly { get; }\r\n        public IEnumerable<IDataSourceVersion> DataSourceVersions { get; }\r\n        public void Annotate(IAnnotatedPosition annotatedPosition)\r\n        {\r\n            throw new System.NotImplementedException();\r\n        }\r\n\r\n        public void PreLoad(Chromosome chromosome, List<int> positions)\r\n        {\r\n            throw new System.NotImplementedException();\r\n        }\r\n\r\n        public ISequence Sequence { get; }\r\n        public Dictionary<string, Chromosome> RefNameToChromosome { get; }\r\n        public Dictionary<ushort, Chromosome> RefIndexToChromosome { get; }\r\n        public void LoadChromosome(Chromosome chromosome) { }\r\n\r\n        public SimpleSequenceProvider(GenomeAssembly assembly, ISequence sequence,\r\n            
Dictionary<string, Chromosome> refNameToChromosome)\r\n        {\r\n            Assembly            = assembly;\r\n            Sequence            = sequence;\r\n            RefNameToChromosome = refNameToChromosome;\r\n        }\r\n\r\n        public void Dispose() { }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/TestUtilities/AnnotationUtilities.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Genome;\r\nusing Nirvana;\r\nusing OptimizedCore;\r\nusing VariantAnnotation;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing Vcf;\r\nusing Vcf.VariantCreator;\r\n\r\nnamespace UnitTests.TestUtilities\r\n{\r\n    public static class AnnotationUtilities\r\n\t{\r\n        internal static IAnnotatedPosition GetAnnotatedPosition(string cacheFilePrefix, List<string> saPaths, IMitoHeteroplasmyProvider mitoHeteroplasmyProvider,\r\n            string vcfLine)\r\n        {\r\n            var annotationFiles = new AnnotationFiles();\r\n            saPaths?.ForEach(x => annotationFiles.AddFiles(x));\r\n\r\n            var refMinorProvider  = ProviderUtilities.GetRefMinorProvider(annotationFiles);\r\n            var (annotator, sequenceProvider)   = GetAnnotatorAndSequenceProvider(cacheFilePrefix, saPaths);\r\n\r\n            var variantFactory    = new VariantFactory(sequenceProvider.Sequence, new VariantId());\r\n            var position          = ParseVcfLine(vcfLine, refMinorProvider, sequenceProvider, mitoHeteroplasmyProvider, variantFactory);\r\n            var annotatedPosition = annotator.Annotate(position);\r\n\r\n            return annotatedPosition;\r\n        }\r\n\r\n\t    internal static IPosition ParseVcfLine(string vcfLine, IRefMinorProvider refMinorProvider, ISequenceProvider sequenceProvider, \r\n            IMitoHeteroplasmyProvider mitoHeteroplasmyProvider, VariantFactory variantFactory, HashSet<string> customInfoKeys=null)\r\n\t    {\r\n\t        var simplePosition = GetSimplePosition(vcfLine, sequenceProvider.RefNameToChromosome);\r\n\t        return Position.ToPosition(simplePosition, refMinorProvider, sequenceProvider, mitoHeteroplasmyProvider, variantFactory, false, customInfoKeys);\r\n\t    }\r\n\r\n        internal static SimplePosition 
GetSimplePosition(string vcfLine,\r\n            Dictionary<string, Chromosome> refNameToChromosome)\r\n        {\r\n            string[] vcfFields = vcfLine.OptimizedSplit('\\t');\r\n            var chromosome     = ReferenceNameUtilities.GetChromosome(refNameToChromosome, vcfFields[VcfCommon.ChromIndex]);\r\n            int position       = int.Parse(vcfFields[VcfCommon.PosIndex]);\r\n\r\n            return SimplePosition.GetSimplePosition(chromosome, position, vcfFields, new NullVcfFilter());\r\n        }\r\n\r\n        private static (Annotator Annotator, ISequenceProvider SequenceProvider) GetAnnotatorAndSequenceProvider(string cacheFilePrefix, List<string> saPaths)\r\n        {\r\n            var annotationFiles = new AnnotationFiles();\r\n            saPaths?.ForEach(x => annotationFiles.AddFiles(x));\r\n\r\n            string sequenceFilePath          = cacheFilePrefix + \".bases\";\r\n            var sequenceProvider             = ProviderUtilities.GetSequenceProvider(sequenceFilePath);\r\n            var transcriptAnnotationProvider = ProviderUtilities.GetTranscriptAnnotationProvider(cacheFilePrefix, sequenceProvider, null);\r\n            var saProvider                   = ProviderUtilities.GetNsaProvider(annotationFiles);\r\n            var lcrProvider                  = ProviderUtilities.GetLcrProvider(annotationFiles);\r\n            var conservationProvider         = ProviderUtilities.GetConservationProvider(annotationFiles);\r\n\r\n            var annotator = new Annotator(transcriptAnnotationProvider, sequenceProvider, saProvider,\r\n                conservationProvider, lcrProvider, null, null, null);\r\n            return (annotator,sequenceProvider);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/TestUtilities/ByteUtilities.cs",
    "content": "﻿using System.Security.Cryptography;\r\n\r\nnamespace UnitTests.TestUtilities\r\n{\r\n\tpublic static class ByteUtilities\r\n\t{\r\n\t\tpublic static byte[] GetRandomBytes(int numBytes)\r\n\t\t{\r\n\t\t\tvar buffer = new byte[numBytes];\r\n\t\t\tusing (var csp = RandomNumberGenerator.Create()) csp.GetBytes(buffer);\r\n\t\t\treturn buffer;\r\n\t\t}\r\n\r\n\t}\r\n}"
  },
  {
    "path": "UnitTests/TestUtilities/ChromosomeUtilities.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Genome;\r\n\r\nnamespace UnitTests.TestUtilities\r\n{\r\n    public static class ChromosomeUtilities\r\n    {\r\n        public static readonly Chromosome Chr1  = new Chromosome(\"chr1\", \"1\", \"\", \"\", 1, 0);\r\n        public static readonly Chromosome Chr2  = new Chromosome(\"chr2\", \"2\", \"\", \"\", 1, 1);\r\n        public static readonly Chromosome Chr3  = new Chromosome(\"chr3\", \"3\", \"\", \"\", 1, 2);\r\n        public static readonly Chromosome Chr4  = new Chromosome(\"chr4\", \"4\", \"\", \"\", 1, 3);\r\n        public static readonly Chromosome Chr5  = new Chromosome(\"chr5\", \"5\", \"\", \"\", 1, 4);\r\n        public static readonly Chromosome Chr6  = new Chromosome(\"chr6\", \"6\", \"\", \"\", 1, 5);\r\n        public static readonly Chromosome Chr7  = new Chromosome(\"chr7\", \"7\", \"\", \"\", 1, 6);\r\n        public static readonly Chromosome Chr8  = new Chromosome(\"chr8\", \"8\", \"\", \"\", 1, 7);\r\n        public static readonly Chromosome Chr9  = new Chromosome(\"chr9\", \"9\", \"\", \"\", 1, 8);\r\n        public static readonly Chromosome Chr10 = new Chromosome(\"chr10\", \"10\", \"\", \"\", 1, 9);\r\n        public static readonly Chromosome Chr11 = new Chromosome(\"chr11\", \"11\", \"\", \"\", 1, 10);\r\n        public static readonly Chromosome Chr12 = new Chromosome(\"chr12\", \"12\", \"\", \"\", 1, 11);\r\n        public static readonly Chromosome Chr13 = new Chromosome(\"chr13\", \"13\", \"\", \"\", 1, 12);\r\n        public static readonly Chromosome Chr14 = new Chromosome(\"chr14\", \"14\", \"\", \"\", 1, 13);\r\n        public static readonly Chromosome Chr15 = new Chromosome(\"chr15\", \"15\", \"\", \"\", 1, 14);\r\n        public static readonly Chromosome Chr16 = new Chromosome(\"chr16\", \"16\", \"\", \"\", 1, 15);\r\n        public static readonly Chromosome Chr17 = new Chromosome(\"chr17\", \"17\", \"\", \"\", 1, 16);\r\n        public static readonly 
Chromosome Chr18 = new Chromosome(\"chr18\", \"18\", \"\", \"\", 1, 17);\r\n        public static readonly Chromosome Chr19 = new Chromosome(\"chr19\", \"19\", \"\", \"\", 1, 18);\r\n        public static readonly Chromosome Chr20 = new Chromosome(\"chr20\", \"20\", \"\", \"\", 1, 19);\r\n        public static readonly Chromosome Chr21 = new Chromosome(\"chr21\", \"21\", \"\", \"\", 1, 20);\r\n        public static readonly Chromosome Chr22 = new Chromosome(\"chr22\", \"22\", \"\", \"\", 1, 21);\r\n        public static readonly Chromosome ChrX  = new Chromosome(\"chrX\", \"X\", \"\", \"\", 1, 22);\r\n        public static readonly Chromosome ChrY  = new Chromosome(\"chrY\", \"Y\", \"\", \"\", 1, 23);\r\n        public static readonly Chromosome ChrM  = new Chromosome(\"chrM\", \"MT\", \"\", \"\", 1, 24);\r\n\r\n        public static readonly Chromosome Bob = new Chromosome(\"bob\", \"bob\", \"\", \"\", 1, Chromosome.UnknownReferenceIndex);\r\n\r\n        public static readonly Dictionary<string, Chromosome> RefNameToChromosome = new Dictionary<string, Chromosome>();\r\n        public static readonly Dictionary<ushort, Chromosome> RefIndexToChromosome = new Dictionary<ushort, Chromosome>();\r\n\r\n        static ChromosomeUtilities()\r\n        {\r\n            Chromosome[] chromosomes =\r\n            {\r\n                Chr1, Chr2, Chr3, Chr4, Chr5, Chr6, Chr7, Chr8, Chr9, Chr10, Chr11, Chr12, Chr13, Chr14, Chr15, Chr16,\r\n                Chr17, Chr18, Chr19, Chr20, Chr21, Chr22, ChrX, ChrY, ChrM\r\n            };\r\n            foreach (var chromosome in chromosomes) AddChromosome(chromosome);\r\n        }\r\n\r\n        private static void AddChromosome(Chromosome chromosome)\r\n        {\r\n            RefIndexToChromosome[chromosome.Index]      = chromosome;\r\n            RefNameToChromosome[chromosome.EnsemblName] = chromosome;\r\n            RefNameToChromosome[chromosome.UcscName]    = chromosome;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/TestUtilities/RandomPath.cs",
    "content": "﻿using System.IO;\r\n\r\nnamespace UnitTests.TestUtilities\r\n{\r\n\tpublic static class RandomPath\r\n\t{\r\n\t\tpublic static string GetRandomPath() => Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());\r\n\t}\r\n}"
  },
  {
    "path": "UnitTests/TestUtilities/ResourceUtilities.cs",
    "content": "﻿using System.IO;\r\nusing IO;\r\n\r\nnamespace UnitTests.TestUtilities\r\n{\r\n\tpublic static class ResourceUtilities\r\n\t{\r\n\t\tpublic static Stream GetReadStream(string path, bool checkMissingFile = true)\r\n\t\t{\r\n\t\t\tvar missingFile = !File.Exists(path);\r\n\t\t\tif (!checkMissingFile && missingFile) return null;\r\n\r\n\t\t\tif (missingFile)\r\n\t\t\t{\r\n\t\t\t\tthrow new FileNotFoundException($\"ERROR: The unit test resource file ({path}) was not found.\");\r\n\t\t\t}\r\n\r\n\t\t\treturn FileUtilities.GetReadStream(path);\r\n\t\t}\r\n\t}\r\n}"
  },
  {
    "path": "UnitTests/TestUtilities/Resources.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\n\r\nnamespace UnitTests.TestUtilities\r\n{\r\n\tpublic static class Resources\r\n\t{\r\n\t\t// ReSharper disable once MemberCanBePrivate.Global\r\n\t\tpublic static readonly string Top;\r\n\t\tpublic static string TopPath(string path) => Path.Combine(Top, path);\r\n\t\tpublic static string EndToEnd37(string path) => Path.Combine(Top, \"EndToEnd\", \"GRCh37\", path);\r\n\t\tpublic static string InputFiles(string path) => Path.Combine(Top, \"InputFiles\", path);\r\n\t    public static string ClinvarXmlFiles(string path) => Path.Combine(Top, \"ClinVarXmlFiles\", path);\r\n\t    public static string VcvXmlFiles(string path) => Path.Combine(Top, \"ClinVarXmlFiles\",\"VCVs\", path);\r\n\t\tpublic static string SaGRCh37(string path) => Path.Combine(Top, \"SA\", \"GRCh37\", path);\r\n\t\tpublic static string SaPath(string path) => Path.Combine(Top, \"SA\", path);\r\n        public static string MockSaFiles => Path.Combine(Top, \"SA\", \"MockSaFiles\");\r\n\r\n\t\tstatic Resources()\r\n\t\t{\r\n            var solutionDir = GetParentDirectory(AppContext.BaseDirectory);\r\n\t\t\tTop = Path.Combine(solutionDir, \"UnitTests\", \"Resources\");\r\n\t\t}\r\n\r\n\t\tprivate static string GetParentDirectory(string directory)\r\n\t\t{\r\n\t\t    while (true)\r\n\t\t    {\r\n\t\t        directory = Path.GetDirectoryName(directory);\r\n\t\t        if (directory == null) return string.Empty;\r\n\r\n\t\t        var unitTestDir = Path.Combine(directory, \"UnitTests\");\r\n\t\t        if (Directory.Exists(unitTestDir)) break;\r\n\t\t    }\r\n\r\n\t\t\treturn directory;\r\n\t\t}\r\n\t}\r\n}"
  },
  {
    "path": "UnitTests/TestUtilities/TestDataGenerator.cs",
    "content": "using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing Genome;\nusing SAUtils.GenericScore;\nusing SAUtils.GenericScore.GenericScoreParser;\nusing UnitTests.TestDataStructures;\nusing VariantAnnotation.GenericScore;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace UnitTests.TestUtilities\n{\n    public static class TestDataGenerator\n    {\n        public static void GenerateTestData(\n            Dictionary<Chromosome, List<Dictionary<string, object>>> testSetup,\n            List<GenericScoreItem> saItems,\n            Func<int, int, double> scoreFunc,\n            ISequenceProvider sequenceProvider\n        )\n        {\n            foreach ((Chromosome chromosome, List<Dictionary<string, object>> chromosomeTests) in testSetup)\n            {\n                foreach (Dictionary<string, object> chromosomeTest in chromosomeTests)\n                {\n                    var startPosition = (int) chromosomeTest[\"startPosition\"];\n                    var endPosition   = (int) chromosomeTest[\"endPosition\"];\n\n                    var expectedScores = new List<double>();\n                    for (int i = startPosition; i <= endPosition; i++)\n                    {\n                        double score = scoreFunc(i, endPosition);\n                        expectedScores.Add(score);\n                        string refAllele = sequenceProvider.Sequence.Substring(i - 1, 1);\n                        saItems.Add(new GenericScoreItem(chromosome, i, refAllele, \"A\", score));\n                    }\n\n                    chromosomeTest[\"expectedScores\"] = expectedScores;\n                }\n            }\n        }\n\n        public static void GenerateRandomScoreData(\n            Dictionary<Chromosome, List<Dictionary<string, object>>> testSetup,\n            List<GenericScoreItem> saItems,\n            ISequenceProvider sequenceProvider\n        )\n        
{\n            var random = new Random(1);\n            GenerateTestData(testSetup, saItems, (_, _) => Math.Round(random.NextDouble(), 8), sequenceProvider);\n        }\n\n        public static ISequenceProvider GetSequenceProvider()\n        {\n            var sequence = new SimpleSequence(new string('A', 1_000_000));\n            return new SimpleSequenceProvider(GenomeAssembly.GRCh37, sequence, ChromosomeUtilities.RefNameToChromosome);\n        }\n\n        public static ScoreReader GetScoreReaderWithRandomData(\n            Dictionary<Chromosome, List<Dictionary<string, object>>> testSetup\n        )\n        {\n            string[] nucleotides = {\"A\", \"C\", \"G\", \"T\"};\n\n            var writeStream = new MemoryStream();\n            var indexStream = new MemoryStream();\n            var saItems     = new List<GenericScoreItem>();\n            var version     = new DataSourceVersion(\"Test\", \"1\", DateTime.Parse(DateTime.Now.ToString(\"yyyy-MM-dd\")).Ticks, \"No description\");\n            var writerSettings = new WriterSettings(\n                10_000,\n                nucleotides,\n                false,\n                EncoderType.ZeroToOne,\n                new ZeroToOneScoreEncoder(2, 1.0),\n                new ScoreJsonEncoder(\"TestKey\", null),\n                new SaItemValidator(true, true)\n            );\n\n            // Scoring function to fill random scores\n            GenerateRandomScoreData(testSetup, saItems, GetSequenceProvider());\n\n            using var scoreFileWriter = new ScoreFileWriter(\n                writerSettings,\n                writeStream,\n                indexStream,\n                version,\n                GetSequenceProvider(),\n                SaCommon.SchemaVersion,\n                leaveOpen: true\n            );\n            // Write saItems to stream\n            scoreFileWriter.Write(saItems);\n\n            // Reset streams in preparation for reading them\n            indexStream.Position = 0;\n      
      writeStream.Position = 0;\n\n            // Read the scores\n            return ScoreReader.Read(writeStream, indexStream);\n        }\n\n        public static (\n            List<GenericScoreItem> saItems,\n            WriterSettings writerSettings,\n            MemoryStream indexStream,\n            MemoryStream writeStream,\n            DataSourceVersion\n            version,\n            Dictionary<Chromosome, List<Dictionary<string, object>>> testSetup\n            ) GetRandomSingleChromosomeData(Chromosome chromosome, int startPosition, int endPosition)\n        {\n            const int blockLength = 10_000;\n\n            string[] nucleotides = {\"A\", \"C\", \"G\", \"T\"};\n\n            var testSetup = new Dictionary<Chromosome, List<Dictionary<string, object>>>\n            {\n                {\n                    chromosome, new List<Dictionary<string, object>>\n                    {\n                        new Dictionary<string, object>\n                        {\n                            {\"startPosition\", startPosition},\n                            {\"endPosition\", endPosition},\n                        }\n                    }\n                },\n            };\n\n            var writeStream = new MemoryStream();\n            var indexStream = new MemoryStream();\n            var saItems     = new List<GenericScoreItem>();\n            var version     = new DataSourceVersion(\"Test\", \"1\", DateTime.Parse(DateTime.Now.ToString(\"yyyy-MM-dd\")).Ticks, \"No description\");\n            GenerateRandomScoreData(testSetup, saItems, TestDataGenerator.GetSequenceProvider());\n\n            var writerSettings = new WriterSettings(\n                blockLength,\n                nucleotides,\n                false,\n                EncoderType.ZeroToOne,\n                new ZeroToOneScoreEncoder(2, 1.0),\n                new ScoreJsonEncoder(\"TestKey\", \"TestSubKey\"),\n                new SaItemValidator(true, true)\n            );\n\n      
      return (saItems, writerSettings, indexStream, writeStream, version, testSetup);\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/UnitTests.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\r\n  <PropertyGroup>\r\n    <TargetFramework>net6.0</TargetFramework>\r\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\r\n    \r\n    <SonarQubeExclude>true</SonarQubeExclude>\r\n  </PropertyGroup>\r\n  <ItemGroup>\r\n    <PackageReference Include=\"AWSSDK.Lambda\" Version=\"3.7.9.3\" />\r\n    <PackageReference Include=\"Microsoft.NET.Test.Sdk\" Version=\"17.1.0\" />\r\n    <PackageReference Include=\"Moq\" Version=\"4.17.1\" />\r\n    <PackageReference Include=\"Newtonsoft.Json\" Version=\"13.0.1\" />\r\n    <PackageReference Include=\"xunit\" Version=\"2.4.1\" />\r\n    <PackageReference Include=\"xunit.runner.visualstudio\" Version=\"2.4.3\">\r\n      <PrivateAssets>all</PrivateAssets>\r\n      <IncludeAssets>runtime; build; native; contentfiles; analyzers</IncludeAssets>\r\n    </PackageReference>\r\n  </ItemGroup>\r\n  <ItemGroup>\r\n    <ProjectReference Include=\"..\\AnnotationLambda\\AnnotationLambda.csproj\" />\r\n    <ProjectReference Include=\"..\\CacheUtils\\CacheUtils.csproj\" />\r\n    <ProjectReference Include=\"..\\CommandLine\\CommandLine.csproj\" />\r\n    <ProjectReference Include=\"..\\Compression\\Compression.csproj\" />\r\n    <ProjectReference Include=\"..\\CustomAnnotationLambda\\CustomAnnotationLambda.csproj\" />\r\n    <ProjectReference Include=\"..\\Downloader\\Downloader.csproj\" />\r\n    <ProjectReference Include=\"..\\ErrorHandling\\ErrorHandling.csproj\" />\r\n    <ProjectReference Include=\"..\\GeneAnnotationLambda\\GeneAnnotationLambda.csproj\" />\r\n    <ProjectReference Include=\"..\\Genome\\Genome.csproj\" />\r\n    <ProjectReference Include=\"..\\Intervals\\Intervals.csproj\" />\r\n    <ProjectReference Include=\"..\\IO\\IO.csproj\" />\r\n    <ProjectReference Include=\"..\\Jasix\\Jasix.csproj\" />\r\n    <ProjectReference Include=\"..\\Jist\\Jist.csproj\" />\r\n    <ProjectReference Include=\"..\\NirvanaLambda\\NirvanaLambda.csproj\" />\r\n    <ProjectReference 
Include=\"..\\Nirvana\\Nirvana.csproj\" />\r\n    <ProjectReference Include=\"..\\OptimizedCore\\OptimizedCore.csproj\" />\r\n    <ProjectReference Include=\"..\\ReferenceSequence\\ReferenceSequence.csproj\" />\r\n    <ProjectReference Include=\"..\\SAUtils\\SAUtils.csproj\" />\r\n    <ProjectReference Include=\"..\\SingleAnnotationLambda\\SingleAnnotationLambda.csproj\" />\r\n    <ProjectReference Include=\"..\\Tabix\\Tabix.csproj\" />\r\n    <ProjectReference Include=\"..\\VariantAnnotation.Interface\\VariantAnnotation.Interface.csproj\" />\r\n    <ProjectReference Include=\"..\\VariantAnnotation\\VariantAnnotation.csproj\" />\r\n    <ProjectReference Include=\"..\\Variants\\Variants.csproj\" />\r\n    <ProjectReference Include=\"..\\Vcf\\Vcf.csproj\" />\r\n    <ProjectReference Include=\"..\\Cloud\\Cloud.csproj\" />\r\n  </ItemGroup>\r\n  <ItemGroup>\r\n    <Service Include=\"{82a7f48d-3b50-4b1e-b82e-3ada8210c358}\" />\r\n  </ItemGroup>\r\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\r\n</Project>"
  },
  {
    "path": "UnitTests/VariantAnnotation/Algorithms/SwapTests.cs",
    "content": "﻿using VariantAnnotation.Algorithms;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.Algorithms\r\n{\r\n    public sealed class SwapTests\r\n    {\r\n        [Fact]\r\n        public void Swap_Int()\r\n        {\r\n            const int expectedA = 5;\r\n            const int expectedB = 3;\r\n\r\n            int observedA = expectedB;\r\n            int observedB = expectedA;\r\n            Swap.Int(ref observedA, ref observedB);\r\n\r\n            Assert.Equal(expectedA, observedA);\r\n            Assert.Equal(expectedB, observedB);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/AnnotatedPositionTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Moq;\r\nusing OptimizedCore;\r\nusing UnitTests.SAUtils.InputFileParsers;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.Pools;\r\nusing Variants;\r\nusing Vcf;\r\nusing Vcf.Info;\r\nusing Vcf.Sample;\r\nusing Vcf.VariantCreator;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions\r\n{\r\n    public sealed class AnnotatedPositionTests\r\n    {\r\n        [Fact]\r\n        public void GetJsonString_DifferentOriginalChromosomeName()\r\n        {\r\n            const string originalChromosomeName = \"originalChr1\";\r\n\r\n            IVariant[] variants = GetVariants();\r\n            ISample[] samples   = GetSamples();\r\n            IAnnotatedVariant[] annotatedVariants = Annotator.GetAnnotatedVariants(variants);\r\n\r\n            var position          = GetPosition(originalChromosomeName, variants, samples);\r\n            var annotatedPosition = AnnotatedPositionPool.Get(position, annotatedVariants);\r\n            \r\n            var    sb             = annotatedPosition.GetJsonStringBuilder();\r\n            var    observedResult = sb.ToString();\r\n            StringBuilderPool.Return(sb);\r\n            PositionPool.Return((Position)annotatedPosition.Position);\r\n            AnnotatedPositionPool.Return(annotatedPosition);\r\n\r\n            Assert.NotNull(observedResult);\r\n            Assert.Contains($\"\\\"chromosome\\\":\\\"{originalChromosomeName}\\\"\", observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetJsonString_NullAnnotatedVariants()\r\n        {\r\n            const string originalChromosomeName = \"originalChr1\";\r\n\r\n            var position          = GetPosition(originalChromosomeName, null, 
null);\r\n            var annotatedPosition = AnnotatedPositionPool.Get(position, null);\r\n\r\n            var sb= annotatedPosition.GetJsonStringBuilder();\r\n            AnnotatedPositionPool.Return(annotatedPosition);\r\n            \r\n            Assert.Null(sb);\r\n        }\r\n        \r\n        //21    9411410    .    C    T    9.51    DRAGENSnpHardQUAL    AC=2;AF=1.000;AN=2;DP=2;FS=0.000;MQ=100.00;QD=9.51;SOR=1.609    GT:AD:AF:DP:GQ:FT:F1R2:F2R1:PL:GP:PP    ./.:.:.:0:0:.:.:.    ./.:.:.:0:0:.:.:.    1/1:0,1:1.000:1:3:PASS:0,1:0,0:45,3,0:1.0415e+01,3.4301e+00,3.4199e+00:45,3,0\r\n        [Fact]\r\n        public void GetJsonString_fisherStrand()\r\n        {\r\n            const string vcfLine = \"21\\t9411410\\t.\\tC\\tT\\t9.51\\tDRAGENSnpHardQUAL\\tAC=2;AF=1.000;AN=2;DP=2;FS=0.000;MQ=100.00;QD=9.51;SOR=1.609\";\r\n\r\n            var refMinorProvider = new Mock<IRefMinorProvider>();\r\n            var seqProvider      = ParserTestUtils.GetSequenceProvider(9411410, \"C\", 'A', ChromosomeUtilities.RefNameToChromosome);\r\n            var variantFactory   = new VariantFactory(seqProvider.Sequence, new VariantId());\r\n\r\n            var position = AnnotationUtilities.ParseVcfLine(vcfLine, refMinorProvider.Object, seqProvider, null, variantFactory);\r\n\r\n            IVariant[]          variants          = GetVariants();\r\n            IAnnotatedVariant[] annotatedVariants = Annotator.GetAnnotatedVariants(variants);\r\n            var                 annotatedPosition = AnnotatedPositionPool.Get(position, annotatedVariants);\r\n\r\n            var sb             = annotatedPosition.GetJsonStringBuilder();\r\n            var observedResult = sb.ToString();\r\n            StringBuilderPool.Return(sb);\r\n            AnnotatedPositionPool.Return(annotatedPosition);\r\n\r\n            Assert.NotNull(observedResult);\r\n            Assert.Contains(\"\\\"fisherStrandBias\\\":0\", observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void 
GetJsonString_StrelkaSomatic()\r\n        {\r\n            const string vcfLine = \"chr1\t13813\t.\tT\tG\t.\tLowQscore\tSOMATIC;QSS=33;TQSS=1;NT=ref;QSS_NT=16;TQSS_NT=1;SGT=TT->GT;DP=266;MQ=23.89;MQ0=59;ALTPOS=69;ALTMAP=37;ReadPosRankSum=1.22;SNVSB=5.92;PNOISE=0.00;PNOISE2=0.00;VQSR=1.93;FS=12.123\";\r\n\r\n            var refMinorProvider = new Mock<IRefMinorProvider>();\r\n            var seqProvider = ParserTestUtils.GetSequenceProvider(13813, \"T\", 'C', ChromosomeUtilities.RefNameToChromosome);\r\n            var variantFactory = new VariantFactory(seqProvider.Sequence, new VariantId());\r\n            \r\n            var position = AnnotationUtilities.ParseVcfLine(vcfLine, refMinorProvider.Object, seqProvider, null, variantFactory);\r\n\r\n            IVariant[]          variants          = GetVariants();\r\n            IAnnotatedVariant[] annotatedVariants = Annotator.GetAnnotatedVariants(variants);\r\n            var                 annotatedPosition = AnnotatedPositionPool.Get(position, annotatedVariants);\r\n\r\n            var sb             = annotatedPosition.GetJsonStringBuilder();\r\n            var observedResult = sb.ToString();\r\n            StringBuilderPool.Return(sb);\r\n            AnnotatedPositionPool.Return(annotatedPosition);\r\n\r\n            Assert.NotNull(observedResult);\r\n            Assert.Contains(\"\\\"jointSomaticNormalQuality\\\":16\", observedResult);\r\n            Assert.Contains(\"\\\"recalibratedQuality\\\":1.93\", observedResult);\r\n            Assert.Contains(\"\\\"mappingQuality\\\":23.89\", observedResult);\r\n            Assert.Contains(\"\\\"fisherStrandBias\\\":12.123\", observedResult);\r\n        }\r\n        \r\n        [Fact]\r\n        public void GetJsonString_custom_info()\r\n        {\r\n            const string vcfLine = 
\"chr1\t13813\t.\tT\tG\t.\tLowQscore\tSOMATIC;QSS=33;TQSS=1;NT=ref;QSS_NT=16;TQSS_NT=1;SGT=TT->GT;DP=266;MQ=23.89;MQ0=59;ALTPOS=69;ALTMAP=37;ReadPosRankSum=1.22;SNVSB=5.92;PNOISE=0.00;PNOISE2=0.00;VQSR=1.93;FS=12.123\";\r\n\r\n            var refMinorProvider = new Mock<IRefMinorProvider>();\r\n            var seqProvider      = ParserTestUtils.GetSequenceProvider(13813, \"T\", 'C', ChromosomeUtilities.RefNameToChromosome);\r\n            var variantFactory   = new VariantFactory(seqProvider.Sequence, new VariantId());\r\n\r\n            var customInfoKeys = new HashSet<string>() {\"SGT\",\"SOMATIC\" };\r\n\r\n            var position = AnnotationUtilities.ParseVcfLine(vcfLine, refMinorProvider.Object, seqProvider, null, variantFactory, customInfoKeys);\r\n\r\n            IVariant[]          variants          = GetVariants();\r\n            IAnnotatedVariant[] annotatedVariants = Annotator.GetAnnotatedVariants(variants);\r\n            var                 annotatedPosition = AnnotatedPositionPool.Get(position, annotatedVariants);\r\n\r\n            var sb             = annotatedPosition.GetJsonStringBuilder();\r\n            var observedResult = sb.ToString();\r\n            StringBuilderPool.Return(sb);\r\n            AnnotatedPositionPool.Return(annotatedPosition);\r\n\r\n            Assert.NotNull(observedResult);\r\n            Assert.Contains(\"\\\"jointSomaticNormalQuality\\\":16\", observedResult);\r\n            Assert.Contains(\"\\\"recalibratedQuality\\\":1.93\",     observedResult);\r\n            Assert.Contains(\"\\\"mappingQuality\\\":23.89\",         observedResult);\r\n            Assert.Contains(\"\\\"fisherStrandBias\\\":12.123\",      observedResult);\r\n            Assert.Contains(\"vcfInfo\",                         observedResult);\r\n            Assert.Contains(\"\\\"SGT\\\":\\\"TT->GT\\\"\",               observedResult);\r\n            Assert.Contains(\"\\\"SOMATIC\\\":\\\"true\\\"\",             observedResult);\r\n        }\r\n\r\n        
[Fact]\r\n        public void GetJsonString_BreakEndEventId()\r\n        {\r\n            const string vcfLine = \"1\\t38432782\\tMantaBND:2312:0:1:0:0:0:0\\tG\\tG]6:28863899]\\t971\\tPASS\\tSVTYPE=BND;MATEID=MantaBND:2312:0:1:0:0:0:1;EVENT=MantaBND:2312:0:1:0:0:0:0;JUNCTION_QUAL=716;BND_DEPTH=52;MATE_BND_DEPTH=56\";\r\n\r\n            var refMinorProvider = new Mock<IRefMinorProvider>();\r\n            var seqProvider      = ParserTestUtils.GetSequenceProvider(38432782, \"G\", 'C', ChromosomeUtilities.RefNameToChromosome);\r\n            var variantFactory   = new VariantFactory(seqProvider.Sequence, new VariantId());\r\n\r\n            var position = AnnotationUtilities.ParseVcfLine(vcfLine, refMinorProvider.Object, seqProvider, null, variantFactory);\r\n\r\n            IVariant[]          variants          = GetVariants();\r\n            IAnnotatedVariant[] annotatedVariants = Annotator.GetAnnotatedVariants(variants);\r\n            var                 annotatedPosition = AnnotatedPositionPool.Get(position, annotatedVariants);\r\n\r\n            var sb             = annotatedPosition.GetJsonStringBuilder();\r\n            var observedResult = sb.ToString();\r\n            StringBuilderPool.Return(sb);\r\n            PositionPool.Return((Position)annotatedPosition.Position);\r\n            AnnotatedPositionPool.Return(annotatedPosition);\r\n\r\n            Assert.NotNull(observedResult);\r\n            Assert.Contains(\"\\\"breakendEventId\\\":\\\"MantaBND:2312:0:1:0:0:0:0\\\"\", observedResult);\r\n        }\r\n        \r\n        [Fact]\r\n        public void GetJsonString_LogOddsRatio()\r\n        {\r\n            const string vcfLine = \"1\\t38432782\\tMantaBND:2312:0:1:0:0:0:0\\tG\\tG]6:28863899]\\t971\\tPASS\\tSVTYPE=BND;LOD=3.1456;MATEID=MantaBND:2312:0:1:0:0:0:1;EVENT=MantaBND:2312:0:1:0:0:0:0;JUNCTION_QUAL=716;BND_DEPTH=52;MATE_BND_DEPTH=56\";\r\n\r\n            var refMinorProvider = new Mock<IRefMinorProvider>();\r\n            var seqProvider      = 
ParserTestUtils.GetSequenceProvider(38432782, \"G\", 'C', ChromosomeUtilities.RefNameToChromosome);\r\n            var variantFactory   = new VariantFactory(seqProvider.Sequence, new VariantId());\r\n\r\n            var position = AnnotationUtilities.ParseVcfLine(vcfLine, refMinorProvider.Object, seqProvider, null, variantFactory);\r\n\r\n            IVariant[]          variants          = GetVariants();\r\n            IAnnotatedVariant[] annotatedVariants = Annotator.GetAnnotatedVariants(variants);\r\n            var                 annotatedPosition = AnnotatedPositionPool.Get(position, annotatedVariants);\r\n\r\n            var sb             = annotatedPosition.GetJsonStringBuilder();\r\n            var observedResult = sb.ToString();\r\n            StringBuilderPool.Return(sb);\r\n            PositionPool.Return((Position)annotatedPosition.Position);\r\n            AnnotatedPositionPool.Return(annotatedPosition);\r\n\r\n            Assert.NotNull(observedResult);\r\n            Assert.Contains(\"\\\"logOddsRatio\\\":3.146\", observedResult);\r\n        }\r\n        \r\n        private static ISample[] GetSamples() => new ISample[] { Sample.EmptySample };\r\n\r\n        private static IVariant[] GetVariants()\r\n        {\r\n            var variant = new Mock<IVariant>();\r\n            variant.SetupGet(x => x.Chromosome).Returns(ChromosomeUtilities.Chr1);\r\n            variant.SetupGet(x => x.Type).Returns(VariantType.SNV);\r\n            variant.SetupGet(x => x.Start).Returns(949523);\r\n            variant.SetupGet(x => x.End).Returns(949523);\r\n            variant.SetupGet(x => x.RefAllele).Returns(\"C\");\r\n            variant.SetupGet(x => x.AltAllele).Returns(\"T\");\r\n            variant.SetupGet(x => x.Behavior).Returns(AnnotationBehavior.SmallVariants);\r\n            return new[] { variant.Object };\r\n        }\r\n\r\n        private static IPosition GetPosition(string originalChromosomeName, IVariant[] variants, ISample[] samples)\r\n        
{\r\n            var vcfFields = new string[8];\r\n            vcfFields[0] = originalChromosomeName;\r\n            InfoData infoData = new InfoDataBuilder().Create();\r\n\r\n            return PositionPool.Get(ChromosomeUtilities.Chr1, 949523, 949523, \"C\", new[] {\"T\"}, null, null, variants, samples, infoData,\r\n                vcfFields, new[] { false }, false);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/AnnotatedVariantTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Moq;\r\nusing OptimizedCore;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.AnnotatedPositions;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Caches;\r\nusing VariantAnnotation.Pools;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions\r\n{\r\n    public sealed class AnnotatedVariantTests\r\n    {\r\n        private const string OriginalChromosomeName = \"BoB\";\r\n        \r\n        [Fact]\r\n        public void GetJsonString_RefMinor_WithTranscripts()\r\n        {\r\n            IVariant variant          = GetRefMinorVariant();\r\n            var      annotatedVariant = AnnotatedVariantPool.Get(variant);\r\n\r\n            AddRegulatoryRegion(annotatedVariant);\r\n            AddTranscript(annotatedVariant);\r\n\r\n            const string expectedResult = \"{\\\"vid\\\":\\\"bob:100:G\\\",\\\"chromosome\\\":\\\"BoB\\\",\\\"begin\\\":100,\\\"end\\\":200,\\\"isReferenceMinorAllele\\\":true,\\\"refAllele\\\":\\\"A\\\",\\\"altAllele\\\":\\\"G\\\",\\\"variantType\\\":\\\"SNV\\\",\\\"linkedVids\\\":[\\\"bob:100:102:TAT\\\"],\\\"regulatoryRegions\\\":[{\\\"id\\\":\\\"7157\\\",\\\"type\\\":\\\"TF_binding_site\\\",\\\"consequence\\\":[\\\"regulatory_region_amplification\\\"]}],\\\"transcripts\\\":[]}\";\r\n            var sb = annotatedVariant.GetJsonStringBuilder(OriginalChromosomeName);\r\n            var observedResult = sb.ToString();\r\n            StringBuilderPool.Return(sb);\r\n            VariantPool.Return((Variant)variant);\r\n            AnnotatedVariantPool.Return(annotatedVariant);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetJsonString_RecomposedSnvAfterTrimming_IsRecomposedTrue()\r\n        {\r\n            
IVariant variant = VariantPool.Get(ChromosomeUtilities.Bob, 100, 200, \"A\", \"G\", VariantType.SNV, \"bob-100-A-G\", false, false, true,\r\n                new[] { \"bob-100-A-G\" }, AnnotationBehavior.SmallVariants, false);\r\n            var annotatedVariant = AnnotatedVariantPool.Get(variant);\r\n\r\n            const string expectedResult = \"{\\\"vid\\\":\\\"bob-100-A-G\\\",\\\"chromosome\\\":\\\"BoB\\\",\\\"begin\\\":100,\\\"end\\\":200,\\\"refAllele\\\":\\\"A\\\",\\\"altAllele\\\":\\\"G\\\",\\\"variantType\\\":\\\"SNV\\\",\\\"isRecomposedVariant\\\":true,\\\"linkedVids\\\":[\\\"bob-100-A-G\\\"]}\";\r\n            var sb = annotatedVariant.GetJsonStringBuilder(OriginalChromosomeName);\r\n            var observedResult = sb.ToString();\r\n            StringBuilderPool.Return(sb);\r\n            AnnotatedVariantPool.Return(annotatedVariant);\r\n            Assert.Equal(expectedResult, observedResult);\r\n            VariantPool.Return((Variant)variant);\r\n        }\r\n\r\n        private static void AddRegulatoryRegion(IAnnotatedVariant annotatedVariant)\r\n        {\r\n            var regulatoryRegion = new RegulatoryRegion(ChromosomeUtilities.Bob, 103, 104, CompactId.Convert(\"7157\"),\r\n                RegulatoryRegionType.TF_binding_site);\r\n            var consequences = new List<ConsequenceTag> { ConsequenceTag.regulatory_region_amplification };\r\n\r\n            annotatedVariant.RegulatoryRegions.Add(new AnnotatedRegulatoryRegion(regulatoryRegion, consequences));\r\n        }\r\n\r\n        private static void AddTranscript(IAnnotatedVariant annotatedVariant)\r\n        {\r\n            var annotatedTranscript = new Mock<IAnnotatedTranscript>();\r\n            annotatedTranscript.SetupGet(x => x.Transcript.Id).Returns(CompactId.Convert(\"ENST00000540021\"));\r\n            annotatedTranscript.SetupGet(x => x.Transcript.Start).Returns(966300);\r\n            annotatedTranscript.SetupGet(x => x.Transcript.End).Returns(966405);\r\n            
annotatedTranscript.SetupGet(x => x.AlternateCodons).Returns(\"cAt/cGt\");\r\n\r\n            annotatedVariant.Transcripts.Add(annotatedTranscript.Object);\r\n        }\r\n\r\n        private static IVariant GetRefMinorVariant()\r\n        {\r\n            return  VariantPool.Get(ChromosomeUtilities.Bob, 100, 200, \"A\", \"G\", VariantType.SNV, \"bob:100:G\", true, false, false,\r\n                new[] { \"bob:100:102:TAT\" }, AnnotationBehavior.SmallVariants, false);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/ConsequenceTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Moq;\r\nusing VariantAnnotation.AnnotatedPositions.Consequence;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Pools;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions\r\n{\r\n    public sealed class ConsequenceTests\r\n    {\r\n        [Theory]\r\n        [InlineData(false, ConsequenceTag.upstream_gene_variant)]\r\n        [InlineData(true, ConsequenceTag.downstream_gene_variant)]\r\n        public void DetermineFlankingVariantEffects(bool isDownStreamVariant, ConsequenceTag expectedConsequence)\r\n        {\r\n            List<ConsequenceTag> observedConsequences = Consequences.DetermineFlankingVariantEffects(isDownStreamVariant);\r\n            Assert.Single(observedConsequences);\r\n            Assert.Equal(expectedConsequence, observedConsequences[0]);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(VariantType.deletion,         true,  false, ConsequenceTag.transcript_ablation)]\r\n        [InlineData(VariantType.copy_number_gain, false, true,  ConsequenceTag.transcript_amplification)]\r\n        public void DetermineSmallVariantEffects_Tier1(VariantType variantType, bool isAblation, bool isAmplification, ConsequenceTag expectedResult)\r\n        {\r\n            var featureEffect = new Mock<IFeatureVariantEffects>();\r\n            featureEffect.Setup(x => x.Ablation()).Returns(isAblation);\r\n            featureEffect.Setup(x => x.Amplification()).Returns(isAmplification);\r\n\r\n            var variantEffect = new Mock<IVariantEffect>();\r\n\r\n            // make sure these tier 2 effects don't show up\r\n            featureEffect.Setup(x => x.Elongation()).Returns(true);\r\n            variantEffect.Setup(x => x.IsMatureMirnaVariant()).Returns(true);\r\n\r\n            var consequence = new Consequences(variantType, variantEffect.Object, 
featureEffect.Object);\r\n            consequence.DetermineSmallVariantEffects();\r\n\r\n            List<ConsequenceTag> observedConsequences = consequence.GetConsequences();\r\n            Assert.Contains(expectedResult, observedConsequences);\r\n        }\r\n\r\n        [Fact]\r\n        public void DetermineSmallVariantEffects_Tier2()\r\n        {\r\n            var featureEffect = new Mock<IFeatureVariantEffects>();\r\n            featureEffect.Setup(x => x.Ablation()).Returns(false);\r\n            featureEffect.Setup(x => x.Amplification()).Returns(false);\r\n\r\n            var variantEffect = new Mock<IVariantEffect>();\r\n            variantEffect.Setup(x => x.IsMatureMirnaVariant()).Returns(true);\r\n\r\n            // make sure these tier 3 effects don't show up\r\n            variantEffect.Setup(x => x.IsStartLost()).Returns(true);\r\n\r\n            var consequence = new Consequences(VariantType.SNV, variantEffect.Object, featureEffect.Object);\r\n            consequence.DetermineSmallVariantEffects();\r\n\r\n            List<ConsequenceTag> observedConsequences = consequence.GetConsequences();\r\n            Assert.Single(observedConsequences);\r\n            Assert.Equal(ConsequenceTag.mature_miRNA_variant, observedConsequences[0]);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(VariantType.SNV,                           true)]\r\n        [InlineData(VariantType.insertion,                     true)]\r\n        [InlineData(VariantType.deletion,                      true)]\r\n        [InlineData(VariantType.indel,                         true)]\r\n        [InlineData(VariantType.MNV,                           true)]\r\n        [InlineData(VariantType.duplication,                   false)] // no change\r\n        [InlineData(VariantType.complex_structural_alteration, true)]\r\n        [InlineData(VariantType.structural_alteration,         true)]\r\n        [InlineData(VariantType.tandem_duplication,            false)] // no change\r\n      
  [InlineData(VariantType.translocation_breakend,        true)]\r\n        [InlineData(VariantType.inversion,                     true)]\r\n        [InlineData(VariantType.short_tandem_repeat_variation, true)]\r\n        [InlineData(VariantType.copy_number_variation,         false)] // no change\r\n        [InlineData(VariantType.copy_number_loss,              false)] // no change\r\n        [InlineData(VariantType.copy_number_gain,              false)] // no change\r\n        [InlineData(VariantType.run_of_homozygosity,           false)] // no change\r\n        public void NeedsTranscriptVariant_NoConsequences_EvaluateByVariantType(VariantType variantType, bool expectedResult)\r\n        {\r\n            var  consequences   = new List<ConsequenceTag>();\r\n            bool observedResult = Consequences.NeedsTranscriptVariant(variantType, consequences);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n        \r\n        [Theory]\r\n        [InlineData(ConsequenceTag.transcript_ablation,      true)]  // parallel\r\n        [InlineData(ConsequenceTag.transcript_amplification, false)] // parallel, no change\r\n        public void NeedsTranscriptVariant_Tier1(ConsequenceTag consequence, bool expectedResult)\r\n        {\r\n            var  consequences   = new List<ConsequenceTag> {consequence};\r\n            bool observedResult = Consequences.NeedsTranscriptVariant(VariantType.unknown, consequences);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void NeedsTranscriptVariant_Tier2_MatureMirnaVariant()\r\n        {\r\n            var  consequences   = new List<ConsequenceTag> {ConsequenceTag.mature_miRNA_variant};\r\n            bool observedResult = Consequences.NeedsTranscriptVariant(VariantType.unknown, consequences);\r\n            Assert.False(observedResult);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(ConsequenceTag.splice_donor_variant,               
false)]\r\n        [InlineData(ConsequenceTag.splice_acceptor_variant,            false)]\r\n        [InlineData(ConsequenceTag.stop_gained,                        false)]\r\n        [InlineData(ConsequenceTag.frameshift_variant,                 false)]\r\n        [InlineData(ConsequenceTag.stop_lost,                          false)]\r\n        [InlineData(ConsequenceTag.start_lost,                         false)]\r\n        [InlineData(ConsequenceTag.inframe_insertion,                  false)]\r\n        [InlineData(ConsequenceTag.inframe_deletion,                   false)]\r\n        [InlineData(ConsequenceTag.missense_variant,                   false)]\r\n        [InlineData(ConsequenceTag.protein_altering_variant,           false)]\r\n        [InlineData(ConsequenceTag.splice_region_variant,              false)]\r\n        [InlineData(ConsequenceTag.incomplete_terminal_codon_variant,  false)]\r\n        [InlineData(ConsequenceTag.start_retained_variant,             false)]\r\n        [InlineData(ConsequenceTag.stop_retained_variant,              false)]\r\n        [InlineData(ConsequenceTag.synonymous_variant,                 false)]\r\n        [InlineData(ConsequenceTag.coding_sequence_variant,            false)]\r\n        [InlineData(ConsequenceTag.five_prime_UTR_variant,             false)]\r\n        [InlineData(ConsequenceTag.three_prime_UTR_variant,            false)]\r\n        [InlineData(ConsequenceTag.non_coding_transcript_exon_variant, false)]\r\n        [InlineData(ConsequenceTag.intron_variant,                     false)]\r\n        [InlineData(ConsequenceTag.NMD_transcript_variant,             false)]\r\n        [InlineData(ConsequenceTag.non_coding_transcript_variant,      false)]\r\n        [InlineData(ConsequenceTag.feature_elongation,                 true)] // parallel\r\n        [InlineData(ConsequenceTag.feature_truncation,                 true)] // parallel\r\n        public void NeedsTranscriptVariant_Tier3(ConsequenceTag consequence, 
bool expectedResult)\r\n        {\r\n            var  consequences   = new List<ConsequenceTag> {consequence};\r\n            bool observedResult = Consequences.NeedsTranscriptVariant(VariantType.unknown, consequences);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(ConsequenceTag.feature_elongation,                true)]  // parallel\r\n        [InlineData(ConsequenceTag.feature_truncation,                true)]  // parallel\r\n        [InlineData(ConsequenceTag.five_prime_duplicated_transcript,  false)] // child\r\n        [InlineData(ConsequenceTag.three_prime_duplicated_transcript, false)] // child\r\n        public void NeedsTranscriptVariant_Tier2_SV(ConsequenceTag consequence, bool expectedResult)\r\n        {\r\n            var  consequences   = new List<ConsequenceTag> {consequence};\r\n            bool observedResult = Consequences.NeedsTranscriptVariant(VariantType.unknown, consequences);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(ConsequenceTag.copy_number_increase, false)] // no change\r\n        [InlineData(ConsequenceTag.copy_number_decrease, false)] // no change\r\n        [InlineData(ConsequenceTag.copy_number_change,   false)] // no change\r\n        public void NeedsTranscriptVariant_CNV(ConsequenceTag consequence, bool expectedResult)\r\n        {\r\n            var  consequences   = new List<ConsequenceTag> {consequence};\r\n            bool observedResult = Consequences.NeedsTranscriptVariant(VariantType.unknown, consequences);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(ConsequenceTag.short_tandem_repeat_change,      true)] // parallel\r\n        [InlineData(ConsequenceTag.short_tandem_repeat_expansion,   true)] // parallel\r\n        [InlineData(ConsequenceTag.short_tandem_repeat_contraction, true)] // parallel\r\n   
     public void NeedsTranscriptVariant_STR(ConsequenceTag consequence, bool expectedResult)\r\n        {\r\n            var  consequences   = new List<ConsequenceTag> {consequence};\r\n            bool observedResult = Consequences.NeedsTranscriptVariant(VariantType.unknown, consequences);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void DetermineSmallVariantEffects_Tier3()\r\n        {\r\n            var cache = new VariantEffectCache();\r\n            cache.Add(ConsequenceTag.mature_miRNA_variant, false);\r\n\r\n            cache.Add(ConsequenceTag.splice_donor_variant,               true);\r\n            cache.Add(ConsequenceTag.splice_acceptor_variant,            true);\r\n            cache.Add(ConsequenceTag.stop_gained,                        true);\r\n            cache.Add(ConsequenceTag.frameshift_variant,                 true);\r\n            cache.Add(ConsequenceTag.stop_lost,                          true);\r\n            cache.Add(ConsequenceTag.start_lost,                         true);\r\n            cache.Add(ConsequenceTag.inframe_insertion,                  true);\r\n            cache.Add(ConsequenceTag.inframe_deletion,                   true);\r\n            cache.Add(ConsequenceTag.missense_variant,                   true);\r\n            cache.Add(ConsequenceTag.protein_altering_variant,           true);\r\n            cache.Add(ConsequenceTag.splice_region_variant,              true);\r\n            cache.Add(ConsequenceTag.incomplete_terminal_codon_variant,  true);\r\n            cache.Add(ConsequenceTag.stop_retained_variant,              true);\r\n            cache.Add(ConsequenceTag.synonymous_variant,                 true);\r\n            cache.Add(ConsequenceTag.coding_sequence_variant,            true);\r\n            cache.Add(ConsequenceTag.five_prime_UTR_variant,             true);\r\n            cache.Add(ConsequenceTag.three_prime_UTR_variant,            
true);\r\n            cache.Add(ConsequenceTag.non_coding_transcript_exon_variant, true);\r\n            cache.Add(ConsequenceTag.intron_variant,                     true);\r\n            cache.Add(ConsequenceTag.NMD_transcript_variant,             true);\r\n            cache.Add(ConsequenceTag.non_coding_transcript_variant,      true);\r\n\r\n            var simpleVariant = new Mock<ISimpleVariant>();\r\n            simpleVariant.SetupGet(x => x.RefAllele).Returns(\"G\");\r\n            simpleVariant.SetupGet(x => x.AltAllele).Returns(\"C\");\r\n\r\n            var positionalEffect = new TranscriptPositionalEffect {IsWithinIntron = true};\r\n            var variantEffect    = new VariantEffect(positionalEffect, simpleVariant.Object, null, null, null, null, null, null, null, null, cache);\r\n\r\n            var featureEffect = new Mock<IFeatureVariantEffects>();\r\n            featureEffect.Setup(x => x.Ablation()).Returns(false);\r\n            featureEffect.Setup(x => x.Amplification()).Returns(false);\r\n            featureEffect.Setup(x => x.Truncation()).Returns(true);\r\n            featureEffect.Setup(x => x.Elongation()).Returns(true);\r\n\r\n            var consequence = new Consequences(VariantType.SNV, variantEffect, featureEffect.Object);\r\n\r\n            consequence.DetermineSmallVariantEffects();\r\n            List<ConsequenceTag> observedConsequence = consequence.GetConsequences();\r\n            Assert.Equal(ConsequenceTag.splice_donor_variant,               observedConsequence[0]);\r\n            Assert.Equal(ConsequenceTag.splice_acceptor_variant,            observedConsequence[1]);\r\n            Assert.Equal(ConsequenceTag.stop_gained,                        observedConsequence[2]);\r\n            Assert.Equal(ConsequenceTag.frameshift_variant,                 observedConsequence[3]);\r\n            Assert.Equal(ConsequenceTag.stop_lost,                          observedConsequence[4]);\r\n            Assert.Equal(ConsequenceTag.start_lost,   
                      observedConsequence[5]);\r\n            Assert.Equal(ConsequenceTag.inframe_insertion,                  observedConsequence[6]);\r\n            Assert.Equal(ConsequenceTag.inframe_deletion,                   observedConsequence[7]);\r\n            Assert.Equal(ConsequenceTag.missense_variant,                   observedConsequence[8]);\r\n            Assert.Equal(ConsequenceTag.protein_altering_variant,           observedConsequence[9]);\r\n            Assert.Equal(ConsequenceTag.splice_region_variant,              observedConsequence[10]);\r\n            Assert.Equal(ConsequenceTag.incomplete_terminal_codon_variant,  observedConsequence[11]);\r\n            Assert.Equal(ConsequenceTag.stop_retained_variant,              observedConsequence[12]);\r\n            Assert.Equal(ConsequenceTag.synonymous_variant,                 observedConsequence[13]);\r\n            Assert.Equal(ConsequenceTag.coding_sequence_variant,            observedConsequence[14]);\r\n            Assert.Equal(ConsequenceTag.five_prime_UTR_variant,             observedConsequence[15]);\r\n            Assert.Equal(ConsequenceTag.three_prime_UTR_variant,            observedConsequence[16]);\r\n            Assert.Equal(ConsequenceTag.non_coding_transcript_exon_variant, observedConsequence[17]);\r\n            Assert.Equal(ConsequenceTag.intron_variant,                     observedConsequence[18]);\r\n            Assert.Equal(ConsequenceTag.NMD_transcript_variant,             observedConsequence[19]);\r\n            Assert.Equal(ConsequenceTag.non_coding_transcript_variant,      observedConsequence[20]);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(true, true)]\r\n        [InlineData(true, false)]\r\n        [InlineData(false, true)]\r\n        [InlineData(false, false)]\r\n        public void DetermineRegulatoryVariantEffects(bool isAmplification, bool isAblation)\r\n        {\r\n            var featureEffect = new Mock<IFeatureVariantEffects>();\r\n            
featureEffect.Setup(x => x.Ablation()).Returns(isAblation);\r\n            featureEffect.Setup(x => x.Amplification()).Returns(isAmplification);\r\n\r\n            var consequence = new Consequences(VariantType.unknown, null, featureEffect.Object);\r\n            consequence.DetermineRegulatoryVariantEffects();\r\n            List<ConsequenceTag> observedConsequences = consequence.GetConsequences();\r\n\r\n            Assert.Contains(ConsequenceTag.regulatory_region_variant, observedConsequences);\r\n            if (isAblation)\r\n            {\r\n                Assert.Contains(ConsequenceTag.regulatory_region_ablation, observedConsequences);\r\n            }\r\n            else\r\n            {\r\n                Assert.DoesNotContain(ConsequenceTag.regulatory_region_ablation, observedConsequences);\r\n            }\r\n\r\n            if (isAmplification)\r\n            {\r\n                Assert.Contains(ConsequenceTag.regulatory_region_amplification, observedConsequences);\r\n            }\r\n            else\r\n            {\r\n                Assert.DoesNotContain(ConsequenceTag.regulatory_region_amplification, observedConsequences);\r\n            }\r\n        }\r\n\r\n        public static IEnumerable<object[]> SvTheoryParameters()\r\n        {\r\n            yield return new object[] {VariantType.copy_number_loss,   true,  false, false, false, false, false, new[] {ConsequenceTag.transcript_ablation, ConsequenceTag.copy_number_decrease}};\r\n            yield return new object[] {VariantType.copy_number_gain,   false, true,  false, false, false, false, new[] {ConsequenceTag.transcript_amplification, ConsequenceTag.copy_number_increase}};\r\n            yield return new object[] {VariantType.deletion,           true,  false, true,  false, false, false, new[] {ConsequenceTag.transcript_ablation, ConsequenceTag.transcript_variant}};\r\n            yield return new object[] {VariantType.duplication,        false, true,  true,  false, false, false, new[] 
{ConsequenceTag.transcript_amplification}};\r\n            yield return new object[] {VariantType.tandem_duplication, false, false, true,  false, false, false, new[] {ConsequenceTag.feature_elongation, ConsequenceTag.transcript_variant}};\r\n            yield return new object[] {VariantType.copy_number_loss,   false, false, false, true,  false, false, new[] {ConsequenceTag.feature_truncation, ConsequenceTag.copy_number_decrease}};\r\n            yield return new object[] {VariantType.copy_number_gain,   false, false, false, false, true,  false, new[] {ConsequenceTag.five_prime_duplicated_transcript, ConsequenceTag.copy_number_increase}};\r\n            yield return new object[] {VariantType.duplication,        false, false, false, false, false, true,  new[] {ConsequenceTag.three_prime_duplicated_transcript}};\r\n        }\r\n\r\n        [Theory]\r\n        [MemberData(nameof(SvTheoryParameters))]\r\n        public void DetermineStructuralVariantEffect(VariantType variantType, bool isAblation, bool isAmplification, bool isElongation,\r\n                                                     bool isTruncation, bool isFivePrimeDuplicatedTranscript, bool isThreePrimeDuplicatedTranscript,\r\n                                                     ConsequenceTag[] expectedResults)\r\n        {\r\n            IFeatureVariantEffects featureVariantEffects = GetFeatureVariantEffects(isAblation, isAmplification, isTruncation, isElongation,\r\n                isFivePrimeDuplicatedTranscript, isThreePrimeDuplicatedTranscript);\r\n\r\n            var variant = VariantPool.Get(null, 0, 0, null, null, variantType, null, false, false, false, null, AnnotationBehavior.StructuralVariants,\r\n                true);\r\n\r\n            var consequence = new Consequences(variantType, null, featureVariantEffects);\r\n            consequence.DetermineStructuralVariantEffect(variant);\r\n            ConsequenceTag[] observedResults = consequence.GetConsequences().ToArray();\r\n\r\n            
Assert.Equal(expectedResults, observedResults);\r\n            VariantPool.Return(variant);\r\n        }\r\n\r\n        private static IFeatureVariantEffects GetFeatureVariantEffects(bool isAblation,   bool isAmplification,\r\n                                                                       bool isTruncation, bool isElongation,\r\n                                                                       bool isFivePrimeDuplicatedTranscript,\r\n                                                                       bool isThreePrimeDuplicatedTranscript)\r\n        {\r\n            var featureEffectsMock = new Mock<IFeatureVariantEffects>();\r\n            featureEffectsMock.Setup(x => x.Ablation()).Returns(isAblation);\r\n            featureEffectsMock.Setup(x => x.Amplification()).Returns(isAmplification);\r\n            featureEffectsMock.Setup(x => x.Elongation()).Returns(isElongation);\r\n            featureEffectsMock.Setup(x => x.Truncation()).Returns(isTruncation);\r\n            featureEffectsMock.Setup(x => x.FivePrimeDuplicatedTranscript()).Returns(isFivePrimeDuplicatedTranscript);\r\n            featureEffectsMock.Setup(x => x.ThreePrimeDuplicatedTranscript()).Returns(isThreePrimeDuplicatedTranscript);\r\n            return featureEffectsMock.Object;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/HgvsCodingNomenclatureTests.cs",
    "content": "﻿using Genome;\r\nusing Moq;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.AnnotatedPositions;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions\r\n{\r\n    public sealed class HgvsCodingNomenclatureTests\r\n    {\r\n        private readonly ITranscript _forwardTranscript;\r\n        private readonly ITranscript _reverseTranscript;\r\n        private readonly ITranscript _gapTranscript;\r\n\r\n        public HgvsCodingNomenclatureTests()\r\n        {\r\n            _forwardTranscript = GetForwardTranscript();\r\n            _reverseTranscript = GetReverseTranscript();\r\n            _gapTranscript     = GetGapTranscript();\r\n        }\r\n\r\n        internal static ITranscript GetForwardTranscript()\r\n        {\r\n            // get info from ENST00000343938.4 \r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 1260147, 1260482, 1, 336),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 1260483, 1262215, 336, 337),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 2, 1262216, 1262412, 337, 533),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 2, 1262413, 1262620, 533, 534),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 3, 1262621, 1264277, 534, 2190)\r\n            };\r\n\r\n            var translation = new Mock<ITranslation>();\r\n            translation.SetupGet(x => x.CodingRegion).Returns(new CodingRegion(1262291, 1263143, 412, 1056, 645));\r\n\r\n            var transcript = new Mock<ITranscript>();\r\n            transcript.SetupGet(x => x.Id).Returns(CompactId.Convert(\"ENST00000343938\", 4));\r\n            transcript.SetupGet(x => 
x.Chromosome).Returns(ChromosomeUtilities.Chr1);\r\n            transcript.SetupGet(x => x.Start).Returns(1260147);\r\n            transcript.SetupGet(x => x.End).Returns(1264277);\r\n            transcript.SetupGet(x => x.Gene.OnReverseStrand).Returns(false);\r\n            transcript.SetupGet(x => x.TranscriptRegions).Returns(regions);\r\n            transcript.SetupGet(x => x.Translation).Returns(translation.Object);\r\n            transcript.SetupGet(x => x.TotalExonLength).Returns(2190);\r\n            return transcript.Object;\r\n        }\r\n\r\n        private static ITranscript GetForwardTranscriptWithoutUtr()\r\n        {\r\n            //ENST00000579622.1  chrX:70361035-70361156, non-coding, forward strand, no utr\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 70361035,70361156, 1, 122)\r\n            };\r\n\r\n            var transcript = new Mock<ITranscript>();\r\n            transcript.SetupGet(x => x.Id).Returns(CompactId.Convert(\"ENST00000579622\", 1));\r\n            transcript.SetupGet(x => x.Chromosome).Returns(ChromosomeUtilities.ChrX);\r\n            transcript.SetupGet(x => x.Start).Returns(70361035);\r\n            transcript.SetupGet(x => x.End).Returns(70361156);\r\n            transcript.SetupGet(x => x.Gene.OnReverseStrand).Returns(false);\r\n            transcript.SetupGet(x => x.TranscriptRegions).Returns(regions);\r\n            transcript.SetupGet(x => x.TotalExonLength).Returns(122);\r\n            return transcript.Object;\r\n        }\r\n        internal static ITranscript GetReverseTranscript()\r\n        {\r\n            // get info from \"ENST00000423372.3\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 2, 134901, 135802, 1760, 2661),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 135803, 137620, 1759, 1760),\r\n            
    new TranscriptRegion(TranscriptRegionType.Exon, 1, 137621, 139379, 1, 1759)\r\n            };\r\n\r\n            var translation = new Mock<ITranslation>();\r\n            translation.SetupGet(x => x.CodingRegion).Returns(new CodingRegion(138530, 139309, 71, 850, 780));\r\n\r\n            var transcript = new Mock<ITranscript>();\r\n            transcript.SetupGet(x => x.Id).Returns(CompactId.Convert(\"ENST00000423372\", 3));\r\n            transcript.SetupGet(x => x.Chromosome).Returns(ChromosomeUtilities.Chr1);\r\n            transcript.SetupGet(x => x.Start).Returns(134901);\r\n            transcript.SetupGet(x => x.End).Returns(139379);\r\n            transcript.SetupGet(x => x.Gene.OnReverseStrand).Returns(true);\r\n            transcript.SetupGet(x => x.TranscriptRegions).Returns(regions);\r\n            transcript.SetupGet(x => x.Translation).Returns(translation.Object);\r\n            transcript.SetupGet(x => x.TotalExonLength).Returns(2661);\r\n            return transcript.Object;\r\n        }\r\n\r\n        private static ITranscript GetGapTranscript()\r\n        {\r\n            //NM_000314.4\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 89623195, 89623860, 1, 666),\r\n                new TranscriptRegion(TranscriptRegionType.Gap, 1, 89623861, 89623861, 666, 667),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 89623862, 89624305, 667, 1110),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 89624306, 89653781, 1110, 1111),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 2, 89653782, 89653866, 1111, 1195),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 2, 89653867, 89685269, 1195, 1196),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 3, 89685270, 89685314, 1196, 1240),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 3, 89685315, 
89690802, 1240, 1241),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 4, 89690803, 89690846, 1241, 1284),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 4, 89690847, 89692769, 1284, 1285),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 5, 89692770, 89693008, 1285, 1523),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 5, 89693009, 89711874, 1523, 1524),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 6, 89711875, 89712016, 1524, 1665),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 6, 89712017, 89717609, 1665, 1666),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 7, 89717610, 89717776, 1666, 1832),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 7, 89717777, 89720650, 1832, 1833),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 8, 89720651, 89720875, 1833, 2057),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 8, 89720876, 89725043, 2057, 2058),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 9, 89725044, 89728532, 2058, 5546)\r\n            };\r\n\r\n            var translation = new Mock<ITranslation>();\r\n            translation.SetupGet(x => x.CodingRegion).Returns(new CodingRegion(89624227, 89725229, 1032, 2243, 1212));\r\n            \r\n            var rnaEdits    = new IRnaEdit[3];\r\n            rnaEdits[0] = new RnaEdit(667,  667, null);\r\n            rnaEdits[1] = new RnaEdit(707,  707, \"C\");\r\n            rnaEdits[2] = new RnaEdit(5548, 5547, \"AAAAAAAAAAAAAAAAAAAAAAAAAA\");\r\n            \r\n            var transcript = new Mock<ITranscript>();\r\n            transcript.SetupGet(x => x.Id).Returns(CompactId.Convert(\"NM_000314\", 4));\r\n            transcript.SetupGet(x => x.Chromosome).Returns(ChromosomeUtilities.Chr10);\r\n            transcript.SetupGet(x => x.Start).Returns(89623195);\r\n            transcript.SetupGet(x 
=> x.End).Returns(89728532);\r\n            transcript.SetupGet(x => x.Gene.OnReverseStrand).Returns(false);\r\n            transcript.SetupGet(x => x.TranscriptRegions).Returns(regions);\r\n            transcript.SetupGet(x => x.Translation).Returns(translation.Object);\r\n            transcript.SetupGet(x => x.TotalExonLength).Returns(5546);\r\n            transcript.SetupGet(x => x.RnaEdits).Returns(rnaEdits);\r\n            return transcript.Object;\r\n        }\r\n        \r\n        [Theory]\r\n        [InlineData(89623861, 89623861, \"T\", \"\", \"T\", VariantType.deletion, null)]\r\n        [InlineData(89623861, 89623861, \"T\", \"G\", \"T\", VariantType.SNV, null)]\r\n        [InlineData(89623901, 89623901, \"G\", \"C\", \"C\", VariantType.SNV, \"NM_000314.4:c.-326=\")]\r\n        [InlineData(89623901, 89623901, \"G\", \"T\", \"C\", VariantType.SNV, \"NM_000314.4:c.-326C>T\")]\r\n        [InlineData(89623861, 89623863, \"TGG\", \"\", \"GG\", VariantType.deletion, \"NM_000314.4:c.-365_-364del\")]\r\n        [InlineData(89623859, 89623861, \"GCT\", \"\", \"GC\", VariantType.deletion, \"NM_000314.4:c.-367_-366del\")]\r\n        [InlineData(89623860, 89623862, \"CTG\", \"\", \"CG\", VariantType.deletion, \"NM_000314.4:c.-366_-365del\")]\r\n        [InlineData(89624304, 89624308, \"CTGTA\", \"\", \"CT\", VariantType.deletion, \"NM_000314.4:c.78_79+3del\")]\r\n        [InlineData(89624308, 89624310, \"ATC\", \"\", \"ATC\", VariantType.deletion, \"NM_000314.4:c.79+3_79+5del\")]\r\n        public void GetHgvscAnnotation_in_intron_gap_substitution(int variantStart, int variantEnd, string reference, string alt,\r\n                                                                  string transcriptRef, VariantType variantType, string expectedHgvsc)\r\n        {\r\n            var (startIndex, _) =\r\n                MappedPositionUtilities.FindRegion(_gapTranscript.TranscriptRegions, variantStart);\r\n            var (endIndex, _) =\r\n                
MappedPositionUtilities.FindRegion(_gapTranscript.TranscriptRegions, variantEnd);\r\n            var variant = new SimpleVariant(ChromosomeUtilities.Chr10, variantStart, variantEnd, reference, alt, variantType);\r\n            var observedHgvsc =\r\n                HgvsCodingNomenclature.GetHgvscAnnotation(_gapTranscript, variant, null, startIndex, endIndex, transcriptRef, null);\r\n            \r\n            Assert.Equal(expectedHgvsc, observedHgvsc);\r\n        }\r\n        [Fact]\r\n        public void GetHgvscAnnotation_substitution_in_3UTR()\r\n        {\r\n            var variant       = new SimpleVariant(ChromosomeUtilities.Chr1, 1260247, 1260247, \"A\", \"G\", VariantType.SNV);\r\n            var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, 0, 0, null, null);\r\n\r\n            Assert.Equal(\"ENST00000343938.4:c.-311A>G\", observedHgvsc);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHgvscAnnotation_substitution_in_intron_before_TSS()\r\n        {\r\n            var variant       = new SimpleVariant(ChromosomeUtilities.Chr1, 1262210, 1262210, \"C\", \"G\", VariantType.SNV);\r\n            var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, 1, 1, null, null);\r\n\r\n            Assert.Equal(\"ENST00000343938.4:c.-75-6C>G\", observedHgvsc);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHgvscAnnotation_insertion_in_coding_region()\r\n        {\r\n            var sequence = new Mock<ISequence>();\r\n            sequence.Setup(x => x.Substring(1262627, 1)).Returns(\"A\");\r\n\r\n            var variant       = new SimpleVariant(ChromosomeUtilities.Chr1, 1262629, 1262628, \"\", \"G\", VariantType.insertion);\r\n            var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, sequence.Object, 4, 4, null, null);\r\n\r\n            Assert.Equal(\"ENST00000343938.4:c.130_131insG\", observedHgvsc);\r\n        
}\r\n\r\n        [Fact]\r\n        public void GetHgvscAnnotation_insertion_after_coding_region()\r\n        {\r\n            var sequence = new Mock<ISequence>();\r\n            sequence.Setup(x => x.Substring(1262627, 1)).Returns(\"A\");\r\n\r\n            var variant       = new SimpleVariant(ChromosomeUtilities.Chr1, 1263159, 1263158, \"\", \"G\", VariantType.insertion);\r\n            var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, sequence.Object, 4, 4, null, null);\r\n\r\n            Assert.Equal(\"ENST00000343938.4:c.*15_*16insG\", observedHgvsc);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHgvscAnnotation_duplication_in_coding_region()\r\n        {\r\n            var sequence = new Mock<ISequence>();\r\n            sequence.Setup(x => x.Substring(1262626, 2)).Returns(\"TA\");\r\n\r\n            var variant       = new SimpleVariant(ChromosomeUtilities.Chr1, 1262629, 1262628, \"\", \"TA\", VariantType.insertion);\r\n            var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, sequence.Object, 4, 4, null, null);\r\n\r\n            Assert.Equal(\"ENST00000343938.4:c.129_130dup\", observedHgvsc);\r\n        }\r\n\r\n        [Fact]\r\n        public void ApplyDuplicationAdjustments_NonCoding_Reverse()\r\n        {\r\n            var regions = new ITranscriptRegion[3];\r\n            regions[0] = new TranscriptRegion(TranscriptRegionType.Exon, 2, 20976856, 20977050, 154, 348);\r\n            regions[1] = new TranscriptRegion(TranscriptRegionType.Intron, 1, 20977051, 20977054, 153, 154);\r\n            regions[2] = new TranscriptRegion(TranscriptRegionType.Exon, 1, 20977055, 20977207, 1, 153);\r\n\r\n            var observedResults = regions.ShiftDuplication(20977006, \"AACT\", true);\r\n\r\n            Assert.Equal(\"AACT\", observedResults.RefAllele);\r\n            Assert.Equal(20977009, observedResults.Start);\r\n            Assert.Equal(20977006, 
observedResults.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void ApplyDuplicationAdjustments_Coding_Forward()\r\n        {\r\n            var regions = new ITranscriptRegion[41];\r\n            for (int i = 0; i < 22; i++)              regions[i] = new TranscriptRegion(TranscriptRegionType.Exon, 0, 107000000, 107334926, 1, 1564);\r\n            for (int i = 23; i < regions.Length; i++) regions[i] = new TranscriptRegion(TranscriptRegionType.Exon, 0, 107335162, 108000000, 1662, 1700);\r\n            regions[21] = new TranscriptRegion(TranscriptRegionType.Intron, 11, 107334926, 107335065, 1565, 1566);\r\n            regions[22] = new TranscriptRegion(TranscriptRegionType.Exon, 12, 107335066, 107335161, 1566, 1661);\r\n\r\n            var observedResults = regions.ShiftDuplication(107335068, \"AGTC\", false);\r\n\r\n            Assert.Equal(\"AGTC\", observedResults.RefAllele);\r\n            Assert.Equal(107335064, observedResults.Start);\r\n            Assert.Equal(107335067, observedResults.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHgvscAnnotation_Deletion_start_before_transcript()\r\n        {\r\n            var variant       = new SimpleVariant(ChromosomeUtilities.Chr1, 1260144, 1260148, \"ATGTC\", \"\", VariantType.deletion);\r\n            var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, -1, 0, null, null);\r\n\r\n            Assert.Null(observedHgvsc);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHgvscAnnotation_Delin_start_from_Exon_end_in_intron()\r\n        {\r\n            var variant       = new SimpleVariant(ChromosomeUtilities.Chr1, 1262410, 1262414, \"ATGTC\", \"TG\", VariantType.indel);\r\n            var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, 2, 3, null, null);\r\n\r\n            Assert.Equal(\"ENST00000343938.4:c.120_122+2delinsTG\", observedHgvsc);\r\n        }\r\n\r\n        [Fact]\r\n        
public void GetHgvscAnnotation_inversion_start_from_Exon_end_in_intron()\r\n        {\r\n            var variant       = new SimpleVariant(ChromosomeUtilities.Chr1, 1262410, 1262414, \"ATGTC\", \"GACAT\", VariantType.MNV);\r\n            var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, 2, 3, null, null);\r\n\r\n            Assert.Equal(\"ENST00000343938.4:c.120_122+2inv\", observedHgvsc);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHgvscAnnotation_Deletion_end_after_transcript()\r\n        {\r\n            var variant       = new SimpleVariant(ChromosomeUtilities.Chr1, 1260143, 1260148, \"ATGTC\", \"\", VariantType.deletion);\r\n            var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, -1, 0, null, null);\r\n\r\n            Assert.Null(observedHgvsc);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHgvscAnnotation_Reference_no_hgvsc()\r\n        {\r\n            var variant       = new SimpleVariant(ChromosomeUtilities.Chr1, 1260138, 1260138, \"A\", \"A\", VariantType.reference);\r\n            var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, -1, -1, null, null);\r\n\r\n            Assert.Null(observedHgvsc);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHgvscAnnotation_substitution_in_intron_of_reverse_gene()\r\n        {\r\n            var variant       = new SimpleVariant(ChromosomeUtilities.Chr1, 136000, 136000, \"A\", \"G\", VariantType.SNV);\r\n            var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_reverseTranscript, variant, null, 1, 1, null, null);\r\n\r\n            Assert.Equal(\"ENST00000423372.3:c.*910-198T>C\", observedHgvsc);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHgvscAnnotation_substitution_after_stopCodon_of_reverse_gene()\r\n        {\r\n            var variant       = new SimpleVariant(ChromosomeUtilities.Chr1, 
138529, 138529, \"A\", \"G\", VariantType.SNV);\r\n            var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_reverseTranscript, variant, null, 2, -1, null, null);\r\n\r\n            Assert.Equal(\"ENST00000423372.3:c.*1T>C\", observedHgvsc);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHgvscAnnotation_deletion_of_reverse_gene()\r\n        {\r\n            var variant       = new SimpleVariant(ChromosomeUtilities.Chr1, 135802, 137619, \"ATCGTGGGTTGT\", \"\", VariantType.deletion);\r\n            var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_reverseTranscript, variant, null, 0, 1, null, null);\r\n\r\n            Assert.Equal(\"ENST00000423372.3:c.*909+2_*910del\", observedHgvsc);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHgvscAnnotation_insertion_at_last_position()\r\n        {\r\n            var sequence = new Mock<ISequence>();\r\n            sequence.Setup(x => x.Substring(70361157-12, 12)).Returns(\"TATATATATATA\");\r\n\r\n            var variant = new SimpleVariant(ChromosomeUtilities.ChrX, 70361157, 70361156, \"\", \"ACACCAGCAGCA\", VariantType.insertion);//right shifted variant\r\n            var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(GetForwardTranscriptWithoutUtr(), variant, sequence.Object, 0, 0, null, null);\r\n\r\n            Assert.Equal(\"ENST00000579622.1:n.122_123insACACCAGCAGCA\", observedHgvsc);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHgvscAnnotation_duplication_at_last_position()\r\n        {\r\n            var sequence = new Mock<ISequence>();\r\n            sequence.Setup(x => x.Substring(70361156 - 4, 4)).Returns(\"ACAC\");\r\n\r\n            var variant = new SimpleVariant(ChromosomeUtilities.ChrX, 70361157, 70361156, \"\", \"ACAC\", VariantType.insertion);//right shifted variant\r\n            var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(GetForwardTranscriptWithoutUtr(), variant, sequence.Object, 0, 0, null, 
null);\r\n\r\n            Assert.Equal(\"ENST00000579622.1:n.119_122dup\", observedHgvsc);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/HgvsProteinNomenclatureTests.cs",
    "content": "﻿using Moq;\r\nusing UnitTests.TestDataStructures;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Pools;\r\nusing VariantAnnotation.TranscriptAnnotation;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions\r\n{\r\n    public sealed class HgvsProteinNomenclatureTests\r\n    {\r\n        public const string Enst00000343938GenomicSequence = \"GAGGGCGGGGCGAGGGCGGGGCGGTGGGCGGGGACGGGGCCCGCACGGCGGCTACGGCCTAGGTGAGCGGCTCGGACTCGGCGGCCGCACCTGCCCAACCCAACCCGCACGGTCCGGAAGTCGCCGAGGGGCCGGGAGCGGGAGGGGACGTCGTCCTAGAGGGCCGGAGCGGGCGGGCGGCCGAGGACCCGGCTCCCGCGCAGGACGGAGCCGTGGCTCAGGTCGGCCCCTCCCCAACACCACCCCGGGCCTCCGCCCCTTCCTGGGCCTCTCGGTGGAGCAGGGACCCGAACCGGTGCCCATCCAGTCCGGTGCCATCTGAAGCCCCCTTCCCAGGTGAGACTCGTAGCGCTCGCTCGACAGGGTCTGGTCCCACCCACAAGGCCTGGGGCGCCGTGGGGCCCCGTCTCCTGCTGGCCCCCCAGCCTGCTGTCAGCCCCCGTGCTCTGTGCTCAGGCCGCCCTCGCGCCCGGCCCTGACCTTGGGCCGTTGGGCTGCCCTGGGAAAGGCCTGGAGGTGTCCTGGGTCACCTTCCTGGGCTGGCAAGCTGCCTGCCTCCTGCACAGCCACTGCCCTTCCTGTTGTTACCGAGCCACCAGCCACAGCTCTGAGAAGCTCCTGGCAGCTTCTGTTTGCCACTGGCTCGAATCTGGGCAGGAAGGCAAGGCCCGCAGAATATCTGGTGACCAAGAAGGAAACCCCAGAGCCTCAGAGACCATCTTCTCAGTGGACAAAATTAAGGCCCGAGGAGGGGAGGGGCGTGCTGGAAGTCTATGGGACTGCATCTTTCTGAGGCCCAGGAGCAGCCATCCCCCACACCTGAAGCCCGGTGAGCTCACATCTGGGGCCTCCGCCTGGTGCCAAGCATGCAACCCAACCTGTGGGGCCTGCAACGCCAGGCTTCAGCACCCTGCAGGCACCAGTGCTCCAGCAGCCTGGGCCACGGGCTGGGCAGGGCTTGCAGCCCATGATCCCTAGTGATGAAGGGCCCAGTCCTAGGGTGCTGAGCAACCTGCCCACCTGCTCCTGGCCAGGAGCTCTCACCACGGCTGGGTGCCCTTCCCCCTCCCCCACCGATGGAGTCCCTGCAGCCAGGGAGGCCAGGACAGGGCTCCCAGCACCAACCGGCCTAGGAACCCCCAGGCCCTCTTCCTGGTCGAGGTGGAATGCAGCTGACTCTCAGGTTCCCCAGAGCAGGTGCGGGCCCGTGGGGCACCCGGGGAGACAGGGCAAGGGTGCTTGGCAACACTCACACAAAGCATGGGTGCCTGGATGTCTGTGGATCTGTGGAGTGACTATGTGAATGCCAGCAGAATCCAAAGCAGGGCCTGGGCCACTCGTGGAAGGCTCCCTAGGGCTAGTACAAGAGCCTCGTGGCAATCTTCTGAGTGGTAAAACCCATCTGTGTGGGACATG
GAGTTTCAGCAACAGGAGTGAAAACACGTGTCCATCCATCCAGCAAGTGCCAGCCCTACAGCCTCTTTTCTGCTTTTGGGGATGTAGCAGTGAGGAAGATGGGGCAGCCTGCCCGGCAGCATCCCCCCACCCCCGGCCCCACCTGTCTCTGCTTTCTGCTGTGTCTGTTTTCTTGTCTAGGACTTCAGAACTTCCTGTCTTTGTTGTCATCTGACCCCACCCCAGATGGCTGCTCGCACTCCCCATGCACCCAGATAGATGGCTAGGATGGTGCTTGGCTCTCGGCAGGGGCTTAGTATTTCTCCAGCTGGTAAAAGCAGATACAGCATCTAGAGAGAGAAACAAAAACAAGAAAGCACCAGCAGAGACACCTGCTGCAGACAGCGGGGCCTAGTGGTCTGATAAAGCCAGAGGGGGCCACTCTCGGGGTCAGGGACTGACACGGAGTCAGTGGCCTGATCCACAGGAGGGGCTGTGCCAAGGTCCCTGAATGCGCAATCCTGATGAAGGGTGGGTCAGGGTGGTGTGCCTGAGAGCCTGCGGCTTGGCTGGGAGCAGAGCCAGGCAGCTCCTGGGAGGAAGCTCCATGAGGGGCATGAGTGTTCAGTGAGCGGCAATGGGATCGCAGCTATTTTGTTCCCCTCCACACACAGAAAATGAGCCACAGAGCAAGCTGACCCCAGCGACACAGCCCCCCAGCCCTACTGTATTTCCGTTCCTATCAAAAAATGGATGACTCGGAGACAGGTTTCAATCTGAAAGTCGTCCTGGTCAGTTTCAAGCAGTGTCTCGATGAGAAGGAAGAGGTCTTGCTGGACCCCTACATTGCCAGCTGGAAGGGCCTGGTCAGGTGCGTGTGCCAGGGCTGCCTCCTGAGGTGGGCGCTCCCCTGGCCCGAGTCCCATATGTGGCATCTGCCTCCCGACTGCCTGTCCCCACCAGCTTTGCTGCCCGTTTCCAGATGGGTGTGAGCCCCCGCAGGCTGGGCAGCGTCCCCTGCACCCCAGGCGGGCTGCCCCAGGCCTGGGCGAGGACTCGAGCCCCGCTCCCTTCCACAGGTTTCTGAACAGCCTGGGCACCATCTTCTCATTCATCTCCAAGGACGTGGTCTCCAAGCTGCGGATCATGGAGCGCCTCAGGGGCGGCCCGCAGAGCGAGCACTACCGCAGCCTGCAGGCCATGGTGGCCCACGAGCTGAGCAACCGGCTGGTGGACCTGGAGCGCCGCTCCCACCACCCGGAGTCTGGCTGCCGGACGGTGCTGCGCCTGCACCGCGCCCTGCACTGGCTGCAGCTGTTCCTGGAGGGCCTGCGTACCAGCCCCGAGGACGCACGCACCTCCGCGCTCTGCGCCGACTCCTACAACGCCTCGCTGGCCGCCTACCACCCCTGGGTCGTGCGCCGCGCCGTCACCGTGGCCTTCTGCACGCTGCCCACACGCGAGGTCTTCCTGGAGGCCATGAACGTGGGGCCCCCGGAGCAGGCCGTGCAGATGCTAGGCGAGGCCCTCCCCTTCATCCAGCGTGTCTACAACGTCTCCCAGAAGCTCTACGCCGAGCACTCCCTGCTGGACCTGCCCTAGGGGCGGGAAGCCAGGGCCGCACCGGCTTTCCTGCTGCAGATCTGGGCTGCGGTGGCCAGGGCCGTGAGTCCCGTGGCAGAGCCTTCTGGGCGCTGCGGGAACAGGAGATCCTCTGTCGCCCCTGTGAGCTGAGCTGGTTAGGAACCACAGACTGTGACAGAGAAGGTGGCGACCAGCCCAGAAGAGGCCCACCCTCTCGGTCCGGAACAAGACGCCTCGGCCACGGCTCCCCCTCGGCCTATTACACGCGTGCGCAGCCAGGCCTCGCCAGGGTGCGGTGCAGAGCAGAGCAGGCAGGGGTGGGGGCCGGGCCTGCAAGAGCCCGAAAGGTCGCCACCCCCTAGCCTGTGGGGTGCATCTGCGAACCAGGGTGAAGTCACAGGTCCCGGGGTGTGGAGGCTCCATCCTTTCTCCTTTCT
GCCAGCCGATGTGTCCTCATCTCAGGCCCGTGCCTGGGACCCCGTGTCTGCCCAGGTGGGCAGCCTTGAGCCCAGGGGACTCAGTGCCCTCCATGCCCTGGCTGGCAGAAACCCTCAACAGCAGTCTGGGCACTGTGGGGCTCTCCCCGCCTCTCCTGCCTTGTTTGCCCCTCAGCGTGCCAGGCAGACTGGGGGCAGGACAGCCGGAAGCTGAGACCAAGGCTCCTCACAGAAGGGCCCAGGAAGTCCCCGCCCTTGGGACAGCCTCCTCCGTAGCCCCTGCACGGCACCAGTTCCCCGAGGGACGCAGCAGGCCGCCTCCCGCAGCGGCCGTGGGTCTGCACAGCCCAGCCCAGCCCAAGGCCCCCAGGAGCTGGGACTCTGCTACACCCAGTGAAATGCTGTGTCCCTTCTCCCCCGTGCCCCTTGATGCCCCCTCCCCACAGTGCTCAGGAGACCCGTGGGGCACGGAACAGGAGGGTCTGGACCCTGTGGCCCAGCCAAAGGCTACCAGACAGCCACAACCAGCCCAGCCACCATCCAGTGCCTGGGGCCTGGCCACTGGCTCTTCACAGTGGACCCCAGCACCTCGGGGTGGCAGAGGGACGGCCCCCACGGCCCAGCAGACATGCGAGCTTCCAGAGTGCAATCTATGTGATGTCTTCCAACGTTAATAAATCACACAGCCTCCCAGGAGGGAGACGCTGGGGTGCAC\";\r\n\r\n        public static ITranscript GetMockedTranscriptOnForwardStrand()\r\n        {\r\n            var mockedTranscript = new Mock<ITranscript>(); //get info from ENST00000343938.4\r\n            const int start      = 1260147;\r\n            const int end        = 1264277;\r\n\r\n            var transcriptRegions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 1260147, 1260482, 1, 336),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 1260483, 1262215, 336, 337),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 2, 1262216, 1262412, 337, 533),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 2, 1262413, 1262620, 533,534),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 3, 1262621, 1264277, 534, 2190)\r\n            };\r\n\r\n            var translation = new Mock<ITranslation>();\r\n            translation.SetupGet(x => x.CodingRegion).Returns(new CodingRegion(1262291, 1263143, 412, 1056, 645));\r\n            translation.SetupGet(x => x.ProteinId).Returns(CompactId.Convert(\"ENST00000343938\", 4));\r\n            translation.SetupGet(x => 
x.PeptideSeq).Returns(\"MDDSETGFNLKVVLVSFKQCLDEKEEVLLDPYIASWKGLVRFLNSLGTIFSFISKDVVSKLRIMERLRGGPQSEHYRSLQAMVAHELSNRLVDLERRSHHPESGCRTVLRLHRALHWLQLFLEGLRTSPEDARTSALCADSYNASLAAYHPWVVRRAVTVAFCTLPTREVFLEAMNVGPPEQAVQMLGEALPFIQRVYNVSQKLYAEHSLLDLP\");\r\n\r\n            var gene = new Mock<IGene>();\r\n            gene.SetupGet(x => x.OnReverseStrand).Returns(false);\r\n            gene.SetupGet(x => x.EnsemblId).Returns(CompactId.Convert(\"ENSG00000224051 \"));\r\n\r\n            mockedTranscript.SetupGet(x => x.Id).Returns(CompactId.Convert(\"ENST00000343938\", 4));\r\n            mockedTranscript.SetupGet(x => x.Source).Returns(Source.Ensembl);\r\n            mockedTranscript.SetupGet(x => x.Chromosome).Returns(ChromosomeUtilities.Chr1);\r\n            mockedTranscript.SetupGet(x => x.Start).Returns(start);\r\n            mockedTranscript.SetupGet(x => x.End).Returns(end);\r\n            mockedTranscript.SetupGet(x => x.Gene).Returns(gene.Object);\r\n            mockedTranscript.SetupGet(x => x.TranscriptRegions).Returns(transcriptRegions);\r\n            mockedTranscript.SetupGet(x => x.Translation).Returns(translation.Object);\r\n            mockedTranscript.SetupGet(x => x.TotalExonLength).Returns(2190);\r\n\r\n            return mockedTranscript.Object;\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHgvsProteinAnnotation_substitution()\r\n        {\r\n            var variant     = VariantPool.Get(ChromosomeUtilities.Chr1, 1262295, 1262295, \"A\", \"C\", VariantType.SNV, \"1:1262295:A>C\", false, false, false, null, AnnotationBehavior.SmallVariants, false);\r\n            var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1);\r\n            var transcript  = GetMockedTranscriptOnForwardStrand();\r\n\r\n            var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, new AminoAcids(false));\r\n\r\n            var hgvspNotation = 
annotatedTranscript.HgvsProtein;\r\n\r\n            Assert.Equal(\"ENST00000343938.4:p.(Asp2Ala)\", hgvspNotation);\r\n            VariantPool.Return(variant);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHgvsProteinAnnotation_insertion()\r\n        {\r\n            var variant     = VariantPool.Get(ChromosomeUtilities.Chr1, 1262297, 1262296, \"\", \"TTC\", VariantType.insertion, \"1:1262295:T>TTTC\", false, false, false, null, AnnotationBehavior.SmallVariants, false);\r\n            var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1);\r\n            var transcript  = GetMockedTranscriptOnForwardStrand();\r\n\r\n            var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, new AminoAcids(false));\r\n\r\n            var hgvspNotation = annotatedTranscript.HgvsProtein;\r\n\r\n            Assert.Equal(\"ENST00000343938.4:p.(Asp2_Asp3insPhe)\", hgvspNotation);\r\n            VariantPool.Return(variant);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHgvsProteinAnnotation_duplication_right_shifted()\r\n        {\r\n            var variant     = VariantPool.Get(ChromosomeUtilities.Chr1, 1262297, 1262296, \"\", \"GAC\", VariantType.insertion, \"1:1262295:T>GAC\", false, false, false, null, AnnotationBehavior.SmallVariants, false);\r\n            var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1);\r\n            var transcript  = GetMockedTranscriptOnForwardStrand();\r\n\r\n            var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, new AminoAcids(false));\r\n\r\n            var hgvspNotation = annotatedTranscript.HgvsProtein;\r\n\r\n            Assert.Equal(\"ENST00000343938.4:p.(Asp3dup)\", hgvspNotation);\r\n            VariantPool.Return(variant);\r\n        }\r\n\r\n        [Fact]\r\n        public void 
GetHgvsProteinAnnotation_deletion()\r\n        {\r\n            var variant     = VariantPool.Get(ChromosomeUtilities.Chr1, 1262300, 1262302, \"TCG\", \"\", VariantType.deletion, \"1:1262300:1262302\", false, false, false, null, AnnotationBehavior.SmallVariants, false);\r\n            var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1);\r\n            var transcript  = GetMockedTranscriptOnForwardStrand();\r\n\r\n            var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, new AminoAcids(false));\r\n\r\n            var hgvspNotation = annotatedTranscript.HgvsProtein;\r\n\r\n            Assert.Equal(\"ENST00000343938.4:p.(Ser4del)\", hgvspNotation);\r\n            VariantPool.Return(variant);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHgvsProteinAnnotation_delIns()\r\n        {\r\n            var variant = VariantPool.Get(ChromosomeUtilities.Chr1, 1262300, 1262305, \"TCGGAG\", \"GAGACA\", VariantType.indel, \"1:1262300:1262305\", false, false, false, null, AnnotationBehavior.SmallVariants, false);\r\n            var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1);\r\n            var transcript = GetMockedTranscriptOnForwardStrand();\r\n\r\n            var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, new AminoAcids(false));\r\n\r\n            var hgvspNotation = annotatedTranscript.HgvsProtein;\r\n\r\n            Assert.Equal(\"ENST00000343938.4:p.(Ser4_Glu5delinsGluThr)\", hgvspNotation);\r\n            VariantPool.Return(variant);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHgvsProteinAnnotation_no_change()\r\n        {\r\n            var variant = VariantPool.Get(ChromosomeUtilities.Chr1, 1262300, 1262302, \"TCG\", \"AGT\", VariantType.indel, \"1:1262300:1262302\", false, false, false, null, AnnotationBehavior.SmallVariants, 
false);\r\n            var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1);\r\n            var transcript  = GetMockedTranscriptOnForwardStrand();\r\n\r\n            var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, new AminoAcids(false));\r\n\r\n            var hgvspNotation = annotatedTranscript.HgvsProtein;\r\n\r\n            Assert.Equal(\"ENST00000343938.4:c.10_12delinsAGT(p.(Ser4=))\", hgvspNotation);\r\n            VariantPool.Return(variant);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHgvsProteinAnnotation_frameshift()\r\n        {\r\n            var variant = VariantPool.Get(ChromosomeUtilities.Chr1, 1262300, 1262301, \"TC\", \"\", VariantType.deletion, \"1:1262300:1262301\", false, false, false, null, AnnotationBehavior.SmallVariants, false);\r\n            var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1);\r\n            var transcript  = GetMockedTranscriptOnForwardStrand();\r\n\r\n            var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, new AminoAcids(false));\r\n\r\n            var hgvspNotation = annotatedTranscript.HgvsProtein;\r\n\r\n            Assert.Equal(\"ENST00000343938.4:p.(Ser4GlyfsTer19)\", hgvspNotation);\r\n            VariantPool.Return(variant);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHgvsProteinAnnotation_frameshift_stop_gain()\r\n        {\r\n            var variant = VariantPool.Get(ChromosomeUtilities.Chr1, 1262313, 1262312, \"\", \"GA\", VariantType.insertion, \"1:1262333:1262332\", false, false, false, null, AnnotationBehavior.SmallVariants, false);\r\n            var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1);\r\n            var transcript  = GetMockedTranscriptOnForwardStrand();\r\n\r\n            var annotatedTranscript = 
FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, new AminoAcids(false));\r\n\r\n            var hgvspNotation = annotatedTranscript.HgvsProtein;\r\n\r\n            Assert.Equal(\"ENST00000343938.4:p.(Phe8Ter)\", hgvspNotation);\r\n            VariantPool.Return(variant);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHgvsProteinAnnotation_extension()\r\n        {\r\n            var variant = VariantPool.Get(ChromosomeUtilities.Chr1, 1263141, 1263143, \"TAG\", \"\", VariantType.deletion, \"1:1263141:1263143\", false, false, false, null, AnnotationBehavior.SmallVariants, false);\r\n            var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1);\r\n            var transcript  = GetMockedTranscriptOnForwardStrand();\r\n\r\n            var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, new AminoAcids(false));\r\n\r\n            var hgvspNotation = annotatedTranscript.HgvsProtein;\r\n\r\n            Assert.Equal(\"ENST00000343938.4:p.(Ter215GlyextTer43)\", hgvspNotation);\r\n            VariantPool.Return(variant);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/HgvsUtilitiesTests.cs",
    "content": "﻿using Genome;\r\nusing Moq;\r\nusing UnitTests.TestDataStructures;\r\nusing VariantAnnotation.AnnotatedPositions;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions\r\n{\r\n    public sealed class HgvsUtilitiesTests\r\n    {\r\n        [Fact]\r\n        public void ShiftAndRotateAlleles_Rotated()\r\n        {\r\n            int observedStart            = 98;\r\n            string observedRefAminoAcids = \"YYAKEV\";\r\n            string observedAltAminoAcids = \"Y\";\r\n\r\n            HgvsUtilities.ShiftAndRotateAlleles(ref observedStart, ref observedRefAminoAcids, ref observedAltAminoAcids,\r\n                \"MHYCVLSAFLILHLVTVALSLSTCSTLDMDQFMRKRIEAIRGQILSKLKLTSPPEDYPEPEEVPPEVISIYNSTRDLLQEKASRRAAACERERSDEEYYAKEVYKIDMPPFFPSENAIPPTFYRPYFRIVRFDVSAMEKNASNLVKAEFRVFRLQNPKARVPEQRIELYQILKSKDLTSPTQRYIDSKVVKTRAEGEWLSFDVTDAVHEWLHHKDRNLGFKISLHCPCCTFVPSNNYIIPNKSEELEARFAGIDGTSTYTSGDQKTIKSTRKKNSGKTPHLLLMLLPSYRLESQQTNRRKKRALDAAYCFRNVQDNCCLRPLYIDFKRDLGWKWIHEPKGYNANFCAGACPYLWSSDTQHSRVLSLYNTINPEASASPCCVSQDLEPLTILYYIGKTPKIEQLSNMIVKSCKCS\");\r\n\r\n            Assert.Equal(100,     observedStart);\r\n            Assert.Equal(\"AKEVY\", observedRefAminoAcids);\r\n            Assert.Equal(\"\",      observedAltAminoAcids);\r\n        }\r\n\r\n        [Fact]\r\n        public void Rotate3Prime_Identity_Insertion()\r\n        {\r\n            const int expectedStart            = 46;\r\n            const string expectedRefAminoAcids = \"\";\r\n            const string expectedAltAminoAcids = \"A\";\r\n\r\n            var observedResult = HgvsUtilities.Rotate3Prime(\"\", \"A\", 44,\r\n                
\"MAAQVAPAAASSLGNPPPPPPSELKKAEQQQREEAGGEAAAAAAAERGEMKAAAGQESEGPAVGPPQPLGKELQDGAESNGGGGGGGAGSGGGPGAEPDLKNSNGNAGPRPALNNNLTEPPGGGGGGSSDGVGAPPHSAAAALPPPAYGFGQPYGRSPSAVAAAAAAVFHQQHGGQQSPGLAALQSGGGGGLEPYAGPQQNSHDHGFPNHQYNSYYPNRSAYPPPAPAYALSSPRGGTPGSGAAAAAGSKPPPSSSASASSSSSSFAQQRFGAMGGGGPSAAGGGTPQPTATPTLNQLLTSPSSARGYQGYPGGDYSGGPQDGGAGKGPADMASQCWGAAAAAAAAAAASGGAQQRSHHAPMSPGSSGGGGQPLARTPQPSSPMDQMGKMRPQPYGGTNPYSQQQGPPSGPQQGHGYPGQPYGSQTPQRYPMTMQGRAQSAMGGLSYTQQIPPYGQQGPSGYGQQGQTPYYNQQSPHPQQQQPPYSQQPPSQTPHAQPSYQQQPQSQPPQLQSSQPPYSQQPSQPPHQQSPAPYPSQQSTTQQHPQSQPPYSQPQAQSPYQQQQPQQPAPSTLSQQAAYPQPQSQQSQQTAYSQQRFPPPQELSQDSFGSQASSAPSMTSSKGGQEDMNLSLQSRPSSLPDLSGSIDDLPMGTEGALSPGVSTSGISSSQGEQSNPAQSPFSPHTSPHLPGIRGPSPSPVGSPASVAQSRSGPLSPAAVPGNQMPPRPPSGQSDSIMHPSMNQSSIAQDRGYMQRNPQMPQYSSPQPGSALSPRQPSGGQIHTGMGSYQQNSMGSYGPQGGQYGPQGGYPRQPNYNALPNANYPSAGMAGGINPMGAGGQMHGQPGIPPYGTLPPGRMSHASMGNRPYGPNMANMPPQVGSGMCPPPGGMNRKTQETAVAMHVAANSIQNRPPGYPNMNQGGMMGTGPPYGQGINSMAGMINPQGPPYSMGGTMANNSAGMAASPEMMGLGDVKLTPATKMNNKADGTPKTESKSKKSSSSTTTNEKITKLYELGGEPERKMWVDRYLAFTEEKAMGMTNLPAVGRKPLDLYRLYVSVKEIGGLTQVNKNKKWRELATNLNVGTSSSAASSLKKQYIQCLYAFECKIERGEDPPPDIFAAADSKKSQPKIQPPSPAGSGSMQGPQTPQSTSSSMAEGGDLKPPTPASTPHSQIPPLPGMSRSNSVGIQDAFNDGSDSTFQKRNSMTPNPGYQPSMNTSDMMGRMSYEPNKDPYGSMRKAPGSDPFMSSGQGPNGGMGDPYSRAAGPGLGNVAMGPRQHYPYGGPYDRVRTEPGIGPEGNMSTGAPQPNLMPSNPDSGMYSPSRYPPQQQQQQQQRHDSYGNQFSTQGTPSGSPFPSQQTTMYQQQQQNYKRPMDGTYGPPAKRHEGEMYSVPYSTGQGQPQQQQLPPAQPQPASQQQAAQPSPQQDVYNQYGNAYPATATAATERRPAGGPQNQFPFQFGRDRVSAPPGTNAQQNMPPQMMGGPIQASAEVAQQGTMWQGRNDMTYNYANRQSTGSAPQGPAYHGVNRTDEMLHTDQRANHEGSWPSHGTRQPPYGPSAPVPPMTRPPPSNYQPPPSMQNHIPQVSSPAPLPRPMENRTSPSKSPFLHSGMKMQKAGPPVPASHIAPAPVQPPMIRRDITFPPGSVEATQPVLKQRRRLTMKDIGTPEAWRVMMSLKSGLLAESTWALDTINILLYDDNSIMTFNLSQLPGLLELLVEYFRRCLIEIFGILKEYEVGDPGQRTLLDPGRFSKVSSPAPMEGGEEEEELLGPKLEEEEEEEVVENDEEIAFSGKDKPASENSEEKLISKFDKLPVKIVQKNDPFVVDCSDKLGRVQEFDSGLLHWRIGGGDTTEHIQTHFESKTELLPSRPHAPCPPAPRKHVTTAEGTPGTTDQEGPPPDGPPEKRITATMDDMLSTRSSTLTEDGAKSSEAIKESSKFPFGISPAQSHRNIKILEDEPHSKDETPLCTLLDWQDSLAKRCVCVSNTIRSLSFVPGND
FEMSKHPGLLLILGKLILLHHKHPERKQAPLTYEKEEEQDQGVSCNKVEWWWDCLEMLRENTLVTLANISGQLDLSPYPESICLPVLDGLLHWAVCPSAEAQDPFSTLGPNAVLSPQRLVLETLSKLSIQDNNVDLILATPPFSRLEKLYSTMVRFLSDRKNPVCREMAVVLLANLAQGDSLAARAIAVQKGSIGNLLGFLEDSLAATQFQQSQASLLHMQNPPFEPTSVDMMRRAARALLALAKVDENHSEFTLYESRLLDISVSPLMNSLVSQVICDVLFLIGQS\");\r\n\r\n            Assert.Equal(expectedStart,         observedResult.Start);\r\n            Assert.Equal(expectedRefAminoAcids, observedResult.RefAminoAcids);\r\n            Assert.Equal(expectedAltAminoAcids, observedResult.AltAminoAcids);\r\n        }\r\n\r\n        [Fact]\r\n        public void Rotate3Prime_Identity_Deletion()\r\n        {\r\n            const int expectedStart            = 530;\r\n            const string expectedRefAminoAcids = \"A\";\r\n            const string expectedAltAminoAcids = \"\";\r\n\r\n            var observedResult = HgvsUtilities.Rotate3Prime(\"A\", \"\", 529,\r\n                \"MEAAAGGRGCFQPHPGLQKTLEQFHLSSMSSLGGPAAFSARWAQEAYKKESAKEAGAAAVPAPVPAATEPPPVLHLPAIQPPPPVLPGPFFMPSDRSTERCETVLEGETISCFVVGGEKRLCLPQILNSVLRDFSLQQINAVCDELHIYCSRCTADQLEILKVMGILPFSAPSCGLITKTDAERLCNALLYGGAYPPPCKKELAASLALGLELSERSVRVYHECFGKCKGLLVPELYSSPSAACIQCLDCRLMYPPHKFVVHSHKALENRTCHWGFDSANWRAYILLSQDYTGKEEQARLGRCLDDVKEKFDYGNKYKRRVPRVSSEPPASIRPKTDDTSSQSPAPSEKDKPSSWLRTLAGSSNKSLGCVHPRQRLSAFRPWSPAVSASEKELSPHLPALIRDSFYSYKSFETAVAPNVALAPPAQQKVVSSPPCAAAVSRAPEPLATCTQPRKRKLTVDTPGAPETLAPVAAPEEDKDSEAEVEVESREEFTSSLSSLSSPSFTSSSSAKDLGSPGARALPSAVPDAAAPADAPSGLEAELEHLRQALEGGLDTKEAKEKFLHEVVKMRVKQEEKLSAALQAKRSLHQELEFLRVAKKEKLREATEAKRNLRKEIERLRAENEKKMKEANESRLRLKRELEQARQARVCDKGCEAGRLRAKYSAQIEDLQVKLQHAEADREQLRADLLREREAREHLEKVVKELQEQLWPRARPEAAGSEGAAELEP\");\r\n\r\n            Assert.Equal(expectedStart,         observedResult.Start);\r\n            Assert.Equal(expectedRefAminoAcids, observedResult.RefAminoAcids);\r\n            Assert.Equal(expectedAltAminoAcids, observedResult.AltAminoAcids);\r\n        }\r\n\r\n        [Fact]\r\n        public void Rotate3Prime_Identity_WithNullAminoAcids()\r\n   
     {\r\n            const int expectedStart            = 55;\r\n            const string expectedRefAminoAcids = \"Q\";\r\n            const string expectedAltAminoAcids = \"*\";\r\n\r\n            var observedResult = HgvsUtilities.Rotate3Prime(expectedRefAminoAcids, expectedAltAminoAcids, expectedStart,\r\n                \"MGWDLTVKMLAGNEFQVSLSSSMSVSELKAQITQKIGVHAFQQRLAVHPSGVALQDRVPLASQGLGPGSTVLLVVDKCDEPLSILVRNNKGRSSTYEVRLTQTVAHLKQQVSGLEGVQDDLFWLTFEGKPLEDQLPLGEYGLKPLSTVFMNLRLRGGGTEPGGRS\");\r\n\r\n            Assert.Equal(expectedStart,         observedResult.Start);\r\n            Assert.Equal(expectedRefAminoAcids, observedResult.RefAminoAcids);\r\n            Assert.Equal(expectedAltAminoAcids, observedResult.AltAminoAcids);\r\n        }\r\n\r\n        [Fact]\r\n        public void IsAminoAcidDuplicate_True()\r\n        {\r\n            var observedResult = HgvsUtilities.IsAminoAcidDuplicate(85, \"P\",\r\n                \"MEAAAGGRGCFQPHPGLQKTLEQFHLSSMSSLGGPAAFSARWAQEAYKKESAKEAGAAAVPAPVPAATEPPPVLHLPAIQPPPPVLPGPFFMPSDRSTERCETVLEGETISCFVVGGEKRLCLPQILNSVLRDFSLQQINAVCDELHIYCSRCTADQLEILKVMGILPFSAPSCGLITKTDAERLCNALLYGGAYPPPCKKELAASLALGLELSERSVRVYHECFGKCKGLLVPELYSSPSAACIQCLDCRLMYPPHKFVVHSHKALENRTCHWGFDSANWRAYILLSQDYTGKEEQARLGRCLDDVKEKFDYGNKYKRRVPRVSSEPPASIRPKTDDTSSQSPAPSEKDKPSSWLRTLAGSSNKSLGCVHPRQRLSAFRPWSPAVSASEKELSPHLPALIRDSFYSYKSFETAVAPNVALAPPAQQKVVSSPPCAAAVSRAPEPLATCTQPRKRKLTVDTPGAPETLAPVAAPEEDKDSEAEVEVESREEFTSSLSSLSSPSFTSSSSAKDLGSPGARALPSAVPDAAAPADAPSGLEAELEHLRQALEGGLDTKEAKEKFLHEVVKMRVKQEEKLSAALQAKRSLHQELEFLRVAKKEKLREATEAKRNLRKEIERLRAENEKKMKEANESRLRLKRELEQARQARVCDKGCEAGRLRAKYSAQIEDLQVKLQHAEADREQLRADLLREREAREHLEKVVKELQEQLWPRARPEAAGSEGAAELEP\");\r\n            Assert.True(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void IsAminoAcidDuplicate_False()\r\n        {\r\n            var observedResult = HgvsUtilities.IsAminoAcidDuplicate(307, \"*RX\",\r\n                
\"MHYDGHVRFDLPPQGSVLARNVSTRSCPPRTSPAVDLEEEEEESSVDGKGDRKSTGLKLSKKKARRRHTDDPSKECFTLKFDLNVDIETEIVPAMKKKSLGEVLLPVFERKGIALGKVDIYLDQSNTPLSLTFEAYRFGGHYLRVKAPAKPGDEGKVEQGMKDSKSLSLPILRPAGTGPPALERVDAQSRRESLDILAPGRRRKNMSEFLGEASIPGQEPPTPSSCSLPSGSSGSTNTGDSWKNRAASRFSGFFSSGPSTSAFGREVDKMEQLEGKLHTYSLFGLPRLPRGLRFDHDSWEEEYDEDEDEDNACLRLEDSWRELIDGHEKLTRRQCHQQEAVWELLHTEASYIRKLRVIINLFLCCLLNLQESGLLCEVEAERLFSNIPEIAQLHRRLWASVMAPVLEKARRTRALLQPGDFLKGFKMFGSLFKPYIRYCMEEEGCMEYMRGLLRDNDLFRAYITWAEKHPQCQRLKLSDMLAKPHQRLTKYPLLLKSVLRKTEEPRAKEAVVAMIGSVERFIHHVNACMRQRQERQRLAAVVSRIDAYEVVESSSDEVDKLLKEFLHLDLTAPIPGASPEETRQLLLEGSLRMKEGKDSKMDVYCFLFTDLLLVTKAVKKAERTRVIRPPLLVDKIVCRELRDPGSFLLIYLNEFHSAVGAYTFQASGQALCRGWVDTIYNAQNQLQQLRAQEPPGSQQPLQSLEEEEDEQEEEEEEEEEEEEGEDSGTSAASSPTIMRKSSGSPDSQHCASDGSTETLAMVVVEPGDTLSSPEFDSGPFSSQSDETSLSTTASSATPTSELLPLGPVDGRSCSMDSAYGTLSPTSLQDFVAPGPMAELVPRAPESPRVPSPPPSPRLRRRTPVQLLSCPPHLLKSKSEASLLQLLAGAGTHGTPSAPSRSLSELCLAVPAPGIRTQGSPQEAGPSWDCRGAPSPGSGPGLVGCLAGEPAGSHRKRCGDLPSGASPRVQPEPPPGVSAQHRKLTLAQLYRIRTTLLLNSTLTASEV\");\r\n            Assert.False(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void IsAminoAcidDuplicate_False_WhenAminoAcidsNull()\r\n        {\r\n            var observedResult = HgvsUtilities.IsAminoAcidDuplicate(307, null, null);\r\n            Assert.False(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void IsAminoAcidDuplicate_False_StartEqualToAminoAcidLength()\r\n        {\r\n            var observedResult = HgvsUtilities.IsAminoAcidDuplicate(3, \"ABC\", \"DEF\");\r\n            Assert.False(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetNumAminoAcidsUntilStopCodon_FirstAminoAcidIsStop()\r\n        {\r\n            const int expectedResult = -1;\r\n            var observedResult =\r\n                HgvsUtilities.GetNumAminoAcidsUntilStopCodon(\r\n                    
\"RHRNRNTQTETNTETQRHRNTQKHRNKHRDTETHRNTETNTETQKHTETQKQTQRHRNTQKHTDRNKHRNTETQKYRNTQKHRNKHRDTETQKHSDAETQQHKHRNTETHRNRNTETNTETQTHRHRETQKHTETLKHSGRCPGCRGSIA\",\r\n                    \"RHRNRNTQTETNTETQRHRNTQKHRNKHRDTETHRNTQKHRNKHRDTETHRNTETNTETQKHTETHRQKQTQKHRDTEIQKHTETQKQTQRHRDTETQRRRNTATQTQKHRNTQKQKHRNKHRDTDTQTQRNTETHRNTETQWAVSRLQRLHRC\",\r\n                    37, true);\r\n\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetNumAminoAcidsUntilStopCodon_FoundExtraAminoAcids()\r\n        {\r\n            const int expectedResult = 38;\r\n            var observedResult =\r\n                HgvsUtilities.GetNumAminoAcidsUntilStopCodon(\r\n                    \"MLAEPFNWHVEYAHSGDVLGPSGLPASPGAPGTCLHNPAGSNWGPG*EVLMAGTVPAVPG*SGEGSQF*LPWSCSDSPQAGSRAHGQGPGIPLLPQGHGTQSLCRAQGSVPAAEPVPPTEDGRGLSGPEQGHRGTAPARRPPGGWQDLLLLSATLRL*RHCPQHQQ*LPARPGRLAAGCP*ETLWP*PLLSPAAGHRTRDLFPGGGDVPGGPLLRQAGCGADPAFPRPEPQGFGRPLEPAPCGAGGDHHERDRGC*RPHQLL*GVRCHSRRPPEPSDGGPHPRGHGAAPQCQQCGGCAAAQASGLPGAAGPAEGQCRRGPVPVLQ*AGAQRAAEARQLPQPDADLRSRPSAH*QPSLGGRAFHPDVWQSLGRESGLRSDLVQEPGLLCAERKALGRGAEPVPAPAARLPHRPWRPGQPCRAGQQEPVQALPALQLEGNGGTTWAPPFRQPSVRLLRLQPCAGAGRPLRPLIPYLPWPEEFLHHHRELAGLLELLDPSAGEPGP*GPTPLPWRS*EWPSVGL*VQ*RPVVLFPAAAGAAGARARAGPNAQ*LPGPQGQVPREPAGLRLVRGADL*AG**HRGHRCASRAALWPVPPGTVGGLEPRGPVPAAGHGALWLPLGPHAPVAG*RALRPTLRPGVQLPGPAGPPAAARPDPLLQHPPHACAPAAAALRRGGPGRPDLCQGDLSPGGQQQLRPGAAGHGCRRAHSLPLPTVTHWPGWRAAGRADHEPLPATPPHEP*PASHQPRQEGGSPGHGQDEA*DHHAGEPGGP*AQEVAHLGCPAALRPAGVVHGLRRLPGMRAPVPLARFAPVLSFARVFPPFSAPPPAQRALALQNLLSHSQAPERAGQALSRCL*PAALCIGG*MQKQGRNGVCS*EASNSGQERSLKKRPPAVTHSYQPAQHGMAPKLRRSQEET*RGGLRLIREGFLGEVILELAPGEHSEHDW*TEGCRGAQGSTLPRAKQGHWGLS*DPEGVKPLLPQLPLLLEPLHI*PLALLFTASTCSRLPSLSPPSWLCSRNSRLLPVSLLFFRLHL*RMRADNRNTVAKTRLWKGFQKSFFFFN*KKYLQR*ALAMLPRLVSNSWAQAILPSQPPRVLGLQV*ATAPSPRNLSAVWSSISHLMTCSAWGGGVSFPQLPQGGPLPSAAPLSC*PSSRKHTGCR*SGHSRDPQFKRVISISGDSRMGVSALNSPSCFTRKDPVKSPTEVTAH*RGERWSIE*HWAIQAALLPPDRS*ASLAGGLPTAFSGARLAGDGAAARPSLPAP
W*PRGFLSAGLSCYLSLHHELSA*DWGSKRVSSQ*A*VGDCDLEKPWASNTCFSEAPKEGSDILFKNTTKQNSQDMCSFVCSVSHNLRLGDGTLG*GRFFCLASPHLPLALWIRQI*TF*RILREGFLG*GSMAKSVSLWTVYTSRRWI*RNPGFHFQCQSETCSQAGALVHTTYSGHQQQPRPDRASLFFFFETESLSPRLEPSGEILAHYNLHLPGSGNSRA*ASRVAATTGAGQHACLIFVF**RQGFTMLPRLVSNS*AQAVYPPQPPKVLGLQA*ATAPSQNICFYTQRAPLVRTEPRCPEPGSRPPGAQHLSFYT*WAGSGEDRESWWKFHSWPRGGALAPHCRLLTAPIPAAPVPDFISLLSPRVPGPSTLPSVLQEPTPLQLQHQGERGLHMPKYPCRMKGRPALDVPFLNNSHCRRV*DVLF*LSPASDAPPICAEWVWECG*GSKCQRSTFQNQVPSANHVGKVQTWRCPCASAPTHPFSFSCVRKEKFSEPSRLVAFKLQTMICSKKRAFHHKSVHLFTTVFQAGFIKKFLTLE\",\r\n                    \"MLAEPFNWHPGMWNMLIVAMCLALLGCLQAQELQGHVSIILLGATGDLAKKYLWQGLFQLYLDEAGRGHSFSFHGAALTAPKQGQELMAKALESLSCPKDMAPSHCAEHKDQFLQLSQYRQLKTAEDYQALNKDIEAQLQHAGLREAGRIFYFSVPPFAYEDIARNINSSCRPGPGAWLRVVLEKPFGHDHFSAQQLATELGTFFQEEEMYRVDHYLGKQAVAQILPFRDQNRKALDGLWNRHHVERVEIIMKETVDAEGRTSFYEEYGVIRDVLQNHLTEVLTLVAMELPHNVSSAEAVLRHKLQVFQALRGLQRGSAVVGQYQSYSEQVRRELQKPDSFHSLTPTFAAVLVHIDNLRWEGVPFILMSGKALDERVGYARILFKNQACCVQSEKHWAAAQSQCLPRQLVFHIGHGDLGSPAVLVSRNLFRPSLPSSWKEMEGPPGLRLFGSPLSDYYAYSPVRERDAHSVLLSHIFHGRKNFFITTENLLASWNFWTPLLESLAHKAPRLYPGGAENGRLLDFEFSSGRLFFSQQQPEQLVPGPGPAPMPSDFQVLRAKYRESPLVSAWSEELISKLANDIEATAVRAVRRFGQFHLALSGGSSPVALFQQLATAHYGFPWAHTHLWLVDERCVPLSDPESNFQGLQAHLLQHVRIPYYNIHPMPVHLQQRLCAEEDQGAQIYAREISALVANSSFDLVLLGMGADGHTASLFPQSPTGLDGEQLVVLTTSPSQPHRRMSLSLPLINRAKKVAVLVMGRMKREITTLVSRVGHEPKKWPISGVLPHSGQLVWYMDYDAFLG\",\r\n                    9, true);\r\n\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetChangesAfterFrameshift_AfterFrameshift()\r\n        {\r\n            var observedResult = HgvsUtilities.GetChangesAfterFrameshift(4, \"MABCDEFGHIIIKL\", \"MABCEFGH*\");\r\n\r\n            Assert.Equal(5,   observedResult.Start);\r\n            Assert.Equal('D', observedResult.RefAminoAcid);\r\n            Assert.Equal('E', observedResult.AltAminoAcid);\r\n        }\r\n\r\n        [Fact]\r\n        public void 
GetChangesAfterFrameshift_AtEndAfterFrameshift()\r\n        {\r\n            var observedResult = HgvsUtilities.GetChangesAfterFrameshift(4, \"MABCDEFGHIIIKL\", \"MABCDEFGHIIIKLL*\");\r\n\r\n            Assert.Equal(15,  observedResult.Start);\r\n            Assert.Equal('*', observedResult.RefAminoAcid);\r\n            Assert.Equal('L', observedResult.AltAminoAcid);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetChangesAfterFrameshift_WhenStopRetained()\r\n        {\r\n            var observedResult = HgvsUtilities.GetChangesAfterFrameshift(4, \"MABCDEFGHIIIKL\", \"MABCDEFGHIIIKL*\");\r\n\r\n            Assert.Equal(15,  observedResult.Start);\r\n            Assert.Equal('*', observedResult.RefAminoAcid);\r\n            Assert.Equal('*', observedResult.AltAminoAcid);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetChangesAfterFrameshift_FirstAminoAcidIsStop()\r\n        {\r\n            var observedResult = HgvsUtilities.GetChangesAfterFrameshift(4, \"MABCDEFGHIIIKL\", \"MABCDEFGHIIIKL*\");\r\n\r\n            Assert.Equal(15,  observedResult.Start);\r\n            Assert.Equal('*', observedResult.RefAminoAcid);\r\n            Assert.Equal('*', observedResult.AltAminoAcid);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetAltPeptideSequence_Genomic()\r\n        {\r\n            var refSequence                  = GetGenomicRefSequence();\r\n            var transcript                   = GetGenomicTranscript();\r\n            const int cdsBegin               = 112;\r\n            const int cdsEnd                 = 121;\r\n            const string transcriptAltAllele = \"\";\r\n\r\n            const string expectedResult = \"RHRNRNTQTETNTETQRHRNTQKHRNKHRDTETHRNTETNTETQKHTETQKQTQRHRNTQKHTDRNKHRNTETQKYRNTQKHRNKHRDTETQKHSDAETQQHKHRNTETHRNRNTETNTETQTHRHRETQKHTETLKHSGRCPGCRGSIA\";\r\n            var observedResult =\r\n                HgvsUtilities.GetAltPeptideSequence(refSequence, cdsBegin, cdsEnd, transcriptAltAllele, 
transcript,\r\n                    false);\r\n\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdnaPositionOffset_Intron_RltL_Reverse()\r\n        {\r\n            var transcript = HgvsCodingNomenclatureTests.GetReverseTranscript();\r\n            var po = HgvsUtilities.GetPositionOffset(transcript, 137619, 1,true);\r\n\r\n            Assert.NotNull(po);\r\n            Assert.Equal(2, po.Offset);\r\n            Assert.Equal(1759, po.Position);\r\n            Assert.Equal(\"*909+2\", po.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdnaPositionOffset_Intron_ReqL_Reverse()\r\n        {\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 10, 108901173, 108918171, 422, 423)\r\n            };\r\n\r\n            var translation = new Mock<ITranslation>();\r\n            translation.SetupGet(x => x.CodingRegion).Returns(new CodingRegion(108813927, 108941437, 129, 1613, 1485));\r\n\r\n            var transcript = new Mock<ITranscript>();\r\n            transcript.SetupGet(x => x.Start).Returns(108810721);\r\n            transcript.SetupGet(x => x.End).Returns(108918171);\r\n            transcript.SetupGet(x => x.Gene.OnReverseStrand).Returns(true);\r\n            transcript.SetupGet(x => x.TranscriptRegions).Returns(regions);\r\n            transcript.SetupGet(x => x.Translation).Returns(translation.Object);\r\n\r\n            var po = HgvsUtilities.GetPositionOffset(transcript.Object, 108909672, 0, true);\r\n\r\n            Assert.NotNull(po);\r\n            Assert.Equal(8500, po.Offset);\r\n            Assert.Equal(422, po.Position);\r\n            Assert.Equal(\"294+8500\", po.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdnaPositionOffset_Intron_LltR_Reverse()\r\n        {\r\n            var transcript = HgvsCodingNomenclatureTests.GetReverseTranscript();\r\n       
     var po = HgvsUtilities.GetPositionOffset(transcript, 136000, 1, true);\r\n\r\n            Assert.NotNull(po);\r\n            Assert.Equal(-198, po.Offset);\r\n            Assert.Equal(1760, po.Position);\r\n            Assert.Equal(\"*910-198\", po.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdnaPositionOffset_Intron_LeqR_Reverse()\r\n        {\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 2, 134901, 135802, 1760, 2661),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 135803, 137619, 1759, 1760),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 137620, 139379, 1, 1759)\r\n            };\r\n\r\n            var translation = new Mock<ITranslation>();\r\n            translation.SetupGet(x => x.CodingRegion).Returns(new CodingRegion(138530, 139309, 71, 850, 780));\r\n\r\n            var transcript = new Mock<ITranscript>();\r\n            transcript.SetupGet(x => x.Start).Returns(134901);\r\n            transcript.SetupGet(x => x.End).Returns(139379);\r\n            transcript.SetupGet(x => x.Gene.OnReverseStrand).Returns(true);\r\n            transcript.SetupGet(x => x.TranscriptRegions).Returns(regions);\r\n            transcript.SetupGet(x => x.Translation).Returns(translation.Object);\r\n\r\n            var po = HgvsUtilities.GetPositionOffset(transcript.Object, 136711, 1, true);\r\n\r\n            Assert.NotNull(po);\r\n            Assert.Equal(909, po.Offset);\r\n            Assert.Equal(1759, po.Position);\r\n            Assert.Equal(\"*909+909\", po.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdnaPositionOffset_Gap_LeftSide_Forward()\r\n        {\r\n            var transcript = GetForwardGapTranscript();\r\n            var po         = HgvsUtilities.GetPositionOffset(transcript, 1101, 1,false);\r\n\r\n            Assert.NotNull(po);\r\n            Assert.Equal(0, 
po.Offset);\r\n            Assert.Equal(100, po.Position);\r\n            Assert.Equal(\"50\", po.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdnaPositionOffset_Gap_RightSide_Forward()\r\n        {\r\n            var transcript = GetForwardGapTranscript();\r\n            var po         = HgvsUtilities.GetPositionOffset(transcript, 1102, 1,true);\r\n\r\n            Assert.NotNull(po);\r\n            Assert.Equal(0, po.Offset);\r\n            Assert.Equal(101, po.Position);\r\n            Assert.Equal(\"51\", po.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdnaPositionOffset_Gap_LeftSide_Reverse()\r\n        {\r\n            var transcript = GetReverseGapTranscript();\r\n            var po         = HgvsUtilities.GetPositionOffset(transcript, 1102, 1,true);\r\n\r\n            Assert.NotNull(po);\r\n            Assert.Equal(0, po.Offset);\r\n            Assert.Equal(201, po.Position);\r\n            Assert.Equal(\"151\", po.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdnaPositionOffset_Gap_RightSide_Reverse()\r\n        {\r\n            var transcript = GetReverseGapTranscript();\r\n            var po         = HgvsUtilities.GetPositionOffset(transcript, 1103, 1,false);\r\n\r\n            Assert.NotNull(po);\r\n            Assert.Equal(0, po.Offset);\r\n            Assert.Equal(200, po.Position);\r\n            Assert.Equal(\"150\", po.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdnaPositionOffset_Intron_RltL_Forward()\r\n        {\r\n            var transcript = HgvsCodingNomenclatureTests.GetForwardTranscript();\r\n            var po = HgvsUtilities.GetPositionOffset(transcript, 1262210, 1,true);\r\n\r\n            Assert.NotNull(po);\r\n            Assert.Equal(-6, po.Offset);\r\n            Assert.Equal(337, po.Position);\r\n            Assert.Equal(\"-75-6\", po.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void 
GetCdnaPositionOffset_Intron_LltR_Forward()\r\n        {\r\n            var transcript = HgvsCodingNomenclatureTests.GetForwardTranscript();\r\n            var po = HgvsUtilities.GetPositionOffset(transcript, 1260583, 1,true);\r\n\r\n            Assert.NotNull(po);\r\n            Assert.Equal(101, po.Offset);\r\n            Assert.Equal(336, po.Position);\r\n            Assert.Equal(\"-76+101\", po.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdnaPositionOffset_Intron_LeqR_Forward()\r\n        {\r\n            var transcript = HgvsCodingNomenclatureTests.GetForwardTranscript();\r\n            var po = HgvsUtilities.GetPositionOffset(transcript, 1261349, 1,true);\r\n\r\n            Assert.NotNull(po);\r\n            Assert.Equal(867, po.Offset);\r\n            Assert.Equal(336, po.Position);\r\n            Assert.Equal(\"-76+867\", po.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdnaPositionOffset_Exon_Forward()\r\n        {\r\n            var transcript = HgvsCodingNomenclatureTests.GetForwardTranscript();\r\n            var po = HgvsUtilities.GetPositionOffset(transcript, 1262627, 4,true);\r\n\r\n            Assert.NotNull(po);\r\n            Assert.Equal(0, po.Offset);\r\n            Assert.Equal(540, po.Position);\r\n            Assert.Equal(\"129\", po.Value);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdnaPositionOffset_Exon_Reverse()\r\n        {\r\n            var transcript = HgvsCodingNomenclatureTests.GetReverseTranscript();\r\n            var po = HgvsUtilities.GetPositionOffset(transcript, 137721, 2,true);\r\n\r\n            Assert.NotNull(po);\r\n            Assert.Equal(0, po.Offset);\r\n            Assert.Equal(1659, po.Position);\r\n            Assert.Equal(\"*809\", po.Value);\r\n        }\r\n        //temp skipping to run smoke tests\r\n        //[Fact]\r\n        //public void GetCdnaPositionOffset_RnaEdits()\r\n        //{\r\n        //    var transcript = 
GetRnaEditTranscript();\r\n        //    var positionOffset = HgvsUtilities.GetCdnaPositionOffset(transcript, 51135987, 20);\r\n\r\n        //    Assert.NotNull(positionOffset);\r\n        //    Assert.False(positionOffset.HasStopCodonNotation);\r\n        //    Assert.Equal(0, positionOffset.Offset);\r\n        //    Assert.Equal(1343, positionOffset.Position);\r\n        //    Assert.Equal(\"1343\", positionOffset.Value);\r\n        //}\r\n\r\n        [Fact]\r\n        public void GetCdnaPositionOffset_Gap_Forward_ReturnNull()\r\n        {\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Gap, 1, 134901, 135802, 1760, 2661)\r\n            };\r\n\r\n            var translation = new Mock<ITranslation>();\r\n            translation.SetupGet(x => x.CodingRegion).Returns(new CodingRegion(138530, 139309, 71, 850, 780));\r\n\r\n            var transcript = new Mock<ITranscript>();\r\n            transcript.SetupGet(x => x.Start).Returns(134901);\r\n            transcript.SetupGet(x => x.End).Returns(139379);\r\n            transcript.SetupGet(x => x.Gene.OnReverseStrand).Returns(false);\r\n            transcript.SetupGet(x => x.TranscriptRegions).Returns(regions);\r\n            transcript.SetupGet(x => x.Translation).Returns(translation.Object);\r\n\r\n            var po = HgvsUtilities.GetPositionOffset(transcript.Object, 135001, 0,false);\r\n\r\n            Assert.NotNull(po);\r\n            Assert.Equal(0, po.Offset);\r\n            Assert.Equal(1760, po.Position);\r\n            Assert.Equal(\"*910\", po.Value);\r\n        }\r\n\r\n        private static ISequence GetGenomicRefSequence()\r\n        {\r\n            return new SimpleSequence(\r\n                
\"AGACACAGAAACAGAAACACACAGACAGAAACAAACACAGAGACACAGAGACACAGAAACACACAGAAACACAGAAACAAACACAGAGACACAGAAACACACAGAAACACACAGAAACACAGAAACAAACACAGAGACACAGAAACACACAGAAACACAGAAACAAACACAGAGACACAGAAACACACAGAAACACACAGACAGAAACAAACACAGAAACACAGAGACACAGAAATACAGAAACACACAGAAACACAGAAACAAACACAGAGACACAGAGACACAGAAACACAGCGACGCAGAAACACAGCAACACAAACACAGAAACACAGAAACACACAGAAACAGAAACACAGAAACAAACACAGAGACACAGACACACAGACACAGAGAAACACAGAAACACACAGAAACACTGAAACACAGTGGGCGGTGTCCAGGCTGCAGAGGCTCCATCGCTGT\",\r\n                2258580);\r\n        }\r\n\r\n        private static ITranscript GetGenomicTranscript()\r\n        {\r\n            var transcriptRegions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 2258581, 2259042, 1, 462)\r\n            };\r\n\r\n            var transcript = new Mock<ITranscript>();\r\n            transcript.SetupGet(x => x.TranscriptRegions).Returns(transcriptRegions);\r\n            transcript.SetupGet(x => x.StartExonPhase).Returns(0);\r\n            transcript.SetupGet(x => x.Gene.OnReverseStrand).Returns(false);\r\n            transcript.SetupGet(x => x.Translation.CodingRegion.CdnaStart).Returns(1);\r\n            return transcript.Object;\r\n        }\r\n\r\n        private static ITranscript GetReverseGapTranscript()\r\n        {\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   1, 1001, 1100, 201, 300),\r\n                new TranscriptRegion(TranscriptRegionType.Gap,    1, 1101, 1103, 200, 201),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   1, 1104, 1203, 101, 200),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 1204, 1303, 100, 101),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   1, 1304, 1403, 1, 100)\r\n            };\r\n\r\n            var translation = new Mock<ITranslation>();\r\n            translation.SetupGet(x => x.CodingRegion).Returns(new 
CodingRegion(1051, 1353, 51, 250, 200));\r\n\r\n            var transcript = new Mock<ITranscript>();\r\n            transcript.SetupGet(x => x.Start).Returns(1001);\r\n            transcript.SetupGet(x => x.End).Returns(1403);\r\n            transcript.SetupGet(x => x.Gene.OnReverseStrand).Returns(true);\r\n            transcript.SetupGet(x => x.TranscriptRegions).Returns(regions);\r\n            transcript.SetupGet(x => x.Translation).Returns(translation.Object);\r\n\r\n            return transcript.Object;\r\n        }\r\n\r\n        private static ITranscript GetForwardGapTranscript()\r\n        {\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   1, 1001, 1100, 1, 100),\r\n                new TranscriptRegion(TranscriptRegionType.Gap,    1, 1101, 1103, 100, 101),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   1, 1104, 1203, 101, 200),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 1204, 1303, 200, 201),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   2, 1304, 1403, 201, 300)\r\n            };\r\n\r\n            var translation = new Mock<ITranslation>();\r\n            translation.SetupGet(x => x.CodingRegion).Returns(new CodingRegion(1051, 1353, 51, 250, 200));\r\n\r\n            var transcript = new Mock<ITranscript>();\r\n            transcript.SetupGet(x => x.Start).Returns(1001);\r\n            transcript.SetupGet(x => x.End).Returns(1403);\r\n            transcript.SetupGet(x => x.Gene.OnReverseStrand).Returns(false);\r\n            transcript.SetupGet(x => x.TranscriptRegions).Returns(regions);\r\n            transcript.SetupGet(x => x.Translation).Returns(translation.Object);\r\n\r\n            return transcript.Object;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/HgvscNotationTests.cs",
    "content": "﻿using VariantAnnotation.AnnotatedPositions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions\r\n{\r\n    public sealed class HgvscNotationTests\r\n    {\r\n        // NM_004006.1:c.93G>T\r\n        [Fact]\r\n        public void ToString_substitution()\r\n        {\r\n            var startPosOff = new PositionOffset(93, 0, \"93\");\r\n            var endPosOff   = new PositionOffset(93, 0, \"93\");\r\n\r\n            var hgvsc = new HgvscNotation(\"G\", \"T\", \"NM_004006.1\", GenomicChange.Substitution, startPosOff, endPosOff, true);\r\n\r\n            Assert.Equal(\"NM_004006.1:c.93G>T\", hgvsc.ToString());\r\n        }\r\n\r\n        // NM_012232.1:c.19del (one nucleotide)\r\n        [Fact]\r\n        public void ToString_deletion_one_base()\r\n        {\r\n            var startPosOff = new PositionOffset(19, 0, \"19\");\r\n            var endPosOff   = new PositionOffset(19, 0, \"19\");\r\n\r\n            var hgvsc = new HgvscNotation(\"T\", \"\", \"NM_012232.1\", GenomicChange.Deletion, startPosOff, endPosOff, true);\r\n\r\n            Assert.Equal(\"NM_012232.1:c.19del\", hgvsc.ToString());\r\n        }\r\n\r\n        // NM_012232.1:c.19_21delTGC (multiple nucleotide)\r\n        [Fact]\r\n        public void ToString_deletion_multiple_base()\r\n        {\r\n            var startPosOff = new PositionOffset(19, 0, \"19\");\r\n            var endPosOff   = new PositionOffset(21, 0, \"21\");\r\n\r\n            var hgvsc = new HgvscNotation(\"TGC\", \"\", \"NM_012232.1\", GenomicChange.Deletion, startPosOff, endPosOff, true);\r\n\r\n            Assert.Equal(\"NM_012232.1:c.19_21del\", hgvsc.ToString());\r\n        }\r\n\r\n        // NM_012232.1:c.7dupT (one base duplication)\r\n        [Fact]\r\n        public void ToString_one_base_duplication()\r\n        {\r\n            var startPosOff = new PositionOffset(7, 0, \"7\");\r\n            var endPosOff   = new PositionOffset(7, 0, \"7\");\r\n\r\n            var hgvsc 
= new HgvscNotation(\"T\", \"T\", \"NM_012232.1\", GenomicChange.Duplication, startPosOff, endPosOff, true);\r\n\r\n            Assert.Equal(\"NM_012232.1:c.7dup\", hgvsc.ToString());\r\n        }\r\n\r\n        // NM_012232.1:c.6_8dupTGC (multi base duplication)\r\n        [Fact]\r\n        public void ToString_multi_base_duplication()\r\n        {\r\n            var startPosOff = new PositionOffset(6, 0, \"6\");\r\n            var endPosOff   = new PositionOffset(8, 0, \"8\");\r\n\r\n            var hgvsc = new HgvscNotation(\"TGC\", \"TGC\", \"NM_012232.1\", GenomicChange.Duplication, startPosOff, endPosOff, true);\r\n\r\n            Assert.Equal(\"NM_012232.1:c.6_8dup\", hgvsc.ToString());\r\n        }\r\n\r\n        // NM_012232.1:c.5756_5757insAGG (multi base insertion)\r\n        [Fact]\r\n        public void ToString_insertion()\r\n        {\r\n            var startPosOff = new PositionOffset(5756, 0, \"5756\");\r\n            var endPosOff   = new PositionOffset(5757, 0, \"5757\");\r\n\r\n            var hgvsc = new HgvscNotation(\"\", \"AGG\", \"NM_012232.1\", GenomicChange.Insertion, startPosOff, endPosOff, true);\r\n\r\n            Assert.Equal(\"NM_012232.1:c.5756_5757insAGG\", hgvsc.ToString());\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/HgvsgNotationTests.cs",
    "content": "﻿using Genome;\r\nusing Intervals;\r\nusing UnitTests.TestDataStructures;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.AnnotatedPositions;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions\r\n{\r\n    public sealed class HgvsgNotationTests\r\n    {\r\n        private static readonly ISequence SimpleSequence     = new SimpleSequence(\"ATCGGTGCTGACGATACCTGACGTAAGTA\");\r\n        private readonly        IInterval _referenceInterval = new Interval(0, SimpleSequence.Length);\r\n        private const           string    RefSeqAccession    = \"NC_012920.1\";\r\n\r\n        [Theory]\r\n        [InlineData(5, 5, \"G\", \"T\", VariantType.SNV, \"NC_012920.1:m.5G>T\")]\r\n        [InlineData(5, 5, \"G\", \"G\", VariantType.SNV, \"NC_012920.1:m.5=\")]\r\n        [InlineData(5, 7, \"GTG\", \"\", VariantType.deletion, \"NC_012920.1:m.5_7del\")]\r\n        [InlineData(10, 12, \"GAC\", \"\", VariantType.deletion, \"NC_012920.1:m.12_14del\")]\r\n        [InlineData(16, 15, \"\", \"GATA\", VariantType.insertion, \"NC_012920.1:m.15_16insGATA\")]\r\n        [InlineData(19, 22, \"TGAC\", \"GTCA\", VariantType.MNV, \"NC_012920.1:m.19_22inv\")]\r\n        [InlineData(10, 9, \"\", \"GAC\", VariantType.insertion, \"NC_012920.1:m.12_14dup\")]\r\n        public void GetNotation_MT(int start, int end, string referenceAllele, string altAllele, VariantType type, string expectedHgvs)\r\n        {\r\n            var    simpleVariant = new SimpleVariant(ChromosomeUtilities.ChrM, start, end, referenceAllele, altAllele, type);\r\n            string actualHgvs    = HgvsgNotation.GetNotation(RefSeqAccession, simpleVariant, SimpleSequence, _referenceInterval);\r\n            Assert.Equal(expectedHgvs, actualHgvs);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/HgvspNotationTests.cs",
    "content": "using VariantAnnotation.AnnotatedPositions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions\r\n{\r\n    public sealed class HgvspNotationTests\r\n    {\r\n        [Fact]\r\n        //hgvs example: LRG_199p1:p.Trp24Cys\r\n        public void Missense_substitution()\r\n        {\r\n            Assert.Equal(\"LRG_199p1:p.(Trp24Cys)\", HgvspNotation.GetSubstitutionNotation(\"LRG_199p1\",24, \"Trp\", \"Cys\"));\r\n        }\r\n\r\n        [Fact]\r\n        //hgvs example: LRG_199p1:p.Trp24Ter \r\n        public void Nonsense_substitution()\r\n        {\r\n            Assert.Equal(\"LRG_199p1:p.(Trp24Ter)\", HgvspNotation.GetSubstitutionNotation(\"LRG_199p1\", 24,\"Trp\", \"Ter\"));\r\n        }\r\n\r\n        [Fact]\r\n        //hgvs example: NP_003997.1:p.Cys188=\r\n        public void Silent_substitution()\r\n        {\r\n            Assert.Equal(\"NP_003997.1:c.XXX(p.(Cys188=))\", HgvspNotation.GetSilentNotation(\"NP_003997.1:c.XXX\", 188, \"Cys\", false));\r\n        }\r\n\r\n        [Fact]\r\n        //hgvs example: LRG_199p1:p.(Met1?)\r\n        public void StartLost_due_to_substitution()\r\n        {\r\n            Assert.Equal(\"LRG_199p1:p.(Met1?)\", HgvspNotation.GetSubstitutionNotation(\"LRG_199p1\", 1, \"Met\",\"Cys\"));\r\n        }\r\n\r\n        [Fact]\r\n        // hgvs example:NP_003997.1:p.(Ala3del)\r\n        public void One_aminoAcid_deletion()\r\n        {\r\n\t\t\tAssert.Equal(\"NP_003997.1:p.(Ala3del)\", HgvspNotation.GetDeletionNotation(\"NP_003997.1\", 3, 3, \"Ala\", false));\r\n        }\r\n\r\n        [Fact]\r\n        // hgvs example:NP_003997.1:p.(Ala3_Ser5del)\r\n        public void Multiple_aminoAcid_deletion()\r\n        {\r\n            Assert.Equal(\"NP_003997.1:p.(Ala3_Ser5del)\", HgvspNotation.GetDeletionNotation(\"NP_003997.1\", 3, 5,\"AlaLysSer\", false));\r\n        }\r\n\r\n\t    [Fact]\r\n\t\t//p.Trp26Ter\r\n\t\tpublic void Deletion_gained_stop()\r\n\t    {\r\n\t\t    
Assert.Equal(\"NP_003997.1:p.(Trp26Ter)\", HgvspNotation.GetDeletionNotation(\"NP_003997.1\", 26, 27, \"Trp\", true));\r\n\t\t}\r\n\r\n\t    [Fact]\r\n\t    public void Unknown_start_equals_end()\r\n\t    {\r\n\t\t\tAssert.Equal(\"NP_003997.1:p.(Arg26Cys)\", HgvspNotation.GetUnknownNotation(\"NP_003997.1\", 26, 26,  \"Arg\",\"Cys\"));\r\n\t    }\r\n\r\n\t    [Fact]\r\n\t    public void Unknown_start_not_equals_end()\r\n\t    {\r\n\t\t    Assert.Equal(\"NP_003997.1:p.(Arg26_Cys27)\", HgvspNotation.GetUnknownNotation(\"NP_003997.1\", 26, 27, \"Arg\", \"Cys\"));\r\n\t    }\r\n\r\n\t\t[Fact]\r\n        // hgvs example:NP_003997.1:p.(Ala3dup)\r\n        public void One_aminoAcid_duplication()\r\n        {\r\n            Assert.Equal(\"NP_003997.1:p.(Ala3dup)\", HgvspNotation.GetDuplicationNotation(\"NP_003997.1\", 3, 3, \"Ala\"));\r\n        }\r\n\r\n        [Fact]\r\n        // hgvs example:NP_003997.1:p.(Ala3_Ser5dup)\r\n        public void Multiple_aminoAcid_duplication()\r\n        {\r\n            Assert.Equal(\"NP_003997.1:p.(Ala3_Ser5dup)\", HgvspNotation.GetDuplicationNotation(\"NP_003997.1\",3, 5, \"AlaLysSer\"));\r\n        }\r\n\r\n        [Fact]\r\n        // hgvs example:NP_003997.1:p.(His4_Gln5insAla)\r\n        public void One_aminoAcid_insertion()\r\n        {\r\n            Assert.Equal(\"NP_003997.1:p.(His4_Gln5insAla)\", HgvspNotation.GetInsertionNotation(\"NP_003997.1\", 4, 5, \"Ala\",\"MBCHQDE\"));\r\n        }\r\n\r\n\t\t[Fact]\r\n\t\tpublic void Insert_stop_codon()\r\n\t\t{\r\n\t\t\tAssert.Equal(\"NP_003997.1:p.(Gln5Ter)\", HgvspNotation.GetInsertionNotation(\"NP_003997.1\", 4, 5, \"TerAla\", \"MBCHQDE\"));\r\n\t\t}\r\n\r\n\t    [Fact]\r\n\t    public void Insert_past_stop()\r\n\t    {\r\n\t\t\tAssert.Null(HgvspNotation.GetInsertionNotation(\"NP_003997.1\", 8, 9, \"TerAla\", \"MBCHQDE\"));\r\n\t    }\r\n\t\t\r\n\t    [Fact]\r\n        // hgvs example:NP_003997.1:p.(Lys2_Gly3insGlnSerLys)\r\n        public void Multiple_aminoAcid_insertion()\r\n    
    {\r\n            Assert.Equal(\"NP_003997.1:p.(Lys2_Gly3insGlnSerLys)\", HgvspNotation.GetInsertionNotation(\"NP_003997.1\", 2, 3, \"GlnSerLys\", \"MKGABC\"));\r\n        }\r\n\r\n\t    [Fact]\r\n\t    // hgvs example:NP_003997.1:p.(Lys2_Gly3insGlnSerLys)\r\n\t    public void Insertion_at_end()\r\n\t    {\r\n\t\t    Assert.Equal(\"NP_003997.1:p.(Cys6_Ter7insGlnSerLys)\", HgvspNotation.GetInsertionNotation(\"NP_003997.1\", 6, 7, \"GlnSerLys\", \"MKGABC*\"));\r\n\t    }\r\n\r\n\t\t[Fact]\r\n        // hgvs example:NP_003997.1:p.(Cys28delinsTrpVal)\r\n        public void Del_one_ins_two()\r\n        {\r\n            Assert.Equal(\"NP_003997.1:p.(Cys28delinsTrpVal)\", HgvspNotation.GetDelInsNotation(\"NP_003997.1\", 28, 28, \"Cys\", \"TrpVal\"));\r\n        }\r\n\r\n        [Fact]\r\n        // hgvs example:NP_003997.1:p.(Cys28_Lys29delinsTrp)\r\n        public void Del_two_ins_one()\r\n        {\r\n            Assert.Equal(\"NP_003997.1:p.(Cys28_Lys29delinsTrp)\", HgvspNotation.GetDelInsNotation(\"NP_003997.1\", 28, 29, \"CysLys\", \"Trp\"));\r\n        }\r\n\r\n        [Fact]\r\n        // hgvs example:NP_003997.1:p.(Pro578_Lys579delinsLeuTer)\r\n        public void Del_two_ins_stop()\r\n        {\r\n            Assert.Equal(\"NP_003997.1:p.(Pro578_Lys579delinsLeuTer)\", HgvspNotation.GetDelInsNotation(\"NP_003997.1\", 578, 579, \"ProLys\", \"LeuTer\"));\r\n        }\r\n\r\n\t    [Fact]\r\n\t\t//Pro578_Lys579 goes to TerLeu\r\n\t\tpublic void Delins_becomes_substitution_of_Ter()\r\n\t    {\r\n\t\t    Assert.Equal(\"NP_003997.1:p.(Pro578Ter)\", HgvspNotation.GetDelInsNotation(\"NP_003997.1\", 578, 579, \"ProLys\", \"TerLeu\"));\r\n\t\t}\r\n\r\n\t    [Fact]\r\n        // hgvs example:NP_003997.1:p.(Arg97ProfsTer23)\r\n        public void Frameshift_with_known_countToStop()\r\n        {\r\n            Assert.Equal(\"NP_003997.1:p.(Arg97ProfsTer23)\", HgvspNotation.GetFrameshiftNotation(\"NP_003997.1\", 97, \"Arg\", \"Pro\",23));\r\n        }\r\n\r\n        
[Fact]\r\n        // hgvs example:NP_003997.1:p.(Tyr4Ter)\r\n        public void Frameshift_gains_immediate_stop()\r\n        {\r\n            Assert.Equal(\"NP_003997.1:p.(Tyr4Ter)\", HgvspNotation.GetFrameshiftNotation(\"NP_003997.1\", 4, \"Tyr\", \"TerCysIle\", -1));\r\n        }\r\n\r\n        [Fact]\r\n        // hgvs example:NP_003997.1:p.(Ile327ArgfsTer?)\r\n        public void Frameshift_unknown_countToStop()\r\n        {\r\n            Assert.Equal(\"NP_003997.1:p.(Ile327ArgfsTer?)\", HgvspNotation.GetFrameshiftNotation(\"NP_003997.1\", 327, \"Ile\", \"Arg\", -1));\r\n        }\r\n\r\n\t    [Fact]\r\n\t    \r\n\t    public void Frameshift_due_to_insertion()\r\n\t    {\r\n\t\t    Assert.Equal(\"NP_003997.1:p.(Cys3ArgfsTer40)\", HgvspNotation.GetFrameshiftNotation(\"NP_003997.1\", 3, \"Cys\", \"Arg\", 40));\r\n\t    }\r\n\r\n\t    [Fact]\r\n\t\t//NP_001263627.1:p.(Met1?)\r\n\t\tpublic void Start_lost_start_equals_end()\r\n\t    {\r\n\t\t    Assert.Equal(\"NP_001263627.1:p.?\", HgvspNotation.GetStartLostNotation(\"NP_001263627.1\"));\r\n\t\t}\r\n\r\n\t    [Fact]\r\n\t    //NP_001263627.1:p.(Met1?)\r\n\t    public void Start_lost_start_not_equals_end()\r\n\t    {\r\n\t\t    Assert.Equal(\"NP_001263627.1:p.?\", HgvspNotation.GetStartLostNotation(\"NP_001263627.1\"));\r\n\t    }\r\n\r\n\t    [Fact]\r\n\t\t// from varnom: p.Ter110Glnext*17\r\n\t\tpublic void Stop_lost_with_countToEnd()\r\n\t    {\r\n\t\t    Assert.Equal(\"NP_001263627.1:p.(Ter110GlnextTer17)\", HgvspNotation.GetExtensionNotation(\"NP_001263627.1\",110, \"Ter\", \"Gln\", 17));\r\n\t\t}\r\n\r\n\t\t//p.Ter327Argext*?\r\n\t\t[Fact]\r\n\t    public void Stop_lost_without_countToEnd()\r\n\t    {\r\n\t\t    Assert.Equal(\"NP_001263627.1:p.(Ter327ArgextTer?)\", HgvspNotation.GetExtensionNotation(\"NP_001263627.1\", 327, \"Ter\", \"Arg\", -1));\r\n\t    }\r\n\r\n\t}\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/ProteinChangeTests.cs",
    "content": "﻿using VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Moq;\r\nusing VariantAnnotation.AnnotatedPositions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions\r\n{\r\n\tpublic sealed class ProteinChangeTests\r\n\t{\r\n\t\t[Fact]\r\n\t\tpublic void Substitution()\r\n\t\t{\r\n\t\t\tvar variantEffect = new Mock<IVariantEffect>();\r\n\t\t\tvariantEffect.Setup(x => x.IsFrameshiftVariant()).Returns(false);\r\n\r\n\t\t\tvariantEffect.Setup(x => x.IsStopRetained()).Returns(false);\r\n\t\t\tvariantEffect.Setup(x => x.IsStartLost()).Returns(false);\r\n\t\t\tvariantEffect.Setup(x => x.IsStopLost()).Returns(false);\r\n\r\n\t\t\tvar proteinChange = HgvsProteinNomenclature.GetProteinChange(5, \"A\", \"B\", \"MACTAWR\", variantEffect.Object);\r\n\r\n\t\t\tAssert.Equal(ProteinChange.Substitution, proteinChange);\r\n\t\t}\r\n\r\n\t\t[Fact]\r\n\t\tpublic void Single_base_deletion()\r\n\t\t{\r\n\t\t\tvar variantEffect = new Mock<IVariantEffect>();\r\n\t\t\tvariantEffect.Setup(x => x.IsFrameshiftVariant()).Returns(false);\r\n\r\n\t\t\tvariantEffect.Setup(x => x.IsStopRetained()).Returns(false);\r\n\t\t\tvariantEffect.Setup(x => x.IsStartLost()).Returns(false);\r\n\t\t\tvariantEffect.Setup(x => x.IsStopLost()).Returns(false);\r\n\r\n\t\t\tvar proteinChange = HgvsProteinNomenclature.GetProteinChange(5, \"A\", \"\", \"MACTAWR\", variantEffect.Object);\r\n\r\n\t\t\tAssert.Equal(ProteinChange.Deletion, proteinChange);\r\n\t\t}\r\n\r\n\t\t[Fact]\r\n\t\tpublic void Frameshift()\r\n\t\t{\r\n\t\t\tvar variantEffect = new Mock<IVariantEffect>();\r\n\t\t\tvariantEffect.Setup(x => x.IsFrameshiftVariant()).Returns(true);\r\n\r\n\t\t\tvariantEffect.Setup(x => x.IsStopRetained()).Returns(false);\r\n\t\t\tvariantEffect.Setup(x => x.IsStartLost()).Returns(false);\r\n\t\t\tvariantEffect.Setup(x => x.IsStopLost()).Returns(false);\r\n\r\n\t\t\tvar proteinChange = HgvsProteinNomenclature.GetProteinChange(5, \"A\", \"C\", \"MACTAWR\", 
variantEffect.Object);\r\n\r\n\t\t\tAssert.Equal(ProteinChange.Frameshift, proteinChange);\r\n\t\t}\r\n\r\n\t\t[Fact]\r\n\t\tpublic void Extension()\r\n\t\t{\r\n\t\t\tvar variantEffect = new Mock<IVariantEffect>();\r\n\t\t\tvariantEffect.Setup(x => x.IsFrameshiftVariant()).Returns(false);\r\n\r\n\t\t\tvariantEffect.Setup(x => x.IsStopRetained()).Returns(false);\r\n\t\t\tvariantEffect.Setup(x => x.IsStartLost()).Returns(false);\r\n\t\t\tvariantEffect.Setup(x => x.IsStopLost()).Returns(true);\r\n\r\n\t\t\tvar proteinChange = HgvsProteinNomenclature.GetProteinChange(5, \"*\", \"C\", \"MACTAWR\", variantEffect.Object);\r\n\r\n\t\t\tAssert.Equal(ProteinChange.Extension, proteinChange);\r\n\t\t}\r\n\r\n\t\t[Fact]\r\n\t\tpublic void Duplication()\r\n\t\t{\r\n\t\t\tvar variantEffect = new Mock<IVariantEffect>();\r\n\t\t\tvariantEffect.Setup(x => x.IsFrameshiftVariant()).Returns(false);\r\n\r\n\t\t\tvariantEffect.Setup(x => x.IsStopRetained()).Returns(false);\r\n\t\t\tvariantEffect.Setup(x => x.IsStartLost()).Returns(false);\r\n\t\t\tvariantEffect.Setup(x => x.IsStopLost()).Returns(false);\r\n\r\n\t\t\tvar proteinChange = HgvsProteinNomenclature.GetProteinChange(6, \"\", \"A\", \"MACTAWR\", variantEffect.Object);\r\n\r\n\t\t\tAssert.Equal(ProteinChange.Duplication, proteinChange);\r\n\t\t}\r\n\r\n\t\t[Fact]\r\n\t\tpublic void Insertion()\r\n\t\t{\r\n\t\t\tvar variantEffect = new Mock<IVariantEffect>();\r\n\t\t\tvariantEffect.Setup(x => x.IsFrameshiftVariant()).Returns(false);\r\n\r\n\t\t\tvariantEffect.Setup(x => x.IsStopRetained()).Returns(false);\r\n\t\t\tvariantEffect.Setup(x => x.IsStartLost()).Returns(false);\r\n\t\t\tvariantEffect.Setup(x => x.IsStopLost()).Returns(false);\r\n\r\n\t\t\tvar proteinChange = HgvsProteinNomenclature.GetProteinChange(4, \"\", \"A\", \"MACTAWR\", variantEffect.Object);\r\n\r\n\t\t\tAssert.Equal(ProteinChange.Insertion, proteinChange);\r\n\t\t}\r\n\t}\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/RegulatoryRegionAnnotatorTests.cs",
    "content": "﻿using System.Linq;\r\nusing Moq;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.AnnotatedPositions;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Caches;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions\r\n{\r\n    public sealed class RegulatoryRegionAnnotatorTests\r\n    {\r\n        [Fact]\r\n        public void Annotate_Promoter()\r\n        {\r\n            var variant          = GetVariant();\r\n            var regulatoryRegion = GetRegulatoryRegion();\r\n\r\n            const ConsequenceTag expectedConsequence = ConsequenceTag.regulatory_region_variant;\r\n            var annotatedRegulatoryRegion = RegulatoryRegionAnnotator.Annotate(variant, regulatoryRegion);\r\n            var consequences = annotatedRegulatoryRegion.Consequences.ToList();\r\n\r\n            Assert.NotNull(annotatedRegulatoryRegion);\r\n            Assert.Single(consequences);\r\n            Assert.Equal(expectedConsequence, consequences[0]);\r\n        }\r\n\r\n        private static IRegulatoryRegion GetRegulatoryRegion()\r\n        {\r\n            return new RegulatoryRegion(ChromosomeUtilities.Chr1, 948000, 950401, CompactId.Convert(\"ENSR00001037666\"),\r\n                RegulatoryRegionType.promoter);\r\n        }\r\n\r\n        private static IVariant GetVariant()\r\n        {\r\n            var variant = new Mock<IVariant>();\r\n            variant.SetupGet(x => x.Type).Returns(VariantType.SNV);\r\n            variant.SetupGet(x => x.Start).Returns(949523);\r\n            variant.SetupGet(x => x.End).Returns(949523);\r\n            variant.SetupGet(x => x.Behavior).Returns(AnnotationBehavior.SmallVariants);\r\n            return variant.Object;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/AminoAcidTests.cs",
    "content": "﻿using System;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.TranscriptAnnotation;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public sealed class AminoAcidTests\r\n    {\r\n        private readonly AminoAcids _standardAminoAcids = new(false);\r\n        private readonly AminoAcids _mitoAminoAcids     = new(true);\r\n\r\n        [Fact]\r\n        public void AddUnknownAminoAcid_ExpectedResults()\r\n        {\r\n            const string aminoAcids = \"MACGYIL\";\r\n            Assert.Equal(aminoAcids + 'X', AminoAcids.AddUnknownAminoAcid(aminoAcids));\r\n        }\r\n\r\n        [Fact]\r\n        public void AddUnknownAminoAcid_SameIfStopCodon()\r\n        {\r\n            const string aminoAcids = \"*\";\r\n            Assert.Equal(aminoAcids, AminoAcids.AddUnknownAminoAcid(aminoAcids));\r\n        }\r\n\r\n        [Fact]\r\n        public void Translate_ExpectedResults()\r\n        {\r\n            SequenceChange aa = _standardAminoAcids.Translate(\"TTC\", \"CTC\");\r\n            Assert.Equal(\"F\", aa.Reference);\r\n            Assert.Equal(\"L\", aa.Alternate);\r\n        }\r\n\r\n        [Fact]\r\n        public void Translate_NullOrEmptyInput_ReturnEmpty()\r\n        {\r\n            SequenceChange aa = _standardAminoAcids.Translate(null, null);\r\n            Assert.Equal(\"\", aa.Reference);\r\n            Assert.Equal(\"\", aa.Alternate);\r\n\r\n            aa = _standardAminoAcids.Translate(\"\", \"\");\r\n            Assert.Equal(\"\", aa.Reference);\r\n            Assert.Equal(\"\", aa.Alternate);\r\n        }\r\n\r\n        [Fact]\r\n        public void Translate_NsInInput_ReturnEmpty()\r\n        {\r\n            SequenceChange aa = _standardAminoAcids.Translate(\"ANA\", \"AAA\");\r\n            Assert.Equal(\"\", aa.Reference);\r\n            Assert.Equal(\"\", aa.Alternate);\r\n\r\n            aa = _standardAminoAcids.Translate(\"AAA\", 
\"ANA\");\r\n            Assert.Equal(\"\", aa.Reference);\r\n            Assert.Equal(\"\", aa.Alternate);\r\n        }\r\n\r\n        [Fact]\r\n        public void ConvertAminoAcidToAbbreviation_ThrowException()\r\n        {\r\n            Assert.Throws<NotSupportedException>(delegate { AminoAcids.ConvertAminoAcidToAbbreviation('a'); });\r\n        }\r\n\r\n        [Theory]\r\n        [ClassData(typeof(StandardGeneticCodeData))]\r\n        public void ConvertTripletToAminoAcid_StandardGeneticCode(char expectedResult, string[] triplets)\r\n        {\r\n            foreach (string triplet in triplets)\r\n            {\r\n                char actualResult = _standardAminoAcids.ConvertTripletToAminoAcid(triplet);\r\n                Assert.Equal(expectedResult, actualResult);\r\n            }\r\n        }\r\n\r\n        [Theory]\r\n        [ClassData(typeof(VertebrateMitochondrialCodeData))]\r\n        public void ConvertTripletToAminoAcid_VertebrateMitochondrialCode(char expectedResult, string[] triplets)\r\n        {\r\n            foreach (string triplet in triplets)\r\n            {\r\n                char actualResult = _mitoAminoAcids.ConvertTripletToAminoAcid(triplet);\r\n                Assert.Equal(expectedResult, actualResult);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void GetAbbreviations_ExpectedResults()\r\n        {\r\n            Assert.Equal(\"AspTyrCys\", AminoAcids.GetAbbreviations(\"DYC\"));\r\n        }\r\n\r\n        [Fact]\r\n        public void GetAbbreviations_SingleAA_ExpectedResults()\r\n        {\r\n            Assert.Equal(\"Tyr\", AminoAcids.GetAbbreviations(\"Y\"));\r\n        }\r\n\r\n        [Fact]\r\n        public void GetAbbreviations_NullOrEmptyInput_ReturnEmpty()\r\n        {\r\n            Assert.Equal(\"\", AminoAcids.GetAbbreviations(null));\r\n            Assert.Equal(\"\", AminoAcids.GetAbbreviations(\"\"));\r\n        }\r\n\r\n        [Fact]\r\n        public void 
TranslateBases_ExpectedResults()\r\n        {\r\n            const string expectedResult = \"RAD\";\r\n            string       actualResult   = _standardAminoAcids.TranslateBases(\"CGCGCAGAT\", true);\r\n            Assert.Equal(expectedResult, actualResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void TranslateBases_NullInput_ReturnNull()\r\n        {\r\n            Assert.Null(_standardAminoAcids.TranslateBases(null, true));\r\n        }\r\n\r\n        private sealed class StandardGeneticCodeData : TheoryData<char, string[]>\r\n        {\r\n            public StandardGeneticCodeData()\r\n            {\r\n                Add('A', new[] {\"GCT\", \"GCC\", \"GCA\", \"GCG\"});\r\n                Add('R', new[] {\"CGT\", \"CGC\", \"CGA\", \"CGG\", \"AGA\", \"AGG\"});\r\n                Add('N', new[] {\"AAT\", \"AAC\"});\r\n                Add('D', new[] {\"GAT\", \"GAC\"});\r\n                Add('C', new[] {\"TGT\", \"TGC\"});\r\n                Add('Q', new[] {\"CAA\", \"CAG\"});\r\n                Add('E', new[] {\"GAA\", \"GAG\"});\r\n                Add('G', new[] {\"GGT\", \"GGC\", \"GGA\", \"GGG\"});\r\n                Add('H', new[] {\"CAT\", \"CAC\"});\r\n                Add('I', new[] {\"ATT\", \"ATC\", \"ATA\"});\r\n                Add('L', new[] {\"CTT\", \"CTC\", \"CTA\", \"CTG\", \"TTA\", \"TTG\"});\r\n                Add('K', new[] {\"AAA\", \"AAG\"});\r\n                Add('M', new[] {\"ATG\"});\r\n                Add('F', new[] {\"TTT\", \"TTC\"});\r\n                Add('P', new[] {\"CCT\", \"CCC\", \"CCA\", \"CCG\"});\r\n                Add('S', new[] {\"TCT\", \"TCC\", \"TCA\", \"TCG\", \"AGT\", \"AGC\"});\r\n                Add('T', new[] {\"ACT\", \"ACC\", \"ACA\", \"ACG\"});\r\n                Add('W', new[] {\"TGG\"});\r\n                Add('Y', new[] {\"TAT\", \"TAC\"});\r\n                Add('V', new[] {\"GTT\", \"GTC\", \"GTA\", \"GTG\"});\r\n                Add('*', new[] {\"TAA\", \"TGA\", \"TAG\"});\r\n            }\r\n 
       }\r\n\r\n        private sealed class VertebrateMitochondrialCodeData : TheoryData<char, string[]>\r\n        {\r\n            public VertebrateMitochondrialCodeData()\r\n            {\r\n                Add('A', new[] {\"GCT\", \"GCC\", \"GCA\", \"GCG\"});\r\n                Add('R', new[] {\"CGT\", \"CGC\", \"CGA\", \"CGG\"});\r\n                Add('N', new[] {\"AAT\", \"AAC\"});\r\n                Add('D', new[] {\"GAT\", \"GAC\"});\r\n                Add('C', new[] {\"TGT\", \"TGC\"});\r\n                Add('Q', new[] {\"CAA\", \"CAG\"});\r\n                Add('E', new[] {\"GAA\", \"GAG\"});\r\n                Add('G', new[] {\"GGT\", \"GGC\", \"GGA\", \"GGG\"});\r\n                Add('H', new[] {\"CAT\", \"CAC\"});\r\n                Add('I', new[] {\"ATT\", \"ATC\"});\r\n                Add('L', new[] {\"CTT\", \"CTC\", \"CTA\", \"CTG\", \"TTA\", \"TTG\"});\r\n                Add('K', new[] {\"AAA\", \"AAG\"});\r\n                Add('M', new[] {\"ATG\", \"ATA\"});\r\n                Add('F', new[] {\"TTT\", \"TTC\"});\r\n                Add('P', new[] {\"CCT\", \"CCC\", \"CCA\", \"CCG\"});\r\n                Add('S', new[] {\"TCT\", \"TCC\", \"TCA\", \"TCG\", \"AGT\", \"AGC\"});\r\n                Add('T', new[] {\"ACT\", \"ACC\", \"ACA\", \"ACG\"});\r\n                Add('W', new[] {\"TGG\", \"TGA\"});\r\n                Add('Y', new[] {\"TAT\", \"TAC\"});\r\n                Add('V', new[] {\"GTT\", \"GTC\", \"GTA\", \"GTG\"});\r\n                Add('*', new[] {\"TAA\", \"TAG\", \"AGA\", \"AGG\"});\r\n            }\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/AnnotatedTranscriptTests.cs",
    "content": "﻿using OptimizedCore;\r\nusing UnitTests.TestDataStructures;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Pools;\r\nusing VariantAnnotation.TranscriptAnnotation;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public sealed class AnnotatedTranscriptTests\r\n    {\r\n        [Fact]\r\n        public void SerializeJson_NominalUsage()\r\n        {\r\n            var variant     = VariantPool.Get(ChromosomeUtilities.Chr1, 1263141, 1263143, \"TAG\", \"\", VariantType.deletion, \"1:1263141:1263143\", false, false, false, null, AnnotationBehavior.SmallVariants, false);\r\n            var refSequence = new SimpleSequence(HgvsProteinNomenclatureTests.Enst00000343938GenomicSequence, 1260147 - 1);\r\n            var transcript  = HgvsProteinNomenclatureTests.GetMockedTranscriptOnForwardStrand();\r\n\r\n            var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, new AminoAcids(false));\r\n            var sb = StringBuilderPool.Get();\r\n            annotatedTranscript.SerializeJson(sb);\r\n            var jsonString = StringBuilderPool.GetStringAndReturn(sb);\r\n\r\n            Assert.Contains(\"ENST00000343938.4:p.(Ter215GlyextTer43)\", jsonString);\r\n            VariantPool.Return(variant);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/CdnaSequenceTests.cs",
    "content": "using UnitTests.TestDataStructures;\nusing VariantAnnotation.AnnotatedPositions.Transcript;\nusing VariantAnnotation.Caches.DataStructures;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions.Transcript\n{\n    public sealed class CdnaSequenceTests\n    {\n        [Fact]\n        public void RnaEdits_snv_forward_no_utr()\n        {\n            //NR_002754.2\n            var genomicSeq = new SimpleSequence(\"actctggtttctcttcaaatcgtataaatctttcgccttttactaaagatttccgtggagagaaacgagtgtgagtctgaaaccaattttttgaggccttgcgtttattagcagggctt\", 11968210);\n            var codingRegion = new CodingRegion(11968211, 11968329, 1, 119, 119);\n\n            var regions = new ITranscriptRegion[]\n            {\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 11968211, 11968329, 1, 119)\n            };\n\n            var rnaEdits = new IRnaEdit[] { new RnaEdit(107,107,\"t\") };\n\n            var codingSequence = new CdnaSequence(genomicSeq, codingRegion, regions, false, rnaEdits);\n\n            Assert.Equal(\"actctggtttctcttcaaatcgtataaatctttcgccttttactaaagatttccgtggagagaaacgagtgtgagtctgaaaccaattttttgaggccttgcgttttttagcagggctt\", codingSequence.GetCdnaSequence());\n\n        }\n        \n        [Fact]\n        public void RnaEdits_snv_forward_with_utr()\n        {\n            //NM_001144032.2 chr1:148644011-148644795\n            var genomicSeq = new 
SimpleSequence(\"ACTATAAAGACAGTGAAAAGATCAGTGGTTATCTTTGCAGACGCCACCATCGCTGTGAGCCCTGTACTATCAGCCATGGTCAACTCCGTCGTCTTTTTTGAAATCACCAGGGATGGCAAGCCCTTGGGCCGCATCTCCATCAAACTGTTTGCAGACAAGATTCCAAAGACAGCAGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACGCCCTAATGGCACCGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGTGCTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCGTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCACGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAATGAGTTTGACTTGTGTTTTATTTTCACCACCAGACCCATTCCTTCTGTAGCTCAGGAGAGCACCCCTCCACCACATTTGCTTGCAATATCCTAGAATCTTTGTGCTCTTGCTGCAGTTCCCTTTGGGTTCCATGTTTTCCTTGTTCCCTTCCATGCCTAGCTGGATGGCAGAGTTGAGTTAAGTTTATGATTATGAAATAAAAACTAAGTAACAA\", 148644011 - 1);\n\n            var codingRegion = new CodingRegion(148644086, 148644580, 76, 570, 495);\n\n            var regions = new ITranscriptRegion[]\n            {\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 148644011,148644795, 1, 785)\n            };\n\n            var rnaEdits = new IRnaEdit[]\n            {\n                new RnaEdit(420, 420, \"C\"),\n                new RnaEdit(500, 500, \"T\"),\n                new RnaEdit(737, 737, \"T\")\n            };\n\n            var codingSequence = new CdnaSequence(genomicSeq, codingRegion, regions, false, rnaEdits);\n            \n            Assert.Equal(\"ATGGTCAACTCCGTCGTCTTTTTTGAAATCACCAGGGATGGCAAGCCCTTGGGCCGCATCTCCATCAAACTGTTTGCAGACAAGATTCCAAAGACAGCAGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACGCCCTAATGGCACCGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGCGCTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCGTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCATGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAA\", 
codingSequence.GetCdnaSequence().Substring(codingRegion.CdnaStart-1, codingRegion.Length));\n\n        }\n\n        [Fact]\n        public void RnaEdits_in_coding_sequence_reverse_insertion()\n        {\n            //NM_000682.6, chrom: chr2:96778623-96781984\n            var genomicSeq = new SimpleSequence(\"CTTATTACAAAATATCCTTTATTGATAAAATAGCTCAGAGTTTAAAAAAAAAAAAAACACCACCTGCATGTCGCAATAAGAGGTCACAGGCAAGAACACTGGGGGTCCCATGGGGCGCACACAAGACCGGCCAGCAGAGGGTCACAGTCAGTCCCTCTCCTGGCCCAGCTCCCCACCACATCCCAGGGCGATACTCTGGCCTCAACAACCCACTGAGGACCAAGCTGGGAAGCCTCCCACACCCCAGGAAGGACTCTTTTTGGTCCCCTCCATTCTCTCTACACCCAGAAAACTCCCTCGGTGCCCTTCCAAATCTAGCAGGTCCATCTGGCCCATTCCCCCGACACCTGCCAAGCTAAGATGCCTACTGGCCCAATGTTGAAGCCAGGCCCTCTCCAAGGGAAGGCCGATAAACCTCCTTTCCACACTTCCAACTGTTCTGGGTGCCAGGTTTTGGGGTGGGACTGAGAACCAGGAAGCAGGGGTCCTCAATGCACAGCCCCATCAGCATTGCGGGGAGCAGCGTGGCTGGGTCCGAGGCAGTCCACAAGCACCCACCTGGGGGGATCAGTTGTGGTTCACAAGGACTCATTTGGGGCTTGGAGACCTGGCCGGGCACTCCAGTGGGAGGCTCCCCTAGGGGCGCACCAGGCTCTGATGCCAGTACCCCACCTGGGGGCGCTGCCACCTGTCACAGGCTCTCATCTTAGACTGTTGCCGAGGTGTGGATATTTTGAGCTGTCTTGGGGAGACAATTTGCCTCCTTGATGACAAAAGACTTATCCCCCACTGGGGAGACCCAAGCCACTAAAAACCCTCTTGGTGTTGCCGGTGAAATGTCGAAACGTTGTCATGTAGCGTAATAACTCAGACCTTTGCAGCCAGAAGAACACATTCTCAAAGAGATCCTTTAACTTGAAATAGTGATTCTGTCTGCCACTCCCGGCTTCCAGTTCGGGGTAGGAATTCACACACCCCAGGGACAGAACAAAAGTCTACAGGAAGACAGGTGGTGGTAAACACAGAGGAAAGGGATTTTTATATCACCATATAATCACATTTTTGGTTCTCTAGTGTGTTCCCCCACAGAGCTCAAAGCTTTCTGCAAAGCCTTTCATCTCCCTGCAGCAAGTAGGCAGTGAGCTATTGTCGCCCCGATTTTTGCAGGGGGTGAATGCCAGTGATCGGGGATCTCCCGTCGAGGCAGAGACCAGGCCTCCAAGACCGCCCCAGCGAGGCATCCACGTGGCCACCCACCTACCGGAGGGGTGCTGGGTAAGGAAGCCGATCCATTGTTCTGGCTTTCAAAGGAACCACAGATCCGAAAACAGGCAAAGGGGGAAAGGAGGGCCCAGAGACGATGCCACCCCATAAGCCCCCATCCCAGCGCCTGCCAGGGACCGCGAGTGCCTAGCGTGGGTGATCAGTCTTCGTTTCTTCCTCCCCCTCAGCAGCAGGCCCCACTGGGAAAAGTGGAAGGCTGGCTCCGTGCTCTTTGTGGGTGGGGGGGAGATGAAAAAGAAACGAAAACACCACAAGCAAGTGACCTGCCAGGAACACAAGGTCCTCAAGAAAGGGAAGCCCAGACATTGGTCTGGAGAGCATGGGGCTCTGGGAAGAAAGTGCTCTCTCTTCTCCTGGTCTTGGCTATGTTCCAGAGGATTTGAACCACCTCCATCGGCCTGTGC
TCAGGGAGAGGGTGGAGAAGGGGTCCCCCACAGCTAAGCCGGCAAGGGGAAGCTTCACTGGGACCCTTGCTAGCAGCCCCCCTGCCCACCCCTCCCAAGGGGTTCCTAAGATGAGGCCTACAGGATCTGGGCAGGGAGCAGAAAGCCCAGGGGAGGCAGCCACACACAGCAGGGCAAGAAGCAGGGTGACCCCGGCGCCACCGCACCAACCCCACAGGGGCAGCGCAGGCGGGCTCACCAGGCCGTCTGGGTCCACGGGCGGCACAGGATCCTCCGGAAGGCACGGCGGAAGTCCTGGTTGAAGATGGTGTAGATAACAGGGTTCAGTGAGCTGTTGCAGTAGCCGATCCAGAAGAAGAACTGGAAGAGGCCATGGGGCACCTTGCAGTGCTTCGGGCAGATGGCTCCCAGGCTGTAGCTGAAGAAGAAGGGGAACCAGCAGAGCACAAAAACGCCAATGACCACAGCCAGCACGAAGGTGAAGCGCTTCTCCCGGGTCAGCTGCGCCCGTCGACGCCACCACTGCCCACCTATAGCACCCACGCCCCTGCCCAGGAGCACCTGGCCACGTAGGGTGGCCAGCACCCGGGAGCCCTGTGGCTGCTGCAGCGGGGGGCTGCAAGCTGAGGCCGGAGACACTGGCACTGCCTGGGGTTCACACTCTTCCTCCTCCTCCTCCTCCTCTTCAGCTTCATCCTCTGGAGATGCCCCACAAACACCCTCCTTCTGGCCCTGGCCTGAGTTGGGAAGGGCAGCCCAACTGGGTGGCAAGGCCCGGGTCCCAGTATCTTCAGGGGTCTCCCCCTCCTCCTTCTCCCCAGTGGACTTCGAGTGTCCGTTGACCTCTCTGGCAGAAGCCACAGAGGCCAGGGCTGGCAGTTTGGCTGAGGCCAAAGCCCCACCATGGTCGGGTCGGGGCTGCTTGGACTCACCCTGCCCAGGCCCCCCCTTGGCCCTGGGACCTCTGCGGTTGCTGCGTTTGGCGATCAGGTAGATGCGCAGGTAGACAAGGATCATGATGAGGCAAGGAGCAAAGAAAGATCCGATGCTGGAGGCCAGGATGTACCAGGCCTCCTGGTTGAGCTTGCACTGGGGGCGCCCGCGCGGCTGGGGGCCCTGGTCGCCCTTGTAGATGAGGGGCGGCAGCGAGATGACGGCGGCGATGAGCCACACAGTGAGGATGATGCACTTGATGCGGCGCGGGGTGCGCTTGGAGTTGTACTCCAGCGCGCGGCTCACGGCCCAGTAGCGGTCCAGGCTGATGGCGCACAGGTGCACGATGGACGAGGTGCAGAAGAGCACGTCGAGCGCCAGGTACACCTCGCACCACGTGCGCCGGAAGTACCAGTAGCCCAGCAGCTCGTTGGCCAGCGAGAAAGGGATGATGAGCGTGGCCACCAGGATGTCGGCGGCGGCCAGCGACACCAGGAACAGGTTCTGAGGGGCGCGCAGCGAGCGGCTGGTCAACACAGCCAGGATGACCAGAGCGTTGCCGAAGATGGTAAAGAGAATGAGGAAGGTGATGGCCGCCGCTATGGCCGCTGTGGCCTGCACGGAGTAGGGGTCCTGGTGGTCCATGACGGGGCGGGAGGTGGGCAGAGGGAGCGCTGCCCGCCCAGTGCGCACCGTGGACGACAGCGCTGCCCGGCTCGGCTAGACAAGAGCGTCGCCCCT\", 96778623 - 1);\n            var codingRegion = new CodingRegion(96780545, 96781888, 97, 1449, 1344);\n\n            var regions = new ITranscriptRegion[]\n            {\n                new TranscriptRegion(TranscriptRegionType.Exon,1, 96778623,96780986, 1008, 3371),\n                new 
TranscriptRegion(TranscriptRegionType.Exon, 1, 96780987,96781984, 1, 998)\n            };\n\n            var rnaEdits = new IRnaEdit[]\n            {\n                new RnaEdit(999, 998, \"AGAGGAGGA\")\n            };\n            const bool onReverseStrand = true;\n            var        codingSequence  = new CdnaSequence(genomicSeq, codingRegion, regions, onReverseStrand, rnaEdits);\n\n            var expectedCodingSeq = \"ATGGACCACCAGGACCCCTACTCCGTGCAGGCCACAGCGGCCATAGCGGCGGCCATCACCTTCCTCATTCTCTTTACCATCTTCGGCAACGCTCTGGTCATCCTGGCTGTGTTGACCAGCCGCTCGCTGCGCGCCCCTCAGAACCTGTTCCTGGTGTCGCTGGCCGCCGCCGACATCCTGGTGGCCACGCTCATCATCCCTTTCTCGCTGGCCAACGAGCTGCTGGGCTACTGGTACTTCCGGCGCACGTGGTGCGAGGTGTACCTGGCGCTCGACGTGCTCTTCTGCACCTCGTCCATCGTGCACCTGTGCGCCATCAGCCTGGACCGCTACTGGGCCGTGAGCCGCGCGCTGGAGTACAACTCCAAGCGCACCCCGCGCCGCATCAAGTGCATCATCCTCACTGTGTGGCTCATCGCCGCCGTCATCTCGCTGCCGCCCCTCATCTACAAGGGCGACCAGGGCCCCCAGCCGCGCGGGCGCCCCCAGTGCAAGCTCAACCAGGAGGCCTGGTACATCCTGGCCTCCAGCATCGGATCTTTCTTTGCTCCTTGCCTCATCATGATCCTTGTCTACCTGCGCATCTACCTGATCGCCAAACGCAGCAACCGCAGAGGTCCCAGGGCCAAGGGGGGGCCTGGGCAGGGTGAGTCCAAGCAGCCCCGACCCGACCATGGTGGGGCTTTGGCCTCAGCCAAACTGCCAGCCCTGGCCTCTGTGGCTTCTGCCAGAGAGGTCAACGGACACTCGAAGTCCACTGGGGAGAAGGAGGAGGGGGAGACCCCTGAAGATACTGGGACCCGGGCCTTGCCACCCAGTTGGGCTGCCCTTCCCAACTCAGGCCAGGGCCAGAAGGAGGGTGTTTGTGGGGCATCTCCAGAGGATGAAGCTGAAGAGGAGGAAGAGGAGGAGGAGGAGGAGGAAGAGTGTGAACCCCAGGCAGTGCCAGTGTCTCCGGCCTCAGCTTGCAGCCCCCCGCTGCAGCAGCCACAGGGCTCCCGGGTGCTGGCCACCCTACGTGGCCAGGTGCTCCTGGGCAGGGGCGTGGGTGCTATAGGTGGGCAGTGGTGGCGTCGACGGGCGCAGCTGACCCGGGAGAAGCGCTTCACCTTCGTGCTGGCTGTGGTCATTGGCGTTTTTGTGCTCTGCTGGTTCCCCTTCTTCTTCAGCTACAGCCTGGGAGCCATCTGCCCGAAGCACTGCAAGGTGCCCCATGGCCTCTTCCAGTTCTTCTTCTGGATCGGCTACTGCAACAGCTCACTGAACCCTGTTATCTACACCATCTTCAACCAGGACTTCCGCCGTGCCTTCCGGAGGATCCTGTGCCGCCCGTGGACCCAGACGGCCTGGTGA\";\n\n            var rnaEditLength = rnaEdits[0].Bases.Length;\n\n            Assert.Equal(expectedCodingSeq, codingSequence.GetCdnaSequence().Substring(codingRegion.CdnaStart-1, 
codingRegion.Length+rnaEditLength));\n\n        }\n        \n        [Fact]\n        public void With_rnaEdits_reverse_deletion_utr()\n        {\n            //NM_001317107.1 chr14:22138125-22139232\n            var genomicSeq = new SimpleSequence(\"ATATGGTATGTAACTTATTCTTTGCAAGGCGCTTCTTTAATTTGGAGCACCACGTATCCTAAGGACGTAGACATTTTCATTTTTCTTCTTTTCTCTCTTTTCTCCCCACTAACTTGTTTAAGGCACTCTTCATTTCTTCATTCCTAAGGGTATAGATAATGGGGTTCAGCAGGGGGGTGACTGCAGTGAAAAACACAGATACTGCCTTGTCCTCTGGGAGGCTGGTGGATGGGCGGGAATAGATGAAGATGCAGTGTCCCAGGAACAGTGTAACTACAGTGAGATGGGCTGCACAGGTGGACAGGGCCTTCCACTTGCCCTTGGAGATCTGCTGCCTCAGACTCACCAGGATGACTGCGTAGGACACCACCAGGACCACAAAACAGACCACGGAGATCAATCCACTGTTGGAGACAATGAGGATCTCAAGGACGTGGGTGTGTCAATGCAGGCCAGCTTGATCACCTGAGGTACATCACAGAAGAAGTTGTCAATCTCATCAGGACCACAGTAGGGCAGCTTGATGGTAAGGGAGGTGAGGGCTATGGAGTGGATGGTCCCTCCTGTCCAGAGGGCCACAGCCAGCAGCACACATACCTTCCAGTTCATCACTATCATGTACTGCAGGGGTTTACAGATGGCCACATACCGATCATAGGCCATGACGGTGAGGAGGAAGATCTCTGTGCAGGCAAAGAGGTGCAGGAAGAACATCTGGGTCACACAGGCATCAAAAGAGATGAGCTTTTCCTCTGACCACACGTCTCTCAGCATCTTGGGGACAGTGACAGTGGAGTGGCAGACATCAATAAAGGACAGGTTGCTGAGGAAGAAATACATGGGAGTATGGAGCCGGTGGTCATAGATAATAGTTATGACAATGAGAACATTCCCAATCAGTGTCAGGACATAAAAAATGAGGAACATGGAAAACATAGCTATCCGTGCCTTATGATTTACAGATAAACCTCTAAGCCGAAAATATGTCACTAAAGAAGTTTGATTGAGTAGGATGGCCTCTTCCATTCTCTTTGTTAGACAACCTGTAAAGAATTAGAAAAAAAGTCTAATATAACACAGTATCTGCATCAATCATTTGGTCATTTAA\", 22138125 - 1);\n            var codingRegion = new CodingRegion(22138201, 22139150, 83, 1030, 948);\n\n            var regions = new ITranscriptRegion[]\n            {\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 22138125,22138561, 670, 1106),\n                new TranscriptRegion(TranscriptRegionType.Gap, 1, 22138562,22138563, 669, 670),\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 22138564,22139232, 1, 669)\n            };\n\n            var rnaEdits = new IRnaEdit[]\n            {\n                new RnaEdit(905, 905, \"T\"),\n                new RnaEdit(796, 796, \"C\"),\n        
        new RnaEdit(679, 679, \"A\"),\n                new RnaEdit(670, 671, \"\")\n            };\n\n            var        codingSequence = new CdnaSequence(genomicSeq, codingRegion, regions, true, rnaEdits);\n\n            var expectedCodingSeq = \"ATGGAAGAGGCCATCCTACTCAATCAAACTTCTTTAGTGACATATTTTCGGCTTAGAGGTTTATCTGTAAATCATAAGGCACGGATAGCTATGTTTTCCATGTTCCTCATTTTTTATGTCCTGACACTGATTGGGAATGTTCTCATTGTCATAACTATTATCTATGACCACCGGCTCCATACTCCCATGTATTTCTTCCTCAGCAACCTGTCCTTTATTGATGTCTGCCACTCCACTGTCACTGTCCCCAAGATGCTGAGAGACGTGTGGTCAGAGGAAAAGCTCATCTCTTTTGATGCCTGTGTGACCCAGATGTTCTTCCTGCACCTCTTTGCCTGCACAGAGATCTTCCTCCTCACCGTCATGGCCTATGATCGGTATGTGGCCATCTGTAAACCCCTGCAGTACATGATAGTGATGAACTGGAAGGTATGTGTGCTGCTGGCTGTGGCCCTCTGGACAGGAGGGACCATCCACTCCATAGCCCTCACCTCCCTTACCATCAAGCTGCCCTACTGTGGTCCTGATGAGATTGACAACTTCTTCTGTGATGTACCTCAGGTGATCAAGCTGGCCTGCATTGACACCCACGTCATTGAGATCCTCATTGTCTCCAACAGTGGATTGATCTCCGTGGTCTGTTTTGTGGTCCTGGTGGTGTCCTACGCAGTCATCCTGGTGAGTCTGAGGCAGCAGATCTCCAAGGGCAAGCGGAAGGCCCTGTCCACCTGTGCAGCCCATCTCACTGTAGTTACACTGTTCCTGGGACACTGCATCTTCATCTATTCCCGCCCATCCACCAGCCTCCCAGAGGACAAGGTAGTATCTGTGTTTTTCACTGCAGTCACCCCCCTGCTGAACCCCATTATCTATACCCTTAGGAATGAAGAAATGAAGAGTGCCTTAAACAAGTTAGTGGGGAGAAAAGAGAGAAAAGAAGAAAAATGA\";\n\n            Assert.Equal(expectedCodingSeq, codingSequence.GetCdnaSequence().Substring(codingRegion.CdnaStart-1, codingRegion.Length));\n\n        }\n\n\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/CodingSequenceTests.cs",
    "content": "﻿using CacheUtils.TranscriptCache;\r\nusing UnitTests.TestDataStructures;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public sealed class CodingSequenceTests\r\n    {\r\n        [Fact]\r\n        public void Create()\r\n        {\r\n            // ENST00000374673.3\r\n            var sequence = new SimpleSequence(\r\n                \"GGGGTGTGTCTCCAGGGCCTTCCGCACTCAGCCAGGGAGAGCAAACAAACAGGCTTGGGGGACTGGGGAGGGGGGAAAGCGGAGGGGCAGGGTAGGGGCGGGGCAGGAGTGGAAGGCGGGGCAGGAGCAAGCGGCCTGGGCAGGGCAAGGGGGCCTCAGCTGGACCCTCGGATACTCACGGCAGTTGGCTTCATCAGTTCGGTCCTCACAGTCAAAGTCACCATCGCAGCGCCACAGCTTGAGGGCACAATGTCCATTCCCGCAGGGGAACTCGTTGGGCTCACAGGGTGGCGGGGGGCCTAGGAGACCGGGCAGGGGTCAGCAGCATCCTCCCGGGCCAGCTTCCTGCTCCCCGCACCCACCTGCACCCCTGCCGGTGCGCACCACAGTCTAGCTCATCGCTGCCGTCCTCGCAGTCCTCCTGTCCGTCGCAGAGGTAGTCTCTGGGGATGCAGTGCCCATTGCGGCATGCGGCCTCCTGGGGCCCACAGGGCAGGGGCCTGACGGAACCGGGAAGCAGGGGCTGAGGAGCGTGGGTGACTGGTGGCTGTCGCATGATGGTTGTCTCTGGCCGGGGCGGTAAAGATGTCGTCTCCACAAGGAGAGAGAATGTGGGGCTGATACCCAGGACTGGCTCCTCTGTGGATAGATTCCGCTTGGCATTTGGCAGAAGCAGATGGCTCCTCACCTGCTCCTTGTCCCCAACCCTCCCCAGGCCCACCCTGTACTCCCCAACACCACTCCCTGCCACCCCCTGCCTGGCTCTGTCATCACCCTTCCTATGCCCCCATCCTCTGCCTGCACCAAACCCTCATAGTCCTTGATGGGCTCCAAGACCCAGGTGTAGGACCCTGGCCCTCCCCTGGCACCCAAACCACTCGTGGCCCCGGACATCCCCTCACCACAATTGAGCTCATCAGACATGTCCCTGCAGTCGGGCCGCCGGTCACAGCGATACTCCAGGGCCACACACTCATTGTAGCTGTGGCAGGCAAACTCGGCCTCCGTGCAGGCTCTTGGGAACTGGGGCACTGCAGGTGGAAAGGAAGCAGACTGGAGTCAGAGGCGGCAGGAGGCAGGTGCGGGAAGCTGTAGGTGCTGTGTGGCTGGAGTGGGCTCCAGGGCCCTGTGTCAGGCAGCTCGGTTTCTGGCAGGCACAACGAGGGCAAGCAGCACACACTAGACACATCCACAGCACACGTGGGGCATGGGACATGCGGCAGTGGCCTCCCCCATCTCTAAAACAGACCCCACACACAGTTGACATGCCACACGCATGCAACCACCACACCACACACATGCAGGCCACAGCCTGGCCCAGTGAGGACAAAGAAGGAGGGGAGAAGGGAGTGCCCAGCTGTCTTGGGCTGTGCCCAGCCAGCCATCTTGCCCACACCCTTCTTTCCTCTCCATCCTTTAAAAA
ATTTTTTTCTCTCTTCTTTTTTATTTTTTTAGAGACAGGGTCTTGCTACGTTGCCCAAGCTGGTCTCGAACTCTTTGCCTCAAGCAATCCTCCCGTCTTGGCCTCCCAAAGTGCTGGGGTTACAGGCGTGAGCCCCTGCACCCGGCCTCCTCTCCAACCTTAACTTCTCTAGGAACCTGGCTGGGCCTCGGCCTGGCTTACACTCTCACCTGGTGTCACTGCGACCGCCACAGCGGCCGGCGGGGGTGGGGGGGTCTGTGCTGGAAAGGAAGATGTGATCAGTGGCTGTTCCACCTGGGAGCCGGGAGCTGAGGGCTGCAGGGCTGGGCCACATTCCACCATCCCTAGCCAGGAGGACTTATTGAAAAGTGAGAGAGGAGGGCTGGACCCCCAGCAGTCTTTAGACCTGGGCCTGATGATGCAGAAGAGCAAGCTTGATCTCTGGGTGCAATAATTAAGGGTTTTTGTTTGTTTGTCTTGTTTTAGAGGCAGGGTTTTGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCATGATCCTAGCTCACTGCAGCCTCAAACTCCTGGGCTCCGGTGATCCTC\",\r\n                22213528);\r\n\r\n            var codingRegion = new CodingRegion(22213728, 22215214, 1, 538, 538);\r\n\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   4, 22213728, 22213827, 439, 538),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 3, 22213828, 22213912, 438, 439),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   3, 22213913, 22214167, 184, 438),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 2, 22214166, 22214430, 183, 184),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   2, 22214431, 22214559, 55,  183),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 22214560, 22215160, 54,  55),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   1, 22215161, 22215214, 1,   54)\r\n            };\r\n\r\n            const string expectedResults =\r\n                
\"NCAGCACAGACCCCCCCACCCCCGCCGGCCGCTGTGGCGGTCGCAGTGACACCAGTGCCCCAGTTCCCAAGAGCCTGCACGGAGGCCGAGTTTGCCTGCCACAGCTACAATGAGTGTGTGGCCCTGGAGTATCGCTGTGACCGGCGGCCCGACTGCAGGGACATGTCTGATGAGCTCAATTGTGAGGAGCCAGTCCTGGGTATCAGCCCCACATTCTCTCTCCTTGTGGAGACGACATCTTTACCGCCCCGGCCAGAGACAACCATCATGCGACAGCCACCAGTCACCCACGCTCCTCAGCCCCTGCTTCCCGGTTCCGTCAGGCCCCTGCCCTGTGGGCCCCAGGAGGCCGCATGCCGCAATGGGCACTGCATCCCCAGAGACTACCTCTGCGACGGACAGGAGGACTGCGAGGACGGCAGCGATGAGCTAGACTGTGGCCCCCCGCCACCCTGTGAGCCCAACGAGTTCCCCTGCGGGAATGGACATTGTGCCCTCAAGCTGTGGCGCTGCGATGGTGACTTTGACTGTGAGGACCG\";\r\n            var codingSequence  = new CodingSequence(sequence, codingRegion, regions, true, 1, null);\r\n            var observedResults = codingSequence.Substring(0, expectedResults.Length);\r\n\r\n            Assert.Equal(expectedResults, observedResults);\r\n        }\r\n\r\n        [Fact]\r\n        public void Length_ReturnTrueLength_WhenGapsArePresent()\r\n        {\r\n            const int expectedResults = 720;\r\n        \r\n            var        sequence       = new NSequence();\r\n            var        codingRegion   = new CodingRegion(10051, 12770, 51, 769, 720);\r\n            const byte startExonPhase = 1;\r\n        \r\n            int naiveCodingRegionLen = codingRegion.CdnaEnd - codingRegion.CdnaStart + 1;\r\n        \r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   1, 10001, 10299, 1,   299),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 10300, 12300, 229, 331),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   2, 12301, 12970, 331, 1000)\r\n            };\r\n        \r\n            var codingSequence  = new CodingSequence(sequence, codingRegion, regions, false, startExonPhase, null);\r\n            var observedResults = codingSequence.Length;\r\n        \r\n            Assert.Equal(expectedResults, observedResults);\r\n            Assert.NotEqual(expectedResults, 
naiveCodingRegionLen);\r\n        }\r\n\r\n        [Fact]\r\n        public void RnaEdits_snv_forward_no_utr()\r\n        {\r\n            //NR_002754.2\r\n            var genomicSeq =\r\n                new SimpleSequence(\r\n                    \"actctggtttctcttcaaatcgtataaatctttcgccttttactaaagatttccgtggagagaaacgagtgtgagtctgaaaccaattttttgaggccttgcgtttattagcagggctt\",\r\n                    11968210);\r\n            var codingRegion = new CodingRegion(11968211, 11968329, 1, 119, 119);\r\n\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 11968211, 11968329, 1, 119)\r\n            };\r\n\r\n            var        rnaEdits       = new IRnaEdit[] {new RnaEdit(107, 107, \"t\")};\r\n            const byte startExonPhase = 0;\r\n\r\n            var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, false, startExonPhase, rnaEdits);\r\n\r\n            Assert.Equal(\r\n                \"actctggtttctcttcaaatcgtataaatctttcgccttttactaaagatttccgtggagagaaacgagtgtgagtctgaaaccaattttttgaggccttgcgttttttagcagggctt\",\r\n                codingSequence.GetCodingSequence());\r\n        }\r\n\r\n        [Fact]\r\n        public void RnaEdits_snv_forward_with_utr()\r\n        {\r\n            //NM_001144032.2 chr1:148644011-148644795\r\n            var genomicSeq = new SimpleSequence(\r\n                
\"ACTATAAAGACAGTGAAAAGATCAGTGGTTATCTTTGCAGACGCCACCATCGCTGTGAGCCCTGTACTATCAGCCATGGTCAACTCCGTCGTCTTTTTTGAAATCACCAGGGATGGCAAGCCCTTGGGCCGCATCTCCATCAAACTGTTTGCAGACAAGATTCCAAAGACAGCAGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACGCCCTAATGGCACCGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGTGCTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCGTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCACGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAATGAGTTTGACTTGTGTTTTATTTTCACCACCAGACCCATTCCTTCTGTAGCTCAGGAGAGCACCCCTCCACCACATTTGCTTGCAATATCCTAGAATCTTTGTGCTCTTGCTGCAGTTCCCTTTGGGTTCCATGTTTTCCTTGTTCCCTTCCATGCCTAGCTGGATGGCAGAGTTGAGTTAAGTTTATGATTATGAAATAAAAACTAAGTAACAA\",\r\n                148644011 - 1);\r\n\r\n            var codingRegion = new CodingRegion(148644086, 148644580, 76, 570, 495);\r\n\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 148644011, 148644795, 1, 785)\r\n            };\r\n\r\n            var rnaEdits = new IRnaEdit[]\r\n            {\r\n                new RnaEdit(420, 420, \"C\"),\r\n                new RnaEdit(500, 500, \"T\"),\r\n                new RnaEdit(737, 737, \"T\")\r\n            };\r\n            const byte startExonPhase = 0;\r\n\r\n            var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, false, startExonPhase, rnaEdits);\r\n\r\n            Assert.Equal(\r\n                
\"ATGGTCAACTCCGTCGTCTTTTTTGAAATCACCAGGGATGGCAAGCCCTTGGGCCGCATCTCCATCAAACTGTTTGCAGACAAGATTCCAAAGACAGCAGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACGCCCTAATGGCACCGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGCGCTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCGTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCATGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAA\",\r\n                codingSequence.GetCodingSequence());\r\n        }\r\n\r\n        [Fact]\r\n        public void RnaEdits_snv_reverse_utr()\r\n        {\r\n            //NM_031947.3, chr5:140682196-140683630\r\n            var genomicSeq = new SimpleSequence(\r\n                \"TGCATGTACACACAAATGGCTTTATGCAAAGGCCCTGACAGAACGATATTTAGTTTTTCAGATTAGGTACATAGGGCCAACCAGCCCACCCTGTACATTCCAGCAAGTGCAAGAGCAGCAACTTTCCTATTTCAATACAATTATGGGCAGAAATTATATGATGTAAAATAGAGGCCCTTCCATAAAGTTAAGATTTAGGGTAGAAGAAGGGAAGATAAAACCAAAATTCCCATGAAGTCAAAATTAGACAGTGGTCTTGTACTCTGCTGAACCCTGTGATGAACTGTAGTCCTCAAACTCATGGACTCGGATCCAGGTTCACCAAGACACTTCAGTATGCTTCCAACTGTTTCATCATCATCTTCCTGCTGTATTCGTAGGCCACAAACAGTGCCCCATTGGCAGGGATTGCTCGAATCATAGTAGCTTTCAGTCCAGAATATAAGGCTACTATTCCTTCATTTCTCACAACACTTAAGAGGGTACCAATAAATCCTGCCTGTTTCCCATACATGGAAAGAACTTGAATTCTGGATTTAATACAATCCACTGGGAACACGACAAGCCACAGGCAAATTCCAGCAACTCCACCACTTAACATCAAATGGACAGGGCCTAGTTCATCTTTTGATCTCCCTGACGCAAAAAACGATCGGCTCAGTTCATAGCCACCAAAGAAAAAGAAATAACCCGGTACTTCTTGAAGTAGAGTACTCGAGAGTCCATGGTAGAAGCCCAAGGGGCCATCCTTTTTAAGGATACCCTTCACGACAGACCAAATTGTATTATGGCTTTTTGCTATCTTCCCTGACATCTCCATTTCATACATGGTCTGTAGCCGGCACTTCACAAGCTCAGTGGGGCAGAGAGCCAGTGCAGCAAATGCAGAGGCGAAGGACCCCGCGGCTGCAGTCTGGAGATCACTCAGCTTTGCCTGCTTGTCCATTCCAGCCACTTTCCTGACAAACTGCTGGCAGAACCCGTAGCACATGAAGAGGACCGAGTTTTCGGCGACGTAGGCCATAAGTGCCGGGCCGGTGCCCTTGTAGAAGCCCCGGAGACCCACTTGGGCGTATGTCTTCAGGAAGCAGTCGGTGAGGCCCTTGTACAGGTCAGGGAACGTCTGCATCTTCACTTTTATTGTGTCGAAGGGCTGCCCAGTCAGTACACACGCTGTCCCCCCTGCGGCCCCCGCTGTGAGGTCGATGGCGGCTTGG
ATGCCAGGACCGGACTTCATGTTCGCTCACTCGTCTGAGGGTCCCAGTGGAAGGCGACTAACTCCCCAGAGCGTGAGACCGGCTTTTCACGTCCAGCCGCAGCGAGCGCGGGGAATGGAGTTGGGGGTGGTGGGGTGGCTCTACCGCCTGTTCTGGGCTCTCACCCCAGTGCGGGGGAAGCCGCTCAACCCTACGCTCCGCCGCGGGCCGCCCCCTCC\",\r\n                140682196 - 1);\r\n\r\n            var codingRegion = new CodingRegion(140682527, 140683432, 199, 1104, 906);\r\n\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 140682196, 140683630, 1, 1435)\r\n            };\r\n\r\n            var rnaEdits = new IRnaEdit[]\r\n            {\r\n                new RnaEdit(366, 366, \"T\")\r\n            };\r\n\r\n            const byte startExonPhase = 0;\r\n\r\n            var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, true, startExonPhase, rnaEdits);\r\n\r\n            Assert.Equal(\r\n                \"ATGAAGTCCGGTCCTGGCATCCAAGCCGCCATCGACCTCACAGCGGGGGCCGCAGGGGGGACAGCGTGTGTACTGACTGGGCAGCCCTTCGACACAATAAAAGTGAAGATGCAGACGTTCCCTGACCTGTACAAGGGCCTCACCGACTGCTTCCTGAAGACATACGCTCAAGTGGGTCTCCGGGGCTTCTACAAGGGCACCGGCCCGGCACTTATGGCCTACGTCGCCGAAAACTCGGTCCTCTTCATGTGCTACGGGTTCTGCCAGCAGTTTGTCAGGAAAGTGGCTGGAATGGACAAGCAGGCAAAGCTGAGTGATCTCCAGACTGCAGCCGCGGGGTCCTTCGCCTCTGCATTTGCTGCACTGGCTCTCTGCCCCACTGAGCTTGTGAAGTGCCGGCTACAGACCATGTATGAAATGGAGATGTCAGGGAAGATAGCAAAAAGCCATAATACAATTTGGTCTGTCGTGAAGGGTATCCTTAAAAAGGATGGCCCCTTGGGCTTCTACCATGGACTCTCGAGTACTCTACTTCAAGAAGTACCGGGTTATTTCTTTTTCTTTGGTGGCTATGAACTGAGCCGATCGTTTTTTGCGTCAGGGAGATCAAAAGATGAACTAGGCCCTGTCCATTTGATGTTAAGTGGTGGAGTTGCTGGAATTTGCCTGTGGCTTGTCGTGTTCCCAGTGGATTGTATTAAATCCAGAATTCAAGTTCTTTCCATGTATGGGAAACAGGCAGGATTTATTGGTACCCTCTTAAGTGTTGTGAGAAATGAAGGAATAGTAGCCTTATATTCTGGACTGAAAGCTACTATGATTCGAGCAATCCCTGCCAATGGGGCACTGTTTGTGGCCTACGAATACAGCAGGAAGATGATGATGAAACAGTTGGAAGCATACTGA\",\r\n                codingSequence.GetCodingSequence());\r\n        }\r\n\r\n        [Fact]\r\n        public void RnaEdits_insertion_in_utr()\r\n        {\r\n            
//NM_080431.4, chrom: chr1:2938046-2939467\r\n            var genomicSeq = new SimpleSequence(\r\n                \"TGGAAGAGGCCTCAGCAGGCCCAGGCCACCTGGAGGGAGAGCAGACCTGCGGCTGAGGATGCAGGGCTCCCGGGCACGGTGCTAGCCCTGCCTTGAGACACCCCGAGAGCTGTGGGAAGAGCTGTGGGATCCCCTATTGCATCACAAAGCGGCCCTGGAGGGCTGGTCTTTATTTTGATGAGGCTGAGAAGGGAAGGCTGCGGGCATGTTTAATCCGCACGCTTTAGACTCCCCGGCTGTGATTTTTGACAATGGCTCGGGGTTCTGCAAAGCGGGCCTGTCTGGGGAGTTTGGACCCCGGCACATGGTCAGCTCCATCGTGGGGCACCTGAAATTCCAGGCTCCCTCAGCAGAGGCCAACCAGAAGAAGTACTTTGTGGGGGAGGAGGCCCTGTACAAGCAGGAGGCCCTGCAGCTGCACTCCCCTTTCGAGCGTGGCCTGATCACAGGGTGGGATGACGTGGAGAGACTCTGGAAGCACCTCTTTGAGTGGGAGCTAGGCGTGAAACCCAGCGACCAGCCCCTGCTTGCAACGGAGCCCTCCCTGAACCCCAGGGAGAACCGTGAGAAGATGGCAGAAGTCATGTTCGAGAACTTCGGCGTGCCCGCTTTCTACCTGTCGGACCAGGCGGTGCTGGCTCTCTACGCCTCTGCCTGTGTCACGGGCCTGGTGGTGGACAGCGGGGATGCGGTCACCTGCACTGTCCCCATCTTTGAGGGTTACTCCCTGCCCCACGCAGTCACCAAGCTCCACGTGGCGGGCAGGGACATCACGGAGCTCCTCATGCAGCTGCTCCTGGCCAGCGGCCACACCTTCCCCTGCCAGCTGGACAAGGGTCTCGTGGACGACATCAAAAAGAAGCTGTGCTACGTGGCCTTGGAGCCCGAGAAGGAGCTTTCCCGGAGGCCGGAGGAGGTCCTGAGGGAGTACAAGCTGCCCGACGGGAACATCATCAGCCTCGGGGACCCGCTGCACCAGGCGCCCGAGGCCCTGTTCGTGCCCCAGCAGCTGGGCAGCCAGAGCCCCGGGCTCTCGAATATGGTCTCCAGCAGCATCACCAAGTGTGATACCGACATCCAGAAGATCCTCTTTGGGGAGATTGTGCTGTCGGGGGGCACTACCCTGTTCCACGGGCTGGATGACCGGCTTCTCAAGGAGCTGGAGCAGCTGGCCTCCAAGGACACCCCCATCAAGATCACGGCTCCCCCCGACCGGTGGTTCTCCACCTGGATTGGAGCCTCCATCGTCACCTCTCTGAGTAGCTTCAAGCAGATGTGGGTCACCGCCGCAGACTTCAAGGAGTTTGGGACCTCCGTGGTGCAGAGAAGATGCTTCTGAAGGCCGCTTCTCGTTGGGTACCGTGGGGGGTGAACCCTAGCCCCAGCTTTGGGAGGATGTTCAATAAAGGACCAATGCCGGAA\",\r\n                2938046 - 1);\r\n            var codingRegion = new CodingRegion(2938251, 2939384, 206, 1339, 1134);\r\n\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 2938046, 2939467, 1, 1422)\r\n            };\r\n\r\n            var        rnaEdits       = new IRnaEdit[] {new RnaEdit(1423, 1422, \"AAAAAAAAAAAAAAA\")};\r\n            const byte 
startExonPhase = 0;\r\n\r\n            var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, false, startExonPhase, rnaEdits);\r\n\r\n            var expectedCodingSeq =\r\n                \"ATGTTTAATCCGCACGCTTTAGACTCCCCGGCTGTGATTTTTGACAATGGCTCGGGGTTCTGCAAAGCGGGCCTGTCTGGGGAGTTTGGACCCCGGCACATGGTCAGCTCCATCGTGGGGCACCTGAAATTCCAGGCTCCCTCAGCAGAGGCCAACCAGAAGAAGTACTTTGTGGGGGAGGAGGCCCTGTACAAGCAGGAGGCCCTGCAGCTGCACTCCCCTTTCGAGCGTGGCCTGATCACAGGGTGGGATGACGTGGAGAGACTCTGGAAGCACCTCTTTGAGTGGGAGCTAGGCGTGAAACCCAGCGACCAGCCCCTGCTTGCAACGGAGCCCTCCCTGAACCCCAGGGAGAACCGTGAGAAGATGGCAGAAGTCATGTTCGAGAACTTCGGCGTGCCCGCTTTCTACCTGTCGGACCAGGCGGTGCTGGCTCTCTACGCCTCTGCCTGTGTCACGGGCCTGGTGGTGGACAGCGGGGATGCGGTCACCTGCACTGTCCCCATCTTTGAGGGTTACTCCCTGCCCCACGCAGTCACCAAGCTCCACGTGGCGGGCAGGGACATCACGGAGCTCCTCATGCAGCTGCTCCTGGCCAGCGGCCACACCTTCCCCTGCCAGCTGGACAAGGGTCTCGTGGACGACATCAAAAAGAAGCTGTGCTACGTGGCCTTGGAGCCCGAGAAGGAGCTTTCCCGGAGGCCGGAGGAGGTCCTGAGGGAGTACAAGCTGCCCGACGGGAACATCATCAGCCTCGGGGACCCGCTGCACCAGGCGCCCGAGGCCCTGTTCGTGCCCCAGCAGCTGGGCAGCCAGAGCCCCGGGCTCTCGAATATGGTCTCCAGCAGCATCACCAAGTGTGATACCGACATCCAGAAGATCCTCTTTGGGGAGATTGTGCTGTCGGGGGGCACTACCCTGTTCCACGGGCTGGATGACCGGCTTCTCAAGGAGCTGGAGCAGCTGGCCTCCAAGGACACCCCCATCAAGATCACGGCTCCCCCCGACCGGTGGTTCTCCACCTGGATTGGAGCCTCCATCGTCACCTCTCTGAGTAGCTTCAAGCAGATGTGGGTCACCGCCGCAGACTTCAAGGAGTTTGGGACCTCCGTGGTGCAGAGAAGATGCTTCTGA\";\r\n\r\n            Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence());\r\n        }\r\n\r\n        [Fact]\r\n        public void RnaEdits_insertion_reverse_in_utr()\r\n        {\r\n            //NM_001242659.1, chrom: chr1:1533388-1535476\r\n            var genomicSeq = new SimpleSequence(\r\n                
\"TCTGTTGGTCTGAGAATGATGGACATTTAGACACTGGCGCCAGGTTTGCGCCTGACCGGCGCCACGCAGGGGTGGGCGGAGCAAAGACACACAGGTGGGCTACAGGTGTCACACGGCACCAGCCAGGGCCCGGGGTGGCTGGGGTGAGGATGGGTGTTTGGCCAGTGACCAGGAGTCAGGTCAAGTCCAGGTGGTCAGTGCCAGGGGCTCCAGGAGGGGAGGGCAGTGCCATAACCCTCCTGGTGTCCAGCGTCACCAGGCGGTCGTCACAGAAAGCAACCTCGGCCCGGGGCCCGGGTCTGCAGCAGGTGGGCAGGGTCAGCTTTTCTTCCATGGCGGGTGGCATTGTCTGGGCCGGATACTGGCTCTCGACCCCTGGGCATGCAAAGGCTAGGGGTGGTGCTGTCAGTCACACCGTTGCCACCAAGGTCCCCTGGGTCGGCTGAGGCTTGGGATCCAGGCAGCGGTGGAAGGTCGGGCTGCTCAAGGCCGGTATCTAAGCTTCTGCCCTGGGACCCAGTGGTGATGGCCGCCATCTGCCCCATTCCCACAGGGACCTAGTCAGAGGTCGCACACACAAAAGGGGTACCTGGCCCTGGAGAACCACCAGCTGCCCGGGGTCTGAGAACACTCACCCTGGCCGCTGGGCCAGGCCTGCCAGGCTCCCGGCTGGTCCAACACCCTAAACGGTACAGAGCGCTGCAGGCCCTCACCTCATGCTTCTGCAGCGCTTGAGGGTGAAGGTGTCTCCAAGGGGACGCTGGCCAGATGCATGGAGAGGCCGGCCAATCTTAGGGCCACAGACCCCCCCTGGACAGCAGAGGGTGTTGGCTCCTGCAGTGGGCCCGAGATACTAAGGCACGAAGCTAACCTAAGCCCAGTGGGGTGGGGCGGGGCAGGACAGGCTTGGAGAGCTGCGCCCCAGGCCTGCAAAGCACAGTGACCGAGCAATGGCGACGGTCTGTCTGGGACAATTCGGCACAGGATGGAGGTGCGGGGTGAGCCGGAGTGCCCATGGCTCTTGCTGGAAGGGGCTCCATGCCCTGGCCGCCTCTATAAAGGCCTGCGGAGAGCGGGGAGAGCCCTGGATGCGGCTGGCACAGCAGCGCAAGCCCAGGGGCCAATCCGGGGCCAGAGTCTGGGAGTCTGACGCCCGGCTGGAAAGGGCGTGTGATGATGCCAAAGTGCCGGAGCCGTCGCCGGCAGGTCCTCCTCCGCGGGGATCTTAAGGAGGCAGCAGGAATGAGGAGAGGAGAGCGGGCGGAGGACCTGGGAGCTCAGGCGCCCTCAGGCAGGTGGCGCAAAGATGGGCGGGCGGCCTCGCGCTTCAGGGGTGTCTGCGCAGGCCGGGGCGCGCGAGGGCCGGGCGCATGAGGTTCTCGGTGATGTAGGCCACCAGCAGGCAGATGACCACCAGCATGACGCAGATGGAGCCGCCCACCGCCGTCATGGCCACCACGATGTCCTGCATGCCGGCCGGCTCGGCGGTGAACTCCACGCACTCGGCCGGCTCGGGGGTCTCTGGCGCGGCGGCGGCGGGCCCAGCGCGCAGCGGCAGCGGCTGCAGGCACAGGCGGTAGAGGACGCTGTCGTGCACGTCGGGCAGCAGGTAGTCGCGGCAGGAGGCCCCGAGGAGCACGCGCTCGCACGGGAAGCGCGTGTAGGCGCCGCGCCACGAGCAGTTGAGCGCGAAGGCGCGCACGCGGCGCGCGGCGGCCGGGGCCAGGCGCCACTGCAGGAGGACGCTGCGGTTGCGCAGGACGCTGGCGCGCAGGGAGCGGCCGGCCGGGGCGTGCAGCACGCAGCCCGGAGCCTGGCAGCGGAAGCCGCGCGCGGGGCTGCGGAAGCACAGGCGCCCGCCGCCCGCCTCGGGGCCCTCGGGCAGCACCTTGTAGGGGCACCAGGGCGCGTCGGGGGTCGGCTCCCAGCCCGGCGGCGTCGGGGCGGCCGCGGCGCAGGGCGGCGGCGCGCAGGCGGCCAGCAGCAGCAGCAGCGGCG
GGGCGCGCATCCTGCGGCGGGGCCACGGGGCGCGGCGCTGGGTCACGCGGGCCGCGCCGCCGCCGTCCCCGCTGCCCGCTCCCCGCGATCC\",\r\n                1533387);\r\n            var codingRegion = new CodingRegion(1534715, 1535395, 82, 762, 681);\r\n\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 1533388, 1535476, 1, 2089)\r\n            };\r\n\r\n            var        rnaEdits       = new IRnaEdit[] {new RnaEdit(2090, 2089, \"AAAAAAAAAAAAAAA\")};\r\n            const byte startExonPhase = 0;\r\n\r\n            var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, true, startExonPhase, rnaEdits);\r\n\r\n            var expectedCodingSeq =\r\n                \"ATGCGCGCCCCGCCGCTGCTGCTGCTGCTGGCCGCCTGCGCGCCGCCGCCCTGCGCCGCGGCCGCCCCGACGCCGCCGGGCTGGGAGCCGACCCCCGACGCGCCCTGGTGCCCCTACAAGGTGCTGCCCGAGGGCCCCGAGGCGGGCGGCGGGCGCCTGTGCTTCCGCAGCCCCGCGCGCGGCTTCCGCTGCCAGGCTCCGGGCTGCGTGCTGCACGCCCCGGCCGGCCGCTCCCTGCGCGCCAGCGTCCTGCGCAACCGCAGCGTCCTCCTGCAGTGGCGCCTGGCCCCGGCCGCCGCGCGCCGCGTGCGCGCCTTCGCGCTCAACTGCTCGTGGCGCGGCGCCTACACGCGCTTCCCGTGCGAGCGCGTGCTCCTCGGGGCCTCCTGCCGCGACTACCTGCTGCCCGACGTGCACGACAGCGTCCTCTACCGCCTGTGCCTGCAGCCGCTGCCGCTGCGCGCTGGGCCCGCCGCCGCCGCGCCAGAGACCCCCGAGCCGGCCGAGTGCGTGGAGTTCACCGCCGAGCCGGCCGGCATGCAGGACATCGTGGTGGCCATGACGGCGGTGGGCGGCTCCATCTGCGTCATGCTGGTGGTCATCTGCCTGCTGGTGGCCTACATCACCGAGAACCTCATGCGCCCGGCCCTCGCGCGCCCCGGCCTGCGCAGACACCCCTGA\";\r\n\r\n            Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence());\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCodingSequence_InsertionGeneModel_InsertionRnaEdit()\r\n        {\r\n            // NM_019119.4, chr5:140566701-140571111\r\n            var genomicSeq = new SimpleSequence(\r\n                
\"AGAATGCTACGGAAGTCCTTGACAAAAAGGAAACACTGAGACAGATGGGCTGAGAAGAAGAGCTGTCGAGTCCCTGATTGGGAAAGGAAAAATTAAAAACCCTAGATCTCTGGTACACATAAGTCTGGGTTTGCGATTGCTATTTGTGCTGGGGCAGTGTGATTGAGACTGACATTGAGGAAAGAAGCAGCTATGAAGACCAGGGGGTTCAGCTTTCCAAGACAAAGGCAAGTCCTGTTTCTTTTTCTTTTCTGGGGAGTGTCCTTGGCAGGTTCTGGGTTTGGACGTTATTCGGTGACTGAGGAAACAGAGAAAGGATCCTTTGTGGTCAATCTGGCAAAGGATCTGGGACTAGCAGAGGGGGAGCTGGCTGCAAGGGGAACCAGGGTGGTTTCCGATGATAACAAACAATACCTGCTCCTGGATTCACATACCGGGAATTTGCTCACAAATGAGAAACTGGACCGAGAGAAGCTGTGTGGCCCTAAAGAGCCCTGTATGCTGTATTTCCAAATTTTAATGGATGATCCCTTTCAGATTTACCGGGCTGAGCTGAGAGTCAGGGATATAAATGATCACTCGCCAGTGTTTCGGCACAAAGAGATGGTCTTAAAAATATCAGAAAATACAGCTGAAGGGACAGCATTTAGACTAGAAAGAGCACAGGATCCAGATGAAGGTCATAACAGTATCCAAAACTACACGATCAGCTCCAACTCTTTTTTCCATATTAAAATTAGTGGCAGTGATGAAGGCATGATATATCCAGAGCTAGTGTTGGACAAAGCACTGGATCGGGAGGAGCAGGAAGAGCTCAGCTTAACCCTCACAGCGCTGGATGGTGGGTCTCCATCCAGGTCTGGGACCTCCACTATACGCATTGTGGTCTTGGATGTCAATGACAATGCCCCACAGTTTGCCCAGGCTCTGTATGAGACCCAGGCTCCAGAAAACAGTCCAGTAGGGTCCCTTATTGTTAAAGTGTCTGCAGGAGATGCAGACTCAGGAGTCAATGCAGAAGTATCCTATTCATTTTTTGATGCTTCTGAAGATATTTTAACAACGTTTCAAATCAATCCTTTTTCTGGGGAAATCTTTCTCAGAGAATTGCTTGATTATGAGTTAGTAAATTCTTACAAAATAAATATACAGGCAATGGACGGCGGAGGCCTTTCTGCAAGATGTACAGTTTTGATAAAAGTATTAGATTCCAATGACAATCCTCCTGAACTGATCATATCATCACTTTCCAACTCTGTTGCTGAAAACTCTCCTGGGATAGTATTGGCTGTTTTTAAGATTAAAGACAGAGACTCCGGAGAAAATGGAAAGACATTTGCTATGTTCAAGATAATCTGCCTTTTTTTCTGAAACCGTCTGTTGACAATTTTTACATCCTAATGACTGAAGGTGCACTGGACAGAGAGAGCAAAGCTGAGTACAACATCACCATCACCGTCACTGACTTGGGGACACCCAGGCTGAAAACCGAGCACAGCATAACCCTGCAGGTCTCCGACGTCAATGACAACGCCCCCGCCTTCACCCAAACCTCCTACACCCTGTTCGTCCGGGAGAACAACAGCCCCGCCCTGCACATCGGCAGTGTCAGCGCCACAGACAGAGACTCAGGCACCAACGCCCAGGTCACCTACTCGCTGCTGCCGCCCCAGGACCCACACCTGCCCCTCGCCTCCCTGGTCTCCATCAACGCGGACAATGGCCACCTGTTTGCCCTCAGGTCGCTGGACTACGAGGCCCTGCAGGCTTTCGACTTCCGCGTGGGCGCCTCAGACCGCGGCTCCCCGGCTTTGAGCAGCGAGGCGCTGGTGCGCGTACTGGTGCTGGACGCCAACGACAACTCGCCCTTCGTGCTGTACCCGCTGCAGAACGGCTCCGCGCCCTGCACCGAGCTGGTGCCCCGGGCGGCCGAGCCGGGCTACCTGGTGACCAAGGTGGTGGCGGTGGACGGCGACTCGGGCCAGAACGCCTGGCTGT
CGTACCAGCTGCTCAAGGCCACGGAGCCCGGGCTGTTCGGTGTGTGGGCGCACAATGGGGAGGTGCGCACCGCCAGGCTGCTGAGCGAGCGCGACGCAGCCAAGCACAGGCTGGTGGTGCTTGTCAAGGACAATGGCGAGCCTCCTCGCTCGGCCACCGCCACGCTGCACGTGCTCCTGGTGGACGGCTTCTCCCAGCCCTACCTGCCTCTCCCGGAGGCGGCCCCGGCCCAGGCCCAGGCCGACTTGCTCACCGTCTACCTGGTGGTGGCGTTGGCCTCGGTGTCTTCGCTCTTCCTCCTCTCGGTGCTCCTGTTCGTGGCGGTGCGGCTGTGCAGGAGGAGCAGGGCGGCCTCGGTGGGTCGCTGCTCGGTGCCCGAGGGTCCTTTTCCAGGGCATCTGGTGGACGTGAGCGGCACCGGGACCCTGTTCCAGAGCTACCAGTACGAGGTGTGTCTGACTGGAGGTTCAGAGACCGGCGAGTTCAAGTTCTTGAAGCCGATTACCCCCCACCTCCCGCCCCATAGGGGTGGGAAAGAAATAGAGGAAAATTCTACTCTCCCCAATAGCTTTGGATTTAATTATTGAAAGGAACCCACTTAATAAAGACATTTACTTCTTTAATATATTCTTGTTGGCTAACTAAATTGTGTATGCCCACCACAAAGAAGGTACTATTTTTTGTTTGATTCATCTTCAACTTTGCGTATTATGCTTAACTTCACAAGTTAACTTTTTCTTATTTTGTATCCTGATGAGGCATTTCTTACTAGAATCCCATAAGTGAAATATAATATTTTTCAAAGTTGATATCATTTAAAAATTTTTGGTCGTTTTAAATGTCTTTATTGACTTTAAATTCATTGCCTCTACATTATTCATTAGTTCTTCTTTTCCTAAAACTTTTTACTTGTTAAAATAGTCTGCTGCATGTAATATGTGCTTTTACTATTTGATATTTCTTCTATTTTTCTTTTGAAACCGGTGTTCTTATTGGTTTGCCATCCTTGTTCATTACAACTGTTTTTTGTTTGTTTGTTTGTTTTTTGGTTTGTTTGTTTTTTTTTTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCAGCTCACTGCAACCTCCGCCTCCCAGGTTCAAGCGATTCTCCTGCCTCAGCCTCCAGAGTATCTGGGACTACAGTTGCATGTCACCACGTTCGGCTAATTTTTGTATTTTCAGTAGAGACGGGTTTCATCATGGTGGCCAGGATGGTCTATCTCTTGACCTCGTGATCCACCCCACTCAGCCTCCCAAATTGCTGGGATTTACAGGCATGAGCCACCGCACCCAGCCTACAATAATTTTCTTAAACTTTACCTTTTATTTTAAAGTTCTAGTTTCCCGGCATTGATAGTTCCCTATTTGAAATATAATGTTTCTCTTGTAAGTGATATGATAAATAAACCCCTAATTAGCCTTAGAAGAAAAACCACTGCAAGATATTAAGCGTGTGTAAATGGGCTTTAGTCTGGAAACCAAAAAAAAAAAAAAAATTTAGTCATTCTATAGGATCATGTGAAAATATTTAATTTGCTCCTTTTAATTCTGTATAAACAAATCAGAGGTTCCTGAGGTTCCTGTTAAATTTTTAATGGCTAATAGCCCAGTGCCATCCAGTTGAAAAAACAACAGCAATCACAAAGTAGAGGTTTATATTGTGCGGCTTTTATATTCAGCTATTAGAGTGTTATTGGTAGTGTCTAGCCTTTTCCTCCACGACATTCCTTGACTTAATCCATTTGGGCCTATTATAGACAAAATAGAGCTTCTTTCTAGATATAAGGTCTTTGAGGCAGGGCTCAGTGGCTCATTCCTGTAATCCCAGCACTTTGGGAGGCCAAGGCGGGCAGATCACCTTAGGTCACGAGTTTGAGACCAGCCTGACCAACGTTAAGTAACCCCGTCTTTACTAAAAATACAAAATTAGCCAGGCATGGTGGCACA
TGCTTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCAGGAGGTGGAAGTTGCTTTGAGCCGAGATTGCACCATTGTACTCCAGCCTGGGCAATAAGAGCAAAACTCCATCAAAATAAAATAAAATAAAATATAAAATAACTTAAAAAGAACTTTGAATAAAATTCTATGAAAAAAGACACTAGAATGCTGTTCTTAATTTTAATAGTGTTAAGATAGGTGTTAGTGTGGTCTGTTCTTTACCTCCCTTTATTTGGTGCAGAGAAGTTAGATCCTGCTAAATTTCAATTAAGAGGGGACCTTAAAATAAGGATCAATCTCTTATTTAACCCTGTAAGTTACTTTAAAGCTAATACAAGAAAAACAAAGACAAGTGAAAGTAAGGAAACAGAAATTGC\",\r\n                140566701 - 1);\r\n            var codingRegion = new CodingRegion(140566893, 140569285, 193, 2586, 2394);\r\n\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 140566701, 140568035, 1,    1335),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 140568036, 140571111, 1337, 4412)\r\n            };\r\n\r\n            var rnaEdits = new IRnaEdit[]\r\n            {\r\n                new RnaEdit(908,  908,  \"T\"),\r\n                new RnaEdit(1336, 1335, \"A\"),\r\n                new RnaEdit(2096, 2096, \"G\")\r\n            };\r\n            \r\n            const byte startExonPhase = 0;\r\n            var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, false, startExonPhase, rnaEdits);\r\n\r\n            const string expectedCodingSeq = 
\"ATGAAGACCAGGGGGTTCAGCTTTCCAAGACAAAGGCAAGTCCTGTTTCTTTTTCTTTTCTGGGGAGTGTCCTTGGCAGGTTCTGGGTTTGGACGTTATTCGGTGACTGAGGAAACAGAGAAAGGATCCTTTGTGGTCAATCTGGCAAAGGATCTGGGACTAGCAGAGGGGGAGCTGGCTGCAAGGGGAACCAGGGTGGTTTCCGATGATAACAAACAATACCTGCTCCTGGATTCACATACCGGGAATTTGCTCACAAATGAGAAACTGGACCGAGAGAAGCTGTGTGGCCCTAAAGAGCCCTGTATGCTGTATTTCCAAATTTTAATGGATGATCCCTTTCAGATTTACCGGGCTGAGCTGAGAGTCAGGGATATAAATGATCACTCGCCAGTGTTTCGGCACAAAGAGATGGTCTTAAAAATATCAGAAAATACAGCTGAAGGGACAGCATTTAGACTAGAAAGAGCACAGGATCCAGATGAAGGTCATAACAGTATCCAAAACTACACGATCAGCTCCAACTCTTTTTTCCATATTAAAATTAGTGGCAGTGATGAAGGCATGATATATCCAGAGCTAGTGTTGGACAAAGCACTGGATCGGGAGGAGCAGGAAGAGCTCAGCTTAACCCTCACAGCGCTGGATGGTGGGTCTCCATCCAGGTCTGGGACCTCCACTATACGCATTGTGGTCTTGGATGTCAATGACAATGTCCCACAGTTTGCCCAGGCTCTGTATGAGACCCAGGCTCCAGAAAACAGTCCAGTAGGGTCCCTTATTGTTAAAGTGTCTGCAGGAGATGCAGACTCAGGAGTCAATGCAGAAGTATCCTATTCATTTTTTGATGCTTCTGAAGATATTTTAACAACGTTTCAAATCAATCCTTTTTCTGGGGAAATCTTTCTCAGAGAATTGCTTGATTATGAGTTAGTAAATTCTTACAAAATAAATATACAGGCAATGGACGGCGGAGGCCTTTCTGCAAGATGTACAGTTTTGATAAAAGTATTAGATTCCAATGACAATCCTCCTGAACTGATCATATCATCACTTTCCAACTCTGTTGCTGAAAACTCTCCTGGGATAGTATTGGCTGTTTTTAAGATTAAAGACAGAGACTCCGGAGAAAATGGAAAGACAATTTGCTATGTTCAAGATAATCTGCCTTTTTTTCTGAAACCGTCTGTTGACAATTTTTACATCCTAATGACTGAAGGTGCACTGGACAGAGAGAGCAAAGCTGAGTACAACATCACCATCACCGTCACTGACTTGGGGACACCCAGGCTGAAAACCGAGCACAGCATAACCCTGCAGGTCTCCGACGTCAATGACAACGCCCCCGCCTTCACCCAAACCTCCTACACCCTGTTCGTCCGGGAGAACAACAGCCCCGCCCTGCACATCGGCAGTGTCAGCGCCACAGACAGAGACTCAGGCACCAACGCCCAGGTCACCTACTCGCTGCTGCCGCCCCAGGACCCACACCTGCCCCTCGCCTCCCTGGTCTCCATCAACGCGGACAATGGCCACCTGTTTGCCCTCAGGTCGCTGGACTACGAGGCCCTGCAGGCTTTCGACTTCCGCGTGGGCGCCTCAGACCGCGGCTCCCCGGCTTTGAGCAGCGAGGCGCTGGTGCGCGTACTGGTGCTGGACGCCAACGACAACTCGCCCTTCGTGCTGTACCCGCTGCAGAACGGCTCCGCGCCCTGCACCGAGCTGGTGCCCCGGGCGGCCGAGCCGGGCTACCTGGTGACCAAGGTGGTGGCGGTGGACGGCGACTCGGGCCAGAACGCCTGGCTGTCGTACCAGCTGCTCAAGGCCACGGAGCCCGGGCTGTTCGGTGTGTGGGCGCACAATGGGGAGGTGCGCACCGCCAGGCTGCTGAGCGAGCGCGACGCGGCCAAGCACAGGCTGGTGGTGCTTGTCAAGGACAATGGCGAGCCTCCTCGCTCGGCCACCGCCACGCTGCACGTGCTCCTGGTGGACGGCTTC
TCCCAGCCCTACCTGCCTCTCCCGGAGGCGGCCCCGGCCCAGGCCCAGGCCGACTTGCTCACCGTCTACCTGGTGGTGGCGTTGGCCTCGGTGTCTTCGCTCTTCCTCCTCTCGGTGCTCCTGTTCGTGGCGGTGCGGCTGTGCAGGAGGAGCAGGGCGGCCTCGGTGGGTCGCTGCTCGGTGCCCGAGGGTCCTTTTCCAGGGCATCTGGTGGACGTGAGCGGCACCGGGACCCTGTTCCAGAGCTACCAGTACGAGGTGTGTCTGACTGGAGGTTCAGAGACCGGCGAGTTCAAGTTCTTGAAGCCGATTACCCCCCACCTCCCGCCCCATAGGGGTGGGAAAGAAATAGAGGAAAATTCTACTCTCCCCAATAGCTTTGGATTTAATTATTGA\";\r\n\r\n            Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence());\r\n        }\r\n\r\n        [Fact]\r\n        public void RnaEdits_in_coding_sequence_reverse_insertion()\r\n        {\r\n            //NM_000682.6, chrom: chr2:96778623-96781984\r\n            var genomicSeq = new SimpleSequence(\r\n                \"CTTATTACAAAATATCCTTTATTGATAAAATAGCTCAGAGTTTAAAAAAAAAAAAAACACCACCTGCATGTCGCAATAAGAGGTCACAGGCAAGAACACTGGGGGTCCCATGGGGCGCACACAAGACCGGCCAGCAGAGGGTCACAGTCAGTCCCTCTCCTGGCCCAGCTCCCCACCACATCCCAGGGCGATACTCTGGCCTCAACAACCCACTGAGGACCAAGCTGGGAAGCCTCCCACACCCCAGGAAGGACTCTTTTTGGTCCCCTCCATTCTCTCTACACCCAGAAAACTCCCTCGGTGCCCTTCCAAATCTAGCAGGTCCATCTGGCCCATTCCCCCGACACCTGCCAAGCTAAGATGCCTACTGGCCCAATGTTGAAGCCAGGCCCTCTCCAAGGGAAGGCCGATAAACCTCCTTTCCACACTTCCAACTGTTCTGGGTGCCAGGTTTTGGGGTGGGACTGAGAACCAGGAAGCAGGGGTCCTCAATGCACAGCCCCATCAGCATTGCGGGGAGCAGCGTGGCTGGGTCCGAGGCAGTCCACAAGCACCCACCTGGGGGGATCAGTTGTGGTTCACAAGGACTCATTTGGGGCTTGGAGACCTGGCCGGGCACTCCAGTGGGAGGCTCCCCTAGGGGCGCACCAGGCTCTGATGCCAGTACCCCACCTGGGGGCGCTGCCACCTGTCACAGGCTCTCATCTTAGACTGTTGCCGAGGTGTGGATATTTTGAGCTGTCTTGGGGAGACAATTTGCCTCCTTGATGACAAAAGACTTATCCCCCACTGGGGAGACCCAAGCCACTAAAAACCCTCTTGGTGTTGCCGGTGAAATGTCGAAACGTTGTCATGTAGCGTAATAACTCAGACCTTTGCAGCCAGAAGAACACATTCTCAAAGAGATCCTTTAACTTGAAATAGTGATTCTGTCTGCCACTCCCGGCTTCCAGTTCGGGGTAGGAATTCACACACCCCAGGGACAGAACAAAAGTCTACAGGAAGACAGGTGGTGGTAAACACAGAGGAAAGGGATTTTTATATCACCATATAATCACATTTTTGGTTCTCTAGTGTGTTCCCCCACAGAGCTCAAAGCTTTCTGCAAAGCCTTTCATCTCCCTGCAGCAAGTAGGCAGTGAGCTATTGTCGCCCCGATTTTTGCAGGGGGTGAATGCCAGTGATCGGGGATCTCCCGTCGAGGCAGAGACCAGGCCTCCAAGACCGCCCCAGCGAGGCATCCACGTGGCCACCCACCTA
CCGGAGGGGTGCTGGGTAAGGAAGCCGATCCATTGTTCTGGCTTTCAAAGGAACCACAGATCCGAAAACAGGCAAAGGGGGAAAGGAGGGCCCAGAGACGATGCCACCCCATAAGCCCCCATCCCAGCGCCTGCCAGGGACCGCGAGTGCCTAGCGTGGGTGATCAGTCTTCGTTTCTTCCTCCCCCTCAGCAGCAGGCCCCACTGGGAAAAGTGGAAGGCTGGCTCCGTGCTCTTTGTGGGTGGGGGGGAGATGAAAAAGAAACGAAAACACCACAAGCAAGTGACCTGCCAGGAACACAAGGTCCTCAAGAAAGGGAAGCCCAGACATTGGTCTGGAGAGCATGGGGCTCTGGGAAGAAAGTGCTCTCTCTTCTCCTGGTCTTGGCTATGTTCCAGAGGATTTGAACCACCTCCATCGGCCTGTGCTCAGGGAGAGGGTGGAGAAGGGGTCCCCCACAGCTAAGCCGGCAAGGGGAAGCTTCACTGGGACCCTTGCTAGCAGCCCCCCTGCCCACCCCTCCCAAGGGGTTCCTAAGATGAGGCCTACAGGATCTGGGCAGGGAGCAGAAAGCCCAGGGGAGGCAGCCACACACAGCAGGGCAAGAAGCAGGGTGACCCCGGCGCCACCGCACCAACCCCACAGGGGCAGCGCAGGCGGGCTCACCAGGCCGTCTGGGTCCACGGGCGGCACAGGATCCTCCGGAAGGCACGGCGGAAGTCCTGGTTGAAGATGGTGTAGATAACAGGGTTCAGTGAGCTGTTGCAGTAGCCGATCCAGAAGAAGAACTGGAAGAGGCCATGGGGCACCTTGCAGTGCTTCGGGCAGATGGCTCCCAGGCTGTAGCTGAAGAAGAAGGGGAACCAGCAGAGCACAAAAACGCCAATGACCACAGCCAGCACGAAGGTGAAGCGCTTCTCCCGGGTCAGCTGCGCCCGTCGACGCCACCACTGCCCACCTATAGCACCCACGCCCCTGCCCAGGAGCACCTGGCCACGTAGGGTGGCCAGCACCCGGGAGCCCTGTGGCTGCTGCAGCGGGGGGCTGCAAGCTGAGGCCGGAGACACTGGCACTGCCTGGGGTTCACACTCTTCCTCCTCCTCCTCCTCCTCTTCAGCTTCATCCTCTGGAGATGCCCCACAAACACCCTCCTTCTGGCCCTGGCCTGAGTTGGGAAGGGCAGCCCAACTGGGTGGCAAGGCCCGGGTCCCAGTATCTTCAGGGGTCTCCCCCTCCTCCTTCTCCCCAGTGGACTTCGAGTGTCCGTTGACCTCTCTGGCAGAAGCCACAGAGGCCAGGGCTGGCAGTTTGGCTGAGGCCAAAGCCCCACCATGGTCGGGTCGGGGCTGCTTGGACTCACCCTGCCCAGGCCCCCCCTTGGCCCTGGGACCTCTGCGGTTGCTGCGTTTGGCGATCAGGTAGATGCGCAGGTAGACAAGGATCATGATGAGGCAAGGAGCAAAGAAAGATCCGATGCTGGAGGCCAGGATGTACCAGGCCTCCTGGTTGAGCTTGCACTGGGGGCGCCCGCGCGGCTGGGGGCCCTGGTCGCCCTTGTAGATGAGGGGCGGCAGCGAGATGACGGCGGCGATGAGCCACACAGTGAGGATGATGCACTTGATGCGGCGCGGGGTGCGCTTGGAGTTGTACTCCAGCGCGCGGCTCACGGCCCAGTAGCGGTCCAGGCTGATGGCGCACAGGTGCACGATGGACGAGGTGCAGAAGAGCACGTCGAGCGCCAGGTACACCTCGCACCACGTGCGCCGGAAGTACCAGTAGCCCAGCAGCTCGTTGGCCAGCGAGAAAGGGATGATGAGCGTGGCCACCAGGATGTCGGCGGCGGCCAGCGACACCAGGAACAGGTTCTGAGGGGCGCGCAGCGAGCGGCTGGTCAACACAGCCAGGATGACCAGAGCGTTGCCGAAGATGGTAAAGAGAATGAGGAAGGTGATGGCCGCCGCTATGGCCGCTGTGGCCTGCACGGAGTAGGGGTCCTGGTG
GTCCATGACGGGGCGGGAGGTGGGCAGAGGGAGCGCTGCCCGCCCAGTGCGCACCGTGGACGACAGCGCTGCCCGGCTCGGCTAGACAAGAGCGTCGCCCCT\",\r\n                96778623 - 1);\r\n            var codingRegion = new CodingRegion(96780545, 96781888, 97, 1449, 1353);\r\n\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 96778623, 96780986, 1008, 3371),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 96780987, 96781984, 1,    998)\r\n            };\r\n\r\n            var rnaEdits = new IRnaEdit[]\r\n            {\r\n                new RnaEdit(999, 998, \"AGAGGAGGA\")\r\n            };\r\n\r\n            const byte startExonPhase  = 0;\r\n            const bool onReverseStrand = true;\r\n\r\n            var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, onReverseStrand, startExonPhase,\r\n                rnaEdits);\r\n\r\n            var expectedCodingSeq =\r\n                \"ATGGACCACCAGGACCCCTACTCCGTGCAGGCCACAGCGGCCATAGCGGCGGCCATCACCTTCCTCATTCTCTTTACCATCTTCGGCAACGCTCTGGTCATCCTGGCTGTGTTGACCAGCCGCTCGCTGCGCGCCCCTCAGAACCTGTTCCTGGTGTCGCTGGCCGCCGCCGACATCCTGGTGGCCACGCTCATCATCCCTTTCTCGCTGGCCAACGAGCTGCTGGGCTACTGGTACTTCCGGCGCACGTGGTGCGAGGTGTACCTGGCGCTCGACGTGCTCTTCTGCACCTCGTCCATCGTGCACCTGTGCGCCATCAGCCTGGACCGCTACTGGGCCGTGAGCCGCGCGCTGGAGTACAACTCCAAGCGCACCCCGCGCCGCATCAAGTGCATCATCCTCACTGTGTGGCTCATCGCCGCCGTCATCTCGCTGCCGCCCCTCATCTACAAGGGCGACCAGGGCCCCCAGCCGCGCGGGCGCCCCCAGTGCAAGCTCAACCAGGAGGCCTGGTACATCCTGGCCTCCAGCATCGGATCTTTCTTTGCTCCTTGCCTCATCATGATCCTTGTCTACCTGCGCATCTACCTGATCGCCAAACGCAGCAACCGCAGAGGTCCCAGGGCCAAGGGGGGGCCTGGGCAGGGTGAGTCCAAGCAGCCCCGACCCGACCATGGTGGGGCTTTGGCCTCAGCCAAACTGCCAGCCCTGGCCTCTGTGGCTTCTGCCAGAGAGGTCAACGGACACTCGAAGTCCACTGGGGAGAAGGAGGAGGGGGAGACCCCTGAAGATACTGGGACCCGGGCCTTGCCACCCAGTTGGGCTGCCCTTCCCAACTCAGGCCAGGGCCAGAAGGAGGGTGTTTGTGGGGCATCTCCAGAGGATGAAGCTGAAGAGGAGGAAGAGGAGGAGGAGGAGGAGGAAGAGTGTGAACCCCAGGCAGTGCCAGTGTCTCCGGCCTCAGCTTGCAGCCCCCCGCTGCAGCAGCCACAGGGCTCCCGGGTGCTGGCCACC
CTACGTGGCCAGGTGCTCCTGGGCAGGGGCGTGGGTGCTATAGGTGGGCAGTGGTGGCGTCGACGGGCGCAGCTGACCCGGGAGAAGCGCTTCACCTTCGTGCTGGCTGTGGTCATTGGCGTTTTTGTGCTCTGCTGGTTCCCCTTCTTCTTCAGCTACAGCCTGGGAGCCATCTGCCCGAAGCACTGCAAGGTGCCCCATGGCCTCTTCCAGTTCTTCTTCTGGATCGGCTACTGCAACAGCTCACTGAACCCTGTTATCTACACCATCTTCAACCAGGACTTCCGCCGTGCCTTCCGGAGGATCCTGTGCCGCCCGTGGACCCAGACGGCCTGGTGA\";\r\n\r\n            Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence());\r\n        }\r\n\r\n        [Fact]\r\n        public void With_rnaEdits_snv_mnv()\r\n        {\r\n            //NM_001242659.1\r\n            var genomicSeq = new SimpleSequence(\r\n                \"ACTATAAAGACAGTAAAAAGATCAGTGGTTATCTTTGCAGACGCCACCATCXCTGTGAGCCCTGTACTATCAGCCATGGTCAACTCCGTCGTCTTTTTTGACATCACCGTCGACGGCAAGCCCTTGGGCCGCATCTCCATCAAACTGTTTGCAGACAAGATTCXAAAGACAGCXGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACGCCXTAATGGCACXGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGXXCTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCCTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCATGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAATGAGTTTGACTTGTGTTTTATTTTCACCACCAGACCCATTCCTTCTGTAGCTCAGGAGAGCACCCCTCCACCACATTTGCTTGCAATATCCTAGAATCTXXGTGCTCTTGCTGCAGTTCCCTTTGGGTTCCATGTTTTCCTTGTTCCCTTCCATGCCTAGCTGGATTGCAGAGTTGAGTTAAGTTTATGATTATGAAATAAAAACTAAGTAACAA\",\r\n                149553002);\r\n            var codingRegion = new CodingRegion(149553003, 149553787, 1, 785, 785);\r\n\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 149553003, 149553787, 1, 785)\r\n            };\r\n\r\n            var rnaEdits = new IRnaEdit[]\r\n            {\r\n                new RnaEdit(52,  52,  \"G\"),\r\n                new RnaEdit(164, 164, \"C\"),\r\n                new RnaEdit(174, 174, \"A\"),\r\n                new RnaEdit(284, 284, \"C\"),\r\n     
           new RnaEdit(294, 294, \"C\"),\r\n                new RnaEdit(420, 421, \"CA\"),\r\n                new RnaEdit(670, 671, \"CT\")\r\n            };\r\n\r\n            const byte startExonPhase = 0;\r\n\r\n            var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, false, startExonPhase, rnaEdits);\r\n\r\n            var expectedCodingSeq =\r\n                \"ACTATAAAGACAGTAAAAAGATCAGTGGTTATCTTTGCAGACGCCACCATCGCTGTGAGCCCTGTACTATCAGCCATGGTCAACTCCGTCGTCTTTTTTGACATCACCGTCGACGGCAAGCCCTTGGGCCGCATCTCCATCAAACTGTTTGCAGACAAGATTCCAAAGACAGCAGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACGCCCTAATGGCACCGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGCACTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCCTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCATGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAATGAGTTTGACTTGTGTTTTATTTTCACCACCAGACCCATTCCTTCTGTAGCTCAGGAGAGCACCCCTCCACCACATTTGCTTGCAATATCCTAGAATCTCTGTGCTCTTGCTGCAGTTCCCTTTGGGTTCCATGTTTTCCTTGTTCCCTTCCATGCCTAGCTGGATTGCAGAGTTGAGTTAAGTTTATGATTATGAAATAAAAACTAAGTAACAA\";\r\n\r\n            Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence());\r\n        }\r\n\r\n        [Fact]\r\n        public void With_rnaEdits_deletion()\r\n        {\r\n            //NM_033089.6, chrom: chr20:278204-280965\r\n            var genomicSeq = new SimpleSequence(\r\n                
\"GGAGGATGCTGGGAAGGAGGTAAAATGGCCACCGGCGGCGGCGCGGAGGAAGAGAGGAAACGGGGGCGGCCGCAGCTTCTGCCCCCCGCGCGGCCCGCGGCCCGGGGCGAGGAGGCCGACGGCGGCCGCGAGAAGATGGGCTGGGCCCAGGTGGTGAAGAATCTAGCCGAGAAGAAGGGCGAATTCCGCGAGCCGCGGCCGCCGCGGCGGGAGGAGGAAAGCGGCGGCGGTGGAGGGAGCGCCGGGCTCGGCGGCCCCGCGGGCCTGGCGGCGCCGGACCTCGGCGACTTCCCACCGGCTGGCCGCGGGGATCCGAAGGGCCGTCGGAGAGATCCGGCCGGCGAGGCGGTGGACCCCCGCAAAAAGAAGGGCGCTGCGGAGGCGGGCAGGAGGAAGAAGGCCGAGGCGGCGGCGGCCGCCATGGCGACCCCGGCCAGGCCCGGCGAGGCCGAGGACGCGGCCGAGCGGCCCCTCCAGGATGAGCCGGCGGCGGCGGCGGCAGGCCCGGGCAAGGGTCGCTTCCTCGTCCGCATCTGTTTCCAGGGAGACGAGGGCGCCTGCCCGACCCGGGACTTCGTGGTAGGAGCGCTTATCCTGCGCTCCATCGGCATGGACCCGAGCGACATCTACGCGGTCATCCAGATCCCGGGCAGCCGCGAATTCGACGTGAGCTTCCGCTCAGCGGAGAAGCTGGCCCTGTTCCTACGCGTCTACGAGGAGAAGCGGGAGCAGGAGGACTGCTGGGAGAACTTTGTGGTGCTGGGGCGGAGCAAGTCCAGCTTGAAGACGCTCTTCATCCTCTTCCGGAACGAGACGGTGGACGTGGAGGACATTGTGACTTGGCTCAAGCGCCACTGCGACGTGCTGGCCGTGCCGGTGAAAGTGACCGACAGGTTTGGGATCTGGACCGGGGAGTACAAATGCGAGATCGAGCTGCGCCAGGGGGAGGGCGGGGTCAGGCACTTGCCAGGGGCCTTCTTCCTGGGGGCCGAGAGGGGCTACAGCTGGTACAAGGGGCAGCCCAAGACATGCTTTAAATGTGGTTCCCGGACCCACATGAGCGGCAGCTGCACGCAGGACAGGTGCTTCAGGTGCGGGGAGGAGGGGCACCTGAGCCCTTACTGCCGGAAGGGCATCGTGTGCAACCTCTGTGGCAAGCGAGGACACGCCTTTGCCCAGTGTCCCAAAGCAGTGCACAATTCCGTGGCAGCTCAGCTAACCGGCGTGGCCGGGCACTAAACACCCGCCTGCCTGCCAGGGTGAACACACAGCCAGCTTATCCCTCTTAAGTGCCAAAACTTTTTTTTAAACCATTTTTTATCGTTTTTGAAGGAGATCTTTTTAAAACCTACAAGAGACATCTCTCTATGCCTTCTTAAACCGAGTTTACTCCATTTCAGCCTGTTCTGAATTGGTGACTCTGTCACCAATAACGACTGCGGAGAACTGTAGCGTGCAGATGTGTTGCCCCTCCCTTTTAAAATTTTATTTTCGTTTTTCTATTGGGTATTTGTTTTGTTTCTTGTACTTTTTCTCTCTCTCCTTGCCCCCCTCCCGCCCTCCCCGCCCCATACCTTTTCTTCCCCTGGATTTTCACCCTTTGGGCTGCCTTGCTCATCTTTATGCCCCAGCACTAGGTACGGGGCCCAACACGTGGTAGGCACTCCATCAGTGTTTGCTGAATTGAAAACATTGTTGACTGTGGCTTCTATCAGAGTGTCTACCTTTTGCAGCTCTTCCCCTCCCTCATTTAATTTGCTGCTTTTAATCTACGTGGTCTGAGAATTTGTGAAACCAGTGTTGTTAGAAGTGTATATAATCTGAATCAATAAGCTCTGAATGGTGGCCAAGGGCCTCTCTTATGGCACAAAGATGCATGGACTTCATGACAGCTCTTTTGGTGGCTCAGAAGCCATTTTTTATAGAATCATGGAATCTAGAATATTCCTGCTGGAAAGAACCTGAGAGTTGGTTTGGACCAATTCCCTGGTTTTCCAG
CAGATGAAACAGGCCCAAAGAGGTTAAATGACTGGGTGAAAATCACATAGCTGTCTGGTGCCAGAGCCAGCCTATAGTAGAGTCCCCTGACCCCAAGCCCGGTGCTCATTCCACTACCTCTCACACTTCACAACAATTTCCTCAACACTTGAGGGCCCAGAAAGTCTGATCTCTCCAGAATGATCAGCCCAGAGGAATGCTGAGAAATCACCTGGAGGAGGGAGCAGAAAGAGAAGGTTTTTAAGGAGGGGCTTCTGAATACTTGGGAGATACGGAACGGACCAAGGACCACACTCCAGGGTGCATTCGTTGCTCCCTGGGGCACCACTTCTGGATTACAGTGTGCCAGGTCCTTTGGAGGCCCTACCCCTTCCCCATTCATTGCCACCAGTGAGAAATGGGGGTGCCCCTGTGTAAAGAAACCTACCAAAGGTTTACATTTGCACCTTAGCCTCAATAGCTACGAACCCTAGAGAAGCAGCTAGCTGGAGCTCATGTGCAACTCCTGATTCTCAGGAGAAAGATGGATTTTAACCCAAAATTATGAGTGAGCTGTTAACTCTAAAATGTACTTGGGAGATAGGCCAAGCGAGAGGTCATGGGCCAACTAAGTGTTATCCAGTAGAAAAGACAGTACACTGCTTTTCTTTTAGTGTTTGCTTTTCCTTTGCTATATGTTTTGCTATTTCCTTGTGGCTTAGAATGTAAAATTGATTGTTAAAAGTTTTGTTCTGAATAAATATTTATCTTTTGTATTGCTAAAA\",\r\n                278204 - 1);\r\n            var codingRegion = new CodingRegion(278228, 279442, 25, 1236, 1212);\r\n\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 278204, 278687, 1,   484),\r\n                new TranscriptRegion(TranscriptRegionType.Gap,  1, 278688, 278690, 484, 485),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 278691, 280965, 485, 2759)\r\n            };\r\n\r\n            var rnaEdits = new IRnaEdit[]\r\n            {\r\n                new RnaEdit(485,  487,  \"\"),\r\n                new RnaEdit(2763, 2762, \"AAAAAAAAAAAAAA\")\r\n            };\r\n\r\n            const byte startExonPhase  = 0;\r\n            const bool onReverseStrand = false;\r\n\r\n            var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, onReverseStrand, startExonPhase,\r\n                rnaEdits);\r\n\r\n            //The coding sequence from refseq does not have the deletion from rna edit. 
That was manually inserted.\r\n            var expectedCodingSeq =\r\n                \"ATGGCCACCGGCGGCGGCGCGGAGGAAGAGAGGAAACGGGGGCGGCCGCAGCTTCTGCCCCCCGCGCGGCCCGCGGCCCGGGGCGAGGAGGCCGACGGCGGCCGCGAGAAGATGGGCTGGGCCCAGGTGGTGAAGAATCTAGCCGAGAAGAAGGGCGAATTCCGCGAGCCGCGGCCGCCGCGGCGGGAGGAGGAAAGCGGCGGCGGTGGAGGGAGCGCCGGGCTCGGCGGCCCCGCGGGCCTGGCGGCGCCGGACCTCGGCGACTTCCCACCGGCTGGCCGCGGGGATCCGAAGGGCCGTCGGAGAGATCCGGCCGGCGAGGCGGTGGACCCCCGCAAAAAGAAGGGCGCTGCGGAGGCGGGCAGGAGGAAGAAGGCCGAGGCGGCGGCGGCCGCCATGGCGACCCCGGCCAGGCCCGGCGAGGCCGAGGACGCGGCCGAGCGGCCCCTCCAGGATGAGCCGGCGGCGGCGGCAGGCCCGGGCAAGGGTCGCTTCCTCGTCCGCATCTGTTTCCAGGGAGACGAGGGCGCCTGCCCGACCCGGGACTTCGTGGTAGGAGCGCTTATCCTGCGCTCCATCGGCATGGACCCGAGCGACATCTACGCGGTCATCCAGATCCCGGGCAGCCGCGAATTCGACGTGAGCTTCCGCTCAGCGGAGAAGCTGGCCCTGTTCCTACGCGTCTACGAGGAGAAGCGGGAGCAGGAGGACTGCTGGGAGAACTTTGTGGTGCTGGGGCGGAGCAAGTCCAGCTTGAAGACGCTCTTCATCCTCTTCCGGAACGAGACGGTGGACGTGGAGGACATTGTGACTTGGCTCAAGCGCCACTGCGACGTGCTGGCCGTGCCGGTGAAAGTGACCGACAGGTTTGGGATCTGGACCGGGGAGTACAAATGCGAGATCGAGCTGCGCCAGGGGGAGGGCGGGGTCAGGCACTTGCCAGGGGCCTTCTTCCTGGGGGCCGAGAGGGGCTACAGCTGGTACAAGGGGCAGCCCAAGACATGCTTTAAATGTGGTTCCCGGACCCACATGAGCGGCAGCTGCACGCAGGACAGGTGCTTCAGGTGCGGGGAGGAGGGGCACCTGAGCCCTTACTGCCGGAAGGGCATCGTGTGCAACCTCTGTGGCAAGCGAGGACACGCCTTTGCCCAGTGTCCCAAAGCAGTGCACAATTCCGTGGCAGCTCAGCTAACCGGCGTGGCCGGGCACTAA\";\r\n\r\n            Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence());\r\n        }\r\n\r\n        [Fact]\r\n        public void With_rnaEdits_reverse_deletion_utr()\r\n        {\r\n            //NM_001317107.1 chr14:22138125-22139232\r\n            var genomicSeq = new SimpleSequence(\r\n                
\"ATATGGTATGTAACTTATTCTTTGCAAGGCGCTTCTTTAATTTGGAGCACCACGTATCCTAAGGACGTAGACATTTTCATTTTTCTTCTTTTCTCTCTTTTCTCCCCACTAACTTGTTTAAGGCACTCTTCATTTCTTCATTCCTAAGGGTATAGATAATGGGGTTCAGCAGGGGGGTGACTGCAGTGAAAAACACAGATACTGCCTTGTCCTCTGGGAGGCTGGTGGATGGGCGGGAATAGATGAAGATGCAGTGTCCCAGGAACAGTGTAACTACAGTGAGATGGGCTGCACAGGTGGACAGGGCCTTCCACTTGCCCTTGGAGATCTGCTGCCTCAGACTCACCAGGATGACTGCGTAGGACACCACCAGGACCACAAAACAGACCACGGAGATCAATCCACTGTTGGAGACAATGAGGATCTCAAGGACGTGGGTGTGTCAATGCAGGCCAGCTTGATCACCTGAGGTACATCACAGAAGAAGTTGTCAATCTCATCAGGACCACAGTAGGGCAGCTTGATGGTAAGGGAGGTGAGGGCTATGGAGTGGATGGTCCCTCCTGTCCAGAGGGCCACAGCCAGCAGCACACATACCTTCCAGTTCATCACTATCATGTACTGCAGGGGTTTACAGATGGCCACATACCGATCATAGGCCATGACGGTGAGGAGGAAGATCTCTGTGCAGGCAAAGAGGTGCAGGAAGAACATCTGGGTCACACAGGCATCAAAAGAGATGAGCTTTTCCTCTGACCACACGTCTCTCAGCATCTTGGGGACAGTGACAGTGGAGTGGCAGACATCAATAAAGGACAGGTTGCTGAGGAAGAAATACATGGGAGTATGGAGCCGGTGGTCATAGATAATAGTTATGACAATGAGAACATTCCCAATCAGTGTCAGGACATAAAAAATGAGGAACATGGAAAACATAGCTATCCGTGCCTTATGATTTACAGATAAACCTCTAAGCCGAAAATATGTCACTAAAGAAGTTTGATTGAGTAGGATGGCCTCTTCCATTCTCTTTGTTAGACAACCTGTAAAGAATTAGAAAAAAAGTCTAATATAACACAGTATCTGCATCAATCATTTGGTCATTTAA\",\r\n                22138125 - 1);\r\n            var codingRegion = new CodingRegion(22138201, 22139150, 83, 1030, 948);\r\n\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 22138125, 22138561, 670, 1106),\r\n                new TranscriptRegion(TranscriptRegionType.Gap,  1, 22138562, 22138563, 669, 670),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 22138564, 22139232, 1,   669)\r\n            };\r\n\r\n            var rnaEdits = new IRnaEdit[]\r\n            {\r\n                new RnaEdit(905, 905, \"T\"),\r\n                new RnaEdit(796, 796, \"C\"),\r\n                new RnaEdit(679, 679, \"A\"),\r\n                new RnaEdit(670, 671, \"\")\r\n            };\r\n\r\n            const byte startExonPhase = 0;\r\n            var codingSequence = new 
CodingSequence(genomicSeq, codingRegion, regions, true, startExonPhase, rnaEdits);\r\n\r\n            var expectedCodingSeq =\r\n                \"ATGGAAGAGGCCATCCTACTCAATCAAACTTCTTTAGTGACATATTTTCGGCTTAGAGGTTTATCTGTAAATCATAAGGCACGGATAGCTATGTTTTCCATGTTCCTCATTTTTTATGTCCTGACACTGATTGGGAATGTTCTCATTGTCATAACTATTATCTATGACCACCGGCTCCATACTCCCATGTATTTCTTCCTCAGCAACCTGTCCTTTATTGATGTCTGCCACTCCACTGTCACTGTCCCCAAGATGCTGAGAGACGTGTGGTCAGAGGAAAAGCTCATCTCTTTTGATGCCTGTGTGACCCAGATGTTCTTCCTGCACCTCTTTGCCTGCACAGAGATCTTCCTCCTCACCGTCATGGCCTATGATCGGTATGTGGCCATCTGTAAACCCCTGCAGTACATGATAGTGATGAACTGGAAGGTATGTGTGCTGCTGGCTGTGGCCCTCTGGACAGGAGGGACCATCCACTCCATAGCCCTCACCTCCCTTACCATCAAGCTGCCCTACTGTGGTCCTGATGAGATTGACAACTTCTTCTGTGATGTACCTCAGGTGATCAAGCTGGCCTGCATTGACACCCACGTCATTGAGATCCTCATTGTCTCCAACAGTGGATTGATCTCCGTGGTCTGTTTTGTGGTCCTGGTGGTGTCCTACGCAGTCATCCTGGTGAGTCTGAGGCAGCAGATCTCCAAGGGCAAGCGGAAGGCCCTGTCCACCTGTGCAGCCCATCTCACTGTAGTTACACTGTTCCTGGGACACTGCATCTTCATCTATTCCCGCCCATCCACCAGCCTCCCAGAGGACAAGGTAGTATCTGTGTTTTTCACTGCAGTCACCCCCCTGCTGAACCCCATTATCTATACCCTTAGGAATGAAGAAATGAAGAGTGCCTTAAACAAGTTAGTGGGGAGAAAAGAGAGAAAAGAAGAAAAATGA\";\r\n\r\n            Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence());\r\n        }\r\n\r\n        [Fact]\r\n        public void RnaEdits_deletion_reverse_utr()\r\n        {\r\n            //NM_001123068.1  chrom: chr1:147954635-147955377\r\n            var genomicSeq = new SimpleSequence(\r\n                
\"TTGTTACTTAGTTTTTATTTCATAATCATAAACTTAACTCAACTCTGCAATCCAGCTAGGCATGGAAGGGAACAAGGAAAACATGGAACCCAAAGGGAACTGCAGCAAGAGCACAAAGATTCTAGGATATTGCAAGCAAATGTGGTGGAGGGGTGCTCTCCTGAGCTACAGAAGGAATGGGTCTGGTGGTGAAAATAAAACACAAGTCAAACTCATTAGAATTGTCCACAGTCAGCAATGGTGATCTTCTTGCTGGTCTTGCTATTCCTGTACCCAAAGTGCTCCATGGCTTCCACAATATTCACACGTTCTTTCACCTTGCCAAAGGCCACATGCTTGCCATCCAACCACTCAGTCTTGGCAGCACAGATGAAAAACTGGGAACCATTTGTGTTGGGTCCAGCATTTGCCATGGACAAGATGCCAGAACCTGTATGCTTTCGGATGAGGTTCTCATCATCAAATTTCTCCCCATAGATGGACTTGTCACCAGTGCCATTATGGCGTGTGAAGTCACCACCCTGACACATAAACCCTGGAATAATTCTGTGAAAGCAGGAACCCTTATAACGAAATCCTTTCTCTCCAGTGCTCAGAGCACGAAAGTTTTCCGCTGTCTTTAGAATCTTGTCTGCAAACAGTTTGATGGAGATGCGGCCCAAGGGCTTGCCGTCGACGGTGATGTCAAAAAAGACGACGGAGTTGACCATGGCTGATAGTACAGGGCTCACAGTGATGGTGGC\",\r\n                147954635 - 1);\r\n\r\n            //coding region between 34..528\r\n            var codingRegion = new CodingRegion(147954850, 147955344, 34, 528, 495);\r\n\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 147954635, 147954669, 704, 738),\r\n                new TranscriptRegion(TranscriptRegionType.Gap,  1, 147954670, 147954674, 703, 704),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 147954675, 147955377, 1,   703)\r\n            };\r\n\r\n            var rnaEdits = new IRnaEdit[]\r\n            {\r\n                new RnaEdit(704, 708, null),\r\n                new RnaEdit(378, 379, \"CA\"),\r\n                new RnaEdit(252, 252, \"C\"),\r\n                new RnaEdit(242, 242, \"C\"),\r\n                new RnaEdit(239, 239, \"A\"),\r\n                new RnaEdit(132, 132, \"A\"),\r\n                new RnaEdit(122, 122, \"C\"),\r\n                new RnaEdit(104, 104, \"A\"),\r\n                new RnaEdit(49,  49,  \"A\"),\r\n                new RnaEdit(10,  10,  \"G\"),\r\n                new RnaEdit(4,   4,   \"G\")\r\n            };\r\n\r\n            const byte startExonPhase = 
0;\r\n\r\n            var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, true, startExonPhase, rnaEdits);\r\n\r\n            var expectedCodingSeq =\r\n                \"ATGGTCAACTCCGTCATCTTTTTTGACATCACCGTCGACGGCAAGCCCTTGGGCCGCATCTCCATCAAACAGTTTGCAGACAAGATTCCAAAGACAGCAGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACACCCTAATGGCACCGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGCACTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCCTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCATGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAA\";\r\n\r\n            Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence());\r\n        }\r\n\r\n        [Fact]\r\n        public void RnaEdits_big_insertions_reverse()\r\n        {\r\n            //Transcript id:NM_032508.3, chrom: chrX:148678216-148713568\r\n            var genomicSeq = new SimpleSequence(\r\n                
\"TAAAATGAGGAACCGGTTTATTGAACAGCTTAAGGAGAGCAAAAATAGTGGCTTTAGCTACATTTTTTACACACTGAGCAGGAAAGTCTAAACCATCCCGTTCCCCTGTACCCCAAAGAGAACAGGGCTTGCTGGAGGCCAGTGCCAAGGGCGGAGTCGTGCTCGCAGCAGACTTGAATTAACCCCATGTAGGCCGGCGAGCAGTTGCCCGCGTGAAAACACCACCCTCTTCTCCTGGCTGAGAAGATCAAAGCTCTTTTTTTACCCTCTTTTCAGCAAAGGACCTATTTGTTTTCAGGCAGGAGGATGTTAAACTTGCAGCCTCTGACACACGGTGGAACCTGCAGTGCTTGGAGAAACGGCACGCACACGTGAAAACATCATGCCTACTCCAAAGCCTTCTTGTTGCTGGCAGGAGGGAAGCTTGAGACTTTCCCACGCATAGTCGTGACCCGCGTGGCCGTTTCTGCTCTCAGCAACATTCTCTAGTGTTCCGGCTTCAAGCAGCGCTTGTCAGGTTTGAAGCTAGCCACTATTCTGAGAACGTCAGAAAAGCATGGACCATCTCTTGCTTGGTGTTGCCGTTCTGGCAGTAGCAGCTACTACGTACCTGCACGAGTTCCAGGGCAGAAGTGGCAATGTCCCATGAAGGCGTGGCACCCCACGGGGGGGGGGGGAGTGTGCCACGGGCGTCCACTTCTGCAGCAGAAGGCATGTGCCTACAGCACAAGCTTGTAAAAAAATACTTGAACAGAATATGCTGTACAGAACTAGGGGTTAACACCGCATATGAAGATGCTAAAACATTTGTATAAATACTCTGTATACAAGCATGGAGTCACTCCCGTAGAAAGGGCTCATCCGTGAGGCTATGAAAAACTGCTGTCAGCATGCCCAAAGAGAAACTACTTCCACAGTAGGAACAGAAAAAAGGACTGTGCTGTGTCTAAACACGTGGTGCATCAGAGACATAGTTACAGTTCCTACTGACTGCCCCAGCCACGACCTGGGAGTGCTGAGGACCTGGGAGTGCTCAGCGAGCTGCAGGAGGTCAGCCCTGTGGAGAAATACATTTCTAAACAATACTTTTGATTGGGATTTCAGCACCGTATAGACAGATGTTCCTTCTGGGGGCCTGGCAAGCAGCCATCTCCCAGTGGGTCTGACGGGGAAGAGGGGTACCTGGAGCCCCTCCCAGACAGACGGTAATCCCACCCCTGTTCTCACACTCTTCCTGGCATCCGCATCTGCTGGCACACACCCCCGTCACCTGCCACTTCCGCGTCCCGTCGTGGTGAGTGGCTGATAGGCGCTGGATGCAAACAAGGCATGAGATGGACGTACCTGGAGACCCAGCTCCAGTACTGGTTCTGGTCTGCGGGGTGAACGAGGGGGCAGAGGAAGGCGGAGAGAGTGCGTCCCAGTCCACTTAAGCTCTGTCCCCGGAAGTGGCATCTAATCTGGCATTTCGATATTTAATTTGGGAGGTGGGAGCACATACTTCCCAGGGCTCTGGGTAATGACCACCCTGGCCTTCTTTCGAAACATGGGTGCGATTTTAGGGGGCTCCGGAACTGGGGTCTCTTCGGTTTCTTCATTATCTTCGTGATGGAGATCATAGGAAATGTTTCCATATTCTCGTAGAAATGGGAAGATTTCAAGCAGAAACTGACAGAAATCTTTGCGGATACCAAACCACCCTGAAAAATAAGAATTTTTTATTTCACACACGAGGCTCAACTGACCTTCCTGTTAACTTTCTTTCCGTAACAAGAAGTTTCACTCCTACAATGTCATAACATACTTTATCCAGACTCCTGAGTCACAAAGCCTGAACAGGGCTTGAGTACCCAAAATGGGGAAGAAGTGCAAATGCTAGCTCTGTGGTGCTTGGAGTGGGGTTCCCGGACCGGCAGGGACAGCGTCCACGGGGCCTAGTTAGGGATGCCATTCTCGGGCCCCAGCCCAGACCTCCAGAAACTGAGTCGGGCTAGGGTG
GGCTCCAGCGGTCCCCTTTTCCTGGCCCTTTTGGGATTCTGCTGGATGCCCAAGTTTGAGAACTACTGCTCCAGTGAGTCTCAAAATATCTGTGGTGCGCAGACTACGGTGTCTTCCGCTAATCTTCTCCAGCCAGGATAAACTCATGGATGACAGTGCCACCCAAGAACAAGATTTCTGTCACCCTCTGGAATCCGTGAGGGCGGTAGTCATGCACGGGTCCTGGCCAGGAGGGGGCCTGAACTCATGGAGCCACCTTAAAGCCACTTTCCCAGTCCCACTACTCCTCTCTGTAGGCTACTGGAGTGTCAGCTCGGTGCAAGCCCTCCCTGCTCCCGGGTGCGGGGTAGGGGGCAGAGGCACAAACAGCAAGCACAGCCCGGGCTGCTGGGCTGCAGTGAGGCCCTGCCCCCAAACCCACTGGCTTTCCGAAGGGCAATGCTCTGGGCTTCCGTGCCATGGAGCCCACAGCCTTGCCAGGAAGGCACCCTCTGCAGAGATCGTTTTGGAAGTGTCTGCCTCAGCAAGCAGGTGGAGGGGAATAGAGTGTTAGCAAGGCAAGACAGGCAAGACTCGGGTGATGGCAGCAAGGATATGGGGGAGGCAGAGAGGCCAACAGGGACCTAGGATGAATCCCAGGTTTGGGTGGGAGATGTGGATTTTCCATCAAACCCTCCCGGGCCTGGGAAGAATCTGTCTTGATCCCCATTTTGCAGAGGAGGGAACGGGATCTCTGAGAGGTTGCCTGCCGTGTCTGGTTCTACCTCAAATGGCAGCGTGCACTGCGAGAAAAGTCCCGGTGCAGGCCAGCAGAACACCAGAGTTACGGCATGCCCTTCCCTTAGAAGGTCCCAGAATTTCCTCAGCCCTCACTTTCCCACACAAGCTTCTAAATTGGGGCCCTCGGGGACTCATCCCTTCCTAGACTTCTATCCGCCCCCCCCCCCACTCCCTGGTCCCCCCCCAGACACACACCAAGGACTTCTGAAATGCTGAGTACATACAGTGGTTTCCTCCCTTCTGTCCAAATGTGGTTGCCATCAGCGTGATCAACGAGAGCCAAAGGGGGACAAAGATCGGGATGCTGGAGAAGGCGTTGTGGCCATCCAGTTTGTGAACCAGCAGAATCTAAAGAAAGAGACATAGTCCCGGTTGATGCCAGCACCGAAAATGGGCAGAGGCGGAAGCCAGACTTCATTAGGCAGTTCCTCCCCACCACCCCACCCCCGCGTGAGCTCCCACAAGAGGGAACATCAGCACCGCCAGAAAAAGGCAGGAAACCACCTATCCCTGGGGAAAGCTCGAAATGAGCTTTTATGTCCCTCTTCAGAGCTCGGCAATAGCCTATCCACTTGAAAAGTTCCCAGTGCCAGCAGTTTTATGGCAAACTCCTCCGGGTGTTTGTTCTAAGGAGTCAACAGCTCCCATTCTAGAATTCTCCACGTGACTCCAATACACAAATCTGACATCCCACTCTGCTTTCCCCAGAGTGGAAACTGGAGCCATACAGAGGCACCATGGCTAAAAAGGTGCACTCTTCTCCCTGCCAGCCCCACGTGCTGCCCCCAAGAGAAAGGAAGGATGCTCTCCTTTCACCGAAGCTCCCTCTCGGAGATGGCTGTGTTCTCTCCCCTCTCCTGGAGTGGGCTCACTGTGAGCTCGAGGGACAGAGGCTGCCTTTCTAGGGGTGCAGAATCCTGTCAGGGGAAGCGCAAGCTTCAGGGGCTGAAGAGGCTTCCCGTGGAACGCTTACCTCAAATGTAAGAAGGGGCACGACGATGGTCATCCAGCTCAGGGCCATGGTTATGTGTGTCCTGCGCTGCTCTGCAATCACATCCATAGAGCGCAAGAACAAGACGGACCACACAATGTAGTAGAGGACCACCAGGCACAGAAAGGACATGAGAATCCACAGCGGGACACACACAACCTGGGGGTGGGTGAGAGAACAGCAAGAGAAGTCTCTTTAGAGCTTCCAACCTGGCCTCTGATGGAAGGCA
TCTTTAGCACCTTGCTGTGTCTGTCCAGTTAAGGCGGTCCTTCCCGTGAGCCGAATAAGGACCGTTCCATCTCCCAGGACTGCTGGGAGCATCGCTCAGGACAGAAAAGGTATGGTATGTTCACTATGGGGCCTGCTGCCACCAGGGGACACACACGCTCAGTGAGTCATCAGTCCCTCTTCCTTTGGGTGACAGACAGCCCTGCACCTGGCTCCGCAGCCTCTACTCTTCCAGAGGCCCACTCTCCCACACTCTCTCAGGCTCCTCTAGGTTCTGCTGCCATCACAGCTTCCCGGGAAATGGGACACAACTGTCACCCTGTGCACACACACAAGATCTCACCCCAACAGACTCTCTTCACAGGCAACATTCCCACAACCTGCTGGGGGTACTTTGGCAACACAAATGGGAATGGGCTCCCCAGAAAGTCTGGCTGCCTGGGCTCCTAAGGATCCCTAACCTCACCCCTACCAAGTTAGTGAACTTGGCGGGTTGATGCTGGATACAGGTTGATGCTGGATACGTAGCGCTGCCGGGTCCCCGCCTCCACGGCAAGGGCGCATTCCCAGTATGTCCCTGTCGTACCAGGTAGACCTTGTCTCATCCACACACAAGCCCAGAGGACGAGTTCCGGGGGCGCCACTTGGCCAGGCTCCCCTGTGACACGTCTTCGCCCTCCTGCCCTGCCTCCTGGGACGACACTCCTCCGTTCTCCCTTTTTATTAATTATCTATCATACAGTAGGAAAAGTGACCGTCTTCCTTTGGTGTGAGTTCCCTGAGTCTTCACACAAGTAGATTCGCACAGCCGTTGGCAGGATGCAGAAGAGGTCTGTCACCCTGCAAAACTCTCCGTGCTGTCCCTTCACTATCACACCGTCCCCACCATTAGCCCCGGCAAACACTGATCTGTTCTCTGTCACTGTACTTTTGTCTCTGCTGGAACTTTATGTAGATGGCATCGCGAGACAAGTAACCTGTTGAGACTGGCTTCCCGCCATCCACATAATGTCTCTAATGAGATTCATCCAAGTTGTTCCCTCCCTGTATCCACAGGTCGTTCCCTCTCAGTTCTGAGTGGTATTCCATTGTATGGATGCCTACAGTTTATCTGACTGTCCGCTGAGGGTGGTTTGTGAAAACCAAACAAGGCCGCTATCCAAAATGCAAACAAGACTGCTACAAACACTGGTGTGTGGGTTTCTACAAGGCTGCGCGCTTTCACTTCTCTGGGGGTAAATCTTATACCCAGGAGTGGGGCAGCCAAGTCCCACGGGAAGTGCGCTTTTAACTGCATCAGAGATGGCCAAACCATTTTCTACAGTGCCCGTACCACCTGCCTTCCCGCCAGTAACACTGGAGTGTCCCAGTTCCTCTGCATCCTCCCAGGCACGTGGCCTCGTCAGTGTTGCTGAGTTTCACCATTCTGAAGCCATGTGTTTCGGGCCCTCATCCTGGTTGTAGTTTGTCTTCCCTAACCTGTAATGGCGTTGAGCATCTTTTCCTGTGCTTTTTCGCCATGTGTATATCCCCTTCGCAAATTGTCAACTCTTTTGCCAATTTTTAGGTGTTTCTTTTTGCAGTTTTGAGTTTTTAAGAGTTCTCTGTATGTTCTGGGTGCAAGTCAGTGTTTTGATGTGTGCTTTGCAAATATTTTCTCCCAGTCTGTGGCCTGTCTTCATTTCATTTTAATTTTGAGGAAGTCCAAATTTATGATTTCTCTCTCGTATGGACCATATTAACAGTGCCATTTCTAAGTACTCTCTGCCTAATTGCAAACCCCAAAGATTCTGTCCTATGTTATTTCCTAACAGATCTATAGTTTTACATTTTCTTTTAGATCTATGATTTGAGTTGGCATACGAATTTTACTTCTCCTGACCAGTTCTTGATTGTAGGTTTCTCTGCAGAGTCTATGCACAGCCTTTCTTCCCCGTTCCCCATTCTGTGATGAGATTCTCCTTTTTACTGAAGTTCCCTTCATGGGTGGAATGTTAGATC
TCAATAGGCTTCCTTGTTTTCTCTTGCTCACTATGGGAAACATGTACTCAGACTGCTTCATCAACTGGGATCTACAGAGGATGAAGGGCAGAAAAAATCTTTCTTCTCATTTGTGGAAAGCTCTCAAAATTACTAATCTTTTTTTTCCCTGACAGTATCTTATACGAAAAAATTTGGTCTTGTTTAGATATGTTTCCTTCACGTCACAACAGAAACAGTTTTGAACCCAATGACCATTCTCCAGATACAGCACTGTGAAGTTGTAGGATGAGTGACTGAATATTTTTTATTATGAATGTTTTATCAAAGACTTTGGGCAGAAGTGATTATTCCCATCTTTAAATATGGAGTATACTTAGGTTCCCTTCATTTCTTCTCTGCTCCCAATTCTTTACTATACTTTTCACTTTTTAAGGGTAGCTATATTTAATACATAAAATATATTGTATGCAAAATTATACATCAAACAACAGAGAAAATAAAACCGAACAAAAACACTAGCATGACCTTACCTCCCAGTGGCAATGGAACTCTTCTGCCTTTAGTCTCCATCTTTTTTCCATGCATTTAATAGTGGAATCTATACTGTGTTCTCTAATTTCTACCTTGCCACTTATCTTTCTATCTCTGCATCCATCTACCCATTTATTCACAGTTAATTTCAACCAAATGCCCAGTAACTGAAGTCACAGTTTAGAAGCATGACATAGATGCCACCACCAGCAAGAGTGTAAATGGGTATGGCTTTTTTTTTTTTTCTTTTGAGACAGGGTCTTGCTACATTGCACAGGTTGGTCTCGAACTCCTGAGCTCAAGTGATCAGCTCACCTCGGCCTCCCAAAGTGCTGGGACTGTAAGTGTAAGCCACTGCGCCTGGCCAGGTATGGCTTTTGGAAAAGCAAGTTGGCAGTGCAGTATACGTATATAGGAATCTCAAATAGTTCCCAACCTCTAGCTCAGTAACACTATTTCTGGACTATTTCCTAAGAAAACAACCAAAAAACAAAAGGCAAAAATTTTAATGCATAAACATATATACTGCAGTATGATTTAAAATCATTCAACACTGGCAACAATGGAAATACCATATTTTAGAAATAAGAGGATGGTCTGATACATGCACTTGAAGAATATTTTGTATCAATTAAACTTTAGAAGTCATGTTTATAAAGACCTTTTATTAACATGATAAAATGTTTATGATACAACATTAAAACAAAAAAATCAGGATACAAAATGGTGCACACAGTTATATCCAAACTGTTTGTATAAAACACAGACATAAAAACACTAACCATGTTATCTCCCCATGGTGGGATTATGGGTGACTATTAGGCTATTACTTCTGCTTGTCCGTCTTTTCCAAGCTTTGTACAGTGAATATGAATTACTTTTATAATAAAAAAGAAGTTTATTTAAGGATTTTAAAAGTTACATACAAGCCAGGGCCAGTGGATGATCTTGTCCAGTCTTAAGGCAATGAATATAAACTGGAGAATGTTGACAGAACACAGGATTTCTAACTAAAAATGAAGAGAAGAATCAGTTAAACAAAGTATAATTTGCATTTAATACTGCAGTAATTTGGTTAACACACTAAAAGACAATACACATTATAATACAGTGTAACTTGTATAATATTATATTGCACTGGAAACTCCTGATTTTCGGTACCAGAGGGGCACAGCAGTATTATGGCAAGGGGAAATGGGGCTCAGCCTGCTGCCTTGCCCCTCTGCTGCCTTGCCCCTCTGCTGCCTTGCCCTTTCTGGCCTGAGGCCTGAGCACAGATGAGAACCTTGTTTCCAGGTTCACTGGGAACACAGGCTAGCTGCAATAGACCACTAAGCTTCCTTCATGTCCCTACCAACAATACTGTCTACTCGAAACCATCCTTGTGCATCCTTCTTCTCATACCTCGGTTTTGCCCTTGGACCCCACCCCTTCCTTTCAGGCTCCTTGAGGACCTTGTCACATCAAGCCATCAACTGTAGATTCCG
TACTGTATCTTTAACCCCACCTATTCCATTGATCCCTCCTAGCAGCACAGAAATGACTCCTTCTCATTAAAAACAAAAGCCCCGAAACCAGATAAACCCACCCCATCCTTTGATCCAAGACCCCTCTTAGCCTCAGGTTTCTCTCTGCGAGACAGTAATGAATATGCAAGCCCCTACTATCTCATCTACCATTCCCAGCTTCCTTGTGATCTGGCCTCCACCCCCATGACCACCAGAAGACAGCTTTCATCAAAGTCACCGATAACCGGGAGGCAGCAAAAACACTTCAACTGGCATCCTACTGATACTGTGGATGACTCTTTCCTTTTTGCTCCCCTCCTCCTGGCCATCTAGGATCCCACCATACAGTCTTGGAGCCCTCTGGAGTGTTCTGGTAATTCCTTCTGTCTCCTTTTCCCCCTCCTTAAATGGTGATGCTCCGCAGAGCCACAAACTCAGTCTTCTCTCCCGACTAGGAAGACAGACACACAAACACACATATTTTCACTCACCCGCTGCACCCTGGACTGGGGGAGAAGAAATTTCAACCAGACCCTTGGTCTGCATTATTACCTCCTTCATGGTTAGTTCTCACATCTGTCTCCAACTTAGCCTTCCTGCTGCATAGCAGACCCAGCTGTGCATAGGCCACTCGGCTCTCCTAAAGGCACCTCAAATGAAGCCAGTCCCAAATAGAGATCATTATCGCCTGCACTTAGAACCTTGTATCTCCTCTTTCTATAGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTTTCTATAGCCTCCCATGTCACTTTCTATAGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTATGGCCTCCCATGTCACTTTCTATGGCCTCCCTTGTCACTTTCTATGGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTTTCTATAGCCTCCCATGTCACTTTCTATAGCCTCCCATGTCACTTGGAGGCACAACAGTGGTCTCATTTCTCAAGCCATGAGCCCGGGAATCACCCTGCATTCTTTTTCCCACACTGTCACGTTCAGTCACCACATCCTGTCCACTGTAAAGTCCAGATTTCTCCTGAATCCCGTGCCCACTTCCTATCCTGATGATGACTGCCTAGTGAGGCCTTCTTCGGCACCATCCGCCTACCACTTCAGCAACCCCTTACTAACCTCCCGCCTCCAGCCCAAACGCCCTGCAGGCTGCTCCTCACTCTGGTAAAGTACATTCTTGCCCACAAAATTGAAATCTGGGACCAGGCCCAGAGCTTCCCAAAAGTTCTCAGTACATAGGTACATGGGGAATTTAGTAATTCCTCCATAGACCTCTAGGCCAAAAGAAATCCCTAATGTGGCTGGGAATGCCAACAGTTGCACTGATTTAAGTAATTACGTCTAACTTAGTAAGTATTTCTAAGTAGCCACCAGAAAAAATAATTCATATAAATCTAAAGAAAAATGTTAATATTATTCTTAAATAACCAAAACTAATTCCCAGTGGGATGCGTGTGCCTGTCAGGTAGCTCACCATTTCCCACGCCTTGGAATCAGACAAGGTGCTCCCACTCGTTACCTGTTCTTCACCCGGATTTTCACACAGCATTAGCCTTTTTTGTTTTCACAGCAACTGCTGAAAACCCAGCTTCTTAAAGATACGACGTCACTGAAAGGAATGCAGTGTGGCCTAAAAATAAACCTGTAAACTATTTCAAGCTAGTAGTTTGTATGGTGTCCCAACAAATGTCAGGTATTACTGTTTTCCTCAAAATGTCCACTATCCCCTGGTGCCCTGTGAGTGCACTGGGGTGCCTGGGGCACTTCAGCACATGCTCTGTGGGCTGAGGACGTGGCCCCTATTTGCCCCAATCCCTCTGTAGAGGCTCCCTCTCTTCTCAA
CTCCCACAGTTCCTTCCTTTCCACCTTGTGCTCCCGTCGGATCTGAATCGCCACAGTCCACTCAGCTGATGGAGTGTTTCCTGCCTCTAGGCTTCAATGTGTCCCAAAAATGCCATCCCTTCCCTCCCAACACAGGGCTTCCTGGCAGACCCTCAACTCCTCCTTCCCATCTCTGTATGAGCCTACTCCAGACCACCCCCTCACCAACATAGGTACTGTTCTTGCATCACAGGAGGAGGGAGCTCAGCTCCTGGTATGTTGTTTCTTCCAAGGGCAGGAATGCCTAGAGTGTGAATATATGAAAGACTTACTTTCTGTAATTCAGGCCCAATGCAGTCCTAGTCCTTGTATAGTTGTCCCTCAGTATCTGTGGGGGATTGGTTCCAGGACCCCCTGTGGATACCAAAATCCATGGATGCTCAAGTCCCTGATATAAACTGGGAATTGTAGGGAGGGTGAAAGTGGACCATCAGATACTCTCCCTCCACGGCTCACTCCTACCTACCACTATACCCATGCTGTGGCCTTTCCACCAGCATGAAAATCAGGGAATAGCTCCCCTTGTTCAAGGCCAAACCCTTCCTGGTGCTCTAGGTCCCATTCTCCCTGAGTCCCACAGGGCCTTGCTCCATCACCGTCCCTCTGTCTACATACCTTCCCCTCACGCCCATACCTGGGGTCTAGGCTTATATGCCTGACCCACCCTACAGCTGCTATGTTTACTTCCTAAGCCAACTGCAGTCTTCTCCCTTCACTCTTCATCCACACGGCTCAAAACCAGGCATCTACAGCCTCTAGTTCTCCCCTCAACCCATCAGTATTCAGCTTTGGGCCCTCAGCTTCTATGCAGTTATGTAGTTATGTGCCCGGGCTTTAGAGTCGGGCTGACTCAAACGGAATCCTGGTCCTGCCCCTTCACATGTGGCCATGAACAAATGACTTATCTTCTCTGGACCTACCTCACAGAGTTAGTTAGTAAGAAAACTACCACATGTAATGTGCCTCGCACAATGCCTGGCACACAGTAAGTGCTCAATAAACGTTATCTGCAATTACTTTCATTACTATTATTACTAGTCCTGGTATTTTATTCATCTGCATATCCTCTATGCTTAGGGAAAAAGGGCTTGGCATCTAGTAAATACTTGATAAATGTTTATTGAATGAATAAACAAACACAGGGGCACATCAGGATAAGCTAACCAGACAGCAGGGGAGGTGCTAAATCATGGGGTCTGAGGTGGGGAGATGGTCAGTTTTGAGTGTCAACTTGGCTGGGCTATAGTACCCAGTTATTTAATCAAACACTAAGCTTGCTGTTGCAGTGAAGGTACTGACTTTGAATAAAGGAGACTACCCTCCATAGCATGGGTGGGCCTCATGCAATCAGGTGAAGGCCTTAAAAGCAAAAACTGTGGTTTCCTGGAGAGGAAAAAATTCTGTGCCAGGACTGCAGTGTCAACTCCTCCTAGGTCTCCAGCCTGTTCACCTGCCCTGGAGATTTCAGACTTGCCAGCCCCACAGTAATGTGAGCCAGTTCCTTAACTCTCTTTATACATATATCTGTATCTAACCTATCAGTTCTGTTTCTGTCTGATTGATACAGGATGTGAAGCTGGGAGAAGGCTGATGTCCTGGGTGAAAAGCTAGTGTTCTAAGTGAAGAAAGAAAAATTGTTATTTCCATCATCTTTTGTTGCCTTGTTGTCTCATGATGTAGAGTTGGTAATGATCAAGCTCTTCCTAACAAAGGGTAAGAAATTGACATCTGAATAACTGAGCAAAATATTTTACTTTTGAAAACATTCTTTTTTTTTTTTTTTAAGACGGAGTTTCATTCTGTCACCCAGGCTGGAGTGCAATGGTTCAATCTTGGCTCACTGCAACCTCCGCCTCCTGGGTTCAAGAGATTCTCCCGCCTCAGCCTCCGGAGAAGCTGGGATTACTGGCGCATGCCACCACACCCGGCTAATTTTTGTATTATTAGTAGAGACGGGGTTT
CACCATGTTGGCCAGGCTGGTCTCAAACTCCTGACCTCAAGTGATCCACCTGCCTTGACCTCCCAAAGTGGAAAACATTCTTAAATATATGAAATCTCACCTCTAGTGACCTGTCATGTCGAAAGCCCCAAACGCAAGCTGCAACAGACACCGGGGAAACAAAGAACAGCGGCATGAAGACCAGGAGCCAGAAATGGCTTCCTCTCTCGATTCTGTCACAGACCAGAACTTCAAACATCAACAAGAGCAAGTGGATGCCCACTGCAATCAACATGGCTTTAAACTCCACACACGTTTCTCCTTCTGCTCTAAAAAAGGGAGAGAAGAAGAAAACACCCTCAGTTCAGAATCTCCACTATAAGCAAGCAGTTCAGGGCAAATACCAACTTATATTTATACTTTGAATTTTACTTGAAAATTTGACAAAAGCAAAGGGAAATCAGGTAGAAAGCTAACTTAAACCTAAGCTTTGGTAGGCAATCTCTGAAACATCGAAGAACTACTACATAATACAAAATGAACATTACAACCAAACCAGAATTTAATGTTTTAACCGTATAAGGATATTCTCAAAAGTAATAGCCAGTTCTTATTTCCCTGACAATGTACATAAACACTTCTGTTCACATCTTTAAATTCAACAACAAGAGTTACTTCCAAGATTATTCAAGCTGATTTGCTTCTGCTGCTAAAACCAGGCAAATACCCCTTAAGTCTCATGATCCTCATTTTTCAAGAAACCATGTAAACCACCCTTCACACAATATTATAAAAATAACTCTAGTTCTATGAACAAGTGCCAGTTATATTTCAAGATAGTAGTAACTATTGTTAGGTGCTATTTTTAAATGCAAATAAAACGTATAAATGATTTTCATTTTCCTTTCTATTCCATTAAGATAAATTAACACCTGCAGATGAAAGAGAAAGAAGAACAAGAGTTAAAACTGTTCTCAAACAAAATCAGTTTAATTAGCTAAGTATCATGCACAATAACCTTAACAGATCTACAATTGAGCAATGGTAAGGCCGCTAATCAGGAAAAGGCTCTATAATGCATCTGAAAGGCCTACAATGTTTATTCAAAATACAGATGAACATTTATGATATACATGTATTGTGGGTGACAAATACACCGGAAGTTAGATCATAGAGAAAATGCTATCAGAGGTTATTCCTGTAGGACCCAACCATGTTCCACTGGTTAATGTTAACATGAGAATGACCACGCCTGTACATTCCTTACATTCAACCCCACATACACAATTCCTTTCCTTGCTCAAAACATCTTAAATAAGACCAACAAAGAGAAGTTTGAATATATTCTAAATATCAATTAGTAGAACCTAAATGTTTATTTAACTTTGCATTCTTTGAGAAGCAATTAATATTAGATATCTGAAAATATCTCATAAAAAAATAAAACACATACAGCCACACAGGTCATCAATCTTTCAAAAAAAATCTAAGAACTCTGAAACAGCTATACATGAATGTCCCTCACCTGCCAAGGCTCTCTGTGTAACTGTTTTACAGTTCTTAGACATGTATGTGATATGTAATTTACACAATCTGAATCATTTTCATATTTAGTAAACAAAAATTTAAAAAGTTGATGTAGTGGCCGGACGTGGTGGCTCACACCTGTAATCCCAGCACTTCAGGAGGCCAAGGCGGGTGGATCACCTGATGTCAGGAGTTCAAAACTAGCCTGGCATGGTGAAACCCTATCTCTACTAAAAATACAAAAAATTAGCTGGGCATGGTGGCAGGTGCCTGTAATCCCAGCTACTCAGGAGGCCGAGGCAGGAGAATCACTTGAACCCGGGATGTGGAGGCTGCAGTGGGCCGAGATCACACCATTGCACTCCAGCCTGGGCAACAAGAGCGAAACTCTGTCTCAAAAAAAAAAAAAAAAAAAAAAAAAAGGTGCGGTAGTTTCAACTTTACACTTTTCCCACATGAGCAGCTGCCTTCTGGGAATTCCTGTACTCCTCATT
TTCCCAGTGGAGGTTCATAATAGCCTCCCAGTCTTAAGTCCCCCTTTTTCCCTTTATGTAGTTACAGTCTCTGTGGCAGAGAAGGGAAAGCCTCTCGCAGGTCCCAGCAACAAGCAGGTTGCATGCTGGCGTGAGCCAACTCCCAGGAATTTGGGCCAGCGGAGGCATCCAAGAGCAGGGAGGGCAAGAGTTGGAAACGGTAAAGGGTACCCCTCTCCCCTGCCCCCAAAGGCTCTGCTTTCCTTCTCGGCATCCAATCTTTGACTTTCCTCATTCCCCAGCTGCTGTCTCAGGGACTCATGGTCTCCTGGTCAAGCCACCTCCCCTACACTGCTAATAGTCCTAAAGCTCTGGGACTAGGAGGGTGGGACAAGGGGAGCCCCAGTTCCAAAACTGTATTGGAGAAAGATCTTTCATGACCAAACATAATATGAGTGTCTTTTCCTAAAAACAGGGTGGTTTCATGCTGCTTAGTCTAGTATGGCATACCAGTTCTGTATTTTGGGTGCATTTTGGATTATATAGGCATCTGTGAGCTGGCTGGCAACTTACCCCCAAATGGCACTGCTTTTGTAAGAAAATACATACCAAAGACTAAATTTTCAAAAACACAGAAGAGATCTGTTAGCTTATACTATAGTTCTAAGACCCCAGATAGGTAGAAAATAAAATGGTCCTTACTTCATCAAAAGTGAGAAAAGTCAAGATATTGCTCCCTCATGCTAGAGACCAATGGGTTGTATAAAGCAGTATTACCGATATTGAGGATTTCGTGCCCAGACTCCAGTTCCAACTGAGGCTCCAACAATGACCATTAACTTCCACAGCCATATTGGAGCAAAGACAGCCCAGTAACTCCACTGTATGATGCCATCCAAACGAAGGGCCAGCAGCACAGAGAACAGCAGCAGACAGGCATAGATGAGGAATTTACTAGGAGAAAAGTAAAACGATTAAGAAGGATTCACTTTTACAAATATGTGATACTGAAATGGGGAGTAATAAGAGCCACATTTGTCAGCATGTAAAAGGAGTCACTAACTCAATAATCATTTATTGAAAAGGTCTATGGGGCAGACATAATGTGTTGGGAATAAAAGAAACATAAAGAAGACTAAAATAAGTCTCCTGCTTTCCAAGGCTTCCTCATAGAAGGAAGACCACACAGAAACATATAATACAGCACAATGTTTGTGATGAGAGCTTGGAAGAGGAAATACAGACTGTGTCTGAGGAGGCACTCAGAAGCAGAGATGTGGTGACCCTAGAGCTGGTTCCAGAGGGCAAGTAGGAAGCTGCCAGGCAGGAAAGCAAGTGAGATGAGGAAGTATTCCAGGCAGAAGGAACTAGCTATACCAAGACACAGAGACTGGAAAAGGCTGACATGCTCTGAAAATGGTCAAGTTCTATCACTAACTGATTCTATTTCTAAAAAGGCAGCCATCTGTCATATTCATATGGCATGAACATTTTAGTGTATGTATTATACTTTCCATGAATGAATAAATTACACATACACACATGCTCACGTCTCATATAAAAGGGAATTGCTACAGAGGATGTCCTTGAAATAATTAGAAATTATACTCTTGAGGACCTCTATTTCCAGCCTTGACTTAATAATAGGAATATAATTTACCTTCCCGCCTAAATAAGAAGCTTGATACAGTCTACAAAAGAACAGTTTTCAGACATTGACAACAGGCAGTGGAGAACAGGTGAGAAGGAGGAAATAAAGGAGGTAAACCCTACTATTGCCCCAGTTTGCAGATCAGAGGCAGTTTCCAGGCTGCAGCAAGAAAACAGTTAAAACTCAACCCTGTCAATAACTATATTAAATATAAATGGCCAACTGAAAGACAAAGATGATCAGATTGGATAAGTAAGCAAGACAACTATATGCTGTCTGTAAGAATCCCACTTTATCTATCTATAAAGACACAGATAGATTAAAAGCAAAAGGAAAGAAAAAGTTATACCAAATAAACACTAACCAAAAG
AAAGCTGGAATGACTATATTAATATCTGTTTAGTCTTCCATTGCTGCTGTAACAAATTACCACAAACTTAGCAGCTTAAAACAACGTAAATTTATTATCTCACAGTTCTATATGACAAAAGGCGAGACGGGCTTGGTTGGTTTCTCTACTCAGAATCTCACAAGGCTGAAATAAAGATGTCTGTTGGTGGAATGCTTATCAGGGGACTCTGGCAGAATCTACTTCTAAGCTCATTCAGGTTGTTGGCAGAATCCAGTTTCTTGTGGTTGTAGGACTGAGGTATGTGTCTCTTTGCTTGCTGTCACGCAGCAGCTGATCTTGCGATAGTAGGGGCCTCTCCTGGGTCCTTGTAAATAGGCCCCTACATCTCAAAGCCAGTAACAAGCTATAGCATATTCAATCTTTCTCATGCTTGGGATGTTTTCTCACTACTTCTGCCATATCGCTTCTGCTTCCACTGAGAGAAAGTTCTCCGCTTTTAAGAGTTCATGTGATGAAACTGAGTCCACCTGGTTAAGCCAGGCTACTCTCCCTATTTTAAGGTCCATAACTGTGGTTGGTAGGCAGAATTCTAAAGAAGTTTCCCAGGATTCCTGTCCCCTGATTATTCAATCAAACACTAATCTGAGTAATACTGTGAAGGGACTTTGCAGATGGAATTAAGGTTACTAATCAGCTAACTTTACAATAGGAAGATTATACTGGATTATCCAGGTGTGCCCAGTGTAATCCCATAAGCCCTTAAGAAAGCAGAAGAGTAAGTCAGAGAAATGTGGTGGAAGAGAGATGAGGCAGAAGTCAGAGAGATTCCAGACTTGAGAAGGATTCAGCCTGTTACTTCTGGCTTTGAACATGGAGGTAAGGAACCATGAGCCAAGGAATGCAGGCAGGCTTCAGAAGCTGAGAATAACTCGCAGCTGACAGCCAGCAAGGTAAATGGGACCTCAGCCCTACAACCCCAAGGAACTAAATTCTGACAATAGCCCAAATGTGCTTGAAAGCAGATTAATCCCTGGAGGCTCCAGAAAGGAATAGAGCCCTCCTGACACTTTGATTTTGACCCTGTGAAACTAGGCAGAAGACCCATCTGAGTTGTGCTGTACCCGGACTTCTGACCTAAAGAACTGAGAGTAATTTGTCATGGTAACAGCAGAAACGAATGCTAATAAATATAGCTTCAAAGTCCCCTTTGCCATGTAAAATAATAACATATTCACAGGTTTCAGGGCTTAGGGCCTGGGTATCTGTGTATTTGTGGGGGTGGGGCATTCTGCCTACCACAACATAAGACACAGTATATTTTTGAACAAGGACTATTTCCAGGGACAAATAGAGGTAGTTCATAATGATAAAGGGGTCAATTTGTCATATGCCTAATAACAAAGTTTCACAATACATGTAGAAAGTACTGATCAATCTAAAAGGAGAAATAAAAAAATCAAACTGTTATAAATGGAAATTAACATTCCTTTCTTAGTAACTAATAGAACACATAAACAGAAAATTACTAAGGATATATATGATTGTAGCAACACTATCAACCAACTTGACCTAATTAATATTAATGAGTTCCTCCCAACAAAAGCAAAATACAGATTCCTTTCAAACACACACGGAACATTCACCAAGATAGATTGAATTCTGGGCCATAAAACAAAGCTCAACAAATTTAAAAGGACTGAAATCATACAAAGTAAACAAGCACAATGGAGTCAAACTAGAAATCAACAATAGAAAAATATCTGGAAAATTCTCAAAATACTTGAAAATTAAATGCCACACTGCGAAATAATCCATAGGTCAAAGACTATGAAGAAAATTGAAAAATATTTTGGACTAAAGGCAAAAACACAATATACCAAAATTTGTGAGATACACTAAAGCAGTACTTAAGGGAAATTTTAGCATCAAATACTTACATTAGAAAAGATATCAAGTCAATAATCTAAGATTCTATCTTATGAAACTAGAAAGAACATGGAGGTAAGGAACCTCCATGT
AAGAAACGGAAGGAAATTTTTAAAAAGTAAATGGAAAGAAGAAAATGATAAATGTAAGCACATTAATCAATAAAATACAGTAAAAAGGGATTAGAGAAAAAAATCAATGAAACTAAAAGCAGTTTCTTTGAGAAAGTAAGAAAATTGGTAAATCTACAGCCAGAATAATCAGTAAAAAAGAGTAGGCTCAAATTACTAATATCAAGAATGAAAACAGGGATATCACTACAAATCCTAATAATACTAAAGGGATAAGTAGGGGATATTATAAACAACTTTATGCCAGGAAATTTCTTGAAACAAAGACATGGAAACTGCAATTCTAGTTAAAACCTTTTTCAGAAAGAAAATGTCAGGCCATGGCAAAATCTACCAAACATTTAATGAGGAAATACCACCAATTCTTCACAAACTCTTGCAAAAAAGATGAGGGGGGAACATTTCCCAATTTATTTTATGAAGCCAGCATTACCCTGATAACAAAACTGACAAAGAAAGAAAACTACAGACCAATATCTCTCATGAACAGAGATGCAAAAATCCTGAAAAGATTTTAGCCAACTGAATTCAGCAATATATAAGGATACTACATTACGACTAGGTGTGGTTTAGCTAGGATTGCAAGATTGCAATCTTGGATTAACATTTGAAAATCAGTTAGCAACCTTCGTGGTATTAGCAGGCTGAAAAAGAAAAATCATATGATCATCTCAATAGTTGTGTAAAAAGCATTTGACGTAATTTGCTACCCATTCAAGTTAAAAGCGTTAAACAAATTAGGAAAAGAAGAGCATCTATGAAAAACCTGCAGTTATAATGCTTAATGGTGAGAGACTCAATACTTTCCCCTTAAGACAGGAAACACAGCAAGTATGTCCACTTCAACACATCTATTCAACATTGTACTAAAGGTCCTAGCCACAACAATAAGATGATAAAAAGAAATTAAAAGAATATAGTTTTAAAAGAAAGAAGTAAACCTGTCATTATCTATAGGCAACATGATATTCCCTGTAGGAAATCCTAAGTAATCTAAAAAAAAGCTAGTAGATCTAGTAAGTGAATTTAACAAAATTGAAGGACACAAGGTAAATTTACAAAAATTATTGCATTTCTATATAGTAGCTATCACAAATTGGAAACAGAAATTTAAAAATATGTATCATTTACTGCAACTTTCCTATAAATTTATAACTATTCAAAAATTAAAAGTTTATGAAAAAATCCACCATTTACAATAGCATCACAAATATAAAACACTTAGGGATAAATGTAACAACATATATGCAAAACCTATATACTGAAACCTACAAAACACTGGTAGGACTGTGGGGAAGCTAATAAACAACAGAAATTTATTCCTCACAGTTCCAGAGGCTGGAAGTTGGAGATCAGGATGCCAGCATGGCTGGGTTCTGGCGAGGGCTGCCTTCTGGGTGGCAGATGGCAGACTTCTCAATACCCTTCACATGGTGAAAAGAGAGTGAGTTAGCTCTCTGGTCTCTTTTTATAAGGGCACTAGTTCCAACCATGAGGGTTCCACCCTCATAACCTAATCACCTCCCAAATGCCCCACCTCCAAATACCATCACACTGGGGACTAGAGTCAACTTGTGATTTTTAAAGGGACACATTCAGTTCATAACTGCCAAGAAAAATTAAAGATCTAAATAAATGGAGACATATACTGTGTTCATAGAACACTCAATACTGTTAAGATTTACATCCTCTGTGTATTAGTTTCCTATGGGTGCTGTAACAAACTACCACAAAATTGGTGACTTAAAATGGCACACATTTATTATCCTGAGGCCAGAGGTCCAAAAAGGGTTTCACTGGGCTAAAACCAAGATGTCAGCAGGACCTACTCCTTCTGGAGACTCTATGGGAGAATCTATTACTTGTTTTTTTCCAGTTTCCAGAGCTGTATTCCCTTGGCTCATGGCCCCTTTCTCTATCATCAAAACCAGCTGCATAAAATCTTCAAATCTCTGTCTCTG
TTTCCACCACACTGCCTTCTCCTCTTAATATTATCTCCCTCTTTTAAGGGAACCTGTGATTGATTGCATTTATTGCCCCACTTGGATAACCCCATCATCTCAAGATCTTTAACAGGTTCCATGGAAGTGGGTATCTTTGTGGGTCATGATTTAGCCTACCACACACACCAAATTAACCAAATTATTTTAAAGAATCAATGCAATCCCAGCGAAAATTGGGAGCCAGCAGGTTCTTCTGTAGAAATTGACATACTGACTTTAAAAGTCATATGACAGTGCAAAGGACCTAGAATGGGCAAAACAATTCTGAAAAAGAACAAAGTTGGACAATTTATACTACATGCTTTCAAAGCTTACTATAAAGCTACAGTAATAAACACAATGTAGTACTGGCATAAGAATAGATATATAGAACGTACTAGAGAGTTGGAAAACAGACCCATACATATAGAGTCAGTTAATTTTCCACAAAGGTGCCAAAACAACTCAATGGAGAAATAATTATGTTTCAATAAATGGTGGTACAACTGGATAAATGAGGAAAGTACACCTTGACTCTTAACCTTATAACATATACAAAAATTTATGTGACATGGATCATAGACCTAAATGTCATACACAAAACTATAAAATGTCCAAAACACATATGAAAAAAAAATACTTGCAACCTACGGTTAGGCAAACATTTCTTAGATGGGATATGAAATTGGACTTCTCCAAAATGAAAAACTCTTACTCTCCAAAAGATACCTATCATTAACAAAATGACAGCCAAGCCACAAACTGGGAGACCATACTCTGAAAACACGTATCTGACAAAGTAACCTGTATCCAGTATATAAAAAGGACTTTTACAACTAAAAAAATAAGTGAACAACCCGATTTAAAATGGCCAAAAAATTCAGAGACATCCCATCAAAGAAGGCATACAAAAAGCAAATAAGCACACAAAAAATACTCAGTATTTTCAGATGTTAGGACAATGCAAACTAAAATGACAATGAGATAAGATTACACACACACTAGATCTCTTATATATGCTGGTGGGAATGCAAAATAGTACAGCCACTTTAGAAAACATTTTGGAAGTTTCTTATACAGTTAAGTGTATACTTATTCTATGGCCCAGCAATCCCACACCTAGGTATTTTACTCAAGAAAAAGGAAAACGTATGTCCACACAAAGGCCTGTATTCAAATATTCCAAGAAGCTTTATTCATAATTGCCACTGGTAACAACTCACACATCCATAAACTGGTGAATGGTTAACTGAATTTTGATATATCCGTTCAATGGAATACTAATCAGCAGTAAAAAGTAACAAACTATGGATGACAACAACAACATTGATGAATCTAAAATGCACTATAAGTAAAATAATTCAGATGGAAAACACAAGATAGTGTACAATGCTACTTATTTGACATTCTGGAAAGGGCAAAACTCTAGACACAGAAAAACAGATCAGTGCAGTTACAGATAAGGAACAGAAAGCTCAGATGTTAAATAACTTGTCTAAGAGCTCAGAGTTGGGAGTCAAACCATTTGACTCCAAAATCCTTCTAATTGAGTATTATACTATACCGGCTTTACACTGATAAATGAACATGTTTAGAAAAGCATCTGTATAAATATCTGCTGAGCAGCTAACAGGTCTCCACTCGGGAAGATTAAGGATAGTCTTTATACTCTTTTGCTTATGATATCTGAGTTTTCTGTAAGGTACGTGTACTGTTTTTGTACTATGAAAAATAACATAAGAAACTTCGATTGGAAAAAAGGGAGGAGGGTTGTAAGAAGACTGGACCTACAAAATTAAACTTGGAAGCAAGCCTCCCAGAGAGAAAACTACGCTGGGAAGATCATGAGATTCAGAGGGGTTGCTAAAAAAATATACATAGCATCATTGTGTCATGGTATTCTTACCCATAAAGTTAGGGCAATAATATGTACCTCATAAGGTGGCTACGAATATAAGATAATGAGTATAACAAGTGCT
TTGTATAAAGCACTAAATCAGTGGCTCTCAAAGTTTTTAGTATCAAAAATTTTGACGTTTAACTCTTAAAAAGAACTCCTTTTCAGTTCTAACACTTACTGAGGATCCCAAAGAGCTCTGGTTGATATGGGTTATCCCTATTGGTATTTAATATTTACGATACAGTTGTTATTCAAAAAATATCTACTCTTCGAAGGTAATAATAAACACATTACATACTAACATACATAACATTATTTAAAAACATTTCCAAAACCAACACAAATTTAGTGAGAGTAGCGACACTGTTTCTACAGTTTAGCAAAATTTTTAATGTATGATATCATAGAAGACACCTAGATTCTAATATCTATATTTACATTCATTCTGTTGCAATACATTGTTTTGGTTCAAGTATACAAAGAAAATCTGGCCTCACAGAGATCTGTAGTTAGAAAAGAACTATTTTAATAGCTCTTTCAGATAATGGTAGACGTTCTTCAACAGGACACTAAAACTCAACAAGTGGTAGTTTCTTAAAGGTTAACTATGATGTGGAATCTGAAATCGTATCAATGACCTTTTTGTACTCTGCTACACTGAAATCCACTGGTCTCTCTTATACTTTCAATGAATCTTTTATCCCAGCATTATTGTATAATGCAATGTAGGTCGACTGGAAAATATTGGTTTACTGAGTTATAAAGATCTTTCAAATGTTGACACATTTCATATACAATAGCAAAAAAAATCACATTACTTACTATCTTCACCAACTTCATCAGAAATATTTTAATTACTGGGAGGTTGTCAAGCTCATGGTAGCAGATACAACTTTTCAAAAATTCTGGTTTTCCCATGAAAGATCAAATTTTACCATTTACAAAAAATACCATGAGTTGTTTTCCTTGAAGTTTTGCTTCCTGCACTTTCTTTTTTTGTGTGTGTGATAGAAGCATCTTTTATTATAGTATTTTTGTCTTTTTTTTTCTTTTTTTTATTATTATTATACTTTAAGATTTAGGGTACATGTGCACAATGTGCAGGTTAGTTACATATGTATACATGTGCCATGCTGGTGTGCTGCACCCATTAACTCGTCACTTAGCATTAGGTATATCTCCTAATGCTATCCCTCCCCCCAACCCCCACCCCACAACAGTCCCCAGAGTGTGATGTTCCCCTTCCTGTGTCCATGTGTTCTCATTGTTCAATTCCCATCTATGAGTGAGAACATGTGGTGTTTGGTTTTTTGTCCTTGCAATAGTTTACTGAGAATGATGATTTCCAATTTCACCCATGTCCCTACAAAGGACATGAACTCATCATTTTTTATGGCTGTATAGTATTCCATGGTGTATATGTGCCACATTTTCTTGATCCAGTCTATCATTGTTGGACATTTGGGTTGGTTCCAAATCTTTGCTATTGTGAATAGTGCCAGTTAGAATGTCAATCATTAAAAAGTCAGGAAACAACAGGTGCTGGAGAGGATGTGGAGAAATAGGAACACTTTTACACTGTTGGTGGGACTGTAAACTAGTTCAACCGTTGTGGAAGTCAGTGTGGCGATTCCTCAGGGATCTAGAACTAGAAATACCATTTGACCCAGCCATCCCATTACTGGGTATATACCCAAAGGACTATAAATCATGCTGCTATAAAGACACATGCACACGTATGTTTATTGTGGCTTCCTGCATTTTCAAGAAAATGTCTGCCAAACACCATAACACAAATAACCACAGTCTTGTCTGTCAGTTGTCCTTCCAAATAAAAATGATACTCCATGAAAACAGCAGCTTGTAACTCAGGCACACACGTTTTTCCTTGAGTCAACCATTGCACTTCAGCATACAACAGGCCTTTATGCATTCTTCCCATTTCATCACATGGAATATTAAACTAGATGTGCATTCAGGGGTCAAGATTAAATGAGATTAATATTTTTCTGCTTTATCAAGGACATTCGTAAGTGAAGCTGGCATTTTTTTTTTAACTGCAAGTCCAGCACGTGGTGG
TGAAGAATCCAGTGACAGAAGACTAGTGCAGCTGGTGCCACTGTCCTGATTTGTGCTCCAGCACCAGCTGTTTGATGCACTACTTTTGTACCACCAGTGCCAATGTCGACCAAGGCAAAGAATGTCTTAGTATTATAATTTTGACTTTGCAGATACCTGGAAAGGGTCTCAAGGCCCACTCCCAAGGTCTGTGGGAAACAGTCTGATAACCAGCGTACTCAATACACATTAGCTAATATTATTAATACTCGAAAACAAAAACAGGCTTTTATAAGCTGCTACAAAGAAAGAACATTTAAAGAAACCAAGATAAAAATGACTTCATCCCTTAGACTTTGGACAAGAAGAGAGCCTGTGTCAAGGTGATTCACAAGTTAATAGAAATGGAGTTATCACCACAATTACAGTACACAATTAGTAGAGGCAGAAGTTTTCTCTCTTAAAGCAGAGGGAAATATTCCAAAATCTAAAAAAATCAGAACCAATTTGTCAACTAAAGCCTGTGGCTACAAATATAATTACCAGAAACAATGGACTTGAAAACAGACTATTTAAAAAAGAAATTAGTGGATTCAAACATTAACAAATGCTAAGATAATGACGACACAGGATTCATATGTAAATTAGATAACATGTACAGGTAATTTTTATCCTAAAGGATTTTCAGACTATAAATAAAAAGTAAATTGGTGGGGGGGGTGGTAAATGCTGTAAGCTAACTTCATTATCCTCTGTAAAACAGTTTCCTTATTTAATAAAAAAAGAGAGTAGTGGAACTAAAGAAAAGAGAAGTACCAAAATGTTTTAAATGTTGGAAAAGGGATTATTTTATGTGGTTTTTGACATAATTTAAAGGAAACTTAAAATTTTATGTCTATTACATGAAAAATAGAAAACAAGATAAACTTACAAAAGGACCCACAAAATATAATTCATGATTCAAATACTAGGGTTAGAAAAATATACATGAGCTGAATGTTCTCTTATTAATTTCCTATTGCTGCCATTCAAAACTGCCACAAACCTAGTGGTTTAAAAAGCATAAACCTAGTGGTTTAAAAGGCATAACCCTTACAATTCTGGAGGTCAGAGTCCAATATGGGTCTCGCTGGGCTAAACTTGAGGTGTCAGCAGGACTATGCTACTGAAGGCTCTAGGAGAGAATGTTTCTTTGCCTTTTCTAGCTCTAGAGGCTGCCACATTCCTTAGCTCATGGCCCCTTCCTCCATCTTCAAAGTCAACAATGGAGAATGCAGTTCTTCTCATACTGAATCACTCTGACCTCCTTTTCTGTCTCCCTCTTCTATATTTAAGGGCCCTGTAATTACATTGGGCTCAGCCAGAGAATCCAGAATAATCTATTTTAAGGTCAGCTGATGAGCAAACTTAATTCCATCTGCTACCTTAATTCCCCTTTGCCATGTAACAGAACATATTCCCTGGTTCCACGGATTAGGATGTGGACATACTTGGGGGCGTCATTATTCTGCCTACCACAGCTCTGTAAAAAGAAAGGTTGCTTCCTAGATTAAGAAAGTAAACCTGTTACATTCAAGAGTTACGCTTGGAACCAAAGTCAAAAGTCGAACCAACTATCAGAAATCAGATGACAGAGGTTTAGCACGCCTGGATGAAGACAAGAGCAACAATTTACTATCTTTAAAATTAGATAAATTAGATTTCAAAGCCGAAAGTATTAAAATGCTAAAAGGACTAATAAGGAAAACCTAAATAACAAAAACCTAACATGACCATGGAATATGGAAATAATTTTACAATTTTTCTTTTTTAAGGATACACAGAAATATTTTAATTGTGGGCTTCCTCATGCTACTCTTAAATCATGACAGATAAAACAGACAAAGTTCCTAAGGAAAATACGGAAATGAACACAAGAGTAGATTTAAAAAACAGCAAGGTTATATGAAGAGAATTAAGTAGCAGAACGAGAAAATTTAAAAAACAGCAAGCTTATATGAAGATAATTACGTAGCAGAACA
AGAAAACATACTTGCTTTACATATATATAGTTTTTCCAGGGAGAAAAAAAGGATCATGGGGCAGCAATACACAATAAACACACATCTAATTTACAATTGTTAAAAATATTTGAGTGCCTGAAATGTGTGAGATACATAAAAAGGAGCAACAATGCGTTCGCTCAAGATGCTCAGACTAATAGGAACAGAGGGAGATGATGTATCAACAAGTAACTACATATGGTAAACTAGGGGACTTGGAAGATTAATTTCCCTGAGAGAGATGGGAAGAGAAGTGGAATCTGGGCAACAAGTTGGGAATAAGGAATTCCAGACAAAGGGAATAAGATGTTTTCCAATTTTTTAAAAATTATTTTTATTTATTATGGATACAAAACAGTTGTGCCTATTTATGGGGTACACGTGATACTGTGATGCAAGCATACAATGTGTAATGATCAAATCAGAGTAACTGGGATATCCATCACCTCAAGCACTGATCACTTACTTGTGTTAGGCACATTTCAATTCCACTCTTTTAGTTATTTGCAAATATACATTATTAACTATATTTGCCCTCTTGTGCTACCAAACACTAGATTTATTATTTCTATCTAACTGTATTATTGTACCCATTGCTCATCCCCTCTTTATCCTCCCCTCTCCAATTCCAGCAGACTGGAACACACAGAATACTTTCTATAAATCGTGGTGGGTAGGGTGCTGCAGAGACAGGAGAAAGGGTACTGGGGGGAATAAGCCAGAAGTTGGCTGTGGTAACATGGTAGGGGAATGTGAATATCAAGGTAAAGTCCTTGGAATTTATTAATAGGTAATATGGGGCCACTGCAGATTTTCTCAACAAGAGTGACAGAAGATACATATGAGTGTGTAAGGTAGATTGTAATCAGTAGAGACCAGAGGTTGACATTTTATACAATTCATAAAAACACTGACAAAAAATGGATCATTTTCAATACTGATTTGTTAAGCAGTTTAGTAGAGTTACTAAAATACAATTGAAACATAAAAATACCTGGGCAGCTGCCAAAACAAAACTCAAGAGAAAATGTGTTCATTTAAAATGTTTAAGTAAAAGTAGAAAACAAAGAAAAAAAAAAGAGGCAAAAGAAAACTAGTAAACTGAGTTTCTAAGAAATTTGGGGAAAAAGCCAAAACATGAAAATAATAAAACTAAAAGTAGAAATATAGATATAAAAATGAAACTGATCAGTAATCCCAACAGCTAGCTTTTTAAAAAAATTAAACTACAAAGTTGATCAAGTAAAACAAAGTAATACAATCAGTAAGTTCAAAAGATGTAATTCATACACACTCCTAGTTGGGGAAAGAAAGAATAGTATCTTAACTATATCAATATATTTGCAAGCAAAGTCTAAAAGGTGACTGCATAGCCAAAAGGAAATATCAAGCCTGATTTGCAAAAGAATACACAATAATATGCAATAATACAGTGATAAGAAAATGTATTTACAAAAATAACTTATGGTCATAGCCTAAACTACTTTGAGTTACTTTTCAAGAAACATTCAGACCAGATTCCAAGTATAAAAATAGACTGCTTAATTCTCCACAAACCTGGGAGAAATGGGAGGTTCTGGTTTAACATTAATCACTACTTCTTAAAATTCACTTTTCCAGTTACACCTTTAGAATGGATTTATTCCACTCTCATTTGAATCTGTAGTGTTAGTATACGATGAATCACTAAGTAGTGCTACCTGGGCCAAAGCTAGTATCCTCTCTGAAATTTACATGGAGCTTTCTGCTCAGGCTCAAATTCCCTCTCTCCACTTTGCAGGGTTGGGAGAATAGGGAGGGAGAAAAGGAAGAAGGGAGGGAGAGAGAGAATATACTTAGCAAAGGAATGATGACTCTGAATGTAAAAGTTCTAGTATCTGTTTTCTGCATATCATAAGAATGCAGCTGGACTCTTTTTAGACCTATCAGTTTTTTTCCAGTGGAAGCTGCTGGCTTCACTGGAGCACATACACGCGCAT
TAACACATGCCTGCATGCACGTACACACGCACAAGCACATACACACACACAAACTTTGGATGGCACATCCCAGTCTAAAGCTTGACAGAATGGCTCCAAATGACAACCTGACATACTCACAAACTATCAAGGGAACTGCCCCTCTTACACCTTATTAACCAAATTTAAAGTCTTTTAGTGGAGACTTTAGCTGGTTTTATTAAGATAAAAAATGTTTAAGAGCATTTAACCAGTAGATACAAAAAAGCACAAAAGCCAAATAAAAGCAGAGCCCAGGCCAGTAGGAAGATCTGCATTTAAGTTCCTCAACTGCCATTCATCAGCGGTGTGGTCTTGGGTGAGTTATTTTCCCCCAGGTCTGTCTCCTCATCTGATTCCTCATATACTGATTCATTAAGACAAGGGTATGTGAAAGCACTGTGTAAATGATACATTTTACCCATTCTAGCTTTAGCAGTATTATGAAAGACAAAAGTTCTGCCACATTGTAGGTAATAATCTCATTTAGCAATCATTGTTGTCACTATTAGGTTGGAGCTGACAAAGTATGAGTCTCCACTTATAAAGCATCTATCTCCAGAGTTCGAGGCTTTTGTCTTAAATTCCAGTCATCTTGTACAGGCATGACATATCAGGCAAAATGATTAGAAAACTCAAGCTCCATATTAAAAAGCTTAACTTCTGGAGTCCTAGGAAACTATCTAAAACTCCTTCAATCCACGGTCTCTCCTGTACAACAAAGACTTCCCAGTGGTAGATGTCTATTTGTACCCAACATCCACTCATTCAGCACGTAATTCAGCACCTCCAAATCCTGGCCCTCAAAGAACACAGCTAAGAACAATGTGTGTGTAATTATCAAGTAATAGGAATGATACTTTTAAAACTGGAAATTATACATTCAAATGAGATTTCTCTCCTTTAACCAGTCCCCTTGGGAGGCAATGCAGTAATTCCAATGGTACTTCATTACTCAAATCATCTTTGAAGCTTTCTTCTTGGAAGCACCTTGAGAACCTGCAGTCTGATCTTCTGACTATCCCAAATGGTGCTAAATTTTCACTGAGGGTGGATTCAAATTTTGGAAATGGCAAACAGTCAGTCAGAGCCAAGGTTAGTGAATAAGATGTGTGATCAAACTAGGTGGAACTATTTTGGTTGAAAATGATAGATGATCATAAAGCAATGAGATGGATCTTCTTATGTGATATGTAAACTGACTTTAAAGGGAATTCCAGATGAGTAACAAGGAGTATGAACAATGGAATAGGTGTATACATTCTCTTTCCCCAAGTAACCTCTTTGAATAACAACACTCATTTGGATGTATAAGCGCCACTAACAGGTTTGTTTTGTTTAATCACTTACAATTTGTAAATAGAGGGTTCCTTAGCATACTGGATCTACAATGTCATGGAGGAAAGGAGTTGTCACTTAACTAGAACTATGGCATGGCACGCAGGAAAGAGCACAGGCTTCATGAGACTGACGTGGATTCGAATGTTGACTTAGATTCTTCCTGCCTATGCAACTTCAGGGAAGTCACAAGCTGTCAAGACTTTAAAATAAGGCTGATACCTAATTTTGCAGGGTTGTTGTAAAGATCATGGATAATGGATGCAAAGCCCCTACATTGGGCCTGGCATGTGGTGGGTACTCAAGAAGTCACAGCTATTATTAACAGCAACATCAGTAAAATCAAGACCATTTTCTGACTGTAGGTGGCTTGAGAGGACAGAACAACAGATAAGCAGTCACTTGAGTGGTAAGTAGTTCATTGACAGTACTGACAGTACACTCGTGCATTAGATACTGCTTTCTTAAAAACAATAACCTGTAAAAACTATTTGCAGTAACTATGTATTTTTAATTCATACGACACATATCATCTGCAGTGCTCATACCACATGGACCTCTTCACTACCTGAACATACCAGGCACTATCATGACCCCATGCCTTTGTGTACTTTGTCCCTTCTGAGATACTGAAAGGGGCCAGC
CCCTCCACACCTGTGGGTATTTCTCGTCAGGTGGGACGAGACTGAGAAAAGAAATAAGACACAGAAACAAAGTATAGAGAAAGAACAGTGGGCCCAGGGGACCGGCACTCAGCATACGGAGGACCCGCACCAGCGCTAGCCTCTGAGTTACCTCAGTATTTACTGATCATTATTTTTACTATCTTAGCGAGGGGAGTGTAGCAGGGCAACAGGTGGGGAGAAGGTCAGCAGGGAAACGTGAGCAAAGGAATCTGTATCATGAATAAGTTCAAGGAAAGGTACTGTGCCTGGATGTGCACGCAGGCTAGATTTATGTTTCTCTTTACCCAAACAACTCAGTGTAGCAAAGAGTAACAGAGCAGTATTGCTGCCAGCATACTTCGCCTCCAGCCACAGGGTGGTTTTCTCCTATCTCAGAATAGAACGAATGGGAATGGTCAGCTTTACACAGAGACATTCCATTCCCAGGGATGAGCAGGAGACAGAAGCCTTCCTCTTATCTCAACTGCAAAGAGGCCCCCCTCTTTCACTACTCCTCCTCAGCACAGACACTTTACGGGTGTCGGGCTGGGGGGTGGTAAGGTCTTTCCTTTCCCACAAGGCCATATCTCAGGCTGTCTCAGTGGGGGGAAACCTTGGACAATACCCAGGCTTTCTTGGGCAGATGTCCCTGCGGCCTTCCGCAGTGCACTGTGTCCCTGGTTAATCGAGAATGGAGAATGGCGATGACTTTTACCAAGCATACTGCCTGCAAACATATTGTTAACAAGGTACATCCTGCACAGCCCTAAATCCATTAAACCTTGATTCATTACAGCACAGGTTTCTGTGAGCACAGGGTTGGGACTAAAGTTACAGGTTAACAGCATCTCAAAGCAGAAACAATTTTTCTTAGTACAGATCAAAATGGAGTTTCTTATGTCTTCCTTTTCTACATAGACACAGTAACAATCTGATCTCTCTCTCTCTTCCTCACAAGGTACCCTTCCTACTCTCTGGTCTGATGACAACCCTACAATTCTAGCTCAAAAGTCTGGCTCAAAAGACTAGCTCAATTCAAAATCTAGCTCAAAAGTCATTTCCACTATAGACTCTTCCCTGCCTGCTCGAGACAGAATTAGTAGCTCTGCCATTTGTACTTCTAAGACATTTGGTTTCTTTCACTGTAATGCCTCATACTCTATTGAATTCTTATTTCCCTTTCATTTCTGCTTCTTCCACTAGGCTTACGCATTGAGAGGAGGACTATATTCCTTTATTTTTGTACTTAGAGCACCTAGTACATCACTTGGCACGAGATAGGAATCTAGATGTTTGAGGAATCAATGTTATAATATACTTTGCAAATAACTGTTAGAGGCCAGGTAGCTGACTTTAGTGAACTATATTACCAGCAATCGCATTCCTTTTTCCTGCTTCAAACAAGAGACAAGATAACTTATATGGACTCCAGAATGTCTTCTGAAGATGGAGCATACAGAAGTCTCCAAACACTACTGATCTCATCAGTACCCCATGCCCACAAGTTGGAACCCCCAGAGCAACACTTCTCAGCCTTTGTTCCAGTCTAGCACACCTGAGGGATACACCACACTCCCATCAGGAGCTCACAACAGATGACTAAGAAGGCCCAGCAGAGTTTTCACAGCCTTCCAGTGCCACATGGCTACCAGGGTGGAAGGGCCTTCTCTGAACCAGCAACTCCCCCAAAGTAGCAGAAAATTTCAAGGCTCCATGGTGAGTGCCGGGAGCCCACCTAATGCTACCTACTACCAGATCCCTACCACCTACAATGCCTCAGAATCAGTTTGCCTACTCTAAGAACAGGGCTCTTTCACTGTTAGGAAGCCTCAGGCATTCAAGGAAGGACTTGTTTGTGGGTCCGGTACATCTGTTTCGGCATGTCTTAACCTGCATAACTAAAAAGCAATTATGAAAAGAAGTTCATGATCAACCAGTTCAGTGCCAGATGGAAATAGGTAGAGTGGGTCAGCTGGCAG
CCCTGTACATCTGAGTGTTGACACTTGTGAATCATTCTCTGTCACTATAGTTTCCAGAGCCTTGCCAACGCAGCAGTGGGTTCAGACTTACCAAGATCCAAAAGGCTGGAAAGGGGATCTGGAGCCATGACCCATGCCTTGGGACCCACCAAACCATGACAGTGACTATTTCTATGACAAAGACATGCTCTTCAAAGTAAATATAGCCATATTCTGCCTTAGTGCCATGTTCTCTCCCATCTCTCCAAAGCAATGGCTGCACTCTGAAGGTGGGAAGAGTGTGAGAAGAAAGAGAACCAGGCCACAGACCACCAGGAAAGCAGTGATTGAAAGCAGCAGCTCTAGATTTATCTTATAGGCTCTGGAGAGTTATTCAGTTATTTACTCCACAAATATTTATTGAGCCAGGTACTATTCCAAGTATGCAGGCTACGATACTGACACAGTCACGGCCTTGACCCCATGGAGCTTAAAGTTTAGTGGGGGAGGCAGGTATTAATCAAATAAGCATCCAGACAGACACAAGGGTACAAATATGCACTATGATGGAGGAGTAGTAAATACTACAGGAACTGTAGAAACAGGGAGCAGTAAATCTGGTCAGGGAAGTCAGGAATGCTTCCCTTAGCAAGTGCCAAGTGAGGATAAATAGGAGTTCACTAGGTCTGGAGGAGAGGAAAAGAGAGGGAAACATTCCTGGTAAAGAGAACAGTGTGTGACAAGGTCCTGGGGTAGGAGGGAGATGTTGAATTGGTTCAAAAATAGAGGCAGGGAGCAAAGCAGTAGCATCAAGGTTTGAGTCCCCAGCATAAGGTAACTTCCTGTGTACACCCCTCAGTTTTCTTTCTCCTATAGAGAAGGCCTTCCTCAGCATGGAAGGAGACTAAAGCACACTGTCAAAAACTAGGAGTGCTCAGGAGAGGTGAAACACTACCAGCTAAAAAAACAAATCTGATTTACCAATAATACCTGGCCAATGTCCTCTTGAAATATCTGTAAAATTAAGGAACTCACTACCTCTGGCAACAATGAATCAGGAAAATGGCACAGTATAAAAGACCTTTAAACTTTTCAGGTAGAAATACATTTTAATGCAGGTAGAATTAAGAGATTGATGAATATGTTGAGAATTACTATAAATCTGCTTAGATACCAGGTATTTCACTGTAATTTCATATACTAATTTTTTTGGGGGGAAAAGGGCTAAAGGAAAAAACGGTTAAAGGTAGTATCAGTGCAGCACTATTTTGCTTTGGCAACTCTGTTAGCCTGTTTGATCACTTAAGGCAGTTTCTTGAGGCTTCTGAATTCTCTGTCACCTTGCAAACAAGTCACTGTAGCTTTCTATTTGTAAGGCACTTTGTAATCAATCCTTAATTGGCCTCCCTACCCCACTGAGAGATTATACTCATTCTACAGATAAAACAATAGATAAGTGAGTTGCCCAAGGGCACAAAACAATTCAGTTTTTACTTAACTTTCTTATCACCATCTCCAAATAGAAGACATACAGGGCTTGTCCAGCCACAGCACCAACCCTACTACCCTGCAGACCAATCTTAACCCCCTGATGCAGCAGAGGAAGGGACTGCAGTTAGTTCTGTTACCTCCTCTATGCCTTCGAAGCCAACAATGATAATGTCTGCACCGTGTTAGAGACACTGGCACTCCTAACCAGAACATGAACTCAAAGTGGAATAAGAGCACATACAAAGTCACAATGACTCACAAGTTAAAATGTCACCATCCCTAGGGAGACTGCACCCTCAACATACAAATACTCCCCTAAAACTTCCTTTCATCCTCAACAAGCAGCCAAGGCATAGGTCCCCAGAGTTGGCCCCCCCGCAGTACAGCACTGCACAGTACTGTAAGGCATCTTAAGTGTACAAGCCCTTCTTCACCCCATGCTGAGACCGCCAGCATCTAAAGCACCTAGCATAGGTTACAGCACACAAATATTTACTGAGCACGTCTTCCCAACTGGCAAATAAGGGAAA
CGGATACTCCTTTCGGAGAGTAGTGCATTTAAGAGATCAGTTAGGCCAACTCCTCATTTTACAGAAGGGCCAAAGAAGGGCAGAGAATCACCCAGTTACATATGGCCTCGTCTCCAATCCAAGTCTTTGGGCTCAGTTTAGTTCCACAAACATTTTATATGTGCCTACTATGTGCCAGATACGGTGGGTGGGAGGCGCTGGGGACACTGAGATGAGCAGGAGATGGTCTCAGCTTCAGGGAGCTCACAGCCTGGGGTAAAGAACAGTTACAAAACAGTCGGGCAAGTGCAAGGACAGGCGCTTGAGTGCTTAGCCAGTGTTTTCTCCACACTGTCCACTGGGTGTTCCCCATCTGGAAAAGAAGGACAGCAATATCTACTTCCTGCAAATAAACTTGTAGTGAAGACTGATGAGAAAATGAGTATGGAAAATGATCAGGCTCACCCAGAAATGCATTATATAAGGTATTGCCATACCCCGACACTGACTCCAAGGATTCAACAGTGTTTCAGGTATCCGTTCAACAAAGAAGCAAACAAGAGAGGGATGGGCATAAACCCAAGGAAAACCCCAAGGACTGCGAGGAACTGGAGGAAACAGCTGGGGTGGGGGTAGGGGTGTTGGGTCGAGTGAGACGGCCCTGACGCGGAGAATGGAGGGCCCGCAGCGGCGCAGAAGAGGATGGAACCGAGACGAAGAAGTTGGGACACCAATGAGGGACAGCAAGCAGAAAAGAATGGGGTTCCCTTGGGGCAGGACGGGGCTCGCGGCCGGGCCCTTCCGGCCGTGGCCGGGCAGGGGCTGAAAGCACCGGGCACGGGAGGAGGAAGCGGGCGGGCGCCGAGGCCGACTGTTTTGCCTGGGGACCGCTTGCACCCGCAGGGAGGCTCGGGCAGGCGCCCGGGTCCTCGGGCTGCAGCATCTCGCCCGCCGTGCCTCCCCGGAGCCGAACACCAGCCCGCGCCCGAGCCCGCAGCGCGGACTCCCGGGGGCGCCAACGACGCCGCCTCACCTCGGGTTGAAGTCCTGGAAGAGGCCCCTCAGGTTCATGGCGGAGAACTTCACCGCGGCGTCCTCCTCCTCCTCCCCCGCACCCCGTGCTGCACAGCCTGCGCCTTACAGCGGGTTCATGGCGCCAGCGCCAGCCGCGTCCACGCTGCTGCTCCCGCTACTGCTGCCGTCCCCGCTGCCGTCGCCGTCGCCGTCGCCGCCGCCGCCGCCGCCCGGAGAAACCTGAGCCACCGCCCCCTGCCCCTCCTTCCGGGCTTCCGTACGAGGGCCGCGCATGCGTCCGGAGCCCCGCCCAGAGCGCTCCTCGCTGGGAGGTCCCCATCCTTGTGTCCGCACGCGACCGG\",\r\n                148678216 - 1);\r\n\r\n            var codingRegion = new CodingRegion(148679671, 148713263, 333, 1385, 1053);\r\n\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   7, 148678216, 148679915, 1141, 2840),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 6, 148679916, 148681217, 1140, 1141),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   6, 148681218, 148681341, 1017, 1140),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 5, 148681342, 148681966, 1016, 1017),\r\n      
          new TranscriptRegion(TranscriptRegionType.Exon,   5, 148681967, 148682143, 840,  1016),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 4, 148682144, 148685652, 839,  840),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   4, 148685653, 148685736, 756,  839),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 3, 148685737, 148690313, 755,  756),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   3, 148690314, 148690521, 548,  755),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 2, 148690522, 148692969, 547,  548),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   2, 148692970, 148693146, 371,  547),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 148693147, 148713225, 370,  371),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   1, 148713226, 148713418, 178,  370),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   1, 148713419, 148713568, 1,    150)\r\n            };\r\n\r\n            var rnaEdits = new IRnaEdit[]\r\n            {\r\n                new RnaEdit(2814, 2813, \"AAAAAAAAAAAAAAAAAAAAAAAAAAAAA\"),\r\n                new RnaEdit(2227, 2227, \"C\"),\r\n                new RnaEdit(1032, 1032, \"T\"),\r\n                new RnaEdit(917,  917,  \"G\"),\r\n                new RnaEdit(151,  150,  \"GCGGCGGCGGCGGCGGCGGCGGCGGCG\")\r\n            };\r\n\r\n            const byte startExonPhase = 0;\r\n\r\n            var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, true, startExonPhase, rnaEdits);\r\n\r\n            const string expectedCodingSeq = 
\"ATGAACCTGAGGGGCCTCTTCCAGGACTTCAACCCGAGTAAATTCCTCATCTATGCCTGTCTGCTGCTGTTCTCTGTGCTGCTGGCCCTTCGTTTGGATGGCATCATACAGTGGAGTTACTGGGCTGTCTTTGCTCCAATATGGCTGTGGAAGTTAATGGTCATTGTTGGAGCCTCAGTTGGAACTGGAGTCTGGGCACGAAATCCTCAATATCGAGCAGAAGGAGAAACGTGTGTGGAGTTTAAAGCCATGTTGATTGCAGTGGGCATCCACTTGCTCTTGTTGATGTTTGAAGTTCTGGTCTGTGACAGAATCGAGAGAGGAAGCCATTTCTGGCTCCTGGTCTTCATGCCGCTGTTCTTTGTTTCCCCGGTGTCTGTTGCAGCTTGCGTTTGGGGCTTTCGACATGACAGGTCACTAGAGTTAGAAATCCTGTGTTCTGTCAACATTCTCCAGTTTATATTCATTGCCTTAAGACTGGACAAGATCATCCACTGGCCCTGGCTTGTTGTGTGTGTCCCGCTGTGGATTCTCATGTCCTTTCTGTGCCTGGTGGTCCTCTACTACATTGTGTGGTCCGTCTTGTTCTTGCGCTCTATGGATGTGATTGCGGAGCAGCGCAGGACACACATAACCATGGCCCTGAGCTGGATGACCATCGTCGTGCCCCTTCTTACATTTGAGATTCTGCTGGTTCACAAACTGGATGGCCACAACGCCTTCTCCTGCATCCCGATCTTTGTCCCCCTTTGGCTCTCGTTGATCACGCTGATGGCAACCACATTTGGACAGAAGGGAGGAAACCACTGGTGGTTTGGTATCCGCAAAGATTTCTGTCAGTTTCTGCTTGAAATCTTCCCATTTCTACGAGAATATGGAAACATTTCCTATGATCTCCATCACGAAGATAATGAAGAAACCGAAGAGACCCCAGTTCCGGAGCCCCCTAAAATCGCACCCATGTTTCGAAAGAAGGCCAGGGTGGTCATTACCCAGAGCCCTGGGAAGTATGTGCTCCCACCTCCCAAATTAAATATCGAAATGCCAGATTAG\";\r\n\r\n            Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence());\r\n        }\r\n\r\n        //NM_017940.4\r\n        [Fact]\r\n        public void RnaEdits_big_test_reverse()\r\n        {\r\n            //Transcript id:NM_017940.4, chrom: chr1: 16888922 - 16940100\r\n            var genomicSeq = new SimpleSequence(\r\n                
\"TGATAGGCAAAAGCTTTTAATTGTATAGATTAAAATAACTTTGGACAAAAATTAAAACTCAGGCAGAGAATGTTTTTTTTTTCAACAACACACACTAGCAAAAACAAAGGCACAGTAAACATTGAGGCAGAAAGTTTCCAGCGTAGAGATATGAATATAATAATAGACACAGGCAGGGATGATTAATAAATGATAAAATGTTTACAGGATGATCATTGGAATACAGGACATTTCTAATTTTGAAAACCACCCTCCCAAATACTTCATTATAAGTAAGGTGTCTCTAAAAGGGACAGATCTCCTAGACCCCTCCTTAACCAAGTAACCAGTCCTGATATCATGATAATGCTGATGGACAAACTAGACCTTCTCTGCCCGCAGATGGGCTAAGGTTGGAAACTCACAGCATTGTCTCTGCAGTGTTCCCGGCAAAACGTTTAGGCTGAATTTAATCATGAAGACATTTTCAGACAACTTCAGAATGTAGATCATTGAGCCAGAGAGCTGACCTGTCCTCTATAAACAAGTCCATGTCACCACCATCAATGACAACAACAAAAAGATGAGGAAATATTTGGGGTTCAAAATAACTAAAGAAATGCAGCTATATTATCTTTTTACTTTTTTTGAACCCAAAATATCTCTTCTCCTTTTTGTTGTGTGATTTGTGGTGATATGGACTATGTGAAGGAGACAGGTCAGTTGTCCTGCTCAGTGTTCTACATTCTGCAGTTGTCTGGTAATTACCTCCTATGAAACTCAGGCTAAGCGTTTTCTGCAAGAACATGGCGTTGTTCATATTCTGCACCGGCAGAGTCCTGGGTGACATGCTGTCTCCTGCCAGCGGCTCCTGACTCCTGTTCTCTACAGGATGGAATCGAGAGGAGCAGGGCTAAGGCCTCCCAATGCTGTTTGTCCATCTAGCTGTGGTCTTCCTAAGTACTGACACCAATTGGAGGCTGAAGGACTGTGGCTTCTCTAACCAAAGGAGCCTAGCGGGTTAACAATTGTCAAGAGCAGTTGGTGGTTCTGAAATACAATCCTCAGCCAAGGATCCCTCCTGTGTTAAAGATGGATCAGCTAAAACAATTCAACACTGAAGATACAAAGAATGAGGTTAGGTTCATTGAAACCAGGGTAACACCTTTGGATGAGCTAAACACAAAGATGACACTGACCTTGAGCAGGTATAGAAGCTCAGAGACATGACTGCAAAATGAAATCCCTGAGGAACTTTGTAGCTACCCAGAGATAAGTGGTTCAAATTAAAATGTCTGACTGATCACTCCCGGCATGTGCTGCACAGTTATGTGAACGTGTCACACCTAACTTGGGTCCATTGTCTTCAGACTGAGCACAGGGTGCCACTGGCATGGTCTGAGAATAGGAATAGAGCCATGCCCACTGACCCATCCTATGTCTGGGCTTCCAAATGGAACTATAGTTTCATTCAAATCTTCACGTGCCTATAGGTCCTGCCTGCAGGAATGACATCTCTCGGCTTAGTAAGGGCTGCTTACTGTGGGAATATGACTCCCATCTGGAAGACCAGGTGGAGACTTGTTCCCATCAAAGTAAGAAACCTATTGTCCACGTCAAGGGCGAAGCTGATGTGCTGTTCCTCAAATGAGTAAAACACACTTCTGTAGTGCTGGAATGAGTCAGGTAGTTCAAAGTACATTGACGGAGTCGAATAACATCTATCCAGTGAGTCCTGCAAGACTTCAGGCTCTTCCACTTCCATCAGCATGCCGCTGAGCCTGGAAAAGCAGACAAAACTAAAGAAGCAGCCAGGGAAAATCAGACACCACAGAGCCCCACTAGATTTCAGAAGTAACGTAAGGAAGTGGTAAGAAAAGAAAAGGATAGATCCATTAGATCCATTAATGAGGTAAAAAAAAAAAATTATTGCCTTTATGTTGGGATAGAAAAGGGCCAGGTAGAAAACAATGAAAGAGAAAGACAGAGAGACAGAGACAGAGACAGAGACAGAGAGAA
AGTGAGCTAGTGAATTGGCCAGGTGACATACTGGTAAGGGAGTAAAAGGACACTCTGAGTTAGTGCCCTCATGACACACAGCACACTGCGATCATGAAAAGAGTGAGCTCAATAGTTTTCCATAAAATATGCTCAAAATTCGATGCAGTGGCCATGAGAGTACAGCTTTTGAAGTATGGTCATCCTATGGTACGTTAGTAAATGATAAGGGGAGGAAGAAATGGAAACCTAAACATCTACTGCAATGAAAACCAACAGCAATGACAGTAGGAGTAATTCAGCCTTCGTTGAAAACATGAAATCAAACACACTCTGGTTTCCCTCAATCTGTTGCCTCCAGGTGTTAACACAGAATTAAGCATCCACAATTGCTGAAAGTTACCTGGGGCATGGTGGGTTTTGATCTTCTTCCCCTTCTTTTCTTCCCCTTCTCCTTCTTTTCTTCGTTGATCTTCTTCCCCTTCTTTTCTTCCCCTTCCCCTTCTTTTCAATTTCTGCAATAAATTCAGACATGGACAGACACATTAAGCTGATTCCCCTACACACATAACAATCCACTGTCTAACCCTCACACAGGGACCTCAGGCTCCTCAGCATAAGAATAGGAGACTGTGAGAGATATATTTCAGGAGGCCTGAAGGCTGGTCATGATAGAAATTCCTCGGTTTTTCTCCCAGAAACTGTGGGTAAAATGTCCCTATTCTAGTAGATCGTTATCCCAATATCATTTGTCCCGAGTTTGTGCAAACAGTTATGCCATATTTTTCCAATCAATTTAAAGCAAATACCCTCAAATGATTTCTAGGAGAAAAACTGCAATATTTAGCCCTGTCTCATCAAATACTCAGATTGTTCATGGTTGTGAGGACTTTAGACACTGAAATTAGAGTGAAAAAGGAAATCTACAAACCCTTGAGTCAAAATCATAGTTCTCTGAATTTGTCACATCTGCCCAGGTCCAATGTCATGAGAGTAGAATCAGAGTGCCACAGGCATGGCCTGAGACTAGGAAGAGAGCCATGCTCACTGACCCATCCCATGTCTGGGCTTCCAGTTAGAACTAGAGTTTCATTCAACCTACATGTGCCTATAGGTCCTCACTGCAGCAATGACATCTCTCAGCTCAGTAATGGCCACTTGGAGCAGGAATATGATCTTTATATGGAAGACTCAGTGGATCCTTATCACCTTCATAGAAAGGTACTCACCTCCCACGTCAAGAGAAAAGCCAACATGTTTTTCCTCCAATGCATAAAAGGAACTTCCATAGGGCAGGCAGGAGTCAGGCTGTTCAAGACAACTGGAAGGAGTTGAATAACATCTATCCAGTGAGTCCTGCAAGACTTCAGGCTCTACTGCCTCCAGCAGCTCCCTGCTGAGCCTGGAAAAGTAGGAAAAAGTAAAGAATAAGCCAGGGGGAATCAGAAACCACACAGCCCCAGCTACATTTCATGGCTAACATAAGGAACTGTTTAAACAGAAAAAGGACAGATCCATTAATGAGGTAATGAATTATTGCCTTTATGTTGGGATAGACCAGGGCCAGGTAGAAAAGAATGAAAGAGAAAGACAGGGAGAGGGAGAGGGAGAGAGAGACAGAGGAGAAAGTGAGCTCAGCGAATTGGCCGGGTGACACACTGACGAAGGGGTCAAAGGACACTCTGAGTTAGTGCCCTCGGGACACACAGAGAACAGTGATCATGAAAAGAGTGGGCTCAATAATTTTCCATAAACTTGCTTAAGATTCCATGCAGTTGCCATACAGCCTTTGAGGTATGGTCAACCTACAGTAAGTTAGTAAATGATAAGGGGAGGAAGAAATGGAAACCTAAACATCTACTGCAAGGAAAACCAACAGCAATGTCAGTAGGAGTAATTCAACCTTCGTTGAAAACATGAAATTGAACATACTCTTGTTTTCCCTGGACCTGGCATCTCCAGGTGTCAACACAGAATTAAGCATCCATAATTGCTCAAAGTTACCTGGGGCATGATGGGTC
TTGGTCTTCTTCCACTTCTTGGTACTTTTCAATTTCTGCAATAAGTTCAGACATGGACAGACATATTAAGCTGGTTCTCCTACACACATAACAATCCACTGTCTAATCCTCACGCAGGGACTTCAGGCTCCTCAGCATGAGAATAGGACACTGTGAGAGATCTTCTTCAGGAGGCCTGAAGGCTGATCATGATAGAGATTCCTGGGTTTTTGTCCCAGAAACTGTGGGTAAAATTCCCTATTCTGGTAGATCGTTATCCCAAGATCATTTGTCCCAAGTTTGTGCAAATGGTTATGCCATATTTTTCCAATCGATTTAAAGCAAATGCCCCCAAATGGTTGCTGGGAGAAAAACTGCAATATTCAGCCCTGTCTCATCAAATACTCAGATTCTTCATGGTAGCGAGGATTTTAGATGCTGAAATTAGAGTGAAGGATGAAATCTACAAGATCTACAAAATTGAGACAAAATCAGAGTTGTGTGAATTTGTCACATCTGCCCAGATCCAACATCTTGAGAGTGGGATTAGGGTGCCACAGGCATGGCCTGAGACTAGGAAGAGAGCCCTGCTCACTGACCCATCCCTTGCCTGGGCTTCCAAGTGGAACTAGAGTTTCATTCAACCTACATGTGCCTATAGGTCCTCCCTGTGGCAATGACATCTCTCAGCTCAGTAAGGGCCATTTGCAGTAGGAATATGACCCTAACCAGAAGACTCAGTGGATCCTTATCACCTTCATAGAAAGGTACTCACCATCCATGTCAAGAGCCCAGCCAACACGCTGTTGCTCCAATATGTAAAAGGCACTTCTGTAGGGCTGGCATGAGTCAGTCAGTTCAAGATAACCTGAAGGAGTTGAATAACATCTATCCAGTGAGTCCTGCAAGACTTCAGGCCCTTTCTCATCCAGCAGCTCCCTGCTGAGCCTGGAACAGTGGGAAAAAGTAAAGAATAAGCCAGGGGGAATCAGAAACCACACAGCCCCAGCTAGATTTCATGGCTAACATAAGGAAGAGTTTGAAAAGAAAAAGGACAGATCCATTAATGAGGTAACAAATTATTGCCTTTATATTGGGATAGACTAGGGCCAGGTAGAAAAGGATGAAAGAGAAAGACACACACACACACACACACACACACACACACACACACACACACACAGAGTGAGCTCAGTGAATTGGCCAGGTGACACACTGATGAGGGAGTCAACGGTCATTCTCTATTTGTGCTCTCAGGACACACAGTGAACAGTGATCATGAAAAGCATGGCCTCAATAATTTTGCATAAAATGTGCTCAAGTTTCCCTGCAGCCACCATGAGAATACAGCTTTTGAGGTATGGTCAACCTTCACTAGGTTAGTAAATGATAAGGGTAGGAAGAAATGGAAACCTAAACATTTACTCTAATGAGAACCAAAAAGCAATGTAGTAGGCATAATTTAGACTTGTCTGACAAGACAAAATCATTATTTTCAGCATGTACTGTTTTCCCTGGACTTGGCATCTCCAGGTGTCAACATCAAATTAACTGTCCACAATTTCTCAGACTCACCTGGGACCTGTTGCCTCTTGGTCCTCCTTTTTCACTTGATCCCACCGATGTCCTGCAAATAAATTCAGATGGGGCCTCTTACATTAAGCAGTTCTTCCTTGCACACAGAAACATTCCTCTGTCCAATCCTAACACAGGTACATCAGTCTGGTCAGTGTGAGAACAGGAGACTTTGAGAGAAATATTCCAGCAGGCCTGAGGTCAAGTCTTGAGAAAACTGGCTTGGGTTCTTTCATGAGCCTTGGGCAAAATTACCCTGTTTTGGAATGTTATCTTCCCTATGTGCTCTGTCCTAGGTTTGTGTACACAAATGAGCAACTTTTTCCCCAATAAATTGTAGGCAAATAGTTCTAACACCTCATAGGAGAGATACTTCAATATTAAGCTTTCTCTCATCAAATACCCAGAATTTGATAGTTTATGAGATTGTGGACACAGAGATTTGA
TGAAGGGGTGCAATGTACCAGCTCTTGAGTCAAAATGAAACTTGGTTCTACACAGAAGCATCAGCTATTATGGCTTTTGTGGGTGAAAAGTCAGCCATTTATCTAGAAAACATACCAGGAACATGACGGACAGATGAGCTAAAGCAAGCGAACTTAGAAGACACAGAAAATGGGAATAAATTCAGTGAAACCTGGGCCACATCTTTCACTGAGAGGTAGACAAGGGTGACACTTGCCTTGGGCAGGTAAAGAACCACACAGACATGCTTTGGGAACAAAACTCATAAGGAATTTTGTAGCTGGCAAGAGACATTTAATTCAGATGAGCTGATCTGACAGACAACTCCTGGTCATGTGCTGCATAGTTTGGTGTGAGCTTGCCACACCTGCCTTGAGTTCAATGTCGTGACAGTCAGTCCAGGTTGGCACGGGCATGGCCTGAGACTAGGAAGAGAGCAAAGCTCACTCACCCACCCCATGCCTGTGCTTCAGACTCGACTCCAGAGTGATTGAAATCTACATTGATATATAGGTTCAGCCCACAGTGATGGCAAATCTCAGCCCAACAAGGGGCACAAGGCCCAAAGATTATGGGGTCTACCTGGGCCATGAACTGGAGCTTTATCACCTTCACAATGGAGTACTCACCGCCTATGTCAACAGCCATGCAGACTTGCTGTTCCTCTAATGAGTGAAATGTGCCGCTGTAAGACTTGTACGAGGCCAACATTTCAGGAGGAATTGAGAGAGTCGAATAACCTTCATCCCAGGACTCCTGGGGGACTTCCTCCTCTTCAGACTCCTGCAGATTCCTGATGAGCCAGGCAGGACAGGGATGATAGAAGATTTAACCAACAGACATTAGACAACAAAACCTCCCAGATGATCTGATGGGAGACAGAATGGAGTGGTCACAGAAACCAAAGGCATTTTTCCTTCAAGAGAAATAAAACTAGCCTTCTAAATACAGGGTGGAGGGTGACTGCTCTGGGGACAGAGCAAAAATGGGCAGCATGTGCTCAGTACATTTGCCACAGATGAGCCAACTCAGGGCACCCAGACTCTCCCTGTAAACTACCATCATGACTTGCAGCACAGAGAACTGACACAGGGCTTCAACTACTTTGCATAAATTGGGTTGAATTTTACATGCAGCATTCAAGTGAAGAGAGTTCTTGACACAGTGCAGACACAGATCTTGTGTATTAAGGGCCCCATTTTCCCAATATTTTGATATAATATATTTACCTTTTCAATTTCTTTTCTTGCAAAAATACTAGCCAACATACTACCAACAGATAGGAAGAAAGCATATATACATCTCTCCCTGGATTTAAACACATGGGAGAGAATAGGCAACACCAAGAAATCCCTGTTTGAGGGTCTGGAGTGGACTTCCAGCAAACTCCAACAGACCTGAAGCTGAGGGACCTGATTGTTAGAAGGAAAACTAACACACAGAAAGGAATAGCATCAACATCAACAAAAAAGACATCCATCCCAAAACCCCATCTGTAGGTCGCCATCATCAAAGACCAAGGGTAGATAAAACCACAAAGGTGGGGAGAAACCAGAGCACAAAAGCTGAAAATTCCAAAAACCTGACATCCCTTCTCCTCCAAAGGATCACAGCTCCTCGCCAGCAATGGAACAAAGCAGGATGGAGAATGACTTTGATGAGCTGACAGAAGTAGGCTTCAGAAAGTCGGTAATAACAAACTTCTCTGAGCTAAAGGAGGATGTGCGAACTCATCGCAAGGAAGCTAAAAACCTTGAAAAAAGATTAGACGAATGGCCAACCAGAATGAACAGTGTAGAGAAGACCTTAAATGACCTGATGGAGCTGAAAACCATGGCACGAGAACTACGTGATGCATGCACAAGCTTCAGTAGTCAATTCGATCAAGTGCAAGAAACGGTATCAGTGATTCAAGATCAAATTAGTGAAATGAAGCGAGAAGAGAAGTTTAGAGAAAAAAGAGTAAAAAGAAATGAACAAG
CCTCCAATAAATATGGGACTATGTGGAAAGACCAAATCTACGTTTGATTGGTGCACTGAAAGTGACGGGGAGAATGGAACCAAGCTGGGAAACATTCTTCAGGATATTATCCAGGAGGACTTCCCCAACCTAGCAAGGAAGGCCAACATTCAAATTCAGGAAACACAGAGAACACCATAAAGATACTCCTCGAGAAGAGCAACCCCAAAACACATAATTGTCAGATTCACCAAGGTTGAAATGAAGGAAAAAATGCTAAGTGCAGCCAGAGAGAAAGGTCGGATTACCCACAAAGGGAAGCCCATCAGACTAGCAGCAGATCTCTTGGCACAAACCCTACAAGCCAGAAGAGAGTGGGAGCAATATTCAACATTCTTTTTTTTTTCCATATGTATAGTTTTCCTTTATTATTTTTTGTGTGTATGTATATATATGTATATATATTTTTCAATACTTTAAGTCTTAGGGTACATGTGCACAACGTGCAGGTTAGTTACATATGTATACATGTCCACATTGGTGTGCTTCACCCATTAACTCATCATTTAACATTAGGTATATCTCCTAATGCTACCCCTCCTCCCTCCCCCCACCCTACAACAGGCCCCAGTGTGTGATGTTCCCCTTCCTGTGTCCATGTGTTCTCATTGTTCAATTCCCACCTGTGAGTAAGAACATGCGGTATTTCGTTTTTTGTCCTTGCGATAGTTTGCTGAGAATGATGGTTTCCAGCTTCATCCATGCCCCTACAAAGGACATGAACTCATCATTTTTTATAGCTGCATAGTATTCCATGTTGTATATGTGCCACATTTTCTTAATCCAGTCTATCATTGCTGGATATTTGGCTTGGTTCCAAGTCTTTGCTATTGTGAATAGTGCCACAATAAACATATGTGTGCATGTGTCTTTACAACAGCATGATTTATAATCCTTTGGGTATACACCCAGTAATGGGATGGCTGGGTCAAATGGTATTTCTAGTTCTAGATCCCTGAGGAATTGCCACACTGTCTTCCACAATCGTTGAACTAGTTTACACTCCCACCAACAGTGTAAAAGTGTTCCTATTTCTCCACATCCTCTCCAGCATCTTCAACATTCTTAAAGAAAAGAATTTTCAACCCAGAATTTCATATCCAGCCAAACAAAGCTTCATAAGTGAAGGAGAAATAAATCCTTTACAGAGAAGCAAATGCTGAGAGATTTTGTCACCACCAGGCCTGCCTTACAAGAGCTCCTAAAGGAAGCACTAAACATGGAAAGGAACAACCGGTACCAGCCACTGCAAAAACATGCCAAACTGTAAAGACCATTGACGCTAGGAAGAAACTGCATCAACTAACGGGCGAAATAACCAGCTAACATCATAACGACAGGCTCAAATTCACACATAACAATATTAACCTTAAATGTAAATGGGCTAAATGCCCCAGTTAAAAAACACAGAATGGCAAATTGGACAAAGAGTCAAGACCCATCAGTGTGCTGTACTCAGGAAACCCATCTCACATGCAGAGACACACATAGGCTCAAAATAAAGGGATGGAGGAAGATCTACCAAGCAAATGGAAAGCAAAAAAATGCAGGGGTTGCAATCCTAGTCTCTGATAAAACAGACTTTAAACCAACAAAGATCAAAAGAGACAAAGAAGGCCACTACATAATGGTAAAGGGATCAATTCAACAAGAAGAGTTAACTATCCTAAATATATATGCACCCTATACGGGAGCACCCAGATTCATAAAGCAAGTCCTGAGAGACCTACAAAGAGATTTAGACTCCACACAATCATAATGGGAGACTTTAACACCCCACTGTCAATATTAGACAGATCAATGAGACAGAAGCTTTACAAGGATATCCAGGACTTGAACTCAGCTCTCCACCAAGCAGACCTAAAAGACATCTACAGAACTCTCCACCCCAAATCAACAGAATATACATTCTTCTCAGCACCACATCACACTTATTCCAAAATTGACCACATAGTTGGAGG
TAAAGCACTCGTCAGCAAATGTAAAAGAATGGAAACCACAACAAACTGTCAGACCACAGTGCAATCAAATTAGAACTCAGGATTAAGAAACTCACTCAAAACCGCACAACTACATGGAAACTGAACAACCTGCTCCTGAATGACTACTGGGAAAATAACAAAATGAAGGCAGAAATAAAGATGTTCTTTGAAACCAATGAGAACAAAGACACAACATACCAGAATCTCTGGGACACATTTAAAGCAATGTGTAGAGGGAAAATTATAGCACTAAATGCCCACAAGAGAAAGCAGAAAAGATCTAAAATTGACACCCTAACATCACAATTAAAATAACTAGAGAAGCAAAGCAAACAAATTCAAAAGCTAGCAGAAGACAAGAAGTAACTAAGATCAGAGCAGAACTAAAGGAGATAGACACACAAAAAACCCTTCAAAAAATCAATGAATCCAGGGCTGGTTTTTTGAAAAGATCAACAAGAAAACCCTGTTTGGCTAGTTCACCTGGCTCATCTGATGGCAAGTTCCTATCTTGAGAGGACTATGAAATTAAAACCAATACAAGTGCCACAAATAACATACAACATTGTAAATCAGCACAATTTGTAGCTGGGTGAATGGAAGAAATAGTTCTATTCATCACTTCCTCATTTTCCCTAAATCTACAATCTCCAGATGTCACTACTGAATTAACAGCCAACAATTCCACAACATTACCTGGGAGACACTGGCCCTTTTTCTTCCTCTTCCTCATCATCACTTTCATTTTCTGTAAATAAATTCAGAGAAGCAGGTCACATTAAGCAATTCATACTTCACATATGACCAAATCACTGTCCAGTCATAGCACAAGGACATAACTATTCTCAGTGCAAGAATAAGGATTCTGACAGGAATATTCTAGGGTGCCCTAGATTAACTTTGGTGAGAATTAGATGACCCTGCTTTCCAGACCCACAGGCCAAAATCTCCCTCTACGTGTAGACCATAATGCCATATTCCCTGCCTGAGTCAAAGTTAAACAAAATTTTTTCCCCAAAAAAATCTCCAAAAATTGGTCCATTTTCTAAGAGTGTTGCTGCAATACGGACTTATATCACCAGATAACATGGACATTAAATGTTTAGAGGCATCTATACATGAAACACACATGATAGATAAATTTGAACAACTCTTGCTTTAAAAAGAATCTGTGATTTGGGAGGCCAAGACAGGTGAATCATTTGAGGTCATGAGTTCAGGACTACCCTGGCCAATATGGGGAAACCCTGTCTCTACTAAAAATACAAAAATTAGCCAGATGTGATGTTGTGCACCTGTGGTCCCAGCAACTCAGGAGGCTGAGGCAGGAGAATCACTTGAATCTGGGAGGCAGAGGTTGCACCAAGCCAAGATGGTGCAACTGCACTCTAGCCTGGGTGACAGAGCAAGACTCCATCGCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATCCACGATGCTACAAAGAAACATTGGATCAGCCATTGCATTGACAGGGTGGAGAACCAGGGTCCAGCCTTGCTTTATGGAAATATATCAGCAAAGTAAAGAAGAAAAGTTTCCGTCCTGATTTCAGGGTGACTGTGCAGCTAAGCAAGCTGACTTAAAGGAGATCCGGATGAAAGCTGAGAGCAGTGAAGCCTGGGGAACAATATTTCCAAATACAAAGGCAAGGCTGCCAGCTTCCTGAAACAGGCATAGAAACTCCATGGACATTGTTCAGGGACAGATGACTTAATCACAGATGACAAGAGATACTGAATCGAAGCTAGGAGGCCTGACAGATACTGCCTGTGCACCTCCTGCACTCAGGTGACTATGAGATTGTCACACTTGCCTGGGGTCGAGTAACTTGATACTGGGGACTGGCAGACAAAGGCATGACATTAGCTGAGAAGGACAAAAAAACTCCCTGATATCTGTTTAGAAACCCATCATAGTTTTTTATTCAAATGAATTTGTGTTTATAGAGCCTGT
CTTCAGAGTTTATCTTCCTCAGCCTAGAGAGAGGTATGAGACACAAGGAAAACAGAGGCTACCTGGGATAATGTGTACAGCATCCTCCCATTCAACATGAGAGGATGAGCCAATGAGAGTTGAGTCGACTTTGTCTTCCTCAAATGTGATTTTGGTTTTCCTATGTGGCTGGTTGGAGTCATAAGGGCCATGGCTATTTGAACAAGTGATGGCACATTCCTCCAGTGAGTCCTCAGGGACTTCCTTTTCTTCAGCCTTCGGCATCTCCCTGATGAGCCAGGTGGGACAGAGATGACAGAAGATTAAACACAGAGGGATTGGACCCCAGGGAGTCCTAGCTGGTTTTGACAGGCGGCATTAAGAGAGTGGTCCCAGAAAGCAAAATGGAGGTTCCCTTTAAGGGGGAACATGCAATCCTGTTCTCTCTGCAACAGAGCATGGCTGCCATGGGAACCAGAGAGGAAGAGAGCAGCTGGTGTTCATTGCAGTGGACAGATAGGAGCTGAGGAGGATGAAGACTCAGCTATCCCTGTATGGTGCAGACATGACACTCGGCACACATAGAGAAACATGACAGCTGCCGCACCCTGTGTCTAAGCTGGGTTATATTTCACATACTGTGGCCAAGCAAATGCGGGTTTTTGGCCCATCATAGATGCCAGAGAGGGTGTACCTCCTAGATATTCTTCATATGTTACCATCCATTACTTGTTCCTGAGTATTCAGTGTTACCTGGGGGCAGACGATTTCTGCACTTTCTCAGCCACCTCAACTTGAACATCTTCATCGTCATCGTTGTCATTTTCTGTAAATACAGAAGTGTTCGTTCAGATATTTCCCACTTCACAGTCTGCAAGCACAGTCAGCCCAATGTGCAACAGAGACATGAACATCTAGGCATGGGTCACCGTTCAACTGAAAACTCTCATGTTTTATCTTTAACAGAATGCCCTGGCATGGTTTCCTGATCCATCAGGCAATGCATTTCTGATCTGGAGGGCCACCATCAAGATGTGGCCAAATATTGAAAAGACCTTTTGCTTCCCATATCACTGGAGGCTTGTGCAGCCTCTCTCTGGACTTTGGCAGCTGTCGCCCCCATCCTGCCAGATCTGATTCCCAGGCACAGGCTTGGTGTCCTGTCACAGTTTGCATTTCAAACCTAATTCTTTCTCTTAGAAGCAGACAAACTTATCCCACAGTCCTCTATGCATCAGAAGATTTCAAGCCTCCAAGTGGCTTCTGCTGTGTTATTCAGGGACATTCTATCCATGGGGAGTGCTCCAGTCTGAAGCACTTCCTACCACGAAACGCCACCACATAAAGTGCCTTCTCCAACATCACACGGCGAGGGGCTTCATCTCATTTTGGAAAGCAGTTTTAAGTGTTCCCACATTTGAATGCTTCAGACCCTTGCAAGAGACAATTTGCCATGGAGAGAGAGAAACTCAGGAAAGACAAGTCATTCAGTCACTGACAGTTACTAAGAACATTGCCGAAAAGACACCCTGGGAACCTTCATTCTTAGTCCAGAGCTCTTTTCACTCTAACAAGCCTGCTCCTATCGCAGCCTCCTTCCTGTCCTTTAAAACTAGATAGATGCTGCCTCTTACTCCAAAGACAACCTTCCATCAAGGGAGGAGGGACAATTGCAATACTGTGACCTCCAACCCCATGGGTTTCCCAACTCCGTTCTTACCCAGGAAGTCCTGGTCATGTCATGGCCACATAAGCTTAGTGGCAAAAAACACCATTGATACAACTGTCATTGTGAAAGTATGGAGGTCTGGAGTCTCTCATAAGCCTGGGGTTTTGGGTCATCAGGGCCTATGGCCACCTTACCTGGGCTGAGCTTTTGGACAAGGTGCTGTGCCAGTCTACACCCCTCAGCCAGCTGTTCTTGGAGGTCCTGCCCCTGGGACTTGTCTGGCTCATCCGGAGTGAGGAGGGCCTGGAGATGCTGATTCAATGAGCAGGAGGCATCTCTCCCTTCCCGTAAC
TTCTCCCTTAACTGGGTCAGCTCTCGTTCCTGAGAGTGAACCAGGACTTTATATTGCCTAAGGTGAGACGGTAGAGAAAATTTAAGAGTGGAAAGGGTTGAGTGATCCGCTCAAATATTGCAACAGAGATTTCTGAGACAATGTCCTCAAGGAGACCTCCAAGCAGAAGGTCAGCACATGTTGGAAGGAATGTCTGTGGCTAAGAGAAAGAATAGAAAATGGTTTACAGGTTTCCTCTGTATCAGAGAGGGCTCCTGCAAGATCCTCGATGATGTTCCATTCATCTTTCCCTTCTGTAAACAAAAGTAGGTGTCTTCCTAATTCCATTTCAAAAAGACATCCTTTCAGTCCCTCACTCTGGCCATGGACATTTCCATGTGAAAATACACATAGTGCATCTTGCGGCCACTAGATACAAAGCCATGTACAGAAATGAGGCCAGGTGCAGATGGGGCGAATTGAAAAGATGAAAGAAGAAAAGAATGACAGGGTCGAGAAGGCAACATTGATTGAGTGAAAGAATGAGAAGACGCAGTCAGTCAGAAGGTGATTCTCACTAAGGGTAAGTGGGGTGGTGATGGCACACCATTTTGAGTATACTGAATGCTGCTGTGTGGTTCACACTCCTTTGGTTAATTTTGTGTTATGTAAATTTCACATCAACAATTACTTGTTTGAAAAAGAGAAAACAAGGCTCTGAGAAACAACTGCAACCCATAAATTTTTATTATCCTTCTTCTCTGCTTGATAAATACTTGTGTGTTGCGAGCCTGCCATGGCAATTCCTGCCCTTCCCCTGGCCCAGCTTAGTTCTTAAGTCTCCCCACTGAGCTGCTGTACTTCAGAGATTTACACACCTGCCCCCCTGCCTGCCCCCATGGGGTCCCCTCACCTGAGCTCCTCAGCTTGCTTGAGCTGCTCTGCAAGCTTCTCCTCCTTGAACTGTCGCTCATTCCTCAGCATAGATTTTATGAGGTCTTTGCACTCTTCATTTTCTGAGAAAAGACAGACACGCCTGCCTCAGTGGAAGGCTGGACATGCTGCTGTGGTCATTGCCTACAGGGCAGGAGCCAGGTCCATCCCAAGGACAAAACTCTCCCCAGTACCAGGGTCTAGACAGGGATTTCCACATCTTTACTCTTCAGTCTCCTGACTTTCTGGCATCTGATCCTCCAAAATTTAGAGATGAAGAGAACCTCAATGGCACATCAAGGAAGTTGACAAGATGATTCAACCACAACGAAGTGGAGTCAGAATTCACAGCCCCTGAGGTCTGACTCTGAATGCAGGGCCACTTTCCCAAGACTTGCAGCCTCTCCTCTAAAACACTGCACTGGGGCATGAAGTAGTGATTTCTTGTACAGTCGGGAAGGCCCCTAGGACTATGGGACTGATGGTTTCCCTTTTACTGGGAATTTCAAGGACAAGTATGCAAAAGATTTTAAAAATCTTTGATTTTTAAATCATATCTTCAGTTATGATTTTAAGAATCATATCTGAAGCATAAAGTGTGACACATAACACCATAAGGCCATGAAGGAAATATGCCCAAATGTTAATAAAGTTTGTGTTAATTTAGAAACAGCAGAATGAAGAACTAATAGATAGTGTTTACTGTGTGCTAATAAATGTTCTAGGAGATTGACAAGAAATAGCTCATGTAATTCACTGCAGCAATTTACAGAGGTAGGTATTATTGTAGTACCCTCTGAACAGGTGAGGAAACAGGGACAGAAAAGACAAGCAACTTGGATGGAGCCCAGGAGACAGGCCCACGGTCTCTGCTCTGTACACTGCACTGCTATCTCCACACATTCTCGGGTGCGATCTTTCTTCCTCTTTAGGAACAAGACTCTGTGCCCCAGGAAGCAGGACTTCACTCTCACCAAGCTACATTCTGCTTCTTATTCTTATTTTTATTTATCATTATTAGTATTATTTTTTTAACAGTCTTGCCCTGTCGCCCAGGCTGGAGTGCAATGGCAAAATCTTGGCTCACT
GCAACCTCAGCCTCCTGGGTTCAAAGGATTCTCCTGCCTCAGCCTCCTGAGCAGGGGTGATTACAGTCACCTGCCACCATGCCCATCTACTTTTTGTATTTTTAGTGGAGATGGGGTTTCTCCATGTTTCCCAGGCTGGTCTCAAACTCCTGACCTCGTGCTCTACCCGCCTCAGCCTCCCAAAGGGCTGGGATTACAGGAGTGAGCCACCATGCACAGCCCCTACTCCCTGCTCTTGATGCTGTCACTTATAGATAGCACAGGTTCTATTAGGAGCAGACTCCTCTTGAAGCCCCTCAGAGCAGGTACTGGCTACTATCACCAAGTTTCCCTCAGAGTCACTAGAACAGAGCTTTGCATATTGGGCCTCAACAGAAACTTGAACTGAATAAAAGTTCACTAGTCTCAGACATTTAGAACAACAGACTAGATGTTATTTGTCTGCAGGATCTTACATGGTACAGAGAGGATTCTTGAAAACATGATTGAGCCTCTTGGAGAAAACAGGTCATTCTGTGCCTGTGTCAGAAATCAATAAATGGCAGTTTAACTCTAGTCCCACCCCCACCTGATTGCAAACATGGAAAGTTGCTAAATATTTTGGGACCTCTGTCTTCCAACTTTAACAAAATGTTAAAATACCCATTTCTGTTTTCCTAGAAGTATGGGGAGGATGACATTATTTTAGATGGAGAGAGCACTTAGTTTCTCAGAGAGAAGACAGGACTTCGTTCATCACTTTCGTGATGGTGAGCCTATAGATCTTACTGTATTTGTTCTGCTGGTTGGCCAGGAAGCAGGCCAGTTGAGTTACAAAACATTTCTCTTTGAGGTTTCTGAACTGCTGTTTCTTCTCTGCCAGCTGGGGATGCAATTTCTCGTTGATTTCTAGAATGTTCATCTCTGCCTTCTCGCTGGACAAAGGGCCGGCTGATACCACCATGCTGACGTTTGTGGCAGAAGAGGTGGGGCCAGGGACTGGGGAGAAGAAAGGCAAACACATGATGGGTTAAAAACTGGTGAAATCAAATAGGTTTAATCACACTGAGGGATGTCAGCGGCAGCCTTGTCTACTTATTTGAAGATGATGTTTCCCTGGTTTCACTCTTGTCATCTCCAGTCTTGATCTCCTTTAAGTCAACTTATCTTAGCTATGCAGTCACCTTGAAACCAGGACATAAACACTTCTACACTTTTCTTGCTTATAAGTTTCTATAAAGCAAGGCTTGGCCCTGAGATTTTTACCCCATGAGTGGCCAATGTTTCTGTGTAGCACAAAAGGTTTCATTTTGCCTTTTTAATTTTTTTCTTTTTTGGTTTTTTGTTTTTTGTTTGAGACGGAGTCTCACTCTGTCACGCAGGCTGCAGTGCAGAGGCACAATCTCAGCTCACTGCCACCTCTGCCTCCCGGGTTCAAGCGATTCTCATCCCTCAGCCTGCCAAACATCTGGGATTACAAGCGCCAAGTAACATGCCAGCTAATTTTTGTATTTTTAGTAGAGATGGGGTTTCGCCATCTTGGACAGGCTGGTTTCGAACTCCTGACCTCAGGTGTTCCGCCCACCTTGGCCTCCCAAAGTGCTGGGATTAAGATGTGAGCCAGCACCCCCGGTCAGAGACTTTTTTTTTTTTTTTTGAGATGGAGTCTCGCTCTGTCTCCCAGGCTGGAGTGCAGTGGCACAATCTAGGCTCACTGCAAACTCCGGTTCCTGGGTTCATGCCATTCTCCTGCCACAGCCTCCCGAGTAGCTGGGACTACAGGTGCCCAACACCGTGCCCAGCTAATTTTTTTTTTTTGTATTTTTAGTAACGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCACCCGCCCCAGCCTCCGAAAGTTCTGGGATTACATGTGTGAGCCACCGCGCCCGGCCGAGACTTCTTATTAATAGCTAAGACAAGCCAATGAAAAGGAGAGAGAGTCTAGCCTGAGAGGAGTGAACCAGGGTGG
GAGGATCGTCTCAGCCGATCCTCCCACCTAAGTCTCCTGAGCAGTTGGGACTAGAGGCACGCAGCACCATGCCTGCCTAATTTTTTGTATTCTTTGTAAAGATGGGTTTCACCATATTGTCCAGGCTGGTCTTCAACTCCTGAACTCAAGTCATCCTCCCACTTGGGCCTTCCAAAGTGCTGTGATTATATGTGTGAGTCACAGAACCTAGCTCCATCCTAGTTTCTGACTAAAACAATATGTGCGTATACAGCCTGTCCTCAGAATTGATCTTCCATAGCCTAGACAGAGGTATGAGACACAAGGAAAATAGAGGCTACCTGGGAGAATGTTTACAGCATCCTGACATTCATCATGAGAGGATTCTCTGTCTACAACCAGAGCTGAGTTGACTTTGTCTTCCTCAAAGGTGATGTTGATGTTCTTGTGAGGCTGGTTGGAGTCACAAGGGCCGTGGCTATTTGAACAAGTGATGGCACATTCCTCCAGTGAGTCCTCAGGGACTTTGCTTTCTTCAGCCTTCTGCACCTCCCTGATGAGCCAGGTGGGACAGAGATGACAGAAGATTAAACACAGAGGGATTGGACCCCAGGGAGTCCTAGCTGGTTTTGACAGGCGGCATTAAGACAGTGGTCCCAGAAAGCAAAATGGAGGTTCCCTTTAAGGGGGAACAGGCAATCCTCTTCTCTCTGCAACAGAGCATGGCTGCCATGGGAGCCAGAGAGGAAGAGAGCAGCTGGTGTTCAGTGCACTGGACAGATAGGAGCTGAGGAGGATGAAGACTCAGCTATCCCTGTATGGTACAGACATGACACTTGGCACACATAGAGAAACACGACAGCTGCCACACCCTGTGTCTAAGCTGGGTTGAATTTCACATACTGTGGCCAAGCGAATGCGGGCTTTTGGCCCATCATAGATGCCAGAGAGGGTGTGCCTCCTAGACATTTTCATATGTTACCACCCATTACTTGCTCCTGAGTATTCAGTGTTACCTGGGGGCAGATGATTCCAGTACTTTCTCAGCCTCCTCAACTTGAACATCTTCATCCTCATCTTCGTCATTTTCTGTAAATACAAAATGTTCGTTCAGATATTTCCCACTTCCCATTCTCCAAGCACAGTCAGCCCAATGTGCACAGAGACATGAACATCTATGTGTGGTTCAGCATTGTACTGAAAACTGTCATGTTTTATCTTTCACAAAATGCCCTGGCATGGTTTCCTGGTCCATCGGGCAATGCATTTCTGATCTGGAGGGCCACCATCAAGATGTGGCCAAATATTGAAAAGACCTTTTGCTTCCCATATCACTGGAGGCTTGTGCAGCCTCTCTCTGGACTTTGGCAGCTGTCTCCCCCATCCTGCCACAGATCTGATTCCCAGGAACAGGTTTGGTGTCCTGTCACAGTTCGCATTTCAAACCTCATTCTTTCTCTTAGGAGAGGACAAACTTGTCCCACAGTCCTCTATGTGTCATGAGACTGCACAGGCCCTCCATGTGGCTTCTGCTGTGTTATTCAGGGACATTCTATCCATGGGGAGTGCTCCAGTCTGAAGCACTTCCTACCACCAAATGCCCCCACATCAAGTGCCTTCTCCAACACCAAACGGAGAGGGGCTGCATCTCATTTTAAAAAGCATTCGTAAGTGTTCCCATATTTGGATGCTTCAGACCCTTGCAAGAGACAATTTGTTTGCCTTTGCAGATGGAGAGAGAGAAACTCTGGAAAGATAAATCACTCACTCACCGACAGTTACTAAGAACATTGCCAAAAAGACAGCCTGGGAACCTTCATTCTTAGCCCAGAGCTCTTTTCACTCCAACAAGCGCCCTCCCATCACAGCCTCCTTCCTGTCCTTTAAAACTAGACAGATGCTGCCTCTTGCTCCAAAGACCACCTTCCATCAAGGAAGGAGGGACACTTGCAATACTGTGACCTCCAACCCCATGGGTTTCCCATCTCTGTTCTTACCCAGGAAGTCCTGGTCATGTCATG
GCCACATATGTGTAGCAGAAAATAACCCCACTGATACAACTGTCATTGTGAAAGTATGGAGGTCTGGAGCCTCTCATAAGCCTGGGGTTTTGGGTCATCAGGGCCTATGGCCACCTTACCTGGGCTGAGCTTCTGGAAAAGTTGCTGTGCCAGTCTACACCCCTCAGCCAGCTGTTCTTGGAGGTCCTGCCCCTGGGACTTGTCTGGCTTATCCGGAGTGAGGAGGGCCTGGAGATGCTGATTCAATGAGCGGGAGGCATCTCTCCCTTCCCGTAACTTCTCCCTTAACTGGGTCAGCTCTCGTTCCTGAGAGTGAACCAGGACTTTATATTGCCTAAGGTGAGACGGTAGAGAAAATTTAAGAGTGGAAAGGGTTGAGTGATCCGTTCAAATATTGCAACAGAGATTTCTGAGACAATGTCCTCAAGGAGACCTCCAAGCAGAAGGTCAGCACATGTTGAAAGGAATGACTGTGGCCAAGAGAAATAATAGAAAATGGTTTACAGGCTTCCTCTGTATCAGAGAGGGCTCCTGCAAGATCCTCGATGATGTTCCATTCATCTTTCCCTTCTGTAAACAAAAGTAGGTGTCTTCCTAATTCCGTTTCAAAAAGACATCCTTTCAGTTCCTCACTCTGGCCATGGACATTTCCATGTGAAAATACACATAGTGCAACTTGCAGCCACTAGATACAAAGCCATGTACAGAAATGAGGCCAGGTGCAGATGGGGCGAATTGAAAAGACGAAAGAAGAAAAGAATGACAGGGTCAAGAAGGCAACATTGATTGAGTGAAAGAATGAGAAGACGCAGTCAGTCAGAAGGTGGTTCTCACTAAGGGTAAGTGGGGTGGTGATGGCACACCATTTTGAGTATACTGAGTGCTGCTGTGTGGTTCACACTCCTTTGGTTAATTTTGTGTTATGTAAATTTCACATCAACAATTACTTGTTTGAAAAAGAGAAAACAAGGCTCTAAGAAACAACTGCAACCCATAAATTTTTATTATCCTTCTTCTCTGCTTGATAAATACTTGTGTGTTGCGAGCCTGCCATGGCAATTCCTGCCCTTCCCCTGGCCCAGCTTAGCTCTTACGTCTCCCCACCGAGCTGCTGTACTTCAGAGATTTACACAGCTGCTCCCCCACCTGCCCCCATGGGGTCCCCTCACCTGAGCTCCTCAGCTTGCTTGAGCTGCTCTGCAAGCTTCTCCTCCTTGAACTGTCGCTCATTCCTCAGCATAGATTTTATGAGGTCTTTGCACTCTTCATATTCTGAGAAAAGACAGACACGCCTGCCTCAGTGGAAGGCTGGACATGCTGCTGTGGTCATTGCCTACAGGGCAGGAGCCAGGTCCATCCCAAGGACAAAACTCTCCCCAGTACCAGGGTCTAGACAGGGATTTCCACATCTTTACTCTTCAGTCTCCTGACTTTCTGGCATCTGATCCTCCAAAATTTAGAGATGAAGAAAGGGAACCTCAAGGGCACATCAAGGAAGTTGACAAGATGATTCAACCACAACGAAGTGGAGTCAGAATTCACAGCCCCTGAGGTCTGACTCTGAATGCAGGGCCACTTTCCCAAGACTTGCAGCCTCTCCTCTAAAACACTGCACTGGGGCATGAAGTAGTGATTTCTTGTACAGTCGGGAAGGCCCCTAGGACTATGGGACTGACGGTTTCCCTTTTACTGGGAATTTCAAAGACAAGTATGCGAAAGATTTTAAAAATCTTTGATTTTTAAATCATATCTTCAGTTATGATTTTAAGAATCATATCTGAAGCATAAAGTGTGACACATAACACCATAAGGTCATGAAGGAAATATGCCCAAATGCTAATAAAGTTTGTGTTAATTTAGAAACAGCAGAATGAAGAACTAATAGATAGTGTTTACTGTGTGCCAATAAATGTTCTAGGAGATTGACAAGAAATAGCTCATGTAATTCACTGCAGCAATTTACAGAGGTAGGTATTATTGTAGTACCCTCTGAACAGGTG
AGGAAACTGAGGGACAGACAAGACAAGCAACTTGGATGGAGCCCAGGAGACAGGCCCACGGTCTCTGCTCTGTACACTGCACTGCTACCTCCACACATTCTCAGGTGCGATCTTTCTTCCTCTTTAGGAACAAGACTCTGTGCCCCAGGAAGCAGGACTTCACTCTCACCAAGCTACACTCTGCTTCTTATTCTTATTTTTATTTATCATTATTATTATTATTATTATTATTATTATTTTTACCAGTCTTGCCCTGTCACCCAGAGTGGAGTGCAATGGCAAAATCTTGGCTCACTGCAACCTCAGCCTCCTGGGTTCAAAGGATTCTCCTGCCTCAGCCTCCTGAGCAGGGGTGATTACAGTCACCTGCCACCATGCCCATCTACTTTTTGTATTTTTAGTGGAGATGGGGTTTCTCCATGTTGCCCAGGCTGGTCTCAAACTCCTGACCTTGTGATCTGCCCGCCTCAGCCTCCCAAAGGGCTGGGATTACAGGAGTGAGCCACCATGCACAGCCCCTACTCCCTGCTCTTGATGCTGTCACTTATAGATAGCACAGGTTCTATTAGGAGCAGACTCCTCTTGAAGCCCCTCAGAGCGGGTACTGGCTACTATCACCAAGTTTCCCTCAGAGTCACTAGAACAGAGCTGTGCCTGTTGGGCCTCAACAGAAACTTGAACTGAATAAAAGTTCACTAGTCTCAGACATTTAGAACAACAGACTAGATGTTATTTGTCTGCAGGATCTTACATGGTACAGAGAGGATTCTTGGAAACATGATTGAGCCTCTTGGAGAAAACAGGTCATTCTGTGCCTGTGTCAGAAATCAATAAATGGCAGTTTAACTCTAGTCCCACCCCCACCTGATTGCAAACATGGAAAGTTGCTAAATACTTTGGTACCTCTCTCTTCCAACTTTAACAAAATGTTAAAATACCCATTTCTGTTTTCCTAGAAGTATGGGGAGGATGACATTATTTTAGATGGAGAGAGCACTTAGTTTCTCAGAGAGAAGACAGGACTTCGTTCATCACTTTCGTGATGGTGAGCCTATAGATCTTACTGTATTTGTTCTGCTGGTTGGCCAGGAAGCAGGCCAGTTGAGTTACAAAACATTTCTCTTTGAGGTTTCTGAACTGCTGTTTCTTCTCTGCCAGCTGGGGATGCAATTTCTCGTTGATTTCTAGAATGTTCATCTCTGCCTTCTCGCTGGACAAAGGGCCGGCTGATACCACCATGCTGACGTTTGTGGCAGAAGAGGTGGGGCCAGGGACTGGGGAGAAGAAAGGCAAACACATGATGGGTTAAAAACTGGTGAAATCAAATAGGTTTAATCACACTGAGGGATGTCAGTGGCAGCCTTGTCTACTTATTTGAAAATGTTGTTTCCCTGGTTTCACTCTTGTCATCTCCAGTCTTGATCTCCTTTAAGTCAACTTGTCTTAGCTATGCAGTCACCTTGAAACCAGGACATAAACACTTCTACACTTTTCTTGCTTATAAGTTTCTATAAAGCAAGGCTGGGCCCTGAGATTTTTACCCCATGAGTGGCCAATGTTTCTGTGTAGCACAAAAGATTGCATTTTCCTTTTTCGATATTTTTCTCTTTTGGTTTTTTGTTTTTTGTTTGAGACGGAGTCTCACTCTGTCACGCAGGCTGCAGTGCAGTGGCGCAATCTCAGCTCACTGCCACCTCTGCCTCCCGGGTTCAAGTGATTCTCATCCCTCAGCCTGCCAAACATCTGGGATTACAAGCGCCAAGTAACATGCCAGCTAATTTTTGCATTTTTAGTAGAGATTGGGTTTCGCCATCTTGGACAGGCTGGTTTCGAACTCCTGACCTCAGGTGTTCCGCCCACCTTGGCCTCCCAAAGTGCTGGGATTAAGATGTGAGCCAGCACCCCCGGTCAGAGACTTTTTTTTTTTTTTTTTTTGAGATGGAGTCTCGCTCTGTCTCCCAGGCTGGAGTGCAGTGGCACAATCTAGGCTCACTGCAAGC
TCCGGTTCCTGGGTTCATGCCATTCTCCTGCCACAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCAACACCGTGCCCAGCTAATTTTTTTTTTTTGTATTTTTAGTAACGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCACCCGCCCCGGCCTCCCAAAGTGCTGGGATTACATGTGTGAGCCACCGCGCCCGGCCGAGACTTCTTATTAATAGCTAAGACAAGCCAATGAAAAGGAGAGAGAGTCTAGCCTGAGAGGAGTGAACCAGGGTGGGAGGATCGTCTCAGCCGATCCTCCCACCTAAGTCTCCTGAGCAGTTGGGACTAGAGGCACGCAGCACCATGCCTGCCTAATTTTTTGTATTCTTTGTAAAGATGGGTTTCACCATATTGTCCAGGCTGGTCTTCAACTCCTGAACTCAAGTCATCCTCCCACTTGGGCCTTCCAAAGTGCTGTGATTATATGTGTGAGTCACAGAACCTAGCTCCATCCTAGTTTCTGACTAAAAGAATAACAATATGTGTATATACAGCCTGTCCTCAGAATTGATCTTCCATAGCCTAGACAGAGGTATGAGACACAAGGAAAATAGAGGCTACCTGGGAGAATGTTTACAGCATCCTGACATTCATCATGAGAGGATTCTCTGTCTACAACCAGAGTTGAGTTGACTTTGTCTTCCTCAAAGGTGATGTTGATGTTCTTGTGAGGCTGGTTGGAGTCACAAGGGCTGTGGCTATTTGAACAAGTGATGGCACATTCCTCCAGTGAGTCCTCAGGGACTTTGCTTTCTTCAGCCTTCTGCACCTCCCTGATGAGCCAGGTGGGACAGAGATGACAGAAGATTAAACACAGAGGGATTGGACCCCAGGGAGTCCTAGCTGGTTTTGACAGGCGGCATTAAGACAGTGGTCCCAGAAAGCAAAATGGAGGTTCCCTTTAAGGGGGAACAGGCAATCCTCTTCTCTCTGCAACAGAGCATGGCTGCCATGGGAGCCAGAGAGGAAGAGAGCAGCTGGTGTTCAGTGCACTGGACAGATAGGAGCTGAGGAGGATGAAGACTCAGCTATCCCTGTATGGTACAGACATGACACTTGGCACACATAGAGAAACACGACAGCTGCCGCACCCTGTGTCTAAGCTGGGTTGAATTTCACATACTGTGGCCAAGCGAATGCGGGCTTTTGGCCCATCATAGATGCCAGAGAGGGTGTGCCTCCTACACATTTTCATATGTTACCACCCATTACTTGCTCCCGAGTATTCAGTGTTACCTGGGGGCAGATGATTCCAGTACTTTCTCAGCCTCCTCAACTTGAACATCTTCATCCTCATCTTCGTCATTTTCTGTAAATACAAAATGTTCGTTCAGATATTTCCCACTTCACATTCTGCAAGCACAGTCAGCCCAATGTGCACAGAGACATGAACATCTATGTATGGTTCAGCACTGTACTGAAAACTGTCATATTTTATCTTTCACAAAATGCCCTGGCATGGTTTCCTGGTCCATCGGGCAATGCATTTCTGATCTGGAGGGCCACCATCAAGATGTGGTCAAATATTGAAAAGACCTTTTGCTTCCCATATCACTGGAGGCTTGTGCAGCCTCTCTCTGGACTTTGGCAGCTGTCGCCCCCATCCTGCCACATATCTGATTCCCAGGAACAGGCTTGGTGTCCTGTCACAGTTCGCATTTCAAACCTCATTCTTTCTCTTAGGAGAGGACAAACTTGTCCCACAGTCCTCTATGCATCATGAGACTGCACAGGCCCTCCATGTGGCTTCTGCTGTGTTATTCAGGGACATTCTATCCACGGGGAGTGCTCCAGTCTGAAGCACTTCCTACCACCAAATGCCCCTACACCAAGTGCCTTCTCCAACACCAAACGGAGAGGGGCTTCATCTCATTTTAAAAAGCATTCGTAAGTGTTCCCATATTTGGATGCTTCAGACCCTTGCAAGAGACAATTT
GTTTGCCTTTGCAGATGGAGAGAGAGAAACTCTGGAAAGATAAATCACTCACTCACCGACAGTTACTAAGAACATTGTCAAAAAGACAGCCTGGGAACCTTCATTCTTAGCCCAGAGCTCTTTTCACTCCAACAAGCGCCCTCCCATCACAGCCTCCTTCCTGTCCTTTAAAACTAGACAGATGCTGCCTCTTGCTCCAAAGACCACCTTCCATCAAGGAAGGAGGGACACTTGCAATACTGTGACCTCCAACCCCATGGGTTTCCCATCTCTGTTCTTACCCAGGAAGTCCTGGTCATGTCATGGCCACATATGTATAGCAGAAAAAAACCCCACTGATACAACTGTCATTGTGAAAGTATGGAGGTCTGGAGCCTCTCATAAGCCTGGGGTTTTGGGTCATCAGGGCCTATGGCCACCTTACCTGGGCTGAGCTTCTGGAAAAGTTGCTGTGCCAGTCTACACCCCTCAGCCAGCTGTTCTTGGAGGTCCTGCCCCTGGGACTTGTCTGGCTTATCCGGAGTGAGGAGGGCCTGGAGATGCTGATTCAATGAGCGGGAGGCATCTCTCCCTTCCCGTAACTTCTCCCTTAACTGGGTCAGCTCTCGTTCCTGAGAGTGAACCAGGACTTTATATTGCCTAAGGTGAGACGGTAGAGAAAATTTAAGAGTGGAAAGGGTTGAGTGATCCGTTCAAATATTGCAACAGAGATTTCTGAGACAATGTCCTCAAGGAGACCTCCAAGCAGAAGGTCAGCACATGTTGAAAGGAATGACTGTGGCCAAGAGAAAGAATAGAAAATGGTTTACAGGCTTCCTCTGTATCAGAGAGGGCTCCTGCAAGATCCTCGATGATGTTCCATTCATCTTTCCCTTCTGTAAACAAAAGTAGGTGTCTTCCTAATTCCGTTTCAAAAAGACATCCTTTCAGTTCCTCACTCTGGCCATGGACATTTCCATGTGAAAATACACATAGTGCATCTTGCGGCCACTAGATACAAAGCCATGTACAGAAATGAGGCCAGGTGCAGATGGGGCGAATTGAAAAGACGAAAGAAGAAAAGAATGACAGGGTCGAGAAGGCAACATTGATTGAGTGAAAGAATGAGAAGACGCAGTCAGTCAGAAGGTGATTCTCACTAAGGGTAAGTGGGGTGGCGATGGCACACCATTTTGAGTATACTGAATGCTGCTGTGTGGTTCACACTCCTTTGGTTAATTTTGTGTTATGTAAATTTCACATCAACAATTACTTGTTTGAAAAAGAGAAAACAAGGCTCTAAGAAACAACTGCAACCCATAAATTTTTATTATCCTTCTTCTCTGCTTGATAAATACTTGTGTGTTGCGAGCCTGCCATGGCAATTCCTGCCCTTCCCCTGGCCCAGCTTAGCTCTTACGTCTCCCCACCGAGCTGCTGTACTTCAGAGATTTACACAGCTGCTCCCCTGCCTGCCCCCATGGGGTCCCCTCACCTGAGCTCCTCAGCTTGCTTGAGCTGCTCTGCAAGCTTCTCCTCCTTGAACTGTCGCTCATTCCTCAGCATAAATTTTATGAGGTCTTTACACTCTTCATACTCTGAGAAAAGACAGACACGCCTGCCTCAGTGGAAGGCTGGACATGCTGCTGTGGTCACTGCCTACAGGGCAGAAGCCAGGTCCATCCCAAGGACAAAACTGTCCCCCGTACCAGGCTCTAGGCAGGGATTTCCACATCTTTACTCTTCAGTCTCCTGACTTTCTGGCATCTTATCCTCCAAAATTTAAAGACGAAGAAAGAGAAATTCAAGGCACATCAAGGAAGTTGACAAGATGATTCAACCACAACGAAGTGGAGTCAGAACTCACAGCCCCTGAGGTCTGACTCTGAATGCGGGGCCACTTTCCCAAGACTTGCAGCCTCTCCTCTGAAACACTGCACTGGGGCATGAAGTAGTGATTTCTTGTACAGTTGGGAAGGCCCCTAGGACTATGGGACTGACGGTTTCCCTTTTACTGGGAA
TTTCAAAGACAAGTATGCGAAAGATTTTAAAAATCTTTGATTTTTAAATCATATCTTCAGTTATGATTTTAAGAATCATATCTGAAGCATAAAGTGTGACACATAACACCATAAGGCCATGAAGGAAATATGCCCAAATATTTTATTAGTATGACAGGCAGCATCAAGATTTAGATTAGTTGTGTTAATTTAGAAACAGCATAAGATTAGTTTGTGTTAATTTAGAAACATCAGAATGAAGAACTAATAGATAGTGTTTACACTGTGCCAATTAATGTTCAAGGAGATTGACAGGAAATACCTCATGTAATTCATTGCAGCAATTTACAGAGGTAGGTATTATTGTAGTACCCTCTGAACAGATGAGGAAACTGAGGGACAGACAAGACAAGCAACTTGGATGGAGCCCAGGAGACAGGCTGAGGGTCCCTGCTTTGCACACTGCACTGCTGCTTCCACACATTCTCGGGTGTGATCTTTCTTCCTCTTTAGGAACAAGAGCCTGTGCACCAGGAAGCAGGACTTCACTCTCACCAAGGTACTCTCTGCTTTTTATTTTTATTTTTGTTTTATTTATCTTTTTGTTTGTTTGTTTTTTGATGAGTCTTGCCCTGTCACCCATGCTGGAGTGCAATAGTGCAATCTTGGCTCACTGCAACATCTGCCTGCTGGGTTCAAAGGATTCTTCTGCCTCAGCCTCCCGATTAGTGGTGATTACAGTTGCCCGCCACGACGCCCATCTACTTTTTGTATTTTTAGTGGAGATGGGGTTTCTCCATGTTGCCCAGGCTAGTCTCAAACTCCTCACCTCGTGCTCTGCCCGCCTCAGCCTCCCAAAGTGCTGAGATTACAGGAGTGAGCCACGTTGCACGGCCCCTACTCCCTGCTCTTGATGCTGTCACTTATAGATAGCACAGGTTCTATTAGGAGCAGACTCCTCTTGAAGCCCCTCAGAGCAGGTACTGGCTACTATCACCAAGTTTCCCTCAGAGTCACTAGAACAGAGCCTTGCCTGTTGGGCCTCAACAGAAACTTGAACTGAATAAAAGTTCACTAGTCTCAGACATTTAGAACAACAGACTAGATGTTATTTGTCTGCAGGATCTTATATGGTACAGAGAGGATTCTTGAAAACATGATTGAGCCTCTTGGAGAAAACAGGTCGTTCTGTGTCTGTGTTAGAAATCAATAACTGTGAGTTTAACTCTAGTCCCACCCCCACCTGATTGCAAACATGGAAAGTTGCTAAATACTTTGGTACCTCTGTCTTCCAACTTTAACAAAATGTTAAAATACCCATTTCTGTTTTCCTAGAAGTACAGGAAGGATGAAATTATTTTTGATGGAGAGAGCATTTAGTGTCTCAGAGAGAAGACAGGACATCATTCATCACTTTCATGATGGTGAGCCTATAGATCTTACTGTATTTCTTCTGTCGGTTGGCCAGGAAGCCGGCCAGTTGAGTTACAAAACATTTCTCTTTGAGGTTTCTGAACTGCTGTTTGTTCTCTGCCAGCTGGGGGCGCAATTTCTCGTTGATTTCTAAAATGTTCGTCTCTGCCTTCTCGCTGGACCAAGGGCCAGCTGATACCACCATGCTGACGTTTGTGGCAGAAGAGGTGGAGCCAGGGACTGGGGAGAAGAAACCCAAACATATGATGGGTTAAAAACTGGTGAAATCAAATAGGTTTAATCAGGACTGAGGGATGTCAGTAACTGAAATTCTTAACTTACTGTTGTGAAAAATGTGATCACTCCCCACAGCACTTTAGGATCCTTCACCACAAAAACAAGGTTCGAGGTGCCTGAACTCAGAGCTGAAAGCACTGCCAGTAGCTCAGACTCTGATAAGAGTGAGGTAGACTGTGGCCAGCGTGCCAGGTAACCGTCTGCAGTTGCAATAACAGAATTAGAAGGTGGGGGTGTCATGGAATCTTAGGAGCCCTGCATTCCAATTGCCCAGGCTTTCCTGAAACACAGGCACCCTAGTCTCACCTGAG
GGTCACCACCAATGGGGATCATTCCTTCAGCATTCACTCTCAGTATTCGTGTACCCTTGTGATGATGCCACAGACCCGTGTCTTTCCCAATACATCTAAGCATATTCCTCACTGTTTATCTCTTGTCTGTACAACATCATCAAGGCAGAAACAGTTTCCCAACAGGTTGTATTTTCTTAATGGTAGTCATGAAGTCACCCCACCTGCTCTCAGTTAAAACAGAGCTTAAGGCCTTTCCACAGGTGTAAGATATCAAACTTTTAGCCTGCCCTGATTTCCTCTGGGTCTTCTGCAGTTTTGTCTGTATCCACTAGAAAGTGAATGAATAATTCATTTGTAAAAAATGTTGTCTTTCCTGTCTCAGTATTCTTCTTGCTGTTTCCCATTGTTATGTTGATTTCTTTTTTCTCACTGGGGCACCATCTTTGCTTTTCATTACACTCTAGACCAGTTTGACATCCCTATGTCCAGAGCTCTTCCTCTATGTGGGTTGATTTGGTTTTTGATGTCACTGAGCGCTACATTTTATACTTGTCACTTATGGATGTCATTCTAGTGTCACAAGAGCTCTTTTCAAGGTATCAAGTGATCAAAATCATTTATATAGAGATCTCCTGAAAACATGTGTGACCATCTATCTTGGGAAGTTTCATAAACCTGATGCTATTTTGTTGTTTCCATTTTGTTTTCCCATATACTGAAAAGAACAGGGCCATGAGCGGTTCTTATGCAATATGGTTTGATATATATTTTGTTGAGATGACCTAACACCATTGATTTTGGGTTGCATTCCACTAACAGAACATGGCAAGATCAAGGTTATGGTCACGGTTGGTTGGTGATCCTCAGTGTTGCAGTAGAAGGTGAGTTTGAGATGAGAGGAATGAGTAGGAAAGAGTGATCCCCTGAACCACCTCCTCGCTTTCTCAGCTTTCACCCCACCTAGGTTTTGTGAGCCTGGAACTTGGGAGACTGTTCTGTAGCCCAGGTCTCCTAAGATTGGCTGCTGGACTTGCCTGAGTTGAGGGTGCGGTGGGTTGACCCTGGGCTGCCCAGCATTCATGTGGTAGTGAAGGAAGGAGGACTGGATCAATCCCATTTCAAAGCATGTCTCTCTGCACTCCACACTGTCCTCCAATGACACTGTAAGGAAACCGCTTTAAGACGTATCAACGGCTTTAAGTAAATGTATTTTCTGGCATCTGGGAGACCTGACATTCTGTGTCATAATGAAAATCTGTCATGTTTCTTTATTTTAAAAATGATAAAACTGCAGGTTCACAGAGTTACATGGCTTACTTGAGGTCACACGGGGATGAGTTTTCAGCACTGCCAATAAAAGCAATCACATGAATTATTCAGTAATTATTCATAGGATCCATATAATTCAGTAAATATTCACATAATTATTTACTAGTTGTTCATTGACCAATTCGTACAAGGCATTTTGCTCAAAACTGTGTTTATATTTGGACATTGTATCTTCATCATAATCCTGTGGTAATGCTGTTATCCGTAAGTAACAGGTAAGAAACCTGAAGAGGAGGGATAGCAAATCATGTATTTGGACATATTTCCTTTTTTTTTTTTTGGTTTTTGTGATGCTGGAAGAATGACCAGAATGAGTCATAGGAAGAGTATACATTCCTGTAGTATTTTCCAGGACAGAGGTGTGACCTCCTAGAGTACTGGGACCAAAATTCCCAAGTGTCTGCAACCTTGCTTTAACAGTATGGGAGATCACCTCTATCACCTGGAATTCCCCTGGAACTCTGGAATATACAAGAGAAGTATGAGACTTGGGTCTTCCCTTGGCTGTGTTTAATTCACTCTTCTATGGAATACCAATGATTCTCACTAAGACTGGCCTTTTCATAAGCACAATGTGCATTTTATGGAGAAGATTTTACACTTTGCTCTATTTAGAAAGAATAAATATGAGCAGTGGTTTAGGTTTTATGCCCTGGACTTAATATGTTTCTGATTCCTGTTTTGAGATT
AAATTCTCATGTAAATAGAAAAATACTTATTATTTCTCATAAGGCCAAGTTTGTTATTAGTTTGAGTTTTTGAAGATGAAGCACAAACTTTTGATTTTATCTTTGTCTGTCTCTGTCAGCGCCACTCGTTGTCTCTCAGTATGACCTGGACTTGCCCCTGCACTTACCCTTGTCCTGCTGAACCATCTCCATGCACTGTCCAATTCCATCAGTGATTCGGGCTCCTTCCAAGGCTCCCTGAAAAGGGCACAGAGATCAGGACATTAGGCACATTCCGGACACAAAGGCAACCCATACTGTAGAGTGGGCAGCTGTGTTTCCACTTCCCTAATATTCCAGTGATGTCCTCAAACTGAAAGGAACACTTTCCCTTTTTAGGGGTCTGTTCTTCATGTCTCAGTGCCTCTGATCTAGTCAACACAACTGTCCTGAATGTGAAAGAACTTGCTAAATTTCTAGTTTCTTGTTAGGTGGCTAAAATAGATTTATAAGACTTCCTTACTTACCCATGACTGCTGAAGTTTGAATTCTTAGCAGTACGATTCGTTTTCTTGTAAGGTGAGCAGCTTAGGAAAGATTGGCCATCTTCCTGTGCAAAAAGAGGCAAACTTAATTTCTACTCAAAGCATGCTTGAATTTGGAATCAGGGCTTCCACTCTTCCGAAGTTGGAGTGTCACTGCGACAGGCATGTGTCCCGAAGGGCTCGTGTCTCTGCTATACTCAAAGTTTAAATGGAGCCCAGCAAGCCAGATGTCCTTTACTTCTAGGTTCCCTCAACAGTTTCTCCTCCGCTTTAGAGACCGCATTGAAAATATTCTTGTTCTGCTGTTGTGTTTTGGCTTTGGAATGATGTGATGCAGCTCAATGGGTCCCACCCCCAACTTGATCAAAGTAAGAAACAGCTGGGAAAGTCAGTGCAAATACAAGTTCATTGTCCTCCTTGCAGGGATTCTGATTCAGAGGGCTCAGGTGGGGCCTGGAATGTTTGTTAACATGACTCAGATGTGCAGTCAATTTGGGGACTCACTGACAGCATTGACCTTACAGTTTATGGGATGATTCTTTCTGTTTGGTGATGAAGAAACTGAGGCACACAGAGTCTGTAACTTGCCCAAGTTCCCCTTGTTGTAAGTCCTGGAGCCAGATCTCAGGTGGACCAGTGCTTCTCTCCCCTATACCTCATTTCTGAGAAAAGGAAATCTTCTGCAATTTGACTTCTTTCATCTAACACATTTCCTCACAACATGCAGCCAGCATCATATTTTGGCCACTTACTATTAAAGTGAGATGCTTTTTTTTTTTTTTTTTTGAGACAGGGTCTTATTCTGTCACCCAGGGTGGAGTGCACTGGTGATTATAGATCACGGCAATCTTGAACTTCTGGGCTCAAGCGATCCTCCTGCCTCAGCTTTCCAAGTAGTTGGAACTATAGGCACACATCACCATTTCTGGCTAATTTTATATTTTTCATAGAGACAAGGTCTTGCTATGTTGCTCAGGCTGGTTTTGAACTTCTGGCCTCAAGCGATCCTCCCACCTAGGCCTCCAAAAGTGCTGGGATTACAGAAGTTAGCCACTGAACCTGGCCCTGAAATGCTTTTATTTCTTTCTTTTTTTTAATGAAAATACTGGACATGGAGATGTGGAAAGACACCTTGCTTTATTACTTTTGTTGTTATTATTATTTCTACAGTAGAATTTATACATCACAAAATTCACCATTTTTAAGCATACATTTCAGTGTCTTTTACCATATTCCAAAACTTTCGCAACCATCGCCACTACCTAATTCCAGAATATTTTCATAATGCCAAAAAGCATGCCTGTACCTATGGGCAGACACTCTCCAATTCCCCCCTTCTTGCGCTCTCTGACAACCACTAATCTACCTTCTCTATATATTGATGTACTTGTTCTGGGCACTTCCTCTATATGGAATAACAAAGTGTGGTATTTTCTATCTGCTTCTTAGAATATTGTTCTCAAGTTTCATCCTTTC
TAGCCTGCGTCAGTACTTCAACTTTTTATGGCCAGATAATATTCCACTATATGGTTATACCACATTTTGTTTATTCATCAACTCATGGTGGTTTAAGATGTTTCCACTTTTTAACTATTAGGAATAATGCTGCTGTGAACAGCTTTGTACAGGTTTTTGAGTGAACATCTGTTTTTCATTTTCTTGGTTATAAACCTAGGAGTGCAATTGCTGCATCATATGTCACTTTATGTTTCACTTTTTGAGGAACTCACACACTGTTTACTAACTTCAGTAGCTATATCATTTTAGATTCCCAATAGTAATATATGAGAATTCCATATTCTCCATCACTTTTGAAACATGTGTTGTCTTTATTTTTTTCTTAAGTCATACTGCTGGGTGTGAAGTGGTATCTCATTTTGGTTTAAATTTACATTTTCCTAATGACGAAAAACATTGAACATCTTTGCATGTGCTTCTTGGCCATTTGTGTGTTTCCTTTAGAGAAACCTCTACTCACAGCTTTTTTTCCCCATTGTTAAATGTGGTTGTCGTTTATTGCTCAGTTATATGAATTCCTTATACACTCTAGGTACTAGACCTGTGTCAAACATACAATTTGGAAATAGTTCTCCCATTATGTGGATTATCTTTTCACTTCCTTGACAGTGTCCTTTGAAGCATACAAGTTTTTTATTTTAATGAAGTCCATTTATCTATTTTTCGGTTGTTTGTGCCTACTTAAAAAATGTCTAATCCAAAATCACAAAGATTTGTACCTAGGTTTCCTTCAAGACATCGTCTTTTGAATGAGAACTTTCCTGGGTTTTAGAGGAGGGTGGACATTGTTTATTGATGCCTCCTGTCCATTACCGATGTTTCTCTTGATTGTTATTCATATGCTCACCACCCCTCCATGGAGCATCCATGGCCTGTGACAGAGCTCTGGGGACTGATATCCTTCCACTGACTTTGGCGCTGGTGAGAGCCCTGGTCATGTGATTCAGCTTGGCCTTAACCCGACCCAGTTGCACATATTCCTCAGGCCCTTTAGAGTTGAAGTCGAGACCTCTCTGAGAACGCTTGCCAGCCCATGCTCTTCTAAGGCTGGAGCAAACTTCCTCCATCTATTCCAGACAGAGGGGACTGCAGGGGTTGGACTCACTCAAGATATCTCTGGTGTTAGAAAGAAGACCTGTTTCAGGCTTTGGGGAAGATTGTTCAATATGAACTAGGTCCTCTCTAATTATTTTTACCGTATGTGTGACTTCTTTCTAGAAACAAGGGAAGAATATTTATGTTAGAACATTTTGTCTATTCTTTGTCAATTGTTGTTTATCTACAATTTTAACATGGATAAAGGAGAGTTCAGTGTCAATATATTCTTAACAACTAATTACGGCTCATGTCCACCGCCATGCGATCATATTTAAATCTGTCAACTATCCTGTTACTTAGGTATTATCCTGTTCCTGATGAGAAAACAAACTCAGAAAGATTGCAAAATTTCCCTAGGTCACAAAACTAGTGAGGAGAGGAGTAAGAATTAGATATCCGTTCCTTTTGGCCTTCAAAGCTAACCTTGTACCATTAGATCAAACTGATTTACATACTTTTGCTGGAATTAGTCTCAGACTTGTGGTTCTCACTTGATTTTCCCAAGGAAACAGTGTGCCACTTTAATATCATTTCAAACTTTGAAATTTAAAACTCTTTTTATTATACTTTTTTGTCTTTGTTCTATTCCGTTGCTTTTGGTTTCTTCTCAACGGATCCCTCTTATTTATATGCTAAATATTTGTTACCTATTTTCTGTCAATTTTCACCTTTTTGAGTGTTTGTTATCTGTCTGTTGTATGCTAACAGTTTTTCACTGAGGTAAAATTTGCGTAGAGTATACTGCAAAAAAACCTAAAGGCACAGCTTAATAAATTTTAATATAATTATAATTGTAAAGTAACACCCAGTTAAAGACAGAGAACATTTTCCCCCATGCCACAAAGTTCTGATGTGGTCCT
TGCCAGTCAATACTCATCCCCCAAATGAAGAATATATTCTGAATGTTGTCACTGCCTTAGCCCCTTTGTGTTGCTGGAAAGGAATACCAGAGGCTGGGTAAGTTATCAAGACAAGAGGTGCCTTTTGCTCATAGTTCTGCAGGCTGTACAAGAAGCATGGCCCCCGCATCTGCTCCTAATGAGGGCCTGAGGCTGCTTCCACTTGCAGCAGAAGGTGAAAAGGAACCAGGGTGTGCAGAGATCATATGGCGAGAGAGGAAGCAAAAGAGAGCAAGGAAAGGTGAGAGGCACTTTTTAATAACCAGCTCCTACAGGAACTAAGAGAGTGAGAATTCACTCACTACCTTCTCCCAGGGTGGGGATTCATCTATTCATGAGGGATCCACTCCCATGACCCAAACACCTCCCATTTACCCCCACCTCCAACACTGGGGACCACATTTGAACATGTGATTTGGAGGGGACCAATATTTAAACTTAGCAGCCACCATAGATTCATTTTGCTTGATCATGTGCTTCATAAAAATGGAATCATTTTGGCTGGGCCTGGTGGCTCATGCCTGTAATCCCAAGACTTTGCAAGGCTGAGGCGGGCAGATCACCTGAGGTCAGGCGTTCAAGACCAGCCTGGCCAACATGGTAAAACCCTGCCTCTACTGAAAATACAAAAAATTAGCCAGGCATGGTGGCCGGTGCCTGTAATCCCAGGCACCAGATATGTACTGGTATCTCATATGTACAGGACATGTACTGGTATCTCATTGTTGTATTGATTGATGTTCCTGATGGCTAAACTGTAGAGCATCTTTTCCTATGCTAATTGACCATTCATGTATCTTCTTTTCTTAAGTACCTATTCAAGTCTTTTGAGAAATTGTTTCATTGTGCTGTTTATCTTATTAAACTTATATATATATACATACATATATATACAAATACACTCTAAAAAACCCCTTTGTTGGAAATAAATATATCTCCTATATTGTGGTTTCTTTTAATGTTCTCTTAATGTTCCCTGTTTGGAGATAACGATAGATAATCTTCAAAAAGGTGAATATACACACCCACACCCACCCACACACATACACACACACACACACACACACACACACACACACGTGAGCCACCGGATCCAGCCTGTTGAATTTATTTCTAAGCACAACATGTATTTAGATGTTACTTGAAATGAAATTGTATTTTTATTTCATTTTCCAAATGCTCATTGCTAATACACAGAAATACAAAAGACTACTTCTATTGAGCTTATATTCTGCAACATTACCAAACTCACTAATTACTTTTGGCAGATTTTTATAGATTTCTAGGATTATTAACATACACAGTCATTATCTGTGAATAAAGACAGCTTCAATTCTTTCTTTTCAATCTTTTCAATACTTTTATTTTTCTTACTTTATTGCATTGATTTAGATCTCTAGTATAATGCTGAATTGAAAGAATAACAACAGATATTCTACTTTTTTCTCTGATTTAATAGAAAAGCATTCAATCCTATGCCATTTAATATAATGTTACCTCTGAGTTTTTTTCAAATCTACCCTTAATAGGGTTGAAAGTGTTGCCTTCTCTTCTTATCATGCTGAGAGTTTTCTGGGGTTTGTTTTTATAAATCATGAAAAAAGTTTTCAATTGTGCCAAATGCTTTTACTGTGTATGACAAGGTAATCATATGGTTTTTCTCTTTTGCCCTGATAATACATAACATTACATTTTCTTAAATATAAAAAAGATTTCTTGAATCAAGCTAGGACAGTTTTTTTAATTATAAACTTTTAACAAATATATTGAAATATAACTTACATGCAATTGAGATGCATGAAAGTGTATAATCATTAAAGTGTATAATTTTAAGAGTTTGAGCACACTATACACGAGTCAAAGAGAAAGGACAGAAAATACTAACGATGGCTCAGCACATGTGGTCTATCTTGCTGAATGCTCTATGTGAGTTTGAGAAGAGTTATTTGTTAGCTGTTCTT
AGATGTATTTTGCTTAAATATCGACCTGGCTAACATGTGTCATTGATTGTGTGAATTAATTTTGTTCTAGTGGGCAGTAAAATTACTGTCTGATCACTTTGGACTTATGTGGACTGGTTCATGTTTTATTACAACGGATTCATGGAAAGCCCACAGCATTTCCCAAGACCCTCTAATTTGGCAGGACTCAATCACCAATCCACCCCTTTGTGAATTTGTCAGGGTTTGCTTTTAGGCTTTAGCAGGTTGGTCTACAATAGGCCTTATTGAAAAGTGTGACACTTATTCCTAAAGCACATCCATTCTAGTGTCTCAGTTGGATACCTGGGTGCTAATGAGGTGTGCATGAGTTCTTCCCACCATGGATGGCAGAAACTCCATCATACATTCCCCAACCCTCCTCCACCTCAAGTACCTCTGGTCCAAACTCAATTTCATAGCAGCCACCCCTCTGTTAAATCTGTTAGTCTTTTCCTTGTGCAGGTAGAGTCCACTCCTTGATAAGTATGCACATGGAACCCCACATAGACTTTGAGAGCTGCACCTTTGATCAGCTGTCTCCTCACTGGTGCCCTGCCCTGCAGATTGCAGTTGCTTCAGCCGTCTTGAACTCTGATCTCTGCCTTCTCAGCTCAGTGAGCTGCCCTGCCCTGAGTGGACTCTAGCTCACTATGCAGCTGCTGAGAAATTCTCCCCAAACAACTAGGAAATCATGGGGCTTCCCCCTTAAGTTTTCTCTTGGACTGCCTGTTGTACACTGCTGAAAACAATTTTACGTTTGTTTATGGAGGCAGGGTTAGTCTGATATGATTTATTCTAACAGACAGAAGCAGAAATCTGTTATACTCTTTTAATTACTGTGTCTTTATAATATTATGGTAGACAGAATCCTAAGATGACCCCCAGTGATCTTTGCTCTTATATAATCACTTCCTCCTGAGTGTAGACAAAGCTACTGAGGAGATGTCACTCCTGTGATTGTGTTACAATTTATGGCAAAAACAAGTTAACAGATGTAATCGAGATCCCAAATCGGTCCAATTTAAGATAGACAGATTATCTGATGAGCTTGACCTAGTGAACGTGAGTTCCTTGGAGGGACTGAGGACTTCCTGGAGAGATGTGAAGTGCAGGAGGGTTTCCATGCAGGGCGATCCTCCTCTGCTGGCTGGAGGAAGCATGCAGTGGGAACATGGGAGGCCTCTAGGAGCAGCGAGAGGCCCCTGGCTGACAGCCAGCAAGAAAACAGAGATCTCAGTCCTACAGTCACAAGGAACTGAACTCAGCTGACAACCTGAGGAAACTTGAGAGGAAGTTCTTCCCCAGAACCTCCAGAAAGAAACCCAGCCTAATTTCAGCCTGTGAGGCCCTGAGAAGAAGACCCAGAGAATCCAGGCCTGAACTTCTGATCTGTGGACACTGCAAGAAAATAAATCATTCTTATTTTACGCCGCTAATGCTTGCAGTAATTTAGTATGCAGCAATAGAAAATTAATACAAATAAAATGGAGAAGGCTTTGGAGTGGGGACAAGAAGGAAACGGTGGGAGAGGGATGCCTGTATGCTGATATGGTTGATGCCTGTATGGTTGAATTGGGTCTACCGTTCCTCATCTAATTAGCTATGGTCTATTAAGGTGCATAGCTACACACAAATATTGGTACTACGTTCAATTCAGAGGAATAAGATATTGCATTCTTGACAGTAGACAAGAACACCCTGAATTTGGGGTCACTGTATCATAAGTCATGTTATCAGGTCCCTCTAGGAAGGCTTAGAGGAAGATTTCCAGGATACACTTGTGACAACATTGAAGGCTTCTTTTTTCCCCAAAGGGACCCGATCTCCCCTCAGTCGAGAAGCTCCAAGTCTCTGAACTGGATGCCAGGTTATAAATTCCCCCTATACTGACTCCATCAGGCTTCTGTCCTCAGAACTAGAGTTTATCAGTAAAAGATAGACTCATGGGAGTCTAGGCATTTATTCTCTTATTTTATATA
AATCAGTTAATGTGCAGGAACAAAACAGACTTTGAAGAAAGACACTCACAGTTGCCACAGGAAAACACCTTCAACATCCTCATGAGTCATCATGGGTGTTCTGTTGGGAGGACTTGATAGGAGGCTTTCCTCCTCACGGGCTAGTGCAGATCCAGGGGAAATGTCATCAAGTCCTCCATTCGGAGGGTAGCAGCTGAGGCTGCTGATTCGTTAGGCCTCCTGCAGCTGGAGATGCAAGTAGTGCATTTTCATGGCCACCGCAGGGCCCTCAGTTTAGCATTCTTCAGAGCCAGCATCCAACAAGCCACAGAAGCTCTGAGTATTTCCCTTTCTTCAGTCACCCACATAAATGGCTTCAGGGCCTTCTGGGGAAGGCCTGAAGGAAGATTTACAGCATACACTTGTGGCAGCATTGAAGGCTTCACTCTTCCTCAAGGGATCCAATCTCCCCTCAGTCAAGAAGCTCCAGGTATCTGAACTGGATGCCAGGTCATAAATTCCCACTATGGTGACTCCATCAGGTCTCTGTCCTCAGAACTAGAGCTTTTCTAAGTGTAACGTAAGTTGATTTCTTAGTAGATGTCCCATCCATTACATTCCCAGACACCTCACAATGATTCGAATGATTAGTAACCACCACATATCCCTGCCTCTCAGGGAAATCCCTCCCGCCTTGTCTCTAGATGGCCAAGTCCCACGGCCTGTCCTCTACTCTTCCAGAACCCTGTTGTTCTCACTGACAGCAGGGAGGGCAAATCCATGCAGCAGCTCCCGCCATGACCTCCAGCCTGCAGAGGATGGGCGCCACAGGACTTTTAAACGCATGCCGCTGTTCCCCTCACCTGTGCATTTCTTAACGCCTTGGTGAGGAGAATGTCTCTGGATCTTCCTTGATGGGAGCTAAAGGAACAAAGGTAAATAATGCTATGGGACCCACTGAGAACTGGGGCTGTGGAAGAGTGGCCACTGAAGTAATAGACAGATGCAGCTATTGCCAGATACTCAGTGCCAGAGCAGGGAGGGACAGGGAAGAAATACGGACCTCACCTTCCTCTCACTTCCAGGATCCATCGGGGGCCCTCCATTGCTAAACCTAACTAGAAGTGTGCACGCACGGGAGCCAGGGATGCATTCTAGGAGGGACGAGCCCCGAGTGGCATGAGACAGGATGGAAATGAGTGGACAGTGGATCTGTGGGAAGAAGGAGGGGATGTTATGGGAAAACAAAAGGAGAATACTAGCTAAGAACGCTAGGTGACATTAATATTCCGAAGTCTGTGCTCATATTCAGCAAAGAAAGTTCAGCATAAAGCACTAAATAAGGAGTCAAGATATTGTACTTCCAACTGTTGTTCCAACAGCTGTATTATGAAGGGCCACTTTATTTCATGCCTTTCTAATTTGACCTAAAGTGCCAGGTGGCACTGGGGCTGGCACAGCCTTGCTCAATTATGTGTTGCAGAGTACACAGAGACTGCCAGGCTGAGGGAAGATGCAAGAGAATAGAAGAGATGCTCTCAGGGAACAAGAGACCACATGGCCCCAGAGTCAGGGGCAGCATCAGCCACTGTCAGCTGCTCATTTTCCCAGACAGAGCCCACAAGCCTCAGCCATGCTTTGCTTCTGCAAGACGCTTCTTCACCTTTTCAATAAACCTGCCTGAATTTAAGCTGACAGGGTTTATTTCTCCTTCATCATAAATGAAATTCTTCACCACAACAATCTCCAATGAATTTTGGGCACAGCAGGCAGGCCCATTTCTGCTTCTGTTCCACTATCTCTCCTGTAGGTTGAAAAGGAGGAGGTACTGAATTACCTCCAAATGTTCCTCTGGCTCTGATATTCTGTTATTCTGGTTCCTTTTTGGCTACTTTGTTTTTGGTAGCGTGTATCCTAAGGCGTCCAGTTGAACAACTTTTGTCTACTGTGTCCAGGCATTCCTGGTGGTATTTCAGATAAGACTCTCTTGGGTTGCTGAACTCACAACCACTGAACCAATT
CTATGACCATCTGTTTCATGGCCACATGTTTGCTCATTTTATATGTACATAAAGGGAGGGGACAGACAGCAAACTTGCGTGTTACAAATTGTATCATCTTAAAAAGGAAACAAGGCAACACTTTGCAATAAAACCTTAAGATGCATGAAATTTGAGCCTAATGCAATAAAGGATGCCCATAAAATTCTTATCTAAAGAATGTTTCGAAAATTGTTGTACAAGGACATCATCATTTAAAGTGATATGAAGAAACCTTCTCAGCTAAGCATATGGGCTAGATTAGAGAGAAAAATAAAGGACCCATCTCTGCCCTGGAAAAACTGCTGGTAGCATCTTTCAAAAAGCTCTCTGTGTTTGAGTACGCACCTTGATCCATAGGCTCACATTTGATCCCAACTGGCAGCTGCTTCTTGGCATTAACATTGGATTCCCAACTAGTAAATCTTACCAAGATCTGACTTTCTGCAGATATAATATTATTTTGTTTGACCATCCTTATCTTCAAGGGCTACCAAGAAGGAACCAAGAATTTATTTACCTCCCCAAGGGAAAAGGTTTTACCAATGAGACCCTTTCTCACCATGACCCCAGGACCCCATATGCCCTGTTCACTTGAGTGCCCTGTGTGGCCTGATAGAAGCTCATGCTGGTCACAGGATTCCTTATATGACTAGCCTCCTTCCTGAATCCCAATTTCATGGTGGTGGTCATGACAGGTGTCCTGTATCCCATGCTCATGTCCCTGAAGTCACCAGCCTATCTCCAGTTAGAAAAAATTACATGTATATAGAGAGGCCTCTTTGGAAGGAGCAAAAGCTTTCTCACCTTCGTACACTAATGGTTGGAAGGTACAACAGCATATGCACTTTGGGAAAAAATATCTGGCATATTCTTACAGAAACAAACAACTACCTATTCTATGACTCAGTAATTCCTAAGCATTTATCCAAGAGAAACTAAAACCTATGTCCAGAAAATGACTTATACAAGAATGTTCATAGCAGTTTTATTCATAATACAAAAAACTGGAAACATTCAAGTATCTGTCAATACAAGAATGGATCAATAAACTGTGATACACTCATTCCATGGAATGGCTAAAGGAACAAACTGGTGACACACAGAACAACATGGATGAATCTCAAAAACATTTGGAGTGCGATAGAAGCCATACCCAAAAAAGTGTGAGAAAAAAAGATAAATAATAATGGTTCCAAGAAATGCACAGCAGACAGCCCAGAGGCAAAGACCCACAGGACGGCGGGCCGGTCCCAGGCTGTCGATCCTAATTAAGAAACTTCTGCTGGATTTTGCCCAGCTCCATTTCCAAACTATTTTGGGTCAGTGACTTCTTTATCCCTTCCATGTTGCCTCATTTTGAACTAGAATCACTGTAAGTGTTATTCTATGTCTGTCACATCATTCCACAGTAGGGGCAGATAAGCTGTTTAGAATGGCTAAAATTCAAAAAGGTGAACACACCAAATGCTGTCAAAGATGAGGAGCAACCAGAACTTTCCATCGCTAGTGGAAATCAAAAGGGTACAGTCACTTTGGAAAACTTAAGTTCACTCAAAATCCTGCACAGAAGTACTTACAGCAATTTTATTCATCATCGCCAAAACTTGGAAGTGCCCAAGATGTCTTTCACCAAGCGAAAGAATAAACAAACTGTTGTAGCCATACAAGGAAATCTGATTCACTGATTTTAAAAAACAAGTTATCAAGCCATGAAAAGACATGAAGGAACTTAAAGTACATAATGCTAGAAAGAAGCCAGTCTGGAAACCCACATACTGTACCACTCCAACTCTAGGACATTCTTGGAAAGTCAAAAAGATAGAAGTAGTAAAATGGTGAGTGGTTGTCAGGGGTGGAGGAGAGGAGGACGCGTGAAATGGTGAAGCACAGGGAATTTTCAGCAGTGAAACTCTTTCGCATGATGCTGTATTGGGGATTTAGGACATTATGTAATTGCCAAAACCCATAATCTGTGAAAC
TCAAAGAATGACCTCTAATGTAAACTATGGACTTTAGTTGATAATGACGTATCAACAGTGGTTCATCAATTGTAATGAATGGACCACACTAATACAACATACTAGTAGGGAAAATTGTGTGCTGGAGGACAGGGGAGCCTAGGAGAACTCTCTGTATTATCCACTCAAGTTTTCTGTAAACCTAGAACTGTTCTAAAAAATAATGTCTATTAACTGTTTTTTTAATTAGGATGCAGCAGCCCCATATCAAGGTTTTGGTGGCATCCTGTAATTGTGTGGTTAGTACTTGGCATTGAAGTGCACCAACCTGGAGTCAGAGCAGTTGGAGATTTCAAGGCCTGTGCCATTTACCTCTAACCCTGGGGTGCCCCTGGAATACAGATAGCAGATCGGTTAAGGAGAAGCAGCCTCAGCAATCTAGACAGTGCAGGTTTCTGGTGAGGACAGGTAAAAACCATCTGGGTGGGCAGAACTTGGTGAAGACCAGAAACCACTGAGACTCAGCAGCTGCCGCAGTGGCACCCACAAATCAAAGGAGGGGGCTGGGAAGAGCTAAGGGCTACTGGATGAGCTCTCTGCCTGCAAGACAGAAGCAGATCCAGAGATTTTGGAAAATAATGTAGGTTTCAGTACAGTGTGATCTCTTCAAAAAAGTAGAGAGAATGAAAAGGAAAGAAAAAGAGAGAGCATGAGAGAGAAAGAAGAAGAAAAGAAGAAAGGAAGAAAGGAAAGAAGGGAGGGAAGGAGGGAAGGAGGAAGGAAGGGAGGGATGGAGGGCGGGTGGGAAGGAGGGAAAGAATAAAAAGAGAGAGAAAGAGAGTTGGAGGGAAGTAGGGAAGGAAGGAAGGAAGGAAGGAAATGAACAAATTTACATGAAGATGAGAACAGTGGGGAAACTTACACCACCAATATTTTCCATTAACAGGAACACGCTAAGTAGTTATTAGAGAAAGACACGCTACTGTAAAACAATATACTGTTTCCATGGGGTACAACAACCCCTTCCTCCTCCTCTGAAACACATTCTATCTCTGGCTCACTGTTGCCAGAGACACTGAGTCTTGTCTTTGGATACGTTCTGGTGCCCACAAGAATGAGATGAGACAGTGGATCCCAGAACACCAGGCCACGAACTTCCCTGTTGCTCCTTGTCCACTCCAGAAGCTACCCAGCTGCAGTTGGGGACCTCAGCCCCTGGGTCTGATGTCATCCATTTGCCTTTCTCAATGGACTTCTCTCCTTGCACTGGCTCCTACTCCCCCAGGACCTGTGGGTGACCACATGAGAAGAACACAAACAGGCCATGCCCCTTTCTTTCTCCCCCTCTCAATGCCTGCAGTAGTGGGTTCCATGGGGTAGTGACCTGAGATTTACTCATTGTGGGGCCTCTAGCCCAGAGCAGGGCCTACTACCTCACAGTCACCCCATGAATGCTCAGTGAAAGAAGACGTCCACCACAAGGTCCTGGGGAACCAAGAATTCCACTGTGGCCCATAAATTCTAAGTCTACAGGATTCTGGAATGGGAGATGGGAAAGGCCTTCAAAAGTGGCCACTTTTAACCCATTATACTGGCAACTGAGCCATGTTTCCCCATCCTGGACACATCCAGAGGGCACTGCCTAAAACCAGACACATCTCCCCACCCAGGACAGTGTAGGAGCCTTAGCCTGGGGGATGCAGGTGGACAGGGAGGGGGTGAGCCACCAAAGCTGAAGAGCAGAAAGCAGGTGAAAGGGGACAGCAGGGTGGAAACAGAGAGAAATGGGGGCAGAGAATGGGGGGTGAGAGGGGAAGAGTGAGGAGAGGGATGCAGATCTAGCTAGTAAGGAAAAGTCCTGGAGAGAACACTGTCCTCTCCTGAAGTAAAATCACTTCTACCTGACCACGGCACTGCAGCTCATGGGCAGCACATGCTGTGGATATTTGTTCATTCATTTAACAAATATTTATTTAATATCTGTTGCATGCCAAGCAAGGCCCTGCAATGTTTAGGGACCT
TGACATCTTCCCTTCACATCTGAGTCATAATACAAAGAGGACTCTCTGACCCCACTGAGCTGGCAATGCCTCGGGATTTTTACCTGTTGGATCTGGCAGCTCTTGATGTCAGCCCACACCATGTGAGGCTGCTCTTGGTGCACCCAATGGGGAAGTTTCTACATCAGGGCCTCGGAGAATCCACTGGAAGCCCTGGACAGTGGGAGTCAGCGGCATCCCCAGTGTGGAGGCCAAGAGCACACAGTGCTTAAGCTCCAGGCACCCTCAGGAGGACGGCAAGGGACAATTGGCTGGTGAGAGCCCGGGTCACCGGGAACCTTCGCCTGGGTCTAAACAGGATTTGCCTTCAGATTGCCTGTGAGATAAAAGAGAGAAATCAAGGTTAACGTTGAGATTTAGGGCTTCGGTAACTTGAAGGATGGAGCTGCCATTTACGGAGACTGGGAAGACCCAGGGAAGAGCAGATTGAAAGGTGGTGGGAACTAGAGGTGGTTGGGTTTCTGTCATATGTAATCAACAGTCCTGACCAGCCTGGGCAACATAGTAAGACCCCGTCTGGGAAAAGAAAAAAGGAAAAATAAGCTGAGCATGGTGGTGCACACTTGTAGTCTCAGCTACTTGGGAGGCTGAGGCAGGAGGATTCCTTGAGCCTTCAGTTAGCGGTTAGTGAGCTATGATGGCACCACTGTACTCCAGCCTGGGGGGAAAAAAATAAAGAGTCCTGACTAAATACTAGAGTAGCCAGGGAAGTTTTCACAAAGTAAGTAATATTTGAGGCAGATCTTAGTGAACAAGAATTCCATTATTTCTGTTAGGGAATTAAGAGAGTGTGGGTGTCGTTAGTTAATGCTTATTAAAGTAGCTTTGGAATCTCATCTACTGGTCTAGCTGGTCTATCTGTACACGTATATTGTATATGCTGTCTCTCTGAGCTTTCGCTAGGTTATGCTACGGTAACAAAAGCCCCAAAATCTTAGCAGCTACACATACGAAGGTTTATTTTTCATTGACATGTCCTTTTATGGCAGGTTGACTGTGACTCTACTCTATACAAGCTACTTTATTTGTTAGATGGTGAAAACTGTGATACTCGGAGGTTGTTGAATATGGTATTAGTATGTTCATTCATTCATTCATTTAAGAAATATTTATTCAATATCTGTTTCATGCCAGGCAAGGTCAAGTACTGAGAATACACTGGTGAATCAAAGAGACAAAATCTCTAATTGCCAGGAGCTTATATTGAAAATCAGATTAAACACATACAAAATCATCATAATAACAACAATGAATACTATATTCATAAATAATAGCTGTAAGAGATTTTAGTACATCTTTTAAATTAGAAAAATATAAAAATTATTAAAACTAAAATGGCCAGGTGTGATGGCTCATGCCTGTGATCCCAACACTTTGGGATGCCAAGGTGGGAGGATCATTTCAGCCCAGGAGTTTGAAACCAGTCTGGGCACTACAGGAAAACCCTGTCTACAAAAAGGAGAAAATTAGCCGGGCACAGTGGTGCATGCCTGTAGACCCAGCTACTAAGGAGGCTGAGGTGGGAGGAGTGCTTGAGCCTGAGAGATCAAGGCTGCAGAGAGCCATGATCATACCACTGCACTCCAGCCTGGGCGACAGAGCGAGACACTGTCTCAAGAAAAAAAAAAAAATTATTTGATGTAGTCCTAAAACTATTATGTAGAATACTATTGTTTATATCACAGCACGTGAGCCCCTTAAATGGCTTAACACTTATTTAGGTATGATCCATAAAGCTTTTCTGGTAATTAAGTATACTTAAGAACAATTAAGTATAAAAGAGTTACTGCCTTGACAGGAAGATTGTAAAAATTTTAAAAAGACAAATAAATAAAAGAGTAAAAACTGTAGCTCTGTGAGGCTCAAATAACATCTAATTCAAGTCACAATGAACATCTAGCAATCATTCTGAACACCATATAATTCACTTAATACGTTTTGCCTGAACACCCAACACATCT
GAATTACCAACACCCATATGTAGCCAAGAAACTGGCAATCATTTATAAATTATCACCTATGACTCCATCTGCTCTACGCACTTATTTTTTAAATTTTATTCATTTATTTATTATTTTTATTTGTTGTAGAGATGGGATCTCACTATGTTACCCAAGTTGGTCCAGAAACAGAAACAGACCCACACTAATTTCATAAATCAGATGACCATACAGTCATTCGATTTATGAAAAAAAGTGCCACATGGTGCGGAAGGAAAAGGATGGTCTTTTCAATAAATGGTGCTGGATCAAGCAGACACATCCATGTAGTAAAAAGTGAATCATAGCCAGGTGGGGTGGCTCACACCTGTAATTCCAGCACTCTGGGAGGCTGAAGCGGGCAGATTACTTGAGCCCAGGAGTTCGAGACCAACCTGGGAAACATGTTGAATCCCCATCTCTACAAAAAATATGAAAATTAGCCAGGCATGGTGGCACATGCCTATAGTCGCAGCTACTCAGGAGGCTGAGGTGGGAGGATCACTTGAGCCAGGAGATGGAGGTTGAGTGAGCTGAGATCCTGCCACCACACTCTAGCCTGGGCAATAATAGACTGAGGCCCTGTCTGAAAAAAAAAAAAAGCAAAAACTAAAATAAAATCGTTATAAGGTTAACACAGAAAAATGTGTTCATACTCTTAGGTTAGGCATTGATTTCTTAAACAGGACACAAAAAACAGTAACCATAAAGGAAAAGATTAATAAAGTATAATTTCATTAAAATGAAGAATCTCAGGCTGGGTGCAGTGGCTCATGCCTGTAATCCCAACCCTTTGGGAGGCCGAGGCAGGTGTATCACTTGAGCCTAGGAATTCCAGACCAGCCTATGCAACGTGGCAAAACCCATCTCTACTAAAAATACAGAAAACAGCTGAGTGTGGTGGTACTCCCCTGTAGGTCCCAGCTACTTGGGGGCTGAGGCAGGGGGATCACCTGAGCCTTGTGAGGTCAAGGTTGCAGTGAGCTGTGATTGTGCCACTGCACTCCAGCCTGGGCGATGGAGTGAGATCCTGTCTCAAAAAGAAAAAAAAAAAAAGAGAATCTCCCTTCATGAAAAAACACCATAAAAGAGTGAAAACGCAAGCTACAGATTGAAAAAAGGGAAATGCAATACATATAAATCCTAGAAAGGAGGCATATCCAGAATAAAGTATTACAAATCAACAGGAAAACAAGCATATCAATGAAAACTGGATAAAAAGATTTAACAGGCACGTCACAAAAGAGGACATATAAATGGCAATAAAAGATACTCAATCTCAATGAAACCACACTGATATATTACTGCACCCCTACTAGAATGGCAAAATAATTTTTAACTGACAGGTATCAGCGAAGATGTGGGGTAACCAGCATATCCCTGCTAAATGGTACAACTACTTTGGGAAAATGTTCAACAATATGTAATACTAAAGTTTTATCATTCATATACCTCTAAAACCAACAATGCCACCCCTACAAATATACCCCAGACTAGTAATGTTCAATTTCTTGATCTGTGGTGGTTCACTTGGTAAAAATTCATTACTTTTTTTTTTTTTTTTTTGAGACAGGGTCTCACTCTGCCATCCAGGTCGGAGTGCACTGCCATGATCACGGCTCACTGCAATCTCAACCTCCCGGGCTCTGGTGATCCTCCCAACTCAGCCTACCGGGTAGCTGGGACTACAGGCACACGCCACCACACACAGCTAACTTCTGTATTTTTAGTAGAGAAAGGGTTTTGCCACATTGCCCAGGCTGGTCTGGAAATCCTGGGCTCAAGTGATCTACCCACCTTGGCGTCCCAAAGTGCTGGGATTACAGGTGTGATCACTGCGCCCGGGCCACCTGCACATGTAAAATTGTGAACTTCTGTATACTTCAGTAACTTTTCCAAGATTTCTTTGACGCAAAGTTCTCAGAAATCTTAAAGCTAGCATTTCAGAATAGAAAAAGTAGCTTCTGGTTCACTAGTGAAAT
TTTACCAATAGAATTTAAAAACAAAAAGCTACTAACGCATATCAGCTCAGAACACTACCAGCAGATCTTTTCTTTAACTTCCTGAAGCACTGGGATTCATTCTTTTGGCAAAGAAAGGATGAACAACACTGTAACCCAAAGAAAAGATACCACTGCCAGAAAAGACTTCTTTTCGAAAGCAGCTCTAAGCAAAAGATAGGAGGAAAACAAGGAAGCCAGGCCAAACGTCTTGGTTAACTCTCCGCTGAAAGGACGCCACATGAGATGATCTAAGAAGCCAGCCAGCCAGCCAGACGCAGGGAAATCACAGCAACTCTTTGGAGTGCAAACAGCAACCCCACAATCCAATCTACCCGAAATCCTGCGGTTCATTTGAGGCTTGCCCCGCTAGTCAGGAGGTGATTCAGTGATGGCTACAAATGCTGCTCATGTGCATCCTGGAGCTGGCACACCTGGCTTGCCCATCACCAGCCTGGAGACACCGCCAGGAGCAGAAGCCCGGAGGCCAGTAAAGACCCCAACTTTGCAAGTCAGGGGCGCGAGCGCGCTCGCCTCTCAGGTCCGCAGAGGGAACGGATTTCTGGCCTGGAGGGTGGGGTGCGGGGTCAGTGTCCTCTACAGGATATAGGAGGACGTGCCCCCGAAGCTGCTCCGTCCCTCCACCCCCTGGGATGCCACAGAACACCCGCCAGCGAGTTTCTTCCCCAGCGCCCACGAGAGTTGGGCTGCGGGCGGCAGCGGCAGGCGAAGAATCCAGCGCGGGGAACTCAGGCCCCGGCGGTGCACGACCCCCCACAGCCCCCACCCGCCCCCGCGCTCGCGCAACAAAACTTGCCACGGCCGCGCCTCGACCCAGCTGTGCGCCCGCGGGTCCCGGATTCACCGCCCGCCCAGCCTGGCGCGGCGCCCTCACCTCAGAAACGCTGGGTGGACTTCGCGTAACTTCCCATTCACAGGGCAGCCGGCAGCCGCGCCGCCGCGCCTCGGCCCAGCTCCTGGCGCCGCAGATCGCCCGTCCCGCGTTCCCAAAAGCACCGCGCTCGCTCAGAAGCTCGGGCAGCCTCGCGACCCTCACCTACGCCTCCCAGTACCGCCGCTGTCTCAACCGCCACCCAGCCCCTCGCCTGCGCCTGCGCCTGCAGCCCACTGGCTCCTCAGGATCCCGATGGGCGTGTCAGGAT\",\r\n                16888922 - 1);\r\n\r\n            //coding region between 34..528\r\n            var codingRegion = new CodingRegion(16890438, 16918516, 1007, 4651, 3645);\r\n\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   28, 16888922, 16890681, 4408, 6167),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 27, 16890682, 16891301, 4407, 4408),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   27, 16891302, 16891413, 4296, 4407),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 26, 16891414, 16892127, 4295, 4296),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   26, 16892128, 16892302, 4121, 4295),\r\n                new 
TranscriptRegion(TranscriptRegionType.Intron, 25, 16892303, 16893674, 3844, 4121),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   25, 16893675, 16893846, 3673, 3844),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 24, 16893847, 16894473, 3672, 3673),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   24, 16894474, 16894525, 3621, 3672),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 23, 16894526, 16895567, 3620, 3621),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   23, 16895568, 16895731, 3457, 3620),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 22, 16895732, 16899636, 3456, 3457),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   22, 16899637, 16899688, 3405, 3456),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 21, 16899689, 16900981, 3404, 3405),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   21, 16900982, 16901187, 3199, 3404),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 20, 16901188, 16901651, 3198, 3199),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   20, 16901652, 16901724, 3126, 3198),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 19, 16901725, 16902761, 3125, 3126),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   19, 16902762, 16902976, 2911, 3125),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 18, 16902977, 16903811, 2910, 2911),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   18, 16903812, 16903914, 2808, 2910),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 17, 16903915, 16905687, 2807, 2808),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   17, 16905688, 16905897, 2598, 2807),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 16, 16905898, 16907239, 2597, 2598),\r\n      
          new TranscriptRegion(TranscriptRegionType.Exon,   16, 16907240, 16907451, 2386, 2597),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 15, 16907452, 16907914, 2385, 2386),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   15, 16907915, 16907987, 2313, 2385),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 14, 16907988, 16909038, 2312, 2313),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   14, 16909039, 16909253, 2098, 2312),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 13, 16909254, 16910088, 2097, 2098),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   13, 16910089, 16910191, 1995, 2097),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 12, 16910192, 16911983, 1994, 1995),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   12, 16911984, 16912193, 1785, 1994),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 11, 16912194, 16913544, 1784, 1785),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   11, 16913545, 16913756, 1573, 1784),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 10, 16913757, 16914219, 1572, 1573),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   10, 16914220, 16914292, 1500, 1572),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 9,  16914293, 16915343, 1499, 1500),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   9,  16915344, 16915558, 1285, 1499),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 8,  16915559, 16916393, 1284, 1285),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   8,  16916394, 16916496, 1182, 1284),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 7,  16916497, 16918341, 1181, 1182),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   7,  16918342, 16918551, 972,  
1181),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 6,  16918552, 16918653, 971,  972),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   6,  16918654, 16918808, 817,  971),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 5,  16918809, 16919935, 816,  817),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   5,  16919936, 16920062, 690,  816),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 4,  16920063, 16921086, 689,  690),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   4,  16921087, 16921156, 620,  689),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 3,  16921157, 16921425, 619,  620),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   3,  16921426, 16921504, 541,  619),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 2,  16921505, 16935002, 540,  541),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   2,  16935003, 16935274, 269,  540),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1,  16935275, 16939832, 268,  269),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   1,  16939833, 16940100, 1,    268)\r\n            };\r\n\r\n            var rnaEdits = new IRnaEdit[]\r\n            {\r\n                new RnaEdit(5892, 5891, \"AAAAAAAAAAAAAAAA\"),\r\n                new RnaEdit(5799, 5799, \"T\"),\r\n                new RnaEdit(5675, 5675, \"G\"),\r\n                new RnaEdit(5655, 5655, \"G\"),\r\n                new RnaEdit(5390, 5390, \"G\"),\r\n                new RnaEdit(5174, 5174, \"G\"),\r\n                new RnaEdit(5150, 5150, \"C\"),\r\n                new RnaEdit(4993, 4993, \"A\"),\r\n                new RnaEdit(4828, 4828, \"G\"),\r\n                new RnaEdit(4683, 4683, \"G\"),\r\n                new RnaEdit(4637, 4637, \"G\"),\r\n                new RnaEdit(4530, 4530, \"A\"),\r\n                new 
RnaEdit(3845, 3844,\r\n                    \"GAAATTGAAAAGTACCAAGAAGTGGAAGAAGACCAAGACCCATCATGCCCCAGGCTCAGCAGGGAGCTGCTGGATGAGAAAGAGCCTGAAGTCTTGCAGGACTCCCTGGATAGATGTTATTCGACTCCTTCAGGTTATCTTGAACTGCCTGACTTAGGCCAGCCCTACAGAAGTGCTGTTTACTCATTGGAGGAACAGTACCTTGGCTTGGCTCTTGACGTGGACAGAATTAAAAAGGACCAGGAAGAGGAAGAAGACCAAGGCCCACCATGCCCC\"),\r\n                new RnaEdit(3769, 3769, \"C\"),\r\n                new RnaEdit(3554, 3554, \"C\"),\r\n                new RnaEdit(3207, 3207, \"A\"),\r\n                new RnaEdit(3140, 3140, \"C\"),\r\n                new RnaEdit(3136, 3136, \"T\"),\r\n                new RnaEdit(3107, 3107, \"T\"),\r\n                new RnaEdit(3103, 3103, \"A\"),\r\n                new RnaEdit(2993, 2993, \"C\"),\r\n                new RnaEdit(2944, 2944, \"G\"),\r\n                new RnaEdit(2840, 2840, \"G\"),\r\n                new RnaEdit(2810, 2810, \"T\"),\r\n                new RnaEdit(2706, 2707, \"GC\"),\r\n                new RnaEdit(2695, 2695, \"T\"),\r\n                new RnaEdit(2692, 2692, \"G\"),\r\n                new RnaEdit(2509, 2509, \"A\"),\r\n                new RnaEdit(2299, 2299, \"A\"),\r\n                new RnaEdit(2294, 2294, \"G\"),\r\n                new RnaEdit(2290, 2290, \"C\"),\r\n                new RnaEdit(2222, 2222, \"G\"),\r\n                new RnaEdit(2009, 2009, \"G\"),\r\n                new RnaEdit(1964, 1964, \"G\"),\r\n                new RnaEdit(1893, 1894, \"GC\"),\r\n                new RnaEdit(1882, 1882, \"T\"),\r\n                new RnaEdit(1879, 1879, \"G\"),\r\n                new RnaEdit(1696, 1696, \"A\"),\r\n                new RnaEdit(1652, 1652, \"G\"),\r\n                new RnaEdit(1486, 1486, \"A\"),\r\n                new RnaEdit(1481, 1481, \"G\"),\r\n                new RnaEdit(1477, 1477, \"C\"),\r\n                new RnaEdit(1409, 1409, \"G\"),\r\n                new RnaEdit(1405, 1405, \"A\"),\r\n                new RnaEdit(1318, 1318, \"G\"),\r\n                new RnaEdit(1021, 
1021, \"C\"),\r\n                new RnaEdit(932,  932,  \"A\")\r\n            };\r\n\r\n            const byte startExonPhase  = 0;\r\n            const bool onReverseStrand = true;\r\n            var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, onReverseStrand, startExonPhase,\r\n                rnaEdits);\r\n\r\n            var expectedCodingSeq =\r\n                \"ATGGTGGTATCAGCCGGCCCTTGGTCCAGCGAGAAGGCAGAGACGAACATTTTAGAAATCAACGAGAAATTGCGCCCCCAGCTGGCAGAGAACAAACAGCAGTTCAGAAACCTCAAAGAGAAATGTTTTGTAACTCAACTGGCCGGCTTCCTGGCCAACCGACAGAAGAAATACAAGTATGAAGAGTGTAAAGACCTCATAAAATTTATGCTGAGGAATGAGCGACAGTTCAAGGAGGAGAAGCTTGCAGAGCAGCTCAAGCAAGCTGAGGAGCTCAGGCAATATAAAGTCCTGGTTCACTCTCAGGAACGGGAGCTGACCCAGTTAAGGGAGAAGTTACGGGAAGGGAGAGATGCCTCCCGCTCATTGAATCAGCATCTCCAGGCCCTCCTCACTCCAGATGAGCCAGACAAGTCCCAGGGGCAGGACCTCCAAGAACAGCTGGCTGAGGGGTGTAGACTGGCACAGCACCTTGTCCAAAAGCTCAGCCCAGAAAATGACGAAGATGAGGATGAAGATGTTCAAGTTGAGGAGGCTGAGAAAGTACTGGAATCATCTGCCCCCAGGGAGGTGCAGAAGGCTGAAGAAAGCAAAGTCCCTGAGGACTCACTGGAGGAATGTGCCATCACTTGTTCAAATAGCCACGGCCCTTGTGACTCCAACCAGCCTCACAAGAACATCAACATCACATTTGAGGAAGACAAAGTCAACTCAACTCTGGTTGTAGACAGAGAATCCTCTCATGATGAATGTCAGGATGCTGTAAACATTCTCCCAGTCCCTGGCCCCACCTCTTCTGCCACAAACGTCAGCATGGTGGTATCAGCCGGCCCTTTGTCCAGCGAGAAGGCAGAGATGAACATTCTAGAAATGAATGAGAAATTGCGCCCCCAGCTGGCAGAGAAGAAACAGCAGTTCAGAAACCTCAAAGAGAAATGTTTTGTAACTCAACTGGCCGGCTTCCTGGCCAACCAGCAGAACAAATACAAATATGAAGAGTGCGAAGACCTCATAAAATCTATGCTGAGGAATGAGCGACAGTTCAAGGAGGAGAAGCTTGCAGAGCAGCTCAAGCAAGCTGAGGAGCTCAGGCAATATAAAGTCCTGGTTCACTCTCAGGAACGAGAGCTGACCCAGTTAAGGGAGAAGTTACGGGAAGGGAGAGATGCCTCCCGCTCATTGAATCAGCATCTCCAGGCCCTCCTCACTCCGGATGAGCCAGACAAGTCCCAGGGGCAGGACCTCCAAGAACAGCTGGCTGAGGGGTGTAGACTGGCACAGCACCTTGTCCAAAAGCTCAGCCCAGAAAATGACGAAGATGAGGATGAAGATGTTCAAGTTGAGGAGGCTGAGAAAGTACTGGAATCATCTGCCCCCAGGGAGGTGCAGAAGGCTGAAGAAAGCAAAGTCCCTGAGGACTCACTGGAGGAATGTGCCATCACTTGTTCAAATAGCCACGGCCCTTGTGACTCCAACCAGCCTCACAAGAACATCAACATCACATTTGAGGAAGACAAAGTCAACTCAGCTCTGGTTGTAGACAGAGAATCCTCTCATGATGAATGTCAGGATGCTGTAAACATTCTCCCAGTCCCTGGCC
CCACCTCTTCTGCCACAAACGTCAGCATGGTGGTATCAGCCGGCCCTTTGTCCAGCGAGAAGGCAGAGATGAACATTCTAGAAATGAATGAGAAATTGCGCCCCCAGCTGGCAGAGAAGAAACAGCAGTTCAGAAACCTCAAAGAGAAATGTTTTGTAACTCAACTGGCCTGCTTCCTGGCCAACCAGCAGAACAAATACAAATATGAAGAGTGCAAAGACCTCATAAAATCTGTGCTGAGGAATGAGCGACAGTTCAAGGAGGAGAAGCTTGCAGAGCAGCTCAAGCAAGCTGAGGAGCTCAGGCAATATAAAGTCCTGGTTCACTCTCAGGAACGGGAGCTGACCCAGTTAAGGGAGAAGTTACGGGAAGGGAGAGATGCCTCCCGCTCATTGAATCAGCATCTCCAGGCCCTCCTCACTCCGGATGAGCCAGACAAGTCCCAGGGGCAGGACCTCCAAGAACAGCTGGCTGAGGGGTGTAGACTGGCACAGCAACTTTTCCAAAAGCTCAGCCCAGAAAATGACAATGATCACGATGAAGATGTTCAAGTTGAGGTGGCTGAGAAAGTGCAGAAATCGTCTGCCCCCAGGGAGATGCAGAAGGCTGAAGAAAAGGAAGTCCCTGAGGACTCACTGGAGGAATGTGCCATCACTTGTTCAAATAGCCATGGCCCTTATGACTCCAACCAGCCACATAGGAAAACCAAAATCACATTTGAGGAAGACAAAGTCGACTCAACTCTCATTGGCTCATCCTCTCATGTTGAATGGGAGGATGCTGTACACATTATCCCAGAAAATGAAAGTGATGATGAGGAAGAGGAAGAAAAAGGGCCAGTGTCTCCCAGGAATCTGCAGGAGTCTGAAGAGGAGGAAGTCCCCCAGGAGTCCTGGGATGAAGGTTATTCGACTCTCTCAATTCCTCCTGAAATGTTGGCCTCGTACCAGTCTTACAGCGGCACATTTCACTCATTAGAGGAACAGCAAGTCTGCATGGCTGTTGACATAGGCGGACATCGGTGGGATCAAGTGAAAAAGGAGGACCAAGAGGCAACAGGTCCCAGGCTCAGCAGGGAGCTGCTGGATGAGAAAGGGCCTGAAGTCTTGCAGGACTCACTGGATAGATGTTATTCAACTCCTTCAGGTTATCTTGAACTGACCGACTCATGCCAGCCCTACAGAAGTGCCTTTTACATATTGGAGCAACAGCGTGTTGGCTGGGCTCTTGACATGGATGAAATTGAAAAGTACCAAGAAGTGGAAGAAGACCAAGACCCATCATGCCCCAGGCTCAGCAGGGAGCTGCTGGATGAGAAAGAGCCTGAAGTCTTGCAGGACTCCCTGGATAGATGTTATTCGACTCCTTCAGGTTATCTTGAACTGCCTGACTTAGGCCAGCCCTACAGAAGTGCTGTTTACTCATTGGAGGAACAGTACCTTGGCTTGGCTCTTGACGTGGACAGAATTAAAAAGGACCAGGAAGAGGAAGAAGACCAAGGCCCACCATGCCCCAGGCTCAGCAGGGAGCTGCTGGAGGCAGTAGAGCCTGAAGTCTTGCAGGACTCACTGGATAGATGTTATTCAACTCCTTCCAGTTGTCTTGAACAGCCTGACTCCTGCCTGCCCTATGGAAGTTCCTTTTATGCATTGGAGGAAAAACATGTTGGCTTTTCTCTTGACGTGGGAGAAATTGAAAAGAAGGGGAAGGGGAAGAAAAGAAGGGGAAGAAGATCAACGAAGAAAAGAAGGAGAAGGGGAAGAAAAGAAGGGGAAGAAGATCAAAACCCACCATGCCCCAGGCTCAGCGGCATGCTGATGGAAGTGGAAGAGCCTGAAGTCTTGCAGGACTCACTGGATAGATGTTATTCGACTCCGTCAATGTACTTTGAACTACCTGACTCATTCCAGCACTACAGAAGTGTGTTTTACTCATTTGAGGAACAGCACATCAGCTTCGCCCTTGACGTGGACAATAGGTTTCTTACTTTGATGGGAACA
AGTCTCCACCTGGTCTTCCAGATGGGAGTCATATTCCCACAGTAA\";\r\n\r\n            Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence());\r\n        }\r\n\r\n        [Fact]\r\n        public void RnaEdits_big_test()\r\n        {\r\n            //NM_005960.1, chrom: chr7:100547052-100611619\r\n            var genomicSeq = new SimpleSequence(\r\n                \"GCGCTGACGTCTGTCTGTCCGGGTGCAGGGAGAAGGGAGGAAGAGGGGAGAGGTGGGGCGGTGCAAAGGTGAGGCTGTGCTCAGCCCTGACGCTCAGCAAAACCGATAACCAGCACTTTCATTACGTGCACGCCCCAGGGCCACGTCCCTGCCGCTGTCTTGGTCCTGAAGCCTGTTCTGCCCCAGCCCCCTGCCCGCTGGGCCCATGCAGCTGTTGGGGCTCCTCGGCCTCCTCTGGATGCTCAAGGCCTCCCCGTGGGCCACAGGTAAGGGGGAGAGGCGGAAGGGGGTTGGAGAAAAGCTCCTGATGTGATGTTCCAGGAAAGGGGAGGGAAAAGTGGCTGTAAGGCCTGGGGAGGGGGGATAAGAAGGCACCGCTTGGGGCTCTGGGTGCAGGGAGAACCGAGGCACGGCCTGACTGGGGGAGGGGGCGATGAGGAGAGGTTTCTTCCAGAGCTCCAGGTGCAGGGAAAACCCCGAGGTTGGGAAAGAGTGAGGGAGCTGGGTCTCTGCCACTCTCCACCAAGCACTGAGCAGGTTGCAGCGGCTGAGCCCCAATCTGTATCTGCAGCTGGAGGGTAGAGGGTGGGATTTACGTCTTCCCAGAGCAGTGCCCTTCCTGTCTTGACTCCTTCTGTCACCTGCCTCATGCCCCCAGCTTGAGTGTCCCCTTCACACTGGCCTCTCCCTCCCTGACAGCCCTCTAACTTCTACCCCTGGTCTCGGTCCTCTGGTTTCAGCCTCTCTGCCTTTTGTCCCCCGGCGGCTCCTCCCCAGCTCTGCCGTCACTCTCTTACCCCGGCCAGGGCCCATGTGTCTGGGTACAGCTGTTGGTACCAGGGCCGGGACAGGGAGCTCCTGATGTCCACTTTGCTTCCAAGGGTGCGTCTGAGACTACCCGTCGTGGGGTCCCAGGCTTCTCTCTTCTGCCCTGCAGGAACACCTCGCAATTCCTCTATACTTCTCTTTTCTCTGTACTTCAGTGTCTGCTTCTGATCCCCGATCCCAGGCCACCCAGCCTACAGGCCCATGAGTCCCCTTCTCAGTCACCTCCAGGGCCACATCCTGGAGCCAAGGGCTGTAGCCTGGGGATTCTCATAATCCCTGACCCCACTTCCCTGGCACCCACGAGCTAGGTTGAGACGTGACACCCCAGCTCTCAGCCACAAGATGGGCTGTGCCCGAGGTGAGGGGTAGCAGATCGGGTACTTCCCACTTCCCGTCTGCTGTGGCTGCCTGTCTTCCTTGTCCCTGACACCCCCGACAGCCGGATCTCTGATCCTAACTCTGACAAATTGTGAAATGGGTTGAAATCCACATGCTGGGGTTCATGCTTGTAAACTAATGAATCCCACGGCCAAAAGGGAATAGTATAGAAAAATATGTCTATTTGTGTGATGAACACTCACTGCTAAGCCTTAAGGTCTCCAGAACTCATCACGCCTGACTGCTGAGGTAGCTCCTTCTGGAAGTTTCCTTTTCTATGCTGTCTCTCTGCCTCTTCACCTAGTCCTCACTCCATGCCCTTTGAAGTCATTAGTGTGTGAGCTTAGCCTGTTTCTCTTTGGTGTTCTCTGGGCAGTTTTTTGTTTTTGTTTTTGTTTTTGTTTTTGTTTTTGATGGAGTCTTGCTCTGTC
ACCCAGGCTGGAGTGCAGTGGCACCATCTCAGCTCACTGCAGCCTCTGCTTCCTGGGTTCAAGCGATTCTCCTGCCTCAGCTACAGGTACACGCCACTACGCCTGGCTAATTTTTGTATTTTGTTTTAGTAGAGAAAGGGTTTCACCATGTTGGTCAGACTGGTCTCGAACTCCTGACCTCAAGTGATCTCCTGCCTTGGCCTTTCAAAGTGCTGGGATTACAGGCATGAGCCACCGTGCCTGGCCCTCTTTGTTCAGCTTTCCCTGTCTCCTGGTTTTTGTGATGCGCCCCCTGCCAGGACATGGCTGGGTTCTCTCTTTTTCTCTTTGAAAGCGGAGTCAGCCCAGACAGCAGCAGGGTGCCGGGAGAAGCAGTGGTGGGAGCAGAAGGTTAATGGGGGGATGGGAGCACTCCAGGCAGTAGCAGGGGGAGGAGAGGAAGGGGCAGAGGGAGGAGGAGCCTGGGTGTGACCAGAGGAGGGAAGGGAGGGGAGGGAGGCTCTGCCCAGCTCGGCTATATCAGGACAGGAGGACCTGCCATGACAAGGCCAAGGCCCGGTGAGGAATGAGGGCTCCCATGCCCCACTGCTCCCCTAGGAGCAGACAGGCAGTCGTCTCCAGCACCACAAAGCACCCAGCTCCAAGCTGCCTCTGATGCAGGAGTCAGCTGTAATATGCCCTGCCCTCTGTGATGCTGCCTGGAAAATGGGTGAGTGAGTAGCTTACATGAGTGATGTAACAAAATGACCCACGGATTTACCAGTGGATTCCTCTGCTCTGCCGCCAATGCAGGAACTTTATCCACGGCCACATCCATCTCTCAAGTGCCTTTCCCCAGAGCAGAAGCAGCCAGCGCTGTGCTCAGCAATTCTCCACACTCCAGAGACCTGGCTGGGTGGCCACTTGGTGTCCCCCAGCTCGCCTCTCCTGCTCCTGGCCACAGGGAAAATGCACCTATGACACTCACTACCTCCCCCCATGACACACTCATCTCTGAAACATTGCTCAACTCTCCAGTCAGTTCCAACACCTCAACCACCCCGACGTCCAAGTTTGCCTTCAAGGTTGAAACCACTCCACCCACCGTGTTGGTCTATTCAGCCACCACTGAGTGCGTGTATCCAACGAGCTTTATAATCACCATCTCCCACCCCACCTCCATCTGTGTGACCACGACGCAGGTGGCCTTCACCAGCTCTTACACCTCGACTCCCGTGACACAGAAGCCAGTGACCACCGTCACCAGTACTTACTCTATGACCACTACTGAGAAAGGAACGTCAGCCATGACATCTTCTCCCTCTACCACCACTGCAAGGGAAACTCCCATAGTGACAGTGACACCCTCCTCTGTGTCAGCCACAGACACAACCTTCCACACTACAATCTCATCTACAACTAGAACCACAGAAAGGACTCCCCTGCCCACTGGAAGCATCCATACAACCACGTCCCCAACCCCAGTATTTACTACTCTCAAAACAGCAGTGACTTCCACTTCCCCCATCACTTCTTCAATCACTTCCACAAATACAGTGACTTCTATGACAACGACCGCCTCCCAGCCCACAGCCACTAATACATTGTCATCACCCACTAGGACCATTTTATCTTCCACACCTGTCCTGAGCACAGAAACAATCACCAGTGGTATCACAAACACCACCCCCCTATCCACCTTGGTGACCACACTCCCCACTACCATCAGCAGGTCTACACCTACATCTGAGACCACCTACCCTACTTCTCCCACCAGCACTGTCACAGACTCCACTACCAAAATCGCCTACTCCACAAGTATGACAGGTACATTGTCCACAGAGACTTCTCTCCCACCCACCTCTTCCTCTCTCCCAACCACAGAAACAGCCACGACTCCTATGACAAACTTGGTAACCACCACCACTGAGATCTCCTCCCACAGTACTCCCAGCTTCTCTTCATCAACCATCTACTCCACAGTCACCTCACACAGTACTCCCAGATTCACTTCTTCAATC
ACCACTACCGAGACCCCCTCACACAGTACTCCCAGATTCACTTCTTCATTCACCAATACCAAGACCACCTCACACAGATCTCCCAGCTTCACTTCTTTGATCACCACCACGGAGACCACCTCACACAGTACTCCCAGCTTCACTTCTTCGATCACCACCACCGAGACCACCTCACACAGTGCTCGCAGCTTCACTTCTTCGATCACCACCACCGAGACCACCTCACACAATACTCGGAGCTTCACTTCTTCGATCACCACCACCGAGACCAACTCTCACAGTACTACCAGCTTCACTTCTTCGATCACCACCACCGAGACCACCTCACACAGTACTCCCAGCTTCAGTTCTTCAATCACCACCACTGAGACCCCCTTACACAGTACTCCTGGCCTCACTTCGTGGGTCACCACCACCAAGACCACCTCACACATTACTCCTGGCCTCACTTCTTCAATCACCACCACTGAGACTACCTCACACAGTACTCCCGGCTTCACTTCTTCAATCACCACCACTGAGACCACCTCAGAGAGTACTCCCAGCCTCAGTTCTTCAACCATCTACTCCACAGTCAGCACATCCACAACTGCCATCACCTCACATTTTACTACCTCAGAGACTGCGGTGACTCCCACACCTGTAACCCCATCTTCTCTGAGTACAGACATCCCGACCACAAGCCTACGAACTCTCACCCCTTCGTCTGTGGGCACCAGCACTTCATTGACTACAACCACAGACTTTCCCTCTATACCCACTGATATCAGTACCTTACCAACTCGAACACACATCATTTCATCTTCTCCCTCCATCCAAAGTACAGAAACCTCATCCCTTGTGGGCACCACCTCTCCCACCATGTCCACTGTGAGAATGACCCTCAGAATTACTGAGAACACCCCAATCAGTTCCTTTAGCACAAGTATTGTTGTTATACCTGAAACCCCAACACAGACCCCTCCTGTACTGACGTCAGCCACTGGGACCCAAACATCTCCTGCACCTACTACTGTCACCTTTGGAAGTACGGATTCCTCCACGTCCACTCTTCATACTCTTACTCCATCAACAGCCTTGAGCACGATCGTGTCAACATCACAGGTTCCTATTCCTAGCACACATTCCTCCACCCTTCAAACAACTCCTTCTACTCCCTCATTGCAAACTTCACTCACATCTACAAGTGAGTTCACTACAGAATCTTTCACTAGGGGAAGTACGTCTACAAATGCAATCTTGACTTCTTTTAGTACCATCATCTGGTCCTCAACACCCACTATTATCATGTCCTCTTCTCCATCTTCTGCCAGCATAACTCCAGTGTTCTCCACTACCATTCATTCTGTTCCTTCTTCACCATACATTTTCAGTACAGAAAATGTGGGCTCCGCTTCTATCACAGGCTTTCCTAGTCTCTCTTCCTCTGCAACTACCAGCACTTCTTCAACCAGCTCCTCTCTGACCACAGCTCTCACTGAAATAACCCCCTTTTCTTATATTTCCCTTCCCTCCACCACACCCTGTCCAGGAACTATAACAATTACCATAGTCCCTGCCTCTCCCACTGATCCATGTGTTGAAATGGATCCCAGCACTGAAGCTACTTCTCCTCCCACCACCCCATTAACAGTCTTTCCCTTTACTACCGAAATGGTCACCTGTCCTACCTCCATCAGTATCCAAACTACTCTTACTACATATATGGACACTTCTTCCATGATGCCAGAAAGTGAGTCCAGCATCTCACCCAATGCTTCCAGTTCCACTGGCACTGGGACTGTACCCACAAACACAGTTTTCACAAGTACTCGACTGCCCACCAGTGAGACCTGGCTGAGCAACAGTTCTGTGATCCCCCTACCTCTTCCTGGCGTCTCTACCATCCCGCTCACCATGAAACCAAGCAGTAGCCTCCCGACCATCCTGAGGACTTCAAGCAAGTCAACACACCCCTCCCCACCCACCACTAGGACTTCAGAGACACCAGTGGCCACTAC
CCAGACTCCTACCACCCTTACATCACGCAGGACAACTCGCATCACTTCTCAGATGACCACACAGTCCACGTTGACCACCACTGCAGGTTGGACCTTCTGCCTCTCTGTTCCCCTCCTTCCTCCCCTGCAAAATTCCTGTGTCACTGAGGTCAGGCTTTATCCTGAGCTTCCCTTTCTTTCTGTGTTTTCCAGGCACCTGTGACAATGGTGGCACCTGGGAACAGGGACAGTGTGCTTGCCTTCCGGGGTTTTCTGGGGACCGCTGTCAGCTCCAGACCAGATGCCAGAATGGGGGTCAGTGGGATGGCCTCAAATGCCAGTGCCCCAGCACCTTCTATGGTTCCAGTTGTGAGTTTGCTGTGGAACAGGTGGATCTAGGTGAGTTGCCAGAGCTATGCCTTCTGCACTTCCTCCCACAGGGTGTCACTGACTCTCCCCAGACTTATCCCTCTGTGGGGCCTGGAGGCACCCATGCCTTTTTGCCCGGTCCTTCCCTCCCTGCCATCTCTCCCATGCCCTCCGCTGCCCTGTGTCATGCTCCTCTCCGTCCTCACCCTTAGGAGGTGGCTGGGACTACCCTCCCTCCTGGGCCCATCTCCTGACTTGGGCTGCTTGGAGCTGTATCAGTTTCCAACTGCTGCCGGGCCAACAAACACAAATCTGGCTGCTGGAACAACACGACATTATCATGTTAGAATTCTGTAGATTAGAAGTCTGATGTGGGTGTCACTGGGCTGAAATCAAGGCGTCACCAGGGCTGTGTTGTCTTTCAGCGGCTCCAGGGAAGAATCCATTTTTTTGCCCTTTGCAGCTTCTGGAGCCTCCCACAGCAAGGCTGCATCTCTCTGTGTCTTTCTCCCATAGCCTCATCTCCCTCTAATGAACTCTGGCCTCCTCAATTGCTTCTCCCACTGTTAAGGACCCTTGTGATAACTTTGCCTCCTCCCCAAATAGTCTATGTTAATTTTCTCAAGATCAGCTGATTACGCCGGGCGGGGTGGCTCACACCTGTAATCCCAGCAGTTTGGGAGGCTGAGGGGGTAGGATCACCTGAGGTCAGGAGTTGGAGACTAGCCTGGCCAACATGGTGAAACTGTCTCTACTATTAGTTGGGCATGGTGGCAGATGCCTATAATCCTCACTATTCAGGAGGCTGAGGCAGGAAAATCGCTTGAATCCAGGAGGTGGATATTGCAGTAAAGCGAGATTTCGCCACTGCACACTAGCGTAGATGACAGAGGGAGAGTGAGACTCTGTCTTAAAAGAAAAAAAAAAATCAGCTGATTGTCTTATAATCCCTGCACTTTGGAAGGCCGAGGAGGGAGTATCGCTTGAGGCGAGGAGTTCAGGACCAGCCTGGGCAACACAGCGAGACCCTCATCTCCACAAAAAATTTTAAAAACTTACCTGGGCATGGTGGCTCATGCCTGTGGTCCCAGTTATTTGGAAGGCTGAGGTGGGAGAATCACTTGAGCCTGGGAGTTCAAGGCTGCAGTGAGCTATGATCCCCCTATTGCACTTCAGCCTGGATGACAGAATGAGACCCTATCTCAACAATAAAAAAAAGTTAGGCTGATTAGCAATCTAATTCAATCTGCACCCTTGATCCTCCCTTGCCATGTAGTATAGCATAGTCACAGTTCTGGGGATTAGGACATGGACATCTTCCCACTATGGGGGCAGCCAGGAGGGACCACAGGCTGACCGCTATCTTTCTGCCTGCTTTCACTCATCTCCACACAATTCCTTCCTTCCTCTCGCTCTCTTCTTTCTTTTCTTCTTTTCTTTTGTCTCTTTCTCTTTTCTTTTTTTCTTTCTCTTTTTCTTTCTTTCTTTCTCTTTCTTTTCTTTCCTTTCTTTTCTTCTTTCCCTCATCTTCACACAATGCTTTCTCCCTTTCTTTCTTTCCTTCTTTCTTTCTCTTTCTTTCTTTCTCTCTCTCTCTCTCTCTCTCTTTCTTTCTTTCTTTCTTTCTTTTTCTCTCTCTCTCTCTATCTTTC
TTTTTCATTTTCTTGAGAGACAGTCTCACTCTGTGGCCCAGGCTGGAGTGCAGTGATGCAATCTCGGCTCACTACAACCTCCGCCTCCTGAGTTCAAGTGATTCTCCTGCCTCAGCCTCCTGAGGCATAATGCATCCTGGAATTCCTACATCTGTTCCTGCCTCTGGGCATCAGTCCTCAGGGATCTTGGAGGGGAGCAGCAGGAGGAGCCTGTGGGTTGGGGTGGTGGTGTTGGTGGCTTCAGACAAAAGCAGACAGAGAAGTGACTGGGGACATGCATGCTCTGTGTAGATGTAGTGGAGACCGAGGTGGGCATGGAAGTGTCTGTGGATCAGCAGTTCTCGCCGGACCTCAATGACAACACTTCCCAGGCCTACAGGGATTTCAACAAGACCTTCTGGAATCAGGTAAAGGGCAAAGAGAGGGGATTTTTTTTTTTTTTGAGGTGTAGTCTCGCACTCTCACCCTGGCTGGAGTGCAATGGTGCGATCTTGGGTCACTGCAACCTCCGCCTCCCGGGTTCACATGATTCTCTTGCCTCAGCCTCCCAAGTAGCTGGGATTACAGGTGCACACCATGACACCTGGCTAATGTTTTGTATTTTTAGTAGAGACGGGTTTTCACTATGTTGGCCAGACTGGTCTCGAACTACTGACTTTGTGATCCACCTGCCTCGTCCTCCCAAAGTGTTGGGATTACAGGCGTGAGCCACGGCACCCGGCCGGGGAGGGGAATTGAAGGGTCTTCCCTGGAGCTGGGGTTGGGCGTCTGGGTCCCCTCAGGTCTGCAGGTTCGGACGTGAGCCCAGGGATCCTTGGTGTTTCAGATGCAGAAGATTTTTGCAGACATGCAGGGCTTCACCTTCAAGGGTGTGGAGATCCTGTCCCTGAGGTAGGAGACCCATCTGGGGATGCGGAGGCGGTGTTGGGTGGGGGAAATGTGCGCACACAAAAAACCCATTCCTTTCTTTTGTAATCATCAGATTTTATAAAGAGGGGTGGAGGGGGTACATAAGGAATCACTCCCTGGGTATTTTTTCGGATCGTTTTCTGGGGCCATTTATCTGGAGGAGGGGTGGCACCTCTCTTCTTCAGCACACTGGAAGGAGAGAAGTTGCAGGGACATGTGGGAAGGTGGTGCCTGGATTGATGACTTTGTCCCCCTCTGGCTGGCCCCTGCTCTACTGAGTGGGTCAGCATTAGAAAGAGAGAGAGAGAAAGAGAGAGAGGGAGAGAACGCACGTCTAGGGGCGCCCGGTGGATGATGGCTTGATGCAACAAGAAGAGAACGTCAGGCCAGATGTGGTGGCTCACACTTGCAATCCTAGCACTTTGGGTAGGCTAGGCGGGTGGATCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGAATTCCTACATCTGTTCCTGCCTCTGGGCATCAGTCCTCAGGGATCTTGGAGGGGAGCAGCAGGAGGAGCCTGTGGGTGGTGGTGGTGGTGTCGGTGGCTTCAGACAAAAGCAGACAGAGAAGTGACTGGGGACATACATGCTCTGTGCAGATGTAGTGGAGACCGAGGTGGGCATGGAAGTGTCTGTGGATCAGCAGTTCTCGCCGGACCTCAATGACAACACTTCCCAGGCCTACAGGGATTTCAACAAGACCTTCTGGAATCAGGTAAAGGGCAAAGAGAGGGGATTTTTTTTTTTTTTTTTTTTTGAGATGGAGTCTCGCACTGTCATCCTGGCTGGAGTGCAATGGCGTGATCTTGGCTCACTGCAACCTCCGCTTCCCAGGTTCACATGATTCTCTTGCCTCAGCCTCCCGAGCAGCTGAGATTACAGGTGCACACCACCACACCTGGCTAATGTTTTGTATTTTTAGTAGAGACGGGGTTTCACTATGTTGGCCAGACTGGTCTCGAACTACTGACTTTGTGATCCGCCTGCCTCGTCCTCCCAAAGTGTTGGGATTACAGGCGTGAGCCACGGCACCCGGCCGGGGAGGGGAATTGAAGGGTCTTCCCTGGAGCTGGGGTTGGGCATCTGGGTCCC
CTCAGGTCTGCAGGTTCGGACGTGAGCCCAGGGATCCTTGGTGTTTCAGATGCAGAAGATTTTTGCAGACATGCAGGGCTTCACCTTCAAGGGTGTGGAGATCCTGTCCCTGAGGTAGGAGACCCATCTGGGGATGTGGAGGCAGTGTTGGGTGGGGGAAATGTGCGCACACAAAAAACCCATTCCTTTCTTTTGTAATCATCAGATTTTATAAAGAGAGGGGTGGAGGAGGTACATAAGGAATCGCTCCCTGGGTATTTTTTTGGATCGTTTTCTGGGGCCATTTATCTGGAGGAGGGGTGGCACCTCCCTTCTTCAGCACACTGGAAAGAGAGAAGTTGCAGGGACATGTGGGAAGGTGGTGCCTGGATTGGTGACTTCGTCCCCCTCTGGCTGGCCCCTGCTCTACTGAGTGGGTCAGCATTAGAGAGAGAGAGAGAGAAAGAGAGAGAGGGAGAGAACGCGCGTCCAGGGGCGCCCGGTGGATGATGGCTTGATGCAACGAGAACGTCAGGCCAGATGTGGTGGCTCACACTTGCAATCCTAGCACTTTGGCAAGGTTAGGTGGGTGGATCACCTGAAACCAGTTCAAGACCAGCCTGGGCAACATAGTGAGAACCCATCCCTACAACAATAAAAATAGTAATAATAATAATAATAATAATAAAATGATTATCCAGGCGTGGTAGTGCACACCTGTAGTCCCAGATACTTGGGAGGCTGAGGAGAAAGGATCACTTTAGCCCAGGAGTTGGAGGCTGCAGTGAGCTACAATGATACCACTGCACTCCAGCCTGGGTGACAGCAAGACTTTGTCTCTATAAAACACACAGAGAGAGGAAGTCAATCATGTCAGTCATTCCTTGTCCTGCCTTCCCAGGCAGACCAAGTCAGGAATGCTGGCAGCCCCTTCTGAAAAGGATGCACGTGGCATCCCAACTCATGACCTCTGCCCTCTTTCCCCCTTCTGGTGCACTTTGGGTTGCTTCTGGAGGTGCCCCTCCAAGGACCCATATGTTCCTGGCTGGGGCACTCTCTAAGGCTGTGGACCCCTCAGGAATGGCAGCATCGTGGTGGACTACCTGGTCCTGCTGGAGATGCCCTTCAGCCCCCAGCTGGAGAGCGAGTATGAGCAGGTGAAGACCACGCTGAAGGAGGGGCTCCAGAACGCCAGCCAGGATGCGAACAGCTGCCAGGACTCCCAGAGTGAGCCCGGGCTGGAGGGAGGGGCCAGGGCCTGAGGTGTCACCCCAGCCCACTCCAGCTCAGCCAGGGGGCCACTGGACTCAGGTGCCAGCCCTGTGGTACCTCTGGCAGGTTGGGAGAACGGGAATAAGTCTACACACAATGCCATCAAGAGTGGGGCTAGGGAGGGTCTCCCCAGGACCTGGGTACTGGGGAAGAGACCCCCTGATCGTCAGGCTCAGCATTTCCCGATGGCTGAAGACCTCGGATTATTCAGGGGGGATAAGGGAGAGAACAGGAGTCTTCCCCTGTGGCCCCTCCACACTCCCCCAGACGGAGAGAGCCCTCACTGCCCTCCCTGTGCCTATCCTGCTTCCTGGCCCTAACCCCTTGACCTGCCCCGCCCATTCCATCTGTGCCTGTGTTTCCGCAGCCCTGTGTTTTAAGCCTGACTCCATCAAGGTGAACAACAACAGCAAGACAGAGCTGACCCCGGAAGGTGAGGGTGGGGTAAAGGGCTGAGTGGTCTCCCATGGCCATGACCCCTGCCACCAGGGACATTTGCCCATTGAAGCCTGTGGGCAGGGAGAGACCTTTGCGGGAGGCAAGTCATGTGGCCTAGGGAGGCTCTTCCTGGCGTTGGTTAGTGGCTTCCACCTGAGGACAGCAGGGGCCACGAGGAGAGGGTGAGGGTGCTGGGGGTGGCCTCCCCTCATCGAATCCCAGGGTCTACCCCACAGCATCCCACCTCGGAAATGGAATCCTCCTCGCGCATATTCAGAGGCACCATTATCAGGCCCCTGAATAGAATGGAT
GAGGTCCTTGTCTCTGTGCATACCCCTCCCCAACCCCCAGCCATCTGCCGCCGCGCCGCTCCCACGGGCTATGAAGAGTTCTACTTCCCTCTGGTGGAGGCCACCCGGCTCCGCTGTGTCACCAAATGCACGTCGGGCGTGGACAACGCCATCGACTGTCACCAGGGCCAGTGCGTTCTAGAGACGAGCGGTCCCGCGTGTCGGTAAGGCCCCGCTCACCATCAGCATCAGTCGAGCCCCGCCCACTCATTCTAGGATGAAGCCTAGCCTCACGCGCCGCCCCGGCTCTGCCCCCAGGCCCTACAGTGGAGCCTCGTCCCCAGAGTCCCGCTCCAAGCCCATCCCCGTTGCCCTACAGTGGAGCCCTGCCCTGGAGCTCTGCTCCGTCGCCCTAAAGTGTAGCCCCGCCTCCTTGATGGGGTTGAGTCCAATCCCCTGGTTCTGGGATAGACCCCGCCCACTCATTCTAGGGTGGGGCCCCGCCCCTTCGTTCTAGGGCTGAACCTTGCCCCCTTCTTCTGGGGTGGAGCCCCGCCCCCTTGTTCTAGGGTGGATCCCCGCCCCCTCCTTTTAGGGTGAAGCCCTGCCCACTTGATCTAAAGTGGAATCCCGCCCCCTCACCTAGGGTAGAGCCCCGCCCCCTCGTTCTAGGGTGGAGACCCGTCCGCTTGTTCTACGGTGGATTCCGGCCGCTTGTCTAGGGTGGAACCCCCCAGCTTGCCCTAGGGTGGAACCCCCCCGCTGCCCTAGGCTGGAGCCCCGCCCCCTCACCCGCCCCCGCGGGGCCCAGGTGCACGCGTGGACCCCGAGCCCGGAGGTGAAGAGGGTCTGACCCTGCGATCTCCCGCAGCTGCTACTCCACCGACACGCACTGGTTCTCTGGCCCGCGCTGCGAGGTGGCCGTCCACTGGAGGGCGCTGGTCGGGGGCCTGACGGCCGGCGCCGCGCTGCTGGTGCTGCTGCTGCTGGCGCTGGGCGTCCGGGCGGTGCGCTCCGGATGGTGGGGCGGCCAGCGCCGAGGCCGGTGAGCGTGCGGGGGGCGGGGCCGGGGGGCGAGGGCAGCCAAGGGGTCCCAGGCGGGCCGGCTCTGTCTGACCGCGCGGCGGCCCCACCTAGGTCCTGGGACCAGGACAGGAAATGGTTCGAGACCTGGGATGAGGAAGTCGTGGGCACTTTTTCAAACTGGGGTTTCGAGGACGACGGAACAGGTGAGTCCTGCCTCCTGGGGAAGCAGGCAGAGGCTTTCCTGGGCACCACTGCGAGGACAGACGCCCTCCTTGCCTTCCTCGCATTTACTCTGTCCCCCTCTCCCTTCCGTCCCCTCCCTCTCCCCTTCTATTTCTCCGCTCCTCTCTCTCTCTAGACAAGGATACAAATTTCCATGTGGCCTTGGAGAACGTGGACACCACTATGAAGGTGAGGGGCTAAAGAGGGGGACCCCAAGGAACTCTCCCAGCCTCCATTCCAGAATCCCTCCCCGACCCCCACCAGGGCAGGGAGGGGGCTGGGCTCGGATCAGCAGTGACCTCCCTGTCAGCCCAAACCAGTGGCTCCGCGTTCCCGTCCCTCACTGTGACTCTGACAGGTGCACATCAAGAGACCCGAGATGACCTCGTCCTCAGTGTGAGCCCTGCGGGGCCCCTTCACCACCCCCTCCGCCCTGCCCCGGACACAAGGGTCTGCATTGCGTCCATTTCAAGAGGTGGCCCCAGGACGCGGGCAGCCCAGGCTCCTGCTGTTCTTGGGCAAGATGAGACTGTTCCCCCAAATCCCATCCTTCTCCTTCCAACTTGGCTGAAACCCACCTGGAGACGCAGTTCACGTCCAGGCTCTTCCACTGTGGAATCTTGGGCAAGTCAGTAACGAGCCTCAGTTTCCTCACCTGCAAAACGGGTACAGCATTCCTGTATGATAGCTCACGCCGTCGTTGTGAAAACCACATAGACTTGGTCAATTCTCGGTCCTACTCTGCCCTCCCGTCTCAGCCCTCGTGTTGCCAT
TGCCTCTCTCGGATCCTCCAATCCTCACGTCCTTCACCTGGTCTCTGGCCCTGGTTCTTATTTTCTCTCAATTCCCTACTGCCTGTTTCTTACTTTGAACCTGGAGGCAGCCTGCAGCCCCATCCCATCTCCTGCCCTCTCCTGATCTAACTCCCTGCTGCATCTCTTGCTCTCATTCCTTAGACGTCCTCCCCTTTTGACCCCGTTCCTTCATCCATCCTGCACCCCAGTCCCCCAGCCCTAAATCCTCCCTCCTCTCCTCACATCCTGGTCCCTAGCAAGGTATAGATAGCCTCTGTGTCTTAGGATACCCCGGGTGCTGTTCCCTCGGTCACCCTGTTGCCCAGTTCCCCGTTTCTCTTGCTCTCATTCCTTGTATCCTTTCCCCTTTTGAGCCCGTCCATTCATCGGTTCTGCCCCCGACTCCCCCTGACCTAAATACCCCAGCTCCTAATTCCCCCCTCACCCCGTTGCTCAATTCCCCGTTTCTCTTGCTCTCATTCCTTGTATCTTCTCCCCTTCTGAGCCTGTCCATTCATCGGTGGTTCTGCCCCTACTCCCCCAGCCCTAAATACCCCAGCTGCTGTTCCTCCCCATCACCCAGCCACCGGATTCTCCATTCACCCCTTTCTCTCACCCCTGGAGCCCCGTGGGTGGGGGCAGGGCATGAGTTCCCCAGTCCCCAAGGAAAGGCAGCCCCCTCAGTCTCCCTCCTCCTCATTCCCTTCCATCTCCCTCCCCTCTGCCTTTTAAACCCATCCCCTCCGATTCCCCTCCTCCCCCCTCTCTCCCTGGTGTCAACTCGATTCCTGCGGTAACTCTGAGCCCTGAAATCCTCAGTCTCCTTGGCGGGGAAGATTGGCTTTGGGAACAGGAAGTCGGCACATCTCCAGGTCTCCATGTGCACAATATAGAGTTTATTGTAAAAAGC\",\r\n                100547052 - 1);\r\n\r\n            var codingRegion = new CodingRegion(100547257, 100610315, 206, 10177, 9972);\r\n\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   1,  100547052, 100547317, 1,     266),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1,  100547318, 100549480, 266,   267),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   2,  100549481, 100550652, 267,   1438),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 2,  100550653, 100550655, 1438,  5957),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   3,  100550656, 100550784, 5957,  6085),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 3,  100550785, 100550787, 6085,  6698),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   4,  100550788, 100550814, 6698,  6724),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 4,  100550815, 100550817, 
6724,  7115),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   5,  100550818, 100552774, 7115,  9071),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 5,  100552775, 100552880, 9071,  9072),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   6,  100552881, 100553066, 9072,  9257),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 6,  100553067, 100554979, 9257,  9258),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   7,  100554980, 100555095, 9258,  9373),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 7,  100555096, 100555514, 9373,  9374),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   8,  100555515, 100555579, 9374,  9438),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 8,  100555580, 100607745, 9438,  9439),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   9,  100607746, 100607894, 9439,  9587),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 9,  100607895, 100608306, 9587,  9588),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   10, 100608307, 100608372, 9588,  9653),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 10, 100608373, 100608728, 9653,  9654),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   11, 100608729, 100608891, 9654,  9816),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 11, 100608892, 100609538, 9816,  9817),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   12, 100609539, 100609712, 9817,  9990),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 12, 100609713, 100609804, 9990,  9991),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   13, 100609805, 100609896, 9991,  10082),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 13, 100609897, 100610051, 10082, 10083),\r\n                
new TranscriptRegion(TranscriptRegionType.Exon,   14, 100610052, 100610104, 10083, 10135),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 14, 100610105, 100610273, 10135, 10136),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   15, 100610274, 100611004, 10136, 10866),\r\n                new TranscriptRegion(TranscriptRegionType.Gap,    15, 100611005, 100611005, 10866, 10867),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   15, 100611006, 100611075, 10867, 10936),\r\n                new TranscriptRegion(TranscriptRegionType.Gap,    15, 100611076, 100611076, 10936, 10937),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   15, 100611077, 100611169, 10937, 11029),\r\n                new TranscriptRegion(TranscriptRegionType.Gap,    15, 100611170, 100611173, 11029, 11030),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   15, 100611174, 100611176, 11030, 11032),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 15, 100611177, 100611299, 11032, 11033),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   16, 100611300, 100611307, 11033, 11040),\r\n                new TranscriptRegion(TranscriptRegionType.Gap,    16, 100611308, 100611311, 11040, 11041),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   16, 100611312, 100611428, 11041, 11157),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   16, 100611429, 100611517, 11170, 11258),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   16, 100611518, 100611619, 11260, 11361)\r\n            };\r\n\r\n            var rnaEdits = new IRnaEdit[]\r\n            {\r\n                new RnaEdit(865,  865,  \"G\"),\r\n                new RnaEdit(1214, 1214, \"A\"),\r\n                new RnaEdit(1439, 1438,\r\n                    
\"AGCACATCCACAACTGCCATCTCCTCACTTCCCCCTACCTCAGGTACTATGGTGACTTCCACAACCATGACCCCATCTTCTCTGAGTACAGACATCCCTTTCACAACACCAACAACTATCACCCACCATTCTGTGGGCTCTACCGGTTTCCTGACTACAGCAACAGACCTCACATCAACATTCACGGTTTCCAGTTCCTCAGCAATGTCCACGAGTGTCATTCCATCTTCCCCCAGCATTCAGAATACAGAAACCTCATCCCTTGTCAGCATGACCTCTGCCACTACTCCCAATGTGAGACCAACTTTTGTAAGTACACTCAGCACTCCTACAAGTTCCCTCCTGACGACCTTCCCAGCAACATATTCATTTTCATCTTCCATGTCTGCCAGCAGTGCTGGGACCACTCACACAGAGAGTATCTCCTCACCTCCAGCCAGCACCAGTACACTCCACACAACAGCTGAATCCACCCTGGCACCCACTACCACCACCTCATTCACAACTTCCACAACTATGGAACCACCTTCAACCACTGCAGCAACTACAGGAACAGGTCAGACCACCTTCACCAGCTCTACAGCCACATTTCCTGAGACCACCACACCGACTCCTACAACTGACATGTCCACAGAATCTCTCACAACAGCCATGACTTCTCCTCCCATCACTTCATCAGTCACTTCCACAAATACAGTGACTTCTATGACAACTACGACCTCTCCTCCCACAACCACCAATTCTTTTACATCACTGACCAGTATGCCTCTGTCTTCTACACCTGTCCCAAGCACAGAAGTAGTCACCAGTGGCACCATAAACACAATCCCTCCATCTATCTTGGTGACCACACTCCCCACTCCAAATGCTTCATCTATGACTACATCTGAGACCACCTATCCTAATTCTCCGACTGGTCCTGGTACAAACTCCACGACGGAAATCACCTATCCCACCACTATGACAGAGACATCATCCACTGCCACCTCTCTTCCACCCACCTCTCCCTTGGTCTCAACTGCAAAAACAGCCAAAACTCCTACCACAAACTTGGTAACCACCACCACCAAGACCACCTCACATAGTACCACCAGCTTCACTTCTTCAACCGTCTACTCCACAGCCAGCACATACACAACTGCCATCACCTCAGTTCCCACTACGTTGGGTACCATGGTAACTTCTACATCCATGATCTCATCTACTGTGAGTACAGGTATCCCTACCTCACAACCAACAACCATCACTCCCTCATCCGTGGGCATCAGTGGTTCATTACCTATGATGACAGACCTCACCTCAGTGTACACAGTCTCTAACATGTCTGCAAGGCCAACAACTGTCATTCCCTCATCTCCCACTGTCCAGAATACAGAAATCTCAATCTCTGTTAGCATGACGTCTGCTACCACTCCCAGTGGAGGACCAACTTTCACAAGTACTGAGAACACTCCAACAAGGTCCCTCCTGACAAGCTTTCCAATGACACATTCATTCTCTTCTTCTATGTCTGAAAGTAGTGCTGGGACCACTCACACAGAGAGTATCTCCTCACCTCGAGGCACCACCAGTACACTCCACACAACAGTTGAATCCACCCCATCACCCACTACCACCACCTCATTTACCACATCCACAATGATGGAACCACCTTCATCCACTGTATCAACTACAGGCAGAGGTCAGACCACCTTTCCCAGCTCTACAGCCACATTCCCTGAGACCACTACACTGACTCCTACAACTGACATTTCTACAGTATCTCTCACAACAGCCATGACTTCTCCTCCCCCCGTCAGTTCTTCAATCACTCCCACCAATACAATGACTTCTATGAGAACTACAACCTATTGGCCCACAGCCACTAATACATTATCACCACTCACCAGTAGCATTTTATCTTCTACACCTGTCCCAAGCACAGAAATGATCACCAGTCATACCACAAACACCACCCCTCTATCCACCTTGGTGACTACACTCCTCACTACCATCACC
AGATCTACACCTACATCTGAGACCACCTACCCTACTTCTCCCACCAGCATTGTCTCAGACTCCACGACTGAAATCACCTATTCCACAAGTATAACAGGTACATTGTCCACTGCCACTACTCTCCCACCCACCTCTTCCTCTCTCCCAACCACAGAAACAGCCACGATGACTCCTACCACAACCTTGATAACCACCACCCCTAATACCACCTCCCTTAGTACCCCCAGCTTCACTTCTTCAACCATCTACTCCACAGTCAGCACATCCACAACTGCCATCTCCTCAGCTTCCCCTACCTCAGGTACCATGGTAACTTCCACAACCATGACCCCATCTTCTCTGAGTACAGACACCCCTTCCACAACACCAACAACTATCACCTACCCTTCTGTGGGCTCTACCGGTTTCCTGACTACAGCAACAGACCTCACATCAACATTCACTGTTTCCAGTTCCTCAGCAATGTCCACAAGTGTCATTCCATCTTCCCCCAGCATCCAGAATACAGAAACCTCATCCCTTGTCAGCATGACCTCTGCCACCACTCCCAGTTTGAGACCAACTATCACAAGTACTGACAGCACTCTAACAAGTTCCCTCCTGACGACCTTCCCAAGTACATATTCATTTTCATCTTCCATGTCTGCCAGCAGTGCAGGGACCACTCACACAGAGACTATTTCCTCACTTCCAGCCAGCACCAATACAATCCACACAACAGCTGAATCCGCCCTGGCACCCACTACCACCACCTCATTCACCACATCCCCAACGATGGAACCACCTTCAACCACTGTAGCGACTACAGGCACAGGTCAGACCACCTTCCCCAGCTCTACAGCCACATTCCTTGAGACCACCACACTGACTCCTACAACTGACTTTTCTACAGAATCTCTCACAACAGCCATGACTTCTACTCCCCCCATCACTTCTTCAATCACTCCCACCGATACAATGACTTCTATGAGAACTACGACCTCTTGGCCCACAGCCACTAATACGTTATCACCACTCACCAGTAGCATTTTATCTTCTACACCTGTCCCAAGCACAGAGGTGACCACCAGTCATACCACAAACACCAATCCTGTATCCACGTTGGTGACTACACTCCCCATTACCATCACCAGGTCTACACTTACATCTGAGACCGCCTACCCTAGTTCTCCCACAAGCACTGTCACAGAGTCCACAACTGAAATCACCTATCCCACCACTATGACAGAGACATCATCTACTGCCACCTCTCTTCCACCCACCTCTTCCTTGGTCTCAACCGCAGAAACAGCCAAAACTCCTACCACAAACTTGGTAACCACCACCACCAAGACCACCTCACATAGTACCACCAGCTTCACTTCTTCAACCATCTACTCCACAGCCAGCACACCCACCACTGCCATCACCTCAGTTCCCACTACCTTGGGTACCATGGTGACTTCTACATCCATGATCCCATCTACTGTGAGTACAGGTATCCCTACCTCACAACCAACAACTATTACTCCCTCATCCGTGGGCATCAGTGGTTCATTACCTATGATGACAGACCTCACCTCAGTGTACACAGTCTCCAGCATGTCTGCAAGGCCAACAAGTGTCATTCCTTCATCTCCCACTGTCCAGAATACAGAAACCTCAATCTTTGTTAGCATGATGTCTGCTACCACTCCCAGTGGAGGACCAACTTTCACAAGTACTGAGAACACTCCAACAAGGTCCCTCCTGACAAGCTTTCCAGTGACACATTCATTTTCCTCTTCCATGTCTGCCAGCAGTGTAGGGACCACTCACACCCAGAGTATCTCCTCACCCCCAGCCATCACCAGTACACTCCACACAACAGCTGAATCCACCCCATCACCTACAACCACCATGTCATTCACAACATTTACAAAGATGGAAACACCTTCATCCACTGTAGCAACTACAGGCACAGGTCAGACTACATTCACCAGTTCAACAGCCACATCCCCTAAGACCACCACACTGACTCC
TACCTCTGACATTTCCACAGGATCTTTCAAAACAGCCGTGAGTTCTACTCCCCCCATCACTTCTTCAATCACCTCCACATATACGGTGACTTCGATGACAACTACCACCCCTCTAGGGCCCACAGCCACTAATACGTTACCATCATTTACCAGTAGCGTTTCATCTTCTACGCCTGTCCCAAGTACAGAAGCGATCACCAGTGGTACCACAAACACCACCCCTCTATCTACATTGGTGACCACATTCTCCAATTCCGACACCAGTTCTACACCTACATCTGAGACCACCTACCCTACTTCTCTTACTAGTGCTCTCACAGATTCCACGACCAGAACCACCTATTCCACCAATATGACAGGTACATTGTCCACTGTGACCTCTCTTCGACCCACCTCTTCCTCTCTCCTCACCACAGTAACAGCCACAGTTCCAACAACAAACTTGGTAACCACGACCACCAAGATCACCTCACACAGTACTCCTAGCTTCACTTCTTCAATCGCAACCACCGAGACCCCC\"),\r\n                new RnaEdit(1520, 1520, \"A\"),\r\n                new RnaEdit(1549, 1549, \"C\"),\r\n                new RnaEdit(1568, 1567,\r\n                    \"TCGATCACCACCACCGAGACCACATCCCACAATACTCCCAGCCTCACTTCTTCAATCACCACCACCAAGACCACCTCACACAGTACTCCCAGCTACACTTCTTTGATCACCACAACCACCACCACCTCACACAGTACTCCCAGCTTCACTTCTTCCATCACCACCACTGAGACCACATCCCACAATACTCCCAGCTTGACTTCTTCGATCACAACCACCGAGACCACATCCCATAGTACTCCCAGCTTCACTTCTTCGATCACCACCGAGACCACATCCCACAGTACTCCCAGCTTCACTTCATTGATCACCATCACCGAGATCACCTCACACAGTACTCTCAGCTACACTACCTCAATCACCACCACCGAGACCCCCTCACACAGTACTCTCAGCTTCACTTCTTCAATCACCACCACTGAGACCACCTCACACAGTACTCCCAGCTTCACTTCCTCAATCACCACCTCTGAGATGCCCTCACACAGTACTCCCAGCTTCACTTCTTCGATCACCACCACTGAGAACGCCACACACAGTACTCCCAACTTCACTTCTTCAATCACCACCACCGAGACCACATCCCACAGTACTCCCAGCTTCACTTCTTTG\"),\r\n                new RnaEdit(1595, 1594,\r\n                    \"AGGTGGGGGACCACCGAGACCACATCCTACAGTACTCCCAGCTTCACTTCTTCAAATACCATCACTGAGACCACCTCACACAGTACTCCCAGCTACATTACCTCAATCACCACCACCGAGACCCCCTCAAGCAGTACTCCCAGCTTCAGTTCTTCGATCACCACCACTGAGACCACATCCCACAGTACTCCCGGCTTCACTTCTTCAATCACCACCACTGAGACTACATCCCACAGTACTCCCAGCTTCACTTCTTCGATCACCACCACTGAGACCACCTCACATGATACTCCCAGCTTCACTTCTTCAATCACCACCAGTGAGACCCCCTCACACAGTACTCCCAGCTCCACTTCTTTAATCACCACCACCAAGACCACCTCACACAGT\"),\r\n                new RnaEdit(1627, 1627, \"G\"),\r\n                new RnaEdit(1650, 1650, \"A\"),\r\n                new RnaEdit(1702, 1702, \"C\"),\r\n                new RnaEdit(1957, 1957, \"T\"),\r\n                
new RnaEdit(2794, 2794, \"T\"),\r\n                new RnaEdit(3028, 3028, \"C\"),\r\n                new RnaEdit(3586, 3586, \"C\"),\r\n                new RnaEdit(4024, 4024, \"G\"),\r\n                new RnaEdit(4044, 4044, \"T\"),\r\n                new RnaEdit(4131, 4131, \"C\"),\r\n                new RnaEdit(4183, 4184, \"CT\"),\r\n                new RnaEdit(4231, 4231, \"G\"),\r\n                new RnaEdit(4273, 4273, \"G\"),\r\n                new RnaEdit(4289, 4289, \"A\"),\r\n                new RnaEdit(4580, 4580, \"T\"),\r\n                new RnaEdit(4956, 4956, \"T\"),\r\n                new RnaEdit(5344, 5344, \"A\"),\r\n                new RnaEdit(5347, 5347, \"\"),\r\n                new RnaEdit(5356, 5356, \"T\"),\r\n                new RnaEdit(5358, 5358, \"C\"),\r\n                new RnaEdit(5360, 5360, \"C\"),\r\n                new RnaEdit(5368, 5368, \"C\"),\r\n                new RnaEdit(5411, 5411, \"T\"),\r\n                new RnaEdit(5413, 5413, \"C\"),\r\n                new RnaEdit(5418, 5418, \"\"),\r\n                new RnaEdit(5421, 5421, \"C\"),\r\n                new RnaEdit(5480, 5480, \"G\"),\r\n                new RnaEdit(5483, 5484, \"GT\"),\r\n                new RnaEdit(5486, 5486, \"C\"),\r\n                new RnaEdit(5492, 5492, \"A\"),\r\n                new RnaEdit(5499, 5499, \"T\"),\r\n                new RnaEdit(5501, 5501, \"C\"),\r\n                new RnaEdit(5505, 5505, \"C\"),\r\n                new RnaEdit(5512, 5515, \"\"),\r\n                new RnaEdit(5527, 5530, \"\"),\r\n                new RnaEdit(5556, 5556, \"T\"),\r\n                new RnaEdit(5558, 5558, \"C\"),\r\n                new RnaEdit(5630, 5630, \"T\"),\r\n                new RnaEdit(5636, 5636, \"G\"),\r\n                new RnaEdit(5640, 5640, \"C\"),\r\n                new RnaEdit(5648, 5647, \"TTCTTGCCTCCC\"),\r\n                new RnaEdit(5691, 5691, \"T\"),\r\n                new RnaEdit(5697, 5697, \"T\"),\r\n                
new RnaEdit(5708, 5708, \"C\"),\r\n                new RnaEdit(5711, 5711, \"G\"),\r\n                new RnaEdit(5721, 5721, \"A\"),\r\n                new RnaEdit(5737, 5736, \"T\"),\r\n                new RnaEdit(5750, 5750, \"G\"),\r\n                new RnaEdit(5777, 5777, \"G\"),\r\n                new RnaEdit(5805, 5805, \"T\"),\r\n                new RnaEdit(5819, 5819, \"C\")\r\n            };\r\n\r\n            const byte startExonPhase  = 0;\r\n            const bool onReverseStrand = false;\r\n            var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, onReverseStrand, startExonPhase,\r\n                rnaEdits);\r\n\r\n            var expectedCodingSeq =\r\n                \"ATGCAGCTGTTGGGGCTCCTCGGCCTCCTCTGGATGCTCAAGGCCTCCCCGTGGGCCACAGGAACTTTATCCACGGCCACATCCATCTCTCAAGTGCCTTTCCCCAGAGCAGAAGCAGCCAGCGCTGTGCTCAGCAATTCTCCACACTCCAGAGACCTGGCTGGGTGGCCACTTGGTGTCCCCCAGCTCGCCTCTCCTGCTCCTGGCCACAGGGAAAATGCACCTATGACACTCACTACCTCCCCCCATGACACACTCATCTCTGAAACATTGCTCAACTCTCCAGTCAGTTCCAACACCTCAACCACCCCGACGTCCAAGTTTGCCTTCAAGGTTGAAACCACTCCACCCACCGTGTTGGTCTATTCAGCCACCACTGAGTGCGTGTATCCAACGAGCTTTATAATCACCATCTCCCACCCCACCTCCATCTGTGTGACCACGACGCAGGTGGCCTTCACCAGCTCTTACACCTCGACTCCCGTGACACAGAAGCCAGTGACCACCGTCACCAGTACTTACTCTATGACCACTACTGAGAAAGGAACGTCAGCCATGACATCTTCTCCCTCTACCACCACTGCAAGGGAAACTCCCATAGTGACAGTGACACCCTCCTCTGTGTCAGCCACAGACACAACCTTCCACACTACAATCTCGTCTACAACTAGAACCACAGAAAGGACTCCCCTGCCCACTGGAAGCATCCATACAACCACGTCCCCAACCCCAGTATTTACTACTCTCAAAACAGCAGTGACTTCCACTTCCCCCATCACTTCTTCAATCACTTCCACAAATACAGTGACTTCTATGACAACGACCGCCTCCCAGCCCACAGCCACTAATACATTGTCATCACCCACTAGGACCATTTTATCTTCCACACCTGTCCTGAGCACAGAAACAATCACCAGTGGTATCACAAACACCACCCCCCTATCCACCTTGGTGACCACACTCCCCACTACCATCAGCAGGTCTACACCTACATCTGAGACCACCTACACTACTTCTCCCACCAGCACTGTCACAGACTCCACTACCAAAATCGCCTACTCCACAAGTATGACAGGTACATTGTCCACAGAGACTTCTCTCCCACCCACCTCTTCCTCTCTCCCAACCACAGAAACAGCCACGACTCCTATGACAAACTTGGTAACCACCACCACTGAGATCTCCTCCCACAGTACTCCCAGCTTCTCTTCATCAACCATCTACTCCACAGTCAGCACATCCACAACTGCCATCTCCTCACTTCCCCCTACCTCA
GGTACTATGGTGACTTCCACAACCATGACCCCATCTTCTCTGAGTACAGACATCCCTTTCACAACACCAACAACTATCACCCACCATTCTGTGGGCTCTACCGGTTTCCTGACTACAGCAACAGACCTCACATCAACATTCACGGTTTCCAGTTCCTCAGCAATGTCCACGAGTGTCATTCCATCTTCCCCCAGCATTCAGAATACAGAAACCTCATCCCTTGTCAGCATGACCTCTGCCACTACTCCCAATGTGAGACCAACTTTTGTAAGTACACTCAGCACTCCTACAAGTTCCCTCCTGACGACCTTCCCAGCAACATATTCATTTTCATCTTCCATGTCTGCCAGCAGTGCTGGGACCACTCACACAGAGAGTATCTCCTCACCTCCAGCCAGCACCAGTACACTCCACACAACAGCTGAATCCACCCTGGCACCCACTACCACCACCTCATTCACAACTTCCACAACTATGGAACCACCTTCAACCACTGCAGCAACTACAGGAACAGGTCAGACCACCTTCACCAGCTCTACAGCCACATTTCCTGAGACCACCACACCGACTCCTACAACTGACATGTCCACAGAATCTCTCACAACAGCCATGACTTCTCCTCCCATCACTTCATCAGTCACTTCCACAAATACAGTGACTTCTATGACAACTACGACCTCTCCTCCCACAACCACCAATTCTTTTACATCACTGACCAGTATGCCTCTGTCTTCTACACCTGTCCCAAGCACAGAAGTAGTCACCAGTGGCACCATAAACACAATCCCTCCATCTATCTTGGTGACCACACTCCCCACTCCAAATGCTTCATCTATGACTACATCTGAGACCACCTATCCTAATTCTCCGACTGGTCCTGGTACAAACTCCACGACGGAAATCACCTATCCCACCACTATGACAGAGACATCATCCACTGCCACCTCTCTTCCACCCACCTCTCCCTTGGTCTCAACTGCAAAAACAGCCAAAACTCCTACCACAAACTTGGTAACCACCACCACCAAGACCACCTCACATAGTACCACCAGCTTCACTTCTTCAACCGTCTACTCCACAGCCAGCACATACACAACTGCCATCACCTCAGTTCCCACTACGTTGGGTACCATGGTAACTTCTACATCCATGATCTCATCTACTGTGAGTACAGGTATCCCTACCTCACAACCAACAACCATCACTCCCTCATCCGTGGGCATCAGTGGTTCATTACCTATGATGACAGACCTCACCTCAGTGTACACAGTCTCTAACATGTCTGCAAGGCCAACAACTGTCATTCCCTCATCTCCCACTGTCCAGAATACAGAAATCTCAATCTCTGTTAGCATGACGTCTGCTACCACTCCCAGTGGAGGACCAACTTTCACAAGTACTGAGAACACTCCAACAAGGTCCCTCCTGACAAGCTTTCCAATGACACATTCATTCTCTTCTTCTATGTCTGAAAGTAGTGCTGGGACCACTCACACAGAGAGTATCTCCTCACCTCGAGGCACCACCAGTACACTCCACACAACAGTTGAATCCACCCCATCACCCACTACCACCACCTCATTTACCACATCCACAATGATGGAACCACCTTCATCCACTGTATCAACTACAGGCAGAGGTCAGACCACCTTTCCCAGCTCTACAGCCACATTCCCTGAGACCACTACACTGACTCCTACAACTGACATTTCTACAGTATCTCTCACAACAGCCATGACTTCTCCTCCCCCCGTCAGTTCTTCAATCACTCCCACCAATACAATGACTTCTATGAGAACTACAACCTATTGGCCCACAGCCACTAATACATTATCACCACTCACCAGTAGCATTTTATCTTCTACACCTGTCCCAAGCACAGAAATGATCACCAGTCATACCACAAACACCACCCCTCTATCCACCTTGGTGACTACACTCCTCACTACCATCACCAGATCTACACCTACATCTGAGACCACCTACCCTACTTCTCCCAC
CAGCATTGTCTCAGACTCCACGACTGAAATCACCTATTCCACAAGTATAACAGGTACATTGTCCACTGCCACTACTCTCCCACCCACCTCTTCCTCTCTCCCAACCACAGAAACAGCCACGATGACTCCTACCACAACCTTGATAACCACCACCCCTAATACCACCTCCCTTAGTACCCCCAGCTTCACTTCTTCAACCATCTACTCCACAGTCAGCACATCCACAACTGCCATCTCCTCAGCTTCCCCTACCTCAGGTACCATGGTAACTTCCACAACCATGACCCCATCTTCTCTGAGTACAGACACCCCTTCCACAACACCAACAACTATCACCTACCCTTCTGTGGGCTCTACCGGTTTCCTGACTACAGCAACAGACCTCACATCAACATTCACTGTTTCCAGTTCCTCAGCAATGTCCACAAGTGTCATTCCATCTTCCCCCAGCATCCAGAATACAGAAACCTCATCCCTTGTCAGCATGACCTCTGCCACCACTCCCAGTTTGAGACCAACTATCACAAGTACTGACAGCACTCTAACAAGTTCCCTCCTGACGACCTTCCCAAGTACATATTCATTTTCATCTTCCATGTCTGCCAGCAGTGCAGGGACCACTCACACAGAGACTATTTCCTCACTTCCAGCCAGCACCAATACAATCCACACAACAGCTGAATCCGCCCTGGCACCCACTACCACCACCTCATTCACCACATCCCCAACGATGGAACCACCTTCAACCACTGTAGCGACTACAGGCACAGGTCAGACCACCTTCCCCAGCTCTACAGCCACATTCCTTGAGACCACCACACTGACTCCTACAACTGACTTTTCTACAGAATCTCTCACAACAGCCATGACTTCTACTCCCCCCATCACTTCTTCAATCACTCCCACCGATACAATGACTTCTATGAGAACTACGACCTCTTGGCCCACAGCCACTAATACGTTATCACCACTCACCAGTAGCATTTTATCTTCTACACCTGTCCCAAGCACAGAGGTGACCACCAGTCATACCACAAACACCAATCCTGTATCCACGTTGGTGACTACACTCCCCATTACCATCACCAGGTCTACACTTACATCTGAGACCGCCTACCCTAGTTCTCCCACAAGCACTGTCACAGAGTCCACAACTGAAATCACCTATCCCACCACTATGACAGAGACATCATCTACTGCCACCTCTCTTCCACCCACCTCTTCCTTGGTCTCAACCGCAGAAACAGCCAAAACTCCTACCACAAACTTGGTAACCACCACCACCAAGACCACCTCACATAGTACCACCAGCTTCACTTCTTCAACCATCTACTCCACAGCCAGCACACCCACCACTGCCATCACCTCAGTTCCCACTACCTTGGGTACCATGGTGACTTCTACATCCATGATCCCATCTACTGTGAGTACAGGTATCCCTACCTCACAACCAACAACTATTACTCCCTCATCCGTGGGCATCAGTGGTTCATTACCTATGATGACAGACCTCACCTCAGTGTACACAGTCTCCAGCATGTCTGCAAGGCCAACAAGTGTCATTCCTTCATCTCCCACTGTCCAGAATACAGAAACCTCAATCTTTGTTAGCATGATGTCTGCTACCACTCCCAGTGGAGGACCAACTTTCACAAGTACTGAGAACACTCCAACAAGGTCCCTCCTGACAAGCTTTCCAGTGACACATTCATTTTCCTCTTCCATGTCTGCCAGCAGTGTAGGGACCACTCACACCCAGAGTATCTCCTCACCCCCAGCCATCACCAGTACACTCCACACAACAGCTGAATCCACCCCATCACCTACAACCACCATGTCATTCACAACATTTACAAAGATGGAAACACCTTCATCCACTGTAGCAACTACAGGCACAGGTCAGACTACATTCACCAGTTCAACAGCCACATCCCCTAAGACCACCACACTGACTCCTACCTCTGACATTTCCACAGGATCTTTCAAAACAGCCGTGAGTT
CTACTCCCCCCATCACTTCTTCAATCACCTCCACATATACGGTGACTTCGATGACAACTACCACCCCTCTAGGGCCCACAGCCACTAATACGTTACCATCATTTACCAGTAGCGTTTCATCTTCTACGCCTGTCCCAAGTACAGAAGCGATCACCAGTGGTACCACAAACACCACCCCTCTATCTACATTGGTGACCACATTCTCCAATTCCGACACCAGTTCTACACCTACATCTGAGACCACCTACCCTACTTCTCTTACTAGTGCTCTCACAGATTCCACGACCAGAACCACCTATTCCACCAATATGACAGGTACATTGTCCACTGTGACCTCTCTTCGACCCACCTCTTCCTCTCTCCTCACCACAGTAACAGCCACAGTTCCAACAACAAACTTGGTAACCACGACCACCAAGATCACCTCACACAGTACTCCTAGCTTCACTTCTTCAATCGCAACCACCGAGACCCCCTCACACAGTACTCCCAGATTCACTTCTTCAATCACCACTACCGAGACCCCCTCACACAGTACTCCCAGATTCACTTCTTCAATCACCAATACCAAGACCACCTCACACAGCTCTCCCAGCTTCACTTCTTCGATCACCACCACCGAGACCACATCCCACAATACTCCCAGCCTCACTTCTTCAATCACCACCACCAAGACCACCTCACACAGTACTCCCAGCTACACTTCTTTGATCACCACAACCACCACCACCTCACACAGTACTCCCAGCTTCACTTCTTCCATCACCACCACTGAGACCACATCCCACAATACTCCCAGCTTGACTTCTTCGATCACAACCACCGAGACCACATCCCATAGTACTCCCAGCTTCACTTCTTCGATCACCACCGAGACCACATCCCACAGTACTCCCAGCTTCACTTCATTGATCACCATCACCGAGATCACCTCACACAGTACTCTCAGCTACACTACCTCAATCACCACCACCGAGACCCCCTCACACAGTACTCTCAGCTTCACTTCTTCAATCACCACCACTGAGACCACCTCACACAGTACTCCCAGCTTCACTTCCTCAATCACCACCTCTGAGATGCCCTCACACAGTACTCCCAGCTTCACTTCTTCGATCACCACCACTGAGAACGCCACACACAGTACTCCCAACTTCACTTCTTCAATCACCACCACCGAGACCACATCCCACAGTACTCCCAGCTTCACTTCTTTGATCACCACCACGGAGACCACCTCACACAGGTGGGGGACCACCGAGACCACATCCTACAGTACTCCCAGCTTCACTTCTTCAAATACCATCACTGAGACCACCTCACACAGTACTCCCAGCTACATTACCTCAATCACCACCACCGAGACCCCCTCAAGCAGTACTCCCAGCTTCAGTTCTTCGATCACCACCACTGAGACCACATCCCACAGTACTCCCGGCTTCACTTCTTCAATCACCACCACTGAGACTACATCCCACAGTACTCCCAGCTTCACTTCTTCGATCACCACCACTGAGACCACCTCACATGATACTCCCAGCTTCACTTCTTCAATCACCACCAGTGAGACCCCCTCACACAGTACTCCCAGCTCCACTTCTTTAATCACCACCACCAAGACCACCTCACACAGTACTCCCAGCTTCACTTCTTCGATCACCACCACGGAGACCACCTCACACAGTGCTCACAGCTTCACTTCTTCGATCACCACCACCGAGACCACCTCACACAATACTCGCAGCTTCACTTCTTCGATCACCACCACCGAGACCAACTCTCACAGTACTACCAGCTTCACTTCTTCGATCACCACCACCGAGACCACCTCACACAGTACTCCCAGCTTCAGTTCTTCAATCACCACCACTGAGACCCCCTTACACAGTACTCCTGGCCTCACTTCGTGGGTCACCACCACCAAGACCACCTCACACATTACTCCTGGCCTCACTTCTTCAATCACCACCACTGAGACTACCTCACACAGTACTCCTGGC
TTCACTTCTTCAATCACCACCACTGAGACCACCTCAGAGAGTACTCCCAGCCTCAGTTCTTCAACCATCTACTCCACAGTCAGCACATCCACAACTGCCATCACCTCACATTTTACTACCTCAGAGACTGCGGTGACTCCCACACCTGTAACCCCATCTTCTCTGAGTACAGACATCCCGACCACAAGCCTACGAACTCTCACCCCTTCGTCTGTGGGCACCAGCACTTCATTGACTACAACCACAGACTTTCCCTCTATACCCACTGATATCAGTACCTTACCAACTCGAACACACATCATTTCATCTTCTCCCTCCATCCAAAGTACAGAAACCTCATCCCTTGTGGGCACCACCTCTCCCACCATGTCCACTGTGAGAATGACCCTCAGAATTACTGAGAACACCCCAATCAGTTCCTTTAGCACAAGTATTGTTGTTATACCTGAAACCCCAACACAGACCCCTCCTGTACTGACGTCAGCCACTGGGACCCAAACATCTCCTGCACCTACTACTGTCACCTTTGGAAGTACGGATTCCTCCACGTCCACTCTTCATACTCTTACTCCATCAACAGCCTTGAGCACGATCGTGTCAACATCACAGGTTCCTATTCCTAGCACACATTCCTCCACCCTTCAAACAACTCCTTCTACTCCCTCATTGCAAACTTCACTCACATCTACAAGTGAGTTCACTACAGAATCTTTCACTAGGGGAAGTACGTCTACAAATGCAATCTTGACTTCTTTTAGTACCATCATCTGGTCCTCAACACCCACTATTATCATGTCCTCTTCTCCATCTTCTGCCAGCATAACTCCAGTGTTTTCCACTACCATTCATTCTGTTCCTTCTTCACCATACATTTTCAGTACAGAAAATGTGGGCTCCGCTTCTATCACAGGCTTTCCTAGTCTCTCTTCCTCTGCAACTACCAGCACTTCTTCAACCAGCTCCTCTCTGACCACAGCTCTCACTGAAATAACCCCCTTTTCTTATATTTCCCTTCCCTCCACCACACCCTGTCCAGGAACTATAACAATTACCATAGTCCCTGCCTCCCCCACTGATCCATGTGTTGAAATGGATCCCAGCACTGAAGCTACTTCTCCTCCCACCACCCCATTAACAGTCTTTCCCTTTACTACCGAAATGGTCACCTGTCCTACCTCCATCAGTATCCAAACTACTCTTACTACATATATGGACACTTCTTCCATGATGCCAGAAAGTGAGTCCAGCATCTCACCCAATGCTTCCAGTTCCACTGGCACTGGGACTGTACCCACAAACACAGTTTTCACAAGTACTCGACTGCCCACCAGTGAGACCTGGCTGAGCAACAGTTCTGTGATCCCCCTACCTCTTCCTGGCGTCTCTACCATCCCGCTCACCATGAAACCAAGCAGTAGCCTCCCGACCATCCTGAGGACTTCAAGCAAGTCAACACACCCCTCCCCACCCACCACTAGGACTTCAGAGACACCAGTGGCCACTACCCAGACTCCTACCACCCTTACATCACGCAGGACAACTCGCATCACTTCTCAGATGACCACACAGTCCACGTTGACCACCACTGCAGGCACCTGTGACAATGGTGGCACCTGGGAACAGGGCCAGTGTGCTTGCCTTCCGGGGTTTTCTGGGGACCGCTGTCAGCTCCAGACCAGATGCCAGAATGGGGGTCAGTGGGATGGCCTCAAATGCCAGTGCCCCAGCACCTTCTATGGTTCCAGTTGTGAGTTTGCTGTGGAACAGGTGGATCTAGATGTAGTGGAGACCGAGGTGGGCATGGAAGTGTCTGTGGATCAGCAGTTCTCGCCGGACCTCAATGACAACACTTCCCAGGCCTACAGGGATTTCAACAAGACCTTCTGGAATCAGATGCAGAAGATTTTTGCAGACATGCAGGGCTTCACCTTCAAGGGTGTGGAGATCCTGTCCCTGAGGAATGGCAGCATCGTGGTGGACTACCTGGTCCTGCTGGAGAT
GCCCTTCAGCCCCCAGCTGGAGAGCGAGTATGAGCAGGTGAAGACCACGCTGAAGGAGGGGCTGCAGAACGCCAGCCAGGATGTGAACAGCTGCCAGGACTCCCAGACCCTGTGTTTTAAGCCTGACTCCATCAAGGTGAACAACAACAGCAAGACAGAGCTGACCCCGGCAGCCATCTGCCGCCGCGCCGCTCCCACGGGCTATGAAGAGTTCTACTTCCCCTTGGTGGAGGCCACCCGGCTCCGCTGTGTCACCAAATGCACGTCGGGGGTGGACAACGCCATCGACTGTCACCAGGGCCAGTGCGTTCTGGAGACGAGCGGTCCCACGTGTCGCTGCTACTCCACCGACACGCACTGGTTCTCTGGCCCGCGCTGCGAGGTGGCCGTCCACTGGAGGGCGCTGGTCGGGGGCCTGACGGCCGGCGCCGCGCTGCTGGTGCTGCTGCTGCTGGCGCTGGGCGTCCGGGCGGTGCGCTCCGGATGGTGGGGCGGCCAGCGCCGAGGCCGGTCCTGGGACCAGGACAGGAAATGGTTCGAGACCTGGGATGAGGAAGTCGTGGGCACTTTTTCAAACTGGGGTTTCGAGGACGACGGAACAGACAAGGATACAAATTTCTATGTGGCCTTGGAGAACGTGGACACCACTATGAAGGTGCACATCAAGAGACCCGAGATGACCTCGTCCTCAGTGTGA\";\r\n\r\n            Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence());\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCodingSequence_RnaEditInsertion_StartsCds_EndsUtr()\r\n        {\r\n            // NM_001220765.1, chr7:50344378-50367353\r\n            var genomicSeq = new SimpleSequence(\r\n                
\"CGCGGCGCATCCCAGCCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCACCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACGTGAGTGTTTTCAAATTGAATTTCAATAGGAAAACTTGGGGTAACTGGTGAATTTAAAAAAAAAAAAACACAGTAAAGAAAAGCGGTAAGGTTGGTAGACCCTGGTGTCGCTCAGGTCCGCCTCTCTTTTCTGAGGACAGTGAGAGAGTTCACTTCTGTCAAGCGTCTGTTGCTCTGCACTGTGCCAGCAGGTGCAGGACCAGGCCGACATGGGACACTTCTGAGCAGCCCCGCTGTCACCAGGAGAGGAGTTCTAGCTCCCAACCATATTTAAATTTATGTAGACCTACATATACCCACGGAAGTCAGCCTTTATAAAGTCGTGTGTAAAGAGTTTTCCTTATATTTGAGCCGGGAGCTTTCTTTTTATACTATAAATATGATGAGATCGAGTCTGAACTTAATTTCTGCAAGAGAGGAATTATCCCGGCTTTGAAAAGTTAGTCCTTTTGCTGACCGCAGGTTTGACGCTCAAGTCACCAAACCTTCTCAGGAAAACCCTTAGTAATATTAAGGCATCAGGTTACTTGCGGTTATATTTGAAATGTATTTTAAATATTTGTCAAGCATCGCTGCTGATGCCTAAGGAACCTCGTGAGGGCTTGTTTTTCCTTCTAATTTGGAGGCATCTAATGACCGAAAACCGTAGCGATTCCATAGGGTCTGACCAGGCACAGCTTTCAAATGCAGCTTCCCTCTCTCTAGGGACTGCAGCCCACCCAGACTGAATTTCAATGCGGTGCGCTTTGCTTAGGTTACCCACTCACAATTTCCCACTGCGCCGCAGGCAGTATATTTCAGCTTTGAGATACCTTGTTTTAAAATTCCAGACAAAATGGTGTTGAGGAAATGTCTCCTTACTAGTCCCATCAACTTCTGTTAAAAGAGGAAAATTTATGGAATTTGAAAATACTGCGTATGATATTTAAACTTTCATAGACATTCAAATGCTTTTAAGGCCAGGTTCAATTTGGTTATGAGTCGAGGGGTGGGGGGGACCCACATAGAAATGTCCTGGGTCCTCTTGAGTTTATTTCTTTGTTTGAAGATGTTTGTTCAATGAGTTTTATTGTACTCATCTTTTATATGGAATTTTAAAAAGTAACAATTTCAGTATTATTTATATTAGAATGTGTCAGAATTATTTCCGTGACAAATCAGATCATTTGGGCTATGGCTTAAAATGTACACGAGGCAAATATTCATGACAAGAAGATTCACCTTCTTACGCTGGCATCTTGTAAAATGCAGAACAAGTTAAAGAAATAATGTGTACACATACAAATAATGATGTCACATTAAAAATACTACACTATTCTTGCTTGATGGAATGTATCTGATTTCCAATTTCACCATGAACATATTTCATACATTTTTTACATGAAAAAAAACGTGACTCTTAAGTCTCACAGTCAATCAGAGCTGGTGACCAGAACATTTTATTGAACTAAATGGTCATGTTTTCTTCCCCTTTTGTTTCACGGTGAGAGTTGAAGGAAGGAGTTTAGAAACTCTCCAGTACTTGTTTAATTCATCAGTGTTCTAATTAGAGTGGTACCTCTTGGAAAACTACACACCCCCCTAATGCAGAAACATCATAGCAATAATCACCCACCCTCAGGGTCTCCAGGAGACCACAAGGGCTGCAGATAAAAGTCTGGATGTGTTAGGTTTGACCCTTTCGAAGAGTTTTACACAGGCTCCTAAAGAGAAGATCAGCTGTGGCCGTTTGTAGCCATTTCCTTTGTCGAAAAACTAAGATCGCAGTGAATGTATTAGCCAAGAGGTCTAAAGCCCTGTTGTACTGCAGGCCACTGTCTTCCTTGTTTGACTAGAGACTTGGAGTTTGAGAACAGT
GGTTCTTTGGTTTGGATACATTTTTTGTTCTTGATTTGGATGTGTGTGTTTCATGCGTGGTTAATATAGCATATTTTCAATATAAATGTCAAAAATTTTGAAATAGGAAAGAACTCTCTATATATTAATGTACTTATACACACACTTCAAGATTATGCATTTATTAACAGATACATGAAATAAATTCCATGTGCATATGCACATATGCACACAGAGCGTGCACACACACAGCATGCACACAGCGTGGAGTGAGAGGCATGGGGCAGTGTGGAAGAGTTTTAACATCAAACAGACCTGAAATGAGTATTAAAGGCCCCCTTTATTTTTAAACTTTTACTAAAACAAGATGGATTTCCCTATGTTATATAATGGTGAATTTTAGGCATAAATAACGTTTTTTGAGTGTTGCATAATTGTACGTATTAATGTAATGTAACTGTGGTTAACGAAGAATTCATCAAGGATATCACTGTTTTGTGGCATTTTTTTTTTCCTCCTCTAATCTTTGGACTTGTGAAATAATTTCACTATGAAATAAATGTTGGTTCTTGTCATATTCTAAGGGAGATTGATGTAAGTGGCTCCACTCCAGCTTACAGAAGGTAAACCACGACCTTTTTGCGTTCTCTGAAAACGCTTGTCTTCCGATGCCTCTGTTTCTAAGACTGACAAGCACTCTGGGGGCACTGTGACGCCTGCTTCTAGCGGCAGAGTTGCTGCAGCTCCTGTCCTGGCTGTGAACATTGTTCTCTCTCTGGTGTCTCTATGTTCATAACTACAGAGACTTCAGCTCTATTCCATTTCATATTTGTGCTGAATAATCATTCCATTTTATGGGAGAAAACACAAGATGTAAAAGCAACAAGTGACCCATCCTTTGAAGCTTACAAGAAGAGAAACATTAATCTATTTCACGTCTTGAAAACAGATCAGTTTTATTTTGCTCAAAAAGGGCACATGTACATTTTTGATCTAGGTCTTAGAAACGTAGAGTTTCAGAGGATCAGCATTATACACACTGTCACACACACACACACTTAAAATTCAGATGAGGAACAAGATAGGAATGAGGTTTTGTTAGGGACGCAGAGCACCTAAAACCAAAGGATATCGACAGTAACAAAGCTGTTTTTACTGTAGTGCTGACTGAACACTCATGCTGGTGTCTTCATGTGGACCATGGCTTTCTTGTATTTCTTTGCAGTTTAATAAATGACTTCATATCTCAGGTTACCTTTCCACATCTCCTGGAATATATGTTTATGTCCTTAAAGTTTCAGTGTCGTCACTTTAGTAGCTTTAGTTTGAGTTTTTAAATGTTTGGTAATATTCCAACAAATATTTTTTAAGACATTATGAAACCTTATGAAGTGCCATATATTACAAGTGAGATAAAACAGCAAGCAAAAGAAGGTTTGCAGAAGGTTTTTAAGTGGCGAAGTGCGGGCCTGCCCATTTTGGTGTCTCCTTGGTGGTTACTCCTGAGAAGGGCCTGGAGGAAGAGCAACTGAGGCCTAATCTACAGGCAACTGCCAAATTGTTTCAGTTGACGTTTTTCCCTCTCATGTTTGACTATAATAAATAGGTAGTTGCCAGTGGAGCCTTCAGCCAACCACCTGGTAATAAACTGTTAAAAATGGTGCAAACCCTAGGTCACAGGTGTGGGGGCCATTTGTCTTGCCTGTTAACAGGCCTGGCCTTAATTCTTTTCTCCCATGGCCATTTCTGCCTTTGGGGAACTCACAATTCCTGTTGACTAAAAGAGCACCCTTTTCCACCACAAGCCTGACAAATCAGACGTCCACATAATTTCTGAACTCGTTTTGGTTAGGACAGGAAGCACAGGCTCCCTTCCTGTCTGTGTTTTCCTAAGAGAAAACGGTCTTCCCTCCTTTTTTGCATATTTGGCAAGTGGTTCCACCTTTCTCTGCACCCTGGTGGAGTGTGAAGGCAGCAGAGGAACCTTTTGGAGGAGGAAGAGGACACAGAGGCCCTGTAGC
CAGGCACCAAGATCCCTCCCAGGTGGCTGGGTCTGAGGGGAACTCCGAGCAGCCCTAGGTCCTCAAAGTCTGGATTTGTGTGGAAAAGGCAGCTCTCACTTGGCCTTGGCGAGGCCTCGGTTGGTTGGTGAGTGCCACACGGTTTCTTTGTGTGCTTGCATGGATTGGAATAGCCATTGTGTTCTTCCGTCTTCCCTGCTGGTGTTTCCACAGTGGGTGGCCTGAGCCCAGAGCAGCTCCCCATATCCCTGTGCAGGCCACCTGTCTCGGGTGATGGAGAGCATCATTATGCTCCGTCTGAACGCTCTGCTTTCGGATGGCCCCATGCTCCACCTCCTGATAGCTCGTGGCGCGGGGCCACGGCTTAACAAATGGCTGAAAATGGGTCCTAATTAGTGGAAAAGTGCTTTCTTCATATTTTCTCACTCGAGTGTGCAGTGATTCATTTTTCTTCTGCAATCAGCTCACTGCTAAAGTAAATCTGACTCTCTTCCCGCCATTGCACACCAAAAGTTAACTCTAATGGGTAGGAGGTTAGGTTTGTTGAGAGAGCAATGCAGTAAAAAGAGGGGATCCAATGTGGTCTTGTCTGTCTGGTCTTCCTTTCTTCGTTTTTTCCTCCCTTGTCTTCTCTGTCATTCCCTTCCCTCCATTTGCCTTGCCTTTCCTGTCCTTCCCTTCCCTTCCTTCCCCTCTTTCTTTCTATAATTGGTGGGGGGTTTGCACAGACTGCCAAAACACTAAGAACTGTGTAAAGTGTTTTTGAATGGCCTTACACATATTGAAGTAGATTTTTATGCTCCATTTTTGAGATCACACACTAAAATCTATACCTTTAAAGCATTTTCTGTTAGTTTGAAACTATTTGAAAATGAACAATGTGGTTTAGATTAGAGTCCTGTTCTGAAGCTAGGAGTTCCACTATGAATATTGATTTATCAGTTTTTGACAAATTTTTGTTGTTATACCAGATTTTCACTGGCAAACCTAGAGCAAATAAAATTCCACATAAGATACTTCCCTAGACCTAATGGGAAAAATGTTTAATTTAGAGTCTTTAGGAGAAATGAGAATGAGGAATTGACCTTTTGTAAGCTTACTTCTGAGGCACTCTGAAGTGTGTTCCAGTGCTTTTAATGGAAACTAGAGAGAGCCAGCAACCCCCTAGTGTGAGCCCCACTTTTAACCGGAAAAAGTGACCTTTTCCTCCTCCTTTGTGCTGAGTTTTGCGTAGGGCAGAAAATTAAGCTGATATTCAAAGAGATTCACTGCAAAAACATATTGATAAATCGTATATTCTATTTCATTAAATTAAAACCATACTGCTAATTATCTCAGGTTGTTAAACATAAGGCAATTAATTATCATTTTAAAAGTTGGTAGGAAGTTGTGAGTACTTTTGCAGTATGAGTGTTTTCCCGCTTTAGTATGAGGTTGTGTATGTTTGCTTGAATTTACAGAATTTTCACTTTAAGAGCAGACAATGTTTTGTTAAAGAAATGAAATTTGCTAAAAAGGAGCATGTAAAGTGAAACATTAAAAATAAATAATTTCAACTTACTTAAGAGCTGCAGAAAAATCTGATTGCTGTGTTTAAAATGAATTTTCCCACATTTCGCTCTCTTATGGACAGGAGCATTTTCTGTCAGGTTATAAATAAAGACATGCCCATTTTTTGTACCCCCACAAATGAGGAAGTTGTAAGCTCTCTGAGGTTTTACTGATGAGCCCCCTCCCCCTGGGTTTGCATGAAGAGATCATAGGCCACAAATAAAGGACTACAAAATGGGGTCTAAACTATCCTGGTGGGGCCTGATACCCACGTTTCGCATGGACCTTACGATGTGATGAATGGTTTTGGCATGAGTGTCTTAAGAATGCTTCCAGATTCGGGTTACAGGACAGCCAGCGCTGAGCTCCCTATTGCAGAACAAAGTAGGAATCTAGAACTTTCTTGCTAACAGGATCCAGCTAAAACACCAAGTTAGATTCTTAAATGA
TGTTCTTTTCTGTCATTATTTGATTGTTGTCAGTAGCAGTAATTGTTACCAAGCCATTGATGCTTCTATTCTTCCCTTTGCCCTTCTGAGACACAGCTCATTTTGACTTCAGTGGAACCCCTCGAAGGTGGGGTGATGAGCAAGGTGAATTTTCAAAGTAAAGCTACTAAGAGACCAAACTACAATTTAAGGAACCTGATTTTTGAATCAAATTCCATATACTGTGGGTATAGTTCAACATAGATTAATTTCTTATAGTTATTATGAAAAAAATCTCATCTTGATGATAGCTGATAATTTTGTGGGTGTCGTAAACAAAACAGAGGTCAGAATTCAGTCCCTTGGGGAAAATTTCCAATTAGTAGGAAACCAAGTGGCCTACCTTAGTTTGAAGACACCCATCAGGATGTCTGCACCTTTTCATCCTCTCTGGAGGAAAGACTAAATACCCATTATTGTATATAGGTCAGGCCAAAGCAGCCTTTTATATTGCAAGGAATAAGAGGTAAATAGATATATGTGCAACAATGAATCCCCTAATGTGTTTACTCTAGAACACATGTTCTTTCTGTATTTATATGTAGATTTTGTAGATCTTGTCTTACCACCTGCTAATGGTAGATACTGTATCTAAATAAGTTGAGGAAAATTTATAGTACCTAGGAATGTGTCCTCAGTGGGCCAATCAATCAATCATGACTTCAGGTTATTTTTAATAAATATACACGTATGGGTTCATAAACAATGGGATGTTCTTGTGAAGATCTAAATAATTTTACTTCTTTGGGACTAAATAAAATATAGCTTTTGCCAAATAAACTCACACAAGCACTTATTTTAATAGAAGTCAAATGGCTTTGCAGAAACTTCAGTTTTACAGGTGCATTGTTTGAAATGTTACGGGTATACAAGTGGATTTCTCTATTATGTACAGTGTTAAGTTTGAGTTTCAAAATGTCCACCTGAAATGATTTACTTGTACGTTAAGATAATTTAACTGCTAAGAAGGCAAGATAAAGCATTCTTTGTGACACCATATGGCCTTGCTGAGGGAAAAACTTACTGTTATAAGTTTGTGTTTATCTCTCTTTTTAAAAAAAAATGAAGAAAAAAACGTTTAAAATAATGGGAACACAGCAGTTCCTGGGGTCCTCTGTCTCTTTATCTTATTATAGTAAATTACCAAAAAAATAATGACCTGGGGCATGTCTGTGTGGACCCTTCTTTTAGAGGCAGTTTCTGTGTTTTGTAAAGCTGTAGGTTCTATTTTCATTGCACTTCATATTGCTGCACAGCTCCTGACCATGCATGAAGGTCCTCTGAAATCGGTAAGAGGGCAGAAGAAAATGATTCTAAACTTAGATTTTTTTAACTTAAGTGATGAAGTGTGAAACGCCATTTATATTTGAGGAAGCTACCTAGGAAGTGGCTCATGTCGATGGCCCAAATCAGAAGAGGGCCTGTAAAAGCTTCTATCAATTTTGACTGTGTATGCTTCTACCATGGCGGCTCAATAAACAGCAGTATTAGTTTAAGAGTGGATGGTACAGTAGTATAGACGGGAAGCCTCTCCTCTCCGTGTGAACCGTGCACCCCTATGAGAGGGTAGAGACAATACAATATGCCTGTAACGTCAGGACAGACAGTCATGGCCAGCTTGAACTCCAGCCCTGGGCTTCTTGCAGCAACAAACGTGAACACAGAGGACTGTCTCCAACTCCACTTTCTCTATTTTTAAAACAACTTTTTGAATACAGTATCTGCCATCTTTTCTTATACCTCACTTTGAAACAGGTGGCTCCACTGTGGCATTTAAAATGTTCTGTTTCTTTTCCCTCTGTATCAAATACCTCTTTACCAAGAAAACATTCAAACAGCATAGTTTTTAACTGTATTTTGAAAGGTTTCCTTAGTTCCCTTTGACCCTTCCTCTTTTGCATATCAGTTCCTGGCCATAAAAATAAAAAATGCTAGGACAGAATTGCACATCTGAGCTGATT
TGCCCTCAAAAAGTTTCACAGTGGAACAAACCGCAGGAGGAGTTTTCTGTGGCTCAGTTAAATGTCGGGGGAGGGTGGTGTGAAAGCCAAATTGGATTCCTGCTTTCCTGTTTAAATCTTGTTTTTCATTGTTATTTGCACCAGCAATACTCTGTGGAATAATCATGAAAATGTGTAGATTGGCAGCTAATTTTTGAAAAATGAAAAGAATCAGAAATGAAATAAGAGTGCTCGGAAGTTTTTATGTTCTCTCAACCTGTTTTGTCAAATTGTTACGAAAACCTATAAGGTCTCTTTGACTAGATACAAAGACTTTGCACATTGCCTTAGCTTTCTCTTGAAGCATTTCCTTTTTTAAAATACAGTGTAATTCACAGTGATATGATAGATTTGCAAAAGTAAAATCTACCAGTCTGAAGATGAAAGGACTTGTCTCTTAGCAGGAATAATGGGTTTTATTAAAGAGGTCTGTGACCTAAGGCATTTTAAATAAATTACAGGCTTGGTCCCTGTCTCCCCCATGTATCTACTCCCTTCAATATAAGCATCATTGAGTATTTAAGGAAATAACCCCAAATGTAACTCTAGTGTAGCTTCACTTGTCAGGGAGGAAAAAGTAAATAGCATACATTTGGCCAAATAACCAGAACTTTACTGTAGAAGTTTTATGATGAAATTTGCCTTTAGTGCAGAGTATTACAAAGATCATGTTTAGTTTCTAGCAGTATATAAGTAGCATCCATCCTTATCTGTCATGCATTTGGAGTGTGCGACCCCTGCACTGGGCTGCAACATTCTGATGGGCAAGAGTGCTAGGGAGAAAGAGGCATCACCATCAGACTGCACGGGTTCAAGTGTCAGCTCTGTGGTTGATTAGCTGTGTGACCTGGGGAAAGCTATTTCTCTTAGCCTTGGTTCTCTCATCTATAAAATGGAGATAATGATGCAGATGCCTTGGGTTTAATTGGGAGAGTTAAAGACACATTTACATATTTAGCAAGTAGGTGTTGAATTCTAGCTCTACATTGGACACTATGCCAGGTGCTCAAATAAACAAGTGGACAAGACAGACAACACCCATGGTCTTATGAGGCTTAACCATTTGCCTCTTCAATGCCAGAAACTTAGTAGGTTGATTAGATAAAGCCAGTGAGTACCAGTATCCTTTTCTTTGCAGCCTTTTCCTGGCACACTAAAAATACTCAGTACATATGAAATATCACTGGACAAAGAATCCCCCTTAGAGTACCAGTGGAGAAGGAAGGCATTTGCTTAAAAGCAAACCAACAGAAAGACATTGTAAGGCAGTTGTTTAAGTCTCAGAGACCTATAATTTTTTTCTTTTTTCTTTTTTTTCATCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCACAATCTCAGCTCACTGCAAGCTCCACCTTCCGGGTTCATGCCATTCTTCTGCCTCAGCCTCCCAAGTAGCAGAGACTACAGGCGCCCGCCACCACACCTGGCTAATTTTTTGTATTTTTAGTGGAGACGGGGTTTCGCCGTGTTAGCCAGGATGGTCTTGATCTCCTGACCTCATGATCCGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACTGGCATGAGCCACCACGCCCGGCAACTACAATTGTTCTTAAAGCTTGTAGAATTACTGTGTGCTACCAACAGACAGGCTAATTTTGAGTGACCCTCAGTACTTTGTACAGTTAATTTGGCACGCTGTGTACTTAGTGGCTTTTTAACAGCTATAAATTTGGGCTGCTAGAAAAGTAGTAAAGTTGTGATTCTTGACAGGCATCTATCTGCATTTTCATTTTTACTTCATTTGTCTAGACTCAGCTTGTCAGAATTATGGAAGAGACTCCTTGTGTCAGGGCAAGCACTGTGAAGAGAGGTATTCACTGTCAGAAAAGAGAGGGGAGCTGGAGGCAGCTCAGAGGCCTGAGACCCGCCTCCACAGGAGCCCCAGCAGGTTCGGTGGAGCTCTGGC
CACACTCTCCTTTGGGATGCTGAAGTCAGAATGAGTTCACTTCCCAGCCAGTCTTGCCAAGGCTCCTCACCTGGAAGCAGCAACTGCCCAGGGCTGTTGGATGTTTCTCCCCAGGGGACAGCCAGGTCCCAGTCCCGCCTCGGTGTGGAAGGAGGAAAGGCAGGGTCCAGGAAGCTGTTTCAGGACAGGCCCAAGGTCCCCCAGGGATGCCTTTCAGGGTCAGCGGAGGCTGTAAATCAGCAGGGCCCACACGGCCTGGAAGAGGCCCCTGTGCTGTCGGCTTGCCCGGCTTGCCCGGCTCCTAGTCCGGCTTCTGCTCCTCCTTTGTAAAGTTATGGATATGCTAATAGTTTCCAACTGAGACTAGGAAAGTAAGTCCTACTTGACACTGTTTGGTCAGAAAGAGGGAGAGAAAGGAGAAGGACAGAGAGAGACTGAGAGAGAGACAGTCTCAGACAAAGGGAGACGGAGGGAGGGAGGGAGAGACAGAGAAAGAGATGGGAGGTAGGTGTGGGAGGAGGGAGAGATGCAGAAGGCAGAGGAAAGACAGACAGAGATTTAGACCTCCCAAGTCAGTGAGCAGTCCAGAGTTGGAGTGGAGGGTGCCTGGTGGCTTGTGACTGCAGACTCCACTCCCCGCTCCTAGAGGCACAGCCATGGACAGCTTCTGTCACGTTGGCCCTGCACTTATCTCTGCATCTATTTCCCCTTGTGCAAGATTCAGAACTGCATGCTCCAAAAAAACAATAAAAGCATTCATGTTCATAAGAATTGCACAGGTAAAAGGTAGTTTGCTGATATTGTTGTATTTTTTACTATCGCTTCTTTTAGGTCTTGCCTGAAATTGTTTGGGTTTCCCAGGCAAAGTAGAAAACTGCGGTACGTTTCTGTGAAATAATTATTCCTTCTGGCATCTCCCTTTACAGACCTACTGATCTTGATTTTTCATTTAGGTGAAAGTTTGTGAAAACATGCCATTAGCTTGCTTTGTGATTAACTCCTTTTACTGAATGTGAGCTCCTTTTAAATTGAGGCCATATCAAGCTTAAATTCCATATTTTACCCGGCACTCTGCATTTCTTCCATGTGGGAGAGGAGGGGCTCAGTAAGTGCTTTGTAAAATACACAGCCGAAGTGATGCACGTGCTAACAAAGGAGTGTGACAGGACTTAAGTGCCCTTCTAGACACTTCAGGCTCCCCTTTGTAAGCTGTCTTGGAAGAGGCCACATTTCCTTTCCCTCAAACAGTTTCTCATTGTTTGATTATTCTTTTAGCCTTTCTCTGGAAGCAAAGCCACTTTTACGAGAAAGTCACTGCTTTTTCATCTCAAGAGATGCAAGTTTGGAGTTTGGGGAAGTTTTCAGGTGCCCGTCAAGTCATCCTTTATGATGTCAGACGAGTCAGGCCACAGAATTCACAGGGCTCAGTGCAGACCGAAAACTTGAGGCCTCTTGTTCAGAAATTATTAAAAATTTTGGTGAACATCACCCCAAGCAAAGAGATCCCCTAAGCACCAGCCCCCAAGCAACTGCACTCATAAGCCCATGAAGCCCCCTGCTGTCAGAAACAATGTGGTTGAAATTGTGTATGCACTTGGAAGTGAGATGGATTGCAAAACACAGGTCTCCATGCTGGGGCAGGAGTGGTGATAGGGCATGGAGTGGAAATGTCCAGCAGGCCCACGTGCGAAAATGCAGAGCTCTCTGGCTCTTGCAGACTTGGCTGCTGACAATAGACGCGCTCCAGGAAGGTGCTCGCTGTGGTGTGATCTGCTGCCCACCCCTAGCTCCCTCCAGGAGACTGGTGCGGGGACTGTTTGCAAATGACTGCAAAAGTAAGAAGGTTCCCACAGAGCAGAGCTTGATTTGGGGACCAGCCGAGGGCAGTTTGTCAGGATTCCGGCTTGAAACTGTTCTCACATCTCACCGCCTGAAAGGACGAGTGTGTCCAGAGGACTTAGCATTGATCACCTCTGTCTCCATGCAGCAAACTCAGAGGC
TCAGCCCGCATTCCACTGGAAGGGCGTTTGCCAGTGGTGTTGGTTGGAAGAGCCTTGACTTTGCCTTAGGAAACATCTTTTTTTAAGAATTGAAAATAACTTGAGTATGCAACAGTAGGGCATTTGTTATATAAATTAGTTGACTAGTGTGTAGCCAGTAAAATGATGATGGTGGTGTGTATTTGTTAAATAAAAAGATATGTGTGGTATTAAATTAAAAAATATTTTAAAACAACATATTTGTAATCTGTTTAGTGTCCTCTTTTTGTAAAAAGTACAGAAATAAATATACAGAAAAAATAGTAGTCCTAAGTGGTAGAAATTATGAGCATTTTCTTGCCTTTAAAAAAAGTTGTAAAAGATTGTATCATTTATGTAGCAAAAAGTTTTAAGTCAGCATTCTAAAAATTTCGTGTTGTTATAGTTGCTGTGACAAGATTTAACTTCTGTATGCTTCACCAATCAATACAGAGGTATTTAAGACCCGGTGTGTGATAGGCCGCGCTAAAATACTATACACATCTTCAGAAAACTAGAGAACTAACTTCTAACTTCCTATATTAGTGTGGCACGGCTGTTACAAAGATTTTTCTCATTTGAGTCTATCTTGCTTCTTTATCATTGTTTTGACAGTTTCAGAAGAATCGTGGCTTTTCCCCTTTTTTACAGTAAAGGTACCTGAGACTCTTGACGTATTGCTTTTTGGAAATGCTTGTGCTGGTCACATGCTTGCATCTGGGCTAGTGTGTCTGGCTTCCGTGTGCTGGTGGATGCTTACTCTGTTTTCTGAAATACTTTTTCTGTACAGTGGCCACTAGCTGTACTCCTAAGCCACACACCTACCTTGAAAATTCATGTCACTTTTAGAAATAGATAAAAGCCCCTCCCATCCAGAAAAAGTGACTATCATGTATATCCTCATCATGACTAATACTGATATTCCTGAAATTGAAAATACATATTCCATATGTACCATAAAAGGTATTAAAGATATATGGAGTGATAGATATATTATATATAACACTTCTACCCTCACAGTTTTCAGCCTAATTGAGAGGGTAAGATCCCTGAATCATCCATCAGTTTTTCAGGTCTCTGCTGAAAGCAGGCCACAGCTCAGATCCACACATCTGAACCAGAGACAGAGGTGGCCAAAAATAAAAAGGGGGACAGGGGGACAACCTGGTTTAGAGTCAACAAATAGACTGCATTTTCTGGTTAGTGAAGGAGCTCTCCTGAAAGTCATATACCAGAGCATAAATGAGCAGATTTCCTTGAGGTCACCTTCTGCTGGCCATAGCTTTCTTATCTGTGGAGCTGCCAGCTGTCATCCACTTTGGGGCACCTGAGACTGCCGAGCGGCAGGCCAGGACCCAAGTGCGAAAACACAGAACACCTTTTTGTTTCTACTCCACTGATGCTGGGGTTCTCTCCCTGGTGTTTGTGGCTCGTAGTACACTCTGTGGAACATTCACTATGGTCATCGAAGGGCAGCATCTTCCCAGTTGTTTCTTTCTTTTCTTTTTTTTTTTTAATTTAAACCGATCTGAGAAGCCAGCCATCTGTCAGCAAAACAGGAAGGCTCGGGCTGTCTCCTGGGCTCGTTTTGCTGCCGTAGTGAGCGTCACTTCTCCCCGTGTAAGAGTGCTGGTGAAGGCTGAGGCAAGGGCCCAGAAAGATTGAGGGACAAAGACAGGAGCGCCCGCATTGCCCATCTGCCAGGCTGGAGGTGTATTCATTATTGATGGAGGTAGTGCAGTTGCTGCTCAGATATGCAGCCCTGCCTGGGTAAATGAGACATTCTTCAGCAAATTGCTTCGTTTTTTGATTGCTGATTGTACGCGTGTCACCAAGCTGACTCAAGGTTCATCGATGCATGCTCAGTAAATTAGAAAGAACATAACTATGGATCAGCCAAGAGAATGAATTCTGTGCCTACAATGACCCAGGGCCATTTAATTTTCTGCTTAATTTTGTTGCAGTCAGTTTGCATTTTGGGT
TATTATGCAGTAGGAAATTAACAATAAATAACAAATTTGGTCCTCCTGTGCTTGTAATGATATTTTTATAAATCTTTGTAATGCTGTTTTTAAAAGGATCAAGGTCTGTGCCAGTCTGATACTCCAGCAAGTATGTGAGGAGGAAAATGCATTATTCTTGCTAGATAACCTTGTTGTTAAATAGCATAGGGGTTCTTTATCTCTCTCTCTTTCTCATATCTTATTAGTATTTTTGCTTTAAACTAAAATCCCTTCCTCTCTTTCTCAGATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGTGAGACCTTATGAGATAGCTGTGTGGGAAGTTCATGAGAAAAGCTTCCCTGGGGCCGGAAGTCACAGTGCTTGGTATGCTCATGGGGGAGGAATAGGGGCTATTCTGCAAAAGAAAAGACCATGATGGAATTTGCCTGAGTGTTTCCTTCACCTGTTACAAATTATCTCACTTTGAGCTGAACAGAAAGCCTCCAAGATGAAATTAGTTTTACTGTTAAACTTCAGGAAAAAAAAACGGGAAGAGTTAAATACATTTTTGTACTGTTGGAAGGAAAAATGGCTGATTGGTTTAAAACCCAAACACATGCCAATGATGGTACTTAAAGAGAGAGAGAGAGAGAAGCTTGAAAAACATAATTGTTGGGCACAGTCATGACTGTTTGTTCATTAAGCATGGACACAACATTGCTCCCCTTTGCCATATATCTTTTCAAGCCGTATTGGATATAGCTCTTCTCATCCAGGAGACCCAGGAAGTGGAGAAGTCTGTAGTAGGAAAAGCCTAAGGGTAGGTCACAGACTGTGACCATTTGGCAGCACTGAGGGTGGACGGCGAGCCAGTCCAACAAAACCGCACAGTTCCCCAGTGCATGGACATAGGAAGACAGCTTTCTATCTGGCCCTGTATCCAGAGGCGTCAGCCCCAGTAGCAGCTTTCATGGACTTTGGGGTTTTCGGTATTTCATATTTTTGAGCCTCACAGACTCACAGCCAGCCCCAGAGGCTGACTTATATTTGAGAAAGTTCTCAGTGGCACCTTGCCTTGGCTGAGCGCCCTCGTGTTTTGAAGTTTCTATGGGATTCTACAAGTTGGTGCTCCTGATGAAGACCAGGACCTATGTGTGGCTGCTCCCCTGCTTGGTGGTTTCCCTGGGGAAGGTGCAGGAGAGGATCTTCTGAGTTCCATGGAACTGGAGATAGATCTGCCAATCACAGGCTTCCTTCTCCACCACTCCTCAGCCGCTCTATTCATGTTTCAGATTTTGGACTTAAACTCTCCCAGGTGCAAAGAACAAACAAAAGGCTAGCTTATTTTTCTTTTAGAGTGAGGCTTCGTATTTATTACAATATAATTGCCACATTCTTTGTGTAATTCTCACATTTATATCTTAAATATAATTCTCATGAATGAGAATTATATAATTCTCTTTTTGTATATCATTGAATATTTTCACTTAATTTTTAATTTTTTTAATCGTCACAAAATAATTGTGTACATAGACACAAAATAATTGGGTACATAGTGATGTTGTGATATATACAATGTATAGTAATCGGATCAGGTAAATCAGCATATTCATCATCTCAAACATTTATCGTTTCTTTGTATTAGGAACATTCGACATCTTCCTTCTAGCTATTTGAAACTATATATTATTGTTGACTACAGTCATCCTGCAATGGTGTAGAACACTAGAACTTATTCTTCCTACCTAGCTGTAATTTTGTCTCCTTTAACAAATCTCTCCCTATCTTCCACTCCCCCGACCTTTCCAGCCTCTATTAGCCTCTGTCCTACTTTCTACTTATAATGATGACAGCAGCATTTGTTAGTTTCCACATGTGAGTGAGAACATGTGGCTTTTTAACTTTTAGAATGTGGTATTCAGGCACTTCATGGTACAGTTGGTAAAAGTGAAAATGTGTCCAAAAGTTTGTGATTATCTATATAAACA
AAAATGGTATAAATACAAATATCAATTTTGCATTGAAGAACTTACCTTAGAGGTATATTCTCACAAGTGCACAGAGCATTTAAGCATTTGTTCACTGCAGCATTGTTATCAGTATTTTAAAACTATGGTACATCCATGTACTTCCACATACAGCTCTTAAAAATAAGGAGGATATGAATGAACTAGTATGAAAAGAAGTCCAAATACATGTGAAAGTGAGAATAGCATGGTTCTGGATGGTATGCAAAGTATGATCTCGTTCTTTTAAAAGAAAATAAATTACATACACATACATATTTTCTATATGCTTGCCCATAACGTTTAGGAAAATTCTTGGGTGATATTTATTAACCTGGACTTCCTCTTGGAAGACTGATGGTAGAAGGAAGGGGACGAGTTAGGGAAGAGGAGGAGAAGGAAAACTTTGCTTTTCATCTTCTACCTTTTAGCATTATTTGAATTTATTTTCCTTAAGCGTTTACTTTGTTTCGTAAACAAAAAAGCACAAAAACAAAAAACGAGTTAAATGGGAAAAAAAGCAGTTTAGCTCTTTATAGCCTCTCATTTGGCTTCGCCAGCCTCTCACTGCAGCCTCAGAGAGCTGGTCTGGGAAACACTGGTAGATGAGGACTGTAATCCTCACTCATGGAAGAGGATCTCATTCACTGGGTTTGCTGACTGTGACTAGAAGTGATTAGGGTGTCAAAAAACCCAAGCATGTTAAAAATTTCCAGAGGCCAAAAAGATGCTTTCATTGTTCTGCTCTTCTTTTCCTTGTCGCTTTCACTTTGGGTAGCTTCTAAATTGGTATTTTGCATGGTGCATTTAAAGAAAATGAGACCCCTTTGGCCAATGCAGGAGTCTACACTCTGATATTCTAGAGTCAAAGCTGAATGCTGACACCTAGGAATTCATCTCTAGAATGTTTATATAAGGAATAGCCCCTCAGTATTCCGATCTCGTATCTTAGTAACGAAACTAACAAAAGCCTGATTCTCCTCTGGTAGTTTTCTTGTCTTTACCATAATACAAAATAAGTAATTTGTTCTGCACCCTGACTGTTCAAAGGATAGGGTAGCTGGGGGCGGGGACAAGAATGGAGACCTTATTACATAAGACTTCCTGAAAAAGGAAACTCTGTTTTTGTTTGAAATGATTTGGTCTGAAATTTAGTTTGTGTACACTTACCAAAGGGATTCCTATTTCTAAAACACTCATACTGCTTTTGATTCCTGTTAACCTTTGAGCACTCTACGTAATGATGAGAGCACTTAAAGAGTCATGTCACTTTTAGTAAAGAATCAAAGGATACTTTTTCTACTTCTTCGAGTTTGATCTCTGCTTCTCCAGTTAAAACCAGTATTTGTTTTTTTCATTTCTAAAGTTGGAAGAAATGACAGTTAGTTATGGCATAAGGATGTACATTTAACCAAATAGGAGTTGACATTCTTGGTAAGAAATCTTACCAAGATTATGTTATAGATTATAAGAAATCTTAACAAGAATATGTTCCTAAATCATCCTCTTTTCCCATAAAATATTAAAGTATCAGCAATTTCATAGGATTCAACCTAATGTATGCGAAATGCTAGATAAACAGATAAATACTTAATATCTGGCTTTTTTTCAAAGCACTGGGTTATTTGTTCCTTGAGATTTATCCTAAATGTGGGCTATACCCTGGTTTACAGTGTCTCACAGATGTGTAGTAGTAGACACTCCATAAGTGTTTACTGACTTGAATCCACAGGGTACTGAGAAAATGCTACTGATAGACTTGGAGGAGAGCATATCTAAAGCAAGCTACCCTTTCCTTTAGGGCACGTCTCACTAATTCTTTGGGTAAAGCGTATTTTTCTTCCTTTTGTGTTTTTGGCAGTCTTTCCAAAAATACGTGTTATACCTATGCATTATTTTTTGGTTTGGTTTCTAAAGAAAGAGTCAGCCGGTGGGAAAGTGAAGGATGTGGGAACTGAGAGATCTGCATCAGCATCCCACCT
CTACCTCCCACGATGGGACCTGAGACAGTTATTTTTGCCTCCTGGACCACTATAGTATCATCTGTAACAGGAGGGACTTGAGCCAGTTGATCTCTAAGGTTCCTCTGGCACCTGTGACCCTAAATAGATATTGGATATTGGTTTAATGCTATTTGTAGTGTGTTTTTTTGGGGATATGGAAACCAGAAGTTTGTTTCCATAAACATAAACATAAACTGTATATATCTAAAGGATATGGAAACCTTTAGATATATATAATCTGCTTACGTAAAGAAGGTTTGTATATATTGCAGTGTCAATGGGAATATTTTATCAAGTTAAGCATAGTAAATCACATTGATTAAATGCTTTGTATTTACCAAACATTACCCAAAGTGTTTTCTCCTTTCAACCTCACAAGGACCCACAGAAGAAAATACAGTTATCATTTCCAACCTGCAGGGAGCTGAGACACAGAGAATTTAAGCAACTGACCGGAAGTCCAACAGGGAGTCAGAGATTGCTCTGGGGTGTGATCCCCACTTGGACCCTAGAGTGGAAGCTTCTCCACTACTTTATAGAGTTGAGATTCTATATTTTGAGCTTGTATTTACCCAGAGAATTATATCCTCTTGGGCAATTGTGTATAATAAAACCTCATGCATTTAGGAGAGGCGGGATGACAGAACTTTGTTGAGTGAATTATAATCTACTTGAGAAATTATTTGCTTACATTTTATAAGCTAATTATACCATATCTCATCCAGTTTTCCCAGAACACTTCTCATAGGTAATGCTTTATTTGAAACATAGGCCATAGGTAAGTTAAGTGTAAATGTGTATTTTTATAATTTAACCAGAAGTTTATTTCATTTTTCTAAATAAGTGAAATTGTATTGCATCTTCTAAATTATTCTATTTAAACACTTGATGTCTTGCTGTCTCCGTCTCTGTGTGTTTGCATGTCATTGTACATGTTCTTAGGAAAAGTGTGGGAGCTTGACGCAATATATACCTTATGTTTCTATGTGCATATAGTTTACCAAATAATACCATAAGTTTACTTAGCATATTAGAATCCATGCACATTATTTTTATTTTATCTTCACCGCAACCCTGTGGGATAGACCAAAATCATGCTTTTCAGCCTCCTTTTTCCACTTGAGGAAAGGAGTCTTAAAAAAGGGACCAGTCTCATGTTCCCATTCGTCTTACAACTAATTGGTCAAGCCAGAAAGCCAGAACTATGTCCTGGGTCACTAACTCCTAGTCACTGTGTGTTAGTATTTGAGATGCCTGTTGGCTTGATTTAGTCATTTATTTTTTAGTGTTTTATAATCCTTGCATACTTTTACATTTTAAATGGTTAACCAGGCAAATTGGTTTAAAATCAGTGCATAAAAATACTGTGCCTATCATGATGGGTTTCATGAAGTGATAACTTTTCATCATGGAGATCCTCAGCTGTCACAGAAGATGAGGGGCCCTGGGTACAGAGGCTCACGTGAGGGATGAAAGTCTCAGCAGCCCGGACTTACACTTTGGGGCTTTTAGGCAAATCAGACAACCTCTTAAGAACTATCACTGAGTTCAGGCAAGGCGAGCTTGAATTAACACAGGGCCCTTGGTGGGCATGTGAATATATCTCACTTCACTACCATCCAGTTCTGACTCTTTACTAGATGCCCCTGTACATACCAAGACTGATTTTTTATTCTCCCTTCTCCCCATGTGGTTTCTTCTGCATAGAGAGTTCCTATTGATCAGTCTGACCCATGGTATTTTAGAATTGCGATCCCTACTGTTTCATTATTCCTTTTTCTCCCCCATGTTGAAAAAAATAAATGTCCTGAGATGCAAGATCAGGGACACTGGAGCACTGACATTTAGTTCAGTGCAGGAACTGAAGGCAGATGTAATTCTTAAGAAGCGTACCTGTTATTATGAACCATCCTCAACAAATTGTAGTGGATCTTGTTTTCTCATAGATACAGCAGTTAAATTTTTTAATAAAAGTAACT
AAGAGTTATTTGGATGTATTTTAGCATGCACTGAGCGGAAAGTACGACATTTCTTCATTGGGTAAGTCCTGATTCTTTATGATCCTCACTTGGTTCCAGGGCCCCATGCATCTAAGGGTGTCTCAGAGCATCCTGCAGTGCTCCAGCATGATCGCAGGGAAAAGCTATAGGAGGAAAAGAGTCAATAAAGTTTAGTTTCTCAACCTCCCACCTCCACCCCATAATAATGACAGCTGGTTAATCATGAGACGCGTGCACACCCCACACGCCCTGTACATGTTTACTCATTGGGATAGCATGTCAGGCCAGAAGGCTCCATGGTCATTTCTATGAAGGTACTTTAGCAGGTCTTCAAGAAGGCAAGTGGCCTGGGTCCCTGCCTCCCCAAATTGCAAGCTCCCTGCTTTATGTAGGAGACCTATGTGTATATTACAGTTCTGTGTAAGATTATTTTGTTATTCTTACCCCCACACCCACCCCCCAACCCCCCGCTGCCACCAAAAAAAAAAAAAAAAAATTCCTCTGACAACCTTCATAAAGTCCTGGGAGTTTGAACACCATTGCTCTAGGAAGTCATCTTATACAAAAATAAGAGTTGTGAGGTGGTTCATATACCTCCTGCGTTCTCCTATTTGGAGTTTTTCCCCATTTATGAAAGAGGTGAAAACGCTAAGATATTTAGCAATTATTACTTTAAACATTTTCTATTTATAGGCCGGGCGCAGTGGCTCATGCCTGTAATCCCAGCGCTTGGGAGGCCAAGGCAGGCAGATCACGAGGTCAGGAGATCGAGACCATCTTGGCTAACACGGTGAAACCCTGTCTCTACTAAAAAAATACAAAAATTTAGTTGGGCATGGTGGGGGATACCTGTGGTCCCAGCTACTCGGGAGGCTGAGACAGGAGAATGGCTTGAACCTGGGAGGCCGAGCTTGCAGTGAGCCAAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCAAGACTCTGTCTCGAAAAAAAAAAAAAATTCTATTTACAGCAGTGAAAATAGTAGTGACTTAATGCACATTGCCAAGGCTTTAGCATAACATGAACACTTTCACTCAATGTCTCTCTGGCCTTTTGTTTTTCCTTGGGAAATTCTTATAATCCTGCTCCGTCTTTAACTATTCATTTTGTATTGGCTATCCAAATATACCCAATAATGCTCTTTCTGAAAATATGCCAATTGTGGTAATTACAGCTAAGCTGGAATATTAAATTGTGATGTCTGTTTTCCAGAGAATGAAGTAGTATTCCCCAGAGCATAGGCTTGGTGCCTGTGCAGGTTCTATTTTAAATATTCCAGGAAGGGTTGTTTTATATACTGAGGATGATTTTACTGGTCTTGCCAGTCGTCTGAAATGCTGGTATTACTCTTGTGGAAGGTTTATTCAAACAAACAAGGACATTTCACACAATACCTAGTCATGTTTTTCAGACATTTTAATGTTTGGTTCATCATTTGCACACACTCTCAAAAATCTAGGTTTGTCTATGTGTTCATATCATTTTGCCTGTTGCCAGCTCAGTCAGCAGGCACACTCTCCCAGGCTGTTGCTGTTTTGTTAGACTTCTTCAGGACCTTCATCTAAAATGGTCTTCCACACGTAGCTATACTGCATAAGTTCACATCATCTGTTTCTTGCATGTGGGTTGTGTCTCAACTCAAGTTTAAGTTAGATTTGGAAGGGCGGAAACTATAGGAGTTGCAGCTTCAGTGGAGAAAAGAGCATTTCCTACTAGTTATGGCTTCCCAAGGAAGGTTAGATTCCTCAGAGTAGGAGTGATTCCCCAATGCTAGAACCTTTGGTCAAATATAATTCTAATCCAGTCAAAATAAATACAGGTATTCTGTAAAACCCGATTTCATTTTGTAAATCCTACTTTGTATAGTATAAGCAATTTTTGTATTTGTGTGGATTATATTTTATTTTCCTATTTCAAAGAGAAGAATTTGTATTAGCAGACTCCCTTT
GCATGCGGAGAGGGGATCATTTTCCCAGTAGGCATGGGGTTCCCTTCCATTCCTTGTCCAGTCTTCTTTTCCCCACTAAGTTAAGTCAAACTAAGCAGCTGGTAAGATATTCCCTGGTTCTTGCAAAGAAAGTGAGCAGATGGCAGAATGTATAGCTCTAAGCAGAATACCTGGTGTGGTATCCTCAAACACAAATTGACAGGAGGGTGTGGTGTGGCAAGCTCATTGTGGGGGTAAATTGGAATAAGCTTACAGGGGGAAGAGTTGACAAAAGATAGGAAGAACCTTAAAAATATAGATGCCTTTTATGCAGTGATAAAATGTCTAGATATTTATACTGTGGTGATTATTAGGAATATGTGCAAAGATTGGCTATTAGGATGTTCATTACAGTGTTGTTTAATAATTATAAAAGGACAGAAAGCAATGTGGACTCAAAAATAGGAAAAGAATTTAAATAAATCCTAGTGTACCCGTTATACATGAAATTATGGAAATATGACCCTGAGCATGGAAATATGTACATGAGAATGTCTAAAAGCTAGTTCATTTTGAAAAACAAAATAATGTCACCTCATATTATTTATAGTATATAAAGATGATTTTAAGAGTGGCAGTGTCTGGGATTATAGGTGATTGTATTTCTTCCCTTTTGCACATCTATGTTCTCTCATTTGTATTGTGTGGGGAGAAGTGACTTTTTTTATAAAAAGAAAAAGGTATATGCATCCCAGCAGAGAAGCACTGGCTCCACCCAGTACCTGCCTCCTCATGCCACCCTCTCAAGCCAAAAGCCGGGGGAAGCCCAGGCACCTTGACCATGACCGCCCGAGACTCACACTTCTTCTTTCTCATCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGG\",\r\n                50344378 - 1);\r\n\r\n            // the stuff from the genome represents a small part, most of it is brought in through the RNA-edit insertion\r\n            var codingRegion = new CodingRegion(50358658, 50367353, 169, 1602, 1434);\r\n\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   1, 50344378, 50344383, 13,  17),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   1, 50344384, 50344518, 19,  154),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 50344519, 50358643, 154, 155),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   2, 50358644, 50358697, 155, 208),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 2, 50358698, 50367233, 208, 209),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   3, 50367234, 50367353, 209, 328)\r\n            };\r\n\r\n            var rnaEdits = new IRnaEdit[]\r\n      
      {\r\n                new RnaEdit(1,  0,  \"GAATTCCGGCGT\"),\r\n                new RnaEdit(6,  5,  \"A\"),\r\n                new RnaEdit(16, 16, \"T\"),\r\n                new RnaEdit(97, 97, \"C\"),\r\n                new RnaEdit(316, 315,\r\n                    \"CCAGTAATGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTG
GGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAAT
TCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGA
GGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA\")\r\n            };\r\n\r\n            string actualCds =\r\n                new CodingSequence(genomicSeq, codingRegion, regions, false, 0, rnaEdits).GetCodingSequence();\r\n\r\n            const string expectedCds =\r\n                \"ATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGGCCAGTAATGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAA\";\r\n\r\n            Assert.Equal(expectedCds, actualCds);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCodingSequence_RnaEditSnv_StartsUtr()\r\n        {\r\n            // NM_001135635.1, chr11:65684281-65686531 \r\n            var genomicSeq = new SimpleSequence(\r\n  
              \"TTTTAAAAAACACTCAAGACACAGACCCAAGCCGGGTTTTATTGAAATGCCAGGAGCAGGCACATGTCAAAGTAGCCAAGGAAGGGGGGACAGTGGTACAGGCTGTGTAAGTTGGCAGGGATGGGCAAGCCTCATGTCCATGGTCCTGGCATCCCCTCTGCCAGGGGATAAGTAGGCACAACTACCCTCCCCTCAAAATGGCATGCTCAGGCCAGTGGGGCCCCTACCCCTGGACCATGAAGGCTCCAAGAAGGGCTGGAAGCACTAAGTTTTCTCTCTCCTGAGGGGGAAGGAAAGAAGGGGAGATGCAGGAGGAAGGGGAGGTATAGCGGGGGATGAGCGTTCCAAGAAGTCTCTCCTTCTAGGTGTCTGCACCCAACTCATGGTGCTGGGCAGTGGAGAGGAGCAGCATTACAAAGGGAGGCTGAAGGCTCATCCCTCAGGGAACCGGAGCCCCCCAGCCTGTGGGGCTTGTGTCAGCCCTGAACAGAGGGCAGAAGTTCAAGGGGACTGAAGATGCAGGTAGTTCCCAAGTGACCTAGGAGTCCCCAGAGCTGGGGGGTGTGGCCTTCATAGGACAAGGAGGAAGACAGGAGGATCCAACCCCAGCATGGAGGGGGGAGTGGGCAGTCTCCCCAATTTGGCCCCCCTAGGTCAGTTCCACGTTGTTGGCACGGTCAAGCACTCGGGAGCCACGGGCACTACCCCCAAGCTGGAAACGGCTCTCATAGAGAGTGGGGCAGAGGTGCCAGCGATTGGCCCGGTAGATGCCCAGGTAGGTGTAGACATCAGGCTTGTAGGTGAGCAGGCACTTAATGCCCGCTGCACGGATGGCTGAATCCGCCTCCAGTACACCCAAGCGGTCCGTGAAGTCGTCCGTGTAAACACAGATGACCTGGCGCCCACCCTCCTTGGCACGTGGGCTCACCTTGGCCACCTGAAGCTGGCCTTCAACCACGGCCCGGGCAATGCCAGCCCAGGCGTGGTCCAGCTTGAAGCCCGGTGCCAGATGCATAAGCCACTTGCCCGAGAGCACGTGGTGGGTGATGGCGAGCTGGCGCAGGGTACCCGGTGTGATGGGCCGCCCACTGGTCTGCAGAGCTTCCCAGGCTGCCTGCAGGCCCTGCACGTCCCCGGAGTTGGGGCTGTAGCCCTGCCCATACACTGCAATCCAGCCCACAGGCTCTGAGTTGGGTGAACCGGGGTCCCCATAGCGGGTAACTTGGGATGGTGGGTACTTGGCCAGCCAGGCATCCAGCTCAGTGGCAGGCGTTGTGCGGGCATCAAACACTAGCCAGGGGTCCATGTCAGCTGCCATGGCCTCTGCAGCCAGGTGCTCGGCGGTGAAGCCATCCTCACGGCCACCTGGAGAGCCCTCCTCTTCCAGCTCCTCACCTGGTTCCATCCTGCTGTGAGGGAACCGAGTCAGGGCAGGGTCTGAGACAATAACTACAGATGCCAGGCACTGGATTAAACTGTGGCCTTGAGTAAGAGTTACTGTCGATGCGCCTCAGTTGCCTCATCTTTACAATGGGATAACAACTGTTCCTGTCCCGTAGATCTGCTATGAAAATTAGATGCCTGAGGAGTCAGCGCTCCAGAAGGGTTGCTGCAGTTATTACTATTCTCCTTGACTTACAGAAAAGGAAACTGAGGCTGAGAAAAAGGACTTGCCCAAGGTCACACCTGCAGTGCGTGGCAGGGCCGAAGGGTGAATCCAGGCGTGGGAGCAACCAGCCCCAGCTACACCTCCCGGCCCTGCCAAGGCCCCCTTTTCCTGGCAGGTATCCGGTGCGCTGGCATTTAATAGAGGAACGCAAAGAAGCGCACGTTCGCGCAGCTCCCGAGGCCGGCTCTGTAAGGCCAGGCCTCCCAGGCAGGCGTTATCGGGCCCACTTACAGACGAGGACGCTGAAGTCCAGAGAGGTTACAGGCCGTTCCGAGGCCAATGGGGCGGTTCCCAGACTCGAACCAGGGCTTGTTA
GAGCCTGCAGGAGAGCCAGGCTCCGGCCGTGCCGCGCCCGCCGCCATTAACGCCCACGGGCCCGAGCTGTGCTCCCGCCCCGGCCCTGCCCTGCCCCTCCCGCCGCCCGCAGTCACCTCCGGCCTTCGCTGCGTTCGACGCCGGCCCAGCCCCGGGCCCGGCTCCGCTCCTGCCGTGGCTCCGCGCCACCGCCACCGCGCCCCACCCCCGCCACGGCCGCCGCCGCCGCCGCCGCCATCTTAGCGCCGCGCCACCTCAACAACAACT\",\r\n                65684281 - 1);\r\n\r\n            // the stuff from the genome represents a small part, most of it is brought in through the RNA-edit insertion\r\n            var codingRegion = new CodingRegion(65684930, 65686502, 30, 911, 882);\r\n\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   1, 65684281, 65685689, 152, 1560),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 65685690, 65686380, 151, 152),\r\n                new TranscriptRegion(TranscriptRegionType.Exon,   2, 65686381, 65686531, 1, 151)\r\n            };\r\n\r\n            var rnaEdits = new IRnaEdit[]\r\n            {\r\n                new RnaEdit(912,  912, \"A\"),\r\n                new RnaEdit(986,  986, \"C\"),\r\n                new RnaEdit(1561, 1560,  \"AAAAAAAAAAA\")\r\n            };\r\n\r\n            string actualCds =\r\n                new CodingSequence(genomicSeq, codingRegion, regions, true, 0, rnaEdits).GetCodingSequence();\r\n\r\n            const string expectedCds =\r\n                
\"ATGGCGGCGGCGGCGGCGGCGGCCGTGGCGGGGGTGGGGCGCGGTGGCGGTGGCGCGGAGCCACGGCAGGAGCGGAGCCGGGCCCGGGGCTGGGCCGGCGTCGAACGCAGCGAAGGCCGGAGCAGGATGGAACCAGGTGAGGAGCTGGAAGAGGAGGGCTCTCCAGGTGGCCGTGAGGATGGCTTCACCGCCGAGCACCTGGCTGCAGAGGCCATGGCAGCTGACATGGACCCCTGGCTAGTGTTTGATGCCCGCACAACGCCTGCCACTGAGCTGGATGCCTGGCTGGCCAAGTACCCACCATCCCAAGTTACCCGCTATGGGGACCCCGGTTCACCCAACTCAGAGCCTGTGGGCTGGATTGCAGTGTATGGGCAGGGCTACAGCCCCAACTCCGGGGACGTGCAGGGCCTGCAGGCAGCCTGGGAAGCTCTGCAGACCAGTGGGCGGCCCATCACACCGGGTACCCTGCGCCAGCTCGCCATCACCCACCACGTGCTCTCGGGCAAGTGGCTTATGCATCTGGCACCGGGCTTCAAGCTGGACCACGCCTGGGCTGGCATTGCCCGGGCCGTGGTTGAAGGCCAGCTTCAGGTGGCCAAGGTGAGCCCACGTGCCAAGGAGGGTGGGCGCCAGGTCATCTGTGTTTACACGGACGACTTCACGGACCGCTTGGGTGTACTGGAGGCGGATTCAGCCATCCGTGCAGCGGGCATTAAGTGCCTGCTCACCTACAAGCCTGATGTCTACACCTACCTGGGCATCTACCGGGCCAATCGCTGGCACCTCTGCCCCACTCTCTATGAGAGCCGTTTCCAGCTTGGGGGTAGTGCCCGTGGCTCCCGAGTGCTTGACCGTGCCAACAACGTGGAACTGACCTAG\";\r\n\r\n            Assert.Equal(expectedCds, actualCds);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCodingSequence_NonZeroStartExonPhase_CdsBeforeFirstExon()\r\n        {\r\n            // NM_001220775.1, chr7:\r\n            var genomicSeq = new SimpleSequence(\r\n                
\"ACTTTAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGTAAGAGTTAAATGTTTGCTGTCTCTTAAAAAAAAACTATGTGGGTGTTTTAGATGCAAGTAGAAATGAGTTGAGGGTGGAAGAAAGGGAAAAAAATCTTATTTTTTCAAAAGGAAAAATTGGTAAGCTTAACATTCCTTAAATATCTTAGAATTTTTTCCAATAAGTATCTTAAAAATAACAAACCTCCCATCAGTTTTTCCTAGATTTGATTTTGCAGCATCTGGGGCCTGCCCTGTGATCTGCCTGTGGACATCGCTCTTAGGGGCGGCTGCACCAGCGTGCACAGGGTGGAGAGTTTGGGCCTGGCTCGTCCGGGGGACACCACACTGCAGGACACTCCAGGCCTGGCCGGCTTCTCAGAGCTTCAGATCCTCATTTTTCATATGAAGCTCCTAATGCTCCCCTTATGGGGGACTCTGAAGGGTTAATGGGAGGAATCATACAGTGACTGACCCCTGAGAAGTGTCCAGTGAAGACAGGGCTTAGCTAGGATTGCTGTTTTGCCTAATGCTCTGCGGGATTAAAAAAAAAGAAGAAGAAGAACAAGACCATTCGTCTCTCTAGGAGCATTGCCCAGAGTAGGTATTAGACACACCAACACCACCATCCAGCCAGACGCTGCAGGGACAGTGAGCCAGGGTCCGAGTGGAAAGGCGCTAGGCTTGGGAACCAGCTCAGAGTCAATACAGAGCCACCGCCACTCACCAACTCTGTCAGCTTAGTAAAATGGCTCTGCCCCTAGAGCCCTGGTTCCATCCTTTAGTATCTCACAGGGTGATTGTGAATATCCCATGACTCCAAGATTGAGAAAACGTTTAGAATCCCTCGGTGTGAAGGTTAACTCTGTCCGGAAAGAGGACCAGTAAAAGCTTCATGAGGCTGAGATGCACTTTGGAAGAGGAATAGAGTTTCAGCACATTCTAGGTGTTGGAGGAATGGGGGAATCTAGGCAGATGTTTAAAATCAATGAGAAACCAGAATGCTGACCATGAGGGTTGGAGTGGGGGCCTAAGGACATGACGGAGGAGCAGGGTGTGTTCCCAGCTTAACTCAGGTACCCATGGGGAAGCAGGAAAAGTGAAGGTGTCCTAGGCAGCTCTGCCACAGGATGAATGGCTTCAGATGCCAGGTGAGCGAGGGACCCTTCATTCAGTCAGCAGGAAAGAAGCACTGGCATATTTTTTATGAGAACAAAGGCTAGGATAGTAAAGACAGCAAGTACCAAAAAATGACTGGAAAAGGGAGACTGTGGAGGCAGTGGCAGCAGGCATGGAAAGAAGGGCTTGTGAAGGGGAAGGGGTGGTGTCAGAGGAACATAGGGCTGGGGGCAGGGATTAGGTGAGGGAAACCATGAGTCACACTGATTCTAGAGTAGTGTGCCCTTGATGAAAAGGATAACACCAGGTTCTAGGAAAAGATGGGGTTCTGTTTTTGACGTGTTGATTTTCAAGGACTTCTGGTGTTTGTGACACATGGGGAAATTGTGGTGGGAGAGAGGTGGGGCCAGAACAGGGGCTGGTGAGGCCAAGGGTCCCAGAGGGCACCTGTTGACCTGCAGGATGACATGAAGGGGGAAGGACAGAGGCAAGGCCAAGTCCTGGGCACCAGCCTCCCTCTTGCAGCTTCAAATAGGGCTCCATTTTGACCTTTTGATTAATTAGAGGTTTGTCATAGGTTGGGGGTTGAGAGGAGCAAGGGAGAGAAGGATTCAGTGTACAAAAAGAATGAAAGCCACTGGCTGAGCCAGTGGGGAGTTGTCCACACACACATGAGCCTTTGGACCATGAGAACGAGGGAGGCCTTGCCTTCCTGAACGGAGTAGGAGTGAGGTCCTGTGCTGAGCGTAAGCAGTG
GGATTCCCACAGCACTGGGCACAGAGCCCACGGGCTGCCTCCTGAGCAGCCAGCATCTGCCTGGGGTGGACACAGTGACAGAGAGATGGGTGGTGACTGGGGTATGGGCAGAGATAAGGCAGCAAGTGTGTGCAAGGGGAGTGAAGGGTTACTGACCTTAAGAAGCAGGGATGGCGTCCTCTGTCAGGTGAGGAGCCTGGAGAATGCTTTGGTGAATGAACGTTTGCAGCCCCTTTTAGCTTTTGGAGACTTGAAACCAAAGGAGAGATTCATCTGTGAAACTCTACTGGAGCCACTCCCCAACCCCCACCCTTGTGAGACCACAATGTGGGCGTTGGCTTGAGATGCTTCTGTGTTAGTAGAAGAAATAAACAACACAGTGCTCTGATGAGGCAAAGCGAAGATGAAAAAGGAGTTCCCAGGGGACATAGTAGGAACAGTGGACGAGGGTAGCAGAAGAGGAGTTTGGAGCAAAAGACTCACAAGCAGCTGCATAATCTGTTGGTGCTTGGCAGTTCATTTGTAAAAATGATGCCTCTTCCTGCCCTAAAATACCTACCTTACCCCCGCTTCAACTTGATGAGATTTCCATCAGTCACTCCCAATGTGTCACAGCTTCTGCAGCCCTAAAATTAAAAGGTGAGTGAGTCTCTGAGGCCCCTCTCCACTTCTCGGATGCTGAGTTTAGCCTTCATGTGAATGTGGAAAGACTAGGAATACAGCTGTTATCACACAAGCTGGCCCAATAGTGGTTCAGTTGAGAGAGCCCCATCCTTCAGAGTCAGCTCCAGCTAGGAGTGACTGGTGGCCTTGAGCATGGTGCTGGGCTTAGTGTTGCCATCTGTGGAATGGGTGTGGGTCTGTTGCCCTGCCTCCTCCCAGAGCTATTCTGAGGCTCAGAAGGGGTGATGGATGTGATGGTGCTCCCAACACTAGAAAGCATCTTAAGAATGTAAGATTTTCATGATGACTGTTGCTCAGAGTGGCTATTATAGTTTTGCTTTATTGTTCTATAACCTATGATTAAAATTTTTACCTTAAACTTTGACGTGAGTGTGAATAAGTATTTGTTTTGCCAGCAACATTCCTCACCACTGGGGCCATTAAAGATCTCCCCCTCTGAGACCATCAAATACAGGTCAACAGGACTGATTAATCTAATTAGAAAAGGGCTTGTATTAAATAGCAATGATAATTGTTGTTTTTAGTCTGTCTGGTGTTTGACTTGGGAACGTTTTTAAAATAGAGAAAAGCACAAAGAGGAAAACAACAATTACCAATATTCCTGCTACCCATTATAATTATCTAGGTATATTTTCTTCTTTTGTAAGAAAAAGAAACCCTGTTATATTGTTAAAATAACACAAAGTTAATATAAAGAATTTTAATGCAAAGATTAATGTTTTCAAATCACCACAAAACCCAACATCCAGAAATTACCAATATTAAAAGTAGAAAAGTATCATTCTAAATATTTTCTGTTGCATATGTATGTGAGTGGATAGGCTGATGAATTAGGTGGATTGATGGATAGGTAAATATGAAATAAATACTTTCATAAATATTCCAACTTATCATACATGCCTTAAATTCAAGAGGTGAAAAAAGACCCAAACAAAACTAGAGAAGCGGCTTATTTTAAATATCCTCTGACATAAAGGAATATTATATTTAAAGGATCCTCTAAGATTAAAAATATGTACTATGAAAAACATTAAGAAATTTGAATTTTTTTTAATCCATTTGTTTCAATTTAAGCAGCATCTACTGGCTCACTGCTTTGAAAAATAAGGACAGTATTCCAGTTCACATTCAGTGTTCCAGTGTTCACATTATCTTATTATTTTTACATTGTCCAGCTTTGTAATATTCACATTCTATTCTGTAATCATAATTCATAGTAGTTTAGTTATTTATTACTAACTCTATTTAAATAGATTCAAGGATCAGACCCTGCCCTTTTCTTCTTATTTATGTTTATTTTGATTAATCTCTTAATT
GATTGGACTTTACATTCAAGCAACTTTTTTAAAAAAAAGTTTCTATAGATGTTCTATTTCTATCATTGTATTGTTTTTGAGGATGTTGGCCTGTTGCCTTTGTATTTGATGAGCATTTTGACAGAGTCTATGGTCTTGGGCCACTCTTTCTTTTTCTCCCTTGAGAACTTTTTAGATTTTGCTGATGGCATTGCTTGTTGAATGTTGCTGTGGAAACATCAAGTCTAGTGTAACTGTTTCTTCTTCAAGGTGATTTGCATTTTATTCCTGAATGCCTGAGGGTTCTTTATTTAACCTTGAAGTTAAATACCCTAATTAGGATGTATCTTGGTCTATTCATTCGGAATAAAAAATTCCTGCCATTTTGTCTAGAGAGTCCCTTTTTTTTCTCTTTATTTCTGGGAAATTCTCTTTTATATAAATATGTTTTGTTCCATCTATTGTGATCTCTGTTGAGGGATACCAGTTGTCCATATGTTAGATAATTTGTCTTCCATATCTGTTAACAGTTCTTAAAGTTTTTTGTTTATTTCTTTGTCTATTTTTACATTTACTCACTGTTCTCTTGTGGTTTTCCTCTGTCAGTAATTTAATTTTTAGTAGTTCCTGTTCTATTACTTGCTATTTTTAATCCATGCATTAATTTTATAATAATATTATTTTGCTCCTTATTTTGTCTCCTGAGACCCGAAATCTCTTTTTTCCTCTTACTCTGTTGCTTTTGCATTTTATTTTGAATACTTTTAAAATTGATTCCATGTTATGAAGCAATTATGAGGCATTTCCTCTCTTGTTGGAATTAACGATTTTTTCCCCTAGGAGGGACTCTATGGTCTGTGTTTTACTTCCTTTCTTCCCCTGTATTTCTAGAAAATATTTTCCTAGTAACCCTGACATTTCTTTTCATCTTGCTTATTCTAGTTGGTCTGATATAGCTTGATTGACATTTCAGCCTTCTTCCCACTATATTTTTTTTTCCTGTGAGAGCTATTGGGTTTTCTAAATCCTGAAAGAATGCCAAAGATGGGGTTGGAGGAGTTTGGTGAGGCAAAGTGCAGCCTTTGTTAAAATACTTTTCCTTTGCTCTCTCTCCCTCATCTGAAATTTAGTTAAATACCCTAAGCCATCAGCACTGTACCTAGTTGGGGAATGCTTTCATCCCCACAGGAGATTCTCTGGGGCTTTGGGCCATCTTCCCCTTCAGTGTAGACCACAGAGGACTTTGCTTCTGTCCCAGGGAGCCCGCAGGGGCTCACTTCTCCATGTTCATCTGATTCTTGTCAGCCAAGGTTTCAAATGCTTTTCTGATCAGAACAGGGAAAAGATACCTATCTGAATCATGTCTTTATAGATATGAGGCTATGAGGGAAAATTCTGAGGTTATTCTTGACTCACACCTAAAGATTTGGAAATGAGATTAGCAGCAAAGCTTTGCCCTACATCTCATGTCAGAATTTTCTGTTTCTTTCTAGTCTTTGAGTGTATGTGTGTTCTCACACACGCCATAATGAAATGCATATTATATATAATTATGTGTATATATAATATTCTATGACTATACATGACATGTTCCTTTAGCTGATTGCTGTTAAGAGAAATTTATAGGTTTTTATTTTTCTTGTTTTGTTGGGTATTAAGGAAGAGAAATTCTATGGTAATTTTCATGTGGCACAGTAATCTGGCATATATGTTGATTTTTTTCCTACACCCATTTGTTGTGATACCAAGTTTGAAAACAACAGATTTCAGTGGTTGCTTGGGAAACCACAGAACCATGACTTGGGGAGAGACAGGATGATTAGGTGGGAAAGCACCCTTTTGGTGGGGCTGTAAAGACTTTTATATTTAGCAAAATTGGCTACAAAGTCCATTCCCCTCCTTTTCTTGCCTTGATTTGGTAGAGGGATAGACTTGGATACAAACTAGAATGGATTCATTCTTCTCTGGAGTTAGTGTAACAAGACATTTAGCTGCTCAACACAAAAACAGAAACAAAAAAA
TTGTGTGGTTTCAGCAGTGCTATACAATTACTTTTTCTGACCTTTAATGGAGAGAAACACCACTTCTTTGGTCCCTACCATCAGCTTCATAGGGTTTTCATCCTGTTCTGTTTCTGGGAGGGCGTAACTGGCCATGCACAAGTTTTTTTTCTCTAATCAGAGTATGTGCCACTTCTGACCACCAGTAGATGAAAACGAATGGAAACCAGGCTATTATATGATACATATCCATTACAAAATAAGACATGAAACTCAAAGGTACTTTATGGTATAATGGGGCATATATTCCTGGACAATTCTTAATGGTCACAGATTTTATAAAAGGACTATTAGTAAATGTATGAATTACAGAGTAATTTATCCTTCTGTTAGTAAGAACCAGCTGATGACCTCAGTGTCAGGTGCATCGTGGAAGGTGTTGGGACCTTCCCTTGCCACCACCCTCACCAGCCATCATCAGCCATAACCTGCACATTGGGGAAGTTTTGACTTATCCCTCACTTTTGCCCCTCTTCAAGCTGTTCTTTCCACAGTGAATGAGAAGGCCACTTCTTCCTTCAAACCTTTCAGTGGTTTCCATTTTCCTTTAGACAAAGTCTCTGCCTAGCTGGCCTCTGCCTGCCCCTCCTGCCTACCTCTCGAGCACTGCCCCCACCTAGGGCTCTGGTTCCCCAACCTTCACTCGGTCCTGCCACACCTCCCAGCCCCTTCTCCCTTCAGAACTTTCCTTCTTGTTGTCCCCAACACTGGGACACAAAACCCTCCTTATCAACCCTCCTTATCTGGCTGACTCTTACAAGATCAGAAACCTGTGTAATGCTCTCATGGCACGCTCCCCTTGTCTTCGTGGATTTCTCAGATGGGAAGGAATTATCCATGCAATCACACATAAACTTCTACCTACCCTCCCCTAGTAGCTGTCTGCTGCTAAGGATGGGGACCATTCTCACTTACTCACTGTTCTGTCCCTCTGCCCAGTCCAGATGTGTTGAAGGATGGAAATATACAGAGTAGTGGTAAAATATAAACCGTTCAGACATTCCAAGGATGGGCTCATGTGCTTTGACTCATTAATGTACCACTGCTGAAAACAGAACACAGCCGCAGTCTTGCCAGTAAGAGTGCAGTTACTGTAATTAATGAATTTGCTAATTAAGCCATGATTTCATACTGAACTTATGACCAACATATTGAGAAGGTGTGTCTTCAAGAAAATTTATTTTTTGTATTAAGATATTTACTCCAAAGCTAATTGAAGAAGCCAAATCTAGGCTCTGGTTTCACCATTGCCAGGGAAATGAGCTCATGGACTCCTATGAACTGATGATGTTAGATCAGAAGTTTCTCAAGGCCAGGGCCCAATCACTGCTGAGGCGTCAACAGTAGTTCCTTGTACATCAATAATTCTCATTACTTTTAAAAAATAACAGATGAATAGCAACTATTTTCCCTGTAGCTCCCTTGCTGTGCCTCCTACCCTCCACCACATGTTTCTGGGGAGCCCTGCTTCGGGCCTGCCAACTACAGAGAATTACTTTTGAGTATCCCTTCCACTCTCATCTCAAGACAGAGTTCATCTACCTTTGGGTTATTTGTCAAAAATGTGTCATTTTATTACAAAAAATATACAATCATCATGTATTTTGATTAAATTTTACACTAGATTATTAAAATTATTAAATACAATTATTAAAATTAATAATTTAACATATCACATATTTTAAATATATTGTATATAATGAATAATAATATAATTATTGTCTATTTTAATTCAATAAATGTATAGTAAGTTAGCCAGTTGTAAATTACTGAGAACACTCTACTGAAAAAGCATCATTTCAAATACACTATTTAAAATATTAAATGAAATACAATAACATAATTAAACTAATCTTTGGTTCCCCTATTTATGTATTCATTTATCCAACAAAATCTCCTTAAGTGCTTATAATGGGTAGGTCCTGGCTCGGTGTCCCCTAGACAGACGCATGGGCCTTCCC
CCAGCCCGTCAGTATGGTGCAGGTGTGATGTGTCCGCAGGTGTGTGTGTATGTGTGCAGGTGTGGGGTCCGCAGGCGTGCTGGGCCCCCAGGCCGTGTTCCCCTTCCCCTCCCCGGTTGTAGATTTCAGCTGTTGCTGCCAGACCTGACCGGTTCCGGAGGTGGCCGCGCCCCACTCACTGTCGCCTGCTTTCCACAGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTA
CCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGA
ATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAACC\",\r\n                50459420 - 1);\r\n\r\n            var codingRegion = new CodingRegion(50459422, 50468325, 169, 1053, 885);\r\n\r\n            var rnaEdits = new IRnaEdit[]\r\n            {\r\n                new RnaEdit(1, 0,\r\n                    \"GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAG\"),\r\n                new RnaEdit(4,    3,    \"C\"),\r\n                new RnaEdit(5325, 5324, \"AAAAAAAAAAAAAAA\")\r\n            };\r\n\r\n           
 var regions = new ITranscriptRegion[]\r\n            {\r\n                // insertion\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 50459422, 50459424, 204, 206),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 50459425, 50459561, 208, 343),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 2, 50467616, 50472799, 344, 5527)\r\n            };\r\n            \r\n            string actualCdna =\r\n                new CdnaSequence(genomicSeq, codingRegion, regions, false, rnaEdits).GetCdnaSequence();\r\n            const string expectedCdna =\r\n                \"GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCT
AAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAG
AGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAA
GTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA\";\r\n\r\n            Assert.Equal(expectedCdna, actualCdna);\r\n\r\n            // var cdsTemp = actualCdna.Substring(codingRegion.CdnaStart - 1, codingRegion.Length);\r\n            const string expectedCds =\r\n                \"ATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAA\";\r\n            \r\n            // Assert.Equal(expectedCds, cdsTemp);\r\n\r\n            string actualCds =\r\n                new CodingSequence(genomicSeq, codingRegion, regions, false, 0, rnaEdits).GetCodingSequence();\r\n            \r\n            Assert.Equal(expectedCds, actualCds);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/CodonsTests.cs",
    "content": "﻿using Genome;\r\nusing Moq;\r\nusing UnitTests.TestDataStructures;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public sealed class CodonsTests\r\n    {\r\n        [Fact]\r\n        public void Assign_WhenIntervalsNull_ReturnNull()\r\n        {\r\n            var sequence = new SimpleSequence(\"AAA\");\r\n            var codons = Codons.GetCodons(\"G\", -1, -1, -1, -1, sequence);\r\n\r\n            Assert.Equal(\"\", codons.Reference);\r\n            Assert.Equal(\"\", codons.Alternate);\r\n        }\r\n\r\n        [Fact]\r\n        public void Assign_SNV_SuffixLenTooBig()\r\n        {\r\n            var sequence = new Mock<ISequence>();\r\n            sequence.SetupGet(x => x.Length).Returns(89);\r\n            sequence.Setup(x => x.Substring(87, 1)).Returns(\"t\");\r\n            sequence.Setup(x => x.Substring(88, 1)).Returns(\"C\");\r\n\r\n            var codons = Codons.GetCodons(\"T\", 89, 89, 30, 30, sequence.Object);\r\n\r\n            Assert.Equal(\"tC\", codons.Reference);\r\n            Assert.Equal(\"tT\", codons.Alternate);\r\n        }\r\n\r\n        [Fact]\r\n        public void Assign_SNV()\r\n        {\r\n            var sequence = new Mock<ISequence>();\r\n            sequence.SetupGet(x => x.Length).Returns(100);\r\n            sequence.Setup(x => x.Substring(21, 2)).Returns(\"CA\");\r\n            sequence.Setup(x => x.Substring(23, 1)).Returns(\"A\");\r\n\r\n            var codons = Codons.GetCodons(\"G\", 24, 24, 8, 8, sequence.Object);\r\n\r\n            Assert.Equal(\"caA\", codons.Reference);\r\n            Assert.Equal(\"caG\", codons.Alternate);\r\n        }\r\n\r\n        [Fact]\r\n        public void Assign_MNV()\r\n        {\r\n            var sequence = new Mock<ISequence>();\r\n            sequence.SetupGet(x => x.Length).Returns(100);\r\n            sequence.Setup(x => x.Substring(21, 
2)).Returns(\"CA\");\r\n            sequence.Setup(x => x.Substring(28, 2)).Returns(\"GG\");\r\n            sequence.Setup(x => x.Substring(23, 5)).Returns(\"GTGCT\");\r\n\r\n            var codons = Codons.GetCodons(\"ACCGA\", 24, 28, 8, 10, sequence.Object);\r\n\r\n            Assert.Equal(\"caGTGCTgg\", codons.Reference);\r\n            Assert.Equal(\"caACCGAgg\", codons.Alternate);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCodon_NullPrefixAndSuffix()\r\n        {\r\n            const string allele = \"GAA\";\r\n            var observedResult = Codons.GetCodon(allele, \"\", \"\");\r\n            Assert.Equal(allele, observedResult);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(3, true)]\r\n        [InlineData(1, false)]\r\n        public void IsTriplet(int len, bool expectedResult)\r\n        {\r\n            var observedResult = Codons.IsTriplet(len);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(-33, 4, -11, 2, \"ACGTca\")]\r\n        [InlineData(95, 101, 32, 34, \"gGCTGA\")]\r\n        public void GetCodons_OutOfRangeIndexes_Adjusted(int cdsStart, int cdsEnd, int proteinBegin, int proteinEnd, string expectedRefCodons)\r\n        {\r\n            var sequence = new Mock<ISequence>();\r\n            sequence.SetupGet(x => x.Length).Returns(99);\r\n            sequence.Setup(x => x.Substring(0, 0)).Returns(\"\");\r\n            sequence.Setup(x => x.Substring(0, 4)).Returns(\"ACGT\");\r\n            sequence.Setup(x => x.Substring(4, 2)).Returns(\"CA\");\r\n            sequence.Setup(x => x.Substring(94, 5)).Returns(\"GCTGA\");\r\n            sequence.Setup(x => x.Substring(93, 1)).Returns(\"G\");\r\n\r\n            var codons = Codons.GetCodons(\"\", cdsStart, cdsEnd, proteinBegin, proteinEnd, sequence.Object);\r\n\r\n            Assert.Equal(expectedRefCodons, codons.Reference);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/CompactIdTests.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.Text;\r\nusing IO;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public sealed class CompactIdTests\r\n    {\r\n        [Fact]\r\n        public void Convert_ENSG()\r\n        {\r\n            var id = CompactId.Convert(\"ENSG00000223972\");\r\n            Assert.Equal(\"ENSG00000223972\", id.ToString());\r\n        }\r\n\r\n        [Fact]\r\n        public void Convert_ENST()\r\n        {\r\n            var id = CompactId.Convert(\"ENST00000456328\", 2);\r\n            Assert.Equal(\"ENST00000456328.2\", id.WithVersion);\r\n        }\r\n\r\n        [Fact]\r\n        public void Convert_ENSP()\r\n        {\r\n            var id = CompactId.Convert(\"ENSP00000334393\", 3);\r\n            Assert.Equal(\"ENSP00000334393.3\", id.WithVersion);\r\n        }\r\n\r\n        [Fact]\r\n        public void Convert_ENSESTG()\r\n        {\r\n            var id = CompactId.Convert(\"ENSESTG00000027277\");\r\n            Assert.Equal(\"ENSESTG00000027277\", id.WithVersion);\r\n        }\r\n\r\n        [Fact]\r\n        public void Convert_ENSESTP()\r\n        {\r\n            var id = CompactId.Convert(\"ENSESTP00000068714\", 1);\r\n            Assert.Equal(\"ENSESTP00000068714.1\", id.WithVersion);\r\n        }\r\n\r\n        [Fact]\r\n        public void Convert_ENSR()\r\n        {\r\n            var id = CompactId.Convert(\"ENSR00001576074\", 4);\r\n            Assert.Equal(\"ENSR00001576074.4\", id.WithVersion);\r\n        }\r\n\r\n        [Fact]\r\n        public void Convert_CCDS()\r\n        {\r\n            var id = CompactId.Convert(\"CCDS30555\", 1);\r\n            Assert.Equal(\"CCDS30555.1\", id.WithVersion);\r\n        }\r\n\r\n        [Fact]\r\n        public void Convert_NR()\r\n        {\r\n            var id = 
CompactId.Convert(\"NR_074509\", 1);\r\n            Assert.Equal(\"NR_074509.1\", id.WithVersion);\r\n        }\r\n\r\n        [Fact]\r\n        public void Convert_NM()\r\n        {\r\n            var id = CompactId.Convert(\"NM_001029885\", 1);\r\n            Assert.Equal(\"NM_001029885.1\", id.WithVersion);\r\n        }\r\n\r\n        [Fact]\r\n        public void Convert_NP()\r\n        {\r\n            var id = CompactId.Convert(\"NP_001025056\", 1);\r\n            Assert.Equal(\"NP_001025056.1\", id.WithVersion);\r\n        }\r\n\r\n        [Fact]\r\n        public void Convert_XR()\r\n        {\r\n            var id = CompactId.Convert(\"XR_246629\", 1);\r\n            Assert.Equal(\"XR_246629.1\", id.WithVersion);\r\n        }\r\n\r\n        [Fact]\r\n        public void Convert_XM()\r\n        {\r\n            var id = CompactId.Convert(\"XM_005244723\", 1);\r\n            Assert.Equal(\"XM_005244723.1\", id.WithVersion);\r\n        }\r\n\r\n        [Fact]\r\n        public void Convert_XP()\r\n        {\r\n            var id = CompactId.Convert(\"XP_005244780\", 1);\r\n            Assert.Equal(\"XP_005244780.1\", id.WithVersion);\r\n        }\r\n\r\n        [Fact]\r\n        public void Convert_NullInput_ReturnsEmptyId()\r\n        {\r\n            var id = CompactId.Convert(null);\r\n            Assert.True(id.IsEmpty());\r\n            Assert.Null(id.WithVersion);\r\n\r\n            id = CompactId.Convert(string.Empty);\r\n            Assert.True(id.IsEmpty());\r\n            Assert.Null(id.WithVersion);\r\n        }\r\n\r\n        [Fact]\r\n        public void Convert_Unknown()\r\n        {\r\n            var id = CompactId.Convert(\"ABC123\");\r\n            Assert.True(id.IsEmpty());\r\n            Assert.Null(id.WithVersion);\r\n        }\r\n\r\n        [Fact]\r\n        public void Convert_OnlyNumbers()\r\n        {\r\n            var id = CompactId.Convert(\"268435455\");\r\n            Assert.Equal(\"268435455\", id.WithoutVersion);\r\n        
}\r\n\r\n        [Fact]\r\n        public void Convert_OnlyNumbers_ThrowException_NumberTooLarge()\r\n        {\r\n            Assert.Throws<ArgumentOutOfRangeException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                var id = CompactId.Convert(\"268435456\");\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void CompactId_IO_EndToEnd()\r\n        {\r\n            const string expectedResults = \"ENSP00000334393.3\";\r\n            var id = CompactId.Convert(\"ENSP00000334393\", 3);\r\n\r\n            ICompactId observedId;\r\n\r\n            using (var ms = new MemoryStream())\r\n            {\r\n                using (var writer = new ExtendedBinaryWriter(ms, Encoding.UTF8, true))\r\n                {\r\n                    id.Write(writer);\r\n                }\r\n\r\n                ms.Position = 0;\r\n\r\n                using (var reader = new BufferedBinaryReader(ms))\r\n                {\r\n                    observedId = CompactId.Read(reader);\r\n                }\r\n            }\r\n\r\n            Assert.NotNull(observedId);\r\n            Assert.Equal(expectedResults, observedId.WithVersion);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/FeatureVariantEffectsTests.cs",
    "content": "﻿using Intervals;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public sealed class FeatureVariantEffectsTests\r\n    {\r\n        [Theory]\r\n        [InlineData(VariantType.deletion,         OverlapType.CompletelyOverlaps, EndpointOverlapType.Both,  true)]\r\n        [InlineData(VariantType.copy_number_loss, OverlapType.CompletelyOverlaps, EndpointOverlapType.Both,  true)]\r\n        [InlineData(VariantType.deletion,         OverlapType.Partial,            EndpointOverlapType.Start, false)]\r\n        [InlineData(VariantType.copy_number_loss, OverlapType.Partial,            EndpointOverlapType.End,   false)]\r\n        [InlineData(VariantType.copy_number_loss, OverlapType.CompletelyWithin,   EndpointOverlapType.None,  false)]\r\n        public void Ablation(VariantType variantType, OverlapType overlapType, EndpointOverlapType endpointOverlapType, bool expectResult)\r\n        {\r\n            var  featureEffect  = new FeatureVariantEffects(overlapType, endpointOverlapType, false, variantType, true);\r\n            bool observedResult = featureEffect.Ablation();\r\n            Assert.Equal(expectResult, observedResult);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(VariantType.copy_number_gain,   OverlapType.CompletelyOverlaps, EndpointOverlapType.Both,  true)]\r\n        [InlineData(VariantType.duplication,        OverlapType.CompletelyOverlaps, EndpointOverlapType.Both,  true)]\r\n        [InlineData(VariantType.tandem_duplication, OverlapType.CompletelyOverlaps, EndpointOverlapType.Both,  true)]\r\n        [InlineData(VariantType.copy_number_gain,   OverlapType.Partial,            EndpointOverlapType.Start, false)]\r\n        [InlineData(VariantType.duplication,        OverlapType.CompletelyWithin,   EndpointOverlapType.None,  false)]\r\n        [InlineData(VariantType.tandem_duplication, 
OverlapType.Partial,            EndpointOverlapType.End,   false)]\r\n        public void Amplification(VariantType variantType, OverlapType overlapType, EndpointOverlapType endpointOverlapType, bool expectedResult)\r\n        {\r\n            var  featureEffect  = new FeatureVariantEffects(overlapType, endpointOverlapType, false, variantType, true);\r\n            bool observedResult = featureEffect.Amplification();\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(VariantType.deletion,         OverlapType.Partial,            EndpointOverlapType.Start, true)]\r\n        [InlineData(VariantType.copy_number_loss, OverlapType.Partial,            EndpointOverlapType.End,   true)]\r\n        [InlineData(VariantType.copy_number_loss, OverlapType.CompletelyWithin,   EndpointOverlapType.None,  true)]\r\n        [InlineData(VariantType.deletion,         OverlapType.CompletelyOverlaps, EndpointOverlapType.Both,  false)]\r\n        [InlineData(VariantType.copy_number_loss, OverlapType.CompletelyOverlaps, EndpointOverlapType.Both,  false)]\r\n        public void Truncation(VariantType variantType, OverlapType overlapType, EndpointOverlapType endpointOverlapType, bool expectedResult)\r\n        {\r\n            var  featureEffect  = new FeatureVariantEffects(overlapType, endpointOverlapType, false, variantType, true);\r\n            bool observedResult = featureEffect.Truncation();\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(VariantType.copy_number_gain,   OverlapType.CompletelyWithin,   EndpointOverlapType.None,  true)]\r\n        [InlineData(VariantType.duplication,        OverlapType.CompletelyWithin,   EndpointOverlapType.None,  true)]\r\n        [InlineData(VariantType.tandem_duplication, OverlapType.CompletelyWithin,   EndpointOverlapType.None,  true)]\r\n        [InlineData(VariantType.insertion,          
OverlapType.CompletelyWithin,   EndpointOverlapType.None,  true)]\r\n        [InlineData(VariantType.copy_number_gain,   OverlapType.CompletelyOverlaps, EndpointOverlapType.Both,  false)]\r\n        [InlineData(VariantType.duplication,        OverlapType.Partial,            EndpointOverlapType.Start, false)]\r\n        [InlineData(VariantType.tandem_duplication, OverlapType.Partial,            EndpointOverlapType.End,   false)]\r\n        public void Elongation(VariantType variantType, OverlapType overlapType, EndpointOverlapType endpointOverlapType, bool expectedResult)\r\n        {\r\n            var  featureEffect  = new FeatureVariantEffects(overlapType, endpointOverlapType, false, variantType, true);\r\n            bool observedResult = featureEffect.Elongation();\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(VariantType.copy_number_gain,   OverlapType.Partial,            EndpointOverlapType.End,   false, false)]\r\n        [InlineData(VariantType.copy_number_gain,   OverlapType.Partial,            EndpointOverlapType.End,   true,  true)]\r\n        [InlineData(VariantType.copy_number_gain,   OverlapType.CompletelyWithin,   EndpointOverlapType.None,  false, false)]\r\n        [InlineData(VariantType.copy_number_gain,   OverlapType.Partial,            EndpointOverlapType.Start, false, true)]\r\n        [InlineData(VariantType.copy_number_gain,   OverlapType.Partial,            EndpointOverlapType.Start, true,  false)]\r\n        [InlineData(VariantType.copy_number_gain,   OverlapType.CompletelyOverlaps, EndpointOverlapType.Both,  false, false)]\r\n        [InlineData(VariantType.duplication,        OverlapType.Partial,            EndpointOverlapType.End,   true,  true)]\r\n        [InlineData(VariantType.tandem_duplication, OverlapType.Partial,            EndpointOverlapType.End,   true,  true)]\r\n        [InlineData(VariantType.duplication,        OverlapType.Partial,            
EndpointOverlapType.Start, true,  false)]\r\n        [InlineData(VariantType.tandem_duplication, OverlapType.Partial,            EndpointOverlapType.Start, true,  false)]\r\n        [InlineData(VariantType.duplication,        OverlapType.CompletelyWithin,   EndpointOverlapType.Start, false, true)]\r\n        [InlineData(VariantType.duplication,        OverlapType.CompletelyWithin,   EndpointOverlapType.End,   true,  true)]\r\n        public void FivePrimeDuplicatedTranscript(VariantType variantType,     OverlapType overlapType, EndpointOverlapType endpointOverlapType,\r\n                                                  bool        onReverseStrand, bool        expectedResult)\r\n        {\r\n            var  featureEffect  = new FeatureVariantEffects(overlapType, endpointOverlapType, onReverseStrand, variantType, true);\r\n            bool observedResult = featureEffect.FivePrimeDuplicatedTranscript();\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(VariantType.copy_number_gain,   OverlapType.Partial,            EndpointOverlapType.End,   false, true)]\r\n        [InlineData(VariantType.copy_number_gain,   OverlapType.Partial,            EndpointOverlapType.End,   true,  false)]\r\n        [InlineData(VariantType.copy_number_gain,   OverlapType.CompletelyWithin,   EndpointOverlapType.None,  false, false)]\r\n        [InlineData(VariantType.copy_number_gain,   OverlapType.Partial,            EndpointOverlapType.Start, false, false)]\r\n        [InlineData(VariantType.copy_number_gain,   OverlapType.Partial,            EndpointOverlapType.Start, true,  true)]\r\n        [InlineData(VariantType.copy_number_gain,   OverlapType.CompletelyOverlaps, EndpointOverlapType.Both,  false, false)]\r\n        [InlineData(VariantType.duplication,        OverlapType.Partial,            EndpointOverlapType.End,   true,  false)]\r\n        [InlineData(VariantType.tandem_duplication, OverlapType.Partial,          
  EndpointOverlapType.End,   true,  false)]\r\n        [InlineData(VariantType.duplication,        OverlapType.Partial,            EndpointOverlapType.Start, true,  true)]\r\n        [InlineData(VariantType.tandem_duplication, OverlapType.Partial,            EndpointOverlapType.Start, true,  true)]\r\n        [InlineData(VariantType.duplication,        OverlapType.CompletelyWithin,   EndpointOverlapType.End,   false, true)]\r\n        [InlineData(VariantType.duplication,        OverlapType.CompletelyWithin,   EndpointOverlapType.Start, true,  true)]\r\n        public void ThreePrimeDuplicatedTranscript(VariantType variantType,     OverlapType overlapType, EndpointOverlapType endpointOverlapType,\r\n                                                   bool        onReverseStrand, bool        expectedResult)\r\n        {\r\n            var  featureEffect  = new FeatureVariantEffects(overlapType, endpointOverlapType, onReverseStrand, variantType, true);\r\n            bool observedResult = featureEffect.ThreePrimeDuplicatedTranscript();\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/MappedPositionUtilitiesTests.cs",
    "content": "﻿using Intervals;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public sealed class MappedPositionUtilitiesTests\r\n    {\r\n        private readonly ITranscriptRegion[] _forwardTranscriptRegions;\r\n        private readonly ITranscriptRegion[] _reverseTranscriptRegions;\r\n\r\n        private const int ForwardVariantStart = 78024346;\r\n        private const int ForwardVariantEnd   = 78024345;\r\n\r\n        // Mother.vcf: chr2    313885  .       CTGATTTGCTATGAAA        C\r\n        private const int ReverseVariantStart = 313886;\r\n        private const int ReverseVariantEnd   = 313900;\r\n\r\n        // NM_033517.1, SHANK3\r\n        private readonly ITranscriptRegion[] _regionsNm33517 =\r\n        {\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 1, 51113070, 51113132, 1, 63),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 1, 51113133, 51113475, 63, 64),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 2, 51113476, 51113679, 64, 267),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 2, 51113680, 51115049, 267, 268),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 3, 51115050, 51115121, 268, 339),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 3, 51115122, 51117012, 339, 340),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 4, 51117013, 51117121, 340, 448),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 4, 51117122, 51117196, 448, 449),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 5, 51117197, 51117348, 449, 600),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 5, 51117349, 51117446, 600, 601),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 6, 
51117447, 51117614, 601, 768),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 6, 51117615, 51117739, 768, 769),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 7, 51117740, 51117856, 769, 885),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 7, 51117857, 51121767, 885, 886),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 8, 51121768, 51121845, 886, 963),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 8, 51121846, 51123012, 963, 964),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 9, 51123013, 51123079, 964, 1030),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 9, 51123080, 51133202, 1030, 1031),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 10, 51133203, 51133474, 1031, 1302),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 10, 51133475, 51135984, 1302, 1342),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 11, 51135985, 51135989, 1342, 1346),\r\n            new TranscriptRegion(TranscriptRegionType.Gap, 11, 51135990, 51135991, 1346, 1347),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 11, 51135992, 51136143, 1347, 1498),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 11, 51136144, 51137117, 1498, 1499),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 12, 51137118, 51137231, 1499, 1612),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 12, 51137232, 51142287, 1612, 1613),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 13, 51142288, 51142363, 1613, 1688),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 13, 51142364, 51142593, 1688, 1689),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 14, 51142594, 51142676, 1689, 1771),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 14, 51142677, 51143165, 1771, 1772),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 15, 
51143166, 51143290, 1772, 1896),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 15, 51143291, 51143391, 1896, 1897),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 16, 51143392, 51143524, 1897, 2029),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 16, 51143525, 51144499, 2029, 2030),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 17, 51144500, 51144580, 2030, 2110),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 17, 51144581, 51150042, 2110, 2111),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 18, 51150043, 51150066, 2111, 2134),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 18, 51150067, 51153344, 2134, 2135),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 19, 51153345, 51153475, 2135, 2265),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 19, 51153476, 51154096, 2265, 2266),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 20, 51154097, 51154181, 2266, 2350),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 20, 51154182, 51158611, 2350, 2351),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 21, 51158612, 51160865, 2351, 4604),\r\n            new TranscriptRegion(TranscriptRegionType.Intron, 21, 51160866, 51169148, 4604, 4605),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 22, 51169149, 51171640, 4605, 7096)\r\n        };\r\n\r\n        // NM_000682.6\r\n        private readonly ITranscriptRegion[] _regionsNm682 =\r\n        {\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 1, 96778623, 96780986, 1008, 3371),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 1, 96780987, 96781984, 1, 998)\r\n        };\r\n\r\n        // NM_001317107.1\r\n        private readonly ITranscriptRegion[] _regionsNm1317107 =\r\n        {\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 1, 22138125, 22138561, 670, 1106),\r\n           
 new TranscriptRegion(TranscriptRegionType.Gap, 1, 22138562, 22138563, 669, 670),\r\n            new TranscriptRegion(TranscriptRegionType.Exon, 1, 22138564, 22139232, 1, 669)\r\n        };\r\n\r\n        public MappedPositionUtilitiesTests()\r\n        {\r\n            _forwardTranscriptRegions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1,   77997792, 77998025, 1, 234),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 77998026, 78001531, 234, 235),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 2,   78001532, 78001723, 235, 426),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 2, 78001724, 78024286, 426, 427),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 3,   78024287, 78024416, 427, 556)\r\n            };\r\n\r\n            // ENST00000591244\r\n            _reverseTranscriptRegions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 5,   309218, 309407, 622, 811),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 4, 309408, 310214, 621, 622),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 4,   310215, 310499, 337, 621),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 3, 310500, 312956, 336, 337),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 3,   312957, 313157, 136, 336),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 2, 313158, 313873, 135, 136),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 2,   313874, 313892, 117, 135),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 313893, 314242, 116, 117),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1,   314243, 314358, 1, 116)\r\n            };\r\n        }\r\n\r\n        [Fact]\r\n        public void FindRegion_Forward_Insertion()\r\n        
{\r\n            var observedStart = MappedPositionUtilities.FindRegion(_forwardTranscriptRegions, ForwardVariantStart);\r\n            var observedEnd   = MappedPositionUtilities.FindRegion(_forwardTranscriptRegions, ForwardVariantEnd);\r\n\r\n            Assert.Equal(4, observedStart.Index);\r\n            Assert.Equal(4, observedEnd.Index);\r\n            Assert.NotNull(observedStart.Region);\r\n            Assert.NotNull(observedEnd.Region);\r\n        }\r\n\r\n        [Fact]\r\n        public void FindRegion_Reverse_Deletion()\r\n        {\r\n            var observedStart = MappedPositionUtilities.FindRegion(_reverseTranscriptRegions, ReverseVariantStart);\r\n            var observedEnd   = MappedPositionUtilities.FindRegion(_reverseTranscriptRegions, ReverseVariantEnd);\r\n\r\n            Assert.Equal(6, observedStart.Index);\r\n            Assert.Equal(7, observedEnd.Index);\r\n            Assert.NotNull(observedStart.Region);\r\n            Assert.NotNull(observedEnd.Region);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdnaPositions_Forward_Insertion()\r\n        {\r\n            var variant  = new Interval(ForwardVariantStart, ForwardVariantEnd);\r\n            var observed = MappedPositionUtilities.GetCdnaPositions(_forwardTranscriptRegions[4],\r\n                _forwardTranscriptRegions[4], variant, false, true);\r\n\r\n            Assert.Equal(486, observed.CdnaStart);\r\n            Assert.Equal(485, observed.CdnaEnd);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdnaPositions_Reverse_Deletion()\r\n        {\r\n            var variant  = new Interval(ReverseVariantStart, ReverseVariantEnd);\r\n            var observed = MappedPositionUtilities.GetCdnaPositions(_reverseTranscriptRegions[6], _reverseTranscriptRegions[7], variant, true, false);\r\n\r\n            Assert.Equal(123, observed.CdnaStart);\r\n            Assert.Equal(-1, observed.CdnaEnd);\r\n        }\r\n\r\n        [Fact]\r\n        public void 
GetCdnaPosition_Snv_AfterOutFrameRnaEditDeletion()\r\n        {\r\n            // NM_001317107.1\r\n            var variant = new Interval(22138550, 22138550);\r\n            var observed = MappedPositionUtilities.GetCdnaPositions(_regionsNm1317107[0], _regionsNm1317107[0], variant, true, false);\r\n\r\n            Assert.Equal(681, observed.CdnaStart);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdnaPosition_Snv_AfterInframeRnaEditInsertion()\r\n        {\r\n            // NM_000682.6\r\n            var variant = new Interval(96780984, 96780984);\r\n            var observed = MappedPositionUtilities.GetCdnaPositions(_regionsNm682[0], _regionsNm682[0], variant, true, false);\r\n\r\n            Assert.Equal(1010, observed.CdnaStart);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdnaPosition_Snv_AfterOutframeRnaEditInsertion()\r\n        {\r\n            // NM_033517.1\r\n            var variant = new Interval(51135986, 51135986);\r\n            var observed = MappedPositionUtilities.GetCdnaPositions(_regionsNm33517[20], _regionsNm33517[20], variant, false, false);\r\n\r\n            Assert.Equal(1343, observed.CdnaStart);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCoveredCdnaPositions_Forward_StartBefore_EndExon()\r\n        {\r\n            var observedResults = _forwardTranscriptRegions.GetCoveredCdnaPositions(-1, -1, 300, 2, false);\r\n            Assert.Equal(1, observedResults.Start);\r\n            Assert.Equal(300, observedResults.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCoveredCdnaPositions_Forward_StartIntron_EndExon()\r\n        {\r\n            var observedResults = _forwardTranscriptRegions.GetCoveredCdnaPositions(-1, 1, 300, 2, false);\r\n            Assert.Equal(235, observedResults.Start);\r\n            Assert.Equal(300, observedResults.End);\r\n        }\r\n        \r\n        [Fact]\r\n        public void GetCoveredCdnaPositions_Forward_StartExon_EndIntron()\r\n        
{\r\n            var observedResults = _forwardTranscriptRegions.GetCoveredCdnaPositions(250, 2, -1, 3, false);\r\n            Assert.Equal(250, observedResults.Start);\r\n            Assert.Equal(426, observedResults.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCoveredCdnaPositions_Forward_StartExon_EndAfter()\r\n        {\r\n            var observedResults = _forwardTranscriptRegions.GetCoveredCdnaPositions(-1, ~_forwardTranscriptRegions.Length, 300, 2, false);\r\n            Assert.Equal(300, observedResults.Start);\r\n            Assert.Equal(556, observedResults.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCoveredCdnaPositions_Forward_StartBefore_EndAfter()\r\n        {\r\n            var observedResults = _forwardTranscriptRegions.GetCoveredCdnaPositions(-1, -1, -1, ~_forwardTranscriptRegions.Length, false);\r\n            Assert.Equal(1, observedResults.Start);\r\n            Assert.Equal(556, observedResults.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCoveredCdnaPositions_Forward_Insertion_StartAfter_EndExon()\r\n        {\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 10, 19, 10, 19),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 20, 29, 19, 20),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 2, 30, 39, 20, 29),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 2, 40, 49, 29, 30),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 3, 50, 59, 30, 39),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 3, 60, 69, 39, 40),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 4, 70, 79, 40, 49),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 4, 80, 89, 49, 50),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 5, 90, 4834618, 50, 1676),\r\n               
 new TranscriptRegion(TranscriptRegionType.Intron, 5, 4834619, 4842604, 1676, 1677),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 6, 4842605, 4852594, 1677, 11666)\r\n            };\r\n\r\n            var observedResults = regions.GetCoveredCdnaPositions(-1, -12, 11666, 1, false);\r\n            Assert.Equal(11666, observedResults.Start);\r\n            Assert.Equal(11666, observedResults.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCoveredCdnaPositions_Reverse_StartBefore_EndExon()\r\n        {\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 4, 103288513, 103288696, 522, 705)\r\n            };\r\n\r\n            // ClinVar ENST00000546844 103288512\r\n            var observedResults = regions.GetCoveredCdnaPositions(523, -1, -1, 0, true);\r\n            Assert.Equal(523, observedResults.Start);\r\n            Assert.Equal(705, observedResults.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCoveredCdnaPositions_Reverse_StartIntron_EndExon()\r\n        {\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 3, 103271329, 103288512, 825, 826),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 3, 103288513, 103288696, 642, 825)\r\n            };\r\n\r\n            // ClinVar ENST00000553106 103288512\r\n            var observedResults = regions.GetCoveredCdnaPositions(643, 0, -1, 1, true);\r\n            Assert.Equal(643, observedResults.Start);\r\n            Assert.Equal(825, observedResults.End);\r\n        }\r\n        \r\n        [Fact]\r\n        public void GetCoveredCdnaPositions_Reverse_StartExon_EndIntron()\r\n        {\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 3, 103288513, 103288696, 642, 825),\r\n                
new TranscriptRegion(TranscriptRegionType.Intron, 2, 103288697, 103288999, 641, 642)\r\n            };\r\n            \r\n            var observedResults = regions.GetCoveredCdnaPositions(-1, 0, 666, 1, true);\r\n            Assert.Equal(642, observedResults.Start);\r\n            Assert.Equal(666, observedResults.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCoveredCdnaPositions_Reverse_StartExon_EndAfter()\r\n        {\r\n            var regions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 103288513, 103288696, 1, 825)\r\n            };\r\n\r\n            // synthetic\r\n            var observedResults = regions.GetCoveredCdnaPositions(-1, ~1, -1, -1, true);\r\n            Assert.Equal(1, observedResults.Start);\r\n            Assert.Equal(825, observedResults.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCoveredCdnaPositions_Reverse_StartBefore_EndAfter()\r\n        {\r\n            var observedResults = _reverseTranscriptRegions.GetCoveredCdnaPositions(-1, -1, -1, ~_reverseTranscriptRegions.Length, true);\r\n            Assert.Equal(1, observedResults.Start);\r\n            Assert.Equal(811, observedResults.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdsPosition_Forward_Insertion()\r\n        {\r\n            var codingRegion = new CodingRegion(78001559, 78024355, 262, 495, 234);\r\n            const byte startExonPhase = 0;\r\n\r\n            var (cdsStart, cdsEnd) = MappedPositionUtilities.GetCdsPositions(codingRegion, 486, 485, startExonPhase, true);\r\n\r\n            Assert.Equal(225, cdsStart);\r\n            Assert.Equal(224, cdsEnd);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdsPosition_Snv_AfterOutFrameRnaEditDeletion()\r\n        {\r\n            // NM_001317107.1\r\n            var codingRegion = new CodingRegion(22138201, 22139150, 83, 1030, 948);\r\n            const byte startExonPhase = 0;\r\n     
       (int cdsStart, _) = MappedPositionUtilities.GetCdsPositions(codingRegion, 681, 681, startExonPhase, false);\r\n\r\n            Assert.Equal(599, cdsStart);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdsPosition_Snv_AfterInframeRnaEditInsertion()\r\n        {\r\n            // NM_000682.6\r\n            var codingRegion = new CodingRegion(96780545, 96781888, 97, 1449, 1344);\r\n            const byte startExonPhase = 0;\r\n            (int cdsStart, _) = MappedPositionUtilities.GetCdsPositions(codingRegion, 1010, 1010, startExonPhase, false);\r\n\r\n            Assert.Equal(914, cdsStart);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdsPosition_Snv_AfterOutframeRnaEditInsertion()\r\n        {\r\n            // NM_033517.1\r\n            var codingRegion = new CodingRegion(51113070, 51169740, 1, 5196, 5157);\r\n            const byte startExonPhase = 0;\r\n            (int cdsStart, _) = MappedPositionUtilities.GetCdsPositions(codingRegion, 1343, 1343, startExonPhase, false);\r\n\r\n            Assert.Equal(1343, cdsStart);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdsPosition_Forward_Insertion_WithStartExonPhase()\r\n        {\r\n            var codingRegion = new CodingRegion(6413107, 6415837, 1, 953, 953);\r\n            const byte startExonPhase = 1;\r\n\r\n            var (cdsStart, cdsEnd) = MappedPositionUtilities.GetCdsPositions(codingRegion, 29, 28, startExonPhase, true);\r\n\r\n            Assert.Equal(30, cdsStart);\r\n            Assert.Equal(29, cdsEnd);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdsPosition_Reverse_NoCodingRegion_Deletion()\r\n        {\r\n            const byte startExonPhase = 0;\r\n\r\n            var (cdsStart, cdsEnd) = MappedPositionUtilities.GetCdsPositions(null, -1, 123, startExonPhase, false);\r\n\r\n            Assert.Equal(-1, cdsStart);\r\n            Assert.Equal(-1, cdsEnd);\r\n        }\r\n\r\n        [Fact]\r\n        public void 
GetCdsPosition_SilenceOutput_InsertionAfterCodingRegion_Forward()\r\n        {\r\n            // variant: [6647337, 6647336] insertion after coding region\r\n            var codingRegion = new CodingRegion(6643999, 6647336, 667, 1674, 1008);\r\n            const byte startExonPhase = 0;\r\n\r\n            var (cdsStart, cdsEnd) = MappedPositionUtilities.GetCdsPositions(codingRegion, 1675, 1674, startExonPhase, true);\r\n\r\n            Assert.Equal(-1, cdsStart);\r\n            Assert.Equal(-1, cdsEnd);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdsPosition_SilenceOutput_InsertionAfterCodingRegion_Reverse()\r\n        {\r\n            // variant: [103629803, 103629804] insertion after coding region\r\n            var codingRegion = new CodingRegion(103113259, 103629803, 161, 10543, 10383);\r\n            const byte startExonPhase = 0;\r\n\r\n            var (cdsStart, cdsEnd) = MappedPositionUtilities.GetCdsPositions(codingRegion, 161, 160, startExonPhase, true);\r\n\r\n            Assert.Equal(-1, cdsStart);\r\n            Assert.Equal(-1, cdsEnd);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdsPosition_SilenceOutput_InsertionBeforeCodingRegion_Reverse()\r\n        {\r\n            // variant: [37480320, 37480319] insertion before coding region\r\n            var codingRegion = new CodingRegion(37480320, 37543667, 556, 3228, 2673);\r\n            const byte startExonPhase = 0;\r\n\r\n            var (cdsStart, cdsEnd) = MappedPositionUtilities.GetCdsPositions(codingRegion, 3229, 3228, startExonPhase, true);\r\n\r\n            Assert.Equal(-1, cdsStart);\r\n            Assert.Equal(-1, cdsEnd);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetCdsPosition_DoNotSilenceOutput_Reverse()\r\n        {\r\n            // variant: [179315139, 179315692]\r\n            var codingRegion = new CodingRegion(179308070, 179315170, 617, 942, 326);\r\n            const byte startExonPhase = 0;\r\n\r\n            var (cdsStart, cdsEnd) = 
MappedPositionUtilities.GetCdsPositions(codingRegion, 95, 648, startExonPhase, false);\r\n\r\n            Assert.Equal(-1, cdsStart);\r\n            Assert.Equal(32, cdsEnd);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetProteinPosition_Forward_Insertion()\r\n        {\r\n            int proteinStart = MappedPositionUtilities.GetProteinPosition(225);\r\n            int proteinEnd   = MappedPositionUtilities.GetProteinPosition(224);\r\n            Assert.Equal(75, proteinStart);\r\n            Assert.Equal(75, proteinEnd);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetProteinPosition_Reverse_Deletion()\r\n        {\r\n            int proteinStart = MappedPositionUtilities.GetProteinPosition(-1);\r\n            int proteinEnd   = MappedPositionUtilities.GetProteinPosition(-1);\r\n            Assert.Equal(-1, proteinStart);\r\n            Assert.Equal(-1, proteinEnd);\r\n        }\r\n\r\n        private static ITranscriptRegion GetExon() => new TranscriptRegion(TranscriptRegionType.Exon, 0, 10001, 10199, 1, 199);\r\n        private static ITranscriptRegion GetIntron() => new TranscriptRegion(TranscriptRegionType.Intron, 0, 10200, 10299, 199, 200);\r\n\r\n        [Fact]\r\n        public void FoundExonEndpointInsertion_NotInsertion_ReturnFalse()\r\n        {\r\n            Assert.False(MappedPositionUtilities.FoundExonEndpointInsertion(false, -1, 100, GetExon(), GetIntron()));\r\n        }\r\n\r\n        [Fact]\r\n        public void FoundExonEndpointInsertion_BothExons_ReturnFalse()\r\n        {\r\n            Assert.False(MappedPositionUtilities.FoundExonEndpointInsertion(true, -1, 100, GetExon(), GetExon()));\r\n        }\r\n\r\n        [Fact]\r\n        public void FoundExonEndpointInsertion_BothIntrons_ReturnFalse()\r\n        {\r\n            Assert.False(MappedPositionUtilities.FoundExonEndpointInsertion(true, -1, 100, GetIntron(), GetIntron()));\r\n        }\r\n\r\n        [Fact]\r\n        public void 
FoundExonEndpointInsertion_BothDefinedCdnaPositions_ReturnFalse()\r\n        {\r\n            Assert.False(MappedPositionUtilities.FoundExonEndpointInsertion(true, 100, 110, GetExon(), GetIntron()));\r\n        }\r\n\r\n        [Fact]\r\n        public void FoundExonEndpointInsertion_BothUndefinedCdnaPositions_ReturnFalse()\r\n        {\r\n            Assert.False(MappedPositionUtilities.FoundExonEndpointInsertion(true, -1, -1, GetExon(), GetIntron()));\r\n        }\r\n\r\n        [Fact]\r\n        public void FoundExonEndpointInsertion_UndefinedRegion_ReturnFalse()\r\n        {\r\n            Assert.False(MappedPositionUtilities.FoundExonEndpointInsertion(true, -1, -1, null, GetIntron()));\r\n        }\r\n\r\n        [Fact]\r\n        public void FoundExonEndpointInsertion_OneIntron_OneExon_OneUndefinedPosition_ReturnTrue()\r\n        {\r\n            Assert.True(MappedPositionUtilities.FoundExonEndpointInsertion(true, 108, -1, GetExon(), GetIntron()));\r\n        }\r\n\r\n        [Fact]\r\n        public void FixExonEndpointInsertion_VariantEnd_ExonEnd_Reverse()\r\n        {\r\n            var startRegion = new TranscriptRegion(TranscriptRegionType.Intron, 7, 243736351, 243776972, 762, 763);\r\n            var endRegion   = new TranscriptRegion(TranscriptRegionType.Exon, 8, 243736228, 243736350, 763, 885);\r\n\r\n            var result = MappedPositionUtilities.FixExonEndpointInsertion(-1, 763, true, startRegion, endRegion,\r\n                new Interval(243736351, 243736350));\r\n\r\n            Assert.Equal(762, result.CdnaStart);\r\n            Assert.Equal(763, result.CdnaEnd);\r\n        }\r\n\r\n        [Fact]\r\n        public void FixExonEndpointInsertion_VariantStart_ExonStart_Reverse()\r\n        {\r\n            // N.B. 
this configuration has never been spotted in the wild\r\n            var startRegion = new TranscriptRegion(TranscriptRegionType.Exon, 2, 2000, 2199, 1, 200);\r\n            var endRegion   = new TranscriptRegion(TranscriptRegionType.Intron, 2, 1999, 1000, 200, 201);\r\n\r\n            var result = MappedPositionUtilities.FixExonEndpointInsertion(200, -1, true, startRegion, endRegion,\r\n                new Interval(2000, 1999));\r\n\r\n            Assert.Equal(200, result.CdnaStart);\r\n            Assert.Equal(201, result.CdnaEnd);\r\n        }\r\n\r\n        [Fact]\r\n        public void FixExonEndpointInsertion_VariantEnd_ExonEnd_Forward()\r\n        {\r\n            var startRegion = new TranscriptRegion(TranscriptRegionType.Intron, 16, 89521770, 89528546, 3071, 3072);\r\n            var endRegion   = new TranscriptRegion(TranscriptRegionType.Exon,   16, 89521614, 89521769, 2916, 3071);\r\n\r\n            var result = MappedPositionUtilities.FixExonEndpointInsertion(-1, 3071, false, startRegion, endRegion,\r\n                new Interval(89521770, 89521769));\r\n\r\n            Assert.Equal(3072, result.CdnaStart);\r\n            Assert.Equal(3071, result.CdnaEnd);\r\n        }\r\n\r\n        [Fact]\r\n        public void FixExonEndpointInsertion_VariantStart_ExonStart_Forward()\r\n        {\r\n            var startRegion = new TranscriptRegion(TranscriptRegionType.Exon,   2, 99459243, 99459360, 108, 225);\r\n            var endRegion   = new TranscriptRegion(TranscriptRegionType.Intron, 1, 99456512, 99459242, 107, 108);\r\n\r\n            var result = MappedPositionUtilities.FixExonEndpointInsertion(108, -1, false, startRegion, endRegion,\r\n                new Interval(99459243, 99459242));\r\n\r\n            Assert.Equal(108, result.CdnaStart);\r\n            Assert.Equal(107, result.CdnaEnd);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/StringExtensionsTests.cs",
    "content": "﻿using VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public sealed class StringExtensionsTests\r\n    {\r\n        [Theory]\r\n        [InlineData(null,null,0)]\r\n        [InlineData(\"abc\",null,0)]\r\n        [InlineData(\"abc\", \"abgg\", 2)]\r\n        [InlineData(\"abcfdg\", \"abgg\", 2)]\r\n        public void CommonPrefixLength(string a, string b, int expResult)\r\n        {\r\n            Assert.Equal(expResult,a.CommonPrefixLength(b));\r\n        }\r\n\r\n\r\n        [Theory]\r\n        [InlineData(null, null, 0)]\r\n        [InlineData(\"abc\", null, 0)]\r\n        [InlineData(\"abc\", \"abgg\", 0)]\r\n        [InlineData(\"abcfdg\", \"abgg\", 1)]\r\n        public void CommonSuffixLength(string a, string b, int expResult)\r\n        {\r\n            Assert.Equal(expResult, a.CommonSuffixLength(b));\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/TranscriptPositionalEffectTests.cs",
    "content": "﻿using Intervals;\r\nusing Moq;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public sealed class TranscriptPositionalEffectTests\r\n    {\r\n        private readonly Mock<ITranscript> _forwardTranscript; // use info from \"ENST00000455979.1\" with modification\r\n        private readonly Mock<ITranscript> _reverseTranscript; // use info from \"ENST00000385042\"\r\n        private readonly ITranscriptRegion[] _otherTranscriptRegions;\r\n\r\n        public TranscriptPositionalEffectTests()\r\n        {\r\n            const int start = 874655;\r\n            const int end   = 879639;\r\n\r\n            _otherTranscriptRegions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 200, 300, 1, 186),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 301, 400, 186, 187),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 2, 401, 699, 187, 349),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 2, 700, 709, 359, 360),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 3, 710, 800, 350, 465)\r\n            };\r\n\r\n            var forwardTranscriptRegions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 874655, 874840, 1, 186),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 874841, 876523, 186, 187),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 2, 876524, 876686, 187, 349),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 2, 876687, 877515, 349, 350),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 3, 877516, 
877631, 350, 465),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 3, 877632, 877789, 465, 466),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 4, 877790, 877868, 466, 544),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 4, 877869, 877938, 544, 545),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 5, 877939, 878438, 545, 1044),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 5, 878439, 878632, 1044, 1045),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 6, 878633, 878757, 1045, 1169),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 6, 878758, 879077, 1169, 1170),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 7, 879078, 879639, 1170, 1731)\r\n            };\r\n\r\n            var reverseTranscriptRegions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 3477259, 3477354, 1, 96)\r\n            };\r\n\r\n            var translation = new Mock<ITranslation>();\r\n            translation.SetupGet(x => x.CodingRegion).Returns(new CodingRegion(874655, 879533, 1, 1625, 1625));\r\n\r\n            var gene = new Mock<IGene>();\r\n            gene.SetupGet(x => x.OnReverseStrand).Returns(false);\r\n\r\n            _forwardTranscript = new Mock<ITranscript>();\r\n            _forwardTranscript.SetupGet(x => x.Chromosome).Returns(ChromosomeUtilities.Chr1);\r\n            _forwardTranscript.SetupGet(x => x.Start).Returns(start);\r\n            _forwardTranscript.SetupGet(x => x.End).Returns(end);\r\n            _forwardTranscript.SetupGet(x => x.Gene).Returns(gene.Object);\r\n            _forwardTranscript.SetupGet(x => x.TranscriptRegions).Returns(forwardTranscriptRegions);\r\n            _forwardTranscript.SetupGet(x => x.Translation).Returns(translation.Object);\r\n            _forwardTranscript.SetupGet(x => x.TotalExonLength).Returns(1731);\r\n\r\n    
        _reverseTranscript = new Mock<ITranscript>();\r\n            _reverseTranscript.SetupGet(x => x.Chromosome).Returns(ChromosomeUtilities.Chr1);\r\n            _reverseTranscript.SetupGet(x => x.Start).Returns(3477259);\r\n            _reverseTranscript.SetupGet(x => x.Start).Returns(3477354);\r\n            _reverseTranscript.SetupGet(x => x.Gene.OnReverseStrand).Returns(true);\r\n            _reverseTranscript.SetupGet(x => x.Translation).Returns((ITranslation)null);\r\n            _reverseTranscript.SetupGet(x => x.BioType).Returns(BioType.miRNA);\r\n            _reverseTranscript.SetupGet(x => x.TranscriptRegions).Returns(reverseTranscriptRegions);\r\n            _reverseTranscript.SetupGet(x => x.MicroRnas).Returns(new IInterval[] { new Interval(61, 81) });\r\n        }\r\n\r\n        [Fact]\r\n        public void DetermineIntronicEffect_NullIntrons()\r\n        {\r\n            var positionalEffect = new TranscriptPositionalEffect();\r\n            positionalEffect.DetermineIntronicEffect(null, new Interval(400, 400), VariantType.SNV);\r\n\r\n            Assert.False(positionalEffect.IsEndSpliceSite);\r\n            Assert.False(positionalEffect.IsStartSpliceSite);\r\n            Assert.False(positionalEffect.IsWithinFrameshiftIntron);\r\n            Assert.False(positionalEffect.IsWithinIntron);\r\n            Assert.False(positionalEffect.IsWithinSpliceSiteRegion);\r\n            Assert.False(positionalEffect.HasExonOverlap);\r\n            Assert.False(positionalEffect.AfterCoding);\r\n            Assert.False(positionalEffect.BeforeCoding);\r\n            Assert.False(positionalEffect.WithinCdna);\r\n            Assert.False(positionalEffect.WithinCds);\r\n            Assert.False(positionalEffect.HasFrameShift);\r\n            Assert.False(positionalEffect.IsCoding);\r\n        }\r\n\r\n        [Fact]\r\n        public void DetermineIntronicEffect_NotWithinFrameshiftIntron()\r\n        {\r\n            var transcriptRegions = new 
ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 201342300, 201342340, 1, 186),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 201342340, 201342343, 186, 187),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 2, 201342344, 201342400, 187, 349)\r\n            };\r\n\r\n            IInterval variant    = new Interval(201342344, 201342344);\r\n            var positionalEffect = new TranscriptPositionalEffect();\r\n            positionalEffect.DetermineIntronicEffect(transcriptRegions, variant, VariantType.SNV);\r\n\r\n            Assert.True(positionalEffect.IsWithinSpliceSiteRegion);\r\n        }\r\n\r\n        [Fact]\r\n        public void DetermineIntronicEffect_IsEndSpliceSite()\r\n        {\r\n            var positionalEffect = new TranscriptPositionalEffect();\r\n            positionalEffect.DetermineIntronicEffect(_otherTranscriptRegions, new Interval(400, 400), VariantType.SNV);\r\n            Assert.True(positionalEffect.IsEndSpliceSite);\r\n        }\r\n\r\n        [Fact]\r\n        public void DetermineIntronicEffect_IsStartSpliceSite()\r\n        {\r\n            var positionalEffect = new TranscriptPositionalEffect();\r\n            positionalEffect.DetermineIntronicEffect(_otherTranscriptRegions, new Interval(300, 303), VariantType.deletion);\r\n            Assert.True(positionalEffect.IsStartSpliceSite);\r\n        }\r\n\r\n        [Fact]\r\n        public void DetermineIntronicEffect_IsWithinFrameshiftIntron_NotInSpliceSite()\r\n        {\r\n            var positionalEffect = new TranscriptPositionalEffect();\r\n            positionalEffect.DetermineIntronicEffect(_otherTranscriptRegions, new Interval(701, 709), VariantType.deletion);\r\n            Assert.True(positionalEffect.IsWithinFrameshiftIntron);\r\n            Assert.False(positionalEffect.IsStartSpliceSite);\r\n            Assert.False(positionalEffect.IsEndSpliceSite);\r\n        }\r\n\r\n    
    [Fact]\r\n        public void DetermineIntronicEffect_IsWithinIntron()\r\n        {\r\n            IInterval variant    = new Interval(300, 302);\r\n            var positionalEffect = new TranscriptPositionalEffect();\r\n            positionalEffect.DetermineIntronicEffect(_otherTranscriptRegions, variant, VariantType.deletion);\r\n            Assert.False(positionalEffect.IsWithinIntron);\r\n\r\n            IInterval variant2    = new Interval(303, 303);\r\n            var positionalEffect2 = new TranscriptPositionalEffect();\r\n            positionalEffect2.DetermineIntronicEffect(_otherTranscriptRegions, variant2, VariantType.deletion);\r\n            Assert.True(positionalEffect2.IsWithinIntron);\r\n        }\r\n\r\n        [Fact]\r\n        public void DetermineIntronicEffect_IsWithinSpliceSiteRegion()\r\n        {\r\n            var positionalEffect = new TranscriptPositionalEffect();\r\n            IInterval variant    = new Interval(298, 302);\r\n\r\n            positionalEffect.DetermineIntronicEffect(_otherTranscriptRegions, variant, VariantType.deletion);\r\n            Assert.True(positionalEffect.IsWithinSpliceSiteRegion);\r\n        }\r\n\r\n        [Fact]\r\n        public void DetermineExonicEffect_HasExonOverlap()\r\n        {\r\n            IInterval variant = new Interval(876686, 876686);\r\n            var position      = new MappedPosition(349, 349, 349, 349, 117, 117, 2, 2, -1, -1, 2, 2);\r\n\r\n            var positionalEffect = new TranscriptPositionalEffect();\r\n            positionalEffect.DetermineExonicEffect(_forwardTranscript.Object, variant, position, 349, 349, 349, 349, \"G\", false);\r\n\r\n            Assert.True(positionalEffect.HasExonOverlap);\r\n        }\r\n\r\n        [Fact]\r\n        public void DetermineExonicEffect_AfterCoding()\r\n        {\r\n            IInterval variant = new Interval(879600, 879600);\r\n            var position      = new MappedPosition(1692, 1692, -1, -1, -1, -1, 7, 7, -1, -1, 12, 12);\r\n\r\n  
          var positionalEffect = new TranscriptPositionalEffect();\r\n            positionalEffect.DetermineExonicEffect(_forwardTranscript.Object, variant, position, 1692, 1692, -1, -1, \"G\", false);\r\n            Assert.True(positionalEffect.AfterCoding);\r\n        }\r\n\r\n        [Fact]\r\n        public void DetermineExonicEffect_WithinCdna()\r\n        {\r\n            IInterval variant = new Interval(879600, 879600);\r\n            var position      = new MappedPosition(1692, 1692, -1, -1, -1, -1, 7, 7, -1, -1, 12, 12);\r\n\r\n            var positionalEffect = new TranscriptPositionalEffect();\r\n            positionalEffect.DetermineExonicEffect(_forwardTranscript.Object, variant, position, 1692, 1692, -1, -1, \"G\", false);\r\n            Assert.True(positionalEffect.WithinCdna);\r\n        }\r\n\r\n        [Fact]\r\n        public void DetermineExonicEffect_WithinCds()\r\n        {\r\n            IInterval variant = new Interval(876543, 876543);\r\n            var position      = new MappedPosition(206, 206, 206, 206, 69, 69, 2, 2, -1, -1, 2, 2);\r\n\r\n            var positionalEffect = new TranscriptPositionalEffect();\r\n            positionalEffect.DetermineExonicEffect(_forwardTranscript.Object, variant, position, 206, 206, 206, 206, \"G\", false);\r\n            Assert.True(positionalEffect.WithinCdna);\r\n        }\r\n\r\n        [Fact]\r\n        public void DetermineExonicEffect_OverlapWithMicroRna()\r\n        {\r\n            IInterval variant = new Interval(3477284, 3477284);\r\n            var position      = new MappedPosition(71, 71, -1, -1, -1, -1, 1, 1, -1, -1, 0, 0);\r\n\r\n            var positionalEffect = new TranscriptPositionalEffect();\r\n            positionalEffect.DetermineExonicEffect(_reverseTranscript.Object, variant, position, 71, 71, -1, -1, \"G\", false);\r\n            Assert.True(positionalEffect.OverlapWithMicroRna);\r\n        }\r\n\r\n        [Fact]\r\n        public void ExonOverlaps_NoOverlap()\r\n        {\r\n  
          var transcriptRegions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 100, 200, 300, 400)\r\n            };\r\n\r\n            IInterval variant  = new Interval(201, 500);\r\n            var observedResult = transcriptRegions[0].Overlaps(variant);\r\n\r\n            Assert.False(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void IsMatureMirnaVariant_NullMirnas()\r\n        {\r\n            var observedResult = TranscriptPositionalEffect.IsMatureMirnaVariant(-1, -1, null, true);\r\n            Assert.False(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void IsWithinCds_ReturnFalse()\r\n        {\r\n            var positionalEffect = new TranscriptPositionalEffect();\r\n            var observedResult = positionalEffect.IsWithinCds(-1, -1, null, null);\r\n            Assert.False(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void IsWithinCds_ReturnTrue()\r\n        {\r\n            var positionalEffect = new TranscriptPositionalEffect();\r\n            var observedResult = positionalEffect.IsWithinCds(180, 180, null, null);\r\n            Assert.True(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void IsWithinCds_IsWithinFrameshiftIntron_OverlapCodingRegion_ReturnTrue()\r\n        {\r\n            var variant          = new Interval(100, 101);\r\n            var codingRegion     = new Interval(90, 120);\r\n            var positionalEffect = new TranscriptPositionalEffect { IsWithinFrameshiftIntron = true };\r\n\r\n            var observedResult = positionalEffect.IsWithinCds(-1, -1, codingRegion, variant);\r\n            Assert.True(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void IsWithinCds_IsWithinFrameshiftIntron_ReturnFalse()\r\n        {\r\n            var variant          = new Interval(100, 101);\r\n            var codingRegion     = new Interval(102, 120);\r\n         
   var positionalEffect = new TranscriptPositionalEffect { IsWithinFrameshiftIntron = true };\r\n\r\n            var observedResult = positionalEffect.IsWithinCds(-1, -1, codingRegion, variant);\r\n            Assert.False(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void IsAfterCoding_True_WhenInsertion()\r\n        {\r\n            var observedResult = TranscriptPositionalEffect.IsAfterCoding(101, 100, 100, 100);\r\n            Assert.True(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void IsBeforeCoding_True_WhenInsertion()\r\n        {\r\n            var observedResult = TranscriptPositionalEffect.IsBeforeCoding(101, 100, 100, 101);\r\n            Assert.True(observedResult);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(100, 200, 300, true)]\r\n        [InlineData(500, 600, 300, false)]\r\n        public void IsWithinCdna(int cdnaStart, int cdnaEnd, int totalExonLen, bool expectedResult)\r\n        {\r\n            var observedResult = TranscriptPositionalEffect.IsWithinCdna(cdnaStart, cdnaEnd, totalExonLen);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/TranscriptUtilitiesTests.cs",
    "content": "﻿using Genome;\r\nusing Intervals;\r\nusing Moq;\r\nusing UnitTests.TestDataStructures;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.AnnotatedPositions;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public sealed class TranscriptUtilitiesTests\r\n    {\r\n        private readonly ISequence _refSequence = new SimpleSequence(\"ACTTCGGGC\", 12340);\r\n\r\n        [Fact]\r\n        public void IsDuplicateWithinInterval_not_intertion()\r\n        {\r\n            var simpleVar = GenSimpleDeletionMock();\r\n            Assert.False(HgvsUtilities.IsDuplicateWithinInterval(_refSequence, simpleVar.Object, new Interval(1, 3), false));\r\n        }\r\n\r\n        [Fact]\r\n        public void IsDuplicateWithinInterval_outside_interval()\r\n        {\r\n            var simpleVar = GenSimpleInsertionMock();\r\n\r\n            // forward strand\r\n            Assert.False(HgvsUtilities.IsDuplicateWithinInterval(_refSequence, simpleVar.Object, new Interval(12344, 12370), false));\r\n\r\n            // reverse strand\r\n            Assert.False(HgvsUtilities.IsDuplicateWithinInterval(_refSequence, simpleVar.Object, new Interval(12340, 12347), true));\r\n        }\r\n\r\n        [Fact]\r\n        public void IsDuplicateWithinInterval_insertion_not_dup()\r\n        {\r\n            var simpleVar = GenSimpleInsertionMock();\r\n\r\n            // forward strand\r\n            Assert.False(HgvsUtilities.IsDuplicateWithinInterval(new SimpleSequence(\"ACTTCGGGC\", 12340),\r\n                simpleVar.Object, new Interval(12300, 12400), false));\r\n\r\n            // reverse strand\r\n            Assert.False(HgvsUtilities.IsDuplicateWithinInterval(new SimpleSequence(\"ACTTCGGGC\", 12340),\r\n                simpleVar.Object, new Interval(12300, 12400), true));\r\n        }\r\n\r\n        [Fact]\r\n        public void IsDuplicateWithinInterval_insertion_is_dup()\r\n        {\r\n     
       var simpleVar = GenSimpleInsertionMock();\r\n\r\n            // forward strand\r\n            Assert.True(HgvsUtilities.IsDuplicateWithinInterval(new SimpleSequence(\"ACCTGGGGC\", 12340),\r\n                simpleVar.Object, new Interval(12300, 12400), false));\r\n\r\n            // reverse strand\r\n            Assert.True(HgvsUtilities.IsDuplicateWithinInterval(new SimpleSequence(\"ACTTCCTGC\", 12340),\r\n                simpleVar.Object, new Interval(12300, 12400), true));\r\n        }\r\n\r\n        private static Mock<ISimpleVariant> GenSimpleDeletionMock()\r\n        {\r\n            var simpleVar = new Mock<ISimpleVariant>();\r\n            simpleVar.SetupGet(x => x.Chromosome).Returns(ChromosomeUtilities.Chr21);\r\n            simpleVar.SetupGet(x => x.Start).Returns(12345);\r\n            simpleVar.SetupGet(x => x.End).Returns(12348);\r\n            simpleVar.SetupGet(x => x.RefAllele).Returns(\"CTG\");\r\n            simpleVar.SetupGet(x => x.AltAllele).Returns(\"\");\r\n            simpleVar.SetupGet(x => x.Type).Returns(VariantType.deletion);\r\n            return simpleVar;\r\n        }\r\n\r\n        private static Mock<ISimpleVariant> GenSimpleInsertionMock()\r\n        {\r\n            var simpleVar = new Mock<ISimpleVariant>();\r\n            simpleVar.SetupGet(x => x.Chromosome).Returns(ChromosomeUtilities.Chr21);\r\n            simpleVar.SetupGet(x => x.Start).Returns(12346);\r\n            simpleVar.SetupGet(x => x.End).Returns(12345);\r\n            simpleVar.SetupGet(x => x.RefAllele).Returns(\"\");\r\n            simpleVar.SetupGet(x => x.AltAllele).Returns(\"CTG\");\r\n            simpleVar.SetupGet(x => x.Type).Returns(VariantType.insertion);\r\n            return simpleVar;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/VariantEffectTests.cs",
    "content": "﻿using Moq;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public sealed class VariantEffectTests\r\n    {\r\n        [Theory]\r\n        [InlineData(false,false,false,false)]\r\n        [InlineData(false,false,true,true)]\r\n        [InlineData(true, false, true, false)]\r\n        [InlineData(true, true, true, true)]\r\n        public void IsSpliceAcceptorVariant(bool onReverseStrand,bool isStartSpliceSite, bool isEndSpliceSite,bool expectedResult)\r\n        {\r\n            var positionalEffect = new TranscriptPositionalEffect\r\n            {\r\n                IsStartSpliceSite = isStartSpliceSite,\r\n                IsEndSpliceSite = isEndSpliceSite\r\n            };\r\n\r\n            var variant = new Mock<ISimpleVariant>();\r\n            var transcript = new Mock<ITranscript>();\r\n\r\n            variant.SetupGet(x => x.AltAllele).Returns(\"G\");\r\n            variant.SetupGet(x => x.RefAllele).Returns(\"C\");\r\n\r\n            var variantEffect = new VariantEffect(positionalEffect, variant.Object, transcript.Object, \"\", \"\", \"\", \"\",\r\n                null, \"\", \"\");\r\n\r\n            var gene = new Mock<IGene>();\r\n            transcript.SetupGet(x => x.Gene).Returns(gene.Object);\r\n            gene.SetupGet(x => x.OnReverseStrand).Returns(onReverseStrand);\r\n\r\n            Assert.Equal(expectedResult, variantEffect.IsSpliceAcceptorVariant());\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(false, false, false, false)]\r\n        [InlineData(false, true, true, true)]\r\n        [InlineData(true, false, false, false)]\r\n        [InlineData(true, true, true, true)]\r\n        public void IsSpliceDonorVariant(bool onReverseStrand, bool isStartSpliceSite, bool isEndSpliceSite, bool expectedResult)\r\n        {\r\n           
 var positionalEffect = new TranscriptPositionalEffect\r\n            {\r\n                IsStartSpliceSite = isStartSpliceSite,\r\n                IsEndSpliceSite = isEndSpliceSite\r\n            };\r\n\r\n            var variant = new Mock<ISimpleVariant>();\r\n            var transcript = new Mock<ITranscript>();\r\n\r\n            variant.SetupGet(x => x.AltAllele).Returns(\"G\");\r\n            variant.SetupGet(x => x.RefAllele).Returns(\"C\");\r\n\r\n            var variantEffect = new VariantEffect(positionalEffect, variant.Object, transcript.Object, \"\", \"\", \"\", \"\",\r\n                null, \"\", \"\");\r\n\r\n            var gene = new Mock<IGene>();\r\n            transcript.SetupGet(x => x.Gene).Returns(gene.Object);\r\n            gene.SetupGet(x => x.OnReverseStrand).Returns(onReverseStrand);\r\n\r\n            Assert.Equal(expectedResult, variantEffect.IsSpliceDonorVariant());\r\n\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(1, \"M\", \"KM\", \"\", \"TCT\", true)]\r\n        [InlineData(2, \"M\", \"Mk\", \"\", \"TCT\", false)]\r\n        [InlineData(1, \"K\", \"MK\", \"\", \"ATG\", true)]\r\n        public void IsStartRetainedVariant(int proteinBegin, string refAminoAcids, string altAminoAcids, string refAllele, string altAllele, bool isStartRetained)\r\n        {\r\n            var variant = new Mock<ISimpleVariant>();\r\n            var transcript = new Mock<ITranscript>();\r\n\r\n            variant.SetupGet(x => x.AltAllele).Returns(refAllele);\r\n            variant.SetupGet(x => x.RefAllele).Returns(altAllele);\r\n\r\n            var variantEffect = new VariantEffect(null, variant.Object, transcript.Object, refAminoAcids, altAminoAcids , \"\", \"\",\r\n                proteinBegin, refAminoAcids, altAminoAcids);\r\n\r\n            if (isStartRetained) Assert.True(variantEffect.IsStartRetained());\r\n            else Assert.False(variantEffect.IsStartRetained());\r\n        }\r\n\r\n        [Theory]\r\n        
[InlineData(false, true, false, false, false)]\r\n        [InlineData(false, true, true, true, true)]\r\n        [InlineData(false, false, true, true, false)]\r\n        [InlineData(true, true, false, false, false)]\r\n        [InlineData(true, true, true, true, true)]\r\n        [InlineData(true, false, true, true, false)]\r\n        public void IsFivePrimeUtrVariant(bool onReverseStrand, bool withinCdna, bool beforeCoding, bool afterCoding,\r\n            bool expectedResult)\r\n        {\r\n            var positionalEffect = new TranscriptPositionalEffect\r\n            {\r\n                BeforeCoding = beforeCoding,\r\n                AfterCoding = afterCoding,\r\n                WithinCdna = withinCdna\r\n            };\r\n\r\n            var variant = new Mock<ISimpleVariant>();\r\n            var transcript = new Mock<ITranscript>();\r\n\r\n            variant.SetupGet(x => x.AltAllele).Returns(\"G\");\r\n            variant.SetupGet(x => x.RefAllele).Returns(\"C\");\r\n\r\n            var variantEffect = new VariantEffect(positionalEffect, variant.Object, transcript.Object, \"\", \"\", \"\", \"\",\r\n                null, \"\", \"\");\r\n\r\n            var gene = new Mock<IGene>();\r\n            transcript.SetupGet(x => x.Gene).Returns(gene.Object);\r\n            gene.SetupGet(x => x.OnReverseStrand).Returns(onReverseStrand);\r\n\r\n            var translation = new Mock<ITranslation>();\r\n            transcript.SetupGet(x => x.Translation).Returns(translation.Object);\r\n\r\n            Assert.Equal(expectedResult, variantEffect.IsFivePrimeUtrVariant());\r\n        }\r\n\r\n        [Fact]\r\n        public void IsStopLost_DeletionOverStopCodon_ReturnTrue()\r\n        {\r\n            var positionalEffect = new TranscriptPositionalEffect\r\n            {\r\n                BeforeCoding = false,\r\n                AfterCoding = true,\r\n                WithinCdna = true\r\n            };\r\n\r\n            var variant = new Mock<ISimpleVariant>();\r\n  
          variant.SetupGet(x => x.AltAllele).Returns(\"ATAGCCC\");\r\n            variant.SetupGet(x => x.RefAllele).Returns(\"A\");\r\n\r\n            var variantEffect = new VariantEffect(positionalEffect, variant.Object, null, \"\", \"\", \"\", \"\",\r\n                null, \"*\", \"X\");\r\n\r\n            Assert.True(variantEffect.IsStopLost());\r\n        }\r\n        \r\n        [Theory]\r\n        [InlineData(ConsequenceTag.start_retained_variant)]\r\n        [InlineData(ConsequenceTag.incomplete_terminal_codon_variant)]\r\n        public void IsCodingSequenceVariant_WithMoreSpecificConsequence_ReturnFalse(ConsequenceTag ct)\r\n        {\r\n            var positionalEffect = new TranscriptPositionalEffect\r\n            {\r\n                BeforeCoding = false,\r\n                AfterCoding  = true,\r\n                WithinCdna   = true\r\n            };\r\n\r\n            var cache = new VariantEffectCache();\r\n            cache.Add(ct, true);\r\n            var variant = new Mock<ISimpleVariant>();\r\n            variant.SetupGet(x => x.AltAllele).Returns(\"ATAGCCC\");\r\n            variant.SetupGet(x => x.RefAllele).Returns(\"A\");\r\n\r\n            var variantEffect = new VariantEffect(positionalEffect, variant.Object, null, \"\", \"\", \"\", \"\",\r\n                null, null, null, cache);\r\n\r\n            Assert.False(variantEffect.IsCodingSequenceVariant());\r\n        }\r\n        \r\n    }\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/AnnotatorTests.cs",
    "content": "﻿using ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing Moq;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.GeneAnnotation;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation\r\n{\r\n    public sealed class AnnotatorTest\r\n    {\r\n        private static IVariant[] GetVariants()\r\n        {\r\n            var variant = new Mock<IVariant>();\r\n            variant.SetupGet(x => x.Chromosome).Returns(ChromosomeUtilities.Chr1);\r\n            variant.SetupGet(x => x.Type).Returns(VariantType.SNV);\r\n            variant.SetupGet(x => x.Start).Returns(949523);\r\n            variant.SetupGet(x => x.End).Returns(949523);\r\n            variant.SetupGet(x => x.RefAllele).Returns(\"C\");\r\n            variant.SetupGet(x => x.AltAllele).Returns(\"T\");\r\n            variant.SetupGet(x => x.Behavior).Returns(AnnotationBehavior.SmallVariants);\r\n            return new[] { variant.Object };\r\n        }\r\n\r\n        private static IVariant[] GetMitoVariants()\r\n        {\r\n            var variant = new Mock<IVariant>();\r\n            variant.SetupGet(x => x.Chromosome).Returns(ChromosomeUtilities.ChrM);\r\n            variant.SetupGet(x => x.Type).Returns(VariantType.SNV);\r\n            variant.SetupGet(x => x.Start).Returns(9495);\r\n            variant.SetupGet(x => x.End).Returns(9495);\r\n            variant.SetupGet(x => x.RefAllele).Returns(\"C\");\r\n            variant.SetupGet(x => x.AltAllele).Returns(\"T\");\r\n            variant.SetupGet(x => x.Behavior).Returns(AnnotationBehavior.SmallVariants);\r\n            return new[] { variant.Object };\r\n        }\r\n\r\n        [Fact]\r\n        public void Annotate_conservation_annotation()\r\n        {\r\n            var position = new Mock<IPosition>();\r\n  
          position.SetupGet(x => x.Variants).Returns(GetMitoVariants);\r\n            position.SetupGet(x => x.Chromosome).Returns(ChromosomeUtilities.Chr1);\r\n\r\n            var csProvider = new Mock<IAnnotationProvider>();\r\n            csProvider.SetupGet(x => x.Assembly).Returns(GenomeAssembly.GRCh37);\r\n            csProvider.Setup(x => x.Annotate(It.IsAny<IAnnotatedPosition>())).\r\n                Callback((IAnnotatedPosition x) => { x.CytogeneticBand = \"testCytoBand\"; });\r\n\r\n            var taProvider = new Mock<IAnnotationProvider>();\r\n            taProvider.SetupGet(x => x.Assembly).Returns(GenomeAssembly.GRCh37);\r\n            taProvider.Setup(x => x.Annotate(It.IsAny<IAnnotatedPosition>())).Callback((IAnnotatedPosition x) => { });//do nothing\r\n\r\n            var annotator = new Annotator(taProvider.Object, null, null, csProvider.Object, null, null, null, null);\r\n\r\n            var annotatedPosition = annotator.Annotate(position.Object);\r\n\r\n            Assert.Equal(\"testCytoBand\", annotatedPosition.CytogeneticBand);\r\n        }\r\n\r\n        [Fact]\r\n        public void Annotate_mito_hg19()\r\n        {\r\n            var position = new Mock<IPosition>();\r\n            position.SetupGet(x => x.Variants).Returns(GetVariants);\r\n            position.SetupGet(x => x.Chromosome).Returns(ChromosomeUtilities.ChrM);\r\n\r\n            var csProvider = new Mock<IAnnotationProvider>();\r\n            csProvider.SetupGet(x => x.Assembly).Returns(GenomeAssembly.GRCh37);\r\n            csProvider.Setup(x => x.Annotate(It.IsAny<IAnnotatedPosition>())).\r\n                Callback((IAnnotatedPosition x) => { x.CytogeneticBand = \"testCytoBand\"; });\r\n\r\n            var taProvider = new Mock<IAnnotationProvider>();\r\n            taProvider.SetupGet(x => x.Assembly).Returns(GenomeAssembly.GRCh37);\r\n            taProvider.Setup(x => x.Annotate(It.IsAny<IAnnotatedPosition>())).Callback((IAnnotatedPosition x) => { });//do nothing\r\n\r\n 
           var annotator = new Annotator(taProvider.Object, null, null, csProvider.Object, null, null, null, null);\r\n\r\n            var annotatedPosition = annotator.Annotate(position.Object);\r\n\r\n            Assert.Null(annotatedPosition.CytogeneticBand);\r\n        }\r\n\r\n        [Fact]\r\n        public void Annotate_mito_GRCh37()\r\n        {\r\n            var position = new Mock<IPosition>();\r\n            position.SetupGet(x => x.Variants).Returns(GetVariants);\r\n            position.SetupGet(x => x.Chromosome).Returns(ChromosomeUtilities.ChrM);\r\n\r\n            var csProvider = new Mock<IAnnotationProvider>();\r\n            csProvider.SetupGet(x => x.Assembly).Returns(GenomeAssembly.GRCh37);\r\n            csProvider.Setup(x => x.Annotate(It.IsAny<IAnnotatedPosition>())).\r\n                Callback((IAnnotatedPosition x) => { x.CytogeneticBand = \"testCytoBand\"; });\r\n\r\n            var taProvider = new Mock<IAnnotationProvider>();\r\n            taProvider.SetupGet(x => x.Assembly).Returns(GenomeAssembly.GRCh37);\r\n            taProvider.Setup(x => x.Annotate(It.IsAny<IAnnotatedPosition>())).Callback((IAnnotatedPosition x) => { });//do nothing\r\n\r\n            var annotator = new Annotator(taProvider.Object, null, null, csProvider.Object, null, null, null, null);\r\n            annotator.EnableMitochondrialAnnotation();\r\n\r\n            var annotatedPosition = annotator.Annotate(position.Object);\r\n\r\n            Assert.NotNull(annotatedPosition.CytogeneticBand);\r\n        }\r\n\r\n\r\n\r\n\r\n        [Fact]\r\n        public void Annotate_null_position()\r\n        {\r\n            var annotator         = new Annotator(null, null, null, null, null, null, null, null);\r\n            var annotatedPosition = annotator.Annotate(null);\r\n\r\n            Assert.Null(annotatedPosition);\r\n        }\r\n\r\n        //[Fact]\r\n        //public void TrackAffectedGenes()\r\n        //{\r\n        //    var taProvider = new 
Mock<IAnnotationProvider>();\r\n        //    taProvider.SetupGet(x => x.Assembly).Returns(GenomeAssembly.GRCh37);\r\n        //    taProvider.Setup(x => x.Annotate(It.IsAny<IAnnotatedPosition>())).Callback((IAnnotatedPosition x) => { });//do nothing\r\n        //    var geneAnnotationProvider = new Mock<IGeneAnnotationProvider>();\r\n        //    geneAnnotationProvider.SetupGet(x => x.Assembly).Returns(GenomeAssembly.GRCh37);\r\n\r\n        //    var annotator          = new Annotator(taProvider.Object, null, null, null, geneAnnotationProvider.Object);\r\n        //    var annotatedPosition  = new Mock<IAnnotatedPosition>();\r\n        //    var annotatedVariant   = new Mock<IAnnotatedVariant>();\r\n        //    var ensembleTranscript = new Mock<IAnnotatedTranscript>();\r\n        //    annotatedVariant.SetupGet(x => x.EnsemblTranscripts)\r\n        //        .Returns(new List<IAnnotatedTranscript> { ensembleTranscript.Object });\r\n        //    ensembleTranscript.SetupGet(x => x.Transcript.Gene.Symbol).Returns(\"ensembl1\");\r\n\r\n        //    var refSeqTranscript = new Mock<IAnnotatedTranscript>();\r\n        //    annotatedVariant.SetupGet(x => x.RefSeqTranscripts)\r\n        //        .Returns(new List<IAnnotatedTranscript> { refSeqTranscript.Object });\r\n        //    refSeqTranscript.SetupGet(x => x.Transcript.Gene.Symbol).Returns(\"refseq1\");\r\n\r\n        //    annotatedPosition.SetupGet(x => x.AnnotatedVariants).Returns(new[] { annotatedVariant.Object });\r\n\r\n        //    annotator.TrackAffectedGenes(annotatedPosition.Object);\r\n\r\n        //    var geneAnnotation = new Mock<IAnnotatedGene>();\r\n        //    geneAnnotationProvider.Setup(x => x.Annotate(\"ensembl1\")).Returns(geneAnnotation.Object);\r\n        //    geneAnnotationProvider.Setup(x => x.Annotate(\"refseq1\")).Returns((string)null);\r\n\r\n        //    var annotatedGenes = annotator.GetGeneAnnotations();\r\n        //    Assert.Equal(1, annotatedGenes.Count);\r\n        
//}\r\n        \r\n        [Fact]\r\n        public void CheckAssemblyConsistency_Consistent()\r\n        {\r\n            var taProvider = new Mock<IAnnotationProvider>();\r\n            taProvider.SetupGet(x => x.Assembly).Returns(GenomeAssembly.GRCh37);\r\n\r\n            var saProvider = new Mock<IAnnotationProvider>();\r\n            saProvider.SetupGet(x => x.Assembly).Returns(GenomeAssembly.GRCh37);\r\n\r\n            var csProvider = new Mock<IAnnotationProvider>();\r\n            csProvider.SetupGet(x => x.Assembly).Returns(GenomeAssembly.GRCh37);\r\n\r\n            var omimProvider = new Mock<IGeneAnnotationProvider>();\r\n            omimProvider.SetupGet(x => x.Assembly).Returns(GenomeAssembly.GRCh37);\r\n\r\n            var annotator = new Annotator(taProvider.Object, null, saProvider.Object, csProvider.Object, null, omimProvider.Object, null, null);\r\n\r\n            Assert.NotNull(annotator);\r\n        }\r\n\r\n        [Fact]\r\n        public void CheckAssemblyConsistency_Inconsistent()\r\n        {\r\n            var taProvider = new Mock<IAnnotationProvider>();\r\n            taProvider.SetupGet(x => x.Assembly).Returns(GenomeAssembly.GRCh37);\r\n\r\n            var saProvider = new Mock<IAnnotationProvider>();\r\n            saProvider.SetupGet(x => x.Assembly).Returns(GenomeAssembly.GRCh37);\r\n\r\n            var csProvider = new Mock<IAnnotationProvider>();\r\n            csProvider.SetupGet(x => x.Assembly).Returns(GenomeAssembly.GRCh38);\r\n\r\n            var omimProvider = new Mock<IGeneAnnotationProvider>();\r\n            omimProvider.SetupGet(x => x.Assembly).Returns(GenomeAssembly.GRCh37);\r\n\r\n            Assert.Throws<UserErrorException>(() => new Annotator(taProvider.Object, null, saProvider.Object, csProvider.Object, null, omimProvider.Object, null, null));\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/Caches/DataStructures/EncodedTranscriptDataTests.cs",
    "content": "﻿using System.IO;\r\nusing System.Text;\r\nusing IO;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.Caches.DataStructures\r\n{\r\n    public sealed class EncodedTranscriptDataTests\r\n    {\r\n        [Fact]\r\n        public void EncodedTranscriptData_EndToEnd()\r\n        {\r\n            const BioType expectedBiotype        = BioType.Y_RNA;\r\n            const bool expectedCdsStartNotFound  = true;\r\n            const bool expectedCdsEndNotFound    = true;\r\n            const Source expectedSource          = Source.BothRefSeqAndEnsembl;\r\n            const bool expectedCanonical         = true;\r\n            const bool expectedSift              = true;\r\n            const bool expectedPolyPhen          = true;\r\n            const bool expectedMirnas            = true;\r\n            const bool expectedRnaEdits          = true;\r\n            const bool expectedSelenocysteines   = true;\r\n            const bool expectedTranscriptRegions = true;\r\n            const bool expectedTranslation       = true;\r\n            const byte expectedStartExonPhase    = 3;\r\n\r\n            // ReSharper disable ConditionIsAlwaysTrueOrFalse\r\n            var encodedData = EncodedTranscriptData.GetEncodedTranscriptData(expectedBiotype, expectedCdsStartNotFound,\r\n                expectedCdsEndNotFound, expectedSource, expectedCanonical, expectedSift, expectedPolyPhen,\r\n                expectedMirnas, expectedRnaEdits, expectedSelenocysteines, expectedTranscriptRegions,\r\n                expectedTranslation, expectedStartExonPhase);\r\n            // ReSharper restore ConditionIsAlwaysTrueOrFalse\r\n\r\n            EncodedTranscriptData observedEncodedTranscriptData;\r\n\r\n            using (var ms = new MemoryStream())\r\n            {\r\n                using (var writer = new ExtendedBinaryWriter(ms, Encoding.UTF8, true))\r\n   
             {\r\n                    encodedData.Write(writer);\r\n                }\r\n\r\n                ms.Position = 0;\r\n\r\n                using (var reader = new BufferedBinaryReader(ms))\r\n                {\r\n                    observedEncodedTranscriptData = EncodedTranscriptData.Read(reader);\r\n                }\r\n            }\r\n\r\n            Assert.NotNull(observedEncodedTranscriptData);\r\n            Assert.Equal(expectedBiotype,           observedEncodedTranscriptData.BioType);\r\n            Assert.Equal(expectedSource,            observedEncodedTranscriptData.TranscriptSource);\r\n            Assert.Equal(expectedCanonical,         observedEncodedTranscriptData.IsCanonical);\r\n            Assert.Equal(expectedSift,              observedEncodedTranscriptData.HasSift);\r\n            Assert.Equal(expectedPolyPhen,          observedEncodedTranscriptData.HasPolyPhen);\r\n            Assert.Equal(expectedMirnas,            observedEncodedTranscriptData.HasMirnas);\r\n            Assert.Equal(expectedTranscriptRegions, observedEncodedTranscriptData.HasTranscriptRegions);\r\n            Assert.Equal(expectedTranslation,       observedEncodedTranscriptData.HasTranslation);\r\n            Assert.Equal(expectedStartExonPhase,    observedEncodedTranscriptData.StartExonPhase);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/Caches/DataStructures/GeneTests.cs",
    "content": "﻿using System.IO;\r\nusing System.Text;\r\nusing Genome;\r\nusing IO;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.Caches.DataStructures\r\n{\r\n    public sealed class GeneTests\r\n    {\r\n        [Fact]\r\n        public void Gene_EndToEnd()\r\n        {\r\n            const int expectedStart           = int.MaxValue;\r\n            const int expectedEnd             = int.MinValue;\r\n            Chromosome expectedChromosome    = ChromosomeUtilities.Chr1;\r\n            const bool expectedReverseStrand  = true;\r\n            const string expectedSymbol       = \"anavrin\";\r\n            const string expectedEntrezGeneId = \"7157\";\r\n            const string expectedEnsemblId    = \"ENSG00000141510\";\r\n            const int expectedHgncId          = int.MaxValue;\r\n\r\n            // ReSharper disable ConditionIsAlwaysTrueOrFalse\r\n            var gene = new Gene(expectedChromosome, expectedStart, expectedEnd, expectedReverseStrand,\r\n                expectedSymbol, expectedHgncId, CompactId.Convert(expectedEntrezGeneId),\r\n                CompactId.Convert(expectedEnsemblId));\r\n            // ReSharper restore ConditionIsAlwaysTrueOrFalse\r\n\r\n            IGene observedGene;\r\n\r\n            using (var ms = new MemoryStream())\r\n            {\r\n                using (var writer = new ExtendedBinaryWriter(ms, Encoding.UTF8, true))\r\n                {\r\n                    gene.Write(writer);\r\n                }\r\n\r\n                ms.Position = 0;\r\n\r\n                using (var reader = new BufferedBinaryReader(ms))\r\n                {\r\n                    observedGene = Gene.Read(reader, ChromosomeUtilities.RefIndexToChromosome);\r\n                }\r\n            }\r\n\r\n            
Assert.NotNull(observedGene);\r\n            Assert.Equal(expectedStart,            observedGene.Start);\r\n            Assert.Equal(expectedEnd,              observedGene.End);\r\n            Assert.Equal(expectedChromosome.Index, observedGene.Chromosome.Index);\r\n            Assert.Equal(expectedReverseStrand,    observedGene.OnReverseStrand);\r\n            Assert.Equal(expectedSymbol,           observedGene.Symbol);\r\n            Assert.Equal(expectedEntrezGeneId,     observedGene.EntrezGeneId.ToString());\r\n            Assert.Equal(expectedEnsemblId,        observedGene.EnsemblId.ToString());\r\n            Assert.Equal(expectedHgncId,           observedGene.HgncId);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/Caches/DataStructures/RegulatoryRegionTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Text;\r\nusing Genome;\r\nusing IO;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Caches;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.Caches.DataStructures\r\n{\r\n    public sealed class RegulatoryRegionTests\r\n    {\r\n        [Fact]\r\n        public void RegulatoryRegion_EndToEnd()\r\n        {\r\n            Chromosome expectedChromosome          = ChromosomeUtilities.Chr1;\r\n            const int expectedStart                 = int.MaxValue;\r\n            const int expectedEnd                   = int.MinValue;\r\n            const string expectedId                 = \"ENST00000540021\";\r\n            const RegulatoryRegionType expectedType = RegulatoryRegionType.open_chromatin_region;\r\n\r\n            var indexToChromosome = new Dictionary<ushort, Chromosome>\r\n            {\r\n                [expectedChromosome.Index] = expectedChromosome\r\n            };\r\n\r\n            // ReSharper disable ConditionIsAlwaysTrueOrFalse\r\n            var regulatoryRegion = new RegulatoryRegion(expectedChromosome, expectedStart, expectedEnd,\r\n                CompactId.Convert(expectedId), expectedType);\r\n            // ReSharper restore ConditionIsAlwaysTrueOrFalse\r\n\r\n            IRegulatoryRegion observedRegulatoryRegion;\r\n\r\n            using (var ms = new MemoryStream())\r\n            {\r\n                using (var writer = new ExtendedBinaryWriter(ms, Encoding.UTF8, true))\r\n                {\r\n                    regulatoryRegion.Write(writer);\r\n                }\r\n\r\n                ms.Position = 0;\r\n\r\n                using (var reader = new BufferedBinaryReader(ms))\r\n                {\r\n                    observedRegulatoryRegion = 
RegulatoryRegion.Read(reader, indexToChromosome);\r\n                }\r\n            }\r\n\r\n            Assert.NotNull(observedRegulatoryRegion);\r\n            Assert.Equal(expectedStart,            observedRegulatoryRegion.Start);\r\n            Assert.Equal(expectedEnd,              observedRegulatoryRegion.End);\r\n            Assert.Equal(expectedId,               observedRegulatoryRegion.Id.WithoutVersion);\r\n            Assert.Equal(expectedType,             observedRegulatoryRegion.Type);\r\n            Assert.Equal(expectedChromosome.Index, observedRegulatoryRegion.Chromosome.Index);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/Caches/DataStructures/TranscriptRegionExtensionsTests.cs",
    "content": "﻿using VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.Caches.DataStructures\r\n{\r\n    public sealed class TranscriptRegionExtensionsTests\r\n    {\r\n        private readonly ITranscriptRegion[] _forwardTranscriptRegions;\r\n        private readonly ITranscriptRegion[] _reverseTranscriptRegions;\r\n\r\n        public TranscriptRegionExtensionsTests()\r\n        {\r\n            _forwardTranscriptRegions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1,   77997792, 77998025, 1, 234),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 77998026, 78001531, 234, 235),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 2,   78001532, 78001723, 235, 426),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 2, 78001724, 78024286, 426, 427),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 3,   78024287, 78024416, 427, 556)\r\n            };\r\n\r\n            _reverseTranscriptRegions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 3,   312957, 313157, 136, 336),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 2, 313158, 313873, 135, 136),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 2,   313874, 313892, 117, 135),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 313893, 314242, 116, 117),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1,   314243, 314358, 1, 116)\r\n            };\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(77997792, 0)]\r\n        [InlineData(78001723, 2)]\r\n        [InlineData(78024416, 4)]\r\n        [InlineData(78001724, 3)]\r\n        public void BinarySearch_Nominal(int position, int expectedResult)\r\n        {\r\n            
int observedResult = _forwardTranscriptRegions.BinarySearch(position);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(77997791, -1)]\r\n        [InlineData(78024417, -6)]\r\n        // on a miss, the binary search method returns the bitwise complement of the index of the next larger element (or of the array length when no larger element exists)\r\n        public void BinarySearch_ReturnNegative_BeyondExons(int position, int expectedResult)\r\n        {\r\n            int observedResult = _forwardTranscriptRegions.BinarySearch(position);\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetExonsAndIntrons_Forward_Internal()\r\n        {\r\n            var observedResults = _forwardTranscriptRegions.GetExonsAndIntrons(1, 3);\r\n\r\n            Assert.Equal(2, observedResults.ExonStart);\r\n            Assert.Equal(2, observedResults.ExonEnd);\r\n            Assert.Equal(1, observedResults.IntronStart);\r\n            Assert.Equal(2, observedResults.IntronEnd);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetExonsAndIntrons_Reverse_Gap_NIR_3592()\r\n        {\r\n            var transcriptRegions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 19,   16606122, 16606679, 3404, 3961),\r\n                new TranscriptRegion(TranscriptRegionType.Gap, 19, 16606680, 16606680, 3403, 3404),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 19,   16606681, 16607898, 2186, 3403)\r\n            };\r\n\r\n            var observedResults = transcriptRegions.GetExonsAndIntrons(1, 1);\r\n\r\n            Assert.Equal(19, observedResults.ExonStart);\r\n            Assert.Equal(19, observedResults.ExonEnd);\r\n            Assert.Equal(-1, observedResults.IntronStart);\r\n            Assert.Equal(-1, observedResults.IntronEnd);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetExonsAndIntrons_Reverse_Internal()\r\n        
{\r\n            var observedResults = _reverseTranscriptRegions.GetExonsAndIntrons(2, 4);\r\n\r\n            Assert.Equal(1, observedResults.ExonStart);\r\n            Assert.Equal(2, observedResults.ExonEnd);\r\n            Assert.Equal(1, observedResults.IntronStart);\r\n            Assert.Equal(1, observedResults.IntronEnd);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetExonsAndIntrons_Forward_StartBefore()\r\n        {\r\n            var observedResults = _forwardTranscriptRegions.GetExonsAndIntrons(-1, 3);\r\n\r\n            Assert.Equal(1, observedResults.ExonStart);\r\n            Assert.Equal(2, observedResults.ExonEnd);\r\n            Assert.Equal(1, observedResults.IntronStart);\r\n            Assert.Equal(2, observedResults.IntronEnd);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetExonsAndIntrons_Forward_EndAfter()\r\n        {\r\n            var observedResults = _forwardTranscriptRegions.GetExonsAndIntrons(2, -6);\r\n\r\n            Assert.Equal(2, observedResults.ExonStart);\r\n            Assert.Equal(3, observedResults.ExonEnd);\r\n            Assert.Equal(2, observedResults.IntronStart);\r\n            Assert.Equal(2, observedResults.IntronEnd);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetExonsAndIntrons_Reverse_StartBefore_EndAfter()\r\n        {\r\n            var observedResults = _reverseTranscriptRegions.GetExonsAndIntrons(-1, -6);\r\n\r\n            Assert.Equal(1, observedResults.ExonStart);\r\n            Assert.Equal(3, observedResults.ExonEnd);\r\n            Assert.Equal(1, observedResults.IntronStart);\r\n            Assert.Equal(2, observedResults.IntronEnd);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/Caches/DataStructures/TranscriptRegionTests.cs",
    "content": "﻿using System.IO;\r\nusing System.Text;\r\nusing CacheUtils.TranscriptCache.Comparers;\r\nusing IO;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.Caches.DataStructures\r\n{\r\n    public sealed class TranscriptRegionTests\r\n    {\r\n        [Fact]\r\n        public void TranscriptRegion_EndToEnd()\r\n        {\r\n            var expectedResults = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 13, 100, 200, 300, 400),\r\n                new TranscriptRegion(TranscriptRegionType.Gap, 0, 120, 230, 10, 20),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 14, 130, 230, 330, 430)\r\n            };\r\n\r\n            var observedResults = new ITranscriptRegion[expectedResults.Length];\r\n\r\n            using (var ms = new MemoryStream())\r\n            {\r\n                using (var writer = new ExtendedBinaryWriter(ms, Encoding.UTF8, true))\r\n                {\r\n                    foreach(var region in expectedResults) region.Write(writer);\r\n                }\r\n\r\n                ms.Position = 0;\r\n\r\n                using (var reader = new BufferedBinaryReader(ms))\r\n                {\r\n                    for (int i = 0; i < expectedResults.Length; i++)\r\n                    {\r\n                        observedResults[i] = TranscriptRegion.Read(reader);\r\n                    }\r\n                }\r\n            }\r\n\r\n            var comparer = new TranscriptRegionComparer();\r\n            Assert.Equal(expectedResults, observedResults, comparer);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/Caches/DataStructures/TranscriptTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Text;\r\nusing Genome;\r\nusing Intervals;\r\nusing IO;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.Caches.DataStructures\r\n{\r\n    public sealed class TranscriptTests\r\n    {\r\n        [Fact]\r\n        public void Transcript_EndToEnd()\r\n        {\r\n            Chromosome expectedChromosome      = ChromosomeUtilities.Chr1;\r\n            const int expectedStart             = int.MaxValue;\r\n            const int expectedEnd               = int.MinValue;\r\n            const string expectedId             = \"ENST00000540021\";\r\n            const byte expectedVersion          = 7;\r\n            const BioType expectedBioType       = BioType.IG_J_pseudogene;\r\n            const bool expectedCanonical        = true;\r\n            const Source expectedSource         = Source.BothRefSeqAndEnsembl;\r\n            const bool expectedCdsStartNotFound = true;\r\n            const bool expectedCdsEndNotFound   = true;\r\n\r\n            var expectedIdAndVersion = expectedId + \".\" + expectedVersion;\r\n\r\n            ICodingRegion expectedCodingRegion = new CodingRegion(10001, 10200, 1, 200, 200);\r\n            ITranscriptRegion[] expectedTranscriptRegions = GetTranscriptRegions();\r\n            const byte expectedNumExons = 3;\r\n\r\n            const int expectedTotalExonLength             = 300;\r\n            const byte expectedStartExonPhase             = 3;\r\n            const int expectedSiftIndex                   = 11;\r\n            const int expectedPolyPhenIndex               = 13;\r\n\r\n            IInterval[] expectedMicroRnas = GetMicroRnas();\r\n\r\n            ITranslation expectedTranslation = new Translation(expectedCodingRegion, 
CompactId.Convert(\"ENSP00000446475\", 17), \"VEIDSD\");\r\n\r\n            IGene expectedGene = new Gene(expectedChromosome, 100, 200, true, \"TP53\", 300, CompactId.Convert(\"7157\"),\r\n                CompactId.Convert(\"ENSG00000141510\"));\r\n\r\n            var genes = new IGene[1];\r\n            genes[0] = expectedGene;\r\n\r\n            var peptideSeqs = new string[1];\r\n            peptideSeqs[0] = expectedTranslation.PeptideSeq;\r\n\r\n            var geneIndices             = CreateIndices(genes);\r\n            var transcriptRegionIndices = CreateIndices(expectedTranscriptRegions);\r\n            var microRnaIndices         = CreateIndices(expectedMicroRnas);\r\n            var peptideIndices          = CreateIndices(peptideSeqs);\r\n\r\n            var indexToChromosome = new Dictionary<ushort, Chromosome>\r\n            {\r\n                [expectedChromosome.Index] = expectedChromosome\r\n            };\r\n\r\n            // ReSharper disable ConditionIsAlwaysTrueOrFalse\r\n            var transcript = new Transcript(expectedChromosome, expectedStart, expectedEnd,\r\n                CompactId.Convert(expectedId, expectedVersion), expectedTranslation, expectedBioType, expectedGene,\r\n                expectedTotalExonLength, expectedStartExonPhase, expectedCanonical, expectedTranscriptRegions,\r\n                expectedNumExons, expectedMicroRnas, expectedSiftIndex, expectedPolyPhenIndex,\r\n                expectedSource, expectedCdsStartNotFound, expectedCdsEndNotFound, null, null);\r\n            // ReSharper restore ConditionIsAlwaysTrueOrFalse\r\n\r\n            ITranscript observedTranscript;\r\n\r\n            using (var ms = new MemoryStream())\r\n            {\r\n                using (var writer = new ExtendedBinaryWriter(ms, Encoding.UTF8, true))\r\n                {\r\n                    transcript.Write(writer, geneIndices, transcriptRegionIndices, microRnaIndices, peptideIndices);\r\n                }\r\n\r\n                
ms.Position = 0;\r\n\r\n                using (var reader = new BufferedBinaryReader(ms))\r\n                {\r\n                    observedTranscript = Transcript.Read(reader, indexToChromosome, genes, expectedTranscriptRegions, expectedMicroRnas, peptideSeqs);\r\n                }\r\n            }\r\n\r\n            Assert.NotNull(observedTranscript);\r\n            Assert.Equal(expectedStart,           observedTranscript.Start);\r\n            Assert.Equal(expectedEnd,             observedTranscript.End);\r\n            Assert.Equal(expectedIdAndVersion,    observedTranscript.Id.WithVersion);\r\n            Assert.Equal(expectedBioType,         observedTranscript.BioType);\r\n            Assert.Equal(expectedCanonical,       observedTranscript.IsCanonical);\r\n            Assert.Equal(expectedSource,          observedTranscript.Source);\r\n            Assert.Equal(expectedTotalExonLength, observedTranscript.TotalExonLength);\r\n            Assert.Equal(expectedStartExonPhase,  observedTranscript.StartExonPhase);\r\n            Assert.Equal(expectedSiftIndex,       observedTranscript.SiftIndex);\r\n            Assert.Equal(expectedPolyPhenIndex,   observedTranscript.PolyPhenIndex);\r\n\r\n            Assert.Equal(expectedChromosome.Index,         observedTranscript.Chromosome.Index);\r\n            Assert.Equal(expectedGene.Symbol,              observedTranscript.Gene.Symbol);\r\n            Assert.Equal(expectedTranslation.PeptideSeq,   observedTranscript.Translation.PeptideSeq);\r\n            Assert.Equal(expectedTranscriptRegions.Length, observedTranscript.TranscriptRegions.Length);\r\n            Assert.Equal(expectedMicroRnas.Length,         observedTranscript.MicroRnas.Length);\r\n        }\r\n\r\n        private static Dictionary<T, int> CreateIndices<T>(T[] objects)\r\n        {\r\n            var indexDict = new Dictionary<T, int>();\r\n            for (int i = 0; i < objects.Length; i++) indexDict[objects[i]] = i;\r\n            return indexDict;\r\n 
       }\r\n\r\n        private static ITranscriptRegion[] GetTranscriptRegions()\r\n        {\r\n            var regions = new ITranscriptRegion[5];\r\n            regions[0] = new TranscriptRegion(TranscriptRegionType.Exon, 1, 100, 199, 300, 399);\r\n            regions[1] = new TranscriptRegion(TranscriptRegionType.Intron, 1, 200, 299, 400, 499);\r\n            regions[2] = new TranscriptRegion(TranscriptRegionType.Exon, 2, 300, 399, 500, 599);\r\n            regions[3] = new TranscriptRegion(TranscriptRegionType.Intron, 2, 400, 499, 600, 699);\r\n            regions[4] = new TranscriptRegion(TranscriptRegionType.Exon, 3, 500, 599, 700, 799);\r\n            return regions;\r\n        }\r\n\r\n        private static IInterval[] GetMicroRnas()\r\n        {\r\n            var introns = new IInterval[1];\r\n            introns[0] = new Interval(100, 200);\r\n            return introns;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/Caches/DataStructures/TranslationTests.cs",
    "content": "﻿using System.IO;\r\nusing System.Text;\r\nusing IO;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.Caches.DataStructures\r\n{\r\n    public sealed class TranslationTests\r\n    {\r\n        [Fact]\r\n        public void Translation_EndToEnd()\r\n        {\r\n            ICodingRegion expectedCodingRegion = new CodingRegion(100, 200, 300, 400, 101);\r\n            const string expectedProteinId         = \"ENSP00000446475.7\";\r\n            const string expectedPeptideSeq        = \"VEIDSD\";\r\n\r\n            string[] peptideSeqs = { expectedPeptideSeq };\r\n\r\n            ITranslation expectedTranslation =\r\n                new Translation(expectedCodingRegion, CompactId.Convert(expectedProteinId, 7),\r\n                    expectedPeptideSeq);\r\n\r\n            ITranslation observedTranslation;\r\n\r\n            using (var ms = new MemoryStream())\r\n            {\r\n                using (var writer = new ExtendedBinaryWriter(ms, Encoding.UTF8, true))\r\n                {\r\n                    expectedTranslation.Write(writer, 0);\r\n                }\r\n\r\n                ms.Position = 0;\r\n\r\n                using (var reader = new BufferedBinaryReader(ms))\r\n                {\r\n                    observedTranslation = Translation.Read(reader, peptideSeqs);\r\n                }\r\n            }\r\n\r\n            Assert.NotNull(observedTranslation);\r\n            Assert.Equal(expectedCodingRegion.CdnaStart, observedTranslation.CodingRegion.CdnaStart);\r\n            Assert.Equal(expectedProteinId,              observedTranslation.ProteinId.WithVersion);\r\n            Assert.Equal(expectedPeptideSeq,             observedTranslation.PeptideSeq);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/Caches/TranscriptCacheTests.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.Linq;\r\nusing CacheUtils.TranscriptCache;\r\nusing Genome;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Caches;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.Providers;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.Caches\r\n{\r\n    public sealed class TranscriptCacheTests\r\n    {\r\n        private readonly ITranscriptCache _cache;\r\n        private readonly IEnumerable<IDataSourceVersion> _expectedDataSourceVersions;\r\n        private const GenomeAssembly ExpectedAssembly = GenomeAssembly.hg19;\r\n\r\n        public TranscriptCacheTests()\r\n        {\r\n            _expectedDataSourceVersions        = GetDataSourceVersions();\r\n            var transcriptIntervalArrays       = GetTranscripts().ToIntervalArrays(11);\r\n            var regulatoryRegionIntervalArrays = GetRegulatoryRegions().ToIntervalArrays(11);\r\n\r\n            _cache = new TranscriptCache(_expectedDataSourceVersions, ExpectedAssembly, transcriptIntervalArrays,\r\n                regulatoryRegionIntervalArrays);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetOverlappingFlankingTranscripts_TwoOverlaps()\r\n        {\r\n            var interval = new ChromosomeInterval(ChromosomeUtilities.Chr1, 100, 200);\r\n            ITranscript[] overlappingTranscripts = _cache.TranscriptIntervalForest.GetAllFlankingValues(interval);\r\n\r\n            Assert.NotNull(overlappingTranscripts);\r\n            Assert.Equal(2, overlappingTranscripts.Length);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetOverlappingFlankingTranscripts_NoOverlaps()\r\n        {\r\n            var interval = new 
ChromosomeInterval(ChromosomeUtilities.Chr11, 5000, 5001);\r\n            ITranscript[] overlappingTranscripts = _cache.TranscriptIntervalForest.GetAllFlankingValues(interval);\r\n\r\n            Assert.Null(overlappingTranscripts);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetOverlappingRegulatoryRegions_OneOverlap()\r\n        {\r\n            var overlappingRegulatoryRegions =\r\n                _cache.RegulatoryIntervalForest.GetAllOverlappingValues(ChromosomeUtilities.Chr1.Index, 100, 200);\r\n\r\n            Assert.NotNull(overlappingRegulatoryRegions);\r\n            Assert.Single(overlappingRegulatoryRegions);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetOverlappingRegulatoryRegions_NoOverlaps()\r\n        {\r\n            var overlappingRegulatoryRegions =\r\n                _cache.RegulatoryIntervalForest.GetAllOverlappingValues(ChromosomeUtilities.Chr1.Index, 5000, 5001);\r\n\r\n            Assert.Null(overlappingRegulatoryRegions);\r\n        }\r\n\r\n        [Fact]\r\n        public void Assembly_Get()\r\n        {\r\n            var observedAssembly = _cache.Assembly;\r\n            Assert.Equal(ExpectedAssembly, observedAssembly);\r\n        }\r\n\r\n        [Fact]\r\n        public void DataSourceVersions_Get()\r\n        {\r\n            var observedDataSourceVersions = _cache.DataSourceVersions.ToArray();\r\n            Assert.Single(observedDataSourceVersions);\r\n\r\n            var expectedDataSourceVersion = _expectedDataSourceVersions.ToArray()[0];\r\n            var observedDataSourceVersion = observedDataSourceVersions[0];\r\n            Assert.Equal(expectedDataSourceVersion.Name, observedDataSourceVersion.Name);\r\n        }\r\n\r\n        [Fact]\r\n        private IEnumerable<IDataSourceVersion> GetDataSourceVersions()\r\n        {\r\n            return new List<IDataSourceVersion>\r\n            {\r\n                new DataSourceVersion(\"VEP\", \"87\", DateTime.Now.Ticks, 
Source.BothRefSeqAndEnsembl.ToString())\r\n            };\r\n        }\r\n\r\n        private static IRegulatoryRegion[] GetRegulatoryRegions()\r\n        {\r\n            var regulatoryRegions = new IRegulatoryRegion[3];\r\n\r\n            regulatoryRegions[0] = new RegulatoryRegion(ChromosomeUtilities.Chr11, 11000, 12000, CompactId.Empty,\r\n                RegulatoryRegionType.promoter);\r\n\r\n            regulatoryRegions[1] = new RegulatoryRegion(ChromosomeUtilities.Chr1, 120, 180, CompactId.Empty,\r\n                RegulatoryRegionType.promoter);\r\n\r\n            regulatoryRegions[2] = new RegulatoryRegion(ChromosomeUtilities.Chr1, 300, 320, CompactId.Empty,\r\n                RegulatoryRegionType.promoter);\r\n\r\n            return regulatoryRegions;\r\n        }\r\n\r\n        internal static ITranscript[] GetTranscripts()\r\n        {\r\n            return new ITranscript[]\r\n            {\r\n                new Transcript(ChromosomeUtilities.Chr11, 11000, 12000, CompactId.Empty, null, BioType.other, null, 0, 0,\r\n                    false, null, 0, null, 0, 0, Source.None, false, false, null, null),\r\n                new Transcript(ChromosomeUtilities.Chr1, 120, 180, CompactId.Empty, null, BioType.other, null, 0, 0,\r\n                    false, null, 0, null, 0, 0, Source.None, false, false, null, null),\r\n                new Transcript(ChromosomeUtilities.Chr1, 300, 320, CompactId.Empty, null, BioType.other, null, 0, 0,\r\n                    false, null, 0, null, 0, 0, Source.None, false, false, null, null)\r\n            };\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/Caches/Utilities/RnaEditUtilitiesTests.cs",
    "content": "﻿using VariantAnnotation.Caches.DataStructures;\nusing VariantAnnotation.Caches.Utilities;\nusing Variants;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.Caches.Utilities\n{\n    public sealed class RnaEditUtilitiesTests\n    {\n        [Theory]\n        [InlineData(100, 100, \"G\", VariantType.SNV)]\n        [InlineData(100, 101, \"GT\", VariantType.MNV)]\n        [InlineData(101, 100, \"GCTA\", VariantType.insertion)]\n        [InlineData(100, 100, \"\", VariantType.deletion)]\n        [InlineData(100, 101, null, VariantType.deletion)]\n        public void RnaEditTypes(int start, int end, string bases, VariantType expectedType)\n        {\n            var rnaEdit = new RnaEdit(start, end, bases);\n\n            Assert.Equal(expectedType, RnaEditUtilities.GetRnaEditType(rnaEdit));\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/Caches/Utilities/TranscriptUtilitiesTests.cs",
    "content": "﻿using VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Caches.Utilities;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.Caches.Utilities\r\n{\r\n    public sealed class TranscriptUtilitiesTests\r\n    {\r\n        private readonly ITranscriptRegion[] _transcriptRegions;\r\n\r\n        public TranscriptUtilitiesTests()\r\n        {\r\n            _transcriptRegions = GetTranscriptRegions();\r\n        }\r\n\r\n        [Fact]\r\n        public void GetTotalExonLength_MultipleExons()\r\n        {\r\n            const int expectedLength = 300;\r\n            int observedLength = ExonUtilities.GetTotalExonLength(_transcriptRegions);\r\n            Assert.Equal(expectedLength, observedLength);\r\n        }\r\n\r\n        private static ITranscriptRegion[] GetTranscriptRegions()\r\n        {\r\n            return new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 100, 199, 0, 99),\r\n                new TranscriptRegion(TranscriptRegionType.Gap, 0, 200, 299, 99, 100),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 300, 399, 100, 199),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 400, 499, 199, 200),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 2, 500, 599, 200, 299)\r\n            };\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/GeneFusions/Calling/GeneFusionCallerTests.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.Text;\nusing CacheUtils.TranscriptCache;\nusing Genome;\nusing Intervals;\nusing UnitTests.MockedData;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.AnnotatedPositions;\nusing VariantAnnotation.AnnotatedPositions.Transcript;\nusing VariantAnnotation.Caches.DataStructures;\nusing VariantAnnotation.GeneFusions.Calling;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.Pools;\nusing Variants;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.GeneFusions.Calling\n{\n    public sealed class GeneFusionCallerTests\n    {\n        private readonly ITranscript[] _forwardTranscripts          = {Transcripts.ENST00000370673};\n        private readonly ITranscript[] _forwardNonCodingTranscripts = {Transcripts.ENST00000427819};\n        private readonly ITranscript[] _reverseTranscripts          = {Transcripts.ENST00000615053};\n\n        [Fact]\n        public void AddGeneFusionsToDictionary_ForwardFirst5PrimeUtr_ReverseFirstCds_ActualFusion()\n        {\n            var origin    = new BreakPoint(ChromosomeUtilities.Chr1, 84298366,  false);\n            var partner   = new BreakPoint(ChromosomeUtilities.Chr2, 130509235, true);\n            var adjacency = new BreakEndAdjacency(origin, partner);\n\n            var transcriptIdToGeneFusions = new Dictionary<string, IAnnotatedGeneFusion[]>();\n            GeneFusionCaller.AddGeneFusionsToDictionary(transcriptIdToGeneFusions, adjacency, _forwardTranscripts, _reverseTranscripts, false);\n\n            IAnnotatedGeneFusion[] actualGeneFusions = transcriptIdToGeneFusions[\"ENST00000370673.7\"];\n            Assert.Single(actualGeneFusions);\n\n            IAnnotatedGeneFusion geneFusion = actualGeneFusions[0];\n            Assert.Equal(Transcripts.ENST00000615053.Id.WithVersion, geneFusion.transcript.Id.WithVersion);\n            Assert.Equal(1,                                          geneFusion.exon);\n            
Assert.Null(geneFusion.intron);\n            Assert.Equal(\"ENST00000370673.7(SAMD13):r.?_-192::ENST00000615053.3(POTEI):r.1_?\", geneFusion.hgvsr);\n        }\n\n        [Fact]\n        public void AddGeneFusionsToDictionary_ForwardLast5PrimeUtr_ReverseIntronCds_ActualFusion()\n        {\n            var origin    = new BreakPoint(ChromosomeUtilities.Chr1, 84298557,  false);\n            var partner   = new BreakPoint(ChromosomeUtilities.Chr2, 130508713, true);\n            var adjacency = new BreakEndAdjacency(origin, partner);\n\n            var transcriptIdToGeneFusions = new Dictionary<string, IAnnotatedGeneFusion[]>();\n            GeneFusionCaller.AddGeneFusionsToDictionary(transcriptIdToGeneFusions, adjacency, _forwardTranscripts, _reverseTranscripts, false);\n\n            IAnnotatedGeneFusion[] actualGeneFusions = transcriptIdToGeneFusions[\"ENST00000370673.7\"];\n            Assert.Single(actualGeneFusions);\n\n            IAnnotatedGeneFusion geneFusion = actualGeneFusions[0];\n            Assert.Equal(Transcripts.ENST00000615053.Id.WithVersion, geneFusion.transcript.Id.WithVersion);\n            Assert.Null(geneFusion.exon);\n            Assert.Equal(1,                                                                      geneFusion.intron);\n            Assert.Equal(\"ENST00000370673.7(SAMD13):r.?_-1::ENST00000615053.3(POTEI):r.521+2_?\", geneFusion.hgvsr);\n        }\n\n        [Fact]\n        public void AddGeneFusionsToDictionary_ForwardFirstCds_ReverseFirst3PrimeUtr_ActualFusion()\n        {\n            var origin    = new BreakPoint(ChromosomeUtilities.Chr1, 84298558,  false);\n            var partner   = new BreakPoint(ChromosomeUtilities.Chr2, 130465652, true);\n            var adjacency = new BreakEndAdjacency(origin, partner);\n\n            var transcriptIdToGeneFusions = new Dictionary<string, IAnnotatedGeneFusion[]>();\n            GeneFusionCaller.AddGeneFusionsToDictionary(transcriptIdToGeneFusions, adjacency, _forwardTranscripts, 
_reverseTranscripts, false);\n\n            IAnnotatedGeneFusion[] actualGeneFusions = transcriptIdToGeneFusions[\"ENST00000370673.7\"];\n            Assert.Single(actualGeneFusions);\n\n            IAnnotatedGeneFusion geneFusion = actualGeneFusions[0];\n            Assert.Equal(Transcripts.ENST00000615053.Id.WithVersion, geneFusion.transcript.Id.WithVersion);\n            Assert.Equal(12,                                         geneFusion.exon);\n            Assert.Null(geneFusion.intron);\n            Assert.Equal(\"ENST00000370673.7(SAMD13):r.?_1::ENST00000615053.3(POTEI):r.*1_?\", geneFusion.hgvsr);\n        }\n\n        [Fact]\n        public void AddGeneFusionsToDictionary_ForwardIntronCds_ReverseLastCds_ActualFusion()\n        {\n            var origin    = new BreakPoint(ChromosomeUtilities.Chr1, 84298569,  false);\n            var partner   = new BreakPoint(ChromosomeUtilities.Chr2, 130465653, true);\n            var adjacency = new BreakEndAdjacency(origin, partner);\n\n            var transcriptIdToGeneFusions = new Dictionary<string, IAnnotatedGeneFusion[]>();\n            GeneFusionCaller.AddGeneFusionsToDictionary(transcriptIdToGeneFusions, adjacency, _forwardTranscripts, _reverseTranscripts, false);\n\n            IAnnotatedGeneFusion[] actualGeneFusions = transcriptIdToGeneFusions[\"ENST00000370673.7\"];\n            Assert.Single(actualGeneFusions);\n\n            IAnnotatedGeneFusion geneFusion = actualGeneFusions[0];\n            Assert.Equal(Transcripts.ENST00000615053.Id.WithVersion, geneFusion.transcript.Id.WithVersion);\n            Assert.Equal(12,                                         geneFusion.exon);\n            Assert.Null(geneFusion.intron);\n            Assert.Equal(\"ENST00000370673.7(SAMD13):r.?_10+2::ENST00000615053.3(POTEI):r.1527_?\", geneFusion.hgvsr);\n        }\n\n        [Fact]\n        public void AddGeneFusionsToDictionary_ForwardLastCds_ReverseLast3PrimeUtr_ActualFusion()\n        {\n            var origin    = new 
BreakPoint(ChromosomeUtilities.Chr1, 84349774,  false);\n            var partner   = new BreakPoint(ChromosomeUtilities.Chr2, 130463799, true);\n            var adjacency = new BreakEndAdjacency(origin, partner);\n\n            var transcriptIdToGeneFusions = new Dictionary<string, IAnnotatedGeneFusion[]>();\n            GeneFusionCaller.AddGeneFusionsToDictionary(transcriptIdToGeneFusions, adjacency, _forwardTranscripts, _reverseTranscripts, false);\n\n            IAnnotatedGeneFusion[] actualGeneFusions = transcriptIdToGeneFusions[\"ENST00000370673.7\"];\n            Assert.Single(actualGeneFusions);\n\n            IAnnotatedGeneFusion geneFusion = actualGeneFusions[0];\n            Assert.Equal(Transcripts.ENST00000615053.Id.WithVersion, geneFusion.transcript.Id.WithVersion);\n            Assert.Equal(13,                                         geneFusion.exon);\n            Assert.Null(geneFusion.intron);\n            Assert.Equal(\"ENST00000370673.7(SAMD13):r.?_351::ENST00000615053.3(POTEI):r.*347_?\", geneFusion.hgvsr);\n        }\n\n        [Fact]\n        public void AddGeneFusionsToDictionary_ForwardFirst3PrimeUtr_ReverseFirst5PrimeUtr_ActualFusion()\n        {\n            var origin    = new BreakPoint(ChromosomeUtilities.Chr1, 84349775,  false);\n            var partner   = new BreakPoint(ChromosomeUtilities.Chr2, 130509287, true);\n            var adjacency = new BreakEndAdjacency(origin, partner);\n\n            var transcriptIdToGeneFusions = new Dictionary<string, IAnnotatedGeneFusion[]>();\n            GeneFusionCaller.AddGeneFusionsToDictionary(transcriptIdToGeneFusions, adjacency, _forwardTranscripts, _reverseTranscripts, false);\n\n            IAnnotatedGeneFusion[] actualGeneFusions = transcriptIdToGeneFusions[\"ENST00000370673.7\"];\n            Assert.Single(actualGeneFusions);\n\n            IAnnotatedGeneFusion geneFusion = actualGeneFusions[0];\n            Assert.Equal(Transcripts.ENST00000615053.Id.WithVersion, 
geneFusion.transcript.Id.WithVersion);\n            Assert.Equal(1,                                          geneFusion.exon);\n            Assert.Null(geneFusion.intron);\n            Assert.Equal(\"ENST00000370673.7(SAMD13):r.?_*1::ENST00000615053.3(POTEI):r.-52_?\", geneFusion.hgvsr);\n        }\n\n        [Fact]\n        public void AddGeneFusionsToDictionary_ForwardLast3PrimeUtr_ReverseLast5PrimeUtr_ActualFusion()\n        {\n            var origin    = new BreakPoint(ChromosomeUtilities.Chr1, 84350798,  false);\n            var partner   = new BreakPoint(ChromosomeUtilities.Chr2, 130509236, true);\n            var adjacency = new BreakEndAdjacency(origin, partner);\n\n            var transcriptIdToGeneFusions = new Dictionary<string, IAnnotatedGeneFusion[]>();\n            GeneFusionCaller.AddGeneFusionsToDictionary(transcriptIdToGeneFusions, adjacency, _forwardTranscripts, _reverseTranscripts, false);\n\n            IAnnotatedGeneFusion[] actualGeneFusions = transcriptIdToGeneFusions[\"ENST00000370673.7\"];\n            Assert.Single(actualGeneFusions);\n\n            IAnnotatedGeneFusion geneFusion = actualGeneFusions[0];\n            Assert.Equal(Transcripts.ENST00000615053.Id.WithVersion, geneFusion.transcript.Id.WithVersion);\n            Assert.Equal(1,                                          geneFusion.exon);\n            Assert.Null(geneFusion.intron);\n            Assert.Equal(\"ENST00000370673.7(SAMD13):r.?_*1024::ENST00000615053.3(POTEI):r.-1_?\", geneFusion.hgvsr);\n        }\n\n        [Fact]\n        public void AddGeneFusionsToDictionary_ReverseFirst5PrimeUtr_ForwardLastCds_ActualFusion()\n        {\n            var origin    = new BreakPoint(ChromosomeUtilities.Chr2, 130509287, false);\n            var partner   = new BreakPoint(ChromosomeUtilities.Chr1, 84349774,  true);\n            var adjacency = new BreakEndAdjacency(origin, partner);\n\n            var transcriptIdToGeneFusions = new Dictionary<string, IAnnotatedGeneFusion[]>();\n       
     GeneFusionCaller.AddGeneFusionsToDictionary(transcriptIdToGeneFusions, adjacency, _reverseTranscripts, _forwardTranscripts, false);\n\n            IAnnotatedGeneFusion[] actualGeneFusions = transcriptIdToGeneFusions[\"ENST00000615053.3\"];\n            Assert.Single(actualGeneFusions);\n\n            IAnnotatedGeneFusion geneFusion = actualGeneFusions[0];\n            Assert.Equal(Transcripts.ENST00000370673.Id.WithVersion, geneFusion.transcript.Id.WithVersion);\n            Assert.Equal(4,                                          geneFusion.exon);\n            Assert.Null(geneFusion.intron);\n            Assert.Equal(\"ENST00000370673.7(SAMD13):r.?_351::ENST00000615053.3(POTEI):r.-52_?\", geneFusion.hgvsr);\n        }\n\n        [Fact]\n        public void AddGeneFusionsToDictionary_ReverseLast5PrimeUtr_ForwardFirstCds_ActualFusion()\n        {\n            var origin    = new BreakPoint(ChromosomeUtilities.Chr2, 130509236, false);\n            var partner   = new BreakPoint(ChromosomeUtilities.Chr1, 84298558,  true);\n            var adjacency = new BreakEndAdjacency(origin, partner);\n\n            var transcriptIdToGeneFusions = new Dictionary<string, IAnnotatedGeneFusion[]>();\n            GeneFusionCaller.AddGeneFusionsToDictionary(transcriptIdToGeneFusions, adjacency, _reverseTranscripts, _forwardTranscripts, false);\n\n            IAnnotatedGeneFusion[] actualGeneFusions = transcriptIdToGeneFusions[\"ENST00000615053.3\"];\n            Assert.Single(actualGeneFusions);\n\n            IAnnotatedGeneFusion geneFusion = actualGeneFusions[0];\n            Assert.Equal(Transcripts.ENST00000370673.Id.WithVersion, geneFusion.transcript.Id.WithVersion);\n            Assert.Equal(1,                                          geneFusion.exon);\n            Assert.Null(geneFusion.intron);\n            Assert.Equal(\"ENST00000370673.7(SAMD13):r.?_1::ENST00000615053.3(POTEI):r.-1_?\", geneFusion.hgvsr);\n        }\n\n        [Fact]\n        public void 
AddGeneFusionsToDictionary_ReverseFirstCds_ForwardFirst3PrimeUtr_ActualFusion()\n        {\n            var origin    = new BreakPoint(ChromosomeUtilities.Chr2, 130509235, false);\n            var partner   = new BreakPoint(ChromosomeUtilities.Chr1, 84349775,  true);\n            var adjacency = new BreakEndAdjacency(origin, partner);\n\n            var transcriptIdToGeneFusions = new Dictionary<string, IAnnotatedGeneFusion[]>();\n            GeneFusionCaller.AddGeneFusionsToDictionary(transcriptIdToGeneFusions, adjacency, _reverseTranscripts, _forwardTranscripts, false);\n\n            IAnnotatedGeneFusion[] actualGeneFusions = transcriptIdToGeneFusions[\"ENST00000615053.3\"];\n            Assert.Single(actualGeneFusions);\n\n            IAnnotatedGeneFusion geneFusion = actualGeneFusions[0];\n            Assert.Equal(Transcripts.ENST00000370673.Id.WithVersion, geneFusion.transcript.Id.WithVersion);\n            Assert.Equal(4,                                          geneFusion.exon);\n            Assert.Null(geneFusion.intron);\n            Assert.Equal(\"ENST00000370673.7(SAMD13):r.?_*1::ENST00000615053.3(POTEI):r.1_?\", geneFusion.hgvsr);\n        }\n\n        [Fact]\n        public void AddGeneFusionsToDictionary_ReverseLastCds_ForwardLast3PrimeUtr_ActualFusion()\n        {\n            var origin    = new BreakPoint(ChromosomeUtilities.Chr2, 130465653, false);\n            var partner   = new BreakPoint(ChromosomeUtilities.Chr1, 84350798,  true);\n            var adjacency = new BreakEndAdjacency(origin, partner);\n\n            var transcriptIdToGeneFusions = new Dictionary<string, IAnnotatedGeneFusion[]>();\n            GeneFusionCaller.AddGeneFusionsToDictionary(transcriptIdToGeneFusions, adjacency, _reverseTranscripts, _forwardTranscripts, false);\n\n            IAnnotatedGeneFusion[] actualGeneFusions = transcriptIdToGeneFusions[\"ENST00000615053.3\"];\n            Assert.Single(actualGeneFusions);\n\n            IAnnotatedGeneFusion geneFusion = 
actualGeneFusions[0];\n            Assert.Equal(Transcripts.ENST00000370673.Id.WithVersion, geneFusion.transcript.Id.WithVersion);\n            Assert.Equal(4,                                          geneFusion.exon);\n            Assert.Null(geneFusion.intron);\n            Assert.Equal(\"ENST00000370673.7(SAMD13):r.?_*1024::ENST00000615053.3(POTEI):r.1527_?\", geneFusion.hgvsr);\n        }\n\n        [Fact]\n        public void AddGeneFusionsToDictionary_ReverseFirst3PrimeUtr_ForwardFirst5PrimeUtr_ActualFusion()\n        {\n            var origin    = new BreakPoint(ChromosomeUtilities.Chr2, 130465652, false);\n            var partner   = new BreakPoint(ChromosomeUtilities.Chr1, 84298366,  true);\n            var adjacency = new BreakEndAdjacency(origin, partner);\n\n            var transcriptIdToGeneFusions = new Dictionary<string, IAnnotatedGeneFusion[]>();\n            GeneFusionCaller.AddGeneFusionsToDictionary(transcriptIdToGeneFusions, adjacency, _reverseTranscripts, _forwardTranscripts, false);\n\n            IAnnotatedGeneFusion[] actualGeneFusions = transcriptIdToGeneFusions[\"ENST00000615053.3\"];\n            Assert.Single(actualGeneFusions);\n\n            IAnnotatedGeneFusion geneFusion = actualGeneFusions[0];\n            Assert.Equal(Transcripts.ENST00000370673.Id.WithVersion, geneFusion.transcript.Id.WithVersion);\n            Assert.Equal(1,                                          geneFusion.exon);\n            Assert.Null(geneFusion.intron);\n            Assert.Equal(\"ENST00000370673.7(SAMD13):r.?_-192::ENST00000615053.3(POTEI):r.*1_?\", geneFusion.hgvsr);\n        }\n\n        [Fact]\n        public void AddGeneFusionsToDictionary_ReverseLast3PrimeUtr_ForwardLast5PrimeUtr_ActualFusion()\n        {\n            var origin    = new BreakPoint(ChromosomeUtilities.Chr2, 130463799, false);\n            var partner   = new BreakPoint(ChromosomeUtilities.Chr1, 84298557,  true);\n            var adjacency = new BreakEndAdjacency(origin, 
partner);\n\n            var transcriptIdToGeneFusions = new Dictionary<string, IAnnotatedGeneFusion[]>();\n            GeneFusionCaller.AddGeneFusionsToDictionary(transcriptIdToGeneFusions, adjacency, _reverseTranscripts, _forwardTranscripts, false);\n\n            IAnnotatedGeneFusion[] actualGeneFusions = transcriptIdToGeneFusions[\"ENST00000615053.3\"];\n            Assert.Single(actualGeneFusions);\n\n            IAnnotatedGeneFusion geneFusion = actualGeneFusions[0];\n            Assert.Equal(Transcripts.ENST00000370673.Id.WithVersion, geneFusion.transcript.Id.WithVersion);\n            Assert.Equal(1,                                          geneFusion.exon);\n            Assert.Null(geneFusion.intron);\n            Assert.Equal(\"ENST00000370673.7(SAMD13):r.?_-1::ENST00000615053.3(POTEI):r.*347_?\", geneFusion.hgvsr);\n        }\n\n        [Fact]\n        public void AddGeneFusionsToDictionary_ForwardNonCodingFirstCdna_ReverseFirstCds_ActualFusion()\n        {\n            var origin    = new BreakPoint(ChromosomeUtilities.Chr1, 85276715,  false);\n            var partner   = new BreakPoint(ChromosomeUtilities.Chr2, 130509235, true);\n            var adjacency = new BreakEndAdjacency(origin, partner);\n\n            var transcriptIdToGeneFusions = new Dictionary<string, IAnnotatedGeneFusion[]>();\n            GeneFusionCaller.AddGeneFusionsToDictionary(transcriptIdToGeneFusions, adjacency, _forwardNonCodingTranscripts, _reverseTranscripts, false);\n\n            IAnnotatedGeneFusion[] actualGeneFusions = transcriptIdToGeneFusions[\"ENST00000427819.5\"];\n            Assert.Single(actualGeneFusions);\n\n            IAnnotatedGeneFusion geneFusion = actualGeneFusions[0];\n            Assert.Equal(Transcripts.ENST00000615053.Id.WithVersion, geneFusion.transcript.Id.WithVersion);\n            Assert.Equal(1,                                          geneFusion.exon);\n            Assert.Null(geneFusion.intron);\n            
Assert.Equal(\"ENST00000427819.5(AL078459.1):r.?_1::ENST00000615053.3(POTEI):r.1_?\", geneFusion.hgvsr);\n        }\n        \n        [Fact]\n        public void AddGeneFusionsToDictionary_ForwardNonCodingLastCdna_ReverseLastCds_ActualFusion()\n        {\n            var origin    = new BreakPoint(ChromosomeUtilities.Chr1, 85399963,  false);\n            var partner   = new BreakPoint(ChromosomeUtilities.Chr2, 130465653, true);\n            var adjacency = new BreakEndAdjacency(origin, partner);\n\n            var transcriptIdToGeneFusions = new Dictionary<string, IAnnotatedGeneFusion[]>();\n            GeneFusionCaller.AddGeneFusionsToDictionary(transcriptIdToGeneFusions, adjacency, _forwardNonCodingTranscripts, _reverseTranscripts, false);\n\n            IAnnotatedGeneFusion[] actualGeneFusions = transcriptIdToGeneFusions[\"ENST00000427819.5\"];\n            Assert.Single(actualGeneFusions);\n\n            IAnnotatedGeneFusion geneFusion = actualGeneFusions[0];\n            Assert.Equal(Transcripts.ENST00000615053.Id.WithVersion, geneFusion.transcript.Id.WithVersion);\n            Assert.Equal(12,                                         geneFusion.exon);\n            Assert.Null(geneFusion.intron);\n            Assert.Equal(\"ENST00000427819.5(AL078459.1):r.?_1950::ENST00000615053.3(POTEI):r.1527_?\", geneFusion.hgvsr);\n        }\n        \n        [Fact]\n        public void AddGeneFusionsToDictionary_ForwardCds_ReverseCds_InFrame_ActualFusion()\n        {\n            var origin    = new BreakPoint(ChromosomeUtilities.Chr1, 84298558,  false);\n            var partner   = new BreakPoint(ChromosomeUtilities.Chr2, 130509234, true);\n            var adjacency = new BreakEndAdjacency(origin, partner);\n\n            var transcriptIdToGeneFusions = new Dictionary<string, IAnnotatedGeneFusion[]>();\n            GeneFusionCaller.AddGeneFusionsToDictionary(transcriptIdToGeneFusions, adjacency, _forwardTranscripts, _reverseTranscripts, false);\n\n            
IAnnotatedGeneFusion[] actualGeneFusions = transcriptIdToGeneFusions[\"ENST00000370673.7\"];\n            Assert.Single(actualGeneFusions);\n\n            IAnnotatedGeneFusion geneFusion = actualGeneFusions[0];\n            Assert.Equal(Transcripts.ENST00000615053.Id.WithVersion, geneFusion.transcript.Id.WithVersion);\n            Assert.Equal(1,                                          geneFusion.exon);\n            Assert.Null(geneFusion.intron);\n            Assert.Equal(\"ENST00000370673.7(SAMD13):r.?_1::ENST00000615053.3(POTEI):r.2_?\", geneFusion.hgvsr);\n            Assert.True(geneFusion.isInFrame);\n        }\n\n        [Fact]\n        public void AddGeneFusionsToDictionary_ForwardCds_ReverseCds_Imprecise_NotInFrame_ActualFusion()\n        {\n            var origin    = new BreakPoint(ChromosomeUtilities.Chr1, 84298558,  false);\n            var partner   = new BreakPoint(ChromosomeUtilities.Chr2, 130509234, true);\n            var adjacency = new BreakEndAdjacency(origin, partner);\n\n            var transcriptIdToGeneFusions = new Dictionary<string, IAnnotatedGeneFusion[]>();\n            GeneFusionCaller.AddGeneFusionsToDictionary(transcriptIdToGeneFusions, adjacency, _forwardTranscripts, _reverseTranscripts, true);\n\n            IAnnotatedGeneFusion[] actualGeneFusions = transcriptIdToGeneFusions[\"ENST00000370673.7\"];\n            Assert.Single(actualGeneFusions);\n\n            IAnnotatedGeneFusion geneFusion = actualGeneFusions[0];\n            Assert.Equal(Transcripts.ENST00000615053.Id.WithVersion, geneFusion.transcript.Id.WithVersion);\n            Assert.Equal(1,                                          geneFusion.exon);\n            Assert.Null(geneFusion.intron);\n            Assert.Equal(\"ENST00000370673.7(SAMD13):r.?_1::ENST00000615053.3(POTEI):r.2_?\", geneFusion.hgvsr);\n            Assert.False(geneFusion.isInFrame);\n        }\n\n        [Fact]\n        public void FoundViableGeneFusion_ReturnTrue()\n        {\n            var 
adjacency = new BreakEndAdjacency(\n                new BreakPoint(ChromosomeUtilities.Chr1, 100, false),\n                new BreakPoint(ChromosomeUtilities.Chr2, 100, true));\n            \n            var originInterval  = new ChromosomeInterval(ChromosomeUtilities.Chr1, 100, 200);\n            var partnerInterval = new ChromosomeInterval(ChromosomeUtilities.Chr2, 100, 200);\n\n            bool actualResult = GeneFusionCaller.FoundViableGeneFusion(adjacency, Genes.SAMD13, originInterval, Source.Ensembl, Genes.POTEI,\n                partnerInterval, Source.Ensembl);\n            Assert.True(actualResult);\n        }\n        \n        [Fact]\n        public void FoundViableGeneFusion_AffectedByOriginAdjacency_ReturnTrue()\n        {\n            var adjacency = new BreakEndAdjacency(\n                new BreakPoint(ChromosomeUtilities.Chr7,  26241365, true),\n                new BreakPoint(ChromosomeUtilities.Chr15, 40854180, false));\n\n            var originInterval  = new ChromosomeInterval(ChromosomeUtilities.Chr7,  26240782, 26252976);\n            var partnerInterval = new ChromosomeInterval(ChromosomeUtilities.Chr15, 40820882, 40857210);\n\n            var originGene = new Gene(ChromosomeUtilities.Chr7, 26240782, 26253227, false, \"CBX3\", 1553, CompactId.Convert(\"11335\"),\n                CompactId.Convert(\"ENSG00000122565\"));\n            var partnerGene = new Gene(ChromosomeUtilities.Chr15, 40820882, 40857256, true, \"CCDC32\", 28295, CompactId.Convert(\"90416\"),\n                CompactId.Convert(\"ENSG00000128891\"));\n\n            bool actualResult = GeneFusionCaller.FoundViableGeneFusion(adjacency, originGene, originInterval, Source.Ensembl, partnerGene,\n                partnerInterval, Source.Ensembl);\n            Assert.True(actualResult);\n        }\n\n        [Fact]\n        public void FoundViableGeneFusion_SameGeneSymbol_ReturnFalse()\n        {\n            var adjacency = new BreakEndAdjacency(\n                new 
BreakPoint(ChromosomeUtilities.Chr1, 100, false),\n                new BreakPoint(ChromosomeUtilities.Chr2, 100, true));\n            \n            var originInterval  = new ChromosomeInterval(ChromosomeUtilities.Chr1, 100, 200);\n            var partnerInterval = new ChromosomeInterval(ChromosomeUtilities.Chr2, 100, 200);\n\n            bool actualResult = GeneFusionCaller.FoundViableGeneFusion(adjacency, Genes.SAMD13, originInterval, Source.Ensembl, Genes.SAMD13,\n                partnerInterval, Source.Ensembl);\n            Assert.False(actualResult);\n        }\n        \n        [Fact]\n        public void FoundViableGeneFusion_DifferentOriginOrientation_ReturnFalse()\n        {\n            var adjacency = new BreakEndAdjacency(\n                new BreakPoint(ChromosomeUtilities.Chr1, 100, true),\n                new BreakPoint(ChromosomeUtilities.Chr2, 100, true));\n            \n            var originInterval  = new ChromosomeInterval(ChromosomeUtilities.Chr1, 100, 200);\n            var partnerInterval = new ChromosomeInterval(ChromosomeUtilities.Chr2, 100, 200);\n\n            bool actualResult = GeneFusionCaller.FoundViableGeneFusion(adjacency, Genes.SAMD13, originInterval, Source.Ensembl, Genes.POTEI,\n                partnerInterval, Source.Ensembl);\n            Assert.False(actualResult);\n        }\n\n        [Fact]\n        public void FoundViableGeneFusion_DifferentPartnerOrientation_ReturnFalse()\n        {\n            var adjacency = new BreakEndAdjacency(\n                new BreakPoint(ChromosomeUtilities.Chr1, 100, false),\n                new BreakPoint(ChromosomeUtilities.Chr2, 100, false));\n            \n            var originInterval  = new ChromosomeInterval(ChromosomeUtilities.Chr1, 100, 200);\n            var partnerInterval = new ChromosomeInterval(ChromosomeUtilities.Chr2, 100, 200);\n\n            bool actualResult = GeneFusionCaller.FoundViableGeneFusion(adjacency, Genes.SAMD13, originInterval, Source.Ensembl, Genes.POTEI,\n    
            partnerInterval, Source.Ensembl);\n            Assert.False(actualResult);\n        }\n\n        [Fact]\n        public void FoundViableGeneFusion_DifferentTranscriptSource_ReturnFalse()\n        {\n            var adjacency = new BreakEndAdjacency(\n                new BreakPoint(ChromosomeUtilities.Chr1, 100, false),\n                new BreakPoint(ChromosomeUtilities.Chr2, 100, true));\n            \n            var originInterval  = new ChromosomeInterval(ChromosomeUtilities.Chr1, 100, 200);\n            var partnerInterval = new ChromosomeInterval(ChromosomeUtilities.Chr2, 100, 200);\n\n            bool actualResult = GeneFusionCaller.FoundViableGeneFusion(adjacency, Genes.SAMD13, originInterval, Source.RefSeq, Genes.POTEI,\n                partnerInterval, Source.Ensembl);\n            Assert.False(actualResult);\n        }\n\n        [Fact]\n        public void FoundViableGeneFusion_TranscriptsAlreadyOverlap_ReturnFalse()\n        {\n            var adjacency = new BreakEndAdjacency(\n                new BreakPoint(ChromosomeUtilities.Chr1, 100, false),\n                new BreakPoint(ChromosomeUtilities.Chr1, 100, true));\n            \n            var originInterval  = new ChromosomeInterval(ChromosomeUtilities.Chr1, 100, 200);\n            var partnerInterval = new ChromosomeInterval(ChromosomeUtilities.Chr1, 105, 205);\n\n            bool actualResult = GeneFusionCaller.FoundViableGeneFusion(adjacency, Genes.SAMD13, originInterval, Source.Ensembl, Genes.POTEI,\n                partnerInterval, Source.Ensembl);\n            Assert.False(actualResult);\n        }\n\n        private sealed class GetCodonPositionData : TheoryData<int, int, byte?>\n        {\n            public GetCodonPositionData()\n            {\n                Add(84298557, 0, null); // UTR\n                Add(84298558, 0, 1);\n                Add(84298559, 0, 2);\n                Add(84298560, 0, 3);\n                Add(84298561, 0, 1);\n                Add(84298562, 0, 
2);\n                Add(84298563, 0, 3);\n                Add(84298568, 1, null); // intron\n            }\n        }\n\n        [Theory]\n        [ClassData(typeof(GetCodonPositionData))]\n        public void GetCodonPosition_Forward_ExpectedResults(int genomicPosition, int regionIndex, byte? expectedCodonPosition)\n        {\n            ITranscript transcript = Transcripts.ENST00000370673;\n            byte? actualCodonPosition = GeneFusionCaller.GetCodonPosition(transcript.TranscriptRegions[regionIndex], transcript.Translation,\n                transcript.StartExonPhase, transcript.Gene.OnReverseStrand, genomicPosition);\n            Assert.Equal(expectedCodonPosition, actualCodonPosition);\n        }\n\n        [Theory]\n        [InlineData(84298558, 130509234, true)]  // 1 -> 2\n        [InlineData(84298559, 130509233, true)]  // 2 -> 3\n        [InlineData(84298560, 130509232, true)]  // 3 -> 1\n        [InlineData(84298561, 130509231, true)]  // 1 -> 2\n        [InlineData(84298562, 130509230, true)]  // 2 -> 3\n        [InlineData(84298563, 130509229, true)]  // 3 -> 1\n        [InlineData(84298561, 130509227, false)] // 1 -> 3\n        [InlineData(84298562, 130509228, false)] // 2 -> 2\n        [InlineData(84298563, 130509225, false)] // 3 -> 2\n        [InlineData(84298564, 130509226, false)] // 1 -> 1\n        [InlineData(84298565, 130509223, false)] // 2 -> 1\n        [InlineData(84298566, 130509221, false)] // 3 -> 3\n        public void DetermineInFrameFusion_ExpectedResults(int firstGenomicPosition, int secondGenomicPosition, bool expectedResult)\n        {\n            var  first        = new BreakPointTranscript(Transcripts.ENST00000370673, firstGenomicPosition,  0);\n            var  second       = new BreakPointTranscript(Transcripts.ENST00000615053, secondGenomicPosition, 24);\n            bool actualResult = GeneFusionCaller.DetermineInFrameFusion(first, second);\n            Assert.Equal(expectedResult, actualResult);\n        }\n\n        
[Fact]\n        public void GetGeneSymbols_SameChromosome()\n        {\n            IGene a = new Gene(ChromosomeUtilities.Chr1, 1000, 2000, false, \"A\", 0, CompactId.Empty, CompactId.Empty);\n            IGene b = new Gene(ChromosomeUtilities.Chr1, 900,  1900, false, \"B\", 0, CompactId.Empty, CompactId.Empty);\n\n            var expectedFirstGeneSymbol  = \"B\";\n            var expectedSecondGeneSymbol = \"A\";\n\n            (ulong _, string actualFirstGeneSymbol, uint _, string actualSecondGeneSymbol, uint _) =\n                GeneFusionCaller.GetGeneAndFusionKeys(a, b);\n\n            Assert.Equal(expectedFirstGeneSymbol,  actualFirstGeneSymbol);\n            Assert.Equal(expectedSecondGeneSymbol, actualSecondGeneSymbol);\n        }\n\n        [Fact]\n        public void GetGeneSymbols_DifferentChromosomes()\n        {\n            IGene a = new Gene(ChromosomeUtilities.Chr1, 1000, 2000, false, \"A\", 0, CompactId.Empty, CompactId.Empty);\n            IGene b = new Gene(ChromosomeUtilities.Chr3, 900,  1900, false, \"B\", 0, CompactId.Empty, CompactId.Empty);\n\n            var expectedFirstGeneSymbol  = \"A\";\n            var expectedSecondGeneSymbol = \"B\";\n            (ulong _, string actualFirstGeneSymbol, uint _, string actualSecondGeneSymbol, uint _) =\n                GeneFusionCaller.GetGeneAndFusionKeys(a, b);\n\n            Assert.Equal(expectedFirstGeneSymbol,  actualFirstGeneSymbol);\n            Assert.Equal(expectedSecondGeneSymbol, actualSecondGeneSymbol);\n        }\n\n        [Fact]\n        public void AddGeneFusions_ExpectedResults()\n        {\n            const string expectedConsequences = \"\\\"consequence\\\":[\\\"unidirectional_gene_fusion\\\"]\";\n            const string expectedGeneFusionJson =\n                
\"\\\"geneFusions\\\":[{\\\"transcript\\\":\\\"ENST00000615053.3\\\",\\\"bioType\\\":\\\"protein_coding\\\",\\\"exon\\\":1,\\\"geneId\\\":\\\"ENSG00000196834\\\",\\\"hgnc\\\":\\\"POTEI\\\",\\\"hgvsr\\\":\\\"ENST00000370673.7(SAMD13):r.?_1::ENST00000615053.3(POTEI):r.2_?\\\",\\\"inFrame\\\":true}]}\";\n\n            IntervalForest<ITranscript> transcriptIntervalForest = GetTranscriptIntervalForest();\n            IAnnotatedVariant[]         annotatedVariants        = GetAnnotatedVariants();\n\n            var geneFusionCaller = new GeneFusionCaller(ChromosomeUtilities.RefNameToChromosome, transcriptIntervalForest);\n            geneFusionCaller.AddGeneFusions(annotatedVariants, false, false, false);\n\n            IAnnotatedVariant annotatedVariant = annotatedVariants[0];\n\n            var sb = new StringBuilder();\n            annotatedVariant.Transcripts[0].SerializeJson(sb);\n            var json = sb.ToString();\n            \n            VariantPool.Return((Variant)annotatedVariant.Variant);\n            AnnotatedTranscriptPool.Return((AnnotatedTranscript) annotatedVariant.Transcripts[0]);\n            AnnotatedVariantPool.Return((AnnotatedVariant)annotatedVariant);\n\n            Assert.Contains(expectedConsequences,   json);\n            Assert.Contains(expectedGeneFusionJson, json);\n        }\n\n        private IAnnotatedVariant[] GetAnnotatedVariants()\n        {\n            var variant = VariantPool.Get(ChromosomeUtilities.Chr1, 84298558, 84298558, \"A\", \"A]chr2:130509234]\", VariantType.translocation_breakend,\n                \"1-84298558-A-A]chr2:130509234]\", false, false, false, null, AnnotationBehavior.StructuralVariants, true);\n\n            var annotatedTranscript = AnnotatedTranscriptPool.Get(Transcripts.ENST00000370673, null, null, null, null, null, null, null, null, null,\n                new List<ConsequenceTag>(), null);\n            \n            var annotatedVariant = AnnotatedVariantPool.Get(variant);\n            
annotatedVariant.Transcripts.Add(annotatedTranscript);\n            \n            return new IAnnotatedVariant[] {annotatedVariant};\n        }\n\n        private IntervalForest<ITranscript> GetTranscriptIntervalForest()\n        {\n            var transcripts = new List<ITranscript>();\n            transcripts.AddRange(_forwardTranscripts);\n            transcripts.AddRange(_reverseTranscripts);\n\n            IntervalArray<ITranscript>[] intervalArrays = transcripts.ToIntervalArrays(2);\n            return new IntervalForest<ITranscript>(intervalArrays);\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/GeneFusions/IO/GeneFusionIndexEntryTests.cs",
    "content": "﻿using VariantAnnotation.GeneFusions.IO;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.GeneFusions.IO\n{\n    public sealed class GeneFusionIndexEntryTests\n    {\n        [Theory]\n        [InlineData(1000, 1)]\n        [InlineData(2000, 0)]\n        [InlineData(3000, -1)]\n        public void Compare_ExpectedResults(ulong otherGeneKey, int expectedResult)\n        {\n            var indexEntry   = new GeneFusionIndexEntry(2000, 0);\n            int actualResult = indexEntry.Compare(otherGeneKey);\n            Assert.Equal(expectedResult, actualResult);\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/GeneFusions/IO/GeneFusionSourceReaderTests.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Text;\nusing IO.v2;\nusing SAUtils.FusionCatcher;\nusing UnitTests.SAUtils.FusionCatcher;\nusing VariantAnnotation.GeneFusions.IO;\nusing VariantAnnotation.GeneFusions.SA;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.Providers;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.GeneFusions.IO\n{\n    public sealed class GeneFusionSourceReaderTests\n    {\n        [Theory]\n        [InlineData(FileType.FusionCatcher,  GeneFusionSourceReader.SupportedFileFormatVersion, true)]\n        [InlineData(FileType.GeneFusionJson, GeneFusionSourceReader.SupportedFileFormatVersion, false)]\n        [InlineData(FileType.FusionCatcher,  0,                                                 false)]\n        public void CheckHeader_ExpectedResults(FileType fileType, ushort fileFormatVersion, bool expectedIsValid)\n        {\n            Exception ex            = Record.Exception(() => { GeneFusionSourceReader.CheckHeader(fileType, fileFormatVersion); });\n            bool      actualIsValid = ex == null;\n            Assert.Equal(expectedIsValid, actualIsValid);\n        }\n\n        [Fact]\n        public void AddAnnotations_ExpectedResults()\n        {\n            const string expectedJson =\n                \"[{\\\"genes\\\":{\\\"first\\\":{\\\"hgnc\\\":\\\"A\\\",\\\"isOncogene\\\":true},\\\"second\\\":{\\\"hgnc\\\":\\\"B\\\"},\\\"isParalogPair\\\":true},\\\"germlineSources\\\":[\\\"1000 Genomes Project\\\",\\\"Healthy (strong support)\\\",\\\"Illumina Body Map 2.0\\\"],\\\"somaticSources\\\":[\\\"Alaei-Mahabadi 18 cancers\\\",\\\"DepMap CCLE\\\"]},{\\\"genes\\\":{\\\"first\\\":{\\\"hgnc\\\":\\\"E\\\"},\\\"second\\\":{\\\"hgnc\\\":\\\"F\\\"}},\\\"somaticSources\\\":[\\\"CCLE Vellichirammal\\\",\\\"Cancer Genome Project\\\"]}]\";\n\n            using var ms = new 
MemoryStream();\n            WriteGeneFusionSourceFile(ms);\n\n            var supplementaryAnnotations = new List<ISupplementaryAnnotation>();\n\n            IGeneFusionPair[] fusionPairs =\n            {\n                new GeneFusionPair(1000, \"A\", 123, \"B\", 456),\n                new GeneFusionPair(1500, \"C\", 234, \"D\", 567), // no matching SA\n                new GeneFusionPair(3000, \"E\", 345, \"F\", 678)\n            };\n\n            using (var reader = new GeneFusionSourceReader(ms))\n            {\n                reader.LoadAnnotations();\n                reader.AddAnnotations(fusionPairs, supplementaryAnnotations);\n            }\n\n            Assert.Single(supplementaryAnnotations);\n            ISupplementaryAnnotation sa = supplementaryAnnotations[0];\n\n            var sb = new StringBuilder();\n            sa.SerializeJson(sb);\n            var actualJson = sb.ToString();\n\n            Assert.Equal(\"fusionCatcher\", sa.JsonKey);\n            Assert.Equal(expectedJson,    actualJson);\n        }\n\n        [Fact]\n        public void AddAnnotations_NoResults()\n        {\n            using var ms = new MemoryStream();\n            WriteGeneFusionSourceFile(ms);\n\n            var supplementaryAnnotations = new List<ISupplementaryAnnotation>();\n\n            IGeneFusionPair[] fusionPairs =\n            {\n                new GeneFusionPair(1500, \"C\", 234, \"D\", 567) // no matching SA\n            };\n\n            using (var reader = new GeneFusionSourceReader(ms))\n            {\n                reader.LoadAnnotations();\n                reader.AddAnnotations(fusionPairs, supplementaryAnnotations);\n            }\n\n            Assert.Empty(supplementaryAnnotations);\n        }\n\n        private static void WriteGeneFusionSourceFile(MemoryStream ms)\n        {\n            (uint[] expectedOncogeneKeys, GeneFusionSourceCollection[] expectedIndex, GeneFusionIndexEntry[] expectedIndexEntries) =\n                
GeneFusionSourceWriterTests.GetKeyToGeneFusion();\n\n            IDataSourceVersion expectedVersion = new DataSourceVersion(\"FusionCatcher\", \"1.33\", DateTime.Now.Ticks, \"gene fusions\");\n            const string       expectedJsonKey = \"fusionCatcher\";\n\n            using (var writer = new GeneFusionSourceWriter(ms, expectedJsonKey, expectedVersion, true))\n            {\n                writer.Write(expectedOncogeneKeys, expectedIndex, expectedIndexEntries);\n            }\n\n            ms.Position = 0;\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/GeneFusions/SA/GeneFusionPairTests.cs",
    "content": "﻿using VariantAnnotation.GeneFusions.SA;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.GeneFusions.SA\n{\n    public sealed class GeneFusionPairTests\n    {\n        private readonly GeneFusionPair _fusionPair     = new(1000, \"A\", 123, \"B\", 456);\n        private readonly GeneFusionPair _fusionPairDup  = new(1000, \"A\", 123, \"B\", 456);\n        private readonly GeneFusionPair _fusionPairDiff = new(2000, \"A\", 123, \"B\", 456);\n\n        [Fact]\n        public void Equals_ExpectedResults()\n        {\n            Assert.False(_fusionPair.Equals(null));\n            Assert.Equal(_fusionPair, _fusionPair);\n            Assert.Equal(_fusionPair, _fusionPairDup);\n            Assert.NotEqual(_fusionPair, _fusionPairDiff);\n        }\n        \n        [Fact]\n        public void Equals_IGeneFusionPair_ExpectedResults()\n        {\n            IGeneFusionPair fusionPair     = _fusionPair;\n            IGeneFusionPair fusionPairDup  = _fusionPairDup;\n            IGeneFusionPair fusionPairDiff = _fusionPairDiff;\n\n            Assert.False(fusionPair.Equals(null));\n            Assert.Equal(fusionPair, fusionPair);\n            Assert.Equal(fusionPair, fusionPairDup);\n            Assert.NotEqual(fusionPair, fusionPairDiff);\n        }\n\n        [Fact]\n        public void GetHashCode_ExpectedResults()\n        {\n            Assert.Equal(_fusionPair.GetHashCode(), _fusionPairDup.GetHashCode());\n            Assert.NotEqual(_fusionPair.GetHashCode(), _fusionPairDiff.GetHashCode());\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/GeneFusions/SA/GeneFusionSourceCollectionTests.cs",
    "content": "﻿using VariantAnnotation.GeneFusions.SA;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.GeneFusions.SA\n{\n    public sealed class GeneFusionSourceCollectionTests\n    {\n        private readonly GeneFusionSourceCollection _sourceCollection =\n            new(false, false, false, new[] {GeneFusionSource.Healthy}, new[]\n                {GeneFusionSource.Bao_gliomas, GeneFusionSource.Robinson_prostate_cancers});\n\n        private readonly GeneFusionSourceCollection _sourceCollectionDup =\n            new(false, false, false, new[] {GeneFusionSource.Healthy}, new[]\n                {GeneFusionSource.Bao_gliomas, GeneFusionSource.Robinson_prostate_cancers});\n\n        private readonly GeneFusionSourceCollection _sourceCollectionDiff =\n            new(false, true, false, new[] {GeneFusionSource.Healthy}, new[]\n                {GeneFusionSource.Bao_gliomas, GeneFusionSource.Robinson_prostate_cancers});\n        \n        [Fact]\n        public void Equals_ExpectedResults()\n        {\n            Assert.False(_sourceCollection.Equals(null));\n            Assert.Equal(_sourceCollection, _sourceCollection);\n            Assert.Equal(_sourceCollection, _sourceCollectionDup);\n            Assert.NotEqual(_sourceCollection, _sourceCollectionDiff);\n        }\n\n        [Fact]\n        public void GetJsonEntry_ExpectedResults()\n        {\n            const string expectedJson =\n                \"\\\"genes\\\":{\\\"first\\\":{\\\"hgnc\\\":\\\"A\\\"},\\\"second\\\":{\\\"hgnc\\\":\\\"B\\\"}},\\\"germlineSources\\\":[\\\"Healthy\\\"],\\\"somaticSources\\\":[\\\"Bao gliomas\\\",\\\"Robinson prostate cancers\\\"]\";\n            var    geneFusionPair = new GeneFusionPair(100, \"A\", 100, \"B\", 200);\n            string actualJson     = _sourceCollection.GetJsonEntry(geneFusionPair, new uint[] {123});\n            Assert.Equal(expectedJson, actualJson);\n        }\n\n        [Fact]\n        public void GetHashCode_ExpectedResults()\n        {\n         
   Assert.Equal(_sourceCollection.GetHashCode(), _sourceCollectionDup.GetHashCode());\n            Assert.NotEqual(_sourceCollection.GetHashCode(), _sourceCollectionDiff.GetHashCode());\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/GeneFusions/SA/GeneFusionSourceUtilitiesTests.cs",
    "content": "﻿using VariantAnnotation.GeneFusions.SA;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.GeneFusions.SA\n{\n    public sealed class GeneFusionSourceUtilitiesTests\n    {\n        [Theory]\n        [InlineData(GeneFusionSource.Babiceanu_NonCancerTissues,  \"Babiceanu non-cancer tissues\")]\n        [InlineData(GeneFusionSource.Bailey_pancreatic_cancers,   \"Bailey pancreatic cancers\")]\n        [InlineData(GeneFusionSource.Bao_gliomas,                 \"Bao gliomas\")]\n        [InlineData(GeneFusionSource.CACG,                        \"CACG\")]\n        [InlineData(GeneFusionSource.ConjoinG,                    \"ConjoinG\")]\n        [InlineData(GeneFusionSource.COSMIC,                      \"COSMIC\")]\n        [InlineData(GeneFusionSource.Duplicated_Genes_Database,   \"Duplicated Genes Database\")]\n        [InlineData(GeneFusionSource.GTEx_healthy_tissues,        \"GTEx healthy tissues\")]\n        [InlineData(GeneFusionSource.Healthy,                     \"Healthy\")]\n        [InlineData(GeneFusionSource.Healthy_prefrontal_cortex,   \"Healthy prefrontal cortex\")]\n        [InlineData(GeneFusionSource.Human_Protein_Atlas,         \"Human Protein Atlas\")]\n        [InlineData(GeneFusionSource.NonTumorCellLines,           \"non-tumor cell lines\")]\n        [InlineData(GeneFusionSource.Robinson_prostate_cancers,   \"Robinson prostate cancers\")]\n        [InlineData(GeneFusionSource.TumorFusions_normal,         \"TumorFusions normal\")]\n        [InlineData(GeneFusionSource.TCGA_oesophageal_carcinomas, \"TCGA oesophageal carcinomas\")]\n        [InlineData(GeneFusionSource.TCGA_Tumor,                  \"TCGA tumor\")]\n        public void Convert_ExpectedResults(GeneFusionSource source, string expectedResult)\n        {\n            string actualResult = GeneFusionSourceUtilities.Convert(source);\n            Assert.Equal(expectedResult, actualResult);\n        }\n\n        [Fact]\n        public void 
Convert_UnknownSource_ReturnsNull()\n        {\n            string actualResult = GeneFusionSourceUtilities.Convert(GeneFusionSource.None);\n            Assert.Null(actualResult);\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/GeneFusions/Utilities/GeneFusionKeyTests.cs",
    "content": "﻿using VariantAnnotation.GeneFusions.Utilities;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.GeneFusions.Utilities\n{\n    public sealed class GeneFusionKeyTests\n    {\n        [Fact]\n        public void Create_ExpectedResults()\n        {\n            const string geneA             = \"ENSG00000006210\";\n            const string geneB             = \"ENSG00000102962\";\n            const ulong  expectedFusionKey = 26671747011122;\n\n            ulong actualFusionKey = GeneFusionKey.Create(GeneFusionKey.CreateGeneKey(geneA), GeneFusionKey.CreateGeneKey(geneB));\n            Assert.Equal(expectedFusionKey, actualFusionKey);\n        }\n\n        [Theory]\n        [InlineData(\"ENSG00000006210\", null)]\n        [InlineData(null,              \"ENSG00000102962\")]\n        [InlineData(null,              null)]\n        public void Create_OneGeneIsNull_ReturnZero(string geneA, string geneB)\n        {\n            const ulong expectedFusionKey = 0;\n            ulong       actualFusionKey   = GeneFusionKey.Create(GeneFusionKey.CreateGeneKey(geneA), GeneFusionKey.CreateGeneKey(geneB));\n            Assert.Equal(expectedFusionKey, actualFusionKey);\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/GeneFusions/Utilities/IndexEntryExtensionsTests.cs",
    "content": "﻿using VariantAnnotation.GeneFusions.IO;\nusing VariantAnnotation.GeneFusions.Utilities;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.GeneFusions.Utilities\n{\n    public sealed class IndexEntryExtensionsTests\n    {\n        private readonly GeneFusionIndexEntry[] _indexEntries;\n\n        public IndexEntryExtensionsTests()\n        {\n            _indexEntries = new GeneFusionIndexEntry[]\n            {\n                new(1000, 1),\n                new(1001, 2),\n                new(2000, 3),\n                new(3000, 4),\n                new(3100, 5)\n            };\n        }\n\n        [Theory]\n        [InlineData(1000, 1)]\n        [InlineData(1001, 2)]\n        [InlineData(2000, 3)]\n        [InlineData(3000, 4)]\n        [InlineData(3100, 5)]\n        public void GetIndex_Matches_ExpectedResults(ulong geneKey, ushort expectedIndex)\n        {\n            ushort? actualIndex = _indexEntries.GetIndex(geneKey);\n            Assert.NotNull(actualIndex);\n            Assert.Equal(expectedIndex, actualIndex);\n        }\n        \n        [Theory]\n        [InlineData(100)]\n        [InlineData(1002)]\n        [InlineData(4000)]\n        public void GetIndex_NotFound_ReturnNull(ulong geneKey)\n        {\n            ushort? actualIndex = _indexEntries.GetIndex(geneKey);\n            Assert.Null(actualIndex);\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/IO/Caches/CacheConstantsTests.cs",
    "content": "﻿using IO;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.IO.Caches\r\n{\r\n    public sealed class CacheConstantsTests\r\n    {\r\n        [Fact]\r\n        public void TranscriptPath_Null_WithNullPrefix()\r\n        {\r\n            var observedResult = CacheConstants.TranscriptPath(null);\r\n            Assert.Null(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void TranscriptPath_NominalCase()\r\n        {\r\n            const string expectedResult = \"bob.transcripts.ndb\";\r\n            var observedResult = CacheConstants.TranscriptPath(\"bob\");\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void SiftPath_NominalCase()\r\n        {\r\n            const string expectedResult = \"bob.sift.ndb\";\r\n            var observedResult = CacheConstants.SiftPath(\"bob\");\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void PolyPhenPath_NominalCase()\r\n        {\r\n            const string expectedResult = \"bob.polyphen.ndb\";\r\n            var observedResult = CacheConstants.PolyPhenPath(\"bob\");\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/IO/Caches/CacheHeaderTests.cs",
    "content": "﻿using System.IO;\r\nusing System.Text;\r\nusing Genome;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.IO.Caches;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.IO.Caches\r\n{\r\n    public sealed class CacheHeaderTests\r\n    {\r\n        [Fact]\r\n        public void CacheHeader_EndToEnd()\r\n        {\r\n            const Source expectedTranscriptSource = Source.BothRefSeqAndEnsembl;\r\n            const long expectedCreationTimeTicks  = long.MaxValue;\r\n            const GenomeAssembly expectedAssembly = GenomeAssembly.hg19;\r\n            const ushort expectedVepVersion       = ushort.MaxValue;\r\n\r\n            var expectedBaseHeader   = new Header(\"VEP\", 1, 2, expectedTranscriptSource, expectedCreationTimeTicks, expectedAssembly);\r\n            var expectedCustomHeader = new TranscriptCacheCustomHeader(expectedVepVersion, 0);\r\n            var expectedHeader       = new CacheHeader(expectedBaseHeader, expectedCustomHeader);\r\n\r\n            CacheHeader observedHeader;\r\n\r\n            using (var ms = new MemoryStream())\r\n            {\r\n                using (var writer = new BinaryWriter(ms, Encoding.UTF8, true))\r\n                {\r\n                    expectedHeader.Write(writer);\r\n                }\r\n\r\n                ms.Position = 0;\r\n                observedHeader = CacheHeader.Read(ms);\r\n            }\r\n\r\n            Assert.NotNull(observedHeader);\r\n            Assert.Equal(expectedTranscriptSource,  observedHeader.Source);\r\n            Assert.Equal(expectedCreationTimeTicks, observedHeader.CreationTimeTicks);\r\n            Assert.Equal(expectedAssembly,    observedHeader.Assembly);\r\n            Assert.Equal(expectedVepVersion,        observedHeader.Custom.VepVersion);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/IO/Caches/TranscriptCacheReaderTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing System.Text;\r\nusing CacheUtils.TranscriptCache;\r\nusing Genome;\r\nusing Intervals;\r\nusing IO;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Caches;\r\nusing VariantAnnotation.IO.Caches;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.IO.Caches\r\n{\r\n    public sealed class TranscriptCacheReaderTests\r\n    {\r\n        private readonly TranscriptCacheData _expectedCacheData;\r\n        private readonly CacheHeader _expectedHeader;\r\n\r\n        public TranscriptCacheReaderTests()\r\n        {\r\n            const GenomeAssembly genomeAssembly = GenomeAssembly.GRCh38;\r\n\r\n            var baseHeader   = new Header(\"test\", 2, 3, Source.BothRefSeqAndEnsembl, 4, genomeAssembly);\r\n            var customHeader = new TranscriptCacheCustomHeader(1, 2);\r\n            _expectedHeader  = new CacheHeader(baseHeader, customHeader);\r\n\r\n            var transcriptRegions = new ITranscriptRegion[]\r\n            {\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 1, 100, 199, 300, 399),\r\n                new TranscriptRegion(TranscriptRegionType.Intron, 1, 200, 299, 399, 400),\r\n                new TranscriptRegion(TranscriptRegionType.Exon, 2, 300, 399, 400, 499)\r\n            };\r\n\r\n            var mirnas = new IInterval[2];\r\n            mirnas[0] = new Interval(100, 200);\r\n            mirnas[1] = new Interval(300, 400);\r\n\r\n            var peptideSeqs = new[] { \"MASE*\" };\r\n\r\n            var genes = new IGene[1];\r\n            genes[0] = new Gene(ChromosomeUtilities.Chr3, 100, 200, true, \"TP53\", 300, CompactId.Convert(\"7157\"),\r\n                
CompactId.Convert(\"ENSG00000141510\"));\r\n\r\n            var regulatoryRegions = new IRegulatoryRegion[2];\r\n            regulatoryRegions[0] = new RegulatoryRegion(ChromosomeUtilities.Chr3, 1200, 1300, CompactId.Convert(\"123\"), RegulatoryRegionType.enhancer);\r\n            regulatoryRegions[1] = new RegulatoryRegion(ChromosomeUtilities.Chr3, 1250, 1450, CompactId.Convert(\"456\"), RegulatoryRegionType.enhancer);\r\n            var regulatoryRegionIntervalArrays = regulatoryRegions.ToIntervalArrays(3);\r\n\r\n            var transcripts = GetTranscripts(ChromosomeUtilities.Chr3, genes, transcriptRegions, mirnas);\r\n            var transcriptIntervalArrays = transcripts.ToIntervalArrays(3);\r\n\r\n            _expectedCacheData = new TranscriptCacheData(_expectedHeader, genes, transcriptRegions, mirnas, peptideSeqs,\r\n                transcriptIntervalArrays, regulatoryRegionIntervalArrays);\r\n        }\r\n\r\n        [Fact]\r\n        public void TranscriptCacheReader_EndToEnd()\r\n        {\r\n            TranscriptCacheData observedCache;\r\n\r\n            using (var ms = new MemoryStream())\r\n            {\r\n                using (var writer = new TranscriptCacheWriter(ms, _expectedHeader, true))\r\n                {\r\n                    writer.Write(_expectedCacheData);\r\n                }\r\n\r\n                ms.Position = 0;\r\n\r\n                using (var reader = new TranscriptCacheReader(ms))\r\n                {\r\n                    observedCache = reader.Read(ChromosomeUtilities.RefIndexToChromosome);\r\n                }\r\n            }\r\n\r\n            Assert.NotNull(observedCache);\r\n            Assert.Equal(_expectedCacheData.PeptideSeqs, observedCache.PeptideSeqs);\r\n            CheckChromosomeIntervals(_expectedCacheData.Genes, observedCache.Genes);\r\n            CheckIntervalArrays(_expectedCacheData.RegulatoryRegionIntervalArrays, observedCache.RegulatoryRegionIntervalArrays);\r\n            
CheckIntervalArrays(_expectedCacheData.TranscriptIntervalArrays, observedCache.TranscriptIntervalArrays);\r\n            CheckIntervals(_expectedCacheData.TranscriptRegions, observedCache.TranscriptRegions);\r\n            CheckIntervals(_expectedCacheData.Mirnas, observedCache.Mirnas);\r\n        }\r\n\r\n        private static void CheckIntervalArrays<T>(IntervalArray<T>[] expected, IntervalArray<T>[] observed)\r\n            where T : IInterval\r\n        {\r\n            Assert.Equal(expected.Length, observed.Length);\r\n\r\n            for (var refIndex = 0; refIndex < expected.Length; refIndex++)\r\n            {\r\n                var expectedIntervalArray = expected[refIndex];\r\n                var observedIntervalArray = observed[refIndex];\r\n\r\n                if (expectedIntervalArray == null && observedIntervalArray == null) continue;\r\n\r\n                Assert.NotNull(expectedIntervalArray);\r\n                Assert.NotNull(observedIntervalArray);\r\n                Assert.Equal(expectedIntervalArray.Array.Length, observedIntervalArray.Array.Length);\r\n\r\n                for (var i = 0; i < expectedIntervalArray.Array.Length; i++)\r\n                {\r\n                    var expectedInterval = expectedIntervalArray.Array[i];\r\n                    var observedInterval = observedIntervalArray.Array[i];\r\n                    Assert.Equal(expectedInterval.Begin, observedInterval.Begin);\r\n                    Assert.Equal(expectedInterval.End, observedInterval.End);\r\n                }\r\n            }\r\n        }\r\n\r\n        private static void CheckChromosomeIntervals(IEnumerable<IChromosomeInterval> expected,\r\n            IEnumerable<IChromosomeInterval> observed)\r\n        {\r\n            var expectedList = expected.ToList();\r\n            var observedList = observed.ToList();\r\n\r\n            Assert.Equal(expectedList.Count, observedList.Count);\r\n\r\n            for (var i = 0; i < expectedList.Count; i++)\r\n            
{\r\n                var expectedEntry = expectedList[i];\r\n                var observedEntry = observedList[i];\r\n                Assert.Equal(expectedEntry.Chromosome.EnsemblName, observedEntry.Chromosome.EnsemblName);\r\n                Assert.Equal(expectedEntry.Start, observedEntry.Start);\r\n                Assert.Equal(expectedEntry.End, observedEntry.End);\r\n            }\r\n        }\r\n\r\n        private static void CheckIntervals(IEnumerable<IInterval> expected, IEnumerable<IInterval> observed)\r\n        {\r\n            var expectedList = expected.ToList();\r\n            var observedList = observed.ToList();\r\n\r\n            Assert.Equal(expectedList.Count, observedList.Count);\r\n\r\n            for (var i = 0; i < expectedList.Count; i++)\r\n            {\r\n                var expectedEntry = expectedList[i];\r\n                var observedEntry = observedList[i];\r\n                Assert.Equal(expectedEntry.Start, observedEntry.Start);\r\n                Assert.Equal(expectedEntry.End, observedEntry.End);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void ReadItems_EndToEnd()\r\n        {\r\n            var expectedStrings = new[] { \"Huey\", \"Duey\", \"Louie\" };\r\n            string[] observedStrings;\r\n\r\n            using (var ms = new MemoryStream())\r\n            {\r\n                // ReSharper disable AccessToDisposedClosure\r\n                using (var writer = new ExtendedBinaryWriter(ms, Encoding.UTF8, true))\r\n                {\r\n                    TranscriptCacheWriter.WriteItems(writer, expectedStrings, x => writer.WriteOptAscii(x));\r\n                }\r\n\r\n                ms.Position = 0;\r\n\r\n                using (var reader = new BufferedBinaryReader(ms))\r\n                {\r\n                    observedStrings = TranscriptCacheReader.ReadItems(reader, () => reader.ReadAsciiString());\r\n                }\r\n                // ReSharper restore AccessToDisposedClosure\r\n          
  }\r\n\r\n            Assert.NotNull(observedStrings);\r\n            Assert.Equal(expectedStrings, observedStrings);\r\n        }\r\n\r\n        [Fact]\r\n        public void CheckGuard_InvalidGuard()\r\n        {\r\n            Assert.Throws<InvalidDataException>(delegate\r\n            {\r\n                using (var ms = new MemoryStream())\r\n                {\r\n                    using (var writer = new ExtendedBinaryWriter(ms, Encoding.UTF8, true)) writer.Write(7);\r\n                    ms.Position = 0;\r\n                    using (var reader = new BufferedBinaryReader(ms)) TranscriptCacheReader.CheckGuard(reader);\r\n                }\r\n            });\r\n        }\r\n\r\n        private static ITranscript[] GetTranscripts(Chromosome chromosome, IGene[] genes, ITranscriptRegion[] regions,\r\n            IInterval[] mirnas)\r\n        {\r\n            return new ITranscript[]\r\n            {\r\n                new Transcript(chromosome, 120, 180, CompactId.Convert(\"789\"), null, BioType.IG_D_gene, genes[0], 0, 0,\r\n                    false, regions, 0, mirnas, -1, -1, Source.None, false, false, null, null)\r\n            };\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/IO/JsonObjectTests.cs",
    "content": "﻿using System.Globalization;\r\nusing System.Text;\r\nusing System.Threading;\r\nusing System.Threading.Tasks;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.IO;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.IO\r\n{\r\n    public sealed class JsonObjectTests\r\n    {\r\n        [Fact]\r\n        public void ProcessBoolValue_True_TwoTimes()\r\n        {\r\n            var sb = StringBuilderPool.Get();\r\n            var json = new JsonObject(sb);\r\n\r\n            json.AddBoolValue(\"test1\", true);\r\n            json.AddBoolValue(\"test2\", true);\r\n\r\n            const string expectedResult = \"\\\"test1\\\":true,\\\"test2\\\":true\";\r\n            var observedResult = StringBuilderPool.GetStringAndReturn(sb);\r\n\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n\r\n        [Fact]\r\n        public void AddBoolValue_True_TwoTimes()\r\n        {\r\n            var sb = StringBuilderPool.Get();\r\n            var json = new JsonObject(sb);\r\n            json.AddBoolValue(\"test1\", true);\r\n            json.AddBoolValue(\"test2\", true);\r\n\r\n            const string expectedResult = \"\\\"test1\\\":true,\\\"test2\\\":true\";\r\n            var observedResult = StringBuilderPool.GetStringAndReturn(sb);\r\n\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void AddIntValue_TwoTimes()\r\n        {\r\n            var sb = StringBuilderPool.Get();\r\n            var json = new JsonObject(sb);\r\n            json.AddIntValue(\"test1\", 5);\r\n            json.AddIntValue(\"test2\", 7);\r\n\r\n            const string expectedResult = \"\\\"test1\\\":5,\\\"test2\\\":7\";\r\n            var observedResult = StringBuilderPool.GetStringAndReturn(sb);\r\n\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void AddIntValue_NullInt()\r\n  
      {\r\n            var sb = StringBuilderPool.Get();\r\n            var json = new JsonObject(sb);\r\n            json.AddIntValue(\"test1\", null);\r\n\r\n            var observedResult = StringBuilderPool.GetStringAndReturn(sb);\r\n\r\n            Assert.Equal(string.Empty, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void AddDoubleValue_TwoTimes()\r\n        {\r\n            var sb = StringBuilderPool.Get();\r\n            var json = new JsonObject(sb);\r\n            json.AddDoubleValue(\"test1\", 5.7);\r\n            json.AddDoubleValue(\"test2\", 7.9);\r\n\r\n            const string expectedResult = \"\\\"test1\\\":5.7,\\\"test2\\\":7.9\";\r\n            var observedResult = StringBuilderPool.GetStringAndReturn(sb);\r\n\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n        \r\n        public static string GetJsonDoubleString()\r\n        {\r\n            var defaultCulture = Thread.CurrentThread.CurrentCulture;\r\n            var newCulture     = CultureInfo.CreateSpecificCulture(\"fr-FR\");\r\n            Thread.CurrentThread.CurrentCulture = newCulture;\r\n            \r\n            var sb   = StringBuilderPool.Get();\r\n            var json = new JsonObject(sb);\r\n            json.AddDoubleValue(\"test1\", 5.7);\r\n            json.AddDoubleValue(\"test2\", 7.9);\r\n\r\n            var result = StringBuilderPool.GetStringAndReturn(sb);\r\n            Thread.CurrentThread.CurrentCulture = defaultCulture;\r\n            \r\n            return result;\r\n        }\r\n        [Fact]\r\n        public void AddDoubleValue_InvariantCulture()\r\n        {\r\n            var task           = Task<string>.Factory.StartNew(GetJsonDoubleString);\r\n            var observedResult = task.Result;\r\n            \r\n            const string expectedResult = \"\\\"test1\\\":5.7,\\\"test2\\\":7.9\";\r\n            Assert.Equal(expectedResult, observedResult);\r\n\r\n        }\r\n\r\n        [Fact]\r\n        
public void AddDoubleValue_NullInt()\r\n        {\r\n            var sb = StringBuilderPool.Get();\r\n            var json = new JsonObject(sb);\r\n            json.AddDoubleValue(\"test1\", null);\r\n\r\n            var observedResult = StringBuilderPool.GetStringAndReturn(sb);\r\n\r\n            Assert.Equal(string.Empty, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void AddStringValue_TwoTimes()\r\n        {\r\n            var sb = StringBuilderPool.Get();\r\n            var json = new JsonObject(sb);\r\n            json.AddStringValue(\"test1\", \"bob\");\r\n            json.AddStringValue(\"test2\", \"jane\", false);\r\n\r\n            const string expectedResult = \"\\\"test1\\\":\\\"bob\\\",\\\"test2\\\":jane\";\r\n            var observedResult = StringBuilderPool.GetStringAndReturn(sb);\r\n\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void AddStringValue_NullInt()\r\n        {\r\n            var sb = StringBuilderPool.Get();\r\n            var json = new JsonObject(sb);\r\n            json.AddStringValue(\"test1\", null);\r\n\r\n            var observedResult = StringBuilderPool.GetStringAndReturn(sb);\r\n\r\n            Assert.Equal(string.Empty, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void AddStringValues_TwoTimes()\r\n        {\r\n            var sb = StringBuilderPool.Get();\r\n            var json = new JsonObject(sb);\r\n\r\n            var strings = new[] { \"A\", \"B\", \"C\" };\r\n            var strings2 = new[] { \"D\", \"E\", \"F\" };\r\n\r\n            json.AddStringValues(\"test1\", strings);\r\n            json.AddStringValues(\"test2\", strings2, false);\r\n\r\n            const string expectedResult = \"\\\"test1\\\":[\\\"A\\\",\\\"B\\\",\\\"C\\\"],\\\"test2\\\":[D,E,F]\";\r\n            var observedResult = StringBuilderPool.GetStringAndReturn(sb);\r\n\r\n            Assert.Equal(expectedResult, observedResult);\r\n  
      }\r\n\r\n        [Fact]\r\n        public void AddStringValues_NullArray()\r\n        {\r\n            var sb = StringBuilderPool.Get();\r\n            var json = new JsonObject(sb);\r\n\r\n            json.AddStringValues(\"test1\", (string[])null);\r\n            var observedResult = StringBuilderPool.GetStringAndReturn(sb);\r\n\r\n            Assert.Equal(string.Empty, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void AddIntValues_TwoTimes()\r\n        {\r\n            var sb = StringBuilderPool.Get();\r\n            var json = new JsonObject(sb);\r\n\r\n            var ints = new[] { 1, 2, 3 };\r\n            var ints2 = new[] { 4, 5, 6 };\r\n\r\n            json.AddIntValues(\"test1\", ints);\r\n            json.AddIntValues(\"test2\", ints2);\r\n\r\n            const string expectedResult = \"\\\"test1\\\":[1,2,3],\\\"test2\\\":[4,5,6]\";\r\n            var observedResult = StringBuilderPool.GetStringAndReturn(sb);\r\n\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void AddIntValues_NullArray()\r\n        {\r\n            var sb = StringBuilderPool.Get();\r\n            var json = new JsonObject(sb);\r\n\r\n            json.AddIntValues(\"test1\", null);\r\n            var observedResult = StringBuilderPool.GetStringAndReturn(sb);\r\n\r\n            Assert.Equal(string.Empty, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void AddObjectValues_TwoTimes()\r\n        {\r\n            var sb = StringBuilderPool.Get();\r\n            var json = new JsonObject(sb);\r\n\r\n            var points = new Point[2];\r\n            points[0] = new Point(1, 2);\r\n            points[1] = new Point(3, 4);\r\n\r\n            var points2 = new Point[1];\r\n            points2[0] = new Point(5, 6);\r\n\r\n            json.AddObjectValues(\"test1\", points);\r\n            json.AddObjectValues(\"test2\", points2);\r\n\r\n            const string 
expectedResult = \"\\\"test1\\\":[{\\\"X\\\":1,\\\"Y\\\":2},{\\\"X\\\":3,\\\"Y\\\":4}],\\\"test2\\\":[{\\\"X\\\":5,\\\"Y\\\":6}]\";\r\n            var observedResult = StringBuilderPool.GetStringAndReturn(sb);\r\n\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void AddObjectValues_NullArray()\r\n        {\r\n            var sb = StringBuilderPool.Get();\r\n            var json = new JsonObject(sb);\r\n\r\n            json.AddObjectValues(\"test1\", null as Point[]);\r\n            var observedResult = StringBuilderPool.GetStringAndReturn(sb);\r\n\r\n            Assert.Equal(string.Empty, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void AddStringValues_EmptyArray()\r\n        {\r\n            var sb = StringBuilderPool.Get();\r\n            var json = new JsonObject(sb);\r\n\r\n            json.AddStringValues(\"test1\", new string[0]);\r\n            var observedResult = StringBuilderPool.GetStringAndReturn(sb);\r\n\r\n            Assert.Equal(string.Empty, observedResult);\r\n        }\r\n\r\n        private sealed class Point : IJsonSerializer\r\n        {\r\n            private readonly int _x;\r\n            private readonly int _y;\r\n\r\n            public Point(int x, int y)\r\n            {\r\n                _x = x;\r\n                _y = y;\r\n            }\r\n\r\n            public void SerializeJson(StringBuilder sb)\r\n            {\r\n                var jsonObject = new JsonObject(sb);\r\n                sb.Append(JsonObject.OpenBrace);\r\n                jsonObject.AddIntValue(\"X\", _x);\r\n                jsonObject.AddIntValue(\"Y\", _y);\r\n                sb.Append(JsonObject.CloseBrace);\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/IO/JsonWriterTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Text;\r\nusing Moq;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.IO;\r\nusing VariantAnnotation.Providers;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.IO\r\n{\r\n    public sealed class JsonWriterTests\r\n    {\r\n        [Fact]\r\n        public void WriteJsonEntry_Nominal()\r\n        {\r\n            var dataSourceVersions = new List<IDataSourceVersion> { new DataSourceVersion(\"nirvana\", \"2.0\", 100) };\r\n            var sampleNames = new[] { \"NA12878\" };\r\n\r\n            var position1 = new Mock<IPosition>();\r\n            position1.SetupGet(x => x.Chromosome).Returns(ChromosomeUtilities.Chr1);\r\n            position1.SetupGet(x => x.Start).Returns(100);\r\n            position1.SetupGet(x => x.End).Returns(100);\r\n\r\n            var position2 = new Mock<IPosition>();\r\n            position2.SetupGet(x => x.Chromosome).Returns(ChromosomeUtilities.Chr1);\r\n            position2.SetupGet(x => x.Start).Returns(101);\r\n            position2.SetupGet(x => x.End).Returns(101);\r\n\r\n            string observedResult;\r\n\r\n            using (var ms = new MemoryStream())\r\n            {\r\n                using (var streamWriter = new StreamWriter(ms, Encoding.ASCII, 1024, true))\r\n                using (var writer       = new JsonWriter(streamWriter, null, \"nirvana\", \"time\", \"vep\", dataSourceVersions, \"hg19\", sampleNames, false))\r\n                {\r\n                    writer.WritePosition(position1.Object, \"{\\\"test\\\":\\\"good\\\"}\");\r\n                    writer.WritePosition(position2.Object, \"{\\\"crash\\\":\\\"bad\\\"}\");\r\n                    writer.WritePosition(null, (string)null);\r\n                }\r\n\r\n                observedResult = Encoding.UTF8.GetString(ms.ToArray());\r\n            }\r\n\r\n     
       const string expectedResult = \"{\\\"header\\\":{\\\"annotator\\\":\\\"nirvana\\\",\\\"creationTime\\\":\\\"time\\\",\\\"genomeAssembly\\\":\\\"hg19\\\",\\\"schemaVersion\\\":6,\\\"dataVersion\\\":\\\"vep\\\",\\\"dataSources\\\":[{\\\"name\\\":\\\"nirvana\\\",\\\"version\\\":\\\"2.0\\\",\\\"releaseDate\\\":\\\"0001-01-01\\\"}],\\\"samples\\\":[\\\"NA12878\\\"]},\\\"positions\\\":[\\n{\\\"test\\\":\\\"good\\\"},\\n{\\\"crash\\\":\\\"bad\\\"}\\n]}\\n\";\r\n            Assert.Equal(expectedResult, observedResult);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/IO/SampleExtensionsTests.cs",
    "content": "﻿using VariantAnnotation.IO;\r\nusing Vcf.Sample;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.IO\r\n{\r\n    public sealed class SampleExtensionsTests\r\n    {\r\n        [Fact]\r\n        public void GetJsonString_Nominal()\r\n        {\r\n            var sample = new Sample(new[] {23, 34}, 12.345f, 3, new[] {\"-\", \"+\"}, true, \r\n                \"1/2\", 98, true, 98.3, 56.67f,\r\n                new[] {8, 14}, new[] {7, 4}, new[] {10, 15}, 34, new[] {0.34, 0.56}, \r\n                1, 2.3, null, new []{\"12.34\", \"null\"}, 1234);\r\n\r\n            string observedResult = sample.GetJsonString();\r\n\r\n            Assert.Contains(\"\\\"alleleDepths\\\":[23,34]\",                  observedResult);\r\n            Assert.Contains(\"\\\"artifactAdjustedQualityScore\\\":12.3\",     observedResult);\r\n            Assert.Contains(\"\\\"copyNumber\\\":3\",                          observedResult);\r\n            Assert.Contains(\"\\\"diseaseAffectedStatuses\\\":[\\\"-\\\",\\\"+\\\"]\", observedResult);\r\n            Assert.Contains(\"\\\"failedFilter\\\":true\",                     observedResult);\r\n            Assert.Contains(\"\\\"genotype\\\":\\\"1/2\\\"\",                      observedResult);\r\n            Assert.Contains(\"\\\"genotypeQuality\\\":98\",                    observedResult);\r\n            Assert.Contains(\"\\\"isDeNovo\\\":true\",                         observedResult);\r\n            Assert.Contains(\"\\\"deNovoQuality\\\":98.3\",                    observedResult);\r\n            Assert.Contains(\"\\\"likelihoodRatioQualityScore\\\":56.7\",      observedResult);\r\n            Assert.Contains(\"\\\"pairedEndReadCounts\\\":[8,14]\",            observedResult);\r\n            Assert.Contains(\"\\\"repeatUnitCounts\\\":[7,4]\",                observedResult);\r\n            Assert.Contains(\"\\\"splitReadCounts\\\":[10,15]\",               observedResult);\r\n            
Assert.Contains(\"\\\"totalDepth\\\":34\",                         observedResult);\r\n            Assert.Contains(\"\\\"variantFrequencies\\\":[0.34,0.56]\",        observedResult);\r\n            Assert.Contains(\"\\\"minorHaplotypeCopyNumber\\\":1\",            observedResult);\r\n            Assert.Contains(\"\\\"somaticQuality\\\":2.3\",                    observedResult);\r\n            Assert.Contains(\"\\\"heteroplasmyPercentile\\\":[12.34,null]\",   observedResult);\r\n            Assert.Contains(\"\\\"binCount\\\":1234\",                         observedResult);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/NSA/NsaIndexTests.cs",
    "content": "﻿using System;\nusing System.IO;\nusing Genome;\nusing IO;\nusing VariantAnnotation.NSA;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.NSA\n{\n    public sealed class NsaIndexTests\n    {\n        [Fact]\n        public void Query_chunks_in_same_chrom()\n        {\n            var stream = new MemoryStream();\n            var writer = new ExtendedBinaryWriter(stream);\n            var version = new DataSourceVersion(\"dbsnp\", \"150\", DateTime.Now.Ticks, \"dbsnp ids\");\n            var index = new NsaIndex(writer, GenomeAssembly.GRCh37, version, \"dbsnp\", true, true, SaCommon.SchemaVersion, false);\n\n            index.Add(0, 100, 2000, 23457, 89320);\n            index.Add(0, 2100, 4000, 112778, 58746);\n            index.Add(0, 4100, 7000, 171525, 658794);\n\n            (long start, int chunkCount) = index.GetFileRange(0, 150, 2120);\n            Assert.Equal(23457, start);\n            Assert.Equal(2, chunkCount);\n\n            (start, chunkCount) = index.GetFileRange(0, 50, 98);\n            Assert.Equal(-1, start);\n            Assert.Equal(0, chunkCount);\n\n            (start, chunkCount) = index.GetFileRange(0, 150, 2010);\n            Assert.Equal(23457, start);\n            Assert.Equal(1, chunkCount);\n\n            (start, chunkCount) = index.GetFileRange(0, 2010, 4050);\n            Assert.Equal(112778, start);\n            Assert.Equal(1, chunkCount);\n\n            (start, chunkCount) = index.GetFileRange(0, 4010, 4050);\n            Assert.Equal(-1, start);\n            Assert.Equal(0, chunkCount);\n\n            (start, chunkCount) = index.GetFileRange(0, 7010, 7050);\n            Assert.Equal(-1, start);\n            Assert.Equal(0, chunkCount);\n        }\n\n        [Fact]\n        public void Query_chunks_in_different_chrom()\n        {\n            var stream = new MemoryStream();\n            var writer = new ExtendedBinaryWriter(stream);\n           
 var version = new DataSourceVersion(\"dbsnp\", \"150\", DateTime.Now.Ticks, \"dbsnp ids\");\n            var index = new NsaIndex(writer, GenomeAssembly.GRCh37, version, \"dbsnp\", true, true, SaCommon.SchemaVersion, false);\n\n            index.Add(0, 100, 2000, 23457, 89320);\n            index.Add(0, 2100, 4000, 112778, 58746);\n            index.Add(0, 4100, 7000, 171525, 658794);\n\n            index.Add(1, 100, 2000, 23457, 89320);\n            index.Add(1, 2100, 4000, 112778, 58746);\n            index.Add(1, 4100, 7000, 171525, 658794);\n\n            (long start, int chunkCount) = index.GetFileRange(0, 150, 2120);\n            Assert.Equal(23457, start);\n            Assert.Equal(2, chunkCount);\n\n            (start, chunkCount) = index.GetFileRange(0, 50, 98);\n            Assert.Equal(-1, start);\n            Assert.Equal(0, chunkCount);\n\n            (start, chunkCount) = index.GetFileRange(0, 150, 2010);\n            Assert.Equal(23457, start);\n            Assert.Equal(1, chunkCount);\n\n            (start, chunkCount) = index.GetFileRange(0, 2010, 4050);\n            Assert.Equal(112778, start);\n            Assert.Equal(1, chunkCount);\n\n            (start, chunkCount) = index.GetFileRange(0, 4010, 4050);\n            Assert.Equal(-1, start);\n            Assert.Equal(0, chunkCount);\n\n            (start, chunkCount) = index.GetFileRange(0, 7010, 7050);\n            Assert.Equal(-1, start);\n            Assert.Equal(0, chunkCount);\n\n            //chr2\n            (start, chunkCount) =  index.GetFileRange(0, 150, 2120);\n            Assert.Equal(23457, start);\n            Assert.Equal(2, chunkCount);\n\n            (start, chunkCount) = index.GetFileRange(0, 50, 98);\n            Assert.Equal(-1, start);\n            Assert.Equal(0, chunkCount);\n\n            (start, chunkCount) = index.GetFileRange(0, 150, 2010);\n            Assert.Equal(23457, start);\n            Assert.Equal(1, chunkCount);\n\n            (start, chunkCount) = 
index.GetFileRange(0, 2010, 4050);\n            Assert.Equal(112778, start);\n            Assert.Equal(1, chunkCount);\n\n            (start, chunkCount) = index.GetFileRange(0, 4010, 4050);\n            Assert.Equal(-1, start);\n            Assert.Equal(0, chunkCount);\n\n            (start, chunkCount) = index.GetFileRange(0, 7010, 7050);\n            Assert.Equal(-1, start);\n            Assert.Equal(0, chunkCount);\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/NSA/RefMinorIndexTests.cs",
    "content": "﻿using System;\nusing System.IO;\nusing Genome;\nusing IO;\nusing VariantAnnotation.NSA;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.NSA\n{\n    public sealed class RefMinorIndexTests\n    {\n        [Fact]\n        public void CreateAndQuery_one_chromosome()\n        {\n            using (var stream = new MemoryStream())\n            using(var writer = new ExtendedBinaryWriter(stream))\n            {\n                var index = new RefMinorIndex(writer, GenomeAssembly.GRCh37, new DataSourceVersion(\"name\", \"1\",  DateTime.Now.Ticks), SaCommon.SchemaVersion );\n\n                index.Add(0, 100);\n                index.Add(0, 105);\n                index.Add(0, 110);\n                index.Add(0, 115);\n                index.Write(120);\n\n                (long location, int byteCount, int count) = index.GetFileRange(0);\n                Assert.Equal(100, location);\n                Assert.Equal(20, byteCount);\n                Assert.Equal(4, count);\n            }\n        }\n        [Fact]\n        public void CreateAndQuery_multiple_chromosomes()\n        {\n            using (var stream = new MemoryStream())\n            using (var writer = new ExtendedBinaryWriter(stream))\n            {\n                var index = new RefMinorIndex(writer, GenomeAssembly.GRCh37, new DataSourceVersion(\"name\", \"1\", DateTime.Now.Ticks), SaCommon.SchemaVersion);\n\n                index.Add(0, 100);\n                index.Add(0, 105);\n                index.Add(0, 110);\n                index.Add(0, 115);\n                index.Add(1, 200);\n                index.Add(1, 205);\n                index.Add(1, 210);\n                index.Add(2, 315);\n\n                index.Write(320);\n\n                (long location, int byteCount, int count) = index.GetFileRange(0);\n                Assert.Equal(100, location);\n                Assert.Equal(100, byteCount);\n                
Assert.Equal(4, count);\n\n                (location, byteCount, count) = index.GetFileRange(1);\n                Assert.Equal(200, location);\n                Assert.Equal(115, byteCount);\n                Assert.Equal(3, count);\n\n                (location, byteCount, count) = index.GetFileRange(2);\n                Assert.Equal(315, location);\n                Assert.Equal(5, byteCount);\n                Assert.Equal(1, count);\n            }\n        }\n\n        [Fact]\n        public void ReadBack()\n        {\n            var stream = new MemoryStream();\n            using (var writer = new ExtendedBinaryWriter(stream))\n            {\n                var index = new RefMinorIndex(writer, GenomeAssembly.GRCh37, new DataSourceVersion(\"name\", \"1\", DateTime.Now.Ticks), SaCommon.SchemaVersion);\n\n                index.Add(0, 100);\n                index.Add(0, 105);\n                index.Add(0, 110);\n                index.Add(0, 115);\n                index.Add(1, 200);\n                index.Add(1, 205);\n                index.Add(1, 210);\n                index.Add(2, 315);\n\n                index.Write(320);\n                \n            }\n            var readStream = new MemoryStream(stream.ToArray()) { Position = 0 };\n            using (var reader = new ExtendedBinaryReader(readStream))\n            {\n                var index = new RefMinorIndex(reader);\n                (long location, int byteCount, int count) = index.GetFileRange(0);\n                Assert.Equal(100, location);\n                Assert.Equal(100, byteCount);\n                Assert.Equal(4, count);\n\n                (location, byteCount, count) = index.GetFileRange(1);\n                Assert.Equal(200, location);\n                Assert.Equal(115, byteCount);\n                Assert.Equal(3, count);\n\n                (location, byteCount, count) = index.GetFileRange(2);\n                Assert.Equal(315, location);\n                Assert.Equal(5, byteCount);\n           
     Assert.Equal(1, count);\n            }\n            \n        }\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/NSA/SuppAnnotationsOutputTests.cs",
    "content": "﻿using System.Text;\nusing ErrorHandling.Exceptions;\nusing VariantAnnotation.NSA;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.NSA\n{\n    public sealed class SuppAnnotationsOutputTests\n    {\n        [Fact]\n        public void Output_positional_not_array()\n        {\n            var sa = new SupplementaryAnnotation(\"Anno\", false, true, \"pathogenic\", null);\n\n            var sb = new StringBuilder();\n            sa.SerializeJson(sb);\n            Assert.Equal(\"pathogenic\", sb.ToString());\n        }\n\n        [Fact]\n        public void Output_not_positional_not_array()\n        {\n            var sa = new SupplementaryAnnotation(\"alleleFreq\", false, false, \"pathogenic\", null);\n\n            var sb = new StringBuilder();\n            sa.SerializeJson(sb);\n            Assert.Equal(\"{pathogenic}\", sb.ToString());\n        }\n\n        [Fact]\n        public void Output_not_positional_array()\n        {\n            //e.g. clinvar\n            var sa = new SupplementaryAnnotation(\"spliceAi\", true, false, null, new []{\"likely pathogenic\", \"unknown pathogenicity\"});\n\n            var sb = new StringBuilder();\n            sa.SerializeJson(sb);\n            Assert.Equal(\"[{likely pathogenic},{unknown pathogenicity}]\", sb.ToString());\n        }\n\n        [Fact]\n        public void Output_emptyJsonStrings_array()\n        {\n            Assert.Throws<UserErrorException>(()=>new SupplementaryAnnotation(\"svAnno\", true, true, \"pathogenic\", null));\n        }\n        [Fact]\n        public void Output_emptyJsonString_not_array()\n        {\n            Assert.Throws<UserErrorException>(() => new SupplementaryAnnotation(\"svAnno\", false, true, null, new []{\"pathogenic\"}));\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/NSA/SuppIntervalUtilitiesTests.cs",
    "content": "﻿using Genome;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.NSA;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.NSA\r\n{\r\n    public sealed class SuppIntervalUtilitiesTests\r\n    {\r\n        [Theory]\r\n        [InlineData(1, 100, 51, 200, 0.33333, 0.33333)]\r\n        [InlineData(1, 300, 51, 200, 0.5, 1)]\r\n        [InlineData(101, 300, 51, 200, 0.5, 0.66667)]\r\n        [InlineData(1, 100, 100, 299, 0.005, 0.005)]\r\n        public void GetOverlapFractions_NotNull_AsExpected(int varStart, int varEnd, int saStart, int saEnd, double expectedReciprocalOverlap, double expecedAnnotationOverlap)\r\n        {\r\n            var saInterval = new ChromosomeInterval(ChromosomeUtilities.Chr1, saStart, saEnd);\r\n            var variant = new SimpleVariant(ChromosomeUtilities.Chr1, varStart, varEnd, null, null, VariantType.deletion);\r\n            var (reciprocalOverlap, annotationOverlap) = SuppIntervalUtilities.GetOverlapFractions(saInterval, variant);\r\n\r\n            Assert.NotNull(reciprocalOverlap);\r\n            Assert.NotNull(annotationOverlap);\r\n            Assert.Equal(expectedReciprocalOverlap, reciprocalOverlap.Value, 5);\r\n            Assert.Equal(expecedAnnotationOverlap, annotationOverlap.Value, 5);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetOverlapFractions_ReturnNulls_DifferentChroms()\r\n        {\r\n            var saInterval = new ChromosomeInterval(ChromosomeUtilities.Chr1, 1, 2);\r\n            var variant = new SimpleVariant(ChromosomeUtilities.Chr2, 1, 2, null, null, VariantType.deletion);\r\n            var (reciprocalOverlap, annotationOverlap) = SuppIntervalUtilities.GetOverlapFractions(saInterval, variant);\r\n\r\n            Assert.Null(reciprocalOverlap);\r\n            Assert.Null(annotationOverlap);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetOverlapFractions_ReturnNulls_Insertion()\r\n        {\r\n            var saInterval = new 
ChromosomeInterval(ChromosomeUtilities.Chr1, 1, 2);\r\n            var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1, 2, null, null, VariantType.insertion);\r\n            var (reciprocalOverlap, annotationOverlap) = SuppIntervalUtilities.GetOverlapFractions(saInterval, variant);\r\n\r\n            Assert.Null(reciprocalOverlap);\r\n            Assert.Null(annotationOverlap);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetOverlapFractions_ReturnNulls_SaInsertion()\r\n        {\r\n            var saInterval = new ChromosomeInterval(ChromosomeUtilities.Chr1, 2, 1);\r\n            var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1, 2, null, null, VariantType.deletion);\r\n            var (reciprocalOverlap, annotationOverlap) = SuppIntervalUtilities.GetOverlapFractions(saInterval, variant);\r\n\r\n            Assert.Null(reciprocalOverlap);\r\n            Assert.Null(annotationOverlap);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetOverlapFractions_ReturnNulls_BreakEnd()\r\n        {\r\n            var saInterval = new ChromosomeInterval(ChromosomeUtilities.Chr1, 2, 1);\r\n            var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1, 2, null, null, VariantType.translocation_breakend);\r\n            var (reciprocalOverlap, annotationOverlap) = SuppIntervalUtilities.GetOverlapFractions(saInterval, variant);\r\n\r\n            Assert.Null(reciprocalOverlap);\r\n            Assert.Null(annotationOverlap);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/ProviderTests/GsaProviderTests.cs",
    "content": "using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing ErrorHandling.Exceptions;\nusing Genome;\nusing Moq;\nusing OptimizedCore;\nusing SAUtils.GenericScore;\nusing SAUtils.GenericScore.GenericScoreParser;\nusing UnitTests.TestUtilities;\nusing UnitTests.VariantAnnotation.ScoreFile;\nusing VariantAnnotation.GenericScore;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Pools;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\nusing Variants;\nusing Vcf.VariantCreator;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.ProviderTests;\n\npublic sealed class GsaProviderTests\n{\n    private static (ScoreReader, Dictionary<Chromosome, List<Dictionary<string, object>>>) GetScoreReaderWithData()\n    {\n        var testSetup = new Dictionary<Chromosome, List<Dictionary<string, object>>>\n        {\n            {\n                ChromosomeUtilities.Chr1, new List<Dictionary<string, object>>\n                {\n                    new Dictionary<string, object>\n                    {\n                        {\"startPosition\", 10_001},\n                        {\"endPosition\", 23_000},\n                    }\n                }\n            },\n            {\n                ChromosomeUtilities.Chr2, new List<Dictionary<string, object>>\n                {\n                    new Dictionary<string, object>\n                    {\n                        {\"startPosition\", 11_001},\n                        {\"endPosition\", 23_500},\n                    }\n                }\n            },\n        };\n\n        return (TestDataGenerator.GetScoreReaderWithRandomData(testSetup), testSetup);\n    }\n\n    private static (ScoreProvider provider, Dictionary<Chromosome, List<Dictionary<string, object>>> providerTestData) GetScoreProvider()\n    {\n        (ScoreReader scoreReader, Dictionary<Chromosome, List<Dictionary<string, object>>> testData) 
= GetScoreReaderWithData();\n\n        var provider = new ScoreProvider(new[] {scoreReader});\n        return (provider, testData);\n    }\n\n    private static IAnnotatedPosition GetPosition(Chromosome chrom, int start, string refAllele, string[] altAlleles)\n    {\n        var position          = new Mock<IAnnotatedPosition>();\n        var annotatedVariants = new List<IAnnotatedVariant>();\n        foreach (string altAllele in altAlleles)\n        {\n            VariantType type = SmallVariantCreator.GetVariantType(refAllele, altAllele);\n            int         end  = start + altAllele.Length - 1;\n\n            var variant = VariantPool.Get(chrom, start, end, refAllele, altAllele, type, null, false, false, false,\n                null, AnnotationBehavior.SmallVariants, false);\n            annotatedVariants.Add(AnnotatedVariantPool.Get(variant));\n        }\n\n        position.SetupGet(x => x.AnnotatedVariants).Returns(annotatedVariants.ToArray);\n        return position.Object;\n    }\n\n    [Fact]\n    public void TestAnnotateUsingScoreProvider()\n    {\n        (IAnnotationProvider provider, Dictionary<Chromosome, List<Dictionary<string, object>>> testSetup) = GetScoreProvider();\n\n        foreach ((Chromosome chromosome, List<Dictionary<string, object>> chromosomeTests) in testSetup)\n        {\n            foreach (Dictionary<string, object> chromosomeTest in chromosomeTests)\n            {\n                var expectedScores = (List<double>) chromosomeTest[\"expectedScores\"];\n                var startPosition  = (int) chromosomeTest[\"startPosition\"];\n                for (var i = 0; i < expectedScores.Count; i++)\n                {\n                    IAnnotatedPosition position = GetPosition(chromosome, startPosition + i, \"T\", new[] {\"A\"});\n                    provider.Annotate(position);\n\n                    var sb         = position.AnnotatedVariants[0].GetJsonStringBuilder(chromosome.UcscName);\n                    var jsonString = 
sb.ToString();\n                    StringBuilderPool.Return(sb);\n                    var expectedScore = $\"{Math.Round(expectedScores[i], 2):0.##}\";\n                    var expectedString =\n                        \"{\\\"chromosome\\\":\\\"\" + $\"{chromosome.UcscName}\\\",\" +\n                        \"\\\"begin\\\":\"         + $\"{startPosition + i},\"     +\n                        \"\\\"end\\\":\"           + $\"{startPosition + i},\"     +\n                        \"\\\"refAllele\\\":\"     + \"\\\"T\\\",\"                    +\n                        \"\\\"altAllele\\\":\"     + \"\\\"A\\\",\"                    +\n                        \"\\\"variantType\\\":\"   + \"\\\"SNV\\\",\"                  +\n                        \"\\\"TestKey\\\":\"       + $\"{expectedScore}\"          +\n                        \"}\";\n\n                    Assert.Equal(expectedString, jsonString);\n                }\n            }\n        }\n    }\n        \n    [Fact]\n    public void TestSNVTypeAnnotationOnly()\n    {\n        (IAnnotationProvider provider, Dictionary<Chromosome, List<Dictionary<string, object>>> testSetup) = GetScoreProvider();\n            \n        var position          = new Mock<IAnnotatedPosition>();\n        var annotatedVariants = new List<IAnnotatedVariant>();\n            \n        var type = VariantType.insertion;\n        Variant variant = VariantPool.Get(ChromosomeUtilities.Chr1, 15_000, 15_001, \"-\", \"G\", type, null, false, false, false,\n            null, AnnotationBehavior.SmallVariants, false);\n        annotatedVariants.Add(AnnotatedVariantPool.Get(variant));\n            \n        position.SetupGet(x => x.AnnotatedVariants).Returns(annotatedVariants.ToArray);\n            \n        IAnnotatedPosition annotatedPosition = position.Object;\n        provider.Annotate(annotatedPosition);\n        Assert.Empty(annotatedPosition.AnnotatedVariants[0].SaList);\n    }\n\n    [Fact]\n    private void TestUnknownPosition()\n    {\n      
  (IAnnotationProvider provider, Dictionary<Chromosome, List<Dictionary<string, object>>> testSetup) = GetScoreProvider();\n        IAnnotatedPosition position = GetPosition(ChromosomeUtilities.Chr1, 5_000, \"T\", new[] {\"A\"});\n        provider.Annotate(position);\n        Assert.Empty(position.AnnotatedVariants[0].SaList);\n\n        // Unknown Chromosome\n        position = GetPosition(ChromosomeUtilities.Chr7, 5_000, \"T\", new[] {\"A\"});\n        provider.Annotate(position);\n        Assert.Empty(position.AnnotatedVariants[0].SaList);\n    }\n\n    [Fact]\n    private void TestUnknownAssembly()\n    {\n        var      version     = new DataSourceVersion(\"source1\", \"v1\", DateTime.Now.Ticks, \"description\");\n        string[] nucleotides = {\"A\", \"C\", \"G\", \"T\"};\n        var writerSettings = new WriterSettings(\n            10_000,\n            nucleotides,\n            false,\n            EncoderType.ZeroToOne,\n            new ZeroToOneScoreEncoder(2, 1.0),\n            new ScoreJsonEncoder(\"TestKey\", \"TestSubKey\"),\n            new SaItemValidator(true, true)\n        );\n\n        var position = 10_010;\n        using (var dataStream = new MemoryStream())\n        using (var indexStream = new MemoryStream())\n        {\n            using (var saWriter = new ScoreFileWriter(\n                       writerSettings,\n                       dataStream,\n                       indexStream,\n                       version,\n                       GenericScoreTests.GetAllASequenceProvider(GenomeAssembly.Unknown),\n                       SaCommon.SchemaVersion,\n                       skipIncorrectRefEntries: false,\n                       leaveOpen: true\n                   ))\n            {\n                IEnumerable<GenericScoreItem> items = new List<GenericScoreItem>\n                {\n                    new(ChromosomeUtilities.Chr1, position, \"A\", \"C\", 0.5),\n                };\n                saWriter.Write(items);\n            
}\n\n            dataStream.Position  = 0;\n            indexStream.Position = 0;\n\n            ScoreReader scoreReader = ScoreReader.Read(dataStream, indexStream);\n            Assert.Throws<UserErrorException>(() => new ScoreProvider(new[] {scoreReader}));\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/ProviderTests/LcrProviderTests.cs",
    "content": "using System;\nusing System.IO;\nusing Genome;\nusing Moq;\nusing SAUtils.gnomAD;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.NSA;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.ProviderTests\n{\n    public class LcrProviderTests\n    {\n        private Stream GetNsiStream()\n        {\n            var stream = new MemoryStream();\n            var version = new DataSourceVersion(\"test\", \"June_2020\", DateTime.Now.Ticks, \"dummy\");\n            using (var writer = new NsiWriter(stream, version, GenomeAssembly.GRCh37, SaCommon.LowComplexityRegionTag,\n                ReportFor.AllVariants,\n                SaCommon.NsiSchemaVersion, true))\n            {\n                writer.Write(new []\n                {\n                    new LcrInterval(ChromosomeUtilities.Chr1, 100, 150),\n                    new LcrInterval(ChromosomeUtilities.Chr1, 300, 450),\n                    new LcrInterval(ChromosomeUtilities.Chr1, 600, 650),\n                    new LcrInterval(ChromosomeUtilities.Chr2, 100, 150),\n                    new LcrInterval(ChromosomeUtilities.Chr2, 300, 450),\n                    new LcrInterval(ChromosomeUtilities.Chr2, 600, 650)\n                });\n            }\n\n            stream.Position = 0;\n\n            return stream;\n        }\n\n        private IAnnotatedVariant GetAnnotatedVariant(Chromosome chromosome, int start, int end)\n        {\n            var annoVariant = new Mock<IAnnotatedVariant>();\n            annoVariant.SetupGet(x => x.Variant.Chromosome).Returns(chromosome);\n            annoVariant.SetupGet(x => x.Variant.Start).Returns(start);\n            annoVariant.SetupGet(x => x.Variant.End).Returns(end);\n            annoVariant.SetupProperty(x => x.InLowComplexityRegion);\n            return annoVariant.Object;\n        }\n\n        
private IAnnotatedPosition GetAnnotatedPosition(Chromosome chromosome, int start, int end)\n        {\n            var annoPosition = new Mock<IAnnotatedPosition>();\n            annoPosition.SetupGet(x => x.AnnotatedVariants).Returns(\n                new []\n                {\n                    GetAnnotatedVariant(chromosome, start, end)\n                }\n                );\n            \n            return annoPosition.Object;\n        }\n\n        [Fact]\n        public void AddAnnotationsTest()\n        {\n            using (var provider = new LcrProvider(GetNsiStream()))\n            {\n                var position = GetAnnotatedPosition(ChromosomeUtilities.Chr1, 50, 70);\n                provider.Annotate(position);\n\n                Assert.False(position.AnnotatedVariants[0].InLowComplexityRegion);\n                \n                position = GetAnnotatedPosition(ChromosomeUtilities.Chr1, 110, 160);\n                provider.Annotate(position);\n\n                Assert.True(position.AnnotatedVariants[0].InLowComplexityRegion);\n                \n                position = GetAnnotatedPosition(ChromosomeUtilities.Chr2, 110, 160);\n                provider.Annotate(position);\n\n                Assert.True(position.AnnotatedVariants[0].InLowComplexityRegion);\n            }\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/ProviderTests/NsaProviderTests.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing Genome;\nusing Moq;\nusing OptimizedCore;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.AnnotatedPositions;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.Pools;\nusing VariantAnnotation.Providers;\nusing Variants;\nusing Vcf.VariantCreator;\nusing Xunit;\nusing VariantAnnotation.SA;\n    \nnamespace UnitTests.VariantAnnotation.ProviderTests\n{\n    public sealed class NsaProviderTests\n    {\n        private static IAnnotationProvider GetDbSnpProvider()\n        {\n            var chrom1Pos100Annotations = new List<(string refAllele, string altAllele, string annotation)>\n            {\n                (\"A\", \"T\", \"\\\"rs100\\\"\"),\n                (\"A\", \"C\", \"\\\"rs101\\\"\")\n            };\n\n            var dbsnpReader = new Mock<INsaReader>();\n            dbsnpReader.SetupGet(x => x.Assembly).Returns(GenomeAssembly.GRCh37);\n            dbsnpReader.SetupGet(x => x.MatchByAllele).Returns(true);\n            dbsnpReader.SetupGet(x => x.IsArray).Returns(true);\n            dbsnpReader.SetupGet(x => x.JsonKey).Returns(\"dbSnp\");\n            dbsnpReader.SetupGet(x => x.Version)\n                .Returns(new DataSourceVersion(\"dbsnp\", \"v1\", DateTime.Now.Ticks, \"dummy db snp\"));\n            \n            //dbsnpReader.SetupSequence(x => x.GetAnnotation(100)).Returns(chrom1Pos100Annotations);\n            //List<(string refAllele, string altAllele, string annotation)> annotations=null;\n            dbsnpReader.Setup(x =>\n                x.GetAnnotation(It.IsAny<int>(), It.IsAny<List<(string refAllele, string altAllele, string annotation)>>() ))\n                .Callback((int position, List<(string refAllele, string altAllele, string annotation)> annotations) =>\n                {\n                    annotations.Clear();\n                    
annotations.AddRange(chrom1Pos100Annotations);\n                });\n            var provider = new NsaProvider(new[] {dbsnpReader.Object}, null, null);\n\n            return provider;\n        }\n\n        private static IAnnotationProvider GetClinVarProvider()\n        {\n            var chrom1Pos100Annotations = new List<(string refAllele, string altAllele, string annotation)>\n            {\n                (\"A\", \"T\", \"RCV00001\"),\n                (\"A\", \"C\", \"RCV00002\")\n            };\n\n            var clinvarReader = new Mock<INsaReader>();\n            clinvarReader.SetupGet(x => x.Assembly).Returns(GenomeAssembly.GRCh37);\n            clinvarReader.SetupGet(x => x.MatchByAllele).Returns(false);\n            clinvarReader.SetupGet(x => x.IsArray).Returns(true);\n            clinvarReader.SetupGet(x => x.JsonKey).Returns(\"clinvar\");\n            clinvarReader.SetupGet(x => x.Version)\n                .Returns(new DataSourceVersion(\"clinvar\", \"v1\", DateTime.Now.Ticks, \"dummy clinvar data\"));\n            clinvarReader.Setup(x =>\n                    x.GetAnnotation(It.IsAny<int>(), It.IsAny<List<(string refAllele, string altAllele, string annotation)>>() ))\n                .Callback((int position, List<(string refAllele, string altAllele, string annotation)> annotations) =>\n                {\n                    annotations.Clear();\n                    annotations.AddRange(chrom1Pos100Annotations);\n                });\n\n            var provider = new NsaProvider(new[] {clinvarReader.Object}, null, null);\n\n            return provider;\n        }\n\nprivate static IAnnotationProvider GetGmeProvider()\n        {\n            var chrom1Post69134Annotations = new List<(string refAllele, string altAllele, string annotation)>\n            {\n                (\"A\", \"G\", \"\\\"allAc\\\":10,\\\"allAn\\\":202,\\\"allAf\\\":0.0495,\\\"failedFilter\\\":true\")\n            };\n\n            var gmeReader = new Mock<INsaReader>();\n            
gmeReader.SetupGet(x => x.Assembly).Returns(GenomeAssembly.GRCh37);\n            gmeReader.SetupGet(x => x.MatchByAllele).Returns(true);\n            gmeReader.SetupGet(x => x.IsArray).Returns(false);\n            gmeReader.SetupGet(x => x.JsonKey).Returns(SaCommon.GmeTag);\n            gmeReader.SetupGet(x => x.Version)\n                .Returns(new DataSourceVersion(SaCommon.GmeTag, \"v1\", DateTime.Now.Ticks, \"dummy gme data\"));\n            \n            //dbsnpReader.SetupSequence(x => x.GetAnnotation(100)).Returns(chrom1Pos100Annotations);\n            //List<(string refAllele, string altAllele, string annotation)> annotations=null;\n            gmeReader.Setup(x =>\n                    x.GetAnnotation(It.IsAny<int>(), It.IsAny<List<(string refAllele, string altAllele, string annotation)>>() ))\n                .Callback((int position, List<(string refAllele, string altAllele, string annotation)> annotations) =>\n                {\n                    annotations.Clear();\n                    annotations.AddRange(chrom1Post69134Annotations);\n                });\n            var provider = new NsaProvider(new[] {gmeReader.Object}, null, null);\n\n            return provider;\n        }\n\nprivate static IAnnotatedPosition GetPosition(Chromosome chrom, int start, string refAllele, string[] altAlleles)\n        \n        {\n            var position = new Mock<IAnnotatedPosition>();\n            var annotatedVariants = new List<IAnnotatedVariant>();\n            foreach (string altAllele in altAlleles)\n            {\n                VariantType type = SmallVariantCreator.GetVariantType(refAllele, altAllele);\n                int end = start + altAllele.Length - 1;\n\n                var variant = VariantPool.Get(chrom, start, end, refAllele, altAllele, type, null, false, false, false,\n                    null, AnnotationBehavior.SmallVariants, false);\n                annotatedVariants.Add(AnnotatedVariantPool.Get(variant));\n            }\n\n            
position.SetupGet(x => x.AnnotatedVariants).Returns(annotatedVariants.ToArray);\n            return position.Object;\n        }\n\n\n        [Fact]\n        public void Annotate_alleleSpecific()\n        {\n            var provider = GetDbSnpProvider();\n            var position = GetPosition(ChromosomeUtilities.Chr1, 100, \"A\", new []{\"T\"});\n\n            provider.Annotate(position);\n            var sb         = position.AnnotatedVariants[0].GetJsonStringBuilder(\"chr1\");\n            var jsonString = sb.ToString();\n            StringBuilderPool.Return(sb);\n\n            Assert.Equal(\"{\\\"chromosome\\\":\\\"chr1\\\",\\\"begin\\\":100,\\\"end\\\":100,\\\"refAllele\\\":\\\"A\\\",\\\"altAllele\\\":\\\"T\\\",\\\"variantType\\\":\\\"SNV\\\",\\\"dbSnp\\\":[\\\"rs100\\\"]}\", jsonString);\n            VariantPool.Return((Variant)position.AnnotatedVariants[0].Variant);\n            AnnotatedVariantPool.Return((AnnotatedVariant) position.AnnotatedVariants[0]);\n        }\n        \n        [Fact]\n        public void Annotate_gme()\n        {\n            var provider = GetGmeProvider();\n            var position = GetPosition(ChromosomeUtilities.Chr1, 69134, \"A\", new []{\"G\"});\n\n            provider.Annotate(position);\n            var sb         = position.AnnotatedVariants[0].GetJsonStringBuilder(\"chr1\");\n            var jsonString = sb.ToString();\n            StringBuilderPool.Return(sb);\n\n            Assert.Equal(\"{\\\"chromosome\\\":\\\"chr1\\\",\\\"begin\\\":69134,\\\"end\\\":69134,\\\"refAllele\\\":\\\"A\\\",\\\"altAllele\\\":\\\"G\\\",\\\"variantType\\\":\\\"SNV\\\",\\\"gmeVariome\\\":{\\\"allAc\\\":10,\\\"allAn\\\":202,\\\"allAf\\\":0.0495,\\\"failedFilter\\\":true}}\", jsonString);\n            VariantPool.Return((Variant)position.AnnotatedVariants[0].Variant);\n            AnnotatedVariantPool.Return((AnnotatedVariant) position.AnnotatedVariants[0]);\n        }\n\n        [Fact]\n        public void Annotate_notAlleleSpecific_isArray()\n   
     {\n            var provider = GetClinVarProvider();\n            var position = GetPosition(ChromosomeUtilities.Chr1, 100, \"A\", new[] { \"T\" });\n\n            provider.Annotate(position);\n            var sb         = position.AnnotatedVariants[0].GetJsonStringBuilder(\"chr1\");\n            var jsonString = sb.ToString();\n            StringBuilderPool.Return(sb);\n\n            Assert.Equal(\"{\\\"chromosome\\\":\\\"chr1\\\",\\\"begin\\\":100,\\\"end\\\":100,\\\"refAllele\\\":\\\"A\\\",\\\"altAllele\\\":\\\"T\\\",\\\"variantType\\\":\\\"SNV\\\",\\\"clinvar\\\":[{RCV00001,\\\"isAlleleSpecific\\\":true},{RCV00002}]}\", jsonString);\n            VariantPool.Return((Variant)position.AnnotatedVariants[0].Variant);\n            AnnotatedVariantPool.Return((AnnotatedVariant) position.AnnotatedVariants[0]);\n        }\n    }\n}\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/ScoreFile/GenericScoreEncoderTests.cs",
    "content": "using System.Collections.Generic;\nusing System.IO;\nusing IO;\nusing VariantAnnotation.GenericScore;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.ScoreFile;\n\npublic sealed class GenericScoreEncoderTests\n{\n    [Fact]\n    public void TestEncoderDecoder()\n    {\n        var testData = new List<(double inputNumber, double expectedResult)>\n        {\n            (0.246, 0.246),\n            (0.2461, 0.2461),\n            (0.999, 0.999),\n\n            (0.127, 0.127),\n            (0.128, 0.128),\n            (0.129, 0.129),\n\n            // Duplicate of above 3 data points to check if the generic score only stores the codes uniquely\n            (0.127, 0.127),\n            (0.128, 0.128),\n            (0.129, 0.129),\n\n            (0.254, 0.254),\n            (0.255, 0.255),\n            (0.256, 0.256),\n\n            (0.1271, 0.1271),\n            (0.1281, 0.1281),\n            (0.1291, 0.1291),\n\n            (0.2541, 0.2541),\n            (0.2551, 0.2551),\n            (0.2561, 0.2561),\n\n            (0.1266, 0.1266),\n            (0.1276, 0.1276),\n            (0.0, 0.0),\n            (1.0, 1.0),\n            (-1.0, -1.0),\n            (double.NaN, double.NaN)\n        };\n\n        var scoreEncoder = new GenericScoreEncoder();\n\n        foreach ((double input, _) in testData)\n        {\n            scoreEncoder.AddScore(input);\n        }\n\n        using var stream = new MemoryStream();\n        using var writer = new ExtendedBinaryWriter(stream, System.Text.Encoding.Default);\n\n        scoreEncoder.Write(writer);\n        stream.Position = 0;\n        var reader = new ExtendedBinaryReader(stream);\n\n        GenericScoreEncoder deserializedScoreEncoder = GenericScoreEncoder.Read(reader);\n        stream.Close();\n\n        foreach ((double inputNumber, double expectedOutput)in testData)\n        {\n            Assert.Equal(expectedOutput, EncodeDecode(deserializedScoreEncoder, inputNumber));\n        }\n    }\n\n    
private static double EncodeDecode(GenericScoreEncoder encoder, double number)\n    {\n        return encoder.DecodeFromBytes(encoder.EncodeToBytes(number));\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/ScoreFile/GenericScoreEndToEndTests.cs",
    "content": "using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing Genome;\nusing SAUtils.GenericScore;\nusing SAUtils.GenericScore.GenericScoreParser;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.GenericScore;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.ScoreFile\n{\n    public sealed class GenericScoreEndToEndTests\n    {\n        [Fact]\n        public void ScoreWriterTestRandomData()\n        {\n            const int blockLength = 10_000;\n            const int places      = 2;\n            double    tol         = Math.Pow(10, -places);\n\n            string[] nucleotides = {\"A\", \"C\", \"G\", \"T\"};\n\n            var testSetup = new Dictionary<Chromosome, List<Dictionary<string, object>>>\n            {\n                // Normal Chromosome\n                {\n                    ChromosomeUtilities.Chr1, new List<Dictionary<string, object>>\n                    {\n                        new Dictionary<string, object>\n                        {\n                            {\"startPosition\", 10_001},\n                            {\"endPosition\", 23_000},\n                        }\n                    }\n                },\n                // Chromosome with large gaps\n                {\n                    ChromosomeUtilities.Chr2, new List<Dictionary<string, object>>\n                    {\n                        new Dictionary<string, object>\n                        {\n                            {\"startPosition\", 24_001},\n                            {\"endPosition\", 100_000},\n                        },\n                        // 5 Block gap\n                        new Dictionary<string, object>\n                        {\n                            {\"startPosition\", 154_001},\n                            {\"endPosition\", 200_000},\n                        },\n                    }\n                },\n                // Next 
chromosome starting at the position immediately after the previous chromosome's ending position\n                {\n                    ChromosomeUtilities.Chr3, new List<Dictionary<string, object>>\n                    {\n                        new Dictionary<string, object>\n                        {\n                            {\"startPosition\", 200_001},\n                            {\"endPosition\", 210_000},\n                        },\n                        new Dictionary<string, object>\n                        {\n                            {\"startPosition\", 210_001},\n                            {\"endPosition\", 214_000},\n                        },\n                        // Short gap but still within the same block\n                        new Dictionary<string, object>\n                        {\n                            {\"startPosition\", 215_001},\n                            {\"endPosition\", 216_000},\n                        },\n                        // Larger gap to go to next block\n                        new Dictionary<string, object>\n                        {\n                            {\"startPosition\", 221_001},\n                            {\"endPosition\", 235_000},\n                        },\n                    }\n                },\n                // New chromosome with positions that precede others\n                {\n                    ChromosomeUtilities.Chr4, new List<Dictionary<string, object>>\n                    {\n                        new Dictionary<string, object>\n                        {\n                            {\"startPosition\", 10_001},\n                            {\"endPosition\", 21_000},\n                        }\n                    }\n                },\n            };\n\n            var writeStream = new MemoryStream();\n            var indexStream = new MemoryStream();\n            var saItems     = new List<GenericScoreItem>();\n            var version     = new DataSourceVersion(\"Test\", 
\"1\", DateTime.Parse(DateTime.Now.ToString(\"yyyy-MM-dd\")).Ticks, \"No description\");\n            var writerSettings = new WriterSettings(\n                blockLength,\n                nucleotides,\n                false,\n                EncoderType.ZeroToOne,\n                new ZeroToOneScoreEncoder(2, 1.0),\n                new ScoreJsonEncoder(\"TestKey\", \"TestSubKey\"),\n                new SaItemValidator(true, true)\n            );\n\n            // Scoring function to fill random scores\n            TestDataGenerator.GenerateRandomScoreData(testSetup, saItems, TestDataGenerator.GetSequenceProvider());\n\n            using (var scoreFileWriter = new ScoreFileWriter(\n                       writerSettings,\n                       writeStream,\n                       indexStream,\n                       version,\n                       TestDataGenerator.GetSequenceProvider(),\n                       SaCommon.SchemaVersion,\n                       leaveOpen: true\n                   ))\n            {\n                // Write saItems to stream\n                scoreFileWriter.Write(saItems);\n\n                // Reset streams in preparation for reading them\n                indexStream.Position = 0;\n                writeStream.Position = 0;\n\n                // Read the scores\n                ScoreReader scoreReader = ScoreReader.Read(writeStream, indexStream);\n\n                // Assert scores are equal to what was set in test data\n                AssertTestData(testSetup, scoreReader, blockLength, places, tol);\n\n                // Scores in the gap\n                Assert.Equal(double.NaN, scoreReader.GetScore(2, 100_001, \"A\"));\n\n                // Scores for unspecified Allele\n                Assert.Equal(double.NaN, scoreReader.GetScore(2, 100_001, \"C\"));\n            }\n        }\n\n        [Fact]\n        public void ScoreWriterTestDeterministicData()\n        {\n            const int blockLength = 10_000;\n            const int 
places      = 2;\n            double    tol         = Math.Pow(10, -places);\n\n            string[] nucleotides = {\"A\", \"C\", \"G\", \"T\"};\n            var testSetup = new Dictionary<Chromosome, List<Dictionary<string, object>>>\n            {\n                {\n                    ChromosomeUtilities.Chr1, new List<Dictionary<string, object>>\n                    {\n                        new Dictionary<string, object>\n                        {\n                            {\"startPosition\", 10_001},\n                            {\"endPosition\", 23_000},\n                        }\n                    }\n                },\n                {\n                    ChromosomeUtilities.Chr2, new List<Dictionary<string, object>>\n                    {\n                        new Dictionary<string, object>\n                        {\n                            {\"startPosition\", 24_001},\n                            {\"endPosition\", 100_000},\n                        }\n                    }\n                },\n            };\n\n            var saItems     = new List<GenericScoreItem>();\n            var writeStream = new MemoryStream();\n            var indexStream = new MemoryStream();\n            var version     = new DataSourceVersion(\"Test\", \"1\", DateTime.Parse(DateTime.Now.ToString(\"yyyy-MM-dd\")).Ticks, \"No description\");\n            var writerSettings = new WriterSettings(\n                10_000,\n                nucleotides,\n                false,\n                EncoderType.ZeroToOne,\n                new ZeroToOneScoreEncoder(2, 1.0),\n                new ScoreJsonEncoder(\"TestKey\", \"TestSubKey\"),\n                new SaItemValidator(true, true)\n            );\n\n            // Scoring function to fill scores from position\n            double ScoreFunction(int i, int endPosition) => (double) i / endPosition;\n            TestDataGenerator.GenerateTestData(testSetup, saItems, ScoreFunction, 
TestDataGenerator.GetSequenceProvider());\n\n            using (var scoreFileWriter = new ScoreFileWriter(\n                       writerSettings,\n                       writeStream,\n                       indexStream,\n                       version,\n                       TestDataGenerator.GetSequenceProvider(),\n                       SaCommon.SchemaVersion,\n                       leaveOpen: true\n                   ))\n            {\n                // Write saItems to stream\n                scoreFileWriter.Write(saItems);\n\n                // Reset streams in preparation for reading them\n                indexStream.Position = 0;\n                writeStream.Position = 0;\n\n                // Read the scores\n                var scoreReader = ScoreReader.Read(writeStream, indexStream);\n\n                // Assert scores are equal to what was set in test data\n                AssertTestData(testSetup, scoreReader, blockLength, places, tol);\n            }\n        }\n\n        private static void AssertTestData(Dictionary<Chromosome, List<Dictionary<string, object>>> testSetup, ScoreReader scoreReader,\n            int blockLength,\n            int places, double tol)\n        {\n            foreach ((Chromosome chromosome, List<Dictionary<string, object>> chromosomeTests) in testSetup)\n            {\n                foreach (Dictionary<string, object> chromosomeTest in chromosomeTests)\n                {\n                    var expectedScores = (List<double>) chromosomeTest[\"expectedScores\"];\n                    var startPosition  = (int) chromosomeTest[\"startPosition\"];\n                    for (var i = 0; i < expectedScores.Count; i++)\n                    {\n                        // Read score at position\n                        double score = scoreReader.GetScore(chromosome.Index, startPosition + i, \"A\");\n                        Assert.True(Math.Round(Math.Abs(expectedScores[i] - score), places) <= tol);\n                    }\n        
        }\n\n                var chromosomeStartPosition = (int) chromosomeTests[0][\"startPosition\"];\n                var chromosomeEndPosition   = (int) chromosomeTests[^1][\"endPosition\"];\n\n                Assert.Equal(double.NaN, scoreReader.GetScore(chromosome.Index, chromosomeStartPosition - 1, \"A\"));\n                Assert.Equal(double.NaN, scoreReader.GetScore(chromosome.Index, chromosomeEndPosition   + 1, \"A\"));\n            }\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/ScoreFile/GenericScoreTests.cs",
    "content": "using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing ErrorHandling.Exceptions;\nusing Genome;\nusing IO;\nusing IO.v2;\nusing Moq;\nusing SAUtils.GenericScore;\nusing SAUtils.GenericScore.GenericScoreParser;\nusing UnitTests.TestDataStructures;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.GenericScore;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\nusing Variants;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.ScoreFile\n{\n    public sealed class GenericScoreTests\n    {\n        [Fact]\n        public void TestScoreReader()\n        {\n            var      version     = new DataSourceVersion(\"source1\", \"v1\", DateTime.Now.Ticks, \"description\");\n            string[] nucleotides = {\"A\", \"C\", \"G\", \"T\"};\n            var writerSettings = new WriterSettings(\n                10_000,\n                nucleotides,\n                false,\n                EncoderType.ZeroToOne,\n                new ZeroToOneScoreEncoder(2, 1.0),\n                new ScoreJsonEncoder(\"TestKey\", \"TestSubKey\"),\n                new SaItemValidator(true, true)\n            );\n\n            using (var saStream = new MemoryStream())\n            using (var indexStream = new MemoryStream())\n            {\n                using (var saWriter = new ScoreFileWriter(\n                           writerSettings,\n                           saStream,\n                           indexStream,\n                           version,\n                           GetAllASequenceProvider(),\n                           SaCommon.SchemaVersion,\n                           skipIncorrectRefEntries: false,\n                           leaveOpen: true\n                       ))\n                {\n                    var items = GetSaItems(1000);\n                    saWriter.Write(items);\n                }\n\n                saStream.Position    = 0;\n                
indexStream.Position = 0;\n\n                var saReader = ScoreReader.Read(saStream, indexStream);\n\n                // before any SA existed\n                Assert.True(double.IsNaN(saReader.GetScore(0, 90, \"C\")));\n                // first entry of first block\n                Assert.False(double.IsNaN(saReader.GetScore(0, 100, \"C\")));\n                // last query of first block\n                Assert.False(double.IsNaN(saReader.GetScore(0, 480, \"C\")));\n                // between first and second block\n                Assert.True(double.IsNaN(saReader.GetScore(0, 488, \"C\")));\n                // first entry of second block\n                Assert.False(double.IsNaN(saReader.GetScore(0, 490, \"C\")));\n                // unknown allele\n                Assert.True(double.IsNaN(saReader.GetScore(0, 490, \"K\")));\n            }\n        }\n\n        [Fact]\n        public void TestParRegion()\n        {\n            var      version     = new DataSourceVersion(\"source1\", \"v1\", DateTime.Now.Ticks, \"description\");\n            string[] nucleotides = {\"A\", \"C\", \"G\", \"T\"};\n            var writerSettings = new WriterSettings(\n                10_000,\n                nucleotides,\n                false,\n                EncoderType.ZeroToOne,\n                new ZeroToOneScoreEncoder(2, 1.0),\n                new ScoreJsonEncoder(\"TestKey\", \"TestSubKey\"),\n                new SaItemValidator(true, true)\n            );\n\n            var count = 1000;\n\n            using (var saStream = new MemoryStream())\n            using (var indexStream = new MemoryStream())\n            {\n                using (var saWriter = new ScoreFileWriter(\n                           writerSettings,\n                           saStream,\n                           indexStream,\n                           version,\n                           GetAllASequenceProvider(),\n                           SaCommon.SchemaVersion,\n                           
skipIncorrectRefEntries: false,\n                           leaveOpen: true\n                       ))\n                {\n                    IEnumerable<GenericScoreItem> items = GetParRegionItems(count);\n                    saWriter.Write(items);\n                }\n\n                saStream.Position    = 0;\n                indexStream.Position = 0;\n\n                var saReader = ScoreReader.Read(saStream, indexStream);\n\n                var position = 10_010;\n                for (int i = 0; i < count; i++, position += 2)\n                {\n                    Assert.False(double.IsNaN(saReader.GetScore(23, position, \"C\")));\n                }\n            }\n        }\n\n        [Fact]\n        public void TestWriteUnknownAllele()\n        {\n            var      version     = new DataSourceVersion(\"source1\", \"v1\", DateTime.Now.Ticks, \"description\");\n            string[] nucleotides = {\"A\", \"C\", \"G\", \"T\"};\n            var writerSettings = new WriterSettings(\n                10_000,\n                nucleotides,\n                false,\n                EncoderType.ZeroToOne,\n                new ZeroToOneScoreEncoder(2, 1.0),\n                new ScoreJsonEncoder(\"TestKey\", \"TestSubKey\"),\n                new SaItemValidator(true, true)\n            );\n\n            var position = 10_010;\n            using (var saStream = new MemoryStream())\n            using (var indexStream = new MemoryStream())\n            {\n                using (var saWriter = new ScoreFileWriter(\n                           writerSettings,\n                           saStream,\n                           indexStream,\n                           version,\n                           GetAllASequenceProvider(),\n                           SaCommon.SchemaVersion,\n                           skipIncorrectRefEntries: false,\n                           leaveOpen: true\n                       ))\n                {\n                    
IEnumerable<GenericScoreItem> items = new List<GenericScoreItem>\n                    {\n                        new(ChromosomeUtilities.Chr1, position, \"A\", \"K\", 0.5),\n                    };\n                    saWriter.Write(items);\n\n\n                    saStream.Position    = 0;\n                    indexStream.Position = 0;\n\n                    var saReader = ScoreReader.Read(saStream, indexStream);\n                    Assert.True(double.IsNaN(saReader.GetScore(ChromosomeUtilities.Chr1.Index, position, \"A\")));\n                    Assert.True(double.IsNaN(saReader.GetScore(ChromosomeUtilities.Chr1.Index, position, \"C\")));\n                    Assert.True(double.IsNaN(saReader.GetScore(ChromosomeUtilities.Chr1.Index, position, \"G\")));\n                    Assert.True(double.IsNaN(saReader.GetScore(ChromosomeUtilities.Chr1.Index, position, \"T\")));\n                }\n            }\n        }\n\n        [Fact]\n        public void TestOutOfOrderWriting()\n        {\n            var      version     = new DataSourceVersion(\"source1\", \"v1\", DateTime.Now.Ticks, \"description\");\n            string[] nucleotides = {\"A\", \"C\", \"G\", \"T\"};\n            var writerSettings = new WriterSettings(\n                10_000,\n                nucleotides,\n                false,\n                EncoderType.ZeroToOne,\n                new ZeroToOneScoreEncoder(2, 1.0),\n                new ScoreJsonEncoder(\"TestKey\", \"TestSubKey\"),\n                new SaItemValidator(true, true)\n            );\n\n            var position = 10_010;\n            using (var saStream = new MemoryStream())\n            using (var indexStream = new MemoryStream())\n            {\n                using (var saWriter = new ScoreFileWriter(\n                           writerSettings,\n                           saStream,\n                           indexStream,\n                           version,\n                           GetAllASequenceProvider(),\n                
           SaCommon.SchemaVersion,\n                           skipIncorrectRefEntries: false,\n                           leaveOpen: true\n                       ))\n                {\n                    IEnumerable<GenericScoreItem> items = new List<GenericScoreItem>\n                    {\n                        new(ChromosomeUtilities.Chr1, position, \"A\", \"C\", 0.5),\n                        new(ChromosomeUtilities.Chr1, position - 1, \"A\", \"G\", 0.5),\n                    };\n\n                    Assert.Throws<UserErrorException>(() => saWriter.Write(items));\n                }\n            }\n        }\n\n        [Fact]\n        public void TestParRegion2()\n        {\n            var      version     = new DataSourceVersion(\"source1\", \"v1\", DateTime.Now.Ticks, \"description\");\n            string[] nucleotides = {\"A\", \"C\", \"G\", \"T\"};\n            var writerSettings = new WriterSettings(\n                10_000,\n                nucleotides,\n                false,\n                EncoderType.ZeroToOne,\n                new ZeroToOneScoreEncoder(2, 1.0),\n                new ScoreJsonEncoder(\"TestKey\", \"TestSubKey\"),\n                new SaItemValidator(true, true)\n            );\n\n            var position = 10_010;\n            using (var dataStream = new MemoryStream())\n            using (var indexStream = new MemoryStream())\n            {\n                using (var saWriter = new ScoreFileWriter(\n                           writerSettings,\n                           dataStream,\n                           indexStream,\n                           version,\n                           GetAllASequenceProvider(),\n                           SaCommon.SchemaVersion,\n                           skipIncorrectRefEntries: false,\n                           leaveOpen: true\n                       ))\n                {\n                    IEnumerable<GenericScoreItem> items = new List<GenericScoreItem>\n                    {\n           
             new(ChromosomeUtilities.ChrY, position, \"N\", \"C\", 0.5),\n                    };\n                    saWriter.Write(items);\n                }\n\n                dataStream.Position  = 0;\n                indexStream.Position = 0;\n\n                var saReader = ScoreReader.Read(dataStream, indexStream);\n                Assert.Equal(0.5, saReader.GetScore(23, position, \"C\"));\n            }\n        }\n\n        [Fact]\n        public void SchemaVersionTest()\n        {\n            var      version     = new DataSourceVersion(\"source1\", \"v1\", DateTime.Now.Ticks, \"description\");\n            string[] nucleotides = {\"A\", \"C\", \"G\", \"T\"};\n            var writerSettings = new WriterSettings(\n                10_000,\n                nucleotides,\n                false,\n                EncoderType.ZeroToOne,\n                new ZeroToOneScoreEncoder(2, 1.0),\n                new ScoreJsonEncoder(\"TestKey\", \"TestSubKey\"),\n                new SaItemValidator(true, true)\n            );\n\n            var position = 10_010;\n            using (var dataStream = new MemoryStream())\n            using (var indexStream = new MemoryStream())\n            {\n                using (var saWriter = new ScoreFileWriter(\n                           writerSettings,\n                           dataStream,\n                           indexStream,\n                           version,\n                           GetAllASequenceProvider(),\n                           SaCommon.SchemaVersion + SaCommon.SchemaVersion,\n                           skipIncorrectRefEntries: false,\n                           leaveOpen: true\n                       ))\n                {\n                    IEnumerable<GenericScoreItem> items = new List<GenericScoreItem>\n                    {\n                        new(ChromosomeUtilities.Chr1, position, \"A\", \"C\", 0.5),\n                    };\n                    saWriter.Write(items);\n                }\n\n      
          dataStream.Position  = 0;\n                indexStream.Position = 0;\n\n                Assert.Throws<UserErrorException>(() => ScoreReader.Read(dataStream, indexStream));\n            }\n        }\n\n        [Fact]\n        public void TestHeader()\n        {\n            var testData = new List<(FileType GsaIndex, uint GuardInt, ushort)>\n            {\n                (FileType.GsaIndex, SaCommon.GuardInt, 1),  // Incorrect File Type\n                (FileType.GsaWriter, SaCommon.GuardInt, 2), // Incorrect File Format Version\n                (FileType.GsaWriter, 2, 1)                  // Incorrect Guard Int\n            };\n\n            foreach ((FileType fileType, uint guardInt, ushort fileFormatVersion) in testData)\n            {\n                var writerStream = PrepareHeaderTestData(fileType, guardInt, fileFormatVersion);\n                Assert.Throws<UserErrorException>(() => ScoreReader.Read(writerStream, null));\n            }\n        }\n\n        private MemoryStream PrepareHeaderTestData(FileType fileType, uint guardInt, ushort fileFormatVersion)\n        {\n            var writerStream = new MemoryStream();\n            var writer       = new ExtendedBinaryWriter(writerStream, System.Text.Encoding.Default);\n            var header       = new Header(fileType, fileFormatVersion);\n\n            header.Write(writer);\n            writer.WriteOpt(1); // FilePairId\n            writer.Write(guardInt);\n            writerStream.Position = 0;\n\n            return writerStream;\n        }\n\n        // [Fact]\n        // TODO Understand what this test is doing\n        // public void RemoveConflictingItems()\n        // {\n        //     const int blockLength = 10_000;\n        //     string[]  nucleotides = {\"A\", \"C\", \"G\", \"T\"};\n        //     var       version     = new DataSourceVersion(\"source1\", \"v1\", DateTime.Now.Ticks, \"description\");\n        //\n        //     using (var saStream = new MemoryStream())\n        //     
using (var indexStream = new MemoryStream())\n        //     using (var saWriter = new ScoreFileWriter(saStream, indexStream, version, GetAllASequenceProvider(), \"dbsnp\",\n        //                SaCommon.SchemaVersion, nucleotides, blockLength, GenomeAssembly.GRCh37, 1, false, true, false))\n        //     {\n        //         Assert.Equal(0, saWriter.Write(GetConflictingGnomadItems()));\n        //     }\n        // }\n\n        private static IEnumerable<GenericScoreItem> GetSaItems(int count)\n        {\n            var items    = new List<GenericScoreItem>();\n            var position = 100;\n            var random   = new Random();\n            for (int i = 0; i < count; i++, position += 5)\n            {\n                double score = Math.Round(random.NextDouble(), 2);\n                items.Add(new GenericScoreItem(ChromosomeUtilities.Chr1, position, \"A\", \"C\", score));\n            }\n\n            return items;\n        }\n\n        private static IEnumerable<GenericScoreItem> GetParRegionItems(int count)\n        {\n            var items    = new List<GenericScoreItem>();\n            var position = 10_010;\n            var random   = new Random();\n            for (int i = 0; i < count; i++, position += 2)\n            {\n                double score = Math.Round(random.NextDouble(), 2);\n                items.Add(new GenericScoreItem(ChromosomeUtilities.ChrY, position, \"A\", \"C\", score));\n            }\n\n            return items;\n        }\n\n        [Fact]\n        public void WrongRefAllele_ThrowUserException()\n        {\n            var saItem = new GenericScoreItem(ChromosomeUtilities.Chr1, 100, \"C\", \"T\", 0.9);\n            Assert.Throws<InvalidDataException>(() => WriteCustomSaItem(saItem, false));\n            WriteCustomSaItem(saItem, true);\n        }\n\n\n        private static void WriteCustomSaItem(GenericScoreItem customItem, bool skipIncorrectRefEntries)\n        {\n            const int blockLength = 10_000;\n         
   string[]  nucleotides = {\"A\", \"C\", \"G\", \"T\"};\n            var       version     = new DataSourceVersion(\"source1\", \"v1\", DateTime.Now.Ticks, \"description\");\n\n            var writerSettings = new WriterSettings(\n                blockLength,\n                nucleotides,\n                false,\n                EncoderType.ZeroToOne,\n                new ZeroToOneScoreEncoder(2, 1.0),\n                new ScoreJsonEncoder(\"TestKey\", \"TestSubKey\"),\n                new SaItemValidator(true, !skipIncorrectRefEntries)\n            );\n\n            using (var writeStream = new MemoryStream())\n            using (var indexStream = new MemoryStream())\n            using (var scoreFileWriter = new ScoreFileWriter(\n                       writerSettings,\n                       writeStream,\n                       indexStream,\n                       version,\n                       GetAllASequenceProvider(),\n                       SaCommon.SchemaVersion,\n                       skipIncorrectRefEntries,\n                       true\n                   ))\n            {\n                scoreFileWriter.Write(new[] {customItem});\n            }\n        }\n\n        private static Stream GetChr22_17467787_17467799_genome()\n        {\n            var stream = new MemoryStream();\n            var writer = new StreamWriter(stream);\n\n            writer.WriteLine(\"##gnomAD\");\n            writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\");\n            writer.WriteLine(\n                
\"22\\t17467787\\trs1013532764\\tAAAAG\\tA\\t5607.38\\tPASS\\tAC=9;AN=7342;AF=0.00122582;rf_tp_probability=0.526938;FS=1.835;InbreedingCoeff=-0.0586;MQ=60.31;MQRankSum=-0.363;QD=12.01;ReadPosRankSum=0.416;SOR=0.869;BaseQRankSum=0.067;ClippingRankSum=0.263;DP=659925;VQSLOD=-0.9495;VQSR_culprit=FS;variant_type=indel;allele_type=del;n_alt_alleles=1;pab_max=0.864166;gq_hist_alt_bin_freq=0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|17;gq_hist_all_bin_freq=2625|6415|2399|2552|894|245|475|590|299|567|573|228|560|58|171|68|135|8|78|194;dp_hist_alt_bin_freq=0|0|0|2|4|6|2|2|0|1|0|0|0|0|0|0|0|0|0|0;dp_hist_alt_n_larger=0;dp_hist_all_bin_freq=4|18|221|1132|2818|4248|4392|3451|2107|976|414|186|95|56|40|33|32|20|18|17;dp_hist_all_n_larger=32;ab_hist_alt_bin_freq=0|0|0|0|0|0|2|1|4|1|2|5|2|0|0|0|0|0|0|0;AC_nfe_seu=0;AN_nfe_seu=38;AF_nfe_seu=0;nhomalt_nfe_seu=0;controls_AC_afr_male=1;controls_AN_afr_male=132;controls_AF_afr_male=0.00757576;controls_nhomalt_afr_male=0;non_topmed_AC_amr=1;non_topmed_AN_amr=168;non_topmed_AF_amr=0.00595238;non_topmed_nhomalt_amr=0;AC_raw=9;AN_raw=29502;AF_raw=0.000305064;nhomalt_raw=0;AC_fin_female=0;AN_fin_female=598;AF_fin_female=0;nhomalt_fin_female=0;non_neuro_AC_asj_female=0;non_neuro_AN_asj_female=12;non_neuro_AF_asj_female=0;non_neuro_nhomalt_asj_female=0;non_neuro_AC_afr_male=1;non_neuro_AN_afr_male=154;non_neuro_AF_afr_male=0.00649351;non_neuro_nhomalt_afr_male=0;AC_afr_male=1;AN_afr_male=446;AF_afr_male=0.00224215;nhomalt_afr_male=0;AC_afr=2;AN_afr=756;AF_afr=0.0026455;nhomalt_afr=0;non_neuro_AC_afr_female=1;non_neuro_AN_afr_female=164;non_neuro_AF_afr_female=0.00609756;non_neuro_nhomalt_afr_female=0;non_topmed_AC_amr_female=1;non_topmed_AN_amr_female=72;non_topmed_AF_amr_female=0.0138889;non_topmed_nhomalt_amr_female=0;non_topmed_AC_oth_female=2;non_topmed_AN_oth_female=110;non_topmed_AF_oth_female=0.0181818;non_topmed_nhomalt_oth_female=0;AC_eas_female=0;AN_eas_female=12;AF_eas_female=0;nhomalt_eas_female=0;AC_afr_female=1;AN_afr_female=310;AF_afr
_female=0.00322581;nhomalt_afr_female=0;non_neuro_AC_female=2;non_neuro_AN_female=2324;non_neuro_AF_female=0.000860585;non_neuro_nhomalt_female=0;controls_AC_afr=1;controls_AN_afr=228;controls_AF_afr=0.00438596;controls_nhomalt_afr=0;AC_nfe_onf=1;AN_nfe_onf=628;AF_nfe_onf=0.00159236;nhomalt_nfe_onf=0;controls_AC_fin_male=0;controls_AN_fin_male=200;controls_AF_fin_male=0;controls_nhomalt_fin_male=0;non_neuro_AC_nfe_nwe=2;non_neuro_AN_nfe_nwe=2582;non_neuro_AF_nfe_nwe=0.000774593;non_neuro_nhomalt_nfe_nwe=0;AC_fin_male=0;AN_fin_male=526;AF_fin_male=0;nhomalt_fin_male=0;AC_nfe_female=0;AN_nfe_female=2104;AF_nfe_female=0;nhomalt_nfe_female=0;AC_amr=1;AN_amr=178;AF_amr=0.00561798;nhomalt_amr=0;non_topmed_AC_nfe_male=3;non_topmed_AN_nfe_male=1778;non_topmed_AF_nfe_male=0.00168729;non_topmed_nhomalt_nfe_male=0;AC_eas=0;AN_eas=48;AF_eas=0;nhomalt_eas=0;nhomalt=0;non_neuro_AC_nfe_female=0;non_neuro_AN_nfe_female=1840;non_neuro_AF_nfe_female=0;non_neuro_nhomalt_nfe_female=0;non_neuro_AC_afr=2;non_neuro_AN_afr=318;non_neuro_AF_afr=0.00628931;non_neuro_nhomalt_afr=0;controls_AC_raw=2;controls_AN_raw=10110;controls_AF_raw=0.000197824;controls_nhomalt_raw=0;controls_AC_male=2;controls_AN_male=1340;controls_AF_male=0.00149254;controls_nhomalt_male=0;non_topmed_AC_male=5;non_topmed_AN_male=3004;non_topmed_AF_male=0.00166445;non_topmed_nhomalt_male=0;controls_AC_nfe_female=0;controls_AN_nfe_female=740;controls_AF_nfe_female=0;controls_nhomalt_nfe_female=0;non_neuro_AC_amr=0;non_neuro_AN_amr=114;non_neuro_AF_amr=0;non_neuro_nhomalt_amr=0;non_neuro_AC_eas_female=0;non_neuro_AN_eas_female=12;non_neuro_AF_eas_female=0;non_neuro_nhomalt_eas_female=0;AC_asj_male=1;AN_asj_male=50;AF_asj_male=0.02;nhomalt_asj_male=0;controls_AC_nfe_male=1;controls_AN_nfe_male=908;controls_AF_nfe_male=0.00110132;controls_nhomalt_nfe_male=0;non_neuro_AC_fin=0;non_neuro_AN_fin=378;non_neuro_AF_fin=0;non_neuro_nhomalt_fin=0;AC_oth_female=2;AN_oth_female=112;AF_oth_female=0.0178571;nhomalt_oth_female=0;controls_
AC_nfe=1;controls_AN_nfe=1648;controls_AF_nfe=0.000606796;controls_nhomalt_nfe=0;controls_AC_oth_female=0;controls_AN_oth_female=48;controls_AF_oth_female=0;controls_nhomalt_oth_female=0;controls_AC_asj=0;controls_AN_asj=8;controls_AF_asj=0;controls_nhomalt_asj=0;non_neuro_AC_amr_male=0;non_neuro_AN_amr_male=58;non_neuro_AF_amr_male=0;non_neuro_nhomalt_amr_male=0;controls_AC_nfe_nwe=1;controls_AN_nfe_nwe=308;controls_AF_nfe_nwe=0.00324675;controls_nhomalt_nfe_nwe=0;AC_nfe_nwe=2;AN_nfe_nwe=2906;AF_nfe_nwe=0.000688231;nhomalt_nfe_nwe=0;controls_AC_nfe_seu=0;controls_AN_nfe_seu=16;controls_AF_nfe_seu=0;controls_nhomalt_nfe_seu=0;non_neuro_AC_amr_female=0;non_neuro_AN_amr_female=56;non_neuro_AF_amr_female=0;non_neuro_nhomalt_amr_female=0;non_neuro_AC_nfe_onf=1;non_neuro_AN_nfe_onf=464;non_neuro_AF_nfe_onf=0.00215517;non_neuro_nhomalt_nfe_onf=0;non_topmed_AC_eas_male=0;non_topmed_AN_eas_male=34;non_topmed_AF_eas_male=0;non_topmed_nhomalt_eas_male=0;controls_AC_amr_female=0;controls_AN_amr_female=16;controls_AF_amr_female=0;controls_nhomalt_amr_female=0;non_neuro_AC_fin_male=0;non_neuro_AN_fin_male=200;non_neuro_AF_fin_male=0;non_neuro_nhomalt_fin_male=0;AC_female=4;AN_female=3236;AF_female=0.00123609;nhomalt_female=0;non_neuro_AC_oth_male=0;non_neuro_AN_oth_male=84;non_neuro_AF_oth_male=0;non_neuro_nhomalt_oth_male=0;non_topmed_AC_nfe_est=0;non_topmed_AN_nfe_est=1352;non_topmed_AF_nfe_est=0;non_topmed_nhomalt_nfe_est=0;non_topmed_AC_nfe_nwe=2;non_topmed_AN_nfe_nwe=1632;non_topmed_AF_nfe_nwe=0.00122549;non_topmed_nhomalt_nfe_nwe=0;non_topmed_AC_amr_male=0;non_topmed_AN_amr_male=96;non_topmed_AF_amr_male=0;non_topmed_nhomalt_amr_male=0;non_topmed_AC_nfe_onf=1;non_topmed_AN_nfe_onf=448;non_topmed_AF_nfe_onf=0.00223214;non_topmed_nhomalt_nfe_onf=0;controls_AC_eas_male=0;controls_AN_eas_male=16;controls_AF_eas_male=0;controls_nhomalt_eas_male=0;controls_AC_oth_male=0;controls_AN_oth_male=52;controls_AF_oth_male=0;controls_nhomalt_oth_male=0;non_topmed_AC=9;non_topmed_AN=5806;
non_topmed_AF=0.00155012;non_topmed_nhomalt=0;controls_AC_fin=0;controls_AN_fin=378;controls_AF_fin=0;controls_nhomalt_fin=0;non_neuro_AC_nfe=3;non_neuro_AN_nfe=4272;non_neuro_AF_nfe=0.000702247;non_neuro_nhomalt_nfe=0;non_neuro_AC_fin_female=0;non_neuro_AN_fin_female=178;non_neuro_AF_fin_female=0;non_neuro_nhomalt_fin_female=0;non_topmed_AC_nfe_seu=0;non_topmed_AN_nfe_seu=38;non_topmed_AF_nfe_seu=0;non_topmed_nhomalt_nfe_seu=0;controls_AC_eas_female=0;controls_AN_eas_female=12;controls_AF_eas_female=0;controls_nhomalt_eas_female=0;non_topmed_AC_asj=1;non_topmed_AN_asj=38;non_topmed_AF_asj=0.0263158;non_topmed_nhomalt_asj=0;controls_AC_nfe_onf=0;controls_AN_nfe_onf=124;controls_AF_nfe_onf=0;controls_nhomalt_nfe_onf=0;non_neuro_AC=7;non_neuro_AN=5332;non_neuro_AF=0.00131283;non_neuro_nhomalt=0;non_topmed_AC_nfe=3;non_topmed_AN_nfe=3470;non_topmed_AF_nfe=0.000864553;non_topmed_nhomalt_nfe=0;non_topmed_AC_raw=9;non_topmed_AN_raw=24832;non_topmed_AF_raw=0.000362436;non_topmed_nhomalt_raw=0;non_neuro_AC_nfe_est=0;non_neuro_AN_nfe_est=1212;non_neuro_AF_nfe_est=0;non_neuro_nhomalt_nfe_est=0;non_topmed_AC_oth_male=0;non_topmed_AN_oth_male=114;non_topmed_AF_oth_male=0;non_topmed_nhomalt_oth_male=0;AC_nfe_est=0;AN_nfe_est=1356;AF_nfe_est=0;nhomalt_nfe_est=0;non_topmed_AC_afr_male=1;non_topmed_AN_afr_male=434;non_topmed_AF_afr_male=0.00230415;non_topmed_nhomalt_afr_male=0;AC_eas_male=0;AN_eas_male=36;AF_eas_male=0;nhomalt_eas_male=0;controls_AC_eas=0;controls_AN_eas=28;controls_AF_eas=0;controls_nhomalt_eas=0;non_neuro_AC_eas_male=0;non_neuro_AN_eas_male=36;non_neuro_AF_eas_male=0;non_neuro_nhomalt_eas_male=0;non_neuro_AC_asj_male=1;non_neuro_AN_asj_male=44;non_neuro_AF_asj_male=0.0227273;non_neuro_nhomalt_asj_male=0;controls_AC_oth=0;controls_AN_oth=100;controls_AF_oth=0;controls_nhomalt_oth=0;AC_nfe=3;AN_nfe=4928;AF_nfe=0.000608766;nhomalt_nfe=0;non_topmed_AC_female=4;non_topmed_AN_female=2802;non_topmed_AF_female=0.00142755;non_topmed_nhomalt_female=0;non_neuro_AC_asj=1;non
_neuro_AN_asj=56;non_neuro_AF_asj=0.0178571;non_neuro_nhomalt_asj=0;non_topmed_AC_eas_female=0;non_topmed_AN_eas_female=10;non_topmed_AF_eas_female=0;non_topmed_nhomalt_eas_female=0;non_neuro_AC_raw=7;non_neuro_AN_raw=20066;non_neuro_AF_raw=0.000348849;non_neuro_nhomalt_raw=0;non_topmed_AC_eas=0;non_topmed_AN_eas=44;non_topmed_AF_eas=0;non_topmed_nhomalt_eas=0;non_topmed_AC_fin_male=0;non_topmed_AN_fin_male=526;non_topmed_AF_fin_male=0;non_topmed_nhomalt_fin_male=0;AC_fin=0;AN_fin=1124;AF_fin=0;nhomalt_fin=0;AC_nfe_male=3;AN_nfe_male=2824;AF_nfe_male=0.00106232;nhomalt_nfe_male=0;controls_AC_amr_male=0;controls_AN_amr_male=30;controls_AF_amr_male=0;controls_nhomalt_amr_male=0;controls_AC_afr_female=0;controls_AN_afr_female=96;controls_AF_afr_female=0;controls_nhomalt_afr_female=0;controls_AC_amr=0;controls_AN_amr=46;controls_AF_amr=0;controls_nhomalt_amr=0;AC_asj_female=0;AN_asj_female=22;AF_asj_female=0;nhomalt_asj_female=0;non_neuro_AC_eas=0;non_neuro_AN_eas=48;non_neuro_AF_eas=0;non_neuro_nhomalt_eas=0;non_neuro_AC_male=5;non_neuro_AN_male=3008;non_neuro_AF_male=0.00166223;non_neuro_nhomalt_male=0;AC_asj=1;AN_asj=72;AF_asj=0.0138889;nhomalt_asj=0;controls_AC_nfe_est=0;controls_AN_nfe_est=1200;controls_AF_nfe_est=0;controls_nhomalt_nfe_est=0;non_topmed_AC_asj_female=0;non_topmed_AN_asj_female=16;non_topmed_AF_asj_female=0;non_topmed_nhomalt_asj_female=0;non_topmed_AC_oth=2;non_topmed_AN_oth=224;non_topmed_AF_oth=0.00892857;non_topmed_nhomalt_oth=0;non_topmed_AC_fin_female=0;non_topmed_AN_fin_female=598;non_topmed_AF_fin_female=0;non_topmed_nhomalt_fin_female=0;AC_oth=2;AN_oth=236;AF_oth=0.00847458;nhomalt_oth=0;non_neuro_AC_nfe_male=3;non_neuro_AN_nfe_male=2432;non_neuro_AF_nfe_male=0.00123355;non_neuro_nhomalt_nfe_male=0;controls_AC_female=0;controls_AN_female=1096;controls_AF_female=0;controls_nhomalt_female=0;non_topmed_AC_fin=0;non_topmed_AN_fin=1124;non_topmed_AF_fin=0;non_topmed_nhomalt_fin=0;non_topmed_AC_nfe_female=0;non_topmed_AN_nfe_female=1692;non_topme
d_AF_nfe_female=0;non_topmed_nhomalt_nfe_female=0;controls_AC_asj_male=0;controls_AN_asj_male=2;controls_AF_asj_male=0;controls_nhomalt_asj_male=0;non_topmed_AC_asj_male=1;non_topmed_AN_asj_male=22;non_topmed_AF_asj_male=0.0454545;non_topmed_nhomalt_asj_male=0;non_neuro_AC_oth=1;non_neuro_AN_oth=146;non_neuro_AF_oth=0.00684932;non_neuro_nhomalt_oth=0;AC_male=5;AN_male=4106;AF_male=0.00121773;nhomalt_male=0;controls_AC_fin_female=0;controls_AN_fin_female=178;controls_AF_fin_female=0;controls_nhomalt_fin_female=0;controls_AC_asj_female=0;controls_AN_asj_female=6;controls_AF_asj_female=0;controls_nhomalt_asj_female=0;AC_amr_male=0;AN_amr_male=100;AF_amr_male=0;nhomalt_amr_male=0;AC_amr_female=1;AN_amr_female=78;AF_amr_female=0.0128205;nhomalt_amr_female=0;AC_oth_male=0;AN_oth_male=124;AF_oth_male=0;nhomalt_oth_male=0;non_neuro_AC_nfe_seu=0;non_neuro_AN_nfe_seu=14;non_neuro_AF_nfe_seu=0;non_neuro_nhomalt_nfe_seu=0;non_topmed_AC_afr_female=1;non_topmed_AN_afr_female=304;non_topmed_AF_afr_female=0.00328947;non_topmed_nhomalt_afr_female=0;non_topmed_AC_afr=2;non_topmed_AN_afr=738;non_topmed_AF_afr=0.00271003;non_topmed_nhomalt_afr=0;controls_AC=2;controls_AN=2436;controls_AF=0.000821018;controls_nhomalt=0;non_neuro_AC_oth_female=1;non_neuro_AN_oth_female=62;non_neuro_AF_oth_female=0.016129;non_neuro_nhomalt_oth_female=0;non_topmed_faf95_amr=0.000305;non_topmed_faf99_amr=0.000305;faf95_afr=0.00047001;faf99_afr=0.00046996;controls_faf95_afr=0.000224;controls_faf99_afr=0.000224;faf95_amr=0.000288;faf99_amr=0.000288;faf95_eas=0;faf99_eas=0;faf95=0.00063865;faf99=0.0006395;non_neuro_faf95_afr=0.00111728;non_neuro_faf99_afr=0.00111671;non_neuro_faf95_amr=0;non_neuro_faf99_amr=0;controls_faf95_nfe=3.1e-05;controls_faf99_nfe=3.1e-05;non_topmed_faf95=0.00080814;non_topmed_faf99=0.00080791;non_neuro_faf95_nfe=0.000191;non_neuro_faf99_nfe=0.00019047;non_neuro_faf95=0.00061599;non_neuro_faf99=0.00061588;non_topmed_faf95_nfe=0.0002353;non_topmed_faf99_nfe=0.00023558;controls_faf95_eas=
0;controls_faf99_eas=0;faf95_nfe=0.0001658;faf99_nfe=0.00016511;non_topmed_faf95_eas=0;non_topmed_faf99_eas=0;controls_faf95_amr=0;controls_faf99_amr=0;non_neuro_faf95_eas=0;non_neuro_faf99_eas=0;non_topmed_faf95_afr=0.00048118;non_topmed_faf99_afr=0.00048064;controls_faf95=0.00014568;controls_faf99=0.00014565;controls_popmax=afr;controls_AC_popmax=1;controls_AN_popmax=228;controls_AF_popmax=0.00438596;controls_nhomalt_popmax=0;popmax=amr;AC_popmax=1;AN_popmax=178;AF_popmax=0.00561798;nhomalt_popmax=0;age_hist_het_bin_freq=1|0|1|1|0|2|0|0|0|0;age_hist_het_n_smaller=1;age_hist_het_n_larger=0;age_hist_hom_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_hom_n_smaller=0;age_hist_hom_n_larger=0;non_neuro_popmax=afr;non_neuro_AC_popmax=2;non_neuro_AN_popmax=318;non_neuro_AF_popmax=0.00628931;non_neuro_nhomalt_popmax=0;non_topmed_popmax=amr;non_topmed_AC_popmax=1;non_topmed_AN_popmax=168;non_topmed_AF_popmax=0.00595238;non_topmed_nhomalt_popmax=0\");\n            writer.WriteLine(\n                \"22\\t17467793\\trs200526150\\tAAGAA\\tA\\t2.96178e+06\\tPASS\\tAC=25;AN=13820;AF=0.00180897;rf_tp_probability=0.6944;FS=0;InbreedingCoeff=-0.0226;MQ=61.07;MQRankSum=0.061;QD=19.6;ReadPosRankSum=0.177;SOR=0.694;BaseQRankSum=-0.031;ClippingRankSum=-0.053;DP=657153;VQSLOD=5.11;VQSR_culprit=FS;variant_type=multi-indel;allele_type=del;n_alt_alleles=2;pab_max=1;gq_hist_alt_bin_freq=0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|1|0|36;gq_hist_all_bin_freq=2892|4902|1140|827|277|141|343|478|268|556|481|207|525|87|178|89|169|40|119|5100;dp_hist_alt_bin_freq=0|0|0|1|5|8|10|5|4|1|0|0|1|1|0|0|0|0|1|0;dp_hist_alt_n_larger=0;dp_hist_all_bin_freq=3|25|286|1366|3137|4439|4355|3211|1821|851|331|175|79|53|32|42|22|27|18|12;dp_hist_all_n_larger=25;ab_hist_alt_bin_freq=0|0|0|0|0|0|2|2|6|8|3|6|7|2|0|0|0|0|0|0;AC_nfe_seu=0;AN_nfe_seu=60;AF_nfe_seu=0;nhomalt_nfe_seu=0;controls_AC_afr_male=0;controls_AN_afr_male=654;controls_AF_afr_male=0;controls_nhomalt_afr_male=0;non_topmed_AC_amr=17;non_topmed_AN_amr=272;non_topmed_AF_
amr=0.0625;non_topmed_nhomalt_amr=1;AC_raw=25;AN_raw=28996;AF_raw=0.000862188;nhomalt_raw=1;AC_fin_female=0;AN_fin_female=834;AF_fin_female=0;nhomalt_fin_female=0;non_neuro_AC_asj_female=0;non_neuro_AN_asj_female=38;non_neuro_AF_asj_female=0;non_neuro_nhomalt_asj_female=0;non_neuro_AC_afr_male=0;non_neuro_AN_afr_male=730;non_neuro_AF_afr_male=0;non_neuro_nhomalt_afr_male=0;AC_afr_male=2;AN_afr_male=2172;AF_afr_male=0.00092081;nhomalt_afr_male=0;AC_afr=2;AN_afr=3678;AF_afr=0.000543774;nhomalt_afr=0;non_neuro_AC_afr_female=0;non_neuro_AN_afr_female=754;non_neuro_AF_afr_female=0;non_neuro_nhomalt_afr_female=0;non_topmed_AC_amr_female=9;non_topmed_AN_amr_female=132;non_topmed_AF_amr_female=0.0681818;non_topmed_nhomalt_amr_female=1;non_topmed_AC_oth_female=2;non_topmed_AN_oth_female=190;non_topmed_AF_oth_female=0.0105263;non_topmed_nhomalt_oth_female=0;AC_eas_female=0;AN_eas_female=248;AF_eas_female=0;nhomalt_eas_female=0;AC_afr_female=0;AN_afr_female=1506;AF_afr_female=0;nhomalt_afr_female=0;non_neuro_AC_female=7;non_neuro_AN_female=4262;non_neuro_AF_female=0.00164242;non_neuro_nhomalt_female=0;controls_AC_afr=0;controls_AN_afr=1120;controls_AF_afr=0;controls_nhomalt_afr=0;AC_nfe_onf=0;AN_nfe_onf=904;AF_nfe_onf=0;nhomalt_nfe_onf=0;controls_AC_fin_male=0;controls_AN_fin_male=276;controls_AF_fin_male=0;controls_nhomalt_fin_male=0;non_neuro_AC_nfe_nwe=1;non_neuro_AN_nfe_nwe=3534;non_neuro_AF_nfe_nwe=0.000282965;non_neuro_nhomalt_nfe_nwe=0;AC_fin_male=0;AN_fin_male=708;AF_fin_male=0;nhomalt_fin_male=0;AC_nfe_female=1;AN_nfe_female=3128;AF_nfe_female=0.000319693;nhomalt_nfe_female=0;AC_amr=18;AN_amr=286;AF_amr=0.0629371;nhomalt_amr=1;non_topmed_AC_nfe_male=1;non_topmed_AN_nfe_male=2566;non_topmed_AF_nfe_male=0.000389712;non_topmed_nhomalt_nfe_male=0;AC_eas=0;AN_eas=656;AF_eas=0;nhomalt_eas=0;nhomalt=1;non_neuro_AC_nfe_female=1;non_neuro_AN_nfe_female=2732;non_neuro_AF_nfe_female=0.000366032;non_neuro_nhomalt_nfe_female=0;non_neuro_AC_afr=0;non_neuro_AN_afr=1484;non_neuro_AF_
afr=0;non_neuro_nhomalt_afr=0;controls_AC_raw=4;controls_AN_raw=9932;controls_AF_raw=0.000402739;controls_nhomalt_raw=0;controls_AC_male=3;controls_AN_male=2680;controls_AF_male=0.0011194;controls_nhomalt_male=0;non_topmed_AC_male=11;non_topmed_AN_male=6164;non_topmed_AF_male=0.00178456;non_topmed_nhomalt_male=0;controls_AC_nfe_female=0;controls_AN_nfe_female=1186;controls_AF_nfe_female=0;controls_nhomalt_nfe_female=0;non_neuro_AC_amr=9;non_neuro_AN_amr=184;non_neuro_AF_amr=0.048913;non_neuro_nhomalt_amr=0;non_neuro_AC_eas_female=0;non_neuro_AN_eas_female=248;non_neuro_AF_eas_female=0;non_neuro_nhomalt_eas_female=0;AC_asj_male=0;AN_asj_male=92;AF_asj_male=0;nhomalt_asj_male=0;controls_AC_nfe_male=0;controls_AN_nfe_male=1378;controls_AF_nfe_male=0;controls_nhomalt_nfe_male=0;non_neuro_AC_fin=0;non_neuro_AN_fin=532;non_neuro_AF_fin=0;non_neuro_nhomalt_fin=0;AC_oth_female=2;AN_oth_female=194;AF_oth_female=0.0103093;nhomalt_oth_female=0;controls_AC_nfe=0;controls_AN_nfe=2564;controls_AF_nfe=0;controls_nhomalt_nfe=0;controls_AC_oth_female=0;controls_AN_oth_female=76;controls_AF_oth_female=0;controls_nhomalt_oth_female=0;controls_AC_asj=0;controls_AN_asj=20;controls_AF_asj=0;controls_nhomalt_asj=0;non_neuro_AC_amr_male=4;non_neuro_AN_amr_male=74;non_neuro_AF_amr_male=0.0540541;non_neuro_nhomalt_amr_male=0;controls_AC_nfe_nwe=0;controls_AN_nfe_nwe=426;controls_AF_nfe_nwe=0;controls_nhomalt_nfe_nwe=0;AC_nfe_nwe=2;AN_nfe_nwe=3958;AF_nfe_nwe=0.000505306;nhomalt_nfe_nwe=0;controls_AC_nfe_seu=0;controls_AN_nfe_seu=26;controls_AF_nfe_seu=0;controls_nhomalt_nfe_seu=0;non_neuro_AC_amr_female=5;non_neuro_AN_amr_female=110;non_neuro_AF_amr_female=0.0454545;non_neuro_nhomalt_amr_female=0;non_neuro_AC_nfe_onf=0;non_neuro_AN_nfe_onf=704;non_neuro_AF_nfe_onf=0;non_neuro_nhomalt_nfe_onf=0;non_topmed_AC_eas_male=0;non_topmed_AN_eas_male=400;non_topmed_AF_eas_male=0;non_topmed_nhomalt_eas_male=0;controls_AC_amr_female=1;controls_AN_amr_female=46;controls_AF_amr_female=0.0217391;controls_nh
omalt_amr_female=0;non_neuro_AC_fin_male=0;non_neuro_AN_fin_male=276;non_neuro_AF_fin_male=0;non_neuro_nhomalt_fin_male=0;AC_female=13;AN_female=6098;AF_female=0.00213185;nhomalt_female=1;non_neuro_AC_oth_male=1;non_neuro_AN_oth_male=156;non_neuro_AF_oth_male=0.00641026;non_neuro_nhomalt_oth_male=0;non_topmed_AC_nfe_est=0;non_topmed_AN_nfe_est=2184;non_topmed_AF_nfe_est=0;non_topmed_nhomalt_nfe_est=0;non_topmed_AC_nfe_nwe=2;non_topmed_AN_nfe_nwe=2250;non_topmed_AF_nfe_nwe=0.000888889;non_topmed_nhomalt_nfe_nwe=0;non_topmed_AC_amr_male=8;non_topmed_AN_amr_male=140;non_topmed_AF_amr_male=0.0571429;non_topmed_nhomalt_amr_male=0;non_topmed_AC_nfe_onf=0;non_topmed_AN_nfe_onf=646;non_topmed_AF_nfe_onf=0;non_topmed_nhomalt_nfe_onf=0;controls_AC_eas_male=0;controls_AN_eas_male=244;controls_AF_eas_male=0;controls_nhomalt_eas_male=0;controls_AC_oth_male=0;controls_AN_oth_male=84;controls_AF_oth_male=0;controls_nhomalt_oth_male=0;non_topmed_AC=23;non_topmed_AN=11642;non_topmed_AF=0.00197561;non_topmed_nhomalt=1;controls_AC_fin=0;controls_AN_fin=532;controls_AF_fin=0;controls_nhomalt_fin=0;non_neuro_AC_nfe=1;non_neuro_AN_nfe=6226;non_neuro_AF_nfe=0.000160617;non_neuro_nhomalt_nfe=0;non_neuro_AC_fin_female=0;non_neuro_AN_fin_female=256;non_neuro_AF_fin_female=0;non_neuro_nhomalt_fin_female=0;non_topmed_AC_nfe_seu=0;non_topmed_AN_nfe_seu=60;non_topmed_AF_nfe_seu=0;non_topmed_nhomalt_nfe_seu=0;controls_AC_eas_female=0;controls_AN_eas_female=172;controls_AF_eas_female=0;controls_nhomalt_eas_female=0;non_topmed_AC_asj=0;non_topmed_AN_asj=68;non_topmed_AF_asj=0;non_topmed_nhomalt_asj=0;controls_AC_nfe_onf=0;controls_AN_nfe_onf=168;controls_AF_nfe_onf=0;controls_nhomalt_nfe_onf=0;non_neuro_AC=12;non_neuro_AN=9480;non_neuro_AF=0.00126582;non_neuro_nhomalt=0;non_topmed_AC_nfe=2;non_topmed_AN_nfe=5140;non_topmed_AF_nfe=0.000389105;non_topmed_nhomalt_nfe=0;non_topmed_AC_raw=23;non_topmed_AN_raw=24482;non_topmed_AF_raw=0.000939466;non_topmed_nhomalt_raw=1;non_neuro_AC_nfe_est=0;non_neuro_A
N_nfe_est=1962;non_neuro_AF_nfe_est=0;non_neuro_nhomalt_nfe_est=0;non_topmed_AC_oth_male=1;non_topmed_AN_oth_male=184;non_topmed_AF_oth_male=0.00543478;non_topmed_nhomalt_oth_male=0;AC_nfe_est=0;AN_nfe_est=2192;AF_nfe_est=0;nhomalt_nfe_est=0;non_topmed_AC_afr_male=1;non_topmed_AN_afr_male=2132;non_topmed_AF_afr_male=0.000469043;non_topmed_nhomalt_afr_male=0;AC_eas_male=0;AN_eas_male=408;AF_eas_male=0;nhomalt_eas_male=0;controls_AC_eas=0;controls_AN_eas=416;controls_AF_eas=0;controls_nhomalt_eas=0;non_neuro_AC_eas_male=0;non_neuro_AN_eas_male=408;non_neuro_AF_eas_male=0;non_neuro_nhomalt_eas_male=0;non_neuro_AC_asj_male=0;non_neuro_AN_asj_male=80;non_neuro_AF_asj_male=0;non_neuro_nhomalt_asj_male=0;controls_AC_oth=0;controls_AN_oth=160;controls_AF_oth=0;controls_nhomalt_oth=0;AC_nfe=2;AN_nfe=7114;AF_nfe=0.000281136;nhomalt_nfe=0;non_topmed_AC_female=12;non_topmed_AN_female=5478;non_topmed_AF_female=0.00219058;non_topmed_nhomalt_female=1;non_neuro_AC_asj=0;non_neuro_AN_asj=118;non_neuro_AF_asj=0;non_neuro_nhomalt_asj=0;non_topmed_AC_eas_female=0;non_topmed_AN_eas_female=240;non_topmed_AF_eas_female=0;non_topmed_nhomalt_eas_female=0;non_neuro_AC_raw=12;non_neuro_AN_raw=19660;non_neuro_AF_raw=0.000610376;non_neuro_nhomalt_raw=0;non_topmed_AC_eas=0;non_topmed_AN_eas=640;non_topmed_AF_eas=0;non_topmed_nhomalt_eas=0;non_topmed_AC_fin_male=0;non_topmed_AN_fin_male=708;non_topmed_AF_fin_male=0;non_topmed_nhomalt_fin_male=0;AC_fin=0;AN_fin=1542;AF_fin=0;nhomalt_fin=0;AC_nfe_male=1;AN_nfe_male=3986;AF_nfe_male=0.000250878;nhomalt_nfe_male=0;controls_AC_amr_male=3;controls_AN_amr_male=38;controls_AF_amr_male=0.0789474;controls_nhomalt_amr_male=0;controls_AC_afr_female=0;controls_AN_afr_female=466;controls_AF_afr_female=0;controls_nhomalt_afr_female=0;controls_AC_amr=4;controls_AN_amr=84;controls_AF_amr=0.047619;controls_nhomalt_amr=0;AC_asj_female=0;AN_asj_female=46;AF_asj_female=0;nhomalt_asj_female=0;non_neuro_AC_eas=0;non_neuro_AN_eas=656;non_neuro_AF_eas=0;non_neuro_nhomalt
_eas=0;non_neuro_AC_male=5;non_neuro_AN_male=5218;non_neuro_AF_male=0.000958222;non_neuro_nhomalt_male=0;AC_asj=0;AN_asj=138;AF_asj=0;nhomalt_asj=0;controls_AC_nfe_est=0;controls_AN_nfe_est=1944;controls_AF_nfe_est=0;controls_nhomalt_nfe_est=0;non_topmed_AC_asj_female=0;non_topmed_AN_asj_female=34;non_topmed_AF_asj_female=0;non_topmed_nhomalt_asj_female=0;non_topmed_AC_oth=3;non_topmed_AN_oth=374;non_topmed_AF_oth=0.00802139;non_topmed_nhomalt_oth=0;non_topmed_AC_fin_female=0;non_topmed_AN_fin_female=834;non_topmed_AF_fin_female=0;non_topmed_nhomalt_fin_female=0;AC_oth=3;AN_oth=406;AF_oth=0.00738916;nhomalt_oth=0;non_neuro_AC_nfe_male=0;non_neuro_AN_nfe_male=3494;non_neuro_AF_nfe_male=0;non_neuro_nhomalt_nfe_male=0;controls_AC_female=1;controls_AN_female=2216;controls_AF_female=0.000451264;controls_nhomalt_female=0;non_topmed_AC_fin=0;non_topmed_AN_fin=1542;non_topmed_AF_fin=0;non_topmed_nhomalt_fin=0;non_topmed_AC_nfe_female=1;non_topmed_AN_nfe_female=2574;non_topmed_AF_nfe_female=0.0003885;non_topmed_nhomalt_nfe_female=0;controls_AC_asj_male=0;controls_AN_asj_male=6;controls_AF_asj_male=0;controls_nhomalt_asj_male=0;non_topmed_AC_asj_male=0;non_topmed_AN_asj_male=34;non_topmed_AF_asj_male=0;non_topmed_nhomalt_asj_male=0;non_neuro_AC_oth=2;non_neuro_AN_oth=280;non_neuro_AF_oth=0.00714286;non_neuro_nhomalt_oth=0;AC_male=12;AN_male=7722;AF_male=0.001554;nhomalt_male=0;controls_AC_fin_female=0;controls_AN_fin_female=256;controls_AF_fin_female=0;controls_nhomalt_fin_female=0;controls_AC_asj_female=0;controls_AN_asj_female=14;controls_AF_asj_female=0;controls_nhomalt_asj_female=0;AC_amr_male=8;AN_amr_male=144;AF_amr_male=0.0555556;nhomalt_amr_male=0;AC_amr_female=10;AN_amr_female=142;AF_amr_female=0.0704225;nhomalt_amr_female=1;AC_oth_male=1;AN_oth_male=212;AF_oth_male=0.00471698;nhomalt_oth_male=0;non_neuro_AC_nfe_seu=0;non_neuro_AN_nfe_seu=26;non_neuro_AF_nfe_seu=0;non_neuro_nhomalt_nfe_seu=0;non_topmed_AC_afr_female=0;non_topmed_AN_afr_female=1474;non_topmed_AF_afr_f
emale=0;non_topmed_nhomalt_afr_female=0;non_topmed_AC_afr=1;non_topmed_AN_afr=3606;non_topmed_AF_afr=0.000277316;non_topmed_nhomalt_afr=0;controls_AC=4;controls_AN=4896;controls_AF=0.000816993;controls_nhomalt=0;non_neuro_AC_oth_female=1;non_neuro_AN_oth_female=124;non_neuro_AF_oth_female=0.00806452;non_neuro_nhomalt_oth_female=0;non_topmed_faf95_amr=0.0398231;non_topmed_faf99_amr=0.0398236;faf95_afr=9.592e-05;faf99_afr=9.609e-05;controls_faf95_afr=0;controls_faf99_afr=0;faf95_amr=0.0406793;faf99_amr=0.0406792;faf95_eas=0;faf99_eas=0;faf95=0.00125772;faf99=0.00125736;non_neuro_faf95_afr=0;non_neuro_faf99_afr=0;non_neuro_faf95_amr=0.0255171;non_neuro_faf99_amr=0.0255167;controls_faf95_nfe=0;controls_faf99_nfe=0;non_topmed_faf95=0.00134988;non_topmed_faf99=0.00134945;non_neuro_faf95_nfe=8e-06;non_neuro_faf99_nfe=8e-06;non_neuro_faf95=0.00072973;non_neuro_faf99=0.00073008;non_topmed_faf95_nfe=6.881e-05;non_topmed_faf99_nfe=6.877e-05;controls_faf95_eas=0;controls_faf99_eas=0;faf95_nfe=4.922e-05;faf99_nfe=4.923e-05;non_topmed_faf95_eas=0;non_topmed_faf99_eas=0;controls_faf95_amr=0.0162655;controls_faf99_amr=0.0162653;non_neuro_faf95_eas=0;non_neuro_faf99_eas=0;non_topmed_faf95_afr=1.4e-05;non_topmed_faf99_afr=1.4e-05;controls_faf95=0.00027835;controls_faf99=0.00027827;controls_popmax=amr;controls_AC_popmax=4;controls_AN_popmax=84;controls_AF_popmax=0.047619;controls_nhomalt_popmax=0;popmax=amr;AC_popmax=18;AN_popmax=286;AF_popmax=0.0629371;nhomalt_popmax=1;age_hist_het_bin_freq=0|0|2|1|1|1|0|0|0|0;age_hist_het_n_smaller=4;age_hist_het_n_larger=0;age_hist_hom_bin_freq=0|0|0|0|0|0|0|0|0|0;age_hist_hom_n_smaller=0;age_hist_hom_n_larger=0;non_neuro_popmax=amr;non_neuro_AC_popmax=9;non_neuro_AN_popmax=184;non_neuro_AF_popmax=0.048913;non_neuro_nhomalt_popmax=0;non_topmed_popmax=amr;non_topmed_AC_popmax=17;non_topmed_AN_popmax=272;non_topmed_AF_popmax=0.0625;non_topmed_nhomalt_popmax=1\");\n            writer.WriteLine(\n                
\"22\\t17467793\\trs200526150\\tAAGAA\\tA\\t2.96178e+06\\tPASS\\tAC=4501;AN=13820;AF=0.325687;rf_tp_probability=0.6944;FS=0;InbreedingCoeff=-0.0226;MQ=61.07;MQRankSum=0.061;QD=19.6;ReadPosRankSum=0.177;SOR=0.694;BaseQRankSum=-0.031;ClippingRankSum=-0.053;DP=657153;VQSLOD=5.11;VQSR_culprit=FS;variant_type=multi-indel;allele_type=del;n_alt_alleles=2;pab_max=1;gq_hist_alt_bin_freq=3|3|4|4|5|3|4|6|8|10|21|14|36|33|27|47|34|35|43|4884;gq_hist_all_bin_freq=2897|4907|1144|830|282|143|344|482|273|559|484|208|528|92|176|87|149|45|119|5070;dp_hist_alt_bin_freq=0|6|126|551|1133|1285|1033|600|260|102|40|27|13|13|3|11|1|6|7|2;dp_hist_alt_n_larger=5;dp_hist_all_bin_freq=3|25|286|1366|3137|4439|4355|3211|1821|851|331|175|79|53|32|42|22|27|18|12;dp_hist_all_n_larger=25;ab_hist_alt_bin_freq=0|7|1|7|36|124|277|456|835|741|1055|616|404|155|42|25|5|6|5|0;AC_nfe_seu=19;AN_nfe_seu=60;AF_nfe_seu=0.316667;nhomalt_nfe_seu=1;controls_AC_afr_male=325;controls_AN_afr_male=654;controls_AF_afr_male=0.496942;controls_nhomalt_afr_male=35;non_topmed_AC_amr=77;non_topmed_AN_amr=272;non_topmed_AF_amr=0.283088;non_topmed_nhomalt_amr=2;AC_raw=4527;AN_raw=28996;AF_raw=0.156125;nhomalt_raw=356;AC_fin_female=187;AN_fin_female=834;AF_fin_female=0.224221;nhomalt_fin_female=6;non_neuro_AC_asj_female=15;non_neuro_AN_asj_female=38;non_neuro_AF_asj_female=0.394737;non_neuro_nhomalt_asj_female=0;non_neuro_AC_afr_male=358;non_neuro_AN_afr_male=730;non_neuro_AF_afr_male=0.490411;non_neuro_nhomalt_afr_male=37;AC_afr_male=1071;AN_afr_male=2172;AF_afr_male=0.493094;nhomalt_afr_male=113;AC_afr=1825;AN_afr=3678;AF_afr=0.496194;nhomalt_afr=196;non_neuro_AC_afr_female=376;non_neuro_AN_afr_female=754;non_neuro_AF_afr_female=0.498674;non_neuro_nhomalt_afr_female=42;non_topmed_AC_amr_female=35;non_topmed_AN_amr_female=132;non_topmed_AF_amr_female=0.265152;non_topmed_nhomalt_amr_female=0;non_topmed_AC_oth_female=58;non_topmed_AN_oth_female=190;non_topmed_AF_oth_female=0.305263;non_topmed_nhomalt_oth_female=6;AC_eas_female=13
5;AN_eas_female=248;AF_eas_female=0.544355;nhomalt_eas_female=14;AC_afr_female=754;AN_afr_female=1506;AF_afr_female=0.500664;nhomalt_afr_female=83;non_neuro_AC_female=1325;non_neuro_AN_female=4262;non_neuro_AF_female=0.310887;non_neuro_nhomalt_female=93;controls_AC_afr=566;controls_AN_afr=1120;controls_AF_afr=0.505357;controls_nhomalt_afr=67;AC_nfe_onf=233;AN_nfe_onf=904;AF_nfe_onf=0.257743;nhomalt_nfe_onf=13;controls_AC_fin_male=58;controls_AN_fin_male=276;controls_AF_fin_male=0.210145;controls_nhomalt_fin_male=2;non_neuro_AC_nfe_nwe=797;non_neuro_AN_nfe_nwe=3534;non_neuro_AF_nfe_nwe=0.225523;non_neuro_nhomalt_nfe_nwe=38;AC_fin_male=146;AN_fin_male=708;AF_fin_male=0.206215;nhomalt_fin_male=4;AC_nfe_female=774;AN_nfe_female=3128;AF_nfe_female=0.247442;nhomalt_nfe_female=42;AC_amr=79;AN_amr=286;AF_amr=0.276224;nhomalt_amr=2;non_topmed_AC_nfe_male=636;non_topmed_AN_nfe_male=2566;non_topmed_AF_nfe_male=0.247857;non_topmed_nhomalt_nfe_male=33;AC_eas=359;AN_eas=656;AF_eas=0.547256;nhomalt_eas=35;nhomalt=352;non_neuro_AC_nfe_female=666;non_neuro_AN_nfe_female=2732;non_neuro_AF_nfe_female=0.243777;non_neuro_nhomalt_nfe_female=30;non_neuro_AC_afr=734;non_neuro_AN_afr=1484;non_neuro_AF_afr=0.494609;non_neuro_nhomalt_afr=79;controls_AC_raw=1673;controls_AN_raw=9932;controls_AF_raw=0.168445;controls_nhomalt_raw=138;controls_AC_male=920;controls_AN_male=2680;controls_AF_male=0.343284;controls_nhomalt_male=78;non_topmed_AC_male=2163;non_topmed_AN_male=6164;non_topmed_AF_male=0.350909;non_topmed_nhomalt_male=179;controls_AC_nfe_female=300;controls_AN_nfe_female=1186;controls_AF_nfe_female=0.252951;controls_nhomalt_nfe_female=11;non_neuro_AC_amr=55;non_neuro_AN_amr=184;non_neuro_AF_amr=0.298913;non_neuro_nhomalt_amr=1;non_neuro_AC_eas_female=135;non_neuro_AN_eas_female=248;non_neuro_AF_eas_female=0.544355;non_neuro_nhomalt_eas_female=14;AC_asj_male=34;AN_asj_male=92;AF_asj_male=0.369565;nhomalt_asj_male=5;controls_AC_nfe_male=360;controls_AN_nfe_male=1378;controls_AF_nfe_male=0.26
1248;controls_nhomalt_nfe_male=21;non_neuro_AC_fin=118;non_neuro_AN_fin=532;non_neuro_AF_fin=0.221805;non_neuro_nhomalt_fin=3;AC_oth_female=60;AN_oth_female=194;AF_oth_female=0.309278;nhomalt_oth_female=7;controls_AC_nfe=660;controls_AN_nfe=2564;controls_AF_nfe=0.25741;controls_nhomalt_nfe=32;controls_AC_oth_female=19;controls_AN_oth_female=76;controls_AF_oth_female=0.25;controls_nhomalt_oth_female=1;controls_AC_asj=9;controls_AN_asj=20;controls_AF_asj=0.45;controls_nhomalt_asj=1;non_neuro_AC_amr_male=24;non_neuro_AN_amr_male=74;non_neuro_AF_amr_male=0.324324;non_neuro_nhomalt_amr_male=1;controls_AC_nfe_nwe=99;controls_AN_nfe_nwe=426;controls_AF_nfe_nwe=0.232394;controls_nhomalt_nfe_nwe=5;AC_nfe_nwe=894;AN_nfe_nwe=3958;AF_nfe_nwe=0.225872;nhomalt_nfe_nwe=44;controls_AC_nfe_seu=10;controls_AN_nfe_seu=26;controls_AF_nfe_seu=0.384615;controls_nhomalt_nfe_seu=0;non_neuro_AC_amr_female=31;non_neuro_AN_amr_female=110;non_neuro_AF_amr_female=0.281818;non_neuro_nhomalt_amr_female=0;non_neuro_AC_nfe_onf=190;non_neuro_AN_nfe_onf=704;non_neuro_AF_nfe_onf=0.269886;non_neuro_nhomalt_nfe_onf=12;non_topmed_AC_eas_male=219;non_topmed_AN_eas_male=400;non_topmed_AF_eas_male=0.5475;non_topmed_nhomalt_eas_male=20;controls_AC_amr_female=18;controls_AN_amr_female=46;controls_AF_amr_female=0.391304;controls_nhomalt_amr_female=0;non_neuro_AC_fin_male=58;non_neuro_AN_fin_male=276;non_neuro_AF_fin_male=0.210145;non_neuro_nhomalt_fin_male=2;AC_female=1965;AN_female=6098;AF_female=0.322237;nhomalt_female=152;non_neuro_AC_oth_male=49;non_neuro_AN_oth_male=156;non_neuro_AF_oth_male=0.314103;non_neuro_nhomalt_oth_male=5;non_topmed_AC_nfe_est=577;non_topmed_AN_nfe_est=2184;non_topmed_AF_nfe_est=0.264194;non_topmed_nhomalt_nfe_est=32;non_topmed_AC_nfe_nwe=515;non_topmed_AN_nfe_nwe=2250;non_topmed_AF_nfe_nwe=0.228889;non_topmed_nhomalt_nfe_nwe=28;non_topmed_AC_amr_male=42;non_topmed_AN_amr_male=140;non_topmed_AF_amr_male=0.3;non_topmed_nhomalt_amr_male=2;non_topmed_AC_nfe_onf=169;non_topmed_AN_nfe_o
nf=646;non_topmed_AF_nfe_onf=0.26161;non_topmed_nhomalt_nfe_onf=8;controls_AC_eas_male=136;controls_AN_eas_male=244;controls_AF_eas_male=0.557377;controls_nhomalt_eas_male=15;controls_AC_oth_male=25;controls_AN_oth_male=84;controls_AF_oth_male=0.297619;controls_nhomalt_oth_male=4;non_topmed_AC=3972;non_topmed_AN=11642;non_topmed_AF=0.341178;non_topmed_nhomalt=324;controls_AC_fin=118;controls_AN_fin=532;controls_AF_fin=0.221805;controls_nhomalt_fin=3;non_neuro_AC_nfe=1506;non_neuro_AN_nfe=6226;non_neuro_AF_nfe=0.241889;non_neuro_nhomalt_nfe=73;non_neuro_AC_fin_female=60;non_neuro_AN_fin_female=256;non_neuro_AF_fin_female=0.234375;non_neuro_nhomalt_fin_female=1;non_topmed_AC_nfe_seu=19;non_topmed_AN_nfe_seu=60;non_topmed_AF_nfe_seu=0.316667;non_topmed_nhomalt_nfe_seu=1;controls_AC_eas_female=95;controls_AN_eas_female=172;controls_AF_eas_female=0.552326;controls_nhomalt_eas_female=12;non_topmed_AC_asj=24;non_topmed_AN_asj=68;non_topmed_AF_asj=0.352941;non_topmed_nhomalt_asj=1;controls_AC_nfe_onf=46;controls_AN_nfe_onf=168;controls_AF_nfe_onf=0.27381;controls_nhomalt_nfe_onf=4;non_neuro_AC=2909;non_neuro_AN=9480;non_neuro_AF=0.306857;non_neuro_nhomalt=207;non_topmed_AC_nfe=1280;non_topmed_AN_nfe=5140;non_topmed_AF_nfe=0.249027;non_topmed_nhomalt_nfe=69;non_topmed_AC_raw=3996;non_topmed_AN_raw=24482;non_topmed_AF_raw=0.163222;non_topmed_nhomalt_raw=327;non_neuro_AC_nfe_est=509;non_neuro_AN_nfe_est=1962;non_neuro_AF_nfe_est=0.259429;non_neuro_nhomalt_nfe_est=23;non_topmed_AC_oth_male=56;non_topmed_AN_oth_male=184;non_topmed_AF_oth_male=0.304348;non_topmed_nhomalt_oth_male=6;AC_nfe_est=579;AN_nfe_est=2192;AF_nfe_est=0.264142;nhomalt_nfe_est=32;non_topmed_AC_afr_male=1054;non_topmed_AN_afr_male=2132;non_topmed_AF_afr_male=0.494371;non_topmed_nhomalt_afr_male=113;AC_eas_male=224;AN_eas_male=408;AF_eas_male=0.54902;nhomalt_eas_male=21;controls_AC_eas=231;controls_AN_eas=416;controls_AF_eas=0.555288;controls_nhomalt_eas=27;non_neuro_AC_eas_male=224;non_neuro_AN_eas_male=408;no
n_neuro_AF_eas_male=0.54902;non_neuro_nhomalt_eas_male=21;non_neuro_AC_asj_male=31;non_neuro_AN_asj_male=80;non_neuro_AF_asj_male=0.3875;non_neuro_nhomalt_asj_male=5;controls_AC_oth=44;controls_AN_oth=160;controls_AF_oth=0.275;controls_nhomalt_oth=5;AC_nfe=1725;AN_nfe=7114;AF_nfe=0.24248;nhomalt_nfe=90;non_topmed_AC_female=1809;non_topmed_AN_female=5478;non_topmed_AF_female=0.33023;non_topmed_nhomalt_female=145;non_neuro_AC_asj=46;non_neuro_AN_asj=118;non_neuro_AF_asj=0.389831;non_neuro_nhomalt_asj=5;non_topmed_AC_eas_female=132;non_topmed_AN_eas_female=240;non_topmed_AF_eas_female=0.55;non_topmed_nhomalt_eas_female=14;non_neuro_AC_raw=2928;non_neuro_AN_raw=19660;non_neuro_AF_raw=0.148932;non_neuro_nhomalt_raw=211;non_topmed_AC_eas=351;non_topmed_AN_eas=640;non_topmed_AF_eas=0.548438;non_topmed_nhomalt_eas=34;non_topmed_AC_fin_male=146;non_topmed_AN_fin_male=708;non_topmed_AF_fin_male=0.206215;non_topmed_nhomalt_fin_male=4;AC_fin=333;AN_fin=1542;AF_fin=0.215953;nhomalt_fin=10;AC_nfe_male=951;AN_nfe_male=3986;AF_nfe_male=0.238585;nhomalt_nfe_male=48;controls_AC_amr_male=12;controls_AN_amr_male=38;controls_AF_amr_male=0.315789;controls_nhomalt_amr_male=0;controls_AC_afr_female=241;controls_AN_afr_female=466;controls_AF_afr_female=0.517167;controls_nhomalt_afr_female=32;controls_AC_amr=30;controls_AN_amr=84;controls_AF_amr=0.357143;controls_nhomalt_amr=0;AC_asj_female=18;AN_asj_female=46;AF_asj_female=0.391304;nhomalt_asj_female=0;non_neuro_AC_eas=359;non_neuro_AN_eas=656;non_neuro_AF_eas=0.547256;non_neuro_nhomalt_eas=35;non_neuro_AC_male=1584;non_neuro_AN_male=5218;non_neuro_AF_male=0.303565;non_neuro_nhomalt_male=114;AC_asj=52;AN_asj=138;AF_asj=0.376812;nhomalt_asj=5;controls_AC_nfe_est=505;controls_AN_nfe_est=1944;controls_AF_nfe_est=0.259774;controls_nhomalt_nfe_est=23;non_topmed_AC_asj_female=14;non_topmed_AN_asj_female=34;non_topmed_AF_asj_female=0.411765;non_topmed_nhomalt_asj_female=0;non_topmed_AC_oth=114;non_topmed_AN_oth=374;non_topmed_AF_oth=0.304813;non_t
opmed_nhomalt_oth=12;non_topmed_AC_fin_female=187;non_topmed_AN_fin_female=834;non_topmed_AF_fin_female=0.224221;non_topmed_nhomalt_fin_female=6;AC_oth=128;AN_oth=406;AF_oth=0.315271;nhomalt_oth=14;non_neuro_AC_nfe_male=840;non_neuro_AN_nfe_male=3494;non_neuro_AF_nfe_male=0.240412;non_neuro_nhomalt_nfe_male=43;controls_AC_female=738;controls_AN_female=2216;controls_AF_female=0.333032;controls_nhomalt_female=57;non_topmed_AC_fin=333;non_topmed_AN_fin=1542;non_topmed_AF_fin=0.215953;non_topmed_nhomalt_fin=10;non_topmed_AC_nfe_female=644;non_topmed_AN_nfe_female=2574;non_topmed_AF_nfe_female=0.250194;non_topmed_nhomalt_nfe_female=36;controls_AC_asj_male=4;controls_AN_asj_male=6;controls_AF_asj_male=0.666667;controls_nhomalt_asj_male=1;non_topmed_AC_asj_male=10;non_topmed_AN_asj_male=34;non_topmed_AF_asj_male=0.294118;non_topmed_nhomalt_asj_male=1;non_neuro_AC_oth=91;non_neuro_AN_oth=280;non_neuro_AF_oth=0.325;non_neuro_nhomalt_oth=11;AC_male=2536;AN_male=7722;AF_male=0.328412;nhomalt_male=200;controls_AC_fin_female=60;controls_AN_fin_female=256;controls_AF_fin_female=0.234375;controls_nhomalt_fin_female=1;controls_AC_asj_female=5;controls_AN_asj_female=14;controls_AF_asj_female=0.357143;controls_nhomalt_asj_female=0;AC_amr_male=42;AN_amr_male=144;AF_amr_male=0.291667;nhomalt_amr_male=2;AC_amr_female=37;AN_amr_female=142;AF_amr_female=0.260563;nhomalt_amr_female=0;AC_oth_male=68;AN_oth_male=212;AF_oth_male=0.320755;nhomalt_oth_male=7;non_neuro_AC_nfe_seu=10;non_neuro_AN_nfe_seu=26;non_neuro_AF_nfe_seu=0.384615;non_neuro_nhomalt_nfe_seu=0;non_topmed_AC_afr_female=739;non_topmed_AN_afr_female=1474;non_topmed_AF_afr_female=0.501357;non_topmed_nhomalt_afr_female=83;non_topmed_AC_afr=1793;non_topmed_AN_afr=3606;non_topmed_AF_afr=0.497227;non_topmed_nhomalt_afr=196;controls_AC=1658;controls_AN=4896;controls_AF=0.338644;controls_nhomalt=135;non_neuro_AC_oth_female=42;non_neuro_AN_oth_female=124;non_neuro_AF_oth_female=0.33871;non_neuro_nhomalt_oth_female=6;non_topmed_faf95_amr
=0.232194;non_topmed_faf99_amr=0.232194;faf95_afr=0.477244;faf99_afr=0.477244;controls_faf95_afr=0.470932;controls_faf99_afr=0.470932;faf95_amr=0.227168;faf99_amr=0.227169;faf95_eas=0.500629;faf99_eas=0.500629;faf95=0.317744;faf99=0.317744;non_neuro_faf95_afr=0.464967;non_neuro_faf99_afr=0.464967;non_neuro_faf95_amr=0.235846;non_neuro_faf99_amr=0.235846;controls_faf95_nfe=0.241154;controls_faf99_nfe=0.241154;non_topmed_faf95=0.332322;non_topmed_faf99=0.332323;non_neuro_faf95_nfe=0.231727;non_neuro_faf99_nfe=0.231728;non_neuro_faf95=0.297558;non_neuro_faf99=0.297559;non_topmed_faf95_nfe=0.237689;non_topmed_faf99_nfe=0.23769;controls_faf95_eas=0.49659;controls_faf99_eas=0.49659;faf95_nfe=0.232957;faf99_nfe=0.232956;non_topmed_faf95_eas=0.501191;non_topmed_faf99_eas=0.501191;controls_faf95_amr=0.257071;controls_faf99_amr=0.257071;non_neuro_faf95_eas=0.500629;non_neuro_faf99_eas=0.500629;non_topmed_faf95_afr=0.47807;non_topmed_faf99_afr=0.47807;controls_faf95=0.32508;controls_faf99=0.325081;controls_popmax=eas;controls_AC_popmax=231;controls_AN_popmax=416;controls_AF_popmax=0.555288;controls_nhomalt_popmax=27;popmax=eas;AC_popmax=359;AN_popmax=656;AF_popmax=0.547256;nhomalt_popmax=35;age_hist_het_bin_freq=128|162|214|283|349|260|234|152|93|46;age_hist_het_n_smaller=717;age_hist_het_n_larger=23;age_hist_hom_bin_freq=9|11|18|24|26|15|20|8|12|6;age_hist_hom_n_smaller=82;age_hist_hom_n_larger=4;non_neuro_popmax=eas;non_neuro_AC_popmax=359;non_neuro_AN_popmax=656;non_neuro_AF_popmax=0.547256;non_neuro_nhomalt_popmax=35;non_topmed_popmax=eas;non_topmed_AC_popmax=351;non_topmed_AN_popmax=640;non_topmed_AF_popmax=0.548438;non_topmed_nhomalt_popmax=34\");\n\n            writer.Flush();\n\n            stream.Position = 0;\n            return stream;\n        }\n\n        private static IEnumerable<GenericScoreItem> GetConflictingGnomadItems()\n        {\n            var sequence = new SimpleSequence(\n                new string('T', VariantUtils.MaxUpstreamLength) + 
\"AAAGAAAGAAAG\",\n                17467787                                        - 1 - VariantUtils.MaxUpstreamLength\n            );\n            var sequenceProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh38, sequence, ChromosomeUtilities.RefNameToChromosome);\n\n            var parserSettings = new ParserSettings(\n                new ColumnIndex(0, 2, 3, 4, 5, null),\n                new[] {\"A\", \"C\", \"G\", \"T\"},\n                GenericScoreParser.MaxRepresentativeScores\n            );\n\n            var gnomadReader = new GenericScoreParser(parserSettings, new StreamReader(GetChr22_17467787_17467799_genome()), null);\n\n            return gnomadReader.GetItems();\n        }\n\n        public static ISequenceProvider GetAllASequenceProvider(GenomeAssembly assembly = GenomeAssembly.GRCh37)\n        {\n            var seqProvider = new Mock<ISequenceProvider>();\n            seqProvider.SetupGet(x => x.Assembly).Returns(assembly);\n            seqProvider.Setup(x => x.Sequence.Substring(It.IsAny<int>(), 1)).Returns(\"A\");\n\n            return seqProvider.Object;\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/ScoreFile/HeaderTests.cs",
    "content": "using System.Collections.Generic;\nusing System.IO;\nusing ErrorHandling.Exceptions;\nusing SAUtils;\nusing SAUtils.GenericScore;\nusing SAUtils.GenericScore.GenericScoreParser;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.GenericScore;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.ScoreFile\n{\n    public sealed class HeaderTests\n    {\n        [Fact]\n        public void TestFilePairId()\n        {\n            (\n                List<GenericScoreItem> saItems1,\n                WriterSettings writerSettings1,\n                MemoryStream indexStream1,\n                MemoryStream writeStream1,\n                DataSourceVersion version1,\n                _\n            ) = TestDataGenerator.GetRandomSingleChromosomeData(ChromosomeUtilities.Chr1, 10_001, 15_001);\n            (\n                List<GenericScoreItem> saItems2,\n                WriterSettings writerSettings2,\n                MemoryStream indexStream2,\n                MemoryStream writeStream2,\n                DataSourceVersion version2,\n                _\n            ) = TestDataGenerator.GetRandomSingleChromosomeData(ChromosomeUtilities.Chr1, 10_001, 15_001);\n\n            using (var scoreFileWriter1 = new ScoreFileWriter(\n                       writerSettings1,\n                       writeStream1,\n                       indexStream1,\n                       version1,\n                       TestDataGenerator.GetSequenceProvider(),\n                       SaCommon.SchemaVersion\n                   ))\n            using (var scoreFileWriter2 = new ScoreFileWriter(\n                       writerSettings2,\n                       writeStream2,\n                       indexStream2,\n                       version2,\n                       TestDataGenerator.GetSequenceProvider(),\n                       SaCommon.SchemaVersion\n                   ))\n            {\n                // Write 
saItems to stream\n                scoreFileWriter1.Write(saItems1);\n                scoreFileWriter2.Write(saItems2);\n\n                // Reset streams in preparation for reading them\n                indexStream1.Position = 0;\n                indexStream2.Position = 0;\n                writeStream1.Position = 0;\n                writeStream2.Position = 0;\n                // Mixing indexes with different data files must throw exception\n                Assert.Throws<UserErrorException>(() => ScoreReader.Read(writeStream2, indexStream1));\n                Assert.Throws<UserErrorException>(() => ScoreReader.Read(writeStream1, indexStream2));\n\n                indexStream1.Position = 0;\n                indexStream2.Position = 0;\n                writeStream1.Position = 0;\n                writeStream2.Position = 0;\n                // Should not throw any exception\n                ScoreReader.Read(writeStream1, indexStream1);\n                ScoreReader.Read(writeStream2, indexStream2);\n            }\n        }\n\n        [Fact]\n        public void TestFileType()\n        {\n            (\n                List<GenericScoreItem> saItems1,\n                WriterSettings writerSettings1,\n                MemoryStream indexStream1,\n                MemoryStream writeStream1,\n                DataSourceVersion version1,\n                _\n            ) = TestDataGenerator.GetRandomSingleChromosomeData(ChromosomeUtilities.Chr1, 10_001, 15_001);\n            (\n                List<GenericScoreItem> saItems2,\n                _,\n                MemoryStream indexStream2,\n                MemoryStream writeStream2,\n                DataSourceVersion version2,\n                _\n            ) = TestDataGenerator.GetRandomSingleChromosomeData(ChromosomeUtilities.Chr1, 10_001, 15_001);\n\n            using (var scoreFileWriter1 = new ScoreFileWriter(\n                       writerSettings1,\n                       writeStream1,\n                       
indexStream1,\n                       version1,\n                       TestDataGenerator.GetSequenceProvider(),\n                       SaCommon.SchemaVersion\n                   ))\n            using (var nsaWriter = new NsaWriter(\n                       writeStream2,\n                       indexStream2,\n                       version2,\n                       TestDataGenerator.GetSequenceProvider(),\n                       \"TestNsa\",\n                       true,\n                       false,\n                       SaCommon.SchemaVersion,\n                       false\n                   ))\n            {\n                scoreFileWriter1.Write(saItems1);\n                nsaWriter.Write(saItems2);\n\n                // Reset streams in preparation for reading them\n                indexStream1.Position = 0;\n                indexStream2.Position = 0;\n                writeStream1.Position = 0;\n                writeStream2.Position = 0;\n                // Attempting to read NSA file with this score reader must throw exception\n                Assert.Throws<InvalidDataException>(() => ScoreReader.Read(writeStream2, indexStream1));\n                Assert.Throws<InvalidDataException>(() => ScoreReader.Read(writeStream1, indexStream2));\n\n                indexStream1.Position = 0;\n                indexStream2.Position = 0;\n                writeStream1.Position = 0;\n                writeStream2.Position = 0;\n                // Should not throw any exception\n                ScoreReader.Read(writeStream1, indexStream1);\n            }\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/ScoreFile/ReaderSettingsTests.cs",
    "content": "using System;\nusing System.IO;\nusing IO;\nusing VariantAnnotation.GenericScore;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.ScoreFile;\n\npublic sealed class ReaderSettingsTests\n{\n    [Fact]\n    public void TestReadWriteZeroToOne()\n    {\n        var stream = new MemoryStream();\n        var writer = new ExtendedBinaryWriter(stream, System.Text.Encoding.Default);\n\n        var nucleotides = new[] {\"A\", \"C\", \"G\", \"T\"};\n        var blockLength = 25;\n        var encoderType = EncoderType.ZeroToOne;\n\n        var readerSettings = GetReaderSettings(encoderType, nucleotides, blockLength);\n\n        AssertData(writer, readerSettings, nucleotides, blockLength);\n    }\n\n    [Fact]\n    public void TestReadWriteGenericScoreEncoder()\n    {\n        var stream = new MemoryStream();\n        var writer = new ExtendedBinaryWriter(stream, System.Text.Encoding.Default);\n\n        var nucleotides = new[] {\"N\"};\n        var blockLength = 25;\n        var encoderType = EncoderType.Generic;\n\n        var readerSettings = GetReaderSettings(encoderType, nucleotides, blockLength);\n\n        AssertData(writer, readerSettings, nucleotides, blockLength);\n    }\n\n    [Fact]\n    public void TestReadUnknownEncoder()\n    {\n        var writer = new ExtendedBinaryWriter(new MemoryStream(), System.Text.Encoding.Default);\n\n        var         nucleotides = new[] {\"N\"};\n        var         blockLength = 25;\n        EncoderType encoderType = EncoderType.Generic;\n\n        var readerSettings = GetReaderSettings(encoderType, nucleotides, blockLength);\n        using (writer)\n        {\n            readerSettings.Write(writer);\n            writer.BaseStream.Position = 1;\n\n            // Changing EncoderType in base stream to unknown\n            writer.Write(255);\n            writer.BaseStream.Position = 0;\n\n            Assert.Throws<Exception>(() => ReaderSettings.Read(new ExtendedBinaryReader(writer.BaseStream)));\n        }\n   
 }\n\n    private void AssertData(ExtendedBinaryWriter writer, ReaderSettings readerSettings, string[] nucleotides, int blockLength)\n    {\n        using (writer)\n        {\n            readerSettings.Write(writer);\n\n            writer.BaseStream.Position = 0;\n\n            var            reader             = new ExtendedBinaryReader(writer.BaseStream);\n            ReaderSettings deserializedReader = ReaderSettings.Read(reader);\n\n            Assert.Equal(nucleotides, deserializedReader.Nucleotides);\n            Assert.Equal(blockLength, deserializedReader.BlockLength);\n        }\n    }\n\n    private ReaderSettings GetReaderSettings(EncoderType encoderType, string[] nucleotides, int blockLength)\n    {\n        IScoreEncoder scoreEncoder = encoderType switch\n        {\n            EncoderType.Generic   => new GenericScoreEncoder(),\n            EncoderType.ZeroToOne => new ZeroToOneScoreEncoder(2, 1),\n            _                     => null\n        };\n\n        return new ReaderSettings(\n            false,\n            encoderType,\n            scoreEncoder,\n            new ScoreJsonEncoder(\"TestKey\", \"TestSubKey\"),\n            nucleotides,\n            blockLength\n        );\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/ScoreFile/SaItemValidatorTests.cs",
    "content": "using System.IO;\nusing Genome;\nusing SAUtils.GenericScore.GenericScoreParser;\nusing UnitTests.TestDataStructures;\nusing UnitTests.TestUtilities;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.ScoreFile;\n\npublic sealed class SaItemValidatorTests\n{\n    [Fact]\n    public void TestParRegion()\n    {\n        var saItemValidator = new SaItemValidator(true, true);\n\n        var sequence         = new SimpleSequence(new string('A', 15_000));\n        var sequenceProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh38, sequence, ChromosomeUtilities.RefNameToChromosome);\n\n        Assert.True(saItemValidator.Validate(\n            new GenericScoreItem(ChromosomeUtilities.ChrY, 10_011, \"A\", \"C\", 0.5),\n            sequenceProvider\n        ));\n\n        Assert.True(saItemValidator.Validate(\n            new GenericScoreItem(ChromosomeUtilities.ChrY, 10_011, \"N\", \"C\", 0.5),\n            sequenceProvider\n        ));\n\n        Assert.Throws<InvalidDataException>(() => saItemValidator.Validate(\n            new GenericScoreItem(ChromosomeUtilities.ChrY, 10_011, \"C\", \"C\", 0.5),\n            sequenceProvider\n        ));\n\n        Assert.True(saItemValidator.Validate(\n            new GenericScoreItem(ChromosomeUtilities.ChrY, 10_011, \"N\", \"N\", 0.5),\n            sequenceProvider\n        ));\n\n        saItemValidator = new SaItemValidator(true, false);\n        Assert.False(saItemValidator.Validate(\n            new GenericScoreItem(ChromosomeUtilities.ChrY, 10_011, \"C\", \"N\", 0.5),\n            sequenceProvider\n        ));\n    }\n\n    [Fact]\n    public void TestIncorrectReference()\n    {\n        var sequence         = new SimpleSequence(new string('A', 99));\n        var sequenceProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh38, sequence, ChromosomeUtilities.RefNameToChromosome);\n\n        // Strict Checking throws exceptions\n        var saItemValidator = new SaItemValidator(true, true);\n        
Assert.Throws<InvalidDataException>(() => saItemValidator.Validate(\n            new GenericScoreItem(ChromosomeUtilities.Chr1, 11, \"C\", \"G\", 0.5),\n            sequenceProvider\n        ));\n        \n        Assert.True(saItemValidator.Validate(\n            new GenericScoreItem(ChromosomeUtilities.Chr1, 11, \"A\", \"G\", 0.5),\n            sequenceProvider\n        ));\n        \n        // Will not throw exceptions\n        saItemValidator = new SaItemValidator(true, false);\n        Assert.False(saItemValidator.Validate(\n            new GenericScoreItem(ChromosomeUtilities.Chr1, 11, \"C\", \"A\", 0.5),\n            sequenceProvider\n        ));\n        \n        Assert.True(saItemValidator.Validate(\n            new GenericScoreItem(ChromosomeUtilities.Chr1, 11, \"A\", \"G\", 0.5),\n            sequenceProvider\n        ));\n        \n        // Ref checking disabled\n        saItemValidator = new SaItemValidator(true, null);\n        Assert.True(saItemValidator.Validate(\n            new GenericScoreItem(ChromosomeUtilities.Chr1, 11, \"C\", \"A\", 0.5),\n            sequenceProvider\n        ));\n        \n        Assert.True(saItemValidator.Validate(\n            new GenericScoreItem(ChromosomeUtilities.Chr1, 11, \"A\", \"G\", 0.5),\n            sequenceProvider\n        ));\n        \n    }\n\n    [Fact]\n    public void TestCheckSnv()\n    {\n        var sequence         = new SimpleSequence(new string('A', 99));\n        var sequenceProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh38, sequence, ChromosomeUtilities.RefNameToChromosome);\n\n        // Strict checking throws exceptions on invalid items\n        var saItemValidator = new SaItemValidator(true, true);\n        Assert.Throws<InvalidDataException>(() => saItemValidator.Validate(\n            new GenericScoreItem(ChromosomeUtilities.Chr1, 11, \"AA\", \"C\", 0.5),\n            sequenceProvider\n        ));\n\n        Assert.Throws<InvalidDataException>(() => 
saItemValidator.Validate(\n            new GenericScoreItem(ChromosomeUtilities.Chr1, 11, \"A\", \"CG\", 0.5),\n            sequenceProvider\n        ));\n\n        Assert.True(saItemValidator.Validate(\n            new GenericScoreItem(ChromosomeUtilities.Chr1, 11, \"A\", \"G\", 0.5),\n            sequenceProvider\n        ));\n\n        // SnvCheck will not throw exceptions\n        saItemValidator = new SaItemValidator(false, true);\n        Assert.False(saItemValidator.Validate(\n            new GenericScoreItem(ChromosomeUtilities.Chr1, 11, \"AA\", \"C\", 0.5),\n            sequenceProvider\n        ));\n\n        Assert.False(saItemValidator.Validate(\n            new GenericScoreItem(ChromosomeUtilities.Chr1, 11, \"A\", \"CG\", 0.5),\n            sequenceProvider\n        ));\n\n        Assert.True(saItemValidator.Validate(\n            new GenericScoreItem(ChromosomeUtilities.Chr1, 11, \"A\", \"G\", 0.5),\n            sequenceProvider\n        ));\n\n        // SnvCheck disabled\n        saItemValidator = new SaItemValidator(null, true);\n        Assert.True(saItemValidator.Validate(\n            new GenericScoreItem(ChromosomeUtilities.Chr1, 11, \"AA\", \"C\", 0.5),\n            sequenceProvider\n        ));\n\n        Assert.True(saItemValidator.Validate(\n            new GenericScoreItem(ChromosomeUtilities.Chr1, 11, \"A\", \"CG\", 0.5),\n            sequenceProvider\n        ));\n\n        Assert.True(saItemValidator.Validate(\n            new GenericScoreItem(ChromosomeUtilities.Chr1, 11, \"A\", \"G\", 0.5),\n            sequenceProvider\n        ));\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/ScoreFile/ScoreEncoderTests.cs",
    "content": "using System.Collections.Generic;\nusing System.IO;\nusing ErrorHandling.Exceptions;\nusing IO;\nusing VariantAnnotation.GenericScore;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.ScoreFile\n{\n    public sealed class ScoreEncoderTests\n    {\n        [Fact]\n        public void TestEncoderDecoder()\n        {\n            const int    numberOfDigits = 3;\n            const double maxScore       = 1.0;\n\n            var scoreEncoder = new ZeroToOneScoreEncoder(numberOfDigits, maxScore);\n\n            var stream = new MemoryStream();\n            var writer = new ExtendedBinaryWriter(stream, System.Text.Encoding.Default);\n\n            scoreEncoder.Write(writer);\n            stream.Position = 0;\n            var reader = new ExtendedBinaryReader(stream);\n\n            var deserializedScoreEncoder = ZeroToOneScoreEncoder.Read(reader);\n            stream.Close();\n\n            var testData = new List<(double inputNumber, double expectedResult)>\n            {\n                (0.246, 0.246),\n                (0.2461, 0.246),\n                (0.2466, 0.247),\n\n                (0.800, 0.800),\n                (0.999, 0.999),\n                (0.9999, 1.000),\n\n                (0.127, 0.127),\n                (0.128, 0.128),\n                (0.129, 0.129),\n                \n                (0.254, 0.254),\n                (0.255, 0.255),\n                (0.256, 0.256),\n                \n                (0.1271, 0.127),\n                (0.1281, 0.128),\n                (0.1291, 0.129),\n                \n                (0.2541, 0.254),\n                (0.2551, 0.255),\n                (0.2561, 0.256),\n                \n                (0.1266, 0.127),\n                (0.1276, 0.128),\n                (0.1286, 0.129),\n                (0.1296, 0.130),\n                \n                (0.2536, 0.254),\n                (0.2546, 0.255),\n                (0.2556, 0.256),\n                (0.2566, 0.257),\n                \n        
        (0.0, 0.0),\n                (1.0, 1.0),\n                (double.NaN, double.NaN)\n            };\n\n            // Test encoder and its deserialized version\n            foreach (ZeroToOneScoreEncoder encoder in new[] {scoreEncoder, deserializedScoreEncoder})\n            {\n                foreach ((double inputNumber, double expectedOutput)in testData)\n                {\n                    Assert.Equal(expectedOutput, EncodeDecode(encoder, inputNumber));\n                }\n\n                Assert.Throws<UserErrorException>(() => encoder.EncodeToBytes(2.1));\n            }\n        }\n\n        [Fact]\n        public void TestByteRequired()\n        {\n            var testData = new List<(int numberOfDigits, double maxScore, int expectedBytesRequired)>\n            {\n                (2, 1.0, 1),\n                (2, 10.0, 1),\n\n                (3, 1.0, 2),\n                (4, 1.0, 2),\n\n                (5, 1.0, 3),\n                (6, 1.0, 3),\n                (7, 1.0, 3),\n                (5, 1000, 3)\n            };\n\n            foreach ((int numberOfDigits, double maxScore, int expectedBytesRequired) in testData)\n            {\n                var scoreEncoder = new ZeroToOneScoreEncoder(numberOfDigits, maxScore);\n                Assert.Equal(expectedBytesRequired, scoreEncoder.BytesRequired);\n            }\n        }\n\n        private static double EncodeDecode(ZeroToOneScoreEncoder encoder, double number)\n        {\n            return encoder.DecodeFromBytes(encoder.EncodeToBytes(number));\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/ScoreFile/ScoreIndexTests.cs",
    "content": "using System;\nusing System.IO;\nusing ErrorHandling.Exceptions;\nusing Genome;\nusing IO;\nusing IO.v2;\nusing VariantAnnotation.GenericScore;\nusing VariantAnnotation.Providers;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.ScoreFile\n{\n    public sealed class ScoreIndexTests\n    {\n        [Fact]\n        public void ScoreIndexTest()\n        {\n            (Stream indexStream, ScoreIndex scoreIndex) = GetScoreIndex();\n\n            // Add chromosome blocks\n            scoreIndex.AddChromosomeBlock(1, 10);\n            scoreIndex.Add(1, 0, 1, 1);\n\n            scoreIndex.AddChromosomeBlock(2, 80);\n            scoreIndex.Add(2, 1, 2, 3);\n            scoreIndex.Add(2, 3, 2, 3);\n            scoreIndex.Add(2, 5, 2, 3);\n\n            scoreIndex.AddChromosomeBlock(3, 70);\n            scoreIndex.Add(3, 7,  20, 30);\n            scoreIndex.Add(3, 27, 30, 30);\n            scoreIndex.Add(3, 57, 20, 30);\n\n            // Serialization and deserialization \n            scoreIndex.Write();\n            indexStream.Position = 0;\n            ScoreIndex scoreIndexDeserialized = ScoreIndex.Read(indexStream, 1);\n            indexStream.Close();\n\n            Assert.Equal(scoreIndex.GetBlockNumber(1, 10),  scoreIndexDeserialized.GetBlockNumber(1, 10));\n            Assert.Equal(scoreIndex.GetBlockNumber(2, 104), scoreIndexDeserialized.GetBlockNumber(2, 104));\n            Assert.Equal(scoreIndex.GetBlockLength(),       scoreIndexDeserialized.GetBlockLength());\n            Assert.Equal(scoreIndex.GetNucleotideCount(),   scoreIndexDeserialized.GetNucleotideCount());\n\n            // LastBlockNumber\n            Assert.Equal(0, scoreIndexDeserialized.GetLastBlockNumber(1));\n            Assert.Equal(2, scoreIndexDeserialized.GetLastBlockNumber(2));\n            Assert.Equal(2, scoreIndexDeserialized.GetLastBlockNumber(3));\n\n            // BlockNumber\n            Assert.Equal(-1, scoreIndexDeserialized.GetBlockNumber(1, 9));\n            
Assert.Equal(0,  scoreIndexDeserialized.GetBlockNumber(1, 10));\n            Assert.Equal(0,  scoreIndexDeserialized.GetBlockNumber(1, 34));\n            Assert.Equal(-1, scoreIndexDeserialized.GetBlockNumber(1, 35));\n\n            Assert.Equal(-1, scoreIndexDeserialized.GetBlockNumber(2, 70));\n            Assert.Equal(-1, scoreIndexDeserialized.GetBlockNumber(2, 75));\n            Assert.Equal(-1, scoreIndexDeserialized.GetBlockNumber(2, 79));\n            Assert.Equal(0,  scoreIndexDeserialized.GetBlockNumber(2, 80));\n            Assert.Equal(0,  scoreIndexDeserialized.GetBlockNumber(2, 104));\n            Assert.Equal(1,  scoreIndexDeserialized.GetBlockNumber(2, 105));\n            Assert.Equal(1,  scoreIndexDeserialized.GetBlockNumber(2, 129));\n            Assert.Equal(2,  scoreIndexDeserialized.GetBlockNumber(2, 130));\n            Assert.Equal(2,  scoreIndexDeserialized.GetBlockNumber(2, 154));\n            Assert.Equal(-1, scoreIndexDeserialized.GetBlockNumber(2, 155));\n\n            Assert.Equal(-1, scoreIndexDeserialized.GetBlockNumber(3, 68));\n            Assert.Equal(0,  scoreIndexDeserialized.GetBlockNumber(3, 70));\n            Assert.Equal(0,  scoreIndexDeserialized.GetBlockNumber(3, 80));\n            Assert.Equal(0,  scoreIndexDeserialized.GetBlockNumber(3, 94));\n            Assert.Equal(1,  scoreIndexDeserialized.GetBlockNumber(3, 95));\n            Assert.Equal(1,  scoreIndexDeserialized.GetBlockNumber(3, 119));\n            Assert.Equal(2,  scoreIndexDeserialized.GetBlockNumber(3, 120));\n            Assert.Equal(2,  scoreIndexDeserialized.GetBlockNumber(3, 144));\n            Assert.Equal(-1, scoreIndexDeserialized.GetBlockNumber(3, 145));\n\n            // Position before chromosome starts\n            Assert.Equal((-1, -1), scoreIndex.PositionToBlockLocation((ushort) 3, 67));\n            Assert.Equal((-1, -1), scoreIndex.PositionToBlockLocation((ushort) 3, 67));\n\n            // Chromosome not added\n            Assert.Equal(-1,       
scoreIndex.GetBlockNumber(4, 67));\n            Assert.Equal(-1,       scoreIndex.GetFilePosition(4, 67));\n            Assert.Equal((-1, -1), scoreIndex.PositionToBlockLocation((ushort) 4, 1));\n        }\n\n        [Fact]\n        public void PositionToBlockIndexTest()\n        {\n            (Stream indexStream, ScoreIndex scoreIndex) = GetScoreIndex();\n            // Position to block location tests\n            var testData = new[]\n            {\n                // Start position, position, expected block number, expected block index\n                (10, 11, 0, 4),\n                (10, 26, 0, 64),\n                (10, 34, 0, 96),\n                (10, 35, 1, 0),\n                (10, 40, 1, 20),\n            };\n\n            foreach ((int startingPosition, int position, int expectedBlockNumber, int expectedBlockIndex) in testData)\n            {\n                Assert.Equal((expectedBlockNumber, expectedBlockIndex), scoreIndex.PositionToBlockLocation(position, startingPosition));\n            }\n        }\n\n        [Fact]\n        public void AddGetChromosomeBlocksTest()\n        {\n            (_, ScoreIndex scoreIndex) = GetScoreIndex();\n\n            // Add and get chromosome blocks\n            scoreIndex.AddChromosomeBlock(1, 10);\n            scoreIndex.Add(1, 0, 1, 1);\n            Assert.Single(scoreIndex.GetChromosomeBlocks());\n            Assert.Equal(1, scoreIndex.GetChromosomeBlocks()[1].BlockCount);\n\n            scoreIndex.AddChromosomeBlock(2, 80);\n            scoreIndex.Add(2, 1, 2, 3);\n            scoreIndex.Add(2, 3, 2, 3);\n            scoreIndex.Add(2, 5, 2, 3);\n            Assert.Equal(2, scoreIndex.GetChromosomeBlocks().Count);\n            Assert.Equal(1, scoreIndex.GetChromosomeBlocks()[1].BlockCount);\n            Assert.Equal(3, scoreIndex.GetChromosomeBlocks()[2].BlockCount);\n        }\n\n        [Fact]\n        public void TestGetNucleotidePosition()\n        {\n            (_, ScoreIndex scoreIndex) = 
GetScoreIndex();\n\n            // Add and get chromosome blocks\n            scoreIndex.AddChromosomeBlock(1, 10);\n            scoreIndex.Add(1, 0, 1, 1);\n\n            Assert.Null(scoreIndex.GetNucleotidePosition(\"F\"));\n            Assert.Equal(0, (short) scoreIndex.GetNucleotidePosition(\"A\"));\n            Assert.Equal(1, (short) scoreIndex.GetNucleotidePosition(\"C\"));\n            Assert.Equal(2, (short) scoreIndex.GetNucleotidePosition(\"G\"));\n            Assert.Equal(3, (short) scoreIndex.GetNucleotidePosition(\"T\"));\n        }\n\n        private static (Stream stream, ScoreIndex scoreIndex) GetScoreIndex()\n        {\n            var indexStream = new MemoryStream();\n            var indexWriter = new ExtendedBinaryWriter(indexStream, System.Text.Encoding.Default);\n            var version     = new DataSourceVersion(\"Test\", \"1\", DateTime.Parse(DateTime.Now.ToString(\"yyyy-MM-dd\")).Ticks, \"No description\");\n            var header      = new Header(FileType.GsaIndex, 1);\n\n            var readerSettings = new ReaderSettings(\n                false,\n                EncoderType.ZeroToOne,\n                new ZeroToOneScoreEncoder(2, 1),\n                new ScoreJsonEncoder(\"TestKey\", \"TestSubKey\"),\n                new[] {\"A\", \"C\", \"G\", \"T\"},\n                25\n            );\n\n            var scoreIndex = new ScoreIndex(\n                indexWriter,\n                readerSettings,\n                GenomeAssembly.Unknown,\n                version,\n                0,\n                header,\n                1\n            );\n            return (indexStream, scoreIndex);\n        }\n\n        [Fact]\n        public void TestHeader()\n        {\n            var indexStream = new MemoryStream();\n            var indexWriter = new ExtendedBinaryWriter(indexStream, System.Text.Encoding.Default);\n            var version     = new DataSourceVersion(\"Test\", \"1\", 
DateTime.Parse(DateTime.Now.ToString(\"yyyy-MM-dd\")).Ticks, \"No description\");\n            var header      = new Header(FileType.GsaWriter, 1);\n\n            var readerSettings = new ReaderSettings(\n                false,\n                EncoderType.ZeroToOne,\n                new ZeroToOneScoreEncoder(2, 1),\n                new ScoreJsonEncoder(\"TestKey\", \"TestSubKey\"),\n                new[] {\"A\", \"C\", \"G\", \"T\"},\n                25\n            );\n\n            var scoreIndex = new ScoreIndex(\n                indexWriter,\n                readerSettings,\n                GenomeAssembly.Unknown,\n                version,\n                0,\n                header,\n                1\n            );\n\n            scoreIndex.Write();\n\n            indexStream.Position = 0;\n\n            Assert.Throws<UserErrorException>(() => ScoreIndex.Read(indexStream, 1));\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/ScoreFile/ScoreJsonEncoderTests.cs",
    "content": "using VariantAnnotation.GenericScore;\nusing Xunit;\n\nnamespace UnitTests.VariantAnnotation.ScoreFile;\n\npublic sealed class ScoreJsonEncoderTests\n{\n    [Fact]\n    public void TestJsonRepresentation()\n    {\n        var scoreJsonEncoder = new ScoreJsonEncoder(\"Test\", \"TestSubKey\");\n        \n        Assert.Equal(\n            \"\\\"TestSubKey\\\":1\",\n            new ScoreJsonEncoder(\"Test\", \"TestSubKey\").JsonRepresentation(1));\n        Assert.Equal(\n            \"1\",\n            new ScoreJsonEncoder(\"Test\", null).JsonRepresentation(1));\n    }\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/Sequence/CompressedSequenceReaderTests.cs",
    "content": "﻿using Genome;\r\nusing ReferenceSequence.IO;\r\nusing UnitTests.TestUtilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.Sequence\r\n{\r\n    public sealed class CompressedSequenceReaderTests\r\n    {\r\n        [Fact]\r\n        public void GetCompressedSequence()\r\n        {\r\n            using (var reader = new CompressedSequenceReader(ResourceUtilities.GetReadStream(Resources.TopPath(\"TestSeq_reference.dat\"))))\r\n            {\r\n                Assert.Equal(GenomeAssembly.GRCh37, reader.Assembly);\r\n                var sequence = reader.Sequence;\r\n\r\n                var chromosome = new Chromosome(\"chrBob\", \"Bob\", null, null, 1, 1);\r\n                reader.GetCompressedSequence(chromosome);\r\n\r\n                Assert.Null(sequence.CytogeneticBands);\r\n                Assert.Equal(0, sequence.Length);\r\n\r\n                chromosome = new Chromosome(\"chrTestSeq\", \"TestSeq\", null, null, 1, 0);\r\n                reader.GetCompressedSequence(chromosome);\r\n                var bases = sequence.Substring(0, 100);\r\n\r\n                Assert.NotNull(sequence.CytogeneticBands);\r\n                Assert.Equal(53, sequence.Length);\r\n                Assert.Equal(\"NNATGTTTCCACTTTCTCCTCATTAGANNNTAACGAATGGGTGATTTCCCTAN\", bases);\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/Sequence/CompressedSequenceTests.cs",
    "content": "﻿using Genome;\r\nusing Intervals;\r\nusing ReferenceSequence.Common;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.Sequence\r\n{\r\n\tpublic sealed class CompressedSequenceTests\r\n\t{\r\n\t\tprivate readonly ReferenceSequence.Common.Sequence _sequence;\r\n\t\tprivate const int NumBases = 53;\r\n\r\n\t\tpublic CompressedSequenceTests()\r\n\t\t{\r\n            _sequence = new ReferenceSequence.Common.Sequence { Assembly = GenomeAssembly.hg19 };\r\n\r\n            // create the following sequence: NNATGTTTCCACTTTCTCCTCATTAGANNNTAACGAATGGGTGATTTCCCTAN\r\n            var twoBitBuffer = new byte[] { 14, 42, 93, 169, 150, 122, 204, 11, 211, 224, 35, 169, 91, 0 };\r\n\r\n\t\t\tvar maskedIntervals = new Interval<MaskedEntry>[3];\r\n\t\t\tmaskedIntervals[0]  = new Interval<MaskedEntry>(0, 1, new MaskedEntry(0, 1));\r\n\t\t\tmaskedIntervals[1]  = new Interval<MaskedEntry>(27, 29, new MaskedEntry(27, 29));\r\n\t\t\tmaskedIntervals[2]  = new Interval<MaskedEntry>(52, 52, new MaskedEntry(52, 52));\r\n\r\n\t\t\tvar maskedIntervalArray = new IntervalArray<MaskedEntry>(maskedIntervals);\r\n            _sequence.Set(NumBases, 0, twoBitBuffer, maskedIntervalArray, null);\r\n        }\r\n\r\n\t    [Fact]\r\n\t    public void Assembly_hg19()\r\n\t    {\r\n\t        Assert.Equal(GenomeAssembly.hg19, _sequence.Assembly);\r\n\t    }\r\n\r\n        [Fact]\r\n\t    public void GetNumBufferBytes()\r\n        {\r\n            const int expectedNumBufferBytes = 25;\r\n            var observedNumBufferBytes = ReferenceSequence.Common.Sequence.GetNumBufferBytes(97);\r\n            Assert.Equal(expectedNumBufferBytes, observedNumBufferBytes);\r\n        }\r\n\r\n        [Theory]\r\n\t\t[InlineData(23, 5, \"TAGAN\")]\r\n\t\t[InlineData(0, 5, \"NNATG\")]\r\n\t\t[InlineData(-1, 5, null)]\r\n\t\t[InlineData(48, 5, \"CCTAN\")]\r\n\t\t[InlineData(49, 5, \"CTAN\")]\r\n\t\t[InlineData(53, 5, null)]\r\n\t\t[InlineData(23, 0, null)]\r\n\t\tpublic void Substring(int offset, 
int length, string expectedSubstring)\r\n\t\t{\r\n\t\t\tvar observedSubstring = _sequence.Substring(offset, length);\r\n\t\t\tAssert.Equal(expectedSubstring, observedSubstring);\r\n\t\t}\t\t\r\n\t}\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/TranscriptAnnotation/BreakEndUtilitiesTests.cs",
    "content": "﻿using System.IO;\r\nusing Genome;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.GeneFusions.Calling;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.TranscriptAnnotation\r\n{\r\n    public sealed class BreakEndUtilitiesTests\r\n    {\r\n        [Theory]\r\n        [InlineData(28722335, \"T\", \"[3:115024109[T\",             true,  \"3\",                 115024109, false)]\r\n        [InlineData(31410878, \"C\", \"]6:42248252]C\",              true,  \"6\",                 42248252,  true)]\r\n        [InlineData(31561816, \"C\", \"CGATCTCAT[6:41297838[\",      false, \"6\",                 41297838,  false)]\r\n        [InlineData(84461562, \"A\", \"A]8:100990100]\",             false, \"8\",                 100990100, true)]\r\n        [InlineData(32518102, \"C\", \"C]HLA-DRB1*10:01:01:12922]\", false, \"HLA-DRB1*10:01:01\", 12922,     true)]\r\n        public void CreateFromTranslocation_Nominal(int position, string refAllele, string altAllele, bool expectedOnReverseStrand,\r\n            string expectedPartnerChr, int expectedPartnerPosition, bool expectedPartnerOnReverseStrand)\r\n        {\r\n            var variant = new SimpleVariant(ChromosomeUtilities.Chr1, position, position, refAllele, altAllele, VariantType.translocation_breakend);\r\n            BreakEndAdjacency[] adjacencies =\r\n                BreakEndAdjacencyFactory.CreateAdjacencies(variant, ChromosomeUtilities.RefNameToChromosome, false, false);\r\n\r\n            Assert.NotNull(adjacencies);\r\n            Assert.Single(adjacencies);\r\n\r\n            BreakEndAdjacency actual = adjacencies[0];\r\n            Assert.Equal(expectedOnReverseStrand,        actual.Origin.OnReverseStrand);\r\n            Assert.Equal(expectedPartnerChr,             actual.Partner.Chromosome.EnsemblName);\r\n            Assert.Equal(expectedPartnerPosition,        actual.Partner.Position);\r\n            Assert.Equal(expectedPartnerOnReverseStrand, 
actual.Partner.OnReverseStrand);\r\n        }\r\n\r\n        [Fact]\r\n        public void CreateFromTranslocation_InvalidAltAllele_ThrowException()\r\n        {\r\n            Assert.Throws<InvalidDataException>(delegate\r\n            {\r\n                var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 100, 100, \"A\", \"A{3:115024109{T\", VariantType.translocation_breakend);\r\n                // ReSharper disable once UnusedVariable\r\n                BreakEndAdjacency[] adjacencies = BreakEndAdjacencyFactory.CreateFromTranslocation(variant, ChromosomeUtilities.RefNameToChromosome);\r\n            });\r\n        }\r\n\r\n        [Fact]\r\n        public void CreateFromSymbolicAllele_Deletion()\r\n        {\r\n            var                 interval    = new ChromosomeInterval(ChromosomeUtilities.Chr1, 1594584, 1660503);\r\n            BreakEndAdjacency[] adjacencies = BreakEndAdjacencyFactory.CreateFromSymbolicAllele(interval, VariantType.deletion, false, false);\r\n\r\n            Assert.NotNull(adjacencies);\r\n            Assert.Equal(2, adjacencies.Length);\r\n\r\n            BreakEndAdjacency actual = adjacencies[0];\r\n            Assert.Equal(ChromosomeUtilities.Chr1.EnsemblName, actual.Origin.Chromosome.EnsemblName);\r\n            Assert.Equal(1594583,                              actual.Origin.Position);\r\n            Assert.False(actual.Origin.OnReverseStrand);\r\n            Assert.Equal(ChromosomeUtilities.Chr1.EnsemblName, actual.Partner.Chromosome.EnsemblName);\r\n            Assert.Equal(1660504,                              actual.Partner.Position);\r\n            Assert.False(actual.Partner.OnReverseStrand);\r\n\r\n            BreakEndAdjacency actual2 = adjacencies[1];\r\n            Assert.Equal(ChromosomeUtilities.Chr1.EnsemblName, actual2.Origin.Chromosome.EnsemblName);\r\n            Assert.Equal(1660504,                              actual2.Origin.Position);\r\n            Assert.True(actual2.Origin.OnReverseStrand);\r\n      
      Assert.Equal(ChromosomeUtilities.Chr1.EnsemblName, actual2.Partner.Chromosome.EnsemblName);\r\n            Assert.Equal(1594583,                              actual2.Partner.Position);\r\n            Assert.True(actual2.Partner.OnReverseStrand);\r\n        }\r\n\r\n        [Fact]\r\n        public void CreateFromSymbolicAllele_Duplication()\r\n        {\r\n            var interval = new ChromosomeInterval(ChromosomeUtilities.Chr1, 37820921, 38404543);\r\n            BreakEndAdjacency[] adjacencies =\r\n                BreakEndAdjacencyFactory.CreateFromSymbolicAllele(interval, VariantType.tandem_duplication, false, false);\r\n\r\n            Assert.NotNull(adjacencies);\r\n            Assert.Equal(2, adjacencies.Length);\r\n\r\n            BreakEndAdjacency actual = adjacencies[0];\r\n            Assert.Equal(ChromosomeUtilities.Chr1.EnsemblName, actual.Origin.Chromosome.EnsemblName);\r\n            Assert.Equal(38404543,                             actual.Origin.Position);\r\n            Assert.False(actual.Origin.OnReverseStrand);\r\n            Assert.Equal(ChromosomeUtilities.Chr1.EnsemblName, actual.Partner.Chromosome.EnsemblName);\r\n            Assert.Equal(37820920,                             actual.Partner.Position);\r\n            Assert.False(actual.Partner.OnReverseStrand);\r\n\r\n            BreakEndAdjacency actual2 = adjacencies[1];\r\n            Assert.Equal(ChromosomeUtilities.Chr1.EnsemblName, actual2.Origin.Chromosome.EnsemblName);\r\n            Assert.Equal(37820920,                             actual2.Origin.Position);\r\n            Assert.True(actual2.Origin.OnReverseStrand);\r\n            Assert.Equal(ChromosomeUtilities.Chr1.EnsemblName, actual2.Partner.Chromosome.EnsemblName);\r\n            Assert.Equal(38404543,                             actual2.Partner.Position);\r\n            Assert.True(actual2.Partner.OnReverseStrand);\r\n        }\r\n\r\n        [Fact]\r\n        public void CreateFromSymbolicAllele_Inversion()\r\n      
  {\r\n            var expectedAdjacency = new BreakEndAdjacency(\r\n                new BreakPoint(ChromosomeUtilities.Chr1, 63989115, false), // 63989116 + (+1 offset)\r\n                new BreakPoint(ChromosomeUtilities.Chr1, 64291267, true)); // 64291267 - (0 offset)\r\n\r\n            var expectedAdjacency2 = new BreakEndAdjacency(\r\n                new BreakPoint(ChromosomeUtilities.Chr1, 64291268, true),   // 64291268 - (0 offset)\r\n                new BreakPoint(ChromosomeUtilities.Chr1, 63989116, false)); // 63989117 + (+1 offset)\r\n\r\n            var                 interval    = new ChromosomeInterval(ChromosomeUtilities.Chr1, 63989116, 64291267);\r\n            BreakEndAdjacency[] adjacencies = BreakEndAdjacencyFactory.CreateFromSymbolicAllele(interval, VariantType.inversion, false, false);\r\n\r\n            Assert.NotNull(adjacencies);\r\n            Assert.Equal(2,                  adjacencies.Length);\r\n            Assert.Equal(expectedAdjacency,  adjacencies[0]);\r\n            Assert.Equal(expectedAdjacency2, adjacencies[1]);\r\n        }\r\n\r\n        [Fact]\r\n        public void CreateFromSymbolicAllele_Inversion_INV3()\r\n        {\r\n            var expectedAdjacency = new BreakEndAdjacency(\r\n                new BreakPoint(ChromosomeUtilities.Chr1, 63989115, false), // GOOD\r\n                new BreakPoint(ChromosomeUtilities.Chr1, 64291267, true)); // GOOD\r\n\r\n            var expectedAdjacency2 = new BreakEndAdjacency(\r\n                new BreakPoint(ChromosomeUtilities.Chr1, 64291267, false),\r\n                new BreakPoint(ChromosomeUtilities.Chr1, 63989115, true));\r\n\r\n            var                 interval    = new ChromosomeInterval(ChromosomeUtilities.Chr1, 63989116, 64291267);\r\n            BreakEndAdjacency[] adjacencies = BreakEndAdjacencyFactory.CreateFromSymbolicAllele(interval, VariantType.inversion, true, false);\r\n\r\n            Assert.NotNull(adjacencies);\r\n            Assert.Equal(2,                 
 adjacencies.Length);\r\n            Assert.Equal(expectedAdjacency,  adjacencies[0]);\r\n            Assert.Equal(expectedAdjacency2, adjacencies[1]);\r\n        }\r\n\r\n        [Fact]\r\n        public void CreateFromSymbolicAllele_Inversion_INV5()\r\n        {\r\n            var expectedAdjacency = new BreakEndAdjacency(\r\n                new BreakPoint(ChromosomeUtilities.Chr1, 63989116, true), \r\n                new BreakPoint(ChromosomeUtilities.Chr1, 64291268, false)); \r\n\r\n            var expectedAdjacency2 = new BreakEndAdjacency(\r\n                new BreakPoint(ChromosomeUtilities.Chr1, 64291268, true), // GOOD\r\n                new BreakPoint(ChromosomeUtilities.Chr1, 63989116, false)); // GOOD\r\n\r\n            var                 interval    = new ChromosomeInterval(ChromosomeUtilities.Chr1, 63989116, 64291267);\r\n            BreakEndAdjacency[] adjacencies = BreakEndAdjacencyFactory.CreateFromSymbolicAllele(interval, VariantType.inversion, false, true);\r\n\r\n            Assert.NotNull(adjacencies);\r\n            Assert.Equal(2,                  adjacencies.Length);\r\n            Assert.Equal(expectedAdjacency,  adjacencies[0]);\r\n            Assert.Equal(expectedAdjacency2, adjacencies[1]);\r\n        }\r\n\r\n        [Fact]\r\n        public void CreateFromSymbolicAllele_UnhandledVariantType_ReturnNull()\r\n        {\r\n            var interval = new ChromosomeInterval(ChromosomeUtilities.Chr1, 63989116, 64291267);\r\n            BreakEndAdjacency[] adjacencies =\r\n                BreakEndAdjacencyFactory.CreateFromSymbolicAllele(interval, VariantType.complex_structural_alteration, false, false);\r\n\r\n            Assert.Null(adjacencies);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/TranscriptAnnotation/FullTranscriptAnnotatorTests.cs",
    "content": "﻿using VariantAnnotation.TranscriptAnnotation;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.TranscriptAnnotation\r\n{\r\n    public sealed class FullTranscriptAnnotatorTests\r\n    {\r\n        [Theory]\r\n        [InlineData(\"S\", \"S\", 60, 60, \"S\", \"S\", 60, 60)]\r\n        [InlineData(\"S\", \"T\", 60, 60, \"S\", \"T\", 60, 60)]\r\n        [InlineData(\"ELC\", \"DVR\", 632, 634, \"ELC\", \"DVR\", 632, 634)]\r\n        [InlineData(\"LL\", \"LI\", 213, 214, \"L\", \"I\", 214, 214)]\r\n        [InlineData(\"K\", \"KLX\", 523, 523, \"K\", \"KLX\", 523, 523 )]\r\n        [InlineData(\"C\", \"CC\", 46, 46, \"C\", \"CC\", 46, 46)]\r\n        [InlineData(\"R\", \"KR\", 22955, 22955, \"R\", \"KR\", 22955, 22955)]\r\n        [InlineData(\"PPPPPQQQQ\", \"\", 65, 73, \"PPPPPQQQQ\", \"\", 65, 73)]\r\n        [InlineData(\"DMEIHA\", \"D\", 370, 375, \"MEIHA\", \"\", 371, 375)]\r\n        [InlineData(\"VV\", \"V\", 690, 691, \"V\", \"\", 691, 691)]\r\n        [InlineData(\"NARCN\", \"N\", 243, 247, \"ARCN\", \"\", 244, 247)]\r\n        [InlineData(\"QQQQP\", \"P\", 52, 56, \"QQQQ\", \"\", 52, 55)]\r\n        [InlineData(\"RV\", \"X\", 1172, 1173, \"RV\", \"X\", 1172, 1173)]\r\n        [InlineData(\"GA\", \"GX\", 112, 113, \"A\", \"X\", 113, 113)]\r\n        [InlineData(\"SPDGHE\", \"R\", 566, 571, \"SPDGHE\", \"R\", 566, 571)]\r\n        [InlineData(\"Q\", \"*VRX\", 96, 96, \"Q\", \"*VRX\", 96, 96)]\r\n        public void TryTrimAminoAcidsAndUpdateProteinPositions_AsExpected(string reference, string alt, int start, int end, string newReference, string newAlt, int newStart, int newEnd)\r\n        {\r\n            var trimmedAa = FullTranscriptAnnotator.TryTrimAminoAcidsAndUpdateProteinPositions(new SequenceChange(reference, alt), start, end);\r\n\r\n            Assert.Equal(newReference, trimmedAa.AaChange.Reference);\r\n            Assert.Equal(newAlt, trimmedAa.AaChange.Alternate);\r\n            Assert.Equal(newStart, 
trimmedAa.ProteinStart);\r\n            Assert.Equal(newEnd, trimmedAa.ProteinEnd);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/VariantAnnotation/TranscriptAnnotation/TranscriptAnnotationFactoryTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Genome;\r\nusing Intervals;\r\nusing Moq;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.TranscriptAnnotation;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.TranscriptAnnotation\r\n{\r\n    public sealed class TranscriptAnnotationFactoryTests\r\n    {\r\n        [Fact]\r\n        public void DecideAnnotationStatus_NoOverlap_ReturnNoAnnotation()\r\n        {\r\n            var observedStatus = TranscriptAnnotationFactory.DecideAnnotationStatus(new Interval(100, 101),\r\n                new Interval(5102, 6100), AnnotationBehavior.SmallVariants, Chromosome.ShortFlankingLength);\r\n\r\n            Assert.Equal(TranscriptAnnotationFactory.Status.NoAnnotation, observedStatus);\r\n        }\r\n\r\n        [Fact]\r\n        public void DecideAnnotationStatus_Flanking_ReturnFlankingAnnotation()\r\n        {\r\n            var observedStatus = TranscriptAnnotationFactory.DecideAnnotationStatus(new Interval(100, 100),\r\n                new Interval(102, 305), AnnotationBehavior.SmallVariants, Chromosome.ShortFlankingLength);\r\n\r\n            Assert.Equal(TranscriptAnnotationFactory.Status.FlankingAnnotation, observedStatus);\r\n        }\r\n\r\n        [Fact]\r\n        public void DecideAnnotationStatus_Reduced_TranscriptPartialOverlap_ReturnReducedAnnotation()\r\n        {\r\n            var observedStatus = TranscriptAnnotationFactory.DecideAnnotationStatus(new Interval(100, 200),\r\n                new Interval(102, 305), AnnotationBehavior.StructuralVariants, Chromosome.ShortFlankingLength);\r\n\r\n            Assert.Equal(TranscriptAnnotationFactory.Status.ReducedAnnotation, observedStatus);\r\n        }\r\n\r\n        [Fact]\r\n        public void DecideAnnotationStatus_Full_PartialOverlap_ReturnFullAnnotation()\r\n        {\r\n            var 
observedStatus = TranscriptAnnotationFactory.DecideAnnotationStatus(new Interval(100, 105),\r\n                new Interval(102, 305), AnnotationBehavior.SmallVariants, Chromosome.ShortFlankingLength);\r\n\r\n            Assert.Equal(TranscriptAnnotationFactory.Status.FullAnnotation, observedStatus);\r\n        }\r\n\r\n        [Fact]\r\n        public void DecideAnnotationStatus_Full_CompleteOverlap_ReturnFullAnnotation()\r\n        {\r\n            var observedStatus = TranscriptAnnotationFactory.DecideAnnotationStatus(new Interval(100, 500),\r\n                new Interval(102, 305), AnnotationBehavior.SmallVariants, Chromosome.ShortFlankingLength);\r\n\r\n            Assert.Equal(TranscriptAnnotationFactory.Status.FullAnnotation, observedStatus);\r\n        }\r\n\r\n        [Fact]\r\n        public void DecideAnnotationStatus_ROH_Return_RohAnnotation()\r\n        {\r\n            var observedStatus = TranscriptAnnotationFactory.DecideAnnotationStatus(new Interval(100, 500),\r\n                new Interval(102, 305), AnnotationBehavior.RunsOfHomozygosity, Chromosome.ShortFlankingLength);\r\n\r\n            Assert.Equal(TranscriptAnnotationFactory.Status.RohAnnotation, observedStatus);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetAnnotatedTranscripts_ReturnEmptyList()\r\n        {\r\n            var variant     = new Mock<IVariant>();\r\n            var transcript1 = new Mock<ITranscript>();\r\n            var transcript2 = new Mock<ITranscript>();\r\n\r\n            ITranscript[] transcripts = { transcript1.Object, transcript2.Object };\r\n\r\n            var chromosome = ChromosomeUtilities.Chr1;\r\n            variant.SetupGet(x => x.Behavior).Returns(AnnotationBehavior.SmallVariants);\r\n            variant.SetupGet(x => x.Chromosome).Returns(chromosome);\r\n            //variant.SetupGet(x => x.Chromosome.FlankingLength).Returns(Chromosome.ShortFlankingLength);\r\n            variant.SetupGet(x => x.Start).Returns(123456);\r\n            
variant.SetupGet(x => x.End).Returns(123456);\r\n\r\n            transcript1.SetupGet(x => x.Id).Returns(CompactId.Convert(\"NR_046018.2\"));\r\n            transcript1.SetupGet(x => x.Start).Returns(108455);\r\n            transcript1.SetupGet(x => x.End).Returns(118455);\r\n            transcript1.SetupGet(x => x.Gene.Start).Returns(108455);\r\n            transcript1.SetupGet(x => x.Gene.End).Returns(118455);\r\n\r\n            transcript2.SetupGet(x => x.Id).Returns(CompactId.Convert(\"NR_106918.1\"));\r\n            transcript2.SetupGet(x => x.Start).Returns(128460);\r\n            transcript2.SetupGet(x => x.End).Returns(129489);\r\n            transcript2.SetupGet(x => x.Gene.Start).Returns(128460);\r\n            transcript2.SetupGet(x => x.Gene.End).Returns(129489);\r\n\r\n            var compressedSequence = new Mock<ISequence>();\r\n\r\n            IList<IAnnotatedTranscript> observedAnnotatedTranscripts =\r\n                TranscriptAnnotationFactory.GetAnnotatedTranscripts(variant.Object, transcripts,\r\n                    compressedSequence.Object, null, null);\r\n\r\n            Assert.Empty(observedAnnotatedTranscripts);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetAnnotatedTranscripts_RohAnnotation_ReturnsCanonicalOnly()\r\n        {\r\n            var variant = new Mock<IVariant>();\r\n            var transcript1 = new Mock<ITranscript>();\r\n            var transcript2 = new Mock<ITranscript>();\r\n\r\n            ITranscript[] transcripts = { transcript1.Object, transcript2.Object };\r\n\r\n            variant.SetupGet(x => x.Chromosome).Returns(ChromosomeUtilities.Chr1);\r\n            variant.SetupGet(x => x.Behavior).Returns(AnnotationBehavior.RunsOfHomozygosity);\r\n            variant.SetupGet(x => x.Start).Returns(10000);\r\n            variant.SetupGet(x => x.End).Returns(20000);\r\n\r\n            transcript1.SetupGet(x => x.Id).Returns(CompactId.Convert(\"NM_123.1\"));\r\n            transcript1.SetupGet(x => 
x.Start).Returns(11000);\r\n            transcript1.SetupGet(x => x.End).Returns(15000);\r\n            transcript1.SetupGet(x => x.IsCanonical).Returns(true);\r\n\r\n            transcript2.SetupGet(x => x.Id).Returns(CompactId.Convert(\"NM_456.2\"));\r\n            transcript2.SetupGet(x => x.Start).Returns(11000);\r\n            transcript2.SetupGet(x => x.End).Returns(15000);\r\n            transcript2.SetupGet(x => x.IsCanonical).Returns(false);\r\n\r\n            IList<IAnnotatedTranscript> observedAnnotatedTranscripts =\r\n                TranscriptAnnotationFactory.GetAnnotatedTranscripts(variant.Object, transcripts, null, null, null);\r\n\r\n            Assert.Single(observedAnnotatedTranscripts);\r\n            Assert.Equal(\"NM_123\", observedAnnotatedTranscripts[0].Transcript.Id.WithVersion);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/Utilities/DateTests.cs",
    "content": "﻿using System;\r\nusing System.Text.RegularExpressions;\r\nusing VariantAnnotation.Utilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.Utilities\r\n{\r\n    public sealed class DateTests\r\n    {\r\n        [Fact]\r\n        public void GetTimeStamp_CheckFormat()\r\n        {\r\n            var timeStamp = Date.CurrentTimeStamp;\r\n            var regex = new Regex(@\"\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\");\r\n            Assert.True(regex.Match(timeStamp).Success);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetDate()\r\n        {\r\n            long numTicks = new DateTime(2017, 6, 23).Ticks;\r\n            const string expectedDate = \"2017-06-23\";\r\n            var observedDate = Date.GetDate(numTicks);\r\n            Assert.Equal(expectedDate, observedDate);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/Utilities/FormatUtilitiesTests.cs",
    "content": "﻿using VariantAnnotation.Utilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.Utilities\r\n{\r\n    public sealed class FormatUtilitiesTests\r\n    {\r\n        [Fact]\r\n        public void SplitVersion_ReturnNull_WithNullInput()\r\n        {\r\n            var result = FormatUtilities.SplitVersion(null);\r\n            Assert.Null(result.Id);\r\n            Assert.Equal(0, result.Version);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(\"ENSG00000141510.7\", \"ENSG00000141510\", 7)]\r\n        [InlineData(\"ENSG00000141510\", \"ENSG00000141510\", 0)]\r\n        public void SplitVersion(string combinedId, string expectedId, byte expectedVersion)\r\n        {\r\n            var result = FormatUtilities.SplitVersion(combinedId);\r\n            Assert.Equal(expectedId, result.Id);\r\n            Assert.Equal(expectedVersion, result.Version);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/VariantAnnotation/Utilities/GeneComparerTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Utilities;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.VariantAnnotation.Utilities\r\n{\r\n    public sealed class GeneComparerTests\r\n    {\r\n        private readonly IGene _geneA;\r\n        private readonly IGene _geneB;\r\n        private readonly IGene _geneC;\r\n        private readonly GeneComparer _geneComparer;\r\n\r\n        public GeneComparerTests()\r\n        {\r\n            _geneA         = new Gene(ChromosomeUtilities.Chr1, 100, 200, false, \"PAX\", 123, CompactId.Convert(\"NM_123\"), CompactId.Convert(\"ENST0000123\"));\r\n            _geneB         = new Gene(ChromosomeUtilities.Chr1, 100, 200, false, \"PAX\", 123, CompactId.Convert(\"NM_123\"), CompactId.Convert(\"ENST0000123\"));\r\n            _geneC         = new Gene(ChromosomeUtilities.Chr1, 101, 200, false, \"PAX\", 123, CompactId.Convert(\"NM_123\"), CompactId.Convert(\"ENST0000123\"));\r\n            _geneComparer  = new GeneComparer();\r\n        }\r\n\r\n        [Fact]\r\n        public void Equals_AsExpected()\r\n        {\r\n            Assert.Equal(_geneA, _geneB, _geneComparer);\r\n            Assert.NotEqual(_geneA, _geneC, _geneComparer);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetHashCode_AsExpected()\r\n        {\r\n            IGene geneD = new Gene(_geneA.Chromosome, 100, 200, false, \"PAX\", 123, CompactId.Convert(\"NM_123\", 2), CompactId.Convert(\"ENST0000123\"));\r\n\r\n            var hashCodes = new HashSet<int>\r\n            {\r\n                _geneComparer.GetHashCode(_geneA),\r\n                _geneComparer.GetHashCode(_geneB),\r\n                _geneComparer.GetHashCode(_geneC),\r\n                _geneComparer.GetHashCode(geneD)\r\n            };\r\n\r\n            
Assert.Equal(3, hashCodes.Count);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Variants/BiDirectionalTrimmerTests.cs",
    "content": "﻿using Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Variants\r\n{\r\n    public sealed class BiDirectionalTrimmerTests\r\n    {\r\n        [Theory]\r\n        [InlineData(100, \"A\", \"C\", 100, \"A\", \"C\")]\r\n        [InlineData(100, \"A\", \"A\", 100, \"A\", \"A\")]\r\n        [InlineData(100, \"AT\", null, 100, \"AT\", \"\")]\r\n        [InlineData(100, null, \"CG\", 100, \"\", \"CG\")]\r\n        [InlineData(100, \"ATTT\", \"AT\", 102, \"TT\", \"\")]\r\n        [InlineData(100, \"CGGG\", \"TGGG\", 100, \"C\", \"T\")]\r\n        public void Trim(int start, string refAllele, string altAllele, int expectedStart, string expectedRef, string expectedAlt)\r\n        {\r\n            (int observedStart, string observedRef, string observedAlt) =\r\n                BiDirectionalTrimmer.Trim(start, refAllele, altAllele);\r\n\r\n            Assert.Equal(expectedStart, observedStart);\r\n            Assert.Equal(expectedRef,   observedRef);\r\n            Assert.Equal(expectedAlt,   observedAlt);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Variants/SimpleVariantTests.cs",
    "content": "﻿using UnitTests.TestUtilities;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Variants\r\n{\r\n    public sealed class SimpleVariantTests\r\n    {\r\n        [Fact]\r\n        public void SimpleVariant_Set()\r\n        {\r\n            const int expectedStart        = 100;\r\n            const int expectedEnd          = 102;\r\n            const string expectedRef       = \"AT\";\r\n            const string expectedAlt       = \"\";\r\n            const VariantType expectedType = VariantType.deletion;\r\n\r\n            var variant = new SimpleVariant(ChromosomeUtilities.Chr1, expectedStart, expectedEnd, expectedRef, expectedAlt, expectedType);\r\n\r\n            Assert.Equal(ChromosomeUtilities.Chr1, variant.Chromosome);\r\n            Assert.Equal(expectedStart,      variant.Start);\r\n            Assert.Equal(expectedEnd,        variant.End);\r\n            Assert.Equal(expectedRef,        variant.RefAllele);\r\n            Assert.Equal(expectedAlt,        variant.AltAllele);\r\n            Assert.Equal(expectedType,       variant.Type);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Variants/VariantRotatorTests.cs",
    "content": "﻿using Genome;\r\nusing Moq;\r\nusing UnitTests.TestDataStructures;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Variants\r\n{\r\n    public sealed class VariantRotatorTests\r\n    {\r\n        private readonly ISequence _refSequence =\r\n            new SimpleSequence(\r\n                new string('A', VariantRotator.MaxDownstreamLength) + \"ATGTGTGTGTGCAGT\" +\r\n                new string('A', VariantRotator.MaxDownstreamLength), 965891);\r\n\r\n        [Fact]\r\n        public void Right_Deletion_ForwardStrand()\r\n        {\r\n            // chr1\t966391\t.\tATG\tA\t2694.00\tPASS\t.\r\n            var variant = GetDeletion();\r\n\r\n            var transcript = new Mock<ITranscript>();\r\n            transcript.SetupGet(x => x.Start).Returns(966300);\r\n            transcript.SetupGet(x => x.End).Returns(966405);\r\n            transcript.SetupGet(x => x.Gene.OnReverseStrand).Returns(false);\r\n\r\n            var rotatedVariant = VariantRotator.Right(variant, transcript.Object, _refSequence, transcript.Object.Gene.OnReverseStrand);\r\n\r\n            Assert.False(ReferenceEquals(variant, rotatedVariant));\r\n            Assert.Equal(966400, rotatedVariant.Start);\r\n            Assert.Equal(\"TG\", rotatedVariant.RefAllele);\r\n        }\r\n\r\n        [Fact]\r\n        public void Right_Deletion_ReverseStrand()\r\n        {\r\n            var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 966399, 966401, \"TG\", \"\", VariantType.deletion);\r\n\r\n            var transcript = new Mock<ITranscript>();\r\n            transcript.SetupGet(x => x.Start).Returns(966300);\r\n            transcript.SetupGet(x => x.End).Returns(966405);\r\n            transcript.SetupGet(x => x.Gene.OnReverseStrand).Returns(true);\r\n\r\n            var rotatedVariant = VariantRotator.Right(variant, transcript.Object, _refSequence, 
transcript.Object.Gene.OnReverseStrand);\r\n\r\n            Assert.False(ReferenceEquals(variant, rotatedVariant));\r\n            Assert.Equal(966393, rotatedVariant.Start);\r\n            Assert.Equal(\"TG\", rotatedVariant.RefAllele);\r\n        }\r\n\r\n        [Fact]\r\n        public void Right_Insertion()\r\n        {\r\n            var variant = GetInsertion();\r\n\r\n            var transcript = new Mock<ITranscript>();\r\n            transcript.SetupGet(x => x.Start).Returns(966300);\r\n            transcript.SetupGet(x => x.End).Returns(966405);\r\n            transcript.SetupGet(x => x.Gene.OnReverseStrand).Returns(false);\r\n\r\n            var rotated = VariantRotator.Right(variant, transcript.Object, _refSequence, transcript.Object.Gene.OnReverseStrand);\r\n\r\n            Assert.False(ReferenceEquals(variant, rotated));\r\n            Assert.Equal(966403, rotated.Start);\r\n            Assert.Equal(\"TG\", rotated.AltAllele);\r\n        }\r\n\r\n        [Fact]\r\n        public void Right_Identity_WhenRefSequenceNull()\r\n        {\r\n            var originalVariant = GetDeletion();\r\n            var rotatedVariant  = VariantRotator.Right(originalVariant, null, null, false);\r\n            Assert.True(ReferenceEquals(originalVariant, rotatedVariant));\r\n        }\r\n\r\n        [Fact]\r\n        public void Right_Identity_WhenNotInsertionOrDeletion()\r\n        {\r\n            var originalVariant = new SimpleVariant(ChromosomeUtilities.Chr1, 966392, 966392, \"T\", \"A\", VariantType.SNV);\r\n            var rotated = VariantRotator.Right(originalVariant, null, _refSequence, false);\r\n            Assert.True(ReferenceEquals(originalVariant, rotated));\r\n        }\r\n\r\n        [Fact]\r\n        public void Right_Identity_VariantBeforeTranscript_ForwardStrand()\r\n        {\r\n            var originalVariant = GetDeletion();\r\n\r\n            var transcript = new Mock<ITranscript>();\r\n            transcript.SetupGet(x => 
x.Start).Returns(966397);\r\n            transcript.SetupGet(x => x.Gene.OnReverseStrand).Returns(false);\r\n\r\n            var rotated = VariantRotator.Right(originalVariant, transcript.Object, _refSequence, transcript.Object.Gene.OnReverseStrand);\r\n            Assert.True(ReferenceEquals(originalVariant, rotated));\r\n        }\r\n\r\n        [Fact]\r\n        public void Right_Identity_VariantBeforeTranscript_ReverseStrand()\r\n        {\r\n            var originalVariant = GetDeletion();\r\n\r\n            var transcript = new Mock<ITranscript>();\r\n            transcript.SetupGet(x => x.End).Returns(966390);\r\n            transcript.SetupGet(x => x.Gene.OnReverseStrand).Returns(true);\r\n\r\n            var rotated = VariantRotator.Right(originalVariant, transcript.Object, _refSequence, transcript.Object.Gene.OnReverseStrand);\r\n            Assert.True(ReferenceEquals(originalVariant, rotated));\r\n        }\r\n\r\n        [Fact]\r\n        public void Right_Identity_InsertionVariantBeforeTranscript_ForwardStrand()\r\n        {\r\n            var originalVariant = GetInsertion();\r\n\r\n            var transcript = new Mock<ITranscript>();\r\n            transcript.SetupGet(x => x.End).Returns(966392);\r\n            transcript.SetupGet(x => x.Gene.OnReverseStrand).Returns(false);\r\n\r\n            var rotated = VariantRotator.Right(originalVariant, transcript.Object, _refSequence, transcript.Object.Gene.OnReverseStrand);\r\n            Assert.True(ReferenceEquals(originalVariant, rotated));\r\n        }\r\n\r\n        [Fact]\r\n        public void Right_Identity_WithNoRotation()\r\n        {\r\n            var originalVariant = GetDeletion();\r\n\r\n            ISequence refSequence = new SimpleSequence(\r\n                new string('A', VariantRotator.MaxDownstreamLength) + \"GAGAGTTAGGTA\" +\r\n                new string('A', VariantRotator.MaxDownstreamLength), 965891);\r\n\r\n            var transcript = new Mock<ITranscript>();\r\n            
transcript.SetupGet(x => x.Start).Returns(966300);\r\n            transcript.SetupGet(x => x.End).Returns(966405);\r\n            transcript.SetupGet(x => x.Gene.OnReverseStrand).Returns(false);\r\n\r\n            var rotated = VariantRotator.Right(originalVariant, transcript.Object, refSequence, transcript.Object.Gene.OnReverseStrand);\r\n            Assert.True(ReferenceEquals(originalVariant, rotated));\r\n        }\r\n\r\n        private static ISimpleVariant GetDeletion() =>\r\n            new SimpleVariant(ChromosomeUtilities.Chr1, 966392, 966394, \"TG\", \"\", VariantType.deletion);\r\n\r\n        private static ISimpleVariant GetInsertion() =>\r\n            new SimpleVariant(ChromosomeUtilities.Chr1, 966397, 966396, \"\", \"TG\", VariantType.insertion);\r\n\r\n        [Theory]\r\n        [InlineData(519, \"TG\", 515, \"TG\")]\r\n        [InlineData(511, \"ATT\", 509, \"TTA\")]\r\n        [InlineData(508, \"GTT\", 504, \"TGT\")]\r\n        public void Left_align_deletions(int position, string refAllele, int rotatedPos, string rotatedRef)\r\n        {\r\n            var reference = new SimpleSequence(new string('A', VariantUtils.MaxUpstreamLength) + \"ATGTGTTGTTATTCTGTGTGCAT\");\r\n\r\n            var rotatedVariant = VariantUtils.TrimAndLeftAlign(position, refAllele, \"\", reference);\r\n\r\n            Assert.Equal(rotatedPos, rotatedVariant.start);\r\n            Assert.Equal(rotatedRef, rotatedVariant.refAllele);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(519, \"TG\", 515, \"TG\")]\r\n        [InlineData(511, \"ATT\", 509, \"TTA\")]\r\n        [InlineData(508, \"GTT\", 504, \"TGT\")]\r\n        public void Left_align_insertion(int position, string altAllele, int rotatedPos, string rotatedAlt)\r\n        {\r\n            var reference = new SimpleSequence(new string('A', VariantUtils.MaxUpstreamLength) + \"ATGTGTTGTTATTCTGTGTGCAT\");\r\n\r\n            var rotatedVariant = VariantUtils.TrimAndLeftAlign(position, \"\", altAllele, 
reference);\r\n\r\n            Assert.Equal(rotatedPos, rotatedVariant.start);\r\n            Assert.Equal(rotatedAlt, rotatedVariant.altAllele);\r\n        }\r\n\r\n        [Fact]\r\n        public void Left_align_multiple_padding_bases()\r\n        {\r\n            var reference = new SimpleSequence(new string('A', VariantUtils.MaxUpstreamLength) + \"ATGTGTTGTTATTCTGTGTGCAT\");\r\n\r\n            var rotatedVariant = VariantUtils.TrimAndLeftAlign(501, \"AT\", \"ATT\", reference);\r\n\r\n            Assert.Equal(502, rotatedVariant.start);\r\n            Assert.Equal(\"T\", rotatedVariant.altAllele);\r\n        }\r\n        [Theory]\r\n        [InlineData(\"TC\", \"T\", false)]\r\n        [InlineData(\"T\", \"TC\", false)]\r\n        [InlineData(\"T\", \"TCT\", true)]\r\n        [InlineData(\"TCT\", \"T\", true)]\r\n        [InlineData(\"TCT\", \"TA\", true)] // no conclusion for indels\r\n        [InlineData(\"TC\", \"AT\", true)]//no conclusion for mnvs\r\n        [InlineData(\"T\", \"A\", false)]\r\n        [InlineData(\"T\", \"T\", false)]\r\n        public void CanNotLeftRotate(string refAllele, string altAllele, bool result)\r\n        {\r\n            Assert.Equal(result, VariantUtils.IsLeftShiftPossible(refAllele, altAllele));\r\n        }\r\n\r\n    }\r\n\r\n\r\n}"
  },
  {
    "path": "UnitTests/Variants/VariantTests.cs",
    "content": "﻿using UnitTests.TestUtilities;\r\nusing VariantAnnotation.Pools;\r\nusing Variants;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Variants\r\n{\r\n    public sealed class VariantTests\r\n    {\r\n        [Fact]\r\n        public void Variant_Set()\r\n        {\r\n            const int expectedStart        = 100;\r\n            const int expectedEnd          = 102;\r\n            const string expectedRef       = \"AT\";\r\n            const string expectedAlt       = \"\";\r\n            const VariantType expectedType = VariantType.deletion;\r\n            const string expectedVid       = \"1:100:A:C\";\r\n            const bool expectedRefMinor    = true;\r\n            const bool expectedDecomposed  = false;\r\n            const bool expectedRecomposed  = true;\r\n            var expectedLinkedVids         = new[] { \"1:102:T:G\" };\r\n            var expectedBehavior           = AnnotationBehavior.SmallVariants;\r\n\r\n            var variant                    = VariantPool.Get(ChromosomeUtilities.Chr1, expectedStart, expectedEnd, expectedRef, expectedAlt,\r\n                expectedType, expectedVid, expectedRefMinor, expectedDecomposed, expectedRecomposed, expectedLinkedVids,\r\n                expectedBehavior, false);\r\n\r\n            Assert.Equal(ChromosomeUtilities.Chr1, variant.Chromosome);\r\n            Assert.Equal(expectedStart,      variant.Start);\r\n            Assert.Equal(expectedEnd,        variant.End);\r\n            Assert.Equal(expectedRef,        variant.RefAllele);\r\n            Assert.Equal(expectedAlt,        variant.AltAllele);\r\n            Assert.Equal(expectedType,       variant.Type);\r\n            Assert.Equal(expectedVid,        variant.VariantId);\r\n            Assert.Equal(expectedRefMinor,   variant.IsRefMinor);\r\n            Assert.Equal(expectedDecomposed, variant.IsDecomposed);\r\n            Assert.Equal(expectedRecomposed, variant.IsRecomposed);\r\n            Assert.Equal(expectedLinkedVids, 
variant.LinkedVids);\r\n            Assert.Equal(expectedBehavior,   variant.Behavior);\r\n            \r\n            VariantPool.Return(variant);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Vcf/Samples/BooleanExtensionsTests.cs",
    "content": "﻿using Vcf.Sample;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Vcf.Samples\r\n{\r\n    public sealed class BooleanExtensionsTests\r\n    {\r\n        [Theory]\r\n        [InlineData(\"PASS\", false)]\r\n        [InlineData(\"LowGQX\", true)]\r\n        [InlineData(null, false)]\r\n        public void GetFailedFilter(string filter, bool? expectedFailedFilter)\r\n        {\r\n            bool observedFailedFilter = filter.GetFailedFilter();\r\n            Assert.Equal(expectedFailedFilter, observedFailedFilter);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Vcf/Samples/FormatIndicesTests.cs",
    "content": "﻿using Vcf.Sample;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Vcf.Samples\r\n{\r\n    public sealed class FormatIndicesTests\r\n    {\r\n        [Fact]\r\n        public void FormatIndicesTest()\r\n        {\r\n            const string formatColumn = \"AD:AQ:BOB:CN:DN:DP:DST:FT:GQ:GT:LQ:PR:SR:VF\";\r\n            var formatIndices = new FormatIndices();\r\n            formatIndices.Set(formatColumn);\r\n\r\n            Assert.Equal(0, formatIndices.AD);\r\n            Assert.Equal(1, formatIndices.AQ);\r\n            Assert.Equal(3, formatIndices.CN);\r\n            Assert.Equal(4, formatIndices.DN);\r\n            Assert.Equal(5, formatIndices.DP);\r\n            Assert.Equal(6, formatIndices.DST);\r\n            Assert.Equal(7, formatIndices.FT);\r\n            Assert.Equal(8, formatIndices.GQ);\r\n            Assert.Equal(9, formatIndices.GT);\r\n            Assert.Equal(10, formatIndices.LQ);\r\n            Assert.Equal(11, formatIndices.PR);\r\n            Assert.Equal(12, formatIndices.SR);\r\n            Assert.Equal(13, formatIndices.VF);\r\n\r\n            formatIndices.Set(null);\r\n            Assert.False(formatIndices.AD.HasValue);\r\n            Assert.False(formatIndices.AQ.HasValue);\r\n            Assert.False(formatIndices.CN.HasValue);\r\n            Assert.False(formatIndices.DN.HasValue);\r\n            Assert.False(formatIndices.DP.HasValue);\r\n            Assert.False(formatIndices.DST.HasValue);\r\n            Assert.False(formatIndices.FT.HasValue);\r\n            Assert.False(formatIndices.GQ.HasValue);\r\n            Assert.False(formatIndices.GT.HasValue);\r\n            Assert.False(formatIndices.LQ.HasValue);\r\n            Assert.False(formatIndices.PR.HasValue);\r\n            Assert.False(formatIndices.SR.HasValue);\r\n            Assert.False(formatIndices.VF.HasValue);\r\n\r\n            formatIndices.Set(\"TEMP:DP:BOB\");\r\n            Assert.Equal(1, formatIndices.DP);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Vcf/Samples/Legacy/LegacySampleFieldExtractorTests.cs",
    "content": "﻿using System.Linq;\nusing Vcf.Sample;\nusing Xunit;\nusing static UnitTests.Vcf.Samples.TestUtilities;\n\nnamespace UnitTests.Vcf.Samples.Legacy\n{\n    public sealed class LegacySampleFieldExtractorTests\n    {\n\n        [Fact]\n        public void FormatIndicesTest()\n        {\n            const string formatColumn = \"AU:GU:TAR:FT:GQ:DP:VF:CU:TU:TIR:GT:GQX:BOB:DPI:NV:NR:CHC:DST:PCH:DCS:DID:PLG:PCN:MAD:SCH:AQ:LQ\";\n            var formatIndicies = new FormatIndices();\n            formatIndicies.Set(formatColumn);\n\n            Assert.Equal(0, formatIndicies.AU);\n            Assert.Equal(7, formatIndicies.CU);\n            Assert.Equal(1, formatIndicies.GU);\n            Assert.Equal(8, formatIndicies.TU);\n            Assert.Equal(2, formatIndicies.TAR);\n            Assert.Equal(9, formatIndicies.TIR);\n            Assert.Equal(3, formatIndicies.FT);\n            Assert.Equal(10, formatIndicies.GT);\n            Assert.Equal(4, formatIndicies.GQ);\n            Assert.Equal(11, formatIndicies.GQX);\n            Assert.Equal(5, formatIndicies.DP);\n            Assert.Equal(6, formatIndicies.VF);\n            Assert.Equal(13, formatIndicies.DPI);\n            Assert.Equal(17, formatIndicies.DST);\n            Assert.Equal(25, formatIndicies.AQ);\n            Assert.Equal(26, formatIndicies.LQ);\n\n            formatIndicies.Set(null);\n            Assert.Null(formatIndicies.TIR);\n            Assert.Null(formatIndicies.AU);\n\n\n            formatIndicies.Set(\"TEMP:DPI:BOB\");\n            Assert.Equal(1, formatIndicies.DPI);\n            Assert.Null(formatIndicies.AU);\n        }\n\n        [Theory]\n        [InlineData(\"GT:TIR:TAR\", \"1/1:18,19:37,38\", new[] { 37, 18 })]\n        [InlineData(\"GT:AU:CU:GU:TU:AD\", \"1/1:10,11:20,21:30,31:40,41:11,13\", new[] { 20, 40 })]\n        [InlineData(\"GT:AD\", \"1/1:11,13\", new[] { 11, 13 })]\n        [InlineData(\"GT:AU:CU:GU:TU:AD\", \"1/1:.:20,21:30,31:40,41:11,13\", new[] { 11, 13 })]\n    
    [InlineData(\"GT:AU:CU:GU:TU:AD\", \"1/1:.\", null)]//null when all fields are dropped after GT\n        [InlineData(\"GT:AU:CU:GU:TU:AD\", \"1/1\", null)]//null when all fields are dropped after GT\n        [InlineData(\"AD\", \".\", null)]\n        [InlineData(\"AD\", \"\", null)]\n        public void AlleleDepths(string formatCol, string sampleCol, int[] expectedAlleleDepths)\n        {\n            string vcfLine = $\"chr1\\t5592503\\t.\\tC\\tT\\t900.00\\tPASS\\t.\\t{formatCol}\\t{sampleCol}\";\n            var vcfColumns = vcfLine.Split('\\t');\n\n            var samples = vcfColumns.ToSamples(new FormatIndices(), GetSimplePositionUsingAlleleNum(1), null, null);\n\n            Assert.Single(samples);\n\n            var sample = samples[0];\n            var observedAlleleDepths = sample?.AlleleDepths;\n            Assert.Equal(expectedAlleleDepths, observedAlleleDepths);\n        }\n\n        [Theory]\n        [InlineData(\"GT:TIR:TAR\", \"1/1:18,19:37,38\", null)]\n        [InlineData(\"GT:TIR:TAR:AD\", \"1/1:.:37,38:11,13,17\", new[] { 11, 13, 17 })]\n        [InlineData(\"GT:TIR:TAR:AD\", \"1/1:.:37,38\", null)]//null when no values for AD for multi-allelic site\n        public void AlleleDepthsMultiAllelic(string formatCol, string sampleCol, int[] expectedAlleleDepths)\n        {\n            string vcfLine = $\"chr1\\t5592503\\t.\\tC\\tT,A\\t900.00\\tPASS\\t.\\t{formatCol}\\t{sampleCol}\";\n            var vcfColumns = vcfLine.Split('\\t');\n\n            var samples = vcfColumns.ToSamples(new FormatIndices(), GetSimplePositionUsingAlleleNum(2), null, null);\n\n            Assert.Single(samples);\n\n            var sample = samples[0];\n            var observedAlleleDepths = sample?.AlleleDepths;\n            Assert.Equal(expectedAlleleDepths, observedAlleleDepths);\n        }\n\n        [Theory]\n        [InlineData(\"1/1:208:47:70:3:F\", true)]\n        [InlineData(\"1/1:208:47:70:3:.\", false)]\n        [InlineData(\".\", false)]\n        
[InlineData(\"\", false)]\n        public void FailedFilter(string sampleCol, bool? expectedFailedFilter)\n        {\n            string vcfLine = $\"chr1\\t5592503\\t.\\tC\\tT\\t900.00\\tPASS\\t.\\tGT:GQ:GQX:DP:DPF:FT\\t{sampleCol}\";\n            var vcfColumns = vcfLine.Split('\\t');\n\n            var samples = vcfColumns.ToSamples(new FormatIndices(), GetSimplePositionUsingAlleleNum(2), null, null);\n\n            Assert.Single(samples);\n\n            var sample = samples[0];\n            var observedFailedFilter = sample?.FailedFilter;\n            Assert.Equal(expectedFailedFilter, observedFailedFilter);\n        }\n\n        [Theory]\n        [InlineData(\"1/1:208:47:70:3:0,70\", \"1/1\")]\n        [InlineData(\".:208:47:70:3:0,70\", null)]\n        [InlineData(\".\", null)]\n        [InlineData(\"\", null)]\n        [InlineData(\"./.\", \"./.\")]\n        public void Genotype(string sampleCol, string expectedGenotype)\n        {\n            string vcfLine = $\"chr1\\t5592503\\t.\\tC\\tT\\t900.00\\tPASS\\t.\\tGT:GQ:GQX:DP:DPF:AD\\t{sampleCol}\";\n            var vcfColumns = vcfLine.Split('\\t');\n\n            var samples = vcfColumns.ToSamples(new FormatIndices(), GetSimplePositionUsingAlleleNum(1),null, null);\n\n            Assert.Single(samples);\n\n            var sample = samples[0];\n            var observedGenotype = sample?.Genotype;\n            Assert.Equal(expectedGenotype, observedGenotype);\n        }\n\n        [Theory]\n        [InlineData(\"GT:GQ:GQX:DP:DPF:AD\", \"1/1:208:47:70:3:0,70\", 47)]\n        [InlineData(\"GT:GQ:DP:DPF:AD\", \"1/1:208:70:3:0,70\", 208)]\n        [InlineData(\"GT:GQ:DP:DPF:AD\", \"1/1:.:70:3:0,70\", null)]\n        [InlineData(\"GT:DP:DPF:AD:GQ\", \"1/1:70:3\", null)]//dropped fields without '.'\n        [InlineData(\"GQ\", \".\", null)]\n        [InlineData(\"GQX\", \"\", null)]\n        [InlineData(\"GQX\", \"./.\", null)]\n        public void GenotypeQuality(string formatCol, string sampleCol, int? 
expectedGenotypeQuality)\n        {\n            string vcfLine = $\"chr1\\t5592503\\t.\\tC\\tT\\t900.00\\tPASS\\t.\\t{formatCol}\\t{sampleCol}\";\n            var vcfColumns = vcfLine.Split('\\t');\n\n            var samples = vcfColumns.ToSamples(new FormatIndices(), GetSimplePositionUsingAlleleNum(1), null, null);\n\n            Assert.Single(samples);\n\n            var sample = samples[0];\n            var observedGenotypeQuality = sample?.GenotypeQuality;\n            Assert.Equal(expectedGenotypeQuality, observedGenotypeQuality);\n        }\n\n        [Theory]\n        [InlineData(\"GT:TIR:TAR:DP:DPF:AD\", \"1/1:22,22:3,4:70:3:0,70\", 25)]\n        [InlineData(\"GT:AU:CU:GU:TU:DP:DPF:AD\", \"1/1:10,11:20,21:30,31:40,41:70:3:0,70\", 100)]\n        [InlineData(\"GT:DPI:DP:DPF:AD\", \"1/1:17:70:3:0,70\", 17)]\n        [InlineData(\"GT:DP:DPF:AD\", \"1/1:70:3:0,70\", 70)]\n        [InlineData(\"GT:AU:CU:GU:TU:DPF:AD\", \"1/1:.:20,21:30,31:40,41:3:0,70\", null)]\n        [InlineData(\"GT:AU:CU:GU:TU:DPF:AD\", \"1/1:.:20,21:30,31:40,41:3\", null)]//dropping AD completely\n        [InlineData(\"GT:DP:DPF:AD\", \"1/1:.:3:0,70\", null)]\n        [InlineData(\"DP\", \".\", null)]\n        [InlineData(\"DPI\", \"\", null)]\n        public void TotalDepth(string formatCol, string sampleCol, int? 
expectedTotalDepth)\n        {\n            string vcfLine = $\"chr1\\t5592503\\t.\\tC\\tT\\t900.00\\tPASS\\t.\\t{formatCol}\\t{sampleCol}\";\n            var vcfColumns = vcfLine.Split('\\t');\n\n            var samples = vcfColumns.ToSamples(new FormatIndices(), GetSimplePositionUsingAlleleNum(1), null, null);\n\n            Assert.Single(samples);\n\n            var sample = samples[0];\n            var observedTotalDepth = sample?.TotalDepth;\n            Assert.Equal(expectedTotalDepth, observedTotalDepth);\n        }\n\n        [Fact]\n        public void PiscesTotalDepth()\n        {\n            const string vcfLine =\n                \"chr1\\t115251293\\t.\\tGA\\tG\\t100\\tSB;LowVariantFreq\\tDP=7882\\tGT:GQ:AD:VF:NL:SB:GQX\\t0/1:100:7588,294:0:20:-100.0000:100\";\n            var vcfColumns = vcfLine.Split('\\t');\n\n            var samples = vcfColumns.ToSamples(new FormatIndices(), GetSimplePositionUsingAlleleNum(1),null, null);\n\n            var sample = samples[0];\n            var observedTotalDepth = sample.TotalDepth;\n            const int expectedTotalDepth = 7882;\n            Assert.Equal(expectedTotalDepth, observedTotalDepth);\n        }\n\n        [Theory]\n        [InlineData(\"T\", \"GT:GQ:GQX:DP:DPF:AD:VF\", \"1/1:208:47:70:3:0,70:0.75\", \"0.75\")] // VF\n        [InlineData(\"T\", \"GT:TIR:TAR\", \"1/1:10,11:20,21\", \"0.3333\")]                     // TAR/TIR        \n        [InlineData(\"A\", \"GT:AU:CU:GU:TU\", \"1/1:10,11:20,21:30,31:40,41\", \"0.1\")]        // allele counts (A)\n        [InlineData(\"C\", \"GT:AU:CU:GU:TU\", \"1/1:10,11:20,21:30,31:40,41\", \"0.2\")]        // allele counts (C)\n        [InlineData(\"G\", \"GT:AU:CU:GU:TU\", \"1/1:10,11:20,21:30,31:40,41\", \"0.3\")]        // allele counts (G)\n        [InlineData(\"T\", \"GT:AU:CU:GU:TU\", \"1/1:10,11:20,21:30,31:40,41\", \"0.4\")]        // allele counts (T)\n        [InlineData(\"T\", \"GT:AD\", \"1/1:3,70\", \"0.9589\")]                                 // 
allele depths\n        [InlineData(\"T\", \"GT:AU:CU:GU:TU:AD\", \"1/1:.:20,21:30,31:40,41:7,11\", \"0.6111\")] // missing allele count\n        [InlineData(\"T\", \"GT:AD:DP:VF\", \"0/1:317,200:517:0.38685\", \"0.3869\")]            // VF (rounding issue)\n        public void VariantFrequency_Nominal(string altAllele, string formatCol, string sampleCol, string expectedResults)\n        {\n            string vcfLine = $\"chr1\\t5592503\\t.\\tC\\t{altAllele}\\t900.00\\tPASS\\t.\\t{formatCol}\\t{sampleCol}\";\n            var vcfColumns = vcfLine.Split('\\t');\n\n            var samples = vcfColumns.ToSamples(new FormatIndices(), GetSimplePositionUsingAlleleNum(1), null, null);\n\n            Assert.Single(samples);\n\n            var sample = samples[0];\n            Assert.NotNull(sample?.VariantFrequencies);\n            var observedResults = string.Join(',', sample.VariantFrequencies.Select(x => x.ToString(\"0.####\")));\n            Assert.Equal(expectedResults, observedResults);\n        }\n\n        [Theory]\n        [InlineData(\"C\", \"T\", \"GT:AD\", \"1/1:.\")]                                        // missing AD\n        [InlineData(\"C\", \"T\", \"VF\", \".\")]                                               // missing VF\n        [InlineData(\"C\", \"T\", \"AD\", \"\")]                                                // missing AD\n        [InlineData(\"C\", \"T,A\", \"GT:GQ:GQX:DP:DPF:AD:VF\", \"1/1:208:47:70:3:0,70:0.75\")] // multiple alleles (VF)\n        [InlineData(\"CG\", \"T\", \"GT:AU:CU:GU:TU\", \"1/1:10,11:20,21:30,31:40,41\")]        // multiple ref bases (AC)\n        [InlineData(\"CG\", \"T\", \"GT:AU:CU:GU:TU\", \"1/1\")]                                // dropping all fields after GT\n        [InlineData(\"C\", \".\", \"DP:AU:CU:GU:TU\", \"19:0,0:14,14:0,0:5,6\")]                // ref minor (AC)\n        [InlineData(\"C\", \".\", \"DP:AU:CU:GU:TU\", \"75:0,0:72,77:0,0:0,2\")]                // ref minor (AC)\n        public void 
VariantFrequency_ReturnNull(string refAllele, string altAllele, string formatCol, string sampleCol)\n        {\n            var vcfLine = $\"chr1\\t5592503\\t.\\t{refAllele}\\t{altAllele}\\t900.00\\tPASS\\t.\\t{formatCol}\\t{sampleCol}\";\n            var vcfColumns = vcfLine.Split('\\t');\n\n            var samples = vcfColumns.ToSamples(new FormatIndices(), GetSimplePositionUsingAlleleNum(altAllele.Split(',').Length), null, null);\n\n            Assert.Single(samples);\n            var sample = samples[0];\n            Assert.Null(sample.VariantFrequencies);\n        }\n\n        [Theory]\n        [InlineData(\"GT:GQ:GQX:DP:DPF:AD:VF\", \"1/1:208:47:70:3:70\", \"VF\")]\n        [InlineData(\"GT:GQ:GQX:DP:DPF:AD:VF\", \"1/1:208:47:70:3\", \"AD\")]\n        [InlineData(\"GT:DP:DPF:AD:VF:GQ:GQX\", \"1/1:70:3:208:47\", \"GQ\")]\n        [InlineData(\"GT:DP:DPF:AD:VF:GQ:GQX\", \"1/1:70:3:208:47\", \"GQX\")]\n        [InlineData(\"GT:AD:VF:FT\", \"1/1:47:70\", \"FT\")]\n        public void Leftout_fields_return_null(string formatCol, string sampleCol, string missingField)\n        {\n            var vcfLine = $\"chr1\\t5592503\\t.\\tA\\tC\\t900.00\\tPASS\\t.\\t{formatCol}\\t{sampleCol}\";\n            var vcfColumns = vcfLine.Split('\\t');\n\n            var samples = vcfColumns.ToSamples(new FormatIndices(), GetSimplePositionUsingAlleleNum(1),null, null);\n\n            Assert.Single(samples);\n            var sample = samples[0];\n\n            switch (missingField)\n            {\n                case \"VF\":\n                    Assert.Null(sample.VariantFrequencies);\n                    break;\n                case \"AD\":\n                    Assert.Null(sample.AlleleDepths);\n                    break;\n                case \"FT\":\n                    Assert.False(sample.FailedFilter);\n                    break;\n                case \"GQ\":\n                case \"GQX\":\n                    Assert.Null(sample.GenotypeQuality);\n                    
break;\n\n            }\n\n        }\n\n        [Fact]\n        public void MajorChromosomeCopyTest()\n        {\n            // data from NIR-1095\n            // for NIR-1218\n            const string vcfLine = \"1\t9314202\tCanvas:GAIN:1:9314202:9404148\tN\t<CNV>\t36\tPASS\tSVTYPE=CNV;END=9404148;ensembl_gene_id=ENSG00000049239,ENSG00000252841,ENSG00000171621\tRC:BC:CN:MCC\t.\t151:108:6:4\";\n            var vcfColumns = vcfLine.Split('\\t');\n\n            var samples = vcfColumns.ToSamples(new FormatIndices(), GetSimplePositionUsingAlleleNum(1),null, null);\n\n            Assert.Equal(2, samples.Length);\n\n            var sample = samples[1];\n\n            var observedMcc = sample?.IsLossOfHeterozygosity;\n            Assert.False(observedMcc);\n        }\n\n        [Fact]\n        public void EmptySamples()\n        {\n            // for NIR-1306\n            const string vcfLine = \"chrX\t2735147\t.\tG\tA\t38.25\tVQSRTrancheSNP99.90to100.00\tAC=3;AF=0.500;AN=6;BaseQRankSum=-0.602;DP=56;Dels=0.00;FS=30.019;HaplotypeScore=7.7259;MLEAC=3;MLEAF=0.500;MQ=41.18;MQ0=0;MQRankSum=0.098;QD=1.06;ReadPosRankSum=0.266;SB=-8.681e-03;VQSLOD=-6.0901;culprit=QD\tGT:AD:DP:GQ:PL\t0:7,0:7:3:0,3,39\t./.\t0/1:14,3:17:35:35,0,35\t1/1:9,10:19:3:41,3,0\";\n            var vcfColumns = vcfLine.Split('\\t');\n\n            var samples = vcfColumns.ToSamples(new FormatIndices(), GetSimplePositionUsingAlleleNum(1), null, null);\n\n            Assert.Equal(4, samples.Length);\n\n            var sample = samples[1];\n            var observedGenotype = sample.Genotype;\n            var observedVariantFrequency = sample.VariantFrequencies;\n\n            Assert.Equal(\"./.\", observedGenotype);\n            Assert.Null(observedVariantFrequency);\n        }\n\n        [Theory]\n        [InlineData(\"GT:TIR:TAR\", \"1/1:0,11:0,21\", \"0\")]\n        [InlineData(\"GT:AU:CU:GU:TU\", \"1/1:0,11:0,21:0,31:0,41\", \"0\")]\n        [InlineData(\"GT:AD\", \"1/1:0,0\", \"0\")]\n        
[InlineData(\"GT:AU:CU:GU:TU:AD\", \"1/1:.:20,21:30,31:40,41:0,0\", \"0\")]\n        [InlineData(\"GT:AD\", \"1/1:.\", null)]\n        [InlineData(\"VF\", \".\", null)]\n        [InlineData(\"AD\", \"\", null)]\n        public void VariantFrequencyNan(string formatCol, string sampleCol, string expectedResults)\n        {\n            // NIR-1338\n            var vcfLine = $\"chr1\\t5592503\\t.\\tC\\tT\\t900.00\\tPASS\\t.\\t{formatCol}\\t{sampleCol}\";\n            var vcfColumns = vcfLine.Split('\\t');\n\n            var samples = vcfColumns.ToSamples(new FormatIndices(), GetSimplePositionUsingAlleleNum(1), null, null);\n\n            Assert.Single(samples);\n\n            var sample = samples[0];\n            if (expectedResults == null)\n            {\n                Assert.Null(sample?.VariantFrequencies);\n                return;\n            }\n\n            Assert.NotNull(sample?.VariantFrequencies);\n            var observedResults = string.Join(',', sample.VariantFrequencies.Select(x => x.ToString(\"0.####\")));\n            Assert.Equal(expectedResults, observedResults);\n        }\n\n        [Fact]\n        public void SplitReadCounts()\n        {\n            const string vcfLine = \"chr7\t127717248\tMantaINV:267944:0:1:2:0:0\tT\t<INV>\t.\tPASS\tEND=140789466;SVTYPE=INV;SVLEN=13072218;INV5\tPR:SR\t78,0:65,0\t157,42:252,63\";\n\n            var vcfColumns = vcfLine.Split('\\t');\n\n            var samples = vcfColumns.ToSamples(new FormatIndices(), GetSimplePositionUsingAlleleNum(1), null, null);\n\n            Assert.Equal(2, samples.Length);\n            var sample1 = samples[0];\n            Assert.Equal(new[] { 78, 0 }, sample1.PairedEndReadCounts);\n            Assert.Equal(new[] { 65, 0 }, sample1.SplitReadCounts);\n\n            var sample2 = samples[1];\n            Assert.Equal(new[] { 157, 42 }, sample2.PairedEndReadCounts);\n            Assert.Equal(new[] { 252, 63 }, sample2.SplitReadCounts);\n        }\n\n        [Fact]\n        public void 
EmptySample()\n        {\n            const string vcfLine = \"chr7\t127717248\tMantaINV:267944:0:1:2:0:0\tT\t<INV>\t.\tPASS\tEND=140789466;SVTYPE=INV;SVLEN=13072218;INV5\tPR:SR\t.\";\n\n            var vcfColumns = vcfLine.Split('\\t');\n\n            var samples = vcfColumns.ToSamples(new FormatIndices(), GetSimplePositionUsingAlleleNum(1), null, null);\n\n            Assert.Single(samples);\n            var sample = samples[0];\n            Assert.True(sample.IsEmpty);\n        }\n\n    }\n}"
  },
  {
    "path": "UnitTests/Vcf/Samples/SampleFieldExtractorTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing MitoHeteroplasmy;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Pools;\r\nusing Variants;\r\nusing Vcf;\r\nusing Vcf.Sample;\r\nusing Xunit;\r\nusing static UnitTests.Vcf.Samples.TestUtilities;\r\n\r\nnamespace UnitTests.Vcf.Samples\r\n{\r\n    public sealed class SampleFieldExtractorTests\r\n    {\r\n        [Fact]\r\n        public void NormalizeNulls()\r\n        {\r\n            const string periwinkle = \"periwinkle\";\r\n            string[] cols = { periwinkle, \"\", \".\", null };\r\n            cols.NormalizeNulls();\r\n\r\n            Assert.Equal(periwinkle, cols[0]);\r\n            Assert.Null(cols[1]);\r\n            Assert.Null(cols[2]);\r\n            Assert.Null(cols[3]);\r\n        }\r\n\r\n        [Fact]\r\n        public void ExtractSample_PEPE()\r\n        {\r\n            var formatIndices = new FormatIndices();\r\n            formatIndices.Set(\"GT:GQ:AD:DP:VF:NL:SB:NC:US:AQ:LQ\");\r\n            var sample = SampleFieldExtractor.ExtractSample(\"0/1:5:338,1:339:0.00295:30:-7.3191:0.0314:0,0,0,1,0,0,17,1,129,21,148,22:3.366:0.000\", formatIndices, GetSimplePositionUsingAlleleNum(1), null, null);\r\n\r\n            Assert.Equal(\"0/1\", sample.Genotype);\r\n            Assert.Equal(5, sample.GenotypeQuality);\r\n            Assert.Equal(new[] { 338, 1 }, sample.AlleleDepths);\r\n            Assert.Equal(339, sample.TotalDepth);\r\n            Assert.Equal(new[] { 0.00295 }, sample.VariantFrequencies);\r\n            Assert.Equal(3.366f, sample.ArtifactAdjustedQualityScore);\r\n            Assert.Equal(0.000f, sample.LikelihoodRatioQualityScore);\r\n        }\r\n\r\n        [Fact]\r\n        public void ExtractSample_DragenSomatic_AsExpected()\r\n        {\r\n            var formatIndices = new FormatIndices();\r\n            formatIndices.Set(\"GT:SQ:AD:AF:F1R2:F2R1:DP:SB:MB:PS\");\r\n            var sample = 
SampleFieldExtractor.ExtractSample(\"0|1:3.96:33,8:0.195:13,6:20,2:41:17,16,4,4:13,20,4,4:534234\", formatIndices, GetSimplePositionUsingAlleleNum(1), null, null);\r\n\r\n            Assert.Equal(\"0|1\", sample.Genotype);\r\n            Assert.Equal(3.96, sample.SomaticQuality);\r\n            Assert.Equal(new[] { 33, 8 }, sample.AlleleDepths);\r\n            Assert.Equal(41, sample.TotalDepth);\r\n            Assert.Equal(new[] { 8 / 41.0 }, sample.VariantFrequencies);\r\n        }\r\n\r\n        [Fact]\r\n        public void ExtractSample_DragenCNV_AsExpected()\r\n        {\r\n            var formatIndices = new FormatIndices();\r\n            formatIndices.Set(\"GT:CN:MCN\");\r\n            var sample = SampleFieldExtractor.ExtractSample(\"0|1:3:1\", formatIndices, GetSimplePositionUsingAlleleNum(1), null, null);\r\n\r\n            Assert.Equal(\"0|1\", sample.Genotype);\r\n            Assert.Equal(3, sample.CopyNumber);\r\n            Assert.Equal(1, sample.MinorHaplotypeCopyNumber);\r\n        }\r\n\r\n        [Fact]\r\n        public void ExtractSample_Custom_format()\r\n        {\r\n            var formatIndices = new FormatIndices(new HashSet<string>(){\"CUST\"});\r\n            formatIndices.Set(\"GT:CN:MCN:CUST\");\r\n            var sample = SampleFieldExtractor.ExtractSample(\"0|1:3:1:4.5\", formatIndices, GetSimplePositionUsingAlleleNum(1), null, null);\r\n\r\n            Assert.Equal(\"0|1\", sample.Genotype);\r\n            Assert.Equal(3,     sample.CopyNumber);\r\n            Assert.Equal(1,     sample.MinorHaplotypeCopyNumber);\r\n            Assert.NotNull(sample.CustomFields);\r\n            Assert.Contains(\"\\\"CUST\\\":\\\"4.5\\\"\", sample.CustomFields.ToString());\r\n        }\r\n        \r\n        [Fact]\r\n        public void ExtractSample_Custom_format_empty()\r\n        {\r\n            var formatIndices = new FormatIndices(new HashSet<string>(){\"CUST\"});\r\n            formatIndices.Set(\"GT:CN:MCN\");\r\n            var sample = 
SampleFieldExtractor.ExtractSample(\"0|1:3:1\", formatIndices, GetSimplePositionUsingAlleleNum(1), null, null);\r\n\r\n            Assert.Equal(\"0|1\", sample.Genotype);\r\n            Assert.Equal(3,     sample.CopyNumber);\r\n            Assert.Equal(1,     sample.MinorHaplotypeCopyNumber);\r\n            Assert.NotNull(sample.CustomFields);\r\n            Assert.True(sample.CustomFields.IsEmpty());\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(\"GT:CN:MCN:CNQ:MCNQ:CNF:MCNF:SD:MAF:BC:AS\", \"1/2:2:0:1000:1000:2.03102:0.000203:248.8:0.0001:1493:1137\", 1493)]\r\n        [InlineData(\"GT:CN:MCN:CNQ:MCNQ:CNF:MCNF:SD:MAF:BC:AS\", \"1/2:3:0:1000:1000:3.02612:0.000303:370.7:0.0001:8765:9070\", 8765)]\r\n        public void ExtractSample_DragenCNV_MCN_LOH_BC(string formatField, string sampleField, int binCount)\r\n        {\r\n            var formatIndices = new FormatIndices();\r\n            formatIndices.Set(formatField);\r\n            var sample = SampleFieldExtractor.ExtractSample(sampleField, formatIndices, GetSimplePositionUsingAlleleNum(1), null, null);\r\n\r\n            Assert.True(sample.IsLossOfHeterozygosity);\r\n            Assert.Equal(binCount, sample.BinCount);\r\n        }\r\n\r\n        [Fact]\r\n        public void ExtractSample_ExpansionHunter()\r\n        {\r\n            var formatIndices = new FormatIndices();\r\n            formatIndices.Set(\"GT:SO:REPCN:REPCI:ADSP:ADFL:ADIR:LC\");\r\n            var sample = SampleFieldExtractor.ExtractSample(\"1/1:SPANNING/SPANNING:15/15:15-15/15-15:22/22:23/23:0/0:38.270270\", formatIndices, GetSimplePositionUsingAlleleNum(1), null, null);\r\n\r\n            Assert.Equal(\"1/1\", sample.Genotype);\r\n            Assert.Equal(new[] { 15, 15 }, sample.RepeatUnitCounts);\r\n        }\r\n\r\n        [Fact]\r\n        public void ExtractSample_EmptySampleColumn_ReturnEmptySample()\r\n        {\r\n            var formatIndices = new FormatIndices();\r\n            var sample = 
SampleFieldExtractor.ExtractSample(null, formatIndices, GetSimplePositionUsingAlleleNum(1), null, null);\r\n            Assert.True(sample.IsEmpty);\r\n        }\r\n\r\n        [Fact]\r\n        public void ExtractSample_DotInSampleColumn_ReturnEmptySample()\r\n        {\r\n            var formatIndices = new FormatIndices();\r\n            var sample = SampleFieldExtractor.ExtractSample(\".\", formatIndices, GetSimplePositionUsingAlleleNum(1), null, null);\r\n            Assert.True(sample.IsEmpty);\r\n        }\r\n\r\n        [Fact]\r\n        public void ToSamples_SMN1_CNV()\r\n        {\r\n            // GT:AD:DST:RPL:LC\r\n            // 0/1:30,20:-:35.8981:45.810811\r\n\r\n            // GT:SM:CN:BC:QS:FT:DN\r\n            // ./1:1.24763:3:4:5:cnvLength:.\r\n            // ./.:1.17879:2:4:8:cnvLength:.\r\n            // ./1:1.26335:3:4:6:cnvLength:Inherited\r\n\r\n            var formatIndices = new FormatIndices();\r\n\r\n            string[] cols = {\r\n                \"chr1\",\r\n                \"125068769\",\r\n                \"DRAGEN:GAIN:125068770-125075279\",\r\n                \"N\",\r\n                \"<DUP>\",\r\n                \".\",\r\n                \"SampleFT\",\r\n                \"SVTYPE=CNV;END=125075279;REFLEN=6510\",\r\n                \"GT:AD:DST:RPL:LC:SM:CN:BC:QS:FT:DN\",\r\n                \"0/1:30,20:-:35.8981:45.810811\",\r\n                \"./1:.:.:.:.:1.24763:3:4:5:cnvLength:.\",\r\n                \"./.:.:.:.:.:1.17879:2:4:8:cnvLength:.\",\r\n                \"./1:.:.:.:.:1.26335:3:4:6:cnvLength:Inherited\"\r\n            };\r\n\r\n            ISample[] samples = cols.ToSamples(formatIndices, GetSimplePositionUsingAlleleNum(1), null, null);\r\n\r\n            Assert.Equal(4, samples.Length);\r\n\r\n            Assert.Equal(\"0/1\", samples[0].Genotype);\r\n            Assert.Equal(new[] { 30, 20 }, samples[0].AlleleDepths);\r\n            Assert.Equal(new[] { \"-\" }, samples[0].DiseaseAffectedStatuses);\r\n\r\n            
Assert.Equal(\"./1\", samples[1].Genotype);\r\n            Assert.Equal(3, samples[1].CopyNumber);\r\n            Assert.True(samples[1].FailedFilter);\r\n\r\n            Assert.Equal(\"./.\", samples[2].Genotype);\r\n            Assert.Equal(2, samples[2].CopyNumber);\r\n            Assert.True(samples[2].FailedFilter);\r\n\r\n            Assert.Equal(\"./1\", samples[3].Genotype);\r\n            Assert.Equal(3, samples[3].CopyNumber);\r\n            Assert.True(samples[3].FailedFilter);\r\n        }\r\n\r\n        [Fact]\r\n        public void ToSamples_Custom()\r\n        {\r\n            var formatIndices = new FormatIndices(new HashSet<string>(){\"CF\"});\r\n\r\n            string[] cols = {\r\n                \"chr1\",\r\n                \"125068769\",\r\n                \"DRAGEN:GAIN:125068770-125075279\",\r\n                \"N\",\r\n                \"<DUP>\",\r\n                \".\",\r\n                \"SampleFT\",\r\n                \"SVTYPE=CNV;END=125075279;REFLEN=6510\",\r\n                \"GT:AD:DST:RPL:LC:SM:CN:BC:QS:CF:FT:DN\",\r\n                \"0/1:30,20:-:35.8981:45.810811:.:.:.:.:4.5\",\r\n                \"./1:.:.:.:.:1.24763:3:4:5:1.2:cnvLength:.\",\r\n                \"./.:.:.:.:.:1.17879:2:4:8:2.3:cnvLength:.\",\r\n                \"./1:.:.:.:.:1.26335:3:4:6:3.4:cnvLength:Inherited\"\r\n            };\r\n\r\n            ISample[] samples = cols.ToSamples(formatIndices, GetSimplePositionUsingAlleleNum(1), null, null);\r\n\r\n            Assert.Equal(4, samples.Length);\r\n\r\n            Assert.NotNull(samples[0].CustomFields);\r\n            Assert.Contains(\"\\\"CF\\\":\\\"4.5\\\"\", samples[0].CustomFields.ToString());\r\n            \r\n            Assert.NotNull(samples[1].CustomFields);\r\n            Assert.Contains(\"\\\"CF\\\":\\\"1.2\\\"\", samples[1].CustomFields.ToString());\r\n\r\n            Assert.NotNull(samples[2].CustomFields);\r\n            Assert.Contains(\"\\\"CF\\\":\\\"2.3\\\"\", 
samples[2].CustomFields.ToString());\r\n\r\n            Assert.NotNull(samples[3].CustomFields);\r\n            Assert.Contains(\"\\\"CF\\\":\\\"3.4\\\"\", samples[3].CustomFields.ToString());\r\n\r\n        }\r\n\r\n        [Fact]\r\n        public void ToSamples_TooFewVcfColumns_ReturnNull()\r\n        {\r\n            var formatIndices = new FormatIndices();\r\n\r\n            string[] cols = {\r\n                \"chr1\",\r\n                \"125068769\",\r\n                \"DRAGEN:GAIN:125068770-125075279\",\r\n                \"N\",\r\n                \"<DUP>\",\r\n                \".\",\r\n                \"SampleFT\",\r\n                \"SVTYPE=CNV;END=125075279;REFLEN=6510\"\r\n            };\r\n\r\n            ISample[] samples = cols.ToSamples(formatIndices, GetSimplePositionUsingAlleleNum(1), null, null);\r\n            Assert.Null(samples);\r\n        }\r\n\r\n        [Fact]\r\n        public void ExtractSample_MitoHeteroplasmy_AsExpected()\r\n        {\r\n            var position = 1;\r\n            var provider = new MitoHeteroplasmyProvider();\r\n            provider.Add(position, \"C\", new[] { 0.123, 0.200, 0.301 }, new[] { 1, 2, 4 });\r\n            provider.Add(position, \"G\", new[] { 0.101, 0.201 }, new[] { 1, 2 });\r\n\r\n            \r\n            var simplePosition = new SimplePosition(ChromosomeUtilities.ChrM, 1, \"A\", new[] { \"C\", \"T\"});\r\n            IVariant[] variants =\r\n            {\r\n                VariantPool.Get(ChromosomeUtilities.ChrM, position, position, \"A\", \"C\", VariantType.SNV,\r\n                    null, false, false, false, null, AnnotationBehavior.SmallVariants, false),\r\n                VariantPool.Get(ChromosomeUtilities.ChrM, position, position, \"A\", \"T\", VariantType.SNV,\r\n                    null, false, false, false, null, AnnotationBehavior.SmallVariants, false)\r\n            };\r\n            \r\n            var formatIndices = new FormatIndices();\r\n            
formatIndices.Set(\"GT:SQ:AD:AF:F1R2:F2R1:DP:SB:MB:PS\");\r\n            var sample = SampleFieldExtractor.ExtractSample(\"1|2:3.96:0,15,85:0.195:13,6:20,2:100:17,16,4,4:13,20,4,4:534234\", formatIndices, simplePosition,variants, provider);\r\n\r\n            Assert.Equal(new[] { 15 / 100.0, 85 / 100.0 }, sample.VariantFrequencies);\r\n            Assert.Equal(new[] { \"14.29\", \"null\" }, sample.HeteroplasmyPercentile);\r\n            \r\n            VariantPool.Return((Variant)variants[0]);\r\n            VariantPool.Return((Variant)variants[1]);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/Vcf/Samples/SampleParsingExtensionsTests.cs",
    "content": "﻿using Vcf.Sample;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Vcf.Samples\r\n{\r\n    public sealed class SampleParsingExtensionsTests\r\n    {\r\n        [Fact]\r\n        public void GetString()\r\n        {\r\n            var cols = new[] { \"knatte\", \"fnatte\", \"tjatte\" };\r\n            string observedResult = cols.GetString(2);\r\n            Assert.Equal(cols[2], observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetString_NullIndex_ReturnNull()\r\n        {\r\n            var cols = new[] { \"temp\" };\r\n            string observedResult = cols.GetString(null);\r\n            Assert.Null(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetFloat()\r\n        {\r\n            var observedResult = \"1.23\".GetFloat();\r\n            Assert.Equal(1.23f, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetFloat_NotFloat_ReturnNull()\r\n        {\r\n            float? observedResult = \"test\".GetFloat();\r\n            Assert.Null(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetFloat_NullString_ReturnNull()\r\n        {\r\n            string s = null;\r\n            float? observedResult = s.GetFloat();\r\n            Assert.Null(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetDouble()\r\n        {\r\n            double? observedResult = \"1.23\".GetDouble();\r\n            Assert.Equal(1.23, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetDouble_NotDouble_ReturnNull()\r\n        {\r\n            double? observedResult = \"test\".GetDouble();\r\n            Assert.Null(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetDouble_NullString_ReturnNull()\r\n        {\r\n            string s = null;\r\n            double? 
observedResult = s.GetDouble();\r\n            Assert.Null(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetInteger()\r\n        {\r\n            int? observedResult = \"17\".GetInteger();\r\n            Assert.Equal(17, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetInteger_NotInteger_ReturnNull()\r\n        {\r\n            int? observedResult = \"test\".GetInteger();\r\n            Assert.Null(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetInteger_NullString_ReturnNull()\r\n        {\r\n            string s = null;\r\n            int? observedResult = s.GetInteger();\r\n            Assert.Null(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetStrings()\r\n        {\r\n            string[] observedResult = \"17,test,13\".GetStrings();\r\n            Assert.Equal(new[] { \"17\", \"test\", \"13\" }, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetStrings_NullString_ReturnNull()\r\n        {\r\n            string s = null;\r\n            string[] observedResult = s.GetStrings();\r\n            Assert.Null(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetIntegers()\r\n        {\r\n            int[] observedResult = \"17,13,11\".GetIntegers();\r\n            Assert.Equal(new[] { 17, 13, 11 }, observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetIntegers_NotInteger_ReturnNull()\r\n        {\r\n            int[] observedResult = \"10,13,bobby\".GetIntegers();\r\n            Assert.Null(observedResult);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetIntegers_NullString_ReturnNull()\r\n        {\r\n            string s = null;\r\n            int[] observedResult = s.GetIntegers();\r\n            Assert.Null(observedResult);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Vcf/Samples/SampleTests.cs",
    "content": "﻿using Vcf.Sample;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Vcf.Samples\r\n{\r\n    public sealed class SampleTests\r\n    {\r\n        [Fact]\r\n        public void Sample_ReturnEmpty()\r\n        {\r\n            var emptySample = new Sample(null, null, null, null, false, null, null, false, null, null, null, null, null, null,\r\n                null, null, null, null, null, null);\r\n\r\n            Assert.True(emptySample.IsEmpty);\r\n            Assert.True(Sample.EmptySample.IsEmpty);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Vcf/Samples/TestUtilities.cs",
    "content": "﻿using Moq;\nusing VariantAnnotation.Interface.Positions;\n\nnamespace UnitTests.Vcf.Samples\n{\n    public static class TestUtilities\n    {\n        public static ISimplePosition GetSimplePositionUsingAlleleNum(int numAlleles)\n        {\n            var mock = new Mock<ISimplePosition>();\n            mock.SetupGet(x => x.AltAlleles).Returns(new string[numAlleles]);\n            mock.SetupGet(x => x.Start).Returns(-1);\n\n            return mock.Object;\n        }\n\n    }\n}"
  },
  {
    "path": "UnitTests/Vcf/Samples/VariantFrequencyTests.cs",
    "content": "﻿using Vcf.Sample;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Vcf.Samples\r\n{\r\n    public sealed class VariantFrequencyTests\r\n    {\r\n        [Fact]\r\n        public void GetVariantFrequencies_VF_OneAltAllele()\r\n        {\r\n            double[] expectedResults = { 0.75 };\r\n            double[] observedResults = VariantFrequency.GetVariantFrequencies(0.75, null, 1);\r\n            Evaluate(expectedResults, observedResults);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetVariantFrequencies_VF_MultipleAltAlleles_ReturnNull()\r\n        {\r\n            double[] observedResults = VariantFrequency.GetVariantFrequencies(0.75, null, 2);\r\n            Assert.Null(observedResults);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetVariantFrequencies_OverrideAD_UseVF()\r\n        {\r\n            double[] expectedResults = { 0.75 };\r\n            double[] observedResults = VariantFrequency.GetVariantFrequencies(0.75, new[] { 10, 20 }, 1);\r\n            Evaluate(expectedResults, observedResults);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetVariantFrequencies_AD_WrongAlleleCount_ReturnNull()\r\n        {\r\n            double[] observedResults = VariantFrequency.GetVariantFrequencies(null, new[] { 10, 20 }, 3);\r\n            Assert.Null(observedResults);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetVariantFrequencies_AD()\r\n        {\r\n            double[] expectedResults = { 0.35, 0.4 };\r\n            double[] observedResults = VariantFrequency.GetVariantFrequencies(null, new[] { 5, 7, 8 }, 2);\r\n            Evaluate(expectedResults, observedResults);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetVariantFrequencies_AD_ZeroSumAlleleCount_ReturnZeros()\r\n        {\r\n            double[] expectedResults = { 0.0, 0.0 };\r\n            double[] observedResults = VariantFrequency.GetVariantFrequencies(null, new[] { 0, 0, 0 }, 2);\r\n            Evaluate(expectedResults, 
observedResults);\r\n        }\r\n\r\n        private static void Evaluate(double[] expectedResults, double[] observedResults)\r\n        {\r\n            if (expectedResults == null || observedResults == null)\r\n            {\r\n                Assert.Equal(expectedResults, observedResults);\r\n                return;\r\n            }\r\n\r\n            Assert.Equal(expectedResults.Length, observedResults.Length);\r\n\r\n            for (int i = 0; i < expectedResults.Length; i++)\r\n            {\r\n                Assert.Equal(expectedResults[i], observedResults[i], 10);\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Vcf/StringExtensionsTests.cs",
    "content": "﻿using Vcf;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Vcf\r\n{\r\n    public sealed class StringExtensionsTests\r\n    {\r\n\r\n        [Theory]\r\n        [InlineData(\"12\",12)]\r\n        [InlineData(\"12.0\", null)]\r\n        public void GetNullableValue_int(string input, int? exp)\r\n        {\r\n            var observe = input.GetNullableValue<int>(int.TryParse);\r\n            Assert.Equal(exp,observe);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(\"12\", 12)]\r\n        [InlineData(\"12.0\", 12.0)]\r\n        [InlineData(\"a.8\",null)]\r\n        public void GetNullableValue_double(string input, double? exp)\r\n        {\r\n            var observe = input.GetNullableValue<double>(double.TryParse);\r\n            Assert.Equal(exp, observe);\r\n        }\r\n\r\n\r\n        [Theory]\r\n        [InlineData(\"12\", new[]{12})]\r\n        [InlineData(\"12,13\", new[]{12,13})]\r\n        [InlineData(\"12,13.0\", null)]\r\n        public void SplitToArray_int(string input, int[] exp)\r\n        {\r\n            var observe = input.SplitToArray();\r\n            Assert.Equal(exp, observe);\r\n        }\r\n\r\n\r\n        //[Theory]\r\n        //[InlineData(\"12\", new double[] { 12 })]\r\n        //[InlineData(\"12,13\", new double[] { 12, 13 })]\r\n        //[InlineData(\"12,13.0\", new[] { 12, 13.0})]\r\n        //[InlineData(\"12.a,13.0\", null)]\r\n        //public void SplitToArray_double(string input, double[] exp)\r\n        //{\r\n        //    var observe = input.SplitToArray<double>(',', double.TryParse);\r\n        //    Assert.Equal(exp, observe);\r\n        //}\r\n\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/Vcf/VariantCreator/CnvCreatorTests.cs",
    "content": "﻿using UnitTests.TestUtilities;\r\nusing Variants;\r\nusing Vcf.Info;\r\nusing Vcf.VariantCreator;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Vcf.VariantCreator\r\n{\r\n    public sealed class CnvCreatorTests\r\n    {\r\n        [Fact]\r\n        public void Create_Dragen_3_3_DEL()\r\n        {\r\n            // chr1    907965  DRAGEN:LOSS:907966-909406       N       <DEL>   .       SampleFT        SVTYPE=CNV;END=909406;REFLEN=1441       GT:SM:CN:BC:QS:FT:DN    0/1:0.516574:1:1:24:cnvLength:.     0/1:0.409726:1:1:26:cnvLength:. 0/1:0.496663:1:1:23:cnvLength:Inherited\r\n            var      builder  = new InfoDataBuilder {SvType = \"CNV\", End = 909406};\r\n            InfoData infoData = builder.Create();\r\n\r\n            IVariant observedResults = CnvCreator.Create(ChromosomeUtilities.Chr1, 907965, infoData.End.Value, \"N\", \"<DEL>\", null);\r\n\r\n            Assert.Equal(ChromosomeUtilities.Chr1,     observedResults.Chromosome);\r\n            Assert.Equal(907966,                       observedResults.Start);\r\n            Assert.Equal(909406,                       observedResults.End);\r\n            Assert.Equal(\"N\",                          observedResults.RefAllele);\r\n            Assert.Equal(\"<DEL>\",                      observedResults.AltAllele);\r\n            Assert.Equal(VariantType.copy_number_loss, observedResults.Type);\r\n        }\r\n\r\n        [Fact]\r\n        public void Create_Dragen_3_3_DUP()\r\n        {\r\n            // chr1    1715898 DRAGEN:GAIN:1715899-1750149     N       <DUP>   .       PASS    SVTYPE=CNV;END=1750149;REFLEN=34251     GT:SM:CN:BC:QS:FT:DN    ./.:1.07189:2:6:33:PASS:.   ./1:1.53631:3:6:49:PASS:.       
./.:1.012:2:6:38:PASS:Inherited\r\n            var      builder  = new InfoDataBuilder {SvType = \"CNV\", End = 1750149};\r\n            InfoData infoData = builder.Create();\r\n\r\n            IVariant observedResults = CnvCreator.Create(ChromosomeUtilities.Chr1, 1715898, infoData.End.Value, \"N\", \"<DUP>\", null);\r\n\r\n            Assert.Equal(ChromosomeUtilities.Chr1,     observedResults.Chromosome);\r\n            Assert.Equal(1715899,                      observedResults.Start);\r\n            Assert.Equal(1750149,                      observedResults.End);\r\n            Assert.Equal(\"N\",                          observedResults.RefAllele);\r\n            Assert.Equal(\"<DUP>\",                      observedResults.AltAllele);\r\n            Assert.Equal(VariantType.copy_number_gain, observedResults.Type);\r\n        }\r\n\r\n        [Fact]\r\n        public void Create_Canvas_TotalCopyNumber()\r\n        {\r\n            // 1\t723707\tCanvas:GAIN:1:723708:2581225\tN\t<CNV>\t41\tPASS\tSVTYPE=CNV;END=2581225\tRC:BC:CN:MCC\t.\t129:3123:3:2\r\n            var      builder  = new InfoDataBuilder {SvType = \"CNV\", End = 2581225};\r\n            InfoData infoData = builder.Create();\r\n\r\n            IVariant observedResults = CnvCreator.Create(ChromosomeUtilities.Chr1, 723707, infoData.End.Value, \"N\", \"<CNV>\", null);\r\n\r\n            Assert.Equal(ChromosomeUtilities.Chr1,          observedResults.Chromosome);\r\n            Assert.Equal(723708,                            observedResults.Start);\r\n            Assert.Equal(2581225,                           observedResults.End);\r\n            Assert.Equal(\"N\",                               observedResults.RefAllele);\r\n            Assert.Equal(\"<CNV>\",                           observedResults.AltAllele);\r\n            Assert.Equal(VariantType.copy_number_variation, observedResults.Type);\r\n        }\r\n\r\n        [Fact]\r\n        public void Create_Canvas_AlleleSpecificCopyNumber()\r\n       
 {\r\n            //chr1    854895  Canvas:COMPLEXCNV:chr1:854896-861879    N       <CN0>,<CN3>     .       PASS    SVTYPE=CNV;END=861879;CNVLEN=6984;CIPOS=-291,291;CIEND=-291,291 GT:RC:BC:CN:MCC:MCCQ:QS:FT:DQ   0/1:59.45:12:1:1:.:25.34:PASS:. 0/1:59.45:12:1:1:.:25.34:PASS:. 1/2:165.40:12:3:3:16.80:16.71:PASS:.\r\n            var      builder  = new InfoDataBuilder {SvType = \"CNV\", End = 861879, CiPos = new[] {-291, 291}, CiEnd = new[] {-291, 291}};\r\n            InfoData infoData = builder.Create();\r\n\r\n            IVariant observedResults = CnvCreator.Create(ChromosomeUtilities.Chr1, 854895, infoData.End.Value, \"N\", \"<CN0>\", null);\r\n\r\n            Assert.Equal(ChromosomeUtilities.Chr1,          observedResults.Chromosome);\r\n            Assert.Equal(854896,                            observedResults.Start);\r\n            Assert.Equal(861879,                            observedResults.End);\r\n            Assert.Equal(\"N\",                               observedResults.RefAllele);\r\n            Assert.Equal(\"<CN0>\",                           observedResults.AltAllele);\r\n            Assert.Equal(VariantType.copy_number_variation, observedResults.Type);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/Vcf/VariantCreator/LegacyVariantIdTests.cs",
    "content": "﻿using System;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.Interface;\r\nusing Variants;\r\nusing Vcf.VariantCreator;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Vcf.VariantCreator\r\n{\r\n    public sealed class LegacyVariantIdTests\r\n    {\r\n        private readonly LegacyVariantId _vidCreator = new(ChromosomeUtilities.RefNameToChromosome);\r\n\r\n        [Theory]\r\n        [InlineData(66507, 66507, \"T\",     \"A\",                                 \"1:66507:A\")]\r\n        [InlineData(66522, 66521, \"\",      \"ATATA\",                             \"1:66522:66521:ATATA\")]\r\n        [InlineData(66573, 66574, \"TA\",    \"\",                                  \"1:66573:66574\")]\r\n        [InlineData(66573, 66572, \"\",      \"TACTATATATTA\",                      \"1:66573:66572:TACTATATATTA\")]\r\n        [InlineData(100,   104,   \"TAGGT\", \"ACTTA\",                             \"1:100:104:ACTTA\")]\r\n        [InlineData(100,   104,   \"TAGGT\", \"\",                                  \"1:100:104\")]\r\n        [InlineData(101,   100,   \"\",      \"CGA\",                               \"1:101:100:CGA\")]\r\n        [InlineData(100,   100,   \"T\",     \"A\",                                 \"1:100:A\")]\r\n        [InlineData(100,   104,   \"TAGGT\", \"CGA\",                               \"1:100:104:CGA\")]\r\n        [InlineData(100,   99,    \"\",      \"ACTGACGTACGAAGTTGCCGTACGTACTTGTCC\", \"1:100:99:3bd631d37e62d5db0f6d5d6db3cdcb60\")]\r\n        [InlineData(66366, 66378, \"ATATAATATATAA\",\r\n            \"TATATATATTATTATATAATATAATATATATTATATAATATATTTTATTATATAATATAATATATATTATATAATATAATATATTTTATTATATAAATATATATTATATTATATAATATAATATATATTAATATAAATATATATTAT\",\r\n            \"1:66366:66378:17b72647da13e3c186348467b29b0492\")]\r\n        [InlineData(100, 300, \"\", \"<M>\", \"1:100:*\")]\r\n        public void Create_SmallVariants_ReturnVid(int start, int end, string refAllele, string altAllele, string expectedVid)\r\n     
   {\r\n            string observedVid = _vidCreator.Create(null, VariantCategory.SmallVariant, null, ChromosomeUtilities.Chr1, start, end, refAllele,\r\n                altAllele, null);\r\n            Assert.Equal(expectedVid, observedVid);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(66507, 66507, \"T\", \".\", \"1:66507:66507:T\")]\r\n        [InlineData(100,   100,   \"T\", \"T\", \"1:100:100:T\")]\r\n        [InlineData(100,   100,   \"T\", \".\", \"1:100:100:T\")]\r\n        public void Create_Reference_ReturnVid(int start, int end, string refAllele, string altAllele, string expectedVid)\r\n        {\r\n            string observedVid = _vidCreator.Create(null, VariantCategory.Reference, null, ChromosomeUtilities.Chr1, start, end, refAllele, altAllele,\r\n                null);\r\n            Assert.Equal(expectedVid, observedVid);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(2617277,  \"A\", \"AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[\", \"2:2617277:+:3:153444911:+\")]\r\n        [InlineData(32973490, \"T\", \"T]chr9:74198768]\",                               \"2:32973490:+:9:74198768:-\")]\r\n        [InlineData(321681,   \"G\", \"G[13:123460[\",                                   \"2:321681:+:13:123460:+\")]\r\n        [InlineData(32527769, \"C\", \"[HLA-DRB1*13:02:01:3117[C\",                      \"2:32527769:-:HLA-DRB1*13:02:01:3117:+\")]\r\n        public void Create_TranslocationBreakend_ReturnVid(int position, string refAllele, string altAllele, string expectedVid)\r\n        {\r\n            string observedVid = _vidCreator.Create(null, VariantCategory.SV, \"BND\", ChromosomeUtilities.Chr2, position, position, refAllele,\r\n                altAllele, null);\r\n            Assert.Equal(expectedVid, observedVid);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(1000,    3001000, \"<ROH>\",        null,  \"ROH\",   VariantCategory.ROH,             \"1:1001:3001000:ROH\")]\r\n        [InlineData(1350082, 
1351320, \"<DEL>\",        null,  \"DEL\",   VariantCategory.SV,              \"1:1350083:1351320\")]\r\n        [InlineData(999,     2015,    \"<DUP>\",        null,  \"DUP\",   VariantCategory.SV,              \"1:1000:2015:DUP\")]\r\n        [InlineData(1477854, 1477984, \"<DUP:TANDEM>\", null,  \"DUP\",   VariantCategory.SV,              \"1:1477855:1477984:TDUP\")]\r\n        [InlineData(1477968, 1477968, \"<INS>\",        null,  \"INS\",   VariantCategory.SV,              \"1:1477969:1477968:INS\")]\r\n        [InlineData(2000,    5000,    \"<CNV>\",        null,  \"CNV\",   VariantCategory.CNV,             \"1:2001:5000:CNV\")]\r\n        [InlineData(2000,    5000,    \"<CN3>\",        null,  \"CNV\",   VariantCategory.CNV,             \"1:2001:5000:CN3\")]\r\n        [InlineData(2000,    5000,    \"<DUP>\",        null,  \"CNV\",   VariantCategory.CNV,             \"1:2001:5000:CDUP\")]\r\n        [InlineData(2000,    5000,    \"<DEL>\",        null,  \"CNV\",   VariantCategory.CNV,             \"1:2001:5000:CDEL\")]\r\n        [InlineData(2000,    5000,    \"<ALU>\",        null,  \"ALU\",   VariantCategory.SV,              \"1:2001:5000:MEI\")]\r\n        [InlineData(2000,    5000,    \"<LINE1>\",      null,  \"LINE1\", VariantCategory.SV,              \"1:2001:5000:MEI\")]\r\n        [InlineData(2000,    5000,    \"<SVA>\",        null,  \"SVA\",   VariantCategory.SV,              \"1:2001:5000:MEI\")]\r\n        [InlineData(2000,    5000,    \"<BOB>\",        null,  \"BOB\",   VariantCategory.SV,              \"1:2001:5000\")]\r\n        [InlineData(1715898, 1750149, \"<DUP>\",        null,  \"CNV\",   VariantCategory.CNV,             \"1:1715899:1750149:CDUP\")]\r\n        [InlineData(2650426, 2653074, \"<DEL>\",        null,  \"CNV\",   VariantCategory.CNV,             \"1:2650427:2653074:CDEL\")]\r\n        [InlineData(321682,  421681,  \"<INV>\",        null,  \"INV\",   VariantCategory.SV,              \"1:321683:421681:Inverse\")]\r\n        
[InlineData(199,     202,     \"<STR5>\",       \"TTG\", \"\",      VariantCategory.RepeatExpansion, \"1:200:202:TTG:5\")]\r\n        public void Create_StructuralVariants_ReturnVid(int start, int end, string altAllele, string repeatUnit, string svType,\r\n            VariantCategory category, string expectedVid)\r\n        {\r\n            string observedVid = _vidCreator.Create(null, category, svType, ChromosomeUtilities.Chr1, start, end, \"\", altAllele, repeatUnit);\r\n            Assert.Equal(expectedVid, observedVid);\r\n        }\r\n\r\n        [Fact]\r\n        public void Create_LOH_ReturnsCnvVid()\r\n        {\r\n            const string    altAllele       = \"<CNV>\";\r\n            const string    svType          = \"LOH\";\r\n            VariantCategory variantCategory = VariantFactory.GetVariantCategory(altAllele, svType);\r\n\r\n            string observedVid = _vidCreator.Create(null, variantCategory, svType, ChromosomeUtilities.Chr1, 787923, 887923, \"N\", altAllele, null);\r\n            Assert.Equal(\"1:787924:887923:CNV\", observedVid);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetSmallVariantVid_UnknownVariantType_ThrowsException()\r\n        {\r\n            Assert.Throws<ArgumentOutOfRangeException>(delegate\r\n            {\r\n                // ReSharper disable once UnusedVariable\r\n                string vid = LegacyVariantId.GetSmallVariantVid(ChromosomeUtilities.Chr1, 100, 200, \"A\", VariantType.complex_structural_alteration);\r\n            });\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/Vcf/VariantCreator/ReferenceVariantCreatorTests.cs",
    "content": "﻿using CacheUtils.TranscriptCache;\r\nusing Genome;\r\nusing UnitTests.TestUtilities;\r\nusing Variants;\r\nusing Vcf.VariantCreator;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Vcf.VariantCreator\r\n{\r\n    public sealed class ReferenceVariantCreatorTests\r\n    {\r\n        private static readonly ISequence Sequence = new NSequence();\r\n        private readonly VariantId _vidCreator = new VariantId();\r\n\r\n        [Fact]\r\n        public void Create_SinglePosition_NoGlobalMajorAllele_ReturnNull()\r\n        {\r\n            IVariant[] variants = ReferenceVariantCreator.Create(_vidCreator, Sequence, ChromosomeUtilities.Chr1, 100, 100, \"A\", \".\", null);\r\n            Assert.Null(variants);\r\n        }\r\n\r\n        [Fact]\r\n        public void Create_SinglePosition_HasGlobalMajorAllele_ReturnVariant()\r\n        {\r\n            var variant = GetVariant(100, 100, \"A\", \".\", \"T\");\r\n            Assert.True(variant.IsRefMinor);\r\n        }\r\n\r\n        [Fact]\r\n        public void Create_MultiplePositions_NoGlobalMajorAllele_ReturnNull()\r\n        {\r\n            IVariant[] variants = ReferenceVariantCreator.Create(_vidCreator, Sequence, ChromosomeUtilities.Chr1, 100, 101, \"A\", \".\", null);\r\n            Assert.Null(variants);\r\n        }\r\n\r\n        [Fact]\r\n        public void Create_MultiplePositions_HasGlobalMajorAllele_ReturnNull()\r\n        {\r\n            IVariant[] variants = ReferenceVariantCreator.Create(_vidCreator, Sequence, ChromosomeUtilities.Chr1, 100, 101, \"A\", \".\", \"T\");\r\n            Assert.Null(variants);\r\n        }\r\n\r\n        private IVariant GetVariant(int start, int end, string refAllele, string altAllele, string globalMajorAllele)\r\n        {\r\n            IVariant[] variants = ReferenceVariantCreator.Create(_vidCreator, Sequence, ChromosomeUtilities.Chr1, start, end, refAllele, altAllele, globalMajorAllele);\r\n            Assert.Single(variants);\r\n            return 
variants[0];\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/Vcf/VariantCreator/SmallVariantCreatorTests.cs",
    "content": "﻿using UnitTests.TestUtilities;\r\nusing Variants;\r\nusing Vcf.VariantCreator;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Vcf.VariantCreator\r\n{\r\n    public sealed class SmallVariantCreatorTests\r\n    {\r\n        [Fact]\r\n        public void Create_Insertion_ReturnVariant()\r\n        {\r\n            var variant = SmallVariantCreator.Create(ChromosomeUtilities.Chr1, 101, 100, \"\", \"CG\", false, false, null, null, false);\r\n            Assert.False(variant.IsRefMinor);\r\n            Assert.Equal(AnnotationBehavior.SmallVariants, variant.Behavior);\r\n            Assert.Equal(\"1\", variant.Chromosome.EnsemblName);\r\n            Assert.Equal(101, variant.Start);\r\n            Assert.Equal(100, variant.End);\r\n            Assert.Equal(\"\", variant.RefAllele);\r\n            Assert.Equal(\"CG\", variant.AltAllele);\r\n            Assert.Equal(VariantType.insertion, variant.Type);\r\n            Assert.False(variant.IsDecomposed);\r\n            Assert.False(variant.IsRecomposed);\r\n            Assert.Null(variant.LinkedVids);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/Vcf/VariantCreator/VariantFactoryTests.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing CacheUtils.TranscriptCache;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing Moq;\r\nusing OptimizedCore;\r\nusing UnitTests.TestDataStructures;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing Variants;\r\nusing Vcf;\r\nusing Vcf.Info;\r\nusing Vcf.VariantCreator;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Vcf.VariantCreator\r\n{\r\n    public sealed class VariantFactoryTests\r\n    {\r\n        private static readonly ISequence Sequence = new NSequence();\r\n\r\n        private readonly ISequence _chr12Seq = new SimpleSequence(\r\n            \"TCCCCATGCTGCTCTTTTTTGCAAACACCAACACAATTTGGGCTCCATTTATAAGGCATCTGCTGCACCAACCCTCTTTCTTGGTGCTTACTGGACCTGCTCAGGGTTAATTTCTAACTCAAAGAACCTAACTTGGAGTAACTCCGTACCACCAGCAAAGCGACTGGCTTTGGGGAATGACATTTACAATGTATCCACTGTTATTTGGTCACCCAGCAAACTGTCATTTTTCAGAAACCAGGGCTGTCTCACAAACTGGCTTTCAATAAGGTGGGTTGCTTAGCAACTGCCAAGGAATTAAGAAGACAGAATAAGGTATCCGCCAGAGATATTTTATGACCAAAATGAGCTGCACTCATGTGTCTGGTTGTGTTCAAGGTAACCAAGTAAGAGATAACACCCGACTATTTTTGCATCATGAGGAAAAATACTTGGCTTCTGCCCAGAAGGGCAATTATCTCAAAGTCTTGGCAGGCCCCATGGTATGAGAAATGGTAACTGATATGGGGGTTAAAAAAAA\",\r\n            106499648);\r\n\r\n        private readonly VariantId         _vidCreator       = new();\r\n        private readonly LegacyVariantId   _legacyVidCreator = new(null);\r\n        private readonly Mock<ISequence>   _sequenceMock     = new();\r\n        private readonly VariantFactory    _variantFactory;\r\n        private readonly ISequenceProvider _sequenceProvider;\r\n\r\n        public VariantFactoryTests()\r\n        {\r\n            // GRCh38\r\n            _sequenceMock.Setup(x => x.Substring(1037629,   1)).Returns(\"G\");\r\n            _sequenceMock.Setup(x => x.Substring(787922,    1)).Returns(\"A\");\r\n            _sequenceMock.Setup(x => x.Substring(110541588, 
1)).Returns(\"T\");\r\n            _sequenceMock.Setup(x => x.Substring(100955983, 1)).Returns(\"C\");\r\n            _sequenceMock.Setup(x => x.Substring(11071438,  1)).Returns(\"G\");\r\n            _sequenceMock.Setup(x => x.Substring(934063,    1)).Returns(\"A\");\r\n            _sequenceMock.Setup(x => x.Substring(36690135,  1)).Returns(\"C\");\r\n            _sequenceMock.Setup(x => x.Substring(20093,     1)).Returns(\"T\");\r\n            _sequenceMock.Setup(x => x.Substring(15902,     1)).Returns(\"G\");\r\n\r\n            // GRCh37 (for multi-allelic deletion with left alignment)\r\n            _sequenceMock.Setup(x => x.Substring(106500157, 1)).Returns(\"G\");\r\n            _sequenceMock.Setup(x => x.Substring(106500158, 1)).Returns(\"T\");\r\n            _sequenceMock.Setup(x => x.Substring(106500159, 1)).Returns(\"T\");\r\n            _sequenceMock.Setup(x => x.Substring(106500159, 2)).Returns(\"TA\");\r\n            _sequenceMock.Setup(x => x.Substring(106500159-50, 50)).Returns(\r\n                \"AAAGTCTTGGCAGGCCCCATGGTATGAGAAATGGTAACTGATATGGGGGT\");\r\n            _sequenceMock.Setup(x => x.Substring(23102861, 63)).Returns(\r\n                \"GGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGC\");\r\n            _sequenceMock.Setup(x => x.Substring(23102861 -50, 50)).Returns(\r\n                \"GCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGC\");\r\n            _sequenceMock.Setup(x => x.Substring(23102861 -50, 63)).Returns(\r\n                \"GCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGG\");\r\n            _sequenceMock.Setup(x => x.Substring(23102861 -100, 50)).Returns(\r\n                \"CAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGG\");\r\n            \r\n            _sequenceMock.Setup(x => x.Substring(106499659, 500)).Returns(\r\n                
\"CTCTTTTTTGCAAACACCAACACAATTTGGGCTCCATTTATAAGGCATCTGCTGCACCAACCCTCTTTCTTGGTGCTTACTGGACCTGCTCAGGGTTAATTTCTAACTCAAAGAACCTAACTTGGAGTAACTCCGTACCACCAGCAAAGCGACTGGCTTTGGGGAATGACATTTACAATGTATCCACTGTTATTTGGTCACCCAGCAAACTGTCATTTTTCAGAAACCAGGGCTGTCTCACAAACTGGCTTTCAATAAGGTGGGTTGCTTAGCAACTGCCAAGGAATTAAGAAGACAGAATAAGGTATCCGCCAGAGATATTTTATGACCAAAATGAGCTGCACTCATGTGTCTGGTTGTGTTCAAGGTAACCAAGTAAGAGATAACACCCGACTATTTTTGCATCATGAGGAAAAATACTTGGCTTCTGCCCAGAAGGGCAATTATCTCAAAGTCTTGGCAGGCCCCATGGTATGAGAAATGGTAACTGATATGGGGGT\");\r\n\r\n            _sequenceProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh38, _sequenceMock.Object, ChromosomeUtilities.RefNameToChromosome);\r\n            _variantFactory   = new VariantFactory(_sequenceMock.Object, _vidCreator, new HashSet<string>(){\"CF\"});\r\n        }\r\n\r\n        private IPosition ParseVcfLine(string vcfLine)\r\n        {\r\n            string[]    vcfFields  = vcfLine.OptimizedSplit('\\t');\r\n            Chromosome chromosome = ReferenceNameUtilities.GetChromosome(ChromosomeUtilities.RefNameToChromosome, vcfFields[VcfCommon.ChromIndex]);\r\n\r\n            (int start, bool foundError) = vcfFields[VcfCommon.PosIndex].OptimizedParseInt32();\r\n            if (foundError) throw new InvalidDataException($\"Unable to convert the VCF position to an integer: {vcfFields[VcfCommon.PosIndex]}\");\r\n\r\n            var simplePosition = SimplePosition.GetSimplePosition(chromosome, start, vcfFields, new NullVcfFilter());\r\n\r\n            return Position.ToPosition(simplePosition, null, _sequenceProvider, null, _variantFactory);\r\n        }\r\n\r\n        // chr1    69391    .    A    <DEL>    .    .    SVTYPE=DEL;END=138730    .    
.\r\n        [Fact]\r\n        public void CreateVariants_svDel()\r\n        {\r\n            var      builder        = new InfoDataBuilder {SvType = \"DEL\", End = 138730};\r\n            InfoData infoData       = builder.Create();\r\n            var      variantFactory = new VariantFactory(Sequence, _vidCreator);\r\n\r\n            IVariant[] variants = variantFactory.CreateVariants(ChromosomeUtilities.Chr1, 69391, 138730, \"A\", new[] {\"<DEL>\"}, infoData,\r\n                new[] {false}, false, null, null);\r\n            Assert.NotNull(variants);\r\n        }\r\n\r\n        // 1\t723707\tCanvas:GAIN:1:723708:2581225\tN\t<CNV>\t41\tPASS\tSVTYPE=CNV;END=2581225\tRC:BC:CN:MCC\t.\t129:3123:3:2\r\n        [Fact]\r\n        public void CreateVariants_canvas_cnv()\r\n        {\r\n            var      builder  = new InfoDataBuilder {SvType = \"CNV\", End = 2581225};\r\n            InfoData infoData = builder.Create();\r\n\r\n            var variantFactory = new VariantFactory(Sequence, _vidCreator);\r\n\r\n            IVariant[] variants = variantFactory.CreateVariants(ChromosomeUtilities.Chr1, 723707, 2581225, \"N\", new[] {\"<CNV>\"}, infoData,\r\n                new[] {false}, false, null, null);\r\n            Assert.NotNull(variants);\r\n\r\n            Assert.Equal(\"1-723707-2581225-N-<CNV>-CNV\",    variants[0].VariantId);\r\n            Assert.Equal(VariantType.copy_number_variation, variants[0].Type);\r\n        }\r\n\r\n        // chr1    854895  Canvas:COMPLEXCNV:chr1:854896-861879    N       <CN0>,<CN3>     .       PASS    SVTYPE=CNV;END=861879;CNVLEN=6984;CIPOS=-291,291;CIEND=-291,291 GT:RC:BC:CN:MCC:MCCQ:QS:FT:DQ   0/1:59.45:12:1:1:.:25.34:PASS:. 0/1:59.45:12:1:1:.:25.34:PASS:. 
1/2:165.40:12:3:3:16.80:16.71:PASS:.\r\n        [Fact]\r\n        public void CreateVariants_canvas_cnx()\r\n        {\r\n            var      builder        = new InfoDataBuilder {SvType = \"CNV\", End = 861879, CiPos = new[] {-291, 291}, CiEnd = new[] {-291, 291}};\r\n            InfoData infoData       = builder.Create();\r\n            var      variantFactory = new VariantFactory(Sequence, _vidCreator);\r\n\r\n            IVariant[] variants = variantFactory.CreateVariants(ChromosomeUtilities.Chr1, 854895, 861879, \"N\", new[] {\"<CN0>\", \"<CN3>\"}, infoData,\r\n                new[] {false, false}, false, null, null);\r\n            Assert.NotNull(variants);\r\n            Assert.Equal(2, variants.Length);\r\n\r\n            Assert.Equal(\"1-854895-861879-N-<CN0>-CNV\",     variants[0].VariantId);\r\n            Assert.Equal(VariantType.copy_number_variation, variants[0].Type);\r\n\r\n            Assert.Equal(\"1-854895-861879-N-<CN3>-CNV\",     variants[1].VariantId);\r\n            Assert.Equal(VariantType.copy_number_variation, variants[1].Type);\r\n        }\r\n\r\n        // chr1    1463185 Canvas:COMPLEXCNV:chr1:1463186-1476229  N       <CN0>,<DUP>     .       PASS    SVTYPE=CNV;END=1476229;CNVLEN=13044;CIPOS=-415,415;CIEND=-291,291       GT:RC:BC:CN:MCC:MCCQ:QS:FT:DQ   0/0:109.56:15:2:.:.:20.04:PASS:.        1/1:0.00:15:0:.:.:64.59:PASS:.  
./2:167.45:15:3:.:.:17.87:PASS:.\r\n        [Fact]\r\n        public void CreateVariants_canvas_cnv_dup()\r\n        {\r\n            var      builder        = new InfoDataBuilder {SvType = \"CNV\", End = 1476229, CiPos = new[] {-415, 415}, CiEnd = new[] {-291, 291}};\r\n            InfoData infoData       = builder.Create();\r\n            var      variantFactory = new VariantFactory(Sequence, _vidCreator);\r\n\r\n            IVariant[] variants = variantFactory.CreateVariants(ChromosomeUtilities.Chr1, 1463185, 1476229, \"N\", new[] {\"<CN0>\", \"<DUP>\"}, infoData,\r\n                new[] {false, false}, false, null, null);\r\n            Assert.NotNull(variants);\r\n            Assert.Equal(2, variants.Length);\r\n\r\n            Assert.Equal(\"1-1463185-1476229-N-<CN0>-CNV\",   variants[0].VariantId);\r\n            Assert.Equal(VariantType.copy_number_variation, variants[0].Type);\r\n\r\n            Assert.Equal(\"1-1463185-1476229-N-<DUP>-CNV\", variants[1].VariantId);\r\n            Assert.Equal(VariantType.copy_number_gain,    variants[1].Type); // <DUP>s are copy number gains\r\n        }\r\n\r\n        // chr1    1463185 .  N       <DUP>     .       PASS    SVTYPE=DUP;END=1476229;SVLEN=13044;CIPOS=-415,415;CIEND=-291,291       GT:RC:BC:CN:MCC:MCCQ:QS:FT:DQ   0/0:109.56:15:2:.:.:20.04:PASS:.        1/1:0.00:15:0:.:.:64.59:PASS:.  
./1:167.45:15:3:.:.:17.87:PASS:.\r\n        [Fact]\r\n        public void CreateVariants_dup()\r\n        {\r\n            var      builder        = new InfoDataBuilder {SvType = \"DUP\", End = 1476229, CiPos = new[] {-415, 415}, CiEnd = new[] {-291, 291}};\r\n            InfoData infoData       = builder.Create();\r\n            var      variantFactory = new VariantFactory(Sequence, _vidCreator);\r\n\r\n            IVariant[] variants = variantFactory.CreateVariants(ChromosomeUtilities.Chr1, 1463185, 1476229, \"N\", new[] {\"<DUP>\"}, infoData,\r\n                new[] {false}, false, null, null);\r\n            Assert.NotNull(variants);\r\n            Assert.Single(variants);\r\n\r\n            Assert.Equal(\"1-1463185-1476229-N-<DUP>-DUP\", variants[0].VariantId);\r\n            Assert.Equal(VariantType.duplication,         variants[0].Type);\r\n        }\r\n\r\n        // 1       37820921        MantaDUP:TANDEM:5515:0:1:0:0:0  G       <DUP:TANDEM>    .       MGE10kb END=38404543;SVTYPE=DUP;SVLEN=583622;CIPOS=0,1;CIEND=0,1;HOMLEN=1;HOMSEQ=A;SOMATIC;SOMATICSCORE=63;ColocalizedCanvas    PR:SR   39,0:44,0       202,26:192,32\r\n        [Fact]\r\n        public void CreateVariants_tandem_duplication()\r\n        {\r\n            var      builder = new InfoDataBuilder {SvType = \"DUP\", End = 38404543, SvLength = 583622, CiPos = new[] {0, 1}, CiEnd = new[] {0, 1}};\r\n            InfoData infoData = builder.Create();\r\n            var      variantFactory = new VariantFactory(Sequence, _vidCreator);\r\n\r\n            IVariant[] variants = variantFactory.CreateVariants(ChromosomeUtilities.Chr1, 723707, 2581225, \"N\", new[] {\"<DUP:TANDEM>\"}, infoData,\r\n                new[] {false}, false, null, null);\r\n            Assert.NotNull(variants);\r\n\r\n            Assert.Equal(VariantType.tandem_duplication, variants[0].Type);\r\n        }\r\n\r\n        // 1   4000000 .   N   <ROH> .   ROHLC   SVTYPE=ROH;END=4001000  GT  .   .   
1\r\n        [Fact]\r\n        public void CreateVariants_ROH()\r\n        {\r\n            var      builder        = new InfoDataBuilder {SvType = \"ROH\", End = 4001000};\r\n            InfoData infoData       = builder.Create();\r\n            var      variantFactory = new VariantFactory(Sequence, _vidCreator);\r\n\r\n            IVariant[] variants = variantFactory.CreateVariants(ChromosomeUtilities.Chr1, 400_0000, 400_1000, \"N\", new[] {\"<ROH>\"}, infoData,\r\n                new[] {false}, false, null, null);\r\n\r\n            Assert.Equal(AnnotationBehavior.RunsOfHomozygosity, variants[0].Behavior);\r\n            Assert.Equal(VariantType.run_of_homozygosity,       variants[0].Type);\r\n        }\r\n\r\n        // chr12\t106500158\t.\tGTTA\tGTA,GT\t.\t.\t.\r\n        [Fact]\r\n        public void CreateVariants_LegacyVid_DisableLeftAlignment_MultiAllelic_Deletions()\r\n        {\r\n            InfoData infoData       = new InfoDataBuilder().Create();\r\n            var      variantFactory = new VariantFactory(_chr12Seq, _legacyVidCreator);\r\n\r\n            IVariant[] variants = variantFactory.CreateVariants(ChromosomeUtilities.Chr12, 106500158, 106500161, \"GTTA\",\r\n                new[] {\"GTA\", \"GT\"}, infoData, new[] {false, false}, false, null, null);\r\n\r\n            Assert.Equal(2,                        variants.Length);\r\n            Assert.Equal(\"12:106500160:106500160\", variants[0].VariantId);\r\n            Assert.Equal(\"12:106500160:106500161\", variants[1].VariantId);\r\n        }\r\n\r\n        // chr12\t106500158\t.\tGTTA\tGTA,GT\t.\t.\t.\r\n        [Fact]\r\n        public void CreateVariants_NormalVid_EnableLeftAlignment_MultiAllelic_Deletions()\r\n        {\r\n            InfoData infoData       = new InfoDataBuilder().Create();\r\n            var      variantFactory = new VariantFactory(_chr12Seq, _vidCreator);\r\n\r\n            IVariant[] variants = variantFactory.CreateVariants(ChromosomeUtilities.Chr12, 106500158, 
106500161, \"GTTA\",\r\n                new[] {\"GTA\", \"GT\"}, infoData, new[] {false, false}, false, null, null);\r\n\r\n            Assert.Equal(2,                    variants.Length);\r\n            Assert.Equal(\"12-106500158-GT-G\",  variants[0].VariantId);\r\n            Assert.Equal(\"12-106500159-TTA-T\", variants[1].VariantId);\r\n        }\r\n\r\n        [Fact]\r\n        public void ToPosition_SNV()\r\n        {\r\n            IPosition  position = ParseVcfLine(\"chr1\t15274\tSNV\tA\tT\t.\t.\t.\t.\t.\");\r\n            IVariant[] variants = position.Variants;\r\n            Assert.NotNull(variants);\r\n\r\n            IVariant variant = variants[0];\r\n            Assert.Equal(\"1-15274-A-T\",   variant.VariantId);\r\n            Assert.Equal(VariantType.SNV, variant.Type);\r\n            Assert.Equal(15274,           variant.Start);\r\n            Assert.Equal(15274,           variant.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void ToPosition_insertion()\r\n        {\r\n            IPosition  position = ParseVcfLine(\"chr1\t15903\tINS\tG\tGC\t.\t.\t.\t.\t.\");\r\n            IVariant[] variants = position.Variants;\r\n            Assert.NotNull(variants);\r\n\r\n            IVariant variant = variants[0];\r\n            Assert.Equal(\"1-15903-G-GC\",        variant.VariantId);\r\n            Assert.Equal(VariantType.insertion, variant.Type);\r\n            Assert.Equal(15904,                 variant.Start);\r\n            Assert.Equal(15903,                 variant.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void ToPosition_deletion()\r\n        {\r\n            IPosition  position = ParseVcfLine(\"chr1\t20094\tDEL\tTAA\tT\t.\t.\t.\t.\t.\");\r\n            IVariant[] variants = position.Variants;\r\n            Assert.NotNull(variants);\r\n\r\n            IVariant variant = variants[0];\r\n            Assert.Equal(\"1-20094-TAA-T\",      variant.VariantId);\r\n            Assert.Equal(VariantType.deletion, 
variant.Type);\r\n            Assert.Equal(20095,                variant.Start);\r\n            Assert.Equal(20096,                variant.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void ToPosition_CANVAS_LOH()\r\n        {\r\n            IPosition  position = ParseVcfLine(\"chr1\t787923\tCNV_CANVAS_LOH\tN\t<CNV>\t40\t.\tSVTYPE=LOH;END=887923\tRC:BC:CN:MCC\t106.52:12642:2:2\");\r\n            IVariant[] variants = position.Variants;\r\n            Assert.NotNull(variants);\r\n\r\n            IVariant variant = variants[0];\r\n            Assert.Equal(\"1-787923-887923-A-<CNV>-LOH\",     variant.VariantId);\r\n            Assert.Equal(VariantType.copy_number_variation, variant.Type);\r\n            Assert.Equal(787924,                            variant.Start);\r\n            Assert.Equal(887923,                            variant.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void ToPosition_Manta_SmallDeletion()\r\n        {\r\n            IPosition position = ParseVcfLine(\r\n                \"chr1\t934064\tSV_SNV\tAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCG\tA\t.\t.\tEND=934904;SVTYPE=DEL\t.\t.\");\r\n            IVariant[] variants = 
position.Variants;\r\n            Assert.NotNull(variants);\r\n\r\n            IVariant variant = variants[0];\r\n            Assert.Equal(\r\n                \"1-934064-AGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCG-A\",\r\n                variant.VariantId);\r\n            Assert.Equal(VariantType.deletion, variant.Type);\r\n            Assert.Equal(934065,               variant.Start);\r\n            Assert.Equal(934904,               variant.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void ToPosition_CANVAS_CNnum()\r\n        {\r\n            IPosition position = ParseVcfLine(\r\n                \"chr1\t1037630\tCNV_CN#\tN\t<CN0>\t.\t.\tSVTYPE=CNV;END=1045024\tGT:RC:BC:CN:MCC:MCCQ:QS:FT:DQ\t0/1:60.76:8:1:.:.:22.51:PASS:.\");\r\n            IVariant[] variants = position.Variants;\r\n            Assert.NotNull(variants);\r\n\r\n            IVariant variant = variants[0];\r\n            Assert.Equal(\"1-1037630-1045024-G-<CN0>-CNV\",   variant.VariantId);\r\n            Assert.Equal(VariantType.copy_number_variation, variant.Type);\r\n            Assert.Equal(1037631,                           variant.Start);\r\n            Assert.Equal(1045024,                         
  variant.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void ToPosition_SV_DUP()\r\n        {\r\n            IPosition  position = ParseVcfLine(\"chr1\t1477854\tSV_DUP\tC\t<DUP:TANDEM>\t.\t.\tEND=1477984;SVTYPE=DUP\t.\t.\");\r\n            IVariant[] variants = position.Variants;\r\n            Assert.NotNull(variants);\r\n\r\n            IVariant variant = variants[0];\r\n            Assert.Equal(\"1-1477854-1477984-C-<DUP:TANDEM>-DUP\", variant.VariantId);\r\n            Assert.Equal(VariantType.tandem_duplication,         variant.Type);\r\n            Assert.Equal(1477855,                                variant.Start);\r\n            Assert.Equal(1477984,                                variant.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void ToPosition_SV_INS()\r\n        {\r\n            IPosition  position = ParseVcfLine(\"chr1\t1565683\tSV_INS\tG\t<INS>\t.\t.\tEND=1565684;SVTYPE=INS\t.\t.\");\r\n            IVariant[] variants = position.Variants;\r\n            Assert.NotNull(variants);\r\n\r\n            IVariant variant = variants[0];\r\n            Assert.Equal(\"1-1565683-1565684-G-<INS>-INS\", variant.VariantId);\r\n            Assert.Equal(VariantType.insertion,           variant.Type);\r\n            Assert.Equal(1565684,                         variant.Start);\r\n            Assert.Equal(1565684,                         variant.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void ToPosition_SV_INV()\r\n        {\r\n            IPosition  position = ParseVcfLine(\"chr1\t6558910\tSV_INV\tG\t<INV>\t.\t.\tEND=6559723;SVTYPE=INV\t.\t.\");\r\n            IVariant[] variants = position.Variants;\r\n            Assert.NotNull(variants);\r\n\r\n            IVariant variant = variants[0];\r\n            Assert.Equal(\"1-6558910-6559723-G-<INV>-INV\", variant.VariantId);\r\n            Assert.Equal(VariantType.inversion,           variant.Type);\r\n            Assert.Equal(6558911,                         
variant.Start);\r\n            Assert.Equal(6559723,                         variant.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void ToPosition_SV_Translocation()\r\n        {\r\n            IPosition  position = ParseVcfLine(\"chr1\t9061384\tSV_BND\tC\tC]chr14:93246833]\t.\t.\tSVTYPE=BND\t.\t.\");\r\n            IVariant[] variants = position.Variants;\r\n            Assert.NotNull(variants);\r\n\r\n            IVariant variant = variants[0];\r\n            Assert.Equal(\"1-9061384-C-C]chr14:93246833]\",    variant.VariantId);\r\n            Assert.Equal(VariantType.translocation_breakend, variant.Type);\r\n            Assert.Equal(9061384,                            variant.Start);\r\n            Assert.Equal(9061384,                            variant.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void ToPosition_DRAGEN_LOH()\r\n        {\r\n            IPosition position = ParseVcfLine(\r\n                \"chr1\t11071439\tCNV_DRAGEN_LOH\tN\t<DEL>,<DUP>\t.\t.\tSVTYPE=CNV;END=12859473;REFLEN=1788034\tGT:CN:MCN:CNQ:MCNQ:CNF:MCNF:SD:MAF:BC:AS\t1/2:2:0:1000:1000:2.03102:0.000203:248.8:0.0001:1493:1137\");\r\n            IVariant[] variants = position.Variants;\r\n            Assert.NotNull(variants);\r\n\r\n            IVariant variant = variants[0];\r\n            Assert.Equal(\"1-11071439-12859473-G-<DEL>-CNV\", variant.VariantId);\r\n            Assert.Equal(VariantType.copy_number_loss,      variant.Type);\r\n            Assert.Equal(11071440,                          variant.Start);\r\n            Assert.Equal(12859473,                          variant.End);\r\n\r\n            variant = variants[1];\r\n            Assert.Equal(\"1-11071439-12859473-G-<DUP>-CNV\", variant.VariantId);\r\n            Assert.Equal(VariantType.copy_number_gain,      variant.Type);\r\n            Assert.Equal(11071440,                          variant.Start);\r\n            Assert.Equal(12859473,                          variant.End);\r\n        }\r\n\r\n 
       [Fact]\r\n        public void ToPosition_STR()\r\n        {\r\n            IPosition position = ParseVcfLine(\r\n                \"chr3\t63912684\tSTR\tG\t<STR12>\t.\tPASS\tEND=63912714;REF=10;RL=30;RU=GCA;VARID=ATXN7;REPID=ATXN7\tGT:SO:REPCN:REPCI:ADSP:ADFL:ADIR:LC\t0/1:SPANNING/SPANNING:10/12:10-10/12-12:9/3:8/11:0/0:26.270270\");\r\n            IVariant[] variants = position.Variants;\r\n            Assert.NotNull(variants);\r\n\r\n            IVariant variant = variants[0];\r\n            Assert.Equal(\"3-63912684-63912714-G-<STR12>-STR\",       variant.VariantId);\r\n            Assert.Equal(VariantType.short_tandem_repeat_variation, variant.Type);\r\n            Assert.Equal(63912685,                                  variant.Start);\r\n            Assert.Equal(63912714,                                  variant.End);\r\n        }\r\n        \r\n        [Fact]\r\n        public void STR_without_num_throws_user_error()\r\n        {\r\n            var vcfLine =\r\n                \"chr3\t63912684\tSTR\tG\t<STR>\t.\tPASS\tEND=63912714;REF=10;RL=30;RU=GCA;VARID=ATXN7;REPID=ATXN7\tGT:SO:REPCN:REPCI:ADSP:ADFL:ADIR:LC\t0/1:SPANNING/SPANNING:10/12:10-10/12-12:9/3:8/11:0/0:26.270270\"; \r\n            \r\n            Assert.Throws<UserErrorException>(()=>ParseVcfLine(vcfLine));\r\n            \r\n        }\r\n\r\n        [Fact]\r\n        public void ToPosition_indel()\r\n        {\r\n            IPosition  position = ParseVcfLine(\"chr4\t46758265\tINDEL\tGAGGTATAGAG\tGTT\t.\t.\t.\t.\t.\");\r\n            IVariant[] variants = position.Variants;\r\n            Assert.NotNull(variants);\r\n\r\n            IVariant variant = variants[0];\r\n            Assert.Equal(\"4-46758266-AGGTATAGAG-TT\", variant.VariantId);\r\n            Assert.Equal(VariantType.indel,          variant.Type);\r\n            Assert.Equal(46758266,                   variant.Start);\r\n            Assert.Equal(46758275,                   variant.End);\r\n        }\r\n\r\n        [Fact]\r\n     
   public void ToPosition_MNV()\r\n        {\r\n            IPosition  position = ParseVcfLine(\"chr4\t67754304\tMNV\tTGA\tTTT\t.\t.\t.\t.\t.\");\r\n            IVariant[] variants = position.Variants;\r\n            Assert.NotNull(variants);\r\n\r\n            IVariant variant = variants[0];\r\n            Assert.Equal(\"4-67754305-GA-TT\", variant.VariantId);\r\n            Assert.Equal(VariantType.MNV,    variant.Type);\r\n            Assert.Equal(67754305,           variant.Start);\r\n            Assert.Equal(67754306,           variant.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void ToPosition_CNV_DUP()\r\n        {\r\n            IPosition position = ParseVcfLine(\r\n                \"chr7\t100955984\tCNV_DUP\tN\t<DUP>\t37\tPASS\tSVTYPE=CNV;END=100969873;REFLEN=13889\tGT:SM:CN:BC:PE\t./1:1.6625:3:12:48,81\");\r\n            IVariant[] variants = position.Variants;\r\n            Assert.NotNull(variants);\r\n\r\n            IVariant variant = variants[0];\r\n            Assert.Equal(\"7-100955984-100969873-C-<DUP>-CNV\", variant.VariantId);\r\n            Assert.Equal(VariantType.copy_number_gain,        variant.Type);\r\n            Assert.Equal(100955985,                           variant.Start);\r\n            Assert.Equal(100969873,                           variant.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void ToPosition_CNV_DEL()\r\n        {\r\n            IPosition position = ParseVcfLine(\r\n                \"chr7\t110541589\tCNV_DEL\tN\t<DEL>\t27\tcnvLength\tSVTYPE=CNV;END=110548681;REFLEN=7092\tGT:SM:CN:BC:PE\t0/1:0.443182:1:7:19,17\");\r\n            IVariant[] variants = position.Variants;\r\n            Assert.NotNull(variants);\r\n\r\n            IVariant variant = variants[0];\r\n            Assert.Equal(\"7-110541589-110548681-T-<DEL>-CNV\", variant.VariantId);\r\n            Assert.Equal(VariantType.copy_number_loss,        variant.Type);\r\n            Assert.Equal(110541590,                           
variant.Start);\r\n            Assert.Equal(110548681,                           variant.End);\r\n        }\r\n\r\n        [Fact]\r\n        public void ToPosition_CustomSampleInfoFields()\r\n        {\r\n            IPosition position = ParseVcfLine(\r\n                \"chr7\t110541589\tCNV_DEL\tN\t<DEL>\t27\tcnvLength\tSVTYPE=CNV;END=110548681;REFLEN=7092\tGT:SM:CN:BC:PE:CF\t0/1:0.443182:1:7:19,17:0.1,1.2\");\r\n            IVariant[] variants = position.Variants;\r\n            Assert.NotNull(variants);\r\n\r\n            IVariant variant = variants[0];\r\n            Assert.Equal(\"7-110541589-110548681-T-<DEL>-CNV\", variant.VariantId);\r\n            Assert.Equal(VariantType.copy_number_loss,        variant.Type);\r\n            Assert.Equal(110541590,                           variant.Start);\r\n            Assert.Equal(110548681,                           variant.End);\r\n\r\n            Assert.NotNull(position.Samples);\r\n            var sample = position.Samples[0];\r\n            Assert.Contains(\"{\\\"CF\\\":\\\"0.1,1.2\\\"}\", sample.CustomFields.ToString()!);\r\n        }\r\n\r\n        [Fact]\r\n        public void ToPosition_ROH()\r\n        {\r\n            IPosition  position = ParseVcfLine(\"chr22\t36690136\tROH\tN\t<ROH>\t.\t.\tEND=36788158;SVTYPE=ROH\t.\t.\");\r\n            IVariant[] variants = position.Variants;\r\n            Assert.NotNull(variants);\r\n\r\n            IVariant variant = variants[0];\r\n            Assert.Equal(\"22-36690136-36788158-C-<ROH>-ROH\", variant.VariantId);\r\n            Assert.Equal(VariantType.run_of_homozygosity,    variant.Type);\r\n            Assert.Equal(36690137,                           variant.Start);\r\n            Assert.Equal(36788158,                           variant.End);\r\n        }\r\n\r\n        // this is actually on GRCh37\r\n        [Fact]\r\n        public void ToPosition_MultiAllelic_Deletions()\r\n        {\r\n            IPosition  position = 
ParseVcfLine(\"chr12\t106500158\t.\tGTTA\tGTA,GT\t.\t.\t.\");\r\n            IVariant[] variants = position.Variants;\r\n            Assert.NotNull(variants);\r\n\r\n            IVariant variant = variants[0];\r\n            Assert.Equal(\"12-106500158-GT-G\",  variant.VariantId);\r\n            Assert.Equal(VariantType.deletion, variant.Type);\r\n            Assert.Equal(106500159,            variant.Start);\r\n            Assert.Equal(106500159,            variant.End);\r\n\r\n            variant = variants[1];\r\n            Assert.Equal(\"12-106500159-TTA-T\", variant.VariantId);\r\n            Assert.Equal(VariantType.deletion, variant.Type);\r\n            Assert.Equal(106500160,            variant.Start);\r\n            Assert.Equal(106500161,            variant.End);\r\n        }\r\n        \r\n        [Fact]\r\n        public void ToPosition_Giant_dbsnp155_variant()\r\n        {\r\n            IPosition  position = ParseVcfLine(\"15\\t23102333\\trs1894384199\\tATGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGC\\tATGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGG
TGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGCGGGAGCAGGAGGGGCAGGTGC\\t.\\t.\\tR5;VC=INDEL;GNO;INT;FREQ=GnomAD:.,1,1.068e-05;RS=1894384199;SSR=0;GENEINFO=LOC283683:283683|LOC729900:729900\");\r\n            IVariant[] variants = position.Variants;\r\n            Assert.NotNull(variants);\r\n\r\n            IVariant variant = variants[0];\r\n            //this variant shifts more than what is shown here. Due to mocking limitations, we limit it to 2 iterations of \r\n            // left rotation of 50bp each\r\n            Assert.Equal(VariantType.deletion, variant.Type);\r\n            Assert.Equal(23102762,             variant.Start);\r\n            Assert.Equal(23102824,             variant.End);\r\n\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/Vcf/VariantCreator/VariantFactoryTestsWithLegacyVids.cs",
    "content": "﻿using System.IO;\nusing Genome;\nusing Moq;\nusing OptimizedCore;\nusing UnitTests.TestDataStructures;\nusing UnitTests.TestUtilities;\nusing VariantAnnotation.Interface.IO;\nusing VariantAnnotation.Interface.Positions;\nusing VariantAnnotation.Interface.Providers;\nusing Variants;\nusing Vcf;\nusing Vcf.VariantCreator;\nusing Xunit;\n\nnamespace UnitTests.Vcf.VariantCreator\n{\n    public sealed class VariantFactoryTestsWithLegacyVids\n    {\n        private readonly Mock<ISequence>   _sequenceMock = new();\n        private readonly ISequenceProvider _sequenceProvider;\n        private readonly VariantFactory    _variantFactory;\n\n        public VariantFactoryTestsWithLegacyVids()\n        {\n            _sequenceProvider = new SimpleSequenceProvider(GenomeAssembly.GRCh38, _sequenceMock.Object, ChromosomeUtilities.RefNameToChromosome);\n            var vidCreator = new LegacyVariantId(ChromosomeUtilities.RefNameToChromosome);\n            _variantFactory = new VariantFactory(_sequenceMock.Object, vidCreator);\n        }\n\n        private IPosition ParseVcfLine(string vcfLine)\n        {\n            string[] vcfFields = vcfLine.OptimizedSplit('\\t');\n            Chromosome chromosome =\n                ReferenceNameUtilities.GetChromosome(ChromosomeUtilities.RefNameToChromosome, vcfFields[VcfCommon.ChromIndex]);\n\n            (int start, bool foundError) = vcfFields[VcfCommon.PosIndex].OptimizedParseInt32();\n            if (foundError) throw new InvalidDataException($\"Unable to convert the VCF position to an integer: {vcfFields[VcfCommon.PosIndex]}\");\n\n            var simplePosition = SimplePosition.GetSimplePosition(chromosome, start, vcfFields, new NullVcfFilter());\n\n            return Position.ToPosition(simplePosition, null, _sequenceProvider, null, _variantFactory);\n        }\n\n        [Fact]\n        public void ToPosition_SNV()\n        {\n            IPosition  position = 
ParseVcfLine(\"chr1\t15274\tSNV\tA\tT\t.\t.\t.\t.\t.\");\n            IVariant[] variants = position.Variants;\n            Assert.NotNull(variants);\n\n            IVariant variant = variants[0];\n            Assert.Equal(\"1:15274:T\",     variant.VariantId);\n            Assert.Equal(VariantType.SNV, variant.Type);\n            Assert.Equal(15274,           variant.Start);\n            Assert.Equal(15274,           variant.End);\n        }\n\n        [Fact]\n        public void ToPosition_insertion()\n        {\n            IPosition  position = ParseVcfLine(\"chr1\t15903\tINS\tG\tGC\t.\t.\t.\t.\t.\");\n            IVariant[] variants = position.Variants;\n            Assert.NotNull(variants);\n\n            IVariant variant = variants[0];\n            Assert.Equal(\"1:15904:15903:C\",     variant.VariantId);\n            Assert.Equal(VariantType.insertion, variant.Type);\n            Assert.Equal(15904,                 variant.Start);\n            Assert.Equal(15903,                 variant.End);\n        }\n\n        [Fact]\n        public void ToPosition_deletion()\n        {\n            IPosition  position = ParseVcfLine(\"chr1\t20094\tDEL\tTAA\tT\t.\t.\t.\t.\t.\");\n            IVariant[] variants = position.Variants;\n            Assert.NotNull(variants);\n\n            IVariant variant = variants[0];\n            Assert.Equal(\"1:20095:20096\",      variant.VariantId);\n            Assert.Equal(VariantType.deletion, variant.Type);\n            Assert.Equal(20095,                variant.Start);\n            Assert.Equal(20096,                variant.End);\n        }\n\n        [Fact]\n        public void ToPosition_CANVAS_LOH()\n        {\n            IPosition  position = ParseVcfLine(\"chr1\t787923\tCNV_CANVAS_LOH\tN\t<CNV>\t40\t.\tSVTYPE=LOH;END=887923\tRC:BC:CN:MCC\t106.52:12642:2:2\");\n            IVariant[] variants = position.Variants;\n            Assert.NotNull(variants);\n\n            IVariant variant = variants[0];\n            
Assert.Equal(\"1:787924:887923:CNV\",             variant.VariantId);\n            Assert.Equal(VariantType.copy_number_variation, variant.Type);\n            Assert.Equal(787924,                            variant.Start);\n            Assert.Equal(887923,                            variant.End);\n        }\n\n        [Fact]\n        public void ToPosition_Manta_SmallDeletion()\n        {\n            IPosition position = ParseVcfLine(\n                \"chr1\t934064\tSV_SNV\tAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCG\tA\t.\t.\tEND=934904;SVTYPE=DEL\t.\t.\");\n            IVariant[] variants = position.Variants;\n            Assert.NotNull(variants);\n\n            IVariant variant = variants[0];\n            Assert.Equal(\n                \"1:934065:934904\",\n                variant.VariantId);\n            Assert.Equal(VariantType.deletion, variant.Type);\n            Assert.Equal(934065,               variant.Start);\n            Assert.Equal(934904,               variant.End);\n        }\n\n        [Fact]\n        public void ToPosition_CANVAS_CNnum()\n        {\n            IPosition position =\n                
ParseVcfLine(\"chr1\t1037630\tCNV_CN#\tN\t<CN0>\t.\t.\tSVTYPE=CNV;END=1045024\tGT:RC:BC:CN:MCC:MCCQ:QS:FT:DQ\t0/1:60.76:8:1:.:.:22.51:PASS:.\");\n            IVariant[] variants = position.Variants;\n            Assert.NotNull(variants);\n\n            IVariant variant = variants[0];\n            Assert.Equal(\"1:1037631:1045024:CN0\",           variant.VariantId);\n            Assert.Equal(VariantType.copy_number_variation, variant.Type);\n            Assert.Equal(1037631,                           variant.Start);\n            Assert.Equal(1045024,                           variant.End);\n        }\n\n        [Fact]\n        public void ToPosition_SV_DUP()\n        {\n            IPosition  position = ParseVcfLine(\"chr1\t1477854\tSV_DUP\tC\t<DUP:TANDEM>\t.\t.\tEND=1477984;SVTYPE=DUP\t.\t.\");\n            IVariant[] variants = position.Variants;\n            Assert.NotNull(variants);\n\n            IVariant variant = variants[0];\n            Assert.Equal(\"1:1477855:1477984:TDUP\",       variant.VariantId);\n            Assert.Equal(VariantType.tandem_duplication, variant.Type);\n            Assert.Equal(1477855,                        variant.Start);\n            Assert.Equal(1477984,                        variant.End);\n        }\n\n        [Fact]\n        public void ToPosition_SV_INS()\n        {\n            IPosition  position = ParseVcfLine(\"chr1\t1565683\tSV_INS\tG\t<INS>\t.\t.\tEND=1565684;SVTYPE=INS\t.\t.\");\n            IVariant[] variants = position.Variants;\n            Assert.NotNull(variants);\n\n            IVariant variant = variants[0];\n            Assert.Equal(\"1:1565684:1565684:INS\", variant.VariantId);\n            Assert.Equal(VariantType.insertion,   variant.Type);\n            Assert.Equal(1565684,                 variant.Start);\n            Assert.Equal(1565684,                 variant.End);\n        }\n\n        [Fact]\n        public void ToPosition_SV_INV()\n        {\n            IPosition  position = 
ParseVcfLine(\"chr1\t6558910\tSV_INV\tG\t<INV>\t.\t.\tEND=6559723;SVTYPE=INV\t.\t.\");\n            IVariant[] variants = position.Variants;\n            Assert.NotNull(variants);\n\n            IVariant variant = variants[0];\n            Assert.Equal(\"1:6558911:6559723:Inverse\", variant.VariantId);\n            Assert.Equal(VariantType.inversion,       variant.Type);\n            Assert.Equal(6558911,                     variant.Start);\n            Assert.Equal(6559723,                     variant.End);\n        }\n\n        [Fact]\n        public void ToPosition_SV_Translocation()\n        {\n            IPosition  position = ParseVcfLine(\"chr1\t9061384\tSV_BND\tC\tC]chr14:93246833]\t.\t.\tSVTYPE=BND\t.\t.\");\n            IVariant[] variants = position.Variants;\n            Assert.NotNull(variants);\n\n            IVariant variant = variants[0];\n            Assert.Equal(\"1:9061384:+:14:93246833:-\",        variant.VariantId);\n            Assert.Equal(VariantType.translocation_breakend, variant.Type);\n            Assert.Equal(9061384,                            variant.Start);\n            Assert.Equal(9061384,                            variant.End);\n        }\n\n        [Fact]\n        public void ToPosition_DRAGEN_LOH()\n        {\n            IPosition position =\n                ParseVcfLine(\n                    \"chr1\t11071439\tCNV_DRAGEN_LOH\tN\t<DEL>,<DUP>\t.\t.\tSVTYPE=CNV;END=12859473;REFLEN=1788034\tGT:CN:MCN:CNQ:MCNQ:CNF:MCNF:SD:MAF:BC:AS\t1/2:2:0:1000:1000:2.03102:0.000203:248.8:0.0001:1493:1137\");\n            IVariant[] variants = position.Variants;\n            Assert.NotNull(variants);\n\n            IVariant variant = variants[0];\n            Assert.Equal(\"1:11071440:12859473:CDEL\",   variant.VariantId);\n            Assert.Equal(VariantType.copy_number_loss, variant.Type);\n            Assert.Equal(11071440,                     variant.Start);\n            Assert.Equal(12859473,                     variant.End);\n\n            
variant = variants[1];\n            Assert.Equal(\"1:11071440:12859473:CDUP\",   variant.VariantId);\n            Assert.Equal(VariantType.copy_number_gain, variant.Type);\n            Assert.Equal(11071440,                     variant.Start);\n            Assert.Equal(12859473,                     variant.End);\n        }\n\n        [Fact]\n        public void ToPosition_STR()\n        {\n            IPosition position =\n                ParseVcfLine(\n                    \"chr3\t63912684\tSTR\tG\t<STR12>\t.\tPASS\tEND=63912714;REF=10;RL=30;RU=GCA;VARID=ATXN7;REPID=ATXN7\tGT:SO:REPCN:REPCI:ADSP:ADFL:ADIR:LC\t0/1:SPANNING/SPANNING:10/12:10-10/12-12:9/3:8/11:0/0:26.270270\");\n            IVariant[] variants = position.Variants;\n            Assert.NotNull(variants);\n\n            IVariant variant = variants[0];\n            Assert.Equal(\"3:63912685:63912714:GCA:12\",              variant.VariantId);\n            Assert.Equal(VariantType.short_tandem_repeat_variation, variant.Type);\n            Assert.Equal(63912685,                                  variant.Start);\n            Assert.Equal(63912714,                                  variant.End);\n        }\n\n        [Fact]\n        public void ToPosition_indel()\n        {\n            IPosition  position = ParseVcfLine(\"chr4\t46758265\tINDEL\tGAGGTATAGAG\tGTT\t.\t.\t.\t.\t.\");\n            IVariant[] variants = position.Variants;\n            Assert.NotNull(variants);\n\n            IVariant variant = variants[0];\n            Assert.Equal(\"4:46758266:46758275:TT\", variant.VariantId);\n            Assert.Equal(VariantType.indel,        variant.Type);\n            Assert.Equal(46758266,                 variant.Start);\n            Assert.Equal(46758275,                 variant.End);\n        }\n\n        [Fact]\n        public void ToPosition_MNV()\n        {\n            IPosition  position = ParseVcfLine(\"chr4\t67754304\tMNV\tTGA\tTTT\t.\t.\t.\t.\t.\");\n            IVariant[] variants = 
position.Variants;\n            Assert.NotNull(variants);\n\n            IVariant variant = variants[0];\n            Assert.Equal(\"4:67754305:67754306:TT\", variant.VariantId);\n            Assert.Equal(VariantType.MNV,          variant.Type);\n            Assert.Equal(67754305,                 variant.Start);\n            Assert.Equal(67754306,                 variant.End);\n        }\n\n        [Fact]\n        public void ToPosition_CNV_DUP()\n        {\n            IPosition position =\n                ParseVcfLine(\"chr7\t100955984\tCNV_DUP\tN\t<DUP>\t37\tPASS\tSVTYPE=CNV;END=100969873;REFLEN=13889\tGT:SM:CN:BC:PE\t./1:1.6625:3:12:48,81\");\n            IVariant[] variants = position.Variants;\n            Assert.NotNull(variants);\n\n            IVariant variant = variants[0];\n            Assert.Equal(\"7:100955985:100969873:CDUP\", variant.VariantId);\n            Assert.Equal(VariantType.copy_number_gain, variant.Type);\n            Assert.Equal(100955985,                    variant.Start);\n            Assert.Equal(100969873,                    variant.End);\n        }\n\n        [Fact]\n        public void ToPosition_CNV_DEL()\n        {\n            IPosition position =\n                ParseVcfLine(\n                    \"chr7\t110541589\tCNV_DEL\tN\t<DEL>\t27\tcnvLength\tSVTYPE=CNV;END=110548681;REFLEN=7092\tGT:SM:CN:BC:PE\t0/1:0.443182:1:7:19,17\");\n            IVariant[] variants = position.Variants;\n            Assert.NotNull(variants);\n\n            IVariant variant = variants[0];\n            Assert.Equal(\"7:110541590:110548681:CDEL\", variant.VariantId);\n            Assert.Equal(VariantType.copy_number_loss, variant.Type);\n            Assert.Equal(110541590,                    variant.Start);\n            Assert.Equal(110548681,                    variant.End);\n        }\n\n        [Fact]\n        public void ToPosition_ROH()\n        {\n            IPosition  position = 
ParseVcfLine(\"chr22\t36690136\tROH\tN\t<ROH>\t.\t.\tEND=36788158;SVTYPE=ROH\t.\t.\");\n            IVariant[] variants = position.Variants;\n            Assert.NotNull(variants);\n\n            IVariant variant = variants[0];\n            Assert.Equal(\"22:36690137:36788158:ROH\",      variant.VariantId);\n            Assert.Equal(VariantType.run_of_homozygosity, variant.Type);\n            Assert.Equal(36690137,                        variant.Start);\n            Assert.Equal(36788158,                        variant.End);\n        }\n\n        // this is actually on GRCh37\n        [Fact]\n        public void ToPosition_MultiAllelic_Deletions()\n        {\n            IPosition  position = ParseVcfLine(\"chr12\t106500158\t.\tGTTA\tGTA,GT\t.\t.\t.\");\n            IVariant[] variants = position.Variants;\n            Assert.NotNull(variants);\n\n            IVariant variant = variants[0];\n            Assert.Equal(\"12:106500160:106500160\", variant.VariantId);\n            Assert.Equal(VariantType.deletion,     variant.Type);\n            Assert.Equal(106500160,                variant.Start);\n            Assert.Equal(106500160,                variant.End);\n\n            variant = variants[1];\n            Assert.Equal(\"12:106500160:106500161\", variant.VariantId);\n            Assert.Equal(VariantType.deletion,     variant.Type);\n            Assert.Equal(106500160,                variant.Start);\n            Assert.Equal(106500161,                variant.End);\n        }\n    }\n}"
  },
  {
    "path": "UnitTests/Vcf/VariantCreator/VariantIdTests.cs",
    "content": "﻿using Genome;\r\nusing Moq;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.Interface;\r\nusing Vcf.VariantCreator;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Vcf.VariantCreator\r\n{\r\n    public sealed class VariantIdTests\r\n    {\r\n        private readonly ISequence _sequence;\r\n        private readonly VariantId _vidCreator = new VariantId();\r\n\r\n        public VariantIdTests()\r\n        {\r\n            var sequenceMock = new Mock<ISequence>();\r\n            sequenceMock.Setup(x => x.Substring(999, 1)).Returns(\"N\");\r\n            sequenceMock.Setup(x => x.Substring(66520, 1)).Returns(\"T\");\r\n            sequenceMock.Setup(x => x.Substring(66571, 1)).Returns(\"G\");\r\n            sequenceMock.Setup(x => x.Substring(321681, 1)).Returns(\"G\");\r\n            sequenceMock.Setup(x => x.Substring(477967, 1)).Returns(\"A\");\r\n            sequenceMock.Setup(x => x.Substring(1350081, 1)).Returns(\"C\");\r\n            sequenceMock.Setup(x => x.Substring(1477853, 1)).Returns(\"A\");\r\n            sequenceMock.Setup(x => x.Substring(1477967, 1)).Returns(\"A\");\r\n            sequenceMock.Setup(x => x.Substring(1715897, 1)).Returns(\"A\");\r\n            sequenceMock.Setup(x => x.Substring(2633402, 1)).Returns(\"G\");\r\n            sequenceMock.Setup(x => x.Substring(2633403, 1)).Returns(\"G\");\r\n            sequenceMock.Setup(x => x.Substring(2650425, 1)).Returns(\"N\");\r\n\r\n            _sequence = sequenceMock.Object;\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(66507, \"T\", \".\", \"1-66507-T-T\")]\r\n        [InlineData(66507, \"T\", \"A\", \"1-66507-T-A\")]\r\n        [InlineData(66522, \"\", \"ATATA\", \"1-66521-T-TATATA\")]\r\n        [InlineData(66573, \"TA\", \"\", \"1-66572-GTA-G\")]\r\n        [InlineData(66573, \"\", \"TACTATATATTA\", \"1-66572-G-GTACTATATATTA\")]\r\n        public void Create_SmallVariants_ReturnShortVid(int position, string refAllele, string altAllele, string 
expectedVid)\r\n        {\r\n            string observedVid = _vidCreator.Create(_sequence, VariantCategory.SmallVariant, null, ChromosomeUtilities.Chr1, position, position, refAllele, altAllele,\r\n                null);\r\n            Assert.Equal(expectedVid, observedVid);\r\n        }\r\n\r\n        [Fact]\r\n        public void Create_TranslocationBreakend_ReturnShortVid()\r\n        {\r\n            string observedVid = _vidCreator.Create(_sequence, VariantCategory.SV, \"BND\", ChromosomeUtilities.Chr1, 2617277, 2617277, \"A\",\r\n                \"AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[\", null);\r\n            Assert.Equal(\"1-2617277-A-AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[\", observedVid);\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(1000, 3001000, \"N\", \"<ROH>\", \"ROH\", VariantCategory.ROH, \"1-1000-3001000-N-<ROH>-ROH\")]\r\n        [InlineData(1350082, 1351320, \"N\", \"<DEL>\", \"DEL\", VariantCategory.SV, \"1-1350082-1351320-C-<DEL>-DEL\")]\r\n        [InlineData(1477854, 1477984, \"N\", \"<DUP:TANDEM>\", \"DUP\", VariantCategory.SV, \"1-1477854-1477984-A-<DUP:TANDEM>-DUP\")]\r\n        [InlineData(1477968, 1477968, \"N\", \"<INS>\", \"INS\", VariantCategory.SV, \"1-1477968-1477968-A-<INS>-INS\")]\r\n        [InlineData(1715898, 1750149, \"N\", \"<DUP>\", \"CNV\", VariantCategory.CNV, \"1-1715898-1750149-A-<DUP>-CNV\")]\r\n        [InlineData(2650426, 2653074, \"N\", \"<DEL>\", \"CNV\", VariantCategory.CNV, \"1-2650426-2653074-N-<DEL>-CNV\")]\r\n        [InlineData(321682, 421681, \"N\", \"<INV>\", \"INV\", VariantCategory.SV, \"1-321682-421681-G-<INV>-INV\")]\r\n        [InlineData(2633403, 2633421, \"N\", \"<STR2>\", \"\", VariantCategory.RepeatExpansion, \"1-2633403-2633421-G-<STR2>-STR\")]\r\n        public void Create_StructuralVariants_RecoverRefAllele_ReturnLongVid(int position, int endPosition,\r\n            string refAllele, string altAllele, string svType, VariantCategory category, string expectedVid)\r\n  
      {\r\n            string observedVid = _vidCreator.Create(_sequence, category, svType, ChromosomeUtilities.Chr1, position, endPosition, refAllele, altAllele, null);\r\n            Assert.Equal(expectedVid, observedVid);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/Vcf/VcfFilterTests.cs",
    "content": "﻿using System.IO;\r\nusing System.Text;\r\nusing Genome;\r\nusing UnitTests.TestUtilities;\r\nusing Vcf;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Vcf\r\n{\r\n    public sealed class VcfFilterTests\r\n    {\r\n\r\n        [Fact]\r\n        public void FastForward_UcscNamingStyle_ChangeReaderStateCorrectly()\r\n        {\r\n            var annotationRange = new GenomicRange(new GenomicPosition(ChromosomeUtilities.Chr1, 100), new GenomicPosition(ChromosomeUtilities.Chr1, 200) );\r\n\r\n            var vcfFilter = new VcfFilter(annotationRange);\r\n\r\n            const string firstLineInRange = \"chr1\\t100\\t.\\tC\\tT\\t165.00\\tPASS\\tSNVSB=-12.5;SNVHPOL=2\\tGT:GQ:GQX:DP:DPF:AD\\t0/1:119:35:25:0:8,17\";\r\n\r\n            using (var ms = new MemoryStream())\r\n            {\r\n                using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\r\n                {\r\n                    writer.WriteLine(\"#Header line 1\");\r\n                    writer.WriteLine(\"#Header line 2\");\r\n                    writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\tMother\");\r\n                    writer.WriteLine(\"chr2\\t150\\t.\\tG\\tA\\t5.00\\tLowGQXHetSNP\\tSNVSB=0.0;SNVHPOL=2\\tGT:GQ:GQX:DP:DPF:AD\\t0/1:3:1:1:0:0,1\");\r\n                    writer.WriteLine(\"chr1\\t90\\t.\\tT\\tC\\t1.00\\tLowGQXHetSNP\\tSNVSB=0.0;SNVHPOL=2\\tGT:GQ:GQX:DP:DPF:AD\\t0/1:23:9:3:0:2,1\");\r\n                    writer.WriteLine(\"chr1\\t95\\t.\\tA\\tT\\t2.00\\tLowGQXHetSNP\\tSNVSB=0.0;SNVHPOL=2\\tGT:GQ:GQX:DP:DPF:AD\\t0/1:23:9:3:0:2,1\");\r\n                    writer.WriteLine(firstLineInRange);\r\n                    writer.WriteLine(\"chr1\\t102\\t.\\tC\\tA\\t3.00\\tLowGQXHetSNP\\tSNVSB=0.0;SNVHPOL=5\\tGT:GQ:GQX:DP:DPF:AD\\t0/1:29:2:2:0:1,1\");\r\n\r\n                }\r\n\r\n                ms.Position = 0;\r\n\r\n                using (var reader = new StreamReader(ms))\r\n                {\r\n                    
vcfFilter.FastForward(reader);\r\n                    Assert.Equal(firstLineInRange, vcfFilter.BufferedLine);\r\n                }\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void FastForward_EnsemblNamingStyle_ChangeReaderStateCorrectly()\r\n        {\r\n            var annotationRange = new GenomicRange(new GenomicPosition(ChromosomeUtilities.Chr1, 100), new GenomicPosition(ChromosomeUtilities.Chr1, 200));\r\n\r\n            var vcfFilter = new VcfFilter(annotationRange);\r\n\r\n            const string firstLineInRange = \"1\\t100\\t.\\tC\\tT\\t165.00\\tPASS\\tSNVSB=-12.5;SNVHPOL=2\\tGT:GQ:GQX:DP:DPF:AD\\t0/1:119:35:25:0:8,17\";\r\n\r\n            using (var ms = new MemoryStream())\r\n            {\r\n                using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))\r\n                {\r\n                    writer.WriteLine(\"#Header line 1\");\r\n                    writer.WriteLine(\"#Header line 2\");\r\n                    writer.WriteLine(\"#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\tFORMAT\\tMother\");\r\n                    writer.WriteLine(\"2\\t150\\t.\\tG\\tA\\t5.00\\tLowGQXHetSNP\\tSNVSB=0.0;SNVHPOL=2\\tGT:GQ:GQX:DP:DPF:AD\\t0/1:3:1:1:0:0,1\");\r\n                    writer.WriteLine(\"1\\t90\\t.\\tT\\tC\\t1.00\\tLowGQXHetSNP\\tSNVSB=0.0;SNVHPOL=2\\tGT:GQ:GQX:DP:DPF:AD\\t0/1:23:9:3:0:2,1\");\r\n                    writer.WriteLine(\"1\\t95\\t.\\tA\\tT\\t2.00\\tLowGQXHetSNP\\tSNVSB=0.0;SNVHPOL=2\\tGT:GQ:GQX:DP:DPF:AD\\t0/1:23:9:3:0:2,1\");\r\n                    writer.WriteLine(firstLineInRange);\r\n                    writer.WriteLine(\"1\\t102\\t.\\tC\\tA\\t3.00\\tLowGQXHetSNP\\tSNVSB=0.0;SNVHPOL=5\\tGT:GQ:GQX:DP:DPF:AD\\t0/1:29:2:2:0:1,1\");\r\n\r\n                }\r\n\r\n                ms.Position = 0;\r\n\r\n                using (var reader = new StreamReader(ms))\r\n                {\r\n                    vcfFilter.FastForward(reader);\r\n                    
Assert.Equal(firstLineInRange, vcfFilter.BufferedLine);\r\n                }\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void GetNextLine_NoBufferedLine_ReadNextLine()\r\n        {\r\n            var vcfFilter = new VcfFilter(null);\r\n            using (var ms = new MemoryStream(Encoding.UTF8.GetBytes(\"first line\\nsecond line\\n\")))\r\n            using (var reader = new StreamReader(ms))\r\n            {\r\n                string nextLine = vcfFilter.GetNextLine(reader);\r\n                Assert.Equal(\"first line\", nextLine);\r\n            }\r\n        }\r\n\r\n        [Fact]\r\n        public void GetNextLine_ReturnBufferedLine()\r\n        {\r\n            const string bufferedLine = \"I am buffered\";\r\n            var vcfFilter = new VcfFilter(null) {BufferedLine = bufferedLine};\r\n\r\n            string nextLine = vcfFilter.GetNextLine(null);\r\n            Assert.Equal(bufferedLine, nextLine);\r\n\r\n        }\r\n\r\n        [Fact]\r\n        public void PassedTheEnd_AsExpected()\r\n        {\r\n            var annotationRange = new GenomicRange(new GenomicPosition(ChromosomeUtilities.Chr1, 100), new GenomicPosition(ChromosomeUtilities.Chr1, 200));\r\n            var vcfFilter = new VcfFilter(annotationRange);\r\n\r\n            Assert.False(vcfFilter.PassedTheEnd(ChromosomeUtilities.Chr1, 150));\r\n            Assert.False(vcfFilter.PassedTheEnd(ChromosomeUtilities.Chr1, 200));\r\n            Assert.True(vcfFilter.PassedTheEnd(ChromosomeUtilities.Chr1, 201));\r\n            Assert.True(vcfFilter.PassedTheEnd(ChromosomeUtilities.Chr2, 150));\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "UnitTests/Vcf/VcfInfoParserTests.cs",
    "content": "﻿using VariantAnnotation.Interface.Positions;\r\nusing Vcf.Info;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Vcf\r\n{\r\n    public sealed class VcfInfoParserTests\r\n    {\r\n        [Fact]\r\n        public void Parse_Somatic_Manta()\r\n        {\r\n            IInfoData info =\r\n                VcfInfoParser.Parse(\r\n                    \"END=1660503;SVTYPE=DEL;SVLEN=-65919;IMPRECISE;CIPOS=-285,285;CIEND=-205,205;SOMATIC;SOMATICSCORE=36;ColocalizedCanvas\");\r\n            Assert.Equal(65919,             info.SvLength);\r\n            Assert.Equal(1660503,           info.End);\r\n            Assert.Equal(36,                info.JointSomaticNormalQuality);\r\n            Assert.Equal(new[] {-285, 285}, info.CiPos);\r\n            Assert.Equal(new[] {-205, 205}, info.CiEnd);\r\n            Assert.True(info.IsImprecise);\r\n        }\r\n\r\n        [Fact]\r\n        public void Parse_Somatic_Strelka()\r\n        {\r\n            var info = VcfInfoParser.Parse(\"SOMATIC;QSS=2;TQSS=1;NT=het;QSS_NT=2;TQSS_NT=1;SGT=CG->CG;DP=183;MQ=46.57;MQ0=15;ALTPOS=35;ALTMAP=24;ReadPosRankSum=-1.23;SNVSB=0.00;PNOISE=0.00;PNOISE2=0.00;VQSR=1.23\");\r\n            Assert.Equal(1.23, info.RecalibratedQuality);\r\n            Assert.Equal(2, info.JointSomaticNormalQuality);\r\n        }\r\n\r\n        [Fact]\r\n        public void Parse_GATK()\r\n        {\r\n            var info = VcfInfoParser.Parse(\"AC=2;AF=0.250;AN=8;BaseQRankSum=1.719;DB;DP=106;Dels=0.00;FS=20.202;HaplotypeScore=0.0000;MLEAC=2;MLEAF=0.250;MQ=43.50;MQ0=52;MQRankSum=2.955;QD=4.73;ReadPosRankSum=1.024;SB=-1.368e+02;VQSLOD=-0.3503;culprit=MQ;PLF\");\r\n\r\n            Assert.Equal(-136.8, info.StrandBias);\r\n            Assert.Equal(20.202, info.FisherStrandBias);\r\n            Assert.Equal(43.50, info.MappingQuality);\r\n        }\r\n        \r\n        [Fact]\r\n        public void Parse_Breakend_Event_Id()\r\n        {\r\n            var info = 
VcfInfoParser.Parse(\"SVTYPE=BND;MATEID=MantaBND:2312:0:1:1:0:0:0;IMPRECISE;CIPOS=-344,344;EVENT=MantaBND:2312:0:1:0:0:0:0;JUNCTION_QUAL=204;BND_DEPTH=38;MATE_BND_DEPTH=46\");\r\n\r\n            Assert.Equal(\"MantaBND:2312:0:1:0:0:0:0\", info.BreakendEventId);\r\n        }\r\n\r\n        [Fact]\r\n        public void EmptyInfoField()\r\n        {\r\n            Assert.Null(VcfInfoParser.Parse(\"\"));\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/Vcf/VcfReaderTests.cs",
    "content": "﻿using System.IO;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing IO;\r\nusing Moq;\r\nusing UnitTests.SAUtils.InputFileParsers;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing Vcf;\r\nusing Vcf.VariantCreator;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Vcf\r\n{\r\n    public sealed class VcfReaderTests\r\n    {\r\n        private MemoryStream _ms;\r\n        private StreamWriter _streamWriter;\r\n        private readonly VariantId _vidCreator = new VariantId();\r\n\r\n        private void AddLines(string[] lines)\r\n        {\r\n            _ms = new MemoryStream();\r\n            _streamWriter = new StreamWriter(_ms);\r\n            foreach (string headline in lines)\r\n            {\r\n                _streamWriter.WriteLine(headline);\r\n            }\r\n            _streamWriter.Flush();\r\n\r\n            _ms.Position = 0;\r\n        }\r\n\r\n        [Fact]\r\n        public void ValidateVcfHeader_ExceptionThrown_NoFileFormat()\r\n        {\r\n            var headers = new[] { \"##Some comments\", \"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNHL-16\tNHL-17\" };\r\n            AddLines(headers);\r\n            var seqProvider = ParserTestUtils.GetSequenceProvider(1000, \"A\", 'T', ChromosomeUtilities.RefNameToChromosome);\r\n            var reader = FileUtilities.GetStreamReader(_ms);\r\n            Assert.Throws<UserErrorException>(() => VcfReader.Create(reader, reader, seqProvider, null, new NullVcfFilter(), _vidCreator, null));\r\n        }\r\n\r\n        [Fact]\r\n        public void ValidateVcfHeader_ExceptionThrown_NoChromHeaderLine()\r\n        {\r\n            var headers = new[] { \"##fileformat=VCFv4.1\", \"##FILTER=<ID=PASS,Description=\\\"All filters passed\\\">\", \"##fileDate=20160920\" };\r\n            AddLines(headers);\r\n            var seqProvider = ParserTestUtils.GetSequenceProvider(1000, \"A\", 'T', 
ChromosomeUtilities.RefNameToChromosome);\r\n            var reader = FileUtilities.GetStreamReader(_ms);\r\n            Assert.Throws<UserErrorException>(() => VcfReader.Create(reader, reader, seqProvider, null, new NullVcfFilter(), _vidCreator, null));\r\n        }\r\n\r\n        [Fact]\r\n        public void Sample_names_are_reported()\r\n        {\r\n            var headers = new[] { \"##fileformat=VCFv4.1\", \"##FILTER=<ID=PASS,Description=\\\"All filters passed\\\">\", \"##fileDate=20160920\", \"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNHL-16\tNHL-17\" };\r\n            AddLines(headers);\r\n            string[] samples;\r\n            var seqProvider = ParserTestUtils.GetSequenceProvider(1000, \"A\", 'T', ChromosomeUtilities.RefNameToChromosome);\r\n\r\n            using (var reader = FileUtilities.GetStreamReader(_ms))\r\n            using (var vcfReader = VcfReader.Create(reader, reader, seqProvider, null, new NullVcfFilter(), _vidCreator, null))\r\n            {\r\n                samples = vcfReader.GetSampleNames();\r\n            }\r\n\r\n            Assert.Equal(new[] { \"NHL-16\", \"NHL-17\" }, samples);\r\n        }\r\n\r\n        [Fact]\r\n        public void GetChromAndLengthInfo_ReturnEmptyArray_NoProperPrefix()\r\n        {\r\n            Assert.Empty(VcfReader.GetChromAndLengthInfo(\"##fileformat=VCFv\"));\r\n        }\r\n\r\n        [Fact]\r\n        public void GetChromAndLengthInfo_ReturnEmptyArray_NoChromInfo()\r\n        {\r\n            Assert.Empty(VcfReader.GetChromAndLengthInfo(\"##contig=<ID>\"));\r\n        }\r\n\r\n        [Fact]\r\n        public void GetChromAndLengthInfo_ReturnEmptyArray_NoLengthInfo()\r\n        {\r\n            Assert.Empty(VcfReader.GetChromAndLengthInfo(\"##contig=<ID=chr1>\"));\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(\"##contig=<ID=chr1,length=343>\")]\r\n        [InlineData(\"##contig=<ID=X,length=1239495\")]\r\n        public void 
CheckContigId_IncorrectAutoAndSexChromLength_ThrowException(string contigLine)\r\n        {\r\n            var headers = new[] { \"##fileformat=VCFv4.1\", contigLine, \"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\" };\r\n            AddLines(headers);\r\n            var seqProvider = ParserTestUtils.GetSequenceProvider(1000, \"A\", 'T', ChromosomeUtilities.RefNameToChromosome);\r\n\r\n            using (var reader = FileUtilities.GetStreamReader(_ms))\r\n                Assert.Throws<UserErrorException>(() => VcfReader.Create(reader, reader, seqProvider, null, new NullVcfFilter(), _vidCreator, null));\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(\"##contig=<ID=unknown_contig,length=1232455>\")]\r\n        [InlineData(\"##contig=<ID=random_chrom,length=98772>\")]\r\n        public void CheckContigId_InferredAssemblyIsUnknown_GivenIrregularChrom(string contigLine)\r\n        {\r\n            var headers = new[] { \"##fileformat=VCFv4.1\", contigLine, \"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\" };\r\n            AddLines(headers);\r\n            var seqProvider = ParserTestUtils.GetSequenceProvider(1000, \"A\", 'T', ChromosomeUtilities.RefNameToChromosome);\r\n\r\n            using (var reader = FileUtilities.GetStreamReader(_ms))\r\n            using (var vcfReader = VcfReader.Create(reader, reader, seqProvider, null, new NullVcfFilter(), _vidCreator, null))\r\n            {\r\n                Assert.Equal(GenomeAssembly.Unknown, vcfReader.InferredGenomeAssembly);\r\n            }\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(\"##contig=<ID=chrM,length=16569>\")]\r\n        [InlineData(\"##contig=<ID=MT,length=16569>\")]\r\n        public void CheckContigId_IsRcrsMitochondrionTrue_InferredAssemblyIsUnknown_GivenRcrsChrMLength(string contigLine)\r\n        {\r\n            var headers = new[] { \"##fileformat=VCFv4.1\", contigLine, \"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\" };\r\n            
AddLines(headers);\r\n            var seqProvider = ParserTestUtils.GetSequenceProvider(1000, \"A\", 'T', ChromosomeUtilities.RefNameToChromosome);\r\n            using (var reader = FileUtilities.GetStreamReader(_ms))\r\n            using (var vcfReader = VcfReader.Create(reader, reader, seqProvider, null, new NullVcfFilter(), _vidCreator, null))\r\n            {\r\n                Assert.Equal(GenomeAssembly.Unknown, vcfReader.InferredGenomeAssembly);\r\n                Assert.True(vcfReader.IsRcrsMitochondrion);\r\n            }\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(\"##contig=<ID=chrM,length=1234>\")]\r\n        [InlineData(\"##contig=<ID=MT,length=5678>\")]\r\n        public void CheckContigId_IsRcrsMitochondrionFalse_InferredAssemblyIsUnknown_GivenNonRcrsChrMLength(string contigLine)\r\n        {\r\n            var headers = new[] { \"##fileformat=VCFv4.1\", contigLine, \"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\" };\r\n            AddLines(headers);\r\n            var seqProvider = ParserTestUtils.GetSequenceProvider(1000, \"A\", 'T', ChromosomeUtilities.RefNameToChromosome);\r\n\r\n            using (var reader = FileUtilities.GetStreamReader(_ms))\r\n            using (var vcfReader = VcfReader.Create(reader, reader, seqProvider, null, new NullVcfFilter(), _vidCreator, null))\r\n            {\r\n                Assert.Equal(GenomeAssembly.Unknown, vcfReader.InferredGenomeAssembly);\r\n                Assert.False(vcfReader.IsRcrsMitochondrion);\r\n            }\r\n        }\r\n\r\n        [Theory]\r\n        [InlineData(\"##contig=<ID=chr1,length=248956422>\", new[] { \"chr1\", \"248956422\" })]\r\n        [InlineData(\"##contig=<ID=2,length=242193529>\", new[] { \"2\", \"242193529\" })]\r\n        [InlineData(\"##contig=<ID=chrM,length=16569>\", new[] { \"chrM\", \"16569\" })]\r\n        [InlineData(\"##contig=<ID=MT,length=16569>\", new[] { \"MT\", \"16569\" })]\r\n        public void GetChromAndLength_AsExpect(string 
line, string[] info)\r\n        {\r\n            Assert.Equal(info, VcfReader.GetChromAndLengthInfo(line));\r\n        }\r\n\r\n        [Fact]\r\n        public void GetNextPosition()\r\n        {\r\n            const string vcfLine = \"chr1\t13133\t.\tT\tC\t36.00\tPASS\tSNVSB=0.0;SNVHPOL=4\tGT:GQ:GQX:DP:DPF:AD\t0/1:62:20:7:1:3,4\";\r\n            var lines = new[]\r\n            {\r\n                \"##fileformat=VCFv4.1\", \"##FILTER=<ID=PASS,Description=\\\"All filters passed\\\">\", \"##fileDate=20160920\",\r\n                \"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNHL-16\", vcfLine\r\n            };\r\n\r\n            AddLines(lines);\r\n\r\n            var refMinorProvider = new Mock<IRefMinorProvider>();\r\n            var seqProvider = ParserTestUtils.GetSequenceProvider(13133, \"T\", 'A', ChromosomeUtilities.RefNameToChromosome);\r\n            IPosition observedResult;\r\n\r\n            using (var reader = FileUtilities.GetStreamReader(_ms))\r\n            using (var vcfReader = VcfReader.Create(reader, reader, seqProvider, refMinorProvider.Object, new NullVcfFilter(), _vidCreator, null))\r\n            {\r\n                observedResult = vcfReader.GetNextPosition();\r\n            }\r\n\r\n            var expectedResult = PositionPool.Get(ChromosomeUtilities.Chr1, 13133, 13133, \"T\", new[] { \"C\" }, 36, new[] { \"PASS\" }, null,\r\n                null, null, vcfLine.Split('\\t'), new[] { false }, false);\r\n\r\n            Assert.NotNull(observedResult);\r\n            Assert.Equal(expectedResult.End, observedResult.End);\r\n            Assert.Equal(expectedResult.AltAlleles, observedResult.AltAlleles);\r\n            Assert.Equal(expectedResult.Filters, observedResult.Filters);\r\n            Assert.Equal(expectedResult.Quality, observedResult.Quality);\r\n            Assert.Equal(expectedResult.VcfFields, observedResult.VcfFields);\r\n            \r\n            PositionPool.Return(expectedResult);\r\n        }\r\n\r\n        
[Fact]\r\n        public void CheckSampleConsistency_oneSample()\r\n        {\r\n            const string vcfLine1 = \"chr1\t13133\t.\tT\tC\t36.00\tPASS\tSNVSB=0.0;SNVHPOL=4\tGT:GQ:GQX:DP:DPF:AD\t0/1:62:20:7:1:3,4\";\r\n            const string vcfLine2 = \"chr1\t13133\t.\tT\tA\t36.00\tPASS\tSNVSB=0.0;SNVHPOL=4\tGT:GQ:GQX:DP:DPF:AD\";\r\n            var lines = new[]\r\n            {\r\n                \"##fileformat=VCFv4.1\", \"##FILTER=<ID=PASS,Description=\\\"All filters passed\\\">\", \"##fileDate=20160920\",\r\n                \"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNHL-16\", vcfLine1, vcfLine2\r\n            };\r\n\r\n            AddLines(lines);\r\n            \r\n            var refMinorProvider = new Mock<IRefMinorProvider>();\r\n            var seqProvider = ParserTestUtils.GetSequenceProvider(13133, \"T\", 'A', ChromosomeUtilities.RefNameToChromosome);\r\n\r\n            using (var reader = FileUtilities.GetStreamReader(_ms))\r\n            using (var vcfReader = VcfReader.Create(reader, reader, seqProvider, refMinorProvider.Object, new NullVcfFilter(), _vidCreator, null))\r\n            {\r\n                //first line is valid. 
So, no exception\r\n                Assert.NotNull(vcfReader.GetNextPosition()); \r\n                // second line has invalid number of sample fields, so it will throw exception\r\n                Assert.Throws<UserErrorException>(()=>vcfReader.GetNextPosition());\r\n            }\r\n            \r\n        }\r\n\r\n        [Fact]\r\n        public void CheckSampleConsistency_noSample()\r\n        {\r\n            const string vcfLine1 = \"chr1\t13133\t.\tT\tC\t36.00\tPASS\tSNVSB=0.0;SNVHPOL=4\";\r\n            const string vcfLine2 = \"chr1\t13133\t.\tT\tA\t36.00\tPASS\tSNVSB=0.0;SNVHPOL=4\tGT:GQ:GQX:DP:DPF:AD\t0/1:62:20:7:1:3,4\";\r\n            var lines = new[]\r\n            {\r\n                \"##fileformat=VCFv4.1\", \"##FILTER=<ID=PASS,Description=\\\"All filters passed\\\">\", \"##fileDate=20160920\",\r\n                \"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\", vcfLine1, vcfLine2\r\n            };\r\n\r\n            AddLines(lines);\r\n            \r\n            var refMinorProvider = new Mock<IRefMinorProvider>();\r\n            var seqProvider = ParserTestUtils.GetSequenceProvider(13133, \"T\", 'A', ChromosomeUtilities.RefNameToChromosome);\r\n\r\n            using (var reader = FileUtilities.GetStreamReader(_ms))\r\n            using (var vcfReader = VcfReader.Create(reader, reader, seqProvider, refMinorProvider.Object, new NullVcfFilter(), _vidCreator, null))\r\n            {\r\n                //first line is valid. So, no exception\r\n                Assert.NotNull(vcfReader.GetNextPosition());\r\n                // second line has invalid number of sample fields, so it will throw exception\r\n                Assert.Throws<UserErrorException>(() => vcfReader.GetNextPosition());\r\n            }\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "UnitTests/Vcf/VcfReaderUtilsTests.cs",
    "content": "﻿using System.Linq;\r\nusing Moq;\r\nusing UnitTests.SAUtils.InputFileParsers;\r\nusing UnitTests.TestUtilities;\r\nusing VariantAnnotation;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing Vcf.VariantCreator;\r\nusing Xunit;\r\n\r\nnamespace UnitTests.Vcf\r\n{\r\n    public sealed class VcfReaderUtilsTests\r\n    {\r\n        private readonly VariantId _vidCreator = new VariantId();\r\n\r\n#if (NI_ALLELE)\r\n        [Fact]\r\n        public void ParseVcfLine_NonInformativeAlleles_Alone_NotFiltered()\r\n        {\r\n            const string vcfLine1 = \"chr1\t13133\t.\tT\t<*>\t36.00\tPASS\tSNVSB=0.0;SNVHPOL=4\tGT:GQ:GQX:DP:DPF:AD\t0/1:62:20:7:1:3,4\";\r\n            const string vcfLine2 = \"chr1\t13133\t.\tT\t*\t36.00\tPASS\tSNVSB=0.0;SNVHPOL=4\tGT:GQ:GQX:DP:DPF:AD\t0/1:62:20:7:1:3,4\";\r\n            const string vcfLine3 = \"chr1\t13133\t.\tT\t<M>\t36.00\tPASS\tSNVSB=0.0;SNVHPOL=4\tGT:GQ:GQX:DP:DPF:AD\t0/1:62:20:7:1:3,4\";\r\n\r\n            var refMinorProvider    = new Mock<IRefMinorProvider>();\r\n            var seqProvider = ParserTestUtils.GetSequenceProvider(13133, \"T\", 'A', ChromosomeUtilities.RefNameToChromosome);\r\n            var variantFactory      = new VariantFactory(seqProvider);\r\n\r\n            var position1 = AnnotationUtilities.ParseVcfLine(vcfLine1, refMinorProvider.Object, variantFactory, seqProvider.RefNameToChromosome);\r\n            var position2 = AnnotationUtilities.ParseVcfLine(vcfLine2, refMinorProvider.Object, variantFactory, seqProvider.RefNameToChromosome);\r\n            var position3 = AnnotationUtilities.ParseVcfLine(vcfLine3, refMinorProvider.Object, variantFactory, seqProvider.RefNameToChromosome);\r\n\r\n            var annotatedVariants1 = Annotator.GetAnnotatedVariants(position1.Variants);\r\n            var annotatedVariants2 = Annotator.GetAnnotatedVariants(position2.Variants);\r\n            var annotatedVariants3 = Annotator.GetAnnotatedVariants(position3.Variants);\r\n\r\n            // 
SimplePositions unchanged\r\n            Assert.Equal(\"<*>\", position1.AltAlleles[0]);\r\n            Assert.Equal(\"*\", position2.AltAlleles[0]);\r\n            Assert.Equal(\"<M>\", position3.AltAlleles[0]);\r\n\r\n            // Variants not filtered\r\n            Assert.Equal(\"<*>\", annotatedVariants1[0].Variant.AltAllele);\r\n            Assert.Equal(\"*\", annotatedVariants2[0].Variant.AltAllele);\r\n            Assert.Equal(\"<M>\", annotatedVariants3[0].Variant.AltAllele);\r\n        }\r\n\r\n        [Fact]\r\n        public void ParseVcfLine_NonInformativeAlleles_WithNormalAllele_NotFiltered()\r\n        {\r\n            const string vcfLine1 = \"chr1\t13133\t.\tT\t<*>,G\t36.00\tPASS\tSNVSB=0.0;SNVHPOL=4\tGT:GQ:GQX:DP:DPF:AD\t0/1:62:20:7:1:3,4\";\r\n            const string vcfLine2 = \"chr1\t13133\t.\tT\t*,C\t36.00\tPASS\tSNVSB=0.0;SNVHPOL=4\tGT:GQ:GQX:DP:DPF:AD\t0/1:62:20:7:1:3,4\";\r\n            const string vcfLine3 = \"chr1\t13133\t.\tT\t<M>,A\t36.00\tPASS\tSNVSB=0.0;SNVHPOL=4\tGT:GQ:GQX:DP:DPF:AD\t0/1:62:20:7:1:3,4\";\r\n            const string vcfLine4 = \"chr1\t13133\t.\tT\tA,<NON_REF>\t36.00\tPASS\tSNVSB=0.0;SNVHPOL=4\tGT:GQ:GQX:DP:DPF:AD\t0/1:62:20:7:1:3,4\";\r\n\r\n            var refMinorProvider = new Mock<IRefMinorProvider>();\r\n            var seqProvider = ParserTestUtils.GetSequenceProvider(13133, \"T\", 'A', ChromosomeUtilities.RefNameToChromosome);\r\n            var refNameToChromosome = seqProvider.RefNameToChromosome;\r\n\r\n            var variantFactory = new VariantFactory(seqProvider);\r\n\r\n            var position1 = AnnotationUtilities.ParseVcfLine(vcfLine1, refMinorProvider.Object, variantFactory, refNameToChromosome);\r\n            var position2 = AnnotationUtilities.ParseVcfLine(vcfLine2, refMinorProvider.Object, variantFactory, refNameToChromosome);\r\n            var position3 = AnnotationUtilities.ParseVcfLine(vcfLine3, refMinorProvider.Object, variantFactory, refNameToChromosome);\r\n            var position4 
= AnnotationUtilities.ParseVcfLine(vcfLine4, refMinorProvider.Object, variantFactory, refNameToChromosome);\r\n\r\n            var annotatedVariants1 = Annotator.GetAnnotatedVariants(position1.Variants);\r\n            var annotatedVariants2 = Annotator.GetAnnotatedVariants(position2.Variants);\r\n            var annotatedVariants3 = Annotator.GetAnnotatedVariants(position3.Variants);\r\n            var annotatedVariants4 = Annotator.GetAnnotatedVariants(position4.Variants);\r\n\r\n            // SimplePositions\r\n            Assert.Equal(new[] { \"<*>\", \"G\" }, position1.AltAlleles);\r\n            Assert.Equal(new[] { \"*\", \"C\" }, position2.AltAlleles);\r\n            Assert.Equal(new[] { \"<M>\", \"A\" }, position3.AltAlleles);\r\n            Assert.Equal(new[] { \"A\", \"<NON_REF>\" }, position4.AltAlleles);\r\n\r\n            // Variants\r\n            Assert.Equal(new[] { \"<*>\", \"G\" }, annotatedVariants1.Select(x => x.Variant.AltAllele).ToArray());\r\n            Assert.Equal(new[] { \"*\", \"C\" }, annotatedVariants2.Select(x => x.Variant.AltAllele).ToArray());\r\n            Assert.Equal(new[] { \"<M>\", \"A\" }, annotatedVariants3.Select(x => x.Variant.AltAllele).ToArray());\r\n            Assert.Equal(new[] { \"A\", \"<NON_REF>\" }, annotatedVariants4.Select(x => x.Variant.AltAllele).ToArray());\r\n        }\r\n#endif\r\n\r\n        [Fact]\r\n        public void Test_crash_caused_by_variant_trimming ()\r\n        {\r\n            const string vcfLine1 = \"chr1\t8021910\trs373653682\tGGTGCTGGACGGTGTCCCT\tG\t.\t.\t.\";\r\n\r\n            var refMinorProvider    = new Mock<IRefMinorProvider>();\r\n            var seqProvider         = ParserTestUtils.GetSequenceProvider(8021910, \"GGTGCTGGACGGTGTCCCT\", 'A', ChromosomeUtilities.RefNameToChromosome);\r\n\r\n            var variantFactory = new VariantFactory(seqProvider.Sequence, _vidCreator);\r\n\r\n            var position1           = AnnotationUtilities.ParseVcfLine(vcfLine1, 
refMinorProvider.Object, seqProvider, null, variantFactory);\r\n\r\n            var annotatedVariants1 = Annotator.GetAnnotatedVariants(position1.Variants);\r\n\r\n            // SimplePositions\r\n            Assert.Equal(new[] { \"G\"}, position1.AltAlleles);\r\n\r\n            // Variants\r\n            Assert.Equal(new[] { \"\" }, annotatedVariants1.Select(x => x.Variant.AltAllele).ToArray());\r\n        }\r\n\r\n\r\n        [Fact]\r\n        public void ParseVcfLine_line_with_only_NonRef_is_refMinor()\r\n        {\r\n            const string vcfLine = \"1\t10628385\t.\tC\t<NON_REF>\t.\tLowGQX;HighDPFRatio\tEND=10628385;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:24:9:18\";\r\n\r\n            var refMinorProvider = new Mock<IRefMinorProvider>();\r\n            refMinorProvider.Setup(x => x.GetGlobalMajorAllele(ChromosomeUtilities.Chr1, 10628385)).Returns(\"T\");\r\n            var seqProvider =\r\n                ParserTestUtils.GetSequenceProvider(10628385, \"C\", 'A', ChromosomeUtilities.RefNameToChromosome);\r\n            var variantFactory = new VariantFactory(seqProvider.Sequence, _vidCreator);\r\n            \r\n            var position          = AnnotationUtilities.ParseVcfLine(vcfLine, refMinorProvider.Object, seqProvider, null, variantFactory);\r\n            var annotatedVariants = Annotator.GetAnnotatedVariants(position.Variants);\r\n\r\n            Assert.Equal(\"C\", position.RefAllele);\r\n            Assert.Equal(new[] { \"<NON_REF>\" }, position.AltAlleles);\r\n            Assert.Equal(\"T\", position.Variants[0].RefAllele);\r\n            Assert.Equal(\"C\", position.Variants[0].AltAllele);\r\n\r\n            // Variants\r\n            Assert.Equal(new[] { \"C\" }, annotatedVariants.Select(x => x.Variant.AltAllele).ToArray());\r\n        }\r\n\r\n        [Fact]\r\n        public void ParseVcfLine_line_with_only_NonRef_is_not_refMinor()\r\n        {\r\n            const string vcfLine = 
\"1\t10005\t.\tC\t<NON_REF>\t.\tLowGQX\tEND=10034;BLOCKAVG_min30p3a\tGT:GQX:DP:DPF\t0/0:3:1:0\";\r\n\r\n            var refMinorProvider = new Mock<IRefMinorProvider>();\r\n            var seqProvider =\r\n                ParserTestUtils.GetSequenceProvider(10005, \"C\", 'A', ChromosomeUtilities.RefNameToChromosome);\r\n            var variantFactory = new VariantFactory(seqProvider.Sequence, _vidCreator);\r\n\r\n            var position          = AnnotationUtilities.ParseVcfLine(vcfLine, refMinorProvider.Object, seqProvider, null, variantFactory);\r\n            var annotatedVariants = Annotator.GetAnnotatedVariants(position.Variants);\r\n\r\n            Assert.Equal(\"C\", position.RefAllele);\r\n            Assert.Equal(new[] { \"<NON_REF>\" }, position.AltAlleles);\r\n            Assert.Null(position.Variants);\r\n            Assert.Null(annotatedVariants);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Algorithms/Swap.cs",
    "content": "﻿namespace VariantAnnotation.Algorithms\r\n{\r\n    public static class Swap\r\n    {\r\n        /// <summary>\r\n        /// swaps two integers\r\n        /// </summary>\r\n        public static void Int(ref int a, ref int b)\r\n        {\r\n            var temp = a;\r\n            a = b;\r\n            b = temp;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/AnnotatedPosition.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Linq;\r\nusing System.Text;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.IO;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions\r\n{\r\n    public sealed class AnnotatedPosition : IAnnotatedPosition\r\n    {\r\n        public IPosition                       Position               { get; private set; }\r\n        public string                          CytogeneticBand        { get; set; }\r\n        public IAnnotatedVariant[]             AnnotatedVariants      { get; private set; }\r\n        public IList<ISupplementaryAnnotation> SupplementaryIntervals { get; } = new List<ISupplementaryAnnotation>();\r\n\r\n        public void Initialize(IPosition position, IAnnotatedVariant[] annotatedVariants)\r\n        {\r\n            Position          = position;\r\n            AnnotatedVariants = annotatedVariants;\r\n            SupplementaryIntervals.Clear();\r\n        }\r\n\r\n   \r\n        public StringBuilder GetJsonStringBuilder()\r\n        {\r\n            if (AnnotatedVariants == null || AnnotatedVariants.Length == 0) return null;\r\n\r\n            var sb = StringBuilderPool.Get();\r\n            var jsonObject = new JsonObject(sb);\r\n\r\n            sb.Append(JsonObject.OpenBrace);\r\n\r\n            string originalChromName = Position.VcfFields[0];\r\n\r\n            jsonObject.AddStringValue(\"chromosome\",  originalChromName);\r\n            jsonObject.AddIntValue(\"position\",       Position.Start);\r\n\r\n            if (Position.HasShortTandemRepeat)\r\n            {\r\n                jsonObject.AddStringValue(\"repeatUnit\",  Position.InfoData?.RepeatUnit);\r\n                jsonObject.AddIntValue(\"refRepeatCount\", Position.InfoData?.RefRepeatCount);\r\n            }\r\n\r\n            if (Position.HasStructuralVariant) 
jsonObject.AddIntValue(\"svEnd\", Position.InfoData?.End);\r\n\r\n            jsonObject.AddStringValue(\"refAllele\", Position.RefAllele);\r\n            jsonObject.AddStringValues(\"altAlleles\", Position.AltAlleles);\r\n\r\n            jsonObject.AddDoubleValue(\"quality\", Position.Quality);\r\n\r\n            jsonObject.AddStringValues(\"filters\", Position.Filters);\r\n\r\n            jsonObject.AddIntValues(\"ciPos\",   Position.InfoData?.CiPos);\r\n            jsonObject.AddIntValues(\"ciEnd\",   Position.InfoData?.CiEnd);\r\n            jsonObject.AddIntValue(\"svLength\", Position.InfoData?.SvLength);\r\n            jsonObject.AddStringValue(\"breakendEventId\", Position.InfoData?.BreakendEventId);\r\n\r\n            jsonObject.AddDoubleValue(\"strandBias\",             Position.InfoData?.StrandBias,JsonCommon.FrequencyRoundingFormat);\r\n            jsonObject.AddDoubleValue(\"fisherStrandBias\",             Position.InfoData?.FisherStrandBias,\"0.###\");\r\n            jsonObject.AddDoubleValue(\"mappingQuality\",             Position.InfoData?.MappingQuality,\"0.##\");\r\n            jsonObject.AddIntValue(\"jointSomaticNormalQuality\", Position.InfoData?.JointSomaticNormalQuality);\r\n            jsonObject.AddDoubleValue(\"recalibratedQuality\",    Position.InfoData?.RecalibratedQuality);\r\n\r\n            jsonObject.AddStringValue(\"cytogeneticBand\", CytogeneticBand);\r\n            jsonObject.AddDoubleValue(\"logOddsRatio\", Position.InfoData?.LogOddsRatio, \"0.###\");\r\n            \r\n            //adding object of custom vcf info fields\r\n            if (Position.InfoData!=null && !Position.InfoData.CustomKeyValues.IsEmpty())\r\n            {\r\n                jsonObject.AddObjectValue(\"vcfInfo\", Position.InfoData.CustomKeyValues);\r\n            }\r\n\r\n            if (Position.Samples != null && Position.Samples.Length > 0) jsonObject.AddStringValues(\"samples\", Position.Samples.Select(s => s.GetJsonString()), false);\r\n\r\n          
  if (SupplementaryIntervals != null && SupplementaryIntervals.Any())\r\n            {\r\n                AddSuppIntervalToJsonObject(jsonObject);\r\n            }\r\n\r\n            \r\n\t\t\tvar variantStringBuilders = AnnotatedVariants.Select(v => v.GetJsonStringBuilder(originalChromName)).ToArray();\r\n            jsonObject.AddStringValues(\"variants\", variantStringBuilders , false);\r\n\r\n            foreach (StringBuilder builder in variantStringBuilders)\r\n            {\r\n                StringBuilderPool.Return(builder);\r\n            }\r\n\t\t\tsb.Append(JsonObject.CloseBrace);\r\n            return sb;\r\n        }\r\n        private void AddSuppIntervalToJsonObject(JsonObject jsonObject)\r\n        {\r\n            foreach (var si in SupplementaryIntervals) jsonObject.AddObjectValue(si.JsonKey, si);\r\n        }\r\n\r\n        \r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/AnnotatedRegulatoryRegion.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Linq;\r\nusing System.Text;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.IO;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions\r\n{\r\n    public sealed class AnnotatedRegulatoryRegion : IAnnotatedRegulatoryRegion\r\n    {\r\n        public IRegulatoryRegion RegulatoryRegion { get; }\r\n        public IEnumerable<ConsequenceTag> Consequences { get; }\r\n\r\n        public AnnotatedRegulatoryRegion(IRegulatoryRegion regulatoryRegion, List<ConsequenceTag> consequences)\r\n        {\r\n            RegulatoryRegion = regulatoryRegion;\r\n            Consequences     = consequences;\r\n        }\r\n\r\n        public void SerializeJson(StringBuilder sb)\r\n        {\r\n            var jsonObject = new JsonObject(sb);\r\n\r\n            sb.Append(JsonObject.OpenBrace);\r\n            jsonObject.AddStringValue(\"id\", RegulatoryRegion.Id.WithVersion);\r\n            jsonObject.AddStringValue(\"type\", RegulatoryRegion.Type.ToString());\r\n            jsonObject.AddStringValues(\"consequence\", Consequences?.Select(ConsequenceUtil.GetConsequence));\r\n            sb.Append(JsonObject.CloseBrace);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/AnnotatedVariant.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Text;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.IO;\r\nusing Variants;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions\r\n{\r\n    public sealed class AnnotatedVariant : IAnnotatedVariant\r\n    {\r\n        public IVariant                          Variant                   { get; private set; }\r\n        public string                            HgvsgNotation             { get; set; }\r\n        public IList<IAnnotatedRegulatoryRegion> RegulatoryRegions         { get; } = new List<IAnnotatedRegulatoryRegion>();\r\n        public IList<IAnnotatedTranscript>       Transcripts               { get; } = new List<IAnnotatedTranscript>();\r\n        public IList<ISupplementaryAnnotation>   SaList                    { get; } = new List<ISupplementaryAnnotation>();\r\n        public ISupplementaryAnnotation          RepeatExpansionPhenotypes { get; set; }\r\n        public double?                           PhylopScore               { get; set; }\r\n        public double?                           
GerpScore               { get; set; }\r\n        \r\n        public bool InLowComplexityRegion { get; set; }\r\n        \r\n        public void Initialize(IVariant variant)\r\n        {\r\n            Variant       = variant;\r\n            HgvsgNotation = null;\r\n            RegulatoryRegions.Clear();\r\n            Transcripts.Clear();\r\n            SaList.Clear();\r\n            RepeatExpansionPhenotypes = null;\r\n            PhylopScore               = null;\r\n            GerpScore                 = null;\r\n        }\r\n\r\n        \r\n        public StringBuilder GetJsonStringBuilder(string originalChromName)\r\n        {\r\n            var sb         = StringBuilderPool.Get();\r\n            var jsonObject = new JsonObject(sb);\r\n\r\n            // data section\r\n            sb.Append(JsonObject.OpenBrace);\r\n\r\n            jsonObject.AddStringValue(\"vid\", Variant.VariantId);\r\n            jsonObject.AddStringValue(\"chromosome\", originalChromName);\r\n            jsonObject.AddIntValue(\"begin\", Variant.Start);\r\n            jsonObject.AddIntValue(\"end\", Variant.End);\r\n            jsonObject.AddBoolValue(\"isReferenceMinorAllele\", Variant.IsRefMinor);\r\n            jsonObject.AddBoolValue(\"isStructuralVariant\", Variant.IsStructuralVariant);\r\n\r\n            jsonObject.AddStringValue(\"refAllele\",\r\n                string.IsNullOrEmpty(Variant.RefAllele) ? \"-\" : Variant.RefAllele);\r\n            jsonObject.AddStringValue(\"altAllele\",\r\n                string.IsNullOrEmpty(Variant.AltAllele) ? 
\"-\" : Variant.AltAllele);\r\n\r\n            jsonObject.AddStringValue(\"variantType\", Variant.Type.ToString());\r\n            jsonObject.AddBoolValue(\"isDecomposedVariant\", Variant.IsDecomposed);\r\n            jsonObject.AddBoolValue(\"isRecomposedVariant\", Variant.IsRecomposed);\r\n            jsonObject.AddStringValues(\"linkedVids\", Variant.LinkedVids);\r\n            jsonObject.AddStringValue(\"hgvsg\", HgvsgNotation);\r\n\r\n            jsonObject.AddDoubleValue(\"phylopScore\", PhylopScore);\r\n            jsonObject.AddDoubleValue(\"gerpScore\", GerpScore);\r\n            jsonObject.AddBoolValue(\"inLowComplexityRegion\", InLowComplexityRegion);\r\n\r\n            if (RegulatoryRegions?.Count > 0) jsonObject.AddObjectValues(\"regulatoryRegions\", RegulatoryRegions);\r\n\r\n            foreach (ISupplementaryAnnotation saItem in SaList)\r\n            {\r\n                jsonObject.AddObjectValue(saItem.JsonKey, saItem);\r\n            }\r\n\r\n            jsonObject.AddObjectValue(RepeatExpansionPhenotypes?.JsonKey, RepeatExpansionPhenotypes);\r\n\r\n            if (Transcripts?.Count > 0) jsonObject.AddObjectValues(\"transcripts\", Transcripts);\r\n\r\n            sb.Append(JsonObject.CloseBrace);\r\n            return sb;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/Consequence/Consequences.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Variants;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions.Consequence\r\n{\r\n    public sealed class Consequences\r\n    {\r\n        private readonly VariantType _variantType;\r\n        private readonly List<ConsequenceTag> _consequences;\r\n        public List<ConsequenceTag> GetConsequences() => _consequences;\r\n\r\n        private readonly IVariantEffect _variantEffect;\r\n        private readonly IFeatureVariantEffects _featureEffect;\r\n\r\n        private readonly (Func<bool>, ConsequenceTag)[] _tier3Consequences;\r\n\r\n        private static readonly HashSet<ConsequenceTag> ConsequencesThatNeedTranscriptVariant = new HashSet<ConsequenceTag>\r\n        {\r\n            ConsequenceTag.feature_elongation, \r\n            ConsequenceTag.feature_truncation,\r\n            ConsequenceTag.short_tandem_repeat_change, \r\n            ConsequenceTag.short_tandem_repeat_contraction,\r\n            ConsequenceTag.short_tandem_repeat_expansion, \r\n            ConsequenceTag.transcript_ablation\r\n        };\r\n\r\n        public Consequences(VariantType variantType, IVariantEffect variantEffect, IFeatureVariantEffects featureEffect)\r\n        {\r\n            _variantType   = variantType;\r\n            _consequences  = new List<ConsequenceTag>();\r\n            _variantEffect = variantEffect;\r\n            _featureEffect = featureEffect;\r\n\r\n            _tier3Consequences = new List<(Func<bool>, ConsequenceTag)>\r\n            {\r\n                (() => _variantEffect.IsSpliceDonorVariant(),                     ConsequenceTag.splice_donor_variant),\r\n                (() => _variantEffect.IsSpliceAcceptorVariant(),                  ConsequenceTag.splice_acceptor_variant),\r\n                (() => _variantEffect.IsStopGained(),                             ConsequenceTag.stop_gained),\r\n                (() => 
_variantEffect.IsFrameshiftVariant(),                      ConsequenceTag.frameshift_variant),\r\n                (() => _variantEffect.IsStopLost(),                               ConsequenceTag.stop_lost),\r\n                (() => _variantEffect.IsStartLost(),                              ConsequenceTag.start_lost),\r\n                (() => _variantEffect.IsInframeInsertion(),                       ConsequenceTag.inframe_insertion),\r\n                (() => _variantEffect.IsInframeDeletion(),                        ConsequenceTag.inframe_deletion),\r\n                (() => _variantEffect.IsMissenseVariant(),                        ConsequenceTag.missense_variant),\r\n                (() => _variantEffect.IsProteinAlteringVariant(),                 ConsequenceTag.protein_altering_variant),\r\n                (() => _variantEffect.IsSpliceRegionVariant(),                    ConsequenceTag.splice_region_variant),\r\n                (() => _variantEffect.IsIncompleteTerminalCodonVariant(),         ConsequenceTag.incomplete_terminal_codon_variant),\r\n                (() => _variantEffect.IsStartRetained(),                          ConsequenceTag.start_retained_variant),\r\n                (() => _variantEffect.IsStopRetained(),                           ConsequenceTag.stop_retained_variant),\r\n                (() => _variantEffect.IsSynonymousVariant(),                      ConsequenceTag.synonymous_variant),\r\n                (() => _variantEffect.IsCodingSequenceVariant(),                  ConsequenceTag.coding_sequence_variant),\r\n                (() => _variantEffect.IsFivePrimeUtrVariant(),                    ConsequenceTag.five_prime_UTR_variant),\r\n                (() => _variantEffect.IsThreePrimeUtrVariant(),                   ConsequenceTag.three_prime_UTR_variant),\r\n                (() => _variantEffect.IsNonCodingTranscriptExonVariant(),         ConsequenceTag.non_coding_transcript_exon_variant),\r\n                (() => 
_variantEffect.IsWithinIntron(),                           ConsequenceTag.intron_variant),\r\n                (() => _variantEffect.IsNonsenseMediatedDecayTranscriptVariant(), ConsequenceTag.NMD_transcript_variant),\r\n                (() => _variantEffect.IsNonCodingTranscriptVariant(),             ConsequenceTag.non_coding_transcript_variant),\r\n                (() => _featureEffect.Elongation(),                               ConsequenceTag.feature_elongation),\r\n                (() => _featureEffect.Truncation(),                               ConsequenceTag.feature_truncation)\r\n            }.ToArray();\r\n        }\r\n\r\n        public static List<ConsequenceTag> DetermineFlankingVariantEffects(bool isDownstreamVariant) => new List<ConsequenceTag>(1)\r\n            {isDownstreamVariant ? ConsequenceTag.downstream_gene_variant : ConsequenceTag.upstream_gene_variant};\r\n\r\n        public void DetermineSmallVariantEffects()\r\n        {\r\n            GetTier1Types();\r\n            if (_consequences.Count == 0) GetTier2Types();\r\n            if (_consequences.Count == 0) GetTier3Types();\r\n\r\n            if (NeedsTranscriptVariant(_variantType, _consequences)) _consequences.Add(ConsequenceTag.transcript_variant);\r\n        }\r\n\r\n        internal static bool NeedsTranscriptVariant(VariantType variantType, List<ConsequenceTag> consequences) => consequences.Count == 0\r\n            ? 
NeedsTranscriptVariantByVariantType(variantType)\r\n            : NeedsTranscriptVariantByConsequences(consequences);\r\n\r\n        private static bool NeedsTranscriptVariantByConsequences(List<ConsequenceTag> consequences)\r\n        {\r\n            foreach (ConsequenceTag consequence in consequences)\r\n            {\r\n                if (!ConsequencesThatNeedTranscriptVariant.Contains(consequence)) return false;\r\n            }\r\n\r\n            return true;\r\n        }\r\n\r\n        private static bool NeedsTranscriptVariantByVariantType(VariantType variantType)\r\n        {\r\n            switch (variantType)\r\n            {\r\n                case VariantType.duplication:\r\n                case VariantType.tandem_duplication:\r\n                case VariantType.copy_number_variation:\r\n                case VariantType.copy_number_loss:\r\n                case VariantType.copy_number_gain:\r\n                case VariantType.run_of_homozygosity:\r\n                    return false;\r\n                default:\r\n                    return true;\r\n            }\r\n        }\r\n\r\n        public void DetermineStructuralVariantEffect(IVariant variant)\r\n        {\r\n            GetTier1Types();\r\n            if (_consequences.Count == 0) GetStructuralTier2Types();\r\n\r\n            DetermineCopyNumberEffect(variant.Type);\r\n            DetermineRepeatExpansionEffect(variant);\r\n\r\n            if (NeedsTranscriptVariant(_variantType, _consequences)) _consequences.Add(ConsequenceTag.transcript_variant);\r\n        }\r\n\r\n        private void DetermineRepeatExpansionEffect(IVariant variant)\r\n        {\r\n            if (!(variant is RepeatExpansion repeatExpansion)) return;\r\n\r\n            if (repeatExpansion.RefRepeatCount == null || repeatExpansion.RefRepeatCount == repeatExpansion.RepeatCount)\r\n            {\r\n                _consequences.Add(ConsequenceTag.short_tandem_repeat_change);\r\n            }\r\n            else if 
(repeatExpansion.RepeatCount > repeatExpansion.RefRepeatCount)\r\n            {\r\n                _consequences.Add(ConsequenceTag.short_tandem_repeat_expansion);\r\n            }\r\n            else\r\n            {\r\n                _consequences.Add(ConsequenceTag.short_tandem_repeat_contraction);\r\n            }\r\n        }\r\n\r\n        private void DetermineCopyNumberEffect(VariantType variantType)\r\n        {\r\n            // ReSharper disable once SwitchStatementMissingSomeCases\r\n            switch (variantType)\r\n            {\r\n                case VariantType.copy_number_gain:\r\n                    _consequences.Add(ConsequenceTag.copy_number_increase);\r\n                    break;\r\n                case VariantType.copy_number_loss:\r\n                    _consequences.Add(ConsequenceTag.copy_number_decrease);\r\n                    break;\r\n                case VariantType.copy_number_variation:\r\n                    _consequences.Add(ConsequenceTag.copy_number_change);\r\n                    break;\r\n            }\r\n        }\r\n\r\n        private void GetStructuralTier2Types()\r\n        {\r\n            // FeatureElongation\r\n            if (_featureEffect.Elongation()) _consequences.Add(ConsequenceTag.feature_elongation);\r\n\r\n            // FeatureTruncation\r\n            if (_featureEffect.Truncation()) _consequences.Add(ConsequenceTag.feature_truncation);\r\n\r\n            // FivePrimeDuplicatedTranscript\r\n            if (_featureEffect.FivePrimeDuplicatedTranscript()) _consequences.Add(ConsequenceTag.five_prime_duplicated_transcript);\r\n\r\n            // ThreePrimeDuplicatedTranscript\r\n            if (_featureEffect.ThreePrimeDuplicatedTranscript()) _consequences.Add(ConsequenceTag.three_prime_duplicated_transcript);\r\n        }\r\n\r\n        private void GetTier1Types()\r\n        {\r\n            // TranscriptAblation\r\n            if (_featureEffect.Ablation()) 
_consequences.Add(ConsequenceTag.transcript_ablation);\r\n\r\n            // TranscriptAmplification\r\n            if (_featureEffect.Amplification()) _consequences.Add(ConsequenceTag.transcript_amplification);\r\n        }\r\n\r\n        private void GetTier2Types()\r\n        {\r\n            // MatureMirnaVariant\r\n            if (_variantEffect.IsMatureMirnaVariant()) _consequences.Add(ConsequenceTag.mature_miRNA_variant);\r\n        }\r\n\r\n        private void GetTier3Types()\r\n        {\r\n            foreach ((Func<bool> consequenceTest, ConsequenceTag consequenceTag) in _tier3Consequences)\r\n            {\r\n                if (consequenceTest()) _consequences.Add(consequenceTag);\r\n            }\r\n        }\r\n\r\n        public void DetermineRegulatoryVariantEffects()\r\n        {\r\n            // RegulatoryRegionAmplification\r\n            if (_featureEffect.Amplification()) _consequences.Add(ConsequenceTag.regulatory_region_amplification);\r\n\r\n            // RegulatoryRegionAblation\r\n            if (_featureEffect.Ablation()) _consequences.Add(ConsequenceTag.regulatory_region_ablation);\r\n\r\n            // RegulatoryRegionVariant\r\n            _consequences.Add(ConsequenceTag.regulatory_region_variant);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/HgvsCodingNomenclature.cs",
    "content": "﻿using Genome;\r\nusing Intervals;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Variants;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions\r\n{\r\n    public static class HgvsCodingNomenclature\r\n    {\r\n        public static string GetHgvscAnnotation(ITranscript transcript, ISimpleVariant variant, ISequence refSequence,\r\n            int regionStart, int regionEnd, string transcriptRef, string transcriptAlt )\r\n        {\r\n            // sanity check: don't try to handle odd characters, make sure this is not a reference allele, \r\n            //               and make sure that we have protein coordinates\r\n            if (variant.Type == VariantType.reference || SequenceUtilities.HasNonCanonicalBase(variant.AltAllele)) return null;\r\n\r\n            // do not report HGVSc notation when variant lands inside gap region\r\n            if (regionStart > -1 && regionEnd > -1)\r\n            {\r\n                var startRegion = transcript.TranscriptRegions[regionStart];\r\n                var endRegion   = transcript.TranscriptRegions[regionEnd];\r\n                if (startRegion.Id == endRegion.Id && startRegion.Type == TranscriptRegionType.Gap &&\r\n                    endRegion.Type == TranscriptRegionType.Gap) return null;\r\n            }\r\n            \r\n            bool onReverseStrand = transcript.Gene.OnReverseStrand;\r\n\r\n            string refAllele = string.IsNullOrEmpty(transcriptRef)? onReverseStrand ? SequenceUtilities.GetReverseComplement(variant.RefAllele) : variant.RefAllele\r\n                : transcriptRef;\r\n            string altAllele = string.IsNullOrEmpty(transcriptAlt)\r\n                ? onReverseStrand ? 
SequenceUtilities.GetReverseComplement(variant.AltAllele) : variant.AltAllele\r\n                : transcriptAlt;\r\n            \r\n            // decide event type from HGVS nomenclature\r\n            var genomicChange = GetGenomicChange(transcript, onReverseStrand, refSequence, variant);\r\n\r\n            int variantStart = variant.Start;\r\n            int variantEnd   = variant.End;\r\n\r\n            if (genomicChange == GenomicChange.Duplication)\r\n            {\r\n                (variantStart, variantEnd, refAllele, regionStart, regionEnd) = transcript.TranscriptRegions.ShiftDuplication(variantStart, altAllele, onReverseStrand);\r\n            }\r\n\r\n            var startPositionOffset = HgvsUtilities.GetPositionOffset(transcript, variantStart, regionStart, true);\r\n            var endPositionOffset = variantStart == variantEnd\r\n                ? startPositionOffset\r\n                : HgvsUtilities.GetPositionOffset(transcript, variantEnd, regionEnd, false);\r\n\r\n            if (onReverseStrand)\r\n            {\r\n                PositionOffset tmp = startPositionOffset;\r\n                startPositionOffset = endPositionOffset;\r\n                endPositionOffset = tmp;\r\n            }\r\n\r\n            if (startPositionOffset == null && variant.Type == VariantType.insertion)\r\n            {\r\n                startPositionOffset =\r\n                    new PositionOffset(endPositionOffset.Position + 1, endPositionOffset.Offset, $\"{endPositionOffset.Position + 1}\");\r\n            }\r\n\r\n            // sanity check: make sure we have coordinates\r\n            if (startPositionOffset == null || endPositionOffset == null) return null;\r\n\r\n            var hgvsNotation = new HgvscNotation(refAllele, altAllele, transcript.Id.WithVersion, genomicChange,\r\n                startPositionOffset, endPositionOffset, transcript.Translation != null);\r\n\r\n            // generic formatting\r\n            return hgvsNotation.ToString();\r\n  
      }\r\n\r\n        /// <summary>\r\n        /// Adjust positions by alt allele length\r\n        /// </summary>\r\n        internal static (int Start, int End, string RefAllele, int RegionStart, int RegionEnd) ShiftDuplication(\r\n            this ITranscriptRegion[] regions, int start, string altAllele, bool onReverseStrand)\r\n        {\r\n            int incrementLength = altAllele.Length;\r\n            int dupStart = onReverseStrand ? start + incrementLength - 1    : start - incrementLength;\r\n            int dupEnd   = onReverseStrand ? dupStart - incrementLength + 1 : dupStart + incrementLength - 1;\r\n\r\n            (int regionStart, _) = MappedPositionUtilities.FindRegion(regions, dupStart);\r\n            (int regionEnd, _)   = MappedPositionUtilities.FindRegion(regions, dupEnd);\r\n\r\n            return (dupStart, dupEnd, altAllele, regionStart, regionEnd);\r\n        }\r\n\r\n        public static GenomicChange GetGenomicChange(IInterval interval, bool onReverseStrand, ISequence refSequence, ISimpleVariant variant)\r\n        {\r\n            // length of the reference allele. Negative lengths make no sense\r\n            int refLength = variant.End - variant.Start + 1;\r\n            if (refLength < 0) refLength = 0;\r\n\r\n            // length of alternative allele\r\n            int altLength = variant.AltAllele.Length;\r\n\r\n            // sanity check: make sure that the alleles are different\r\n            if (variant.RefAllele == variant.AltAllele) return GenomicChange.Reference;\r\n\r\n            // deletion\r\n            if (altLength == 0) return GenomicChange.Deletion;\r\n\r\n            if (refLength == altLength)\r\n            {\r\n                // substitution\r\n                if (refLength == 1) return GenomicChange.Substitution;\r\n\r\n                // inversion\r\n                string rcRefAllele = SequenceUtilities.GetReverseComplement(variant.RefAllele);\r\n                return variant.AltAllele == rcRefAllele ? 
GenomicChange.Inversion : GenomicChange.DelIns;\r\n            }\r\n\r\n            // deletion/insertion\r\n            if (refLength != 0) return GenomicChange.DelIns;\r\n\r\n            // If this is an insertion, we should check if the preceding reference nucleotides\r\n            // match the insertion. In that case it should be annotated as a duplication.\r\n            bool isGenomicDuplicate = HgvsUtilities.IsDuplicateWithinInterval(refSequence, variant, interval, onReverseStrand);\r\n\r\n            return isGenomicDuplicate ? GenomicChange.Duplication : GenomicChange.Insertion;\r\n        }\r\n    }\r\n\r\n    public enum GenomicChange\r\n    {\r\n        Unknown,\r\n        Deletion,\r\n        Duplication,\r\n        DelIns,\r\n        Insertion,\r\n        Inversion,\r\n        Substitution,\r\n        Reference\r\n    }\r\n}\r\n"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/HgvsProteinNomenclature.cs",
    "content": "﻿using Genome;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Algorithms;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Variants;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions\r\n{\r\n    public static class HgvsProteinNomenclature\r\n    {\r\n        public static string GetHgvsProteinAnnotation(\r\n\t\t\tITranscript transcript, \r\n\t\t\tstring refAminoAcids,\r\n\t\t\tstring altAminoAcids,\r\n\t\t\tstring transcriptAltAllele,\r\n            IMappedPosition position,\r\n\t\t\tVariantEffect variantEffect, \r\n\t\t\tISimpleVariant variant, \r\n\t\t\tISequence refSequence, \r\n\t\t\tstring hgvscNotation,\r\n\t\t\tbool isMitochondrial)\r\n        {\r\n\t        if (IsHgvspNull(transcriptAltAllele, position.CdsStart, position.CdsEnd, variant, hgvscNotation)) return null;\r\n\r\n\t\t\tvar peptideSeq = transcript.Translation.PeptideSeq;\r\n\r\n            // Amino acid seq should never go past the stop codon\r\n            refAminoAcids = !refAminoAcids.EndsWith(AminoAcids.StopCodon) && refAminoAcids.Contains(AminoAcids.StopCodon)\r\n                ? 
refAminoAcids.OptimizedSplit(AminoAcids.StopCodon[0])[0] + AminoAcids.StopCodon\r\n                : refAminoAcids;\r\n\r\n            int proteinStart = position.ProteinStart;\r\n\t\t\tHgvsUtilities.ShiftAndRotateAlleles(ref proteinStart, ref refAminoAcids, ref altAminoAcids, peptideSeq);\r\n\r\n\t        var end             = proteinStart + refAminoAcids.Length - 1;\r\n\t        var refAbbreviation = AminoAcids.GetAbbreviations(refAminoAcids);\r\n\t        var altAbbreviation = AminoAcids.GetAbbreviations(altAminoAcids);\r\n\r\n\t\t\tvar proteinId     = transcript.Translation.ProteinId.WithVersion;\r\n\t\t\tvar proteinChange = GetProteinChange(proteinStart, refAminoAcids, altAminoAcids, peptideSeq, variantEffect);\r\n\r\n            // ReSharper disable once SwitchStatementMissingSomeCases\r\n            switch (proteinChange)\r\n\t\t\t{\r\n\t\t\t\tcase ProteinChange.Substitution:\r\n\t\t\t\t\treturn HgvspNotation.GetSubstitutionNotation(proteinId, proteinStart, refAbbreviation, altAbbreviation);\r\n\r\n\t\t\t\tcase ProteinChange.Unknown:\r\n\t\t\t\t\treturn HgvspNotation.GetUnknownNotation(proteinId, proteinStart, end, refAbbreviation, altAbbreviation);\r\n\r\n\t\t\t\tcase ProteinChange.Deletion:\r\n\t\t\t\t\treturn HgvspNotation.GetDeletionNotation(proteinId, proteinStart, end, refAbbreviation, variantEffect.IsStopGained());\r\n\r\n\t\t\t\tcase ProteinChange.Duplication:\r\n\t\t\t\t    proteinStart -= altAminoAcids.Length;\r\n\t\t\t\t\treturn HgvspNotation.GetDuplicationNotation(proteinId, proteinStart, end, altAbbreviation);\r\n\r\n\t\t\t\tcase ProteinChange.Frameshift:\r\n\t\t\t\t    return GetHgvsFrameshiftNotation(refSequence, position.CdsStart, position.CdsEnd, transcriptAltAllele,\r\n\t\t\t\t        transcript, isMitochondrial, proteinId, proteinStart, end);\r\n\r\n\t\t\t\tcase ProteinChange.None:\r\n\t\t\t\t\treturn HgvspNotation.GetSilentNotation(hgvscNotation, proteinStart, refAbbreviation, variantEffect.IsStopRetained());\r\n\r\n\t\t\t\tcase 
ProteinChange.DelIns:\r\n\t\t\t\t\treturn HgvspNotation.GetDelInsNotation(proteinId, proteinStart, end, refAbbreviation, altAbbreviation);\r\n\r\n\t\t\t\tcase ProteinChange.Insertion:\r\n\t\t\t\t\tSwap.Int(ref proteinStart, ref end);\r\n\t\t\t\t\treturn HgvspNotation.GetInsertionNotation(proteinId, proteinStart, end, altAbbreviation, peptideSeq);\r\n\t\t\t\t\r\n\t\t\t\tcase ProteinChange.Extension:\r\n\t\t\t\t    var altPeptideSequence = HgvsUtilities.GetAltPeptideSequence(refSequence, position.CdsStart, position.CdsEnd,\r\n\t\t\t\t        transcriptAltAllele, transcript, isMitochondrial);\r\n\t\t\t\t\taltAbbreviation = proteinStart <= altPeptideSequence.Length ? AminoAcids.ConvertAminoAcidToAbbreviation(altPeptideSequence[proteinStart - 1]): \"Ter\";\r\n\t\t\t\t\tvar countToStop = HgvsUtilities.GetNumAminoAcidsUntilStopCodon(altPeptideSequence, peptideSeq, proteinStart - 1, false);\r\n\r\n\t\t\t\t\treturn HgvspNotation.GetExtensionNotation(proteinId, proteinStart, refAbbreviation, altAbbreviation,countToStop);\r\n\r\n\t\t\t\tcase ProteinChange.StartLost:\r\n\t\t\t\t\treturn HgvspNotation.GetStartLostNotation(proteinId);\r\n\t\t\t}\r\n\r\n\t\t\treturn null;\r\n\t\t}\r\n\r\n        private static string GetHgvsFrameshiftNotation(ISequence refSequence, int cdsBegin, int cdsEnd,\r\n            string transcriptAltAllele, ITranscript transcript, bool isMitochondrial, string proteinId, int start,\r\n            int end)\r\n        {\r\n\t\t    var peptideSeq = transcript.Translation.PeptideSeq;\r\n\t\t    var altPeptideSeq = HgvsUtilities.GetAltPeptideSequence(refSequence, cdsBegin, cdsEnd, transcriptAltAllele, transcript, isMitochondrial);\r\n\r\n\t\t    if (start > end) Swap.Int(ref start, ref end);\r\n\r\n\t\t    var frameshiftedParameters = HgvsUtilities.GetChangesAfterFrameshift(start, peptideSeq, altPeptideSeq);\r\n\r\n\t\t\tstart            = frameshiftedParameters.Item1;\r\n\t\t    var refAminoAcid = frameshiftedParameters.Item2;\r\n\t\t    var altAminoAcid = 
frameshiftedParameters.Item3;\r\n\r\n\t\t    var refAbbreviation = AminoAcids.ConvertAminoAcidToAbbreviation(refAminoAcid);\r\n\r\n\t\t\tif (altAminoAcid == AminoAcids.StopCodonChar)\r\n\t\t\t    return HgvspNotation.GetSubstitutionNotation(proteinId, start, refAbbreviation, \"Ter\");\r\n\t\t\t\r\n\t\t    var altAbbreviation = AminoAcids.ConvertAminoAcidToAbbreviation(altAminoAcid);\r\n\t\t    var countToStop     = HgvsUtilities.GetNumAminoAcidsUntilStopCodon(altPeptideSeq, peptideSeq, start - 1, true);\r\n\r\n\t\t    return HgvspNotation.GetFrameshiftNotation(proteinId, start, refAbbreviation, altAbbreviation, countToStop);\r\n\t    }\r\n\r\n        private static bool IsHgvspNull(string transcriptAltAllele, int cdsStart, int cdsEnd, ISimpleVariant variant,\r\n            string hgvscNotation)\r\n        {\r\n            return string.IsNullOrEmpty(hgvscNotation)                        ||\r\n                   variant.Type == VariantType.reference                      ||\r\n                   SequenceUtilities.HasNonCanonicalBase(transcriptAltAllele) ||\r\n                   cdsStart == -1                                             || \r\n                   cdsEnd == -1;\r\n        }\r\n\r\n        internal static ProteinChange GetProteinChange(int start, string refAminoAcids, string altAminoAcids,\r\n            string peptideSeq, IVariantEffect variantEffect)\r\n        {\r\n            var insertionBeforeTranscript = refAminoAcids.Length == 0 && start == 1;\r\n            if (refAminoAcids == altAminoAcids || variantEffect.IsStopRetained() || insertionBeforeTranscript) return ProteinChange.None;\r\n\r\n            if (variantEffect.IsStartLost()) return ProteinChange.StartLost;\r\n\r\n            // according to var nom, only if the Stop codon is affected, we call it an extension\r\n            if (variantEffect.IsStopLost() && refAminoAcids.OptimizedStartsWith(AminoAcids.StopCodonChar)) return ProteinChange.Extension;\r\n\r\n            if 
(variantEffect.IsFrameshiftVariant()) return ProteinChange.Frameshift;\r\n\r\n            if (altAminoAcids.Length > refAminoAcids.Length &&\r\n                HgvsUtilities.IsAminoAcidDuplicate(start, altAminoAcids, peptideSeq)) return ProteinChange.Duplication;\r\n\r\n            if (refAminoAcids.Length == 0 && altAminoAcids.Length != 0) return ProteinChange.Insertion;\r\n\r\n            if (refAminoAcids.Length != 0 && altAminoAcids.Length == 0) return ProteinChange.Deletion;\r\n\r\n            if (refAminoAcids.Length == 1 && altAminoAcids.Length == 1) return ProteinChange.Substitution;\r\n\r\n            // the only remaining possibility is deletions/insertions\r\n            return ProteinChange.DelIns;\r\n        }\r\n    }\r\n\r\n    public enum ProteinChange\r\n    {\r\n        Unknown,\r\n        Deletion,\r\n        Duplication,\r\n        Frameshift,\r\n        DelIns,\r\n        Insertion,\r\n        None,\r\n\t\tExtension,\r\n\t\tStartLost,\r\n        Substitution\r\n    }\r\n}\r\n"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/HgvsUtilities.cs",
    "content": "﻿using System;\r\nusing System.Text;\r\nusing Genome;\r\nusing Intervals;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Variants;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions\r\n{\r\n    public static class HgvsUtilities\r\n    {\r\n        public static void ShiftAndRotateAlleles(ref int start, ref string refAminoAcids, ref string altAminoAcids, string peptides)\r\n        {\r\n            (start, refAminoAcids, altAminoAcids) = BiDirectionalTrimmer.Trim(start, refAminoAcids, altAminoAcids);\r\n            (start, refAminoAcids, altAminoAcids) = Rotate3Prime(refAminoAcids, altAminoAcids, start, peptides);\r\n        }\r\n\r\n        internal static (int Start, string RefAminoAcids, string AltAminoAcids) Rotate3Prime(string refAminoAcids, string altAminoAcids, int start,\r\n            string peptides)\r\n        {\r\n            if (!(string.IsNullOrEmpty(refAminoAcids) || string.IsNullOrEmpty(altAminoAcids))) return (start, refAminoAcids, altAminoAcids);\r\n\r\n            bool isInsertion = !string.IsNullOrEmpty(altAminoAcids);\r\n\r\n            // ReSharper disable once PossibleNullReferenceException\r\n            int end = start + refAminoAcids.Length - 1;\r\n\r\n            // for insertion, the reference bases will be empty string. The shift should happen on the alternate allele\r\n            string rotatingPeptides = isInsertion ? altAminoAcids : refAminoAcids;\r\n            int    numBases         = rotatingPeptides.Length;\r\n\r\n            string downstreamPeptides = peptides.Length >= end ? 
peptides.Substring(end) : null;\r\n            string combinedSequence   = rotatingPeptides + downstreamPeptides;\r\n\r\n            int shiftStart, shiftEnd;\r\n            var hasShifted = false;\r\n\r\n            for (shiftStart = 0, shiftEnd = numBases; shiftEnd < combinedSequence.Length; shiftStart++, shiftEnd++)\r\n            {\r\n                if (combinedSequence[shiftStart] != combinedSequence[shiftEnd]) break;\r\n                start++;\r\n                hasShifted = true;\r\n            }\r\n\r\n            if (hasShifted) rotatingPeptides = combinedSequence.Substring(shiftStart, numBases);\r\n\r\n            if (isInsertion) altAminoAcids = rotatingPeptides;\r\n            else refAminoAcids             = rotatingPeptides;\r\n\r\n            return (start, refAminoAcids, altAminoAcids);\r\n        }\r\n\r\n        public static bool IsAminoAcidDuplicate(int start, string altAminoAcids, string transcriptPeptides)\r\n        {\r\n            if (altAminoAcids == null || transcriptPeptides == null) return false;\r\n\r\n            int testAminoAcidPos = start - altAminoAcids.Length - 1;\r\n            if (testAminoAcidPos < 0) return false;\r\n\r\n            string precedingAminoAcids = testAminoAcidPos + altAminoAcids.Length <= transcriptPeptides.Length\r\n                ? 
transcriptPeptides.Substring(testAminoAcidPos, altAminoAcids.Length)\r\n                : \"\";\r\n\r\n            return testAminoAcidPos >= 0 && precedingAminoAcids == altAminoAcids;\r\n        }\r\n\r\n        public static int GetNumAminoAcidsUntilStopCodon(string altCds, string peptideSeq, int refVarPos, bool isFrameshift)\r\n        {\r\n            int numExtraAminoAcids = -1;\r\n            int refLen             = peptideSeq.Length;\r\n\r\n            // find the number of residues that are translated until a termination codon is encountered\r\n            int terPos = altCds.IndexOf('*');\r\n            if (terPos != -1)\r\n            {\r\n                numExtraAminoAcids = terPos + 1 - (isFrameshift ? refVarPos : refLen + 1);\r\n            }\r\n\r\n            // A special case is if the first aa is a stop codon => don't display the number of residues until the stop codon\r\n            return numExtraAminoAcids > 0 ? numExtraAminoAcids : -1;\r\n        }\r\n\r\n        public static (int Start, char RefAminoAcid, char AltAminoAcid) GetChangesAfterFrameshift(int start, string peptideSeq, string altPeptideSeq)\r\n        {\r\n            start = Math.Min(start, peptideSeq.Length);\r\n\r\n            // for deletions at the end of peptide sequence\r\n            if (start > altPeptideSeq.Length) return (start, peptideSeq[start - 1], '?');\r\n\r\n            string refPeptideSeq = peptideSeq + \"*\";\r\n            char   refAminoAcid  = refPeptideSeq[start - 1];\r\n            char   altAminoAcid  = altPeptideSeq[start - 1];\r\n\r\n            while (start <= altPeptideSeq.Length && start <= refPeptideSeq.Length)\r\n            {\r\n                refAminoAcid = refPeptideSeq[start - 1];\r\n                altAminoAcid = altPeptideSeq[start - 1];\r\n\r\n                // variation at stop codon, but maintains stop codon - set to synonymous\r\n                if (refAminoAcid == '*' && altAminoAcid == '*' || refAminoAcid != altAminoAcid) break;\r\n    
            start++;\r\n            }\r\n\r\n            return (start, refAminoAcid, altAminoAcid);\r\n        }\r\n\r\n        public static string GetAltPeptideSequence(ISequence refSequence, int cdsBegin, int cdsEnd, string transcriptAltAllele,\r\n            ITranscript transcript, bool isMitochondrial)\r\n        {\r\n            string altCds = TranscriptUtilities.GetAlternateCds(refSequence, cdsBegin, cdsEnd, transcriptAltAllele, transcript.TranscriptRegions,\r\n                transcript.Gene.OnReverseStrand, transcript.StartExonPhase, transcript.Translation.CodingRegion.CdnaStart);\r\n\r\n            var aminoAcids = new AminoAcids(isMitochondrial);\r\n            return aminoAcids.TranslateBases(altCds, true);\r\n        }\r\n\r\n        public static PositionOffset GetPositionOffset(ITranscript transcript, int genomicPosition, int regionIndex, bool isRegionStart)\r\n        {\r\n            if (!transcript.Overlaps(genomicPosition, genomicPosition)) return null;\r\n\r\n            ITranscriptRegion region            = transcript.TranscriptRegions[regionIndex];\r\n            int               codingRegionStart = transcript.Translation?.CodingRegion.CdnaStart ?? -1;\r\n            int               codingRegionEnd   = transcript.Translation?.CodingRegion.CdnaEnd   ?? -1;\r\n\r\n            (int position, int offset) = GetPositionAndOffset(genomicPosition, region, transcript.Gene.OnReverseStrand, isRegionStart);\r\n            if (position == -1) return null;\r\n\r\n            string coordinate   = GetCoordinate(position, codingRegionStart, codingRegionEnd);\r\n            string offsetString = offset == 0 ? 
\"\" : offset.ToString(\"+0;-0;+0\");\r\n            string value        = coordinate + offsetString;\r\n\r\n            return new PositionOffset(position, offset, value);\r\n        }\r\n\r\n        internal static (int Position, int Offset) GetPositionAndOffset(int position, ITranscriptRegion region, bool onReverseStrand,\r\n            bool isRegionStart)\r\n        {\r\n            int cdsPos = -1;\r\n            int offset = -1;\r\n\r\n            switch (region.Type)\r\n            {\r\n                case TranscriptRegionType.Exon:\r\n                    cdsPos = region.CdnaStart + (onReverseStrand ? region.End - position : position - region.Start);\r\n                    offset = 0;\r\n                    break;\r\n                case TranscriptRegionType.Gap:\r\n                    (cdsPos, offset) = GetGapPositionAndOffset(region, isRegionStart);\r\n                    break;\r\n                case TranscriptRegionType.Intron:\r\n                    (cdsPos, offset) = GetIntronPositionAndOffset(position, region, onReverseStrand);\r\n                    break;\r\n            }\r\n\r\n            return (cdsPos, offset);\r\n        }\r\n        \r\n        private static (int Position, int Offset) GetIntronPositionAndOffset(int position, ITranscriptRegion region,\r\n            bool onReverseStrand)\r\n        {\r\n            int leftDist  = position   - region.Start + 1;\r\n            int rightDist = region.End - position     + 1;\r\n\r\n            int offset = Math.Min(leftDist, rightDist);\r\n            if (!onReverseStrand && rightDist < leftDist || onReverseStrand && rightDist > leftDist) offset = -offset;\r\n\r\n            // cDNA position truth table\r\n            //\r\n            //          forward     reverse\r\n            //       -------------------------\r\n            // L < R | CdnaStart | CdnaEnd   |\r\n            // L = R | CdnaStart | CdnaStart |\r\n            // L > R | CdnaEnd   | CdnaStart |\r\n            //       
-------------------------\r\n\r\n            int cdnaPosition = leftDist < rightDist && onReverseStrand || leftDist > rightDist && !onReverseStrand\r\n                ? region.CdnaEnd\r\n                : region.CdnaStart;\r\n\r\n            return (cdnaPosition, offset);\r\n        }\r\n\r\n        private static (int Position, int Offset) GetGapPositionAndOffset(ITranscriptRegion region, bool isRegionStart) =>\r\n            isRegionStart ? (region.CdnaEnd, 0) : (region.CdnaStart, 0);\r\n\r\n        private static string GetCoordinate(int position, int codingRegionStart, int codingRegionEnd)\r\n        {\r\n            if (codingRegionEnd != -1 && position > codingRegionEnd) return \"*\" + (position - codingRegionEnd);\r\n\r\n            return codingRegionStart != -1\r\n                ? (position + (position >= codingRegionStart ? 1 : 0) - codingRegionStart).ToString()\r\n                : position.ToString();\r\n        }\r\n\r\n        public static string GetTranscriptAllele(string variantAllele, bool onReverseStrand) =>\r\n            onReverseStrand ? SequenceUtilities.GetReverseComplement(variantAllele) : variantAllele;\r\n\r\n        public static string FormatDnaNotation(string start, string end, string referenceId, string referenceBases,\r\n            string alternateBases, GenomicChange type, char notationType)\r\n        {\r\n            StringBuilder sb = StringBuilderPool.Get();\r\n\r\n            // all start with transcript name & numbering type\r\n            sb.Append(referenceId + ':' + notationType + '.');\r\n\r\n            // handle single and multiple positions\r\n            string coordinates = start == end\r\n                ? 
start\r\n                : start + '_' + end;\r\n\r\n            // format rest of string according to type\r\n            // note: inversion and multiple are never assigned as genomic changes\r\n            // ReSharper disable once SwitchStatementMissingSomeCases\r\n            switch (type)\r\n            {\r\n                case GenomicChange.Deletion:\r\n                    sb.Append(coordinates + \"del\");\r\n                    break;\r\n                case GenomicChange.Inversion:\r\n                    sb.Append(coordinates + \"inv\");\r\n                    break;\r\n                case GenomicChange.Duplication:\r\n                    sb.Append(coordinates + \"dup\");\r\n                    break;\r\n                case GenomicChange.Substitution:\r\n                    if (referenceBases == alternateBases)\r\n                    {\r\n                        sb.Append(start + '=');\r\n                    }\r\n                    else\r\n                    {\r\n                        sb.Append(start + referenceBases + '>' + alternateBases);\r\n                    } \r\n                    break;\r\n                case GenomicChange.DelIns:\r\n                    // NOTE: change to delins, now use del--ins-- to reduce anavarin differences\r\n                    sb.Append(coordinates + \"delins\" + alternateBases);\r\n                    break;\r\n                case GenomicChange.Insertion:\r\n                    sb.Append(coordinates + \"ins\" + alternateBases);\r\n                    break;\r\n                case GenomicChange.Reference:\r\n                    sb.Append(coordinates + \"=\");\r\n                    break;\r\n                default:\r\n                    throw new InvalidOperationException(\"Unhandled genomic change found: \" + type);\r\n            }\r\n\r\n            return StringBuilderPool.GetStringAndReturn(sb);\r\n        }\r\n\r\n        public static bool IsDuplicateWithinInterval(ISequence refSequence, ISimpleVariant 
variant, IInterval interval, bool onReverseStrand)\r\n        {\r\n            if (variant.Type != VariantType.insertion) return false;\r\n\r\n            int    altAlleleLen = variant.AltAllele.Length;\r\n            string compareRegion;\r\n\r\n            if (onReverseStrand)\r\n            {\r\n                if (variant.End + altAlleleLen > interval.End) return false;\r\n                compareRegion = refSequence.Substring(variant.Start - 1, altAlleleLen);\r\n            }\r\n            else\r\n            {\r\n                if (variant.Start - altAlleleLen < interval.Start) return false;\r\n                compareRegion = refSequence.Substring(variant.End - altAlleleLen, altAlleleLen);\r\n            }\r\n\r\n            return compareRegion == variant.AltAllele;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/HgvscNotation.cs",
    "content": "﻿namespace VariantAnnotation.AnnotatedPositions\r\n{\r\n    public sealed class HgvscNotation\r\n    {\r\n        private readonly string         _referenceBases;\r\n        private readonly string         _alternateBases;\r\n        private          PositionOffset _start;\r\n        private          PositionOffset _end;\r\n        private readonly string         _transcriptId;\r\n        private readonly char           _transcriptType;\r\n        private readonly GenomicChange  _type;\r\n\r\n        private const char CodingType    = 'c';\r\n        private const char NonCodingType = 'n';\r\n\r\n        public HgvscNotation(string referenceBases, string alternateBases, string transcriptId, GenomicChange changeType, PositionOffset start,\r\n            PositionOffset end, bool isCoding)\r\n        {\r\n            _transcriptId = transcriptId;\r\n            _start        = start;\r\n            _end          = end;\r\n            _type         = changeType;\r\n\r\n            SwapEndpoints();\r\n\r\n            _referenceBases = referenceBases ?? \"\";\r\n            _alternateBases = alternateBases ?? \"\";\r\n\r\n            _transcriptType = isCoding ? CodingType : NonCodingType;\r\n        }\r\n\r\n        /// <summary>\r\n        /// HGVS aligns changes 3' \r\n        /// e.g. 
given a ATG/- deletion in C[ATG]ATGT, we want to move to: CATG[ATG]T\r\n        ///      given a   A/- deletion in  TA[A]AAAA, we want to move to:  TAAAAA[A]\r\n        ///      given a  AA/- deletion in  TA[AA]AAA, we want to move to:  TAAAA[AA]\r\n        ///\r\n        /// NOTE: this method does not perform the 3' shift itself. It only swaps _start and _end\r\n        /// when _start sorts after _end (compared by Position first, then by Offset).\r\n        /// </summary>\r\n        private void SwapEndpoints()\r\n        {\r\n            if (_start.Position <= _end.Position && (_start.Position != _end.Position || _start.Offset <= _end.Offset)) return;\r\n\r\n            PositionOffset temp = _start;\r\n            _start = _end;\r\n            _end   = temp;\r\n        }\r\n\r\n        public override string ToString() => HgvsUtilities.FormatDnaNotation(_start.Value, _end.Value, _transcriptId, _referenceBases,\r\n            _alternateBases, _type, _transcriptType);\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/HgvsgNotation.cs",
    "content": "﻿using System;\r\nusing Genome;\r\nusing Intervals;\r\nusing Variants;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions\r\n{\r\n    public static class HgvsgNotation\r\n    {\r\n        private const char NotationType     = 'g';\r\n        private const char MitoNotationType = 'm';\r\n\r\n        public static string GetNotation(string refseqAccession, ISimpleVariant variant, ISequence refSequence,\r\n                                         IInterval referenceInterval)\r\n        {\r\n            ISimpleVariant rotatedVariant = VariantRotator.Right(variant, referenceInterval, refSequence, false);\r\n            int            start          = Math.Min(rotatedVariant.Start, rotatedVariant.End);\r\n            int            end            = Math.Max(rotatedVariant.Start, rotatedVariant.End);\r\n            string         referenceBases = rotatedVariant.RefAllele;\r\n            string         alternateBases = rotatedVariant.AltAllele;\r\n            GenomicChange  type           = HgvsCodingNomenclature.GetGenomicChange(referenceInterval, false, refSequence, rotatedVariant);\r\n\r\n            if (type == GenomicChange.Duplication && variant.Type == VariantType.insertion)\r\n            {\r\n                referenceBases = alternateBases;\r\n                end            = start;\r\n                start          = end - referenceBases.Length + 1;\r\n            }\r\n\r\n            char notationType = variant.Chromosome.UcscName == \"chrM\" ? MitoNotationType : NotationType;\r\n            return HgvsUtilities.FormatDnaNotation(start.ToString(), end.ToString(), refseqAccession, referenceBases, alternateBases, type,\r\n                notationType);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/HgvspNotation.cs",
    "content": "﻿using VariantAnnotation.AnnotatedPositions.Transcript;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions\r\n{\r\n    public static class HgvspNotation\r\n    {\r\n        public static string GetDelInsNotation(string proteinId, int start, int end, string refAbbreviation, string altAbbreviation)\r\n        {\r\n            if (altAbbreviation.StartsWith(\"Ter\"))\r\n                return GetSubstitutionNotation(proteinId, start, refAbbreviation.Substring(0, 3), \"Ter\");\r\n\r\n            return start == end\r\n                ? $\"{proteinId}:p.({refAbbreviation}{start}delins{altAbbreviation})\"\r\n                : $\"{proteinId}:p.({refAbbreviation.Substring(0, 3)}{start}_{refAbbreviation.Substring(refAbbreviation.Length - 3)}{end}delins{altAbbreviation})\";\r\n        }\r\n\r\n        public static string GetInsertionNotation(string proteinId, int start, int end, string altAbbreviation, string peptideSeq)\r\n        {\r\n            // insertion past the last AA\r\n            if (end > peptideSeq.Length) return null;\r\n\r\n            var leftFlankingAa = AminoAcids.ConvertAminoAcidToAbbreviation(peptideSeq[start - 1]);\r\n            if (altAbbreviation.StartsWith(\"Ter\"))\r\n            {\r\n                var refAminoAcid = AminoAcids.ConvertAminoAcidToAbbreviation(peptideSeq[start]);\r\n                return $\"{proteinId}:p.({refAminoAcid}{end}Ter)\";\r\n            }\r\n\r\n            var rightFlankingAa = end > peptideSeq.Length ? 
\"Ter\" : AminoAcids.ConvertAminoAcidToAbbreviation(peptideSeq[end - 1]);\r\n\r\n            return $\"{proteinId}:p.({leftFlankingAa}{start}_{rightFlankingAa}{end}ins{altAbbreviation})\";\r\n        }\r\n\r\n        public static string GetFrameshiftNotation(string proteinId, int start, string refAbbreviation, string altAbbreviation, int countToStop)\r\n        {\r\n            if (altAbbreviation.StartsWith(\"Ter\"))\r\n                return $\"{proteinId}:p.({refAbbreviation}{start}Ter)\";\r\n\r\n            return countToStop > 0 ?\r\n                $\"{proteinId}:p.({refAbbreviation}{start}{altAbbreviation}fsTer{countToStop})\" :\r\n                $\"{proteinId}:p.({refAbbreviation}{start}{altAbbreviation}fsTer?)\";\r\n        }\r\n\r\n        public static string GetExtensionNotation(string proteinId, int start, string refAbbreviation, string altAbbreviation, int countToStop)\r\n        {\r\n\t\t\treturn countToStop > 0 ?\r\n                $\"{proteinId}:p.({refAbbreviation}{start}{altAbbreviation.Substring(0, 3)}extTer{countToStop})\" :\r\n                $\"{proteinId}:p.({refAbbreviation}{start}{altAbbreviation.Substring(0, 3)}extTer?)\";\r\n        }\r\n\r\n        public static string GetDuplicationNotation(string proteinId, int start, int end, string altAbbreviation)\r\n        {\r\n            return start == end ?\r\n                $\"{proteinId}:p.({altAbbreviation}{start}dup)\" :\r\n                $\"{proteinId}:p.({altAbbreviation.Substring(0, 3)}{start}_{altAbbreviation.Substring(altAbbreviation.Length - 3)}{end}dup)\";\r\n        }\r\n\r\n\r\n        public static string GetStartLostNotation(string proteinId)\r\n        {\r\n            // changing it according to https://varnomen.hgvs.org/recommendations/protein/variant/substitution/\r\n            return $\"{proteinId}:p.?\";\r\n        }\r\n\r\n        public static string GetSilentNotation(string hgvscNotation, int start, string refAbbreviation, bool isStopRetained)\r\n        {\r\n     
       return isStopRetained ? $\"{hgvscNotation}(p.(Ter{start}=))\" : $\"{hgvscNotation}(p.({refAbbreviation}{start}=))\";\r\n        }\r\n\r\n        internal static string GetSubstitutionNotation(string proteinId, int start, string refAbbreviation, string altAbbreviation)\r\n        {\r\n            // start lost\r\n            if (start == 1 && refAbbreviation != altAbbreviation)\r\n                return $\"{proteinId}:p.({refAbbreviation}{start}?)\";\r\n\r\n            return $\"{proteinId}:p.({refAbbreviation}{start}{altAbbreviation})\";\r\n        }\r\n\r\n        internal static string GetUnknownNotation(string proteinId, int start, int end, string refAbbreviation, string altAbbreviation)\r\n        {\r\n            return start == end\r\n                ? $\"{proteinId}:p.({refAbbreviation}{start}{altAbbreviation})\"\r\n                : $\"{proteinId}:p.({refAbbreviation}{start}_{altAbbreviation}{end})\";\r\n        }\r\n\r\n        internal static string GetDeletionNotation(string proteinId, int start, int end, string refAbbreviation, bool isStopGained)\r\n        {\r\n            if (isStopGained)\r\n                return $\"{proteinId}:p.({refAbbreviation}{start}Ter)\";\r\n\r\n            return start == end ?\r\n                $\"{proteinId}:p.({refAbbreviation}{start}del)\" :\r\n                $\"{proteinId}:p.({refAbbreviation.Substring(0, 3)}{start}_{refAbbreviation.Substring(refAbbreviation.Length - 3)}{end}del)\";\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/PositionOffset.cs",
    "content": "﻿namespace VariantAnnotation.AnnotatedPositions\n{\n    public sealed record PositionOffset(int Position, int Offset, string Value);\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/RegulatoryRegionAnnotator.cs",
    "content": "﻿using Intervals;\r\nusing VariantAnnotation.AnnotatedPositions.Consequence;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Variants;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions\r\n{\r\n    public static class RegulatoryRegionAnnotator\r\n    {\r\n        public static IAnnotatedRegulatoryRegion Annotate(IVariant variant, IRegulatoryRegion regulatoryRegion)\r\n        {\r\n            OverlapType overlapType = Intervals.Utilities.GetOverlapType(regulatoryRegion.Start, regulatoryRegion.End,\r\n                variant.Start, variant.End);\r\n            EndpointOverlapType endpointOverlapType =\r\n                Intervals.Utilities.GetEndpointOverlapType(regulatoryRegion.Start, regulatoryRegion.End, variant.Start, variant.End);\r\n            var featureEffect = new FeatureVariantEffects(overlapType, endpointOverlapType, false, variant.Type, variant.IsStructuralVariant);\r\n            \r\n            var consequence = new Consequences(VariantType.unknown, null, featureEffect);\r\n            consequence.DetermineRegulatoryVariantEffects();\r\n            return new AnnotatedRegulatoryRegion(regulatoryRegion, consequence.GetConsequences());\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/Transcript/AminoAcids.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.TranscriptAnnotation;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public sealed class AminoAcids\r\n    {\r\n        public const string StopCodon   = \"*\";\r\n        public const char StopCodonChar = '*';\r\n\r\n        private readonly CodonConversion _codonConversionScheme = CodonConversion.HumanChromosome;\r\n\r\n        private readonly Dictionary<string, char> _aminoAcidLookupTable;\r\n        private readonly Dictionary<string, char> _mitoDifferences;\r\n\r\n        // converts single letter amino acid ambiguity codes to three\r\n        // letter abbreviations\r\n        private static readonly Dictionary<char, string> SingleToThreeAminoAcids = new Dictionary<char, string>\r\n        {\r\n            {'A', \"Ala\"},\r\n            {'B', \"Asx\"},\r\n            {'C', \"Cys\"},\r\n            {'D', \"Asp\"},\r\n            {'E', \"Glu\"},\r\n            {'F', \"Phe\"},\r\n            {'G', \"Gly\"},\r\n            {'H', \"His\"},\r\n            {'I', \"Ile\"},\r\n            {'K', \"Lys\"},\r\n            {'L', \"Leu\"},\r\n            {'M', \"Met\"},\r\n            {'N', \"Asn\"},\r\n            {'P', \"Pro\"},\r\n            {'Q', \"Gln\"},\r\n            {'R', \"Arg\"},\r\n            {'S', \"Ser\"},\r\n            {'T', \"Thr\"},\r\n            {'V', \"Val\"},\r\n            {'W', \"Trp\"},\r\n            {'Y', \"Tyr\"},\r\n            {'Z', \"Glx\"},\r\n            {'X', \"Ter\"}, // Ter now recommended in HGVS\r\n\t\t    {'*', \"Ter\"},\r\n            {'U', \"Sec\"},\r\n            {'O', \"Pyl\"},\r\n            {'J', \"Xle\"},\r\n            {'?', \"_?_\"} //deletion at the end of incomplete transcript results in unknown change\r\n        };\r\n\r\n        private enum CodonConversion : byte\r\n        {\r\n            HumanChromosome,\r\n            HumanMitochondrion\r\n        }\r\n\r\n        
public AminoAcids(bool isMitochondrial)\r\n        {\r\n            if (isMitochondrial) _codonConversionScheme = CodonConversion.HumanMitochondrion;\r\n\r\n            _aminoAcidLookupTable = new Dictionary<string, char>\r\n            {\r\n                // 2nd base: T\r\n                {\"TTT\", 'F'},\r\n                {\"TTC\", 'F'},\r\n                {\"TTA\", 'L'},\r\n                {\"TTG\", 'L'},\r\n                {\"CTT\", 'L'},\r\n                {\"CTC\", 'L'},\r\n                {\"CTA\", 'L'},\r\n                {\"CTG\", 'L'},\r\n                {\"ATT\", 'I'},\r\n                {\"ATC\", 'I'},\r\n                {\"ATA\", 'I'},\r\n                {\"ATG\", 'M'},\r\n                {\"GTT\", 'V'},\r\n                {\"GTC\", 'V'},\r\n                {\"GTA\", 'V'},\r\n                {\"GTG\", 'V'},\r\n\r\n                // 2nd base: C\r\n                {\"TCT\", 'S'},\r\n                {\"TCC\", 'S'},\r\n                {\"TCA\", 'S'},\r\n                {\"TCG\", 'S'},\r\n                {\"CCT\", 'P'},\r\n                {\"CCC\", 'P'},\r\n                {\"CCA\", 'P'},\r\n                {\"CCG\", 'P'},\r\n                {\"ACT\", 'T'},\r\n                {\"ACC\", 'T'},\r\n                {\"ACA\", 'T'},\r\n                {\"ACG\", 'T'},\r\n                {\"GCT\", 'A'},\r\n                {\"GCC\", 'A'},\r\n                {\"GCA\", 'A'},\r\n                {\"GCG\", 'A'},\r\n\r\n                // 2nd base: A\r\n                {\"TAT\", 'Y'},\r\n                {\"TAC\", 'Y'},\r\n                {\"TAA\", '*'},\r\n                {\"TAG\", '*'},\r\n                {\"CAT\", 'H'},\r\n                {\"CAC\", 'H'},\r\n                {\"CAA\", 'Q'},\r\n                {\"CAG\", 'Q'},\r\n                {\"AAT\", 'N'},\r\n                {\"AAC\", 'N'},\r\n                {\"AAA\", 'K'},\r\n                {\"AAG\", 'K'},\r\n                {\"GAT\", 'D'},\r\n                {\"GAC\", 'D'},\r\n                {\"GAA\", 'E'},\r\n   
             {\"GAG\", 'E'},\r\n\r\n                // 2nd base: G\r\n                {\"TGT\", 'C'},\r\n                {\"TGC\", 'C'},\r\n                {\"TGA\", '*'},\r\n                {\"TGG\", 'W'},\r\n                {\"CGT\", 'R'},\r\n                {\"CGC\", 'R'},\r\n                {\"CGA\", 'R'},\r\n                {\"CGG\", 'R'},\r\n                {\"AGT\", 'S'},\r\n                {\"AGC\", 'S'},\r\n                {\"AGA\", 'R'},\r\n                {\"AGG\", 'R'},\r\n                {\"GGT\", 'G'},\r\n                {\"GGC\", 'G'},\r\n                {\"GGA\", 'G'},\r\n                {\"GGG\", 'G'}\r\n            };\r\n\r\n            _mitoDifferences = new Dictionary<string, char>\r\n            {\r\n                {\"ATA\", 'M'},\r\n                {\"TGA\", 'W'},\r\n                {\"AGA\", '*'},\r\n                {\"AGG\", '*'}\r\n            };\r\n        }\r\n\r\n        internal static string AddUnknownAminoAcid(string aminoAcids) => aminoAcids == StopCodon ? 
aminoAcids : aminoAcids + 'X';\r\n\r\n        /// <summary>\r\n        /// translates the reference and alternate codons into single-letter amino acids.\r\n        /// Returns an empty change when both inputs are null/empty, or when referenceCodons\r\n        /// is non-null and either codon string contains the ambiguity base N.\r\n        /// </summary>\r\n        public SequenceChange Translate(string referenceCodons, string alternateCodons)\r\n        {\r\n            if (string.IsNullOrEmpty(referenceCodons) && string.IsNullOrEmpty(alternateCodons)) return new SequenceChange(\"\", \"\");\r\n\r\n            // alternateCodons may still be null here (only one of the two has to be populated),\r\n            // so guard it before calling Contains to avoid a NullReferenceException\r\n            if (referenceCodons != null && (referenceCodons.Contains(\"N\") || (alternateCodons != null && alternateCodons.Contains(\"N\"))))\r\n                return new SequenceChange(\"\", \"\");\r\n\r\n            var referenceAminoAcids = TranslateBases(referenceCodons, false);\r\n            var alternateAminoAcids = TranslateBases(alternateCodons, false);\r\n            return new SequenceChange(referenceAminoAcids, alternateAminoAcids);\r\n        }\r\n\r\n        /// <summary>\r\n        /// converts a single-letter amino acid code to its three-letter abbreviation\r\n        /// </summary>\r\n        /// <exception cref=\"NotSupportedException\">thrown when the code has no entry in the abbreviation table</exception>\r\n        public static string ConvertAminoAcidToAbbreviation(char aminoAcid)\r\n        {\r\n            if (!SingleToThreeAminoAcids.TryGetValue(aminoAcid, out var abbreviation))\r\n            {\r\n                throw new NotSupportedException($\"Unable to convert the following string to an amino acid abbreviation: {aminoAcid}\");\r\n            }\r\n\r\n            return abbreviation;\r\n        }\r\n\r\n        /// <summary>\r\n        /// converts a DNA triplet to the appropriate amino acid abbreviation\r\n        /// The default conversion is human chromosomes. 
Mitochondrial codon differences are applied instead when the instance was constructed with isMitochondrial set to true. Unrecognized triplets yield 'X'.\r\n        /// </summary>\r\n        internal char ConvertTripletToAminoAcid(string triplet)\r\n        {\r\n            var upperTriplet = triplet.ToUpper();\r\n\r\n            // check our exceptions first\r\n            if (_codonConversionScheme == CodonConversion.HumanMitochondrion &&\r\n                _mitoDifferences.TryGetValue(upperTriplet, out var mitoAminoAcid)) return mitoAminoAcid;\r\n\r\n            // the default case\r\n            return _aminoAcidLookupTable.TryGetValue(upperTriplet, out var aminoAcid) ? aminoAcid : 'X';\r\n        }\r\n\r\n\r\n        /// <summary>\r\n        /// given a string of 1-letter amino acid ambiguity codes, this function\r\n        /// returns the concatenated 3-letter abbreviations of every residue. Conversion\r\n        /// does not stop at a stop codon; '*' is rendered as Ter.\r\n        /// </summary>\r\n        public static string GetAbbreviations(string aminoAcids)\r\n        {\r\n            if (string.IsNullOrEmpty(aminoAcids)) return \"\";\r\n            if (aminoAcids.Length == 1) return ConvertAminoAcidToAbbreviation(aminoAcids[0]);\r\n\r\n            var sb = StringBuilderPool.Get();\r\n\r\n            foreach (var aminoAcid in aminoAcids)\r\n            {\r\n                sb.Append(ConvertAminoAcidToAbbreviation(aminoAcid));\r\n            }\r\n\r\n            return StringBuilderPool.GetStringAndReturn(sb);\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns a string of single-letter amino acids translated from a string of bases. \r\n        /// The bases must already be grouped by triplets (i.e. 
len must be a multiple of 3)\r\n        /// Lengths that are not a multiple of 3 are still accepted: the trailing 1-2 bases are never translated.\r\n        /// When forceNonTriplet is false an 'X' is appended to mark the partial codon (unless the result is a lone\r\n        /// stop codon); when forceNonTriplet is true the trailing bases are silently dropped.\r\n        /// </summary>\r\n        public string TranslateBases(string bases, bool forceNonTriplet)\r\n        {\r\n            // sanity check: handle the null case\r\n            if (bases == null) return null;\r\n\r\n            var numAminoAcids = bases.Length / 3;\r\n\r\n            // check if we have a non triplet case\r\n            var nonTriplet = !forceNonTriplet && numAminoAcids * 3 != bases.Length;\r\n\r\n            // special case: single amino acid\r\n            string aminoAcidString;\r\n            if (numAminoAcids == 1)\r\n            {\r\n                aminoAcidString =\r\n                    ConvertTripletToAminoAcid(bases.Substring(0, 3 * numAminoAcids))\r\n                        .ToString();\r\n                return nonTriplet ? AddUnknownAminoAcid(aminoAcidString) : aminoAcidString;\r\n            }\r\n\r\n            // multiple amino acid case\r\n            var aminoAcids = new char[numAminoAcids];\r\n            for (var i = 0; i < numAminoAcids; i++)\r\n            {\r\n                aminoAcids[i] = ConvertTripletToAminoAcid(bases.Substring(i * 3, 3));\r\n            }\r\n\r\n            aminoAcidString = new string(aminoAcids);\r\n            return nonTriplet ? AddUnknownAminoAcid(aminoAcidString) : aminoAcidString;\r\n        }\r\n    }\r\n}\r\n\r\n"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/Transcript/AnnotatedConservationScore.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.Globalization;\nusing System.Linq;\nusing System.Text;\nusing VariantAnnotation.Interface.IO;\nusing VariantAnnotation.IO;\n\nnamespace VariantAnnotation.AnnotatedPositions.Transcript\n{\n    /// <summary>\n    /// serializes a sequence of conservation scores as a JSON object with a single \"scores\" entry\n    /// </summary>\n    public sealed class AnnotatedConservationScore : IJsonSerializer\n    {\n        private readonly IEnumerable<double> _scores;\n\n        public AnnotatedConservationScore(IEnumerable<double> scores) => _scores = scores;\n\n        /// <summary>\n        /// appends the scores to the supplied string builder, each formatted with at most two decimal places\n        /// </summary>\n        public void SerializeJson(StringBuilder sb)\n        {\n            var jsonObject = new JsonObject(sb);\n\n            sb.Append(JsonObject.OpenBrace);\n            // format with the invariant culture so the decimal separator is always '.',\n            // regardless of the host locale (a ',' separator would corrupt the JSON output)\n            jsonObject.AddStringValues(\"scores\", _scores.Select(x => x.ToString(\"0.##\", CultureInfo.InvariantCulture)), false);\n            sb.Append(JsonObject.CloseBrace);\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/Transcript/AnnotatedGeneFusion.cs",
    "content": "﻿using System.Text;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.IO;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    // ReSharper disable InconsistentNaming\r\n    public sealed record AnnotatedGeneFusion(ITranscript transcript, int? exon, int? intron, string hgvsr, bool isInFrame, ulong FusionKey,\r\n        string FirstGeneSymbol, uint FirstGeneKey, string SecondGeneSymbol, uint SecondGeneKey) : IAnnotatedGeneFusion\r\n    {\r\n        // ReSharper restore InconsistentNaming\r\n\r\n        public void SerializeJson(StringBuilder sb)\r\n        {\r\n            string geneId = transcript.Source == Source.Ensembl\r\n                ? transcript.Gene.EnsemblId.ToString()\r\n                : transcript.Gene.EntrezGeneId.ToString();\r\n\r\n            var jsonObject = new JsonObject(sb);\r\n            sb.Append(JsonObject.OpenBrace);\r\n            jsonObject.AddStringValue(\"transcript\", transcript.Id.WithVersion);\r\n            jsonObject.AddStringValue(\"bioType\",    AnnotatedTranscript.GetBioType(transcript.BioType));\r\n            jsonObject.AddIntValue(\"exon\",   exon);\r\n            jsonObject.AddIntValue(\"intron\", intron);\r\n            jsonObject.AddStringValue(\"geneId\", geneId);\r\n            jsonObject.AddStringValue(\"hgnc\",   transcript.Gene.Symbol);\r\n            jsonObject.AddStringValue(\"hgvsr\",  hgvsr);\r\n            jsonObject.AddBoolValue(\"inFrame\", isInFrame);\r\n            sb.Append(JsonObject.CloseBrace);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/Transcript/AnnotatedTranscript.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Globalization;\r\nusing System.Linq;\r\nusing System.Text;\r\nusing VariantAnnotation.Algorithms;\r\nusing VariantAnnotation.GeneFusions.SA;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.IO;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public sealed class AnnotatedTranscript : IAnnotatedTranscript\r\n    {\r\n        public ITranscript          Transcript          { get; private set; }\r\n        public string               ReferenceAminoAcids { get; private set;}\r\n        public string               AlternateAminoAcids { get; private set;}\r\n        public string               ReferenceCodons     { get; private set;}\r\n        public string               AlternateCodons     { get; private set;}\r\n        public IMappedPosition      MappedPosition      { get; private set;}\r\n        public string               HgvsCoding          { get; private set;}\r\n        public string               HgvsProtein         { get; private set;}\r\n        public PredictionScore      Sift                { get; private set;}\r\n        public PredictionScore      PolyPhen            { get; private set;}\r\n        public List<ConsequenceTag> Consequences        { get; private set;}\r\n        public bool?                CompleteOverlap     { get; private set;}\r\n        public List<double>         ConservationScores  { get; set; }\r\n\r\n        private List<IAnnotatedGeneFusion> _geneFusions;\r\n        \r\n        public void Initialize(ITranscript transcript, string referenceAminoAcids, string alternateAminoAcids,\r\n            string referenceCodons, string alternateCodons, IMappedPosition mappedPosition, string hgvsCoding,\r\n            string hgvsProtein, PredictionScore sift, PredictionScore polyphen,\r\n            List<ConsequenceTag> consequences, bool? 
completeOverlap)\r\n        {\r\n            Transcript          = transcript;\r\n            ReferenceAminoAcids = referenceAminoAcids;\r\n            AlternateAminoAcids = alternateAminoAcids;\r\n            ReferenceCodons     = referenceCodons;\r\n            AlternateCodons     = alternateCodons;\r\n            MappedPosition      = mappedPosition;\r\n            HgvsCoding          = hgvsCoding;\r\n            HgvsProtein         = hgvsProtein;\r\n            Sift                = sift;\r\n            PolyPhen            = polyphen;\r\n            Consequences        = consequences;\r\n            CompleteOverlap     = completeOverlap;\r\n            _geneFusions        = null;\r\n        }\r\n        \r\n        public void SerializeJson(StringBuilder sb)\r\n        {\r\n            var jsonObject = new JsonObject(sb);\r\n\r\n            sb.Append(JsonObject.OpenBrace);\r\n            jsonObject.AddStringValue(\"transcript\", Transcript.Id.WithVersion);\r\n            jsonObject.AddStringValue(\"source\", Transcript.Source.ToString());\r\n            if (CompleteOverlap.HasValue && !CompleteOverlap.Value) jsonObject.AddStringValue(\"bioType\", GetBioType(Transcript.BioType));\r\n            jsonObject.AddStringValue(\"codons\", GetCodonString(ReferenceCodons, AlternateCodons));\r\n            jsonObject.AddStringValue(\"aminoAcids\", GetAminoAcidString(ReferenceAminoAcids, AlternateAminoAcids));\r\n\r\n            if (MappedPosition != null)\r\n            {\r\n                jsonObject.AddStringValue(\"cdnaPos\",    GetRangeString(MappedPosition.CoveredCdnaStart, MappedPosition.CoveredCdnaEnd));\r\n                jsonObject.AddStringValue(\"cdsPos\",     GetRangeString(MappedPosition.CoveredCdsStart, MappedPosition.CoveredCdsEnd));\r\n                jsonObject.AddStringValue(\"exons\",      GetFractionString(MappedPosition.ExonStart,   MappedPosition.ExonEnd, Transcript.NumExons));\r\n                jsonObject.AddStringValue(\"introns\",    
GetFractionString(MappedPosition.IntronStart, MappedPosition.IntronEnd, Transcript.NumExons - 1));\r\n                jsonObject.AddStringValue(\"proteinPos\", GetRangeString(MappedPosition.CoveredProteinStart, MappedPosition.CoveredProteinEnd));\r\n            }\r\n\r\n            string geneId = Transcript.Source == Source.Ensembl\r\n                ? Transcript.Gene.EnsemblId.ToString()\r\n                : Transcript.Gene.EntrezGeneId.ToString();\r\n\r\n            if (CompleteOverlap.HasValue &&!CompleteOverlap.Value) jsonObject.AddStringValue(\"geneId\", geneId);\r\n            jsonObject.AddStringValue(\"hgnc\", Transcript.Gene.Symbol);\r\n\r\n            if (Consequences != null) AddConsequences(jsonObject);\r\n            jsonObject.AddStringValue(\"hgvsc\", HgvsCoding);\r\n            jsonObject.AddStringValue(\"hgvsp\", HgvsProtein);\r\n\r\n            if (_geneFusions != null) jsonObject.AddObjectValues(\"geneFusions\", _geneFusions);\r\n\r\n            jsonObject.AddBoolValue(\"isCanonical\", Transcript.IsCanonical);\r\n\r\n            jsonObject.AddDoubleValue(\"polyPhenScore\", PolyPhen?.Score);\r\n\r\n            jsonObject.AddStringValue(\"polyPhenPrediction\", PolyPhen?.Prediction);\r\n            if (CompleteOverlap.HasValue && !CompleteOverlap.Value && Transcript.Translation != null) jsonObject.AddStringValue(\"proteinId\", Transcript.Translation.ProteinId.WithVersion);\r\n\r\n            jsonObject.AddDoubleValue(\"siftScore\", Sift?.Score);\r\n\r\n            jsonObject.AddStringValue(\"siftPrediction\", Sift?.Prediction);\r\n\r\n            if (ConservationScores != null && ConservationScores.Count > 0)\r\n            {\r\n                jsonObject.AddObjectValue(\"aminoAcidConservation\", new AnnotatedConservationScore(ConservationScores));\r\n            }\r\n\r\n            if (CompleteOverlap.HasValue) jsonObject.AddBoolValue(\"completeOverlap\", CompleteOverlap.Value);\r\n\r\n            sb.Append(JsonObject.CloseBrace);\r\n        
}\r\n\r\n        private void AddConsequences(JsonObject jsonObject)\r\n        {\r\n            jsonObject.AddStringValues(\"consequence\", Consequences?.Select(ConsequenceUtil.GetConsequence));\r\n        }\r\n\r\n        public static string GetBioType(BioType bioType) => bioType == BioType.three_prime_overlapping_ncRNA\r\n            ? \"3prime_overlapping_ncRNA\"\r\n            : bioType.ToString();\r\n\r\n        private static string GetAminoAcidString(string a, string b)\r\n        {\r\n            if (a == b) return a;\r\n            a = string.IsNullOrEmpty(a) ? \"-\" : a;\r\n            b = string.IsNullOrEmpty(b) ? \"-\" : b;\r\n            return $\"{a}/{b}\";\r\n        }\r\n\r\n        private static string GetCodonString(string a, string b)\r\n        {\r\n            if (a == b && string.IsNullOrEmpty(a)) return a;\r\n            a = string.IsNullOrEmpty(a) ? \"-\" : a;\r\n            b = string.IsNullOrEmpty(b) ? \"-\" : b;\r\n            return $\"{a}/{b}\";\r\n        }\r\n\r\n        private static string GetRangeString(int start, int end)\r\n        {\r\n            if (start == -1 && end == -1) return null;\r\n            if (start == -1) return \"?-\" + end;\r\n            if (end == -1) return start + \"-?\";\r\n            if (start > end) Swap.Int(ref start, ref end);\r\n            return start == end ? 
start.ToString(CultureInfo.InvariantCulture) : start + \"-\" + end;\r\n        }\r\n\r\n        private static string GetFractionString(int start, int end, int total)\r\n        {\r\n            if (start == -1 && end == -1) return null;\r\n            return GetRangeString(start, end) + \"/\" + total;\r\n        }\r\n\r\n        public void AddGeneFusions(IAnnotatedGeneFusion[] geneFusions)\r\n        {\r\n            _geneFusions ??= new List<IAnnotatedGeneFusion>();\r\n            _geneFusions.AddRange(geneFusions);\r\n            Consequences.Add(ConsequenceTag.unidirectional_gene_fusion);\r\n        }\r\n\r\n        public void AddGeneFusionPairs(HashSet<IGeneFusionPair> fusionPairs)\r\n        {\r\n            if (_geneFusions == null) return;\r\n            foreach (IAnnotatedGeneFusion gf in _geneFusions)\r\n                fusionPairs.Add(new GeneFusionPair(gf.FusionKey, gf.FirstGeneSymbol, gf.FirstGeneKey, gf.SecondGeneSymbol, gf.SecondGeneKey));\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/Transcript/CdnaSequence.cs",
    "content": "using System.Text;\nusing ErrorHandling.Exceptions;\nusing Genome;\nusing OptimizedCore;\nusing VariantAnnotation.Caches.Utilities;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing Variants;\n\nnamespace VariantAnnotation.AnnotatedPositions.Transcript\n{\n    public sealed class CdnaSequence : ISequence\n    {\n        private readonly ICodingRegion _codingRegion;\n        private readonly ITranscriptRegion[] _regions;\n        private readonly IRnaEdit[] _rnaEdits;\n        private readonly bool _onReverseStrand;\n        private readonly ISequence _compressedSequence;\n        private string _sequence;\n\n        public CdnaSequence(ISequence compressedSequence, ICodingRegion codingRegion, ITranscriptRegion[] regions,\n            bool onReverseStrand, IRnaEdit[] rndEdits)\n        {\n            _codingRegion       = codingRegion;\n            _regions            = regions;\n            _rnaEdits           = rndEdits;\n            _onReverseStrand    = onReverseStrand;\n            _compressedSequence = compressedSequence;\n\n            _sequence = GetCdnaSequence();\n        }\n\n        public string GetCdnaSequence()\n        {\n            if (_sequence != null) return _sequence;\n\n            var sb = StringBuilderPool.Get();\n            \n            foreach (var region in _regions)\n            {\n                if (region.Type != TranscriptRegionType.Exon) continue;\n                sb.Append(_compressedSequence.Substring(region.Start - 1, region.End - region.Start + 1));\n            }\n\n            if (_onReverseStrand)\n            {\n                string reverseComplement = SequenceUtilities.GetReverseComplement(sb.ToString());\n                sb.Clear();\n                sb.Append(reverseComplement);\n            }\n\n            ApplyRnaEdits(sb);\n\n            _sequence = StringBuilderPool.GetStringAndReturn(sb);\n            return _sequence;\n        }\n\n        private void ApplyRnaEdits(StringBuilder 
sb)\n        {\n            if (_rnaEdits == null) return;\n            var editOffset = 0;\n            RnaEditUtilities.SetTypesAndSort(_rnaEdits);\n\n            foreach (var rnaEdit in _rnaEdits)\n            {\n                int cdnaEditStart = rnaEdit.Start - 1 + editOffset;\n                \n                switch (rnaEdit.Type)\n                {\n                    case VariantType.SNV:\n                        if(cdnaEditStart >= 0 ) sb[cdnaEditStart] = rnaEdit.Bases[0];\n                        break;\n                    \n                    case VariantType.MNV:\n                        for (var i = 0; i < rnaEdit.Bases.Length && cdnaEditStart >= 0; i++)\n                            sb[cdnaEditStart + i] = rnaEdit.Bases[i];\n                        break;\n                    \n                    case VariantType.insertion:\n                        if (cdnaEditStart >= 0) sb.Insert(cdnaEditStart, rnaEdit.Bases);\n                        editOffset += rnaEdit.Bases.Length;\n                        break;\n                    \n                    case VariantType.deletion:\n                        editOffset -= rnaEdit.End - rnaEdit.Start + 1;\n                        break;\n\n                    default:\n                        throw new UserErrorException(\"Encountered unknown rnaEdit type:\" + rnaEdit.Type);\n                }\n            }\n        }\n        \n        public int Length => _sequence?.Length ?? _codingRegion?.Length ?? 0;\n        public Band[] CytogeneticBands => null;\n\n        public string Substring(int offset, int length)\n        {\n            if (_sequence == null) _sequence = GetCdnaSequence();\n            return _sequence.Substring(offset, length);\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/Transcript/CodingSequence.cs",
    "content": "﻿using Genome;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public sealed class CodingSequence : ISequence\r\n    {\r\n        private readonly string _sequence;\r\n\r\n        public CodingSequence(ISequence compressedSequence, ICodingRegion codingRegion, ITranscriptRegion[] regions,\r\n                              bool onReverseStrand, byte startExonPhase, IRnaEdit[] rnaEdits)\r\n        {\r\n            string cdnaSequence = \r\n                new CdnaSequence(compressedSequence, codingRegion, regions, onReverseStrand, rnaEdits)\r\n                    .GetCdnaSequence();\r\n            int cdsLen = codingRegion.CdnaEnd - codingRegion.CdnaStart + 1;\r\n            \r\n            _sequence = new string('N', startExonPhase) + cdnaSequence.Substring(codingRegion.CdnaStart - 1, cdsLen);\r\n        }\r\n\r\n        public string GetCodingSequence()               => _sequence;\r\n        public int    Length                            => _sequence.Length;\r\n        public Band[] CytogeneticBands                  => null;\r\n        public string Substring(int offset, int length) => _sequence.Substring(offset, length);\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/Transcript/Codons.cs",
    "content": "﻿using System;\r\nusing Genome;\r\nusing VariantAnnotation.TranscriptAnnotation;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public static class Codons\r\n    {\r\n        public static SequenceChange GetCodons(string transcriptAlternateAllele,\r\n            int cdsStart, int cdsEnd, int proteinBegin, int proteinEnd, ISequence codingSequence)\r\n        {\r\n            if (cdsStart == -1 || cdsEnd == -1 || proteinBegin == -1 || proteinEnd == -1) return new SequenceChange(\"\", \"\");\r\n\r\n            // current implementation of GetCoveredCdsAndProteinPositions may return negative cdsStart and cdsEnd beyond the CDS region\r\n            if (cdsStart < 1) cdsStart = 1;\r\n            if (cdsEnd > codingSequence.Length) cdsEnd = codingSequence.Length;\r\n\r\n            int aminoAcidStart = Math.Max(proteinBegin * 3 - 2, 1);\r\n            int aminoAcidEnd = Math.Min(proteinEnd * 3, codingSequence.Length);\r\n\r\n            var transcriptReferenceAllele = cdsEnd >= cdsStart ? codingSequence.Substring(cdsStart - 1, cdsEnd - cdsStart + 1) : \"\";\r\n\r\n            int prefixStartIndex = aminoAcidStart - 1;\r\n            int prefixLen = cdsStart - aminoAcidStart;\r\n\r\n            int suffixStartIndex = cdsEnd;\r\n            int suffixLen = aminoAcidEnd - cdsEnd;\r\n\r\n            string prefix = prefixStartIndex + prefixLen < codingSequence.Length\r\n                ? codingSequence.Substring(prefixStartIndex, prefixLen).ToLower()\r\n                : \"AAA\";\r\n\r\n            string suffix = suffixLen > 0\r\n                ? 
codingSequence.Substring(suffixStartIndex, suffixLen).ToLower()\r\n                : \"\";\r\n\r\n            var refCodons = GetCodon(transcriptReferenceAllele, prefix, suffix);\r\n            var altCodons = GetCodon(transcriptAlternateAllele, prefix, suffix);\r\n            return new SequenceChange(refCodons, altCodons);\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns the codon string consisting of the prefix and suffix bases flanking the allele bases\r\n        /// </summary>\r\n        public static string GetCodon(string allele, string prefix, string suffix)\r\n        {\r\n            if (prefix.Length == 0 && suffix.Length == 0) return allele;\r\n            return $\"{prefix}{allele}{suffix}\";\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if the length is a multiple of three, false otherwise\r\n        /// </summary>\r\n        public static bool IsTriplet(int len) => Math.Abs(len) % 3 == 0;\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/Transcript/CompactId.cs",
    "content": "﻿using System;\r\nusing IO;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Utilities;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public struct CompactId : ICompactId\r\n    {\r\n        private readonly IdType _id;\r\n        private readonly byte _version;\r\n        private readonly uint _info;\r\n\r\n        private const int NoInfo     = int.MaxValue;\r\n        private const byte NoVersion = byte.MaxValue;\r\n        private const int NumShift   = 4;\r\n        private const int LengthMask = 0xf;\r\n        private const int MaxNumber  = 0xfffffff;\r\n\r\n        internal static CompactId Empty => new CompactId(IdType.Unknown, NoVersion, NoInfo);\r\n        public bool IsEmpty()           => _id == IdType.Unknown;\r\n\r\n        private CompactId(IdType id, byte version, uint info)\r\n        {\r\n            _id      = id;\r\n            _version = version;\r\n            _info    = info;\r\n        }\r\n\r\n        public override string ToString() => ConvertToString(true);\r\n        public string WithVersion         => ConvertToString(true);\r\n        public string WithoutVersion      => ConvertToString(false);\r\n\r\n        public static CompactId Convert(string s, byte version = NoVersion)\r\n        {\r\n            if (string.IsNullOrEmpty(s)) return Empty;\r\n\r\n            if (s.StartsWith(\"ENSG\"))    return GetCompactId(s, 4, IdType.EnsemblGene, version);\r\n            if (s.StartsWith(\"ENST\"))    return GetCompactId(s, 4, IdType.EnsemblTranscript, version);\r\n            if (s.StartsWith(\"ENSP\"))    return GetCompactId(s, 4, IdType.EnsemblProtein, version);\r\n            if (s.StartsWith(\"ENSESTG\")) return GetCompactId(s, 7, IdType.EnsemblEstGene, version);\r\n            if (s.StartsWith(\"ENSESTP\")) return GetCompactId(s, 7, IdType.EnsemblEstProtein, version);\r\n            if (s.StartsWith(\"ENSR\"))    return GetCompactId(s, 4, 
IdType.EnsemblRegulatory, version);\r\n            if (s.StartsWith(\"CCDS\"))    return GetCompactId(s, 4, IdType.Ccds, version);\r\n            if (s.StartsWith(\"NR_\"))     return GetCompactId(s, 3, IdType.RefSeqNonCodingRNA, version);\r\n            if (s.StartsWith(\"NM_\"))     return GetCompactId(s, 3, IdType.RefSeqMessengerRNA, version);\r\n            if (s.StartsWith(\"NP_\"))     return GetCompactId(s, 3, IdType.RefSeqProtein, version);\r\n            if (s.StartsWith(\"XR_\"))     return GetCompactId(s, 3, IdType.RefSeqPredictedNonCodingRNA, version);\r\n            if (s.StartsWith(\"XM_\"))     return GetCompactId(s, 3, IdType.RefSeqPredictedMessengerRNA, version);\r\n            if (s.StartsWith(\"XP_\"))     return GetCompactId(s, 3, IdType.RefSeqPredictedProtein, version);\r\n            if (s.StartsWith(\"YP_\"))     return GetCompactId(s, 3, IdType.RefSeq_YP, version);\r\n\r\n            if (int.TryParse(s, out int i)) return GetNumericalCompactId(i, s.Length);\r\n\r\n            Console.WriteLine(\"Unknown ID: [{0}] ({1})\", s, s.Length);\r\n            return Empty;\r\n        }\r\n        \r\n        private static uint ToInfo(int num, int len) => (uint)(num << 4 | (len & LengthMask));\r\n\r\n        private static CompactId GetCompactId(string s, int prefixLen, IdType idType, byte version)\r\n        {\r\n            var (id, _) = FormatUtilities.SplitVersion(s);\r\n            int num     = int.Parse(id.Substring(prefixLen));\r\n            return new CompactId(idType, version, ToInfo(num, id.Length - prefixLen));\r\n        }\r\n\r\n        private static CompactId GetNumericalCompactId(int num, int paddedLength)\r\n        {\r\n            if (num > MaxNumber) throw new ArgumentOutOfRangeException($\"Could not convert the number ({num}) to a CompactID. 
Max supported number is {MaxNumber}.\");\r\n            return new CompactId(IdType.OnlyNumbers, NoVersion, ToInfo(num, paddedLength));\r\n        }\r\n\r\n        private string ConvertToString(bool showVersion)\r\n        {\r\n            if (_id == IdType.Unknown) return null;\r\n            var prefix  = GetPrefix();\r\n            var number  = GetNumber();\r\n            var version = GetVersion(showVersion);\r\n            return prefix + number + version;\r\n        }\r\n\r\n        private string GetVersion(bool showVersion)\r\n        {\r\n            if (!showVersion || _version == NoVersion) return null;\r\n            return \".\" + _version;\r\n        }\r\n\r\n        private string GetNumber()\r\n        {\r\n            var num    = _info >> NumShift;\r\n            var length = _info & LengthMask;\r\n            return num.ToString(\"D\" + length);\r\n        }\r\n\r\n        private string GetPrefix()\r\n        {\r\n            // ReSharper disable once SwitchStatementMissingSomeCases\r\n            switch (_id)\r\n            {\r\n                case IdType.EnsemblGene:\r\n                    return \"ENSG\";\r\n                case IdType.EnsemblTranscript:\r\n                    return \"ENST\";\r\n                case IdType.EnsemblProtein:\r\n                    return \"ENSP\";\r\n                case IdType.EnsemblEstGene:\r\n                    return \"ENSESTG\";\r\n                case IdType.EnsemblEstProtein:\r\n                    return \"ENSESTP\";\r\n                case IdType.EnsemblRegulatory:\r\n                    return \"ENSR\";\r\n                case IdType.Ccds:\r\n                    return \"CCDS\";\r\n                case IdType.RefSeqNonCodingRNA:\r\n                    return \"NR_\";\r\n                case IdType.RefSeqMessengerRNA:\r\n                    return \"NM_\";\r\n                case IdType.RefSeqProtein:\r\n                    return \"NP_\";\r\n                case 
IdType.RefSeqPredictedNonCodingRNA:\r\n                    return \"XR_\";\r\n                case IdType.RefSeqPredictedMessengerRNA:\r\n                    return \"XM_\";\r\n                case IdType.RefSeqPredictedProtein:\r\n                    return \"XP_\";\r\n                case IdType.RefSeq_YP:\r\n                    return \"YP_\";\r\n            }\r\n\r\n            return null;\r\n        }\r\n\r\n        public void Write(IExtendedBinaryWriter writer)\r\n        {\r\n            writer.Write((byte)_id);\r\n            writer.Write(_version);\r\n            writer.Write(_info);\r\n        }\r\n\r\n        public static CompactId Read(IBufferedBinaryReader reader)\r\n        {\r\n            var id      = (IdType)reader.ReadByte();\r\n            var version = reader.ReadByte();\r\n            var info    = reader.ReadUInt32();\r\n            return new CompactId(id, version, info);\r\n        }\r\n    }\r\n\r\n    public enum IdType : byte\r\n    {\r\n        // ReSharper disable InconsistentNaming\r\n        Unknown,\r\n        Ccds,\r\n        EnsemblEstGene,\r\n        EnsemblEstProtein,\r\n        EnsemblGene,\r\n        EnsemblProtein,\r\n        EnsemblRegulatory,\r\n        EnsemblTranscript,\r\n        OnlyNumbers,\r\n        RefSeqMessengerRNA,\r\n        RefSeqNonCodingRNA,\r\n        RefSeqPredictedMessengerRNA,\r\n        RefSeqPredictedNonCodingRNA,\r\n        RefSeqPredictedProtein,\r\n        RefSeqProtein,\r\n        RefSeq_YP\r\n        // ReSharper restore InconsistentNaming\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/Transcript/FeatureVariantEffects.cs",
    "content": "﻿using Intervals;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Variants;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public sealed class FeatureVariantEffects : IFeatureVariantEffects\r\n    {\r\n        private readonly bool _isSv;\r\n\r\n        private readonly bool _completelyOverlaps;\r\n        private readonly bool _partialOverlap;\r\n        private readonly bool _fivePrimeOverlap;\r\n        private readonly bool _threePrimeOverlap;\r\n        private readonly bool _completelyWithin;\r\n\r\n        private readonly bool _lossOrDeletion;\r\n        private readonly bool _gainOrDuplication;\r\n        private readonly bool _isInsertionDeletion;\r\n        private readonly bool _isInsertion;\r\n\r\n        public FeatureVariantEffects(OverlapType overlapType, EndpointOverlapType endpointOverlapType, bool onReverseStrand, VariantType vt,\r\n                                     bool        isSv)\r\n        {\r\n            _isSv = isSv;\r\n\r\n            _partialOverlap     = overlapType != OverlapType.CompletelyOverlaps && overlapType != OverlapType.None;\r\n            _completelyOverlaps = overlapType == OverlapType.CompletelyOverlaps;\r\n            _completelyWithin   = overlapType == OverlapType.CompletelyWithin;\r\n\r\n            _fivePrimeOverlap = !onReverseStrand && endpointOverlapType == EndpointOverlapType.Start ||\r\n                                onReverseStrand  && endpointOverlapType == EndpointOverlapType.End;\r\n\r\n            _threePrimeOverlap = !onReverseStrand && endpointOverlapType == EndpointOverlapType.End ||\r\n                                 onReverseStrand  && endpointOverlapType == EndpointOverlapType.Start;\r\n\r\n            _lossOrDeletion = vt == VariantType.copy_number_loss || vt == VariantType.deletion;\r\n            _gainOrDuplication = vt == VariantType.copy_number_gain || vt == VariantType.duplication ||\r\n                                 vt == 
VariantType.tandem_duplication;\r\n\r\n            _isInsertionDeletion = vt == VariantType.indel;\r\n            _isInsertion         = vt == VariantType.insertion;\r\n        }\r\n\r\n        public bool Ablation()      => (_lossOrDeletion || _isInsertionDeletion) && _completelyOverlaps;\r\n        public bool Amplification() => _gainOrDuplication && _completelyOverlaps;\r\n        public bool Truncation()    => _isSv && _lossOrDeletion && _partialOverlap;\r\n        public bool Elongation()    => _isSv && _completelyWithin && (_gainOrDuplication || _isInsertion);\r\n\r\n        public bool FivePrimeDuplicatedTranscript()  => _gainOrDuplication && _fivePrimeOverlap;\r\n        public bool ThreePrimeDuplicatedTranscript() => _gainOrDuplication && _threePrimeOverlap;\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/Transcript/MappedPosition.cs",
    "content": "﻿using VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public sealed class MappedPosition : IMappedPosition\r\n    {\r\n        public int CdnaStart { get; }\r\n        public int CdnaEnd { get; }\r\n        public int CdsStart { get; }\r\n        public int CdsEnd { get; }\r\n        public int ProteinStart { get; set; }\r\n        public int ProteinEnd { get; set; }\r\n        public int ExonStart { get; }\r\n        public int ExonEnd { get; }\r\n        public int IntronStart { get; }\r\n        public int IntronEnd { get; }\r\n        public int RegionStartIndex { get; }\r\n        public int RegionEndIndex { get; }\r\n\r\n        public int CoveredProteinStart { get; set; } = -1;\r\n        public int CoveredProteinEnd { get; set; }   = -1;\r\n        public int CoveredCdsStart { get; set; }     = -1;\r\n        public int CoveredCdsEnd { get; set; }       = -1;\r\n        public int CoveredCdnaStart { get; set; }    = -1;\r\n        public int CoveredCdnaEnd { get; set; }      = -1;\r\n\r\n        public MappedPosition(int cdnaStart, int cdnaEnd, int cdsStart, int cdsEnd, int proteinStart, int proteinEnd,\r\n            int exonStart, int exonEnd, int intronStart, int intronEnd, int regionStartIndex, int regionEndIndex)\r\n        {\r\n            CdnaStart        = cdnaStart;\r\n            CdnaEnd          = cdnaEnd;\r\n            CdsStart         = cdsStart;\r\n            CdsEnd           = cdsEnd;\r\n            ProteinStart     = proteinStart;\r\n            ProteinEnd       = proteinEnd;\r\n            ExonStart        = exonStart;\r\n            ExonEnd          = exonEnd;\r\n            IntronStart      = intronStart;\r\n            IntronEnd        = intronEnd;\r\n            RegionStartIndex = regionStartIndex;\r\n            RegionEndIndex   = regionEndIndex;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/Transcript/MappedPositionUtilities.cs",
    "content": "﻿using Intervals;\r\nusing VariantAnnotation.Algorithms;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public static class MappedPositionUtilities\r\n    {\r\n        public static (int Index, ITranscriptRegion Region) FindRegion(ITranscriptRegion[] regions,\r\n            int variantPosition)\r\n        {\r\n            int index = regions.BinarySearch(variantPosition);\r\n            var region = index < 0 ? null : regions[index];\r\n            return (index, region);\r\n        }\r\n\r\n        public static (int CdnaStart, int CdnaEnd) GetCdnaPositions(ITranscriptRegion startRegion,\r\n            ITranscriptRegion endRegion, IInterval variant, bool onReverseStrand, bool isInsertion)\r\n        {\r\n            int cdnaStart = GetCdnaPosition(startRegion, variant.Start, onReverseStrand);\r\n            int cdnaEnd   = GetCdnaPosition(endRegion, variant.End, onReverseStrand);\r\n\r\n            if (FoundExonEndpointInsertion(isInsertion, cdnaStart, cdnaEnd, startRegion, endRegion))\r\n            {\r\n                return FixExonEndpointInsertion(cdnaStart, cdnaEnd, onReverseStrand, startRegion,\r\n                    endRegion, variant);\r\n            }\r\n\r\n            return (cdnaStart, cdnaEnd);\r\n        }\r\n\r\n        private static int GetCdnaPosition(ITranscriptRegion region, int variantPosition,  bool onReverseStrand)\r\n        {\r\n            if (region == null || region.Type != TranscriptRegionType.Exon) return -1;\r\n\r\n            return onReverseStrand\r\n                ? 
region.End - variantPosition + region.CdnaStart\r\n                : variantPosition - region.Start + region.CdnaStart;\r\n        }\r\n\r\n        /// <summary>\r\n        /// Assuming at least one cDNA coordinate overlaps with an exon, the covered cDNA coordinates represent\r\n        /// the coordinates actually covered by the variant.\r\n        /// </summary>\r\n        public static (int Start, int End) GetCoveredCdnaPositions(this ITranscriptRegion[] regions, int cdnaStart, int startRegionIndex,\r\n            int cdnaEnd, int endRegionIndex, bool onReverseStrand)\r\n        {\r\n            // exon case\r\n            if (cdnaStart != -1 && cdnaEnd != -1) return (cdnaStart, cdnaEnd);\r\n\r\n            if (onReverseStrand) Swap.Int(ref startRegionIndex, ref endRegionIndex);\r\n\r\n            var startRegion = regions.GetCoveredRegion(startRegionIndex);\r\n            var endRegion   = regions.GetCoveredRegion(endRegionIndex);\r\n\r\n            if (startRegion.Type != TranscriptRegionType.Exon && endRegion.Type != TranscriptRegionType.Exon)\r\n                return (-1, -1);\r\n\r\n            int codingEnd = onReverseStrand ? regions[0].CdnaEnd : regions[regions.Length - 1].CdnaEnd;\r\n\r\n            cdnaStart = GetCoveredCdnaPosition(cdnaStart, startRegion, startRegionIndex, codingEnd, onReverseStrand, false);\r\n            cdnaEnd   = GetCoveredCdnaPosition(cdnaEnd, endRegion, endRegionIndex, codingEnd, onReverseStrand, true);\r\n\r\n            return cdnaStart < cdnaEnd ? (cdnaStart, cdnaEnd) : (cdnaEnd, cdnaStart);\r\n        }\r\n\r\n        private static ITranscriptRegion GetCoveredRegion(this ITranscriptRegion[] regions, int regionIndex)\r\n        {\r\n            if (regionIndex == -1) return regions[0];\r\n            return regionIndex == ~regions.Length ? 
regions[regions.Length - 1] : regions[regionIndex];\r\n        }\r\n\r\n        private static int GetCoveredCdnaPosition(int cdnaPosition, ITranscriptRegion region, int regionIndex, int codingEnd, \r\n            bool onReserveStrand, bool isEndPosition)\r\n        {\r\n            if (cdnaPosition >= 0) return cdnaPosition;\r\n\r\n            // genomic position on the left of the transcript\r\n            if (regionIndex == -1) return onReserveStrand ? codingEnd : 1;\r\n\r\n            // genomic position on the right of the transcript\r\n            if (regionIndex < -1) return onReserveStrand ? 1 : codingEnd;\r\n\r\n            // intron\r\n            return isEndPosition ? region.CdnaStart : region.CdnaEnd;\r\n        }\r\n\r\n        public static (int CdsStart, int CdsEnd, int ProteinStart, int ProteinEnd) GetCoveredCdsAndProteinPositions(int coveredCdnaStart, int coveredCdnaEnd,\r\n            byte startExonPhase, ICodingRegion codingRegion)\r\n        {\r\n            if (codingRegion == null || \r\n                coveredCdnaEnd < codingRegion.CdnaStart || \r\n                coveredCdnaStart > codingRegion.CdnaEnd ||\r\n                coveredCdnaStart == -1 && coveredCdnaEnd == -1) return (-1, -1, -1, -1);\r\n\r\n            int beginOffset = startExonPhase - codingRegion.CdnaStart + 1;\r\n            int start = coveredCdnaStart + beginOffset;\r\n            int end   = coveredCdnaEnd + beginOffset;\r\n\r\n            return (start, end, GetProteinPosition(start), GetProteinPosition(end));\r\n        }\r\n\r\n        public static int GetProteinPosition(int cdsPosition)\r\n        {\r\n            if (cdsPosition == -1) return -1;\r\n            return (cdsPosition + 2) / 3;\r\n        }\r\n\r\n        public static (int CdsStart, int CdsEnd) GetCdsPositions(ICodingRegion codingRegion, int cdnaStart,\r\n            int cdnaEnd, byte startExonPhase, bool isInsertion)\r\n        {\r\n            int cdsStart = GetCdsPosition(codingRegion, cdnaStart, 
startExonPhase);\r\n            int cdsEnd   = GetCdsPosition(codingRegion, cdnaEnd, startExonPhase);\r\n\r\n            // silence CDS for insertions that occur just after the coding region\r\n            if (isInsertion && codingRegion != null && (cdnaEnd == codingRegion.CdnaEnd || cdnaStart == codingRegion.CdnaStart))\r\n            {\r\n                cdsStart = -1;\r\n                cdsEnd   = -1;\r\n            }\r\n\r\n            return (cdsStart, cdsEnd);\r\n        }\r\n\r\n        private static int GetCdsPosition(ICodingRegion codingRegion, int cdnaPosition, byte startExonPhase)\r\n        {\r\n            if (codingRegion == null || cdnaPosition < codingRegion.CdnaStart ||\r\n                cdnaPosition > codingRegion.CdnaEnd) return -1;\r\n            return cdnaPosition - codingRegion.CdnaStart + startExonPhase + 1;\r\n        }\r\n\r\n        /// <summary>\r\n        /// Fixes the missing cDNA coordinate for situations where an insertion occurs on either the first or last\r\n        /// base of an exon\r\n        /// </summary>\r\n        internal static (int CdnaStart, int CdnaEnd) FixExonEndpointInsertion(int cdnaStart, int cdnaEnd,\r\n            bool onReverseStrand, ITranscriptRegion startRegion, ITranscriptRegion endRegion, IInterval variant)\r\n        {\r\n            var (intron, exon) = startRegion.Type == TranscriptRegionType.Exon\r\n                ? (endRegion, startRegion)\r\n                : (startRegion, endRegion);\r\n\r\n            bool matchExonStart = variant.Start == exon.Start;\r\n\r\n            int cdnaPos = !onReverseStrand && matchExonStart || onReverseStrand && !matchExonStart\r\n                ? 
intron.CdnaStart\r\n                : intron.CdnaEnd;\r\n\r\n            if (cdnaStart == -1) cdnaStart = cdnaPos;\r\n            else cdnaEnd = cdnaPos;\r\n\r\n            return (cdnaStart, cdnaEnd);\r\n        }\r\n\r\n        /// <summary>\r\n        /// Identifies when an insertion on an exon boundary needs special attention. Here we're looking for one\r\n        /// intron and one exon where one cDNA coordinate is defined, but the other isn't.\r\n        /// </summary>\r\n        internal static bool FoundExonEndpointInsertion(bool isInsertion, int cdnaStart, int cdnaEnd,\r\n            ITranscriptRegion startRegion, ITranscriptRegion endRegion)\r\n        {\r\n            bool isCdnaStartUndef = cdnaStart         == -1;\r\n            bool isCdnaEndUndef   = cdnaEnd           == -1;\r\n            bool isStartExon      = startRegion?.Type == TranscriptRegionType.Exon;\r\n            bool isEndExon        = endRegion?.Type   == TranscriptRegionType.Exon;\r\n\r\n            return isInsertion && startRegion != null && endRegion != null && isStartExon ^ isEndExon &&\r\n                   isCdnaStartUndef ^ isCdnaEndUndef;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/Transcript/StringExtensions.cs",
    "content": "﻿using System;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public static class StringExtensions\r\n    {\r\n\r\n        public static int CommonPrefixLength(this string a, string b)\r\n        {\r\n            if (a == null || b == null) return 0;\r\n\r\n            var maxPrefixLength = Math.Min(a.Length, b.Length);\r\n\r\n            var prefixLength = 0;\r\n            while (prefixLength < maxPrefixLength && a[prefixLength] == b[prefixLength]) prefixLength++;\r\n\r\n            return prefixLength;\r\n        }\r\n\r\n        public static int CommonSuffixLength(this string a, string b)\r\n        {\r\n            if (a == null || b == null) return 0;\r\n\r\n            var maxSuffixLength = Math.Min(a.Length, b.Length);\r\n\r\n            var suffixLength = 0;\r\n            while (suffixLength < maxSuffixLength &&\r\n                   a[a.Length - suffixLength - 1] == b[b.Length - suffixLength - 1]) suffixLength++;\r\n\r\n            return suffixLength;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/Transcript/TranscriptPositionalEffect.cs",
    "content": "﻿using System.Linq;\r\nusing Intervals;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Variants;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public sealed class TranscriptPositionalEffect\r\n    {\r\n        public bool IsEndSpliceSite;\r\n        public bool IsStartSpliceSite;\r\n        public bool IsWithinFrameshiftIntron;\r\n        public bool IsWithinIntron;\r\n        public bool IsWithinSpliceSiteRegion;\r\n\r\n        public bool HasExonOverlap;\r\n        public bool AfterCoding;\r\n        public bool BeforeCoding;\r\n        public bool WithinCdna;\r\n        public bool WithinCds;\r\n        public bool HasFrameShift;\r\n        public bool IsCoding;\r\n\r\n        public bool OverlapWithMicroRna;\r\n\r\n        public void DetermineIntronicEffect(ITranscriptRegion[] regions, IInterval variant, VariantType variantType)\r\n        {\r\n            if (regions == null) return;\r\n\r\n            var isInsertion = variantType == VariantType.insertion;\r\n\r\n            foreach (var region in regions)\r\n            {\r\n                if (region.Type != TranscriptRegionType.Intron) continue;\r\n\r\n                // skip this one if variant is out of range : the range is set to 3 instead of the original old:\r\n                // all of the checking occurred in the region between start-3 to end+3, if we set to 8, we can make mistakes when\r\n                // checking IsWithinIntron when we have a small exon\r\n                if (!variant.Overlaps(region.Start - 3, region.End + 3)) continue;\r\n\r\n                // under various circumstances the genebuild process can introduce artificial \r\n                // short (<= 12 nucleotide) introns into transcripts (e.g. 
to deal with errors\r\n                // in the reference sequence etc.), we don't want to categorize variations that\r\n                // fall in these introns as intronic, or as any kind of splice variant\r\n\r\n                var isFrameshiftIntron = region.End - region.Start <= 12;\r\n\r\n                if (isFrameshiftIntron && variant.Overlaps(region.Start, region.End))\r\n                {\r\n                    IsWithinFrameshiftIntron = true;\r\n                    continue;\r\n                }\r\n\r\n                CheckSpliceSiteOverlap(variant, region);\r\n                CheckIntronOverlap(variant, isInsertion, region);\r\n\r\n                // the definition of splice_region (SO:0001630) is \"within 1-3 bases of the\r\n                // exon or 3-8 bases of the intron.\" We also need to special case insertions\r\n                // between the edge of an exon and a donor or acceptor site and between a donor\r\n                // or acceptor site and the intron\r\n                IsWithinSpliceSiteRegion = variant.Overlaps(region.Start + 2, region.Start + 7) ||\r\n                                           variant.Overlaps(region.End - 7, region.End - 2) ||\r\n                                           variant.Overlaps(region.Start - 3, region.Start - 1) ||\r\n                                           variant.Overlaps(region.End + 1, region.End + 3) ||\r\n                                           isInsertion &&\r\n                                           (variant.Start == region.Start ||\r\n                                            variant.End == region.End ||\r\n                                            variant.Start == region.Start + 2 ||\r\n                                            variant.End == region.End - 2);\r\n            }\r\n        }\r\n\r\n        private void CheckSpliceSiteOverlap(IInterval variant, IInterval region)\r\n        {\r\n            if (variant.Overlaps(region.Start, region.Start + 1))\r\n            {\r\n   
             IsStartSpliceSite = true;\r\n            }\r\n\r\n            if (variant.Overlaps(region.End - 1, region.End))\r\n            {\r\n                IsEndSpliceSite = true;\r\n            }\r\n        }\r\n\r\n        private void CheckIntronOverlap(IInterval variant, bool isInsertion, IInterval region)\r\n        {\r\n            // we need to special case insertions between the donor and acceptor sites\r\n            // make sure the size of intron is larger than 4\r\n            if (region.Start <= region.End - 4 && (variant.Overlaps(region.Start + 2, region.End - 2) ||\r\n                                                   isInsertion &&\r\n                                                   (variant.Start == region.Start + 2 ||\r\n                                                    variant.End == region.End - 2)))\r\n            {\r\n                IsWithinIntron = true;\r\n            }\r\n        }\r\n\r\n        public void DetermineExonicEffect(ITranscript transcript, IInterval variant, IMappedPosition position,\r\n            int coveredCdnaStart, int coveredCdnaEnd, int coveredCdsStart, int coveredCdsEnd, string altAllele,\r\n            bool startCodonInsertionWithNoImpact)\r\n        {\r\n            HasExonOverlap = position.ExonStart != -1 || position.ExonEnd != -1;\r\n\r\n            if (transcript.Translation != null)\r\n            {\r\n                var codingRegion = transcript.Translation.CodingRegion;\r\n                AfterCoding      = IsAfterCoding(variant.Start, variant.End, transcript.End, codingRegion.End);\r\n                BeforeCoding     = IsBeforeCoding(variant.Start, variant.End, transcript.Start, codingRegion.Start);\r\n                WithinCds        = IsWithinCds(coveredCdsStart, coveredCdsEnd, codingRegion, variant);\r\n                IsCoding         = !startCodonInsertionWithNoImpact && (position.CdsStart != -1 || position.CdsEnd != -1);\r\n            }\r\n\r\n            WithinCdna = 
IsWithinCdna(coveredCdnaStart, coveredCdnaEnd, transcript.TotalExonLength);\r\n\r\n            if (coveredCdsStart != -1 && coveredCdsEnd != -1)\r\n            {\r\n                var varLen    = coveredCdsEnd - coveredCdsStart + 1;\r\n                var alleleLen = altAllele?.Length ?? 0;\r\n                HasFrameShift = position.CdsStart != -1 && position.CdsEnd != -1 && !Codons.IsTriplet(alleleLen - varLen);\r\n            }\r\n\r\n            OverlapWithMicroRna = IsMatureMirnaVariant(position.CdnaStart, position.CdnaEnd, transcript.MicroRnas,\r\n                transcript.BioType == BioType.miRNA);\r\n        }\r\n\r\n        internal static bool IsMatureMirnaVariant(int cdnaStart, int cdnaEnd, IInterval[] microRnas, bool isMiRna)\r\n        {\r\n            if (microRnas == null) return false;\r\n            if (!isMiRna || cdnaStart == -1 || cdnaEnd == -1) return false;\r\n            return microRnas.Any(microRna => microRna.Overlaps(cdnaStart, cdnaEnd));\r\n        }\r\n\r\n        internal static bool IsAfterCoding(int variantRefBegin, int variantRefEnd, int transcriptEnd, int codingRegionEnd)\r\n        {\r\n            // special case to handle insertions after the CDS end\r\n            if (variantRefBegin == variantRefEnd + 1 && variantRefEnd == codingRegionEnd)\r\n            {\r\n                return true;\r\n            }\r\n\r\n            var result = Intervals.Utilities.Overlaps(variantRefBegin, variantRefEnd, codingRegionEnd + 1, transcriptEnd);\r\n\r\n            return result;\r\n        }\r\n\r\n        internal static bool IsBeforeCoding(int variantRefBegin, int variantRefEnd, int transcriptStart, int codingRegionStart)\r\n        {\r\n            // special case to handle insertions before the CDS start\r\n            if (variantRefBegin == variantRefEnd + 1 && variantRefBegin == codingRegionStart) return true;\r\n\r\n            bool result = Intervals.Utilities.Overlaps(variantRefBegin, variantRefEnd, transcriptStart, 
codingRegionStart - 1);\r\n            return result;\r\n        }\r\n\r\n        internal static bool IsWithinCdna(int coveredCdnaStart, int coveredCdnaEnd, int totalExonLen) =>\r\n            coveredCdnaStart > 0 && coveredCdnaEnd <= totalExonLen;\r\n\r\n        internal bool IsWithinCds(int coveredCdsBegin, int coveredCdsEnd, IInterval codingRegion, IInterval variant)\r\n        {\r\n            if (IsWithinFrameshiftIntron) return variant.Overlaps(codingRegion);\r\n            return coveredCdsBegin != -1 && coveredCdsEnd != -1;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/Transcript/TranscriptUtilities.cs",
    "content": "﻿using Genome;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public static class TranscriptUtilities\r\n    {\r\n\t    /// <summary>\r\n\t    /// returns the alternate CDS given the reference sequence, the cds coordinates, and the alternate allele.\r\n\t    /// </summary>\r\n\t    public static string GetAlternateCds(ISequence refSequence, int cdsBegin, int cdsEnd, string alternateAllele,\r\n\t\t    ITranscriptRegion[] regions, bool onReverseStrand, byte startExonPhase, int cdnaCodingStart)\r\n\t    {\r\n\t\t    var splicedSeq     = GetSplicedSequence(refSequence, regions, onReverseStrand);\r\n\t\t    int numPaddedBases = startExonPhase;\r\n\r\n            int shift           = cdnaCodingStart - 1;\r\n            int upstreamLength  = GetUpstreamLength(shift, cdsBegin - numPaddedBases - 1, splicedSeq.Length);\r\n            int downstreamStart = cdsEnd - numPaddedBases + shift;\r\n\r\n            string upstreamSeq   = splicedSeq.Substring(shift, upstreamLength);\r\n\t        string downstreamSeq = downstreamStart < splicedSeq.Length ? splicedSeq.Substring(downstreamStart) : \"\";\r\n\r\n\t\t    if (alternateAllele == null) alternateAllele = string.Empty;\r\n\t\t    var paddedBases = numPaddedBases > 0 ? new string('N', numPaddedBases) : \"\";\r\n\r\n\t\t    return paddedBases + upstreamSeq + alternateAllele + downstreamSeq;\r\n\t    }\r\n\r\n        private static int GetUpstreamLength(int start, int length, int seqLength)\r\n        {\r\n            int desiredLength = start + length;\r\n            int maxLength     = seqLength - start;\r\n            return desiredLength <= seqLength ? length : maxLength;\r\n        }\r\n\r\n        /// <summary>\r\n\t    /// Retrieves all Exon sequences and concats them together. 
\r\n\t    /// This includes 5' UTR + cDNA + 3' UTR [Transcript.pm:862 spliced_seq]\r\n\t    /// </summary>\r\n\t    private static string GetSplicedSequence(ISequence refSequence, ITranscriptRegion[] regions, bool onReverseStrand)\r\n\t    {\r\n\t\t    var sb = StringBuilderPool.Get();\r\n\r\n\t\t    foreach (var region in regions)\r\n\t\t    {\r\n\t\t        if (region.Type != TranscriptRegionType.Exon) continue;\r\n\t\t\t    var exonLength = region.End - region.Start + 1;\r\n\r\n\t\t\t    // sanity check: handle the situation where no reference has been provided\r\n\t\t\t    if (refSequence == null)\r\n\t\t\t    {\r\n\t\t\t\t    sb.Append(new string('N', exonLength));\r\n\t\t\t\t    continue;\r\n\t\t\t    }\r\n\r\n\t\t\t    sb.Append(refSequence.Substring(region.Start - 1, exonLength));\r\n\t\t    }\r\n\r\n\t        var results    = StringBuilderPool.GetStringAndReturn(sb);\r\n\t        return onReverseStrand ? SequenceUtilities.GetReverseComplement(results) : results;\r\n\t    }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/Transcript/VariantEffect.cs",
    "content": "﻿using System;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Variants;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    /// <summary>\r\n    /// This class performs all of the functional consequence testing. An additional caching layer\r\n    /// has been added to prevent unneeded calculations. The caching layer is reset when each new\r\n    /// variant has been read.\r\n    /// </summary>\r\n    public sealed class VariantEffect : IVariantEffect\r\n    {\r\n        private readonly TranscriptPositionalEffect _preCache;\r\n\r\n        private readonly ITranscript _transcript;\r\n        private readonly ISimpleVariant _variant;\r\n\r\n        private readonly VariantEffectCache _cache;\r\n\r\n        private readonly string _referenceAminoAcids;\r\n        private readonly string _alternateAminoAcids;\r\n\r\n        private readonly int _referenceAminoAcidsLen;\r\n        private readonly int _alternateAminoAcidsLen;\r\n\r\n        private readonly string _coveredReferenceAminoAcids;\r\n        private readonly string _coveredAlternateAminoAcids;\r\n\r\n        private readonly string _referenceCodons;\r\n        private readonly string _alternateCodons;\r\n\r\n        private readonly int _referenceCodonsLen;\r\n        private readonly int _alternateCodonsLen;\r\n\r\n        private readonly bool _isInsertion;\r\n        private readonly bool _isDeletion;\r\n\r\n        private readonly int _proteinBegin;\r\n\r\n        public VariantEffect(TranscriptPositionalEffect transcriptEffect, ISimpleVariant variant, ITranscript transcript,\r\n            string referenAminoAcids, string alternateAminoAcids, string referenceCodons, string alternateCodons,\r\n            int? 
proteinBegin, string coveredReferenceAminoAcids, string coveredAlternateAminoAcids, VariantEffectCache cache = null)\r\n        {\r\n            _transcript = transcript;\r\n            _variant    = variant;\r\n\r\n            _preCache = transcriptEffect;\r\n\r\n            _cache = cache ?? new VariantEffectCache();\r\n\r\n            _referenceAminoAcids    = referenAminoAcids;\r\n            _alternateAminoAcids    = alternateAminoAcids;\r\n            _referenceAminoAcidsLen = _referenceAminoAcids?.Length ?? 0;\r\n            _alternateAminoAcidsLen = _alternateAminoAcids?.Length ?? 0;\r\n\r\n            _coveredReferenceAminoAcids = coveredReferenceAminoAcids;\r\n            _coveredAlternateAminoAcids = coveredAlternateAminoAcids;\r\n\r\n            _referenceCodons        = referenceCodons;\r\n            _alternateCodons        = alternateCodons;\r\n            _referenceCodonsLen     = _referenceCodons?.Length ?? 0;\r\n            _alternateCodonsLen     = _alternateCodons?.Length ?? 0;\r\n\r\n            _isInsertion = variant.AltAllele.Length > variant.RefAllele.Length;\r\n            _isDeletion  = variant.AltAllele.Length < variant.RefAllele.Length;\r\n\r\n            _proteinBegin = proteinBegin ?? -1;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if the variant is a splice acceptor variant [VariationEffect.pm:404 acceptor_splice_site]\r\n        /// </summary>\r\n        public bool IsSpliceAcceptorVariant()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.splice_acceptor_variant;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            bool result = _transcript.Gene.OnReverseStrand ? 
_preCache.IsStartSpliceSite : _preCache.IsEndSpliceSite;\r\n\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if the variant is a splice donor variant [VariationEffect.pm:459 donor_splice_site]\r\n        /// </summary>\r\n        public bool IsSpliceDonorVariant()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.splice_donor_variant;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            bool result = _transcript.Gene.OnReverseStrand ? _preCache.IsEndSpliceSite : _preCache.IsStartSpliceSite;\r\n\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if the variant is a 5' UTR variant (VariationEffect.pm:595 within_5_prime_utr)\r\n        /// </summary>\r\n        public bool IsFivePrimeUtrVariant()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.five_prime_UTR_variant;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            bool result = false;\r\n\r\n            if (_transcript.Translation != null)\r\n            {\r\n\r\n                var isFivePrimeOfCoding = _transcript.Gene.OnReverseStrand\r\n                    ? 
_preCache.AfterCoding\r\n                    : _preCache.BeforeCoding;\r\n\r\n                result = isFivePrimeOfCoding && _preCache.WithinCdna;\r\n            }\r\n\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if the variant is a frameshift variant [VariantEffect.pm:940 frameshift]\r\n        /// </summary>\r\n        public bool IsFrameshiftVariant()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.frameshift_variant;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            // check the predicates\r\n            if (!_preCache.IsCoding)\r\n            {\r\n                _cache.Add(ct, false);\r\n                return false;\r\n            }\r\n\r\n            if (IsIncompleteTerminalCodonVariant())\r\n            {\r\n                _cache.Add(ct, false);\r\n                return false;\r\n            }\r\n\r\n            bool result = _preCache.HasFrameShift && !IsStopRetained() && !IsTruncatedByStop();\r\n\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if we have an incomplete terminal codon variant. 
[VariantEffect.pm:983 partial_codon]\r\n        /// </summary>\r\n        public bool IsIncompleteTerminalCodonVariant()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.incomplete_terminal_codon_variant;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            if (_transcript.Translation == null)\r\n            {\r\n                _cache.Add(ct, false);\r\n                return false;\r\n            }\r\n\r\n            int cdsLength       = _transcript.Translation.CodingRegion.Length;\r\n            int codonCdsStart   = _proteinBegin * 3 - 2;\r\n            int lastCodonLength = cdsLength - (codonCdsStart - 1);\r\n\r\n            bool result = lastCodonLength < 3 && lastCodonLength > 0;\r\n\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if the variant is an inframe deletion [VariantEffect.pm:825 inframe_deletion]\r\n        /// </summary>\r\n        public bool IsInframeDeletion()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.inframe_deletion;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            // check the predicates\r\n            if (!_preCache.IsCoding || !_isDeletion)\r\n            {\r\n                _cache.Add(ct, false);\r\n                return false;\r\n            }\r\n\r\n            if (_referenceCodonsLen == 0 //|| (PreCache.ReferenceCodonLen < PreCache.AlternateCodonLen) \r\n                || IsFrameshiftVariant()\r\n                || IsIncompleteTerminalCodonVariant()\r\n                || IsStopGained())\r\n            {\r\n                _cache.Add(ct, false);\r\n                return false;\r\n            }\r\n\r\n            // simple string match\r\n            var referenceCodon = _referenceCodons.ToLower();\r\n            var alternateCodon = _alternateCodons.ToLower();\r\n\r\n            if (referenceCodon.StartsWith(alternateCodon) || 
referenceCodon.EndsWith(alternateCodon))\r\n            {\r\n                _cache.Add(ct, true);\r\n                return true;\r\n            }\r\n\r\n            // try a more complex string match\r\n            var commonPrefixLength = _referenceCodons.CommonPrefixLength(_alternateCodons);\r\n            var commonSuffixLength = _referenceCodons.CommonSuffixLength(_alternateCodons);\r\n\r\n            bool result = _alternateCodonsLen - commonPrefixLength - commonSuffixLength == 0;\r\n\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if the variant is an inframe insertion [VariantEffect.pm:780 inframe_insertion]\r\n        /// </summary>\r\n        public bool IsInframeInsertion()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.inframe_insertion;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            // check the predicates\r\n            if (!_preCache.IsCoding || !_isInsertion)\r\n            {\r\n                _cache.Add(ct, false);\r\n                return false;\r\n            }\r\n\r\n            if (IsStopRetained() ||\r\n                IsFrameshiftVariant() ||\r\n                IsStartLost() ||\r\n                _alternateCodonsLen <= _referenceCodonsLen ||\r\n                IsIncompleteTerminalCodonVariant())\r\n            {\r\n                _cache.Add(ct, false);\r\n                return false;\r\n            }\r\n\r\n            bool result = !IsTruncatedByStop();\r\n\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n\r\n        private bool IsTruncatedByStop()\r\n        {\r\n            if (_alternateAminoAcids != null && _alternateAminoAcids.Contains(AminoAcids.StopCodon))\r\n            {\r\n                var stopPos = _alternateAminoAcids.IndexOf(AminoAcids.StopCodon, StringComparison.Ordinal);\r\n                var altAminoAcidesBeforeStop = 
_alternateAminoAcids.Substring(0, stopPos);\r\n                if (_alternateAminoAcids.OptimizedStartsWith(AminoAcids.StopCodonChar) ||\r\n                    _referenceAminoAcids.StartsWith(altAminoAcidesBeforeStop))\r\n                    return true;\r\n            }\r\n            return false;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if at least one base of the first codon was changed in the transcript [VariantEffect.pm:722 affects_start_codon]\r\n        /// </summary>\r\n        public bool IsStartLost()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.start_lost;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            // check the predicates\r\n            if (!_preCache.IsCoding)\r\n            {\r\n                _cache.Add(ct, false);\r\n                return false;\r\n            }\r\n\r\n            if (_proteinBegin != 1 || _referenceAminoAcidsLen == 0)\r\n            {\r\n                _cache.Add(ct, false);\r\n                return false;\r\n            }\r\n\r\n            // insertion in start codon and do not change start codon\r\n            if (_isInsertion && _proteinBegin == 1 && _alternateAminoAcids.EndsWith(_referenceAminoAcids))\r\n            {\r\n                _cache.Add(ct, false);\r\n                return false;\r\n            }\r\n\r\n            bool result = _alternateAminoAcidsLen == 0 || _alternateAminoAcids[0] != _referenceAminoAcids[0];\r\n\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if the variant is a missense variant [VariantEffect.pm:682 missense_variant]\r\n        /// </summary>\r\n        public bool IsMissenseVariant()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.missense_variant;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            // check the predicates\r\n            if 
(!_preCache.IsCoding)\r\n            {\r\n                _cache.Add(ct, false);\r\n                return false;\r\n            }\r\n\r\n            if (IsStartLost() ||\r\n                IsStopLost() ||\r\n                IsStopGained() ||\r\n                IsIncompleteTerminalCodonVariant() ||\r\n                IsFrameshiftVariant() ||\r\n                IsInframeDeletion() ||\r\n                IsInframeInsertion())\r\n            {\r\n                _cache.Add(ct, false);\r\n                return false;\r\n            }\r\n\r\n            bool result = _referenceAminoAcids != _alternateAminoAcids &&\r\n                _referenceAminoAcidsLen == _alternateAminoAcidsLen;\r\n\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if the variant is a non-coding transcript exon variant [VariationEffect.pm:405 non_coding_exon_variant]\r\n        /// </summary>\r\n        public bool IsNonCodingTranscriptExonVariant()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.non_coding_transcript_exon_variant;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            bool result = _preCache.HasExonOverlap && _transcript.Translation == null && !_preCache.OverlapWithMicroRna;\r\n\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if the variant is a nonsense-mediated decay transcript variant [VariationEffect.pm:391 within_nmd_transcript]\r\n        /// </summary>\r\n        public bool IsNonsenseMediatedDecayTranscriptVariant()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.NMD_transcript_variant;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n            var result = _transcript.BioType == BioType.nonsense_mediated_decay;\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n        
\r\n\r\n        /// <summary>\r\n        /// returns true if the variant is a protein altering variant [VariationEffect.pm:300 protein_altering_variant]\r\n        /// </summary>\r\n        public bool IsProteinAlteringVariant()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.protein_altering_variant;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            var result = true;\r\n\r\n            var sameLen = _referenceAminoAcidsLen == _alternateAminoAcidsLen;\r\n            var startsWithTer = _referenceAminoAcids.OptimizedStartsWith('X') || _alternateAminoAcids.OptimizedStartsWith('X');\r\n\r\n            var isInframeDeletion = IsInframeDeletion();\r\n            // Note: sequence ontology says that stop retained should not be here (http://www.sequenceontology.org/browser/current_svn/term/SO:0001567)\r\n            var isStopCodonVarinat = IsStopLost() || IsStopGained();\r\n\r\n            if (sameLen || startsWithTer || isInframeDeletion || isStopCodonVarinat ||\r\n                IsStartLost() || IsFrameshiftVariant() || IsInframeInsertion() || IsStopRetained() || !_preCache.IsCoding)\r\n            {\r\n                result = false;\r\n            }\r\n\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if the variant is a splice region variant [VariationEffect.pm:483 splice_region]\r\n        /// </summary>\r\n        public bool IsSpliceRegionVariant()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.splice_region_variant;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            bool result = false;\r\n\r\n            if (IsSpliceDonorVariant() || IsSpliceAcceptorVariant())\r\n            {\r\n                // false\r\n            }\r\n            else\r\n            {\r\n                result = _preCache.IsWithinSpliceSiteRegion;\r\n            }\r\n\r\n            
_cache.Add(ct, result);\r\n            return result;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if the variant's amino acid changes to a stop codon [VariationEffect.pm:884 stop_gained]\r\n        /// </summary>\r\n        public bool IsStopGained()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.stop_gained;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            bool result = !IsStopRetained() &&\r\n                     (string.IsNullOrEmpty(_referenceAminoAcids) || !_referenceAminoAcids.Contains(AminoAcids.StopCodon)) &&\r\n                          !string.IsNullOrEmpty(_alternateAminoAcids) && _alternateAminoAcids.Contains(AminoAcids.StopCodon);\r\n\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if the variant is a stop lost variant [VariationEffect.pm:898 stop_lost]\r\n        /// </summary>\r\n        public bool IsStopLost()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.stop_lost;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            bool result = false;\r\n            if (!string.IsNullOrEmpty(_coveredReferenceAminoAcids) && _coveredAlternateAminoAcids != null)\r\n                result = _coveredReferenceAminoAcids.Contains(AminoAcids.StopCodon) &&\r\n                         !_coveredAlternateAminoAcids.Contains(AminoAcids.StopCodon);\r\n\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if the variant is a stop retained variant [VariationEffect.pm:701 stop_lost]\r\n        /// </summary>\r\n        public bool IsStopRetained()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.stop_retained_variant;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            var alternateAminoAcids = 
TrimPeptides(_alternateAminoAcids);\r\n\r\n            bool result = !string.IsNullOrEmpty(_referenceAminoAcids) && alternateAminoAcids != null &&\r\n                     _referenceAminoAcids == alternateAminoAcids &&\r\n                     _referenceAminoAcids.Contains(AminoAcids.StopCodon) ||\r\n                     string.IsNullOrEmpty(_referenceAminoAcids) && alternateAminoAcids != null &&\r\n                     _proteinBegin == _transcript.Translation?.PeptideSeq.Length + 1 &&\r\n                     alternateAminoAcids == AminoAcids.StopCodon;\r\n\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n\r\n        public bool IsStartRetained()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.start_retained_variant;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            if (_proteinBegin != 1 || string.IsNullOrEmpty(_referenceAminoAcids))\r\n            {\r\n                _cache.Add(ct, false);\r\n                return false;\r\n            }\r\n\r\n            var startProtein = _referenceAminoAcids[0].ToString();\r\n            var alternateAminoAcids = TrimPeptides(_alternateAminoAcids);\r\n\r\n            var result = alternateAminoAcids != null\r\n                          && alternateAminoAcids.Contains(startProtein);\r\n\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n\r\n        private static string TrimPeptides(string alternateAminoAcids)\r\n        {\r\n            if (string.IsNullOrEmpty(alternateAminoAcids)) return null;\r\n            if (!alternateAminoAcids.Contains(AminoAcids.StopCodon)) return alternateAminoAcids;\r\n            var pos = alternateAminoAcids.IndexOf(AminoAcids.StopCodon, StringComparison.Ordinal);\r\n            return pos < 0 ? 
alternateAminoAcids : alternateAminoAcids.Substring(0, pos + 1);\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if the variant is a synonymous variant [VariationEffect.pm:755 synonymous_variant]\r\n        /// </summary>\r\n        public bool IsSynonymousVariant()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.synonymous_variant;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            bool result = !string.IsNullOrEmpty(_referenceAminoAcids)  &&\r\n                     (_variant.Type == VariantType.SNV ||\r\n                      _variant.Type == VariantType.MNV) &&\r\n                     _referenceAminoAcids == _alternateAminoAcids && !_referenceAminoAcids.Contains(\"X\") &&\r\n                     !_alternateAminoAcids.Contains(\"X\") && !IsStopRetained();\r\n\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if the variant is a 3' UTR variant [VariationEffect.pm:609 within_3_prime_utr]\r\n        /// </summary>\r\n        public bool IsThreePrimeUtrVariant()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.three_prime_UTR_variant;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            bool result = false;\r\n\r\n            if (_transcript.Translation != null)\r\n            {\r\n                var isThreePrimeOfCoding = _transcript.Gene.OnReverseStrand\r\n                    ? 
_preCache.BeforeCoding\r\n                    : _preCache.AfterCoding;\r\n\r\n                result = isThreePrimeOfCoding && _preCache.WithinCdna;\r\n            }\r\n\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if the variant is within a non-coding gene [VariationEffect.pm:398 within_non_coding_gene]\r\n        /// </summary>\r\n        public bool IsNonCodingTranscriptVariant()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.non_coding_transcript_variant;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            // NOTE: Isn't IsWithinTranscript always true? and not within mature miRNA is always true\r\n            // For Ensembl transcript, miRNA may be a valid attribute. We have their location and we would like to check if the variant overlaps with the miRNA\r\n            var result = !_preCache.HasExonOverlap && _transcript.Translation == null && !_preCache.OverlapWithMicroRna;\r\n\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if it's a coding sequence variant [VariationEffect.pm:998 coding_unknown]\r\n        /// </summary>\r\n        public bool IsCodingSequenceVariant()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.coding_sequence_variant;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            bool result = _preCache.WithinCds &&\r\n                     (string.IsNullOrEmpty(_transcript.Translation.PeptideSeq) ||\r\n                      string.IsNullOrEmpty(_alternateAminoAcids) || _alternateAminoAcids.Contains(\"X\"))\r\n                     && !(IsFrameshiftVariant() || IsInframeDeletion() || IsIncompleteTerminalCodonVariant() ||\r\n                          IsProteinAlteringVariant() || IsStopGained() || IsStopRetained() || IsStopLost() || \r\n                          
IsStartRetained());\r\n\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n\r\n        ///<summary>\r\n        /// returns true if the variant occurs within an intron [VariationEffect.pm:494 within_intron]\r\n        /// </summary>\r\n        public bool IsWithinIntron() => _preCache.IsWithinIntron;\r\n\r\n        /// <summary>\r\n        /// returns true if the variant overlaps a mature MiRNA. [VariationEffect.pm:432 within_mature_miRNA]\r\n        /// </summary>\r\n        public bool IsMatureMirnaVariant()\r\n        {\r\n            const ConsequenceTag ct = ConsequenceTag.mature_miRNA_variant;\r\n            if (_cache.Contains(ct)) return _cache.Get(ct);\r\n\r\n            bool result = _preCache.OverlapWithMicroRna;\r\n\r\n            _cache.Add(ct, result);\r\n            return result;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AnnotatedPositions/Transcript/VariantEffectCache.cs",
    "content": "﻿using System;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace VariantAnnotation.AnnotatedPositions.Transcript\r\n{\r\n    public sealed class VariantEffectCache\r\n    {\r\n        #region members\r\n\r\n        private readonly bool[] _isCached;\r\n        private readonly bool[] _cachedResults;\r\n\r\n        #endregion\r\n\r\n        // constructor\r\n        public VariantEffectCache()\r\n        {\r\n            var numConsequences = Enum.GetNames(typeof(ConsequenceTag)).Length;\r\n            _isCached = new bool[numConsequences];\r\n            _cachedResults = new bool[numConsequences];\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if the corresponding value has been cached\r\n        /// </summary>\r\n        public void Add(ConsequenceTag consequence, bool result)\r\n        {\r\n            var index = (int)consequence;\r\n\r\n            _isCached[index] = true;\r\n            _cachedResults[index] = result;\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns the cached value for the corresponding result\r\n        /// </summary>\r\n        public bool Get(ConsequenceTag consequence)\r\n        {\r\n            return _cachedResults[(int)consequence];\r\n        }\r\n\r\n        /// <summary>\r\n        /// returns true if the corresponding value has been cached\r\n        /// </summary>\r\n        public bool Contains(ConsequenceTag consequence)\r\n        {\r\n            return _isCached[(int)consequence];\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Annotator.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Linq;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing OptimizedCore;\r\nusing RepeatExpansions;\r\nusing VariantAnnotation.Interface;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.GeneAnnotation;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.Pools;\r\nusing Variants;\r\n\r\nnamespace VariantAnnotation\r\n{\r\n    public sealed class Annotator : IAnnotator\r\n    {\r\n        private readonly IAnnotationProvider      _saProvider;\r\n        private readonly IAnnotationProvider      _gsaProvider;\r\n        private readonly IAnnotationProvider      _taProvider;\r\n        private readonly IAnnotationProvider      _lcrProvider;\r\n        private readonly ISequenceProvider        _sequenceProvider;\r\n        private readonly IAnnotationProvider      _conservationProvider;\r\n        private readonly IGeneAnnotationProvider  _geneAnnotationProvider;\r\n        private readonly IRepeatExpansionProvider _repeatExpansionProvider;\r\n        private readonly HashSet<string>          _affectedGenes;\r\n\r\n        private bool           _annotateMito;\r\n        public  GenomeAssembly Assembly { get; }\r\n\r\n        public Annotator(IAnnotationProvider taProvider,\r\n            ISequenceProvider sequenceProvider,\r\n            IAnnotationProvider saProvider,\r\n            IAnnotationProvider conservationProvider,\r\n            IAnnotationProvider lcrProvider,\r\n            IGeneAnnotationProvider geneAnnotationProvider,\r\n            IRepeatExpansionProvider repeatExpansionProvider,\r\n            IAnnotationProvider gsaProvider\r\n        )\r\n        {\r\n            _saProvider              = saProvider;\r\n            _gsaProvider             = gsaProvider;\r\n            _taProvider              = taProvider;\r\n            _sequenceProvider        = 
sequenceProvider;\r\n            _conservationProvider    = conservationProvider;\r\n            _lcrProvider             = lcrProvider;\r\n            _geneAnnotationProvider  = geneAnnotationProvider;\r\n            _repeatExpansionProvider = repeatExpansionProvider;\r\n            _affectedGenes           = new HashSet<string>();\r\n            Assembly                 = GetAssembly();\r\n        }\r\n\r\n        private GenomeAssembly GetAssembly()\r\n        {\r\n            var assemblies = new Dictionary<GenomeAssembly, List<string>>();\r\n            AddAssembly(assemblies, _taProvider);\r\n            AddAssembly(assemblies, _saProvider);\r\n            AddAssembly(assemblies, _gsaProvider);\r\n            AddAssembly(assemblies, _sequenceProvider);\r\n            AddAssembly(assemblies, _conservationProvider);\r\n\r\n            if (assemblies.Count == 0) return GenomeAssembly.Unknown;\r\n            if (assemblies.Count != 1) throw new UserErrorException(GetAssemblyErrorMessage(assemblies));\r\n\r\n            return assemblies.First().Key;\r\n        }\r\n\r\n        private static void AddAssembly(Dictionary<GenomeAssembly, List<string>> assemblies, IProvider provider)\r\n        {\r\n            if (provider == null) return;\r\n            if (assemblies.TryGetValue(provider.Assembly, out List<string> assemblyList)) assemblyList.Add(provider.Name);\r\n            else assemblies[provider.Assembly] = new List<string> {provider.Name};\r\n        }\r\n\r\n        private static string GetAssemblyErrorMessage(Dictionary<GenomeAssembly, List<string>> assemblies)\r\n        {\r\n            var sb = StringBuilderPool.Get();\r\n            sb.AppendLine(\"Not all of the data sources have the same genome assembly:\");\r\n            foreach ((GenomeAssembly genomeAssembly, List<string> dataSources) in assemblies)\r\n                sb.AppendLine($\"- Using {genomeAssembly}: {string.Join(\", \", dataSources)}\");\r\n            return 
StringBuilderPool.GetStringAndReturn(sb);\r\n        }\r\n\r\n        public IAnnotatedPosition Annotate(IPosition position)\r\n        {\r\n            if (position == null) return null;\r\n            IAnnotatedVariant[] annotatedVariants = GetAnnotatedVariants(position.Variants);\r\n            //var annotatedPosition = new AnnotatedPosition(position, annotatedVariants);\r\n            var annotatedPosition = AnnotatedPositionPool.Get(position, annotatedVariants);\r\n\r\n            if (annotatedPosition.AnnotatedVariants           == null\r\n                || annotatedPosition.AnnotatedVariants.Length == 0\r\n                || position.Chromosome.UcscName == \"chrM\" && !_annotateMito\r\n               ) return annotatedPosition;\r\n\r\n            _sequenceProvider?.Annotate(annotatedPosition);\r\n            _lcrProvider?.Annotate(annotatedPosition);\r\n            _repeatExpansionProvider?.Annotate(annotatedPosition);\r\n            _conservationProvider?.Annotate(annotatedPosition);\r\n            _taProvider.Annotate(annotatedPosition);\r\n            _saProvider?.Annotate(annotatedPosition); // needs to come after _taProvider for gene fusions\r\n            _gsaProvider?.Annotate(annotatedPosition);\r\n\r\n            TrackAffectedGenes(annotatedPosition);\r\n            return annotatedPosition;\r\n        }\r\n\r\n        private void TrackAffectedGenes(IAnnotatedPosition annotatedPosition)\r\n        {\r\n            if (_geneAnnotationProvider == null) return;\r\n\r\n            foreach (var variant in annotatedPosition.AnnotatedVariants)\r\n            {\r\n                AddGenesFromTranscripts(variant.Transcripts);\r\n            }\r\n        }\r\n\r\n        private void AddGenesFromTranscripts(IList<IAnnotatedTranscript> transcripts)\r\n        {\r\n            foreach (var transcript in transcripts)\r\n            {\r\n                if (IsFlankingTranscript(transcript)) continue;\r\n                
_affectedGenes.Add(transcript.Transcript.Gene.Symbol);\r\n            }\r\n        }\r\n\r\n        private static bool IsFlankingTranscript(IAnnotatedTranscript transcript)\r\n        {\r\n            if (transcript.Consequences == null) return false;\r\n\r\n            // ReSharper disable once LoopCanBeConvertedToQuery\r\n            foreach (var consequence in transcript.Consequences)\r\n            {\r\n                if (consequence == ConsequenceTag.downstream_gene_variant ||\r\n                    consequence == ConsequenceTag.upstream_gene_variant) return true;\r\n            }\r\n\r\n            return false;\r\n        }\r\n\r\n        internal static IAnnotatedVariant[] GetAnnotatedVariants(IVariant[] variants)\r\n        {\r\n            if (variants?[0].Behavior == null) return null;\r\n            int numVariants                                            = variants.Length;\r\n            var annotatedVariants                                      = new IAnnotatedVariant[numVariants];\r\n            for (var i = 0; i < numVariants; i++) annotatedVariants[i] = AnnotatedVariantPool.Get(variants[i]);\r\n            return annotatedVariants;\r\n        }\r\n\r\n        public IEnumerable<string> GetGeneAnnotations()\r\n        {\r\n            var geneAnnotations = new List<string>();\r\n\r\n            foreach (string gene in _affectedGenes.OrderBy(x => x))\r\n            {\r\n                string annotation = _geneAnnotationProvider.Annotate(gene);\r\n                if (string.IsNullOrEmpty(annotation)) continue;\r\n                geneAnnotations.Add(annotation);\r\n            }\r\n\r\n            return geneAnnotations.Count > 0 ? geneAnnotations : null;\r\n        }\r\n\r\n        public void EnableMitochondrialAnnotation() => _annotateMito = true;\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/AssemblyInfo.cs",
    "content": "﻿using System.Runtime.CompilerServices;\n\n[assembly: InternalsVisibleTo(\"UnitTests\")]"
  },
  {
    "path": "VariantAnnotation/Caches/DataStructures/CodingRegion.cs",
    "content": "﻿using IO;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace VariantAnnotation.Caches.DataStructures\r\n{\r\n    public sealed class CodingRegion : ICodingRegion\r\n    {\r\n        public int Start { get; }\r\n        public int End { get; }\r\n        public int CdnaStart { get; }\r\n        public int CdnaEnd { get; }\r\n        public int Length { get; }\r\n\r\n        public CodingRegion(int start, int end, int cdnaStart, int cdnaEnd, int length)\r\n        {\r\n            Start     = start;\r\n            End       = end;\r\n            CdnaStart = cdnaStart;\r\n            CdnaEnd   = cdnaEnd;\r\n            Length    = length;\r\n        }\r\n\r\n        public static ICodingRegion Read(BufferedBinaryReader reader)\r\n        {\r\n            int genomicStart = reader.ReadOptInt32();\r\n            int genomicEnd   = reader.ReadOptInt32();\r\n            int cdnaStart    = reader.ReadOptInt32();\r\n            int cdnaEnd      = reader.ReadOptInt32();\r\n            int length       = reader.ReadOptInt32();\r\n\r\n            return new CodingRegion(genomicStart, genomicEnd, cdnaStart, cdnaEnd, length);\r\n        }\r\n\r\n        public void Write(IExtendedBinaryWriter writer)\r\n        {\r\n            writer.WriteOpt(Start);\r\n            writer.WriteOpt(End);\r\n            writer.WriteOpt(CdnaStart);\r\n            writer.WriteOpt(CdnaEnd);\r\n            writer.WriteOpt(Length);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "VariantAnnotation/Caches/DataStructures/EncodedTranscriptData.cs",
    "content": "﻿using IO;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace VariantAnnotation.Caches.DataStructures\r\n{\r\n    public sealed class EncodedTranscriptData\r\n    {\r\n        private readonly ushort _info;\r\n        private readonly byte _contents;\r\n\r\n        // contents\r\n        // +====+====+====+====+====+====+====+====+\r\n        // |Tran|TReg|////|Mirn|Poly|Sift|StrExonPh|\r\n        // +====+====+====+====+====+====+====+====+\r\n        private const int StartExonMask         = 3;\r\n        private const int SiftMask              = 4;\r\n        private const int PolyPhenMask          = 8;\r\n        private const int MirnasMask            = 16;\r\n        private const int TranscriptRegionsMask = 64;\r\n        private const int TranslationMask       = 128;\r\n\r\n        public byte StartExonPhase       => (byte)(_contents & StartExonMask);\r\n        public bool HasSift              => (_contents & SiftMask)              != 0;\r\n        public bool HasPolyPhen          => (_contents & PolyPhenMask)          != 0;\r\n        public bool HasMirnas            => (_contents & MirnasMask)            != 0;\r\n        public bool HasRnaEdits          => (_info & RnaEditsMask)              != 0;\r\n        public bool HasSelenocysteines   => (_info & SelenocysteinesMask)       != 0;\r\n        public bool HasTranscriptRegions => (_contents & TranscriptRegionsMask) != 0;\r\n        public bool HasTranslation       => (_contents & TranslationMask)       != 0;\r\n\r\n        // info\r\n        // +====+====+====+====+====+====+====+====+====+====+====+====+====+====+====+====+\r\n        // |Cano|  Source |\\\\\\\\|Sele|RnaE|CSNF|CENF|                BioType                |\r\n        // +====+====+====+====+====+====+====+====+====+====+====+====+====+====+====+====+\r\n        private const int BioTypeMask           = 0xff;\r\n        private const int CdsStartNotFoundMask  = 0x100;\r\n        private const int 
CdsEndNotFoundMask    = 0x200;\r\n        private const int TranscriptSourceMask  = 0x3;\r\n        private const int CanonicalMask         = 0x8000;\r\n        private const int TranscriptSourceShift = 13;\r\n        private const int RnaEditsMask          = 1024;\r\n        private const int SelenocysteinesMask   = 2048;\r\n\r\n        public BioType BioType         => (BioType)(_info & BioTypeMask);\r\n        public bool CdsStartNotFound   => (_info & CdsStartNotFoundMask) != 0;\r\n        public bool CdsEndNotFound     => (_info & CdsEndNotFoundMask) != 0;\r\n        public Source TranscriptSource => (Source)((_info >> TranscriptSourceShift) & TranscriptSourceMask);\r\n        public bool IsCanonical        => (_info & CanonicalMask) != 0;\r\n\r\n        private EncodedTranscriptData(ushort info, byte contents)\r\n        {\r\n            _info     = info;\r\n            _contents = contents;\r\n        }\r\n\r\n        public static EncodedTranscriptData GetEncodedTranscriptData(BioType bioType, bool cdsStartNotFound,\r\n            bool cdsEndNotFound, Source source, bool isCanonical, bool hasSift, bool hasPolyPhen, bool hasMicroRnas,\r\n            bool hasRnaEdits, bool hasSelenocysteines, bool hasTranscriptRegions, bool hasTranslation,\r\n            byte startExonPhase)\r\n        {\r\n            ushort info = (ushort)bioType;\r\n            if (cdsStartNotFound)   info |= CdsStartNotFoundMask;\r\n            if (cdsEndNotFound)     info |= CdsEndNotFoundMask;\r\n            if (isCanonical)        info |= CanonicalMask;\r\n            if (hasRnaEdits)        info |= RnaEditsMask;\r\n            if (hasSelenocysteines) info |= SelenocysteinesMask;\r\n            info |= (ushort)((ushort)source << TranscriptSourceShift);\r\n\r\n            byte contents = startExonPhase;\r\n            if (hasSift)              contents |= SiftMask;\r\n            if (hasPolyPhen)          contents |= PolyPhenMask;\r\n            if (hasMicroRnas)         contents |= 
MirnasMask;\r\n            if (hasTranscriptRegions) contents |= TranscriptRegionsMask;\r\n            if (hasTranslation)       contents |= TranslationMask;\r\n\r\n            return new EncodedTranscriptData(info, contents);\r\n        }\r\n\r\n        public static EncodedTranscriptData Read(BufferedBinaryReader reader)\r\n        {\r\n            var info     = reader.ReadUInt16();\r\n            var contents = reader.ReadByte();\r\n            return new EncodedTranscriptData(info, contents);\r\n        }\r\n\r\n        internal void Write(IExtendedBinaryWriter writer)\r\n        {\r\n            writer.Write(_info);\r\n            writer.Write(_contents);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Caches/DataStructures/Gene.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Genome;\r\nusing IO;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace VariantAnnotation.Caches.DataStructures\r\n{\r\n    public sealed class Gene : IGene\r\n    {\r\n        public int Start { get; }\r\n        public int End { get; }\r\n        public Chromosome Chromosome { get; }\r\n        public bool OnReverseStrand { get; }\r\n        public string Symbol { get; }\r\n        public ICompactId EntrezGeneId { get; }\r\n        public ICompactId EnsemblId { get; }\r\n        public int HgncId { get; }\r\n\r\n        public Gene(Chromosome chromosome, int start, int end, bool onReverseStrand, string symbol, int hgncId,\r\n            CompactId entrezGeneId, CompactId ensemblId)\r\n        {\r\n            OnReverseStrand = onReverseStrand;\r\n            Symbol          = symbol;\r\n            HgncId          = hgncId;\r\n            EntrezGeneId    = entrezGeneId;\r\n            EnsemblId       = ensemblId;\r\n            Start           = start;\r\n            End             = end;\r\n\t        Chromosome\t\t= chromosome;\r\n        }\r\n\r\n        public static IGene Read(IBufferedBinaryReader reader, Dictionary<ushort, Chromosome> indexToChromosome)\r\n        {\r\n            ushort referenceIndex = reader.ReadOptUInt16();\r\n            int start             = reader.ReadOptInt32();\r\n            int end               = reader.ReadOptInt32();\r\n            bool onReverseStrand  = reader.ReadBoolean();\r\n            string symbol         = reader.ReadAsciiString();\r\n            int hgncId            = reader.ReadOptInt32();\r\n            var entrezId          = CompactId.Read(reader);\r\n            var ensemblId         = CompactId.Read(reader);\r\n\r\n            return new Gene(indexToChromosome[referenceIndex], start, end, onReverseStrand, symbol, hgncId, entrezId, ensemblId);\r\n        }\r\n\r\n        
public void Write(IExtendedBinaryWriter writer)\r\n        {\r\n            writer.WriteOpt(Chromosome.Index);\r\n            writer.WriteOpt(Start);\r\n            writer.WriteOpt(End);\r\n            writer.Write(OnReverseStrand);\r\n            writer.WriteOptAscii(Symbol);\r\n            writer.WriteOpt(HgncId);\r\n            // ReSharper disable ImpureMethodCallOnReadonlyValueField\r\n            EntrezGeneId.Write(writer);\r\n            EnsemblId.Write(writer);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "VariantAnnotation/Caches/DataStructures/IndexEntry.cs",
    "content": "﻿using System.IO;\r\n\r\nnamespace VariantAnnotation.Caches.DataStructures\r\n{\r\n    public struct IndexEntry\r\n    {\r\n        public long FileOffset;\r\n        public int Count;\r\n\r\n        public void Read(BinaryReader reader)\r\n        {\r\n            FileOffset = reader.ReadInt64();\r\n            Count      = reader.ReadInt32();\r\n        }\r\n\r\n        public void Write(BinaryWriter writer)\r\n        {\r\n            writer.Write(FileOffset);\r\n            writer.Write(Count);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Caches/DataStructures/Prediction.cs",
    "content": "﻿using System.IO;\r\nusing IO;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\n\r\nnamespace VariantAnnotation.Caches.DataStructures\r\n{\r\n    public sealed class Prediction\r\n    {\r\n        private readonly byte[] _data;\r\n        private readonly Entry[] _lut;\r\n\r\n        //                                                 A   X  C  D  E  F  G  H  I   X  K  L   M   N   X   P   Q   R   S   T   X   V   W   X   Y   X\r\n        private static readonly int[] AminoAcidIndices = { 0, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, 11, -1, 12, 13, 14, 15, 16, -1, 17, 18, -1, 19, -1 };\r\n\r\n        private const int NumAminoAcids = 20;\r\n        private const byte NullEntry    = 0xff;\r\n\r\n        public Prediction(byte[] data, Entry[] lut)\r\n        {\r\n            _data = data;\r\n            _lut  = lut;\r\n        }\r\n\r\n        public Entry GetPrediction(char newAminoAcid, int aaPosition)\r\n        {\r\n            // sanity check: skip stop codons\r\n            if (newAminoAcid == AminoAcids.StopCodonChar || newAminoAcid == 'X') return null;\r\n\r\n            int index = GetIndex(newAminoAcid, aaPosition);\r\n\r\n            // sanity check: skip instances where the data isn't long enough\r\n            if (index >= _data.Length) return null;\r\n\r\n            byte entry = _data[index];\r\n            return entry == NullEntry ? 
null : _lut[entry];\r\n        }\r\n\r\n        private static int GetIndex(char newAminoAcid, int aaPosition)\r\n        {\r\n            int asciiIndex = char.ToUpper(newAminoAcid) - 'A';\r\n\r\n            // sanity check: make sure the array index is within range\r\n            if (asciiIndex < 0 || asciiIndex >= 26)\r\n            {\r\n                throw new InvalidDataException($\"Expected an array index on the interval [0, 25], but observed the following: {asciiIndex} ({newAminoAcid})\");\r\n            }\r\n\r\n            int aaIndex = AminoAcidIndices[asciiIndex];\r\n\r\n            // sanity check: make sure the array index is within range\r\n            if (aaIndex == -1)\r\n            {\r\n                throw new InvalidDataException($\"An invalid amino acid was given: {newAminoAcid}\");\r\n            }\r\n\r\n            return NumAminoAcids * (aaPosition - 1) + aaIndex;\r\n        }\r\n\r\n        public void Write(BinaryWriter writer)\r\n        {\r\n            writer.Write(_data.Length);\r\n            writer.Write(_data);\r\n        }\r\n\r\n        public static Prediction Read(ExtendedBinaryReader reader, Entry[] lut)\r\n        {\r\n            int numBytes = reader.ReadInt32();\r\n            var data     = reader.ReadBytes(numBytes);\r\n            return new Prediction(data, lut);\r\n        }\r\n\r\n        public sealed class Entry\r\n        {\r\n            public readonly double Score;\r\n            public readonly byte EnumIndex;\r\n\r\n            public Entry(double score, byte enumIndex)\r\n            {\r\n                Score     = score;\r\n                EnumIndex = enumIndex;\r\n            }\r\n\r\n            public static Entry ReadEntry(ExtendedBinaryReader reader)\r\n            {\r\n                double score   = reader.ReadDouble();\r\n                byte enumIndex = reader.ReadByte();\r\n                return new Entry(score, enumIndex);\r\n            }\r\n\r\n            public void Write(BinaryWriter 
writer)\r\n            {\r\n                writer.Write(Score);\r\n                writer.Write(EnumIndex);\r\n            }\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Caches/DataStructures/RegulatoryRegion.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Genome;\r\nusing IO;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Caches;\r\n\r\nnamespace VariantAnnotation.Caches.DataStructures\r\n{\r\n    public sealed class RegulatoryRegion : IRegulatoryRegion\r\n    {\r\n        public int Start { get; }\r\n        public int End { get; }\r\n        public Chromosome Chromosome { get; }\r\n        public ICompactId Id { get; }\r\n        public RegulatoryRegionType Type { get; }\r\n\r\n        public RegulatoryRegion(Chromosome chromosome, int start, int end, CompactId id, RegulatoryRegionType type)\r\n        {\r\n            Id         = id;\r\n            Type       = type;\r\n            Start      = start;\r\n            End        = end;\r\n            Chromosome = chromosome;\r\n        }\r\n\r\n        public static IRegulatoryRegion Read(IBufferedBinaryReader reader, Dictionary<ushort, Chromosome> chromosomeIndexDictionary)\r\n        {\r\n            var refIndex = reader.ReadOptUInt16();\r\n            int start    = reader.ReadOptInt32();\r\n            int end      = reader.ReadOptInt32();\r\n            var type     = (RegulatoryRegionType)reader.ReadByte();\r\n            var id       = CompactId.Read(reader);\r\n\r\n            return new RegulatoryRegion(chromosomeIndexDictionary[refIndex], start, end, id, type);\r\n        }\r\n\r\n        public void Write(IExtendedBinaryWriter writer)\r\n        {\r\n            writer.WriteOpt(Chromosome.Index);\r\n            writer.WriteOpt(Start);\r\n            writer.WriteOpt(End);\r\n            writer.Write((byte)Type);\r\n            // ReSharper disable once ImpureMethodCallOnReadonlyValueField\r\n            Id.Write(writer);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "VariantAnnotation/Caches/DataStructures/RnaEdit.cs",
    "content": "﻿using IO;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing Variants;\r\n\r\nnamespace VariantAnnotation.Caches.DataStructures\r\n{\r\n    public sealed class RnaEdit : IRnaEdit\r\n    {\r\n        public int Start { get; }\r\n        public int End { get; }\r\n        public string Bases { get; }\r\n        public VariantType Type { get; set; }\r\n\r\n        public RnaEdit(int start, int end, string bases)\r\n        {\r\n            Start = start;\r\n            End   = end;\r\n            Bases = bases;\r\n            Type  = VariantType.unknown;\r\n        }\r\n\r\n        public static IRnaEdit Read(BufferedBinaryReader reader)\r\n        {\r\n            int start    = reader.ReadOptInt32();\r\n            int end      = reader.ReadOptInt32();\r\n            string bases = reader.ReadAsciiString();\r\n            return new RnaEdit(start, end, bases);\r\n        }\r\n\r\n        public void Write(IExtendedBinaryWriter writer)\r\n        {\r\n            writer.WriteOpt(Start);\r\n            writer.WriteOpt(End);\r\n            writer.WriteOptAscii(Bases);\r\n        }\r\n\r\n        public int CompareTo(IRnaEdit other)\r\n        {\r\n            return Start.CompareTo(other.Start);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "VariantAnnotation/Caches/DataStructures/Transcript.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing Genome;\r\nusing Intervals;\r\nusing IO;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches.Utilities;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace VariantAnnotation.Caches.DataStructures\r\n{\r\n    public sealed class Transcript : ITranscript\r\n    {\r\n        public Chromosome         Chromosome        { get; }\r\n        public int                 Start             { get; }\r\n        public int                 End               { get; }\r\n        public ICompactId          Id                { get; }\r\n        public BioType             BioType           { get; }\r\n        public bool                IsCanonical       { get; }\r\n        public Source              Source            { get; }\r\n        public IGene               Gene              { get; }\r\n        public ITranscriptRegion[] TranscriptRegions { get; }\r\n        public ushort              NumExons          { get; }\r\n        public int                 TotalExonLength   { get; }\r\n        public byte                StartExonPhase    { get; }\r\n        public int                 SiftIndex         { get; }\r\n        public int                 PolyPhenIndex     { get; }\r\n        public ITranslation        Translation       { get; }\r\n        public IInterval[]         MicroRnas         { get; }\r\n        public int[]               Selenocysteines   { get; }\r\n        public IRnaEdit[]          RnaEdits          { get; }\r\n        public bool                CdsStartNotFound  { get; }\r\n        public bool                CdsEndNotFound    { get; }\r\n        public ISequence           CodingSequence    { get; set; }\r\n        public ISequence           CdnaSequence    { get; set; }\r\n\r\n        public Transcript(Chromosome chromosome, int start, int end, ICompactId id, ITranslation translation,\r\n            BioType 
bioType, IGene gene, int totalExonLength, byte startExonPhase, bool isCanonical,\r\n            ITranscriptRegion[] transcriptRegions, ushort numExons, IInterval[] microRnas, int siftIndex,\r\n            int polyPhenIndex, Source source, bool cdsStartNotFound, bool cdsEndNotFound, int[] selenocysteines,\r\n            IRnaEdit[] rnaEdits)\r\n        {\r\n            Chromosome        = chromosome;\r\n            Start             = start;\r\n            End               = end;\r\n            Id                = id;\r\n            Translation       = translation;\r\n            BioType           = bioType;\r\n            Gene              = gene;\r\n            TotalExonLength   = totalExonLength;\r\n            StartExonPhase    = startExonPhase;\r\n            IsCanonical       = isCanonical;\r\n            TranscriptRegions = transcriptRegions;\r\n            NumExons          = numExons;\r\n            MicroRnas         = microRnas;\r\n            SiftIndex         = siftIndex;\r\n            PolyPhenIndex     = polyPhenIndex;\r\n            Source            = source;\r\n            CdsStartNotFound  = cdsStartNotFound;\r\n            CdsEndNotFound    = cdsEndNotFound;\r\n            Selenocysteines   = selenocysteines;\r\n            RnaEdits          = rnaEdits;\r\n        }\r\n\r\n        public static ITranscript Read(BufferedBinaryReader reader,\r\n            Dictionary<ushort, Chromosome> chromosomeIndexDictionary, IGene[] cacheGenes,\r\n            ITranscriptRegion[] cacheTranscriptRegions, IInterval[] cacheMirnas, string[] cachePeptideSeqs)\r\n        {\r\n            // transcript\r\n            ushort referenceIndex = reader.ReadOptUInt16();\r\n            int start             = reader.ReadOptInt32();\r\n            int end               = reader.ReadOptInt32();\r\n            var id                = CompactId.Read(reader);\r\n\r\n            // gene\r\n            int geneIndex = reader.ReadOptInt32();\r\n            var gene      = 
cacheGenes[geneIndex];\r\n\r\n            // encoded data\r\n            var encoded = EncodedTranscriptData.Read(reader);\r\n\r\n            // transcript regions\r\n            ITranscriptRegion[] transcriptRegions = encoded.HasTranscriptRegions ? ReadIndices(reader, cacheTranscriptRegions) : null;\r\n            ushort numExons       = reader.ReadOptUInt16();\r\n\r\n            // protein function predictions\r\n            int siftIndex     = encoded.HasSift     ? reader.ReadOptInt32() : -1;\r\n            int polyphenIndex = encoded.HasPolyPhen ? reader.ReadOptInt32() : -1;\r\n\r\n            // translation\r\n            var translation = encoded.HasTranslation ? DataStructures.Translation.Read(reader, cachePeptideSeqs) : null;\r\n            \r\n            // attributes\r\n            IInterval[] mirnas    = encoded.HasMirnas          ? ReadIndices(reader, cacheMirnas)         : null;\r\n            IRnaEdit[] rnaEdits   = encoded.HasRnaEdits        ? ReadItems(reader, RnaEdit.Read)          : null;\r\n            int[] selenocysteines = encoded.HasSelenocysteines ? 
ReadItems(reader, x => x.ReadOptInt32()) : null;\r\n\r\n            return new Transcript(chromosomeIndexDictionary[referenceIndex], start, end, id, translation,\r\n                encoded.BioType, gene, ExonUtilities.GetTotalExonLength(transcriptRegions), encoded.StartExonPhase,\r\n                encoded.IsCanonical, transcriptRegions, numExons, mirnas, siftIndex, polyphenIndex,\r\n                encoded.TranscriptSource, encoded.CdsStartNotFound, encoded.CdsEndNotFound, selenocysteines, rnaEdits);\r\n        }\r\n\r\n        /// <summary>\r\n        /// writes the transcript to the binary writer\r\n        /// </summary>\r\n        public void Write(IExtendedBinaryWriter writer, Dictionary<IGene, int> geneIndices,\r\n            Dictionary<ITranscriptRegion, int> transcriptRegionIndices, Dictionary<IInterval, int> microRnaIndices,\r\n            Dictionary<string, int> peptideIndices)\r\n        {\r\n            // transcript\r\n            writer.WriteOpt(Chromosome.Index);\r\n            writer.WriteOpt(Start);\r\n            writer.WriteOpt(End);\r\n            // ReSharper disable once ImpureMethodCallOnReadonlyValueField\r\n            Id.Write(writer);\r\n\r\n            // gene\r\n            writer.WriteOpt(GetIndex(Gene, geneIndices));\r\n\r\n            // encoded data\r\n            var encoded = EncodedTranscriptData.GetEncodedTranscriptData(BioType, CdsStartNotFound, CdsEndNotFound,\r\n                Source, IsCanonical, SiftIndex != -1, PolyPhenIndex != -1, MicroRnas != null, RnaEdits != null,\r\n                Selenocysteines != null, TranscriptRegions != null, Translation != null, StartExonPhase);\r\n            encoded.Write(writer);\r\n\r\n            // transcript regions\r\n            if (encoded.HasTranscriptRegions) WriteIndices(writer, TranscriptRegions, transcriptRegionIndices);\r\n            writer.WriteOpt(NumExons);\r\n\r\n            // protein function predictions\r\n            if (encoded.HasSift) 
writer.WriteOpt(SiftIndex);\r\n            if (encoded.HasPolyPhen) writer.WriteOpt(PolyPhenIndex);\r\n\r\n            // translation\r\n            if (encoded.HasTranslation)\r\n            {\r\n                // ReSharper disable once PossibleNullReferenceException\r\n                int peptideIndex = GetIndex(Translation.PeptideSeq, peptideIndices);\r\n                Translation.Write(writer, peptideIndex);\r\n            }\r\n\r\n            // attributes\r\n            if (encoded.HasMirnas)          WriteIndices(writer, MicroRnas, microRnaIndices);\r\n            if (encoded.HasRnaEdits)        WriteItems(writer, RnaEdits, (x, y) => x.Write(y));\r\n            if (encoded.HasSelenocysteines) WriteItems(writer, Selenocysteines, (x, y) => y.WriteOpt(x));\r\n        }\r\n\r\n        private static T[] ReadItems<T>(BufferedBinaryReader reader, Func<BufferedBinaryReader, T> readFunc)\r\n        {\r\n            int numItems = reader.ReadOptInt32();\r\n            var items    = new T[numItems];\r\n            for (var i = 0; i < numItems; i++) items[i] = readFunc(reader);\r\n            return items;\r\n        }\r\n\r\n        private static void WriteItems<T>(IExtendedBinaryWriter writer, T[] items, Action<T, IExtendedBinaryWriter> writeAction)\r\n        {\r\n            writer.WriteOpt(items.Length);\r\n            foreach (var item in items) writeAction(item, writer);\r\n        }\r\n\r\n        private static T[] ReadIndices<T>(IBufferedBinaryReader reader, T[] cachedItems)\r\n        {\r\n            int numItems = reader.ReadOptInt32();\r\n            var items = new T[numItems];\r\n\r\n            for (var i = 0; i < numItems; i++)\r\n            {\r\n                int index = reader.ReadOptInt32();\r\n                items[i] = cachedItems[index];\r\n            }\r\n\r\n            return items;\r\n        }\r\n\r\n        private static void WriteIndices<T>(IExtendedBinaryWriter writer, T[] items, IReadOnlyDictionary<T, int> indices)\r\n        
{\r\n            writer.WriteOpt(items.Length);\r\n            foreach (var item in items) writer.WriteOpt(GetIndex(item, indices));\r\n        }\r\n\r\n        private static int GetIndex<T>(T item, IReadOnlyDictionary<T, int> indices)\r\n        {\r\n            if (item == null) return -1;\r\n\r\n            if (!indices.TryGetValue(item, out int index))\r\n            {\r\n                throw new InvalidDataException($\"Unable to locate the {typeof(T)} in the indices: {item}\");\r\n            }\r\n\r\n            return index;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Caches/DataStructures/TranscriptRegion.cs",
    "content": "﻿using IO;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace VariantAnnotation.Caches.DataStructures\r\n{\r\n    public sealed class TranscriptRegion : ITranscriptRegion\r\n    {\r\n        public TranscriptRegionType Type { get; }\r\n        public ushort Id { get; }\r\n        public int Start { get; }\r\n        public int End { get; }\r\n        public int CdnaStart { get; }\r\n        public int CdnaEnd { get; }\r\n\r\n        public TranscriptRegion(TranscriptRegionType type, ushort id, int start, int end, int cdnaStart, int cdnaEnd)\r\n        {\r\n            Type      = type;\r\n            Id        = id;\r\n            Start     = start;\r\n            End       = end;\r\n            CdnaStart = cdnaStart;\r\n            CdnaEnd   = cdnaEnd;\r\n        }\r\n\r\n        public static ITranscriptRegion Read(BufferedBinaryReader reader)\r\n        {\r\n            TranscriptRegionType type = (TranscriptRegionType)reader.ReadByte();\r\n            ushort id                 = reader.ReadOptUInt16();\r\n            int genomicStart          = reader.ReadOptInt32();\r\n            int genomicEnd            = reader.ReadOptInt32();\r\n\r\n            int cdnaStart = reader.ReadOptInt32();\r\n            int cdnaEnd   = reader.ReadOptInt32();\r\n\r\n            return new TranscriptRegion(type, id, genomicStart, genomicEnd, cdnaStart, cdnaEnd);\r\n        }\r\n\r\n        public void Write(IExtendedBinaryWriter writer)\r\n        {\r\n            writer.Write((byte)Type);\r\n            writer.WriteOpt(Id);\r\n            writer.WriteOpt(Start);\r\n            writer.WriteOpt(End);\r\n            writer.WriteOpt(CdnaStart);\r\n            writer.WriteOpt(CdnaEnd);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "VariantAnnotation/Caches/DataStructures/TranscriptRegionExtensions.cs",
    "content": "﻿using System;\r\nusing VariantAnnotation.Algorithms;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace VariantAnnotation.Caches.DataStructures\r\n{\r\n    public static class TranscriptRegionExtensions\r\n    {\r\n        public static int BinarySearch(this ITranscriptRegion[] regions, int position)\r\n        {\r\n            var begin = 0;\r\n            int end   = regions.Length - 1;\r\n\r\n            while (begin <= end)\r\n            {\r\n                int index  = begin + (end - begin >> 1);\r\n                var region = regions[index];\r\n\r\n                if (position >= region.Start && position <= region.End) return index;\r\n                if (region.End < position) begin = index + 1;\r\n                else if (position < region.Start) end = index - 1;\r\n            }\r\n\r\n            return ~begin;\r\n        }\r\n\r\n        public static (int ExonStart, int ExonEnd, int IntronStart, int IntronEnd) GetExonsAndIntrons(\r\n            this ITranscriptRegion[] regions, int startIndex, int endIndex)\r\n        {\r\n            int affectedStartIndex = GetAffectedRegionIndex(startIndex);\r\n            int affectedEndIndex   = GetAffectedRegionIndex(endIndex);\r\n\r\n            var exons   = regions.FindDesiredRegionIds(x => x == TranscriptRegionType.Exon || x == TranscriptRegionType.Gap, affectedStartIndex, affectedEndIndex);\r\n            var introns = regions.FindDesiredRegionIds(x => x == TranscriptRegionType.Intron, affectedStartIndex, affectedEndIndex);\r\n\r\n            return (exons.Start, exons.End, introns.Start, introns.End);\r\n        }\r\n\r\n        private static (int Start, int End) FindDesiredRegionIds(this ITranscriptRegion[] regions,\r\n            Func<TranscriptRegionType, bool> hasDesiredRegion, int startIndex, int endIndex)\r\n        {\r\n            int regionStart   = FindFirst(regions, hasDesiredRegion, startIndex, endIndex);\r\n            int newStartIndex = regionStart 
!= -1 ? regionStart : startIndex;\r\n            int regionEnd     = FindLast(regions, hasDesiredRegion, newStartIndex, endIndex);\r\n\r\n            int startId = regionStart == -1 ? -1 : regions[regionStart].Id;\r\n            int endId   = regionEnd   == -1 ? -1 : regions[regionEnd].Id;\r\n\r\n            if (endId < startId) Swap.Int(ref startId, ref endId);\r\n            return (startId, endId);\r\n        }\r\n\r\n        private static int FindFirst(ITranscriptRegion[] regions, Func<TranscriptRegionType, bool> hasDesiredRegion, int startIndex,\r\n            int endIndex)\r\n        {\r\n            for (int i = startIndex; i <= endIndex; i++) if (hasDesiredRegion(regions[i].Type)) return i;\r\n            return -1;\r\n        }\r\n\r\n        private static int FindLast(ITranscriptRegion[] regions, Func<TranscriptRegionType, bool> hasDesiredRegion, int startIndex,\r\n            int endIndex)\r\n        {\r\n            for (int i = endIndex; i >= startIndex; i--) if (hasDesiredRegion(regions[i].Type)) return i;\r\n            return -1;\r\n        }\r\n\r\n        private static int GetAffectedRegionIndex(int index)\r\n        {\r\n            if (index >= 0) return index;\r\n            index = ~index;\r\n            return index == 0 ? 0 : index - 1;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "VariantAnnotation/Caches/DataStructures/Translation.cs",
    "content": "﻿using IO;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace VariantAnnotation.Caches.DataStructures\r\n{\r\n    public sealed class Translation : ITranslation\r\n    {\r\n        public ICodingRegion CodingRegion { get; }\r\n        public ICompactId ProteinId { get; }\r\n        public string PeptideSeq { get; }\r\n\r\n        public Translation(ICodingRegion codingRegion, CompactId proteinId, string peptideSeq)\r\n        {\r\n            CodingRegion = codingRegion;\r\n            ProteinId    = proteinId;\r\n            PeptideSeq   = peptideSeq;\r\n        }\r\n\r\n        public static ITranslation Read(BufferedBinaryReader reader, string[] peptideSeqs)\r\n        {\r\n            var codingRegion = DataStructures.CodingRegion.Read(reader);\r\n            var proteinId    = CompactId.Read(reader);\r\n            var peptideIndex = reader.ReadOptInt32();\r\n            var peptideSeq   = peptideIndex == -1 ? null : peptideSeqs[peptideIndex];\r\n\r\n            return new Translation(codingRegion, proteinId, peptideSeq);\r\n        }\r\n\r\n        public void Write(IExtendedBinaryWriter writer, int peptideIndex)\r\n        {\r\n            CodingRegion.Write(writer);\r\n            ProteinId.Write(writer);\r\n            writer.WriteOpt(peptideIndex);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Caches/PredictionCache.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Genome;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Caches;\r\nusing VariantAnnotation.Interface.Providers;\r\n\r\nnamespace VariantAnnotation.Caches\r\n{\r\n    public sealed class PredictionCache : IPredictionCache\r\n    {\r\n        private readonly Prediction[]                    _predictions;\r\n\t    public           string                          Name               { get; } = string.Empty;\r\n\t    public           GenomeAssembly                  Assembly           { get; }\r\n        public           IEnumerable<IDataSourceVersion> DataSourceVersions { get; } = new List<IDataSourceVersion>();\r\n        private readonly string[]                        _descriptions;\r\n\r\n        public PredictionCache(GenomeAssembly genomeAssembly, Prediction[] predictions, string[] descriptions)\r\n        {\r\n            Assembly      = genomeAssembly;\r\n            _predictions  = predictions;\r\n            _descriptions = descriptions;\r\n        }\r\n\r\n        public PredictionScore GetProteinFunctionPrediction(int predictionIndex, char newAminoAcid,\r\n            int aaPosition)\r\n        {\r\n            var entry = _predictions[predictionIndex].GetPrediction(newAminoAcid, aaPosition);\r\n\r\n            return entry == null\r\n                ? null\r\n                : new PredictionScore(_descriptions[entry.EnumIndex], entry.Score);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Caches/TranscriptCache.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Genome;\r\nusing Intervals;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Caches;\r\nusing VariantAnnotation.Interface.Providers;\r\n\r\nnamespace VariantAnnotation.Caches\r\n{\r\n    public sealed class TranscriptCache : ITranscriptCache\r\n    {\r\n        public IIntervalForest<ITranscript> TranscriptIntervalForest { get; }\r\n        public IIntervalForest<IRegulatoryRegion> RegulatoryIntervalForest { get; }\r\n\t    public string Name { get; }\r\n\t    public GenomeAssembly Assembly { get; }\r\n        public IEnumerable<IDataSourceVersion> DataSourceVersions { get; }\r\n\r\n        public TranscriptCache(IEnumerable<IDataSourceVersion> dataSourceVersions, GenomeAssembly genomeAssembly,\r\n            IntervalArray<ITranscript>[] transcriptIntervalArrays,\r\n            IntervalArray<IRegulatoryRegion>[] regulatoryRegionIntervalArrays)\r\n        {\r\n            Name                     = \"Transcript annotation provider\";\r\n            DataSourceVersions       = dataSourceVersions;\r\n            Assembly                 = genomeAssembly;\r\n            TranscriptIntervalForest = new IntervalForest<ITranscript>(transcriptIntervalArrays);\r\n            RegulatoryIntervalForest = new IntervalForest<IRegulatoryRegion>(regulatoryRegionIntervalArrays);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Caches/TranscriptCacheData.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing Intervals;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.IO.Caches;\r\nusing VariantAnnotation.Providers;\r\n\r\nnamespace VariantAnnotation.Caches\r\n{\r\n    public sealed class TranscriptCacheData\r\n    {\r\n        public readonly CacheHeader Header;\r\n        \r\n        public readonly IGene[] Genes;\r\n        public readonly ITranscriptRegion[] TranscriptRegions;\r\n        public readonly IInterval[] Mirnas;\r\n        public readonly string[] PeptideSeqs;\r\n        public readonly IntervalArray<ITranscript>[] TranscriptIntervalArrays;\r\n        public readonly IntervalArray<IRegulatoryRegion>[] RegulatoryRegionIntervalArrays;\r\n\r\n        public TranscriptCacheData(CacheHeader header, IGene[] genes, ITranscriptRegion[] transcriptRegions,\r\n            IInterval[] mirnas, string[] peptideSeqs, IntervalArray<ITranscript>[] transcriptIntervalArrays,\r\n            IntervalArray<IRegulatoryRegion>[] regulatoryRegionIntervalArrays)\r\n        {\r\n            Header                         = header;\r\n            Genes                          = genes;\r\n            TranscriptRegions              = transcriptRegions;\r\n            Mirnas                         = mirnas;\r\n            PeptideSeqs                    = peptideSeqs;\r\n            TranscriptIntervalArrays       = transcriptIntervalArrays;\r\n            RegulatoryRegionIntervalArrays = regulatoryRegionIntervalArrays;\r\n        }\r\n\r\n        public TranscriptCache GetCache()\r\n        {\r\n            var dataSourceVersions = GetDataSourceVersions(Header);\r\n            return new TranscriptCache(dataSourceVersions, Header.Assembly, TranscriptIntervalArrays, RegulatoryRegionIntervalArrays);\r\n        }\r\n\r\n        private static IEnumerable<IDataSourceVersion> GetDataSourceVersions(CacheHeader header)\r\n        
{\r\n            var dataSourceVersions = new List<IDataSourceVersion>();\r\n            if (header == null) return dataSourceVersions;\r\n\r\n            ushort vepVersion = header.Custom.VepVersion;\r\n\r\n            // TODO: Embed the data source version in the next cache file format. This hack lets us handle the SARS-CoV-2 genome\r\n            DataSourceVersion dataSourceVersion = vepVersion == 0\r\n                ? new DataSourceVersion(\"RefSeq\", \"NC_045512.2\", new DateTime(2020,3,20,0,0,0,DateTimeKind.Utc).Ticks, \"Severe acute respiratory syndrome coronavirus 2 (SARS-CoV2)\")\r\n                : new DataSourceVersion(\"VEP\", vepVersion.ToString(), header.CreationTimeTicks, header.Source.ToString());\r\n            \r\n            dataSourceVersions.Add(dataSourceVersion);\r\n            return dataSourceVersions;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "VariantAnnotation/Caches/TranscriptIntervalForestExtensions.cs",
    "content": "﻿using Genome;\r\nusing Intervals;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace VariantAnnotation.Caches\r\n{\r\n    public static class TranscriptIntervalForestExtensions\r\n    {\r\n        public static ITranscript[] GetAllFlankingValues(this IIntervalForest<ITranscript> transcriptIntervalForest,\r\n            IChromosomeInterval interval) => transcriptIntervalForest.GetAllOverlappingValues(interval.Chromosome.Index,\r\n            interval.Start - interval.Chromosome.FlankingLength, interval.End + interval.Chromosome.FlankingLength);\r\n    }\r\n}\r\n"
  },
  {
    "path": "VariantAnnotation/Caches/Utilities/ExonUtilities.cs",
    "content": "﻿using VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace VariantAnnotation.Caches.Utilities\r\n{\r\n    public static class ExonUtilities\r\n    {\r\n        public static int GetTotalExonLength(ITranscriptRegion[] regions)\r\n        {\r\n            int totalExonLength = 0;\r\n\r\n            foreach (var region in regions)\r\n            {\r\n                if (region.Type != TranscriptRegionType.Exon) continue;\r\n                totalExonLength += region.End - region.Start + 1;\r\n            }\r\n\r\n            return totalExonLength;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Caches/Utilities/GeneForestGenerator.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Linq;\r\nusing Intervals;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Utilities;\r\n\r\nnamespace VariantAnnotation.Caches.Utilities\r\n{\r\n    public static class GeneForestGenerator\r\n    {\r\n        private static readonly IntervalArray<IGene> EmptyIntervalArray = new IntervalArray<IGene>(new Interval<IGene>[0]);\r\n\r\n        public static IntervalForest<IGene> GetGeneForest(IntervalArray<ITranscript>[] transcriptIntervalArrays)\r\n        {\r\n            int numChromosomes     = transcriptIntervalArrays.Length;\r\n            var geneIntervalArrays = new IntervalArray<IGene>[numChromosomes];\r\n            var geneComparer       = new GeneComparer();\r\n\r\n            for (var chrIndex = 0; chrIndex < numChromosomes; chrIndex++)\r\n            {\r\n                if (transcriptIntervalArrays[chrIndex] == null)\r\n                {\r\n                    geneIntervalArrays[chrIndex] = EmptyIntervalArray;\r\n                    continue; // assign an empty IntervalArray to this chr\r\n                }\r\n                var geneList = new List<IGene>(); // keeps the order of genes, as the intervals are already sorted at trasncripts level\r\n                var geneSet = new HashSet<IGene>(geneComparer);\r\n                foreach (var transcriptInterval in transcriptIntervalArrays[chrIndex].Array)\r\n                {\r\n                    var transcript = transcriptInterval.Value;\r\n\r\n                    var gene = transcript.Gene;\r\n                    if (geneSet.Contains(gene)) continue;\r\n\r\n                    geneSet.Add(gene);\r\n                    geneList.Add(gene);\r\n                }\r\n                geneIntervalArrays[chrIndex] = new IntervalArray<IGene>(geneList.Select(GetGeneInterval).ToArray());\r\n            }\r\n            return new IntervalForest<IGene>(geneIntervalArrays);\r\n        }\r\n\r\n        private static 
Interval<IGene> GetGeneInterval(IGene gene) => new Interval<IGene>(gene.Start, gene.End, gene);\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Caches/Utilities/RnaEditUtilities.cs",
    "content": "﻿using System;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing Variants;\n\nnamespace VariantAnnotation.Caches.Utilities\n{\n    public static class RnaEditUtilities\n    {\n        public static VariantType GetRnaEditType(IRnaEdit rnaEdit)\n        {\n            if (string.IsNullOrEmpty(rnaEdit.Bases)) return VariantType.deletion;\n\n            if (rnaEdit.Start == rnaEdit.End && rnaEdit.Bases.Length == 1) return VariantType.SNV;\n\n            if (rnaEdit.Start == rnaEdit.End + 1 && !string.IsNullOrEmpty(rnaEdit.Bases)) return VariantType.insertion;\n\n            if (Math.Abs(rnaEdit.End - rnaEdit.Start) + 1 == rnaEdit.Bases.Length) return VariantType.MNV;\n\n            return VariantType.unknown;\n        }\n\n        public static void SetTypesAndSort(IRnaEdit[] rnaEdits)\n        {\n            foreach (var rnaEdit in rnaEdits)\n            {\n                if (rnaEdit.Type != VariantType.unknown) return;\n                rnaEdit.Type = GetRnaEditType(rnaEdit);\n            }\n\n            Array.Sort(rnaEdits);\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GeneAnnotation/GeneAnnotationProvider.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing Genome;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface.GeneAnnotation;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.IO;\r\nusing VariantAnnotation.NSA;\r\n\r\nnamespace VariantAnnotation.GeneAnnotation\r\n{\r\n    public sealed class GeneAnnotationProvider : IGeneAnnotationProvider\r\n    {\r\n\t    public string Name { get; }\r\n        public GenomeAssembly Assembly => GenomeAssembly.Unknown;\r\n        public IEnumerable<IDataSourceVersion> DataSourceVersions => _ngaReaders.Select(x => x.Version);\r\n\r\n        private readonly List<NgaReader> _ngaReaders;\r\n\r\n        public string Annotate(string geneName)\r\n        {\r\n            var sb = StringBuilderPool.Get();\r\n            var jsonObject = new JsonObject(sb);\r\n\r\n            sb.Append(JsonObject.OpenBrace);\r\n            jsonObject.AddStringValue(\"name\", geneName);\r\n\r\n            var hasAnnotation = false;\r\n            foreach (var ngaReader in _ngaReaders)\r\n            {\r\n                string jsonString = ngaReader.GetAnnotation(geneName);\r\n                jsonObject.AddStringValue(ngaReader.JsonKey, jsonString, false);\r\n                if (!string.IsNullOrEmpty(jsonString)) hasAnnotation = true;\r\n            }\r\n\r\n            if (!hasAnnotation)\r\n            {\r\n                StringBuilderPool.GetStringAndReturn(sb);\r\n                return null;\r\n            }\r\n\r\n            sb.Append(JsonObject.CloseBrace);\r\n\r\n            return StringBuilderPool.GetStringAndReturn(sb);\r\n        }\r\n\r\n        public GeneAnnotationProvider(IEnumerable<Stream> dbStreams)\r\n        {\r\n            Name        = \"Gene annotation provider\";\r\n            _ngaReaders = new List<NgaReader>();\r\n\r\n            foreach (var dbStream in dbStreams) _ngaReaders.Add(NgaReader.Read(dbStream));\r\n        }\r\n\r\n        
public void Dispose() {}\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/GeneFusions/Calling/BreakEndAdjacency.cs",
    "content": "﻿namespace VariantAnnotation.GeneFusions.Calling\n{\n    public sealed record BreakEndAdjacency(BreakPoint Origin, BreakPoint Partner);\n}\n"
  },
  {
    "path": "VariantAnnotation/GeneFusions/Calling/BreakEndAdjacencyFactory.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Text.RegularExpressions;\nusing Genome;\nusing Variants;\n\nnamespace VariantAnnotation.GeneFusions.Calling\n{\n    public static class BreakEndAdjacencyFactory\n    {\n        private const           string ReverseBracket = \"]\";\n        private static readonly Regex  ForwardRegex   = new(@\"\\w+([\\[\\]])(.+):(\\d+)([\\[\\]])\", RegexOptions.Compiled);\n        private static readonly Regex  ReverseRegex   = new(@\"([\\[\\]])(.+):(\\d+)([\\[\\]])\\w+\", RegexOptions.Compiled);\n\n        public static BreakEndAdjacency[] CreateAdjacencies(ISimpleVariant variant, Dictionary<string, Chromosome> refNameToChromosome, bool isInv3,\n            bool isInv5) => variant.Type == VariantType.translocation_breakend\n            ? CreateFromTranslocation(variant, refNameToChromosome)\n            : CreateFromSymbolicAllele(variant, variant.Type, isInv3, isInv5);\n        \n        public static BreakEndAdjacency[] CreateFromTranslocation(ISimpleVariant variant,\n            Dictionary<string, Chromosome> refNameToChromosome) => variant.AltAllele.StartsWith(variant.RefAllele)\n            ? 
ConvertTranslocation(variant, ForwardRegex, false, 4, refNameToChromosome)\n            : ConvertTranslocation(variant, ReverseRegex, true,  1, refNameToChromosome);\n\n        private static BreakEndAdjacency[] ConvertTranslocation(ISimpleVariant variant, Regex regex,\n            bool onReverseStrand, int partnerBracketIndex, Dictionary<string, Chromosome> refNameToChromosome)\n        {\n            Match match = regex.Match(variant.AltAllele);\n            if (!match.Success)\n                throw new InvalidDataException(\n                    $\"Unable to successfully parse the complex rearrangements for the following allele: {variant.AltAllele}\");\n\n            bool        partnerOnReverseStrand = match.Groups[partnerBracketIndex].Value == ReverseBracket;\n            var         partnerPosition        = Convert.ToInt32(match.Groups[3].Value);\n            string      partnerReferenceName   = match.Groups[2].Value;\n            Chromosome partnerChromosome      = ReferenceNameUtilities.GetChromosome(refNameToChromosome, partnerReferenceName);\n\n            var origin  = new BreakPoint(variant.Chromosome, variant.Start,   onReverseStrand);\n            var partner = new BreakPoint(partnerChromosome,  partnerPosition, partnerOnReverseStrand);\n\n            return new[] {new BreakEndAdjacency(origin, partner)};\n        }\n\n        public static BreakEndAdjacency[] CreateFromSymbolicAllele(IChromosomeInterval interval, VariantType variantType, bool isInv3, bool isInv5)\n        {\n            // ReSharper disable once SwitchStatementMissingSomeCases\n            return variantType switch\n            {\n                VariantType.deletion           => CreateFromDeletion(interval),\n                VariantType.tandem_duplication => CreateFromDuplication(interval),\n                VariantType.inversion          => CreateFromInversion(interval, isInv3, isInv5),\n                _                              => null\n            };\n        }\n\n        // 
ReSharper disable once UseDeconstructionOnParameter\n        private static BreakEndAdjacency Flip(this BreakEndAdjacency adjacency)\n        {\n            var origin  = new BreakPoint(adjacency.Partner.Chromosome, adjacency.Partner.Position, !adjacency.Partner.OnReverseStrand);\n            var partner = new BreakPoint(adjacency.Origin.Chromosome,  adjacency.Origin.Position,  !adjacency.Origin.OnReverseStrand);\n            return new BreakEndAdjacency(origin, partner);\n        }\n\n        private static BreakEndAdjacency[] CreateFromDeletion(IChromosomeInterval interval)\n        {\n            // 1 10 . N N[1:21[\n            var origin    = new BreakPoint(interval.Chromosome, interval.Start - 1, false);\n            var remote    = new BreakPoint(interval.Chromosome, interval.End   + 1, false);\n            var adjacency = new BreakEndAdjacency(origin, remote);\n\n            return new[] {adjacency, adjacency.Flip()};\n        }\n\n        private static BreakEndAdjacency[] CreateFromDuplication(IChromosomeInterval interval)\n        {\n            // 1 1 . N ]1:10]N\n            var origin    = new BreakPoint(interval.Chromosome, interval.End,       false);\n            var remote    = new BreakPoint(interval.Chromosome, interval.Start - 1, false);\n            var adjacency = new BreakEndAdjacency(origin, remote);\n\n            return new[] {adjacency, adjacency.Flip()};\n        }\n\n        private static BreakEndAdjacency[] CreateFromInversion(IChromosomeInterval interval, bool isInv3, bool isInv5)\n        {\n            // 1 10 . N N]1:20]\n            // 1 11 . 
N [1:21[N\n            BreakPoint origin, origin2, remote, remote2;\n\n            // ReSharper disable once ConvertIfStatementToSwitchStatement\n            if (!isInv3 && !isInv5)\n            {\n                origin = new BreakPoint(interval.Chromosome, interval.Start - 1, false);\n                remote = new BreakPoint(interval.Chromosome, interval.End,       true);\n\n                origin2 = new BreakPoint(interval.Chromosome, interval.End + 1, true);\n                remote2 = new BreakPoint(interval.Chromosome, interval.Start,   false);\n            }\n            else if (isInv3)\n            {\n                origin = new BreakPoint(interval.Chromosome, interval.Start - 1, false);\n                remote = new BreakPoint(interval.Chromosome, interval.End,       true);\n\n                origin2 = new BreakPoint(interval.Chromosome, interval.End,       false);\n                remote2 = new BreakPoint(interval.Chromosome, interval.Start - 1, true);\n            }\n            else // isInv5\n            {\n                origin = new BreakPoint(interval.Chromosome, interval.Start,   true);\n                remote = new BreakPoint(interval.Chromosome, interval.End + 1, false);\n\n                origin2 = new BreakPoint(interval.Chromosome, interval.End + 1, true);\n                remote2 = new BreakPoint(interval.Chromosome, interval.Start,   false);\n            }\n\n            return new[] {new BreakEndAdjacency(origin, remote), new BreakEndAdjacency(origin2, remote2)};\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GeneFusions/Calling/BreakPoint.cs",
    "content": "﻿using Genome;\n\nnamespace VariantAnnotation.GeneFusions.Calling\n{\n    public sealed record BreakPoint(Chromosome Chromosome, int Position, bool OnReverseStrand);\n}"
  },
  {
    "path": "VariantAnnotation/GeneFusions/Calling/BreakPointTranscript.cs",
    "content": "﻿using VariantAnnotation.Interface.AnnotatedPositions;\n\nnamespace VariantAnnotation.GeneFusions.Calling\n{\n    public sealed record BreakPointTranscript(ITranscript Transcript, int GenomicPosition, int RegionIndex);\n}"
  },
  {
    "path": "VariantAnnotation/GeneFusions/Calling/GeneFusionCaller.cs",
    "content": "﻿using System.Collections.Generic;\nusing Genome;\nusing Intervals;\nusing VariantAnnotation.AnnotatedPositions.Transcript;\nusing VariantAnnotation.GeneFusions.HGVS;\nusing VariantAnnotation.GeneFusions.Utilities;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing Variants;\n\nnamespace VariantAnnotation.GeneFusions.Calling\n{\n    public sealed class GeneFusionCaller\n    {\n        private readonly Dictionary<string, Chromosome> _refNameToChromosome;\n        private readonly IIntervalForest<ITranscript>     _transcriptIntervalForest;\n\n        public GeneFusionCaller(Dictionary<string, Chromosome> refNameToChromosome, IIntervalForest<ITranscript> transcriptIntervalForest)\n        {\n            _refNameToChromosome      = refNameToChromosome;\n            _transcriptIntervalForest = transcriptIntervalForest;\n        }\n\n        // ReSharper disable once ParameterTypeCanBeEnumerable.Global\n        public void AddGeneFusions(IAnnotatedVariant[] annotatedVariants, bool isImprecise, bool isInv3, bool isInv5)\n        {\n            var transcriptIdToGeneFusions = new Dictionary<string, IAnnotatedGeneFusion[]>();\n\n            foreach (IAnnotatedVariant annotatedVariant in annotatedVariants)\n            {\n                IVariant variant = annotatedVariant.Variant;\n                if (!variant.IsStructuralVariant) continue;\n\n                BreakEndAdjacency[] adjacencies = BreakEndAdjacencyFactory.CreateAdjacencies(variant, _refNameToChromosome, isInv3, isInv5);\n                if (adjacencies == null) continue;\n\n                transcriptIdToGeneFusions.Clear();\n\n                foreach (BreakEndAdjacency adjacency in adjacencies)\n                {\n                    ITranscript[] originTranscripts  = GetOverlappingTranscripts(adjacency.Origin);\n                    ITranscript[] partnerTranscripts = GetOverlappingTranscripts(adjacency.Partner);\n                    if (originTranscripts == null || partnerTranscripts 
== null) continue;\n                    AddGeneFusionsToDictionary(transcriptIdToGeneFusions, adjacency, originTranscripts, partnerTranscripts, isImprecise);\n                }\n\n                foreach (IAnnotatedTranscript transcript in annotatedVariant.Transcripts)\n                {\n                    string transcriptId = transcript.Transcript.Id.WithVersion;\n                    if (!transcriptIdToGeneFusions.TryGetValue(transcriptId, out IAnnotatedGeneFusion[] annotatedGeneFusions)) continue;\n                    transcript.AddGeneFusions(annotatedGeneFusions);\n                }\n            }\n        }\n\n        private ITranscript[] GetOverlappingTranscripts(BreakPoint bp) =>\n            bp == null ? null : _transcriptIntervalForest.GetAllOverlappingValues(bp.Chromosome.Index, bp.Position, bp.Position);\n\n        internal static void AddGeneFusionsToDictionary(Dictionary<string, IAnnotatedGeneFusion[]> transcriptIdToGeneFusions,\n            // ReSharper disable once ParameterTypeCanBeEnumerable.Global\n            BreakEndAdjacency adjacency, ITranscript[] originTranscripts, ITranscript[] partnerTranscripts, bool isImprecise)\n        {\n            var geneKeys    = new HashSet<ulong>();\n            var geneFusions = new List<IAnnotatedGeneFusion>();\n\n            foreach (ITranscript originTranscript in originTranscripts)\n            {\n                geneFusions.Clear();\n                (int originIndex, ITranscriptRegion _) =\n                    MappedPositionUtilities.FindRegion(originTranscript.TranscriptRegions, adjacency.Origin.Position);\n\n                foreach (ITranscript partnerTranscript in partnerTranscripts)\n                {\n                    EvaluateGeneFusionCandidate(geneFusions, geneKeys, adjacency, originTranscript, originIndex, partnerTranscript, isImprecise);\n                }\n\n                if (geneFusions.Count == 0) continue;\n                transcriptIdToGeneFusions[originTranscript.Id.WithVersion] = 
geneFusions.ToArray();\n            }\n        }\n\n        // ReSharper disable once UseDeconstructionOnParameter\n        private static void EvaluateGeneFusionCandidate(List<IAnnotatedGeneFusion> geneFusions, HashSet<ulong> geneKeys, BreakEndAdjacency adjacency,\n            ITranscript originTranscript, int originIndex, ITranscript partnerTranscript, bool isImprecise)\n        {\n            IGene originGene  = originTranscript.Gene;\n            IGene partnerGene = partnerTranscript.Gene;\n\n            if (!FoundViableGeneFusion(adjacency, originGene, originTranscript, originTranscript.Source, partnerGene, partnerTranscript,\n                partnerTranscript.Source)) return;\n\n            (int partnerIndex, ITranscriptRegion partnerRegion) =\n                MappedPositionUtilities.FindRegion(partnerTranscript.TranscriptRegions, adjacency.Partner.Position);\n\n            int? partnerExon   = partnerRegion.Type == TranscriptRegionType.Exon ? partnerRegion.Id : null;\n            int? partnerIntron = partnerRegion.Type == TranscriptRegionType.Intron ? partnerRegion.Id : null;\n\n            var origin  = new BreakPointTranscript(originTranscript,  adjacency.Origin.Position,  originIndex);\n            var partner = new BreakPointTranscript(partnerTranscript, adjacency.Partner.Position, partnerIndex);\n\n            bool originOnReverseStrand = originGene.OnReverseStrand ^ adjacency.Origin.OnReverseStrand;\n            (BreakPointTranscript first, BreakPointTranscript second) = originOnReverseStrand ? 
(partner, origin) : (origin, partner);\n\n            bool     isInFrame   = !isImprecise && DetermineInFrameFusion(first, second);\n            string   hgvsr       = HgvsRnaNomenclature.GetHgvs(first, second);\n\n            (ulong fusionKey, string firstGeneSymbol, uint firstGeneKey, string secondGeneSymbol, uint secondGeneKey) =\n                GetGeneAndFusionKeys(originGene, partnerGene);\n\n            geneFusions.Add(new AnnotatedGeneFusion(partnerTranscript, partnerExon, partnerIntron, hgvsr, isInFrame, fusionKey, firstGeneSymbol,\n                firstGeneKey, secondGeneSymbol, secondGeneKey));\n            geneKeys.Add(fusionKey);\n        }\n\n        internal static (ulong FusionKey, string FirstGeneSymbol, uint FirstGeneKey, string SecondGeneSymbol, uint SecondGeneKey)\n            GetGeneAndFusionKeys(IGene originGene, IGene partnerGene)\n        {\n            (IGene firstGene, IGene secondGene) = SortGenes(originGene, partnerGene);\n\n            string firstGeneId   = firstGene.EnsemblId.WithoutVersion;\n            string secondGeneId  = secondGene.EnsemblId.WithoutVersion;\n            uint   firstGeneKey  = GeneFusionKey.CreateGeneKey(firstGeneId);\n            uint   secondGeneKey = GeneFusionKey.CreateGeneKey(secondGeneId);\n            ulong  fusionKey     = GeneFusionKey.Create(firstGeneKey, secondGeneKey);\n            return (fusionKey, firstGene.Symbol, firstGeneKey, secondGene.Symbol, secondGeneKey);\n        }\n\n        private static (IGene FirstGene, IGene SecondGene) SortGenes(IGene originGene, IGene partnerGene)\n        {\n            if (originGene.Chromosome.Index == partnerGene.Chromosome.Index)\n            {\n                return originGene.Start < partnerGene.Start\n                    ? (originGene, partnerGene)\n                    : (partnerGene, originGene);\n            }\n\n            return originGene.Chromosome.Index < partnerGene.Chromosome.Index\n                ? 
(originGene, partnerGene)\n                : (partnerGene, originGene);\n        }\n\n        // ReSharper disable UseDeconstructionOnParameter\n        internal static bool DetermineInFrameFusion(BreakPointTranscript first, BreakPointTranscript second)\n            // ReSharper restore UseDeconstructionOnParameter\n        {\n            ITranscriptRegion firstRegion  = first.Transcript.TranscriptRegions[first.RegionIndex];\n            ITranscriptRegion secondRegion = second.Transcript.TranscriptRegions[second.RegionIndex];\n\n            byte? firstCodonPosition = GetCodonPosition(firstRegion, first.Transcript.Translation, first.Transcript.StartExonPhase,\n                first.Transcript.Gene.OnReverseStrand,               first.GenomicPosition);\n\n            byte? secondCodonPosition = GetCodonPosition(secondRegion, second.Transcript.Translation, second.Transcript.StartExonPhase,\n                second.Transcript.Gene.OnReverseStrand,                second.GenomicPosition);\n\n            // nothing to do if we landed outside of the CDS or outside an exon\n            if (firstCodonPosition == null || secondCodonPosition == null) return false;\n\n            return firstCodonPosition == 1 && secondCodonPosition == 2 ||\n                   firstCodonPosition == 2 && secondCodonPosition == 3 ||\n                   firstCodonPosition == 3 && secondCodonPosition == 1;\n        }\n\n        internal static byte? 
GetCodonPosition(ITranscriptRegion region, ITranslation translation, byte startExonPhase, bool onReverseStrand,\n            int genomicPosition)\n        {\n            if (translation == null || region.Type != TranscriptRegionType.Exon) return null;\n\n            var variant = new Interval(genomicPosition, genomicPosition);\n            (int cdnaPosition, int _) = MappedPositionUtilities.GetCdnaPositions(region, region, variant, onReverseStrand, false);\n\n            (int cdsPosition, int _) =\n                MappedPositionUtilities.GetCdsPositions(translation.CodingRegion, cdnaPosition, cdnaPosition, startExonPhase, false);\n            if (cdsPosition == -1) return null;\n            \n            return (byte) ((cdsPosition - 1) % 3 + 1);\n        }\n\n        // ReSharper disable once UseDeconstructionOnParameter\n        internal static bool FoundViableGeneFusion(BreakEndAdjacency adjacency, IGene originGene, IChromosomeInterval originInterval,\n            Source originSource, IGene partnerGene, IChromosomeInterval partnerInterval, Source partnerSource)\n        {\n            bool originOnReverseStrand     = originGene.OnReverseStrand  ^ adjacency.Origin.OnReverseStrand;\n            bool partnerOnReverseStrand    = partnerGene.OnReverseStrand ^ adjacency.Partner.OnReverseStrand;\n            bool differentStrand           = originOnReverseStrand != partnerOnReverseStrand;\n            bool differentTranscriptSource = originSource          != partnerSource;\n            bool sameGeneSymbol            = originGene.Symbol     == partnerGene.Symbol;\n\n            bool transcriptAlreadyOverlaps =\n                originInterval.Chromosome.Index == partnerInterval.Chromosome.Index && originInterval.Overlaps(partnerInterval);\n\n            return !differentStrand && !differentTranscriptSource && !sameGeneSymbol && !transcriptAlreadyOverlaps;\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GeneFusions/HGVS/HgvsRnaNomenclature.cs",
    "content": "﻿using VariantAnnotation.AnnotatedPositions;\nusing VariantAnnotation.GeneFusions.Calling;\nusing VariantAnnotation.Interface.AnnotatedPositions;\n\nnamespace VariantAnnotation.GeneFusions.HGVS\n{\n    public static class HgvsRnaNomenclature\n    {\n        public static string GetHgvs(BreakPointTranscript first, BreakPointTranscript second)\n        {\n            string firstCoordinate  = GetHgvsRnaCoordinate(first);\n            string secondCoordinate = GetHgvsRnaCoordinate(second);\n\n            return\n                $\"{first.Transcript.Id.WithVersion}({first.Transcript.Gene.Symbol}):r.?_{firstCoordinate}::{second.Transcript.Id.WithVersion}({second.Transcript.Gene.Symbol}):r.{secondCoordinate}_?\";\n        }\n\n        // ReSharper disable once UseDeconstructionOnParameter\n        private static string GetHgvsRnaCoordinate(BreakPointTranscript first)\n        {\n            ITranscript    transcript     = first.Transcript;\n            PositionOffset positionOffset = HgvsUtilities.GetPositionOffset(transcript, first.GenomicPosition, first.RegionIndex, true);\n            return positionOffset.Value;\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GeneFusions/IO/GeneFusionIndexEntry.cs",
    "content": "﻿using System;\nusing IO;\n\nnamespace VariantAnnotation.GeneFusions.IO\n{\n    public readonly struct GeneFusionIndexEntry\n    {\n        private readonly ulong  _geneKey;\n        public readonly  ushort Index;\n\n        public GeneFusionIndexEntry(ulong geneKey, ushort index)\n        {\n            _geneKey = geneKey;\n            Index    = index;\n        }\n\n        public int Compare(ulong geneKey)\n        {\n            if (_geneKey < geneKey) return -1;\n            return _geneKey > geneKey ? 1 : 0;\n        }\n\n        public static GeneFusionIndexEntry Read(ref ReadOnlySpan<byte> byteSpan)\n        {\n            ulong  geneKey = SpanBufferBinaryReader.ReadUInt64(ref byteSpan);\n            ushort index   = SpanBufferBinaryReader.ReadOptUInt16(ref byteSpan);\n            return new GeneFusionIndexEntry(geneKey, index);\n        }\n\n        public void Write(ExtendedBinaryWriter writer)\n        {\n            writer.Write(_geneKey);\n            writer.WriteOpt(Index);\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GeneFusions/IO/GeneFusionJsonReader.cs",
    "content": "﻿using System;\nusing System.Buffers;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Text;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing Genome;\nusing IO;\nusing IO.v2;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.NSA;\nusing VariantAnnotation.Providers;\n\nnamespace VariantAnnotation.GeneFusions.IO\n{\n    public sealed class GeneFusionJsonReader : IGeneFusionSaReader\n    {\n        public const ushort SupportedFileFormatVersion = 1;\n\n        public GenomeAssembly     Assembly => GenomeAssembly.Unknown;\n        public IDataSourceVersion Version  { get; }\n        public string             JsonKey  { get; }\n\n        private readonly ExtendedBinaryReader _reader;\n\n        internal Dictionary<ulong, string[]> FusionKeyToFusions;\n\n        public GeneFusionJsonReader(Stream stream)\n        {\n            _reader = new ExtendedBinaryReader(stream, Encoding.UTF8);\n            // ReSharper disable once UseDeconstruction\n            Header header = Header.Read(_reader);\n            JsonKey = _reader.ReadString();\n            CheckHeader(header.FileType, header.FileFormatVersion);\n            Version = DataSourceVersion.Read(_reader);\n        }\n\n        public static void CheckHeader(FileType fileType, ushort fileFormatVersion)\n        {\n            if (fileType != FileType.GeneFusionJson)\n                throw new InvalidDataException(\n                        $\"Found an invalid file type ({fileType}) while reading the gene fusions file.\")\n                    .MakeUserError();\n\n            if (fileFormatVersion != SupportedFileFormatVersion)\n                throw new InvalidDataException(\n                        $\"The gene fusion reader currently supports v{SupportedFileFormatVersion} files, but found v{fileFormatVersion} instead.\")\n                    .MakeUserError();\n    
    }\n\n        public void LoadAnnotations()\n        {\n            ArrayPool<byte>    bytePool = ArrayPool<byte>.Shared;\n            byte[]             bytes    = _reader.ReadCompressedByteArray(bytePool);\n            ReadOnlySpan<byte> byteSpan = bytes.AsSpan();\n\n            int numGeneFusionPairs = SpanBufferBinaryReader.ReadOptInt32(ref byteSpan);\n            FusionKeyToFusions = new Dictionary<ulong, string[]>(numGeneFusionPairs);\n\n            for (var i = 0; i < numGeneFusionPairs; i++)\n            {\n                ulong geneKey        = SpanBufferBinaryReader.ReadUInt64(ref byteSpan);\n                int   numJsonEntries = SpanBufferBinaryReader.ReadOptInt32(ref byteSpan);\n                var   jsonArray      = new string[numJsonEntries];\n\n                for (var j = 0; j < numJsonEntries; j++) jsonArray[j] = SpanBufferBinaryReader.ReadUtf8String(ref byteSpan);\n                FusionKeyToFusions[geneKey] = jsonArray;\n            }\n\n            bytePool.Return(bytes);\n        }\n\n        public void AddAnnotations(IGeneFusionPair[] fusionPairs, IList<ISupplementaryAnnotation> supplementaryAnnotations)\n        {\n            var jsonEntries = new List<string>();\n\n            foreach (IGeneFusionPair fusionPair in fusionPairs)\n            {\n                if (!FusionKeyToFusions.TryGetValue(fusionPair.FusionKey, out string[] entries)) continue;\n                jsonEntries.AddRange(entries);\n            }\n\n            if (jsonEntries.Count == 0) return;\n\n            var sa = new SupplementaryAnnotation(JsonKey, true, false, null, jsonEntries);\n            supplementaryAnnotations.Add(sa);\n        }\n\n        public void Dispose() => _reader.Dispose();\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GeneFusions/IO/GeneFusionSourceReader.cs",
    "content": "﻿using System;\nusing System.Buffers;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.Text;\nusing Compression.Utilities;\nusing ErrorHandling;\nusing Genome;\nusing IO;\nusing IO.v2;\nusing VariantAnnotation.GeneFusions.SA;\nusing VariantAnnotation.GeneFusions.Utilities;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.NSA;\nusing VariantAnnotation.Providers;\n\nnamespace VariantAnnotation.GeneFusions.IO\n{\n    public sealed class GeneFusionSourceReader : IGeneFusionSaReader\n    {\n        public const ushort SupportedFileFormatVersion = 1;\n\n        private readonly ExtendedBinaryReader _reader;\n\n        public GenomeAssembly     Assembly => GenomeAssembly.Unknown;\n        public IDataSourceVersion Version  { get; }\n        public string             JsonKey  { get; }\n\n        internal uint[]                       OncogeneKeys;\n        internal GeneFusionSourceCollection[] Index;\n        internal GeneFusionIndexEntry[]       IndexEntries;\n\n        public GeneFusionSourceReader(Stream stream)\n        {\n            _reader = new ExtendedBinaryReader(stream, Encoding.UTF8);\n            // ReSharper disable once UseDeconstruction\n            Header header = Header.Read(_reader);\n            JsonKey = _reader.ReadString();\n            CheckHeader(header.FileType, header.FileFormatVersion);\n            Version = DataSourceVersion.Read(_reader);\n        }\n\n        internal static void CheckHeader(FileType fileType, ushort fileFormatVersion)\n        {\n            if (fileType != FileType.FusionCatcher)\n                throw new InvalidDataException(\n                        $\"Found an invalid file type ({fileType}) while reading the gene fusions file.\")\n                    .MakeUserError();\n\n            if (fileFormatVersion != SupportedFileFormatVersion)\n                throw new 
InvalidDataException(\n                        $\"The gene fusion reader currently supports v{SupportedFileFormatVersion} files, but found v{fileFormatVersion} instead.\")\n                    .MakeUserError();\n        }\n\n        public void LoadAnnotations()\n        {\n            ArrayPool<byte>    bytePool = ArrayPool<byte>.Shared;\n            byte[]             bytes    = _reader.ReadCompressedByteArray(bytePool);\n            ReadOnlySpan<byte> byteSpan = bytes.AsSpan();\n            \n            int numOncogenes = SpanBufferBinaryReader.ReadOptInt32(ref byteSpan);\n            OncogeneKeys = new uint[numOncogenes];\n            for (var i = 0; i < numOncogenes; i++) OncogeneKeys[i] = SpanBufferBinaryReader.ReadOptUInt32(ref byteSpan);\n\n            int indexLength = SpanBufferBinaryReader.ReadOptInt32(ref byteSpan);\n            Index = new GeneFusionSourceCollection[indexLength];\n            for (var i = 0; i < indexLength; i++) Index[i] = GeneFusionSourceCollection.Read(ref byteSpan);\n\n            int numIndexEntries = SpanBufferBinaryReader.ReadOptInt32(ref byteSpan);\n            IndexEntries = new GeneFusionIndexEntry[numIndexEntries];\n            for (var i = 0; i < numIndexEntries; i++) IndexEntries[i] = GeneFusionIndexEntry.Read(ref byteSpan);\n\n            bytePool.Return(bytes);\n        }\n\n        public void AddAnnotations(IGeneFusionPair[] fusionPairs, IList<ISupplementaryAnnotation> supplementaryAnnotations)\n        {\n            var jsonEntries = new List<string>();\n\n            foreach (IGeneFusionPair fusionPair in fusionPairs)\n            {\n                ushort? 
index = IndexEntries.GetIndex(fusionPair.FusionKey);\n                if (index == null) continue;\n                jsonEntries.Add(Index[index.Value].GetJsonEntry(fusionPair, OncogeneKeys));\n            }\n\n            if (jsonEntries.Count == 0) return;\n\n            var sa = new SupplementaryAnnotation(JsonKey, true, false, null, jsonEntries);\n            supplementaryAnnotations.Add(sa);\n        }\n\n        public void Dispose() => _reader.Dispose();\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GeneFusions/IO/IGeneFusionSaReader.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.Interface.SA;\n\nnamespace VariantAnnotation.GeneFusions.IO\n{\n    public interface IGeneFusionSaReader : ISaMetadata, IDisposable\n    {\n        void LoadAnnotations();\n        // ReSharper disable once ParameterTypeCanBeEnumerable.Global\n        void AddAnnotations(IGeneFusionPair[] fusionPairs, IList<ISupplementaryAnnotation> supplementaryAnnotations);\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GeneFusions/SA/GeneFusionPair.cs",
    "content": "﻿using VariantAnnotation.Interface.AnnotatedPositions;\n\nnamespace VariantAnnotation.GeneFusions.SA\n{\n    public sealed record GeneFusionPair\n        (ulong FusionKey, string FirstGeneSymbol, uint FirstGeneKey, string SecondGeneSymbol, uint SecondGeneKey) : IGeneFusionPair\n    {\n        public bool Equals(GeneFusionPair other)\n        {\n            if (ReferenceEquals(null, other)) return false;\n            if (ReferenceEquals(this, other)) return true;\n            return FusionKey == other.FusionKey;\n        }\n\n        public override int GetHashCode() => FusionKey.GetHashCode();\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GeneFusions/SA/GeneFusionSource.cs",
    "content": "﻿// ReSharper disable InconsistentNaming\n// ReSharper disable UnusedMember.Global\n\nnamespace VariantAnnotation.GeneFusions.SA\n{\n    public enum GeneFusionSource : byte\n    {\n        None = 0,\n        Alaei_Mahabadi_18_Cancers, // 18cancer\n        Babiceanu_NonCancerTissues,\n        Bailey_pancreatic_cancers,\n        Bao_gliomas,\n        CACG,\n        Cancer_Genome_Project,\n        CCLE,\n        CCLE_Vellichirammal, // ccle3\n        ConjoinG,\n        COSMIC,\n        Duplicated_Genes_Database,\n        GTEx_healthy_tissues,\n        Healthy,\n        Healthy_prefrontal_cortex,\n        Healthy_strong_support, // banned\n        Human_Protein_Atlas,\n        Illumina_BodyMap2,\n        NonTumorCellLines,\n        OneK_Genomes_Project,\n        Paralog,\n        Pseudogene,\n        Readthrough,\n        Robinson_prostate_cancers,\n        TumorFusions_normal,\n        TCGA_oesophageal_carcinomas,\n        TCGA_Tumor,\n\n        // additional data sources (2021-05-25)\n        CCLE_Klign, // ccle2.txt\n        ChimerKB_4,\n        ChimerPub_4,\n        ChimerSeq_4,\n        Known,\n        Mitelman_DB,\n        OncoKB,\n        PCAWG,\n        TCGA,                // tcga.txt\n        TumorFusions_tumor,  // tcga-cancer.txt\n        TCGA_Gao,            // tcga2.txt\n        TCGA_Vellichirammal, // tcga3.txt\n        TICdb\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GeneFusions/SA/GeneFusionSourceCollection.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.Text;\nusing IO;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.IO;\n\nnamespace VariantAnnotation.GeneFusions.SA\n{\n    public sealed class GeneFusionSourceCollection : IEquatable<GeneFusionSourceCollection>\n    {\n        private readonly bool               _isPseudogenePair;\n        private readonly bool               _isParalogPair;\n        private readonly bool               _isReadthrough;\n        private readonly GeneFusionSource[] _germlineSources;\n        private readonly GeneFusionSource[] _somaticSources;\n\n        private const int PseudogeneMask  = 1;\n        private const int ParalogMask     = 2;\n        private const int ReadthroughMask = 4;\n\n        public GeneFusionSourceCollection(bool isPseudogenePair, bool isParalogPair, bool isReadthrough, GeneFusionSource[] germlineSources,\n            GeneFusionSource[] somaticSources)\n        {\n            _isPseudogenePair = isPseudogenePair;\n            _isParalogPair    = isParalogPair;\n            _isReadthrough    = isReadthrough;\n            _germlineSources  = germlineSources;\n            _somaticSources   = somaticSources;\n        }\n\n        public void Write(ExtendedBinaryWriter writer)\n        {\n            writer.Write(GetFlags());\n            WriteSourceGroup(writer, _germlineSources);\n            WriteSourceGroup(writer, _somaticSources);\n        }\n\n        private byte GetFlags()\n        {\n            byte flags                   = 0;\n            if (_isPseudogenePair) flags |= PseudogeneMask;\n            if (_isParalogPair) flags    |= ParalogMask;\n            if (_isReadthrough) flags    |= ReadthroughMask;\n            return flags;\n        }\n\n        // ReSharper disable once SuggestBaseTypeForParameter\n        private static void WriteSourceGroup(ExtendedBinaryWriter writer, GeneFusionSource[] sources)\n        {\n       
     if (sources == null)\n            {\n                writer.Write((byte)0);\n                return;\n            }\n\n            writer.WriteOpt(sources.Length);\n            foreach (GeneFusionSource source in sources) writer.Write((byte) source);\n        }\n\n        public static GeneFusionSourceCollection Read(ref ReadOnlySpan<byte> byteSpan)\n        {\n            byte flags            = SpanBufferBinaryReader.ReadByte(ref byteSpan);\n            bool isPseudogenePair = (flags & PseudogeneMask)  != 0;\n            bool isParalogPair    = (flags & ParalogMask)     != 0;\n            bool isReadthrough    = (flags & ReadthroughMask) != 0;\n\n            GeneFusionSource[] germlineSources = ReadSources(ref byteSpan);\n            GeneFusionSource[] somaticSources  = ReadSources(ref byteSpan);\n            return new GeneFusionSourceCollection(isPseudogenePair, isParalogPair, isReadthrough, germlineSources, somaticSources);\n        }\n\n        private static GeneFusionSource[] ReadSources(ref ReadOnlySpan<byte> byteSpan)\n        {\n            int numSources = SpanBufferBinaryReader.ReadOptInt32(ref byteSpan);\n            if (numSources == 0) return null;\n\n            var sources = new GeneFusionSource[numSources];\n\n            for (var i = 0; i < numSources; i++)\n            {\n                sources[i] = (GeneFusionSource) SpanBufferBinaryReader.ReadByte(ref byteSpan);\n            }\n\n            return sources;\n        }\n\n        // ReSharper disable once ParameterTypeCanBeEnumerable.Global\n        public string GetJsonEntry(IGeneFusionPair geneFusionPair, uint[] oncogeneKeys)\n        {\n            StringBuilder sb         = StringBuilderPool.Get();\n            var           jsonObject = new JsonObject(sb);\n            var           entries    = new List<string>();\n\n            AddGenes(geneFusionPair, oncogeneKeys, jsonObject);\n            if (_germlineSources != null) AddGeneFusionSource(\"germlineSources\", _germlineSources, 
entries, jsonObject);\n            if (_somaticSources  != null) AddGeneFusionSource(\"somaticSources\",  _somaticSources,  entries, jsonObject);\n            return StringBuilderPool.GetStringAndReturn(sb);\n        }\n\n        private void AddGenes(IGeneFusionPair geneFusionPair, uint[] oncogeneKeys, JsonObject jsonObject)\n        {\n            jsonObject.StartObjectWithKey(\"genes\");\n            AddGene(\"first\",  geneFusionPair.FirstGeneKey,  geneFusionPair.FirstGeneSymbol,  oncogeneKeys, jsonObject);\n            AddGene(\"second\", geneFusionPair.SecondGeneKey, geneFusionPair.SecondGeneSymbol, oncogeneKeys, jsonObject);\n\n            jsonObject.AddBoolValue(\"isParalogPair\",    _isParalogPair);\n            jsonObject.AddBoolValue(\"isPseudogenePair\", _isPseudogenePair);\n            jsonObject.AddBoolValue(\"isReadthrough\",    _isReadthrough);\n            jsonObject.EndObject();\n        }\n\n        private static void AddGene(string key, uint geneKey, string geneSymbol, uint[] oncogeneKeys, JsonObject jsonObject)\n        {\n            jsonObject.StartObjectWithKey(key);\n            jsonObject.AddStringValue(\"hgnc\", geneSymbol);\n\n            bool isOncogene = Array.BinarySearch(oncogeneKeys, geneKey) >= 0;\n            jsonObject.AddBoolValue(\"isOncogene\", isOncogene);\n\n            jsonObject.EndObject();\n        }\n\n        // ReSharper disable once ParameterTypeCanBeEnumerable.Local\n        private static void AddGeneFusionSource(string description, GeneFusionSource[] sources, List<string> entries, JsonObject jsonObject)\n        {\n            entries.Clear();\n            foreach (GeneFusionSource source in sources)\n            {\n                string sourceString = GeneFusionSourceUtilities.Convert(source);\n                if (sourceString != null) entries.Add(sourceString);\n            }\n            jsonObject.AddStringValues(description, entries);\n        }\n\n        public bool Equals(GeneFusionSourceCollection 
other)\n        {\n            if (ReferenceEquals(null, other)) return false;\n            if (ReferenceEquals(this, other)) return true;\n\n            return _isPseudogenePair == other._isPseudogenePair        &&\n                   _isParalogPair    == other._isParalogPair           &&\n                   _isReadthrough    == other._isReadthrough           &&\n                   _germlineSources.ArrayEqual(other._germlineSources) &&\n                   _somaticSources.ArrayEqual(other._somaticSources);\n        }\n\n        public override int GetHashCode()\n        {\n            var hashCode = new HashCode();\n            hashCode.Add(_isPseudogenePair);\n            hashCode.Add(_isParalogPair);\n            hashCode.Add(_isReadthrough);\n            \n            if (_germlineSources != null)\n                foreach (GeneFusionSource source in _germlineSources)\n                    hashCode.Add((byte) source);\n            \n            if (_somaticSources != null)\n                foreach (GeneFusionSource source in _somaticSources)\n                    hashCode.Add((byte) source);\n            \n            return hashCode.ToHashCode();\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GeneFusions/SA/GeneFusionSourceUtilities.cs",
    "content": "﻿namespace VariantAnnotation.GeneFusions.SA\n{\n    public static class GeneFusionSourceUtilities\n    {\n        public static string Convert(GeneFusionSource source)\n        {\n            // ReSharper disable once SwitchExpressionHandlesSomeKnownEnumValuesWithExceptionInDefault\n            return source switch\n            {\n                GeneFusionSource.Alaei_Mahabadi_18_Cancers   => \"Alaei-Mahabadi 18 cancers\",\n                GeneFusionSource.Babiceanu_NonCancerTissues  => \"Babiceanu non-cancer tissues\",\n                GeneFusionSource.Bailey_pancreatic_cancers   => \"Bailey pancreatic cancers\",\n                GeneFusionSource.Bao_gliomas                 => \"Bao gliomas\",\n                GeneFusionSource.CACG                        => \"CACG\",\n                GeneFusionSource.Cancer_Genome_Project       => \"Cancer Genome Project\",\n                GeneFusionSource.CCLE                        => \"DepMap CCLE\",\n                GeneFusionSource.CCLE_Klign                  => \"CCLE Klijn\",\n                GeneFusionSource.CCLE_Vellichirammal         => \"CCLE Vellichirammal\",\n                GeneFusionSource.ChimerKB_4                  => \"ChimerKB 4.0\",\n                GeneFusionSource.ChimerPub_4                 => \"ChimerPub 4.0\",\n                GeneFusionSource.ChimerSeq_4                 => \"ChimerSeq 4.0\",\n                GeneFusionSource.ConjoinG                    => \"ConjoinG\",\n                GeneFusionSource.COSMIC                      => \"COSMIC\",\n                GeneFusionSource.Duplicated_Genes_Database   => \"Duplicated Genes Database\",\n                GeneFusionSource.GTEx_healthy_tissues        => \"GTEx healthy tissues\",\n                GeneFusionSource.Healthy                     => \"Healthy\",\n                GeneFusionSource.Healthy_prefrontal_cortex   => \"Healthy prefrontal cortex\",\n                GeneFusionSource.Healthy_strong_support      => \"Healthy (strong 
support)\",\n                GeneFusionSource.Human_Protein_Atlas         => \"Human Protein Atlas\",\n                GeneFusionSource.Illumina_BodyMap2           => \"Illumina Body Map 2.0\",\n                GeneFusionSource.Known                       => \"Known\",\n                GeneFusionSource.Mitelman_DB                 => \"Mitelman DB\",\n                GeneFusionSource.NonTumorCellLines           => \"non-tumor cell lines\",\n                GeneFusionSource.OncoKB                      => \"OncoKB\",\n                GeneFusionSource.OneK_Genomes_Project        => \"1000 Genomes Project\",\n                GeneFusionSource.PCAWG                       => \"PCAWG\",\n                GeneFusionSource.Robinson_prostate_cancers   => \"Robinson prostate cancers\",\n                GeneFusionSource.TCGA                        => \"TCGA\",\n                GeneFusionSource.TCGA_Gao                    => \"TCGA Gao\",\n                GeneFusionSource.TCGA_oesophageal_carcinomas => \"TCGA oesophageal carcinomas\",\n                GeneFusionSource.TCGA_Tumor                  => \"TCGA tumor\",\n                GeneFusionSource.TCGA_Vellichirammal         => \"TCGA Vellichirammal\",\n                GeneFusionSource.TICdb                       => \"TICdb\",\n                GeneFusionSource.TumorFusions_tumor          => \"TumorFusions tumor\",\n                GeneFusionSource.TumorFusions_normal         => \"TumorFusions normal\",\n                _                                            => null\n            };\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GeneFusions/Utilities/GeneFusionKey.cs",
    "content": "﻿using System;\n\nnamespace VariantAnnotation.GeneFusions.Utilities\n{\n    public static class GeneFusionKey\n    {\n        public static ulong Create(uint num, uint num2)\n        {\n            if (num == 0 || num2 == 0) return 0;\n            (ulong min, ulong max) = num < num2 ? (num, num2) : (num2, num);\n            return min << 32 | max;\n        }\n\n        public static uint CreateGeneKey(string geneId)\n        {\n            if (geneId == null) return 0;\n            ReadOnlySpan<char> geneSpan = geneId.AsSpan().Slice(4);\n            return uint.Parse(geneSpan);\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GeneFusions/Utilities/IndexEntryExtensions.cs",
    "content": "﻿using VariantAnnotation.GeneFusions.IO;\n\nnamespace VariantAnnotation.GeneFusions.Utilities\n{\n    public static class IndexEntryExtensions\n    {\n        public static ushort? GetIndex(this GeneFusionIndexEntry[] array, ulong geneKey)\n        {\n            var begin = 0;\n            int end   = array.Length - 1;\n\n            while (begin <= end)\n            {\n                int index = begin + (end - begin >> 1);\n\n                int ret = array[index].Compare(geneKey);\n                // ReSharper disable once ConvertIfStatementToSwitchStatement\n                if (ret == 0) return array[index].Index;\n                if (ret < 0) begin = index + 1;\n                else end           = index - 1;\n            }\n\n            return null;\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GenericScore/ChromosomeBlock.cs",
    "content": "using System.Collections.Generic;\nusing IO;\n\nnamespace VariantAnnotation.GenericScore\n{\n    public sealed class ChromosomeBlock\n    {\n        private         List<ScoreIndexBlock> ScoreIndexBlocks { get; }\n        public          int                   BlockCount;\n        public readonly long                  StartingPosition;\n\n        public ChromosomeBlock(List<ScoreIndexBlock> scoreIndexBlocks, int blockCount, long startingPosition)\n        {\n            ScoreIndexBlocks = scoreIndexBlocks;\n            BlockCount       = blockCount;\n            StartingPosition = startingPosition;\n        }\n\n        /// <summary>\n        /// Add the index block to the list of all blocks for each chromosome\n        /// </summary>\n        /// <param name=\"indexBlock\"></param>\n        public void Add(ScoreIndexBlock indexBlock)\n        {\n            ScoreIndexBlocks.Add(indexBlock);\n            BlockCount++;\n        }\n        \n        /// <summary>\n        /// Returns the index block corresponding to the blocknumber\n        /// </summary>\n        /// <param name=\"blockNumber\"></param>\n        /// <returns></returns>\n        public ScoreIndexBlock Get(int blockNumber)\n        {\n            return blockNumber < BlockCount ? 
ScoreIndexBlocks[blockNumber] : null;\n        }\n        \n\n        /// <summary>\n        /// Serialize the instance to writer stream\n        /// </summary>\n        /// <param name=\"writer\"></param>\n        public void Write(ExtendedBinaryWriter writer)\n        {\n            writer.WriteOpt(BlockCount);\n            writer.WriteOpt(StartingPosition);\n            foreach (ScoreIndexBlock scoreIndexBlock in ScoreIndexBlocks)\n            {\n                scoreIndexBlock.Write(writer);\n            }\n        }\n\n        /// <summary>\n        /// Deserialize the instance from reader stream\n        /// </summary>\n        /// <param name=\"reader\"></param>\n        /// <returns></returns>\n        public static ChromosomeBlock Read(ExtendedBinaryReader reader)\n        {\n            int  blockCount       = reader.ReadOptInt32();\n            long startingPosition = reader.ReadOptInt64();\n\n            var scoreIndexBlocks = new List<ScoreIndexBlock>(blockCount);\n            for (var i = 0; i < blockCount; i++)\n            {\n                scoreIndexBlocks.Add(ScoreIndexBlock.Read(reader));\n            }\n\n            return new ChromosomeBlock(scoreIndexBlocks, blockCount, startingPosition);\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GenericScore/EncoderType.cs",
    "content": "namespace VariantAnnotation.GenericScore;\n\npublic enum EncoderType: byte\n{\n    Generic,\n    ZeroToOne\n}"
  },
  {
    "path": "VariantAnnotation/GenericScore/GenericScoreEncoder.cs",
    "content": "using System;\nusing System.Collections.Generic;\nusing System.Collections.Immutable;\nusing IO;\n\nnamespace VariantAnnotation.GenericScore;\n\npublic sealed class GenericScoreEncoder : IScoreEncoder\n{\n    private readonly byte[]                              _encodedArray;\n    private readonly Dictionary<double, ushort>          _scoreMap;\n    private          ImmutableDictionary<ushort, double> _scoreMapReader;\n    public           ushort                              BytesRequired => 2;\n    private          ushort                              _nextScoreCode;\n\n    public GenericScoreEncoder()\n    {\n        _encodedArray = new byte[BytesRequired];\n        _scoreMap     = new Dictionary<double, ushort>(byte.MaxValue);\n    }\n\n    public ushort AddScore(double number)\n    {\n        // if the score is already in the map, return the index\n        // this is because the score and the code should both be unique\n        if(_scoreMap.TryGetValue(number, out ushort code)) return code;\n        \n        // if the score is not in the map, add it and return the index\n        code = _nextScoreCode++;\n        _scoreMap.Add(number, code);\n        return code;\n    }\n\n    public byte[] EncodeToBytes(double number)\n    {\n        Array.Clear(_encodedArray, 0, _encodedArray.Length);\n        ushort transformedNumber = AddScore(number);\n\n        // BitConverter is used as a convenient means of transforming the number into bytes\n        // Only the `BytesRequired` portion is saved, because the converted bytes will not exceed it.\n        Array.Copy(BitConverter.GetBytes(transformedNumber), _encodedArray, BytesRequired);\n        return _encodedArray;\n    }\n\n    public double DecodeFromBytes(ReadOnlySpan<byte> encodedArray)\n    {\n        // Because the scoreMap uses `ushort`\n        return GetScore(BitConverter.ToUInt16(encodedArray));\n    }\n\n    private double GetScore(ushort encodedNumber)\n    {\n        return 
_scoreMapReader.GetValueOrDefault(encodedNumber, double.NaN);\n    }\n\n    public void Write(ExtendedBinaryWriter writer)\n    {\n        writer.WriteOpt(_scoreMap.Count);\n        foreach ((double score, ushort code) in _scoreMap)\n        {\n            writer.Write(code);\n            writer.Write(score);\n        }\n    }\n\n    public static GenericScoreEncoder Read(ExtendedBinaryReader reader)\n    {\n        int scoreCount     = reader.ReadOptInt32();\n        var scoreMapReader = new Dictionary<ushort, double>(scoreCount);\n        for (var i = 0; i < scoreCount; i++)\n        {\n            scoreMapReader.Add(reader.ReadUInt16(), reader.ReadDouble());\n        }\n\n        return new GenericScoreEncoder\n        {\n            _scoreMapReader = scoreMapReader.ToImmutableDictionary()\n        };\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GenericScore/IScoreEncoder.cs",
    "content": "using System;\nusing IO;\n\nnamespace VariantAnnotation.GenericScore;\n\npublic interface IScoreEncoder\n{\n    public ushort      BytesRequired { get; }\n    public byte[]      EncodeToBytes(double number);\n    public double      DecodeFromBytes(ReadOnlySpan<byte> encodedArray);\n\n    public void Write(ExtendedBinaryWriter writer);\n}"
  },
  {
    "path": "VariantAnnotation/GenericScore/MetaData.cs",
    "content": "using System;\nusing CommandLine.Utilities;\n\nnamespace VariantAnnotation.GenericScore\n{\n    public sealed class MetaData\n    {\n        private int     _totalChromosomeCount;\n        private int     _totalBlockCount;\n        private int     _blankBlockCount;\n        private uint    _compressedChromosomeSize;\n        private uint    _uncompressedChromosomeSize;\n        private uint    _compressedSize;\n        private uint    _uncompressedSize;\n        private uint    _unmatchedReferencePositionsCount;\n        private double  _totalProcessingTime;\n        private double  _chromosomeProcessingTime;\n        private ushort? _lastChromosome = null;\n\n        private readonly Benchmark _blockBenchmark;\n\n        private const string DashedLine = \"________________________________________________________________\";\n\n        private double AverageCompressionRatio    => CalculateCompressionRatio(_compressedSize, _uncompressedSize);\n        private double AverageCompressedBlockSize => (double) _compressedSize / _totalBlockCount;\n        private double AverageProcessingTime      => _totalProcessingTime     / _totalBlockCount;\n        private double AverageWriteSpeed          => _compressedSize          / _totalProcessingTime / 1_000_000;\n\n        private static double CalculateCompressionRatio(uint compressedSize, uint uncompressedSize)\n        {\n            return compressedSize * 100.0 / uncompressedSize;\n        }\n\n        public MetaData()\n        {\n            _blockBenchmark = new Benchmark();\n        }\n\n        public void AddIndexBlock(ushort chromosomeIndex, int blockNumber, long fileStartingPosition, uint uncompressedSize, uint compressedSize)\n        {\n            double processingTime = _blockBenchmark.GetElapsedTime().TotalSeconds;\n\n            _totalBlockCount++;\n            if (fileStartingPosition < 0)\n            {\n                _blankBlockCount++;\n            }\n\n            _uncompressedSize += 
uncompressedSize;\n            _compressedSize   += compressedSize;\n\n            _uncompressedChromosomeSize += uncompressedSize;\n            _compressedChromosomeSize   += compressedSize;\n\n            _totalProcessingTime      += processingTime;\n            _chromosomeProcessingTime += processingTime;\n\n            PrintFormattedString(chromosomeIndex, blockNumber, uncompressedSize, compressedSize, processingTime);\n\n            _blockBenchmark.Reset();\n        }\n\n        public void AddChromosomeBlock(ushort chromosomeIndex)\n        {\n            _totalChromosomeCount++;\n\n            if (_lastChromosome == null)\n            {\n                _lastChromosome = chromosomeIndex;\n                return;\n            }\n\n            PrintFormattedString(_lastChromosome, null, _uncompressedChromosomeSize, _compressedChromosomeSize, _chromosomeProcessingTime);\n\n            _lastChromosome             = chromosomeIndex;\n            _chromosomeProcessingTime   = 0;\n            _uncompressedChromosomeSize = 0;\n            _compressedChromosomeSize   = 0;\n        }\n\n        public void TrackUnmatchedReferencePositions()\n        {\n            _unmatchedReferencePositionsCount++;\n        }\n\n        private static void PrintFormattedString(ushort? chromosomeIndex, int? 
blockNumber, uint uncompressedSize, uint compressedSize,\n            double processingTime)\n        {\n            string headerLine = $\"{chromosomeIndex}:{blockNumber}\";\n\n            if (blockNumber == null)\n            {\n                headerLine = $\"{DashedLine}\\n{chromosomeIndex}\";\n            }\n\n            Console.WriteLine(\n                $\"{headerLine}\"                                                          +\n                $\"\\t{compressedSize} bytes/{uncompressedSize} bytes\\t= \"                 +\n                $\"{CalculateCompressionRatio(compressedSize, uncompressedSize):F1} % \\t\" +\n                $\"Processing Time {processingTime:F4} s\"\n            );\n\n            if (blockNumber == null)\n            {\n                Console.WriteLine($\"{DashedLine}\");\n            }\n        }\n\n        public void PrintWriteMetrics()\n        {\n            PrintFormattedString(_lastChromosome, null, _uncompressedChromosomeSize, _compressedChromosomeSize, _chromosomeProcessingTime);\n\n            Console.WriteLine(\n                $\"{DashedLine}\\n\"                                                          +\n                $\"Write Metrics\\n\"                                                         +\n                $\"{DashedLine}\\n\"                                                          +\n                $\"Total Chromosomes = {_totalChromosomeCount}\\n\"                           +\n                $\"Total Blocks = {_totalBlockCount}\\n\"                                     +\n                $\"Blank Blocks = {_blankBlockCount}\\n\"                                     +\n                $\"Unmatched Reference Positions = {_unmatchedReferencePositionsCount}\\n\"   +\n                $\"Total Compressed Size = {_compressedSize} bytes\\n\"                       +\n                $\"Total Uncompressed Size = {_uncompressedSize} bytes\\n\"                   +\n                $\"Total Processing Time = 
{_totalProcessingTime:F3} seconds\\n\"             +\n                $\"Average Compressed Block Size = {AverageCompressedBlockSize:F0} bytes\\n\" +\n                $\"Average Compression Ratio = {AverageCompressionRatio:F1} %\\n\"            +\n                $\"Average Processing Time = {AverageProcessingTime:F4} seconds\\n\"          +\n                $\"Average Writing Speed = {AverageWriteSpeed:F4} MB/second\\n\"              +\n                $\"{DashedLine}\"\n            );\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GenericScore/ReaderSettings.cs",
    "content": "using System;\nusing IO;\n\nnamespace VariantAnnotation.GenericScore\n{\n    public sealed class ReaderSettings\n    {\n        public readonly bool             IsPositional;\n        public readonly EncoderType      EncoderType;\n        public readonly IScoreEncoder    ScoreEncoder;\n        public readonly ScoreJsonEncoder ScoreJsonEncoder;\n        public readonly string[]         Nucleotides;\n        public readonly int              BlockLength;\n\n        public ushort BytesRequired => ScoreEncoder.BytesRequired;\n\n        public ReaderSettings(\n            bool isPositional,\n            EncoderType encoderType,\n            IScoreEncoder scoreEncoder,\n            ScoreJsonEncoder scoreJsonEncoder,\n            string[] nucleotides,\n            int blockLength\n        )\n        {\n            IsPositional     = isPositional;\n            EncoderType      = encoderType;\n            ScoreEncoder     = scoreEncoder;\n            ScoreJsonEncoder = scoreJsonEncoder;\n            BlockLength      = blockLength;\n            Nucleotides      = nucleotides;\n        }\n\n\n        public static ReaderSettings Read(ExtendedBinaryReader reader)\n        {\n            bool isPositional = reader.ReadBoolean();\n            var  encoderType  = (EncoderType) reader.ReadByte();\n            IScoreEncoder scoreEncoder = encoderType switch\n            {\n                EncoderType.ZeroToOne => ZeroToOneScoreEncoder.Read(reader),\n                EncoderType.Generic   => GenericScoreEncoder.Read(reader),\n                _                     => throw new Exception(\"Unknown score encoder\")\n            };\n\n            ScoreJsonEncoder scoreJsonEncoder = ScoreJsonEncoder.Read(reader);\n\n            byte nucleotideCount = reader.ReadByte();\n            var  nucleotides     = new string[nucleotideCount];\n\n            for (var i = 0; i < nucleotideCount; i++)\n            {\n                string value = reader.ReadAsciiString();\n             
   nucleotides[i] = value;\n            }\n\n            int blockLength = reader.ReadOptInt32();\n\n            return new ReaderSettings(\n                isPositional,\n                encoderType,\n                scoreEncoder,\n                scoreJsonEncoder,\n                nucleotides,\n                blockLength\n            );\n        }\n\n        public void Write(ExtendedBinaryWriter writer)\n        {\n            writer.Write(IsPositional);\n            writer.Write((byte) EncoderType);\n            ScoreEncoder.Write(writer);\n            ScoreJsonEncoder.Write(writer);\n\n            writer.Write((byte) Nucleotides.Length);\n            foreach (string key in Nucleotides)\n            {\n                writer.WriteOptAscii(key);\n            }\n\n            writer.WriteOpt(BlockLength);\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GenericScore/ScoreBlock.cs",
    "content": "using System;\nusing Compression.Algorithms;\nusing IO;\n\nnamespace VariantAnnotation.GenericScore\n{\n    public sealed class ScoreBlock\n    {\n        private readonly ICompressionAlgorithm _compressionAlgorithm;\n\n        private readonly byte[] _compressedBytes;\n        private readonly byte[] _uncompressedBytes;\n        private          uint   _cursorPosition;\n        private readonly int    _blockSize;\n\n\n        public ScoreBlock(ICompressionAlgorithm compressionAlgorithm, int blockSize)\n        {\n            _compressionAlgorithm = compressionAlgorithm;\n            _blockSize            = blockSize;\n\n            int compressedBlockSize = _compressionAlgorithm.GetCompressedBufferBounds(_blockSize);\n\n            _compressedBytes   = new byte[compressedBlockSize];\n            _uncompressedBytes = new byte[_blockSize];\n            Clear();\n        }\n\n        private void Clear()\n        {\n            Array.Fill(_uncompressedBytes, byte.MaxValue);\n            _cursorPosition = 0;\n        }\n\n        public bool IsFull()\n        {\n            return _cursorPosition == _blockSize;\n        }\n\n        public void Add(uint memoryIndex, byte[] variableArray, uint arraySize)\n        {\n            Array.Copy(variableArray, 0, _uncompressedBytes, memoryIndex, arraySize);\n            _cursorPosition = (memoryIndex + arraySize);\n        }\n\n        public (uint uncompressedSize, int compressedSize) Write(ExtendedBinaryWriter writer)\n        {\n            int compressedSize = _compressionAlgorithm.Compress(\n                _uncompressedBytes,\n                _blockSize,\n                _compressedBytes,\n                _compressedBytes.Length\n            );\n\n            writer.Write(_compressedBytes, 0, compressedSize);\n            uint uncompressedSize = _cursorPosition;\n            Clear();\n            return (uncompressedSize, compressedSize);\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GenericScore/ScoreIndex.cs",
    "content": "using System.Collections.Generic;\nusing System.IO;\nusing ErrorHandling.Exceptions;\nusing Genome;\nusing IO;\nusing IO.v2;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\n\nnamespace VariantAnnotation.GenericScore\n{\n    public sealed class ScoreIndex\n    {\n        private readonly ExtendedBinaryWriter                _writer;\n        private readonly int                                 _blockLength;\n        private readonly ushort                              _scoreSize;\n        private readonly byte                                _nucleotideCount;\n        private readonly Dictionary<string, ushort>          _nucleotideIndexMapper;\n        public readonly  GenomeAssembly                      Assembly;\n        public readonly  int                                 SchemaVersion;\n        private readonly Header                              _indexHeader;\n        private readonly int                                 _filePairId;\n        public readonly  IDataSourceVersion                  Version;\n        private readonly MetaData                            _metaData;\n        private          Dictionary<ushort, ChromosomeBlock> _chromosomeBlocks;\n        public readonly  ReaderSettings                      ReaderSettings;\n\n        public ScoreIndex(\n            ExtendedBinaryWriter indexWriter,\n            ReaderSettings readerSettings,\n            GenomeAssembly assembly,\n            IDataSourceVersion version,\n            int schemaVersion,\n            Header indexHeader,\n            int filePairId\n        )\n        {\n            _writer       = indexWriter;\n            Assembly      = assembly;\n            Version       = version;\n            SchemaVersion = schemaVersion;\n            _indexHeader  = indexHeader;\n            _filePairId   = filePairId;\n\n            ReaderSettings = readerSettings;\n\n            _chromosomeBlocks = new Dictionary<ushort, 
ChromosomeBlock>();\n            _metaData         = new MetaData();\n\n            string[] nucleotides = readerSettings.Nucleotides;\n            _nucleotideCount = (byte) nucleotides.Length;\n\n            _scoreSize   = readerSettings.BytesRequired;\n            _blockLength = _nucleotideCount * readerSettings.BlockLength * _scoreSize;\n\n            // Nucleotide to position mapping\n            _nucleotideIndexMapper = new Dictionary<string, ushort>();\n            for (ushort i = 0; i < _nucleotideCount; i++)\n            {\n                _nucleotideIndexMapper[readerSettings.Nucleotides[i]] = (ushort) (i * _scoreSize);\n            }\n        }\n\n        /// <summary>\n        /// Add the block to index\n        /// </summary>\n        /// <param name=\"chromIndex\"></param>\n        /// <param name=\"filePosition\"></param>\n        /// <param name=\"compressedSize\"></param>\n        /// <param name=\"uncompressedSize\"></param>\n        public void Add(ushort chromIndex, long filePosition, int compressedSize, uint uncompressedSize)\n        {\n            // Create index block and add to chromosome block\n            var indexBlock = new ScoreIndexBlock(filePosition, compressedSize);\n            _chromosomeBlocks[chromIndex].Add(indexBlock);\n            int blockNumber = GetLastBlockNumber(chromIndex);\n            _metaData.AddIndexBlock(chromIndex, blockNumber, filePosition, uncompressedSize, (uint) compressedSize);\n        }\n\n        public void AddChromosomeBlock(ushort chromIndex, int chromosomeStartingPosition)\n        {\n            _chromosomeBlocks[chromIndex] = new ChromosomeBlock(new List<ScoreIndexBlock>(), 0, chromosomeStartingPosition);\n            _metaData.AddChromosomeBlock(chromIndex);\n        }\n\n        public void TrackUnmatchedReferencePositions()\n        {\n            _metaData.TrackUnmatchedReferencePositions();\n        }\n\n        private void WriteHeader()\n        {\n            _indexHeader.Write(_writer);\n    
        _writer.WriteOpt(_filePairId);\n            _writer.Write(SaCommon.GuardInt);\n        }\n\n        private static void CheckHeader(Header header)\n        {\n            (FileType fileType, ushort fileFormatVersion) = header;\n            if (fileType != FileType.GsaIndex)\n                throw new UserErrorException($\"The file type {fileType} version {fileFormatVersion} \" +\n                                             $\"is not supported by this reader {FileType.GsaIndex}\");\n        }\n\n        private static (Header indexHeader, int filePairId) ReadHeader(ExtendedBinaryReader reader, int expectedFilePairId)\n        {\n            Header indexHeader = Header.Read(reader);\n            CheckHeader(indexHeader);\n            int  filePairId = reader.ReadOptInt32();\n            uint guardInt   = reader.ReadUInt32();\n\n            if (guardInt != SaCommon.GuardInt || filePairId != expectedFilePairId)\n            {\n                throw new UserErrorException(\"Unable to read the index\");\n            }\n\n            return (indexHeader, filePairId);\n        }\n\n        /// <summary>\n        /// Serialize the instance to writer stream\n        /// </summary>\n        public void Write()\n        {\n            WriteHeader();\n            _writer.Write((byte) Assembly);\n            Version.Write(_writer);\n            _writer.WriteOpt(SchemaVersion);\n\n            _writer.WriteOpt(_chromosomeBlocks.Count);\n            // Write the Chromsome Blocks\n            foreach ((ushort index, ChromosomeBlock chromosomeBlocks) in _chromosomeBlocks)\n            {\n                _writer.WriteOpt(index);\n                chromosomeBlocks.Write(_writer);\n            }\n\n            ReaderSettings.Write(_writer);\n\n            _metaData.PrintWriteMetrics();\n        }\n\n        /// <summary>\n        /// Deserialize the instance from reader stream\n        /// </summary>\n        /// <param name=\"stream\"></param>\n        /// <param 
name=\"dataFilePairId\"></param>\n        /// <returns></returns>\n        public static ScoreIndex Read(Stream stream, int dataFilePairId)\n        {\n            using (var memStream = new MemoryStream())\n            using (var reader = new ExtendedBinaryReader(memStream))\n            {\n                stream.CopyTo(memStream); //reading all bytes in stream to memStream\n                memStream.Position = 0;\n\n                (Header indexHeader, int filePairId) = ReadHeader(reader, dataFilePairId);\n\n                GenomeAssembly     assembly      = (GenomeAssembly) reader.ReadByte();\n                IDataSourceVersion version       = DataSourceVersion.Read(reader);\n                int                schemaVersion = reader.ReadOptInt32();\n\n                int chromCount = reader.ReadOptInt32();\n\n                // read the chromblocks\n                var chromBlocks = new Dictionary<ushort, ChromosomeBlock>(chromCount);\n                for (var i = 0; i < chromCount; i++)\n                {\n                    var chromIndex = reader.ReadOptUInt16();\n                    chromBlocks[chromIndex] = ChromosomeBlock.Read(reader);\n                }\n\n                ReaderSettings readerSettings = ReaderSettings.Read(reader);\n\n                var scoreIndex = new ScoreIndex(\n                    null,\n                    readerSettings,\n                    assembly,\n                    version,\n                    schemaVersion,\n                    indexHeader,\n                    filePairId\n                )\n                {\n                    _chromosomeBlocks = chromBlocks,\n                };\n\n                return scoreIndex;\n            }\n        }\n\n        /// <summary>\n        /// Return the file position of the block containing the given chromosome and chromosomal position\n        /// </summary>\n        /// <param name=\"chromIndex\"></param>\n        /// <param name=\"position\"></param>\n        /// 
<returns></returns>\n        public long GetFilePosition(ushort chromIndex, int position)\n        {\n            if (_chromosomeBlocks == null || !_chromosomeBlocks.TryGetValue(chromIndex, out var chromosomeBlock)) return -1;\n            int blockNumber = GetBlockNumber(chromosomeBlock, position);\n\n            if (blockNumber < 0) return -1;\n            return chromosomeBlock.Get(blockNumber) != null ? chromosomeBlock.Get(blockNumber).FilePosition : -1;\n        }\n\n        /// <summary>\n        /// Returns the block number which would contain the position\n        /// Because each block is of a known size, (e.g. 10_000)\n        /// the first position (e.g. 10_001) can be used to find the file position\n        /// Example: \n        ///     blockNumber = (354_011 - 10_001) / 10_000 = 34 (integer division), so zero-based block 34 contains the position 354_011\n        /// </summary>\n        /// <param name=\"chromosomeBlock\"></param>\n        /// <param name=\"position\"></param>\n        /// <returns></returns>\n        private int GetBlockNumber(ChromosomeBlock chromosomeBlock, int position)\n        {\n            // Position is less than start position\n            if (position < chromosomeBlock.StartingPosition) return -1;\n\n            (int blockNumber, _) = PositionToBlockLocation(position, (int) chromosomeBlock.StartingPosition);\n\n            // Position is outside the last block\n            if (blockNumber >= chromosomeBlock.BlockCount) return -1;\n\n            return blockNumber;\n        }\n\n        public int GetBlockNumber(ushort chromosomeIndex, int position)\n        {\n            if (_chromosomeBlocks == null || !_chromosomeBlocks.TryGetValue(chromosomeIndex, out ChromosomeBlock chromosomeBlock)) return -1;\n            return GetBlockNumber(chromosomeBlock, position);\n        }\n\n        public int GetBytesToRead(ushort chromIndex, int blockNumber)\n        {\n            return _chromosomeBlocks[chromIndex].Get(blockNumber).BytesWritten;\n        }\n\n        
public int GetBlockLength()\n        {\n            return _blockLength;\n        }\n\n        public uint GetNucleotideCount()\n        {\n            return _nucleotideCount;\n        }\n\n        public ushort? GetNucleotidePosition(string saItemAltAllele)\n        {\n            if (!_nucleotideIndexMapper.ContainsKey(saItemAltAllele)) return null;\n            return _nucleotideIndexMapper[saItemAltAllele];\n        }\n\n        public (int blockNumber, int localBlockIndex) PositionToBlockLocation(int position, int startingPosition)\n        {\n            // Position is less than start position\n            if (position < startingPosition) return (-1, -1);\n\n            int deltaPosition = (position - startingPosition) * _nucleotideCount * _scoreSize;\n\n            return (deltaPosition / _blockLength, deltaPosition % _blockLength);\n        }\n\n        private (int blockNumber, int localBlockIndex) PositionToBlockLocation(ChromosomeBlock chromosomeBlock, int position)\n        {\n            return PositionToBlockLocation(position, (int) chromosomeBlock.StartingPosition);\n        }\n\n        public (int blockNumber, int localBlockIndex) PositionToBlockLocation(ushort chromosomeIndex, int position)\n        {\n            if (_chromosomeBlocks == null || !_chromosomeBlocks.TryGetValue(chromosomeIndex, out var chromosomeBlock)) return (-1, -1);\n            return PositionToBlockLocation(chromosomeBlock, position);\n        }\n\n        public Dictionary<ushort, ChromosomeBlock> GetChromosomeBlocks()\n        {\n            return _chromosomeBlocks;\n        }\n\n        public int GetLastBlockNumber(ushort chromosomeIndex)\n        {\n            return _chromosomeBlocks[chromosomeIndex].BlockCount - 1;\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GenericScore/ScoreIndexBlock.cs",
    "content": "using IO;\n\nnamespace VariantAnnotation.GenericScore\n{\n    public sealed class ScoreIndexBlock\n    {\n        public readonly long FilePosition;\n        public readonly int  BytesWritten;\n\n        public ScoreIndexBlock(long filePosition, int bytesWritten)\n        {\n            FilePosition = filePosition;\n            BytesWritten = bytesWritten;\n        }\n\n        /// <summary>\n        /// Deserialize the instance from reader stream\n        /// </summary>\n        /// <param name=\"reader\"></param>\n        /// <returns></returns>\n        public static ScoreIndexBlock Read(ExtendedBinaryReader reader)\n        {\n            long filePosition = reader.ReadOptInt64();\n            int  bytesWritten = reader.ReadOptInt32();\n\n            return new ScoreIndexBlock(filePosition, bytesWritten);\n        }\n\n        /// <summary>\n        /// Serialize the instance to writer stream\n        /// </summary>\n        /// <param name=\"writer\"></param>\n        public void Write(ExtendedBinaryWriter writer)\n        {\n            writer.WriteOpt(FilePosition);\n            writer.WriteOpt(BytesWritten);\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GenericScore/ScoreJsonEncoder.cs",
    "content": "using IO;\n\nnamespace VariantAnnotation.GenericScore\n{\n    public sealed class ScoreJsonEncoder\n    {\n        public readonly  string JsonKey;\n        private readonly string _jsonSubKey;\n\n        public string JsonRepresentation<T>(T data)\n        {\n            if (_jsonSubKey != null)\n                return $\"\\\"{_jsonSubKey}\\\":{data}\";\n            \n            return data.ToString();\n        }\n\n        public ScoreJsonEncoder(string jsonKey, string jsonSubKey)\n        {\n            JsonKey     = jsonKey;\n            _jsonSubKey = jsonSubKey;\n        }\n\n        public void Write(ExtendedBinaryWriter writer)\n        {\n            writer.WriteOptAscii(JsonKey);\n            writer.WriteOptAscii(_jsonSubKey);\n        }\n\n        public static ScoreJsonEncoder Read(ExtendedBinaryReader reader)\n        {\n            return new ScoreJsonEncoder(\n                reader.ReadAsciiString(),\n                reader.ReadAsciiString()\n            );\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GenericScore/ScoreReader.cs",
    "content": "using System;\nusing System.Buffers;\nusing System.IO;\nusing Compression.Algorithms;\nusing ErrorHandling.Exceptions;\nusing Genome;\nusing IO;\nusing IO.v2;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.SA;\n\nnamespace VariantAnnotation.GenericScore\n{\n    public sealed class ScoreReader : ISaMetadata\n    {\n        private const    int                  FileFormatVersion = 1;\n        private readonly ExtendedBinaryReader _reader;\n        public           GenomeAssembly       Assembly { get; }\n        private readonly ScoreIndex           _index;\n        public           IDataSourceVersion   Version { get; }\n        public           string               JsonKey { get; }\n\n        private readonly ICompressionAlgorithm _compressionAlgorithm = new Zstandard();\n\n        private readonly byte[] _uncompressedBlock;\n        private readonly byte[] _compressedBlock;\n\n        private          long? _lastFileLocation;\n        private readonly int   _encodedScoreSize;\n\n        private ScoreReader(ScoreIndex scoreIndex, ExtendedBinaryReader dataFileReader)\n        {\n            _index  = scoreIndex;\n            _reader = dataFileReader;\n\n            Assembly = _index.Assembly;\n            Version  = _index.Version;\n            JsonKey  = _index.ReaderSettings.ScoreJsonEncoder.JsonKey;\n\n            if (_index.SchemaVersion != SaCommon.SchemaVersion)\n                throw new UserErrorException(\n                    $\"SA schema version mismatch. 
Expected {SaCommon.SchemaVersion}, observed {_index.SchemaVersion} for {JsonKey}\");\n\n            _encodedScoreSize  = _index.ReaderSettings.BytesRequired;\n            _uncompressedBlock = ArrayPool<byte>.Shared.Rent(_index.GetBlockLength());\n\n            int compressedBlockSize = _compressionAlgorithm.GetCompressedBufferBounds(_index.GetBlockLength());\n            _compressedBlock = ArrayPool<byte>.Shared.Rent(compressedBlockSize);\n        }\n\n        public static ScoreReader Read(Stream dataStream, Stream indexStream)\n        {\n            var        dataFileReader = new ExtendedBinaryReader(dataStream);\n            int        filePairId     = ReadHeader(dataFileReader);\n            ScoreIndex index          = ScoreIndex.Read(indexStream, filePairId);\n\n            return new ScoreReader(index, dataFileReader);\n        }\n\n        private static void CheckHeader(Header header)\n        {\n            (FileType fileType, ushort fileFormatVersion) = header;\n            if (fileType != FileType.GsaWriter || fileFormatVersion != FileFormatVersion)\n            {\n                throw new UserErrorException(\n                    $\"The file type {fileType} version {fileFormatVersion} is not supported by this reader \" +\n                    $\"{FileType.GsaWriter} version {FileFormatVersion}.\"\n                );\n            }\n        }\n\n        private static int ReadHeader(ExtendedBinaryReader dataFileReader)\n        {\n            Header header = Header.Read(dataFileReader);\n            CheckHeader(header);\n            int  filePairId = dataFileReader.ReadOptInt32();\n            uint guardInt   = dataFileReader.ReadUInt32();\n\n            if (guardInt != SaCommon.GuardInt)\n            {\n                throw new UserErrorException(\"The data file may be corrupted\");\n            }\n\n            return filePairId;\n        }\n\n\n        private bool GetUncompressedBlock(ushort chromIndex, int position)\n        {\n            long 
fileLocation = _index.GetFilePosition(chromIndex, position);\n\n            if (fileLocation < 0) return false;\n\n            // Reuse the current block\n            if (_lastFileLocation == fileLocation) return true;\n\n            _lastFileLocation = fileLocation;\n\n            Array.Clear(_uncompressedBlock, 0, _uncompressedBlock.Length);\n            _reader.BaseStream.Position = fileLocation;\n\n            int blockNumber = _index.GetBlockNumber(chromIndex, position);\n            int bytesToRead = _index.GetBytesToRead(chromIndex, blockNumber);\n            _reader.BaseStream.Read(_compressedBlock, 0, bytesToRead);\n\n            _compressionAlgorithm.Decompress(_compressedBlock, bytesToRead, _uncompressedBlock, _index.GetBlockLength());\n            return true;\n        }\n\n        public double GetScore(ushort chromosomeIndex, int position, string allele)\n        {\n            if (_index.ReaderSettings.IsPositional)\n                allele = \"N\";\n                \n            if (!GetUncompressedBlock(chromosomeIndex, position)) return double.NaN;\n\n            (_, int localBlockIndex) = _index.PositionToBlockLocation(chromosomeIndex, position);\n            ushort? allelePosition = _index.GetNucleotidePosition(allele);\n            if (allelePosition == null) return double.NaN;\n\n            Span<byte> score = _uncompressedBlock.AsSpan(localBlockIndex + (ushort) allelePosition, _encodedScoreSize);\n            return _index.ReaderSettings.ScoreEncoder.DecodeFromBytes(score);\n        }\n\n        public string GetAnnotationJson(ushort chromosomeIndex, int position, string altAllele)\n        {\n            double score = GetScore(chromosomeIndex, position, altAllele);\n            return double.IsNaN(score) ? null : _index.ReaderSettings.ScoreJsonEncoder.JsonRepresentation(score);\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/GenericScore/ZeroToOneScoreEncoder.cs",
    "content": "using System;\nusing ErrorHandling.Exceptions;\nusing IO;\n\nnamespace VariantAnnotation.GenericScore\n{\n    public sealed class ZeroToOneScoreEncoder : IScoreEncoder\n    {\n        private readonly byte[] _encodedArray;\n        private readonly int    _numberOfDigits;\n        private readonly uint   _maxNumber;\n        private readonly double _maxScore;\n\n        public ushort BytesRequired { get; }\n\n        public ZeroToOneScoreEncoder(int numberOfDigits, double maxScore)\n        {\n            _numberOfDigits = numberOfDigits;\n            _maxScore       = maxScore;\n\n            _maxNumber    = (uint) Math.Pow(10, _numberOfDigits);\n            BytesRequired = (ushort) Math.Ceiling(_numberOfDigits / Math.Log10(256));\n\n            _encodedArray = new byte[BytesRequired];\n        }\n\n        public void Write(ExtendedBinaryWriter writer)\n        {\n            writer.WriteOpt(_numberOfDigits);\n            writer.Write(_maxScore);\n        }\n\n        public static ZeroToOneScoreEncoder Read(ExtendedBinaryReader reader)\n        {\n            return new ZeroToOneScoreEncoder(reader.ReadOptInt32(), reader.ReadDouble());\n        }\n\n        public byte[] EncodeToBytes(double number)\n        {\n            Array.Clear(_encodedArray, 0, _encodedArray.Length);\n            if (double.IsNaN(number))\n            {\n                Array.Fill(_encodedArray, byte.MaxValue);\n                return _encodedArray;\n            }\n\n            uint transformedNumber = TransformToUint(number);\n\n            // BitConverter is used as a convenient means of transforming the number into bytes\n            // Only the `BytesRequred` portion is saved, because the converted bytes will not exceed it.\n            Array.Copy(BitConverter.GetBytes(transformedNumber), _encodedArray, BytesRequired);\n            return _encodedArray;\n        }\n\n\n        public double DecodeFromBytes(ReadOnlySpan<byte> encodedArray)\n        {\n            if 
(encodedArray[^1] == byte.MaxValue) return double.NaN;\n\n            var count = 0;\n            var shift = 0;\n\n            // because a variable-length encodedArray is received, the BitConverter cannot be used directly\n            foreach (byte b in encodedArray)\n            {\n                count |= (b & byte.MaxValue) << shift;\n                shift += 8;\n            }\n\n            return TransformToDouble((uint) count);\n        }\n\n        private uint TransformToUint(double number)\n        {\n            if (number > _maxScore) throw new UserErrorException(\"Score may not be larger than maximum score\");\n            return (uint) Math.Round(number * _maxNumber / _maxScore);\n        }\n\n        private double TransformToDouble(uint encodedNumber)\n        {\n            return encodedNumber * _maxScore / _maxNumber;\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/IO/Caches/CacheHeader.cs",
    "content": "﻿using System.IO;\r\nusing System.Text;\r\n\r\nnamespace VariantAnnotation.IO.Caches\r\n{\r\n    public sealed class CacheHeader : Header\r\n    {\r\n        public readonly TranscriptCacheCustomHeader Custom;\r\n\r\n        public CacheHeader(Header header, TranscriptCacheCustomHeader customHeader) : base(header.Identifier,\r\n            header.SchemaVersion, header.DataVersion, header.Source, header.CreationTimeTicks,\r\n            header.Assembly)\r\n        {\r\n            Custom = customHeader;\r\n        }\r\n\r\n        public new void Write(BinaryWriter writer)\r\n        {\r\n            base.Write(writer);\r\n            Custom.Write(writer);\r\n        }\r\n\r\n        public static CacheHeader Read(Stream stream)\r\n        {\r\n            CacheHeader header;\r\n\r\n            using (var reader = new BinaryReader(stream, Encoding.Default, true))\r\n            {\r\n                var baseHeader   = Read(reader);\r\n                var customHeader = TranscriptCacheCustomHeader.Read(reader);\r\n                header           = new CacheHeader(baseHeader, customHeader);\r\n            }\r\n\r\n            return header;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "VariantAnnotation/IO/Caches/Header.cs",
    "content": "﻿using System.IO;\r\nusing Genome;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace VariantAnnotation.IO.Caches\r\n{\r\n    public class Header\r\n    {\r\n        public readonly string Identifier;\r\n        public readonly ushort SchemaVersion;\r\n        public readonly ushort DataVersion;\r\n        public readonly Source Source;\r\n        public readonly long CreationTimeTicks;\r\n        public readonly GenomeAssembly Assembly;\r\n\r\n        public Header(string identifier, ushort schemaVersion, ushort dataVersion, Source source,\r\n            long creationTimeTicks, GenomeAssembly genomeAssembly)\r\n        {\r\n            Identifier        = identifier;\r\n            SchemaVersion     = schemaVersion;\r\n            DataVersion       = dataVersion;\r\n            Source            = source;\r\n            CreationTimeTicks = creationTimeTicks;\r\n            Assembly          = genomeAssembly;\r\n        }\r\n\r\n        protected void Write(BinaryWriter writer)\r\n        {\r\n            writer.Write(Identifier);\r\n            writer.Write(SchemaVersion);\r\n            writer.Write(DataVersion);\r\n            writer.Write((byte)Source);\r\n            writer.Write(CreationTimeTicks);\r\n            writer.Write((byte)Assembly);\r\n        }\r\n\r\n        protected static Header Read(BinaryReader reader)\r\n        {\r\n            string identifier      = reader.ReadString();\r\n            ushort schemaVersion   = reader.ReadUInt16();\r\n            ushort dataVersion     = reader.ReadUInt16();\r\n            var source             = (Source)reader.ReadByte();\r\n            long creationTimeTicks = reader.ReadInt64();\r\n            var genomeAssembly     = (GenomeAssembly)reader.ReadByte();\r\n\r\n            return new Header(identifier, schemaVersion, dataVersion, source, creationTimeTicks, genomeAssembly);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "VariantAnnotation/IO/Caches/PredictionCacheCustomHeader.cs",
    "content": "﻿using System.IO;\r\nusing VariantAnnotation.Caches.DataStructures;\r\n\r\nnamespace VariantAnnotation.IO.Caches\r\n{\r\n    public sealed class PredictionCacheCustomHeader\r\n    {\r\n        public readonly IndexEntry[] Entries;\r\n\r\n        public PredictionCacheCustomHeader(IndexEntry[] entries) => Entries = entries;\r\n\r\n        public void Write(BinaryWriter writer)\r\n        {\r\n            writer.Write((ushort)Entries.Length);\r\n            foreach (var entry in Entries) entry.Write(writer);\r\n        }\r\n\r\n        public static PredictionCacheCustomHeader Read(BinaryReader reader)\r\n        {\r\n            ushort numReferenceSeqs = reader.ReadUInt16();\r\n            var entries = new IndexEntry[numReferenceSeqs];\r\n            for (var i = 0; i < numReferenceSeqs; i++) entries[i].Read(reader);\r\n            return new PredictionCacheCustomHeader(entries);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/IO/Caches/PredictionCacheReader.cs",
    "content": "﻿using System;\r\nusing System.IO;\r\nusing System.IO.Compression;\r\nusing System.Text;\r\nusing Compression.Algorithms;\r\nusing Compression.FileHandling;\r\nusing IO;\r\nusing VariantAnnotation.Caches;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.Caches;\r\n\r\nnamespace VariantAnnotation.IO.Caches\r\n{\r\n    public sealed class PredictionCacheReader : IDisposable\r\n    {\r\n        private readonly ExtendedBinaryReader _reader;\r\n        private readonly BlockStream _blockStream;\r\n        private readonly string[] _predictionDescriptions;\r\n        private readonly IndexEntry[] _indexEntries;\r\n        private readonly int _numRefSeqs;\r\n        public readonly PredictionHeader Header;\r\n\r\n        public PredictionCacheReader(Stream stream, string[] predictionDescriptions)\r\n        {\r\n            _blockStream = new BlockStream(new Zstandard(), stream, CompressionMode.Decompress);\r\n            Header       = PredictionHeader.Read(stream, _blockStream);\r\n\r\n            _reader = new ExtendedBinaryReader(_blockStream, Encoding.Default, true);\r\n            _predictionDescriptions = predictionDescriptions;\r\n\r\n            _indexEntries = Header.Custom.Entries;\r\n            _numRefSeqs = _indexEntries.Length;\r\n        }\r\n\r\n        public void Dispose()\r\n        {\r\n            _reader.Dispose();\r\n            _blockStream.Dispose();\r\n        }\r\n\r\n        /// <summary>\r\n        /// parses the database cache file and populates the specified lists and interval trees\r\n        /// </summary>\r\n        public IPredictionCache Read(ushort refIndex)\r\n        {\r\n            if (refIndex >= _numRefSeqs) return null;\r\n            var predictions = GetPredictions(refIndex);\r\n            return new PredictionCache(Header.Assembly, predictions, _predictionDescriptions);\r\n        }\r\n\r\n        public Prediction[] GetPredictions(ushort refIndex)\r\n        {\r\n     
       var indexEntry = _indexEntries[refIndex];\r\n\r\n            _blockStream.SetBlockPosition(indexEntry.FileOffset);\r\n\r\n            var predictions = new Prediction[indexEntry.Count];\r\n            for (var i = 0; i < indexEntry.Count; i++) predictions[i] = Prediction.Read(_reader, Header.LookupTable);\r\n\r\n            return predictions;\r\n        }\r\n\r\n        public static readonly string[] SiftDescriptions = new string[]{\"tolerated\",\r\n            \"deleterious\", \"tolerated - low confidence\", \"deleterious - low confidence\"};\r\n\r\n        public static readonly string[] PolyphenDescriptions =\r\n            new string[]{\"probably damaging\", \"possibly damaging\", \"benign\", \"unknown\"};\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/IO/Caches/PredictionHeader.cs",
    "content": "﻿using System.IO;\r\nusing System.Text;\r\nusing Compression.FileHandling;\r\nusing IO;\r\nusing VariantAnnotation.Caches.DataStructures;\r\n\r\nnamespace VariantAnnotation.IO.Caches\r\n{\r\n    public sealed class PredictionHeader : Header\r\n    {\r\n        public readonly PredictionCacheCustomHeader Custom;\r\n        public readonly Prediction.Entry[] LookupTable;\r\n\r\n        public PredictionHeader(Header header, PredictionCacheCustomHeader customHeader, Prediction.Entry[] lookupTable)\r\n            : base(header.Identifier, header.SchemaVersion, header.DataVersion, header.Source,\r\n                header.CreationTimeTicks, header.Assembly)\r\n        {\r\n            Custom      = customHeader;\r\n            LookupTable = lookupTable;\r\n        }\r\n\r\n        public new void Write(BinaryWriter writer)\r\n        {\r\n            base.Write(writer);\r\n            Custom.Write(writer);\r\n        }\r\n\r\n        public static PredictionHeader Read(Stream stream, BlockStream blockStream)\r\n        {\r\n            Header baseHeader;\r\n            PredictionCacheCustomHeader customHeader;\r\n            Prediction.Entry[] lookupTable;\r\n\r\n            using (var reader = new BinaryReader(stream, Encoding.Default, true))\r\n            {\r\n                baseHeader = Read(reader);\r\n                customHeader = PredictionCacheCustomHeader.Read(reader);\r\n            }\r\n\r\n            using (var reader = new ExtendedBinaryReader(blockStream, Encoding.Default, true))\r\n            {\r\n                lookupTable = ReadLookupTable(reader);\r\n            }\r\n\r\n            return new PredictionHeader(baseHeader, customHeader, lookupTable);\r\n        }\r\n\r\n        private static Prediction.Entry[] ReadLookupTable(ExtendedBinaryReader reader)\r\n        {\r\n            int numEntries = reader.ReadInt32();\r\n            var lut = new Prediction.Entry[numEntries];\r\n            for (var i = 0; i < numEntries; i++) 
lut[i] = Prediction.Entry.ReadEntry(reader);\r\n            return lut;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "VariantAnnotation/IO/Caches/TranscriptCacheCustomHeader.cs",
    "content": "﻿using System.IO;\r\n\r\nnamespace VariantAnnotation.IO.Caches\r\n{\r\n    public sealed class TranscriptCacheCustomHeader\r\n    {\r\n        public readonly ushort VepVersion;\r\n        private readonly long _vepReleaseTicks;\r\n\r\n        public TranscriptCacheCustomHeader(ushort vepVersion, long vepReleaseTicks)\r\n        {\r\n            VepVersion       = vepVersion;\r\n            _vepReleaseTicks = vepReleaseTicks;\r\n        }\r\n\r\n        public void Write(BinaryWriter writer)\r\n        {\r\n            writer.Write(_vepReleaseTicks);\r\n            writer.Write(VepVersion);\r\n        }\r\n\r\n        public static TranscriptCacheCustomHeader Read(BinaryReader reader)\r\n        {\r\n            long vepReleaseTicks = reader.ReadInt64();\r\n            ushort vepVersion    = reader.ReadUInt16();\r\n            return new TranscriptCacheCustomHeader(vepVersion, vepReleaseTicks);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/IO/Caches/TranscriptCacheReader.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.IO.Compression;\r\nusing Compression.Algorithms;\r\nusing Compression.FileHandling;\r\nusing Genome;\r\nusing Intervals;\r\nusing IO;\r\nusing VariantAnnotation.Caches;\r\nusing VariantAnnotation.Caches.DataStructures;\r\n\r\nnamespace VariantAnnotation.IO.Caches\r\n{\r\n    public sealed class TranscriptCacheReader : IDisposable\r\n    {\r\n        private readonly BufferedBinaryReader _reader;\r\n        public readonly CacheHeader Header;\r\n\r\n        public TranscriptCacheReader(Stream stream)\r\n        {\r\n            Header          = CacheHeader.Read(stream);\r\n            var blockStream = new BlockStream(new Zstandard(), stream, CompressionMode.Decompress);\r\n            _reader         = new BufferedBinaryReader(blockStream);\r\n        }\r\n\r\n        public void Dispose() => _reader.Dispose();\r\n\r\n        /// <summary>\r\n        /// parses the database cache file and populates the specified lists and interval trees\r\n        /// </summary>\r\n        public TranscriptCacheData Read(Dictionary<ushort, Chromosome> refIndexToChromosome)\r\n        {\r\n            var genes             = ReadItems(_reader,     () => Gene.Read(_reader, refIndexToChromosome));\r\n            var transcriptRegions = ReadItems(_reader,     () => TranscriptRegion.Read(_reader));\r\n            var mirnas            = ReadItems(_reader,     () => IntervalExtensions.Read(_reader));\r\n            var peptideSeqs       = ReadItems(_reader,     () => _reader.ReadAsciiString());\r\n            var regulatoryRegions = ReadIntervals(_reader, () => RegulatoryRegion.Read(_reader, refIndexToChromosome));\r\n            var transcripts       = ReadIntervals(_reader, () => Transcript.Read(_reader, refIndexToChromosome, genes, transcriptRegions, mirnas, peptideSeqs));\r\n\r\n            return new TranscriptCacheData(Header, genes, transcriptRegions, mirnas, peptideSeqs, 
transcripts, regulatoryRegions);\r\n        }\r\n\r\n        private static IntervalArray<T>[] ReadIntervals<T>(IBufferedBinaryReader reader, Func<T> readMethod) where T : IInterval\r\n        {\r\n            var numRefSeqs     = reader.ReadOptInt32();\r\n            var intervalArrays = new IntervalArray<T>[numRefSeqs];\r\n\r\n            for (int refSeqIndex = 0; refSeqIndex < numRefSeqs; refSeqIndex++)\r\n            {\r\n                var numItems  = reader.ReadOptInt32();\r\n                if (numItems == 0) continue;\r\n\r\n                var intervals = new Interval<T>[numItems];\r\n\r\n                for (int i = 0; i < numItems; i++)\r\n                {\r\n                    var item = readMethod();\r\n                    intervals[i] = new Interval<T>(item.Start, item.End, item);\r\n                }\r\n\r\n                intervalArrays[refSeqIndex] = new IntervalArray<T>(intervals);\r\n            }\r\n\r\n            CheckGuard(reader);\r\n            return intervalArrays;\r\n        }\r\n\r\n        internal static T[] ReadItems<T>(IBufferedBinaryReader reader, Func<T> readMethod)\r\n        {\r\n            var numItems = reader.ReadOptInt32();\r\n            var items    = new T[numItems];\r\n            for (int i = 0; i < numItems; i++) items[i] = readMethod();\r\n            CheckGuard(reader);\r\n            return items;\r\n        }\r\n\r\n        /// <summary>\r\n        /// check if the section guard is in place\r\n        /// </summary>\r\n        internal static void CheckGuard(IBufferedBinaryReader reader)\r\n        {\r\n            uint observedGuard = reader.ReadUInt32();\r\n            if (observedGuard != CacheConstants.GuardInt)\r\n            {\r\n                throw new InvalidDataException($\"Expected a guard integer ({CacheConstants.GuardInt}), but found another value: ({observedGuard})\");\r\n            }\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/IO/IntervalExtensions.cs",
    "content": "﻿using Intervals;\nusing IO;\n\nnamespace VariantAnnotation.IO\n{\n    public static class IntervalExtensions\n    {\n        public static IInterval Read(IBufferedBinaryReader reader)\n        {\n            int start = reader.ReadOptInt32();\n            int end   = reader.ReadOptInt32();\n            return new Interval(start, end);\n        }\n\n        public static void Write(this IInterval interval, IExtendedBinaryWriter writer)\n        {\n            writer.WriteOpt(interval.Start);\n            writer.WriteOpt(interval.End);\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/IO/JsonCommon.cs",
    "content": "﻿namespace VariantAnnotation.IO\r\n{\r\n    public static class JsonCommon\r\n    {\r\n        public const int    SchemaVersion           = 6;\r\n        public const string FrequencyRoundingFormat = \"0.######\";\r\n\r\n        public const string Chromosome = \"chromosome\";\r\n        public const string Begin      = \"begin\";\r\n        public const string End        = \"end\";\r\n\r\n        public const string FailedFilter = \"failedFilter\";\r\n        public const string VariantId    = \"variantId\";\r\n        public const string VariantType  = \"variantType\";\r\n\r\n        public const string AllAlleleCount    = \"allAc\";\r\n        public const string AfrAlleleCount    = \"afrAc\";\r\n        public const string AmrAlleleCount    = \"amrAc\";\r\n        public const string EasAlleleCount    = \"easAc\";\r\n        public const string EurAlleleCount    = \"eurAc\";\r\n        public const string OthAlleleCount    = \"othAc\";\r\n        public const string FemaleAlleleCount = \"femaleAc\";\r\n        public const string MaleAlleleCount   = \"maleAc\";\r\n\r\n        public const string AllAlleleFrequency    = \"allAf\";\r\n        public const string AfrAlleleFrequency    = \"afrAf\";\r\n        public const string AmrAlleleFrequency    = \"amrAf\";\r\n        public const string EasAlleleFrequency    = \"easAf\";\r\n        public const string EurAlleleFrequency    = \"eurAf\";\r\n        public const string OthAlleleFrequency    = \"othAf\";\r\n        public const string FemaleAlleleFrequency = \"femaleAf\";\r\n        public const string MaleAlleleFrequency   = \"maleAf\";\r\n\r\n        public const string AllAlleleNumber    = \"allAn\";\r\n        public const string AfrAlleleNumber    = \"afrAn\";\r\n        public const string AmrAlleleNumber    = \"amrAn\";\r\n        public const string EasAlleleNumber    = \"easAn\";\r\n        public const string EurAlleleNumber    = \"eurAn\";\r\n        public const string 
OthAlleleNumber    = \"othAn\";\r\n        public const string FemaleAlleleNumber = \"femaleAn\";\r\n        public const string MaleAlleleNumber   = \"maleAn\";\r\n\r\n        public const string AllHomCount    = \"allHc\";\r\n        public const string AfrHomCount    = \"afrHc\";\r\n        public const string AmrHomCount    = \"amrHc\";\r\n        public const string EasHomCount    = \"easHc\";\r\n        public const string EurHomCount    = \"eurHc\";\r\n        public const string OthHomCount    = \"othHc\";\r\n        public const string FemaleHomCount = \"femaleHc\";\r\n        public const string MaleHomCount   = \"maleHc\";\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/IO/JsonObject.cs",
    "content": "\ufeffusing System.Collections.Generic;\r\nusing System.Globalization;\r\nusing System.Linq;\r\nusing System.Text;\r\nusing VariantAnnotation.Interface.IO;\r\n\r\nnamespace VariantAnnotation.IO\r\n{\r\n    // Appends JSON key/value pairs to a caller-supplied StringBuilder and tracks whether the next\r\n    // entry must be preceded by a comma. Keys and values are appended verbatim: this class performs\r\n    // no JSON escaping.\r\n    public sealed class JsonObject\r\n    {\r\n        private readonly StringBuilder _sb;\r\n        private bool _needsComma;\r\n        private int _nestedLevel;\r\n\r\n        public const char Comma = ',';\r\n        private const char DoubleQuote = '\\\"';\r\n        public const char OpenBracket = '[';\r\n        public const char CloseBracket = ']';\r\n        public const char OpenBrace = '{';\r\n        public const char CloseBrace = '}';\r\n        private const string ColonString = \"\\\":\";\r\n\r\n        public JsonObject(StringBuilder sb) => _sb = sb;\r\n\r\n        // writes the opening quote, the key, the closing quote and the colon\r\n        private void AddKey(string description)\r\n        {\r\n            _sb.Append(DoubleQuote);\r\n            _sb.Append(description);\r\n            _sb.Append(ColonString);\r\n        }\r\n\r\n        // writes the pending comma (if any) followed by the key\r\n        private void BeginEntry(string description)\r\n        {\r\n            if (_needsComma) _sb.Append(Comma);\r\n            AddKey(description);\r\n        }\r\n\r\n        // opens a nested object under the given key and records one more nesting level\r\n        public void StartObjectWithKey(string objectKey)\r\n        {\r\n            BeginEntry(objectKey);\r\n            _sb.Append(OpenBrace);\r\n\r\n            _needsComma = false;\r\n            _nestedLevel++;\r\n        }\r\n\r\n        // returns true if an entry was written\r\n        public bool AddBoolValue(string description, bool b, bool outputFalse = false)\r\n        {\r\n            // we do not want to print out false flags by default.\r\n            if (!b && !outputFalse) return false;\r\n\r\n            BeginEntry(description);\r\n            _sb.Append(b ? \"true\" : \"false\");\r\n            _needsComma = true;\r\n\r\n            return true;\r\n        }\r\n\r\n        // returns true if an entry was written; null values are skipped\r\n        public bool AddIntValue(string description, int? i)\r\n        {\r\n            if (i == null) return false;\r\n\r\n            BeginEntry(description);\r\n            // invariant culture: the sign and digits must not depend on the current thread culture\r\n            _sb.Append(i.Value.ToString(CultureInfo.InvariantCulture));\r\n            _needsComma = true;\r\n\r\n            return true;\r\n        }\r\n\r\n        // returns true if an entry was written; null values are skipped\r\n        public bool AddUIntValue(string description, uint? i)\r\n        {\r\n            if (i == null) return false;\r\n\r\n            BeginEntry(description);\r\n            _sb.Append(i.Value.ToString(CultureInfo.InvariantCulture));\r\n            _needsComma = true;\r\n\r\n            return true;\r\n        }\r\n\r\n        // writes the values as an unquoted JSON array; null or empty arrays are skipped\r\n        public void AddIntValues(string description, int[] values)\r\n        {\r\n            if (values == null || values.Length == 0) return;\r\n\r\n            var valueList = values.Select(value => value.ToString(CultureInfo.InvariantCulture)).ToList();\r\n\r\n            AddStringValues(description, valueList, false);\r\n            _needsComma = true;\r\n        }\r\n\r\n        // returns true if an entry was written; null values are skipped\r\n        public bool AddDoubleValue(string description, double? d, string format = \"0.####\")\r\n        {\r\n            if (d == null) return false;\r\n\r\n            BeginEntry(description);\r\n            _sb.Append(d.Value.ToString(format, CultureInfo.InvariantCulture));\r\n            _needsComma = true;\r\n\r\n            return true;\r\n        }\r\n\r\n        // writes the values as an unquoted JSON array; null or empty arrays are skipped\r\n        public void AddDoubleValues(string description, double[] values, string format = \"0.####\")\r\n        {\r\n            if (values == null || values.Length == 0) return;\r\n\r\n            // invariant culture, exactly like AddDoubleValue: a culture with a decimal comma would\r\n            // otherwise turn 0.5 into 0,5 and silently split one array element into two\r\n            var valueList = values.Select(value => value.ToString(format, CultureInfo.InvariantCulture)).ToList();\r\n\r\n            AddStringValues(description, valueList, false);\r\n            _needsComma = true;\r\n        }\r\n\r\n        // returns true if an entry was written; null, empty and single-dot values are skipped\r\n        public bool AddStringValue(string description, string s, bool useQuote = true)\r\n        {\r\n            if (string.IsNullOrEmpty(s) || s == \".\") return false;\r\n\r\n            BeginEntry(description);\r\n\r\n            if (useQuote) _sb.Append(DoubleQuote);\r\n            _sb.Append(s);\r\n            if (useQuote) _sb.Append(DoubleQuote);\r\n            _needsComma = true;\r\n\r\n            return true;\r\n        }\r\n\r\n        // writes the values as a JSON array after dropping every single-dot entry; returns false\r\n        // (and writes nothing) if no entry is left\r\n        public bool AddStringValues(string description, IEnumerable<string> values, bool useQuote = true)\r\n        {\r\n            if (values == null) return false;\r\n\r\n            var validEntries = values.Where(value => value != \".\").ToList();\r\n            if (validEntries.Count == 0) return false;\r\n\r\n            BeginEntry(description);\r\n            _sb.Append(OpenBracket);\r\n\r\n            var needsComma = false;\r\n\r\n            foreach (string value in validEntries)\r\n            {\r\n                if (needsComma) _sb.Append(Comma);\r\n                if (useQuote) _sb.Append(DoubleQuote);\r\n                _sb.Append(value);\r\n                if (useQuote) _sb.Append(DoubleQuote);\r\n                needsComma = true;\r\n            }\r\n\r\n            _sb.Append(CloseBracket);\r\n            _needsComma = true;\r\n\r\n            return true;\r\n        }\r\n\r\n        // StringBuilder flavour of the method above; here an entry is dropped when it is empty or\r\n        // when its first character is a dot\r\n        public bool AddStringValues(string description, IEnumerable<StringBuilder> sbs, bool useQuote = true)\r\n        {\r\n            if (sbs == null) return false;\r\n\r\n            var validEntries = sbs.Where(sb => sb.Length > 0 && sb[0] != '.').ToList();\r\n            if (validEntries.Count == 0) return false;\r\n\r\n            BeginEntry(description);\r\n            _sb.Append(OpenBracket);\r\n\r\n            var needsComma = false;\r\n\r\n            foreach (var value in validEntries)\r\n            {\r\n                if (needsComma) _sb.Append(Comma);\r\n                if (useQuote) _sb.Append(DoubleQuote);\r\n                _sb.Append(value);\r\n                if (useQuote) _sb.Append(DoubleQuote);\r\n                needsComma = true;\r\n            }\r\n\r\n            _sb.Append(CloseBracket);\r\n            _needsComma = true;\r\n\r\n            return true;\r\n        }\r\n\r\n        // lets the value serialize itself right after the key; null values are skipped\r\n        public void AddObjectValue<T>(string description, T value) where T : IJsonSerializer\r\n        {\r\n            if (value == null) return;\r\n\r\n            BeginEntry(description);\r\n            value.SerializeJson(_sb);\r\n\r\n            _needsComma = true;\r\n        }\r\n\r\n        // writes a JSON array in which every element serializes itself; a null sequence is skipped,\r\n        // an empty sequence is written as an empty array\r\n        public bool AddObjectValues<T>(string description, IEnumerable<T> values) where T : IJsonSerializer\r\n        {\r\n            if (values == null) return false;\r\n\r\n            BeginEntry(description);\r\n            _sb.Append(OpenBracket);\r\n\r\n            var needsComma = false;\r\n\r\n            foreach (var value in values)\r\n            {\r\n                // comma handling\r\n                if (needsComma) _sb.Append(Comma);\r\n                else needsComma = true;\r\n                value.SerializeJson(_sb);\r\n            }\r\n\r\n            _sb.Append(CloseBracket);\r\n            _needsComma = true;\r\n\r\n            return true;\r\n        }\r\n\r\n        public void StartObject()\r\n        {\r\n            _sb.Append(OpenBrace);\r\n            _needsComma = false;\r\n            _nestedLevel++;\r\n        }\r\n\r\n        public void EndObject()\r\n        {\r\n            _sb.Append(CloseBrace);\r\n            _needsComma = true;\r\n            _nestedLevel--;\r\n        }\r\n\r\n        // appends one closing brace per recorded nesting level; the counter itself is left untouched\r\n        public void EndAllObjects()\r\n        {\r\n            _sb.Append(CloseBrace, _nestedLevel);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/IO/JsonWriter.cs",
    "content": "\ufeffusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Text;\r\nusing Compression.FileHandling;\r\nusing Jasix;\r\nusing Jasix.DataStructures;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.Providers;\r\n\r\nnamespace VariantAnnotation.IO\r\n{\r\n    // Writes the annotation JSON: header, positions, optional genes and footer. When the underlying\r\n    // writer is a BgzipTextWriter, section boundaries and position offsets are also reported to an\r\n    // OnTheFlyIndexCreator.\r\n    public sealed class JsonWriter : IJsonWriter\r\n    {\r\n        private readonly StreamWriter _writer;\r\n        private bool _firstEntry;\r\n        private bool _positionFieldClosed;\r\n        private readonly bool _leaveOpen;\r\n        private bool _isDisposed;\r\n\r\n        private readonly BgzipTextWriter _bgzipTextWriter;\r\n        private readonly OnTheFlyIndexCreator _jasixIndexCreator;\r\n\r\n        private JsonWriter(Stream jsonStream, Stream indexStream, string annotator, string creationTime, string vepDataVersion,\r\n            List<IDataSourceVersion> dataSourceVersions, string genomeAssembly, string[] sampleNames, bool leaveOpen) : this(GetProperWriter(jsonStream), indexStream, annotator, creationTime, vepDataVersion, dataSourceVersions, genomeAssembly, sampleNames, leaveOpen)\r\n        {\r\n        }\r\n\r\n        public JsonWriter(Stream jsonStream, Stream indexStream, IAnnotationResources annotationResources, string creationTime, string[] sampleNames, bool leaveOpen) : this(jsonStream, indexStream, annotationResources.AnnotatorVersionTag, creationTime, annotationResources.VepDataVersion, annotationResources.DataSourceVersions, annotationResources.SequenceProvider.Assembly.ToString(), sampleNames, leaveOpen)\r\n        {\r\n        }\r\n\r\n        // a BlockGZipStream gets a BgzipTextWriter, every other stream a plain StreamWriter\r\n        private static StreamWriter GetProperWriter(Stream jsonStream) => jsonStream is BlockGZipStream stream\r\n            ? new BgzipTextWriter(stream)\r\n            : new StreamWriter(jsonStream);\r\n\r\n        public JsonWriter(StreamWriter writer, Stream indexStream, string annotator, string creationTime, string vepDataVersion,\r\n            List<IDataSourceVersion> dataSourceVersions, string genomeAssembly, string[] sampleNames, bool leaveOpen)\r\n        {\r\n            _writer              = writer;\r\n            _writer.NewLine      = \"\\n\";\r\n            _firstEntry          = true;\r\n            _positionFieldClosed = false;\r\n            _leaveOpen           = leaveOpen;\r\n\r\n            // the index is only created when the output is written through a BgzipTextWriter\r\n            _bgzipTextWriter = writer as BgzipTextWriter;\r\n\r\n            _jasixIndexCreator = _bgzipTextWriter != null\r\n                ? new OnTheFlyIndexCreator(indexStream)\r\n                : null;\r\n\r\n            WriteHeader(annotator, creationTime, genomeAssembly, JsonCommon.SchemaVersion, vepDataVersion,\r\n                dataSourceVersions, sampleNames);\r\n        }\r\n\r\n        // writes the header object and opens the positions array\r\n        private void WriteHeader(string annotator, string creationTime, string genomeAssembly, int schemaVersion,\r\n            string vepDataVersion, IEnumerable<IDataSourceVersion> dataSourceVersions, string[] sampleNames)\r\n        {\r\n            BeginSection(JasixCommons.HeaderSectionTag);\r\n\r\n            var sb         = StringBuilderPool.Get();\r\n            var jsonObject = new JsonObject(sb);\r\n\r\n            sb.Append($\"{{\\\"{JasixCommons.HeaderSectionTag}\\\":{{\");\r\n            jsonObject.AddStringValue(\"annotator\", annotator);\r\n            jsonObject.AddStringValue(\"creationTime\", creationTime);\r\n            jsonObject.AddStringValue(\"genomeAssembly\", genomeAssembly);\r\n            jsonObject.AddIntValue(\"schemaVersion\", schemaVersion);\r\n            jsonObject.AddStringValue(\"dataVersion\", vepDataVersion);\r\n\r\n            jsonObject.AddObjectValues(\"dataSources\", dataSourceVersions);\r\n\r\n            if (sampleNames != null) jsonObject.AddStringValues(\"samples\", sampleNames);\r\n            sb.Append($\"}},\\\"{JasixCommons.PositionsSectionTag}\\\":[\\n\");\r\n\r\n            _writer.Write(StringBuilderPool.GetStringAndReturn(sb));\r\n\r\n            // EndSection is a no-op unless the output goes through a BgzipTextWriter\r\n            EndSection(JasixCommons.HeaderSectionTag);\r\n        }\r\n\r\n        public void Dispose()\r\n        {\r\n            // a repeated call must be a no-op: the footer may only be written once, and after the\r\n            // first call the writer may already have been disposed\r\n            if (_isDisposed) return;\r\n            _isDisposed = true;\r\n\r\n            WriteFooter();\r\n            _writer?.Flush();\r\n            _jasixIndexCreator?.Flush();\r\n            if (_leaveOpen) return;\r\n            _writer?.Dispose();\r\n            _jasixIndexCreator?.Dispose();\r\n        }\r\n\r\n        // due to the flush, the end of a section will point to the next to last block for a section.\r\n        // e.g. if positions start at block 2 and end at block 10, blocks 2..9 contain positions.\r\n        private void BeginSection(string section)\r\n        {\r\n            if (_bgzipTextWriter == null) return;\r\n            _bgzipTextWriter.Flush();\r\n            _jasixIndexCreator.BeginSection(section, _bgzipTextWriter.Position);\r\n        }\r\n\r\n        private void EndSection(string section)\r\n        {\r\n            if (_bgzipTextWriter == null) return;\r\n            _bgzipTextWriter.Flush();\r\n            _jasixIndexCreator.EndSection(section, _bgzipTextWriter.Position);\r\n        }\r\n\r\n        // shared bookkeeping of both WritePosition overloads: reports the position to the index\r\n        // (if there is one), then either starts the positions section (first entry) or writes the\r\n        // separator between two entries\r\n        private void BeginPositionEntry(IPosition position)\r\n        {\r\n            _jasixIndexCreator?.Add(position, _bgzipTextWriter.Position);\r\n\r\n            if (_firstEntry) BeginSection(JasixCommons.PositionsSectionTag);\r\n            else _writer.WriteLine(\",\");\r\n\r\n            _firstEntry = false;\r\n        }\r\n\r\n        // null or empty entries are ignored\r\n        public void WritePosition(IPosition position, string entry)\r\n        {\r\n            if (string.IsNullOrEmpty(entry)) return;\r\n\r\n            BeginPositionEntry(position);\r\n            _writer.Write(entry);\r\n        }\r\n\r\n        // null or empty builders are ignored\r\n        public void WritePosition(IPosition position, StringBuilder sb)\r\n        {\r\n            if (sb == null || sb.Length == 0) return;\r\n\r\n            BeginPositionEntry(position);\r\n            _writer.Write(sb);\r\n        }\r\n\r\n        // closes the positions array and, unless annotatedGenes is null, writes the genes array\r\n        public void WriteGenes(IEnumerable<string> annotatedGenes)\r\n        {\r\n            _positionFieldClosed = true;\r\n            EndSection(JasixCommons.PositionsSectionTag);\r\n\r\n            _writer.Write(\"\\n]\");\r\n\r\n            if (annotatedGenes == null) return;\r\n            _writer.Write($\",\\\"{JasixCommons.GenesSectionTag}\\\":[\\n\");\r\n            BeginSection(JasixCommons.GenesSectionTag);\r\n\r\n            var sb = StringBuilderPool.Get();\r\n            var firstGeneEntry = true;\r\n\r\n            foreach (string jsonString in annotatedGenes)\r\n            {\r\n                if (!firstGeneEntry) sb.Append(\",\\n\");\r\n                sb.Append(jsonString);\r\n                firstGeneEntry = false;\r\n            }\r\n\r\n            var json = StringBuilderPool.GetStringAndReturn(sb);\r\n            _writer.Write(json);\r\n\r\n            EndSection(JasixCommons.GenesSectionTag);\r\n\r\n            _writer.WriteLine();\r\n            _writer.Write(\"]\");\r\n        }\r\n\r\n        // closes the positions array if WriteGenes has not already done so, then closes the root object\r\n        private void WriteFooter()\r\n        {\r\n            if (!_positionFieldClosed)\r\n            {\r\n                EndSection(JasixCommons.PositionsSectionTag);\r\n\r\n                _writer.WriteLine();\r\n                _writer.Write(\"]\");\r\n            }\r\n            _writer.WriteLine(\"}\");\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/IO/SampleExtensions.cs",
    "content": "\ufeffusing OptimizedCore;\r\nusing VariantAnnotation.Interface.Positions;\r\n\r\nnamespace VariantAnnotation.IO\r\n{\r\n    public static class SampleExtensions\r\n    {\r\n        // Serializes one sample as a JSON object. The keys are emitted in the order of the calls\r\n        // below; JsonObject leaves out a key whose value is null, false or empty.\r\n        public static string GetJsonString(this ISample sample)\r\n        {\r\n            var builder = StringBuilderPool.Get();\r\n            var json    = new JsonObject(builder);\r\n\r\n            builder.Append(JsonObject.OpenBrace);\r\n\r\n            json.AddBoolValue(\"isEmpty\", sample.IsEmpty);\r\n            json.AddStringValue(\"genotype\", sample.Genotype);\r\n            json.AddDoubleValues(\"variantFrequencies\", sample.VariantFrequencies);\r\n            json.AddIntValue(\"totalDepth\", sample.TotalDepth);\r\n            json.AddIntValue(\"genotypeQuality\", sample.GenotypeQuality);\r\n            json.AddIntValue(\"copyNumber\", sample.CopyNumber);\r\n            json.AddIntValue(\"minorHaplotypeCopyNumber\", sample.MinorHaplotypeCopyNumber);\r\n            json.AddIntValues(\"repeatUnitCounts\", sample.RepeatUnitCounts);\r\n            json.AddIntValues(\"alleleDepths\", sample.AlleleDepths);\r\n            json.AddBoolValue(\"failedFilter\", sample.FailedFilter);\r\n            json.AddIntValues(\"splitReadCounts\", sample.SplitReadCounts);\r\n            json.AddIntValues(\"pairedEndReadCounts\", sample.PairedEndReadCounts);\r\n            json.AddBoolValue(\"isDeNovo\", sample.IsDeNovo);\r\n            json.AddDoubleValue(\"deNovoQuality\", sample.DeNovoQuality);\r\n            json.AddStringValues(\"diseaseAffectedStatuses\", sample.DiseaseAffectedStatuses);\r\n            json.AddDoubleValue(\"artifactAdjustedQualityScore\", sample.ArtifactAdjustedQualityScore, \"0.#\");\r\n            json.AddDoubleValue(\"likelihoodRatioQualityScore\", sample.LikelihoodRatioQualityScore, \"0.#\");\r\n\r\n            // the flag is only considered when it has a value\r\n            if (sample.IsLossOfHeterozygosity.HasValue)\r\n            {\r\n                json.AddBoolValue(\"lossOfHeterozygosity\", sample.IsLossOfHeterozygosity.Value);\r\n            }\r\n\r\n            json.AddDoubleValue(\"somaticQuality\", sample.SomaticQuality, \"0.#\");\r\n            json.AddStringValues(\"heteroplasmyPercentile\", sample.HeteroplasmyPercentile, false);\r\n            json.AddIntValue(\"binCount\", sample.BinCount);\r\n\r\n            if (sample.CustomFields != null && !sample.CustomFields.IsEmpty())\r\n            {\r\n                json.AddObjectValue(\"vcfSampleInfo\", sample.CustomFields);\r\n            }\r\n\r\n            builder.Append(JsonObject.CloseBrace);\r\n            return StringBuilderPool.GetStringAndReturn(builder);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/NSA/NgaReader.cs",
    "content": "\ufeffusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.IO.Compression;\r\nusing System.Text;\r\nusing Compression.Algorithms;\r\nusing Compression.FileHandling;\r\nusing ErrorHandling.Exceptions;\r\nusing IO;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.Providers;\r\nusing VariantAnnotation.SA;\r\n\r\nnamespace VariantAnnotation.NSA\r\n{\r\n    // Loads an NGA stream into memory: a header followed by a Zstandard block stream that holds,\r\n    // for every gene symbol, a list of JSON strings.\r\n    public sealed class NgaReader\r\n    {\r\n        public readonly IDataSourceVersion Version;\r\n        public readonly string JsonKey;\r\n        private readonly bool _isArray;\r\n\r\n        private readonly Dictionary<string, List<string>> _geneSymbolToJsonStrings;\r\n\r\n        private NgaReader(IDataSourceVersion version, string jsonKey, bool isArray, Dictionary<string, List<string>> geneSymbolToJsonStrings)\r\n        {\r\n            Version                  = version;\r\n            JsonKey                  = jsonKey;\r\n            _isArray                 = isArray;\r\n            _geneSymbolToJsonStrings = geneSymbolToJsonStrings;\r\n        }\r\n\r\n        // reads the header and then the complete gene table; the stream is consumed by this call\r\n        public static NgaReader Read(Stream stream)\r\n        {\r\n            (IDataSourceVersion version, string jsonKey, bool isArray) = ReadHeader(stream);\r\n\r\n            Dictionary<string, List<string>> geneSymbolToJsonStrings;\r\n\r\n            using (var blockStream = new BlockStream(new Zstandard(), stream, CompressionMode.Decompress))\r\n            using (var reader = new ExtendedBinaryReader(blockStream))\r\n            {\r\n                int geneCount = reader.ReadOptInt32();\r\n                geneSymbolToJsonStrings = new Dictionary<string, List<string>>(geneCount);\r\n\r\n                for (var geneIndex = 0; geneIndex < geneCount; geneIndex++)\r\n                {\r\n                    string geneSymbol = reader.ReadAsciiString();\r\n                    int numEntries    = reader.ReadOptInt32();\r\n                    var entries       = new List<string>(numEntries);\r\n\r\n                    for (var entryIndex = 0; entryIndex < numEntries; entryIndex++) entries.Add(reader.ReadString());\r\n\r\n                    // a gene symbol that occurs more than once keeps its last list\r\n                    geneSymbolToJsonStrings[geneSymbol] = entries;\r\n                }\r\n            }\r\n\r\n            return new NgaReader(version, jsonKey, isArray, geneSymbolToJsonStrings);\r\n        }\r\n\r\n        // reads and validates the header; the third ExtendedBinaryReader argument is passed as true\r\n        // (presumably leaveOpen), so the stream stays usable for the block stream that follows\r\n        private static (IDataSourceVersion Version, string JsonKey, bool IsArray) ReadHeader(Stream stream)\r\n        {\r\n            using (var reader = new ExtendedBinaryReader(stream, Encoding.UTF8, true))\r\n            {\r\n                string identifier = reader.ReadString();\r\n\r\n                if (identifier != SaCommon.NgaIdentifier)\r\n                {\r\n                    throw new InvalidDataException($\"Expected the NGA identifier ({SaCommon.NgaIdentifier}), but found another value: ({identifier})\");\r\n                }\r\n\r\n                IDataSourceVersion version = DataSourceVersion.Read(reader);\r\n                string jsonKey             = reader.ReadString();\r\n                bool isArray               = reader.ReadBoolean();\r\n                ushort schemaVersion       = reader.ReadUInt16();\r\n\r\n                if (schemaVersion != SaCommon.SchemaVersion)\r\n                {\r\n                    throw new UserErrorException($\"Expected the schema version {SaCommon.SchemaVersion}, but found another value: ({schemaVersion}) for {jsonKey}\");\r\n                }\r\n\r\n                uint guard = reader.ReadUInt32();\r\n\r\n                if (guard != SaCommon.GuardInt)\r\n                {\r\n                    throw new InvalidDataException($\"Expected a guard integer ({SaCommon.GuardInt}), but found another value: ({guard})\");\r\n                }\r\n\r\n                return (version, jsonKey, isArray);\r\n            }\r\n        }\r\n\r\n        // returns null when the gene is unknown\r\n        public string GetAnnotation(string geneName) => _geneSymbolToJsonStrings.TryGetValue(geneName, out List<string> annotations) ? GetJsonString(annotations) : null;\r\n\r\n        private string GetJsonString(List<string> annotations)\r\n        {\r\n            if (_isArray) return \"[\" + string.Join(',', annotations) + \"]\";\r\n\r\n            // a gene stored with zero entries has nothing to report: return null (as for an unknown\r\n            // gene) instead of letting the indexer throw\r\n            return annotations.Count == 0 ? null : annotations[0];\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/NSA/NsaBlock.cs",
    "content": "\ufeffusing System;\nusing System.Collections.Generic;\nusing System.IO;\nusing Compression.Algorithms;\nusing IO;\n\nnamespace VariantAnnotation.NSA\n{\n    // A fixed-size buffer of annotation entries. Every entry is stored as: payload length, position\n    // delta to the previous entry, payload bytes. The buffer is compressed as a whole when it is\n    // written and decompressed as a whole when it is read.\n    public sealed class NsaBlock:IDisposable\n    {\n        private readonly ICompressionAlgorithm _compressionAlgorithm;\n        private readonly byte[] _compressedBlock;\n        private readonly byte[] _uncompressedBlock;\n        private int _compressedLength;\n        private int _uncompressedLength;\n        private readonly ExtendedBinaryWriter _writer;\n        public int BlockOffset => (int)_writer.BaseStream.Position;\n        private int _firstPosition;\n        private int _lastPosition;\n        private int _count;\n\n        private readonly ExtendedBinaryReader _blockReader;\n        private readonly MemoryStream         _blockStream;\n\n        public NsaBlock(ICompressionAlgorithm compressionAlgorithm, int size)\n        {\n            _compressionAlgorithm = compressionAlgorithm;\n\n            // the reader and the writer use separate streams over the same uncompressed buffer\n            _uncompressedBlock    = new byte[size];\n            _blockStream          = new MemoryStream(_uncompressedBlock);\n            _blockReader          = new ExtendedBinaryReader(_blockStream);\n            _writer               = new ExtendedBinaryWriter(new MemoryStream(_uncompressedBlock));\n\n            int compressedBlockSize = compressionAlgorithm.GetCompressedBufferBounds(size);\n            _compressedBlock = new byte[compressedBlockSize];\n        }\n\n        // reads the block header (compressed length, first position, entry count) followed by the\n        // compressed bytes\n        private void ReadHeaderAndCompressedBytes(ExtendedBinaryReader reader)\n        {\n            _compressedLength = reader.ReadOptInt32();\n            _firstPosition    = reader.ReadOptInt32();\n            //_lastPosition   = reader.ReadOptInt32();\n            _count            = reader.ReadOptInt32();\n            reader.Read(_compressedBlock, 0, _compressedLength);\n        }\n\n        // reads a block and decompresses it into the uncompressed buffer\n        public void Read(ExtendedBinaryReader reader)\n        {\n            ReadHeaderAndCompressedBytes(reader);\n\n            _uncompressedLength = _compressionAlgorithm.Decompress(_compressedBlock, _compressedLength,\n                _uncompressedBlock, _uncompressedBlock.Length);\n\n            _blockStream.Position = 0;\n        }\n\n        //read block but do not uncompress\n        public void ReadCompressedBytes(ExtendedBinaryReader reader)\n        {\n            ReadHeaderAndCompressedBytes(reader);\n        }\n\n        //write a block that has not been uncompressed\n        public void WriteCompressedBytes(ExtendedBinaryWriter writer)\n        {\n            writer.WriteOpt(_compressedLength);\n            writer.WriteOpt(_firstPosition);\n            //writer.WriteOpt(_lastPosition);\n            writer.WriteOpt(_count);\n            writer.Write(_compressedBlock, 0, _compressedLength);\n        }\n\n        public bool HasSpace(int length)\n        {\n            return BlockOffset + length + 2 * sizeof(int) <= _uncompressedBlock.Length; //saving space for length and position\n        }\n\n        // appends one entry; nothing is written when the block has no space left, so callers are\n        // expected to check HasSpace first\n        public void Add(byte[] data, int length, int position)\n        {\n            if (!HasSpace(length)) return;\n\n            if (_writer.BaseStream.Position == 0)\n            {\n                _firstPosition = position;\n                _lastPosition = position;\n            }\n\n            _writer.WriteOpt(length);\n            _writer.WriteOpt(position - _lastPosition);\n            _writer.Write(data, 0, length);\n\n            _lastPosition = position;\n            _count++;\n        }\n\n        // Walks the entries of the decompressed block and vcfPositions (starting at index j) in\n        // parallel and adds an AnnotationItem for every position found in both.\n        // Returns the index of the first vcf position that has not been handled yet.\n        public int AddAnnotations(List<int> vcfPositions, int j, List<AnnotationItem> annotationItems)\n        {\n            if (_uncompressedLength == 0 || _count == 0) return j;\n\n            _blockStream.Position = 0;\n            int position = _firstPosition;\n            var i = 0;\n\n            // header of the first entry: payload length, then the delta to the previous position\n            int length = _blockReader.ReadOptInt32();\n            position  += _blockReader.ReadOptInt32();\n\n            while (i < _count && j < vcfPositions.Count)\n            {\n                if (vcfPositions[j] < position)\n                {\n                    //go to next position from vcf\n                    j++;\n                    continue;\n                }\n\n                if (position == vcfPositions[j])\n                {\n                    //positions have matched\n                    var data = _blockReader.ReadBytes(length);\n                    annotationItems.Add(new AnnotationItem(position, data));\n                    j++;\n                }\n                else\n                {\n                    //this position is not needed, skip its payload\n                    _blockStream.Position += length;\n                }\n\n                i++;\n\n                // no header follows the last entry: reading on would interpret stale bytes behind\n                // the valid data and could run past the end of the buffer\n                if (i == _count) break;\n\n                length    = _blockReader.ReadOptInt32();\n                position += _blockReader.ReadOptInt32();\n            }\n\n            return j;\n        }\n\n        // compresses the entries added so far, writes the block and rewinds the internal writer;\n        // numBytes is the compressed length\n        public (int firstPosition, int lastPosition, int numBytes) Write(ExtendedBinaryWriter writer)\n        {\n            var compressedLength = _compressionAlgorithm.Compress(_uncompressedBlock, BlockOffset,\n                _compressedBlock, _compressedBlock.Length);\n\n            writer.WriteOpt(compressedLength);\n            writer.WriteOpt(_firstPosition);\n            //writer.WriteOpt(_lastPosition);\n            writer.WriteOpt(_count);\n            writer.Write(_compressedBlock, 0, compressedLength);\n\n            _writer.BaseStream.Position = 0;\n\n            return (_firstPosition, _lastPosition, compressedLength);\n        }\n\n        public void Clear()\n        {\n            _count = 0;\n            _firstPosition = -1;\n            _lastPosition = -1;\n            _compressedLength = 0;\n            _uncompressedLength = 0;\n            _blockStream.Position = 0;\n        }\n\n        public void Dispose()\n        {\n            _writer?.Dispose();\n            _blockReader?.Dispose();\n            _blockStream?.Dispose();\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/NSA/NsaIndex.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing Genome;\nusing IO;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Providers;\n\nnamespace VariantAnnotation.NSA\n{\n    public sealed class NsaIndex\n    {\n        private readonly Dictionary<ushort, List<NsaIndexBlock>> _chromBlocks;\n        private ushort _chromIndex = ushort.MaxValue;\n        private readonly ExtendedBinaryWriter _writer;\n\n        public readonly GenomeAssembly Assembly;\n        public readonly IDataSourceVersion Version;\n        public readonly string JsonKey;\n        public readonly int SchemaVersion;\n        public readonly bool IsArray;\n        public readonly bool MatchByAllele;\n        public readonly bool IsPositional;\n        public IEnumerable<ushort> ChromosomeIndices => _chromBlocks.Keys;\n\n        public Dictionary<ushort, List<NsaIndexBlock>> GetBlocks() => _chromBlocks;\n        public List<NsaIndexBlock> GetChromBlocks(ushort chromIndex) => _chromBlocks[chromIndex];\n\n        public NsaIndex(ExtendedBinaryWriter indexWriter, GenomeAssembly assembly, IDataSourceVersion version, string jsonKey, bool matchByAllele, bool isArray, int schemaVersion, bool isPositional)\n        {\n            _writer       = indexWriter;\n            MatchByAllele = matchByAllele;\n            JsonKey       = jsonKey;\n            Version       = version;\n            Assembly      = assembly;\n            IsArray       = isArray;\n            IsPositional  = isPositional;\n\n            indexWriter.Write((byte)assembly);\n            version.Write(indexWriter);\n            indexWriter.WriteOptAscii(jsonKey);\n            indexWriter.Write(matchByAllele);\n            indexWriter.Write(isArray);\n            indexWriter.WriteOpt(schemaVersion);\n            indexWriter.Write(isPositional);\n\n            _chromBlocks = new Dictionary<ushort, List<NsaIndexBlock>>();\n        }\n\n        public void Add(ushort chromIndex, int start, int end, 
long filePosition, int dataLength)\n        {\n            _chromIndex = chromIndex;\n            \n            if (! _chromBlocks.ContainsKey(_chromIndex))\n            {\n                _chromBlocks[_chromIndex] = new List<NsaIndexBlock>();\n            }\n\n            var indexBlock = new NsaIndexBlock(start, end, filePosition, dataLength);\n            _chromBlocks[_chromIndex].Add(indexBlock);\n        }\n\n        \n        public void Write()\n        {\n            _writer.WriteOpt(_chromBlocks.Count);\n\n            foreach ((ushort index, List<NsaIndexBlock> chunks) in _chromBlocks)\n            {\n                _writer.WriteOpt(index);\n                _writer.WriteOpt(chunks.Count);\n                foreach (NsaIndexBlock chunk in chunks)\n                {\n                    chunk.Write(_writer);\n                }\n            }\n        }\n\n        public void Write(Dictionary<ushort, List<NsaIndexBlock>>  chromBlocks)\n        {\n            _writer.WriteOpt(chromBlocks.Count);\n\n            foreach ((ushort index, List<NsaIndexBlock> chunks) in chromBlocks)\n            {\n                _writer.WriteOpt(index);\n                _writer.WriteOpt(chunks.Count);\n                foreach (NsaIndexBlock chunk in chunks)\n                {\n                    chunk.Write(_writer);\n                }\n            }\n        }\n\n\n        public NsaIndex(Stream stream)\n        {\n            using (var memStream = new MemoryStream())\n            using (var memReader = new ExtendedBinaryReader(memStream))\n            {\n                stream.CopyTo(memStream);//reading all bytes in stream to memStream\n                memStream.Position = 0;\n\n                Assembly      = (GenomeAssembly)memReader.ReadByte();\n                Version       = DataSourceVersion.Read(memReader);\n                JsonKey       = memReader.ReadAsciiString();\n                MatchByAllele = memReader.ReadBoolean();\n                IsArray       = 
memReader.ReadBoolean();\n                SchemaVersion = memReader.ReadOptInt32();\n                IsPositional  = memReader.ReadBoolean();\n\n                var chromCount = memReader.ReadOptInt32();\n                _chromBlocks = new Dictionary<ushort, List<NsaIndexBlock>>(chromCount);\n                for (var i = 0; i < chromCount; i++)\n                {\n                    var chromIndex = memReader.ReadOptUInt16();\n                    var chunkCount = memReader.ReadOptInt32();\n                    _chromBlocks[chromIndex] = new List<NsaIndexBlock>(chunkCount);\n                    for (var j = 0; j < chunkCount; j++)\n                        _chromBlocks[chromIndex].Add(new NsaIndexBlock(memReader));\n                }\n            }\n        }\n\n        public long GetFileLocation(ushort chromIndex, int start)\n        {\n            if (_chromBlocks == null || !_chromBlocks.TryGetValue(chromIndex, out var chunks)) return -1;\n            var index = BinarySearch(chunks, start);\n\n            if (index < 0) return -1;\n            return chunks[index].FilePosition;\n        }\n\n        public (long startFilePosition, int chunkCount) GetFileRange(ushort chromIndex, int start, int end)\n        {\n            //create a static empty entry.\n            if (_chromBlocks == null || !_chromBlocks.TryGetValue(chromIndex, out var chunks)) return (-1, 0);\n\n            long startFilePosition = -1;\n            long endFilePosition = -1;\n\n            int startChunkIndex = BinarySearch(chunks, start);\n            int endChunkIndex = BinarySearch(chunks, end);\n\n            if (startChunkIndex < 0) startChunkIndex = ~startChunkIndex;\n            if (startChunkIndex == chunks.Count) return (-1, 0); //start lands after the last chunk=> nothing to return\n            if (startChunkIndex < chunks.Count)\n                startFilePosition = chunks[startChunkIndex].FilePosition;\n\n            if (endChunkIndex < 0) endChunkIndex = ~endChunkIndex - 1; //if 
end lands on a gap, return the chunk to the left of end\n            if (endChunkIndex < 0) return (-1, 0); //end lands before the first chunk => nothing to return\n            if (endChunkIndex < chunks.Count)\n                endFilePosition = chunks[endChunkIndex].FilePosition + chunks[endChunkIndex].Length;\n\n            if (endFilePosition < startFilePosition) return (-1, 0); //both begin and end landed on the same gap.\n\n            return (startFilePosition, endChunkIndex - startChunkIndex + 1);\n        }\n\n        private static int BinarySearch(List<NsaIndexBlock> chunks, int position)\n        {\n            var begin = 0;\n            int end = chunks.Count - 1;\n\n            while (begin <= end)\n            {\n                int index = begin + (end - begin >> 1);\n\n                int ret = chunks[index].CompareTo(position);\n                if (ret == 0) return index;\n                if (ret < 0) begin = index + 1;\n                else end = index - 1;\n            }\n\n            return ~begin;\n        }\n\n    }\n    \n}"
  },
  {
    "path": "VariantAnnotation/NSA/NsaIndexBlock.cs",
    "content": "﻿using System;\nusing IO;\n\nnamespace VariantAnnotation.NSA\n{\n    public sealed class NsaIndexBlock\n    {\n        public readonly int Start;\n        public readonly int End;\n        public readonly long FilePosition;\n        public readonly int Length;\n\n        public NsaIndexBlock(int start, int end, long filePosition, int length)\n        {\n            Start       = start;\n            End         = end;\n            FilePosition = filePosition;\n            Length       = length;\n        }\n\n        [Obsolete(\"Use a factory method instead of an extra constructor.\")]\n        public NsaIndexBlock(ExtendedBinaryReader reader)\n        {\n            Start       = reader.ReadOptInt32();\n            End         = reader.ReadOptInt32();\n            FilePosition = reader.ReadOptInt64();\n            Length       = reader.ReadOptInt32();\n        }\n\n        public void Write(ExtendedBinaryWriter writer)\n        {\n            writer.WriteOpt(Start);\n            writer.WriteOpt(End);\n            writer.WriteOpt(FilePosition);\n            writer.WriteOpt(Length);\n        }\n\n        public int CompareTo(int position)\n        {\n            if (Start <= position && position <= End) return 0;\n            return Start.CompareTo(position);\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/NSA/NsaReader.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing Compression.Algorithms;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing IO;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.SA;\r\n\r\nnamespace VariantAnnotation.NSA\r\n{\r\n    public sealed class AnnotationItem\r\n    {\r\n        public readonly int Position;\r\n        public readonly byte[] Data;\r\n\r\n        public AnnotationItem(int position, byte[] data)\r\n        {\r\n            Position = position;\r\n            Data = data;\r\n        }\r\n    }\r\n\r\n    public sealed class NsaReader : INsaReader\r\n    {\r\n        private readonly Stream _stream;\r\n        private readonly ExtendedBinaryReader _reader;\r\n        public GenomeAssembly Assembly { get; }\r\n        private readonly NsaIndex _index;\r\n        public IDataSourceVersion Version { get; }\r\n\r\n        private readonly NsaBlock _block;\r\n\r\n        public string JsonKey { get; }\r\n        public bool MatchByAllele { get; }\r\n        public bool IsArray { get; }\r\n        public bool IsPositional { get; }\r\n        public IEnumerable<ushort> ChromosomeIndices => _index.ChromosomeIndices;\r\n        private readonly List<AnnotationItem> _annotations;\r\n        private readonly int _blockSize;\r\n        \r\n        private ExtendedBinaryReader _annotationReader;\r\n        private MemoryStream         _annotationStream;\r\n        private byte[]               _annotationBuffer;\r\n\r\n        \r\n        public NsaReader(Stream dataStream, Stream indexStream, int blockSize = SaCommon.DefaultBlockSize)\r\n        {\r\n            _stream = dataStream;\r\n            _blockSize = blockSize;\r\n            _reader = new ExtendedBinaryReader(_stream);\r\n            _block = new NsaBlock(new Zstandard(), blockSize);\r\n\r\n            _index = new NsaIndex(indexStream);\r\n            Assembly = 
_index.Assembly;\r\n            Version = _index.Version;\r\n            JsonKey = _index.JsonKey;\r\n            MatchByAllele = _index.MatchByAllele;\r\n            IsArray = _index.IsArray;\r\n            IsPositional = _index.IsPositional;\r\n\r\n            if (_index.SchemaVersion != SaCommon.SchemaVersion) throw new UserErrorException($\"SA schema version mismatch. Expected {SaCommon.SchemaVersion}, observed {_index.SchemaVersion} for {JsonKey}\");\r\n\r\n            _annotations = new List<AnnotationItem>(64 * 1024);\r\n            _annotationBuffer = new byte[1024*1024];\r\n            _annotationStream = new MemoryStream(_annotationBuffer);\r\n            _annotationReader = new ExtendedBinaryReader(_annotationStream);\r\n        }\r\n\r\n        public void PreLoad(Chromosome chrom, List<int> positions)\r\n        {\r\n            if (positions == null || positions.Count == 0) return;\r\n\r\n            _annotations.Clear();\r\n            for (var i = 0; i < positions.Count;)\r\n            {\r\n                int position = positions[i];\r\n                long fileLocation = _index.GetFileLocation(chrom.Index, position);\r\n                if (fileLocation == -1)\r\n                {\r\n                    i++;\r\n                    continue;\r\n                }\r\n\r\n                //only reconnect if necessary\r\n                if (_reader.BaseStream.Position != fileLocation)\r\n                    _reader.BaseStream.Position = fileLocation;\r\n                _block.Read(_reader);\r\n                var newIndex = _block.AddAnnotations(positions, i, _annotations);\r\n                if (newIndex == i) i++; //no positions were found in this block\r\n                else i = newIndex;\r\n            }\r\n        }\r\n\r\n        public List<NsaIndexBlock> GetIndexBlocks(ushort chromIndex) => _index.GetChromBlocks(chromIndex);\r\n\r\n        public bool HasDataBlocks(ushort chromIndex) {\r\n            var (location, _) = 
_index.GetFileRange(chromIndex, 1, int.MaxValue);\r\n            return location != -1;\r\n        }\r\n        \r\n        public IEnumerable<NsaBlock> GetCompressedBlocks(ushort chromIndex)\r\n        {\r\n            var (location, blockCount) = _index.GetFileRange(chromIndex, 1, int.MaxValue);\r\n            if (location == -1) yield break;\r\n\r\n            _reader.BaseStream.Position = location;\r\n\r\n            for (var i = 0; i < blockCount; i++)\r\n            {\r\n                var block = new NsaBlock(new Zstandard(), _blockSize);\r\n                block.ReadCompressedBytes(_reader);\r\n                yield return block;\r\n            }\r\n        }\r\n\r\n        private void ExtractAnnotations(byte[] data, List<(string refAllele, string altAllele, string annotation)> annotations)\r\n        {\r\n            if (_annotationBuffer.Length < data.Length)\r\n            {\r\n                _annotationBuffer = new byte[2 *data.Length];\r\n                _annotationReader.Dispose();\r\n                _annotationStream?.Dispose();\r\n                _annotationStream = new MemoryStream(_annotationBuffer);\r\n                _annotationReader = new ExtendedBinaryReader(_annotationStream);\r\n            }\r\n            Array.Copy(data, _annotationBuffer, data.Length);\r\n            _annotationStream.Position = 0;\r\n            if (IsPositional)\r\n            {\r\n                var positionalAnno = _annotationReader.ReadString();\r\n                annotations.Add((null, null, positionalAnno));\r\n                return;\r\n            }\r\n\r\n            int count       = _annotationReader.ReadOptInt32();\r\n            for (var i = 0; i < count; i++)\r\n            {\r\n                string refAllele  = _annotationReader.ReadAsciiString();\r\n                string altAllele  = _annotationReader.ReadAsciiString();\r\n                string annotation = _annotationReader.ReadString();\r\n                annotations.Add((refAllele ?? 
\"\", altAllele ?? \"\", annotation));\r\n            }\r\n        }\r\n\r\n        public void GetAnnotation(int position, List<(string refAllele, string altAllele, string annotation)> annotations)\r\n        {\r\n            annotations.Clear();\r\n            int index = BinarySearch(position);\r\n            if(index < 0) return;\r\n            ExtractAnnotations(_annotations[index].Data, annotations);\r\n        }\r\n\r\n        private int BinarySearch(int position)\r\n        {\r\n            var begin = 0;\r\n            int end = _annotations.Count - 1;\r\n\r\n            while (begin <= end)\r\n            {\r\n                int index = begin + (end - begin >> 1);\r\n\r\n                int ret = _annotations[index].Position.CompareTo(position);\r\n                if (ret == 0) return index;\r\n                if (ret < 0) begin = index + 1;\r\n                else end = index - 1;\r\n            }\r\n\r\n            return ~begin;\r\n        }\r\n\r\n        public void Dispose()\r\n        {\r\n            _stream?.Dispose();\r\n            _block?.Dispose();\r\n            _annotationStream?.Dispose();\r\n            _annotationReader?.Dispose();\r\n        } \r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/NSA/NsiReader.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing System.IO.Compression;\nusing System.Linq;\nusing System.Text;\nusing Compression.Algorithms;\nusing Compression.FileHandling;\nusing ErrorHandling.Exceptions;\nusing Genome;\nusing Intervals;\nusing IO;\nusing VariantAnnotation.Algorithms;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Interface.SA;\nusing VariantAnnotation.IO;\nusing VariantAnnotation.Providers;\nusing VariantAnnotation.SA;\nusing Variants;\n\nnamespace VariantAnnotation.NSA\n{\n    public sealed class NsiReader : INsiReader\n    {\n        public GenomeAssembly Assembly { get; }\n        public IDataSourceVersion Version { get; }\n        public string JsonKey { get; }\n        public ReportFor ReportFor { get; }\n        private readonly IntervalForest<string> _intervalForest;\n        \n        private NsiReader(GenomeAssembly assembly, IDataSourceVersion version, string jsonKey, ReportFor reportFor, IntervalArray<string>[] intervalArrays)\n        {\n            Assembly        = assembly;\n            Version         = version;\n            JsonKey         = jsonKey;\n            ReportFor       = reportFor;\n            _intervalForest = new IntervalForest<string>(intervalArrays);\n        }\n\n        public static NsiReader Read(Stream stream)\n        {\n            (IDataSourceVersion version, GenomeAssembly assembly, string jsonKey, ReportFor reportFor, int schemaVersion) = ReadHeader(stream);\n            if (schemaVersion != SaCommon.SchemaVersion)\n                throw new UserErrorException($\"Schema version mismatch!! 
Expected {SaCommon.SchemaVersion}, observed {schemaVersion} for {jsonKey}\");\n\n            using (var blockStream = new BlockStream(new Zstandard(), stream, CompressionMode.Decompress))\n            using (var reader = new ExtendedBinaryReader(blockStream))\n            {\n                int count = reader.ReadOptInt32();\n                var suppIntervals = new Dictionary<ushort, List<Interval<string>>>();\n                for (var i = 0; i < count; i++)\n                {\n                    var saInterval = SuppInterval.Read(reader);\n                    if (suppIntervals.TryGetValue(saInterval.Chromosome.Index, out var intervals)) intervals.Add(new Interval<string>(saInterval.Start, saInterval.End, saInterval.GetJsonString()));\n                    else suppIntervals[saInterval.Chromosome.Index] = new List<Interval<string>> { new Interval<string>(saInterval.Start, saInterval.End, saInterval.GetJsonString()) };\n                }\n\n                var maxChromIndex = suppIntervals.Keys.Max();\n                var intervalArrays = new IntervalArray<string>[maxChromIndex + 1];\n                for (ushort i = 0; i < intervalArrays.Length; i++)\n                {\n                    intervalArrays[i] = suppIntervals.ContainsKey(i)\n                        ? 
new IntervalArray<string>(suppIntervals[i].ToArray())\n                        : null;\n                }\n                \n                return new NsiReader(assembly, version, jsonKey, reportFor, intervalArrays);\n            }\n            \n        }\n\n        private static (IDataSourceVersion, GenomeAssembly, string, ReportFor, int) ReadHeader(Stream stream)\n        {\n\n            using (var reader = new ExtendedBinaryReader(stream, Encoding.UTF8, true))\n            {\n                var identifier = reader.ReadAsciiString();\n                if(identifier != SaCommon.NsiIdentifier)\n                    throw new InvalidDataException($\"Failed to find identifier!!Expected: {SaCommon.NsiIdentifier}, observed:{identifier}\");\n\n                var version       = DataSourceVersion.Read(reader);\n                var assembly      = (GenomeAssembly)reader.ReadByte();\n                var jsonKey       = reader.ReadAsciiString();\n                var reportFor     = (ReportFor)reader.ReadByte();\n                int schemaVersion = reader.ReadInt32();\n                \n                var guard = reader.ReadUInt32();\n                if (guard != SaCommon.GuardInt)\n                    throw new InvalidDataException($\"Failed to find guard int!!Expected: {SaCommon.GuardInt}, observed:{guard}\");\n\n                return (version, assembly, jsonKey, reportFor, schemaVersion);\n            }\n        }\n\n        public IEnumerable<string> GetAnnotation(IVariant variant)\n        {\n            var start = variant.Start;\n            var end   = variant.End;\n\n            // for insertions, the end position is one past the last base\n            if (end < start) Swap.Int(ref start, ref end);\n            var overlappingSvs =\n                  _intervalForest.GetAllOverlappingIntervals(variant.Chromosome.Index, start, end);\n              \n            if (overlappingSvs == null) return null;\n\n            var jsonStrings = new List<string>();\n        
    foreach (var interval in overlappingSvs)\n            {\n                var (reciprocalOverlap, annotationOverlap) = SuppIntervalUtilities.GetOverlapFractions(\n                    new ChromosomeInterval(variant.Chromosome, interval.Begin, interval.End), variant);\n                jsonStrings.Add(AddOverlapToAnnotation(interval.Value, reciprocalOverlap, annotationOverlap));\n            }\n\n            return jsonStrings;\n        }\n\n        private static string AddOverlapToAnnotation(string jsonString, double? reciprocalOverlap, double? annotationOverlap)\n        {\n            if (reciprocalOverlap != null)\n                jsonString+=JsonObject.Comma + \"\\\"reciprocalOverlap\\\":\" + reciprocalOverlap.Value.ToString(\"0.#####\");\n            if (annotationOverlap != null)\n                jsonString += JsonObject.Comma + \"\\\"annotationOverlap\\\":\" + annotationOverlap.Value.ToString(\"0.#####\");\n            return jsonString;\n        }\n\n        public bool OverlapsAny(IChromosomeInterval variant)\n        {\n            return _intervalForest.OverlapsAny(variant.Chromosome.Index, variant.Start, variant.End);\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/NSA/NsiWriter.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.IO.Compression;\r\nusing System.Linq;\r\nusing System.Text;\r\nusing Compression.Algorithms;\r\nusing Compression.FileHandling;\r\nusing Genome;\r\nusing IO;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.Providers;\r\nusing VariantAnnotation.SA;\r\n\r\nnamespace VariantAnnotation.NSA\r\n{\r\n    public sealed class NsiWriter:IDisposable\r\n    {\r\n        private readonly Stream _stream;\r\n        private readonly ExtendedBinaryWriter _writer;\r\n        private readonly bool _leaveOpen;\r\n        \r\n        public NsiWriter(Stream stream, DataSourceVersion version,\r\n            GenomeAssembly assembly, string jsonKey, ReportFor reportFor, int schemaVersion,\r\n            bool leaveOpen = false)\r\n        {\r\n            _stream = stream;\r\n            _leaveOpen = leaveOpen;\r\n            WriteHeader(version, assembly, jsonKey, reportFor, schemaVersion);\r\n\r\n            var blockStream = new BlockStream(new Zstandard(), stream, CompressionMode.Compress);\r\n            _writer = new ExtendedBinaryWriter(blockStream, Encoding.UTF8, leaveOpen);\r\n\r\n        }\r\n\r\n        private void WriteHeader(DataSourceVersion version, GenomeAssembly assembly, string jsonKey, ReportFor reportFor, int schemaVersion)\r\n        {\r\n            using (var writer = new ExtendedBinaryWriter(_stream, Encoding.UTF8, true))\r\n            {\r\n                writer.WriteOptAscii(SaCommon.NsiIdentifier);\r\n                version.Write(writer);\r\n                writer.Write((byte)assembly);\r\n                writer.WriteOptAscii(jsonKey);\r\n                writer.Write((byte)reportFor);\r\n                writer.Write(schemaVersion);\r\n                writer.Write(SaCommon.GuardInt);\r\n            }\r\n        }\r\n\r\n        public void Write(IEnumerable<ISuppIntervalItem> siItems)\r\n        {\r\n            var sortedItems = 
siItems.OrderBy(x => x.Chromosome.Index).ThenBy(x => x.Start).ThenBy(x => x.End).ToList();\r\n\r\n            Console.WriteLine($\"Writing {sortedItems.Count} intervals to database...\");\r\n            _writer.WriteOpt(sortedItems.Count);\r\n            \r\n            foreach (ISuppIntervalItem item in sortedItems)\r\n            {\r\n                _writer.WriteOptAscii(item.Chromosome.EnsemblName);\r\n                _writer.WriteOptAscii(item.Chromosome.UcscName);\r\n                _writer.WriteOpt(item.Chromosome.Index);\r\n                _writer.WriteOpt(item.Start);\r\n                _writer.WriteOpt(item.End);\r\n                _writer.Write(item.GetJsonString());\r\n            }\r\n        }\r\n\r\n        public void Dispose()\r\n        {\r\n            _writer?.Dispose();\r\n            if(!_leaveOpen) _stream?.Dispose();\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/NSA/RefMinorDbReader.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing IO;\r\nusing VariantAnnotation.SA;\r\n\r\nnamespace VariantAnnotation.NSA\r\n{\r\n    public sealed class RefMinorDbReader : IDisposable\r\n    {\r\n        private readonly ExtendedBinaryReader _reader;\r\n        private readonly RefMinorIndex _index;\r\n\r\n        private readonly Dictionary<int, string> _annotations;\r\n        private readonly Stream _dbStream, _indexStream;\r\n\r\n        public RefMinorDbReader(Stream dbStream, Stream indexStream)\r\n        {\r\n            _dbStream = dbStream;\r\n            _indexStream = indexStream;\r\n            _reader      = new ExtendedBinaryReader(dbStream);\r\n            _index       = new RefMinorIndex(new ExtendedBinaryReader(indexStream));\r\n            _annotations = new Dictionary<int, string>();\r\n\r\n            if (_index.SchemaVersion != SaCommon.SchemaVersion)\r\n                throw new UserErrorException($\"SA schema version mismatch. 
Expected {SaCommon.SchemaVersion}, observed {_index.SchemaVersion}\");            \r\n        }\r\n\r\n        private Chromosome _chromosome;\r\n\r\n        private void PreLoad(Chromosome chrom)\r\n        {\r\n            _annotations.Clear();\r\n            _chromosome = chrom;\r\n\r\n            (long startLocation, int numBytes, int refMinorCount) = _index.GetFileRange(chrom.Index);\r\n            if (startLocation == -1) return;\r\n            _reader.BaseStream.Position = startLocation;\r\n            var buffer = _reader.ReadBytes(numBytes);\r\n\r\n            using (var memStream = new MemoryStream(buffer))\r\n            using(var reader = new ExtendedBinaryReader(memStream))\r\n            {\r\n                for (var i = 0; i < refMinorCount; i++)\r\n                {\r\n                    var position = reader.ReadOptInt32();\r\n                    var globalMajor = reader.ReadAsciiString();\r\n\r\n                    _annotations[position] = globalMajor;\r\n                }\r\n            }\r\n\r\n        }\r\n\r\n        public string GetGlobalMajorAllele(Chromosome chromosome, int position)\r\n        {\r\n            if (_chromosome == null || chromosome.Index != _chromosome.Index)\r\n                PreLoad(chromosome);\r\n\r\n            return _annotations.TryGetValue(position, out string globalMajor) ? globalMajor : null;\r\n\r\n            \r\n        }\r\n\r\n        public void Dispose()\r\n        {\r\n            _dbStream?.Dispose();\r\n            _indexStream?.Dispose();\r\n            _reader?.Dispose();\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/NSA/RefMinorIndex.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Genome;\r\nusing IO;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.Providers;\r\n\r\nnamespace VariantAnnotation.NSA\r\n{\r\n    public sealed class RefMinorIndex\r\n    {\r\n        private readonly ExtendedBinaryWriter _writer;\r\n        private readonly Dictionary<ushort, (long location, int numBytes, int count)> _chromBlocks;\r\n        private readonly IDataSourceVersion _version;\r\n        private readonly GenomeAssembly _assembly;\r\n        public readonly int SchemaVersion;\r\n\r\n        public RefMinorIndex(ExtendedBinaryWriter writer, GenomeAssembly assembly, IDataSourceVersion version, int schemaVersion)\r\n        {\r\n            _writer      = writer;\r\n            _chromBlocks = new Dictionary<ushort, (long location, int numBytes, int count)>();\r\n\r\n            _assembly     = assembly;\r\n            _version      = version;\r\n            SchemaVersion = schemaVersion;\r\n        }\r\n\r\n        private ushort _chromIndex  = ushort.MaxValue;\r\n        private long _chromLocation =-1;\r\n        private int _blockLength    =-1;\r\n        private int _count;\r\n        \r\n        public void Add(ushort chromIndex, long location)\r\n        {\r\n            if (_chromIndex != chromIndex)\r\n            {\r\n                _blockLength = (int) (location - _chromLocation);\r\n\r\n                //if you try to add a chrom twice (i.e. 
the positions are not sorted by chrom), this will throw an exception\r\n                _chromBlocks.Add(_chromIndex, (_chromLocation, _blockLength, _count));\r\n\r\n                _chromIndex = chromIndex;\r\n                _chromLocation = location;\r\n                _count = 1;\r\n            }\r\n            else _count++;\r\n\r\n        }\r\n\r\n        public (long location, int numBytes, int count) GetFileRange(ushort chromIndex)\r\n        {\r\n            return _chromBlocks.TryGetValue(chromIndex, out var locationSize) ? locationSize : (-1, -1, 0);\r\n        }\r\n\r\n        public void Write(long finalLocation)\r\n        {\r\n            _blockLength = (int)(finalLocation - _chromLocation);\r\n\r\n            //adding the last chrom to index\r\n            _chromBlocks.Add(_chromIndex, (_chromLocation, _blockLength, _count));\r\n\r\n            _writer.Write((byte)_assembly);\r\n            _version.Write(_writer);\r\n            _writer.WriteOpt(SchemaVersion);\r\n\r\n            _writer.WriteOpt(_chromBlocks.Count);\r\n\r\n            foreach ((ushort chromIndex, (long location, int numBytes, int count)) in _chromBlocks)\r\n            {\r\n                _writer.WriteOpt(chromIndex);\r\n                _writer.WriteOpt(location);\r\n                _writer.WriteOpt(numBytes); \r\n                _writer.WriteOpt(count);\r\n            }\r\n        }\r\n\r\n        public RefMinorIndex(ExtendedBinaryReader reader)\r\n        {\r\n            _assembly      = (GenomeAssembly) reader.ReadByte();\r\n            _version       = DataSourceVersion.Read(reader);\r\n            SchemaVersion = reader.ReadOptInt32();\r\n\r\n            var chromCount = reader.ReadOptInt32();\r\n\r\n            _chromBlocks= new Dictionary<ushort, (long location, int numBytes, int count)>(chromCount);\r\n\r\n            for (int i = 0; i < chromCount; i++)\r\n            {\r\n                var chromIndex = reader.ReadOptUInt16();\r\n                var location   = 
reader.ReadOptInt64();\r\n                var numBytes   = reader.ReadOptInt32();\r\n                int count      = reader.ReadOptInt32();\r\n\r\n                _chromBlocks.Add(chromIndex, (location, numBytes, count));\r\n            }\r\n        }\r\n        \r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/NSA/SuppInterval.cs",
    "content": "﻿using Genome;\nusing IO;\nusing VariantAnnotation.Interface.SA;\n\nnamespace VariantAnnotation.NSA\n{\n    public sealed class SuppInterval : ISuppIntervalItem\n    {\n        public int Start { get; }\n        public int End { get; }\n        public Chromosome Chromosome { get; }\n        private readonly string _jsonString;\n\n        private SuppInterval(Chromosome chromosome, int start, int end, string jsonString)\n        {\n            Chromosome  = chromosome;\n            Start       = start;\n            End         = end;\n            _jsonString = jsonString;\n        }\n\n        public static SuppInterval Read(ExtendedBinaryReader reader)\n        {\n            string ensemblName = reader.ReadAsciiString();\n            string ucscName    = reader.ReadAsciiString();\n            ushort chromIndex  = reader.ReadOptUInt16();\n            var chromosome     = new Chromosome(ucscName, ensemblName, null, null, 1, chromIndex);\n\n            var start       = reader.ReadOptInt32();\n            var end         = reader.ReadOptInt32();\n            var jsonString  = reader.ReadString();\n\n            return new SuppInterval(chromosome, start, end, jsonString);\n        }\n\n        public string GetJsonString() => _jsonString;\n    }\n}"
  },
  {
    "path": "VariantAnnotation/NSA/SuppIntervalUtilities.cs",
    "content": "﻿using System;\nusing Genome;\nusing Variants;\n\nnamespace VariantAnnotation.NSA\n{\n    public static class SuppIntervalUtilities\n    {\n        public static (double? ReciprocalOverlap, double? AnnotationOverlap) GetOverlapFractions( ChromosomeInterval saInterval, ISimpleVariant variant)\n        {\n            if (saInterval.Chromosome.Index != variant.Chromosome.Index) return (null, null);\n\n            //skip for insertions\n            if (saInterval.Start >= saInterval.End || variant.Type == VariantType.insertion) return (null, null);\n\n            //skip for break-ends\n            if (variant.Type == VariantType.translocation_breakend) return (null, null);\n\n            if (!Intervals.Utilities.Overlaps(saInterval.Start, saInterval.End, variant.Start, variant.End)) return (null, null);\n\n            var overlapSize = (double)(Math.Min(saInterval.End, variant.End) - Math.Max(saInterval.Start, variant.Start) + 1);\n            int annoSize    = saInterval.End - saInterval.Start + 1;\n            int varSize     = variant.End - variant.Start + 1;\n            int maxSize     = Math.Max(annoSize, varSize);\n\n            return (overlapSize / maxSize, overlapSize / annoSize);\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/NSA/SupplementaryAnnotation.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.Text;\nusing ErrorHandling.Exceptions;\nusing VariantAnnotation.Interface.SA;\n\nnamespace VariantAnnotation.NSA\n{\n    public sealed class SupplementaryAnnotation:ISupplementaryAnnotation\n    {\n        public string JsonKey { get; }\n        private readonly bool _isArray;\n        private readonly bool _isPositional;\n        private readonly string _jsonString;\n        private readonly IEnumerable<string> _jsonStrings;\n        \n        public SupplementaryAnnotation(string key, bool isArray, bool isPositional, string jsonString, IEnumerable<string> jsonStrings)\n        {\n            JsonKey       = key;\n            _isArray      = isArray;\n            _isPositional = isPositional;\n            _jsonString   = jsonString;\n            _jsonStrings  = jsonStrings;\n\n            if (_isArray && _jsonStrings == null)\n            {\n                throw new UserErrorException($\"No list of json strings provided for a supplementary annotation of array type!! 
JsonKey: {JsonKey}\");\n            }\n            if (!_isArray && string.IsNullOrEmpty(jsonString))\n                throw new UserErrorException(\"ERROR: No json string provided for a supplementary annotation of non-array type!!\");\n        }\n\n        public void SerializeJson(StringBuilder sb)\n        {\n            if (_isPositional)\n            {\n                sb.Append(_jsonString);\n                return;\n            }\n\n            if (!_isArray)\n            {\n                sb.Append('{');\n                sb.Append(_jsonString);\n                sb.Append('}');\n            }\n            else\n            {\n                sb.Append('[');\n                var firstString = true;\n                foreach (var jsonString in _jsonStrings)\n                {\n                    if (!firstString) sb.Append(',');\n                    if (!jsonString.StartsWith(\"\\\"rs\"))\n                    {\n                        sb.Append('{');\n                        sb.Append(jsonString);\n                        sb.Append('}');\n                    }\n                    else sb.Append(jsonString);\n                    firstString = false;\n                }\n                sb.Append(']');\n            }\n        }\n\n        \n    }\n}"
  },
  {
    "path": "VariantAnnotation/PerformanceMetrics.cs",
    "content": "﻿using System;\r\nusing System.Diagnostics;\r\nusing CommandLine.Utilities;\r\nusing Genome;\r\nusing IO;\r\n\r\nnamespace VariantAnnotation\r\n{\r\n    public sealed class PerformanceMetrics\r\n    {\r\n        public readonly TimeKeeper Cache          = new TimeKeeper();\r\n        public readonly TimeKeeper Annotation     = new TimeKeeper();\r\n        public readonly TimeKeeper Preload        = new TimeKeeper();\r\n        public readonly TimeKeeper SaPositionScan = new TimeKeeper();\r\n\r\n        public void ShowAnnotationEntry(Chromosome chromosome, int numVariants)\r\n        {\r\n            Annotation.Stop();\r\n            \r\n            string referenceName     = GetPaddedField(chromosome.UcscName, 38);\r\n            string preloadTime       = Preload.GetTime();\r\n            string annotationTime    = Annotation.GetTime();\r\n            double variantsPerSecond = Annotation.GetIterationsPerSecond(numVariants);\r\n            \r\n            Logger.WriteLine($\"{referenceName}  {preloadTime}  {annotationTime}  {variantsPerSecond,11:N0}\");\r\n        }\r\n\r\n        public void ShowCacheLoad()\r\n        {\r\n            Cache.Stop();\r\n            string time = Cache.GetTime();\r\n            Logger.WriteLine($\"Cache                                               {time}\");\r\n        }\r\n\r\n        public void ShowSaPositionScanLoad(int numPositions)\r\n        {\r\n            SaPositionScan.Stop();\r\n            string time               = SaPositionScan.GetTime();\r\n            double positionsPerSecond = SaPositionScan.GetIterationsPerSecond(numPositions);\r\n            Logger.WriteLine($\"SA Position Scan                                    {time}  {positionsPerSecond,11:N0}\");\r\n        }\r\n\r\n        private static string GetPaddedField(string s, int fieldLength)\r\n        {\r\n            if (s.Length > fieldLength) return s.Substring(0, fieldLength - 3) + \"...\";\r\n            return s.PadRight(fieldLength, ' 
');\r\n        }\r\n\r\n        public static void ShowAnnotationHeader() =>\r\n            MetricsCommon.DisplayHeader(\"\\nReference                                Preload    Annotation   Variants/s\");\r\n        \r\n        public static void ShowInitializationHeader() =>\r\n            MetricsCommon.DisplayHeader(\"Initialization                                         Time     Positions/s\");\r\n        \r\n        public void ShowSummaryTable()\r\n        {\r\n            MetricsCommon.DisplayHeader(\"\\nSummary                                                Time         Percent\");\r\n\r\n            long processTicks        = GetTotalProcessTicks();\r\n            long initializationTicks = Cache.TotalTicks + SaPositionScan.TotalTicks;\r\n            long annotationTicks     = Annotation.TotalTicks;\r\n            long preloadTicks        = Preload.TotalTicks;\r\n\r\n            ShowSummaryEntry(\"Initialization\", initializationTicks, processTicks);\r\n            ShowSummaryEntry(\"Preload\", preloadTicks, processTicks);\r\n            ShowSummaryEntry(\"Annotation\", annotationTicks, processTicks);\r\n        }\r\n\r\n        private void ShowSummaryEntry(string description, long entryTicks, long processTicks)\r\n        {\r\n            string paddedDescription = GetPaddedField(description, 50);\r\n            string time              = Benchmark.ToHumanReadable(TimeSpan.FromTicks(entryTicks));\r\n            double percentage        = entryTicks / (double) processTicks * 100.0;\r\n            Logger.WriteLine($\"{paddedDescription}  {time}  {percentage, 9:0.0} %\");\r\n        }\r\n\r\n        private static long GetTotalProcessTicks() => DateTime.Now.Ticks - Process.GetCurrentProcess().StartTime.Ticks;\r\n    }\r\n\r\n    public sealed class TimeKeeper\r\n    {\r\n        public long TotalTicks { get; private set; }\r\n\r\n        private readonly Benchmark _benchmark = new Benchmark();\r\n        private          TimeSpan  _elapsedTime;\r\n        
\r\n        public void Stop()\r\n        {\r\n            _elapsedTime = _benchmark.GetElapsedTime();\r\n            TotalTicks += _elapsedTime.Ticks;\r\n        }\r\n\r\n        public void   Start()                         => _benchmark.Reset();\r\n        public string GetTime()                       => Benchmark.ToHumanReadable(_elapsedTime);\r\n        public double GetIterationsPerSecond(int num) => Benchmark.GetElapsedIterationsPerSecond(_elapsedTime, num);\r\n    }\r\n    \r\n    public static class MetricsCommon\r\n    {\r\n        private const int LineLength = 75;\r\n        private static readonly string Divider = new string('-', LineLength);\r\n\r\n        public static void DisplayHeader(string s)\r\n        {\r\n            Logger.SetBold();\r\n            Logger.WriteLine(s);\r\n            Logger.ResetColor();\r\n            Logger.WriteLine(Divider);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/PhyloP/NpdIndex.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.IO;\nusing Genome;\nusing IO;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Providers;\n\nnamespace VariantAnnotation.PhyloP\n{\n    public sealed class NpdIndex\n    {\n        private readonly Dictionary<ushort, (long, int)> _chromRanges;\n        private readonly ExtendedBinaryWriter _writer;\n        public readonly IDataSourceVersion Version;\n        public readonly GenomeAssembly Assembly;\n        public readonly int SchemaVersion;\n        private readonly string _jsonKey;\n        public readonly Dictionary<double, byte> ScoreMap;\n\n        public const int MaxChromLength = 250_000_000;\n\n        public NpdIndex(Stream stream, GenomeAssembly assembly, IDataSourceVersion version, string jsonKey, int schemaVersion)\n        {\n            _writer       = new ExtendedBinaryWriter(stream);\n            Assembly      = assembly;\n            Version       = version;\n            _jsonKey       = jsonKey;\n            SchemaVersion = schemaVersion;\n\n            _chromRanges = new Dictionary<ushort, (long, int)>(32);\n\n        }\n\n        public void Add(ushort chromIndex, long location, int byteCount)\n        {\n            _chromRanges.Add(chromIndex, (location, byteCount));\n        }\n\n        public (long location, int numBytes) GetFileRange(ushort chromIndex)\n        {\n            return _chromRanges.TryGetValue(chromIndex, out var fileRange) ? 
fileRange: (-1, -1);\n        }\n\n        public void Write(Dictionary<double, byte> scoreMap)\n        {\n            _writer.Write((byte)Assembly);\n            Version.Write(_writer);\n            _writer.WriteOptAscii(_jsonKey);\n            _writer.WriteOpt(SchemaVersion);\n\n            _writer.WriteOpt(_chromRanges.Count);\n\n            foreach ((ushort chromIndex, (long location, int byteCount)) in _chromRanges)\n            {\n                _writer.WriteOpt(chromIndex);\n                _writer.WriteOpt(location);\n                _writer.WriteOpt(byteCount);\n            }\n\n            _writer.WriteOpt(scoreMap.Count);\n            foreach ((double score, byte code) in scoreMap)\n            {\n                _writer.Write(score);\n                _writer.Write(code);\n            }\n        }\n\n        public NpdIndex(ExtendedBinaryReader reader)\n        {\n            Assembly = (GenomeAssembly)reader.ReadByte();\n            Version = DataSourceVersion.Read(reader);\n            _jsonKey = reader.ReadAsciiString();\n            SchemaVersion = reader.ReadOptInt32();\n\n            var chromCount = reader.ReadOptInt32();\n\n            _chromRanges = new Dictionary<ushort, (long, int)>(chromCount);\n\n            for (int i = 0; i < chromCount; i++)\n            {\n                var chromIndex = reader.ReadOptUInt16();\n                var location   = reader.ReadOptInt64();\n                var numBytes   = reader.ReadOptInt32();\n\n                _chromRanges.Add(chromIndex, (location, numBytes));\n            }\n\n            var scoreCount = reader.ReadOptInt32();\n            var scoreMap = new Dictionary<double, byte>(scoreCount);\n            for (int i = 0; i < scoreCount; i++)\n            {\n                var score = reader.ReadDouble();\n                var code = reader.ReadByte();\n                scoreMap.Add(score, code);\n            }\n\n            ScoreMap = scoreMap;\n\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/PhyloP/NpdReader.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing Compression.Algorithms;\nusing ErrorHandling.Exceptions;\nusing Genome;\nusing IO;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.SA;\n\nnamespace VariantAnnotation.PhyloP\n{\n    public sealed class NpdReader:IDisposable\n    {\n        private readonly ExtendedBinaryReader _reader;\n\n        private readonly byte[] _scores;\n        private readonly Zstandard _zstd;\n\n        private readonly Dictionary<byte, double> _scoreMap;\n\n        private readonly NpdIndex _index;\n        public GenomeAssembly Assembly { get; }\n        public IDataSourceVersion Version { get; }\n\n        private readonly Stream _dbStream;\n        private readonly Stream _indexStream;\n\n        public NpdReader(Stream dbStream, Stream indexStream)\n        {\n            _dbStream = dbStream;\n            _indexStream = indexStream;\n            _reader = new ExtendedBinaryReader(dbStream);\n\n            _index   = new NpdIndex(new ExtendedBinaryReader(indexStream));\n            Assembly = _index.Assembly;\n            Version  = _index.Version;\n\n            if (_index.SchemaVersion != SaCommon.SchemaVersion)\n                throw new UserErrorException($\"SA schema version mismatch. 
Expected {SaCommon.SchemaVersion}, observed {_index.SchemaVersion}\");\n\n            var scoreMap= new Dictionary<byte, double>();\n            foreach ((double score, byte code)in _index.ScoreMap)\n            {\n                scoreMap.Add(code, score);\n            }\n\n            _scoreMap = scoreMap;\n            _zstd = new Zstandard();\n            _scores = new byte[NpdIndex.MaxChromLength];\n        }\n\n        private Chromosome _chromosome;\n        private int _lastPhylopPosition;\n        private void PreLoad(Chromosome chrom)\n        {\n            _chromosome = chrom;\n            (long startLocation, int numBytes) = _index.GetFileRange(chrom.Index);\n            if (startLocation == -1)\n            {\n                _lastPhylopPosition = -1;\n                return;\n            }\n            _reader.BaseStream.Position = startLocation;\n            var buffer = _reader.ReadBytes(numBytes);\n\n            _lastPhylopPosition = _zstd.Decompress(buffer, buffer.Length, _scores, _scores.Length);\n            \n        }\n\n        public double? GetAnnotation(Chromosome chromosome, int position)\n        {\n            if (_chromosome==null || chromosome.Index != _chromosome.Index) PreLoad(chromosome);\n\n            if (position >= _lastPhylopPosition) return null;\n            var scoreCode = _scores[position - 1];\n            if (scoreCode == 0) return null;\n            return _scoreMap[scoreCode];\n        }\n\n        public void Dispose()\n        {\n            _reader?.Dispose();\n            _dbStream?.Dispose();\n            _indexStream?.Dispose();\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/Pools/AnnotatedPositionPool.cs",
    "content": "using Microsoft.Extensions.ObjectPool;\nusing VariantAnnotation.AnnotatedPositions;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.Interface.Positions;\n\nnamespace VariantAnnotation.Pools\n{\n    public static class AnnotatedPositionPool\n    {\n        private static readonly ObjectPool<AnnotatedPosition> Pool \n            = new DefaultObjectPool<AnnotatedPosition>(new DefaultPooledObjectPolicy<AnnotatedPosition>(), 4);\n        \n        public static AnnotatedPosition Get(IPosition position, IAnnotatedVariant[] annotatedVariants)\n        {\n            var annotatedPosition =  Pool.Get();\n            annotatedPosition.Initialize(position, annotatedVariants);\n            return annotatedPosition;\n        }\n        \n        public static void Return(AnnotatedPosition ap) => Pool.Return(ap);\n    }\n}"
  },
  {
    "path": "VariantAnnotation/Pools/AnnotatedTranscriptPool.cs",
    "content": "using System.Collections.Generic;\nusing Microsoft.Extensions.ObjectPool;\nusing VariantAnnotation.AnnotatedPositions.Transcript;\nusing VariantAnnotation.Interface.AnnotatedPositions;\n\nnamespace VariantAnnotation.Pools\n{\n    public static class AnnotatedTranscriptPool\n    {\n        private static readonly ObjectPool<AnnotatedTranscript> Pool = new DefaultObjectPool<AnnotatedTranscript>(new DefaultPooledObjectPolicy<AnnotatedTranscript>(), 16);\n        \n        public static AnnotatedTranscript Get(ITranscript transcript, string referenceAminoAcids, string alternateAminoAcids,\n            string referenceCodons, string alternateCodons, IMappedPosition mappedPosition, string hgvsCoding,\n            string hgvsProtein, PredictionScore sift, PredictionScore polyphen,\n            List<ConsequenceTag> consequences, bool? completeOverlap)\n        {\n            var annotatedTranscript =  Pool.Get();\n            annotatedTranscript.Initialize(transcript, referenceAminoAcids, alternateAminoAcids, referenceCodons, alternateCodons, mappedPosition, \n                hgvsCoding, hgvsProtein, sift, polyphen, consequences, completeOverlap);\n            \n            return annotatedTranscript;\n        }\n        \n        public static void Return(AnnotatedTranscript annotatedTranscript) => Pool.Return(annotatedTranscript);\n        \n    }\n}"
  },
  {
    "path": "VariantAnnotation/Pools/AnnotatedVariantPool.cs",
    "content": "using Microsoft.Extensions.ObjectPool;\nusing VariantAnnotation.AnnotatedPositions;\nusing Variants;\n\nnamespace VariantAnnotation.Pools\n{\n    public static class AnnotatedVariantPool\n    {\n        private static readonly ObjectPool<AnnotatedVariant> Pool \n            = new DefaultObjectPool<AnnotatedVariant>(new DefaultPooledObjectPolicy<AnnotatedVariant>(), 8);\n        \n        public static AnnotatedVariant Get(IVariant variant)\n        {\n            var annotatedVariant =  Pool.Get();\n            annotatedVariant.Initialize(variant);\n            return annotatedVariant;\n        }\n        \n        public static void Return(AnnotatedVariant av) => Pool.Return(av);\n    }\n}"
  },
  {
    "path": "VariantAnnotation/Pools/VariantPool.cs",
    "content": "using Genome;\nusing Microsoft.Extensions.ObjectPool;\nusing Variants;\n\nnamespace VariantAnnotation.Pools\n{\n    public static class VariantPool\n    {\n        private static readonly ObjectPool<Variant> Pool = \n            new DefaultObjectPool<Variant>(new DefaultPooledObjectPolicy<Variant>(), 8);\n        \n        public static Variant Get(Chromosome chromosome, int start, int end, string refAllele, string altAllele,\n            VariantType variantType, string variantId, bool isRefMinor, bool isDecomposed, bool isRecomposed,\n            string[] linkedVids, AnnotationBehavior behavior, bool isStructuralVariant)\n        {\n            var variant =  Pool.Get();\n            variant.Initialize( chromosome,  start,  end,  refAllele,  altAllele,\n                 variantType,  variantId,  isRefMinor,  isDecomposed,  isRecomposed,\n                 linkedVids,  behavior, isStructuralVariant);\n            return variant;\n        }\n        \n        public static void Return(Variant variant) => Pool.Return(variant);\n    }\n}"
  },
  {
    "path": "VariantAnnotation/ProteinConservation/ProteinConservationCommon.cs",
    "content": "﻿namespace VariantAnnotation.ProteinConservation\n{\n    public static class ProteinConservationCommon\n    {\n        public const string FileSuffix = \".pcs\";\n        public const int SchemaVersion = 1;\n    }\n}"
  },
  {
    "path": "VariantAnnotation/ProteinConservation/ProteinConservationItem.cs",
    "content": "﻿namespace VariantAnnotation.ProteinConservation\n{\n    public sealed class ProteinConservationItem\n    {\n        public readonly string TranscriptId;\n        public readonly string Chromosome;\n\n        public readonly string ProteinSequence;\n        public readonly byte[] Scores;\n\n        public ProteinConservationItem(string chrom, string transcriptId, string proteinSequence, byte[] scores)\n        {\n            Chromosome      = chrom;\n            TranscriptId    = transcriptId;\n            ProteinSequence = proteinSequence;\n            Scores          = scores;\n        }\n       \n    }\n}"
  },
  {
    "path": "VariantAnnotation/ProteinConservation/ProteinConservationReader.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing Genome;\nusing IO;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.Providers;\n\nnamespace VariantAnnotation.ProteinConservation\n{\n    public sealed class ProteinConservationReader:IDisposable\n    {\n        private GenomeAssembly Assembly { get; }\n        private readonly ExtendedBinaryReader _reader;\n        public readonly IDataSourceVersion Version;\n\n        public ProteinConservationReader(Stream stream)\n        {\n            _reader = new ExtendedBinaryReader(stream);\n            \n            var schemaVersion = _reader.ReadOptInt32();\n            if(schemaVersion != ProteinConservationCommon.SchemaVersion)\n                throw new Exception($\"Schema version mismatch found. Observed: {schemaVersion}, expected: {ProteinConservationCommon.SchemaVersion}\");\n            Assembly = (GenomeAssembly) _reader.ReadByte();\n            Version = DataSourceVersion.Read(_reader);\n        }\n\n        public IEnumerable<TranscriptConservationScores> GetItems()\n        {\n            TranscriptConservationScores score;\n            while ((score = TranscriptConservationScores.Read(_reader))!=null)\n            {\n                if (score.IsEmpty()) break;\n                yield return score;\n            }\n            \n        }\n\n        public void Dispose() =>_reader?.Dispose(); \n        \n    }\n}"
  },
  {
    "path": "VariantAnnotation/ProteinConservation/TranscriptConservationScores.cs",
    "content": "﻿using System;\nusing IO;\n\nnamespace VariantAnnotation.ProteinConservation\n{\n    public sealed class TranscriptConservationScores\n    {\n        public readonly string TranscriptId;\n        public readonly byte[] ConservationScores;\n\n        public TranscriptConservationScores(string id, byte[] scores)\n        {\n            //removing versions for ensembl only\n            TranscriptId = id;\n            ConservationScores = scores;\n        }\n\n        public static TranscriptConservationScores Read(ExtendedBinaryReader reader)\n        {\n            var id = reader.ReadAsciiString();\n            var count = reader.ReadOptInt32();\n            var scores = reader.ReadBytes(count);\n            var item = new TranscriptConservationScores(id, scores);\n            return item.IsEmpty() ? null : item;\n        }\n\n        public void Write(ExtendedBinaryWriter writer)\n        {\n            writer.WriteOptAscii(TranscriptId);\n            writer.WriteOpt(ConservationScores.Length);\n            writer.Write(ConservationScores);\n        }\n\n        public static TranscriptConservationScores GetEmptyItem() => new TranscriptConservationScores(\"\", Array.Empty<byte>());\n        \n        public bool IsEmpty() => string.IsNullOrEmpty(TranscriptId) && ConservationScores.Length == 0;\n        \n    }\n}"
  },
  {
    "path": "VariantAnnotation/Providers/ConservationScoreProvider.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing Genome;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.PhyloP;\r\nusing Variants;\r\n\r\nnamespace VariantAnnotation.Providers\r\n{\r\n    public sealed class ConservationScoreProvider : IAnnotationProvider\r\n    {\r\n        private NpdReader _phylopReader;\r\n\r\n        public           string                          Name               { get; }\r\n        public           GenomeAssembly                  Assembly           => _phylopReader.Assembly;\r\n        public           IEnumerable<IDataSourceVersion> DataSourceVersions => _versions;\r\n        private readonly List<IDataSourceVersion>        _versions = new();\r\n\r\n        public ConservationScoreProvider()\r\n        {\r\n            Name = \"Conservation score provider\";\r\n        }\r\n\r\n        public ConservationScoreProvider AddPhylopReader(Stream dbStream, Stream indexStream)\r\n        {\r\n            if (dbStream == null || indexStream == null) return this;\r\n            _phylopReader = new NpdReader(dbStream, indexStream);\r\n            _versions.Add(_phylopReader.Version);\r\n            return this;\r\n        }\r\n\r\n        public void Annotate(IAnnotatedPosition annotatedPosition)\r\n        {\r\n            foreach (var annotatedVariant in annotatedPosition.AnnotatedVariants)\r\n            {\r\n                if (annotatedVariant.Variant.Type != VariantType.SNV) continue;\r\n                if (_phylopReader != null)\r\n                    annotatedVariant.PhylopScore = _phylopReader.GetAnnotation(annotatedPosition.Position.Chromosome, annotatedVariant.Variant.Start);\r\n            }\r\n        }\r\n\r\n        public void PreLoad(Chromosome chromosome, List<int> positions)\r\n        {\r\n            throw new NotImplementedException();\r\n        }\r\n\r\n        public void Dispose()\r\n        
{\r\n            _phylopReader?.Dispose();\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Providers/DataSourceVersion.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Text;\r\nusing IO;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.IO;\r\nusing VariantAnnotation.Utilities;\r\n\r\nnamespace VariantAnnotation.Providers\r\n{\r\n    public sealed class DataSourceVersion : IDataSourceVersion, ISerializable\r\n    {\r\n        public string Name { get; }\r\n        public string Description { get; }\r\n        public string Version { get; }\r\n        public long ReleaseDateTicks { get; }\r\n\r\n        public DataSourceVersion(string name, string version, long releaseDateTicks, string description = null)\r\n        {\r\n            Name             = name;\r\n            Description      = description;\r\n            Version          = version;\r\n            ReleaseDateTicks = releaseDateTicks;\r\n        }\r\n\r\n        public static IDataSourceVersion Read(ExtendedBinaryReader reader)\r\n        {\r\n            var name             = reader.ReadAsciiString();\r\n            var version          = reader.ReadAsciiString();\r\n            var releaseDateTicks = reader.ReadOptInt64();\r\n            var description      = reader.ReadAsciiString();\r\n            return new DataSourceVersion(name, version, releaseDateTicks, description);\r\n        }\r\n\r\n        public void Write(IExtendedBinaryWriter writer)\r\n        {\r\n            writer.WriteOptAscii(Name);\r\n            writer.WriteOptAscii(Version);\r\n            writer.WriteOpt(ReleaseDateTicks);\r\n            writer.WriteOptAscii(Description);\r\n        }\r\n\r\n        private string GetReleaseDate() => Date.GetDate(ReleaseDateTicks);\r\n\r\n        public override string ToString() => \"dataSource=\" + Name + \",version:\" + Version + \",release date:\" + GetReleaseDate();\r\n\r\n        public void SerializeJson(StringBuilder sb)\r\n        {\r\n            var jsonObject = new JsonObject(sb);\r\n\r\n            sb.Append(JsonObject.OpenBrace);\r\n            
jsonObject.AddStringValue(\"name\", Name);\r\n            jsonObject.AddStringValue(\"version\", Version);\r\n            if (Description != null) jsonObject.AddStringValue(\"description\", Description.Trim());\r\n            if (ReleaseDateTicks != 0) jsonObject.AddStringValue(\"releaseDate\", GetReleaseDate());\r\n            sb.Append(JsonObject.CloseBrace);\r\n        }\r\n    }\r\n\r\n    public sealed class DataSourceVersionComparer : EqualityComparer<IDataSourceVersion>\r\n    {\r\n        public override bool Equals(IDataSourceVersion x, IDataSourceVersion y)\r\n        {\r\n            return string.Equals(x.Name, y.Name) &&\r\n                   string.Equals(x.Description, y.Description) &&\r\n                   string.Equals(x.Version, y.Version) &&\r\n                   x.ReleaseDateTicks == y.ReleaseDateTicks;\r\n        }\r\n\r\n        public override int GetHashCode(IDataSourceVersion obj)\r\n        {\r\n            unchecked\r\n            {\r\n                var hashCode = obj.Name.GetHashCode();\r\n                if (obj.Description != null) hashCode = (hashCode * 397) ^ obj.Description.GetHashCode();\r\n                if (obj.Version != null) hashCode = (hashCode * 397) ^ obj.Version.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ obj.ReleaseDateTicks.GetHashCode();\r\n                return hashCode;\r\n            }\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Providers/LcrProvider.cs",
    "content": "using System.Collections.Generic;\nusing System.IO;\nusing Genome;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.NSA;\n\nnamespace VariantAnnotation.Providers\n{\n    public class LcrProvider: IAnnotationProvider\n    {\n        public string Name => \"Lcr provider\";\n        public GenomeAssembly Assembly { get; }\n        public IEnumerable<IDataSourceVersion> DataSourceVersions { get; }\n\n        private readonly NsiReader _nsiReader;\n\n        public LcrProvider(Stream stream)\n        {\n            _nsiReader = NsiReader.Read(stream);\n            Assembly = _nsiReader.Assembly;\n            DataSourceVersions = new[] { _nsiReader.Version };\n        }\n\n        public void Dispose()\n        {\n            // nsiReaders are not disposable. They read from the input stream and disposes it in the Read method.\n        }\n\n        public void Annotate(IAnnotatedPosition annotatedPosition)\n        {\n            foreach (var annotatedVariant in annotatedPosition.AnnotatedVariants)\n            {\n                annotatedVariant.InLowComplexityRegion = _nsiReader.OverlapsAny(annotatedVariant.Variant);\n            }\n        }\n\n        public void PreLoad(Chromosome chromosome, List<int> positions)\n        {\n            throw new System.NotImplementedException();\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/Providers/NsaProvider.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Linq;\r\nusing System.Text;\r\nusing System.Threading.Tasks;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing VariantAnnotation.GeneFusions.IO;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.Interface.SA;\r\nusing VariantAnnotation.NSA;\r\nusing Variants;\r\n\r\nnamespace VariantAnnotation.Providers\r\n{\r\n    public sealed class NsaProvider : IAnnotationProvider\r\n    {\r\n        public string                          Name               => \"Supplementary annotation provider\";\r\n        public GenomeAssembly                  Assembly           { get; }\r\n        public IEnumerable<IDataSourceVersion> DataSourceVersions { get; }\r\n\r\n        private readonly INsaReader[]          _nsaReaders;\r\n        private readonly INsiReader[]          _nsiReaders;\r\n        private readonly IGeneFusionSaReader[] _fusionReaders;\r\n\r\n        private readonly bool _hasFusionReaders;\r\n        private          bool _hasLoadedGeneFusions;\r\n\r\n        private readonly List<(string refAllele, string altAllele, string jsonString)> _annotations = new();\r\n\r\n        public NsaProvider(INsaReader[] nsaReaders, INsiReader[] nsiReaders, IGeneFusionSaReader[] fusionReaders)\r\n        {\r\n            _nsaReaders    = nsaReaders;\r\n            _nsiReaders    = nsiReaders;\r\n            _fusionReaders = fusionReaders;\r\n\r\n            if (fusionReaders != null && fusionReaders.Length > 0) _hasFusionReaders = true;\r\n\r\n            (List<ISaMetadata> variant, List<ISaMetadata> position, List<ISaMetadata> all) = OrganizeReaders(nsaReaders, nsiReaders, fusionReaders);\r\n\r\n            (Assembly, DataSourceVersions) = GetReaderMetadata(all);\r\n            CheckDuplicatePositionKeys(position);\r\n            CheckDuplicateVariantKeys(variant);\r\n        
}\r\n\r\n        private static (List<ISaMetadata> Variant, List<ISaMetadata> Position, List<ISaMetadata> All) OrganizeReaders(\r\n            INsaReader[] nsaReaders, INsiReader[] nsiReaders, IGeneFusionSaReader[] fusionReaders)\r\n        {\r\n            List<ISaMetadata> variant  = new();\r\n            List<ISaMetadata> position = new();\r\n            List<ISaMetadata> all      = new();\r\n\r\n            if (nsaReaders != null)\r\n            {\r\n                foreach (INsaReader reader in nsaReaders)\r\n                {\r\n                    variant.Add(reader);\r\n                    all.Add(reader);\r\n                }\r\n            }\r\n\r\n            if (nsiReaders != null)\r\n            {\r\n                foreach (INsiReader reader in nsiReaders)\r\n                {\r\n                    position.Add(reader);\r\n                    all.Add(reader);\r\n                }\r\n            }\r\n\r\n            if (fusionReaders != null)\r\n            {\r\n                foreach (IGeneFusionSaReader reader in fusionReaders)\r\n                {\r\n                    variant.Add(reader);\r\n                    all.Add(reader);\r\n                }\r\n            }\r\n\r\n            return (variant, position, all);\r\n        }\r\n\r\n        private static void CheckDuplicateVariantKeys(List<ISaMetadata> readers)\r\n        {\r\n            var jsonKeys = new HashSet<string>();\r\n            foreach (ISaMetadata reader in readers) CheckJsonKey(reader.JsonKey, \"variant-level (.nsa or fusion)\", jsonKeys);\r\n        }\r\n\r\n        private static void CheckDuplicatePositionKeys(List<ISaMetadata> readers)\r\n        {\r\n            var jsonKeys = new HashSet<string>();\r\n            foreach (ISaMetadata reader in readers) CheckJsonKey(reader.JsonKey, \"position-level (.nsi)\", jsonKeys);\r\n        }\r\n\r\n        private static void CheckJsonKey(string jsonKey, string description, HashSet<string> jsonKeys)\r\n        {\r\n            if 
(jsonKeys.Contains(jsonKey)) throw new UserErrorException($\"Duplicate {description} JSON keys found for: {jsonKey}\");\r\n            jsonKeys.Add(jsonKey);\r\n        }\r\n\r\n        private static (GenomeAssembly Assembly, IEnumerable<IDataSourceVersion> Versions) GetReaderMetadata(List<ISaMetadata> readers)\r\n        {\r\n            HashSet<GenomeAssembly>  assemblies = new();\r\n            List<IDataSourceVersion> versions   = new();\r\n            var                      sb         = new StringBuilder();\r\n\r\n            foreach (ISaMetadata reader in readers)\r\n            {\r\n                if (reader.Assembly != GenomeAssembly.rCRS && reader.Assembly != GenomeAssembly.Unknown) assemblies.Add(reader.Assembly);\r\n                versions.Add(reader.Version);\r\n                sb.AppendLine($\"{reader.Version}, Assembly: {reader.Assembly}\");\r\n            }\r\n\r\n            if (assemblies.Count == 1) return (assemblies.First(), versions);\r\n\r\n            throw new UserErrorException($\"Multiple genome assemblies detected in Supplementary annotation directory.\\n{sb}\");\r\n        }\r\n\r\n        public void Annotate(IAnnotatedPosition annotatedPosition)\r\n        {\r\n            if (_nsaReaders != null) AddPositionAndAlleleAnnotations(annotatedPosition);\r\n            if (_nsiReaders != null) GetStructuralVariantAnnotations(annotatedPosition);\r\n            if (_hasFusionReaders && annotatedPosition.Position.HasStructuralVariant) GetGeneFusionAnnotations(annotatedPosition);\r\n        }\r\n\r\n        private void GetGeneFusionAnnotations(IAnnotatedPosition annotatedPosition)\r\n        {\r\n            foreach (IAnnotatedVariant variant in annotatedPosition.AnnotatedVariants)\r\n            {\r\n                IGeneFusionPair[] fusionPairs = GetGeneFusionPairs(variant);\r\n                if (fusionPairs == null) continue;\r\n\r\n                // this only needs to happen if we have a gene fusion\r\n                if 
(!_hasLoadedGeneFusions) LoadGeneFusions();\r\n\r\n                foreach (IGeneFusionSaReader reader in _fusionReaders) reader.AddAnnotations(fusionPairs, variant.SaList);\r\n            }\r\n        }\r\n\r\n        private void LoadGeneFusions()\r\n        {\r\n            foreach (IGeneFusionSaReader reader in _fusionReaders) reader.LoadAnnotations();\r\n            _hasLoadedGeneFusions = true;\r\n        }\r\n\r\n        private static IGeneFusionPair[] GetGeneFusionPairs(IAnnotatedVariant variant)\r\n        {\r\n            var fusionPairs = new HashSet<IGeneFusionPair>();\r\n            foreach (IAnnotatedTranscript transcript in variant.Transcripts) transcript.AddGeneFusionPairs(fusionPairs);\r\n            return fusionPairs.Count == 0 ? null : fusionPairs.ToArray();\r\n        }\r\n\r\n        private void GetStructuralVariantAnnotations(IAnnotatedPosition annotatedPosition)\r\n        {\r\n            bool needSaIntervals     = annotatedPosition.AnnotatedVariants.Any(x => x.Variant.Behavior.NeedSaInterval);\r\n            bool needSmallAnnotation = annotatedPosition.AnnotatedVariants.Any(x => x.Variant.Behavior == AnnotationBehavior.SmallVariants);\r\n\r\n            foreach (INsiReader nsiReader in _nsiReaders)\r\n            {\r\n                IPosition position = annotatedPosition.Position;\r\n                if (nsiReader.ReportFor == ReportFor.SmallVariants      && !needSmallAnnotation) continue;\r\n                if (nsiReader.ReportFor == ReportFor.StructuralVariants && !needSaIntervals) continue;\r\n\r\n                IEnumerable<string> annotations = nsiReader.GetAnnotation(position.Variants[0]);\r\n                if (annotations == null) continue;\r\n\r\n                annotatedPosition.SupplementaryIntervals.Add(new SupplementaryAnnotation(nsiReader.JsonKey, true, false, null, annotations));\r\n            }\r\n        }\r\n\r\n        private void AddPositionAndAlleleAnnotations(IAnnotatedPosition annotatedPosition)\r\n        {\r\n  
          foreach (IAnnotatedVariant annotatedVariant in annotatedPosition.AnnotatedVariants)\r\n            {\r\n                bool needSaPosition = annotatedVariant.Variant.Behavior.NeedSaPosition;\r\n                bool needSaAllele   = annotatedVariant.Variant.Behavior.NeedSaAllele;\r\n                if (!needSaPosition && !needSaAllele) continue;\r\n                AddSmallAnnotations(annotatedVariant, needSaPosition, needSaAllele);\r\n            }\r\n        }\r\n\r\n        private void AddSmallAnnotations(IAnnotatedVariant annotatedVariant, bool needSaPosition, bool needSaAllele)\r\n        {\r\n            foreach (INsaReader nsaReader in _nsaReaders)\r\n            {\r\n                IVariant variant = annotatedVariant.Variant;\r\n                nsaReader.GetAnnotation(variant.Start, _annotations);\r\n                if (_annotations.Count == 0) continue;\r\n\r\n                if (nsaReader.IsPositional && needSaPosition)\r\n                {\r\n                    AddPositionalAnnotation(_annotations, annotatedVariant, nsaReader);\r\n                    continue;\r\n                }\r\n\r\n                if (nsaReader.MatchByAllele && needSaAllele) AddAlleleSpecificAnnotation(nsaReader, _annotations, annotatedVariant, variant);\r\n\r\n                else AddNonAlleleSpecificAnnotations(_annotations, variant, annotatedVariant, nsaReader);\r\n            }\r\n        }\r\n\r\n        private static void AddPositionalAnnotation(IEnumerable<(string refAllele, string altAllele, string annotation)> annotations,\r\n                                                    IAnnotatedVariant annotatedVariant, INsaReader nsaReader)\r\n        {\r\n            // e.g. 
ancestral allele, global minor allele\r\n            string jsonString = annotations.First().annotation;\r\n            annotatedVariant.SaList.Add(new SupplementaryAnnotation(nsaReader.JsonKey, nsaReader.IsArray, nsaReader.IsPositional, jsonString, null));\r\n        }\r\n\r\n        private static void AddNonAlleleSpecificAnnotations(IEnumerable<(string refAllele, string altAllele, string annotation)> annotations,\r\n                                                            IVariant variant, IAnnotatedVariant annotatedVariant, INsaReader nsaReader)\r\n        {\r\n            var jsonStrings = new List<string>();\r\n            foreach ((string refAllele, string altAllele, string jsonString) in annotations)\r\n            {\r\n                if (refAllele == variant.RefAllele && altAllele == variant.AltAllele) jsonStrings.Add(jsonString + \",\\\"isAlleleSpecific\\\":true\");\r\n                else jsonStrings.Add(jsonString);\r\n            }\r\n\r\n            if (jsonStrings.Count > 0)\r\n                annotatedVariant.SaList.Add(new SupplementaryAnnotation(nsaReader.JsonKey, nsaReader.IsArray, nsaReader.IsPositional, null,\r\n                    jsonStrings));\r\n        }\r\n\r\n        private static void AddAlleleSpecificAnnotation(INsaReader nsaReader,\r\n                                                        IEnumerable<(string refAllele, string altAllele, string annotation)> annotations,\r\n                                                        IAnnotatedVariant annotatedVariant, IVariant variant)\r\n        {\r\n            if (nsaReader.IsArray)\r\n            {\r\n                var jsonStrings = new List<string>();\r\n                foreach ((string refAllele, string altAllele, string jsonString) in annotations)\r\n                {\r\n                    if (refAllele == variant.RefAllele && altAllele == variant.AltAllele)\r\n                        jsonStrings.Add(jsonString);\r\n                }\r\n\r\n                if 
(jsonStrings.Count > 0)\r\n                    annotatedVariant.SaList.Add(new SupplementaryAnnotation(nsaReader.JsonKey, nsaReader.IsArray, nsaReader.IsPositional, null,\r\n                        jsonStrings));\r\n            }\r\n            else\r\n                foreach ((string refAllele, string altAllele, string jsonString) in annotations)\r\n                {\r\n                    if (refAllele != variant.RefAllele || altAllele != variant.AltAllele) continue;\r\n\r\n                    annotatedVariant.SaList.Add(new SupplementaryAnnotation(nsaReader.JsonKey, nsaReader.IsArray, nsaReader.IsPositional, jsonString,\r\n                        null));\r\n                    break;\r\n                }\r\n        }\r\n\r\n        public void PreLoad(Chromosome chromosome, List<int> positions)\r\n        {\r\n            Task[] preloadTasks = _nsaReaders.Select(x => DoPreload(x, chromosome, positions)).ToArray();\r\n            Task.WaitAll(preloadTasks);\r\n            foreach (Task preloadTask in preloadTasks) preloadTask.Dispose();\r\n        }\r\n\r\n        private static Task DoPreload(INsaReader nsaReader, Chromosome chromosome, List<int> positions) =>\r\n            Task.Run(() => { nsaReader.PreLoad(chromosome, positions); });\r\n\r\n        public void Dispose()\r\n        {\r\n            if (_nsaReaders != null)\r\n                foreach (INsaReader reader in _nsaReaders)\r\n                    reader.Dispose();\r\n\r\n            if (_fusionReaders != null)\r\n                foreach (IGeneFusionSaReader reader in _fusionReaders)\r\n                    reader.Dispose();\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Providers/ProteinConservationProvider.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing System.IO;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.ProteinConservation;\n\nnamespace VariantAnnotation.Providers\n{\n    /// <summary>\n    /// Serves conservation scores, keyed by transcript ID, that are read from a protein conservation stream.\n    /// </summary>\n    public sealed class ProteinConservationProvider : IDisposable\n    {\n        // returned when no score is available for the requested transcript/position\n        private const int MissingScore = -1;\n\n        private readonly ProteinConservationReader _reader;\n        private readonly Dictionary<string, byte[]> _conservationScores;\n\n        public string Name => \"Amino acid conservation score provider\";\n        public IDataSourceVersion Version => _reader.Version;\n\n        public ProteinConservationProvider(Stream stream)\n        {\n            _reader             = new ProteinConservationReader(stream);\n            _conservationScores = new Dictionary<string, byte[]>(100_000);\n        }\n\n        /// <summary>\n        /// Copies every item produced by the reader into the in-memory lookup table.\n        /// </summary>\n        public void Load()\n        {\n            foreach (var item in _reader.GetItems())\n            {\n                _conservationScores.Add(item.TranscriptId, item.ConservationScores);\n            }\n        }\n\n        /// <summary>\n        /// Returns the score stored for the 1-based position of the given transcript, or -1 when the\n        /// transcript is unknown or the position lies outside the stored scores.\n        /// </summary>\n        public int GetConservationScore(string transcriptId, int position)\n        {\n            if (!_conservationScores.TryGetValue(transcriptId, out var scores)) return MissingScore;\n\n            // the position is turned into a 0-based index below, so the valid range is [1, scores.Length];\n            // this keeps the last entry addressable and prevents a negative index for positions below 1\n            if (position < 1 || position > scores.Length) return MissingScore;\n\n            return scores[position - 1];\n        }\n\n        public void Dispose() => _reader?.Dispose();\n    }\n}"
  },
  {
    "path": "VariantAnnotation/Providers/RefMinorProvider.cs",
    "content": "﻿using System.IO;\r\nusing Genome;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.NSA;\r\n\r\nnamespace VariantAnnotation.Providers\r\n{\r\n    /// <summary>\r\n    /// Answers global major allele queries by delegating to a RefMinorDbReader built from the supplied streams.\r\n    /// </summary>\r\n    public sealed class RefMinorProvider : IRefMinorProvider\r\n    {\r\n        private readonly RefMinorDbReader _refMinorReader;\r\n\r\n        public RefMinorProvider(Stream dbStream, Stream indexStream) =>\r\n            _refMinorReader = new RefMinorDbReader(dbStream, indexStream);\r\n\r\n        public string GetGlobalMajorAllele(Chromosome chromosome, int pos)\r\n        {\r\n            return _refMinorReader.GetGlobalMajorAllele(chromosome, pos);\r\n        }\r\n\r\n        public void Dispose() => _refMinorReader?.Dispose();\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Providers/ReferenceSequenceProvider.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.IO;\r\nusing Genome;\r\nusing Intervals;\r\nusing ReferenceSequence.IO;\r\nusing VariantAnnotation.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Providers;\r\n\r\nnamespace VariantAnnotation.Providers\r\n{\r\n    /// <summary>\r\n    /// Exposes the sequence read from a compressed sequence stream and annotates positions with their\r\n    /// cytogenetic band and HGVS g. notation.\r\n    /// </summary>\r\n    public sealed class ReferenceSequenceProvider : ISequenceProvider\r\n    {\r\n        private readonly CompressedSequenceReader _sequenceReader;\r\n        private ushort _currentChromosomeIndex = 65534; // guaranteed to be updated\r\n\r\n        public string                           Name                 => \"Reference sequence provider\";\r\n        public GenomeAssembly                   Assembly             => _sequenceReader.Assembly;\r\n        public IEnumerable<IDataSourceVersion>  DataSourceVersions   => null;\r\n        public Dictionary<string, Chromosome>   RefNameToChromosome  => _sequenceReader.RefNameToChromosome;\r\n        public Dictionary<ushort, Chromosome>   RefIndexToChromosome => _sequenceReader.RefIndexToChromosome;\r\n\r\n        public ISequence Sequence { get; }\r\n\r\n        public ReferenceSequenceProvider(Stream stream)\r\n        {\r\n            _sequenceReader = new CompressedSequenceReader(stream);\r\n            Sequence        = _sequenceReader.Sequence;\r\n        }\r\n\r\n        /// <summary>\r\n        /// Sets the cytogenetic band of the position and, unless the position holds a structural variant or a\r\n        /// short tandem repeat, the HGVS g. notation of every annotated variant.\r\n        /// </summary>\r\n        public void Annotate(IAnnotatedPosition annotatedPosition)\r\n        {\r\n            var annotatedVariants = annotatedPosition.AnnotatedVariants;\r\n            if (annotatedVariants == null) return;\r\n\r\n            var position = annotatedPosition.Position;\r\n            annotatedPosition.CytogeneticBand = Sequence.CytogeneticBands.Find(position.Chromosome, position.Start, position.End);\r\n\r\n            // structural variants and STRs do not get HGVS g. nomenclature\r\n            bool skipHgvsg = position.HasStructuralVariant || position.HasShortTandemRepeat;\r\n            if (skipHgvsg) return;\r\n\r\n            string accession = position.Chromosome.RefSeqAccession;\r\n            foreach (var annotatedVariant in annotatedVariants)\r\n            {\r\n                var wholeSequence = new Interval(0, Sequence.Length);\r\n                annotatedVariant.HgvsgNotation = HgvsgNotation.GetNotation(accession, annotatedVariant.Variant, Sequence, wholeSequence);\r\n            }\r\n        }\r\n\r\n        public void PreLoad(Chromosome chromosome, List<int> positions) => throw new System.NotImplementedException();\r\n\r\n        /// <summary>\r\n        /// Asks the reader for the compressed sequence of the chromosome unless it is already the current one.\r\n        /// </summary>\r\n        public void LoadChromosome(Chromosome chromosome)\r\n        {\r\n            ushort requestedIndex = chromosome.Index;\r\n            if (requestedIndex == _currentChromosomeIndex) return;\r\n\r\n            _sequenceReader.GetCompressedSequence(chromosome);\r\n            _currentChromosomeIndex = requestedIndex;\r\n        }\r\n\r\n        public void Dispose()\r\n        {\r\n            _sequenceReader?.Dispose();\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Providers/ScoreProvider.cs",
    "content": "using System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing ErrorHandling.Exceptions;\nusing Genome;\nusing VariantAnnotation.GenericScore;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.Interface.Providers;\nusing VariantAnnotation.NSA;\nusing Variants;\n\nnamespace VariantAnnotation.Providers;\n\n/// <summary>\n/// Adds the annotations returned by a set of score readers to the SNVs of an annotated position.\n/// </summary>\npublic sealed class ScoreProvider : IAnnotationProvider\n{\n    public string                          Name               => \"Supplementary annotation provider\";\n    public GenomeAssembly                  Assembly           { get; }\n    public IEnumerable<IDataSourceVersion> DataSourceVersions { get; }\n\n    private readonly ScoreReader[] _scoreReaders;\n\n    /// <summary>\n    /// Stores the readers and derives the assembly and data source versions from them.\n    /// Throws a UserErrorException when the readers do not agree on exactly one genome assembly.\n    /// </summary>\n    public ScoreProvider(ScoreReader[] scoreReaders)\n    {\n        _scoreReaders                  = scoreReaders;\n        (Assembly, DataSourceVersions) = GetReadersMetadata();\n    }\n\n    /// <summary>\n    /// For every reader and every SNV at the position, appends the reader's JSON annotation (when there is one)\n    /// to the variant's supplementary annotation list.\n    /// </summary>\n    public void Annotate(IAnnotatedPosition annotatedPosition)\n    {\n        // nothing to annotate; enumerating a null collection would throw\n        if (annotatedPosition.AnnotatedVariants == null) return;\n\n        foreach (ScoreReader scoreReader in _scoreReaders)\n        {\n            foreach (IAnnotatedVariant annotatedVariant in annotatedPosition.AnnotatedVariants)\n            {\n                IVariant variant = annotatedVariant.Variant;\n\n                // Score provider is only limited to SNV type calls\n                if (variant.Type != VariantType.SNV) continue;\n\n                Chromosome chromosome = variant.Chromosome;\n                string     jsonString = scoreReader.GetAnnotationJson(chromosome.Index, variant.Start, variant.AltAllele);\n\n                if (jsonString == null) continue;\n\n                annotatedVariant.SaList.Add(new SupplementaryAnnotation(\n                    scoreReader.JsonKey,\n                    false,\n                    true,\n                    jsonString,\n                    null\n                ));\n            }\n        }\n    }\n\n    /// <summary>\n    /// Collects the versions of all readers and the single genome assembly they share.\n    /// Readers reporting rCRS or Unknown are ignored when determining the assembly.\n    /// </summary>\n    private (GenomeAssembly Assembly, IEnumerable<IDataSourceVersion> Versions) GetReadersMetadata()\n    {\n        HashSet<GenomeAssembly>  assemblies = new();\n        List<IDataSourceVersion> versions   = new();\n        var                      sb         = new StringBuilder();\n\n        foreach (ScoreReader reader in _scoreReaders)\n        {\n            if (reader.Assembly != GenomeAssembly.rCRS && reader.Assembly != GenomeAssembly.Unknown) assemblies.Add(reader.Assembly);\n            versions.Add(reader.Version);\n            sb.AppendLine($\"{reader.Version}, Assembly: {reader.Assembly}\");\n        }\n\n        if (assemblies.Count == 1) return (assemblies.First(), versions);\n\n        // an empty set means no reader reported a usable assembly, which is a different problem than a conflict\n        string problem = assemblies.Count == 0 ? \"No genome assembly\" : \"Multiple genome assemblies\";\n        throw new UserErrorException($\"{problem} detected in Supplementary annotation directory.\\n{sb}\");\n    }\n\n    public void PreLoad(Chromosome chromosome, List<int> positions)\n    {\n    }\n\n    public void Dispose()\n    {\n    }\n}"
  },
  {
    "path": "VariantAnnotation/Providers/TranscriptAnnotationProvider.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Linq;\r\nusing System.Text;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing Intervals;\r\nusing IO;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.AnnotatedPositions;\r\nusing VariantAnnotation.Caches;\r\nusing VariantAnnotation.GeneFusions.Calling;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Caches;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.IO.Caches;\r\nusing VariantAnnotation.TranscriptAnnotation;\r\nusing Variants;\r\n\r\nnamespace VariantAnnotation.Providers\r\n{\r\n    /// <summary>\r\n    /// Annotates variants with regulatory regions and transcripts (using the SIFT and PolyPhen prediction caches\r\n    /// and, when a conservation provider is supplied, conservation scores) and calls gene fusions for positions\r\n    /// that hold a structural variant.\r\n    /// </summary>\r\n    public sealed class TranscriptAnnotationProvider : ITranscriptAnnotationProvider\r\n    {\r\n        private readonly ITranscriptCache _transcriptCache;\r\n        private readonly ISequence        _sequence;\r\n\r\n        public string                          Name                     { get; }\r\n        public GenomeAssembly                  Assembly                 { get; }\r\n        public IEnumerable<IDataSourceVersion> DataSourceVersions       { get; }\r\n        public IntervalArray<ITranscript>[]    TranscriptIntervalArrays { get; }\r\n        public ushort                          VepVersion               { get; }\r\n\r\n        private readonly Stream                _siftStream;\r\n        private readonly Stream                _polyphenStream;\r\n        private readonly PredictionCacheReader _siftReader;\r\n        private readonly PredictionCacheReader _polyphenReader;\r\n        private          IPredictionCache      _siftCache;\r\n        private          IPredictionCache      _polyphenCache;\r\n        private          ushort                _currentRefIndex = ushort.MaxValue;\r\n\r\n        private readonly ProteinConservationProvider _conservationProvider;\r\n        private readonly GeneFusionCaller            _fusionCaller;\r\n\r\n        /// <summary>\r\n        /// Reads the transcript cache found under the path prefix and opens the SIFT and PolyPhen prediction caches.\r\n        /// The conservation provider may be null, in which case no conservation scores are added.\r\n        /// </summary>\r\n        public TranscriptAnnotationProvider(string pathPrefix, ISequenceProvider sequenceProvider, ProteinConservationProvider conservationProvider)\r\n        {\r\n            Name                  = \"Transcript annotation provider\";\r\n            _sequence             = sequenceProvider.Sequence;\r\n            _conservationProvider = conservationProvider;\r\n\r\n            using (var stream = PersistentStreamUtils.GetReadStream(CacheConstants.TranscriptPath(pathPrefix)))\r\n            {\r\n                (_transcriptCache, TranscriptIntervalArrays, VepVersion) =\r\n                    InitiateCache(stream, sequenceProvider.RefIndexToChromosome, sequenceProvider.Assembly);\r\n            }\r\n\r\n            _fusionCaller      = new GeneFusionCaller(sequenceProvider.RefNameToChromosome, _transcriptCache.TranscriptIntervalForest);\r\n            Assembly           = _transcriptCache.Assembly;\r\n            DataSourceVersions = _transcriptCache.DataSourceVersions;\r\n\r\n            // TODO: this is not great. We should not be using IEnumerables if we have to resort to strange stuff like this\r\n            if (conservationProvider != null)\r\n                DataSourceVersions = DataSourceVersions.Concat(new[] {conservationProvider.Version});\r\n\r\n            _siftStream = PersistentStreamUtils.GetReadStream(CacheConstants.SiftPath(pathPrefix));\r\n            _siftReader = new PredictionCacheReader(_siftStream, PredictionCacheReader.SiftDescriptions);\r\n\r\n            _polyphenStream = PersistentStreamUtils.GetReadStream(CacheConstants.PolyPhenPath(pathPrefix));\r\n            _polyphenReader = new PredictionCacheReader(_polyphenStream, PredictionCacheReader.PolyphenDescriptions);\r\n        }\r\n\r\n        private static (TranscriptCache Cache, IntervalArray<ITranscript>[] TranscriptIntervalArrays, ushort VepVersion) InitiateCache(Stream stream,\r\n            Dictionary<ushort, Chromosome> refIndexToChromosome, GenomeAssembly refAssembly)\r\n        {\r\n            using var reader     = new TranscriptCacheReader(stream);\r\n            ushort    vepVersion = reader.Header.Custom.VepVersion;\r\n            CheckHeaderVersion(reader.Header, refAssembly);\r\n            TranscriptCacheData cacheData = reader.Read(refIndexToChromosome);\r\n            TranscriptCache     cache     = cacheData.GetCache();\r\n\r\n            return (cache, cacheData.TranscriptIntervalArrays, vepVersion);\r\n        }\r\n\r\n        /// <summary>\r\n        /// Throws a UserErrorException when the cache header's assembly or schema version does not match what is expected.\r\n        /// </summary>\r\n        private static void CheckHeaderVersion(Header header, GenomeAssembly refAssembly)\r\n        {\r\n            if (header.Assembly != refAssembly)\r\n                throw new UserErrorException(GetAssemblyErrorMessage(header.Assembly, refAssembly));\r\n\r\n            if (header.SchemaVersion != CacheConstants.SchemaVersion)\r\n                throw new UserErrorException(\r\n                    $\"Expected the cache schema version ({CacheConstants.SchemaVersion}) to be identical to the schema version in the cache header ({header.SchemaVersion})\");\r\n        }\r\n\r\n        private static string GetAssemblyErrorMessage(GenomeAssembly cacheAssembly, GenomeAssembly refAssembly)\r\n        {\r\n            StringBuilder sb = StringBuilderPool.Get();\r\n            sb.AppendLine(\"Not all of the data sources have the same genome assembly:\");\r\n            sb.AppendLine($\"- Using {refAssembly}: Reference sequence provider\");\r\n            sb.AppendLine($\"- Using {cacheAssembly}: Transcript annotation provider\");\r\n            return StringBuilderPool.GetStringAndReturn(sb);\r\n        }\r\n\r\n        public void Annotate(IAnnotatedPosition annotatedPosition)\r\n        {\r\n            if (annotatedPosition.AnnotatedVariants == null || annotatedPosition.AnnotatedVariants.Length == 0) return;\r\n\r\n            IPosition position = annotatedPosition.Position;\r\n            ushort    refIndex = position.Chromosome.Index;\r\n            LoadPredictionCaches(refIndex);\r\n\r\n            AddRegulatoryRegions(annotatedPosition.AnnotatedVariants, _transcriptCache.RegulatoryIntervalForest);\r\n            AddTranscripts(annotatedPosition.AnnotatedVariants);\r\n\r\n            if (position.HasStructuralVariant)\r\n                _fusionCaller.AddGeneFusions(annotatedPosition.AnnotatedVariants, annotatedPosition.Position.InfoData.IsImprecise,\r\n                    position.InfoData.IsInv3, position.InfoData.IsInv5);\r\n        }\r\n\r\n        /// <summary>\r\n        /// Adds the annotated transcripts of every variant that does not request minimal transcript annotation,\r\n        /// attaching conservation scores to each transcript before it is added.\r\n        /// </summary>\r\n        private void AddTranscripts(IAnnotatedVariant[] annotatedVariants)\r\n        {\r\n            foreach (var annotatedVariant in annotatedVariants)\r\n            {\r\n                var variant = annotatedVariant.Variant;\r\n                if (variant.Behavior.MinimalTranscriptAnnotation) continue;\r\n\r\n                ITranscript[] transcripts = _transcriptCache.TranscriptIntervalForest.GetAllFlankingValues(variant);\r\n                if (transcripts == null) continue;\r\n\r\n                IList<IAnnotatedTranscript> annotatedTranscripts =\r\n                    TranscriptAnnotationFactory.GetAnnotatedTranscripts(variant, transcripts, _sequence, _siftCache,\r\n                        _polyphenCache);\r\n\r\n                // score and attach each transcript in a single pass; an empty list simply skips the loop\r\n                foreach (IAnnotatedTranscript annotatedTranscript in annotatedTranscripts)\r\n                {\r\n                    AddConservationScore(annotatedTranscript);\r\n                    annotatedVariant.Transcripts.Add(annotatedTranscript);\r\n                }\r\n            }\r\n        }\r\n\r\n        /// <summary>\r\n        /// Sets the transcript's conservation scores (stored score divided by 100) for the mapped protein range.\r\n        /// Nothing is set when there is no provider, no mapped position, an undefined protein range, or when any\r\n        /// position in the range has no score.\r\n        /// </summary>\r\n        private void AddConservationScore(IAnnotatedTranscript annotatedTranscript)\r\n        {\r\n            if (_conservationProvider              == null) return;\r\n            if (annotatedTranscript.MappedPosition == null) return;\r\n\r\n            int start = annotatedTranscript.MappedPosition.ProteinStart;\r\n            int end   = annotatedTranscript.MappedPosition.ProteinEnd;\r\n            if (start == -1 || end == -1) return;\r\n\r\n            // the transcript ID does not change while walking the protein range, so it is looked up once\r\n            string transcriptId = annotatedTranscript.Transcript.Id.WithVersion;\r\n            var    scores       = new List<double>();\r\n\r\n            for (int aaPos = start; aaPos <= end; aaPos++)\r\n            {\r\n                int score = _conservationProvider.GetConservationScore(transcriptId, aaPos);\r\n                if (score == -1) return; //don't add conservation scores\r\n                scores.Add(score / 100.0);\r\n            }\r\n\r\n            annotatedTranscript.ConservationScores = scores;\r\n        }\r\n\r\n        public void PreLoad(Chromosome chromosome, List<int> positions) => throw new NotImplementedException();\r\n\r\n        /// <summary>\r\n        /// Reads the SIFT and PolyPhen caches for the reference index unless they are already loaded.\r\n        /// The index ushort.MaxValue clears the caches instead.\r\n        /// </summary>\r\n        private void LoadPredictionCaches(ushort refIndex)\r\n        {\r\n            if (refIndex == _currentRefIndex) return;\r\n\r\n            if (refIndex == ushort.MaxValue)\r\n            {\r\n                ClearCache();\r\n                return;\r\n            }\r\n\r\n            _siftCache       = _siftReader.Read(refIndex);\r\n            _polyphenCache   = _polyphenReader.Read(refIndex);\r\n            _currentRefIndex = refIndex;\r\n        }\r\n\r\n        private void ClearCache()\r\n        {\r\n            _siftCache       = null;\r\n            _polyphenCache   = null;\r\n            _currentRefIndex = ushort.MaxValue;\r\n        }\r\n\r\n        private static void AddRegulatoryRegions(IAnnotatedVariant[] annotatedVariants, IIntervalForest<IRegulatoryRegion> regulatoryIntervalForest)\r\n        {\r\n            foreach (IAnnotatedVariant annotatedVariant in annotatedVariants)\r\n            {\r\n                if (!annotatedVariant.Variant.Behavior.NeedRegulatoryRegions) continue;\r\n\r\n                // In case of insertions, the base(s) are assumed to be inserted at the end position\r\n                // if this is an insertion just before the beginning of the regulatory element, this takes care of it\r\n                IVariant variant      = annotatedVariant.Variant;\r\n                int      variantBegin = variant.Type == VariantType.insertion ? variant.End : variant.Start;\r\n\r\n                if (SkipLargeVariants(variantBegin, variant.End)) continue;\r\n\r\n                IRegulatoryRegion[] regulatoryRegions =\r\n                    regulatoryIntervalForest.GetAllOverlappingValues(variant.Chromosome.Index, variantBegin,\r\n                        variant.End);\r\n                if (regulatoryRegions == null) continue;\r\n\r\n                foreach (IRegulatoryRegion regulatoryRegion in regulatoryRegions)\r\n                {\r\n                    // if the insertion is at the end, it's past the feature and therefore not overlapping\r\n                    if (variant.Type == VariantType.insertion && variant.End == regulatoryRegion.End) continue;\r\n\r\n                    annotatedVariant.RegulatoryRegions.Add(RegulatoryRegionAnnotator.Annotate(variant, regulatoryRegion));\r\n                }\r\n            }\r\n        }\r\n\r\n        private const int MaxSvLengthForRegulatoryRegionAnnotation = 50000;\r\n\r\n        private static bool SkipLargeVariants(int begin, int end) => end - begin + 1 > MaxSvLengthForRegulatoryRegionAnnotation;\r\n\r\n        public void Dispose()\r\n        {\r\n            _siftReader?.Dispose();\r\n            _polyphenReader?.Dispose();\r\n            _siftStream?.Dispose();\r\n            _polyphenStream?.Dispose();\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Providers/VersionProvider.cs",
    "content": "﻿using IO;\r\nusing ReferenceSequence;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing VariantAnnotation.SA;\r\n\r\nnamespace VariantAnnotation.Providers\r\n{\r\n    /// <summary>\r\n    /// Reports the cache, supplementary annotation and reference versions as a single string.\r\n    /// </summary>\r\n    public sealed class VersionProvider : IVersionProvider\r\n    {\r\n        public string DataVersion { get; }\r\n\r\n        public VersionProvider()\r\n        {\r\n            // built once per instance from the three version values\r\n            DataVersion =\r\n                $\"Cache version: {CacheConstants.DataVersion}, Supplementary annotation version: {SaCommon.DataVersion}, Reference version: {ReferenceSequenceCommon.HeaderVersion}\";\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/SA/CustomAnnotationCategories.cs",
    "content": "﻿namespace VariantAnnotation.SA\n{\n    /// <summary>\n    /// Custom annotation categories, backed by a byte. The numeric values are spelled out so that\n    /// reordering the members cannot silently change them.\n    /// </summary>\n    public enum CustomAnnotationCategories : byte\n    {\n        Unknown         = 0,\n        AlleleCount     = 1,\n        AlleleNumber    = 2,\n        AlleleFrequency = 3,\n        Prediction      = 4,\n        Filter          = 5,\n        Description     = 6,\n        Identifier      = 7,\n        HomozygousCount = 8,\n        Score           = 9\n    }\n}"
  },
  {
    "path": "VariantAnnotation/SA/JsonDataType.cs",
    "content": "﻿namespace VariantAnnotation.SA\n{\n    public enum JsonDataType : byte\n    {\n        String,\n        Bool,\n        Number,\n        Array,\n        Object\n    }\n\n    /// <summary>\n    /// Helpers that translate a JsonDataType into the strings used when describing it.\n    /// </summary>\n    public static class BacisJsonTypeExtension\n    {\n        /// <summary>\n        /// Returns the type name for the data type, or an empty string for values outside the enum.\n        /// </summary>\n        public static string ToTypeString(this JsonDataType jsonDataType) =>\n            jsonDataType switch\n            {\n                JsonDataType.String => \"string\",\n                JsonDataType.Bool   => \"boolean\",\n                JsonDataType.Number => \"number\",\n                JsonDataType.Array  => \"array\",\n                JsonDataType.Object => \"object\",\n                _                   => \"\"\n            };\n\n        /// <summary>\n        /// Returns \"items\" for arrays, \"properties\" for objects and an empty string for everything else.\n        /// </summary>\n        public static string GetSchemaKey(this JsonDataType jsonDataType) =>\n            jsonDataType switch\n            {\n                JsonDataType.Array  => \"items\",\n                JsonDataType.Object => \"properties\",\n                _                   => \"\"\n            };\n\n        /// <summary>\n        /// True only for arrays and objects.\n        /// </summary>\n        public static bool IsComplexType(this JsonDataType jsonDataType) =>\n            jsonDataType == JsonDataType.Array || jsonDataType == JsonDataType.Object;\n    }\n}"
  },
  {
    "path": "VariantAnnotation/SA/SaCommon.cs",
    "content": "﻿namespace VariantAnnotation.SA\r\n{\r\n    /// <summary>\r\n    /// Constants shared by the supplementary annotation code: sizes, versions, identifiers, file suffixes and tag strings.\r\n    /// </summary>\r\n    public static class SaCommon\r\n    {\r\n        // block size and versions\r\n        public const int    DefaultBlockSize = 8 * 1024 * 1024;\r\n        public const ushort DataVersion      = 66;\r\n        public const ushort SchemaVersion    = 22;\r\n        public const ushort NsiSchemaVersion = 22;\r\n\r\n        public const double RefMinorThreshold = 0.95;\r\n\r\n        // guard value and identifiers\r\n        public const uint   GuardInt      = 4041327495;\r\n        public const string NgaIdentifier = \"NirvanaGenes\";\r\n        public const string NsiIdentifier = \"NirvanaSupplementaryIntervals\";\r\n\r\n        // file suffixes\r\n        public const string IndexSuffix            = \".idx\";\r\n        public const string SaFileSuffix           = \".nsa\";\r\n        public const string GsaFileSuffix          = \".gsa\";\r\n        public const string PhylopFileSuffix       = \".npd\";\r\n        public const string RefMinorFileSuffix     = \".rma\";\r\n        public const string IntervalFileSuffix     = \".nsi\";\r\n        public const string LcrFileSuffix          = \".lcr\";\r\n        public const string GeneFileSuffix         = \".nga\";\r\n        public const string GeneFusionSourceSuffix = \".gfs\";\r\n        public const string GeneFusionJsonSuffix   = \".gfj\";\r\n        public const string JsonSchemaSuffix       = \".schema\";\r\n\r\n        // tags (OneKgenTag and OnekSvTag share the same value)\r\n        public const string DbsnpTag                = \"dbsnp\";\r\n        public const string GlobalAlleleTag         = \"globalAllele\";\r\n        public const string OneKgenTag              = \"oneKg\";\r\n        public const string AncestralAlleleTag      = \"ancestralAllele\";\r\n        public const string RefMinorTag             = \"refMinor\";\r\n        public const string GnomadTag               = \"gnomad\";\r\n        public const string GnomadExomeTag          = \"gnomadExome\";\r\n        public const string ClinvarTag              = \"clinvar\";\r\n        public const string CosmicTag               = \"cosmic\";\r\n        public const string CosmicCnvTag            = \"cosmicCnv\";\r\n        public const string OnekSvTag               = \"oneKg\";\r\n        public const string DgvTag                  = \"dgv\";\r\n        public const string ClinGenTag              = \"clingen\";\r\n        public const string MitoMapTag              = \"mitomap\";\r\n        public const string TopMedTag               = \"topmed\";\r\n        public const string PhylopTag               = \"phylopScore\";\r\n        public const string OmimTag                 = \"omim\";\r\n        public const string GnomadGeneScoreTag      = \"gnomAD\";\r\n        public const string GnomadStructuralVariant = \"gnomAD-preview\";\r\n        public const string DosageSensitivityTag    = \"clingenDosageSensitivityMap\";\r\n        public const string DiseaseValidityTag      = \"clingenGeneValidity\";\r\n        public const string SpliceAiTag             = \"spliceAI\";\r\n        public const string PrimateAiTag            = \"primateAI\";\r\n        public const string MitoHeteroplasmyTag     = \"heteroplasmy\";\r\n        public const string RevelTag                = \"revel\";\r\n        public const string DannTag                 = \"dann\";\r\n        public const string GerpTag                 = \"gerp\";\r\n        public const string LowComplexityRegionTag  = \"lowComplexityRegion\";\r\n        public const string GmeTag                  = \"gmeVariome\";\r\n        public const string DecipherTag             = \"decipher\";\r\n        public const string Score                   = \"Score\";\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/TranscriptAnnotation/FlankingTranscriptAnnotator.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing VariantAnnotation.AnnotatedPositions.Consequence;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Pools;\r\n\r\nnamespace VariantAnnotation.TranscriptAnnotation\r\n{\r\n    public static class FlankingTranscriptAnnotator\r\n    {\r\n        public static IAnnotatedTranscript GetAnnotatedTranscript(int variantEnd, ITranscript transcript)\r\n        {\r\n            bool                 isDownStream = variantEnd < transcript.Start == transcript.Gene.OnReverseStrand;\r\n            List<ConsequenceTag> consequences = Consequences.DetermineFlankingVariantEffects(isDownStream);\r\n            return AnnotatedTranscriptPool.Get(transcript, null, null, null, null, null, null, null, null, null, consequences, false);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/TranscriptAnnotation/FullTranscriptAnnotator.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Genome;\r\nusing Intervals;\r\nusing VariantAnnotation.Algorithms;\r\nusing VariantAnnotation.AnnotatedPositions;\r\nusing VariantAnnotation.AnnotatedPositions.Consequence;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Caches;\r\nusing VariantAnnotation.Pools;\r\nusing Variants;\r\n\r\nnamespace VariantAnnotation.TranscriptAnnotation\r\n{\r\n    public static class FullTranscriptAnnotator\r\n    {\r\n        public static IAnnotatedTranscript GetAnnotatedTranscript(ITranscript transcript, IVariant leftShiftedVariant,\r\n            ISequence refSequence, IPredictionCache siftCache, IPredictionCache polyphenCache, AminoAcids aminoAcids)\r\n        {\r\n            var rightShiftedVariant = VariantRotator.Right(leftShiftedVariant, transcript, refSequence,\r\n                transcript.Gene.OnReverseStrand);\r\n\r\n            var leftAnnotation = AnnotateTranscript(transcript, leftShiftedVariant, aminoAcids, refSequence);\r\n\r\n            var rightAnnotation = ReferenceEquals(leftShiftedVariant, rightShiftedVariant)\r\n                ? 
leftAnnotation\r\n                : AnnotateTranscript(transcript, rightShiftedVariant, aminoAcids, refSequence);\r\n\r\n            List<ConsequenceTag> consequences = GetConsequences(transcript, transcript.Gene.OnReverseStrand,\r\n                leftShiftedVariant, leftAnnotation.VariantEffect);\r\n            \r\n            var refAllele = rightAnnotation.TranscriptRefAllele;\r\n            var altAllele = rightAnnotation.TranscriptAltAllele;\r\n            var hgvsCoding = HgvsCodingNomenclature.GetHgvscAnnotation(transcript, rightShiftedVariant, refSequence,\r\n                    rightAnnotation.Position.RegionStartIndex, rightAnnotation.Position.RegionEndIndex, refAllele, altAllele);\r\n\r\n            var hgvsProtein = HgvsProteinNomenclature.GetHgvsProteinAnnotation(transcript,\r\n                rightAnnotation.RefAminoAcids, rightAnnotation.AltAminoAcids, rightAnnotation.TranscriptAltAllele,\r\n                rightAnnotation.Position, rightAnnotation.VariantEffect, rightShiftedVariant, refSequence, hgvsCoding,\r\n                leftShiftedVariant.Chromosome.UcscName == \"chrM\");\r\n\r\n            var predictionScores = GetPredictionScores(leftAnnotation.Position, leftAnnotation.RefAminoAcids,\r\n                leftAnnotation.AltAminoAcids, siftCache, polyphenCache, transcript.SiftIndex, transcript.PolyPhenIndex);\r\n\r\n            return AnnotatedTranscriptPool.Get(transcript, leftAnnotation.RefAminoAcids, leftAnnotation.AltAminoAcids,\r\n                leftAnnotation.RefCodons, leftAnnotation.AltCodons, leftAnnotation.Position, hgvsCoding, hgvsProtein,\r\n                predictionScores.Sift, predictionScores.PolyPhen, consequences, false);\r\n        }\r\n\r\n        private static (VariantEffect VariantEffect, IMappedPosition Position, string RefAminoAcids, string\r\n            AltAminoAcids, string RefCodons, string AltCodons, string TranscriptAltAllele, string TranscriptRefAllele) AnnotateTranscript(ITranscript transcript, ISimpleVariant 
variant, AminoAcids aminoAcids, ISequence refSequence)\r\n        {\r\n            bool onReverseStrand = transcript.Gene.OnReverseStrand;\r\n            var start = MappedPositionUtilities.FindRegion(transcript.TranscriptRegions, variant.Start);\r\n            var end   = MappedPositionUtilities.FindRegion(transcript.TranscriptRegions, variant.End);\r\n\r\n            var position = GetMappedPosition(transcript.TranscriptRegions, start.Region, start.Index, end.Region,\r\n                end.Index, variant, onReverseStrand, transcript.Translation?.CodingRegion, transcript.StartExonPhase,\r\n                variant.Type == VariantType.insertion);\r\n\r\n            var    codingSequence      = GetCodingSequence(transcript, refSequence);\r\n            var    cdnaSequence        = GetCdnaSequence(transcript, refSequence);\r\n\r\n            string transcriptAltAllele = HgvsUtilities.GetTranscriptAllele(variant.AltAllele, onReverseStrand);\r\n            var codons = Codons.GetCodons(transcriptAltAllele, position.CdsStart, position.CdsEnd, position.ProteinStart, position.ProteinEnd, codingSequence);\r\n            \r\n            var aa = aminoAcids.Translate(codons.Reference, codons.Alternate);\r\n            (aa, position.ProteinStart, position.ProteinEnd) = TryTrimAminoAcidsAndUpdateProteinPositions(aa, position.ProteinStart, position.ProteinEnd);\r\n\r\n            (position.CoveredCdnaStart, position.CoveredCdnaEnd) = transcript.TranscriptRegions.GetCoveredCdnaPositions(position.CdnaStart, start.Index, position.CdnaEnd, end.Index, onReverseStrand);\r\n            (position.CoveredCdsStart, position.CoveredCdsEnd, position.CoveredProteinStart, position.CoveredProteinEnd) = MappedPositionUtilities.GetCoveredCdsAndProteinPositions(position.CoveredCdnaStart, position.CoveredCdnaEnd, transcript.StartExonPhase, transcript.Translation?.CodingRegion);\r\n\r\n            var            transcriptRefAllele = GetTranscriptRefAllele(position, cdnaSequence, variant, 
onReverseStrand);\r\n            SequenceChange coveredAa;\r\n            // only generate the covered version of ref & alt alleles when CDS start/end is -1\r\n            if (position.CdsStart == -1 || position.CdsEnd == -1)\r\n            {\r\n                coveredAa = GetCoveredAa(aminoAcids, transcriptAltAllele, position.CoveredCdsStart, position.CoveredCdsEnd, position.CoveredProteinStart, position.CoveredProteinEnd, codingSequence);\r\n                (coveredAa, position.CoveredProteinStart, position.CoveredProteinEnd) = TryTrimAminoAcidsAndUpdateProteinPositions(coveredAa, position.CoveredProteinStart, position.CoveredProteinEnd);\r\n            }\r\n            else\r\n            {\r\n                coveredAa = aa;\r\n                position.CoveredProteinStart = position.ProteinStart;\r\n                position.CoveredProteinEnd = position.ProteinEnd;\r\n            }\r\n            \r\n\r\n            var positionalEffect = GetPositionalEffect(transcript, variant, position, aa.Reference, aa.Alternate,\r\n                position.CoveredCdnaStart, position.CoveredCdnaEnd, position.CoveredCdsStart, position.CoveredCdsEnd);\r\n\r\n            var variantEffect = new VariantEffect(positionalEffect, variant, transcript, aa.Reference, aa.Alternate,\r\n                codons.Reference, codons.Alternate, position.ProteinStart, coveredAa.Reference, coveredAa.Alternate);\r\n\r\n            return (variantEffect, position, aa.Reference, aa.Alternate, codons.Reference, codons.Alternate, transcriptAltAllele, transcriptRefAllele);\r\n        }\r\n\r\n        private static string GetTranscriptRefAllele(IMappedPosition position, ISequence cdnaSequence, ISimpleVariant variant, \r\n                                                     bool onReverseStrand)\r\n        {\r\n            var variantRef = HgvsUtilities.GetTranscriptAllele(variant.RefAllele, onReverseStrand);\r\n            if (position == null || cdnaSequence == null) return variantRef;\r\n            
var start = position.CoveredCdnaStart;\r\n            var end   = position.CoveredCdnaEnd;\r\n            if (start == -1 && end == -1) return variantRef;\r\n            if (start != -1 && end != -1 && end < start) Swap.Int(ref start, ref end);\r\n                \r\n            return cdnaSequence.Substring(start - 1, end - start + 1);\r\n            \r\n        }\r\n        private static string GetCodingFromCdna(ICodingRegion codingRegion, ISequence cdnaSequence)\r\n        {\r\n            if (codingRegion == null) return null;\r\n            return cdnaSequence.Substring(codingRegion.CdnaStart - 1, codingRegion.CdnaEnd - codingRegion.CdnaStart + 1);\r\n        }\r\n\r\n        internal static (SequenceChange AaChange, int ProteinStart, int ProteinEnd) TryTrimAminoAcidsAndUpdateProteinPositions(SequenceChange aaChange, int proteinStart, int proteinEnd)\r\n        {\r\n            (int newStart, string newReference, string newAlternate) = BiDirectionalTrimmer.Trim(proteinStart, aaChange.Reference, aaChange.Alternate);\r\n\r\n            return string.IsNullOrEmpty(newReference) ? 
(aaChange, proteinStart, proteinEnd) : \r\n                (new SequenceChange(newReference, newAlternate), newStart, newStart + newReference.Length - 1);\r\n        }\r\n\r\n        private static SequenceChange GetCoveredAa(AminoAcids aminoAcids, string transcriptAltAllele, int coveredCdsStart, int coveredCdsEnd, int coveredProteinStart, int coveredProteinEnd, ISequence codingSequence)\r\n        {\r\n            var codonsChange = Codons.GetCodons(transcriptAltAllele, coveredCdsStart, coveredCdsEnd, coveredProteinStart, coveredProteinEnd, codingSequence);\r\n            return aminoAcids.Translate(codonsChange.Reference, codonsChange.Alternate);\r\n        }\r\n\r\n        private static ISequence GetCodingSequence(ITranscript transcript, ISequence refSequence)\r\n        {\r\n            if (transcript.Translation == null) return null;\r\n\r\n            return transcript.CodingSequence ?? (transcript.CodingSequence = new CodingSequence(refSequence,\r\n                       transcript.Translation.CodingRegion, transcript.TranscriptRegions,\r\n                       transcript.Gene.OnReverseStrand, transcript.StartExonPhase, transcript.RnaEdits));\r\n        }\r\n        \r\n        private static ISequence GetCdnaSequence(ITranscript transcript, ISequence refSequence)\r\n        {\r\n            return transcript.CdnaSequence ?? 
(transcript.CdnaSequence = new CdnaSequence(refSequence,\r\n                transcript.Translation?.CodingRegion, transcript.TranscriptRegions,\r\n                transcript.Gene.OnReverseStrand, transcript.RnaEdits));\r\n        }\r\n\r\n        private static IMappedPosition GetMappedPosition(ITranscriptRegion[] regions, ITranscriptRegion startRegion, \r\n            int startIndex, ITranscriptRegion endRegion, int endIndex, IInterval variant, bool onReverseStrand,\r\n            ICodingRegion codingRegion, byte startExonPhase, bool isInsertion)\r\n        {\r\n            (int cdnaStart, int cdnaEnd) = MappedPositionUtilities.GetCdnaPositions(startRegion, endRegion, variant, onReverseStrand, isInsertion);\r\n            if (onReverseStrand) Swap.Int(ref cdnaStart, ref cdnaEnd);\r\n\r\n            (int cdsStart, int cdsEnd) = MappedPositionUtilities.GetCdsPositions(codingRegion, cdnaStart, cdnaEnd,\r\n                startExonPhase, isInsertion);\r\n\r\n            int proteinStart = MappedPositionUtilities.GetProteinPosition(cdsStart);\r\n            int proteinEnd   = MappedPositionUtilities.GetProteinPosition(cdsEnd);\r\n\r\n            (int exonStart, int exonEnd, int intronStart, int intronEnd) = regions.GetExonsAndIntrons(startIndex, endIndex);\r\n\r\n            return new MappedPosition(cdnaStart, cdnaEnd, cdsStart, cdsEnd, proteinStart, proteinEnd, exonStart,\r\n                exonEnd, intronStart, intronEnd, startIndex, endIndex);\r\n        }\r\n\r\n        private static TranscriptPositionalEffect GetPositionalEffect(ITranscript transcript, ISimpleVariant variant,\r\n            IMappedPosition position, string refAminoAcid, string altAminoAcid, int coveredCdnaStart,\r\n            int coveredCdnaEnd, int coveredCdsStart, int coveredCdsEnd)\r\n        {\r\n            bool startCodonInsertionWithNoImpact = variant.Type == VariantType.insertion &&\r\n                                                  position.ProteinStart <= 1 &&\r\n                    
                              altAminoAcid.EndsWith(refAminoAcid);\r\n\r\n            var positionalEffect = new TranscriptPositionalEffect();\r\n            positionalEffect.DetermineIntronicEffect(transcript.TranscriptRegions, variant, variant.Type);\r\n            positionalEffect.DetermineExonicEffect(transcript, variant, position, coveredCdnaStart, coveredCdnaEnd,\r\n                coveredCdsStart, coveredCdsEnd, variant.AltAllele, startCodonInsertionWithNoImpact);\r\n            return positionalEffect;\r\n        }\r\n\r\n        private static List<ConsequenceTag> GetConsequences(IInterval transcript, bool onReverseStrand, IVariant variant,\r\n            IVariantEffect variantEffect)\r\n        {\r\n            OverlapType overlapType = Intervals.Utilities.GetOverlapType(transcript.Start, transcript.End, variant.Start, variant.End);\r\n            EndpointOverlapType endpointOverlapType = Intervals.Utilities.GetEndpointOverlapType(transcript.Start, transcript.End, variant.Start, variant.End);\r\n            var featureEffect = new FeatureVariantEffects(overlapType, endpointOverlapType, onReverseStrand, variant.Type, variant.IsStructuralVariant);\r\n\r\n            var consequence = new Consequences(variant.Type, variantEffect, featureEffect);\r\n            consequence.DetermineSmallVariantEffects();\r\n            return consequence.GetConsequences();\r\n        }\r\n\r\n        private static (PredictionScore Sift, PredictionScore PolyPhen) GetPredictionScores(IMappedPosition position,\r\n            string refAminoAcid, string altAminoAcid, IPredictionCache siftCache, IPredictionCache polyphenCache,\r\n            int siftIndex, int polyphenIndex)\r\n        {\r\n            if (!NeedPredictionScore(position.ProteinStart, position.ProteinEnd, refAminoAcid, altAminoAcid) ||\r\n                position.ProteinStart == -1) return (null, null);\r\n\r\n            var newAminoAcid  = altAminoAcid[0];\r\n            var siftScore     = 
GetPredictionScore(position.ProteinStart, newAminoAcid, siftCache, siftIndex);\r\n            var polyphenScore = GetPredictionScore(position.ProteinStart, newAminoAcid, polyphenCache, polyphenIndex);\r\n            return (siftScore, polyphenScore);\r\n        }\r\n\r\n        private static bool NeedPredictionScore(int proteinStart, int proteinEnd, string referenceAminoAcids,\r\n            string alternateAminoAcids)\r\n        {\r\n            return proteinStart != -1 &&\r\n                   proteinEnd != -1 &&\r\n                   proteinStart == proteinEnd &&\r\n                   referenceAminoAcids.Length == 1 &&\r\n                   alternateAminoAcids.Length == 1 &&\r\n                   referenceAminoAcids != alternateAminoAcids;\r\n        }\r\n\r\n        private static PredictionScore GetPredictionScore(int proteinPosition, char newAminoAcid,\r\n            IPredictionCache predictionCache, int predictionIndex)\r\n        {\r\n            return predictionIndex == -1\r\n                ? null\r\n                : predictionCache?.GetProteinFunctionPrediction(predictionIndex, newAminoAcid, proteinPosition);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/TranscriptAnnotation/ReducedTranscriptAnnotator.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Intervals;\r\nusing VariantAnnotation.AnnotatedPositions.Consequence;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Caches.DataStructures;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Pools;\r\nusing Variants;\r\n\r\nnamespace VariantAnnotation.TranscriptAnnotation\r\n{\r\n    public static class ReducedTranscriptAnnotator\r\n    {\r\n        public static IAnnotatedTranscript GetAnnotatedTranscript(ITranscript transcript, IVariant variant)\r\n        {\r\n            bool completeOverlap = variant.Contains(transcript);\r\n            var  mappedPosition  = completeOverlap ? null : GetMappedPosition(transcript.TranscriptRegions, variant);\r\n\r\n            List<ConsequenceTag> consequences = GetConsequences(transcript, transcript.Gene.OnReverseStrand, variant);\r\n\r\n            return AnnotatedTranscriptPool.Get(transcript, null, null, null, null, mappedPosition, null, null, null, null,\r\n                consequences, completeOverlap);\r\n        }\r\n\r\n        private static IMappedPosition GetMappedPosition(ITranscriptRegion[] regions, IInterval variant)\r\n        {\r\n            (int startIndex, _) = MappedPositionUtilities.FindRegion(regions, variant.Start);\r\n            (int endIndex, _)   = MappedPositionUtilities.FindRegion(regions, variant.End);\r\n\r\n            (int exonStart, int exonEnd, int intronStart, int intronEnd) = regions.GetExonsAndIntrons(startIndex, endIndex);\r\n\r\n            return new MappedPosition(-1, -1, -1, -1, -1, -1, exonStart, exonEnd, intronStart, intronEnd, startIndex,\r\n                endIndex);\r\n        }\r\n\r\n        private static List<ConsequenceTag> GetConsequences(IInterval transcript, bool onReverseStrand, IVariant variant)\r\n        {\r\n            OverlapType overlapType = Intervals.Utilities.GetOverlapType(transcript.Start, transcript.End, variant.Start, 
variant.End);\r\n            EndpointOverlapType endpointOverlapType = Intervals.Utilities.GetEndpointOverlapType(transcript.Start, transcript.End, variant.Start, variant.End);\r\n            var featureEffect = new FeatureVariantEffects(overlapType, endpointOverlapType, onReverseStrand, variant.Type, true);\r\n            var consequence   = new Consequences(variant.Type, null, featureEffect);\r\n            consequence.DetermineStructuralVariantEffect(variant);\r\n            return consequence.GetConsequences();\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/TranscriptAnnotation/RohTranscriptAnnotator.cs",
    "content": "﻿using VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.Pools;\n\nnamespace VariantAnnotation.TranscriptAnnotation\n{\n    public static class RohTranscriptAnnotator\n    {\n        public static IAnnotatedTranscript GetAnnotatedTranscript(ITranscript transcript)\n        {\n            return transcript.IsCanonical ? AnnotatedTranscriptPool.Get(transcript, null, null, null, null, null, null, null, null, null,\n                null, null) : null;\n        }\n    }\n}"
  },
  {
    "path": "VariantAnnotation/TranscriptAnnotation/SequenceChange.cs",
    "content": "﻿namespace VariantAnnotation.TranscriptAnnotation\r\n{\r\n    public struct SequenceChange\r\n    {\r\n        public readonly string Reference;\r\n        public readonly string Alternate;\r\n\r\n        public SequenceChange(string reference, string alternate)\r\n        {\r\n            Reference = reference;\r\n            Alternate = alternate;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/TranscriptAnnotation/TranscriptAnnotationFactory.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Genome;\r\nusing Intervals;\r\nusing VariantAnnotation.AnnotatedPositions.Transcript;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Caches;\r\nusing Variants;\r\n\r\nnamespace VariantAnnotation.TranscriptAnnotation\r\n{\r\n    public static class TranscriptAnnotationFactory\r\n    {\r\n        private static readonly AminoAcids AminoAcidsProvider     = new AminoAcids(false);\r\n        private static readonly AminoAcids MitoAminoAcidsProvider = new AminoAcids(true);\r\n\r\n        public static IList<IAnnotatedTranscript> GetAnnotatedTranscripts(IVariant variant,\r\n            ITranscript[] transcriptCandidates, ISequence compressedSequence, IPredictionCache siftCache,\r\n            IPredictionCache polyphenCache)\r\n        {\r\n            var annotatedTranscripts = new List<IAnnotatedTranscript>();\r\n\r\n            foreach (var transcript in transcriptCandidates)\r\n            {\r\n                var annotationStatus = DecideAnnotationStatus(variant, transcript, variant.Behavior,\r\n                    variant.Chromosome.FlankingLength);\r\n\r\n                var annotatedTranscript = GetAnnotatedTranscript(variant, compressedSequence, transcript,\r\n                    annotationStatus, siftCache, polyphenCache);\r\n\r\n                if (annotatedTranscript != null) annotatedTranscripts.Add(annotatedTranscript);\r\n            }\r\n\r\n            return annotatedTranscripts;\r\n        }\r\n\r\n        private static IAnnotatedTranscript GetAnnotatedTranscript(IVariant variant, ISequence compressedSequence,\r\n            ITranscript transcript, Status annotationStatus, IPredictionCache siftCache, IPredictionCache polyphenCache)\r\n        {\r\n            IAnnotatedTranscript annotatedTranscript = null;\r\n\r\n            // ReSharper disable once SwitchStatementMissingSomeCases\r\n            switch (annotationStatus)\r\n            {\r\n      
          case Status.FlankingAnnotation:\r\n                    annotatedTranscript = FlankingTranscriptAnnotator.GetAnnotatedTranscript(variant.End, transcript);\r\n                    break;\r\n                case Status.ReducedAnnotation:\r\n                    annotatedTranscript = ReducedTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant);\r\n                    break;\r\n                case Status.RohAnnotation:\r\n                    annotatedTranscript = RohTranscriptAnnotator.GetAnnotatedTranscript(transcript);\r\n                    break;\r\n                case Status.FullAnnotation:\r\n                    var acidsProvider = variant.Chromosome.UcscName == \"chrM\"\r\n                        ? MitoAminoAcidsProvider\r\n                        : AminoAcidsProvider;\r\n                    annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant,\r\n                        compressedSequence, siftCache, polyphenCache, acidsProvider);\r\n                    break;\r\n            }\r\n\r\n            return annotatedTranscript;\r\n        }\r\n\r\n        internal static Status DecideAnnotationStatus(IInterval variant, IInterval transcript, AnnotationBehavior behavior, int flankingLength)\r\n        {\r\n            bool overlapsTranscript = variant.Overlaps(transcript);\r\n            \r\n            if (!behavior.ReducedTranscriptAnnotation)\r\n            {\r\n                // handle small variants\r\n                if (overlapsTranscript) return Status.FullAnnotation;\r\n                if (behavior.NeedFlankingTranscripts && variant.Overlaps(transcript, flankingLength)) return Status.FlankingAnnotation;\r\n            }\r\n            else if (overlapsTranscript)\r\n            {\r\n                // handle large variants\r\n                if (behavior.CanonicalTranscriptOnly) return Status.RohAnnotation;\r\n                return Status.ReducedAnnotation;\r\n            }\r\n\r\n            return 
Status.NoAnnotation;\r\n        }\r\n\r\n        public enum Status\r\n        {\r\n            NoAnnotation,\r\n            FlankingAnnotation,\r\n            ReducedAnnotation,\r\n            FullAnnotation,\r\n            RohAnnotation\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Utilities/BaseFormatting.cs",
    "content": "﻿namespace VariantAnnotation.Utilities\r\n{\r\n    public static class BaseFormatting\r\n    {\r\n        public static string EmptyToDash(string bases) => bases == \"\" ? \"-\" : bases;\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Utilities/Date.cs",
    "content": "﻿using System;\r\n\r\nnamespace VariantAnnotation.Utilities\r\n{\r\n    public static class Date\r\n    {\r\n        public static string CurrentTimeStamp => DateTime.Now.ToString(\"yyyy-MM-dd HH:mm:ss\");\r\n        public static string GetDate(long ticks) => new DateTime(ticks).ToString(\"yyyy-MM-dd\");\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Utilities/FormatUtilities.cs",
    "content": "﻿namespace VariantAnnotation.Utilities\r\n{\r\n\tpublic static class FormatUtilities\r\n\t{\r\n        public static (string Id, byte Version) SplitVersion(string s)\r\n\t\t{\r\n\t\t    if (s == null) return (null, 0);\r\n\r\n\t\t\tint lastPeriodPos = s.LastIndexOf('.');\r\n\t\t    if (lastPeriodPos == -1) return (s, 0);\r\n\r\n            string id        = s.Substring(0, lastPeriodPos);\r\n            string remaining = s.Substring(lastPeriodPos + 1);\r\n\r\n            return !byte.TryParse(remaining, out byte version) ? (s, (byte)1) : (id, version);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/Utilities/GeneComparer.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace VariantAnnotation.Utilities\r\n{\r\n    public sealed class GeneComparer : EqualityComparer<IGene>\r\n    {\r\n        public override bool Equals(IGene x, IGene y)\r\n        {\r\n            return x.Start == y.Start &&\r\n                   x.End == y.End &&\r\n                   x.Chromosome.Index == y.Chromosome.Index &&\r\n                   x.OnReverseStrand == y.OnReverseStrand &&\r\n                   x.Symbol == y.Symbol &&\r\n                   x.EntrezGeneId.WithVersion == y.EntrezGeneId.WithVersion &&\r\n                   x.EnsemblId.WithVersion == y.EnsemblId.WithVersion &&\r\n                   x.HgncId == y.HgncId;\r\n        }\r\n\r\n        public override int GetHashCode(IGene obj)\r\n        {\r\n            string entrezGeneId = obj.EntrezGeneId.WithVersion;\r\n            string ensemblId    = obj.EnsemblId.WithVersion;\r\n\r\n            unchecked\r\n            {\r\n                int hashCode = obj.Start;\r\n                hashCode = (hashCode * 397) ^ obj.End;\r\n                hashCode = (hashCode * 397) ^ obj.Chromosome.Index;\r\n                hashCode = (hashCode * 397) ^ obj.OnReverseStrand.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ obj.Symbol.GetHashCode();\r\n                if (entrezGeneId != null) hashCode = (hashCode * 397) ^ entrezGeneId.GetHashCode();\r\n                if (ensemblId != null) hashCode = (hashCode * 397) ^ ensemblId.GetHashCode();\r\n                hashCode = (hashCode * 397) ^ obj.HgncId;\r\n                return hashCode;\r\n            }\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation/VariantAnnotation.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\r\n  <PropertyGroup>\r\n    <TargetFramework>net6.0</TargetFramework>\r\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\r\n    \r\n  </PropertyGroup>\r\n  <ItemGroup>\r\n    <ProjectReference Include=\"..\\CommandLine\\CommandLine.csproj\" />\r\n    <ProjectReference Include=\"..\\Compression\\Compression.csproj\" />\r\n    <ProjectReference Include=\"..\\ErrorHandling\\ErrorHandling.csproj\" />\r\n    <ProjectReference Include=\"..\\Intervals\\Intervals.csproj\" />\r\n    <ProjectReference Include=\"..\\IO\\IO.csproj\" />\r\n    <ProjectReference Include=\"..\\Jasix\\Jasix.csproj\" />\r\n    <ProjectReference Include=\"..\\OptimizedCore\\OptimizedCore.csproj\" />\r\n    <ProjectReference Include=\"..\\ReferenceSequence\\ReferenceSequence.csproj\" />\r\n    <ProjectReference Include=\"..\\RepeatExpansions\\RepeatExpansions.csproj\" />\r\n    <ProjectReference Include=\"..\\VariantAnnotation.Interface\\VariantAnnotation.Interface.csproj\" />\r\n    <ProjectReference Include=\"..\\Variants\\Variants.csproj\" />\r\n  </ItemGroup>\r\n  <ItemGroup>\r\n    <PackageReference Include=\"Microsoft.Composition\" Version=\"1.0.31\" />\r\n    <PackageReference Include=\"Microsoft.Extensions.ObjectPool\" Version=\"6.0.2\" />\r\n  </ItemGroup>\r\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\r\n</Project>\r\n"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/BioType.cs",
    "content": "﻿namespace VariantAnnotation.Interface.AnnotatedPositions\r\n{\r\n    public enum BioType : byte\r\n    {\r\n        // ReSharper disable InconsistentNaming\r\n        aligned_transcript,\r\n        ambiguous_orf,\r\n        antisense,\r\n        antisense_RNA,\r\n        bidirectional_promoter_lncRNA,\r\n        guide_RNA,\r\n        IG_pseudogene,\r\n        IG_C_gene,\r\n        IG_C_pseudogene,\r\n        IG_D_gene,\r\n        IG_J_gene,\r\n        IG_J_pseudogene,\r\n        IG_V_gene,\r\n        IG_V_pseudogene,\r\n        lincRNA,\r\n        lncRNA,\r\n        macro_lncRNA,\r\n        mRNA,\r\n        miRNA,\r\n        Mt_rRNA,\r\n        Mt_tRNA,\r\n        non_coding,\r\n        nonsense_mediated_decay,\r\n        non_stop_decay,\r\n        other,\r\n        polymorphic_pseudogene,\r\n        processed_pseudogene,\r\n        processed_transcript,\r\n        protein_coding,\r\n        pseudogene,\r\n        retained_intron,\r\n        retrotransposed,\r\n        RNase_MRP_RNA,\r\n        RNase_P_RNA,\r\n        rRNA,\r\n        ribozyme,\r\n        misc_RNA,\r\n        sRNA,\r\n        scRNA,\r\n        scaRNA,\r\n        sense_intronic,\r\n        sense_overlapping,\r\n        SRP_RNA,\r\n        snoRNA,\r\n        snRNA,\r\n        telomerase_RNA,\r\n        three_prime_overlapping_ncRNA,\r\n        transcribed_processed_pseudogene,\r\n        transcribed_unitary_pseudogene,\r\n        transcribed_unprocessed_pseudogene,\r\n        TEC,\r\n        translated_processed_pseudogene,\r\n        translated_unprocessed_pseudogene,\r\n        TR_C_gene,\r\n        TR_D_gene,\r\n        TR_J_gene,\r\n        TR_J_pseudogene,\r\n        tRNA,\r\n        TR_V_gene,\r\n        TR_V_pseudogene,\r\n        unitary_pseudogene,\r\n        unprocessed_pseudogene,\r\n        vaultRNA,\r\n        Y_RNA\r\n        // ReSharper restore InconsistentNaming\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/ConsequenceTag.cs",
    "content": "﻿namespace VariantAnnotation.Interface.AnnotatedPositions\r\n{\r\n    public enum ConsequenceTag : byte\r\n    {\r\n        // ReSharper disable InconsistentNaming\r\n        coding_sequence_variant,\r\n        copy_number_increase,\r\n        copy_number_decrease,\r\n        copy_number_change,\r\n        downstream_gene_variant,\r\n        feature_elongation,\r\n        five_prime_duplicated_transcript,\r\n        five_prime_UTR_variant,\r\n        frameshift_variant,\r\n        incomplete_terminal_codon_variant,\r\n        inframe_deletion,\r\n        inframe_insertion,\r\n        start_lost,\r\n        start_retained_variant,\r\n        intron_variant,\r\n        missense_variant,\r\n        mature_miRNA_variant,\r\n        non_coding_transcript_exon_variant,\r\n        non_coding_transcript_variant,\r\n        NMD_transcript_variant,\r\n        protein_altering_variant,\r\n        regulatory_region_variant,\r\n        regulatory_region_ablation,\r\n        regulatory_region_amplification,\r\n        splice_acceptor_variant,\r\n        splice_donor_variant,\r\n        splice_region_variant,\r\n        stop_gained,\r\n        stop_lost,\r\n        stop_retained_variant,\r\n        synonymous_variant,\r\n        three_prime_duplicated_transcript,\r\n        three_prime_UTR_variant,\r\n        transcript_amplification,\r\n        transcript_ablation,\r\n        feature_truncation,\r\n        upstream_gene_variant,\r\n        short_tandem_repeat_change,\r\n        short_tandem_repeat_expansion,\r\n        short_tandem_repeat_contraction,\r\n        transcript_variant,\r\n        unidirectional_gene_fusion\r\n        // ReSharper restore InconsistentNaming\r\n    }\r\n\r\n    public static class ConsequenceUtil\r\n    {\r\n        public static string GetConsequence(ConsequenceTag consequence)\r\n        {\r\n            if (consequence == ConsequenceTag.five_prime_UTR_variant) return \"5_prime_UTR_variant\";\r\n            return consequence == 
ConsequenceTag.three_prime_UTR_variant ? \"3_prime_UTR_variant\" : consequence.ToString();\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/IAnnotatedGeneFusion.cs",
    "content": "﻿// ReSharper disable InconsistentNaming\n\nusing VariantAnnotation.Interface.IO;\n\nnamespace VariantAnnotation.Interface.AnnotatedPositions\n{\n    public interface IAnnotatedGeneFusion : IGeneFusionPair, IJsonSerializer\n    {\n        ITranscript transcript      { get; }\n        int?        exon            { get; }\n        int?        intron          { get; }\n        string      hgvsr           { get; }\n        bool        isInFrame       { get; }\n    }\n}"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/IAnnotatedPosition.cs",
    "content": "﻿using System.Collections.Generic;\nusing System.Text;\nusing VariantAnnotation.Interface.Positions;\nusing VariantAnnotation.Interface.SA;\n\nnamespace VariantAnnotation.Interface.AnnotatedPositions\n{\n\tpublic interface IAnnotatedPosition\n\t{\n\t\tIPosition                       Position               { get; }\n\t\tstring                          CytogeneticBand        { get; set; }\n\t\tIAnnotatedVariant[]             AnnotatedVariants      { get; }\n\t\tIList<ISupplementaryAnnotation> SupplementaryIntervals { get; }\n\t    StringBuilder                   GetJsonStringBuilder();\n\t}\n}"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/IAnnotatedRegulatoryRegion.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing VariantAnnotation.Interface.IO;\r\n\r\nnamespace VariantAnnotation.Interface.AnnotatedPositions\r\n{\r\n    public interface IAnnotatedRegulatoryRegion:IJsonSerializer\r\n    {\r\n        IRegulatoryRegion RegulatoryRegion { get; }\r\n        IEnumerable<ConsequenceTag> Consequences { get; }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/IAnnotatedTranscript.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing VariantAnnotation.Interface.IO;\r\n\r\nnamespace VariantAnnotation.Interface.AnnotatedPositions\r\n{\r\n    public interface IAnnotatedTranscript : IJsonSerializer\r\n    {\r\n        ITranscript          Transcript          { get; }\r\n        string               ReferenceAminoAcids { get; }\r\n        string               AlternateAminoAcids { get; }\r\n        string               ReferenceCodons     { get; }\r\n        string               AlternateCodons     { get; }\r\n        IMappedPosition      MappedPosition      { get; }\r\n        string               HgvsCoding          { get; }\r\n        string               HgvsProtein         { get; }\r\n        PredictionScore      Sift                { get; }\r\n        PredictionScore      PolyPhen            { get; }\r\n        List<ConsequenceTag> Consequences        { get; }\r\n        bool?                CompleteOverlap     { get; }\r\n        List<double>         ConservationScores  { get; set; }\r\n\r\n        void AddGeneFusions(IAnnotatedGeneFusion[] geneFusions);\r\n        void AddGeneFusionPairs(HashSet<IGeneFusionPair> geneKeys);\r\n\r\n        public void Initialize(ITranscript transcript, string referenceAminoAcids, string alternateAminoAcids,\r\n            string referenceCodons, string alternateCodons, IMappedPosition mappedPosition, string hgvsCoding,\r\n            string hgvsProtein, PredictionScore sift, PredictionScore polyphen,\r\n            List<ConsequenceTag> consequences, bool? completeOverlap);\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/IAnnotatedVariant.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Text;\r\nusing VariantAnnotation.Interface.SA;\r\nusing Variants;\r\n\r\nnamespace VariantAnnotation.Interface.AnnotatedPositions\r\n{\r\n\tpublic interface IAnnotatedVariant\r\n\t{\r\n\t\tIVariant                          Variant                   { get; }\r\n        string                            HgvsgNotation             { get; set; }\r\n        IList<IAnnotatedRegulatoryRegion> RegulatoryRegions         { get;  }\r\n\t    IList<IAnnotatedTranscript>       Transcripts               { get; }\r\n        IList<ISupplementaryAnnotation>   SaList                    { get; }\r\n        ISupplementaryAnnotation          RepeatExpansionPhenotypes { get; set; }\r\n\t\tdouble?                           PhylopScore               { get; set; }\r\n\t\tdouble?                           GerpScore               { get; set; }\r\n\t\tbool                              InLowComplexityRegion     { get; set; }\r\n        StringBuilder                     GetJsonStringBuilder(string originalChromName);\r\n    }\t\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/ICodingRegion.cs",
    "content": "﻿using Intervals;\r\nusing IO;\r\n\r\nnamespace VariantAnnotation.Interface.AnnotatedPositions\r\n{\r\n    public interface ICodingRegion : IInterval, ISerializable\r\n    {\r\n        int CdnaStart { get; }\r\n        int CdnaEnd { get; }\r\n        int Length { get; }\r\n    }\r\n}\r\n"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/ICompactId.cs",
    "content": "﻿using IO;\r\n\r\nnamespace VariantAnnotation.Interface.AnnotatedPositions\r\n{\r\n    public interface ICompactId : ISerializable\r\n    {\r\n        bool IsEmpty();\r\n        string WithVersion { get; }\r\n        string WithoutVersion { get; }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/IFeatureVariantEffects.cs",
    "content": "﻿namespace VariantAnnotation.Interface.AnnotatedPositions\r\n{\r\n    public interface IFeatureVariantEffects\r\n    {\r\n        bool Ablation();\r\n        bool Amplification();\r\n        bool Truncation();\r\n        bool Elongation();\r\n        bool FivePrimeDuplicatedTranscript();\r\n        bool ThreePrimeDuplicatedTranscript();\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/IGene.cs",
    "content": "﻿using Genome;\r\nusing IO;\r\n\r\nnamespace VariantAnnotation.Interface.AnnotatedPositions\r\n{\r\n    public interface IGene : IChromosomeInterval, ISerializable\r\n    {\r\n        bool OnReverseStrand { get; }\r\n        string Symbol { get; }\r\n        ICompactId EntrezGeneId { get; }\r\n        ICompactId EnsemblId { get; }\r\n        int HgncId { get; }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/IGeneFusion.cs",
    "content": "﻿using VariantAnnotation.Interface.IO;\n\nnamespace VariantAnnotation.Interface.AnnotatedPositions\n{\n    public interface IGeneFusion : IJsonSerializer\n    {\n        int?   Exon       { get; }\n        int?   Intron     { get; }\n        string HgvsCoding { get; }\n    }\n}"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/IGeneFusionPair.cs",
    "content": "﻿namespace VariantAnnotation.Interface.AnnotatedPositions\n{\n    public interface IGeneFusionPair\n    {\n        ulong  FusionKey        { get; }\n        string FirstGeneSymbol  { get; }\n        uint   FirstGeneKey     { get; }\n        string SecondGeneSymbol { get; }\n        uint   SecondGeneKey    { get; }\n    }\n}"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/IMappedPosition.cs",
    "content": "﻿namespace VariantAnnotation.Interface.AnnotatedPositions\r\n{\r\n    public interface IMappedPosition\r\n    {\r\n        int ProteinStart { get; set; }\r\n        int ProteinEnd { get; set; }\r\n        int CdsStart { get; }\r\n        int CdsEnd { get; }\r\n        int CdnaStart { get; }\r\n        int CdnaEnd { get; }\r\n        int ExonStart { get; }\r\n        int ExonEnd { get; }\r\n        int IntronStart { get; }\r\n        int IntronEnd { get; }\r\n        int RegionStartIndex { get; }\r\n        int RegionEndIndex { get; }\r\n        int CoveredProteinStart { get; set; }\r\n        int CoveredProteinEnd { get; set; }\r\n        int CoveredCdsStart { get; set; }\r\n        int CoveredCdsEnd { get; set; }\r\n        int CoveredCdnaStart { get; set; }\r\n        int CoveredCdnaEnd { get; set; }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/IRegulatoryRegion.cs",
    "content": "﻿using Genome;\r\nusing IO;\r\nusing VariantAnnotation.Interface.Caches;\r\n\r\nnamespace VariantAnnotation.Interface.AnnotatedPositions\r\n{\r\n    public interface IRegulatoryRegion : IChromosomeInterval, ISerializable\r\n    {\r\n        ICompactId Id { get; }\r\n        RegulatoryRegionType Type { get; }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/IRnaEdit.cs",
    "content": "﻿using System;\r\nusing Intervals;\r\nusing IO;\r\nusing Variants;\r\n\r\nnamespace VariantAnnotation.Interface.AnnotatedPositions\r\n{\r\n    public interface IRnaEdit : IInterval, ISerializable, IComparable<IRnaEdit>\r\n    {\r\n        string Bases { get; }\r\n        VariantType Type { get; set; }\r\n    }\r\n}\r\n"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/ITranscript.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Genome;\r\nusing Intervals;\r\nusing IO;\r\n\r\nnamespace VariantAnnotation.Interface.AnnotatedPositions\r\n{\r\n    public interface ITranscript : IChromosomeInterval\r\n    {\r\n        ICompactId Id { get; }\r\n        BioType BioType { get; }\r\n        bool IsCanonical { get; }\r\n        Source Source { get; }\r\n\r\n        IGene Gene { get; }\r\n        ITranscriptRegion[] TranscriptRegions { get; }\r\n        ushort NumExons { get; }\r\n        int TotalExonLength { get; }\r\n        byte StartExonPhase { get; }\r\n        int SiftIndex { get; }\r\n        int PolyPhenIndex { get; }\r\n\r\n        ITranslation Translation { get; }\r\n        IInterval[] MicroRnas { get; }\r\n        int[] Selenocysteines { get; }\r\n        IRnaEdit[] RnaEdits { get; }\r\n\r\n        bool      CdsStartNotFound { get; }\r\n        bool      CdsEndNotFound   { get; }\r\n        ISequence CodingSequence   { get; set; }\r\n        ISequence CdnaSequence     { get; set; }\r\n\r\n        void Write(IExtendedBinaryWriter writer, Dictionary<IGene, int> geneIndices,\r\n            Dictionary<ITranscriptRegion, int> transcriptRegionIndices, Dictionary<IInterval, int> microRnaIndices,\r\n            Dictionary<string, int> peptideIndices);\r\n    }\r\n\r\n    public enum Source : byte\r\n    {\r\n        None,\r\n        RefSeq,\r\n        Ensembl,\r\n        BothRefSeqAndEnsembl\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/ITranscriptRegion.cs",
    "content": "﻿using Intervals;\r\nusing IO;\r\n\r\nnamespace VariantAnnotation.Interface.AnnotatedPositions\r\n{\r\n    public interface ITranscriptRegion : IInterval, ISerializable\r\n    {\r\n        TranscriptRegionType Type { get; }\r\n        ushort Id { get; }\r\n        int CdnaStart { get; }\r\n        int CdnaEnd { get; }\r\n    }\r\n\r\n    public enum TranscriptRegionType : byte\r\n    {\r\n        Exon,\r\n        Gap,\r\n        Intron\r\n    }\r\n}\r\n"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/ITranslation.cs",
    "content": "﻿using IO;\r\n\r\nnamespace VariantAnnotation.Interface.AnnotatedPositions\r\n{\r\n    public interface ITranslation\r\n    {\r\n        ICodingRegion CodingRegion { get; }\r\n\t\tICompactId ProteinId { get; }\r\n\t    string PeptideSeq { get; }\r\n        void Write(IExtendedBinaryWriter writer, int peptideIndex);\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/IVariantEffect.cs",
    "content": "﻿namespace VariantAnnotation.Interface.AnnotatedPositions\r\n{\r\n\tpublic interface IVariantEffect\r\n\t{\r\n\t\tbool IsStopLost();\r\n\t\tbool IsStopRetained();\r\n\t\tbool IsStartLost();\r\n\t\tbool IsFrameshiftVariant();\r\n\t    bool IsMatureMirnaVariant();\r\n\t    bool IsSpliceDonorVariant();\r\n\t    bool IsSpliceAcceptorVariant();\r\n\t    bool IsStopGained();\r\n\t    bool IsInframeInsertion();\r\n\t    bool IsInframeDeletion();\r\n\t    bool IsMissenseVariant();\r\n\t    bool IsProteinAlteringVariant();\r\n\t    bool IsSpliceRegionVariant();\r\n\t    bool IsIncompleteTerminalCodonVariant();\r\n\t    bool IsStartRetained();\r\n\t    bool IsSynonymousVariant();\r\n\t    bool IsCodingSequenceVariant();\r\n\t    bool IsFivePrimeUtrVariant();\r\n\t    bool IsThreePrimeUtrVariant();\r\n\t    bool IsNonCodingTranscriptExonVariant();\r\n\t    bool IsWithinIntron();\r\n\t    bool IsNonsenseMediatedDecayTranscriptVariant();\r\n\t    bool IsNonCodingTranscriptVariant();\r\n\t}\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/AnnotatedPositions/PredictionScore.cs",
    "content": "﻿namespace VariantAnnotation.Interface.AnnotatedPositions\r\n{\r\n    public sealed class PredictionScore\r\n    {\r\n        public readonly double Score;\r\n        public readonly string Prediction;\r\n\r\n        public PredictionScore(string prediction, double score)\r\n        {\r\n            Prediction = prediction;\r\n            Score      = score;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/Caches/IPredictionCache.cs",
    "content": "﻿using VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Providers;\r\n\r\nnamespace VariantAnnotation.Interface.Caches\r\n{\r\n    public interface IPredictionCache : IProvider\r\n    {\r\n        PredictionScore GetProteinFunctionPrediction(int predictionIndex, char newAminoAcid, int aaPosition);\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/Caches/ITranscriptCache.cs",
    "content": "﻿using Intervals;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\nusing VariantAnnotation.Interface.Providers;\r\n\r\nnamespace VariantAnnotation.Interface.Caches\r\n{\r\n    public interface ITranscriptCache : IProvider\r\n    {\r\n        IIntervalForest<ITranscript> TranscriptIntervalForest { get; }\r\n        IIntervalForest<IRegulatoryRegion> RegulatoryIntervalForest { get; }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/Caches/RegulatoryRegionType.cs",
    "content": "﻿namespace VariantAnnotation.Interface.Caches\r\n{\r\n\tpublic enum RegulatoryRegionType : byte\r\n\t{\r\n\t\t// ReSharper disable InconsistentNaming\r\n\t\tCTCF_binding_site,\r\n\t\tenhancer,\r\n\t\topen_chromatin_region,\r\n\t\tpromoter,\r\n\t\tpromoter_flanking_region,\r\n\t\tTF_binding_site,\r\n\t\tmature_protein_region\r\n\t\t// ReSharper restore InconsistentNaming\r\n\t}\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/Constants.cs",
    "content": "﻿namespace VariantAnnotation.Interface\r\n{\r\n\tpublic static class Constants\r\n\t{\r\n\t\tpublic const string Authors = \"Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al\";\r\n\t}\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/GeneAnnotation/IGeneAnnotationProvider.cs",
    "content": "﻿using System;\r\nusing VariantAnnotation.Interface.Providers;\r\n\r\nnamespace VariantAnnotation.Interface.GeneAnnotation\r\n{\r\n    public interface IGeneAnnotationProvider : IProvider, IDisposable\r\n    {\r\n        string Annotate(string geneName);\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/IAnnotationResources.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing VariantAnnotation.Interface.GeneAnnotation;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.Providers;\r\n\r\nnamespace VariantAnnotation.Interface\r\n{\r\n    public interface IAnnotationResources : IDisposable\r\n    {\r\n        ISequenceProvider SequenceProvider { get; }\r\n        ITranscriptAnnotationProvider TranscriptAnnotationProvider { get; }\r\n        IAnnotationProvider SaProvider { get; }\r\n        IAnnotationProvider ConservationProvider { get; }\r\n        IRefMinorProvider RefMinorProvider { get; }\r\n        IGeneAnnotationProvider GeneAnnotationProvider { get; }\r\n        IMitoHeteroplasmyProvider MitoHeteroplasmyProvider { get; }\r\n        IAnnotator Annotator { get; }\r\n        IVariantIdCreator VidCreator { get; }\r\n        List<IDataSourceVersion> DataSourceVersions { get; }\r\n        string VepDataVersion { get; }\r\n        string AnnotatorVersionTag { get; set; }\r\n        bool ForceMitochondrialAnnotation { get; }\r\n        long InputStartVirtualPosition { get; }\r\n        void SingleVariantPreLoad(IPosition position);\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/IAnnotator.cs",
    "content": "﻿using System.Collections.Generic;\nusing Genome;\nusing VariantAnnotation.Interface.AnnotatedPositions;\nusing VariantAnnotation.Interface.Positions;\n\nnamespace VariantAnnotation.Interface\n{\n\tpublic interface IAnnotator\n\t{\n\t\tGenomeAssembly Assembly { get; }\n\t\tIAnnotatedPosition Annotate(IPosition position);\n\t\tIEnumerable<string> GetGeneAnnotations();\n\t\tvoid EnableMitochondrialAnnotation();\n\t}\n}"
  },
  {
    "path": "VariantAnnotation.Interface/IO/IJsonSerializer.cs",
    "content": "﻿using System.Text;\r\n\r\nnamespace VariantAnnotation.Interface.IO\r\n{\r\n\tpublic interface IJsonSerializer\r\n\t{\r\n\t\tvoid SerializeJson(StringBuilder sb);\r\n\t}\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/IO/IJsonWriter.cs",
    "content": "﻿using System;\r\nusing VariantAnnotation.Interface.Positions;\r\n\r\nnamespace VariantAnnotation.Interface.IO\r\n{\r\n    public interface IJsonWriter : IDisposable\r\n    {\r\n        void WritePosition(IPosition position, string entry);\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/IO/IVcfReader.cs",
    "content": "﻿using System;\r\n\r\nnamespace VariantAnnotation.Interface.IO\r\n{\r\n    public interface IVcfReader : IDisposable\r\n    {\r\n\t\tbool IsRcrsMitochondrion { get; }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/IO/VcfCommon.cs",
    "content": "﻿using System.Collections.Generic;\r\n\r\nnamespace VariantAnnotation.Interface.IO\r\n{\r\n    public static class VcfCommon\r\n    {\r\n        public const string ChromosomeHeader = \"#CHROM\";\r\n        public const string GatkNonRefAllele = \"<NON_REF>\";\r\n        private const string MissingValue    = \".\";\r\n\r\n        public const int MinNumColumnsSampleGenotypes = 10;\r\n\r\n        // define the column names\r\n        public const int ChromIndex    = 0;\r\n        public const int PosIndex      = 1;\r\n        public const int IdIndex       = 2;\r\n        public const int RefIndex      = 3;\r\n        public const int AltIndex      = 4;\r\n        public const int QualIndex     = 5;\r\n        public const int FilterIndex   = 6;\r\n        public const int InfoIndex     = 7;\r\n        public const int FormatIndex   = 8;\r\n        public const int GenotypeIndex = 9;\r\n\r\n        private static readonly HashSet<string> NonInformativeAltAllele =\r\n            new HashSet<string> {\"<*>\", \"*\", \"<M>\", GatkNonRefAllele};\r\n\r\n        public static readonly HashSet<string> ReferenceAltAllele =\r\n            new HashSet<string> {MissingValue, GatkNonRefAllele};\r\n\r\n        public static bool IsNonInformativeAltAllele(string altAllele) => NonInformativeAltAllele.Contains(altAllele);\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/IVariantIdCreator.cs",
    "content": "﻿using Genome;\n\nnamespace VariantAnnotation.Interface\n{\n    public interface IVariantIdCreator\n    {\n        string Create(ISequence sequence, VariantCategory category, string svType, Chromosome chromosome, int start, int end, string refAllele,\n            string altAllele, string repeatUnit);\n\n        (int Start, string RefAllele, string AltAllele) Normalize(ISequence sequence, int start, string refAllele, string altAllele);\n    }\n}"
  },
  {
    "path": "VariantAnnotation.Interface/Positions/ICustomFields.cs",
    "content": "using System.Collections.Generic;\nusing VariantAnnotation.Interface.IO;\n\nnamespace VariantAnnotation.Interface.Positions\n{\n    public interface ICustomFields: IJsonSerializer\n    {\n        void Add(string key, string value);\n        void Clear();\n        bool IsEmpty();\n    }\n}"
  },
  {
    "path": "VariantAnnotation.Interface/Positions/IInfoData.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing VariantAnnotation.Interface.IO;\r\n\r\nnamespace VariantAnnotation.Interface.Positions\r\n{\r\n    public interface IInfoData\r\n    {\r\n        int[]                 CiEnd                     { get; }\r\n        int[]                 CiPos                     { get; }\r\n        int?                  End                       { get; }\r\n        double?               RecalibratedQuality       { get; }\r\n        int?                  JointSomaticNormalQuality { get; }\r\n        int?                  RefRepeatCount            { get; }\r\n        string                RepeatUnit                { get; }\r\n        double?               StrandBias                { get; }\r\n        int?                  SvLength                  { get; }\r\n        string                SvType                    { get; }\r\n        double?               FisherStrandBias          { get; }\r\n        double?               MappingQuality            { get; }\r\n        string                BreakendEventId           { get; }\r\n        bool                  IsImprecise               { get; }\r\n        ICustomFields CustomKeyValues{ get; }\r\n        // for old version of Manta, but still required by Encore\r\n        bool    IsInv3 { get; }\r\n        bool    IsInv5 { get; }\r\n        double? LogOddsRatio    { get; }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/Positions/IPosition.cs",
    "content": "﻿using Variants;\n\nnamespace VariantAnnotation.Interface.Positions\n{\n    public interface IPosition : ISimplePosition\n    {\n        double? Quality { get; }\n        string[] Filters { get; }\n        IVariant[] Variants { get; }\n        ISample[] Samples { get; }\n        IInfoData InfoData { get; }\n        bool HasStructuralVariant { get; }\n        bool HasShortTandemRepeat { get; }\n    }\n}"
  },
  {
    "path": "VariantAnnotation.Interface/Positions/ISample.cs",
    "content": "﻿namespace VariantAnnotation.Interface.Positions\r\n{\r\n    public interface ISample\r\n    {\r\n        int[]                 AlleleDepths                 { get; }\r\n        float?                ArtifactAdjustedQualityScore { get; } // PEPE\r\n        int?                  CopyNumber                   { get; }\r\n        string[]              DiseaseAffectedStatuses      { get; } // SMN1\r\n        bool                  FailedFilter                 { get; }\r\n        string                Genotype                     { get; }\r\n        int?                  GenotypeQuality              { get; }\r\n        bool                  IsDeNovo                     { get; }\r\n        double?               DeNovoQuality                { get; }\r\n        bool                  IsEmpty                      { get; }\r\n        float?                LikelihoodRatioQualityScore  { get; } // PEPE\r\n        int[]                 PairedEndReadCounts          { get; } // Manta\r\n        int[]                 RepeatUnitCounts             { get; } // ExpansionHunter\r\n        int[]                 SplitReadCounts              { get; } // Manta\r\n        int?                  TotalDepth                   { get; }\r\n        double[]              VariantFrequencies           { get; }\r\n        int?                  MinorHaplotypeCopyNumber     { get; }\r\n        double?               SomaticQuality               { get; }\r\n        bool?                 IsLossOfHeterozygosity       { get; }\r\n        string[]              HeteroplasmyPercentile       { get; }\r\n        int?                  BinCount                     { get; }\r\n        public ICustomFields CustomFields                 { get; }\r\n        \r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/Positions/ISimplePosition.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Genome;\r\n\r\nnamespace VariantAnnotation.Interface.Positions\r\n{\r\n    public interface ISimplePosition : IChromosomeInterval\r\n    {\r\n        string RefAllele { get; }\r\n        string[] AltAlleles { get; }\r\n        string[] VcfFields { get; }\r\n        bool[] IsDecomposed { get; }\r\n        bool IsRecomposed { get; }\r\n        string[] Vids { get; }\r\n        List<string>[] LinkedVids { get; }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/Providers/IAnnotationProvider.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing Genome;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace VariantAnnotation.Interface.Providers\r\n{\r\n\tpublic interface IAnnotationProvider : IProvider, IDisposable\r\n    {\r\n\t\tvoid Annotate(IAnnotatedPosition annotatedPosition);\r\n        void PreLoad(Chromosome chromosome, List<int> positions);\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/Providers/IDataSourceVersion.cs",
    "content": "﻿using IO;\r\nusing VariantAnnotation.Interface.IO;\r\n\r\nnamespace VariantAnnotation.Interface.Providers\r\n{\r\n    public interface IDataSourceVersion : IJsonSerializer\r\n    {\r\n        string Name             { get; }\r\n        string Description      { get; }\r\n        string Version          { get; }\r\n        long   ReleaseDateTicks { get; }\r\n        void   Write(IExtendedBinaryWriter writer);\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/Providers/IMitoHeteroplasmyProvider.cs",
    "content": "﻿using Variants;\n\nnamespace VariantAnnotation.Interface.Providers\n{\n    public interface IMitoHeteroplasmyProvider : IProvider\n    {\n        double?[] GetVrfPercentiles(IVariant[] variants, double[] vrfs);\n    }\n}"
  },
  {
    "path": "VariantAnnotation.Interface/Providers/IProvider.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Genome;\r\n\r\nnamespace VariantAnnotation.Interface.Providers\r\n{\r\n    public interface IProvider\r\n    {\r\n\t\tstring Name { get; }\r\n        GenomeAssembly Assembly { get; }\r\n        IEnumerable<IDataSourceVersion> DataSourceVersions { get; }\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/Providers/IRefMinorProvider.cs",
    "content": "﻿using System;\r\nusing Genome;\r\n\r\nnamespace VariantAnnotation.Interface.Providers\r\n{\r\n    public interface IRefMinorProvider:IDisposable\r\n    {\r\n        string GetGlobalMajorAllele(Chromosome chromosome, int pos);\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/Providers/ISequenceProvider.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Genome;\r\n\r\nnamespace VariantAnnotation.Interface.Providers\r\n{\r\n    public interface ISequenceProvider : IAnnotationProvider\r\n    {\r\n        ISequence Sequence { get; }\r\n        Dictionary<string, Chromosome> RefNameToChromosome { get; }\r\n        Dictionary<ushort, Chromosome> RefIndexToChromosome { get; }\r\n        void LoadChromosome(Chromosome chromosome);\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/Providers/ITranscriptAnnotationProvider.cs",
    "content": "﻿using Intervals;\r\nusing VariantAnnotation.Interface.AnnotatedPositions;\r\n\r\nnamespace VariantAnnotation.Interface.Providers\r\n{\r\n    public interface ITranscriptAnnotationProvider : IAnnotationProvider\r\n    {\r\n        IntervalArray<ITranscript>[] TranscriptIntervalArrays { get; }\r\n        ushort VepVersion { get; }\r\n    }\r\n}\r\n"
  },
  {
    "path": "VariantAnnotation.Interface/SA/INsaReader.cs",
    "content": "﻿using System;\nusing System.Collections.Generic;\nusing Genome;\n\nnamespace VariantAnnotation.Interface.SA\n{\n    public interface INsaReader : ISaMetadata, IDisposable\n    {\n        bool               MatchByAllele { get; }\n        bool               IsArray       { get; }\n        bool               IsPositional  { get; }\n\n        void GetAnnotation(int position, List<(string refAllele, string altAllele, string annotation)> annotations);\n        void PreLoad(Chromosome chrom, List<int> positions);\n    }\n}"
  },
  {
    "path": "VariantAnnotation.Interface/SA/INsiReader.cs",
    "content": "﻿using System.Collections.Generic;\nusing Variants;\n\nnamespace VariantAnnotation.Interface.SA\n{\n    public interface INsiReader : ISaMetadata\n    {\n        ReportFor           ReportFor { get; }\n        IEnumerable<string> GetAnnotation(IVariant variant);\n    }\n}"
  },
  {
    "path": "VariantAnnotation.Interface/SA/ISaMetadata.cs",
    "content": "﻿using Genome;\nusing VariantAnnotation.Interface.Providers;\n\nnamespace VariantAnnotation.Interface.SA\n{\n    public interface ISaMetadata\n    {\n        GenomeAssembly     Assembly { get; }\n        IDataSourceVersion Version  { get; }\n        string             JsonKey  { get; }\n    }\n}"
  },
  {
    "path": "VariantAnnotation.Interface/SA/ISupplementaryAnnotation.cs",
    "content": "﻿using VariantAnnotation.Interface.IO;\n\nnamespace VariantAnnotation.Interface.SA\n{\n    public interface ISupplementaryAnnotation : IJsonSerializer\n    {\n        string JsonKey { get; }\n    }\n}"
  },
  {
    "path": "VariantAnnotation.Interface/SA/ISupplementaryDataItem.cs",
    "content": "﻿using Genome;\n\nnamespace VariantAnnotation.Interface.SA\n{\n    public interface ISupplementaryDataItem\n    {\n        Chromosome Chromosome { get; }\n        int         Position   { get; set; }\n        string      RefAllele  { get; set; }\n        string      AltAllele  { get; set; }\n        string      GetJsonString();\n        string      InputLine { get; }\n    }\n}"
  },
  {
    "path": "VariantAnnotation.Interface/SA/ISupplementaryInterval.cs",
    "content": "﻿namespace VariantAnnotation.Interface.SA\r\n{\r\n    public enum ReportFor\r\n    {\r\n        None,\r\n        AllVariants,\r\n        SmallVariants,\r\n        StructuralVariants\r\n    }\r\n}"
  },
  {
    "path": "VariantAnnotation.Interface/SA/IsuppGeneItem.cs",
    "content": "﻿namespace VariantAnnotation.Interface.SA\n{\n    public interface ISuppGeneItem\n    {\n        string GeneSymbol { get; }\n        string GetJsonString();\n    }\n}"
  },
  {
    "path": "VariantAnnotation.Interface/SA/IsuppIntervalItem.cs",
    "content": "﻿using Genome;\n\nnamespace VariantAnnotation.Interface.SA\n{\n    public interface ISuppIntervalItem : IChromosomeInterval\n    {\n        string GetJsonString();\n    }\n}"
  },
  {
    "path": "VariantAnnotation.Interface/VariantAnnotation.Interface.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\n  <PropertyGroup>\n    <TargetFramework>net6.0</TargetFramework>\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\n    \n  </PropertyGroup>\n  <ItemGroup>\n    <ProjectReference Include=\"..\\Intervals\\Intervals.csproj\" />\n    <ProjectReference Include=\"..\\IO\\IO.csproj\" />\n    <ProjectReference Include=\"..\\Genome\\Genome.csproj\" />\n    <ProjectReference Include=\"..\\Variants\\Variants.csproj\" />\n  </ItemGroup>\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\n</Project>"
  },
  {
    "path": "VariantAnnotation.Interface/VariantCategory.cs",
    "content": "﻿namespace VariantAnnotation.Interface\r\n{\r\n    public enum VariantCategory\r\n    {\r\n        // ReSharper disable InconsistentNaming\r\n        Reference,\r\n        SmallVariant,\r\n        SV,\r\n        CNV,\r\n        RepeatExpansion,\r\n        ROH\r\n        // ReSharper restore InconsistentNaming\r\n    }\r\n}"
  },
  {
    "path": "Variants/AnnotationBehavior.cs",
    "content": "﻿namespace Variants\n{\n    public sealed class AnnotationBehavior\n    {\n        public readonly bool CanonicalTranscriptOnly;\n        public readonly bool NeedFlankingTranscripts;\n        public readonly bool NeedRegulatoryRegions;\n        public readonly bool NeedSaInterval;\n        public readonly bool NeedSaPosition;\n        public readonly bool NeedSaAllele;\n\n        public readonly bool MinimalTranscriptAnnotation;\n        public readonly bool ReducedTranscriptAnnotation;\n\n        public static readonly AnnotationBehavior SmallVariants         = new AnnotationBehavior(false, false, true, true, false, true, true, false);\n        public static readonly AnnotationBehavior NonInformativeAlleles = new AnnotationBehavior(false, true, false, false, false, false, false, false);\n        public static readonly AnnotationBehavior StructuralVariants    = new AnnotationBehavior(false, false, false, true, true, false, false, true);\n        public static readonly AnnotationBehavior BreakendVariants      = new AnnotationBehavior(false, false, false, true, true, false, true, true);\n        public static readonly AnnotationBehavior RepeatExpansions      = new AnnotationBehavior(false, false, false, true, false, false, false, true);\n        public static readonly AnnotationBehavior RunsOfHomozygosity    = new AnnotationBehavior(true, false, false, false, false, false, false, true);\n\n        private AnnotationBehavior(bool canonicalTranscriptOnly, bool minimalTranscriptAnnotation,\n            bool needFlankingTranscripts, bool needRegulatoryRegions, bool needSaInterval, bool needSaPosition,\n            bool needSaAllele, bool reducedTranscriptAnnotation)\n        {\n            CanonicalTranscriptOnly     = canonicalTranscriptOnly;\n            MinimalTranscriptAnnotation = minimalTranscriptAnnotation;\n            NeedFlankingTranscripts     = needFlankingTranscripts;\n            NeedRegulatoryRegions       = needRegulatoryRegions;\n        
    NeedSaInterval              = needSaInterval;\n            NeedSaPosition              = needSaPosition;\n            NeedSaAllele                = needSaAllele;\n            ReducedTranscriptAnnotation = reducedTranscriptAnnotation;\n        }\n    }\n}"
  },
  {
    "path": "Variants/BiDirectionalTrimmer.cs",
    "content": "﻿namespace Variants\n{\n    public static class BiDirectionalTrimmer\n    {\n        public static (int Start, string RefAllele, string AltAllele) Trim(int start, string refAllele, string altAllele)\n        {\n            // do not trim if ref and alt are same\n            if (refAllele == altAllele) return (start, refAllele, altAllele);\n\n            if (refAllele == null) refAllele = \"\";\n            if (altAllele == null) altAllele = \"\";\n\n            // trimming at the start\n            var i = 0;\n            while (i < refAllele.Length && i < altAllele.Length && refAllele[i] == altAllele[i]) i++;\n\n            if (i > 0)\n            {\n                start += i;\n                altAllele = altAllele.Substring(i);\n                refAllele = refAllele.Substring(i);\n            }\n\n            // trimming at the end\n            var j = 0;\n            while (j < refAllele.Length && j < altAllele.Length &&\n                   refAllele[refAllele.Length - j - 1] == altAllele[altAllele.Length - j - 1]) j++;\n\n            if (j <= 0) return (start, refAllele, altAllele);\n\n            altAllele = altAllele.Substring(0, altAllele.Length - j);\n            refAllele = refAllele.Substring(0, refAllele.Length - j);\n            return (start, refAllele, altAllele);\n        }\n    }\n}"
  },
  {
    "path": "Variants/ISimpleVariant.cs",
    "content": "﻿using Genome;\r\n\r\nnamespace Variants\r\n{\r\n    public interface ISimpleVariant : IChromosomeInterval\r\n    {\r\n        string RefAllele { get; }\r\n        string AltAllele { get; }\r\n        VariantType Type { get; }\r\n    }\r\n}"
  },
  {
    "path": "Variants/IVariant.cs",
    "content": "﻿namespace Variants\n{\n    public interface IVariant : ISimpleVariant\n    {\n        string VariantId { get; }\n        bool IsRefMinor { get; }\n        bool IsRecomposed { get; }\n        bool IsDecomposed { get; }\n        string[] LinkedVids { get; }\n        AnnotationBehavior Behavior { get; }\n        bool IsStructuralVariant { get; }\n    }\n}"
  },
  {
    "path": "Variants/RepeatExpansion.cs",
    "content": "﻿using Genome;\r\n\r\nnamespace Variants\r\n{\r\n    public sealed class RepeatExpansion : IVariant\r\n    {\r\n        public Chromosome Chromosome { get; }\r\n        public int Start { get; }\r\n        public int End { get; }\r\n        public string RefAllele { get; }\r\n        public string AltAllele { get; }\r\n        public string VariantId { get; }\r\n\r\n        public VariantType Type { get; }            = VariantType.short_tandem_repeat_variation;\r\n        public bool IsRefMinor { get; }             = false;\r\n        public bool IsRecomposed { get; }           = false;\r\n        public bool IsDecomposed { get; }           = false;\r\n        public string[] LinkedVids { get; }         = null;\r\n        public AnnotationBehavior Behavior { get; } = AnnotationBehavior.RepeatExpansions;\r\n        public bool IsStructuralVariant { get; }    = false;\r\n\r\n        public readonly int RepeatCount;\r\n        public readonly int? RefRepeatCount;\r\n\r\n        public RepeatExpansion(Chromosome chromosome, int start, int end, string refAllele, string altAllele,\r\n            string variantId, int repeatCount, int? refRepeatCount)\r\n        {\r\n            Chromosome     = chromosome;\r\n            Start          = start;\r\n            End            = end;\r\n            RefAllele      = refAllele;\r\n            AltAllele      = altAllele;\r\n            VariantId      = variantId;\r\n            RepeatCount    = repeatCount;\r\n            RefRepeatCount = refRepeatCount;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Variants/SimpleVariant.cs",
    "content": "﻿using Genome;\r\n\r\nnamespace Variants\r\n{\r\n    public sealed class SimpleVariant : ISimpleVariant\r\n    {\r\n        public int Start { get; }\r\n        public int End { get; }\r\n        public Chromosome Chromosome { get; }\r\n        public string RefAllele { get; }\r\n        public string AltAllele { get; }\r\n        public VariantType Type { get; }\r\n\r\n        public SimpleVariant(Chromosome chromosome, int start, int end, string refAllele, string altAllele, VariantType type)\r\n        {\r\n            Chromosome = chromosome;\r\n            Start      = start;\r\n            End        = end;\r\n            RefAllele  = refAllele;\r\n            AltAllele  = altAllele;\r\n            Type       = type;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Variants/Variant.cs",
    "content": "﻿using Genome;\r\n\r\nnamespace Variants\r\n{\r\n    public sealed class Variant : IVariant\r\n    {\r\n        public Chromosome        Chromosome          { get; private set; }\r\n        public int                Start               { get; private set;}\r\n        public int                End                 { get; private set;}\r\n        public string             RefAllele           { get; private set;}\r\n        public string             AltAllele           { get; private set;}\r\n        public VariantType        Type                { get; private set;}\r\n        public string             VariantId           { get; private set;}\r\n        public bool               IsRefMinor          { get; private set;}\r\n        public bool               IsRecomposed        { get; private set;}\r\n        public bool               IsDecomposed        { get; private set;}\r\n        public string[]           LinkedVids          { get; private set;}\r\n        public AnnotationBehavior Behavior            { get; private set;}\r\n        public bool               IsStructuralVariant { get; private set;}\r\n        \r\n        public void Initialize(Chromosome chromosome, int start, int end, string refAllele, string altAllele,\r\n            VariantType variantType, string variantId, bool isRefMinor, bool isDecomposed, bool isRecomposed,\r\n            string[] linkedVids, AnnotationBehavior behavior, bool isStructuralVariant)\r\n        {\r\n            Chromosome          = chromosome;\r\n            Start               = start;\r\n            End                 = end;\r\n            RefAllele           = refAllele;\r\n            AltAllele           = altAllele;\r\n            Type                = variantType;\r\n            VariantId           = variantId;\r\n            IsRefMinor          = isRefMinor;\r\n            IsRecomposed        = isRecomposed;\r\n            IsDecomposed        = isDecomposed;\r\n            LinkedVids          = 
linkedVids;\r\n            Behavior            = behavior;\r\n            IsStructuralVariant = isStructuralVariant;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Variants/VariantRotator.cs",
    "content": "﻿using System;\r\nusing Genome;\r\nusing Intervals;\r\n\r\nnamespace Variants\r\n{\r\n    public static class VariantRotator\r\n    {\r\n        public const int MaxDownstreamLength = 500;\r\n\r\n        public static ISimpleVariant Right(ISimpleVariant simpleVariant, IInterval rotateRegion, ISequence refSequence, bool onReverseStrand)\r\n        {            \r\n            if (refSequence == null) return simpleVariant;\r\n\r\n            if (simpleVariant.Type != VariantType.deletion && simpleVariant.Type != VariantType.insertion)\r\n                return simpleVariant;\r\n\r\n            if (VariantStartOverlapsRegion(simpleVariant, rotateRegion, onReverseStrand))\r\n                return simpleVariant;\r\n            // if variant is before the transcript start, do not perform 3 prime shift\r\n            \r\n            string rotatingBases = GetRotatingBases(simpleVariant, onReverseStrand);\r\n\r\n            string downStreamSeq = GetDownstreamSeq(simpleVariant, rotateRegion, refSequence, onReverseStrand, rotatingBases);\r\n\r\n            string combinedSequence = rotatingBases + downStreamSeq;\r\n\r\n            int shiftStart, shiftEnd;\r\n            var hasShifted = false;\r\n\r\n            // probably a VEP bug, just use it for consistency\r\n            int numBases = rotatingBases.Length;\r\n\r\n            for (shiftStart = 0, shiftEnd = numBases; shiftEnd < combinedSequence.Length; shiftStart++, shiftEnd++)\r\n            {\r\n                if (combinedSequence[shiftStart] != combinedSequence[shiftEnd]) break;\r\n                hasShifted = true;\r\n            }\r\n\r\n            if (!hasShifted) return simpleVariant;\r\n\r\n            // create a new alternative allele\r\n            string rotatedSequence = combinedSequence.Substring(shiftStart, numBases);\r\n            int rotatedStart       = simpleVariant.Start + shiftStart;\r\n            int rotatedEnd         = simpleVariant.End + shiftStart;\r\n\r\n            if 
(onReverseStrand)\r\n            {\r\n                rotatedSequence = SequenceUtilities.GetReverseComplement(rotatedSequence);\r\n                rotatedStart    = simpleVariant.Start - shiftStart;\r\n                rotatedEnd      = simpleVariant.End - shiftStart;\r\n            }\r\n            \r\n            string rotatedRefAllele = simpleVariant.RefAllele;\r\n            string rotatedAltAllele = simpleVariant.AltAllele;\r\n\r\n            if (simpleVariant.Type == VariantType.insertion) rotatedAltAllele = rotatedSequence;\r\n            else rotatedRefAllele = rotatedSequence;\r\n\r\n            return new SimpleVariant(simpleVariant.Chromosome, rotatedStart, rotatedEnd, rotatedRefAllele,\r\n                rotatedAltAllele, simpleVariant.Type);\r\n        }\r\n\r\n        private static string GetDownstreamSeq(IInterval simpleVariant, IInterval rotateRegion,\r\n            ISequence refSequence, bool onReverseStrand, string rotatingBases)\r\n        {\r\n            int basesToEnd = onReverseStrand ? simpleVariant.Start - rotateRegion.Start : rotateRegion.End - simpleVariant.End;\r\n            int downStreamLength =\r\n                Math.Min(basesToEnd,\r\n                    Math.Max(rotatingBases.Length,\r\n                        MaxDownstreamLength)); // for large rotatingBases, we need to factor in its length but still make sure that we do not go past the end of transcript\r\n\r\n            string downStreamSeq = onReverseStrand\r\n                ? SequenceUtilities.GetReverseComplement(\r\n                    refSequence.Substring(simpleVariant.Start - 1 - downStreamLength, downStreamLength))\r\n                : refSequence.Substring(simpleVariant.End, downStreamLength);\r\n            return downStreamSeq;\r\n        }\r\n\r\n        private static string GetRotatingBases(ISimpleVariant simpleVariant, bool onReverseStrand)\r\n        {\r\n            string rotatingBases = simpleVariant.Type == VariantType.insertion ? 
simpleVariant.AltAllele : simpleVariant.RefAllele;\r\n            rotatingBases = onReverseStrand ? SequenceUtilities.GetReverseComplement(rotatingBases) : rotatingBases;\r\n            return rotatingBases;\r\n        }\r\n\r\n        private static bool VariantStartOverlapsRegion(IInterval variant, IInterval region, bool onReverseStrand)\r\n        {\r\n            if (onReverseStrand)\r\n            {\r\n                return variant.End > region.End || region.Start >= variant.End;\r\n            }\r\n            \r\n            return variant.Start < region.Start || region.End <= variant.Start;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Variants/VariantType.cs",
    "content": "﻿namespace Variants\r\n{\r\n    public enum VariantType\r\n    {\r\n        // ReSharper disable InconsistentNaming\r\n        unknown = 0,\r\n\r\n        // small variants\r\n        SNV       = 2,\r\n        insertion = 3,\r\n        deletion  = 4,\r\n        indel     = 5,\r\n        MNV       = 6,\r\n\r\n        // structural variants\r\n        duplication                     = 10,\r\n        complex_structural_alteration   = 11,\r\n        structural_alteration           = 12,\r\n        tandem_duplication              = 13,\r\n        translocation_breakend          = 14,\r\n        inversion                       = 15,\r\n        mobile_element_insertion        = 16,\r\n        mobile_element_deletion         = 17,\r\n        novel_sequence_insertion        = 18,\r\n        short_tandem_repeat_variation   = 19,\r\n\r\n        // CNVs\r\n        copy_number_variation = 30,\r\n        copy_number_loss      = 31,\r\n        copy_number_gain      = 32,\r\n\r\n        // non variants\r\n        run_of_homozygosity = 33,\r\n        \r\n        // misc\r\n        reference = 42,\r\n        non_informative_allele = 43\r\n    }\r\n    // ReSharper restore InconsistentNaming\r\n}\r\n"
  },
  {
    "path": "Variants/VariantUtils.cs",
    "content": "﻿using System;\nusing Genome;\n\nnamespace Variants\n{\n    public static class VariantUtils\n    {\n        public const  int MaxUpstreamLength = 500;\n        public static int MaxShiftLength    = 0;\n\n        public static (int start, string refAllele, string altAllele) TrimAndLeftAlign(int start, string refAllele, string altAllele,\n            ISequence refSequence)\n        {\n            var initialStart = start;\n            var (newStart, newRefAllele, newAltAllele) = TrimAndLeftAlign(start, refAllele, altAllele, refSequence, 50);\n\n            while (newStart != start)\n            {\n                start     = newStart;\n                refAllele = newRefAllele;\n                altAllele = newAltAllele;\n\n                (newStart, newRefAllele, newAltAllele) = TrimAndLeftAlign(start, refAllele, altAllele, refSequence, 50);\n            }\n\n            // keeping track of maximum bases shifted\n            if (MaxShiftLength < Math.Abs(initialStart - newStart)) MaxShiftLength = Math.Abs(initialStart - newStart);\n            \n            return (newStart, newRefAllele, newAltAllele);\n        }\n\n        /// <summary>\n        /// Left aligns the variant using base rotation\n        /// </summary>\n        /// <returns>Tuple of new position, ref and alt allele</returns>\n        private static (int start, string refAllele, string altAllele) TrimAndLeftAlign(int start, string refAllele, string altAllele, ISequence refSequence, int maxUpstreamLength)\n        {\n            if (IsStructuralVariant(altAllele)) return (start, refAllele, altAllele);\n\n            // we have to check this before the trimming since it depends on the padding base\n            bool isLeftShiftPossible = IsLeftShiftPossible(refAllele, altAllele);\n\n            (start, refAllele, altAllele) = BiDirectionalTrimmer.Trim(start, refAllele, altAllele);\n\n            // alignment only makes sense for insertion and deletion\n            if (!(altAllele.Length == 
0 || refAllele.Length == 0)) return (start, refAllele, altAllele);\n            if(! isLeftShiftPossible) return (start, refAllele, altAllele);\n\n            // base checking to make sure we can safely left shift\n            if (IfRefBaseMismatched(start, refAllele, refSequence)) return (start, refAllele, altAllele);\n\n            // adjust the max upstream length when you are near the beginning of the chrom\n            if (maxUpstreamLength >= start) maxUpstreamLength = start - 1;\n            string upstreamSeq = refSequence.Substring(start - maxUpstreamLength - 1, maxUpstreamLength);\n            \n            // compressed seq is 0 based\n            string combinedSeq = upstreamSeq;\n            int repeatLength;\n            int i;\n            if (refAllele.Length > altAllele.Length)\n            {\n                // deletion\n                combinedSeq += refAllele;\n                repeatLength = refAllele.Length;\n                for (i = combinedSeq.Length - 1; i >= repeatLength; i--, start--)\n                {\n                    if (combinedSeq[i] != combinedSeq[i - repeatLength]) break;\n                }\n\n                string newRefAllele = combinedSeq.Substring(i + 1 - repeatLength, repeatLength);\n                return (start, newRefAllele, \"\"); //alt is empty for deletion\n            }\n\n            //insertion\n            combinedSeq += altAllele;\n            repeatLength = altAllele.Length;\n\n            for (i = combinedSeq.Length - 1; i >= repeatLength; i--, start--)\n            {\n                if (combinedSeq[i] != combinedSeq[i - repeatLength]) break;\n            }\n            \n            string newAltAllele = combinedSeq.Substring(i + 1 - repeatLength, repeatLength);\n            return (start, \"\", newAltAllele);\n        }\n\n        private static bool IfRefBaseMismatched(int start, string refAllele, ISequence refSequence)\n        {\n            return refSequence != null && !string.IsNullOrEmpty(refAllele) 
&& refAllele != refSequence.Substring(start - 1, refAllele.Length);\n        }\n\n        // when we have a padding base, we can check whether it's possible to left shift at all\n        public static bool IsLeftShiftPossible(string refAllele, string altAllele)\n        {\n            if (refAllele == altAllele) return false;\n            if (string.IsNullOrEmpty(refAllele) || string.IsNullOrEmpty(altAllele)) return true;\n            if (refAllele.Length == 1) return refAllele[0] == altAllele[altAllele.Length - 1];\n            if (altAllele.Length == 1) return altAllele[0] == refAllele[refAllele.Length - 1];\n\n            return true;\n        }\n\n        private static bool IsStructuralVariant(string altAllele)\n        {\n            return altAllele.StartsWith('<') || altAllele.Contains('[') || altAllele.Contains(']');\n        }\n    }\n}"
  },
  {
    "path": "Variants/Variants.csproj",
    "content": "<Project Sdk=\"Microsoft.NET.Sdk\">\r\n  <PropertyGroup>\r\n    <TargetFramework>net6.0</TargetFramework>\r\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\n    \r\n  </PropertyGroup>\r\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\r\n  <ItemGroup>\r\n    <ProjectReference Include=\"..\\Intervals\\Intervals.csproj\" />\r\n    <ProjectReference Include=\"..\\Genome\\Genome.csproj\" />\r\n  </ItemGroup>\r\n</Project>\r\n"
  },
  {
    "path": "Vcf/AssemblyInfo.cs",
    "content": "﻿using System.Runtime.CompilerServices;\n\n[assembly: InternalsVisibleTo(\"UnitTests\")]"
  },
  {
    "path": "Vcf/IVcfFilter.cs",
    "content": "﻿using System.IO;\r\nusing Genome;\r\n\r\nnamespace Vcf\r\n{\r\n    public interface IVcfFilter\r\n    {\r\n        void FastForward(StreamReader reader);\r\n\r\n        string GetNextLine(StreamReader reader);\r\n\r\n        bool PassedTheEnd(Chromosome chromosome, int position);\r\n    }\r\n}"
  },
  {
    "path": "Vcf/Info/CustomFields.cs",
    "content": "using System.Collections.Generic;\nusing System.Text;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.IO;\nusing VariantAnnotation.Interface.Positions;\nusing VariantAnnotation.IO;\n\nnamespace Vcf.Info\n{\n    public sealed class CustomFields:ICustomFields\n    {\n        private readonly Dictionary<string, string> _keyValues=new ();\n\n        public void Add(string key, string value)\n        {\n            _keyValues.Add(key, value);\n        }\n\n        public void Clear()\n        {\n            _keyValues.Clear();\n        }\n\n        public bool IsEmpty() =>_keyValues.Count == 0;\n        \n        public void SerializeJson(StringBuilder sb)\n        {\n            var jsonObject = new JsonObject(sb);\n\n            sb.Append(JsonObject.OpenBrace);\n\n            foreach (var (key, value) in _keyValues)\n            {\n                jsonObject.AddStringValue(key, value);\n            }\n            sb.Append(JsonObject.CloseBrace);\n        }\n\n        public override string ToString()\n        {\n            var sb = StringBuilderPool.Get();\n            SerializeJson(sb);\n            var s = sb.ToString();\n            StringBuilderPool.Return(sb);\n            return s;\n        }\n    }\n}"
  },
  {
    "path": "Vcf/Info/InfoData.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing VariantAnnotation.Interface.Positions;\r\n\r\nnamespace Vcf.Info\r\n{\r\n    public sealed record InfoData(string BreakendEventId, int[] CiEnd, int[] CiPos, int? End, double? FisherStrandBias, bool IsImprecise, bool IsInv3,\r\n        bool IsInv5, int? JointSomaticNormalQuality, double? MappingQuality, double? RecalibratedQuality, int? RefRepeatCount, string RepeatUnit,\r\n        double? StrandBias, int? SvLength, string SvType, double? LogOddsRatio, ICustomFields CustomKeyValues) : IInfoData;\r\n    \r\n    public sealed class InfoDataBuilder\r\n    {\r\n        public string         BreakendEventId;\r\n        public int[]          CiEnd;\r\n        public int[]          CiPos;\r\n        public int?           End;\r\n        public double?        FisherStrandBias;\r\n        public bool           IsImprecise;\r\n        public bool           IsInv3;\r\n        public bool           IsInv5;\r\n        public int?           JointSomaticNormalQuality;\r\n        public double?        MappingQuality;\r\n        public double?        RecalibratedQuality;\r\n        public int?           RefRepeatCount;\r\n        public string         RepeatUnit;\r\n        public double?        StrandBias;\r\n        public int?           SvLength;\r\n        public string         SvType;\r\n        public double? 
LogOddsRatio;\r\n        public ICustomFields CustomFields=new CustomFields();\r\n\r\n        public InfoData Create() =>\r\n            new(BreakendEventId, CiEnd, CiPos, End, FisherStrandBias, IsImprecise, IsInv3, IsInv5, JointSomaticNormalQuality, MappingQuality,\r\n                RecalibratedQuality, RefRepeatCount, RepeatUnit, StrandBias, SvLength, SvType, LogOddsRatio, CustomFields);\r\n\r\n        public void Reset()\r\n        {\r\n            BreakendEventId           = null;\r\n            CiEnd                     = null;\r\n            CiPos                     = null;\r\n            End                       = null;\r\n            FisherStrandBias          = null;\r\n            IsImprecise               = false;\r\n            IsInv3                    = false;\r\n            IsInv5                    = false;\r\n            JointSomaticNormalQuality = null;\r\n            MappingQuality            = null;\r\n            RecalibratedQuality       = null;\r\n            RefRepeatCount            = null;\r\n            RepeatUnit                = null;\r\n            StrandBias                = null;\r\n            SvLength                  = null;\r\n            SvType                    = null;\r\n            LogOddsRatio              = null;\r\n            CustomFields.Clear();\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Vcf/Info/VcfInfoParser.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface.Positions;\r\n\r\nnamespace Vcf.Info\r\n{\r\n    public static class VcfInfoParser\r\n    {\r\n        private static readonly InfoDataBuilder            Builder         = new();\r\n        private static readonly Dictionary<string, string> EmptyDictionary = new();\r\n\r\n        public static IInfoData Parse(string infoField, HashSet<string> customInfoKeys=null)\r\n        {\r\n            if (string.IsNullOrEmpty(infoField)) return null;\r\n            \r\n            Dictionary<string, string> infoKeyValue = ExtractInfoFields(infoField);\r\n            Builder.Reset();\r\n            \r\n            foreach ((string key, string value) in infoKeyValue)\r\n            {\r\n                // ReSharper disable once SwitchStatementMissingSomeCases\r\n                switch (key)\r\n                {\r\n                    case \"CIEND\":\r\n                        Builder.CiEnd = value.SplitToArray();\r\n                        break;\r\n                    case \"CIPOS\":\r\n                        Builder.CiPos = value.SplitToArray();\r\n                        break;\r\n                    case \"END\":\r\n                        Builder.End = value.GetNullableInt();\r\n                        break;\r\n                    case \"EVENT\":\r\n                        Builder.BreakendEventId = value;\r\n                        break;\r\n                    case \"REF\":\r\n                        Builder.RefRepeatCount = Convert.ToInt32(value);\r\n                        break;\r\n                    case \"RU\":\r\n                        Builder.RepeatUnit = value;\r\n                        break;\r\n                    case \"SB\":\r\n                        Builder.StrandBias = value.GetNullableValue<double>(double.TryParse);\r\n                        break;\r\n                    case \"FS\":\r\n                        
Builder.FisherStrandBias = value.GetNullableValue<double>(double.TryParse);\r\n                        break;\r\n                    case \"MQ\":\r\n                        Builder.MappingQuality = value.GetNullableValue<double>(double.TryParse);\r\n                        break;\r\n                    case \"QSI_NT\":\r\n                    case \"SOMATICSCORE\":\r\n                    case \"QSS_NT\":\r\n                        Builder.JointSomaticNormalQuality = value.GetNullableInt();\r\n                        break;\r\n                    case \"SVLEN\":\r\n                        Builder.SvLength = value.GetNullableInt();\r\n                        if (Builder.SvLength != null) Builder.SvLength = Math.Abs(Builder.SvLength.Value);\r\n                        break;\r\n                    case \"SVTYPE\":\r\n                        Builder.SvType = value;\r\n                        break;\r\n                    case \"VQSR\":\r\n                        Builder.RecalibratedQuality = value.GetNullableValue<double>(double.TryParse);\r\n                        break;\r\n                    case \"IMPRECISE\":\r\n                        Builder.IsImprecise = true;\r\n                        break;\r\n                    case \"INV3\":\r\n                        Builder.IsInv3 = true;\r\n                        break;\r\n                    case \"INV5\":\r\n                        Builder.IsInv5 = true;\r\n                        break;\r\n                    case \"LOD\":\r\n                        Builder.LogOddsRatio = Convert.ToDouble(value);\r\n                        break;\r\n                }\r\n\r\n                if (customInfoKeys != null && customInfoKeys.Contains(key))\r\n                {\r\n                    Builder.CustomFields.Add(key, value);\r\n                }\r\n            }\r\n\r\n            return Builder.Create();\r\n        }\r\n\r\n        private static Dictionary<string, string> ExtractInfoFields(string infoField)\r\n        {\r\n    
        if (infoField == \".\") return EmptyDictionary;\r\n\r\n            var infoKeyValue = new Dictionary<string, string>();\r\n\r\n            foreach (string field in infoField.OptimizedSplit(';'))\r\n            {\r\n                (string key, string value) =   field.OptimizedKeyValue();\r\n                value                      ??= \"true\";\r\n                infoKeyValue[key]          =   value;\r\n            }\r\n\r\n            return infoKeyValue;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Vcf/NullVcfFilter.cs",
    "content": "﻿using System.IO;\r\nusing Genome;\r\n\r\nnamespace Vcf\r\n{\r\n    public sealed class NullVcfFilter : IVcfFilter\r\n    {\r\n\r\n        public void FastForward(StreamReader reader)\r\n        {\r\n            // intentionally empty: this filter does not skip ahead in the stream\r\n        }\r\n\r\n        public string GetNextLine(StreamReader reader) => reader.ReadLine();\r\n\r\n        public bool PassedTheEnd(Chromosome chromosome, int position) => false;\r\n    }\r\n}"
  },
  {
    "path": "Vcf/Position.cs",
    "content": "\uFEFFusing System.Collections.Generic;\r\nusing Genome;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing Variants;\r\nusing Vcf.Info;\r\nusing Vcf.Sample;\r\nusing Vcf.VariantCreator;\r\n\r\nnamespace Vcf\r\n{\r\n    /// <summary>\r\n    /// A parsed VCF position. Instances are handed out by PositionPool and populated through Initialize.\r\n    /// </summary>\r\n    public sealed class Position : IPosition\r\n    {\r\n        public Chromosome     Chromosome           { get; private set; }\r\n        public int            Start                { get; private set; }\r\n        public int            End                  { get; private set; }\r\n        public string         RefAllele            { get; private set; }\r\n        public string[]       AltAlleles           { get; private set; }\r\n        public double?        Quality              { get; private set; }\r\n        public string[]       Filters              { get; private set; }\r\n        public IVariant[]     Variants             { get; private set; }\r\n        public ISample[]      Samples              { get; private set; }\r\n        public IInfoData      InfoData             { get; private set; }\r\n        public bool           HasStructuralVariant { get; private set; }\r\n        public bool           HasShortTandemRepeat { get; private set; }\r\n        public string[]       VcfFields            { get; private set; }\r\n        public bool[]         IsDecomposed         { get; private set; }\r\n        public bool           IsRecomposed         { get; private set; }\r\n        public string[]       Vids                 { get; private set; }\r\n        public List<string>[] LinkedVids           { get; private set; }\r\n\r\n        /// <summary>\r\n        /// Assigns every property of this instance. Objects are reused through PositionPool, so all state\r\n        /// (including IsRecomposed, Vids and LinkedVids) has to be overwritten on each call.\r\n        /// </summary>\r\n        public void Initialize(Chromosome chromosome, int start, int end, string refAllele, string[] altAlleles,\r\n            double? quality, string[] filters, IVariant[] variants, ISample[] samples, IInfoData infoData,\r\n            string[] vcfFields, bool[] isDecomposed, bool isRecomposed)\r\n        {\r\n            Chromosome   = chromosome;\r\n            Start        = start;\r\n            End          = end;\r\n            RefAllele    = refAllele;\r\n            AltAlleles   = altAlleles;\r\n            Quality      = quality;\r\n            Filters      = filters;\r\n            Variants     = variants;\r\n            Samples      = samples;\r\n            InfoData     = infoData;\r\n            VcfFields    = vcfFields;\r\n            IsDecomposed = isDecomposed;\r\n            // must be stored as well; otherwise recomposed positions would report IsRecomposed == false\r\n            IsRecomposed = isRecomposed;\r\n\r\n            (HasStructuralVariant, HasShortTandemRepeat) = CheckVariants(variants);\r\n            Vids                                         = null;\r\n            LinkedVids                                   = null;\r\n        }\r\n\r\n        /// <summary>\r\n        /// Reports whether any variant is a structural variant and whether any is a short tandem repeat.\r\n        /// A null array yields (false, false).\r\n        /// </summary>\r\n        private static (bool HasStructuralVariant, bool HasShortTandemRepeat) CheckVariants(IVariant[] variants)\r\n        {\r\n            if (variants == null) return (false, false);\r\n\r\n            var hasStructuralVariant = false;\r\n            var hasShortTandemRepeat = false;\r\n\r\n            // ReSharper disable once LoopCanBeConvertedToQuery\r\n            foreach (var variant in variants)\r\n            {\r\n                if (variant.IsStructuralVariant) hasStructuralVariant = true;\r\n                if (variant.Type == VariantType.short_tandem_repeat_variation) hasShortTandemRepeat = true;\r\n            }\r\n\r\n            return (hasStructuralVariant, hasShortTandemRepeat);\r\n        }\r\n\r\n        /// <summary>\r\n        /// Converts a simple position into a full position. Returns null for a null input. A reference site\r\n        /// without a global major allele gets a position with no parsed INFO, variants or samples; every\r\n        /// other site has its INFO, QUAL, FILTER, variants and samples parsed.\r\n        /// </summary>\r\n        public static IPosition ToPosition(ISimplePosition simplePosition, IRefMinorProvider refMinorProvider, ISequenceProvider sequenceProvider,\r\n            IMitoHeteroplasmyProvider mitoHeteroplasmyProvider, VariantFactory variantFactory, bool enableDq = false,\r\n            HashSet<string> customInfoKeys=null)\r\n        {\r\n            if (simplePosition == null) return null;\r\n\r\n            sequenceProvider.LoadChromosome(simplePosition.Chromosome);\r\n\r\n            string[] vcfFields  = simplePosition.VcfFields;\r\n            string[] altAlleles = vcfFields[VcfCommon.AltIndex].OptimizedSplit(',');\r\n            bool isReference    = altAlleles.Length == 1 && VcfCommon.ReferenceAltAllele.Contains(altAlleles[0]);\r\n\r\n            // the global major allele is only looked up for reference sites\r\n            string globalMajorAllele = isReference\r\n                ? refMinorProvider?.GetGlobalMajorAllele(simplePosition.Chromosome, simplePosition.Start)\r\n                : null;\r\n\r\n            bool isRefMinor = isReference && globalMajorAllele != null;\r\n\r\n            if (isReference && !isRefMinor) return GetReferencePosition(simplePosition);\r\n\r\n            var       infoData              = VcfInfoParser.Parse(vcfFields[VcfCommon.InfoIndex],customInfoKeys);\r\n            int       end                   = ExtractEnd(infoData, simplePosition.Start, simplePosition.RefAllele.Length);\r\n            double?   quality               = vcfFields[VcfCommon.QualIndex].GetNullableValue<double>(double.TryParse);\r\n            string[]  filters               = vcfFields[VcfCommon.FilterIndex].OptimizedSplit(';');\r\n\r\n            IVariant[] variants = variantFactory.CreateVariants(simplePosition.Chromosome, simplePosition.Start, end,\r\n                simplePosition.RefAllele, altAlleles, infoData, simplePosition.IsDecomposed,\r\n                simplePosition.IsRecomposed, simplePosition.LinkedVids, globalMajorAllele);\r\n\r\n            ISample[] samples = vcfFields.ToSamples(variantFactory.FormatIndices, simplePosition, variants, mitoHeteroplasmyProvider,\r\n                enableDq);\r\n\r\n            return PositionPool.Get(simplePosition.Chromosome, simplePosition.Start, end, simplePosition.RefAllele,\r\n                altAlleles, quality, filters, variants, samples, infoData, vcfFields, simplePosition.IsDecomposed,\r\n                simplePosition.IsRecomposed);\r\n        }\r\n\r\n        /// <summary>\r\n        /// Builds the position used for plain reference sites: End equals Start and quality, filters,\r\n        /// variants, samples and INFO data are all null.\r\n        /// </summary>\r\n        private static IPosition GetReferencePosition(ISimplePosition simplePosition) =>\r\n            PositionPool.Get(simplePosition.Chromosome, simplePosition.Start, simplePosition.Start,\r\n                simplePosition.RefAllele, simplePosition.AltAlleles, null, null, null, null, null,\r\n                simplePosition.VcfFields, simplePosition.IsDecomposed, simplePosition.IsRecomposed);\r\n\r\n        /// <summary>\r\n        /// Returns the END value from the INFO data when present, otherwise start + refAlleleLength - 1.\r\n        /// </summary>\r\n        private static int ExtractEnd(IInfoData infoData, int start, int refAlleleLength)\r\n        {\r\n            if (infoData.End != null) return infoData.End.Value;\r\n            return start + refAlleleLength - 1;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Vcf/PositionPool.cs",
    "content": "using Genome;\nusing Microsoft.Extensions.ObjectPool;\nusing VariantAnnotation.Interface.Positions;\nusing Variants;\n\nnamespace Vcf\n{\n    /// <summary>\n    /// Shared pool of reusable Position objects.\n    /// </summary>\n    public static class PositionPool\n    {\n        // the second constructor argument (4) is the pool's maximumRetained setting\n        private static readonly ObjectPool<Position> Pool =\n            new DefaultObjectPool<Position>(new DefaultPooledObjectPolicy<Position>(), 4);\n\n        /// <summary>\n        /// Takes a Position from the pool and initializes it with the supplied values.\n        /// </summary>\n        public static Position Get(Chromosome chromosome, int start, int end, string refAllele, string[] altAlleles,\n            double? quality, string[] filters, IVariant[] variants, ISample[] samples, IInfoData infoData,\n            string[] vcfFields, bool[] isDecomposed, bool isRecomposed)\n        {\n            Position pooled = Pool.Get();\n\n            pooled.Initialize(chromosome, start, end, refAllele, altAlleles, quality, filters, variants, samples,\n                infoData, vcfFields, isDecomposed, isRecomposed);\n\n            return pooled;\n        }\n\n        /// <summary>\n        /// Hands a Position back to the pool.\n        /// </summary>\n        public static void Return(Position position)\n        {\n            Pool.Return(position);\n        }\n    }\n}"
  },
  {
    "path": "Vcf/Sample/BooleanExtensions.cs",
    "content": "\uFEFFnamespace Vcf.Sample\r\n{\r\n    /// <summary>\r\n    /// String helpers that turn raw sample field values into booleans.\r\n    /// </summary>\r\n    internal static class BooleanExtensions\r\n    {\r\n        /// <summary>\r\n        /// Returns true when the field holds a value other than \"PASS\" or \".\";\r\n        /// null and empty strings yield false.\r\n        /// </summary>\r\n        internal static bool GetFailedFilter(this string ftField)\r\n        {\r\n            bool isMissing = string.IsNullOrEmpty(ftField);\r\n            bool isPassing = ftField == \"PASS\" || ftField == \".\";\r\n            return !isMissing && !isPassing;\r\n        }\r\n\r\n        /// <summary>\r\n        /// Returns true only when the field is exactly \"DeNovo\".\r\n        /// </summary>\r\n        internal static bool IsDeNovo(this string dnField) =>\r\n            !string.IsNullOrEmpty(dnField) && dnField == \"DeNovo\";\r\n    }\r\n}\r\n"
  },
  {
    "path": "Vcf/Sample/FormatIndices.cs",
    "content": "\uFEFFusing System.Collections.Generic;\r\nusing OptimizedCore;\r\n\r\nnamespace Vcf.Sample\r\n{\r\n    /// <summary>\r\n    /// Maps the keys of a VCF FORMAT column to their column index. A field is null when its key is absent\r\n    /// from the most recently parsed FORMAT column.\r\n    /// </summary>\r\n    public sealed class FormatIndices\r\n    {\r\n        // ReSharper disable InconsistentNaming\r\n        internal int? AD;\r\n        internal int? AQ;\r\n        internal int? CN;\r\n        internal int? DN;\r\n        internal int? DP;\r\n        internal int? DST;\r\n        internal int? FT;\r\n        internal int? GQ;\r\n        internal int? GT;\r\n        internal int? LQ;\r\n        internal int? PR;\r\n        internal int? REPCN;\r\n        internal int? SR;\r\n        internal int? VF;\r\n        internal int? MCN;\r\n        internal int? SQ;\r\n        internal int? BC;\r\n\r\n        //legacy fields\r\n        internal int? TAR;\r\n        internal int? TIR;\r\n        internal int? AU;\r\n        internal int? CU;\r\n        internal int? GU;\r\n        internal int? TU;\r\n        internal int? MCC;\r\n        internal int? GQX;\r\n        internal int? DPI;\r\n        internal int? DQ;\r\n        // ReSharper restore InconsistentNaming\r\n\r\n        internal int NumColumns;\r\n\r\n        // custom fields\r\n        internal readonly Dictionary<string, int?> CustomFields;\r\n\r\n        // cached copy of the CustomFields keys, used by Clear() so that the dictionary is never written to\r\n        // while it is being enumerated\r\n        private string[] _customKeys;\r\n\r\n        /// <summary>\r\n        /// Resets every known index and every custom-field index to null.\r\n        /// </summary>\r\n        private void Clear()\r\n        {\r\n            AD    = null;\r\n            AQ    = null;\r\n            CN    = null;\r\n            DN    = null;\r\n            DP    = null;\r\n            DST   = null;\r\n            FT    = null;\r\n            GQ    = null;\r\n            GT    = null;\r\n            LQ    = null;\r\n            PR    = null;\r\n            REPCN = null;\r\n            SR    = null;\r\n            VF    = null;\r\n            MCN   = null;\r\n            SQ    = null;\r\n            BC    = null;\r\n\r\n            // legacy sample fields\r\n            TAR = null;\r\n            TIR = null;\r\n            AU  = null;\r\n            CU  = null;\r\n            GU  = null;\r\n            TU  = null;\r\n            MCC = null;\r\n            GQX = null;\r\n            DPI = null;\r\n            DQ  = null;\r\n\r\n            // custom fields\r\n            if (CustomFields == null) return;\r\n\r\n            // Dictionary enumerators are documented as invalidated by modifications, so the values are\r\n            // overwritten by walking a key snapshot instead of CustomFields.Keys itself\r\n            foreach (string field in GetCustomKeys())\r\n            {\r\n                CustomFields[field] = null;\r\n            }\r\n        }\r\n\r\n        /// <summary>\r\n        /// Returns the cached array of custom-field keys, rebuilding it only when the number of keys in\r\n        /// CustomFields differs from the cached copy. Must only be called when CustomFields is not null.\r\n        /// </summary>\r\n        private string[] GetCustomKeys()\r\n        {\r\n            if (_customKeys == null || _customKeys.Length != CustomFields.Count)\r\n            {\r\n                _customKeys = new string[CustomFields.Count];\r\n                CustomFields.Keys.CopyTo(_customKeys, 0);\r\n            }\r\n\r\n            return _customKeys;\r\n        }\r\n\r\n        /// <summary>\r\n        /// Creates the index table. When customFields is supplied, each of its entries becomes a key in\r\n        /// CustomFields with a null index; otherwise CustomFields stays null.\r\n        /// </summary>\r\n        public FormatIndices(HashSet<string> customFields=null)\r\n        {\r\n            if (customFields == null) return;\r\n            CustomFields = new();\r\n            foreach (var field in customFields)\r\n            {\r\n                CustomFields[field] = null;\r\n            }\r\n        }\r\n\r\n        /// <summary>\r\n        /// Clears all indices, then records the position of every recognized key in the colon-separated\r\n        /// FORMAT column. A null column leaves everything cleared and NumColumns untouched. Keys that are\r\n        /// neither built in nor registered in CustomFields are ignored.\r\n        /// </summary>\r\n        internal void Set(string formatColumn)\r\n        {\r\n            Clear();\r\n\r\n            if (formatColumn == null) return;\r\n\r\n            string[] formatCols = formatColumn.OptimizedSplit(':');\r\n            NumColumns = formatCols.Length;\r\n\r\n            for (var index = 0; index < NumColumns; index++)\r\n            {\r\n                // ReSharper disable once SwitchStatementMissingSomeCases\r\n                var formatKey = formatCols[index];\r\n                switch (formatKey)\r\n                {\r\n                    case \"AD\":\r\n                        AD = index;\r\n                        break;\r\n                    case \"AQ\":\r\n                        AQ = index;\r\n                        break;\r\n                    case \"CN\":\r\n                        CN = index;\r\n                        break;\r\n                    case \"DN\":\r\n                        DN = index;\r\n                        break;\r\n                    case \"DP\":\r\n                        DP = index;\r\n                        break;\r\n                    case \"DST\":\r\n                        DST = index;\r\n                        break;\r\n                    case \"FT\":\r\n                        FT = index;\r\n                        break;\r\n                    case \"GQ\":\r\n                        GQ = index;\r\n                        break;\r\n                    case \"GT\":\r\n                        GT = index;\r\n                        break;\r\n                    case \"LQ\":\r\n                        LQ = index;\r\n                        break;\r\n                    case \"PR\":\r\n                        PR = index;\r\n                        break;\r\n                    case \"REPCN\":\r\n                        REPCN = index;\r\n                        break;\r\n                    case \"SR\":\r\n                        SR = index;\r\n                        break;\r\n                    case \"VF\":\r\n                        VF = index;\r\n                        break;\r\n                    case \"MCN\":\r\n                        MCN = index;\r\n                        break;\r\n                    case \"SQ\":\r\n                        SQ = index;\r\n                        break;\r\n                    case \"BC\":\r\n                        BC = index;\r\n                        break;\r\n                    //LEGACY\r\n                    case \"TAR\":\r\n                        TAR = index;\r\n                        break;\r\n                    case \"TIR\":\r\n                        TIR = index;\r\n                        break;\r\n                    case \"AU\":\r\n                        AU = index;\r\n                        break;\r\n                    case \"GU\":\r\n                        GU = index;\r\n                        break;\r\n                    case \"CU\":\r\n                        CU = index;\r\n                        break;\r\n                    case \"TU\":\r\n                        TU = index;\r\n                        break;\r\n                    case \"GQX\":\r\n                        GQX = index;\r\n                        break;\r\n                    case \"DPI\":\r\n                        DPI = index;\r\n                        break;\r\n                    case \"MCC\":\r\n                        MCC = index;\r\n                        break;\r\n                    case \"DQ\":\r\n                        DQ = index;\r\n                        break;\r\n                    default:\r\n                        // only keys registered at construction time are tracked; this overwrites an\r\n                        // existing entry and never adds a new key\r\n                        if(CustomFields!=null && CustomFields.ContainsKey(formatKey))\r\n                            CustomFields[formatKey] = index;\r\n                        break;\r\n                }\r\n            }\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Vcf/Sample/Legacy/AlleleDepths.cs",
    "content": "\uFEFFusing OptimizedCore;\n\nnamespace Vcf.Sample.Legacy\n{\n    internal static class AlleleDepths\n    {\n        /// <summary>\n        /// Returns the allele depths from the first source that yields a result: TAR/TIR, then the\n        /// per-base allele counts, then the AD field. Returns null when none of them applies.\n        /// </summary>\n        public static int[] GetAlleleDepths(IntermediateSampleFields intermediateSampleFields)\n        {\n            // each helper returns null when its own inputs are missing, so the chain falls through in order\n            return GetAlleleDepthsUsingTarTir(intermediateSampleFields)\n                   ?? GetAlleleDepthsUsingAlleleCounts(intermediateSampleFields)\n                   ?? GetAlleleDepthsUsingAd(intermediateSampleFields);\n        }\n\n        /// <summary>\n        /// Returns { TAR, TIR } as the allele depths; null when either value is missing or there is more\n        /// than one alternate allele.\n        /// </summary>\n        private static int[] GetAlleleDepthsUsingTarTir(IntermediateSampleFields intermediateSampleFields)\n        {\n            int? tar = intermediateSampleFields.TAR;\n            int? tir = intermediateSampleFields.TIR;\n\n            bool hasBoth = tir != null && tar != null;\n            if (!hasBoth || intermediateSampleFields.AltAlleles.Length > 1) return null;\n\n            return new[] { tar.Value, tir.Value };\n        }\n\n        /// <summary>\n        /// Returns the allele depths looked up from the per-base counts (reference first, then each\n        /// alternate allele); null when the total count is missing, an allele is not a single base, or a\n        /// count cannot be found.\n        /// </summary>\n        private static int[] GetAlleleDepthsUsingAlleleCounts(IntermediateSampleFields intermediateSampleFields)\n        {\n            if (intermediateSampleFields.TotalAlleleCount == null) return null;\n\n            string refAllele    = intermediateSampleFields.VcfRefAllele;\n            string[] altAlleles = intermediateSampleFields.AltAlleles;\n\n            // sanity check: the per-base counts only make sense when every allele is a single base\n            bool allSingleBase = refAllele.Length == 1 && altAlleles.AreAllAltAllelesSingleBase();\n            if (!allSingleBase) return null;\n\n            var depths = new int[altAlleles.Length + 1];\n\n            // slot 0 holds the reference allele, slots 1..n the alternate alleles in order\n            for (var slot = 0; slot < depths.Length; slot++)\n            {\n                string allele = slot == 0 ? refAllele : altAlleles[slot - 1];\n                int? count    = GetAlleleCountString(allele, intermediateSampleFields);\n                if (count == null) return null;\n                depths[slot] = count.Value;\n            }\n\n            return depths;\n        }\n\n        /// <summary>\n        /// Returns the count stored for the supplied base (\"A\", \"C\", \"G\" or \"T\"); null for any other\n        /// allele or when that count is missing.\n        /// </summary>\n        private static int? GetAlleleCountString(string s, IntermediateSampleFields intermediateSampleFields) =>\n            s switch\n            {\n                \"A\" => intermediateSampleFields.ACount,\n                \"C\" => intermediateSampleFields.CCount,\n                \"G\" => intermediateSampleFields.GCount,\n                \"T\" => intermediateSampleFields.TCount,\n                _   => null\n            };\n\n        /// <summary>\n        /// Returns the comma-separated values of the AD sample field as integers; null when AD is not in\n        /// the FORMAT column, the sample has too few columns, the first value is \".\", or a value fails to parse.\n        /// </summary>\n        private static int[] GetAlleleDepthsUsingAd(IntermediateSampleFields intermediateSampleFields)\n        {\n            int? adIndex           = intermediateSampleFields.FormatIndices.AD;\n            string[] sampleColumns = intermediateSampleFields.SampleColumns;\n            if (adIndex == null || sampleColumns.Length <= adIndex.Value) return null;\n\n            string[] rawDepths = sampleColumns[adIndex.Value].OptimizedSplit(',');\n            if (rawDepths[0] == \".\") return null;\n\n            var alleleDepths = new int[rawDepths.Length];\n            var position     = 0;\n\n            foreach (string rawDepth in rawDepths)\n            {\n                (int depth, bool foundError) = rawDepth.OptimizedParseInt32();\n                if (foundError) return null;\n                alleleDepths[position++] = depth;\n            }\n\n            return alleleDepths;\n        }\n    }\n}\n"
  },
  {
    "path": "Vcf/Sample/Legacy/FailedFilter.cs",
    "content": "\uFEFFnamespace Vcf.Sample.Legacy\n{\n    internal static class FailedFilter\n    {\n        /// <summary>\n        /// Returns true when the sample's FT value is something other than \"PASS\" or \".\". Returns false\n        /// when FT is not in the FORMAT column or the sample has too few columns.\n        /// </summary>\n        public static bool GetFailedFilter(IntermediateSampleFields intermediateSampleFields)\n        {\n            int? ftIndex     = intermediateSampleFields.FormatIndices.FT;\n            string[] columns = intermediateSampleFields.SampleColumns;\n\n            if (ftIndex == null || ftIndex.Value >= columns.Length) return false;\n\n            string filterValue = columns[ftIndex.Value];\n            bool isPassing     = filterValue == \"PASS\" || filterValue == \".\";\n            return !isPassing;\n        }\n    }\n}\n"
  },
  {
    "path": "Vcf/Sample/Legacy/Genotype.cs",
    "content": "\uFEFFnamespace Vcf.Sample.Legacy\n{\n    internal static class Genotype\n    {\n        /// <summary>\n        /// Returns the sample's GT value, or null when GT is not in the FORMAT column, the sample has\n        /// fewer columns than the GT index, or the value is \".\".\n        /// </summary>\n        public static string GetGenotype(IntermediateSampleFields intermediateSampleFields)\n        {\n            int? gtIndex = intermediateSampleFields.FormatIndices.GT;\n            if (gtIndex == null) return null;\n\n            // a sample column may hold fewer sub-fields than the FORMAT column declares; treat the\n            // genotype as missing instead of indexing past the end (same guard as FailedFilter)\n            string[] sampleColumns = intermediateSampleFields.SampleColumns;\n            if (gtIndex.Value >= sampleColumns.Length) return null;\n\n            string genotype = sampleColumns[gtIndex.Value];\n            return genotype == \".\" ? null : genotype;\n        }\n    }\n}\n"
  },
  {
    "path": "Vcf/Sample/Legacy/GenotypeQuality.cs",
    "content": "\uFEFFusing OptimizedCore;\n\nnamespace Vcf.Sample.Legacy\n{\n    internal static class GenotypeQuality\n    {\n        /// <summary>\n        /// Returns the genotype quality parsed from GQX when that key is in the FORMAT column, otherwise\n        /// from GQ. Returns null when neither key is present, the sample has too few columns, or the\n        /// value does not parse as an integer.\n        /// </summary>\n        public static int? GetGenotypeQuality(IntermediateSampleFields intermediateSampleFields)\n        {\n            FormatIndices indices = intermediateSampleFields.FormatIndices;\n\n            // GQX wins whenever it is declared; GQ is only consulted when GQX is absent\n            int? qualityIndex = indices.GQX ?? indices.GQ;\n            if (qualityIndex == null) return null;\n\n            string[] sampleColumns = intermediateSampleFields.SampleColumns;\n            if (qualityIndex.Value >= sampleColumns.Length) return null;\n\n            (int quality, bool foundError) = sampleColumns[qualityIndex.Value].OptimizedParseInt32();\n            if (foundError) return null;\n            return quality;\n        }\n    }\n}\n"
  },
  {
    "path": "Vcf/Sample/Legacy/IntermediateSampleFields.cs",
    "content": "\uFEFFusing System.Globalization;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.IO;\n\nnamespace Vcf.Sample.Legacy\n{\n    /// <summary>\n    /// Values pulled out of one sample column (plus REF and ALT from the VCF line) for the legacy extractors.\n    /// </summary>\n    public sealed class IntermediateSampleFields\n    {\n        public FormatIndices FormatIndices { get; }\n        public string[] SampleColumns { get; }\n        public string[] AltAlleles { get; }\n\n        public int? TotalAlleleCount { get; }\n        public string VcfRefAllele { get; }\n        public int? MajorChromosomeCount { get; }\n        public int? CopyNumber { get; }\n\n        // ReSharper disable InconsistentNaming\n        public float? AQ { get; }\n        public float? LQ { get; }\n        public double? VF { get; }\n        public int? TIR { get; }\n        public int? TAR { get; }\n        public int? ACount { get; }\n        public int? CCount { get; }\n        public int? GCount { get; }\n        public int? TCount { get; }\n\n        public string[] DST { get; }\n        // ReSharper restore InconsistentNaming\n\n        // the styles float.TryParse and double.TryParse apply by default, spelled out so that the\n        // culture can be passed explicitly\n        private const NumberStyles FloatingPointStyles = NumberStyles.Float | NumberStyles.AllowThousands;\n\n        /// <summary>\n        /// Reads REF and ALT from the VCF columns and extracts the legacy sample fields from sampleCols\n        /// using the supplied FORMAT indices.\n        /// </summary>\n        // ReSharper disable once SuggestBaseTypeForParameter\n        public IntermediateSampleFields(string[] vcfColumns, FormatIndices formatIndices, string[] sampleCols)\n        {\n            VcfRefAllele  = vcfColumns[VcfCommon.RefIndex];\n            AltAlleles    = vcfColumns[VcfCommon.AltIndex].OptimizedSplit(',');\n            FormatIndices = formatIndices;\n            SampleColumns = sampleCols;\n\n            (TAR, TIR)           = GetLinkedIntegers(GetFirstValue(GetString(formatIndices.TAR, sampleCols)), GetFirstValue(GetString(formatIndices.TIR, sampleCols)));\n            MajorChromosomeCount = GetInteger(GetString(formatIndices.MCC, sampleCols));\n            DST                  = GetStrings(GetString(formatIndices.DST, sampleCols));\n            AQ                   = GetFloat(GetString(formatIndices.AQ, sampleCols));\n            LQ                   = GetFloat(GetString(formatIndices.LQ, sampleCols));\n            VF                   = GetDouble(GetString(formatIndices.VF, sampleCols));\n\n            // the copy number is suppressed whenever the ALT column contains \"STR\"\n            CopyNumber = GetCopyNumber(GetString(formatIndices.CN, sampleCols), vcfColumns[VcfCommon.AltIndex].Contains(\"STR\"));\n\n            (ACount, CCount, GCount, TCount, TotalAlleleCount) = GetAlleleCounts(\n                GetString(formatIndices.AU, sampleCols), GetString(formatIndices.CU, sampleCols),\n                GetString(formatIndices.GU, sampleCols), GetString(formatIndices.TU, sampleCols));\n        }\n\n        /// <summary>\n        /// Returns the column at the given index; null when the index is null or out of range, or the\n        /// value is \".\".\n        /// </summary>\n        // ReSharper disable once SuggestBaseTypeForParameter\n        private static string GetString(int? index, string[] cols)\n        {\n            if (index == null || index >= cols.Length) return null;\n            string s = cols[index.Value];\n            return s == \".\" ? null : s;\n        }\n\n        /// <summary>\n        /// Parses a float using the invariant culture; null for null or unparsable input.\n        /// </summary>\n        private static float? GetFloat(string s)\n        {\n            if (s == null) return null;\n            // parse culture-independently so that \"0.5\" means the same on every machine\n            if (float.TryParse(s, FloatingPointStyles, CultureInfo.InvariantCulture, out float ret)) return ret;\n            return null;\n        }\n\n        /// <summary>\n        /// Parses a double using the invariant culture; null for null or unparsable input.\n        /// </summary>\n        private static double? GetDouble(string s)\n        {\n            if (s == null) return null;\n            // parse culture-independently so that \"0.5\" means the same on every machine\n            if (double.TryParse(s, FloatingPointStyles, CultureInfo.InvariantCulture, out double ret)) return ret;\n            return null;\n        }\n\n        /// <summary>\n        /// Parses an integer; null for null input or when parsing reports an error.\n        /// </summary>\n        private static int? GetInteger(string s)\n        {\n            if (s == null) return null;\n            (int number, bool foundError) = s.OptimizedParseInt32();\n            return foundError ? null : (int?)number;\n        }\n\n        /// <summary>\n        /// Parses two integers that are only meaningful together: if either fails, both are returned as null.\n        /// </summary>\n        private static (int?, int?) GetLinkedIntegers(string s, string s2)\n        {\n            var num = GetInteger(s);\n            var num2 = GetInteger(s2);\n            if (num == null || num2 == null) return (null, null);\n            return (num, num2);\n        }\n\n        /// <summary>\n        /// Splits a comma-separated value; null input yields null.\n        /// </summary>\n        private static string[] GetStrings(string s) => s?.OptimizedSplit(',');\n\n        /// <summary>\n        /// Parses the copy number; null when the value is missing or containsStr is true.\n        /// </summary>\n        private static int? GetCopyNumber(string s, bool containsStr)\n        {\n            if (s == null || containsStr) return null;\n            return GetInteger(s);\n        }\n\n        /// <summary>\n        /// Parses the first comma-separated value of each of AU, CU, GU and TU. When any of the four\n        /// fields is missing, everything is null; the total is null unless all four values parse.\n        /// </summary>\n        private static (int?, int?, int?, int?, int?) GetAlleleCounts(string au, string cu, string gu, string tu)\n        {\n            if (au == null || cu == null || gu == null || tu == null) return (null, null, null, null, null);\n\n            var a = GetInteger(GetFirstValue(au));\n            var c = GetInteger(GetFirstValue(cu));\n            var g = GetInteger(GetFirstValue(gu));\n            var t = GetInteger(GetFirstValue(tu));\n            var total = a == null || c == null || g == null || t == null ? null : a + c + g + t;\n            return (a, c, g, t, total);\n        }\n\n        /// <summary>\n        /// Returns the first comma-separated value; null input yields null.\n        /// </summary>\n        private static string GetFirstValue(string s) => GetStrings(s)?[0];\n    }\n}"
  },
  {
    "path": "Vcf/Sample/Legacy/LegacySampleFieldExtractor.cs",
    "content": "\uFEFFusing System;\nusing OptimizedCore;\nusing VariantAnnotation.Interface.IO;\nusing VariantAnnotation.Interface.Positions;\n\nnamespace Vcf.Sample.Legacy\n{\n    /// <summary>\n    /// Builds samples for one VCF line from its legacy FORMAT fields.\n    /// </summary>\n    public sealed class LegacySampleFieldExtractor\n    {\n        private readonly string[] _vcfColumns;\n        private readonly FormatIndices _formatIndices;\n        private readonly int? _infoDepth;\n\n        /// <summary>\n        /// Stores the VCF columns and FORMAT indices and reads the DP value from the INFO column once.\n        /// </summary>\n        internal LegacySampleFieldExtractor(string[] vcfColumns, FormatIndices formatIndices)\n        {\n            _vcfColumns = vcfColumns;\n            _infoDepth = GetInfoDepth(vcfColumns[VcfCommon.InfoIndex]);\n            _formatIndices = formatIndices;\n        }\n\n        /// <summary>\n        /// Returns the integer value of the first DP entry in the semicolon-separated INFO column; null\n        /// when there is no DP entry or its value is not an integer.\n        /// </summary>\n        private static int? GetInfoDepth(string infoColumn)\n        {\n            const string depthPrefix = \"DP=\";\n\n            foreach (string field in infoColumn.OptimizedSplit(';'))\n            {\n                // match the DP key exactly; a bare \"DP\" prefix test would also pick up keys such as DPI or DP4\n                if (!field.StartsWith(depthPrefix, StringComparison.Ordinal)) continue;\n\n                // a missing (\".\") or malformed value means \"no depth\" rather than an exception\n                return int.TryParse(field.Substring(depthPrefix.Length), out int depth) ? depth : (int?)null;\n            }\n\n            // no DP field present\n            return null;\n        }\n\n        /// <summary>\n        /// Extracts a sample from one colon-separated sample column. Returns Sample.EmptySample when\n        /// there are no FORMAT indices, the column is null or empty, or the column is just \".\".\n        /// </summary>\n        internal ISample ExtractSample(string sampleColumn)\n        {\n            // sanity check: make sure we have a format column\n            if (_formatIndices == null || string.IsNullOrEmpty(sampleColumn)) return Sample.EmptySample;\n\n            var sampleColumns = sampleColumn.OptimizedSplit(':');\n\n            // handle missing sample columns\n            if (sampleColumns.Length == 1 && sampleColumns[0] == \".\") return Sample.EmptySample;\n\n            var sampleFields = new IntermediateSampleFields(_vcfColumns, _formatIndices, sampleColumns);\n\n            var alleleDepths  = AlleleDepths.GetAlleleDepths(sampleFields);\n            bool failedFilter = FailedFilter.GetFailedFilter(sampleFields);\n            string genotype   = Genotype.GetGenotype(sampleFields);\n\n            var genotypeQuality    = GenotypeQuality.GetGenotypeQuality(sampleFields);\n            var totalDepth         = TotalDepth.GetTotalDepth(_infoDepth, sampleFields);\n            double? denovoQuality  = sampleColumns.GetString(_formatIndices.DQ).GetDouble();\n            var variantFrequencies = LegacyVariantFrequency.GetVariantFrequencies(sampleFields);\n            var splitReadCounts    = ReadCounts.GetSplitReadCounts(sampleFields);\n            var pairEndReadCounts  = ReadCounts.GetPairEndReadCounts(sampleFields);\n\n            // flagged when the major chromosome count equals the copy number and the copy number exceeds 1\n            bool isLossOfHeterozygosity = sampleFields.MajorChromosomeCount != null &&\n                                          sampleFields.CopyNumber != null &&\n                                          sampleFields.MajorChromosomeCount.Value == sampleFields.CopyNumber.Value &&\n                                          sampleFields.CopyNumber.Value > 1;\n\n            var sample = new Sample(alleleDepths, sampleFields.AQ, sampleFields.CopyNumber, sampleFields.DST,\n                failedFilter, genotype, genotypeQuality, false, denovoQuality, sampleFields.LQ, pairEndReadCounts, null, splitReadCounts,\n                totalDepth, variantFrequencies, null, null, isLossOfHeterozygosity, null, null);\n\n            return sample;\n        }\n    }\n}"
  },
  {
    "path": "Vcf/Sample/Legacy/LegacyVariantFrequency.cs",
    "content": "﻿using OptimizedCore;\n\nnamespace Vcf.Sample.Legacy\n{\n    internal static class LegacyVariantFrequency\n    {\n        private static readonly double[] ZeroVf = { 0.0 };\n\n        public static double[] GetVariantFrequencies(IntermediateSampleFields sampleFields)\n        {\n            double[] vf = null;\n\n            // use VF\n            if (sampleFields.VF != null) vf = GetVariantFrequenciesUsingVf(sampleFields);\n\n            // use TAR & TIR\n            if (sampleFields.TAR != null && sampleFields.TIR != null) vf = GetVariantFrequenciesUsingTarTir(sampleFields);\n\n            // use allele counts\n            if (vf == null && sampleFields.TotalAlleleCount != null) vf = GetVariantFrequenciesUsingAlleleCounts(sampleFields);\n\n            // use allele depths\n            if (vf == null && sampleFields.FormatIndices.AD != null) vf = GetVariantFrequenciesUsingAlleleDepths(sampleFields);\n\n            \n\n            return vf;\n        }\n\n        private static double[] GetVariantFrequenciesUsingVf(IntermediateSampleFields sampleFields)\n        {\n            if (sampleFields.AltAlleles.Length > 1 || sampleFields.VF == null) return null;\n            return new[] { sampleFields.VF.Value };\n        }\n\n        private static double[] GetVariantFrequenciesUsingAlleleCounts(IntermediateSampleFields sampleFields)\n        {\n            bool isRefSingleBase      = sampleFields.VcfRefAllele.Length == 1;\n            bool areAllAltsSingleBase = sampleFields.AltAlleles.AreAllAltAllelesSingleBase();\n            bool isReference          = sampleFields.AltAlleles.Length == 1 && sampleFields.AltAlleles[0] == \".\";\n\n            // for this to work we need a single-base reference allele and all raw allele counts must be available\n            if (sampleFields.TotalAlleleCount == null || isReference || !isRefSingleBase || !areAllAltsSingleBase) return null;\n\n            int numAltAlleles = sampleFields.AltAlleles.Length;\n            
var variantFreqs  = new double[numAltAlleles];\n\n            if (sampleFields.TotalAlleleCount == 0) return variantFreqs;\n\n            for (var i = 0; i < numAltAlleles; i++)\n            {\n                int alleleCount = GetAlleleCount(sampleFields, i);\n                variantFreqs[i] = alleleCount / (double)sampleFields.TotalAlleleCount;\n            }\n\n            return variantFreqs;\n        }\n\n        internal static bool AreAllAltAllelesSingleBase(this string[] altAlleles)\n        {\n            foreach (string altAllele in altAlleles)\n                if (altAllele.Length != 1)\n                    return false;\n            return true;\n        }\n\n        private static int GetAlleleCount(IntermediateSampleFields sampleFields, int alleleIndex)\n        {\n            string altAllele = sampleFields.AltAlleles[alleleIndex];\n            var alleleCount = 0;\n\n            // ReSharper disable once SwitchStatementMissingSomeCases\n            switch (altAllele)\n            {\n                case \"A\":\n                    alleleCount = sampleFields.ACount ?? 0;\n                    break;\n                case \"C\":\n                    alleleCount = sampleFields.CCount ?? 0;\n                    break;\n                case \"G\":\n                    alleleCount = sampleFields.GCount ?? 0;\n                    break;\n                case \"T\":\n                    alleleCount = sampleFields.TCount ?? 
0;\n                    break;\n            }\n\n            return alleleCount;\n        }\n\n        private static double[] GetVariantFrequenciesUsingTarTir(IntermediateSampleFields sampleFields)\n        {\n            // TAR and TIR: never observed with multiple alternate alleles\n            if (sampleFields.TIR == null || sampleFields.TAR == null || sampleFields.AltAlleles.Length > 1) return null;\n            if (sampleFields.TIR + sampleFields.TAR == 0) return ZeroVf;\n\n            var tir = (double)sampleFields.TIR;\n            var tar = (double)sampleFields.TAR;\n            return new[] { tir / (tar + tir) };\n        }\n\n        private static double[] GetVariantFrequenciesUsingAlleleDepths(IntermediateSampleFields sampleFields)\n        {\n            if (sampleFields.FormatIndices.AD == null || sampleFields.SampleColumns.Length <= sampleFields.FormatIndices.AD.Value) return null;\n\n            int numAltAlleles = sampleFields.AltAlleles.Length;\n            var variantFreqs  = new double[numAltAlleles];\n\n            string adField = sampleFields.SampleColumns[sampleFields.FormatIndices.AD.Value];\n            (var alleleDepths, bool allValuesAreValid, int totalDepth) = GetAlleleDepths(adField);\n            if (!allValuesAreValid || numAltAlleles != alleleDepths.Length) return null;\n\n            // sanity check: make sure we handle NaNs properly\n            if (totalDepth == 0) return variantFreqs;\n\n            for (var alleleIndex = 0; alleleIndex < numAltAlleles; alleleIndex++)\n            {\n                variantFreqs[alleleIndex] = alleleDepths[alleleIndex] / (double)totalDepth;\n            }\n\n            return variantFreqs;\n        }\n\n        private static (int[] AlleleDepths, bool AllValuesAreValid, int totalDepth) GetAlleleDepths(string adField)\n        {\n            var adFields = adField.OptimizedSplit(',');\n            var alleleDepths = new int[adFields.Length - 1];\n            var totalDepth = 0;\n\n            
for (var i = 0; i < adFields.Length; i++)\n            {\n                (int ad, bool foundError) = adFields[i].OptimizedParseInt32();\n                if(foundError) return (null, false, totalDepth);\n                if (i > 0) alleleDepths[i - 1] = ad;\n                totalDepth += ad;\n            }\n\n            return (alleleDepths, true, totalDepth);\n        }\n    }\n}\n"
  },
  {
    "path": "Vcf/Sample/Legacy/ReadCounts.cs",
    "content": "﻿using OptimizedCore;\n\nnamespace Vcf.Sample.Legacy\n{\n    internal static class ReadCounts\n\t{\n\t\tpublic static int[] GetPairEndReadCounts(IntermediateSampleFields intermediateSampleFields)\n\t\t{\n\t\t\tif (intermediateSampleFields.FormatIndices.PR == null) return null;\n\t\t\tvar readCounts = intermediateSampleFields.SampleColumns[intermediateSampleFields.FormatIndices.PR.Value].OptimizedSplit(',');\n\n\t\t    var pairEndReadCounts = new int[readCounts.Length];\n\n\t\t    for (var i = 0; i < pairEndReadCounts.Length; i++)\n\t\t    {\n\t\t        (int number, bool foundError) = readCounts[i].OptimizedParseInt32();\n\t\t        if (foundError) return null;\n\t\t        pairEndReadCounts[i] = number;\n\t\t    }\n\n\t\t    return pairEndReadCounts;\n        }\n\n\t\tpublic static int[] GetSplitReadCounts(IntermediateSampleFields intermediateSampleFields)\n\t\t{\n\t\t\tif (intermediateSampleFields.FormatIndices.SR == null) return null;\n\t\t\tvar splitReadCounts = intermediateSampleFields.SampleColumns[intermediateSampleFields.FormatIndices.SR.Value].OptimizedSplit(',');\n\n\t\t    var splitReads = new int[splitReadCounts.Length];\n\n\t\t    for (var i = 0; i < splitReads.Length; i++)\n\t\t    {\n\t\t        (int number, bool foundError) = splitReadCounts[i].OptimizedParseInt32();\n\t\t        if (foundError) return null;\n\t\t        splitReads[i] = number;\n\t\t    }\n\n\t\t\treturn splitReads;\n\t\t}\n\t}\n}"
  },
  {
    "path": "Vcf/Sample/Legacy/TotalDepth.cs",
    "content": "﻿using OptimizedCore;\n\nnamespace Vcf.Sample.Legacy\n{\n    internal static class TotalDepth\n    {\n        public static int? GetTotalDepth(int? infoDepth, IntermediateSampleFields intermediateSampleFields)\n        {\n            // use TAR & TIR\n            if (intermediateSampleFields.TAR != null && intermediateSampleFields.TIR != null) return GetTotalDepthUsingTarTir(intermediateSampleFields);\n\n            // use base counts\n            if (intermediateSampleFields.TotalAlleleCount != null) return GetTotalDepthUsingAlleleCounts(intermediateSampleFields);\n\n            // use DPI\n            if (intermediateSampleFields.FormatIndices.DPI != null) return GetTotalDepthUsingDpi(intermediateSampleFields);\n\n            // use DP\n            if (intermediateSampleFields.FormatIndices.DP != null) return GetTotalDepthUsingDp(intermediateSampleFields);\n\n            // use INFO DP (Pisces)\n            return infoDepth;\n        }\n\n        private static int? GetTotalDepthUsingTarTir(IntermediateSampleFields intermediateSampleFields) => intermediateSampleFields.TAR + intermediateSampleFields.TIR;\n\n        private static int? GetTotalDepthUsingAlleleCounts(IntermediateSampleFields intermediateSampleFields) => intermediateSampleFields.TotalAlleleCount;\n\n        private static int? GetTotalDepthUsingDpi(IntermediateSampleFields intermediateSampleFields)\n        {\n            if (intermediateSampleFields.FormatIndices.DPI == null || intermediateSampleFields.SampleColumns.Length <= intermediateSampleFields.FormatIndices.DPI.Value) return null;\n            string depth = intermediateSampleFields.SampleColumns[intermediateSampleFields.FormatIndices.DPI.Value];\n            (int number, bool foundError) = depth.OptimizedParseInt32();\n            return foundError ? null : (int?)number;\n        }\n\n        private static int? 
GetTotalDepthUsingDp(IntermediateSampleFields intermediateSampleFields)\n        {\n            if (intermediateSampleFields.FormatIndices.DP == null || intermediateSampleFields.SampleColumns.Length <= intermediateSampleFields.FormatIndices.DP.Value) return null;\n            string depth = intermediateSampleFields.SampleColumns[intermediateSampleFields.FormatIndices.DP.Value];\n            (int number, bool foundError) = depth.OptimizedParseInt32();\n            return foundError ? null : (int?)number;\n        }\n    }\n}\n"
  },
  {
    "path": "Vcf/Sample/Sample.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing Vcf.Info;\r\n\r\nnamespace Vcf.Sample\r\n{\r\n    public sealed class Sample : ISample\r\n    {\r\n        public int[]    AlleleDepths                 { get; }\r\n        public float?   ArtifactAdjustedQualityScore { get; } // PEPE\r\n        public int?     CopyNumber                   { get; }\r\n        public string[] DiseaseAffectedStatuses      { get; } // SMN1\r\n        public bool     FailedFilter                 { get; }\r\n        public string   Genotype                     { get; }\r\n        public int?     GenotypeQuality              { get; }\r\n        public bool     IsDeNovo                     { get; }\r\n        public double?  DeNovoQuality                { get; } //for legacy callers only\r\n        public bool     IsEmpty                      { get; }\r\n        public float?   LikelihoodRatioQualityScore  { get; } // PEPE\r\n        public int[]    PairedEndReadCounts          { get; } // Manta\r\n        public int[]    RepeatUnitCounts             { get; } // ExpansionHunter\r\n        public int[]    SplitReadCounts              { get; } // Manta\r\n        public int?     TotalDepth                   { get; }\r\n        public double[] VariantFrequencies           { get; }\r\n        public int?     MinorHaplotypeCopyNumber     { get; }\r\n        public double?  SomaticQuality               { get; }\r\n        public bool?    IsLossOfHeterozygosity       { get; }\r\n        public string[] HeteroplasmyPercentile       { get; }\r\n        public int?     
BinCount                     { get; }\r\n        \r\n        public ICustomFields CustomFields { get; }\r\n\r\n        public static readonly Sample EmptySample =\r\n            new Sample(null, null, null, null,\r\n                false, null, null, false, null, \r\n                null, null, null, null, \r\n                null, null, null, null, \r\n                null, null, null);\r\n\r\n        public Sample(int[] alleleDepths, float? artifactAdjustedQualityScore, int? copyNumber,\r\n            string[] diseaseAffectedStatuses, bool failedFilter, string genotype, int? genotypeQuality, bool isDeNovo, double? deNovoQuality,\r\n            float? likelihoodRatioQualityScore, int[] pairedEndReadCounts, int[] repeatUnitCounts,\r\n            int[] splitReadCounts, int? totalDepth, double[] variantFrequencies, int? minorHaplotypeCopyNumber, double? somaticQuality, \r\n            bool? isLossOfHeterozygosity, string[] heteroplasmyPercentile, int? binCount, ICustomFields customFields=null)\r\n        {\r\n            AlleleDepths                 = alleleDepths;\r\n            ArtifactAdjustedQualityScore = artifactAdjustedQualityScore;\r\n            CopyNumber                   = copyNumber;\r\n            DiseaseAffectedStatuses      = diseaseAffectedStatuses;\r\n            FailedFilter                 = failedFilter;\r\n            Genotype                     = genotype;\r\n            GenotypeQuality              = genotypeQuality;\r\n            IsDeNovo                     = isDeNovo;\r\n            DeNovoQuality                = deNovoQuality;\r\n            LikelihoodRatioQualityScore  = likelihoodRatioQualityScore;\r\n            PairedEndReadCounts          = pairedEndReadCounts;\r\n            RepeatUnitCounts             = repeatUnitCounts;\r\n            SplitReadCounts              = splitReadCounts;\r\n            TotalDepth                   = totalDepth;\r\n            VariantFrequencies           = variantFrequencies;\r\n            
IsLossOfHeterozygosity       = isLossOfHeterozygosity;\r\n            HeteroplasmyPercentile       = heteroplasmyPercentile;\r\n            MinorHaplotypeCopyNumber     = minorHaplotypeCopyNumber;\r\n            SomaticQuality               = somaticQuality;\r\n            BinCount                     = binCount;\r\n            CustomFields                 = customFields;\r\n\r\n            IsEmpty = AlleleDepths                 == null &&\r\n                      ArtifactAdjustedQualityScore == null &&\r\n                      CopyNumber                   == null &&\r\n                      DiseaseAffectedStatuses      == null &&\r\n                      Genotype                     == null &&\r\n                      GenotypeQuality              == null &&\r\n                      LikelihoodRatioQualityScore  == null &&\r\n                      PairedEndReadCounts          == null &&\r\n                      RepeatUnitCounts             == null &&\r\n                      SplitReadCounts              == null &&\r\n                      TotalDepth                   == null &&\r\n                      VariantFrequencies           == null &&\r\n                      IsLossOfHeterozygosity       == null &&\r\n                      MinorHaplotypeCopyNumber     == null &&\r\n                      SomaticQuality               == null &&\r\n                      HeteroplasmyPercentile       == null &&\r\n                      DeNovoQuality                == null &&\r\n                      BinCount                     == null &&\r\n                      !FailedFilter                        &&\r\n                      !IsDeNovo &&\r\n                      (CustomFields == null || CustomFields.IsEmpty());\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Vcf/Sample/SampleFieldExtractor.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing System.Linq;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing Variants;\r\nusing Vcf.Info;\r\nusing Vcf.Sample.Legacy;\r\n\r\nnamespace Vcf.Sample\r\n{\r\n    internal static class SampleFieldExtractor\r\n    {\r\n        internal static ISample[]  ToSamples(this string[] vcfColumns, FormatIndices formatIndices, ISimplePosition simplePosition, \r\n            IVariant[] variants, IMitoHeteroplasmyProvider mitoHeteroplasmyProvider, bool enableDq=false, HashSet<string> customFormatKeys=null)\r\n        {\r\n            if (vcfColumns.Length < VcfCommon.MinNumColumnsSampleGenotypes) return null;\r\n\r\n            int numSamples = vcfColumns.Length - VcfCommon.MinNumColumnsSampleGenotypes + 1;\r\n            var samples    = new ISample[numSamples];\r\n\r\n            formatIndices.Set(vcfColumns[VcfCommon.FormatIndex]);\r\n            \r\n            var legacySampleExtractor = IsLegacyVariantCaller(formatIndices) ? 
new LegacySampleFieldExtractor(vcfColumns, formatIndices) : null;\r\n\r\n            for (int index = VcfCommon.GenotypeIndex; index < vcfColumns.Length; index++)\r\n            {\r\n                samples[index - VcfCommon.GenotypeIndex] = ExtractSample(vcfColumns[index], formatIndices, simplePosition, variants, \r\n                    mitoHeteroplasmyProvider, legacySampleExtractor, enableDq, customFormatKeys);\r\n            }\r\n\r\n            return samples;\r\n        }\r\n\r\n        internal static ISample ExtractSample(string sampleColumn, FormatIndices formatIndices, ISimplePosition simplePosition, \r\n            IVariant[] variants, IMitoHeteroplasmyProvider mitoHeteroplasmyProvider,  LegacySampleFieldExtractor legacyExtractor = null, \r\n            bool enableDq=false, HashSet<string> customFormatKeys=null)\r\n        {\r\n            // sanity check: make sure we have a format column\r\n            if (string.IsNullOrEmpty(sampleColumn)) return Sample.EmptySample;\r\n\r\n            string[] sampleColumns = sampleColumn.OptimizedSplit(':', formatIndices.NumColumns);\r\n            if (sampleColumns.Length == 1 && sampleColumns[0] == \".\") return Sample.EmptySample;\r\n\r\n            sampleColumns.NormalizeNulls();\r\n\r\n            if (legacyExtractor != null)\r\n            {\r\n                return legacyExtractor.ExtractSample(sampleColumn);\r\n            }\r\n\r\n            int[]    alleleDepths                 = sampleColumns.GetString(formatIndices.AD).GetIntegers();\r\n            float?   artifactAdjustedQualityScore = sampleColumns.GetString(formatIndices.AQ).GetFloat();\r\n            int?     
copyNumber                   = sampleColumns.GetString(formatIndices.CN).GetInteger();\r\n            string[] diseaseAffectedStatuses      = sampleColumns.GetString(formatIndices.DST).GetStrings();\r\n            bool     failedFilter                 = sampleColumns.GetString(formatIndices.FT).GetFailedFilter();\r\n            string   genotype                     = sampleColumns.GetString(formatIndices.GT);\r\n            int?     genotypeQuality              = sampleColumns.GetString(formatIndices.GQ).GetInteger();\r\n            bool     isDeNovo                     = sampleColumns.GetString(formatIndices.DN).IsDeNovo();\r\n            double?  deNovoQuality                = enableDq? sampleColumns.GetString(formatIndices.DQ).GetDouble():null;\r\n            float?   likelihoodRatioQualityScore  = sampleColumns.GetString(formatIndices.LQ).GetFloat();\r\n            int[]    pairedEndReadCounts          = sampleColumns.GetString(formatIndices.PR).GetIntegers();\r\n            int[]    repeatUnitCounts             = sampleColumns.GetString(formatIndices.REPCN).GetIntegers('/');\r\n            int[]    splitReadCounts              = sampleColumns.GetString(formatIndices.SR).GetIntegers();\r\n            int?     totalDepth                   = sampleColumns.GetString(formatIndices.DP).GetInteger();\r\n            double?  variantFrequency             = sampleColumns.GetString(formatIndices.VF).GetDouble();\r\n            int?     minorHaplotypeCopyNumber     = sampleColumns.GetString(formatIndices.MCN).GetInteger();\r\n            double?  somaticQuality               = sampleColumns.GetString(formatIndices.SQ).GetDouble();\r\n            int?     
binCount                     = sampleColumns.GetString(formatIndices.BC).GetInteger();\r\n            \r\n            CustomFields customFields = new CustomFields();\r\n            if (formatIndices.CustomFields != null)\r\n            {\r\n                foreach (var (key, index) in formatIndices.CustomFields)\r\n                {\r\n                    if (index == null) continue;\r\n                    var value = sampleColumns.GetString(index);\r\n                    if (string.IsNullOrEmpty(value) || value==\".\") continue;\r\n                    customFields.Add(key, sampleColumns.GetString(index));\r\n                }\r\n            }\r\n            \r\n\r\n            double[] variantFrequencies = VariantFrequency.GetVariantFrequencies(variantFrequency, alleleDepths, simplePosition.AltAlleles.Length);\r\n            string[] mitoHeteroplasmyPercentiles = mitoHeteroplasmyProvider?.GetVrfPercentiles(variants, variantFrequencies)?.Select(x => x?.ToString(\"0.##\") ?? \"null\").ToArray();\r\n\r\n            var isLoh = GetLoh(copyNumber, minorHaplotypeCopyNumber, genotype);\r\n\r\n            var sample = new Sample(alleleDepths, artifactAdjustedQualityScore, copyNumber, diseaseAffectedStatuses,\r\n                failedFilter, genotype, genotypeQuality, isDeNovo, deNovoQuality, likelihoodRatioQualityScore, pairedEndReadCounts,\r\n                repeatUnitCounts, splitReadCounts, totalDepth, variantFrequencies, minorHaplotypeCopyNumber, somaticQuality, isLoh, \r\n                mitoHeteroplasmyPercentiles, binCount, customFields);\r\n\r\n            return sample;\r\n        }\r\n\r\n        private static bool? GetLoh(int? copyNumber, int? 
minorHaplotypeCopyNumber, string genotype)\r\n        {\r\n            if (!minorHaplotypeCopyNumber.HasValue || !copyNumber.HasValue) return null;\r\n\r\n            return (genotype == \"1/2\" || genotype == \"1|2\") && minorHaplotypeCopyNumber == 0 && copyNumber >= 2;\r\n        }\r\n\r\n        private static bool IsLegacyVariantCaller(FormatIndices formatIndices)\r\n        {\r\n            return formatIndices.TAR != null ||\r\n                   formatIndices.TIR != null ||\r\n                   formatIndices.AU != null ||\r\n                   formatIndices.GU != null ||\r\n                   formatIndices.CU != null ||\r\n                   formatIndices.TU != null ||\r\n                   formatIndices.GQX != null ||\r\n                   formatIndices.DPI != null ||\r\n                   formatIndices.MCC != null;\r\n\r\n        }\r\n\r\n        internal static void NormalizeNulls(this string[] cols)\r\n        {\r\n            for (var i = 0; i < cols.Length; i++)\r\n            {\r\n                string col = cols[i];\r\n                if (col == null) continue;\r\n                if (col.Length == 0 || col == \".\") cols[i] = null;\r\n            }\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Vcf/Sample/SampleParsingExtensions.cs",
    "content": "﻿using System;\r\nusing OptimizedCore;\r\n\r\nnamespace Vcf.Sample\r\n{\r\n    public static class SampleParsingExtensions\r\n    {\r\n        internal static string GetString(this string[] cols, int? index) => index == null ? null : cols[index.Value];\r\n\r\n        internal static float? GetFloat(this string s)\r\n        {\r\n            if (s == null) return null;\r\n            if (float.TryParse(s, out float num)) return num;\r\n            return null;\r\n        }\r\n\r\n        internal static double? GetDouble(this string s)\r\n        {\r\n            if (s == null) return null;\r\n            if (double.TryParse(s, out double num)) return num;\r\n            return null;\r\n        }\r\n\r\n        internal static int? GetInteger(this string s)\r\n        {\r\n            if (s == null) return null;\r\n            (int number, bool foundError) = s.OptimizedParseInt32();\r\n            return foundError ? null : (int?)number;\r\n        }\r\n\r\n        internal static string[] GetStrings(this string s) => s?.OptimizedSplit(',');\r\n\r\n        public static int[] GetIntegers(this string s, char delimiter = ',')\r\n        {\r\n            if (s == null) return null;\r\n\r\n            string[] cols = s.OptimizedSplit(delimiter);\r\n            var values    = new int[cols.Length];\r\n\r\n            for (var i = 0; i < values.Length; i++)\r\n            {\r\n                (int number, bool foundError) = cols[i].OptimizedParseInt32();\r\n                if (foundError) return null;\r\n                values[i] = number;\r\n            }\r\n\r\n            return values;\r\n        }\r\n        \r\n    }\r\n}"
  },
  {
    "path": "Vcf/Sample/VariantFrequency.cs",
    "content": "﻿namespace Vcf.Sample\r\n{\r\n    internal static class VariantFrequency\r\n    {\r\n        public static double[] GetVariantFrequencies(double? vfField, int[] alleleDepths, int numAltAlleles)\r\n        {\r\n            // use VF\r\n            double[] vf = GetVariantFrequenciesUsingVf(vfField, numAltAlleles > 1) ?? GetVariantFrequenciesUsingAlleleDepths(alleleDepths, numAltAlleles);\r\n\r\n            // use allele depths\r\n\r\n            return vf;\r\n        }\r\n\r\n        private static double[] GetVariantFrequenciesUsingVf(double? vf, bool multipleAltAlleles)\r\n        {\r\n            if (multipleAltAlleles || vf == null) return null;\r\n            return new[] { vf.Value };\r\n        }\r\n\r\n        private static double[] GetVariantFrequenciesUsingAlleleDepths(int[] alleleDepths, int numAltAlleles)\r\n        {\r\n            if (alleleDepths == null) return null;\r\n            if (numAltAlleles + 1 != alleleDepths.Length) return null;\r\n\r\n            var variantFreqs = new double[numAltAlleles];\r\n\r\n            var totalDepth = 0;\r\n            foreach (int ad in alleleDepths) totalDepth += ad;\r\n\r\n            if (totalDepth == 0) return variantFreqs;\r\n\r\n            for (var alleleIndex = 0; alleleIndex < numAltAlleles; alleleIndex++)\r\n            {\r\n                variantFreqs[alleleIndex] = alleleDepths[alleleIndex + 1] / (double)totalDepth;\r\n            }\r\n\r\n            return variantFreqs;\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "Vcf/SimplePosition.cs",
    "content": "﻿using System.Collections.Generic;\r\nusing Genome;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.Positions;\r\n\r\nnamespace Vcf\r\n{\r\n    public sealed class SimplePosition : ISimplePosition\r\n    {\r\n        public int Start { get; }\r\n        public int End { get; private set; }\r\n        public Chromosome Chromosome { get; }\r\n        public string RefAllele { get; }\r\n        public string[] AltAlleles { get; }\r\n        public string[] VcfFields { get; private set; }\r\n        public bool[] IsDecomposed { get; private set; }\r\n        public bool IsRecomposed { get; private set; }\r\n        public string[] Vids { get; private set; }\r\n        public List<string>[] LinkedVids { get; private set; }\r\n\r\n        internal SimplePosition(Chromosome chromosome, int start, string refAllele, string[] altAlleles)\r\n        {\r\n            Chromosome = chromosome;\r\n            Start      = start;\r\n            RefAllele  = refAllele;\r\n            AltAlleles = altAlleles;\r\n        }\r\n\r\n        public static SimplePosition GetSimplePosition(Chromosome chromosome, int position, string[] vcfFields, IVcfFilter vcfFilter)\r\n        {\r\n            if (vcfFilter.PassedTheEnd(chromosome, position)) return null;\r\n\r\n            string refAllele      = vcfFields[VcfCommon.RefIndex];\r\n            string altAlleleField = vcfFields[VcfCommon.AltIndex];\r\n            string[] altAlleles   = altAlleleField.OptimizedSplit(',');\r\n            int numAltAlleles     = altAlleles.Length;\r\n\r\n            return new SimplePosition(chromosome, position, refAllele, altAlleles)\r\n            {\r\n                End          = altAlleleField.OptimizedStartsWith('<') || altAlleleField == \"*\" ? 
-1 : position + refAllele.Length - 1,\r\n                VcfFields    = vcfFields,\r\n                IsDecomposed = new bool[numAltAlleles],\r\n                Vids         = new string[numAltAlleles],\r\n                LinkedVids   = new List<string>[numAltAlleles]\r\n            };\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Vcf/StringExtensions.cs",
    "content": "﻿using System;\r\nusing OptimizedCore;\r\n\r\nnamespace Vcf\r\n{\r\n    public static class StringExtensions\r\n    {\r\n        public delegate bool TryParse<T>(string str, out T value);\r\n\r\n        public static int? GetNullableInt(this string str)\r\n        {\r\n            (int number, bool foundError) = str.OptimizedParseInt32();\r\n            return foundError ? null : (int?) number;\r\n        }\r\n\r\n        public static T? GetNullableValue<T>(this string str, TryParse<T> parseFunc) where T : struct\r\n        {\r\n            try\r\n            {\r\n                if (parseFunc(str, out T val)) return val;\r\n                return null;\r\n            }\r\n            catch (InvalidCastException)\r\n            {\r\n                return null;\r\n            }\r\n        }\r\n\r\n        public static int[] SplitToArray(this string s)\r\n        {\r\n            try\r\n            {\r\n                string[] cols = s.OptimizedSplit(',');\r\n                var values    = new int[cols.Length];\r\n\r\n                for (var i = 0; i < cols.Length; i++)\r\n                {\r\n                    (int number, bool foundError) = cols[i].OptimizedParseInt32();\r\n                    if (foundError) return null;\r\n                    values[i] = number;\r\n                }\r\n\r\n                return values;\r\n            }\r\n            catch (InvalidCastException)\r\n            {\r\n                return null;\r\n            }\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Vcf/VariantCreator/CnvCreator.cs",
    "content": "﻿using Genome;\r\nusing VariantAnnotation.Pools;\r\nusing Variants;\r\n\r\nnamespace Vcf.VariantCreator\r\n{\r\n    public static class CnvCreator\r\n    {\r\n        public static IVariant Create(Chromosome chromosome, int start, int end, string refAllele, string altAllele, string vid)\r\n        {\r\n            var variantType = GetVariantType(altAllele);\r\n            return VariantPool.Get(chromosome, start + 1, end, refAllele, altAllele, variantType, vid, false, false, false,\r\n                null, AnnotationBehavior.StructuralVariants, true);\r\n        }\r\n\r\n        // For old style allelic CNV calls (e.g. <CN1>, <CN4>, etc.),\r\n        // do not try to determine the overall copy number gain or loss\r\n        // - for allele-specific you'll probably introduce inconsistency\r\n        // - for normal <CNV>, you'll probably get type wrong for MT, sex chromosomes, etc.\r\n        private static VariantType GetVariantType(string altAllele)\r\n        {\r\n            if (altAllele == \"<DEL>\") return VariantType.copy_number_loss;\r\n            return altAllele == \"<DUP>\" ? VariantType.copy_number_gain : VariantType.copy_number_variation;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Vcf/VariantCreator/LegacyVariantId.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing System.Security.Cryptography;\r\nusing System.Text;\r\nusing System.Text.RegularExpressions;\r\nusing Genome;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface;\r\nusing Variants;\r\n\r\nnamespace Vcf.VariantCreator\r\n{\r\n    public sealed class LegacyVariantId : IVariantIdCreator\r\n    {\r\n        private readonly Dictionary<string, Chromosome> _refNameToChromosome;\r\n\r\n        public LegacyVariantId(Dictionary<string, Chromosome> refNameToChromosome) => _refNameToChromosome = refNameToChromosome;\r\n\r\n        public string Create(ISequence sequence, VariantCategory category, string svType, Chromosome chromosome, int start, int end,\r\n            string refAllele, string altAllele, string repeatUnit)\r\n        {\r\n            switch (category)\r\n            {\r\n                case VariantCategory.Reference:\r\n                    return $\"{chromosome.EnsemblName}:{start}:{end}:{refAllele}\";\r\n                case VariantCategory.SV:\r\n                    return GetSvVid(_refNameToChromosome, svType, chromosome, start, end, refAllele, altAllele);\r\n                case VariantCategory.CNV:\r\n                    return GetCnvVid(chromosome, start, end, altAllele);\r\n                case VariantCategory.RepeatExpansion:\r\n                    return GetRepeatExpansionVid(chromosome, start,end, altAllele, repeatUnit);\r\n                case VariantCategory.ROH:\r\n                    return $\"{chromosome.EnsemblName}:{start + 1}:{end}:ROH\";\r\n                case VariantCategory.SmallVariant:\r\n                    var variantType = SmallVariantCreator.GetVariantType(refAllele, altAllele);\r\n                    return GetSmallVariantVid(chromosome, start, end, altAllele, variantType);\r\n                default:\r\n                    throw new ArgumentOutOfRangeException(nameof(category), category, null);\r\n            }\r\n        
}\r\n\r\n        public (int Start, string RefAllele, string AltAllele) Normalize(ISequence sequence, int start, string refAllele, string altAllele)\r\n        {\r\n            if (altAllele.Contains('[') || altAllele.Contains(']')) return (start, refAllele, altAllele);\r\n            return BiDirectionalTrimmer.Trim(start, refAllele, altAllele);\r\n        }\r\n\r\n        private static string GetSvVid(Dictionary<string, Chromosome> refNameToChromosome, string svType, Chromosome chromosome, int start, int end, string refAllele, string altAllele)\r\n        {\r\n            var variantType = StructuralVariantCreator.GetVariantType(altAllele, svType);\r\n\r\n            switch (variantType)\r\n            {\r\n                case VariantType.insertion:\r\n                    return $\"{chromosome.EnsemblName}:{start + 1}:{end}:INS\";\r\n\r\n                case VariantType.deletion:\r\n                    return $\"{chromosome.EnsemblName}:{start + 1}:{end}\";\r\n\r\n                case VariantType.duplication:\r\n                    return $\"{chromosome.EnsemblName}:{start + 1}:{end}:DUP\";\r\n\r\n                case VariantType.tandem_duplication:\r\n                    return $\"{chromosome.EnsemblName}:{start + 1}:{end}:TDUP\";\r\n\r\n                case VariantType.translocation_breakend:\r\n                    (Chromosome chromosome2, int position2, bool isSuffix1, bool isSuffix2) = ParseBreakendAltAllele(refNameToChromosome, refAllele, altAllele);\r\n                    char orientation1 = isSuffix1 ? '-' : '+';\r\n                    char orientation2 = isSuffix2 ? 
'+' : '-';\r\n                    return $\"{chromosome.EnsemblName}:{start}:{orientation1}:{chromosome2.EnsemblName}:{position2}:{orientation2}\";\r\n\r\n                case VariantType.inversion:\r\n                    return $\"{chromosome.EnsemblName}:{start + 1}:{end}:Inverse\";\r\n\r\n                case VariantType.mobile_element_insertion:\r\n                    return $\"{chromosome.EnsemblName}:{start + 1}:{end}:MEI\";\r\n\r\n                default:\r\n                    return $\"{chromosome.EnsemblName}:{start + 1}:{end}\";\r\n            }\r\n        }\r\n\r\n        private static (Chromosome Chromosome2, int Position2, bool IsSuffix1, bool IsSuffix2) ParseBreakendAltAllele(\r\n            Dictionary<string, Chromosome> refNameToChromosome, string refAllele, string altAllele)\r\n        {\r\n            string referenceName2;\r\n            int    position2;\r\n            bool   isSuffix2;\r\n\r\n            const string forwardBreakEnd = \"[\";\r\n\r\n            if (altAllele.StartsWith(refAllele))\r\n            {\r\n                var   forwardRegex = new Regex(@\"\\w+([\\[\\]])(.+):(\\d+)([\\[\\]])\", RegexOptions.Compiled);\r\n                Match match        = forwardRegex.Match(altAllele);\r\n\r\n                if (!match.Success)\r\n                    throw new InvalidDataException(\r\n                        \"Unable to successfully parse the complex rearrangements for the following allele: \" + altAllele);\r\n\r\n                isSuffix2      = match.Groups[4].Value == forwardBreakEnd;\r\n                position2      = Convert.ToInt32(match.Groups[3].Value);\r\n                referenceName2 = match.Groups[2].Value;\r\n\r\n                return (ReferenceNameUtilities.GetChromosome(refNameToChromosome, referenceName2), position2, false, isSuffix2);\r\n            }\r\n            else\r\n            {\r\n                var   reverseRegex = new Regex(@\"([\\[\\]])(.+):(\\d+)([\\[\\]])\\w+\", RegexOptions.Compiled);\r\n         
       Match match        = reverseRegex.Match(altAllele);\r\n\r\n                if (!match.Success)\r\n                    throw new InvalidDataException(\r\n                        \"Unable to successfully parse the complex rearrangements for the following allele: \" + altAllele);\r\n\r\n                isSuffix2      = match.Groups[1].Value == forwardBreakEnd;\r\n                position2      = Convert.ToInt32(match.Groups[3].Value);\r\n                referenceName2 = match.Groups[2].Value;\r\n\r\n                return (ReferenceNameUtilities.GetChromosome(refNameToChromosome, referenceName2), position2, true, isSuffix2);\r\n            }\r\n        }\r\n\r\n        private static string GetCnvVid(Chromosome chromosome, int start, int end, string altAllele)\r\n        {\r\n            start++;\r\n            \r\n            switch (altAllele)\r\n            {\r\n                case \"<CNV>\":\r\n                    return $\"{chromosome.EnsemblName}:{start}:{end}:CNV\";\r\n                case \"<DEL>\":\r\n                    return $\"{chromosome.EnsemblName}:{start}:{end}:CDEL\";\r\n                case \"<DUP>\":\r\n                    return $\"{chromosome.EnsemblName}:{start}:{end}:CDUP\";\r\n            }\r\n\r\n            // ReSharper disable once PossibleNullReferenceException\r\n            string trimmedAltAllele = altAllele.Substring(1, altAllele.Length - 2);\r\n            return $\"{chromosome.EnsemblName}:{start}:{end}:{trimmedAltAllele}\";\r\n        }\r\n\r\n        internal static string GetSmallVariantVid(Chromosome chromosome, int start, int end, string altAllele, VariantType variantType)\r\n        {\r\n            switch (variantType)\r\n            {\r\n                case VariantType.SNV:\r\n                    return $\"{chromosome.EnsemblName}:{start}:{altAllele}\";\r\n                case VariantType.insertion:\r\n                    return $\"{chromosome.EnsemblName}:{start}:{end}:{GetInsertedAltAllele(altAllele)}\";\r\n        
        case VariantType.deletion:\r\n                    return $\"{chromosome.EnsemblName}:{start}:{end}\";\r\n                case VariantType.MNV:\r\n                case VariantType.indel:\r\n                    return $\"{chromosome.EnsemblName}:{start}:{end}:{GetInsertedAltAllele(altAllele)}\";\r\n                case VariantType.non_informative_allele:\r\n                    return $\"{chromosome.EnsemblName}:{start}:*\";\r\n                default:\r\n                    throw new ArgumentOutOfRangeException(nameof(variantType), variantType, null);\r\n            }\r\n        }\r\n\r\n        private static string GetInsertedAltAllele(string altAllele)\r\n        {\r\n            if (altAllele.Length <= 32) return altAllele;\r\n\r\n            string insAltAllele;\r\n\r\n            using (var md5Hash = MD5.Create())\r\n            {\r\n                var md5Builder = StringBuilderPool.Get();\r\n                byte[] data = md5Hash.ComputeHash(Encoding.UTF8.GetBytes(altAllele));\r\n\r\n                md5Builder.Clear();\r\n                foreach (byte b in data) md5Builder.Append(b.ToString(\"x2\"));\r\n\r\n                insAltAllele = StringBuilderPool.GetStringAndReturn(md5Builder);\r\n            }\r\n\r\n            return insAltAllele;\r\n        }\r\n\r\n        private static string GetRepeatExpansionVid(Chromosome chromosome, int start, int end, string altAllele,\r\n            string repeatUnit)\r\n        {\r\n            string repeatCount = altAllele.Trim('<', '>').Substring(3);\r\n            return $\"{chromosome.EnsemblName}:{start + 1}:{end}:{repeatUnit}:{repeatCount}\";\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Vcf/VariantCreator/ReferenceVariantCreator.cs",
    "content": "﻿using Genome;\r\nusing VariantAnnotation.Interface;\r\nusing Variants;\r\n\r\nnamespace Vcf.VariantCreator\r\n{\r\n    public static class ReferenceVariantCreator\r\n    {\r\n        public static IVariant[] Create(IVariantIdCreator vidCreator, ISequence sequence, Chromosome chromosome, int start, int end,\r\n            string refAllele, string altAllele, string globalMajorAllele)\r\n        {\r\n            bool isRefMinor = end == start && globalMajorAllele != null;\r\n            if (!isRefMinor) return null;\r\n\r\n            string vid = vidCreator.Create(sequence, VariantCategory.SmallVariant, null, chromosome, start, end, refAllele, altAllele, null);\r\n\r\n            return new[]\r\n            {\r\n                SmallVariantCreator.Create(chromosome, start, end, globalMajorAllele, refAllele, false, false, null, vid, true)\r\n            };\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Vcf/VariantCreator/RepeatExpansionCreator.cs",
    "content": "﻿using ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing OptimizedCore;\r\nusing Variants;\r\n\r\nnamespace Vcf.VariantCreator\r\n{\r\n    public static class RepeatExpansionCreator\r\n    {\r\n        public static IVariant Create(Chromosome chromosome, int start, int end, string refAllele, string altAllele, int? refRepeatCount, string vid)\r\n        {\r\n            (int repeatCount, bool foundError) = altAllele.Trim('<', '>').Substring(3).OptimizedParseInt32();\r\n            if (foundError) throw new UserErrorException($\"Invalid alt allele ({altAllele}) found at {chromosome.UcscName}:{start}\");\r\n\r\n            start++;\r\n\r\n            return new RepeatExpansion(chromosome, start, end, refAllele, altAllele, vid, repeatCount, refRepeatCount);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Vcf/VariantCreator/RohVariantCreator.cs",
    "content": "﻿using Genome;\r\nusing VariantAnnotation.Pools;\r\nusing Variants;\r\n\r\nnamespace Vcf.VariantCreator\r\n{\r\n    public static class RohVariantCreator\r\n    {\r\n        public static IVariant Create(Chromosome chromosome, int start, int end, string refAllele, string altAllele,\r\n            string variantId) => VariantPool.Get(chromosome, start + 1, end, refAllele, altAllele,\r\n            VariantType.run_of_homozygosity, variantId, false, false, false, null,\r\n            AnnotationBehavior.RunsOfHomozygosity, true);\r\n\r\n    }\r\n}"
  },
  {
    "path": "Vcf/VariantCreator/SmallVariantCreator.cs",
    "content": "﻿using Genome;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Pools;\r\nusing Variants;\r\n\r\nnamespace Vcf.VariantCreator\r\n{\r\n    public static class SmallVariantCreator\r\n    {\r\n        public static IVariant Create(Chromosome chromosome, int start, int end, string refAllele, string altAllele,\r\n            bool isDecomposed, bool isRecomposed, string[] linkedVids, string vid, bool isRefMinor)\r\n        {\r\n            var variantType = GetVariantType(refAllele, altAllele);\r\n\r\n            var annotationBehavior = variantType == VariantType.non_informative_allele\r\n                ? AnnotationBehavior.NonInformativeAlleles\r\n                : AnnotationBehavior.SmallVariants;\r\n\r\n            return VariantPool.Get(chromosome, start, end, refAllele, altAllele, variantType, vid, isRefMinor, isDecomposed,\r\n                isRecomposed, linkedVids, annotationBehavior, false);\r\n        }\r\n\r\n        public static VariantType GetVariantType(string refAllele, string altAllele)\r\n        {\r\n            if (VcfCommon.IsNonInformativeAltAllele(altAllele)) return VariantType.non_informative_allele;\r\n\r\n            int referenceAlleleLen = refAllele.Length;\r\n            int alternateAlleleLen = altAllele.Length;\r\n\r\n            if (alternateAlleleLen != referenceAlleleLen)\r\n            {\r\n                if (alternateAlleleLen == 0 && referenceAlleleLen > 0) return VariantType.deletion;\r\n                if (alternateAlleleLen > 0 && referenceAlleleLen == 0) return VariantType.insertion;\r\n\r\n                return VariantType.indel;\r\n            }\r\n\r\n            var variantType = alternateAlleleLen == 1 ? VariantType.SNV : VariantType.MNV;\r\n\r\n            return variantType;\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Vcf/VariantCreator/StructuralVariantCreator.cs",
    "content": "﻿using Genome;\r\nusing VariantAnnotation.Pools;\r\nusing Variants;\r\n\r\nnamespace Vcf.VariantCreator\r\n{\r\n    public static class StructuralVariantCreator\r\n    {\r\n        public static IVariant Create(Chromosome chromosome, int start, int end, string refAllele, string altAllele, string svType, string vid)\r\n        {\r\n            VariantType variantType = GetVariantType(altAllele, svType);\r\n            AnnotationBehavior behavior = variantType == VariantType.translocation_breakend\r\n                ? AnnotationBehavior.BreakendVariants\r\n                : AnnotationBehavior.StructuralVariants;\r\n\r\n            if (variantType != VariantType.translocation_breakend) start++;\r\n\r\n            return VariantPool.Get(chromosome, start, end, refAllele, altAllele, variantType, vid, false, false, false,\r\n                null, behavior, true);\r\n        }\r\n                \r\n        public static VariantType GetVariantType(string altAllele, string svType)\r\n        {\r\n            switch (svType)\r\n            {\r\n                case \"DEL\":\r\n                    return VariantType.deletion;\r\n                case \"INS\":\r\n                    return VariantType.insertion;\r\n                case \"DUP\":\r\n                    return altAllele == \"<DUP:TANDEM>\" ? 
VariantType.tandem_duplication : VariantType.duplication;\r\n                case \"INV\":\r\n                    return VariantType.inversion;\r\n                case \"TDUP\":\r\n                    return VariantType.tandem_duplication;\r\n                case \"BND\":\r\n                    return VariantType.translocation_breakend;\r\n                case \"CNV\":\r\n                    return VariantType.copy_number_variation;\r\n                case \"STR\":\r\n                    return VariantType.short_tandem_repeat_variation;\r\n                case \"ALU\":\r\n                    return VariantType.mobile_element_insertion;\r\n                case \"LINE1\":\r\n                    return VariantType.mobile_element_insertion;\r\n                case \"LOH\":\r\n                    return VariantType.copy_number_variation;\r\n                case \"SVA\":\r\n                    return VariantType.mobile_element_insertion;\r\n                default:\r\n                    return VariantType.unknown;\r\n            }\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Vcf/VariantCreator/VariantFactory.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing Genome;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing Variants;\r\nusing Vcf.Sample;\r\n\r\nnamespace Vcf.VariantCreator\r\n{\r\n    public sealed class VariantFactory\r\n    {\r\n        private readonly IVariantIdCreator _vidCreator;\r\n        private readonly ISequence         _sequence;\r\n        public readonly  FormatIndices     FormatIndices;\r\n\r\n        public VariantFactory(ISequence sequence, IVariantIdCreator vidCreator, HashSet<string> customSampleInfoKeys=null)\r\n        {\r\n            _sequence     = sequence;\r\n            _vidCreator   = vidCreator;\r\n            FormatIndices = new FormatIndices(customSampleInfoKeys);\r\n        }\r\n\r\n        public IVariant[] CreateVariants(Chromosome chromosome, int start, int end, string refAllele,\r\n            string[] altAlleles, IInfoData infoData, bool[] isDecomposedByAllele, bool isRecomposed, List<string>[] linkedVids, string globalMajorAllele)\r\n        {\r\n            bool isReference = globalMajorAllele != null;\r\n\r\n            if (isReference)\r\n                return ReferenceVariantCreator.Create(_vidCreator, _sequence, chromosome, start, end, refAllele, altAlleles[0], globalMajorAllele);\r\n\r\n            var variantCategory = GetVariantCategory(altAlleles[0], infoData.SvType);\r\n\r\n            var variants = new List<IVariant>(altAlleles.Length);\r\n\r\n            for (var i = 0; i < altAlleles.Length; i++)\r\n            {\r\n#if (!NI_ALLELE)\r\n                if (VcfCommon.IsNonInformativeAltAllele(altAlleles[i])) continue;\r\n#endif\r\n                string altAllele = altAlleles[i];\r\n\r\n                bool isDecomposed = isDecomposedByAllele[i];\r\n                if (isDecomposed && isRecomposed) throw new InvalidDataException(\"A variant can't be both 
decomposed and recomposed\");\r\n\r\n                (int shiftedStart, string shiftedRef, string shiftedAlt) =\r\n                    _vidCreator.Normalize(_sequence, start, refAllele, altAllele);\r\n\r\n                if (variantCategory == VariantCategory.SmallVariant || variantCategory == VariantCategory.Reference)\r\n                    end = shiftedStart + shiftedRef.Length - 1;\r\n\r\n                variants.Add(GetVariant(chromosome, shiftedStart, end, shiftedRef, shiftedAlt, infoData, variantCategory,\r\n                    isDecomposed, isRecomposed, linkedVids?[i]?.ToArray()));\r\n            }\r\n\r\n            return variants.Count == 0 ? null : variants.ToArray();\r\n        }\r\n\r\n        internal static VariantCategory GetVariantCategory(string firstAltAllele, string svType)\r\n        {\r\n            bool isSymbolicAllele = IsSymbolicAllele(firstAltAllele);\r\n\r\n            if (IsBreakend(firstAltAllele)) return VariantCategory.SV;\r\n            if (!isSymbolicAllele) return VariantCategory.SmallVariant;\r\n            if (firstAltAllele == \"<ROH>\") return VariantCategory.ROH;\r\n            if (firstAltAllele.StartsWith(\"<STR\")) return VariantCategory.RepeatExpansion;\r\n            return svType == \"CNV\" || svType == \"LOH\" ? 
VariantCategory.CNV : VariantCategory.SV;\r\n        }\r\n\r\n        private static bool IsBreakend(string altAllele) => altAllele.Contains(\"[\") || altAllele.Contains(\"]\");\r\n\r\n        private static bool IsSymbolicAllele(string altAllele) =>\r\n            altAllele.OptimizedStartsWith('<') && altAllele.OptimizedEndsWith('>') && !VcfCommon.IsNonInformativeAltAllele(altAllele);\r\n\r\n        private IVariant GetVariant(Chromosome chromosome, int start, int end, string refAllele, string altAllele,\r\n            IInfoData infoData, VariantCategory category, bool isDecomposed, bool isRecomposed, string[] linkedVids)\r\n        {\r\n            string vid = _vidCreator.Create(_sequence, category, infoData.SvType, chromosome, start, end, refAllele, altAllele, infoData.RepeatUnit);\r\n            int svEnd = infoData.End ?? start;\r\n\r\n            // ReSharper disable once SwitchStatementMissingSomeCases\r\n            switch (category)\r\n            {\r\n                case VariantCategory.SmallVariant:\r\n                    return SmallVariantCreator.Create(chromosome, start, end, refAllele, altAllele, isDecomposed, isRecomposed, linkedVids, vid,\r\n                        false);\r\n\r\n                case VariantCategory.ROH:\r\n                    return RohVariantCreator.Create(chromosome, start, svEnd, refAllele, altAllele, vid);\r\n\r\n                case VariantCategory.SV:\r\n                    return StructuralVariantCreator.Create(chromosome, start, svEnd, refAllele, altAllele, infoData.SvType, vid);\r\n\r\n                case VariantCategory.CNV:\r\n                    return CnvCreator.Create(chromosome, start, svEnd, refAllele, altAllele, vid);\r\n\r\n                case VariantCategory.RepeatExpansion:\r\n                    return RepeatExpansionCreator.Create(chromosome, start, svEnd, refAllele, altAllele, infoData.RefRepeatCount, vid);\r\n\r\n                default:\r\n                    throw new 
NotImplementedException($\"Unrecognized variant category: {category}\");\r\n            }\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "Vcf/VariantCreator/VariantId.cs",
    "content": "﻿using Genome;\r\nusing VariantAnnotation.Interface;\r\nusing Variants;\r\n\r\nnamespace Vcf.VariantCreator\r\n{\r\n    public sealed class VariantId : IVariantIdCreator\r\n    {\r\n        public string Create(ISequence sequence, VariantCategory category, string svType, Chromosome chromosome, int start, int end,\r\n            string refAllele, string altAllele, string repeatUnit)\r\n        {\r\n            if (altAllele == \".\") altAllele = refAllele;\r\n\r\n            // fix N reference\r\n            if (refAllele == \"N\")\r\n            {\r\n                refAllele = sequence.Substring(start - 1, 1);\r\n            }\r\n\r\n            // add padding bases\r\n            if (string.IsNullOrEmpty(refAllele) || string.IsNullOrEmpty(altAllele))\r\n            {\r\n                start--;\r\n                string paddingBase = sequence.Substring(start - 1, 1);\r\n                refAllele = paddingBase + refAllele;\r\n                altAllele = paddingBase + altAllele;\r\n            }\r\n\r\n            if (category == VariantCategory.SmallVariant ||\r\n                category == VariantCategory.Reference ||\r\n                svType   == \"BND\")\r\n            {\r\n                return GetVid(chromosome.EnsemblName, start, refAllele, altAllele);\r\n            }\r\n\r\n            if (category == VariantCategory.RepeatExpansion) svType = \"STR\";\r\n            return GetLongVid(chromosome.EnsemblName, start, end, refAllele, altAllele, svType);\r\n        }\r\n\r\n        public (int Start, string RefAllele, string AltAllele) Normalize(ISequence sequence, int start,\r\n            string refAllele, string altAllele) => VariantUtils.TrimAndLeftAlign(start, refAllele, altAllele, sequence);\r\n\r\n        private static string GetVid(string chromosomeName, int paddedPosition, string paddedRefAllele,\r\n            string paddedAltAllele) =>\r\n            chromosomeName + '-' + paddedPosition + '-' + paddedRefAllele + '-' + 
paddedAltAllele;\r\n\r\n        private static string GetLongVid(string chromosomeName, int paddedPosition, int endPosition,\r\n            string paddedRefAllele, string paddedAltAllele, string svType) =>\r\n            chromosomeName + '-' + paddedPosition + '-' + endPosition + '-' + paddedRefAllele + '-' + paddedAltAllele +\r\n            '-' + svType;\r\n    }\r\n}\r\n"
  },
  {
    "path": "Vcf/Vcf.csproj",
    "content": "﻿<Project Sdk=\"Microsoft.NET.Sdk\">\r\n  <PropertyGroup>\r\n    <TargetFramework>net6.0</TargetFramework>\r\n    <OutputPath>..\\bin\\$(Configuration)</OutputPath>\r\n    \r\n  </PropertyGroup>\r\n  <Import Project=\"..\\CommonAssemblyInfo.props\" />\r\n  <ItemGroup>\r\n    <ProjectReference Include=\"..\\OptimizedCore\\OptimizedCore.csproj\" />\r\n    <ProjectReference Include=\"..\\VariantAnnotation.Interface\\VariantAnnotation.Interface.csproj\" />\r\n    <ProjectReference Include=\"..\\Variants\\Variants.csproj\" />\r\n    <ProjectReference Include=\"..\\MitoHeteroplasmy\\MitoHeteroplasmy.csproj\" />\r\n  </ItemGroup>\r\n</Project>\r\n"
  },
  {
    "path": "Vcf/VcfFilter.cs",
    "content": "﻿using System.IO;\r\nusing Genome;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface.IO;\r\n\r\nnamespace Vcf\r\n{\r\n    public sealed class VcfFilter : IVcfFilter\r\n    {\r\n        private readonly GenomicRange _genomicRange;\r\n        private readonly GenomicRangeChecker _genomicRangeChecker;\r\n        internal string BufferedLine;\r\n\r\n        public VcfFilter(GenomicRange genomicRange)\r\n        {\r\n            _genomicRange = genomicRange;\r\n            _genomicRangeChecker = new GenomicRangeChecker(genomicRange);\r\n        }\r\n\r\n        public void FastForward(StreamReader reader)\r\n        {\r\n            string line;\r\n             while ((line = reader.ReadLine()) != null)\r\n             {\r\n                if (line.StartsWith('#')) continue;\r\n\r\n                string[] fields = line.OptimizedSplit('\\t');\r\n                string chrName = fields[VcfCommon.ChromIndex];\r\n                if (chrName != _genomicRange.Start.Chromosome.UcscName && chrName != _genomicRange.Start.Chromosome.EnsemblName) continue;\r\n\r\n                (int position, bool foundError) = fields[VcfCommon.PosIndex].OptimizedParseInt32();\r\n                if (foundError) throw new InvalidDataException($\"Unable to convert the VCF position to an integer: {fields[VcfCommon.PosIndex]}\");\r\n\r\n                if (position < _genomicRange.Start.Position) continue;\r\n\r\n                BufferedLine = line;\r\n                return;\r\n             }\r\n        }\r\n\r\n        public string GetNextLine(StreamReader reader)\r\n        {\r\n            if (BufferedLine == null)\r\n            {\r\n                return reader.ReadLine();\r\n            }\r\n            string bufferedLine = BufferedLine;\r\n            BufferedLine = null;\r\n\r\n            return bufferedLine;\r\n        }\r\n\r\n        public bool PassedTheEnd(Chromosome chromosome, int position) => _genomicRangeChecker.OutOfRange(chromosome, position);\r\n   
 }\r\n}"
  },
  {
    "path": "Vcf/VcfReader.cs",
    "content": "﻿using System;\r\nusing System.Collections.Generic;\r\nusing System.IO;\r\nusing ErrorHandling.Exceptions;\r\nusing Genome;\r\nusing OptimizedCore;\r\nusing VariantAnnotation.Interface;\r\nusing VariantAnnotation.Interface.IO;\r\nusing VariantAnnotation.Interface.Positions;\r\nusing VariantAnnotation.Interface.Providers;\r\nusing Vcf.VariantCreator;\r\n\r\nnamespace Vcf\r\n{\r\n    public sealed class VcfReader : IVcfReader\r\n    {\r\n        private readonly StreamReader _headerReader;\r\n        private readonly StreamReader _reader;\r\n        private readonly VariantFactory _variantFactory;\r\n        private readonly IRefMinorProvider _refMinorProvider;\r\n        private readonly ISequenceProvider _sequenceProvider;\r\n        private readonly Dictionary<string, Chromosome> _refNameToChromosome;\r\n        private readonly IVcfFilter _vcfFilter;\r\n        private readonly IMitoHeteroplasmyProvider _mitoHeteroplasmyProvider;\r\n        public bool IsRcrsMitochondrion { get; private set; }\r\n        public string VcfLine { get; private set; }\r\n        public GenomeAssembly InferredGenomeAssembly { get; private set; } = GenomeAssembly.Unknown;\r\n\r\n        private string[] _sampleNames;\r\n        private List<string> _headerLines;\r\n        private readonly Queue<ISimplePosition> _queuedPositions = new Queue<ISimplePosition>();\r\n\r\n        private readonly HashSet<string> _observedReferenceNames = new HashSet<string>();\r\n        private string _currentReferenceName;\r\n\r\n        public          string[]        GetSampleNames() => _sampleNames;\r\n        public readonly bool            EnableDq;\r\n        public readonly HashSet<string> CustomInfoKeys;\r\n\r\n        private VcfReader(\r\n            StreamReader headerReader,\r\n            StreamReader vcfLineReader,\r\n            ISequenceProvider sequenceProvider,\r\n            IRefMinorProvider refMinorProvider,\r\n            IVcfFilter vcfFilter,\r\n            
IVariantIdCreator vidCreator,\r\n            IMitoHeteroplasmyProvider mitoHeteroplasmyProvider,\r\n            bool enableDq = false,\r\n            HashSet<string> customInfoKeys = null,\r\n            HashSet<string> customSampleInfoKeys=null\r\n        )\r\n        {\r\n            _headerReader             = headerReader;\r\n            _reader                   = vcfLineReader;\r\n            _variantFactory           = new VariantFactory(sequenceProvider.Sequence, vidCreator, customSampleInfoKeys);\r\n            _sequenceProvider         = sequenceProvider;\r\n            _refMinorProvider         = refMinorProvider;\r\n            _vcfFilter                = vcfFilter;\r\n            _refNameToChromosome      = sequenceProvider.RefNameToChromosome;\r\n            _mitoHeteroplasmyProvider = mitoHeteroplasmyProvider;\r\n            EnableDq                  = enableDq;\r\n            CustomInfoKeys            = customInfoKeys;\r\n        }\r\n\r\n        public static VcfReader Create(StreamReader headerReader, StreamReader vcfLineReader, ISequenceProvider sequenceProvider,\r\n            IRefMinorProvider refMinorProvider, IVcfFilter vcfFilter, IVariantIdCreator vidCreator,\r\n            IMitoHeteroplasmyProvider mitoHeteroplasmyProvider, bool enableDq = false, \r\n            HashSet<string> customInfoKeys=null, HashSet<string> customSampleInfoKeys=null)\r\n        {\r\n            var vcfReader = new VcfReader(headerReader, vcfLineReader, sequenceProvider, refMinorProvider, vcfFilter, \r\n                vidCreator, mitoHeteroplasmyProvider, enableDq, customInfoKeys, customSampleInfoKeys);\r\n            vcfReader.ParseHeader();\r\n            return vcfReader;\r\n        }\r\n        \r\n        private void ParseHeader()\r\n        {\r\n            _headerLines = new List<string>();\r\n\r\n            string line;\r\n            while ((line = _headerReader.ReadLine()) != null)\r\n            {\r\n                CheckContigId(line);\r\n               
 _headerLines.Add(line);\r\n                if (line.StartsWith(VcfCommon.ChromosomeHeader)) break;\r\n            }\r\n\r\n            ValidateVcfHeader();\r\n            _sampleNames = ExtractSampleNames(line);\r\n            _vcfFilter.FastForward(_reader);\r\n        }\r\n\r\n        private void CheckContigId(string line)\r\n        {\r\n            string[] chromAndLengthInfo = GetChromAndLengthInfo(line);\r\n            if (chromAndLengthInfo.Length == 0) return;\r\n\r\n            if (!_refNameToChromosome.TryGetValue(chromAndLengthInfo[0], out Chromosome chromosome)) return;\r\n            if (!int.TryParse(chromAndLengthInfo[1], out int length)) return;\r\n\r\n            var assemblyThisChrom = ContigInfo.GetGenomeAssembly(chromosome, length);\r\n\r\n            if (assemblyThisChrom == GenomeAssembly.rCRS)\r\n            {\r\n                IsRcrsMitochondrion = true;\r\n                return;\r\n            }\r\n\r\n            if (!GenomeAssemblyHelper.AutosomeAndAllosomeAssemblies.Contains(assemblyThisChrom)) return;\r\n\r\n            if (InferredGenomeAssembly == GenomeAssembly.Unknown) InferredGenomeAssembly = assemblyThisChrom;\r\n\r\n            if (InferredGenomeAssembly != assemblyThisChrom)\r\n                throw new UserErrorException($\"Inconsistent genome assemblies inferred:\\ncurrent line \\\"{line}\\\" indicates {assemblyThisChrom}, whereas the lines above it indicate {InferredGenomeAssembly}.\");\r\n        }\r\n\r\n        internal static string[] GetChromAndLengthInfo(string line)\r\n        {\r\n            if (!line.StartsWith(\"##contig=<ID=\")) return Array.Empty<string>();\r\n            if (!line.Contains(\",length=\")) return Array.Empty<string>();\r\n            string[] chromAndLength = line.TrimEnd('>').Substring(13).Split(\",length=\");\r\n            return chromAndLength.Length == 2 ? 
chromAndLength : Array.Empty<string>();\r\n        }\r\n\r\n        private void ValidateVcfHeader()\r\n        {\r\n            if (_headerLines.Count == 0 || !_headerLines[0].StartsWith(\"##fileformat=VCFv\"))\r\n                throw new UserErrorException(\"Please provide a valid VCF file with proper fileformat field.\");\r\n\r\n            if (!_headerLines[_headerLines.Count - 1].StartsWith(VcfCommon.ChromosomeHeader))\r\n                throw new UserErrorException($\"Could not find the vcf header line starting with {VcfCommon.ChromosomeHeader}. Is this a valid vcf file?\");\r\n        }\r\n\r\n        private static string[] ExtractSampleNames(string line)\r\n        {\r\n            string[] cols = line.OptimizedSplit('\\t');\r\n            bool hasSampleGenotypes = cols.Length >= VcfCommon.MinNumColumnsSampleGenotypes;\r\n            if (!hasSampleGenotypes) return null;\r\n\r\n            int numSamples = cols.Length - VcfCommon.GenotypeIndex;\r\n            var samples = new string[numSamples];\r\n            for (var i = 0; i < numSamples; i++) samples[i] = cols[VcfCommon.GenotypeIndex + i];\r\n            return samples;\r\n        }\r\n        \r\n        private ISimplePosition GetNextSimplePosition()\r\n        {\r\n            while (_queuedPositions.Count == 0)\r\n            {\r\n                VcfLine = _vcfFilter.GetNextLine(_reader);\r\n                \r\n                if (VcfLine != null) \r\n                {\r\n                    string[] vcfFields = VcfLine.OptimizedSplit('\\t');\r\n                    var chromosome = ReferenceNameUtilities.GetChromosome(_refNameToChromosome, vcfFields[VcfCommon.ChromIndex]);\r\n                    CheckVcfOrder(vcfFields[VcfCommon.ChromIndex]);\r\n\r\n                    (int start, bool foundError) = vcfFields[VcfCommon.PosIndex].OptimizedParseInt32();\r\n                    if (foundError) throw new InvalidDataException($\"Unable to convert the VCF position to an integer: 
{vcfFields[VcfCommon.PosIndex]}\");\r\n\r\n                    if (InconsistentSampleFields(vcfFields))\r\n                    {\r\n                        int sampleCount = _sampleNames?.Length ?? 0;\r\n                        throw new UserErrorException($\"Inconsistent number of sample fields in line:\\n{VcfLine}\\nExpected number of sample fields: {sampleCount}\");\r\n                    }\r\n                    _queuedPositions.Enqueue(SimplePosition.GetSimplePosition(chromosome, start, vcfFields, _vcfFilter));\r\n                }\r\n\r\n                if (VcfLine == null) break;\r\n            }\r\n\r\n            return _queuedPositions.Count == 0 ? null : _queuedPositions.Dequeue();\r\n        }\r\n\r\n        private bool InconsistentSampleFields(string[] vcfFields)\r\n        {\r\n            int sampleCount = _sampleNames?.Length ?? 0;\r\n            if (sampleCount != 0)\r\n            {\r\n                return vcfFields.Length != VcfCommon.FormatIndex + 1 + sampleCount;\r\n            }\r\n\r\n            return vcfFields.Length != VcfCommon.InfoIndex + 1;\r\n        }\r\n\r\n        private void CheckVcfOrder(string referenceName)\r\n        {\r\n            if (referenceName == _currentReferenceName) return;\r\n\r\n            if (_observedReferenceNames.Contains(referenceName))\r\n            {\r\n                throw new FileNotSortedException(\"The current input vcf file is not sorted. Please sort the vcf file before running variant annotation using a tool like vcf-sort in vcftools.\");\r\n            }\r\n\r\n            _observedReferenceNames.Add(referenceName);\r\n            _currentReferenceName = referenceName;\r\n        }\r\n\r\n        public IPosition GetNextPosition() => Position.ToPosition(GetNextSimplePosition(), \r\n            _refMinorProvider, _sequenceProvider, _mitoHeteroplasmyProvider, _variantFactory, \r\n            EnableDq, CustomInfoKeys);\r\n\r\n        public void Dispose() => _reader?.Dispose();\r\n    }\r\n}\r\n"
  }
]