Repository: Cysharp/Utf8StreamReader
Branch: main
Commit: 71d4683aef6e
Files: 36
Total size: 130.3 KB
Directory structure:
gitextract_yfdau7jl/
├── .editorconfig
├── .github/
│ ├── dependabot.yaml
│ └── workflows/
│ ├── build-debug.yaml
│ ├── build-release.yaml
│ └── stale.yaml
├── .gitignore
├── Directory.Build.props
├── LICENSE
├── README.md
├── Utf8StreamReader.sln
├── opensource.snk
├── sandbox/
│ ├── Benchmark/
│ │ ├── Benchmark.csproj
│ │ ├── BytesReadToEnd.cs
│ │ ├── FromFile.cs
│ │ ├── FromMemory.cs
│ │ ├── Program.cs
│ │ └── ReadToEndString.cs
│ └── ConsoleApp1/
│ ├── ConsoleApp1.csproj
│ ├── Program.cs
│ ├── ReadMeSample.cs
│ ├── RespReader.cs
│ └── file1.txt
├── src/
│ └── Utf8StreamReader/
│ ├── SegmentedArrayBufferWriter.cs
│ ├── Utf8StreamReader.cs
│ ├── Utf8StreamReader.csproj
│ └── Utf8TextReader.cs
└── tests/
└── Utf8StreamReader.Tests/
├── FakeMemoryStream.cs
├── FileReadTest.cs
├── ReadBlockTest.cs
├── ReadTest.cs
├── ReadToEndTest.cs
├── SegmentedArrayBufferWriterTest.cs
├── Tests.cs
├── TextReaderTest.cs
├── Utf8StreamReader.Tests.csproj
└── file1.txt
================================================
FILE CONTENTS
================================================
================================================
FILE: .editorconfig
================================================
# top-most EditorConfig file
root = true
[*]
charset = utf-8
end_of_line = lf
indent_style = space
indent_size = 2
insert_final_newline = true
trim_trailing_whitespace = true
# Visual Studio Spell checker configs (https://learn.microsoft.com/en-us/visualstudio/ide/text-spell-checker?view=vs-2022#how-to-customize-the-spell-checker)
spelling_exclusion_path = ./exclusion.dic
[*.cs]
indent_size = 4
charset = utf-8-bom
end_of_line = unset
# Solution files
[*.{sln,slnx}]
end_of_line = unset
# MSBuild project files
[*.{csproj,props,targets}]
end_of_line = unset
# Xml config files
[*.{ruleset,config,nuspec,resx,runsettings,DotSettings}]
end_of_line = unset
[*{_AssemblyInfo.cs,.notsupported.cs}]
generated_code = true
# C# code style settings
[*.{cs}]
dotnet_style_coalesce_expression = true:suggestion
dotnet_style_null_propagation = true:suggestion
dotnet_style_prefer_is_null_check_over_reference_equality_method = true:suggestion
dotnet_style_prefer_auto_properties = true:suggestion
dotnet_style_object_initializer = true:suggestion
dotnet_style_prefer_collection_expression = true:suggestion
dotnet_style_collection_initializer = true:suggestion
dotnet_style_prefer_simplified_boolean_expressions = true:suggestion
dotnet_style_prefer_conditional_expression_over_assignment = true:silent
dotnet_style_prefer_conditional_expression_over_return = true:silent
dotnet_style_explicit_tuple_names = true:suggestion
dotnet_style_prefer_inferred_tuple_names = true:suggestion
dotnet_style_prefer_inferred_anonymous_type_member_names = true:suggestion
dotnet_style_prefer_compound_assignment = true:suggestion
dotnet_style_prefer_simplified_interpolation = true:suggestion
dotnet_style_namespace_match_folder = true:suggestion
dotnet_style_readonly_field = true:suggestion
dotnet_style_predefined_type_for_member_access = true:suggestion
dotnet_style_predefined_type_for_locals_parameters_members = true:suggestion
dotnet_style_require_accessibility_modifiers = for_non_interface_members:silent
dotnet_style_allow_statement_immediately_after_block_experimental = true:silent
dotnet_style_allow_multiple_blank_lines_experimental = true:silent
dotnet_code_quality_unused_parameters = non_public:suggestion
dotnet_style_parentheses_in_relational_binary_operators = always_for_clarity:silent
dotnet_style_parentheses_in_other_binary_operators = always_for_clarity:silent
dotnet_style_parentheses_in_arithmetic_binary_operators = always_for_clarity:silent
dotnet_style_parentheses_in_other_operators = never_if_unnecessary:silent
dotnet_style_qualification_for_method = false:none
dotnet_style_qualification_for_property = false:none
dotnet_style_qualification_for_field = false:none
dotnet_style_qualification_for_event = false:none
# New line preferences
csharp_new_line_before_open_brace = all
csharp_new_line_before_else = true
csharp_new_line_before_catch = true
csharp_new_line_before_finally = true
csharp_new_line_before_members_in_object_initializers = true
csharp_new_line_before_members_in_anonymous_types = true
csharp_new_line_between_query_expression_clauses = true
# Indentation preferences
csharp_indent_block_contents = true
csharp_indent_braces = false
csharp_indent_case_contents = true
csharp_indent_case_contents_when_block = true
csharp_indent_switch_labels = true
csharp_indent_labels = one_less_than_current
# Modifier preferences
csharp_preferred_modifier_order = public,private,protected,internal,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,volatile,async:suggestion
# avoid this. unless absolutely necessary
dotnet_style_qualification_for_field = false:none
dotnet_style_qualification_for_property = false:none
dotnet_style_qualification_for_method = false:none
dotnet_style_qualification_for_event = false:none
# Types: use keywords instead of BCL types, and permit var only when the type is clear
csharp_style_var_for_built_in_types = false:none
csharp_style_var_when_type_is_apparent = false:none
csharp_style_var_elsewhere = false:none
dotnet_style_predefined_type_for_locals_parameters_members = true:suggestion
dotnet_style_predefined_type_for_member_access = true:suggestion
# name all constant fields using PascalCase
dotnet_naming_rule.constant_fields_should_be_pascal_case.severity = suggestion
dotnet_naming_rule.constant_fields_should_be_pascal_case.symbols = constant_fields
dotnet_naming_rule.constant_fields_should_be_pascal_case.style = pascal_case_style
dotnet_naming_symbols.constant_fields.applicable_kinds = field
dotnet_naming_symbols.constant_fields.required_modifiers = const
dotnet_naming_style.pascal_case_style.capitalization = pascal_case
# static fields
dotnet_naming_rule.static_fields_should_have_prefix.severity = none
dotnet_naming_rule.static_fields_should_have_prefix.symbols = static_fields
dotnet_naming_rule.static_fields_should_have_prefix.style = static_prefix_style
dotnet_naming_symbols.static_fields.applicable_kinds = field
dotnet_naming_symbols.static_fields.required_modifiers = static
dotnet_naming_symbols.static_fields.applicable_accessibilities = private, internal, private_protected
dotnet_naming_style.static_prefix_style.required_prefix = s_
dotnet_naming_style.static_prefix_style.capitalization = camel_case
# internal and private fields
dotnet_naming_rule.camel_case_for_private_internal_fields.severity = none
dotnet_naming_rule.camel_case_for_private_internal_fields.symbols = private_internal_fields
dotnet_naming_rule.camel_case_for_private_internal_fields.style = camel_case_underscore_style
dotnet_naming_symbols.private_internal_fields.applicable_kinds = field
dotnet_naming_symbols.private_internal_fields.applicable_accessibilities = private, internal
dotnet_naming_style.camel_case_underscore_style.required_prefix = _
dotnet_naming_style.camel_case_underscore_style.capitalization = camel_case
# Code style defaults
csharp_using_directive_placement = outside_namespace:suggestion
csharp_prefer_braces = true:silent
csharp_preserve_single_line_blocks = true:none
csharp_preserve_single_line_statements = false:none
csharp_prefer_static_local_function = true:suggestion
csharp_prefer_simple_using_statement = false:none
csharp_style_prefer_switch_expression = true:suggestion
# Code quality
dotnet_style_readonly_field = true:suggestion
dotnet_code_quality_unused_parameters = non_public:suggestion
# Expression-level preferences
dotnet_style_object_initializer = true:suggestion
dotnet_style_collection_initializer = true:suggestion
dotnet_style_explicit_tuple_names = true:suggestion
dotnet_style_coalesce_expression = true:suggestion
dotnet_style_null_propagation = true:suggestion
dotnet_style_prefer_is_null_check_over_reference_equality_method = true:suggestion
dotnet_style_prefer_inferred_tuple_names = true:suggestion
dotnet_style_prefer_inferred_anonymous_type_member_names = true:suggestion
dotnet_style_prefer_auto_properties = true:suggestion
dotnet_style_prefer_conditional_expression_over_assignment = true:silent
dotnet_style_prefer_conditional_expression_over_return = true:silent
csharp_prefer_simple_default_expression = true:suggestion
# Expression-bodied members
csharp_style_expression_bodied_methods = true:silent
csharp_style_expression_bodied_constructors = true:silent
csharp_style_expression_bodied_operators = true:silent
csharp_style_expression_bodied_properties = true:silent
csharp_style_expression_bodied_indexers = true:silent
csharp_style_expression_bodied_accessors = true:silent
csharp_style_expression_bodied_lambdas = true:silent
csharp_style_expression_bodied_local_functions = true:silent
# Pattern matching
csharp_style_pattern_matching_over_is_with_cast_check = true:suggestion
csharp_style_pattern_matching_over_as_with_null_check = true:suggestion
csharp_style_inlined_variable_declaration = true:suggestion
# Null checking preferences
csharp_style_throw_expression = true:suggestion
csharp_style_conditional_delegate_call = true:suggestion
# Other features
csharp_style_prefer_index_operator = false:none
csharp_style_prefer_range_operator = false:none
csharp_style_pattern_local_over_anonymous_function = false:none
# Space preferences
csharp_space_after_cast = false
csharp_space_after_colon_in_inheritance_clause = true
csharp_space_after_comma = true
csharp_space_after_dot = false
csharp_space_after_keywords_in_control_flow_statements = true
csharp_space_after_semicolon_in_for_statement = true
csharp_space_around_binary_operators = before_and_after
csharp_space_around_declaration_statements = do_not_ignore
csharp_space_before_colon_in_inheritance_clause = true
csharp_space_before_comma = false
csharp_space_before_dot = false
csharp_space_before_open_square_brackets = false
csharp_space_before_semicolon_in_for_statement = false
csharp_space_between_empty_square_brackets = false
csharp_space_between_method_call_empty_parameter_list_parentheses = false
csharp_space_between_method_call_name_and_opening_parenthesis = false
csharp_space_between_method_call_parameter_list_parentheses = false
csharp_space_between_method_declaration_empty_parameter_list_parentheses = false
csharp_space_between_method_declaration_name_and_open_parenthesis = false
csharp_space_between_method_declaration_parameter_list_parentheses = false
csharp_space_between_parentheses = false
csharp_space_between_square_brackets = false
# Analyzers
dotnet_code_quality.CA1052.api_surface = private, internal
dotnet_code_quality.CA1802.api_surface = private, internal
dotnet_code_quality.CA1822.api_surface = private, internal
dotnet_code_quality.CA2208.api_surface = public
# IDE0008: Use explicit type
dotnet_diagnostic.IDE0008.severity = none
# IDE0090: Use 'new(...)'
dotnet_diagnostic.IDE0090.severity = none
# IDE0040: Add accessibility modifiers
dotnet_diagnostic.IDE0040.severity = none
# Nullability in reference types of interface implemented by the base type doesn't match
dotnet_diagnostic.CS8644.severity = none
dotnet_diagnostic.CA1816.severity = none
dotnet_diagnostic.IDE1006.severity = none
#Remove unnecessary suppression
dotnet_diagnostic.IDE0079.severity = none
dotnet_diagnostic.IDE0130.severity = none
dotnet_diagnostic.CA1822.severity = none
csharp_style_prefer_switch_expression = false:suggestion
csharp_style_pattern_matching_over_as_with_null_check = false:suggestion
dotnet_naming_symbols.functional_symbols.applicable_kinds = property,method,event,delegate
dotnet_naming_style.pascal_case_style.capitalization = pascal_case
dotnet_naming_rule.functional_symbols_must_be_capitalized.symbols = functional_symbols
dotnet_naming_rule.functional_symbols_must_be_capitalized.style = pascal_case_style
dotnet_naming_rule.functional_symbols_must_be_capitalized.severity = warning
dotnet_naming_symbols.public_symbols.applicable_kinds = property,method,field,event,delegate
dotnet_naming_symbols.public_symbols.applicable_accessibilities = public
dotnet_naming_symbols.public_symbols.required_modifiers = readonly
dotnet_naming_style.first_word_upper_case_style.capitalization = first_word_upper
dotnet_naming_rule.public_members_must_be_capitalized.symbols = public_symbols
dotnet_naming_rule.public_members_must_be_capitalized.style = first_word_upper_case_style
dotnet_naming_rule.public_members_must_be_capitalized.severity = warning
csharp_style_expression_bodied_methods = false:silent
csharp_style_expression_bodied_constructors = false:silent
csharp_style_expression_bodied_operators = false:silent
csharp_style_namespace_declarations = file_scoped:suggestion
csharp_style_prefer_method_group_conversion = true:silent
csharp_style_prefer_top_level_statements = true:silent
csharp_style_prefer_primary_constructors = true:suggestion
csharp_style_prefer_null_check_over_type_check = true:suggestion
csharp_style_prefer_local_over_anonymous_function = true:suggestion
csharp_style_implicit_object_creation_when_type_is_apparent = true:suggestion
csharp_style_prefer_tuple_swap = true:suggestion
csharp_style_prefer_utf8_string_literals = true:suggestion
csharp_style_deconstructed_variable_declaration = true:suggestion
csharp_style_unused_value_assignment_preference = discard_variable:suggestion
csharp_style_unused_value_expression_statement_preference = discard_variable:silent
csharp_style_prefer_readonly_struct_member = true:suggestion
csharp_style_prefer_readonly_struct = true:suggestion
csharp_style_allow_embedded_statements_on_same_line_experimental = true:silent
csharp_style_allow_blank_line_after_token_in_arrow_expression_clause_experimental = true:silent
csharp_style_allow_blank_line_after_token_in_conditional_expression_experimental = true:silent
csharp_style_allow_blank_line_after_colon_in_constructor_initializer_experimental = true:silent
csharp_style_allow_blank_lines_between_consecutive_braces_experimental = true:silent
csharp_style_prefer_pattern_matching = true:silent
csharp_style_prefer_extended_property_pattern = true:suggestion
csharp_style_prefer_not_pattern = true:suggestion
================================================
FILE: .github/dependabot.yaml
================================================
# ref: https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly" # Check for updates to GitHub Actions every week
groups:
dependencies:
patterns:
- "*"
cooldown:
default-days: 14 # Wait 14 days after a new version is released before opening a PR. This limits exposure to a compromised or vulnerable package release.
ignore:
# Only update an action when its major/minor version changes; patch updates are too noisy.
- dependency-name: "*"
update-types:
- version-update:semver-patch
================================================
FILE: .github/workflows/build-debug.yaml
================================================
name: Build-Debug
on:
push:
branches:
- "main"
pull_request:
branches:
- "main"
jobs:
build-dotnet:
permissions:
contents: read
runs-on: ubuntu-24.04
timeout-minutes: 10
steps:
- uses: Cysharp/Actions/.github/actions/checkout@main
- uses: Cysharp/Actions/.github/actions/setup-dotnet@main
- run: dotnet build -c Release
- run: dotnet test -c Release --no-build
- run: dotnet pack -c Release --no-build -p:IncludeSymbols=true -p:SymbolPackageFormat=snupkg -o $GITHUB_WORKSPACE/artifacts
================================================
FILE: .github/workflows/build-release.yaml
================================================
name: Build-Release
on:
workflow_dispatch:
inputs:
tag:
description: "tag: git tag you want to create. (sample 1.0.0)"
required: true
dry-run:
description: "dry-run: true will never create release/nuget."
required: true
default: false
type: boolean
jobs:
build-dotnet:
permissions:
contents: read
runs-on: ubuntu-24.04
timeout-minutes: 10
steps:
- uses: Cysharp/Actions/.github/actions/checkout@main
- uses: Cysharp/Actions/.github/actions/setup-dotnet@main
- run: dotnet build -c Release -p:Version=${{ inputs.tag }}
- run: dotnet test -c Release --no-build
- run: dotnet pack -c Release --no-build -p:Version=${{ inputs.tag }} -o ./publish
# Store artifacts.
- uses: Cysharp/Actions/.github/actions/upload-artifact@main
with:
name: nuget
path: ./publish/
# release
create-release:
needs: [build-dotnet]
permissions:
contents: write
id-token: write # required for NuGet Trusted Publish
uses: Cysharp/Actions/.github/workflows/create-release.yaml@main
with:
commit-id: ""
dry-run: ${{ inputs.dry-run }}
tag: ${{ inputs.tag }}
nuget-push: true
release-upload: false
secrets: inherit
================================================
FILE: .github/workflows/stale.yaml
================================================
name: "Close stale issues"
on:
workflow_dispatch:
schedule:
- cron: "0 0 * * *"
jobs:
stale:
permissions:
contents: read
pull-requests: write
issues: write
uses: Cysharp/Actions/.github/workflows/stale-issue.yaml@main
================================================
FILE: .gitignore
================================================
# Build Folders (you can keep bin if you'd like, to store dlls and pdbs)
[Bb]in/
[Oo]bj/
# mstest test results
TestResults
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
# User-specific files
*.suo
*.user
*.sln.docstates
# Build results
[Dd]ebug/
[Rr]elease/
x64/
*_i.c
*_p.c
*.ilk
*.obj
*.pch
*.pdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.log
*.vspscc
*.vssscc
.builds
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opensdf
*.sdf
# Visual Studio profiler
*.psess
*.vsp
*.vspx
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*
# NCrunch
*.ncrunch*
.*crunch*.local.xml
# Installshield output folder
[Ee]xpress
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish
# Publish Web Output
*.Publish.xml
# NuGet Packages Directory
packages
# Windows Azure Build Output
csx
*.build.csdef
# Windows Store app package directory
AppPackages/
# Others
[Bb]in
[Oo]bj
sql
TestResults
[Tt]est[Rr]esult*
*.Cache
ClientBin
[Ss]tyle[Cc]op.*
~$*
*.dbmdl
Generated_Code #added for RIA/Silverlight projects
# Backup & report files from converting an old project file to a newer
# Visual Studio version. Backup files are not needed, because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
.vs/config/applicationhost.config
.vs/restore.dg
# OTHER
nuget/tools/*
*.nupkg
.vs
**/.DS_Store
.idea
# publish directory
out/
*.tsbuildinfo
# BenchmarkDotNet Artifacts
BenchmarkDotNet.Artifacts/
================================================
FILE: Directory.Build.props
================================================
true
$(MSBuildThisFileDirectory)opensource.snk
false
$(Version)
Cysharp
Cysharp
© Cysharp, Inc.
https://github.com/Cysharp/Utf8StreamReader
README.md
$(PackageProjectUrl)
git
MIT
Icon.png
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2024 Cysharp, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
# Utf8StreamReader
[](https://github.com/Cysharp/Utf8StreamReader/actions) [](https://github.com/Cysharp/Utf8StreamReader/releases)
[](https://nuget.org/packages/Utf8StreamReader)
UTF-8 based StreamReader for high-performance text processing. In addition to UTF-8 based binary processing, it can also be used as a high-performance replacement for StreamReader and as a helper for fast binary reading.
Avoiding unnecessary string allocation is a fundamental aspect of recent .NET performance improvements. Given that most file and network data is in UTF8, features like [JsonSerializer](https://learn.microsoft.com/en-us/dotnet/api/system.text.json.jsonserializer?view=net-8.0) and [IUtf8SpanParsable](https://learn.microsoft.com/en-us/dotnet/api/system.iutf8spanparsable-1?view=net-8.0), which operate on UTF8-based data, have been added. More recently, methods like [.NET8 MemoryExtensions.Split](https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.split?view=net-8.0), which avoids allocations, have also been introduced.
However, for the most common use case of parsing strings delimited by newlines, only the traditional [StreamReader](https://learn.microsoft.com/en-us/dotnet/api/system.io.streamreader) is provided, which generates a new String for each line, resulting in a large amount of allocations.

> Read simple 1000000 lines text
Incredibly, there is a **240,000 times** difference!
While it is possible to process data in UTF8 format using standard classes like [PipeReader](https://learn.microsoft.com/en-us/dotnet/api/system.io.pipelines.pipereader?view=dotnet-plat-ext-8.0) and [SequenceReader](https://learn.microsoft.com/en-us/dotnet/api/system.buffers.sequencereader-1?view=net-8.0), they are generic libraries, so properly handling newline processing requires considerable effort (handling the BOM and multiple types of newline characters).
`Utf8StreamReader` provides a familiar API similar to StreamReader, making it easy to use, while its ReadLine-specific implementation maximizes performance.
By using optimized internal processing, higher performance can be achieved when reading Strings from Files compared to using the standard `StreamReader.ReadToEnd` or `File.ReadAllText` methods.

> Read from file(1000000 lines text)
```csharp
[Benchmark]
public async Task StreamReaderReadToEndAsync()
{
using var sr = new System.IO.StreamReader(filePath);
return await sr.ReadToEndAsync();
}
[Benchmark]
public async Task Utf8TextReaderReadToEndAsync()
{
using var sr = new Cysharp.IO.Utf8StreamReader(filePath).AsTextReader();
return await sr.ReadToEndAsync();
}
[Benchmark]
public async Task FileReadAllTextAsync()
{
return await File.ReadAllTextAsync(filePath);
}
```
For an explanation of the performance difference, please refer to the [ReadString Section](#readstring).
## Getting Started
This library is distributed via NuGet, supporting `.NET Standard 2.1`, `.NET 6(.NET 7)` and `.NET 8` or above. For information on usage with Unity, please refer to the [Unity Section](#unity).
PM> Install-Package [Utf8StreamReader](https://www.nuget.org/packages/Utf8StreamReader)
The basic API involves `using var streamReader = new Utf8StreamReader(stream);` and then `ReadOnlyMemory<byte>? line = await streamReader.ReadLineAsync();`. When enumerating all lines, you can choose from three styles:
```csharp
using Cysharp.IO; // namespace of Utf8StreamReader
public async Task Sample1(Stream stream)
{
using var reader = new Utf8StreamReader(stream);
// Most performant style, similar as System.Threading.Channels
while (await reader.LoadIntoBufferAsync())
{
while (reader.TryReadLine(out var line))
{
// line is ReadOnlyMemory, deserialize UTF8 directly.
_ = JsonSerializer.Deserialize(line.Span);
}
}
}
public async Task Sample2(Stream stream)
{
using var reader = new Utf8StreamReader(stream);
// Classical style, same as StreamReader
ReadOnlyMemory? line = null;
while ((line = await reader.ReadLineAsync()) != null)
{
_ = JsonSerializer.Deserialize(line.Value.Span);
}
}
public async Task Sample3(Stream stream)
{
using var reader = new Utf8StreamReader(stream);
// Most easiest style, use async streams
await foreach (var line in reader.ReadAllLinesAsync())
{
_ = JsonSerializer.Deserialize(line.Span);
}
}
```
From a performance perspective, `Utf8StreamReader` only provides asynchronous APIs.
Theoretically, the highest performance can be achieved by combining `LoadIntoBufferAsync` and `TryReadLine` in a double while loop. This is similar to the combination of `WaitToReadAsync` and `TryRead` in [Channels](https://learn.microsoft.com/en-us/dotnet/core/extensions/channels).
`ReadLineAsync`, like StreamReader.ReadLine, returns null to indicate that the end has been reached.
`ReadAllLinesAsync` returns an `IAsyncEnumerable<ReadOnlyMemory<byte>>`. Although there is a performance difference, it is minimal, so this API is ideal when you want ease of use.
All asynchronous methods accept a `CancellationToken` and support cancellation.
For a real-world usage example, refer to [StreamMessageReader.cs](https://github.com/Cysharp/Claudia/blob/main/src/Claudia/StreamMessageReader.cs) in [Cysharp/Claudia](https://github.com/Cysharp/Claudia/), a C# SDK for Anthropic Claude, which parses [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events).
## Buffer Lifetimes
The `ReadOnlyMemory<byte>` returned from `ReadLineAsync` or `TryReadLine` is only valid until the next call to `LoadIntoBufferAsync`, `TryReadLine`, or `ReadLineAsync`. Since the data is shared with the internal buffer, it may be overwritten, moved, or returned on the next call, so the safety of the data cannot be guaranteed. The received data must be promptly parsed and converted into a separate object. If you want to keep the data as is, use `ToArray()` to convert it to a `byte[]`.
This design is similar to [System.IO.Pipelines](https://learn.microsoft.com/en-us/dotnet/standard/io/pipelines).
## Read as `ReadOnlyMemory<char>`
You can convert it to a `Utf8TextReader` that extracts `ReadOnlyMemory<char>` or `string`. Although there is a conversion cost, it is still fast and low allocation, so it can be used as an alternative to `StreamReader`.

After converting with `AsTextReader()`, all the same methods (`TryReadLine`, `ReadLineAsync`, `LoadIntoBufferAsync`, `ReadAllLinesAsync`) can be used.
```csharp
using var sr = new Cysharp.IO.Utf8StreamReader(ms).AsTextReader();
while (await sr.LoadIntoBufferAsync())
{
while (sr.TryReadLine(out var line))
{
// line is ReadOnlyMemory, you can add to StringBuilder or other parsing method.
// If you need a string, ReadOnlyMemory.ToString() builds a string instance
// string str = line.ToString();
}
}
```
You can perform text processing without allocation, such as splitting `ReadOnlySpan<char>` using [MemoryExtensions.Split](https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.split?view=net-8.0#system-memoryextensions-split(system-readonlyspan((system-char))-system-span((system-range))-system-char-system-stringsplitoptions)), and concatenate the results using StringBuilder's [`Append/AppendLine(ReadOnlySpan<char>)`](https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.append). This way, string-based processing can be done with much lower allocation compared to `StreamReader`.
When a string is needed, you can convert `ReadOnlyMemory<char>` to a string using `ToString()`. Even with the added string conversion, the performance is higher than `StreamReader`, so it can be used as a better alternative.
## Optimizing FileStream
Similar to `StreamReader`, `Utf8StreamReader` has the ability to open a `FileStream` by accepting a `string path`.
```csharp
public Utf8StreamReader(string path, FileOpenMode fileOpenMode = FileOpenMode.Throughput)
public Utf8StreamReader(string path, int bufferSize, FileOpenMode fileOpenMode = FileOpenMode.Throughput)
public Utf8StreamReader(string path, FileStreamOptions options)
public Utf8StreamReader(string path, FileStreamOptions options, int bufferSize)
```
Unfortunately, the `FileStream` used by `StreamReader` is not optimized for modern .NET. For example, when using `FileStream` with asynchronous methods, it should be opened with `useAsync: true` for optimal performance. However, since `StreamReader` has both synchronous and asynchronous methods in its API, `useAsync: false` is specified. Additionally, although `StreamReader` itself has a buffer and `FileStream` does not require a buffer, the buffer of `FileStream` is still being utilized.
It is difficult to handle `FileStream` correctly with high performance. By specifying a `string path`, the stream is opened with options optimized for `Utf8StreamReader`, so it is recommended to use this overload rather than opening `FileStream` yourself. The following is a benchmark of `FileStream`.

`Utf8StreamReader` opens `FileStream` with the following settings:
```csharp
var useAsync = (fileOpenMode == FileOpenMode.Scalability);
new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 1, useAsync: useAsync)
```
Due to historical reasons, the options for `FileStream` are odd, but by setting `bufferSize` to 1, you can avoid the use of internal buffers. `FileStream` has been significantly revamped in .NET 6, and by controlling the setting of this option and the way `Utf8StreamReader` is called as a whole, it can function as a thin wrapper around the fast [RandomAccess.ReadAsync](https://learn.microsoft.com/en-us/dotnet/api/system.io.randomaccess.readasync), allowing you to avoid most of the overhead of FileStream.
`FileOpenMode` is a proprietary option of `Utf8StreamReader`.
```csharp
public enum FileOpenMode
{
Scalability,
Throughput
}
```
In a Windows environment, the table in the [IO section of the Performance Improvements in .NET 6 blog](https://devblogs.microsoft.com/dotnet/performance-improvements-in-net-6/#io) shows that throughput decreases when `useAsync: true` is used.
| Method | Runtime | IsAsync | BufferSize | Mean |
| - | - | - | - | - |
| ReadAsync | .NET 6.0 | True | 1 | 119.573 ms |
| ReadAsync | .NET 6.0 | False | 1 | 36.018 ms |
By setting `Utf8StreamReader` to `FileOpenMode.Scalability`, true async I/O is enabled and scalability is prioritized. If set to `FileOpenMode.Throughput`, it internally becomes sync-over-async and consumes the ThreadPool, but reduces the overhead of asynchronous I/O and improves throughput.
For code that is executed frequently within a server application, `Scalability` will likely yield the best performance characteristics; for batch applications, `Throughput` will. The default is `Throughput`. (In the current .NET implementation, both seem to behave the same in Linux environments, similar to `Throughput` on Windows.)
In `Utf8StreamReader`, by carefully adjusting the buffer size on the `Utf8StreamReader` side, the performance difference is minimized. Please refer to the above benchmark results image for specific values.
For overloads that accept `FileStreamOptions`, the above settings are not reflected, so please adjust them manually.
## ReadString
By combining the above FileStream optimization with `.AsTextReader().ReadToEndAsync()`, you can achieve higher performance when reading out a `string` compared to `StreamReader.ReadToEnd` or `File.ReadAllText`.

The implementation of `File.ReadAllText` in dotnet/runtime uses `StreamReader.ReadToEnd`, so they are almost the same. However, in the case of `File.ReadAllText`, it uses `useAsync: true` when opening the `FileStream`. That accounts for the performance difference in the benchmark.
Another significant difference in the implementation is that `Utf8StreamReader` generates a `string` without using `StringBuilder`. `StreamReader.ReadToEnd` generates a string using the following flow: `byte[] buffer` -> `char[] decodeBuffer` -> `StringBuilder.Append(char[])` -> `StringBuilder.ToString()`, but there are removable inefficiencies. Both `char[]` and `StringBuilder` are `char[]` buffers, and copying occurs. By generating a `string` directly from `char[]`, the copy to the internal buffer of `StringBuilder` can be eliminated.
In `Utf8StreamReader`'s `.AsTextReader().ReadToEndAsync()`, it receives streaming data in read buffer units from `Utf8StreamReader` (`ReadToEndChunksAsync`), converts it to `char[]` chunks using `Decoder`, and generates the string all at once using `string.Create`.
```csharp
// Utf8TextReader is a helper class for ReadOnlyMemory and string generation that internally holds Utf8StreamReader
public async ValueTask<string> ReadToEndAsync(CancellationToken cancellationToken = default)
{
// Using a method similar to .NET 9 LINQ to Objects's ToArray improvement, returns a structure optimized for gap-free sequential expansion
// StreamReader.ReadToEnd copies the buffer to a StringBuilder, but this implementation holds char[] chunks(char[][]) without copying.
using var writer = new SegmentedArrayBufferWriter<char>();
var decoder = Encoding.UTF8.GetDecoder();
// Utf8StreamReader.ReadToEndChunksAsync returns the internal buffer ReadOnlyMemory as an asynchronous sequence upon each read completion
await foreach (var chunk in reader.ReadToEndChunksAsync(cancellationToken).ConfigureAwait(reader.ConfigureAwait))
{
var input = chunk;
while (input.Length != 0)
{
// The Decoder directly writes from the read buffer to the char[] buffer
decoder.Convert(input.Span, writer.GetMemory().Span, flush: false, out var bytesUsed, out var charsUsed, out var completed);
input = input.Slice(bytesUsed);
writer.Advance(charsUsed);
}
}
decoder.Convert([], writer.GetMemory().Span, flush: true, out _, out var finalCharsUsed, out _);
writer.Advance(finalCharsUsed);
// Directly generate a string from the char[][] buffer using String.Create
return string.Create(writer.WrittenCount, writer, static (stringSpan, writer) =>
{
foreach (var item in writer.GetSegmentsAndDispose())
{
item.Span.CopyTo(stringSpan);
stringSpan = stringSpan.Slice(item.Length);
}
});
}
```
SegmentedArrayBufferWriter borrows the idea (which I proposed) from [the performance improvement of ToArray in LINQ in .NET 9](https://github.com/dotnet/runtime/pull/96570), and internally holds an InlineArray that expands by equal multipliers.
```csharp
[StructLayout(LayoutKind.Sequential)]
struct InlineArray19<T>
{
public const int InitialSize = 8192;
T[] array00; // 8192
T[] array01; // 16384
T[] array02; // 32768
T[] array03; // 65536
T[] array04; // 131072
T[] array05; // 262144
T[] array06; // 524288
T[] array07; // 1048576
T[] array08; // 2097152
T[] array09; // 4194304
T[] array10; // 8388608
T[] array11; // 16777216
T[] array12; // 33554432
T[] array13; // 67108864
T[] array14; // 134217728
T[] array15; // 268435456
T[] array16; // 536870912
T[] array17; // 1073741824
T[] array18; // Array.MaxLength - total
public T[] this[int i]
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get
{
if (i < 0 || i > 18) Throw();
return Unsafe.Add(ref array00, i);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
set
{
if (i < 0 || i > 18) Throw();
Unsafe.Add(ref array00, i) = value;
}
}
void Throw() { throw new ArgumentOutOfRangeException(); }
}
```
With these optimizations for both reading and writing, we achieved several times the speedup compared to the .NET standard library.
## Binary Read
`TryPeek`, `PeekAsync`, `TryRead`, `ReadAsync`, `TryReadBlock`, and `ReadBlockAsync` enable reading as binary, irrespective of newline codes. For example, [Redis's protocol, RESP](https://redis.io/docs/latest/develop/reference/protocol-spec/), is a text protocol and typically newline-delimited, but after `$N`, it requires reading N bytes (BulkString). For instance, `$5\r\nhello\r\n` means reading 5 bytes.
Here's an example of how it can be parsed:
```csharp
// $5\r\nhello\r\n
var line = await reader.ReadLineAsync(); // $5(+ consumed \r\n)
if (line.Value.Span[0] == (byte)'$')
{
Utf8Parser.TryParse(line.Value.Span.Slice(1), out int size, out _); // 5
var block = await reader.ReadBlockAsync(size); // hello
await reader.ReadLineAsync(); // consume \r\n
Console.WriteLine(Encoding.UTF8.GetString(block.Span));
}
```
A sample that parses all RESP code is available in [RespReader.cs](https://github.com/Cysharp/Utf8StreamReader/blob/e400444/sandbox/ConsoleApp1/RespReader.cs).
Additionally, when using `LoadIntoBufferAsync` and `LoadIntoBufferAtLeastAsync` to include data in the buffer, using `Try***` allows for more efficient execution.
```csharp
while (await reader.LoadIntoBufferAsync())
{
while (reader.TryReadLine(out var line))
{
switch (line.Span[0])
{
case (byte)'$':
Utf8Parser.TryParse(line.Span.Slice(1), out int size, out _);
if (!reader.TryReadBlock(size + 2, out var block)) // +2 is \r\n
{
// ReadBlockAsync is TryReadBlock + LoadIntoBufferAtLeastAsync
block = await reader.ReadBlockAsync(size + 2);
}
yield return block.Slice(0, size);
break;
// and others('+', '-', ':', '*')
default:
break;
}
}
}
```
When using `ReadToEndAsync`, you can obtain a `byte[]` using Utf8StreamReader's efficient binary reading/concatenation (`SegmentedArrayBufferWriter, InlineArray19`).
```csharp
using var reader = new Utf8StreamReader(stream);
byte[] bytes = await reader.ReadToEndAsync();
```
`ReadToEndAsync()` has two optional overloads, `(bool disableBomCheck)` and `(long resultSizeHint)`.
If `disableBomCheck` is true, it disables the BOM check/trim and always performs a complete binary-matching read. The default for `ReadToEndAsync` is true, which always expects a binary-matching read. If false, it follows Utf8StreamReader.SkipBom.
`resultSizeHint` allows for reducing the copy cost by directly generating `new byte[resultSizeHint]` when the final binary size is known and reading directly into that buffer. When reading a file, i.e., when the `Stream` is a `FileStream` and seekable, `FileStream.Length` is used as the resultSizeHint as an optimization.
Here is the performance comparison between copying a normal `Stream` to a `MemoryStream` with `CopyToAsync` and then calling `ToArray`, versus using `ReadToEndAsync` of `Utf8StreamReader` to obtain a `byte[]`. The options are adjusted so that the `FileStream`-specific optimization does not kick in when a `FileStream` is passed directly to `Utf8StreamReader`, which keeps the comparison fair.

```csharp
[Benchmark]
public async Task<byte[]> MemoryStreamCopyToToArray()
{
using var fs = new FileStream(filePath, FileMode.Open);
var ms = new MemoryStream();
await fs.CopyToAsync(ms);
return ms.ToArray();
}
[Benchmark]
public async Task<byte[]> Utf8StreamReaderReadToEndAsync()
{
using var fs = new FileStream(filePath, FileMode.Open);
using var sr = new Cysharp.IO.Utf8StreamReader(fs);
return await sr.ReadToEndAsync(disableBomCheck: false); // hack for disable optimize(for benchmark fairness)
}
```
## Reset
`Utf8StreamReader` is a class that supports reuse. By calling `Reset()`, the Stream and internal state are released. Using `Reset(Stream)`, it can be reused with a new `Stream`.
## Options
The constructor accepts `int bufferSize` and `bool leaveOpen` as parameters.
`int bufferSize` defaults to 65536 and the buffer is rented from `ArrayPool`. If the data per line is large, changing the buffer size may improve performance. When the buffer size and the size per line are close, frequent buffer copy operations occur, leading to performance degradation.
`bool leaveOpen` determines whether the internal Stream is also disposed when the object is disposed. The default is `false`, which means the Stream is disposed.
Additionally, there are init properties that allow changing the option values for `ConfigureAwait`, `SyncRead` and `SkipBom`.
`bool ConfigureAwait { init; }` allows you to specify the value for `ConfigureAwait(bool continueOnCapturedContext)` when awaiting asynchronous methods internally. The default is `false`.
`bool SyncRead { init; }` configures the Stream to use synchronous reading, meaning it will use Read instead. This causes all Async operations to complete synchronously. There is potential for slight performance improvements when a `FileStream` is opened with `useAsync:false`. Normally, leaving it as false is fine. The default is `false`.
`bool SkipBom { init; }` determines whether to identify and skip the BOM (Byte Order Mark) included at the beginning of the data during the first read. The default is `true`, which means the BOM is skipped.
Currently, this is not an option, but `Utf8StreamReader` only recognizes `CRLF(\r\n)` or `LF(\n)` as newline characters. Since environments that use `CR(\r)` are now extremely rare, the CR check is omitted for performance reasons. If you need this functionality, please let us know by creating an Issue. We will consider adding it as an option.
Unity
---
Unity, which supports .NET Standard 2.1, can run this library. Since the library is only provided through NuGet, it is recommended to use [NuGetForUnity](https://github.com/GlitchEnzo/NuGetForUnity) for installation.
For detailed instructions on using NuGet libraries in Unity, please refer to the documentation of [Cysharp/R3](https://github.com/Cysharp/R3/) and other similar resources.
License
---
This library is under the MIT License.
================================================
FILE: Utf8StreamReader.sln
================================================
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.8.34330.188
MinimumVisualStudioVersion = 10.0.40219.1
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{BD07BD08-1CB4-41AE-B2BD-3975BE13B8EC}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Utf8StreamReader", "src\Utf8StreamReader\Utf8StreamReader.csproj", "{983561F1-F180-4188-AE80-BFA95FD69656}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{5A8808D6-63E0-48EE-A115-0380E0E57156}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Utf8StreamReader.Tests", "tests\Utf8StreamReader.Tests\Utf8StreamReader.Tests.csproj", "{6C953584-A04B-42C7-9CF3-267AFB010C2B}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "sandbox", "sandbox", "{6BA94544-B2DF-4DD2-9390-DAA8AF5CA90A}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ConsoleApp1", "sandbox\ConsoleApp1\ConsoleApp1.csproj", "{27B89B32-EC1A-48B0-BFC9-6172FCCE2961}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Benchmark", "sandbox\Benchmark\Benchmark.csproj", "{48293CC8-A87C-4F59-A398-51CD37E6B62B}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{983561F1-F180-4188-AE80-BFA95FD69656}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{983561F1-F180-4188-AE80-BFA95FD69656}.Debug|Any CPU.Build.0 = Debug|Any CPU
{983561F1-F180-4188-AE80-BFA95FD69656}.Release|Any CPU.ActiveCfg = Release|Any CPU
{983561F1-F180-4188-AE80-BFA95FD69656}.Release|Any CPU.Build.0 = Release|Any CPU
{6C953584-A04B-42C7-9CF3-267AFB010C2B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{6C953584-A04B-42C7-9CF3-267AFB010C2B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{6C953584-A04B-42C7-9CF3-267AFB010C2B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{6C953584-A04B-42C7-9CF3-267AFB010C2B}.Release|Any CPU.Build.0 = Release|Any CPU
{27B89B32-EC1A-48B0-BFC9-6172FCCE2961}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{27B89B32-EC1A-48B0-BFC9-6172FCCE2961}.Debug|Any CPU.Build.0 = Debug|Any CPU
{27B89B32-EC1A-48B0-BFC9-6172FCCE2961}.Release|Any CPU.ActiveCfg = Release|Any CPU
{27B89B32-EC1A-48B0-BFC9-6172FCCE2961}.Release|Any CPU.Build.0 = Release|Any CPU
{48293CC8-A87C-4F59-A398-51CD37E6B62B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{48293CC8-A87C-4F59-A398-51CD37E6B62B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{48293CC8-A87C-4F59-A398-51CD37E6B62B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{48293CC8-A87C-4F59-A398-51CD37E6B62B}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{983561F1-F180-4188-AE80-BFA95FD69656} = {BD07BD08-1CB4-41AE-B2BD-3975BE13B8EC}
{6C953584-A04B-42C7-9CF3-267AFB010C2B} = {5A8808D6-63E0-48EE-A115-0380E0E57156}
{27B89B32-EC1A-48B0-BFC9-6172FCCE2961} = {6BA94544-B2DF-4DD2-9390-DAA8AF5CA90A}
{48293CC8-A87C-4F59-A398-51CD37E6B62B} = {6BA94544-B2DF-4DD2-9390-DAA8AF5CA90A}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {38C0CA37-B15E-4200-9F2C-AD08076E4013}
EndGlobalSection
EndGlobal
================================================
FILE: sandbox/Benchmark/Benchmark.csproj
================================================
Exe
net8.0
enable
enable
================================================
FILE: sandbox/Benchmark/BytesReadToEnd.cs
================================================
using BenchmarkDotNet.Attributes;
using Cysharp.IO;
using System.Text.Encodings.Web;
using System.Text.Json;
using System.Text.Unicode;
namespace Benchmark;
/// <summary>
/// Benchmarks reading a whole file into a <c>byte[]</c>:
/// <c>File.ReadAllBytesAsync</c> versus <c>Utf8StreamReader.ReadToEndAsync</c>
/// opened directly from a file path.
/// </summary>
[SimpleJob, MemoryDiagnoser]
public class BytesReadToEnd
{
    // Number of JSON lines written to the temporary benchmark file.
    const int C = 1000000;

    // Path of the temporary file; assigned in GlobalSetup and deleted in GlobalCleanup.
    string filePath = default!;

    /// <summary>
    /// Creates a temporary file containing <c>C</c> UTF-8 JSON lines, each followed
    /// by the platform newline (CRLF on Windows, LF elsewhere).
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        // UnicodeRanges.All keeps the non-ASCII payload unescaped, so the file holds real multi-byte UTF-8.
        var options = new JsonSerializerOptions
        {
            Encoder = JavaScriptEncoder.Create(UnicodeRanges.All)
        };

        var path = Path.GetTempFileName();
        var newline = OperatingSystem.IsWindows() ? "\r\n"u8 : "\n"u8;

        using var file = File.OpenWrite(path);
        for (var i = 0; i < C; i++)
        {
            var json = JsonSerializer.SerializeToUtf8Bytes(
                new MyClass { MyProperty = i, MyProperty2 = "あいうえおかきくけこ" }, options);
            file.Write(json);
            file.Write(newline);
        }

        filePath = path;
    }

    /// <summary>Deletes the temporary file created by <see cref="GlobalSetup"/>.</summary>
    [GlobalCleanup]
    public void GlobalCleanup()
    {
        File.Delete(filePath);
    }

    /// <summary>Baseline: reads the file with <c>File.ReadAllBytesAsync</c>.</summary>
    /// <returns>The complete file content.</returns>
    [Benchmark]
    public async Task<byte[]> FileReadAllBytesAsync()
    {
        // ReadAllBytes knows file-length so fastest.
        return await File.ReadAllBytesAsync(filePath);
    }

    /// <summary>Reads the file through <c>Utf8StreamReader.ReadToEndAsync</c>.</summary>
    /// <returns>The complete file content.</returns>
    [Benchmark]
    public async Task<byte[]> Utf8StreamReaderReadToEndAsync()
    {
        using var sr = new Cysharp.IO.Utf8StreamReader(filePath);
        return await sr.ReadToEndAsync();
    }
}
/// <summary>
/// Benchmarks turning an already-open <see cref="FileStream"/> into a <c>byte[]</c>:
/// <c>CopyToAsync</c> into a <see cref="MemoryStream"/> followed by <c>ToArray</c>, versus
/// <c>Utf8StreamReader.ReadToEndAsync</c> over the same kind of stream.
/// </summary>
[SimpleJob, MemoryDiagnoser]
public class BytesReadToEnd2
{
    // Number of JSON lines written to the temporary benchmark file.
    const int C = 1000000;

    // Path of the temporary file; assigned in GlobalSetup and deleted in GlobalCleanup.
    string filePath = default!;

    /// <summary>
    /// Creates a temporary file containing <c>C</c> UTF-8 JSON lines, each followed
    /// by the platform newline (CRLF on Windows, LF elsewhere).
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        // UnicodeRanges.All keeps the non-ASCII payload unescaped, so the file holds real multi-byte UTF-8.
        var options = new JsonSerializerOptions
        {
            Encoder = JavaScriptEncoder.Create(UnicodeRanges.All)
        };

        var path = Path.GetTempFileName();
        var newline = OperatingSystem.IsWindows() ? "\r\n"u8 : "\n"u8;

        using var file = File.OpenWrite(path);
        for (var i = 0; i < C; i++)
        {
            var json = JsonSerializer.SerializeToUtf8Bytes(
                new MyClass { MyProperty = i, MyProperty2 = "あいうえおかきくけこ" }, options);
            file.Write(json);
            file.Write(newline);
        }

        filePath = path;
    }

    /// <summary>Deletes the temporary file created by <see cref="GlobalSetup"/>.</summary>
    [GlobalCleanup]
    public void GlobalCleanup()
    {
        File.Delete(filePath);
    }

    /// <summary>Baseline: copies the file stream into a <see cref="MemoryStream"/> and materializes it.</summary>
    /// <returns>The complete file content.</returns>
    [Benchmark]
    public async Task<byte[]> MemoryStreamCopyToToArray()
    {
        using var fs = new FileStream(filePath, FileMode.Open);
        var ms = new MemoryStream();
        await fs.CopyToAsync(ms);
        return ms.ToArray();
    }

    /// <summary>Reads the file stream to the end through <c>Utf8StreamReader.ReadToEndAsync</c>.</summary>
    /// <returns>The complete file content.</returns>
    [Benchmark]
    public async Task<byte[]> Utf8StreamReaderReadToEndAsync()
    {
        using var fs = new FileStream(filePath, FileMode.Open);
        using var sr = new Cysharp.IO.Utf8StreamReader(fs);
        return await sr.ReadToEndAsync(disableBomCheck: false); // hack for ignore optimize(for benchmark fairness)
    }
}
================================================
FILE: sandbox/Benchmark/FromFile.cs
================================================
using BenchmarkDotNet.Attributes;
using Cysharp.IO;
using System.Text;
using System.Text.Encodings.Web;
using System.Text.Json;
using System.Text.Unicode;
namespace Benchmark;
/// <summary>
/// Line-reading benchmarks over a temporary UTF-8 file. Compares <c>System.IO.StreamReader</c>
/// and <c>File.ReadLinesAsync</c> against <c>Utf8StreamReader</c> and its text-reader wrapper
/// under the different <c>FileOpenMode</c> / <c>SyncRead</c> settings.
/// </summary>
[SimpleJob, MemoryDiagnoser]
public class FromFile
{
    // Number of JSON lines written to the temporary benchmark file.
    const int C = 1000000;

    // Path of the temporary file; assigned in GlobalSetup and deleted in GlobalCleanup.
    string filePath = default!;

    /// <summary>
    /// Writes <c>C</c> JSON lines to a fresh temporary file, terminating each one with
    /// CRLF on Windows and LF elsewhere.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        var tempPath = Path.GetTempFileName();
        var lineBreak = OperatingSystem.IsWindows() ? "\r\n"u8 : "\n"u8;

        // UnicodeRanges.All keeps the non-ASCII payload unescaped in the serialized output.
        var serializerOptions = new JsonSerializerOptions
        {
            Encoder = JavaScriptEncoder.Create(UnicodeRanges.All)
        };

        using (var output = File.OpenWrite(tempPath))
        {
            for (var index = 0; index < C; index++)
            {
                var item = new MyClass { MyProperty = index, MyProperty2 = "あいうえおかきくけこ" };
                var payload = JsonSerializer.SerializeToUtf8Bytes(item, serializerOptions);
                output.Write(payload);
                output.Write(lineBreak);
            }
        }

        filePath = tempPath;
    }

    /// <summary>Removes the temporary file produced by <see cref="GlobalSetup"/>.</summary>
    [GlobalCleanup]
    public void GlobalCleanup()
    {
        File.Delete(filePath);
    }

    /// <summary>Baseline: <c>System.IO.StreamReader.ReadLineAsync</c> until it yields null.</summary>
    [Benchmark]
    public async Task StreamReaderFileStream()
    {
        using var reader = new System.IO.StreamReader(filePath);
        while (await reader.ReadLineAsync() is not null)
        {
            // ...
        }
    }

    /// <summary>Baseline: enumerates <c>File.ReadLinesAsync</c> with UTF-8 encoding.</summary>
    [Benchmark]
    public async Task FileReadLinesAsync()
    {
        await foreach (var text in File.ReadLinesAsync(filePath, Encoding.UTF8))
        {
        }
    }

    /// <summary>Utf8StreamReader opened with <c>FileOpenMode.Scalability</c>.</summary>
    [Benchmark]
    public async Task Utf8StreamReaderFileStreamScalability()
    {
        using var reader = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Scalability);
        while (await reader.LoadIntoBufferAsync())
        {
            while (reader.TryReadLine(out var utf8Line))
            {
                // ...
            }
        }
    }

    /// <summary>Utf8StreamReader opened with <c>FileOpenMode.Throughput</c>.</summary>
    [Benchmark]
    public async Task Utf8StreamReaderFileStreamThroughput()
    {
        using var reader = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Throughput);
        while (await reader.LoadIntoBufferAsync())
        {
            while (reader.TryReadLine(out var utf8Line))
            {
                // ...
            }
        }
    }

    /// <summary>Utf8StreamReader opened with <c>FileOpenMode.Throughput</c> and <c>SyncRead = true</c>.</summary>
    [Benchmark]
    public async ValueTask Utf8StreamReaderFileStreamThroughputSyncRead()
    {
        using var reader = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Throughput) { SyncRead = true };
        while (await reader.LoadIntoBufferAsync())
        {
            while (reader.TryReadLine(out var utf8Line))
            {
            }
        }
    }

    /// <summary>Text-reader wrapper over a <c>FileOpenMode.Scalability</c> reader; lines are not converted to string.</summary>
    [Benchmark]
    public async Task Utf8TextReaderFileStreamScalability()
    {
        using var textReader = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Scalability).AsTextReader();
        while (await textReader.LoadIntoBufferAsync())
        {
            while (textReader.TryReadLine(out var chars))
            {
                // ...
            }
        }
    }

    /// <summary>Text-reader wrapper over a <c>FileOpenMode.Throughput</c> reader; lines are not converted to string.</summary>
    [Benchmark]
    public async Task Utf8TextReaderFileStreamThroughput()
    {
        using var textReader = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Throughput).AsTextReader();
        while (await textReader.LoadIntoBufferAsync())
        {
            while (textReader.TryReadLine(out var chars))
            {
                // ...
            }
        }
    }

    /// <summary>Text-reader wrapper over a <c>FileOpenMode.Throughput</c> reader with <c>SyncRead = true</c>.</summary>
    [Benchmark]
    public async ValueTask Utf8TextReaderFileStreamThroughputSyncRead()
    {
        using var textReader = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Throughput) { SyncRead = true }.AsTextReader();
        while (await textReader.LoadIntoBufferAsync())
        {
            while (textReader.TryReadLine(out var chars))
            {
                // ...
            }
        }
    }

    /// <summary>Same as the Scalability text-reader benchmark, but materializes every line with <c>ToString()</c>.</summary>
    [Benchmark]
    public async Task Utf8TextReaderToStringFileStreamScalability()
    {
        using var textReader = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Scalability).AsTextReader();
        while (await textReader.LoadIntoBufferAsync())
        {
            while (textReader.TryReadLine(out var chars))
            {
                _ = chars.ToString();
            }
        }
    }

    /// <summary>Same as the Throughput text-reader benchmark, but materializes every line with <c>ToString()</c>.</summary>
    [Benchmark]
    public async Task Utf8TextReaderToStringFileStreamThroughput()
    {
        using var textReader = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Throughput).AsTextReader();
        while (await textReader.LoadIntoBufferAsync())
        {
            while (textReader.TryReadLine(out var chars))
            {
                _ = chars.ToString();
            }
        }
    }
}
================================================
FILE: sandbox/Benchmark/FromMemory.cs
================================================
using System.Buffers;
using System.IO.Pipelines;
using System.Text;
using System.Text.Encodings.Web;
using System.Text.Json;
using System.Text.Unicode;
using BenchmarkDotNet.Attributes;
using Cysharp.IO;
namespace Benchmark;
/// <summary>
/// Line-reading benchmarks over an in-memory UTF-8 payload. Compares <c>System.IO.StreamReader</c>,
/// <c>Utf8StreamReader</c>, its text-reader wrapper, and a <c>PipeReader</c> + <c>SequenceReader&lt;byte&gt;</c> loop.
/// </summary>
[SimpleJob, MemoryDiagnoser]
public class FromMemory
{
    // Number of JSON lines in the in-memory payload.
    const int C = 1000000;
    // const int C = 100;

    // UTF-8 payload built once in GlobalSetup.
    byte[] utf8Data = default!;

    // Fresh stream over utf8Data, recreated before every iteration by Setup.
    MemoryStream ms = default!;

    /// <summary>
    /// Builds <c>C</c> JSON lines joined by <see cref="Environment.NewLine"/> and encodes them as UTF-8.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        // UnicodeRanges.All keeps the non-ASCII payload unescaped in the serialized output.
        var options = new JsonSerializerOptions
        {
            Encoder = JavaScriptEncoder.Create(UnicodeRanges.All)
        };

        var jsonLines = Enumerable.Range(0, C)
            .Select(x => new MyClass { MyProperty = x, MyProperty2 = "あいうえおかきくけこ" })
            .Select(x => JsonSerializer.Serialize(x, options))
            .ToArray();

        utf8Data = Encoding.UTF8.GetBytes(string.Join(Environment.NewLine, jsonLines));
    }

    /// <summary>Creates a new <see cref="MemoryStream"/> over the shared payload for the next iteration.</summary>
    [IterationSetup]
    public void Setup()
    {
        ms = new MemoryStream(utf8Data);
    }

    /// <summary>Baseline: <c>System.IO.StreamReader.ReadLineAsync</c> until it yields null.</summary>
    [Benchmark]
    public async Task StreamReader()
    {
        using var sr = new System.IO.StreamReader(ms);
        string? line;
        while ((line = await sr.ReadLineAsync()) != null)
        {
            // Console.WriteLine(line);
        }
    }

    /// <summary>Reads UTF-8 lines with <c>LoadIntoBufferAsync</c> + <c>TryReadLine</c>.</summary>
    [Benchmark]
    public async Task Utf8StreamReader()
    {
        using var sr = new Cysharp.IO.Utf8StreamReader(ms);
        while (await sr.LoadIntoBufferAsync())
        {
            while (sr.TryReadLine(out var line))
            {
                // Console.WriteLine(Encoding.UTF8.GetString( line.Span));
            }
        }
    }

    /// <summary>Reads lines through the text-reader wrapper without converting them to string.</summary>
    [Benchmark]
    public async Task Utf8TextReader()
    {
        using var sr = new Cysharp.IO.Utf8StreamReader(ms).AsTextReader();
        while (await sr.LoadIntoBufferAsync())
        {
            while (sr.TryReadLine(out var line))
            {
                // Console.WriteLine(Encoding.UTF8.GetString( line.Span));
            }
        }
    }

    /// <summary>Reads lines through the text-reader wrapper and materializes each with <c>ToString()</c>.</summary>
    [Benchmark]
    public async Task Utf8TextReaderToString()
    {
        using var sr = new Cysharp.IO.Utf8StreamReader(ms).AsTextReader();
        while (await sr.LoadIntoBufferAsync())
        {
            while (sr.TryReadLine(out var line))
            {
                _ = line.ToString();
                // Console.WriteLine(Encoding.UTF8.GetString( line.Span));
            }
        }
    }

    //[Benchmark]
    //public async Task Utf8StreamReaderReadLine()
    //{
    //    using var sr = new Cysharp.IO.Utf8StreamReader(ms);
    //    ReadOnlyMemory<byte>? line;
    //    while ((line = await sr.ReadLineAsync()) != null)
    //    {
    //        // Console.WriteLine(Encoding.UTF8.GetString(line.Value.Span));
    //    }
    //}

    //[Benchmark]
    //public async Task Utf8StreamReaderReadAllLines()
    //{
    //    using var sr = new Cysharp.IO.Utf8StreamReader(ms);
    //    await foreach (var line in sr.ReadAllLinesAsync())
    //    {
    //        //Console.WriteLine(Encoding.UTF8.GetString(line.Span));
    //    }
    //}

    /// <summary>
    /// Splits the stream on <c>'\n'</c> using <c>PipeReader</c> and <c>SequenceReader&lt;byte&gt;</c>.
    /// </summary>
    /// <remarks>
    /// The read result that reports completion or cancellation is not scanned, so any
    /// data delivered together with it is skipped.
    /// </remarks>
    [Benchmark]
    public async Task PipeReaderSequenceReader()
    {
        using (ms)
        {
            var reader = PipeReader.Create(ms);

        READ_AGAIN:
            var readResult = await reader.ReadAsync();

            if (!(readResult.IsCompleted | readResult.IsCanceled))
            {
                var buffer = readResult.Buffer;

                while (TryReadData(ref buffer, out var line))
                {
                    //Console.WriteLine(Encoding.UTF8.GetString(line));
                }

                // Everything up to buffer.Start is consumed; the remainder is marked examined so the next read waits for more data.
                reader.AdvanceTo(buffer.Start, buffer.End);
                goto READ_AGAIN;
            }
        }

        // Slices one '\n'-terminated line off the front of the buffer; returns false when no delimiter is present.
        static bool TryReadData(ref ReadOnlySequence<byte> buffer, out ReadOnlySequence<byte> line)
        {
            var reader = new SequenceReader<byte>(buffer);
            if (reader.TryReadTo(out line, (byte)'\n', advancePastDelimiter: true))
            {
                buffer = buffer.Slice(reader.Consumed);
                return true;
            }
            return false;
        }
    }

    //[Benchmark]
    //public async Task PipelineStreamReader2()
    //{
    //    using (ms)
    //    {
    //        var reader = PipeReader.Create(ms);

    //    READ_AGAIN:
    //        var readResult = await reader.ReadAsync();

    //        if (!(readResult.IsCompleted | readResult.IsCanceled))
    //        {
    //            var buffer = readResult.Buffer;
    //            ConsumeAllData(ref buffer);
    //            reader.AdvanceTo(buffer.Start, buffer.End);
    //            goto READ_AGAIN;
    //        }
    //    }

    //    static void ConsumeAllData(ref ReadOnlySequence<byte> buffer)
    //    {
    //        var reader = new SequenceReader<byte>(buffer);
    //        while (reader.TryReadTo(out ReadOnlySequence<byte> line, (byte)'\n', advancePastDelimiter: true))
    //        {
    //            //Console.WriteLine(Encoding.UTF8.GetString(line));
    //        }
    //        buffer = buffer.Slice(reader.Consumed);
    //    }
    //}
}
/// <summary>
/// Sample payload serialized to JSON to produce the benchmark input lines.
/// </summary>
public class MyClass
{
    /// <summary>Integer field of the sample payload.</summary>
    public int MyProperty { get; set; }

    /// <summary>Optional text field of the sample payload.</summary>
    public string? MyProperty2 { get; set; }
}
================================================
FILE: sandbox/Benchmark/Program.cs
================================================
// Entry point of the benchmark project.
// DEBUG builds: run one hand-picked benchmark method once, without BenchmarkDotNet.
// Other builds: hand the command-line arguments to BenchmarkSwitcher.
#if DEBUG
using Benchmark;
using System.Runtime.CompilerServices;
global::System.Console.WriteLine("DEBUG");
// Swap the commented lines below to step through a different benchmark class.
//var benchmark = new BytesReadToEnd();
var benchmark = new ReadToEndString();
// Create the temporary input file the benchmark reads.
benchmark.GlobalSetup();
//var s1 = await benchmark.FileReadAllBytesAsync();
var s2 = await benchmark.Utf8TextReaderReadToEndAsync();
//Console.WriteLine(s1.SequenceEqual(s2));
// Delete the temporary input file again.
benchmark.GlobalCleanup();
#else
using BenchmarkDotNet.Running;
// Benchmarks are discovered from the assembly that contains Program.
BenchmarkSwitcher
.FromAssembly(typeof(Program).Assembly)
.Run(args);
#endif
================================================
FILE: sandbox/Benchmark/ReadToEndString.cs
================================================
using BenchmarkDotNet.Attributes;
using Cysharp.IO;
using System.Text.Encodings.Web;
using System.Text.Json;
using System.Text.Unicode;
namespace Benchmark;
/// <summary>
/// Benchmarks reading a whole UTF-8 file into a <see cref="string"/>:
/// <c>StreamReader.ReadToEndAsync</c>, the Utf8StreamReader text-reader wrapper's
/// <c>ReadToEndAsync</c>, and <c>File.ReadAllTextAsync</c>.
/// </summary>
[SimpleJob, MemoryDiagnoser]
public class ReadToEndString
{
    // Number of JSON lines written to the temporary benchmark file.
    const int C = 1000000;

    // Path of the temporary file; assigned in GlobalSetup and deleted in GlobalCleanup.
    string filePath = default!;

    /// <summary>
    /// Creates a temporary file containing <c>C</c> UTF-8 JSON lines, each followed
    /// by the platform newline (CRLF on Windows, LF elsewhere).
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        // UnicodeRanges.All keeps the non-ASCII payload unescaped, so the file holds real multi-byte UTF-8.
        var options = new JsonSerializerOptions
        {
            Encoder = JavaScriptEncoder.Create(UnicodeRanges.All)
        };

        var path = Path.GetTempFileName();
        var newline = OperatingSystem.IsWindows() ? "\r\n"u8 : "\n"u8;

        using var file = File.OpenWrite(path);
        for (var i = 0; i < C; i++)
        {
            var json = JsonSerializer.SerializeToUtf8Bytes(
                new MyClass { MyProperty = i, MyProperty2 = "あいうえおかきくけこ" }, options);
            file.Write(json);
            file.Write(newline);
        }

        filePath = path;
    }

    /// <summary>Deletes the temporary file created by <see cref="GlobalSetup"/>.</summary>
    [GlobalCleanup]
    public void GlobalCleanup()
    {
        File.Delete(filePath);
    }

    /// <summary>Baseline: <c>System.IO.StreamReader.ReadToEndAsync</c>.</summary>
    /// <returns>The complete file content as text.</returns>
    [Benchmark]
    public async Task<string> StreamReaderReadToEndAsync()
    {
        using var sr = new System.IO.StreamReader(filePath);
        return await sr.ReadToEndAsync();
    }

    /// <summary>Reads the file through <c>Utf8StreamReader(...).AsTextReader().ReadToEndAsync()</c>.</summary>
    /// <returns>The complete file content as text.</returns>
    [Benchmark]
    public async Task<string> Utf8TextReaderReadToEndAsync()
    {
        using var sr = new Cysharp.IO.Utf8StreamReader(filePath).AsTextReader();
        return await sr.ReadToEndAsync();
    }

    /// <summary>Baseline: <c>File.ReadAllTextAsync</c>.</summary>
    /// <returns>The complete file content as text.</returns>
    [Benchmark]
    public async Task<string> FileReadAllTextAsync()
    {
        return await File.ReadAllTextAsync(filePath);
    }
}
================================================
FILE: sandbox/ConsoleApp1/ConsoleApp1.csproj
================================================
Exe
net8.0
enable
enable
Always
================================================
FILE: sandbox/ConsoleApp1/Program.cs
================================================
using Cysharp.IO;
using Microsoft.Win32.SafeHandles;
using System.Buffers;
using System.Buffers.Text;
using System.IO;
using System.IO.Pipelines;
using System.Runtime.InteropServices;
using System.Runtime.InteropServices.Marshalling;
using System.Text;
using System.Text.Encodings.Web;
using System.Text.Json;
using System.Text.Unicode;
// Scratch entry point: wraps a small RESP-style payload ("$5\r\nhello\r\n") in a MemoryStream
// and reads it to the end as raw bytes, with BOM skipping turned off.
byte[] respFrame = Encoding.UTF8.GetBytes("$5\r\nhello\r\n");
using var reader = new Utf8StreamReader(new MemoryStream(respFrame))
{
    SkipBom = false
};
byte[] bytes = await reader.ReadToEndAsync();
//while (await reader.LoadIntoBufferAsync())
//{
// while (reader.TryReadLine(out var line))
// {
// switch (line.Span[0])
// {
// case (byte)'$':
// Utf8Parser.TryParse(line.Span.Slice(1), out int size, out _);
// if (!reader.TryReadBlock(size + 2, out var block)) // +2 is \r\n
// {
// // ReadBlockAsync is TryReadBlock + LoadIntoBufferAtLeastAsync
// block = await reader.ReadBlockAsync(size + 2);
// }
// yield return block.Slice(0, size);
// break;
// // and others('+', '-', ':', '*')
// default:
// break;
// }
// }
//}
//var path = "file1.txt";
//var fs = new FileStream(path, FileMode.Open,FileAccess.Read, FileShare.Read, 0, false);
//var buf = new byte[1024];
//await fs.ReadAsync(buf);
//using var reader = new Utf8StreamReader(path).AsTextReader();
//var str = await reader.ReadToEndAsync();
//Console.WriteLine(str.ToString());
// new StreamReader().ReadBlock(
//var options = new JsonSerializerOptions();
//options.Encoder = JavaScriptEncoder.Create(UnicodeRanges.All);
//var jsonLines = Enumerable.Range(0, 100000)
// .Select(x => new MyClass { MyProperty = x, MyProperty2 = "あいうえおかきくけこ" })
// .Select(x => JsonSerializer.Serialize(x, options))
// .ToArray();
//var utf8Data = Encoding.UTF8.GetBytes(string.Join(Environment.NewLine, jsonLines));
//var ms = new MemoryStream(utf8Data);
////using var sr = new System.IO.StreamReader(ms);
////string? line;
////while ((line = await sr.ReadLineAsync()) != null)
////{
//// // JsonSerializer.Deserialize(line);
////}
//using var sr = new Cysharp.IO.Utf8StreamReader(ms);
//ReadOnlyMemory? line;
//while ((line = await sr.ReadLineAsync()) != null)
//{
//}
//public class MyClass
//{
// public int MyProperty { get; set; }
// public string? MyProperty2 { get; set; }
//}
================================================
FILE: sandbox/ConsoleApp1/ReadMeSample.cs
================================================
using Cysharp.IO;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
namespace ConsoleApp1;
/// <summary>
/// Compilable versions of the README usage snippets: three ways to read UTF-8 lines
/// from a <see cref="Stream"/> and deserialize each line as JSON.
/// </summary>
internal class ReadMeSample
{
    /// <summary>
    /// Buffer-driven loop: awaits <c>LoadIntoBufferAsync</c>, then drains the buffered
    /// lines synchronously with <c>TryReadLine</c>.
    /// </summary>
    /// <param name="stream">Source of newline-delimited UTF-8 JSON.</param>
    /// <returns>A task that completes when the stream has been read to the end.</returns>
    public async Task Sample1(Stream stream)
    {
        using var reader = new Utf8StreamReader(stream);

        // Most performant style, similar as System.Threading.Channels
        while (await reader.LoadIntoBufferAsync())
        {
            while (reader.TryReadLine(out var line))
            {
                // line is ReadOnlyMemory<byte>, deserialize UTF8 directly.
                _ = JsonSerializer.Deserialize<Foo>(line.Span);
            }
        }
    }

    /// <summary>
    /// StreamReader-like loop: awaits <c>ReadLineAsync</c> until it returns null.
    /// </summary>
    /// <param name="stream">Source of newline-delimited UTF-8 JSON.</param>
    /// <returns>A task that completes when the stream has been read to the end.</returns>
    public async Task Sample2(Stream stream)
    {
        using var reader = new Utf8StreamReader(stream);

        // Classical style, same as StreamReader
        ReadOnlyMemory<byte>? line = null;
        while ((line = await reader.ReadLineAsync()) != null)
        {
            _ = JsonSerializer.Deserialize<Foo>(line.Value.Span);
        }
    }

    /// <summary>
    /// Async-stream loop: enumerates <c>ReadAllLinesAsync</c> with <c>await foreach</c>.
    /// </summary>
    /// <param name="stream">Source of newline-delimited UTF-8 JSON.</param>
    /// <returns>A task that completes when the stream has been read to the end.</returns>
    public async Task Sample3(Stream stream)
    {
        using var reader = new Utf8StreamReader(stream);

        // Most easiest style, use async streams
        await foreach (var line in reader.ReadAllLinesAsync())
        {
            _ = JsonSerializer.Deserialize<Foo>(line.Span);
        }
    }
}
// Empty placeholder type declared next to the README samples.
// NOTE(review): presumably the JSON deserialization target used by ReadMeSample — confirm.
public class Foo
{
}
================================================
FILE: sandbox/ConsoleApp1/RespReader.cs
================================================
using Cysharp.IO;
using System.Buffers.Text;
using System.Text;
namespace ConsoleApp1;
/// <summary>
/// RESP message kinds. Each member's value is the ASCII byte of the type prefix
/// character, so a prefix byte read from the wire can be cast directly to this enum.
/// </summary>
public enum RespType : byte
{
    /// <summary>Prefix <c>+</c>.</summary>
    SimpleStrings = (byte)'+',

    /// <summary>Prefix <c>-</c>.</summary>
    Errors = (byte)'-',

    /// <summary>Prefix <c>:</c>.</summary>
    Integers = (byte)':',

    /// <summary>Prefix <c>$</c>.</summary>
    BulkStrings = (byte)'$',

    /// <summary>Prefix <c>*</c>.</summary>
    Arrays = (byte)'*',
}
public class RespReader : IDisposable
{
Utf8StreamReader reader;
public RespReader(Stream stream)
{
this.reader = new Utf8StreamReader(stream);
}
// NOTE: for more fast processing, you need to use TryRead method.
public async ValueTask ReadRespTypeAsync(CancellationToken cancellationToken = default)
{
return (RespType)await reader.ReadAsync(cancellationToken);
}
// all read message api expect befor call ReadRespTypeAsync(already trimed type prefix)
public async ValueTask ReadSimpleStringAsync(CancellationToken cancellationToken = default) // +OK\r\n
{
return Encoding.UTF8.GetString((await reader.ReadLineAsync(cancellationToken)).Value.Span);
}
public async ValueTask ReadErrorMessageAsync(CancellationToken cancellationToken = default) // -Error message\r\n
{
return Encoding.UTF8.GetString((await reader.ReadLineAsync(cancellationToken)).Value.Span);
}
public async ValueTask ReadIntegerAsync(CancellationToken cancellationToken = default) // :1000\r\n
{
var line = await reader.ReadLineAsync(cancellationToken);
Utf8Parser.TryParse(line.Value.Span, out long value, out _);
return value;
}
public async ValueTask?> ReadBulkStringAsync(CancellationToken cancellationToken = default) // "$5\r\nhello\r\n"
{
var line = await reader.ReadLineAsync(cancellationToken);
Utf8Parser.TryParse(line.Value.Span, out int count, out _);
if (count == -1)
{
return null;
}
else
{
var dataWithNewLine = await reader.ReadBlockAsync(count + 2, cancellationToken);
return dataWithNewLine[..^2]; // without newline
}
}
// for perf improvement, ReadIntegerArray, ReadStringArray, ReadArray for bulkstrings is better approach
public async ValueTask