Full Code of lukas-reineke/cbfmt for AI

master 88a3e46fb15c cached
13 files
35.9 KB
8.8k tokens
31 symbols
1 requests
Download .txt
Repository: lukas-reineke/cbfmt
Branch: master
Commit: 88a3e46fb15c
Files: 13
Total size: 35.9 KB

Directory structure:
gitextract_rak83c_g/

├── .github/
│   ├── FUNDING.yml
│   ├── dependabot.yml
│   └── workflows/
│       ├── pr_check.yml
│       └── release.yml
├── .gitignore
├── Cargo.toml
├── LICENSE.md
├── README.md
└── src/
    ├── config.rs
    ├── format.rs
    ├── main.rs
    ├── tree.rs
    └── utils.rs

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/FUNDING.yml
================================================
# GitHub account(s) listed as funding targets for this repository.
github: [lukas-reineke]


================================================
FILE: .github/dependabot.yml
================================================
# Dependabot configuration (config format version 2).
version: 2
updates:
  # Check the Cargo dependencies declared at the repository root once a month.
  - package-ecosystem: 'cargo'
    directory: '/'
    schedule:
      interval: 'monthly'


================================================
FILE: .github/workflows/pr_check.yml
================================================
name: Pull request check

# Runs on every pull request.
on:
  pull_request:

jobs:
  # Runs the block-fixup-merge action against the pull request.
  block-fixup:
    runs-on: ubuntu-latest

    steps:
      # Same checkout major version as the other jobs in this workflow.
      - uses: actions/checkout@v3
      - name: Block Fixup Commit Merge
        uses: 13rac1/block-fixup-merge-action@v2.0.0
  # Builds the crate once per toolchain listed in the matrix.
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        toolchain:
          - stable
          - beta
          - nightly
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive
      - run: rustup update ${{ matrix.toolchain }} && rustup default ${{ matrix.toolchain }}
      - run: cargo build --verbose
  # Fails when `cargo fmt` would change any file.
  format:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive
      - run: rustup update stable && rustup default stable
      - run: rustup component add rustfmt
      - run: cargo fmt --all -- --check


================================================
FILE: .github/workflows/release.yml
================================================
name: upload

# copied from https://github.com/rust-lang/rustfmt/blob/master/.github/workflows/upload-assets.yml

# Runs when a release is created, and can also be started manually
# (workflow_dispatch).
on:
  release:
    types: [created]
  workflow_dispatch:

jobs:
  # Builds a release binary for every entry of the matrix, packs it together
  # with README.md and LICENSE.md into an archive and, for release events,
  # uploads that archive as a release asset.
  build-release:
    name: build-release
    strategy:
      matrix:
        build:
          - linux-x86_64
          - linux-x86_64-musl
          - macos-x86_64
          - windows-x86_64-gnu
          - windows-x86_64-msvc
        # Per-build settings: runner OS, toolchain label, target triple and
        # the cargo subcommand used to build.
        include:
          - build: linux-x86_64
            os: ubuntu-latest
            rust: nightly
            target: x86_64-unknown-linux-gnu
            build_command: build
          - build: linux-x86_64-musl
            os: ubuntu-latest
            rust: nightly
            target: x86_64-unknown-linux-musl
            build_command: zigbuild
          - build: macos-x86_64
            os: macos-latest
            rust: nightly
            target: x86_64-apple-darwin
            build_command: build
          - build: windows-x86_64-gnu
            os: windows-latest
            rust: nightly-x86_64-gnu
            target: x86_64-pc-windows-gnu
            build_command: build
          - build: windows-x86_64-msvc
            os: windows-latest
            rust: nightly-x86_64-msvc
            target: x86_64-pc-windows-msvc
            build_command: build
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v3

      # Run build
      - name: install rustup
        run: |
          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > rustup-init.sh
          sh rustup-init.sh -y --default-toolchain none
          rustup target add ${{ matrix.target }}

      - name: Add mingw64 to path for x86_64-gnu
        run: echo "C:\msys64\mingw64\bin" >> $GITHUB_PATH
        if: matrix.rust == 'nightly-x86_64-gnu'
        shell: bash

      - name: Install dependencies for x86_64-musl
        # `-y` keeps the install non-interactive: there is no terminal to
        # answer a confirmation prompt in CI.
        run: |
          sudo apt-get install -y musl-tools python3-pip
          sudo pip3 install ziglang
          cargo install cargo-zigbuild
        if: matrix.target == 'x86_64-unknown-linux-musl'
        shell: bash

      - name: Build release binaries
        uses: actions-rs/cargo@v1
        with:
          command: ${{ matrix.build_command }}
          args: --release --target ${{ matrix.target }}

      # Produces a .zip on Windows runners and a .tar.gz elsewhere; the
      # archive name is exported as ASSET for the upload step.
      - name: Build archive
        shell: bash
        run: |
          staging="cbfmt_${{ matrix.build }}_${{ github.event.release.tag_name }}"
          mkdir -p "$staging"

          cp {README.md,LICENSE.md} "$staging/"

          if [ "${{ matrix.os }}" = "windows-latest" ]; then
            cp target/${{ matrix.target }}/release/cbfmt.exe "$staging/"
            7z a "$staging.zip" "$staging"
            echo "ASSET=$staging.zip" >> $GITHUB_ENV
          else
            cp target/${{ matrix.target }}/release/cbfmt "$staging/"
            tar czf "$staging.tar.gz" "$staging"
            echo "ASSET=$staging.tar.gz" >> $GITHUB_ENV
          fi

      - name: Upload Release Asset
        if: github.event_name == 'release'
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ github.event.release.upload_url }}
          asset_path: ${{ env.ASSET }}
          asset_name: ${{ env.ASSET }}
          asset_content_type: application/octet-stream

  # Publishes the crate with `cargo publish`, using the CARGO_API_KEY secret.
  publish-to-cargo:
    name: Publishing to Cargo
    runs-on: ubuntu-latest
    steps:
      # Pinned to the same major version as the other checkout steps instead
      # of the moving `master` ref.
      - uses: actions/checkout@v3
      - uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
          profile: minimal
          override: true
      - uses: actions-rs/cargo@v1
        with:
          command: publish
          args: --token ${{ secrets.CARGO_API_KEY }} --allow-dirty


================================================
FILE: .gitignore
================================================
/target


================================================
FILE: Cargo.toml
================================================
# Crate metadata.
[package]
name = "cbfmt"
# NOTE(review): src/main.rs passes the same version string to clap; keep both
# in sync when bumping.
version = "0.2.0"
edition = "2021"
description = "A tool to format codeblocks inside markdown, org, and restructuredtext documents"
repository = "https://github.com/lukas-reineke/cbfmt"
categories = ["development-tools"]
keywords = ["format", "markdown", "org", "codeblock"]
license = "MIT"

[[bin]]
name = "cbfmt"
doc = false

[dependencies]
atty = "0.2.14"
clap = "3.2.8"
futures = "0.3.21"
ignore = "0.4.18"
serde = { version = "1.0.138", features = ["derive"] }
termcolor = "1.1.3"
textwrap = "0.15.0"
thiserror = "1.0.31"
tokio = { version = "1.20.0", features = ["macros", "fs", "rt-multi-thread"] }
toml = "0.5.9"
# tree-sitter and the grammars loaded in src/tree.rs.
tree-sitter = "~0.20"
tree-sitter-md = "0.1.1"
tree-sitter-org = "1.3.0"
tree-sitter-rst = "0.1.0"

# NOTE(review): no build.rs appears in the repository listing; confirm this
# build dependency is still needed.
[build-dependencies]
cc = "1.0.73"


================================================
FILE: LICENSE.md
================================================
The MIT Licence

Copyright (c) 2022 Lukas Reineke

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.


================================================
FILE: README.md
================================================
<p align="center">
    <img height="400" src="https://user-images.githubusercontent.com/12900252/188409065-2149a392-e5cb-4486-8d2b-b80f9022ef4c.png" alt="cbfmt" />
</p>

# cbfmt (codeblock format)

A tool to format codeblocks inside markdown, org, and restructuredtext documents.  
It iterates over all codeblocks, and formats them with the tool(s) specified for
the language of the block.

## Install

### Download from GitHub

Download the latest release binaries from [github.com/lukas-reineke/cbfmt/releases](https://github.com/lukas-reineke/cbfmt/releases)

### Cargo

```bash
cargo install cbfmt
```

### Build from source

1. Clone this repository
2. Build with [cargo](https://github.com/rust-lang/cargo/)

```bash
git clone https://github.com/lukas-reineke/cbfmt.git && cd cbfmt
cargo install --path .
```

This will install `cbfmt` in your `~/.cargo/bin`. Make sure to add `~/.cargo/bin` directory to your `PATH` variable.

## Config

A configuration file is required. By default, the file is called
`.cbfmt.toml`.

Example:

```toml
[languages]
rust = ["rustfmt"]
go = ["gofmt"]
lua = ["stylua -s -"]
python = ["black --fast -"]
```

### Sections

#### languages

This section specifies which commands should run for which language.  
Each entry is the name of the language as the key, and a list of format commands
to run in sequence as the value. Each format command needs to read from stdin
and write to stdout.

## Usage

### With arguments

You can run `cbfmt` on files and/or directories by passing them as
arguments.

```bash
cbfmt [OPTIONS] [file/dir/glob]...
```

The default behaviour checks formatting for all files that were passed as
arguments. If all files are formatted correctly, it exits with status code 0,
otherwise it exits with status code 1.

When a directory is passed as an argument, `cbfmt` will recursively run on all files
in that directory which have a valid parser and are not ignored by git.

### With stdin

If no arguments are specified, `cbfmt` will read from stdin and write the format
result to stdout.

```bash
cbfmt [OPTIONS] < [file]
```

### Without arguments and stdin

If there are no arguments and nothing is written to stdin, `cbfmt` will print
the help text and exit.

### Options

These are the most important options. To see all options, please run
`cbfmt --help`

#### check `-c|--check`

Works the same as the default behaviour, but only prints the path to files that
fail.

#### write `-w|--write`

Writes the format result back into the files.

#### parser `-p|--parser`

Specifies which parser to use. This is inferred from the file extension when
possible.


================================================
FILE: src/config.rs
================================================
use serde::Deserialize;
use std::collections::HashMap;

/// Configuration deserialized from the TOML config file.
#[derive(Debug, Deserialize)]
pub struct Conf {
    /// Maps a language name to the list of formatter command lines configured
    /// for that language.
    pub languages: HashMap<String, Vec<String>>,
}

/// Loads the configuration stored in the file `name`.
///
/// The file is read fully into memory and then deserialized as TOML. Both a
/// failed read and a failed deserialization are returned as `std::io::Error`.
pub fn get(name: &str) -> Result<Conf, std::io::Error> {
    let raw = std::fs::read_to_string(name)?;
    Ok(toml::from_str::<Conf>(&raw)?)
}


================================================
FILE: src/format.rs
================================================
use super::config::Conf;
use super::tree;
use super::utils;
use futures::{stream::FuturesOrdered, StreamExt};
use std::char;
use std::fmt;
use std::io::{self, prelude::*, Error, ErrorKind, Write};
use std::process::{Command, Stdio};
use textwrap::dedent;

/// Error raised while formatting a document or one of its codeblocks.
///
/// Every field except `msg` is optional context; the `Display` implementation
/// prints each one only when it is present.
#[derive(thiserror::Error, Debug)]
pub struct FormatError {
    /// Description of the failure.
    pub msg: String,
    /// File the error belongs to, when known.
    pub filename: Option<String>,
    /// Formatter command that failed, when the failure came from a command.
    pub command: Option<String>,
    /// Language of the codeblock that was being formatted.
    pub language: Option<String>,
    /// Location suffix printed directly after the filename; `format` builds it
    /// as `:<row>`.
    pub start: Option<String>,
}

impl fmt::Display for FormatError {
    /// Writes `<filename><start> [<language>] -> [<command>] ` (leaving out
    /// whichever parts are `None`), then a newline and the message.
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        if let Some(name) = self.filename.as_deref() {
            formatter.write_str(name)?;
        }
        if let Some(position) = self.start.as_deref() {
            formatter.write_str(position)?;
        }
        if let Some(lang) = self.language.as_deref() {
            write!(formatter, " [{}] ->", lang)?;
        }
        if let Some(cmd) = self.command.as_deref() {
            write!(formatter, " [{}] ", cmd)?;
        }
        formatter.write_str("\n")?;
        formatter.write_str(&self.msg)
    }
}

/// Outcome of a formatting run.
///
/// The `String` payload depends on the producer: `run` stores the resulting
/// document text, while `run_file` and `run_stdin` store the file name (or
/// `"stdin"`).
pub enum FormatResult {
    /// No codeblock differed from its formatted version.
    Unchanged(String),
    /// At least one codeblock differed from its formatted version.
    Changed(String),
    /// Formatting could not be completed.
    Err(FormatError),
}

/// Formats the codeblocks of the file `filename`.
///
/// * `conf` - formatter commands per language.
/// * `filename` - path of the file to read (and to write back when `write`
///   is set).
/// * `parser` - optional parser name; resolution is delegated to
///   `utils::get_parser` together with the file name.
/// * `write` - when `true`, a changed result is written back to the file.
///   When `false`, `run` is asked to stop at the first differing codeblock.
/// * `best_effort` - forwarded to `run`.
///
/// Returns `Changed(filename)` / `Unchanged(filename)` on success. Any I/O
/// failure, including file content that is not valid UTF-8, is returned as
/// `FormatResult::Err` carrying the file name.
pub async fn run_file(
    conf: &Conf,
    filename: String,
    parser: Option<&str>,
    write: bool,
    best_effort: bool,
) -> FormatResult {
    let parser = match utils::get_parser(Some(&filename), parser) {
        Ok(p) => p,
        Err(e) => return FormatResult::Err(e),
    };

    let file = match tokio::fs::read(&filename).await {
        Err(error) => {
            return FormatResult::Err(FormatError {
                msg: error.to_string(),
                filename: Some(filename),
                command: None,
                language: None,
                start: None,
            })
        }
        Ok(f) => f,
    };

    // `lines()` yields an error for content that is not valid UTF-8; report it
    // for this file instead of panicking and aborting the whole run.
    let buf = match file.lines().collect::<Result<Vec<_>, _>>() {
        Ok(lines) => lines,
        Err(error) => {
            return FormatResult::Err(FormatError {
                msg: error.to_string(),
                filename: Some(filename),
                command: None,
                language: None,
                start: None,
            })
        }
    };

    match run(buf, conf, &parser, !write, best_effort).await {
        FormatResult::Changed(r) => {
            if write {
                if let Some(error) = tokio::fs::write(&filename, r).await.err() {
                    return FormatResult::Err(FormatError {
                        msg: error.to_string(),
                        filename: Some(filename),
                        command: None,
                        language: None,
                        start: None,
                    });
                }
            }
            FormatResult::Changed(filename)
        }
        FormatResult::Unchanged(_) => FormatResult::Unchanged(filename),
        FormatResult::Err(mut error) => {
            // `run` does not know the file name; attach it here.
            error.filename = Some(filename);
            FormatResult::Err(error)
        }
    }
}

pub async fn run_stdin(
    conf: &Conf,
    filename: Option<&str>,
    parser: Option<&str>,
    best_effort: bool,
) -> FormatResult {
    let parser = match utils::get_parser(filename, parser) {
        Ok(p) => p,
        Err(e) => return FormatResult::Err(e),
    };

    let buf = io::stdin().lines().map(|l| l.unwrap()).collect::<Vec<_>>();

    match run(buf, conf, &parser, false, best_effort).await {
        FormatResult::Changed(r) => {
            let mut stdout = io::stdout().lock();
            stdout.write_all(r.as_bytes()).unwrap();
            FormatResult::Changed("stdin".to_string())
        }
        FormatResult::Unchanged(r) => {
            let mut stdout = io::stdout().lock();
            stdout.write_all(r.as_bytes()).unwrap();
            FormatResult::Unchanged("stdin".to_string())
        }
        FormatResult::Err(e) => FormatResult::Err(e),
    }
}

/// Per-codeblock state carried from the query match, through the formatting
/// task, back to the loop in `run` that splices the result into the buffer.
struct FormatCtx {
    // Text of the `language` capture; used as the key into `Conf::languages`.
    language: String,
    // Start row of the `codeblock` capture.
    codeblock_start: usize,
    // Start row of the `content` capture.
    start: usize,
    // End row of the `content` capture.
    end: usize,
    // Hash (via `utils::get_hash`) of the dedented content before formatting.
    input_hash: u64,
}

/// Formats every codeblock of a document that has a configured formatter.
///
/// * `buf` - the document, one entry per line.
/// * `conf` - formatter commands per language; codeblocks whose language has
///   no entry are skipped.
/// * `parser` - name of the document parser, passed to `tree::get_tree` and
///   `tree::get_query`.
/// * `fail_fast` - stop as soon as one codeblock differs from its formatted
///   version; the returned text is then only partially updated.
/// * `best_effort` - skip codeblocks whose formatter failed instead of
///   returning the error.
///
/// Returns `Changed(text)` when at least one codeblock differed,
/// `Unchanged(text)` otherwise, and `Err` when no parser exists for `parser`,
/// a formatting task could not be joined, or (without `best_effort`) a
/// formatter failed. The returned text always ends with a newline.
async fn run(
    mut buf: Vec<String>,
    conf: &Conf,
    parser: &str,
    fail_fast: bool,
    best_effort: bool,
) -> FormatResult {
    let src = buf.join("\n");
    let src_bytes = src.as_bytes();
    let tree = match tree::get_tree(parser, src_bytes) {
        Some(t) => t,
        None => {
            return FormatResult::Err(FormatError {
                msg: format!("No parser found for {}.", parser),
                filename: None,
                command: None,
                language: None,
                start: None,
            })
        }
    };
    let query = tree::get_query(parser).unwrap();

    // Ordered, so results come back in the order the codeblocks were queued.
    let mut futures: FuturesOrdered<_> = FuturesOrdered::new();

    let mut cursor = tree_sitter::QueryCursor::new();
    for each_match in cursor.matches(&query, tree.root_node(), src_bytes) {
        let mut content = String::new();
        let mut ctx = FormatCtx {
            language: String::new(),
            codeblock_start: 0,
            start: 0,
            end: 0,
            input_hash: 0,
        };

        for capture in each_match.captures.iter() {
            let mut range = capture.node.range();

            // Let query directives adjust the captured range; keep the range
            // as is when a directive yields nothing.
            for predicate in query.general_predicates(each_match.pattern_index) {
                range = tree::handle_directive(&predicate.operator, &range, &predicate.args)
                    .unwrap_or(range);
            }

            let capture_name = &query.capture_names()[capture.index as usize];

            if capture_name == "language" {
                ctx.language = String::from(&src[range.start_byte..range.end_byte]);
            }
            if capture_name == "content" {
                ctx.start = range.start_point.row;
                ctx.end = range.end_point.row;
                let mut end_byte = range.end_byte;

                // Workaround for bug in markdown parser when the codeblock is the last thing in a
                // buffer.
                // `ends_with` is used instead of slicing the last three bytes:
                // slicing at `end_byte - 3` panics when that offset is inside
                // a multi-byte character, and underflows for `end_byte < 3`.
                if parser == "markdown" && src[range.start_byte..end_byte].ends_with("```") {
                    end_byte -= 3
                }

                content = String::from(dedent(&src[range.start_byte..end_byte]));
            }
            if capture_name == "codeblock" {
                ctx.codeblock_start = range.start_point.row;
            }
        }

        let formatter = conf.languages.get(&ctx.language);
        let formatter = match formatter {
            Some(f) => f,
            None => continue,
        };
        let formatter = formatter.iter().map(|f| f.to_owned()).collect();

        ctx.input_hash = utils::get_hash(&content);
        futures.push_back(tokio::spawn(async move {
            format(ctx, formatter, &content).await
        }));
    }

    let mut formatted = false;
    // Number of lines added (positive) or removed (negative) by the
    // replacements done so far; shifts the rows recorded on the original text.
    let mut offset: i32 = 0;
    while let Some(output) = futures.next().await {
        // Outer result: the spawned task itself could not be joined.
        let output = match output {
            Ok(o) => o,
            Err(e) => {
                return FormatResult::Err(FormatError {
                    msg: e.to_string(),
                    filename: None,
                    command: None,
                    language: None,
                    start: None,
                });
            }
        };
        // Inner result: the formatter failed for this codeblock.
        let (ctx, output) = match output {
            Ok(o) => o,
            Err(e) => {
                if best_effort {
                    continue;
                }
                return FormatResult::Err(e);
            }
        };

        // Re-apply the indentation of the first content line to every line of
        // the formatter output.
        let indent = utils::get_start_whitespace(&buf[(ctx.start as i32 + offset) as usize]);

        let mut fixed_output = String::new();
        for line in output.lines() {
            fixed_output.push_str(&indent);
            fixed_output.push_str(line);
            fixed_output.push('\n');
        }

        // trim start for the hash because treesitter ignores leading indent
        let output_hash = utils::get_hash(fixed_output.trim_start());
        if ctx.input_hash != output_hash {
            formatted = true;
            if fail_fast {
                break;
            }
        }

        // Replace the old content lines with the formatted ones.
        buf.drain((ctx.start as i32 + offset) as usize..(ctx.end as i32 + offset) as usize);

        let mut counter = 0;
        for (i, line) in fixed_output.lines().enumerate() {
            buf.insert(i + (ctx.start as i32 + offset) as usize, line.to_string());
            counter += 1;
        }

        offset += counter - (ctx.end as i32 - ctx.start as i32);
    }

    let output = buf.join("\n") + "\n";
    if formatted {
        return FormatResult::Changed(output);
    }
    FormatResult::Unchanged(output)
}

/// A formatter command line split into the program and its arguments.
#[derive(Debug, PartialEq)]
struct ParsedCommand<'a> {
    /// Program to execute.
    cmd: &'a str,
    /// Arguments passed to the program, in order.
    args: Vec<&'a str>,
}

/// Splits `raw_command` at every single whitespace character.
///
/// The first piece becomes the command and all remaining pieces become the
/// arguments. Consecutive or trailing whitespace therefore yields empty
/// arguments, and a command line that is empty or starts with whitespace is
/// rejected with `"No command provided."`.
fn parse_command<'a>(raw_command: &'a str) -> Result<ParsedCommand<'a>, &str> {
    let mut pieces = raw_command.split(char::is_whitespace);
    match pieces.next() {
        None => Err("No command found."),
        Some("") => Err("No command provided."),
        Some(cmd) => Ok(ParsedCommand {
            cmd,
            args: pieces.collect(),
        }),
    }
}

/// Pipes `content` through every command in `formatter`, in order.
///
/// Each command receives the output of the previous one. On success the
/// context is handed back together with the final text. The first command
/// that cannot be parsed or that fails aborts the chain with a `FormatError`
/// carrying the codeblock language and `:<start row>`; the command name is
/// included only when the command could be parsed.
async fn format(
    ctx: FormatCtx,
    formatter: Vec<String>,
    content: &str,
) -> Result<(FormatCtx, String), FormatError> {
    let mut text = content.to_owned();

    for raw in &formatter {
        // Builds the error for this codeblock; `filename` is left unset here.
        let failure = |msg: String, command: Option<String>| FormatError {
            msg,
            filename: None,
            command,
            language: Some(ctx.language.clone()),
            start: Some(format!(":{}", ctx.start)),
        };

        let parsed = match parse_command(raw) {
            Ok(parsed) => parsed,
            Err(msg) => return Err(failure(msg.to_owned(), None)),
        };

        text = match format_single(&parsed, &text) {
            Ok(formatted) => formatted,
            Err(e) => return Err(failure(e.to_string(), Some(parsed.cmd.to_string()))),
        };
    }

    Ok((ctx, text))
}

/// Runs one formatter command, feeding `input` to its stdin, and returns what
/// it wrote to stdout.
///
/// Errors:
/// * the process could not be spawned or waited for;
/// * the process exited unsuccessfully - the error message is its stderr
///   (decoded lossily);
/// * the process succeeded but its stdin could not be fully written, or its
///   stdout is not valid UTF-8.
fn format_single(formatter: &ParsedCommand, input: &str) -> Result<String, Error> {
    let mut child = Command::new(formatter.cmd)
        .args(&formatter.args)
        .stdin(Stdio::piped())
        .stderr(Stdio::piped())
        .stdout(Stdio::piped())
        .spawn()?;

    let mut stdin = child.stdin.take().ok_or_else(|| {
        Error::new(
            ErrorKind::Other,
            String::from("Child process stdin has not been captured."),
        )
    })?;

    // Feed stdin from a separate thread while the output is collected below.
    // Writing everything first and reading afterwards can deadlock once the
    // child blocks on a full stdout/stderr pipe while we block on its stdin.
    // Dropping `stdin` at the end of the closure closes the pipe, which
    // signals end-of-input to the child.
    let payload = input.as_bytes().to_vec();
    let writer = std::thread::spawn(move || stdin.write_all(&payload));

    let output = child.wait_with_output()?;
    let write_result = writer.join().map_err(|_| {
        Error::new(
            ErrorKind::Other,
            "Thread writing to child process stdin panicked.",
        )
    })?;

    // Check the exit status before the write result: a formatter that exits
    // early makes the write fail with a broken pipe, and its own stderr is the
    // more useful message.
    if !output.status.success() {
        return Err(Error::new(
            ErrorKind::Other,
            String::from_utf8_lossy(&output.stderr).into_owned(),
        ));
    }
    write_result?;

    String::from_utf8(output.stdout).map_err(|e| Error::new(ErrorKind::InvalidData, e))
}

// Unit tests for `parse_command`.
#[cfg(test)]
mod tests {
    use super::*;

    // Empty and whitespace-only command lines are rejected.
    #[test]
    fn test_parse_empty_command() {
        assert_eq!(Err("No command provided."), parse_command(""));
        assert_eq!(Err("No command provided."), parse_command("      "));
    }

    // Pins the current splitting behaviour: trailing whitespace produces a
    // trailing empty argument.
    #[test]
    fn test_parse_whitespace_args() {
        assert_eq!(
            Ok(ParsedCommand {
                cmd: "shellharden",
                args: vec!["--transform", ""]
            }),
            parse_command("shellharden --transform ")
        );
    }
}


================================================
FILE: src/main.rs
================================================
use clap::{App, Arg, ArgMatches};
mod config;
mod format;
use format::FormatResult;
mod tree;
mod utils;
use futures::{stream::FuturesUnordered, StreamExt};
use std::process;
use termcolor::{ColorChoice, StandardStream};

/// Entry point: parses the command line, loads the configuration and then
/// formats either the given files or stdin.
///
/// Prints the help text and returns when no files are given and stdin is a
/// terminal. Exits with status 1 when no config file can be found or the
/// config file cannot be loaded.
#[tokio::main]
async fn main() {
    // Colors default to automatic detection on a terminal and are disabled
    // when stdout is not a terminal; `--color` can override this below.
    let (mut color_choice, clap_color_choice) = if atty::is(atty::Stream::Stdout) {
        (ColorChoice::Auto, clap::ColorChoice::Auto)
    } else {
        (ColorChoice::Never, clap::ColorChoice::Never)
    };

    let mut app =
        App::new("cbfmt")
            .version("0.2.0")
            .author("Lukas Reineke <lukas@reineke.jp>")
            .about("A tool to format codeblocks inside markdown, org, and restructuredtext documents.\nIt iterates over all codeblocks, and formats them with the tool(s) specified for the language of the block.")
            .arg(
                Arg::with_name("config")
                    .long("config")
                    .value_name("FILE")
                    .help("Sets a custom config file.")
                    .takes_value(true),
            )
            .arg(
                Arg::with_name("check")
                    .short('c')
                    .long("check")
                    .takes_value(false)
                    .help("Check if the given files are formatted. Print the path to unformatted files and exit with exit code 1 if they are not.")
            )
            .arg(
                Arg::with_name("fail_fast")
                    .long("fail-fast")
                    .takes_value(false)
                    .help("Exit as soon as one file is not formatted correctly.")
            )
            .arg(
                Arg::with_name("write")
                    .short('w')
                    .long("write")
                    .takes_value(false)
                    .help("Edit files in-place.")
            )
            .arg(
                Arg::with_name("best_effort")
                    .long("best-effort")
                    .takes_value(false)
                    .help("Ignore formatting errors and continue with the next codeblock.")
            )
            .arg(
                Arg::with_name("parser")
                    .short('p')
                    .long("parser")
                    .value_name("markdown|org|restructuredtext")
                    .help("Sets the parser to use.")
                    .takes_value(true),
            )
            .arg(
                Arg::with_name("stdin_filepath")
                    .long("stdin-filepath")
                    .help("Path to the file to pretend that stdin comes from.")
                    .takes_value(true),
            )
            .arg(
                Arg::with_name("color")
                    .long("color")
                    .value_name("never|auto|always")
                    .help("Use colored output.")
                    .default_value("auto")
                    .takes_value(true),
            )
            .arg(
                Arg::with_name("files")
                    .value_name("file/dir/glob")
                    .help("List of files to process. If no files are given cbfmt will read from Stdin.")
                    .index(1)
                    .multiple_values(true),
            )
            .color(clap_color_choice);

    // Parse a copy so `app` stays available for `print_help` below.
    let matches = app.to_owned().get_matches();

    if let Some(color) = matches.value_of("color") {
        if color == "never" {
            color_choice = ColorChoice::Never;
        } else if color == "always" {
            color_choice = ColorChoice::Always;
        }
    }

    // Nothing to do: no files were given and stdin is a terminal.
    if matches.values_of("files").is_none() && atty::is(atty::Stream::Stdin) {
        app.print_help().unwrap();
        return;
    }

    let mut stderr = StandardStream::stderr(color_choice);

    let config_path = match matches.value_of("config") {
        Some(p) => p.to_owned(),
        None => match utils::find_closest_config() {
            Some(p) => p,
            None => {
                utils::print_error(&mut stderr, "Could not find config file.");
                process::exit(1);
            }
        },
    };
    let conf = match config::get(&config_path) {
        Ok(c) => c,
        Err(e) => {
            // Include the path and the underlying cause: `config::get` also
            // fails when the file cannot be read, not only on invalid TOML.
            utils::print_error(
                &mut stderr,
                &format!("Could not parse config file {config_path}: {e}"),
            );
            process::exit(1);
        }
    };

    match matches.values_of("files") {
        Some(_) => use_files(matches, &conf, color_choice).await,
        None => use_stdin(matches, &conf).await,
    }
}

/// Formats (or checks) every file selected on the command line and reports
/// the outcome per file.
///
/// Exits with status 1 when the file list cannot be built, when any file
/// produced an error, or when a file would change and `--write` is not set.
async fn use_files(matches: ArgMatches, conf: &config::Conf, color_choice: ColorChoice) {
    let mut out = StandardStream::stdout(color_choice);
    let mut err = StandardStream::stderr(color_choice);

    let check = matches.is_present("check");
    let write = matches.is_present("write");
    let best_effort = matches.is_present("best_effort");
    let fail_fast = matches.is_present("fail_fast");
    let parser = matches.value_of("parser");

    let requested = matches.values_of("files").unwrap();
    let paths = match utils::get_files(requested) {
        Ok(paths) => paths,
        Err(e) => {
            utils::print_error(&mut err, &e.to_string());
            process::exit(1);
        }
    };

    // All files are processed concurrently; results arrive as they finish.
    let mut pending: FuturesUnordered<_> = FuturesUnordered::new();
    for path in paths {
        pending.push(format::run_file(conf, path, parser, write, best_effort));
    }

    let mut errors = 0;
    let mut unchanged = 0;
    let mut changed = 0;

    while let Some(outcome) = pending.next().await {
        // Each arm reports the file and says whether to stop early.
        let stop = match outcome {
            FormatResult::Unchanged(path) => {
                unchanged += 1;
                if !check {
                    if write {
                        utils::print_unchanged(&mut out, &path);
                    } else {
                        utils::print_ok(&mut out, &path);
                    }
                }
                false
            }
            FormatResult::Changed(path) => {
                changed += 1;
                if check {
                    eprintln!("{path}");
                } else if write {
                    utils::print_ok(&mut out, &path);
                } else {
                    utils::print_fail(&mut err, &path);
                }
                // A changed file only counts as a failure when not writing.
                fail_fast && !write
            }
            FormatResult::Err(e) => {
                errors += 1;
                if check {
                    eprintln!("{}", e.filename.as_deref().unwrap_or("Unknown"));
                } else {
                    utils::print_error(&mut err, &e.to_string());
                }
                fail_fast
            }
        };

        if stop {
            println!("Failed fast...");
            break;
        }
    }

    let total = unchanged + changed + errors;
    if write {
        println!("\n[{changed}/{total}] files were written.");
    } else if !check {
        println!("\n[{unchanged}/{total}] files are formatted correctly.");
    }

    let failed = errors > 0 || (changed > 0 && !write);
    if failed {
        process::exit(1);
    }
}

/// Formats the document read from stdin. On failure, prints the error to
/// stderr and exits with status 1.
async fn use_stdin(matches: ArgMatches, conf: &config::Conf) {
    let outcome = format::run_stdin(
        conf,
        matches.value_of("stdin_filepath"),
        matches.value_of("parser"),
        matches.is_present("best_effort"),
    )
    .await;

    match outcome {
        FormatResult::Err(e) => {
            eprintln!("{e}");
            process::exit(1);
        }
        FormatResult::Changed(_) | FormatResult::Unchanged(_) => {}
    }
}


================================================
FILE: src/tree.rs
================================================
use tree_sitter::Parser;

/// Parses `text` with the tree-sitter grammar selected by `parser_lang`.
///
/// Recognised values are `"markdown"`, `"org"` and `"restructuredtext"`;
/// any other value returns `None`.
///
/// # Panics
///
/// Panics if the grammar cannot be loaded into the parser or if the parser
/// returns no tree for the input.
pub fn get_tree(parser_lang: &str, text: &[u8]) -> Option<tree_sitter::Tree> {
    // Pick the grammar together with the message used if loading it fails.
    let (grammar, load_error) = match parser_lang {
        "markdown" => (
            tree_sitter_md::language(),
            "Could not load markdown grammar",
        ),
        "org" => (tree_sitter_org::language(), "Could not load org grammar"),
        "restructuredtext" => (
            tree_sitter_rst::language(),
            "Could not load restructuredtext grammar",
        ),
        _ => return None,
    };

    let mut parser = Parser::new();
    parser.set_language(grammar).expect(load_error);

    let tree = parser.parse(text, None).expect("Could not parse input");
    Some(tree)
}

/// Builds the tree-sitter query that locates code blocks for `parser_lang`.
///
/// Every query captures `@language`, `@content` and `@codeblock`:
/// - `"markdown"`: fenced code blocks with a language in the info string.
/// - `"org"`: blocks whose name matches `(SRC|src)`.
/// - `"restructuredtext"`: directives whose name matches `code`; the query
///   also carries an `#offset!` directive for the `@content` capture.
///
/// Returns `None` for any other `parser_lang`.
///
/// # Panics
///
/// Panics if the query fails to compile against the selected grammar.
pub fn get_query(parser_lang: &str) -> Option<tree_sitter::Query> {
    // Grammar, query source and the failure message for the chosen language.
    let (grammar, pattern, load_error) = match parser_lang {
        "markdown" => (
            tree_sitter_md::language(),
            r#"
                    (fenced_code_block
                        (info_string (language) @language)
                        (code_fence_content) @content) @codeblock
                "#,
            "Could not load markdown query",
        ),
        "org" => (
            tree_sitter_org::language(),
            r#"
                    (block
                        name: (expr) @_name
                        (#match? @_name "(SRC|src)")
                        parameter: (expr) @language
                        contents: (contents) @content) @codeblock
                "#,
            "Could not load org query",
        ),
        "restructuredtext" => (
            tree_sitter_rst::language(),
            r#"
                    (directive
                        name: (type) @_name
                        (#match? @_name "code")
                        body: (body
                            (arguments) @language
                            (content) @content
                            (#offset! @content 0 0 1 0))) @codeblock
                    
                "#,
            "Could not load restructuredtext query",
        ),
        _ => return None,
    };

    Some(tree_sitter::Query::new(grammar, pattern).expect(load_error))
}

/// Infers the parser language from a file name's suffix.
///
/// The comparison is case-insensitive: `.md` maps to `"markdown"`, `.org`
/// to `"org"` and `.rst` to `"restructuredtext"`. Any other name yields
/// `None`.
///
/// The returned names are string literals, so the result is `'static` and
/// does not keep `filename` borrowed.
pub fn get_parser_lang_from_filename(filename: &str) -> Option<&'static str> {
    // Suffix -> parser language. The suffixes cannot overlap, so order is irrelevant.
    const SUFFIXES: [(&str, &str); 3] = [
        (".md", "markdown"),
        (".org", "org"),
        (".rst", "restructuredtext"),
    ];

    let filename = filename.to_lowercase();
    SUFFIXES
        .iter()
        .find(|(suffix, _)| filename.ends_with(*suffix))
        .map(|(_, lang)| *lang)
}

/// Applies a query directive to `range` and returns the adjusted range.
///
/// Only `"offset!"` is handled. Its arguments at indices 1 to 4 are read as
/// start row, start column, end row and end column offsets and added to the
/// corresponding points of `range`. All other fields of the range are
/// copied unchanged. Any other directive returns `None`.
///
/// # Panics
///
/// Panics if `args` has fewer than five entries, if one of the four offset
/// arguments is not a string argument, or if it does not parse as `usize`.
pub fn handle_directive(
    directive: &str,
    range: &tree_sitter::Range,
    args: &Vec<tree_sitter::QueryPredicateArg>,
) -> Option<tree_sitter::Range> {
    if directive != "offset!" {
        return None;
    }

    // Reads the argument at `index` as an unsigned offset.
    let offset_at = |index: usize| -> usize {
        match &args[index] {
            tree_sitter::QueryPredicateArg::String(value) => value.parse::<usize>().unwrap(),
            _ => panic!("Unexpected argument type for offset!"),
        }
    };

    let start_row_offset = offset_at(1);
    let start_col_offset = offset_at(2);
    let end_row_offset = offset_at(3);
    let end_col_offset = offset_at(4);

    let mut shifted = range.clone();
    shifted.start_point.row += start_row_offset;
    shifted.start_point.column += start_col_offset;
    shifted.end_point.row += end_row_offset;
    shifted.end_point.column += end_col_offset;
    Some(shifted)
}


================================================
FILE: src/utils.rs
================================================
use super::format::FormatError;
use super::tree;
use clap::Values;
use ignore::WalkBuilder;
use std::collections::hash_map::DefaultHasher;
use std::env;
use std::fs;
use std::hash::Hash;
use std::hash::Hasher;
use std::io;
use termcolor::{Color, ColorSpec, StandardStream, WriteColor};

/// Returns the run of whitespace characters at the start of `text`.
///
/// The result is empty when `text` is empty or begins with a
/// non-whitespace character.
pub fn get_start_whitespace(text: &str) -> String {
    text.chars().take_while(|ch| ch.is_whitespace()).collect()
}

/// Hashes `text` with a freshly created `DefaultHasher` and returns the
/// resulting 64-bit value.
pub fn get_hash(text: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(text, &mut state);
    Hasher::finish(&state)
}

/// Expands the command-line `files` arguments into a sorted, de-duplicated
/// list of file paths.
///
/// An argument that is a regular file is taken as-is. Any other argument is
/// walked with `WalkBuilder` (with the hidden-file filter turned off), and
/// only files whose name maps to a known parser language are kept.
///
/// # Errors
///
/// Returns an `io::Error` of the same kind, prefixed with the offending
/// argument, when the metadata of an argument cannot be read.
pub fn get_files(files: Values) -> Result<Vec<String>, io::Error> {
    let mut result = Vec::new();

    for file in files {
        let meta = fs::metadata(file)
            .map_err(|e| io::Error::new(e.kind(), format!("{file}: {e}")))?;

        if meta.is_file() {
            result.push(file.to_string());
            continue;
        }

        let walker = WalkBuilder::new(file).hidden(false).build();
        for entry in walker.filter_map(|e| e.ok()) {
            // `fs::metadata` follows symlinks and fails for entries such as
            // dangling links. Skip those instead of panicking, the same way
            // walk errors are already dropped above.
            let is_file = fs::metadata(entry.path())
                .map(|m| m.is_file())
                .unwrap_or(false);
            if !is_file {
                continue;
            }

            let path = entry.path().display().to_string();
            if tree::get_parser_lang_from_filename(&path).is_some() {
                result.push(path);
            }
        }
    }

    // Overlapping arguments can yield the same path more than once.
    result.sort();
    result.dedup();

    Ok(result)
}

/// Resolves the parser language to use.
///
/// An explicitly given `parser` always wins. Otherwise the language is
/// inferred from `filename` via `tree::get_parser_lang_from_filename`.
///
/// # Errors
///
/// Returns a `FormatError` with the message `Could not infer parser.` (and
/// the file name, if any) when neither source yields a language.
pub fn get_parser(filename: Option<&str>, parser: Option<&str>) -> Result<String, FormatError> {
    let explicit = parser.map(str::to_owned);
    // Only consulted when no parser was given explicitly.
    let inferred = || {
        filename
            .and_then(|f| tree::get_parser_lang_from_filename(f))
            .map(str::to_owned)
    };

    explicit.or_else(inferred).ok_or_else(|| FormatError {
        msg: "Could not infer parser.".to_string(),
        filename: filename.map(str::to_owned),
        command: None,
        language: None,
        start: None,
    })
}

/// Searches for `.cbfmt.toml` in the current working directory and then in
/// each parent directory, returning the first path that exists.
///
/// Returns `None` when the current directory cannot be determined, when no
/// directory up to the root contains the file, or when the path that was
/// found is not valid UTF-8.
pub fn find_closest_config() -> Option<String> {
    let cwd = env::current_dir().ok()?;

    // `ancestors` yields the directory itself first, then every parent.
    for dir in cwd.ancestors() {
        let candidate = dir.join(".cbfmt.toml");
        if candidate.exists() {
            return Some(candidate.to_str()?.to_string());
        }
    }
    None
}

/// Prints `[Okay]: {text}` on standard output, with `Okay` in bold green.
///
/// The colour is set on `stdout` before the label and cleared right after.
///
/// # Panics
///
/// Panics if changing the colour on `stdout` fails.
pub fn print_ok(stdout: &mut StandardStream, text: &str) {
    let mut label_style = ColorSpec::new();
    label_style.set_fg(Some(Color::Green)).set_bold(true);

    print!("[");
    stdout.set_color(&label_style).unwrap();
    print!("Okay");

    // Back to the default style before printing the rest of the line.
    label_style.clear();
    stdout.set_color(&label_style).unwrap();
    println!("]: {text}");
}

/// Prints `[Same]: {text}` on standard output, with `Same` in bold blue.
///
/// The colour is set on `stdout` before the label and cleared right after.
///
/// # Panics
///
/// Panics if changing the colour on `stdout` fails.
pub fn print_unchanged(stdout: &mut StandardStream, text: &str) {
    let mut label_style = ColorSpec::new();
    label_style.set_fg(Some(Color::Blue)).set_bold(true);

    print!("[");
    stdout.set_color(&label_style).unwrap();
    print!("Same");

    // Back to the default style before printing the rest of the line.
    label_style.clear();
    stdout.set_color(&label_style).unwrap();
    println!("]: {text}");
}

/// Prints `[Fail]: {text}` on standard error, with `Fail` in bold yellow.
///
/// The colour is set on `stderr` before the label and cleared right after.
///
/// # Panics
///
/// Panics if changing the colour on `stderr` fails.
pub fn print_fail(stderr: &mut StandardStream, text: &str) {
    let mut label_style = ColorSpec::new();
    label_style.set_fg(Some(Color::Yellow)).set_bold(true);

    eprint!("[");
    stderr.set_color(&label_style).unwrap();
    eprint!("Fail");

    // Back to the default style before printing the rest of the line.
    label_style.clear();
    stderr.set_color(&label_style).unwrap();
    eprintln!("]: {text}");
}

/// Prints `[Error]: {text}` on standard error, with `Error` in bold red.
///
/// The colour is set on `stderr` before the label and cleared right after.
///
/// # Panics
///
/// Panics if changing the colour on `stderr` fails.
pub fn print_error(stderr: &mut StandardStream, text: &str) {
    let mut label_style = ColorSpec::new();
    label_style.set_fg(Some(Color::Red)).set_bold(true);

    eprint!("[");
    stderr.set_color(&label_style).unwrap();
    eprint!("Error");

    // Back to the default style before printing the rest of the line.
    label_style.clear();
    stderr.set_color(&label_style).unwrap();
    eprintln!("]: {text}");
}
Download .txt
gitextract_rak83c_g/

├── .github/
│   ├── FUNDING.yml
│   ├── dependabot.yml
│   └── workflows/
│       ├── pr_check.yml
│       └── release.yml
├── .gitignore
├── Cargo.toml
├── LICENSE.md
├── README.md
└── src/
    ├── config.rs
    ├── format.rs
    ├── main.rs
    ├── tree.rs
    └── utils.rs
Download .txt
SYMBOL INDEX (31 symbols across 5 files)

FILE: src/config.rs
  type Conf (line 5) | pub struct Conf {
  function get (line 9) | pub fn get(name: &str) -> Result<Conf, std::io::Error> {

FILE: src/format.rs
  type FormatError (line 12) | pub struct FormatError {
    method fmt (line 21) | fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
  type FormatResult (line 38) | pub enum FormatResult {
  function run_file (line 44) | pub async fn run_file(
  function run_stdin (line 93) | pub async fn run_stdin(
  type FormatCtx (line 121) | struct FormatCtx {
  function run (line 129) | async fn run(
  type ParsedCommand (line 271) | struct ParsedCommand<'a> {
  function parse_command (line 276) | fn parse_command<'a>(raw_command: &'a str) -> Result<ParsedCommand<'a>, ...
  function format (line 288) | async fn format(
  function format_single (line 328) | fn format_single(formatter: &ParsedCommand, input: &str) -> Result<Strin...
  function test_parse_empty_command (line 361) | fn test_parse_empty_command() {
  function test_parse_whitespace_args (line 367) | fn test_parse_whitespace_args() {

FILE: src/main.rs
  function main (line 12) | async fn main() {
  function use_files (line 129) | async fn use_files(matches: ArgMatches, conf: &config::Conf, color_choic...
  function use_stdin (line 216) | async fn use_stdin(matches: ArgMatches, conf: &config::Conf) {

FILE: src/tree.rs
  function get_tree (line 3) | pub fn get_tree(parser_lang: &str, text: &[u8]) -> Option<tree_sitter::T...
  function get_query (line 30) | pub fn get_query(parser_lang: &str) -> Option<tree_sitter::Query> {
  function get_parser_lang_from_filename (line 76) | pub fn get_parser_lang_from_filename(filename: &str) -> Option<&str> {
  function handle_directive (line 90) | pub fn handle_directive(

FILE: src/utils.rs
  function get_start_whitespace (line 13) | pub fn get_start_whitespace(text: &str) -> String {
  function get_hash (line 27) | pub fn get_hash(text: &str) -> u64 {
  function get_files (line 33) | pub fn get_files(files: Values) -> Result<Vec<String>, io::Error> {
  function get_parser (line 68) | pub fn get_parser(filename: Option<&str>, parser: Option<&str>) -> Resul...
  function find_closest_config (line 86) | pub fn find_closest_config() -> Option<String> {
  function print_ok (line 105) | pub fn print_ok(stdout: &mut StandardStream, text: &str) {
  function print_unchanged (line 117) | pub fn print_unchanged(stdout: &mut StandardStream, text: &str) {
  function print_fail (line 129) | pub fn print_fail(stderr: &mut StandardStream, text: &str) {
  function print_error (line 141) | pub fn print_error(stderr: &mut StandardStream, text: &str) {
Condensed preview — 13 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (39K chars).
[
  {
    "path": ".github/FUNDING.yml",
    "chars": 24,
    "preview": "github: [lukas-reineke]\n"
  },
  {
    "path": ".github/dependabot.yml",
    "chars": 110,
    "preview": "version: 2\nupdates:\n  - package-ecosystem: 'cargo'\n    directory: '/'\n    schedule:\n      interval: 'monthly'\n"
  },
  {
    "path": ".github/workflows/pr_check.yml",
    "chars": 860,
    "preview": "name: Pull request check\n\non:\n  pull_request:\n\njobs:\n  block-fixup:\n    runs-on: ubuntu-latest\n\n    steps:\n      - uses:"
  },
  {
    "path": ".github/workflows/release.yml",
    "chars": 3739,
    "preview": "name: upload\n\n# copied from https://github.com/rust-lang/rustfmt/blob/master/.github/workflows/upload-assets.yml\n\non:\n  "
  },
  {
    "path": ".gitignore",
    "chars": 8,
    "preview": "/target\n"
  },
  {
    "path": "Cargo.toml",
    "chars": 781,
    "preview": "[package]\nname = \"cbfmt\"\nversion = \"0.2.0\"\nedition = \"2021\"\ndescription = \"A tool to format codeblocks inside markdown, "
  },
  {
    "path": "LICENSE.md",
    "chars": 1074,
    "preview": "The MIT Licence\n\nCopyright (c) 2022 Lukas Reineke\n\nPermission is hereby granted, free of charge, to any person obtaining"
  },
  {
    "path": "README.md",
    "chars": 2615,
    "preview": "<p align=\"center\">\n    <img height=\"400\" src=\"https://user-images.githubusercontent.com/12900252/188409065-2149a392-e5cb"
  },
  {
    "path": "src/config.rs",
    "chars": 334,
    "preview": "use serde::Deserialize;\nuse std::collections::HashMap;\n\n#[derive(Debug, Deserialize)]\npub struct Conf {\n    pub language"
  },
  {
    "path": "src/format.rs",
    "chars": 11210,
    "preview": "use super::config::Conf;\nuse super::tree;\nuse super::utils;\nuse futures::{stream::FuturesOrdered, StreamExt};\nuse std::c"
  },
  {
    "path": "src/main.rs",
    "chars": 7690,
    "preview": "use clap::{App, Arg, ArgMatches};\nmod config;\nmod format;\nuse format::FormatResult;\nmod tree;\nmod utils;\nuse futures::{s"
  },
  {
    "path": "src/tree.rs",
    "chars": 4325,
    "preview": "use tree_sitter::Parser;\n\npub fn get_tree(parser_lang: &str, text: &[u8]) -> Option<tree_sitter::Tree> {\n    let mut par"
  },
  {
    "path": "src/utils.rs",
    "chars": 4042,
    "preview": "use super::format::FormatError;\nuse super::tree;\nuse clap::Values;\nuse ignore::WalkBuilder;\nuse std::collections::hash_m"
  }
]

About this extraction

This page contains the full source code of the lukas-reineke/cbfmt GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 13 files (35.9 KB), approximately 8.8k tokens, and a symbol index with 31 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!