Full Code of doctrine/lexer for AI

3.0.x c2db835430a2 cached
30 files
47.6 KB
12.6k tokens
58 symbols
1 requests
Download .txt
Repository: doctrine/lexer
Branch: 3.0.x
Commit: c2db835430a2
Files: 30
Total size: 47.6 KB

Directory structure:
gitextract_ls_k7a7l/

├── .doctrine-project.json
├── .gitattributes
├── .github/
│   ├── FUNDING.yml
│   ├── dependabot.yml
│   └── workflows/
│       ├── coding-standards.yml
│       ├── composer-lint.yml
│       ├── continuous-integration.yml
│       ├── release-on-milestone-closed.yml
│       ├── static-analysis.yml
│       └── website-schema.yml
├── .gitignore
├── LICENSE
├── README.md
├── UPGRADE.md
├── composer.json
├── docs/
│   └── en/
│       ├── dql-parser.rst
│       ├── index.rst
│       ├── sidebar.rst
│       └── simple-parser-example.rst
├── phpcs.xml.dist
├── phpstan.neon.dist
├── phpunit.xml.dist
├── src/
│   ├── AbstractLexer.php
│   └── Token.php
└── tests/
    ├── AbstractLexerTest.php
    ├── ConcreteLexer.php
    ├── EnumLexer.php
    ├── MutableLexer.php
    ├── TokenTest.php
    └── TokenType.php

================================================
FILE CONTENTS
================================================

================================================
FILE: .doctrine-project.json
================================================
{
    "active": true,
    "name": "Lexer",
    "slug": "lexer",
    "docsSlug": "doctrine-lexer",
    "versions": [
        {
            "name": "3.1",
            "branchName": "3.1.x",
            "slug": "latest",
            "upcoming": true
        },
        {
            "name": "3.0",
            "branchName": "3.0.x",
            "slug": "3.0",
            "current": true
        },
        {
            "name": "2.1",
            "branchName": "2.1.x",
            "slug": "2.1",
            "maintained": false
        },
        {
            "name": "2.0",
            "branchName": "2.0.x",
            "slug": "2.0",
            "maintained": false
        },
        {
            "name": "1.2",
            "branchName": "1.2.x",
            "slug": "1.2",
            "maintained": false
        },
        {
            "name": "1.1",
            "branchName": "1.1",
            "slug": "1.1",
            "maintained": false
        },
        {
            "name": "1.0",
            "branchName": "1.0",
            "slug": "1.0",
            "maintained": false
        }
    ]
}


================================================
FILE: .gitattributes
================================================
# Auto-detect text files, ensure they use LF.
* text=auto eol=lf

 # Exclude non-essential files from dist
/.github export-ignore
/docs export-ignore
/tests export-ignore
/.doctrine-project.json export-ignore
/.gitattributes export-ignore
/.gitignore export-ignore
/phpcs.xml.dist export-ignore
/phpstan.neon.dist export-ignore
/phpunit.xml.dist export-ignore


================================================
FILE: .github/FUNDING.yml
================================================
patreon: phpdoctrine
tidelift: packagist/doctrine%2Flexer
custom: https://www.doctrine-project.org/sponsorship.html


================================================
FILE: .github/dependabot.yml
================================================
version: 2
updates:
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"
    labels:
      - "CI"


================================================
FILE: .github/workflows/coding-standards.yml
================================================

name: "Coding Standards"

on:
  pull_request:
    branches:
      - "*.x"
    paths:
      - .github/workflows/coding-standards.yml
      - composer.*
      - src/**
      - phpcs.xml.dist
      - tests/**
  push:
    branches:
      - "*.x"
    paths:
      - .github/workflows/coding-standards.yml
      - composer.*
      - src/**
      - phpcs.xml.dist
      - tests/**

jobs:
  coding-standards:
    name: "Coding Standards"
    uses: "doctrine/.github/.github/workflows/coding-standards.yml@14.0.0"


================================================
FILE: .github/workflows/composer-lint.yml
================================================
name: "Composer Lint"

on:
  pull_request:
    branches:
      - "*.x"
    paths:
      - .github/workflows/composer-lint.yml
      - composer.json
  push:
    branches:
      - "*.x"
    paths:
      - .github/workflows/composer-lint.yml
      - composer.json

jobs:
  composer-lint:
    name: "Composer Lint"
    uses: "doctrine/.github/.github/workflows/composer-lint.yml@14.0.0"


================================================
FILE: .github/workflows/continuous-integration.yml
================================================

name: "Continuous Integration"

on:
  pull_request:
    branches:
      - "*.x"
    paths:
      - .github/workflows/continuous-integration.yml
      - composer.*
      - src/**
      - phpunit.xml.dist
      - tests/**
  push:
    branches:
      - "*.x"
    paths:
      - .github/workflows/continuous-integration.yml
      - composer.*
      - src/**
      - phpunit.xml.dist
      - tests/**

jobs:
  phpunit:
    name: "PHPUnit"
    uses: "doctrine/.github/.github/workflows/continuous-integration.yml@14.0.0"
    with:
      php-versions: '["8.1", "8.2", "8.3", "8.4", "8.5"]'
    secrets:
      CODECOV_TOKEN: "${{ secrets.CODECOV_TOKEN }}"


================================================
FILE: .github/workflows/release-on-milestone-closed.yml
================================================
name: "Automatic Releases"

on:
  milestone:
    types:
      - "closed"

jobs:
  release:
    name: "Git tag, release & create merge-up PR"
    uses: "doctrine/.github/.github/workflows/release-on-milestone-closed.yml@14.0.0"
    secrets:
      GIT_AUTHOR_EMAIL: ${{ secrets.GIT_AUTHOR_EMAIL }}
      GIT_AUTHOR_NAME: ${{ secrets.GIT_AUTHOR_NAME }}
      ORGANIZATION_ADMIN_TOKEN: ${{ secrets.ORGANIZATION_ADMIN_TOKEN }}
      SIGNING_SECRET_KEY: ${{ secrets.SIGNING_SECRET_KEY }}


================================================
FILE: .github/workflows/static-analysis.yml
================================================

name: "Static Analysis"

on:
  pull_request:
    branches:
      - "*.x"
    paths:
      - .github/workflows/static-analysis.yml
      - composer.*
      - src/**
      - phpstan*
      - tests/**
  push:
    branches:
      - "*.x"
    paths:
      - .github/workflows/static-analysis.yml
      - composer.*
      - src/**
      - phpstan*
      - tests/**

jobs:
  static-analysis:
    name: "Static Analysis"
    uses: "doctrine/.github/.github/workflows/phpstan.yml@14.0.0"


================================================
FILE: .github/workflows/website-schema.yml
================================================

name: "Website config validation"

on:
  pull_request:
    branches:
      - "*.x"
    paths:
      - ".doctrine-project.json"
      - ".github/workflows/website-schema.yml"
  push:
    branches:
      - "*.x"
    paths:
      - ".doctrine-project.json"
      - ".github/workflows/website-schema.yml"

jobs:
  json-validate:
    name: "Validate JSON schema"
    uses: "doctrine/.github/.github/workflows/website-schema.yml@14.0.0"


================================================
FILE: .gitignore
================================================
/vendor
/composer.lock
/phpunit.xml
/.phpunit.cache
/phpcs.xml
/.phpcs-cache


================================================
FILE: LICENSE
================================================
Copyright (c) 2006-2018 Doctrine Project

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
# Doctrine Lexer

[![Build Status](https://github.com/doctrine/lexer/workflows/Continuous%20Integration/badge.svg)](https://github.com/doctrine/lexer/actions)

Base library for a lexer that can be used in Top-Down, Recursive Descent Parsers.

This lexer is used in Doctrine Annotations and in Doctrine ORM (DQL).

https://www.doctrine-project.org/projects/lexer.html


================================================
FILE: UPGRADE.md
================================================
Note about upgrading: Doctrine uses static and runtime mechanisms to raise
awareness about deprecated code.

- Use of `@deprecated` docblock that is detected by IDEs (like PHPStorm) or
  Static Analysis tools (like Psalm, phpstan)
- Use of our low-overhead runtime deprecation API, details:
  https://github.com/doctrine/deprecations/

# Upgrade to 3.0.0

`Doctrine\Common\Lexer\Token` no longer implements `ArrayAccess`.
Parameter type declarations have been added to
`Doctrine\Common\Lexer\AbstractLexer` and `Doctrine\Common\Lexer\Token`.
You should add both parameter type declarations and return type declarations to
your lexers, based on the `@return` phpdoc.

# Upgrade to 2.0.0

`AbstractLexer::glimpse()` and `AbstractLexer::peek()` now return
instances of `Doctrine\Common\Lexer\Token`, which is an array-like class.
Using it as an array is deprecated in favor of using properties of that class.
Using `count()` on it is deprecated with no replacement.


================================================
FILE: composer.json
================================================
{
    "name": "doctrine/lexer",
    "description": "PHP Doctrine Lexer parser library that can be used in Top-Down, Recursive Descent Parsers.",
    "license": "MIT",
    "type": "library",
    "keywords": [
        "php",
        "parser",
        "lexer",
        "annotations",
        "docblock"
    ],
    "authors": [
        {
            "name": "Guilherme Blanco",
            "email": "guilhermeblanco@gmail.com"
        },
        {
            "name": "Roman Borschel",
            "email": "roman@code-factory.org"
        },
        {
            "name": "Johannes Schmitt",
            "email": "schmittjoh@gmail.com"
        }
    ],
    "homepage": "https://www.doctrine-project.org/projects/lexer.html",
    "require": {
        "php": "^8.1"
    },
    "require-dev": {
        "doctrine/coding-standard": "^14",
        "phpstan/phpstan": "^2",
        "phpunit/phpunit": "^10.5.58 || ^12.5.4"
    },
    "autoload": {
        "psr-4": {
            "Doctrine\\Common\\Lexer\\": "src"
        }
    },
    "autoload-dev": {
        "psr-4": {
            "Doctrine\\Tests\\Common\\Lexer\\": "tests"
        }
    },
    "config": {
        "allow-plugins": {
            "composer/package-versions-deprecated": true,
            "dealerdirect/phpcodesniffer-composer-installer": true
        },
        "sort-packages": true
    }
}


================================================
FILE: docs/en/dql-parser.rst
================================================
DQL Lexer
=========

Here is a more complicated example from the Doctrine ORM project.
The ``Doctrine\ORM\Query\Lexer`` implementation for DQL looks something
like the following:

.. code-block:: php

    <?php

    use Doctrine\Common\Lexer\AbstractLexer;

    /**
     * Scans a DQL query for tokens.
     *
     * Token types are plain integers, grouped in three ranges so that a parser
     * can tell symbols, identifier-like tokens and keywords apart by value.
     */
    class Lexer extends AbstractLexer
    {
        // All tokens that are not valid identifiers must be < 100
        public const T_NONE              = 1;
        public const T_INTEGER           = 2;
        public const T_STRING            = 3;
        public const T_INPUT_PARAMETER   = 4;
        public const T_FLOAT             = 5;
        public const T_CLOSE_PARENTHESIS = 6;
        public const T_OPEN_PARENTHESIS  = 7;
        public const T_COMMA             = 8;
        public const T_DIVIDE            = 9;
        public const T_DOT               = 10;
        public const T_EQUALS            = 11;
        public const T_GREATER_THAN      = 12;
        public const T_LOWER_THAN        = 13;
        public const T_MINUS             = 14;
        public const T_MULTIPLY          = 15;
        public const T_NEGATE            = 16;
        public const T_PLUS              = 17;
        public const T_OPEN_CURLY_BRACE  = 18;
        public const T_CLOSE_CURLY_BRACE = 19;

        // All tokens that are identifiers or keywords that could be considered as identifiers should be >= 100
        public const T_ALIASED_NAME         = 100;
        public const T_FULLY_QUALIFIED_NAME = 101;
        public const T_IDENTIFIER           = 102;

        // All keyword tokens should be >= 200
        public const T_ALL      = 200;
        public const T_AND      = 201;
        public const T_ANY      = 202;
        public const T_AS       = 203;
        public const T_ASC      = 204;
        public const T_AVG      = 205;
        public const T_BETWEEN  = 206;
        public const T_BOTH     = 207;
        public const T_BY       = 208;
        public const T_CASE     = 209;
        public const T_COALESCE = 210;
        public const T_COUNT    = 211;
        public const T_DELETE   = 212;
        public const T_DESC     = 213;
        public const T_DISTINCT = 214;
        public const T_ELSE     = 215;
        public const T_EMPTY    = 216;
        public const T_END      = 217;
        public const T_ESCAPE   = 218;
        public const T_EXISTS   = 219;
        public const T_FALSE    = 220;
        public const T_FROM     = 221;
        public const T_GROUP    = 222;
        public const T_HAVING   = 223;
        public const T_HIDDEN   = 224;
        public const T_IN       = 225;
        public const T_INDEX    = 226;
        public const T_INNER    = 227;
        public const T_INSTANCE = 228;
        public const T_IS       = 229;
        public const T_JOIN     = 230;
        public const T_LEADING  = 231;
        public const T_LEFT     = 232;
        public const T_LIKE     = 233;
        public const T_MAX      = 234;
        public const T_MEMBER   = 235;
        public const T_MIN      = 236;
        public const T_NEW      = 237;
        public const T_NOT      = 238;
        public const T_NULL     = 239;
        public const T_NULLIF   = 240;
        public const T_OF       = 241;
        public const T_OR       = 242;
        public const T_ORDER    = 243;
        public const T_OUTER    = 244;
        public const T_PARTIAL  = 245;
        public const T_SELECT   = 246;
        public const T_SET      = 247;
        public const T_SOME     = 248;
        public const T_SUM      = 249;
        public const T_THEN     = 250;
        public const T_TRAILING = 251;
        public const T_TRUE     = 252;
        public const T_UPDATE   = 253;
        public const T_WHEN     = 254;
        public const T_WHERE    = 255;
        public const T_WITH     = 256;

        /**
         * Creates a new query scanner object and tokenizes the query right away.
         *
         * @param string $input A query string.
         */
        public function __construct(string $input)
        {
            $this->setInput($input);
        }

        /**
         * Patterns whose matches become tokens.
         *
         * @return string[]
         */
        protected function getCatchablePatterns(): array
        {
            return [
                '[a-z_][a-z0-9_]*\:[a-z_][a-z0-9_]*(?:\\\[a-z_][a-z0-9_]*)*', // aliased name
                '[a-z_\\\][a-z0-9_]*(?:\\\[a-z_][a-z0-9_]*)*', // identifier or qualified name
                '(?:[0-9]+(?:[\.][0-9]+)*)(?:e[+-]?[0-9]+)?', // numbers
                "'(?:[^']|'')*'", // quoted strings
                '\?[0-9]*|:[a-z_][a-z0-9_]*', // parameters
            ];
        }

        /**
         * Patterns for whitespace and for any other single character.
         *
         * @return string[]
         */
        protected function getNonCatchablePatterns(): array
        {
            return ['\s+', '(.)'];
        }

        /**
         * Resolves the token type of a matched value.
         *
         * The value is taken by reference because quoted strings are rewritten
         * in place: the surrounding quotes are removed and doubled quotes are
         * collapsed into a single one.
         *
         * @param string $value The matched text; may be modified.
         *
         * @return int One of the T_* constants; T_NONE when nothing else applies.
         */
        protected function getType(string &$value): int
        {
            // Recognize numeric values
            if (is_numeric($value)) {
                if (str_contains($value, '.') || stripos($value, 'e') !== false) {
                    return self::T_FLOAT;
                }

                return self::T_INTEGER;
            }

            // Recognize quoted strings
            if ($value[0] === "'") {
                $value = str_replace("''", "'", substr($value, 1, strlen($value) - 2));

                return self::T_STRING;
            }

            // Recognize identifiers, aliased or qualified names
            if (ctype_alpha($value[0]) || $value[0] === '_' || $value[0] === '\\') {
                // Look the word up among this class' own T_* constants.
                $name = self::class . '::T_' . strtoupper($value);

                if (defined($name)) {
                    $type = constant($name);

                    // Constants up to 100 are ignored here, so words such as
                    // "none" or "comma" are still treated as identifiers.
                    if ($type > 100) {
                        return $type;
                    }
                }

                if (str_contains($value, ':')) {
                    return self::T_ALIASED_NAME;
                }

                if (str_contains($value, '\\')) {
                    return self::T_FULLY_QUALIFIED_NAME;
                }

                return self::T_IDENTIFIER;
            }

            // Recognize input parameters
            if ($value[0] === '?' || $value[0] === ':') {
                return self::T_INPUT_PARAMETER;
            }

            // Recognize symbols
            return match ($value) {
                '.' => self::T_DOT,
                ',' => self::T_COMMA,
                '(' => self::T_OPEN_PARENTHESIS,
                ')' => self::T_CLOSE_PARENTHESIS,
                '=' => self::T_EQUALS,
                '>' => self::T_GREATER_THAN,
                '<' => self::T_LOWER_THAN,
                '+' => self::T_PLUS,
                '-' => self::T_MINUS,
                '*' => self::T_MULTIPLY,
                '/' => self::T_DIVIDE,
                '!' => self::T_NEGATE,
                '{' => self::T_OPEN_CURLY_BRACE,
                '}' => self::T_CLOSE_CURLY_BRACE,
                default => self::T_NONE,
            };
        }
    }

This is roughly what the DQL Parser looks like that uses the above
Lexer implementation:

.. note::

    You can see the full implementation `here <https://github.com/doctrine/orm/blob/2.20.x/lib/Doctrine/ORM/Query/Parser.php>`_.

.. code-block:: php

    <?php

    /**
     * Minimal outline of a recursive descent parser driven by the Lexer.
     */
    class Parser
    {
        private Lexer $lexer;

        /**
         * @param string $dql The DQL query to parse.
         */
        public function __construct(string $dql)
        {
            // The Lexer constructor requires the input and tokenizes it
            // immediately, so no separate setInput() call is needed.
            $this->lexer = new Lexer($dql);
        }

        // ...

        /**
         * Parses the query and returns the resulting syntax tree.
         */
        public function getAST(): AST\SelectStatement|AST\UpdateStatement|AST\DeleteStatement
        {
            // Parse & build AST
            $AST = $this->QueryLanguage();

            // ...

            return $AST;
        }

        /**
         * Entry rule: dispatches on the first token of the query.
         */
        public function QueryLanguage(): AST\SelectStatement|AST\UpdateStatement|AST\DeleteStatement
        {
            // Load the first token into the lookahead.
            $this->lexer->moveNext();

            switch ($this->lexer->lookahead->type) {
                case Lexer::T_SELECT:
                    $statement = $this->SelectStatement();
                    break;
                case Lexer::T_UPDATE:
                    $statement = $this->UpdateStatement();
                    break;
                case Lexer::T_DELETE:
                    $statement = $this->DeleteStatement();
                    break;
                default:
                    $this->syntaxError('SELECT, UPDATE or DELETE');
            }

            // Check for end of string
            if ($this->lexer->lookahead !== null) {
                $this->syntaxError('end of string');
            }

            return $statement;
        }

        // ...
    }

Now the AST is used to transform the DQL query into portable SQL for whatever relational
database you are using!

.. code-block:: php

    <?php

    $parser = new Parser('SELECT u FROM User u');
    $AST = $parser->getAST(); // returns \Doctrine\ORM\Query\AST\SelectStatement

What is an AST?
===============

AST stands for `Abstract syntax tree <http://en.wikipedia.org/wiki/Abstract_syntax_tree>`_.
In computer science, an abstract syntax tree (AST), or just syntax tree, is a
tree representation of the abstract syntactic structure of source code written
in a programming language. Each node of the tree denotes a construct occurring in
the source code.


================================================
FILE: docs/en/index.rst
================================================
Introduction
============

Doctrine Lexer is a library that can be used in Top-Down, Recursive
Descent Parsers. This lexer is used in Doctrine Annotations and in
Doctrine ORM (DQL).

To write your own parser you just need to extend ``Doctrine\Common\Lexer\AbstractLexer``
and implement the following three abstract methods.

.. code-block:: php

    /**
     * Lexical catchable patterns.
     *
     * @return string[]
     */
    abstract protected function getCatchablePatterns();

    /**
     * Lexical non-catchable patterns.
     *
     * @return string[]
     */
    abstract protected function getNonCatchablePatterns();

    /** Retrieve token type. Also processes the token value if necessary. */
    abstract protected function getType(string &$value): int;

These methods define the `lexical <http://en.wikipedia.org/wiki/Lexical_analysis>`_
catchable and non-catchable patterns and a method for returning the
type of a token and filtering the value if necessary.

The Lexer is responsible for giving you an API to walk across a
string one token at a time and analyze the type, value and position of
each token in the string. The low level API of the lexer is pretty simple:

- ``setInput($input)`` - Sets the input data to be tokenized. The Lexer is immediately reset and the new input tokenized.
- ``reset()`` - Resets the lexer.
- ``resetPeek()`` - Resets the peek pointer to 0.
- ``resetPosition($position = 0)`` - Resets the lexer position on the input to the given position.
- ``isNextToken($token)`` - Checks whether a given token matches the current lookahead.
- ``isNextTokenAny(array $tokens)`` - Checks whether any of the given tokens matches the current lookahead.
- ``moveNext()`` - Moves to the next token in the input string.
- ``skipUntil($type)`` - Tells the lexer to skip input tokens until it sees a token with the given type.
- ``isA($value, $token)`` - Checks if given value is identical to the given token.
- ``peek()`` - Moves the lookahead token forward.
- ``glimpse()`` - Peeks at the next token, returns it and immediately resets the peek.


================================================
FILE: docs/en/sidebar.rst
================================================
.. toctree::
    :depth: 3

    index
    simple-parser-example
    dql-parser


================================================
FILE: docs/en/simple-parser-example.rst
================================================
Simple Parser Example
=====================

Extend the ``Doctrine\Common\Lexer\AbstractLexer`` class and implement
the ``getCatchablePatterns``, ``getNonCatchablePatterns``, and ``getType``
methods. Here is a very simple example lexer implementation named ``CharacterTypeLexer``.
It tokenizes a string to ``T_UPPER``, ``T_LOWER`` and ``T_NUMBER`` tokens:

.. code-block:: php

    <?php

    use Doctrine\Common\Lexer\AbstractLexer;

    /**
     * Splits the input into single alphanumeric characters and classifies
     * each one as an upper case letter, a lower case letter or a number.
     *
     * @extends AbstractLexer<CharacterTypeLexer::T_*, string>
     */
    class CharacterTypeLexer extends AbstractLexer
    {
        public const T_UPPER  = 1;
        public const T_LOWER  = 2;
        public const T_NUMBER = 3;

        /**
         * Every single letter or digit is a token of its own.
         *
         * @return string[]
         */
        protected function getCatchablePatterns(): array
        {
            return [
                '[a-zA-Z0-9]',
            ];
        }

        /**
         * Nothing needs to be skipped explicitly.
         *
         * @return string[]
         */
        protected function getNonCatchablePatterns(): array
        {
            return [];
        }

        /**
         * Classifies a single matched character.
         *
         * @param string $value The matched character; it is not modified.
         *
         * @return int One of T_NUMBER, T_UPPER or T_LOWER.
         */
        protected function getType(string &$value): int
        {
            if (is_numeric($value)) {
                return self::T_NUMBER;
            }

            if (strtoupper($value) === $value) {
                return self::T_UPPER;
            }

            // Neither a digit nor unchanged by strtoupper(): a lower case
            // letter. Returning unconditionally guarantees that the declared
            // int return type is always honoured.
            return self::T_LOWER;
        }
    }

Use ``CharacterTypeLexer`` to extract an array of upper case characters:

.. code-block:: php

    <?php

    /**
     * Uses a CharacterTypeLexer to pick the upper case characters out of a string.
     */
    class UpperCaseCharacterExtracter
    {
        public function __construct(private CharacterTypeLexer $lexer)
        {
        }

        /**
         * Tokenizes the given string and returns the value of every token
         * the lexer classified as T_UPPER, in input order.
         *
         * @return list<string>
         */
        public function getUpperCaseCharacters(string $string): array
        {
            $lexer = $this->lexer;

            $lexer->setInput($string);
            $lexer->moveNext();

            $found = [];

            // Keep consuming tokens for as long as a lookahead token is available.
            while ($lexer->lookahead) {
                $lexer->moveNext();

                if (! $lexer->token->isA(CharacterTypeLexer::T_UPPER)) {
                    continue;
                }

                $found[] = $lexer->token->value;
            }

            return $found;
        }
    }

    // Wire the extractor to a fresh lexer and run it on a sample string.
    $upperCaseCharacterExtractor = new UpperCaseCharacterExtracter(new CharacterTypeLexer());
    $upperCaseCharacters = $upperCaseCharacterExtractor->getUpperCaseCharacters('1aBcdEfgHiJ12');

    // Dump the collected characters.
    print_r($upperCaseCharacters);

The variable ``$upperCaseCharacters`` contains all of the upper case
characters:

.. code-block:: php

    Array
    (
        [0] => B
        [1] => E
        [2] => H
        [3] => J
    )

This is a simple example but it should demonstrate the low level API
that can be used to build more complex parsers.


================================================
FILE: phpcs.xml.dist
================================================
<?xml version="1.0"?>
<ruleset
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:noNamespaceSchemaLocation="vendor/squizlabs/php_codesniffer/phpcs.xsd"
>
    <arg name="basepath" value="."/>
    <arg name="extensions" value="php"/>
    <arg name="parallel" value="80"/>
    <arg name="cache" value=".phpcs-cache"/>
    <arg name="colors" />

    <config name="php_version" value="80100"/>

    <!-- Ignore warnings and show progress of the run -->
    <arg value="nps"/>

    <file>src</file>
    <file>tests</file>

    <rule ref="Doctrine">
        <!-- Will cause BC breaks to method signatures - disabled for now -->
        <exclude name="SlevomatCodingStandard.TypeHints.ReturnTypeHint.MissingNativeTypeHint" />

        <!-- Disabled to avoid class renaming - to be handled in a separate PR -->
        <exclude name="SlevomatCodingStandard.Classes.SuperfluousAbstractClassNaming" />
    </rule>
</ruleset>


================================================
FILE: phpstan.neon.dist
================================================
parameters:
  level: max
  paths:
    - %rootDir%/../../../src
    - %rootDir%/../../../tests


================================================
FILE: phpunit.xml.dist
================================================
<?xml version="1.0" encoding="UTF-8"?>

<!-- https://phpunit.de/manual/current/en/appendixes.configuration.html -->
<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:noNamespaceSchemaLocation="vendor/phpunit/phpunit/phpunit.xsd"
         backupGlobals="false"
         colors="true"
         bootstrap="vendor/autoload.php"
         cacheDirectory=".phpunit.cache"
>
    <php>
      <ini name="error_reporting" value="-1" />
    </php>

    <testsuites>
        <testsuite name="Doctrine lexer Test Suite">
            <directory>tests</directory>
        </testsuite>
    </testsuites>

    <source>
        <include>
            <directory suffix=".php">src</directory>
        </include>
    </source>
</phpunit>


================================================
FILE: src/AbstractLexer.php
================================================
<?php

declare(strict_types=1);

namespace Doctrine\Common\Lexer;

use ReflectionClass;
use UnitEnum;

use function implode;
use function preg_split;
use function sprintf;
use function substr;

use const PREG_SPLIT_DELIM_CAPTURE;
use const PREG_SPLIT_NO_EMPTY;
use const PREG_SPLIT_OFFSET_CAPTURE;

/**
 * Base class for writing simple lexers, i.e. for creating small DSLs.
 *
 * @template T of UnitEnum|string|int
 * @template V of string|int|float|bool
 */
abstract class AbstractLexer
{
    /**
     * Lexer original input string.
     *
     * Starts out empty so that getInputUntilPosition() can be called before
     * setInput() without hitting an uninitialized typed property.
     */
    private string $input = '';

    /**
     * Array of scanned tokens.
     *
     * @var list<Token<T, V>>
     */
    private array $tokens = [];

    /**
     * Current lexer position in input string.
     */
    private int $position = 0;

    /**
     * Current peek of current lexer position.
     */
    private int $peek = 0;

    /**
     * The next token in the input.
     *
     * Initialized to NULL (the same value reset() assigns) so that reading it
     * before the first setInput()/reset() call does not throw an Error.
     *
     * @var Token<T, V>|null
     */
    public Token|null $lookahead = null;

    /**
     * The last matched/seen token.
     *
     * Initialized to NULL (the same value reset() assigns) so that reading it
     * before the first setInput()/reset() call does not throw an Error.
     *
     * @var Token<T, V>|null
     */
    public Token|null $token = null;

    /**
     * Composed regex for input parsing.
     *
     * Built lazily by scan() on first use and then reused.
     *
     * @var non-empty-string|null
     */
    private string|null $regex = null;

    /**
     * Replaces the lexer input and tokenizes it right away.
     *
     * The stored token list is emptied and the lexer state is reset before the
     * new input is scanned, so tokens from a previous input are discarded.
     *
     * @param string $input The input to be tokenized.
     *
     * @return void
     */
    public function setInput(string $input)
    {
        $this->tokens = [];
        $this->input  = $input;

        $this->reset();
        $this->scan($input);
    }

    /**
     * Puts the lexer back into its initial state.
     *
     * Clears the current and lookahead tokens and rewinds both the position
     * and the peek offset to 0. The scanned token list is left untouched.
     *
     * @return void
     */
    public function reset()
    {
        $this->position  = 0;
        $this->peek      = 0;
        $this->token     = null;
        $this->lookahead = null;
    }

    /**
     * Rewinds the peek offset to 0, so the next peek() starts again at the
     * current lexer position.
     *
     * @return void
     */
    public function resetPeek()
    {
        $this->peek = 0;
    }

    /**
     * Moves the lexer to the given index in the scanned token list.
     *
     * Only the position is changed; the peek offset and the current and
     * lookahead tokens keep their values.
     *
     * @param int $position Position to place the lexical scanner.
     *
     * @return void
     */
    public function resetPosition(int $position = 0)
    {
        $this->position = $position;
    }

    /**
     * Returns the leading part of the original input, cut at the given offset.
     *
     * @param int $position Length passed to substr(), counted from the start of the input.
     *
     * @return string
     */
    public function getInputUntilPosition(int $position)
    {
        $prefix = substr($this->input, 0, $position);

        return $prefix;
    }

    /**
     * Tells whether the lookahead token exists and has the given type.
     *
     * @param T $type
     *
     * @return bool
     *
     * @phpstan-assert-if-true !=null $this->lookahead
     */
    public function isNextToken(int|string|UnitEnum $type)
    {
        if ($this->lookahead === null) {
            return false;
        }

        return $this->lookahead->isA($type);
    }

    /**
     * Tells whether the lookahead token exists and has one of the given types.
     *
     * @param list<T> $types
     *
     * @return bool
     *
     * @phpstan-assert-if-true !=null $this->lookahead
     */
    public function isNextTokenAny(array $types)
    {
        if ($this->lookahead === null) {
            return false;
        }

        return $this->lookahead->isA(...$types);
    }

    /**
     * Advances the lexer by one token.
     *
     * The previous lookahead becomes the current token, the peek offset is
     * cleared, and the token at the current position (if any) becomes the new
     * lookahead.
     *
     * @return bool TRUE when a new lookahead token is available, FALSE otherwise.
     *
     * @phpstan-impure
     * @phpstan-assert-if-true !null $this->lookahead
     */
    public function moveNext()
    {
        $this->peek  = 0;
        $this->token = $this->lookahead;

        if (isset($this->tokens[$this->position])) {
            $this->lookahead = $this->tokens[$this->position];
            $this->position++;
        } else {
            $this->lookahead = null;
        }

        return $this->lookahead !== null;
    }

    /**
     * Advances the lexer until the lookahead token has the given type or the
     * tokens run out.
     *
     * Nothing happens when there is no lookahead token to begin with.
     *
     * @param T $type The token type to skip until.
     *
     * @return void
     */
    public function skipUntil(int|string|UnitEnum $type)
    {
        while ($this->lookahead !== null) {
            if ($this->lookahead->isA($type)) {
                break;
            }

            $this->moveNext();
        }
    }

    /**
     * Tells whether getType() classifies the given value as the given token type.
     *
     * The comparison is strict (===).
     *
     * @return bool
     */
    public function isA(string $value, int|string|UnitEnum $token)
    {
        $detectedType = $this->getType($value);

        return $detectedType === $token;
    }

    /**
     * Returns the token at the current peek offset and moves that offset one
     * step forward.
     *
     * The offset is left unchanged when there is no token to return.
     *
     * @return Token<T, V>|null The next token or NULL if there are no more tokens ahead.
     */
    public function peek()
    {
        $index = $this->position + $this->peek;

        if (! isset($this->tokens[$index])) {
            return null;
        }

        $this->peek++;

        return $this->tokens[$index];
    }

    /**
     * Performs a single peek() and then rewinds the peek offset to 0.
     *
     * @return Token<T, V>|null The next token or NULL if there are no more tokens ahead.
     *
     * @phpstan-impure
     */
    public function glimpse()
    {
        $next = $this->peek();

        $this->peek = 0;

        return $next;
    }

    /**
     * Splits the input into tokens and appends them to the token list.
     *
     * The splitting regex is composed once from the catchable patterns, the
     * non-catchable patterns and the modifiers, then reused on later calls.
     *
     * @param string $input A query string.
     *
     * @return void
     */
    protected function scan(string $input)
    {
        if ($this->regex === null) {
            $catchable    = implode(')|(', $this->getCatchablePatterns());
            $nonCatchable = implode('|', $this->getNonCatchablePatterns());

            $this->regex = sprintf('/(%s)|%s/%s', $catchable, $nonCatchable, $this->getModifiers());
        }

        $matches = preg_split(
            $this->regex,
            $input,
            -1,
            PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_OFFSET_CAPTURE,
        );

        if ($matches === false) {
            // Work around https://bugs.php.net/78122
            $matches = [[$input, 0]];
        }

        foreach ($matches as [$value, $offset]) {
            // getType() takes $value by reference and may rewrite it, so it
            // has to run before the token is built from $value.
            $type = $this->getType($value);

            $this->tokens[] = new Token($value, $type, $offset);
        }
    }

    /**
     * Builds a readable name for a token type.
     *
     * Enum cases are rendered as "EnumClass::CASE". Other types are looked up
     * among the constants of the concrete lexer class and rendered as
     * "LexerClass::CONSTANT"; when no constant holds that exact value, the
     * type is returned as is.
     *
     * @param T $token
     *
     * @return int|string
     */
    public function getLiteral(int|string|UnitEnum $token)
    {
        if ($token instanceof UnitEnum) {
            return $token::class . '::' . $token->name;
        }

        $className = static::class;

        foreach ((new ReflectionClass($className))->getConstants() as $name => $value) {
            if ($value !== $token) {
                continue;
            }

            return $className . '::' . $name;
        }

        return $token;
    }

    /**
     * Regex modifiers appended to the composed scanning pattern.
     *
     * The default is "iu": case-insensitive matching in UTF-8 mode.
     *
     * @return string
     */
    protected function getModifiers()
    {
        $modifiers = 'iu';

        return $modifiers;
    }

    /**
     * Lexical catchable patterns.
     *
     * scan() joins these with ")|(" inside a capturing group, so the text they
     * match is kept and turned into tokens.
     *
     * @return string[]
     */
    abstract protected function getCatchablePatterns();

    /**
     * Lexical non-catchable patterns.
     *
     * scan() joins these with "|" outside of the capturing group, so they act
     * as split delimiters; text they match only becomes a token if the
     * pattern itself contains a capturing group.
     *
     * @return string[]
     */
    abstract protected function getNonCatchablePatterns();

    /**
     * Retrieve token type. Also processes the token value if necessary.
     *
     * The value is passed by reference: whatever it holds when this method
     * returns is what scan() stores as the token value.
     *
     * @return T|null
     *
     * @param-out V $value
     */
    abstract protected function getType(string &$value);
}


================================================
FILE: src/Token.php
================================================
<?php

declare(strict_types=1);

namespace Doctrine\Common\Lexer;

use UnitEnum;

use function in_array;

/**
 * Value object describing a single token: its value, its type and the offset
 * at which it was found in the input.
 *
 * @template T of UnitEnum|string|int
 * @template-covariant V of string|int|float|bool
 */
final class Token
{
    /**
     * The value of the token (the matched text, possibly converted by the lexer)
     *
     * @readonly
     * @var V
     */
    public string|int|float|bool $value;

    /**
     * The type of the token (identifier, numeric, string, input parameter, none)
     *
     * @readonly
     * @var T|null
     */
    public $type;

    /**
     * The position of the token in the input string
     *
     * @readonly
     */
    public int $position;

    /**
     * @param V      $value
     * @param T|null $type
     */
    public function __construct(string|int|float|bool $value, $type, int $position)
    {
        $this->position = $position;
        $this->type     = $type;
        $this->value    = $value;
    }

    /**
     * Tells whether the token type is strictly identical to one of the given types.
     *
     * @param T ...$types
     */
    public function isA(...$types): bool
    {
        $matchesAnyType = in_array($this->type, $types, true);

        return $matchesAnyType;
    }
}


================================================
FILE: tests/AbstractLexerTest.php
================================================
<?php

declare(strict_types=1);

namespace Doctrine\Tests\Common\Lexer;

use Doctrine\Common\Lexer\AbstractLexer;
use Doctrine\Common\Lexer\Token;
use PHPUnit\Framework\Attributes\DataProvider;
use PHPUnit\Framework\TestCase;

use function array_map;
use function assert;
use function count;
use function is_int;
use function is_numeric;
use function setlocale;

use const LC_ALL;

class AbstractLexerTest extends TestCase
{
    /** Lexer instance exercised by most tests in this class. */
    private ConcreteLexer $concreteLexer;

    /** Creates the ConcreteLexer used by the tests. */
    public function setUp(): void
    {
        $lexer = new ConcreteLexer();

        $this->concreteLexer = $lexer;
    }

    /**
     * Calls setlocale(LC_ALL, null) after each test.
     *
     * NOTE(review): presumably this undoes the locale switch made in
     * testMarkerAnnotationLocaleTr() - confirm.
     */
    public function tearDown(): void
    {
        $locale = null;

        setlocale(LC_ALL, $locale);
    }

    /**
     * Provides an input string together with the tokens ConcreteLexer is
     * expected to produce for it.
     *
     * @phpstan-return list<array{string, list<Token<string, string|int>>}>
     */
    public static function dataProvider(): array
    {
        $priceTokens = [
            new Token('price', 'string', 0),
            new Token('=', 'operator', 5),
            new Token(10, 'int', 6),
        ];

        return [
            ['price=10', $priceTokens],
        ];
    }

    /** After resetPeek(), peek() starts over at the first token. */
    public function testResetPeek(): void
    {
        $firstToken  = new Token('price', 'string', 0);
        $secondToken = new Token('=', 'operator', 5);

        $lexer = $this->concreteLexer;
        $lexer->setInput('price=10');

        $this->assertEquals($firstToken, $lexer->peek());
        $this->assertEquals($secondToken, $lexer->peek());

        $lexer->resetPeek();

        $this->assertEquals($firstToken, $lexer->peek());
    }

    /** After resetPosition(0), moveNext() yields the first token again. */
    public function testResetPosition(): void
    {
        $firstToken  = new Token('price', 'string', 0);
        $secondToken = new Token('=', 'operator', 5);

        $lexer = $this->concreteLexer;
        $lexer->setInput('price=10');
        $this->assertNull($lexer->lookahead);

        $this->assertTrue($lexer->moveNext());
        $this->assertEquals($firstToken, $lexer->lookahead);

        $this->assertTrue($lexer->moveNext());
        $this->assertEquals($secondToken, $lexer->lookahead);

        $lexer->resetPosition(0);

        $this->assertTrue($lexer->moveNext());
        $this->assertEquals($firstToken, $lexer->lookahead);
    }

    /**
     * moveNext() walks through every expected token in order and reports
     * FALSE once the tokens are exhausted.
     *
     * @phpstan-param list<Token<string, string|int>>  $expectedTokens
     */
    #[DataProvider('dataProvider')]
    public function testMoveNext(string $input, array $expectedTokens): void
    {
        $lexer = $this->concreteLexer;
        $lexer->setInput($input);
        $this->assertNull($lexer->lookahead);

        $tokenCount = count($expectedTokens);
        for ($index = 0; $index < $tokenCount; ++$index) {
            $this->assertTrue($lexer->moveNext());
            $this->assertEquals($expectedTokens[$index], $lexer->lookahead);
        }

        $this->assertFalse($lexer->moveNext());
        $this->assertNull($lexer->lookahead);
    }

    /** skipUntil('operator') stops with the '=' token as lookahead. */
    public function testSkipUntil(): void
    {
        $lexer = $this->concreteLexer;
        $lexer->setInput('price=10');

        $this->assertTrue($lexer->moveNext());
        $lexer->skipUntil('operator');

        $expectedLookahead = new Token('=', 'operator', 5);

        $this->assertEquals($expectedLookahead, $lexer->lookahead);
    }

    /**
     * An input starting with the byte 0xE9 is expected to come back as one
     * single 'string' token holding the whole input at offset 0.
     */
    public function testUtf8Mismatch(): void
    {
        $lexer = $this->concreteLexer;
        $lexer->setInput("\xE9=10");

        $this->assertTrue($lexer->moveNext());

        $expectedLookahead = new Token("\xE9=10", 'string', 0);

        $this->assertEquals($expectedLookahead, $lexer->lookahead);
    }

    /**
     * Successive peek() calls return the expected tokens one by one and NULL
     * once there is nothing left.
     *
     * @phpstan-param list<Token<string, string|int>> $expectedTokens
     */
    #[DataProvider('dataProvider')]
    public function testPeek(string $input, array $expectedTokens): void
    {
        $lexer = $this->concreteLexer;
        $lexer->setInput($input);

        foreach ($expectedTokens as $expected) {
            $peeked = $lexer->peek();
            assert($peeked !== null);

            $this->assertEquals($expected, $peeked);
            // assertEquals() compares loosely, so the fields are also checked strictly.
            $this->assertSame($expected->value, $peeked->value);
            $this->assertSame($expected->type, $peeked->type);
            $this->assertSame($expected->position, $peeked->position);
        }

        $this->assertNull($lexer->peek());
    }

    /**
     * glimpse() keeps returning the same upcoming token until moveNext() is
     * called.
     *
     * @phpstan-param list<Token<string, string|int>> $expectedTokens
     */
    #[DataProvider('dataProvider')]
    public function testGlimpse(string $input, array $expectedTokens): void
    {
        $lexer = $this->concreteLexer;
        $lexer->setInput($input);

        foreach ($expectedTokens as $expected) {
            $glimpsed = $lexer->glimpse();
            assert($glimpsed !== null);

            $this->assertEquals($expected, $glimpsed);
            // A second glimpse must yield the very same token again.
            $this->assertEquals($expected, $lexer->glimpse());
            $this->assertSame($expected->value, $glimpsed->value);
            $this->assertSame($expected->type, $glimpsed->type);
            $this->assertSame($expected->position, $glimpsed->position);

            $lexer->moveNext();
        }

        $this->assertNull($lexer->peek());
    }

    /**
     * Provides rows of input, cut position and expected leading part of the input.
     *
     * @phpstan-return list<array{string, int, string}>
     */
    public static function inputUntilPositionDataProvider(): array
    {
        $priceCase = ['price=10', 5, 'price'];

        return [$priceCase];
    }

    /** getInputUntilPosition() returns the input truncated at the given position. */
    #[DataProvider('inputUntilPositionDataProvider')]
    public function testGetInputUntilPosition(
        string $input,
        int $position,
        string $expectedInput,
    ): void {
        $this->concreteLexer->setInput($input);

        $actualInput = $this->concreteLexer->getInputUntilPosition($position);

        $this->assertSame($expectedInput, $actualInput);
    }

    /**
     * isNextToken() recognizes the type of each lookahead token in turn.
     *
     * @phpstan-param list<Token<string, string|int>> $expectedTokens
     */
    #[DataProvider('dataProvider')]
    public function testIsNextToken(string $input, array $expectedTokens): void
    {
        $lexer = $this->concreteLexer;
        $lexer->setInput($input);
        $lexer->moveNext();

        $tokenCount = count($expectedTokens);
        for ($index = 0; $index < $tokenCount; ++$index) {
            $expectedType = $expectedTokens[$index]->type;
            assert($expectedType !== null);

            $this->assertTrue($lexer->isNextToken($expectedType));
            $lexer->moveNext();
        }
    }

    /**
     * isNextTokenAny() accepts each lookahead token both for its own type
     * alone and for the list of all expected types.
     *
     * @phpstan-param list<Token<string, string|int>> $expectedTokens
     */
    #[DataProvider('dataProvider')]
    public function testIsNextTokenAny(string $input, array $expectedTokens): void
    {
        $everyType = array_map(static function ($expected): string {
            assert($expected->type !== null);

            return $expected->type;
        }, $expectedTokens);

        $lexer = $this->concreteLexer;
        $lexer->setInput($input);
        $lexer->moveNext();

        $tokenCount = count($expectedTokens);
        for ($index = 0; $index < $tokenCount; ++$index) {
            $ownType = $expectedTokens[$index]->type;
            assert($ownType !== null);

            $this->assertTrue($lexer->isNextTokenAny([$ownType]));
            $this->assertTrue($lexer->isNextTokenAny($everyType));
            $lexer->moveNext();
        }
    }

    /**
     * getLiteral() resolves a type held by a class constant to
     * "Class::CONSTANT" and returns any other type unchanged.
     */
    public function testGetLiteral(): void
    {
        $knownLiteral   = $this->concreteLexer->getLiteral('int');
        $unknownLiteral = $this->concreteLexer->getLiteral('fake_token');

        $this->assertSame('Doctrine\Tests\Common\Lexer\ConcreteLexer::INT', $knownLiteral);
        $this->assertSame('fake_token', $unknownLiteral);
    }

    /** getLiteral() renders an enum case as "EnumClass::CASE". */
    public function testGetLiteralWithEnumLexer(): void
    {
        $literal = (new EnumLexer())->getLiteral(TokenType::OPERATOR);

        $this->assertSame('Doctrine\Tests\Common\Lexer\TokenType::OPERATOR', $literal);
    }

    /** isA() matches sample values against the type ConcreteLexer assigns them. */
    public function testIsA(): void
    {
        $samples = [
            ['11', 'int'],
            ['1.1', 'int'],
            ['=', 'operator'],
            ['>', 'operator'],
            ['<', 'operator'],
            ['fake_text', 'string'],
        ];

        foreach ($samples as [$value, $expectedType]) {
            $this->assertTrue($this->concreteLexer->isA($value, $expectedType));
        }
    }

    /**
     * The catchable pattern registered on a MutableLexer decides how much of
     * the input ends up in the first token.
     */
    public function testAddCatchablePatternsToMutableLexer(): void
    {
        $expectedFirstValueByPattern = [
            '[a-z]'  => 'o',
            '[a-z]+' => 'one',
        ];

        foreach ($expectedFirstValueByPattern as $pattern => $expectedValue) {
            // A fresh lexer per pattern: each one is tested in isolation.
            $mutableLexer = new MutableLexer();
            $mutableLexer->addCatchablePattern($pattern);
            $mutableLexer->setInput('one');
            $token = $mutableLexer->glimpse();

            $this->assertNotNull($token);
            $this->assertEquals($expectedValue, $token->value);
        }
    }

    /**
     * Requests a Turkish locale and checks that '@ODM\Id' is still split
     * into the tokens '@' and 'ODM\Id'.
     *
     * NOTE(review): presumably guards against locale-dependent case
     * handling of the letter "I" - confirm.
     */
    public function testMarkerAnnotationLocaleTr(): void
    {
        setlocale(LC_ALL, 'tr_TR.utf8', 'tr_TR');

        $patterns = [
            '[a-z_\\\][a-z0-9_\:\\\]*[a-z_][a-z0-9_]*',
            '(?:[+-]?[0-9]+(?:[\.][0-9]+)*)(?:[eE][+-]?[0-9]+)?',
            '"(?:""|[^"])*+"',
        ];

        $mutableLexer = new MutableLexer();
        foreach ($patterns as $pattern) {
            $mutableLexer->addCatchablePattern($pattern);
        }

        $mutableLexer->setInput('@ODM\Id');

        // Nothing has been consumed yet.
        self::assertNull($mutableLexer->token);
        self::assertNull($mutableLexer->lookahead);

        // First step: '@' becomes the lookahead.
        self::assertTrue($mutableLexer->moveNext());
        self::assertNull($mutableLexer->token);
        self::assertNotNull($mutableLexer->lookahead);
        self::assertEquals('@', $mutableLexer->lookahead->value);

        // Second step: '@' becomes the current token, 'ODM\Id' the lookahead.
        self::assertTrue($mutableLexer->moveNext());
        self::assertNotNull($mutableLexer->token);
        self::assertEquals('@', $mutableLexer->token->value);
        self::assertEquals('ODM\Id', $mutableLexer->lookahead->value);
    }

    /**
     * A lexer whose getType() converts values can produce tokens that carry
     * real floats and booleans instead of strings.
     */
    public function testCanTokenizeFloatValue(): void
    {
        $lexer = new /** @template-extends AbstractLexer<int, string|int|float|bool> */ class () extends AbstractLexer {
            final public const T_NONE    = 1;
            final public const T_INTEGER = 2;
            final public const T_FLOAT   = 4;
            final public const T_BOOL    = 8;

            protected function getType(string|int|float|bool &$value): int
            {
                if ($value === 'y') {
                    $value = true;

                    return self::T_BOOL;
                }

                if (! is_numeric($value)) {
                    return self::T_NONE;
                }

                // Adding 0 turns the numeric string into an int or a float.
                $value += 0;

                return is_int($value) ? self::T_INTEGER : self::T_FLOAT;
            }

            /** {@inheritDoc} */
            protected function getCatchablePatterns(): array
            {
                $number = '(?:[0-9]+)(?:[\.][0-9]+)?(?:e[+-]?[0-9]+)?';

                return [$number, 'y'];
            }

            /** {@inheritDoc} */
            protected function getNonCatchablePatterns(): array
            {
                return ['\s+'];
            }
        };

        $lexer->setInput('123.456');
        $floatToken = $lexer->peek();
        assert($floatToken !== null);
        self::assertSame(123.456, $floatToken->value, 'expect a real float, not a numerical string');

        $lexer->setInput('y');
        $boolToken = $lexer->peek();
        assert($boolToken !== null);
        self::assertTrue($boolToken->value, 'expect a real bool, not a numerical string');
    }
}


================================================
FILE: tests/ConcreteLexer.php
================================================
<?php

declare(strict_types=1);

namespace Doctrine\Tests\Common\Lexer;

use Doctrine\Common\Lexer\AbstractLexer;

use function in_array;
use function is_numeric;

/**
 * Small lexer with string token types, used as a fixture by the tests.
 *
 * It recognizes the operators =, < and >, runs of letters and runs of digits.
 *
 * @extends AbstractLexer<string, string|int>
 */
class ConcreteLexer extends AbstractLexer
{
    final public const INT = 'int';

    /** {@inheritDoc} */
    protected function getCatchablePatterns(): array
    {
        $operators = '=|<|>';
        $letters   = '[a-z]+';
        $digits    = '\d+';

        return [$operators, $letters, $digits];
    }

    /** {@inheritDoc} */
    protected function getNonCatchablePatterns(): array
    {
        return ['\s+', '(.)'];
    }

    /**
     * Classifies a value as 'int', 'operator' or 'string'.
     *
     * Numeric values are cast to int in place.
     */
    protected function getType(string|int|float &$value): string
    {
        if (! is_numeric($value)) {
            return in_array($value, ['=', '<', '>']) ? 'operator' : 'string';
        }

        $value = (int) $value;

        return 'int';
    }
}


================================================
FILE: tests/EnumLexer.php
================================================
<?php

declare(strict_types=1);

namespace Doctrine\Tests\Common\Lexer;

use Doctrine\Common\Lexer\AbstractLexer;

use function in_array;
use function is_numeric;

/**
 * Small lexer whose token types are TokenType enum cases, used as a fixture
 * by the tests.
 *
 * @extends AbstractLexer<TokenType, string|int>
 */
class EnumLexer extends AbstractLexer
{
    /** {@inheritDoc} */
    protected function getCatchablePatterns(): array
    {
        $operators = '=|<|>';
        $letters   = '[a-z]+';
        $digits    = '\d+';

        return [$operators, $letters, $digits];
    }

    /** {@inheritDoc} */
    protected function getNonCatchablePatterns(): array
    {
        return ['\s+', '(.)'];
    }

    /**
     * Classifies a value as TokenType::INT, ::OPERATOR or ::STRING.
     *
     * Numeric values are cast to int in place.
     */
    protected function getType(string &$value): TokenType
    {
        if (! is_numeric($value)) {
            return in_array($value, ['=', '<', '>'])
                ? TokenType::OPERATOR
                : TokenType::STRING;
        }

        $value = (int) $value;

        return TokenType::INT;
    }
}


================================================
FILE: tests/MutableLexer.php
================================================
<?php

declare(strict_types=1);

namespace Doctrine\Tests\Common\Lexer;

use Doctrine\Common\Lexer\AbstractLexer;

/**
 * Test lexer whose catchable patterns can be registered at runtime.
 *
 * Every token it produces gets the type 1.
 *
 * @extends AbstractLexer<int, string>
 */
class MutableLexer extends AbstractLexer
{
    /**
     * Patterns registered so far, in insertion order.
     *
     * @var string[]
     */
    private array $catchablePatterns = [];

    /** Appends a pattern to the list returned by getCatchablePatterns(). */
    public function addCatchablePattern(string $pattern): void
    {
        $this->catchablePatterns[] = $pattern;
    }

    /** {@inheritDoc} */
    protected function getCatchablePatterns(): array
    {
        return $this->catchablePatterns;
    }

    /** {@inheritDoc} */
    protected function getNonCatchablePatterns(): array
    {
        $whitespaceOrComma = '[\s,]+';

        return [$whitespaceOrComma];
    }

    /** Returns the type 1 for every value and leaves the value untouched. */
    protected function getType(string &$value): int
    {
        return 1;
    }
}


================================================
FILE: tests/TokenTest.php
================================================
<?php

declare(strict_types=1);

namespace Doctrine\Tests\Common\Lexer;

use Doctrine\Common\Lexer\Token;
use PHPUnit\Framework\TestCase;

final class TokenTest extends TestCase
{
    /** Token::isA() is true when any of the given types equals the token type. */
    public function testIsA(): void
    {
        /** @var Token<'string'|'int', string> $token */
        $token = new Token('foo', 'string', 1);

        $matchesOwnType  = $token->isA('string');
        $matchesAnyOf    = $token->isA('int', 'string');
        $matchesOtherOne = $token->isA('int');

        self::assertTrue($matchesOwnType);
        self::assertTrue($matchesAnyOf);
        self::assertFalse($matchesOtherOne);
    }
}


================================================
FILE: tests/TokenType.php
================================================
<?php

declare(strict_types=1);

namespace Doctrine\Tests\Common\Lexer;

/**
 * Pure (non-backed) enum of the token types that EnumLexer::getType() returns.
 */
enum TokenType
{
    /** Returned for numeric values. */
    case INT;

    /** Returned for the values "=", "<" and ">". */
    case OPERATOR;

    /** Returned for every value that is neither numeric nor an operator. */
    case STRING;
}
Download .txt
gitextract_ls_k7a7l/

├── .doctrine-project.json
├── .gitattributes
├── .github/
│   ├── FUNDING.yml
│   ├── dependabot.yml
│   └── workflows/
│       ├── coding-standards.yml
│       ├── composer-lint.yml
│       ├── continuous-integration.yml
│       ├── release-on-milestone-closed.yml
│       ├── static-analysis.yml
│       └── website-schema.yml
├── .gitignore
├── LICENSE
├── README.md
├── UPGRADE.md
├── composer.json
├── docs/
│   └── en/
│       ├── dql-parser.rst
│       ├── index.rst
│       ├── sidebar.rst
│       └── simple-parser-example.rst
├── phpcs.xml.dist
├── phpstan.neon.dist
├── phpunit.xml.dist
├── src/
│   ├── AbstractLexer.php
│   └── Token.php
└── tests/
    ├── AbstractLexerTest.php
    ├── ConcreteLexer.php
    ├── EnumLexer.php
    ├── MutableLexer.php
    ├── TokenTest.php
    └── TokenType.php
Download .txt
SYMBOL INDEX (58 symbols across 7 files)

FILE: src/AbstractLexer.php
  class AbstractLexer (line 25) | abstract class AbstractLexer
    method setInput (line 80) | public function setInput(string $input)
    method reset (line 94) | public function reset()
    method resetPeek (line 107) | public function resetPeek()
    method resetPosition (line 119) | public function resetPosition(int $position = 0)
    method getInputUntilPosition (line 129) | public function getInputUntilPosition(int $position)
    method isNextToken (line 143) | public function isNextToken(int|string|UnitEnum $type)
    method isNextTokenAny (line 157) | public function isNextTokenAny(array $types)
    method moveNext (line 170) | public function moveNext()
    method skipUntil (line 187) | public function skipUntil(int|string|UnitEnum $type)
    method isA (line 199) | public function isA(string $value, int|string|UnitEnum $token)
    method peek (line 209) | public function peek()
    method glimpse (line 225) | public function glimpse()
    method scan (line 240) | protected function scan(string $input)
    method getLiteral (line 279) | public function getLiteral(int|string|UnitEnum $token)
    method getModifiers (line 304) | protected function getModifiers()
    method getCatchablePatterns (line 314) | abstract protected function getCatchablePatterns();
    method getNonCatchablePatterns (line 321) | abstract protected function getNonCatchablePatterns();
    method getType (line 330) | abstract protected function getType(string &$value);

FILE: src/Token.php
  class Token (line 15) | final class Token
    method __construct (line 44) | public function __construct(string|int|float|bool $value, $type, int $...
    method isA (line 52) | public function isA(...$types): bool

FILE: tests/AbstractLexerTest.php
  class AbstractLexerTest (line 21) | class AbstractLexerTest extends TestCase
    method setUp (line 25) | public function setUp(): void
    method tearDown (line 30) | public function tearDown(): void
    method dataProvider (line 36) | public static function dataProvider(): array
    method testResetPeek (line 50) | public function testResetPeek(): void
    method testResetPosition (line 66) | public function testResetPosition(): void
    method testMoveNext (line 90) | #[DataProvider('dataProvider')]
    method testSkipUntil (line 105) | public function testSkipUntil(): void
    method testUtf8Mismatch (line 118) | public function testUtf8Mismatch(): void
    method testPeek (line 131) | #[DataProvider('dataProvider')]
    method testGlimpse (line 148) | #[DataProvider('dataProvider')]
    method inputUntilPositionDataProvider (line 168) | public static function inputUntilPositionDataProvider(): array
    method testGetInputUntilPosition (line 175) | #[DataProvider('inputUntilPositionDataProvider')]
    method testIsNextToken (line 187) | #[DataProvider('dataProvider')]
    method testIsNextTokenAny (line 201) | #[DataProvider('dataProvider')]
    method testGetLiteral (line 221) | public function testGetLiteral(): void
    method testGetLiteralWithEnumLexer (line 227) | public function testGetLiteralWithEnumLexer(): void
    method testIsA (line 236) | public function testIsA(): void
    method testAddCatchablePatternsToMutableLexer (line 246) | public function testAddCatchablePatternsToMutableLexer(): void
    method testMarkerAnnotationLocaleTr (line 265) | public function testMarkerAnnotationLocaleTr(): void
    method testCanTokenizeFloatValue (line 286) | public function testCanTokenizeFloatValue(): void

FILE: tests/ConcreteLexer.php
  class ConcreteLexer (line 13) | class ConcreteLexer extends AbstractLexer
    method getCatchablePatterns (line 20) | protected function getCatchablePatterns(): array
    method getNonCatchablePatterns (line 32) | protected function getNonCatchablePatterns(): array
    method getType (line 40) | protected function getType(string|int|float &$value): string

FILE: tests/EnumLexer.php
  class EnumLexer (line 13) | class EnumLexer extends AbstractLexer
    method getCatchablePatterns (line 18) | protected function getCatchablePatterns(): array
    method getNonCatchablePatterns (line 30) | protected function getNonCatchablePatterns(): array
    method getType (line 38) | protected function getType(string &$value): TokenType

FILE: tests/MutableLexer.php
  class MutableLexer (line 10) | class MutableLexer extends AbstractLexer
    method addCatchablePattern (line 15) | public function addCatchablePattern(string $pattern): void
    method getCatchablePatterns (line 23) | protected function getCatchablePatterns(): array
    method getNonCatchablePatterns (line 31) | protected function getNonCatchablePatterns(): array
    method getType (line 36) | protected function getType(string &$value): int

FILE: tests/TokenTest.php
  class TokenTest (line 10) | final class TokenTest extends TestCase
    method testIsA (line 12) | public function testIsA(): void
Condensed preview — 30 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (53K chars).
[
  {
    "path": ".doctrine-project.json",
    "chars": 1109,
    "preview": "{\n    \"active\": true,\n    \"name\": \"Lexer\",\n    \"slug\": \"lexer\",\n    \"docsSlug\": \"doctrine-lexer\",\n    \"versions\": [\n    "
  },
  {
    "path": ".gitattributes",
    "chars": 360,
    "preview": "# Auto-detect text files, ensure they use LF.\n* text=auto eol=lf\n\n # Exclude non-essential files from dist\n/.github expo"
  },
  {
    "path": ".github/FUNDING.yml",
    "chars": 116,
    "preview": "patreon: phpdoctrine\ntidelift: packagist/doctrine%2Flexer\ncustom: https://www.doctrine-project.org/sponsorship.html\n"
  },
  {
    "path": ".github/dependabot.yml",
    "chars": 143,
    "preview": "version: 2\nupdates:\n  - package-ecosystem: \"github-actions\"\n    directory: \"/\"\n    schedule:\n      interval: \"weekly\"\n  "
  },
  {
    "path": ".github/workflows/coding-standards.yml",
    "chars": 506,
    "preview": "\nname: \"Coding Standards\"\n\non:\n  pull_request:\n    branches:\n      - \"*.x\"\n    paths:\n      - .github/workflows/coding-s"
  },
  {
    "path": ".github/workflows/composer-lint.yml",
    "chars": 383,
    "preview": "name: \"Composer Lint\"\n\non:\n  pull_request:\n    branches:\n      - \"*.x\"\n    paths:\n      - .github/workflows/composer-lin"
  },
  {
    "path": ".github/workflows/continuous-integration.yml",
    "chars": 649,
    "preview": "\nname: \"Continuous Integration\"\n\non:\n  pull_request:\n    branches:\n      - \"*.x\"\n    paths:\n      - .github/workflows/co"
  },
  {
    "path": ".github/workflows/release-on-milestone-closed.yml",
    "chars": 482,
    "preview": "name: \"Automatic Releases\"\n\non:\n  milestone:\n    types:\n      - \"closed\"\n\njobs:\n  release:\n    name: \"Git tag, release &"
  },
  {
    "path": ".github/workflows/static-analysis.yml",
    "chars": 480,
    "preview": "\nname: \"Static Analysis\"\n\non:\n  pull_request:\n    branches:\n      - \"*.x\"\n    paths:\n      - .github/workflows/static-an"
  },
  {
    "path": ".github/workflows/website-schema.yml",
    "chars": 432,
    "preview": "\nname: \"Website config validation\"\n\non:\n  pull_request:\n    branches:\n      - \"*.x\"\n    paths:\n      - \".doctrine-projec"
  },
  {
    "path": ".gitignore",
    "chars": 77,
    "preview": "/vendor\n/composer.lock\n/phpunit.xml\n/.phpunit.cache\n/phpcs.xml\n/.phpcs-cache\n"
  },
  {
    "path": "LICENSE",
    "chars": 1065,
    "preview": "Copyright (c) 2006-2018 Doctrine Project\n\nPermission is hereby granted, free of charge, to any person obtaining a copy o"
  },
  {
    "path": "README.md",
    "chars": 367,
    "preview": "# Doctrine Lexer\n\n[![Build Status](https://github.com/doctrine/lexer/workflows/Continuous%20Integration/badge.svg)](http"
  },
  {
    "path": "UPGRADE.md",
    "chars": 962,
    "preview": "Note about upgrading: Doctrine uses static and runtime mechanisms to raise\nawareness about deprecated code.\n\n- Use of `@"
  },
  {
    "path": "composer.json",
    "chars": 1353,
    "preview": "{\n    \"name\": \"doctrine/lexer\",\n    \"description\": \"PHP Doctrine Lexer parser library that can be used in Top-Down, Recu"
  },
  {
    "path": "docs/en/dql-parser.rst",
    "chars": 10034,
    "preview": "DQL Lexer\n=========\n\nHere is a more complicated example from the Doctrine ORM project.\nThe ``Doctrine\\ORM\\Query\\Lexer`` "
  },
  {
    "path": "docs/en/index.rst",
    "chars": 2102,
    "preview": "Introduction\n============\n\nDoctrine Lexer is a library that can be used in Top-Down, Recursive\nDescent Parsers. This lex"
  },
  {
    "path": "docs/en/sidebar.rst",
    "chars": 79,
    "preview": ".. toctree::\n    :depth: 3\n\n    index\n    simple-parser-example\n    dql-parser\n"
  },
  {
    "path": "docs/en/simple-parser-example.rst",
    "chars": 2683,
    "preview": "Simple Parser Example\n=====================\n\nExtend the ``Doctrine\\Common\\Lexer\\AbstractLexer`` class and implement\nthe "
  },
  {
    "path": "phpcs.xml.dist",
    "chars": 938,
    "preview": "<?xml version=\"1.0\"?>\n<ruleset\n        xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n        xsi:noNamespaceSche"
  },
  {
    "path": "phpstan.neon.dist",
    "chars": 94,
    "preview": "parameters:\n  level: max\n  paths:\n    - %rootDir%/../../../src\n    - %rootDir%/../../../tests\n"
  },
  {
    "path": "phpunit.xml.dist",
    "chars": 742,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n<!-- https://phpunit.de/manual/current/en/appendixes.configuration.html -->\n<php"
  },
  {
    "path": "src/AbstractLexer.php",
    "chars": 7516,
    "preview": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Doctrine\\Common\\Lexer;\n\nuse ReflectionClass;\nuse UnitEnum;\n\nuse function impl"
  },
  {
    "path": "src/Token.php",
    "chars": 1064,
    "preview": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Doctrine\\Common\\Lexer;\n\nuse UnitEnum;\n\nuse function in_array;\n\n/**\n * @templa"
  },
  {
    "path": "tests/AbstractLexerTest.php",
    "chars": 11679,
    "preview": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Doctrine\\Tests\\Common\\Lexer;\n\nuse Doctrine\\Common\\Lexer\\AbstractLexer;\nuse Do"
  },
  {
    "path": "tests/ConcreteLexer.php",
    "chars": 962,
    "preview": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Doctrine\\Tests\\Common\\Lexer;\n\nuse Doctrine\\Common\\Lexer\\AbstractLexer;\n\nuse f"
  },
  {
    "path": "tests/EnumLexer.php",
    "chars": 944,
    "preview": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Doctrine\\Tests\\Common\\Lexer;\n\nuse Doctrine\\Common\\Lexer\\AbstractLexer;\n\nuse f"
  },
  {
    "path": "tests/MutableLexer.php",
    "chars": 754,
    "preview": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Doctrine\\Tests\\Common\\Lexer;\n\nuse Doctrine\\Common\\Lexer\\AbstractLexer;\n\n/** @"
  },
  {
    "path": "tests/TokenTest.php",
    "chars": 488,
    "preview": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Doctrine\\Tests\\Common\\Lexer;\n\nuse Doctrine\\Common\\Lexer\\Token;\nuse PHPUnit\\Fr"
  },
  {
    "path": "tests/TokenType.php",
    "chars": 142,
    "preview": "<?php\n\ndeclare(strict_types=1);\n\nnamespace Doctrine\\Tests\\Common\\Lexer;\n\nenum TokenType\n{\n    case INT;\n    case OPERATO"
  }
]

About this extraction

This page contains the full source code of the doctrine/lexer GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 30 files (47.6 KB), approximately 12.6k tokens, and a symbol index with 58 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!