<?php
namespace JmesPath;

use JmesPath\Lexer as T;

/**
 * JMESPath Pratt parser
 *
 * Turns the token stream produced by the lexer into an array based AST using
 * top-down operator precedence: every token type is dispatched to a
 * "nud_<type>" method when it starts an expression and to a "led_<type>"
 * method when it follows an already parsed left-hand side.
 *
 * @link http://hall.org.ua/halls/wizzard/pdf/Vaughan.Pratt.TDOP.pdf
 */
class Parser
{
    /** @var Lexer */
    private $lexer;

    /** @var array Tokens returned by the lexer for the current expression */
    private $tokens;

    /** @var array Token currently being examined */
    private $token;

    /** @var int Index of the current token inside $tokens */
    private $tpos;

    /** @var string Expression being parsed (kept for error reporting) */
    private $expression;

    /** @var array Token substituted once the token stream is exhausted */
    private static $nullToken = ['type' => T::T_EOF];

    /** @var array AST node that refers to the current value */
    private static $currentNode = ['type' => T::T_CURRENT];

    /**
     * @var array Binding power of each token type. expr() keeps consuming
     *            tokens while the current token binds tighter than the right
     *            binding power it was called with.
     */
    private static $bp = [
        T::T_EOF               => 0,
        T::T_QUOTED_IDENTIFIER => 0,
        T::T_IDENTIFIER        => 0,
        T::T_RBRACKET          => 0,
        T::T_RPAREN            => 0,
        T::T_COMMA             => 0,
        T::T_RBRACE            => 0,
        T::T_NUMBER            => 0,
        T::T_CURRENT           => 0,
        T::T_EXPREF            => 0,
        T::T_COLON             => 0,
        T::T_PIPE              => 1,
        T::T_OR                => 2,
        T::T_AND               => 3,
        T::T_COMPARATOR        => 5,
        T::T_FLATTEN           => 9,
        T::T_STAR              => 20,
        T::T_FILTER            => 21,
        T::T_DOT               => 40,
        T::T_NOT               => 45,
        T::T_LBRACE            => 50,
        T::T_LBRACKET          => 55,
        T::T_LPAREN            => 60,
    ];

    /** @var array Acceptable tokens after a dot token */
    private static $afterDot = [
        T::T_IDENTIFIER        => true, // foo.bar
        T::T_QUOTED_IDENTIFIER => true, // foo."bar"
        T::T_STAR              => true, // foo.*
        T::T_LBRACE            => true, // foo.{a: 0}
        T::T_LBRACKET          => true, // foo.[a, b]
        T::T_FILTER            => true, // foo.[?bar==10]
    ];

    /**
     * @param Lexer|null $lexer Lexer used to tokenize expressions
     */
    public function __construct(Lexer $lexer = null)
    {
        $this->lexer = $lexer ?: new Lexer();
    }

    /**
     * Parses a JMESPath expression into an AST
     *
     * @param string $expression JMESPath expression to compile
     *
     * @return array Returns an array based AST
     * @throws SyntaxErrorException
     */
    public function parse($expression)
    {
        $this->expression = $expression;
        $this->tokens = $this->lexer->tokenize($expression);
        $this->tpos = -1;
        $this->next();
        $result = $this->expr();

        // Anything left over means the expression had trailing tokens that
        // no operator was able to consume.
        if ($this->token['type'] !== T::T_EOF) {
            throw $this->syntax('Did not reach the end of the token stream');
        }

        return $result;
    }

    /**
     * Looks up the binding power of a token type.
     *
     * Token types that have no entry in the table (for example a literal
     * that shows up where an operator is expected) are treated as binding
     * power 0. This makes expr() stop and lets the caller report a syntax
     * error instead of PHP emitting an "undefined index" diagnostic.
     *
     * @param string $type Token type
     *
     * @return int
     */
    private static function bindingPower($type)
    {
        return isset(self::$bp[$type]) ? self::$bp[$type] : 0;
    }

    /**
     * Parses an expression while rbp < lbp.
     *
     * @param int   $rbp  Right bound precedence
     *
     * @return array
     */
    private function expr($rbp = 0)
    {
        // Unknown "nud_*" / "led_*" methods fall through to __call(), which
        // turns them into a syntax error.
        $left = $this->{"nud_{$this->token['type']}"}();
        while ($rbp < self::bindingPower($this->token['type'])) {
            $left = $this->{"led_{$this->token['type']}"}($left);
        }

        return $left;
    }

    /**
     * Parses an unquoted identifier into a field node.
     *
     * @return array
     */
    private function nud_identifier()
    {
        $token = $this->token;
        $this->next();

        return ['type' => 'field', 'value' => $token['value']];
    }

    /**
     * Parses a quoted identifier into a field node. A quoted identifier may
     * not be followed by "(", i.e. it cannot be used as a function name.
     *
     * @return array
     * @throws SyntaxErrorException
     */
    private function nud_quoted_identifier()
    {
        $token = $this->token;
        $this->next();
        $this->assertNotToken(T::T_LPAREN);

        return ['type' => 'field', 'value' => $token['value']];
    }

    /**
     * Parses the current-node token.
     *
     * @return array
     */
    private function nud_current()
    {
        $this->next();

        return self::$currentNode;
    }

    /**
     * Parses a literal token into a literal node.
     *
     * @return array
     */
    private function nud_literal()
    {
        $token = $this->token;
        $this->next();

        return ['type' => 'literal', 'value' => $token['value']];
    }

    /**
     * Parses an expression reference; the referenced expression becomes the
     * only child of the node.
     *
     * @return array
     */
    private function nud_expref()
    {
        $this->next();

        return [
            'type'     => T::T_EXPREF,
            'children' => [$this->expr(self::$bp[T::T_EXPREF])]
        ];
    }

    /**
     * Parses a prefix "not"; the negated expression becomes the only child.
     *
     * @return array
     */
    private function nud_not()
    {
        $this->next();

        return [
            'type'     => T::T_NOT,
            'children' => [$this->expr(self::$bp[T::T_NOT])]
        ];
    }

    /**
     * Parses a parenthesised expression and returns the inner node as is.
     *
     * @return array
     * @throws SyntaxErrorException when the closing ")" is missing
     */
    private function nud_lparen()
    {
        $this->next();
        $result = $this->expr(0);
        if ($this->token['type'] !== T::T_RPAREN) {
            throw $this->syntax('Unclosed `(`');
        }
        $this->next();

        return $result;
    }

    /**
     * Parses a multi-select hash, e.g. {a: foo, "b": bar}.
     *
     * @return array
     * @throws SyntaxErrorException
     */
    private function nud_lbrace()
    {
        static $validKeys = [T::T_QUOTED_IDENTIFIER => true, T::T_IDENTIFIER => true];
        $this->next($validKeys);
        $pairs = [];

        do {
            $pairs[] = $this->parseKeyValuePair();
            if ($this->token['type'] === T::T_COMMA) {
                // A comma must be followed by another key.
                $this->next($validKeys);
            }
        } while ($this->token['type'] !== T::T_RBRACE);

        $this->next();

        return ['type' => 'multi_select_hash', 'children' => $pairs];
    }

    /**
     * Parses a flatten token that starts an expression by applying it to
     * the current node.
     *
     * @return array
     */
    private function nud_flatten()
    {
        return $this->led_flatten(self::$currentNode);
    }

    /**
     * Parses a filter that starts an expression by applying it to the
     * current node.
     *
     * @return array
     */
    private function nud_filter()
    {
        return $this->led_filter(self::$currentNode);
    }

    /**
     * Parses a leading "*" as an object wildcard on the current node.
     *
     * @return array
     */
    private function nud_star()
    {
        return $this->parseWildcardObject(self::$currentNode);
    }

    /**
     * Parses a "[" that starts an expression: an index or slice, an array
     * wildcard ("[*]"), or otherwise a multi-select list.
     *
     * @return array
     */
    private function nud_lbracket()
    {
        $this->next();
        $type = $this->token['type'];

        if ($type === T::T_NUMBER || $type === T::T_COLON) {
            return $this->parseArrayIndexExpression();
        }

        if ($type === T::T_STAR && $this->lookahead() === T::T_RBRACKET) {
            return $this->parseWildcardArray();
        }

        return $this->parseMultiSelectList();
    }

    /**
     * Parses a "[" that follows an expression: an index or slice applied to
     * $left, or an array wildcard projection over $left.
     *
     * @param array $left Left-hand side node
     *
     * @return array
     * @throws SyntaxErrorException
     */
    private function led_lbracket(array $left)
    {
        static $nextTypes = [T::T_NUMBER => true, T::T_COLON => true, T::T_STAR => true];
        $this->next($nextTypes);

        switch ($this->token['type']) {
            case T::T_NUMBER:
            case T::T_COLON:
                return [
                    'type'     => 'subexpression',
                    'children' => [$left, $this->parseArrayIndexExpression()]
                ];
            default:
                // Only "*" remains, as next() rejected everything else.
                return $this->parseWildcardArray($left);
        }
    }

    /**
     * Parses a flatten operator applied to $left and the projection that
     * follows it.
     *
     * @param array $left Left-hand side node
     *
     * @return array
     */
    private function led_flatten(array $left)
    {
        $this->next();

        return [
            'type'     => 'projection',
            'from'     => 'array',
            'children' => [
                ['type' => T::T_FLATTEN, 'children' => [$left]],
                $this->parseProjection(self::$bp[T::T_FLATTEN])
            ]
        ];
    }

    /**
     * Parses a "." applied to $left: either an object wildcard ("foo.*") or
     * a subexpression.
     *
     * @param array $left Left-hand side node
     *
     * @return array
     * @throws SyntaxErrorException
     */
    private function led_dot(array $left)
    {
        $this->next(self::$afterDot);

        if ($this->token['type'] === T::T_STAR) {
            return $this->parseWildcardObject($left);
        }

        return [
            'type'     => 'subexpression',
            'children' => [$left, $this->parseDot(self::$bp[T::T_DOT])]
        ];
    }

    /**
     * Parses an "or" expression.
     *
     * @param array $left Left-hand side node
     *
     * @return array
     */
    private function led_or(array $left)
    {
        $this->next();

        return [
            'type'     => T::T_OR,
            'children' => [$left, $this->expr(self::$bp[T::T_OR])]
        ];
    }

    /**
     * Parses an "and" expression.
     *
     * @param array $left Left-hand side node
     *
     * @return array
     */
    private function led_and(array $left)
    {
        $this->next();

        return [
            'type'     => T::T_AND,
            'children' => [$left, $this->expr(self::$bp[T::T_AND])]
        ];
    }

    /**
     * Parses a pipe expression.
     *
     * @param array $left Left-hand side node
     *
     * @return array
     */
    private function led_pipe(array $left)
    {
        $this->next();

        return [
            'type'     => T::T_PIPE,
            'children' => [$left, $this->expr(self::$bp[T::T_PIPE])]
        ];
    }

    /**
     * Parses a function call; the value of $left is used as the function
     * name and the comma separated arguments become the children.
     *
     * @param array $left Left-hand side node
     *
     * @return array
     */
    private function led_lparen(array $left)
    {
        $args = [];
        $this->next();

        while ($this->token['type'] !== T::T_RPAREN) {
            $args[] = $this->expr(0);
            if ($this->token['type'] === T::T_COMMA) {
                $this->next();
            }
        }

        $this->next();

        return [
            'type'     => 'function',
            'value'    => $left['value'],
            'children' => $args
        ];
    }

    /**
     * Parses a filter expression applied to $left and the projection that
     * follows the closing bracket.
     *
     * @param array $left Left-hand side node
     *
     * @return array
     * @throws SyntaxErrorException when the closing "]" is missing
     */
    private function led_filter(array $left)
    {
        $this->next();
        $expression = $this->expr();

        if ($this->token['type'] !== T::T_RBRACKET) {
            throw $this->syntax('Expected a closing rbracket for the filter');
        }

        $this->next();
        $rhs = $this->parseProjection(self::$bp[T::T_FILTER]);

        return [
            'type'       => 'projection',
            'from'       => 'array',
            'children'   => [
                $left ?: self::$currentNode,
                [
                    'type'     => 'condition',
                    'children' => [$expression, $rhs]
                ]
            ]
        ];
    }

    /**
     * Parses a comparison; the comparator is stored in the node value.
     *
     * @param array $left Left-hand side node
     *
     * @return array
     */
    private function led_comparator(array $left)
    {
        $token = $this->token;
        $this->next();

        return [
            'type'     => T::T_COMPARATOR,
            'value'    => $token['value'],
            'children' => [$left, $this->expr(self::$bp[T::T_COMPARATOR])]
        ];
    }

    /**
     * Parses the right-hand side of a projection.
     *
     * @param int $bp Binding power of the token that opened the projection
     *
     * @return array
     * @throws SyntaxErrorException
     */
    private function parseProjection($bp)
    {
        $type = $this->token['type'];

        // A weakly binding token ends the projection: it projects onto the
        // current node.
        if (self::bindingPower($type) < 10) {
            return self::$currentNode;
        }

        if ($type === T::T_DOT) {
            $this->next(self::$afterDot);
            return $this->parseDot($bp);
        }

        if ($type === T::T_LBRACKET || $type === T::T_FILTER) {
            return $this->expr($bp);
        }

        throw $this->syntax('Syntax error after projection');
    }

    /**
     * Parses what follows a dot. A "[" in this position opens a
     * multi-select list; anything else is parsed as an expression.
     *
     * @param int $bp Right binding power used for the expression
     *
     * @return array
     */
    private function parseDot($bp)
    {
        if ($this->token['type'] === T::T_LBRACKET) {
            $this->next();
            return $this->parseMultiSelectList();
        }

        return $this->expr($bp);
    }

    /**
     * Parses one "key: expression" entry of a multi-select hash. The
     * current token must be the key.
     *
     * @return array
     * @throws SyntaxErrorException when the key is not followed by ":"
     */
    private function parseKeyValuePair()
    {
        static $validColon = [T::T_COLON => true];
        $key = $this->token['value'];
        $this->next($validColon);
        // Step over the colon to the first token of the value.
        $this->next();

        return [
            'type'     => 'key_val_pair',
            'value'    => $key,
            'children' => [$this->expr()]
        ];
    }

    /**
     * Parses an object wildcard projection ("*").
     *
     * @param array|null $left Node being projected; defaults to the current
     *                         node
     *
     * @return array
     */
    private function parseWildcardObject(array $left = null)
    {
        $this->next();

        return [
            'type'     => 'projection',
            'from'     => 'object',
            'children' => [
                $left ?: self::$currentNode,
                $this->parseProjection(self::$bp[T::T_STAR])
            ]
        ];
    }

    /**
     * Parses an array wildcard projection ("[*]"). The current token must
     * be the "*".
     *
     * @param array|null $left Node being projected; defaults to the current
     *                         node
     *
     * @return array
     * @throws SyntaxErrorException when "*" is not followed by "]"
     */
    private function parseWildcardArray(array $left = null)
    {
        static $getRbracket = [T::T_RBRACKET => true];
        $this->next($getRbracket);
        $this->next();

        return [
            'type'     => 'projection',
            'from'     => 'array',
            'children' => [
                $left ?: self::$currentNode,
                $this->parseProjection(self::$bp[T::T_STAR])
            ]
        ];
    }

    /**
     * Parses an array index expression (e.g., [0], [1:2:3])
     *
     * @return array An index node, or a projection over a slice node
     * @throws SyntaxErrorException
     */
    private function parseArrayIndexExpression()
    {
        static $matchNext = [
            T::T_NUMBER   => true,
            T::T_COLON    => true,
            T::T_RBRACKET => true
        ];

        $pos = 0;
        $parts = [null, null, null];
        $expected = $matchNext;

        do {
            if ($this->token['type'] === T::T_COLON) {
                $pos++;
                $expected = $matchNext;
            } elseif ($this->token['type'] === T::T_NUMBER) {
                $parts[$pos] = $this->token['value'];
                // Two numbers in a row are not allowed.
                $expected = [T::T_COLON => true, T::T_RBRACKET => true];
            }
            $this->next($expected);
        } while ($this->token['type'] !== T::T_RBRACKET);

        // Consume the closing bracket
        $this->next();

        if ($pos === 0) {
            // No colons were found so this is a simple index extraction
            return ['type' => 'index', 'value' => $parts[0]];
        }

        if ($pos > 2) {
            throw $this->syntax('Invalid array slice syntax: too many colons');
        }

        // Sliced array from start (e.g., [2:])
        return [
            'type'     => 'projection',
            'from'     => 'array',
            'children' => [
                ['type' => 'slice', 'value' => $parts],
                $this->parseProjection(self::$bp[T::T_STAR])
            ]
        ];
    }

    /**
     * Parses a multi-select list up to and including its closing "]". The
     * current token must be the first token of the first element.
     *
     * @return array
     * @throws SyntaxErrorException on a trailing comma
     */
    private function parseMultiSelectList()
    {
        $nodes = [];

        do {
            $nodes[] = $this->expr();
            if ($this->token['type'] === T::T_COMMA) {
                $this->next();
                $this->assertNotToken(T::T_RBRACKET);
            }
        } while ($this->token['type'] !== T::T_RBRACKET);

        $this->next();

        return ['type' => 'multi_select_list', 'children' => $nodes];
    }

    /**
     * Creates (but does not throw) a syntax error for the current token.
     *
     * @param string|array $msg Error message, or the map of token types
     *                          that would have been accepted (see next())
     *
     * @return SyntaxErrorException
     */
    private function syntax($msg)
    {
        return new SyntaxErrorException($msg, $this->token, $this->expression);
    }

    /**
     * Returns the type of the token after the current one without
     * advancing, or T_EOF when there is none.
     *
     * @return string
     */
    private function lookahead()
    {
        return (!isset($this->tokens[$this->tpos + 1]))
            ? T::T_EOF
            : $this->tokens[$this->tpos + 1]['type'];
    }

    /**
     * Advances to the next token, substituting the null (EOF) token once
     * the stream is exhausted.
     *
     * @param array|null $match Optional map of acceptable token types
     *
     * @throws SyntaxErrorException when the new token is not in $match
     */
    private function next(array $match = null)
    {
        if (!isset($this->tokens[$this->tpos + 1])) {
            $this->token = self::$nullToken;
        } else {
            $this->token = $this->tokens[++$this->tpos];
        }

        if ($match && !isset($match[$this->token['type']])) {
            // The map of acceptable types is handed to the exception as is.
            throw $this->syntax($match);
        }
    }

    /**
     * Fails when the current token has the given type.
     *
     * @param string $type Token type that is not allowed here
     *
     * @throws SyntaxErrorException
     */
    private function assertNotToken($type)
    {
        if ($this->token['type'] === $type) {
            throw $this->syntax("Token {$this->tpos} not allowed to be $type");
        }
    }

    /**
     * @internal Handles undefined tokens without paying the cost of validation
     */
    public function __call($method, $args)
    {
        $prefix = substr($method, 0, 4);

        if ($prefix === 'nud_' || $prefix === 'led_') {
            // expr() dispatched to a handler that does not exist: report
            // the token together with the token types that do have a
            // handler of the same kind.
            $token = substr($method, 4);
            $message = "Unexpected \"$token\" token ($method). Expected one of"
                . " the following tokens: "
                . implode(', ', array_map(function ($i) {
                    return '"' . substr($i, 4) . '"';
                }, array_filter(
                    get_class_methods($this),
                    function ($i) use ($prefix) {
                        return strpos($i, $prefix) === 0;
                    }
                )));
            throw $this->syntax($message);
        }

        throw new \BadMethodCallException("Call to undefined method $method");
    }
}