| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444 | 
							- <?php
 
- namespace JmesPath;
 
/**
 * Tokenizes JMESPath expressions.
 *
 * The lexer walks the expression one byte at a time (the input is split with
 * str_split()), uses the first byte of each token to pick a scanning state
 * from a static transition table, and emits a flat list of token hashes.
 * Input that cannot be tokenized never raises here: it is reported as a
 * token of type T_UNKNOWN.
 */
class Lexer
{
    const T_DOT = 'dot';
    const T_STAR = 'star';
    const T_COMMA = 'comma';
    const T_COLON = 'colon';
    const T_CURRENT = 'current';
    const T_EXPREF = 'expref';
    const T_LPAREN = 'lparen';
    const T_RPAREN = 'rparen';
    const T_LBRACE = 'lbrace';
    const T_RBRACE = 'rbrace';
    const T_LBRACKET = 'lbracket';
    const T_RBRACKET = 'rbracket';
    const T_FLATTEN = 'flatten';
    const T_IDENTIFIER = 'identifier';
    const T_NUMBER = 'number';
    const T_QUOTED_IDENTIFIER = 'quoted_identifier';
    const T_UNKNOWN = 'unknown';
    const T_PIPE = 'pipe';
    const T_OR = 'or';
    const T_AND = 'and';
    const T_NOT = 'not';
    const T_FILTER = 'filter';
    const T_LITERAL = 'literal';
    const T_EOF = 'eof';
    const T_COMPARATOR = 'comparator';

    const STATE_IDENTIFIER = 0;
    const STATE_NUMBER = 1;
    const STATE_SINGLE_CHAR = 2;
    const STATE_WHITESPACE = 3;
    const STATE_STRING_LITERAL = 4;
    const STATE_QUOTED_STRING = 5;
    const STATE_JSON_LITERAL = 6;
    const STATE_LBRACKET = 7;
    const STATE_PIPE = 8;
    const STATE_LT = 9;
    const STATE_GT = 10;
    const STATE_EQ = 11;
    const STATE_NOT = 12;
    const STATE_AND = 13;

    /** @var array Maps the first character of a token to the scanning state that consumes it */
    private static $transitionTable = [
        '<'  => self::STATE_LT,
        '>'  => self::STATE_GT,
        '='  => self::STATE_EQ,
        '!'  => self::STATE_NOT,
        '['  => self::STATE_LBRACKET,
        '|'  => self::STATE_PIPE,
        '&'  => self::STATE_AND,
        '`'  => self::STATE_JSON_LITERAL,
        '"'  => self::STATE_QUOTED_STRING,
        "'"  => self::STATE_STRING_LITERAL,
        '-'  => self::STATE_NUMBER,
        '0'  => self::STATE_NUMBER,
        '1'  => self::STATE_NUMBER,
        '2'  => self::STATE_NUMBER,
        '3'  => self::STATE_NUMBER,
        '4'  => self::STATE_NUMBER,
        '5'  => self::STATE_NUMBER,
        '6'  => self::STATE_NUMBER,
        '7'  => self::STATE_NUMBER,
        '8'  => self::STATE_NUMBER,
        '9'  => self::STATE_NUMBER,
        ' '  => self::STATE_WHITESPACE,
        "\t" => self::STATE_WHITESPACE,
        "\n" => self::STATE_WHITESPACE,
        "\r" => self::STATE_WHITESPACE,
        '.'  => self::STATE_SINGLE_CHAR,
        '*'  => self::STATE_SINGLE_CHAR,
        ']'  => self::STATE_SINGLE_CHAR,
        ','  => self::STATE_SINGLE_CHAR,
        ':'  => self::STATE_SINGLE_CHAR,
        '@'  => self::STATE_SINGLE_CHAR,
        '('  => self::STATE_SINGLE_CHAR,
        ')'  => self::STATE_SINGLE_CHAR,
        '{'  => self::STATE_SINGLE_CHAR,
        '}'  => self::STATE_SINGLE_CHAR,
        '_'  => self::STATE_IDENTIFIER,
        'A'  => self::STATE_IDENTIFIER,
        'B'  => self::STATE_IDENTIFIER,
        'C'  => self::STATE_IDENTIFIER,
        'D'  => self::STATE_IDENTIFIER,
        'E'  => self::STATE_IDENTIFIER,
        'F'  => self::STATE_IDENTIFIER,
        'G'  => self::STATE_IDENTIFIER,
        'H'  => self::STATE_IDENTIFIER,
        'I'  => self::STATE_IDENTIFIER,
        'J'  => self::STATE_IDENTIFIER,
        'K'  => self::STATE_IDENTIFIER,
        'L'  => self::STATE_IDENTIFIER,
        'M'  => self::STATE_IDENTIFIER,
        'N'  => self::STATE_IDENTIFIER,
        'O'  => self::STATE_IDENTIFIER,
        'P'  => self::STATE_IDENTIFIER,
        'Q'  => self::STATE_IDENTIFIER,
        'R'  => self::STATE_IDENTIFIER,
        'S'  => self::STATE_IDENTIFIER,
        'T'  => self::STATE_IDENTIFIER,
        'U'  => self::STATE_IDENTIFIER,
        'V'  => self::STATE_IDENTIFIER,
        'W'  => self::STATE_IDENTIFIER,
        'X'  => self::STATE_IDENTIFIER,
        'Y'  => self::STATE_IDENTIFIER,
        'Z'  => self::STATE_IDENTIFIER,
        'a'  => self::STATE_IDENTIFIER,
        'b'  => self::STATE_IDENTIFIER,
        'c'  => self::STATE_IDENTIFIER,
        'd'  => self::STATE_IDENTIFIER,
        'e'  => self::STATE_IDENTIFIER,
        'f'  => self::STATE_IDENTIFIER,
        'g'  => self::STATE_IDENTIFIER,
        'h'  => self::STATE_IDENTIFIER,
        'i'  => self::STATE_IDENTIFIER,
        'j'  => self::STATE_IDENTIFIER,
        'k'  => self::STATE_IDENTIFIER,
        'l'  => self::STATE_IDENTIFIER,
        'm'  => self::STATE_IDENTIFIER,
        'n'  => self::STATE_IDENTIFIER,
        'o'  => self::STATE_IDENTIFIER,
        'p'  => self::STATE_IDENTIFIER,
        'q'  => self::STATE_IDENTIFIER,
        'r'  => self::STATE_IDENTIFIER,
        's'  => self::STATE_IDENTIFIER,
        't'  => self::STATE_IDENTIFIER,
        'u'  => self::STATE_IDENTIFIER,
        'v'  => self::STATE_IDENTIFIER,
        'w'  => self::STATE_IDENTIFIER,
        'x'  => self::STATE_IDENTIFIER,
        'y'  => self::STATE_IDENTIFIER,
        'z'  => self::STATE_IDENTIFIER,
    ];

    /** @var array Valid identifier characters after first character */
    private $validIdentifier = [
        'A' => true, 'B' => true, 'C' => true, 'D' => true, 'E' => true,
        'F' => true, 'G' => true, 'H' => true, 'I' => true, 'J' => true,
        'K' => true, 'L' => true, 'M' => true, 'N' => true, 'O' => true,
        'P' => true, 'Q' => true, 'R' => true, 'S' => true, 'T' => true,
        'U' => true, 'V' => true, 'W' => true, 'X' => true, 'Y' => true,
        'Z' => true, 'a' => true, 'b' => true, 'c' => true, 'd' => true,
        'e' => true, 'f' => true, 'g' => true, 'h' => true, 'i' => true,
        'j' => true, 'k' => true, 'l' => true, 'm' => true, 'n' => true,
        'o' => true, 'p' => true, 'q' => true, 'r' => true, 's' => true,
        't' => true, 'u' => true, 'v' => true, 'w' => true, 'x' => true,
        'y' => true, 'z' => true, '_' => true, '0' => true, '1' => true,
        '2' => true, '3' => true, '4' => true, '5' => true, '6' => true,
        '7' => true, '8' => true, '9' => true,
    ];

    /** @var array Valid number characters after the first character */
    private $numbers = [
        '0' => true, '1' => true, '2' => true, '3' => true, '4' => true,
        '5' => true, '6' => true, '7' => true, '8' => true, '9' => true
    ];

    /** @var array Map of simple single character tokens */
    private $simpleTokens = [
        '.' => self::T_DOT,
        '*' => self::T_STAR,
        ']' => self::T_RBRACKET,
        ',' => self::T_COMMA,
        ':' => self::T_COLON,
        '@' => self::T_CURRENT,
        '(' => self::T_LPAREN,
        ')' => self::T_RPAREN,
        '{' => self::T_LBRACE,
        '}' => self::T_RBRACE,
    ];

    /**
     * Tokenize the JMESPath expression into an array of token hashes that
     * each contain a 'type', 'value', and 'pos'.
     *
     * 'pos' is the offset of the token's first byte in $input (the input is
     * split with str_split(), so offsets are byte based). The list always
     * ends with a T_EOF token whose value is null.
     *
     * Characters that are not in the transition table, unterminated
     * delimited tokens, undecodable JSON, a lone "=" and a lone "-" are
     * reported as T_UNKNOWN tokens rather than raised from this method.
     *
     * @param string $input JMESPath input
     *
     * @return array
     * @throws SyntaxErrorException
     */
    public function tokenize($input)
    {
        $tokens = [];

        // str_split('') yields [''] before PHP 8.2, which would be lexed as
        // one unknown token, so empty input is mapped to "no characters".
        $chars = $input === '' ? [] : str_split($input);

        while (false !== ($current = current($chars))) {
            // Every character must be in the transition character table.
            if (!isset(self::$transitionTable[$current])) {
                $tokens[] = [
                    'type'  => self::T_UNKNOWN,
                    'pos'   => key($chars),
                    'value' => $current
                ];
                next($chars);
                continue;
            }

            $state = self::$transitionTable[$current];

            if ($state === self::STATE_SINGLE_CHAR) {
                // Consume simple tokens like ".", ",", "@", etc.
                $tokens[] = [
                    'type'  => $this->simpleTokens[$current],
                    'pos'   => key($chars),
                    'value' => $current
                ];
                next($chars);
            } elseif ($state === self::STATE_IDENTIFIER) {
                // Consume identifiers
                $start = key($chars);
                $tokens[] = [
                    'type'  => self::T_IDENTIFIER,
                    'value' => $this->consumeWhile($chars, $this->validIdentifier),
                    'pos'   => $start
                ];
            } elseif ($state === self::STATE_WHITESPACE) {
                // Skip whitespace
                next($chars);
            } elseif ($state === self::STATE_LBRACKET) {
                // Consume "[", "[?", and "[]"
                $position = key($chars);
                $actual = next($chars);
                if ($actual === ']') {
                    next($chars);
                    $tokens[] = [
                        'type'  => self::T_FLATTEN,
                        'pos'   => $position,
                        'value' => '[]'
                    ];
                } elseif ($actual === '?') {
                    next($chars);
                    $tokens[] = [
                        'type'  => self::T_FILTER,
                        'pos'   => $position,
                        'value' => '[?'
                    ];
                } else {
                    // Plain "[": the lookahead character is left for the
                    // next loop iteration.
                    $tokens[] = [
                        'type'  => self::T_LBRACKET,
                        'pos'   => $position,
                        'value' => '['
                    ];
                }
            } elseif ($state === self::STATE_STRING_LITERAL) {
                // Consume raw string literals and unescape \' to '
                $t = $this->inside($chars, "'", self::T_LITERAL);
                $t['value'] = str_replace("\\'", "'", $t['value']);
                $tokens[] = $t;
            } elseif ($state === self::STATE_PIPE) {
                // Consume pipe and OR
                $tokens[] = $this->matchOr($chars, '|', '|', self::T_OR, self::T_PIPE);
            } elseif ($state === self::STATE_JSON_LITERAL) {
                // Consume JSON literals; only a properly closed literal is
                // unescaped and decoded.
                $token = $this->inside($chars, '`', self::T_LITERAL);
                if ($token['type'] === self::T_LITERAL) {
                    $token['value'] = str_replace('\\`', '`', $token['value']);
                    $token = $this->parseJson($token);
                }
                $tokens[] = $token;
            } elseif ($state === self::STATE_NUMBER) {
                // Consume numbers: an optional leading "-" followed by digits
                $start = key($chars);
                $buffer = $this->consumeWhile($chars, $this->numbers);
                // A minus sign with no digits is not a number; without this
                // check (int)'-' would silently produce the number 0.
                if ($buffer === '-') {
                    $tokens[] = [
                        'type'  => self::T_UNKNOWN,
                        'value' => $buffer,
                        'pos'   => $start
                    ];
                } else {
                    $tokens[] = [
                        'type'  => self::T_NUMBER,
                        'value' => (int) $buffer,
                        'pos'   => $start
                    ];
                }
            } elseif ($state === self::STATE_QUOTED_STRING) {
                // Consume quoted identifiers; the quotes are put back so the
                // content is decoded as a JSON string.
                $token = $this->inside($chars, '"', self::T_QUOTED_IDENTIFIER);
                if ($token['type'] === self::T_QUOTED_IDENTIFIER) {
                    $token['value'] = '"' . $token['value'] . '"';
                    $token = $this->parseJson($token);
                }
                $tokens[] = $token;
            } elseif ($state === self::STATE_EQ) {
                // Consume equals; a single "=" is not a valid token
                $tokens[] = $this->matchOr($chars, '=', '=', self::T_COMPARATOR, self::T_UNKNOWN);
            } elseif ($state === self::STATE_AND) {
                // Consume "&&" (and) or "&" (expression reference)
                $tokens[] = $this->matchOr($chars, '&', '&', self::T_AND, self::T_EXPREF);
            } elseif ($state === self::STATE_NOT) {
                // Consume not equal, or a bare "!" (not)
                $tokens[] = $this->matchOr($chars, '!', '=', self::T_COMPARATOR, self::T_NOT);
            } else {
                // either '<' or '>'
                // Consume less than and greater than, with or without "="
                $tokens[] = $this->matchOr($chars, $current, '=', self::T_COMPARATOR, self::T_COMPARATOR);
            }
        }

        // NOTE(review): this position is counted in UTF-8 characters while
        // every other token position is a byte offset; the two differ for
        // multibyte input. Kept as-is to preserve existing behavior.
        $tokens[] = [
            'type'  => self::T_EOF,
            'pos'   => mb_strlen($input, 'UTF-8'),
            'value' => null
        ];

        return $tokens;
    }

    /**
     * Consumes the current character plus every following character that is
     * a key of $allowed, leaving the cursor on the first character that is
     * not consumed (or past the end of the input).
     *
     * @param array $chars   Array of characters by reference.
     * @param array $allowed Hash whose keys are the characters that may follow.
     *
     * @return string Returns the consumed characters.
     */
    private function consumeWhile(array &$chars, array $allowed)
    {
        $buffer = '';
        $current = current($chars);

        // The first character was already validated by the transition table,
        // so it is always taken; only the following ones are checked.
        do {
            $buffer .= $current;
            $current = next($chars);
        } while ($current !== false && isset($allowed[$current]));

        return $buffer;
    }

    /**
     * Returns a token based on whether or not the next token matches the
     * expected value. If it does, a token of "$type" is returned. Otherwise,
     * a token of "$orElse" type is returned.
     *
     * The cursor is left on the first character after the returned token.
     *
     * @param array  $chars    Array of characters by reference.
     * @param string $current  The current character.
     * @param string $expected Expected character.
     * @param string $type     Expected result type.
     * @param string $orElse   Otherwise return a token of this type.
     *
     * @return array Returns a conditional token.
     */
    private function matchOr(array &$chars, $current, $expected, $type, $orElse)
    {
        // Record the position before advancing: once the cursor runs past
        // the end of the input key() returns null, so the start offset
        // cannot be derived from it afterwards.
        $position = key($chars);

        if (next($chars) === $expected) {
            next($chars);
            return [
                'type'  => $type,
                'pos'   => $position,
                'value' => $current . $expected
            ];
        }

        return [
            'type'  => $orElse,
            'pos'   => $position,
            'value' => $current
        ];
    }

    /**
     * Returns a token that is the result of consuming inside of delimiter
     * characters. A backslash and the character after it are copied to the
     * value verbatim, so an escaped delimiter does not close the token;
     * removing the escapes is left to the caller. If the token is not
     * closed, "unknown" is returned with whatever was consumed so far.
     *
     * @param array  $chars Array of characters by reference.
     * @param string $delim The delimiter character.
     * @param string $type  Token type.
     *
     * @return array Returns the consumed token.
     */
    private function inside(array &$chars, $delim, $type)
    {
        $position = key($chars);
        $current = next($chars);
        $buffer = '';

        while ($current !== $delim) {
            if ($current === '\\') {
                // Keep the backslash and step to the escaped character.
                $buffer .= '\\';
                $current = next($chars);
            }
            if ($current === false) {
                // Unclosed delimiter
                return [
                    'type'  => self::T_UNKNOWN,
                    'value' => $buffer,
                    'pos'   => $position
                ];
            }
            $buffer .= $current;
            $current = next($chars);
        }

        // Step over the closing delimiter.
        next($chars);

        return ['type' => $type, 'value' => $buffer, 'pos' => $position];
    }

    /**
     * Parses a JSON token or sets the token type to "unknown" on error.
     *
     * @param array $token Token that needs parsing.
     *
     * @return array Returns a token with a parsed value.
     */
    private function parseJson(array $token)
    {
        $value = json_decode($token['value'], true);

        if (json_last_error() !== JSON_ERROR_NONE) {
            // Legacy support for elided quotes. Try to parse again by adding
            // quotes around the bad input value.
            $value = json_decode('"' . $token['value'] . '"', true);
            if (json_last_error() !== JSON_ERROR_NONE) {
                // Still undecodable: keep the raw value and flag the token.
                $token['type'] = self::T_UNKNOWN;
                return $token;
            }
        }

        $token['value'] = $value;

        return $token;
    }
}
 
 
  |