You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
237 lines
9.4 KiB
237 lines
9.4 KiB
<?php declare(strict_types=1); |
|
|
|
namespace PhpParser\Internal; |
|
|
|
if (\PHP_VERSION_ID >= 80000) { |
|
class TokenPolyfill extends \PhpToken { |
|
} |
|
return; |
|
} |
|
|
|
/** |
|
* This is a polyfill for the PhpToken class introduced in PHP 8.0. We do not actually polyfill |
|
* PhpToken, because composer might end up picking a different polyfill implementation, which does |
|
* not meet our requirements. |
|
* |
|
* @internal |
|
*/ |
|
class TokenPolyfill { |
|
/** @var int The ID of the token. Either a T_* constant of a character code < 256. */ |
|
public int $id; |
|
/** @var string The textual content of the token. */ |
|
public string $text; |
|
/** @var int The 1-based starting line of the token (or -1 if unknown). */ |
|
public int $line; |
|
/** @var int The 0-based starting position of the token (or -1 if unknown). */ |
|
public int $pos; |
|
|
|
/** @var array<int, bool> Tokens ignored by the PHP parser. */ |
|
private const IGNORABLE_TOKENS = [ |
|
\T_WHITESPACE => true, |
|
\T_COMMENT => true, |
|
\T_DOC_COMMENT => true, |
|
\T_OPEN_TAG => true, |
|
]; |
|
|
|
/** @var array<int, bool> Tokens that may be part of a T_NAME_* identifier. */ |
|
private static array $identifierTokens; |
|
|
|
/** |
|
* Create a Token with the given ID and text, as well optional line and position information. |
|
*/ |
|
final public function __construct(int $id, string $text, int $line = -1, int $pos = -1) { |
|
$this->id = $id; |
|
$this->text = $text; |
|
$this->line = $line; |
|
$this->pos = $pos; |
|
} |
|
|
|
/** |
|
* Get the name of the token. For single-char tokens this will be the token character. |
|
* Otherwise it will be a T_* style name, or null if the token ID is unknown. |
|
*/ |
|
public function getTokenName(): ?string { |
|
if ($this->id < 256) { |
|
return \chr($this->id); |
|
} |
|
|
|
$name = token_name($this->id); |
|
return $name === 'UNKNOWN' ? null : $name; |
|
} |
|
|
|
/** |
|
* Check whether the token is of the given kind. The kind may be either an integer that matches |
|
* the token ID, a string that matches the token text, or an array of integers/strings. In the |
|
* latter case, the function returns true if any of the kinds in the array match. |
|
* |
|
* @param int|string|(int|string)[] $kind |
|
*/ |
|
public function is($kind): bool { |
|
if (\is_int($kind)) { |
|
return $this->id === $kind; |
|
} |
|
if (\is_string($kind)) { |
|
return $this->text === $kind; |
|
} |
|
if (\is_array($kind)) { |
|
foreach ($kind as $entry) { |
|
if (\is_int($entry)) { |
|
if ($this->id === $entry) { |
|
return true; |
|
} |
|
} elseif (\is_string($entry)) { |
|
if ($this->text === $entry) { |
|
return true; |
|
} |
|
} else { |
|
throw new \TypeError( |
|
'Argument #1 ($kind) must only have elements of type string|int, ' . |
|
gettype($entry) . ' given'); |
|
} |
|
} |
|
return false; |
|
} |
|
throw new \TypeError( |
|
'Argument #1 ($kind) must be of type string|int|array, ' .gettype($kind) . ' given'); |
|
} |
|
|
|
/** |
|
* Check whether this token would be ignored by the PHP parser. Returns true for T_WHITESPACE, |
|
* T_COMMENT, T_DOC_COMMENT and T_OPEN_TAG, and false for everything else. |
|
*/ |
|
public function isIgnorable(): bool { |
|
return isset(self::IGNORABLE_TOKENS[$this->id]); |
|
} |
|
|
|
/** |
|
* Return the textual content of the token. |
|
*/ |
|
public function __toString(): string { |
|
return $this->text; |
|
} |
|
|
|
/** |
|
* Tokenize the given source code and return an array of tokens. |
|
* |
|
* This performs certain canonicalizations to match the PHP 8.0 token format: |
|
* * Bad characters are represented using T_BAD_CHARACTER rather than omitted. |
|
* * T_COMMENT does not include trailing newlines, instead the newline is part of a following |
|
* T_WHITESPACE token. |
|
* * Namespaced names are represented using T_NAME_* tokens. |
|
* |
|
* @return static[] |
|
*/ |
|
public static function tokenize(string $code, int $flags = 0): array { |
|
self::init(); |
|
|
|
$tokens = []; |
|
$line = 1; |
|
$pos = 0; |
|
$origTokens = \token_get_all($code, $flags); |
|
|
|
$numTokens = \count($origTokens); |
|
for ($i = 0; $i < $numTokens; $i++) { |
|
$token = $origTokens[$i]; |
|
if (\is_string($token)) { |
|
if (\strlen($token) === 2) { |
|
// b" and B" are tokenized as single-char tokens, even though they aren't. |
|
$tokens[] = new static(\ord('"'), $token, $line, $pos); |
|
$pos += 2; |
|
} else { |
|
$tokens[] = new static(\ord($token), $token, $line, $pos); |
|
$pos++; |
|
} |
|
} else { |
|
$id = $token[0]; |
|
$text = $token[1]; |
|
|
|
// Emulate PHP 8.0 comment format, which does not include trailing whitespace anymore. |
|
if ($id === \T_COMMENT && \substr($text, 0, 2) !== '/*' && |
|
\preg_match('/(\r\n|\n|\r)$/D', $text, $matches) |
|
) { |
|
$trailingNewline = $matches[0]; |
|
$text = \substr($text, 0, -\strlen($trailingNewline)); |
|
$tokens[] = new static($id, $text, $line, $pos); |
|
$pos += \strlen($text); |
|
|
|
if ($i + 1 < $numTokens && $origTokens[$i + 1][0] === \T_WHITESPACE) { |
|
// Move trailing newline into following T_WHITESPACE token, if it already exists. |
|
$origTokens[$i + 1][1] = $trailingNewline . $origTokens[$i + 1][1]; |
|
$origTokens[$i + 1][2]--; |
|
} else { |
|
// Otherwise, we need to create a new T_WHITESPACE token. |
|
$tokens[] = new static(\T_WHITESPACE, $trailingNewline, $line, $pos); |
|
$line++; |
|
$pos += \strlen($trailingNewline); |
|
} |
|
continue; |
|
} |
|
|
|
// Emulate PHP 8.0 T_NAME_* tokens, by combining sequences of T_NS_SEPARATOR and |
|
// T_STRING into a single token. |
|
if (($id === \T_NS_SEPARATOR || isset(self::$identifierTokens[$id]))) { |
|
$newText = $text; |
|
$lastWasSeparator = $id === \T_NS_SEPARATOR; |
|
for ($j = $i + 1; $j < $numTokens; $j++) { |
|
if ($lastWasSeparator) { |
|
if (!isset(self::$identifierTokens[$origTokens[$j][0]])) { |
|
break; |
|
} |
|
$lastWasSeparator = false; |
|
} else { |
|
if ($origTokens[$j][0] !== \T_NS_SEPARATOR) { |
|
break; |
|
} |
|
$lastWasSeparator = true; |
|
} |
|
$newText .= $origTokens[$j][1]; |
|
} |
|
if ($lastWasSeparator) { |
|
// Trailing separator is not part of the name. |
|
$j--; |
|
$newText = \substr($newText, 0, -1); |
|
} |
|
if ($j > $i + 1) { |
|
if ($id === \T_NS_SEPARATOR) { |
|
$id = \T_NAME_FULLY_QUALIFIED; |
|
} elseif ($id === \T_NAMESPACE) { |
|
$id = \T_NAME_RELATIVE; |
|
} else { |
|
$id = \T_NAME_QUALIFIED; |
|
} |
|
$tokens[] = new static($id, $newText, $line, $pos); |
|
$pos += \strlen($newText); |
|
$i = $j - 1; |
|
continue; |
|
} |
|
} |
|
|
|
$tokens[] = new static($id, $text, $line, $pos); |
|
$line += \substr_count($text, "\n"); |
|
$pos += \strlen($text); |
|
} |
|
} |
|
return $tokens; |
|
} |
|
|
|
/** Initialize private static state needed by tokenize(). */ |
|
private static function init(): void { |
|
if (isset(self::$identifierTokens)) { |
|
return; |
|
} |
|
|
|
// Based on semi_reserved production. |
|
self::$identifierTokens = \array_fill_keys([ |
|
\T_STRING, |
|
\T_STATIC, \T_ABSTRACT, \T_FINAL, \T_PRIVATE, \T_PROTECTED, \T_PUBLIC, \T_READONLY, |
|
\T_INCLUDE, \T_INCLUDE_ONCE, \T_EVAL, \T_REQUIRE, \T_REQUIRE_ONCE, \T_LOGICAL_OR, \T_LOGICAL_XOR, \T_LOGICAL_AND, |
|
\T_INSTANCEOF, \T_NEW, \T_CLONE, \T_EXIT, \T_IF, \T_ELSEIF, \T_ELSE, \T_ENDIF, \T_ECHO, \T_DO, \T_WHILE, |
|
\T_ENDWHILE, \T_FOR, \T_ENDFOR, \T_FOREACH, \T_ENDFOREACH, \T_DECLARE, \T_ENDDECLARE, \T_AS, \T_TRY, \T_CATCH, |
|
\T_FINALLY, \T_THROW, \T_USE, \T_INSTEADOF, \T_GLOBAL, \T_VAR, \T_UNSET, \T_ISSET, \T_EMPTY, \T_CONTINUE, \T_GOTO, |
|
\T_FUNCTION, \T_CONST, \T_RETURN, \T_PRINT, \T_YIELD, \T_LIST, \T_SWITCH, \T_ENDSWITCH, \T_CASE, \T_DEFAULT, |
|
\T_BREAK, \T_ARRAY, \T_CALLABLE, \T_EXTENDS, \T_IMPLEMENTS, \T_NAMESPACE, \T_TRAIT, \T_INTERFACE, \T_CLASS, |
|
\T_CLASS_C, \T_TRAIT_C, \T_FUNC_C, \T_METHOD_C, \T_LINE, \T_FILE, \T_DIR, \T_NS_C, \T_HALT_COMPILER, \T_FN, |
|
\T_MATCH, |
|
], true); |
|
} |
|
}
|
|
|