1: <?php
2:
3: declare(strict_types=1);
4:
5: namespace LaravelUi5\OData\Protocol\Parser;
6:
7: use LaravelUi5\OData\Exception\BadRequestException;
8:
/**
 * OData expression lexer — tokenizes filter/search/compute expressions.
 *
 * The lexer keeps a byte offset into the input text and offers small matchers
 * that either consume input and return the matched token, or throw a
 * BadRequestException and leave the cursor where it was. The maybe*() variants
 * and with() turn such failures into a null return with the cursor restored.
 *
 * Token patterns are derived from the legacy Expression\Lexer (OData ABNF spec);
 * the class itself has no legacy infrastructure dependencies.
 *
 * @link https://docs.oasis-open.org/odata/odata/v4.01/os/abnf/odata-abnf-construction-rules.txt
 */
final class ExpressionLexer
{
    // ── OData ABNF regex patterns ───────────────────────────────────────
    //
    // Patterns are matched in UTF-8 mode (see expression()), where PHP also
    // switches \d and \s to their Unicode meaning. Digits and whitespace are
    // therefore spelled out as explicit ASCII classes.

    public const IDENTIFIER = '([A-Za-z_\p{L}\p{Nl}][A-Za-z_0-9\p{L}\p{Nl}\p{Nd}\p{Mn}\p{Mc}\p{Pc}\p{Cf}]{0,127})';
    public const QUALIFIED_IDENTIFIER = '(?:' . self::IDENTIFIER . '\.?)*' . self::IDENTIFIER;
    public const DURATION = '(-?)P(?=[0-9]|T[0-9])([0-9]+Y)?([0-9]+M)?([0-9]+[DW])?(T([0-9]+H)?([0-9]+M)?(([0-9]+(\.[0-9]+)?)S)?)?';
    public const DATETIME_OFFSET = '[0-9]{4,}-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])T([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9]([.][0-9]{1,12})?(Z|[+-][0-9][0-9]:[0-9][0-9])';
    public const DATE = '[0-9]{4,}-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])';
    public const GUID = '[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}';
    public const TIME_OF_DAY = '([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9]([.][0-9]{1,12})?';
    public const DIGIT = '[0-9]';
    public const LAMBDA_VARIABLE = self::IDENTIFIER . '\:';

    /** Regex class for one whitespace byte: space, tab, LF, CR, VT, FF. */
    private const WHITESPACE = '[ \t\n\r\x0B\f]';

    /** The same whitespace set as a character list for trim()/strspn(). */
    private const WHITESPACE_CHARS = " \t\n\r\x0B\f";

    /** Current cursor, as a byte offset into $text. */
    private int $pos = 0;

    /** Length of $text in bytes. */
    private readonly int $len;

    /**
     * @param string $text The expression text to tokenize.
     */
    public function __construct(
        private readonly string $text,
    ) {
        $this->len = strlen($text);
    }

    // ── Backtracking ────────────────────────────────────────────────────

    /**
     * Run a parse action; if it returns null or throws a BadRequestException,
     * restore the cursor to where it was before the action and return null.
     *
     * @param callable $callback Receives this lexer as its only argument.
     *
     * @return mixed The callback's result, or null on failure.
     */
    public function with(callable $callback): mixed
    {
        $savedPos = $this->pos;

        try {
            $result = $callback($this);
        } catch (BadRequestException) {
            $result = null;
        }

        if ($result === null) {
            $this->pos = $savedPos;
        }

        return $result;
    }

    // ── Core matching ───────────────────────────────────────────────────

    /**
     * Whether the cursor has reached the end of the input.
     */
    public function finished(): bool
    {
        return $this->pos >= $this->len;
    }

    /**
     * The not-yet-consumed part of the input.
     */
    public function remaining(): string
    {
        return substr($this->text, $this->pos);
    }

    /**
     * Build a short excerpt of the input around the cursor for error messages.
     *
     * The byte under the cursor is wrapped as ">x<"; at end of input the
     * excerpt ends in "<END" instead. Offsets are bytes, so an excerpt may
     * start or end inside a multi-byte character.
     */
    public function errorContext(): string
    {
        $ctx = 32;
        $left = max($this->pos - $ctx, 0);
        $right = min($left + $ctx * 2, $this->len);
        $before = substr($this->text, $left, $this->pos - $left);

        if ($this->pos >= $this->len) {
            return sprintf('%s<END', $before);
        }

        return sprintf(
            '%s>%s<%s',
            $before,
            $this->text[$this->pos],
            substr($this->text, $this->pos + 1, $right - $this->pos),
        );
    }

    /**
     * Match a regex at the current position, advance the cursor past the whole
     * match and return the requested capture group.
     *
     * The pattern is embedded between '@' delimiters, so it must not contain an
     * unescaped '@'. It is wrapped in a non-capturing group so that the start
     * anchor applies to every top-level alternative; capture group numbers are
     * unaffected.
     *
     * Matching is done in UTF-8 mode so that Unicode property classes such as
     * \p{L} see code points rather than single bytes. If the remaining input is
     * not valid UTF-8 the match is retried in byte mode.
     *
     * @param string $pattern       Regex fragment without delimiters or anchors.
     * @param bool   $caseSensitive Pass false to match case-insensitively.
     * @param int    $group         Capture group to return; 0 is the whole match.
     *
     * @return string The requested group, or '' if that group did not take part in the match.
     *
     * @throws BadRequestException on no match
     */
    public function expression(string $pattern, bool $caseSensitive = true, int $group = 0): string
    {
        // Defensive: nothing in this class moves the cursor past the end.
        if ($this->pos > $this->len) {
            throw new BadRequestException('lexer_eof', "Expected {$pattern} but reached end of string");
        }

        $subject = substr($this->text, $this->pos);
        $flags = $caseSensitive ? '' : 'i';
        $regex = "@^(?:{$pattern})@{$flags}";

        $result = preg_match($regex . 'u', $subject, $matches);

        if ($result === false && preg_last_error() === PREG_BAD_UTF8_ERROR) {
            // Subject is not valid UTF-8 (or the cursor sits inside a multi-byte
            // character): fall back to plain byte matching.
            $result = preg_match($regex, $subject, $matches);
        }

        if ($result !== 1) {
            throw new BadRequestException('lexer_no_match', "Expected {$pattern} at: " . $this->errorContext());
        }

        $this->pos += strlen($matches[0]); // advance by full match, not group

        // Trailing optional groups that did not participate are absent from $matches.
        return $matches[$group] ?? '';
    }

    /**
     * Like expression(), but returns null and leaves the cursor untouched on no match.
     */
    public function maybeExpression(string $pattern, bool $caseSensitive = true, int $group = 0): ?string
    {
        return $this->with(fn () => $this->expression($pattern, $caseSensitive, $group));
    }

    /**
     * Consume a single byte; if $char is non-empty the byte must equal it.
     *
     * @param string $char Expected byte, or '' to accept any byte.
     *
     * @return string The consumed byte.
     *
     * @throws BadRequestException at end of input or when the byte differs from $char
     */
    public function char(string $char = ''): string
    {
        if ($this->pos >= $this->len) {
            throw new BadRequestException('lexer_eof', "Expected '{$char}' but reached end of string");
        }

        $next = $this->text[$this->pos];

        if ($char !== '' && $next !== $char) {
            throw new BadRequestException('lexer_char', "Expected '{$char}' but got '{$next}' at: " . $this->errorContext());
        }

        $this->pos++;

        return $next;
    }

    /**
     * Like char(), but returns null and leaves the cursor untouched on mismatch.
     */
    public function maybeChar(string $char): ?string
    {
        return $this->with(fn () => $this->char($char));
    }

    /**
     * Check whether the next non-whitespace byte equals $char, without consuming anything.
     *
     * Skips the same whitespace bytes that whitespace() consumes.
     */
    public function peekChar(string $char): bool
    {
        $pos = $this->pos + strspn($this->text, self::WHITESPACE_CHARS, $this->pos);

        return $pos < $this->len && $this->text[$pos] === $char;
    }

    /**
     * Match one of the given literal keywords at the current position.
     *
     * Keywords are tried in the order given and compared case-sensitively; the
     * first one that the remaining input starts with is consumed and returned.
     *
     * @throws BadRequestException when none of the keywords matches
     */
    public function literal(string ...$keywords): string
    {
        foreach ($keywords as $kw) {
            $length = strlen($kw);

            if (substr($this->text, $this->pos, $length) === $kw) {
                $this->pos += $length;

                return $kw;
            }
        }

        throw new BadRequestException('lexer_literal', 'Expected ' . implode('|', $keywords) . ' at: ' . $this->errorContext());
    }

    /**
     * Like literal(), but returns null and leaves the cursor untouched on no match.
     */
    public function maybeLiteral(string ...$keywords): ?string
    {
        return $this->with(fn () => $this->literal(...$keywords));
    }

    // ── Whitespace ──────────────────────────────────────────────────────

    /**
     * Consume one or more whitespace bytes.
     *
     * @throws BadRequestException when the cursor is not at whitespace
     */
    public function whitespace(): string
    {
        return $this->expression(self::WHITESPACE . '+');
    }

    /**
     * Consume whitespace if present; returns null when there is none.
     */
    public function maybeWhitespace(): ?string
    {
        return $this->with(fn () => $this->whitespace());
    }

    // ── OData-specific token matchers ───────────────────────────────────
    //
    // Each matcher below consumes one token of the named kind and throws a
    // BadRequestException when the input at the cursor does not match.

    public function identifier(): string
    {
        return $this->expression(self::IDENTIFIER);
    }

    public function qualifiedIdentifier(): string
    {
        return $this->expression(self::QUALIFIED_IDENTIFIER);
    }

    public function boolean(): string
    {
        return $this->literal('true', 'false');
    }

    public function guid(): string
    {
        return $this->expression(self::GUID);
    }

    public function dateTimeOffset(): string
    {
        return $this->expression(self::DATETIME_OFFSET);
    }

    public function date(): string
    {
        return $this->expression(self::DATE);
    }

    public function timeOfDay(): string
    {
        return $this->expression(self::TIME_OF_DAY);
    }

    public function duration(): string
    {
        return $this->expression(self::DURATION);
    }

    /**
     * Match a number: NaN, an optionally signed INF, or an optionally signed
     * run of digits with an optional fractional part.
     *
     * A '.' that is not followed by a digit makes the whole match fail.
     *
     * @return int|float|null Float for NaN, INF and literals with a fraction;
     *                        int for plain digit runs, or float when the value
     *                        does not fit an int; null (cursor restored) on no match.
     */
    public function number(): int|float|null
    {
        return $this->with(function (): int|float {
            if ($this->maybeLiteral('NaN') !== null) {
                return NAN;
            }

            $sign = $this->maybeLiteral('+', '-');

            // INF / -INF
            if ($this->maybeLiteral('INF') !== null) {
                return $sign === '-' ? -INF : INF;
            }

            $literal = ($sign ?? '') . $this->expression(self::DIGIT . '+');

            if ($this->maybeChar('.') === null) {
                // Numeric-string arithmetic yields an int when the value fits and
                // a float otherwise; an (int) cast would silently saturate.
                return 0 + $literal;
            }

            $literal .= '.' . $this->expression(self::DIGIT . '+');

            return (float) $literal;
        });
    }

    /**
     * Match a quoted string and return its content with doubled quotes collapsed.
     *
     * @param string $quote The single-byte quote character delimiting the string.
     *
     * @throws BadRequestException when the opening quote is missing or the string is unterminated
     */
    public function quotedString(string $quote = "'"): string
    {
        $this->char($quote);
        $content = '';

        while (true) {
            $ch = $this->char();

            if ($ch !== $quote) {
                $content .= $ch;
                continue;
            }

            // A quote directly followed by another quote is an escaped quote.
            if ($this->pos < $this->len && $this->text[$this->pos] === $quote) {
                $this->pos++;
                $content .= $quote;
                continue;
            }

            break;
        }

        return $content;
    }

    /**
     * Match a binary operator surrounded by exactly one whitespace byte on each
     * side (case-insensitive).
     *
     * @param string $symbol Regex fragment for the operator; it is not quoted.
     *
     * @return string|null The operator as written in the input, without the
     *                     surrounding whitespace, or null on no match.
     */
    public function operator(string $symbol): ?string
    {
        return $this->with(fn () => trim(
            $this->expression(self::WHITESPACE . $symbol . self::WHITESPACE, false),
            self::WHITESPACE_CHARS,
        ));
    }

    /**
     * Match a unary operator followed by one whitespace byte (case-insensitive).
     *
     * @param string $symbol Regex fragment for the operator; it is not quoted.
     *
     * @return string|null The operator as written in the input, without the
     *                     trailing whitespace, or null on no match.
     */
    public function unaryOperator(string $symbol): ?string
    {
        return $this->with(fn () => trim(
            $this->expression($symbol . self::WHITESPACE, false),
            self::WHITESPACE_CHARS,
        ));
    }

    /**
     * Match a function name directly followed by '(' (case-insensitive).
     *
     * Only the name is consumed: the cursor is left on the '(' so the caller
     * can handle the argument list.
     *
     * @param string $symbol Regex fragment for the function name; it is not quoted.
     *
     * @return string|null The name as written in the input, or null on no match.
     */
    public function func(string $symbol): ?string
    {
        return $this->with(function () use ($symbol): string {
            $matched = $this->expression($symbol . '\(', false);
            $this->pos--; // backtrack the '(' so caller can handle it

            return trim($matched, '(');
        });
    }

    /**
     * Match a parenthesised group and return the text between the outer
     * parentheses, with nested groups kept verbatim.
     *
     * Parentheses are counted byte by byte; a parenthesis inside a quoted
     * string literal is counted like any other.
     *
     * @throws BadRequestException when the cursor is not at '(' or the group is not closed
     */
    public function matchingParenthesis(): string
    {
        $this->char('(');
        $inner = '';
        $depth = 0;

        while (true) {
            $ch = $this->char();

            if ($ch === ')') {
                if ($depth === 0) {
                    break;
                }
                $depth--;
            } elseif ($ch === '(') {
                $depth++;
            }

            $inner .= $ch;
        }

        return $inner;
    }

    /**
     * Like matchingParenthesis(), but returns null and leaves the cursor untouched on failure.
     */
    public function maybeMatchingParenthesis(): ?string
    {
        return $this->with(fn () => $this->matchingParenthesis());
    }
}
370: