1: <?php
2:
3: declare(strict_types=1);
4:
5: namespace LaravelUi5\OData\Protocol\Parser;
6:
7: use LaravelUi5\OData\Exception\BadRequestException;
8: use LaravelUi5\OData\Protocol\Planning\Expression\BinaryExpression;
9: use LaravelUi5\OData\Protocol\Planning\Expression\BinaryOperator;
10: use LaravelUi5\OData\Protocol\Planning\Expression\FilterExpression;
11: use LaravelUi5\OData\Protocol\Planning\Expression\FunctionCallExpression;
12: use LaravelUi5\OData\Protocol\Planning\Expression\LambdaExpression;
13: use LaravelUi5\OData\Protocol\Planning\Expression\LambdaOperator;
14: use LaravelUi5\OData\Protocol\Planning\Expression\LambdaVariableExpression;
15: use LaravelUi5\OData\Protocol\Planning\Expression\LiteralExpression;
16: use LaravelUi5\OData\Protocol\Planning\Expression\NullLiteralExpression;
17: use LaravelUi5\OData\Protocol\Planning\Expression\PropertyPathExpression;
18: use LaravelUi5\OData\Protocol\Planning\Expression\UnaryExpression;
19: use LaravelUi5\OData\Protocol\Planning\Expression\UnaryOperator;
20:
21: /**
22: * OData $filter expression parser — Shunting Yard producing FilterExpression directly.
23: *
24: * Eliminates the 83-class Node tree by placing FilterExpression objects on the
25: * operand stack during parsing. Property names are stored as unresolved strings
26: * in PropertyPathExpression segments; callers resolve them against an EntityType.
27: *
28: * Reuses the proven operator precedence levels and regex patterns from the legacy
29: * parser but is fully self-contained with no legacy dependencies.
30: */
31: final class FilterParser
32: {
// ── Operator symbol registry ────────────────────────────────────────
// Keyed by lowercase symbol → [precedence, isUnary, isBinary, isFunction, isLambda]
//
// tokenizeOperator() destructures every entry positionally, so the order of
// the five fields must not change. It also iterates this table from top to
// bottom, which makes the declaration order the order in which symbols are
// tried against the input.

private const OPERATORS = [
    // Functions (precedence 8, isFunction flag set)
    'concat' => [8, false, false, true, false],
    'contains' => [8, false, false, true, false],
    'endswith' => [8, false, false, true, false],
    'indexof' => [8, false, false, true, false],
    'length' => [8, false, false, true, false],
    'startswith' => [8, false, false, true, false],
    'substring' => [8, false, false, true, false],
    'matchespattern' => [8, false, false, true, false],
    'tolower' => [8, false, false, true, false],
    'toupper' => [8, false, false, true, false],
    'trim' => [8, false, false, true, false],
    'ceiling' => [8, false, false, true, false],
    'floor' => [8, false, false, true, false],
    'round' => [8, false, false, true, false],
    'cast' => [8, false, false, true, false],
    'date' => [8, false, false, true, false],
    'day' => [8, false, false, true, false],
    'fractionalseconds' => [8, false, false, true, false],
    'hour' => [8, false, false, true, false],
    'maxdatetime' => [8, false, false, true, false],
    'mindatetime' => [8, false, false, true, false],
    'minute' => [8, false, false, true, false],
    'month' => [8, false, false, true, false],
    'now' => [8, false, false, true, false],
    'second' => [8, false, false, true, false],
    'time' => [8, false, false, true, false],
    'totaloffsetminutes' => [8, false, false, true, false],
    'totalseconds' => [8, false, false, true, false],
    'year' => [8, false, false, true, false],
    // In operator (precedence 8, flagged unary; its list is collected by the
    // parenthesis handling rather than by applyOperator())
    'in' => [8, true, false, false, false],
    // Has operator (precedence 8, binary)
    'has' => [8, false, true, false, false],
    // Not (precedence 7, unary)
    'not' => [7, true, false, false, false],
    // Multiplicative (precedence 6)
    'mul' => [6, false, true, false, false],
    'div' => [6, false, true, false, false],
    'divby' => [6, false, true, false, false],
    'mod' => [6, false, true, false, false],
    // Additive (precedence 5)
    'add' => [5, false, true, false, false],
    'sub' => [5, false, true, false, false],
    // Relational (precedence 4)
    'gt' => [4, false, true, false, false],
    'ge' => [4, false, true, false, false],
    'lt' => [4, false, true, false, false],
    'le' => [4, false, true, false, false],
    // Equality (precedence 3)
    'eq' => [3, false, true, false, false],
    'ne' => [3, false, true, false, false],
    // Conditional AND (precedence 2)
    'and' => [2, false, true, false, false],
    // Conditional OR (precedence 1)
    'or' => [1, false, true, false, false],
    // Lambda operators (precedence 8, isLambda flag set)
    'any' => [8, false, false, false, true],
    'all' => [8, false, false, false, true],
];
97:
/**
 * Operator symbol → BinaryOperator case, consulted by applyOperator() when it
 * builds a BinaryExpression.
 *
 * applyOperator() deals with `in` before it reaches this lookup, and
 * tokenizeRightParen() references BinaryOperator::In directly, so the 'in'
 * entry is not read by either of them.
 */
private const BINARY_OP_MAP = [
    'eq' => BinaryOperator::Eq, 'ne' => BinaryOperator::Ne,
    'gt' => BinaryOperator::Gt, 'ge' => BinaryOperator::Ge,
    'lt' => BinaryOperator::Lt, 'le' => BinaryOperator::Le,
    'and' => BinaryOperator::And, 'or' => BinaryOperator::Or,
    'add' => BinaryOperator::Add, 'sub' => BinaryOperator::Sub,
    'mul' => BinaryOperator::Mul, 'div' => BinaryOperator::Div,
    'divby' => BinaryOperator::DivBy, 'mod' => BinaryOperator::Mod,
    'has' => BinaryOperator::Has, 'in' => BinaryOperator::In,
];
108:
/**
 * Lambda operator symbol → LambdaOperator case, used whenever a
 * LambdaExpression is built (tokenizeRightParen() and applyOperator()).
 */
private const LAMBDA_OP_MAP = [
    'any' => LambdaOperator::Any,
    'all' => LambdaOperator::All,
];
113:
/**
 * Canonical OData function names, keyed by the lowercase symbol.
 *
 * tokenizeOperator() falls back to the lowercase symbol when a key is absent.
 * Of the entries below only 'matchespattern' changes casing; the others map
 * to themselves and are listed explicitly.
 */
private const CANONICAL_NAMES = [
    'matchespattern' => 'matchesPattern',
    'startswith' => 'startswith',
    'endswith' => 'endswith',
    'indexof' => 'indexof',
    'tolower' => 'tolower',
    'toupper' => 'toupper',
    'fractionalseconds' => 'fractionalseconds',
    'maxdatetime' => 'maxdatetime',
    'mindatetime' => 'mindatetime',
    'totaloffsetminutes' => 'totaloffsetminutes',
    'totalseconds' => 'totalseconds',
];
128:
/** @var FilterExpression[] Operand stack */
private array $operands = [];

/**
 * Operator stack.
 *
 * Entries whose symbol is '(' are group markers pushed by tokenizeLeftParen();
 * only those carry 'operandCount' (the operand-stack size when the group was
 * opened) and a possibly non-null 'attachedOp'.
 *
 * @var list<array{symbol: string, prec: int, isUnary: bool, isBinary: bool, isFunc: bool, isLambda: bool, args: list<FilterExpression>, attachedOp: ?array, navProp: ?string, lambdaVar: ?string, operandCount?: int}>
 */
private array $operators = [];

/** @var list<array{type: string, value: mixed}> Token history for lambda variable lookback */
private array $tokens = [];

/** Lexer over the expression being parsed; replaced at the start of every parse() call. */
private ExpressionLexer $lexer;
139:
/**
 * Parse a filter expression string into a FilterExpression IR.
 *
 * Property names are stored as unresolved strings in PropertyPathExpression
 * segments. Callers (QueryPlanner) must resolve them against an EntityType.
 *
 * @param string $expression Raw filter expression text.
 *
 * @return FilterExpression Root of the parsed expression tree.
 *
 * @throws BadRequestException When a token is not recognised, parentheses are
 *                             unbalanced, the expression is empty, or operands
 *                             remain that no operator joined together.
 */
public function parse(string $expression): FilterExpression
{
    // Reset all per-parse state so one parser instance can be reused.
    $this->lexer = new ExpressionLexer($expression);
    $this->operands = [];
    $this->operators = [];
    $this->tokens = [];

    while (!$this->lexer->finished()) {
        if (!$this->findToken()) {
            throw new BadRequestException(
                'parse_error',
                'Unexpected token at: ' . $this->lexer->errorContext()
            );
        }
    }

    // Apply remaining operators
    while ($this->operators !== []) {
        $op = array_pop($this->operators);
        if ($op['symbol'] === '(') {
            // A group marker that was never closed by ')'.
            throw new BadRequestException('parse_error', 'Unbalanced parentheses');
        }
        $this->applyOperator($op);
    }

    $remaining = count($this->operands);

    if ($remaining === 0) {
        throw new BadRequestException('parse_error', 'Empty expression');
    }

    // A well-formed expression reduces to exactly one tree. Extra operands mean
    // part of the input was never connected by an operator; returning only the
    // first one would silently discard the rest of the filter.
    if ($remaining > 1) {
        throw new BadRequestException(
            'parse_error',
            'Malformed expression: operands are not joined by an operator'
        );
    }

    return $this->operands[0];
}
180:
181: // ── Token dispatch (order matters!) ─────────────────────────────────
182:
/**
 * Try every tokenizer in a fixed order and stop at the first one that succeeds.
 *
 * The order is significant: literal forms are attempted before identifiers,
 * and identifiers before operator keywords.
 *
 * @return bool True when one tokenizer reported success, false when none did.
 */
private function findToken(): bool
{
    $tokenizers = [
        $this->tokenizeNull(...),
        $this->tokenizeBoolean(...),
        $this->tokenizeGuid(...),
        $this->tokenizeDateTimeOffset(...),
        $this->tokenizeDate(...),
        $this->tokenizeTimeOfDay(...),
        $this->tokenizeNumber(...),
        $this->tokenizeSingleQuotedString(...),
        $this->tokenizeDuration(...),
        $this->tokenizeEnum(...),
        $this->tokenizeLeftParen(...),
        $this->tokenizeRightParen(...),
        $this->tokenizeSeparator(...),
        $this->tokenizeLambdaVariable(...),
        $this->tokenizeLambdaProperty(...),
        $this->tokenizeNavigationPropertyPath(...),
        $this->tokenizeIdentifier(...),
        $this->tokenizeOperator(...),
    ];

    foreach ($tokenizers as $tokenize) {
        if ($tokenize()) {
            return true;
        }
    }

    return false;
}
204:
205: // ── Literal tokenizers ──────────────────────────────────────────────
206:
/**
 * Consume a `null` literal and push a NullLiteralExpression operand.
 *
 * @return bool False when the lexer did not yield the literal.
 */
private function tokenizeNull(): bool
{
    if ($this->lexer->maybeLiteral('null') === null) {
        return false;
    }

    $this->pushOperand(new NullLiteralExpression());
    $this->tokens[] = ['type' => 'null', 'value' => null];

    return true;
}
217:
/**
 * Consume a boolean literal and push it as an Edm.Boolean operand.
 *
 * The pushed value is true only when the lexeme is exactly 'true'.
 *
 * @return bool False when the lexer produced no boolean lexeme.
 */
private function tokenizeBoolean(): bool
{
    $lexeme = $this->lexer->with(function () {
        return $this->lexer->boolean();
    });

    if ($lexeme === null) {
        return false;
    }

    $isTrue = $lexeme === 'true';

    $this->pushOperand(new LiteralExpression($isTrue, 'Edm.Boolean'));
    $this->tokens[] = ['type' => 'boolean', 'value' => $isTrue];

    return true;
}
228:
/**
 * Consume a GUID literal and push it as an Edm.Guid operand.
 *
 * @return bool True when a GUID was read, false otherwise.
 */
private function tokenizeGuid(): bool
{
    $guid = $this->lexer->with(function () {
        return $this->lexer->guid();
    });

    if ($guid !== null) {
        $this->pushOperand(new LiteralExpression($guid, 'Edm.Guid'));
        $this->tokens[] = ['type' => 'guid', 'value' => $guid];

        return true;
    }

    return false;
}
239:
/**
 * Consume a date-time-offset literal and push it as an Edm.DateTimeOffset operand.
 *
 * The token history records it under the type 'datetime'.
 *
 * @return bool True when a literal was read, false otherwise.
 */
private function tokenizeDateTimeOffset(): bool
{
    $stamp = $this->lexer->with(function () {
        return $this->lexer->dateTimeOffset();
    });

    if ($stamp === null) {
        return false;
    }

    $literal = new LiteralExpression($stamp, 'Edm.DateTimeOffset');
    $this->pushOperand($literal);
    $this->tokens[] = ['type' => 'datetime', 'value' => $stamp];

    return true;
}
250:
/**
 * Consume a date literal and push it as an Edm.Date operand.
 *
 * @return bool True when a date was read, false otherwise.
 */
private function tokenizeDate(): bool
{
    $date = $this->lexer->with(function () {
        return $this->lexer->date();
    });

    if ($date !== null) {
        $this->pushOperand(new LiteralExpression($date, 'Edm.Date'));
        $this->tokens[] = ['type' => 'date', 'value' => $date];

        return true;
    }

    return false;
}
261:
/**
 * Consume a time-of-day literal and push it as an Edm.TimeOfDay operand.
 *
 * The token history records it under the type 'time'.
 *
 * @return bool True when a literal was read, false otherwise.
 */
private function tokenizeTimeOfDay(): bool
{
    $time = $this->lexer->with(function () {
        return $this->lexer->timeOfDay();
    });

    if ($time === null) {
        return false;
    }

    $literal = new LiteralExpression($time, 'Edm.TimeOfDay');
    $this->pushOperand($literal);
    $this->tokens[] = ['type' => 'time', 'value' => $time];

    return true;
}
272:
/**
 * Consume a numeric literal and push it as an operand.
 *
 * A PHP int is typed Edm.Int64; any other value returned by the lexer is
 * typed Edm.Double.
 *
 * @return bool True when a number was read, false otherwise.
 */
private function tokenizeNumber(): bool
{
    $number = $this->lexer->with(function () {
        return $this->lexer->number();
    });

    if ($number === null) {
        return false;
    }

    if (is_int($number)) {
        $type = 'Edm.Int64';
    } else {
        $type = 'Edm.Double';
    }

    $this->pushOperand(new LiteralExpression($number, $type));
    $this->tokens[] = ['type' => 'number', 'value' => $number];

    return true;
}
284:
/**
 * Consume a single-quoted string literal and push it as an Edm.String operand.
 *
 * @return bool True when a string was read, false otherwise.
 */
private function tokenizeSingleQuotedString(): bool
{
    $text = $this->lexer->with(function () {
        return $this->lexer->quotedString("'");
    });

    if ($text !== null) {
        $this->pushOperand(new LiteralExpression($text, 'Edm.String'));
        $this->tokens[] = ['type' => 'string', 'value' => $text];

        return true;
    }

    return false;
}
295:
/**
 * Consume a duration literal and push it as an Edm.Duration operand.
 *
 * @return bool True when a duration was read, false otherwise.
 */
private function tokenizeDuration(): bool
{
    $duration = $this->lexer->with(function () {
        return $this->lexer->duration();
    });

    if ($duration === null) {
        return false;
    }

    $literal = new LiteralExpression($duration, 'Edm.Duration');
    $this->pushOperand($literal);
    $this->tokens[] = ['type' => 'duration', 'value' => $duration];

    return true;
}
306:
/**
 * Consume an enum literal — a qualified identifier immediately followed by a
 * single-quoted value — and push it as an Edm.Enum operand.
 *
 * The operand value is the qualified name with the member re-wrapped in
 * single quotes.
 *
 * @return bool True when an enum literal was read, false otherwise.
 */
private function tokenizeEnum(): bool
{
    $literal = $this->lexer->with(function () {
        $typeName = $this->lexer->qualifiedIdentifier();
        $member = $this->lexer->quotedString("'");

        return $typeName . "'" . $member . "'";
    });

    if ($literal !== null) {
        $this->pushOperand(new LiteralExpression($literal, 'Edm.Enum'));
        $this->tokens[] = ['type' => 'enum', 'value' => $literal];

        return true;
    }

    return false;
}
321:
322: // ── Parentheses and separators ──────────────────────────────────────
323:
/**
 * Consume '(' and open a group marker on the operator stack.
 *
 * When the operator currently on top of the stack is a function, a lambda or
 * `in`, it is removed from the stack and stored on the marker as 'attachedOp'
 * so that the matching ')' can finish it. The marker also records how many
 * operands existed when the group was opened.
 *
 * @return bool False when the next character is not '('.
 */
private function tokenizeLeftParen(): bool
{
    if ($this->lexer->maybeChar('(') === null) {
        return false;
    }

    $owner = null;
    $topKey = array_key_last($this->operators);

    if ($topKey !== null) {
        $top = $this->operators[$topKey];
        $ownsParen = $top['isFunc'] || $top['isLambda'] || $top['symbol'] === 'in';

        if ($ownsParen) {
            $owner = array_pop($this->operators);
        }
    }

    $this->operators[] = [
        'symbol' => '(',
        'prec' => 0,
        'isUnary' => false,
        'isBinary' => false,
        'isFunc' => false,
        'isLambda' => false,
        'args' => [],
        'attachedOp' => $owner,
        'navProp' => null,
        'lambdaVar' => null,
        // Taken after the owner was popped; popping operators never touches operands.
        'operandCount' => count($this->operands),
    ];
    $this->tokens[] = ['type' => 'lparen', 'value' => '('];

    return true;
}
350:
/**
 * Consume ')' and close the innermost open group.
 *
 * Operators stacked since the matching '(' are applied first. If the group
 * has an attached operator, it is finished here:
 *  - function: the pending operand (if any) becomes the last argument and a
 *    FunctionCallExpression is pushed;
 *  - lambda: the pending operand (or a null literal when there is none)
 *    becomes the body of a LambdaExpression;
 *  - `in`: the pending operand joins the list, the operand below it becomes
 *    the left-hand side, and a BinaryExpression with a '__list' function call
 *    on the right is pushed.
 *
 * @return bool False when the next character is not ')'.
 *
 * @throws BadRequestException When there is no matching '(' or `in` has no
 *                             left-hand operand.
 */
private function tokenizeRightParen(): bool
{
    if ($this->lexer->maybeChar(')') === null) {
        return false;
    }

    // Pop and apply operators back to the matching group
    while ($this->operators !== []) {
        $top = end($this->operators);
        if ($top['symbol'] === '(') {
            break;
        }
        $this->applyOperator(array_pop($this->operators));
    }

    if ($this->operators === []) {
        throw new BadRequestException('parse_error', 'Unbalanced right parenthesis');
    }

    // Pop the group
    $group = array_pop($this->operators);
    $attached = $group['attachedOp'];

    // Check if operands were added inside this group
    $hasNewOperands = count($this->operands) > $group['operandCount'];

    if ($attached !== null) {
        if ($attached['isFunc']) {
            // Earlier arguments were already collected at each separator.
            if ($hasNewOperands) {
                $attached['args'][] = array_pop($this->operands);
            }
            $this->pushOperand(new FunctionCallExpression($attached['symbol'], $attached['args']));
        } elseif ($attached['isLambda']) {
            $bodyExpr = $hasNewOperands ? array_pop($this->operands) : new NullLiteralExpression();
            $lambdaOp = self::LAMBDA_OP_MAP[$attached['symbol']];
            $navProp = $attached['navProp'] ?? '';
            $variable = $attached['lambdaVar'] ?? '';

            $collection = new PropertyPathExpression([$navProp]);
            $this->pushOperand(new LambdaExpression($collection, $variable, $bodyExpr, $lambdaOp));
        } elseif ($attached['symbol'] === 'in') {
            if ($hasNewOperands) {
                $attached['args'][] = array_pop($this->operands);
            }

            $left = array_pop($this->operands);
            if ($left === null) {
                // e.g. "in (1,2)" with nothing before it: report a parse error
                // instead of handing null to BinaryExpression.
                throw new BadRequestException('parse_error', "Missing operand for 'in'");
            }

            $listExpr = new FunctionCallExpression('__list', $attached['args']);
            $this->pushOperand(new BinaryExpression($left, BinaryOperator::In, $listExpr));
        }
    }

    $this->tokens[] = ['type' => 'rparen', 'value' => ')'];
    return true;
}
404:
/**
 * Consume an argument separator (a comma with at most one trailing whitespace
 * character) and close the current argument.
 *
 * Operators stacked since the enclosing '(' are applied first. If that group
 * has an attached operator and an operand is available, the operand is moved
 * into the attached operator's argument list.
 *
 * @return bool False when the lexer did not match a separator.
 */
private function tokenizeSeparator(): bool
{
    $comma = $this->lexer->with(function () {
        return $this->lexer->expression(',\s?');
    });

    if ($comma === null) {
        return false;
    }

    // Reduce until the enclosing group marker is on top (or the stack is empty).
    while (
        ($topKey = array_key_last($this->operators)) !== null
        && $this->operators[$topKey]['symbol'] !== '('
    ) {
        $this->applyOperator(array_pop($this->operators));
    }

    $groupKey = array_key_last($this->operators);

    if (
        $groupKey !== null
        && $this->operators[$groupKey]['attachedOp'] !== null
        && $this->operands !== []
    ) {
        $this->operators[$groupKey]['attachedOp']['args'][] = array_pop($this->operands);
    }

    $this->tokens[] = ['type' => 'separator', 'value' => ','];

    return true;
}
433:
434: // ── Lambda variable and property ────────────────────────────────────
435:
/**
 * Consume a lambda variable declaration (text matching
 * ExpressionLexer::LAMBDA_VARIABLE, with trailing ':' characters stripped).
 *
 * The name is recorded in the token history. If a group owned by a lambda
 * operator is open, the nearest such group captures the name as its
 * 'lambdaVar' and no operand is left behind; otherwise a
 * LambdaVariableExpression stays on the operand stack.
 *
 * @return bool False when the lexer did not match a declaration.
 */
private function tokenizeLambdaVariable(): bool
{
    $declaration = $this->lexer->with(function () {
        return $this->lexer->expression(ExpressionLexer::LAMBDA_VARIABLE);
    });

    if ($declaration === null) {
        return false;
    }

    $name = rtrim($declaration, ':');
    $variableExpr = new LambdaVariableExpression($name);
    $this->tokens[] = ['type' => 'lambda_variable', 'value' => $name];

    // Search from the top of the operator stack downwards.
    for ($i = count($this->operators) - 1; $i >= 0; $i--) {
        $entry = $this->operators[$i];
        $isLambdaGroup = $entry['symbol'] === '('
            && $entry['attachedOp'] !== null
            && $entry['attachedOp']['isLambda'];

        if ($isLambdaGroup) {
            // Captured by the lambda, so it never becomes an operand.
            $this->operators[$i]['attachedOp']['lambdaVar'] = $name;

            return true;
        }
    }

    $this->pushOperand($variableExpr);

    return true;
}
460:
/**
 * Consume a property accessed through the most recently declared lambda
 * variable (the variable name, '/', then an identifier).
 *
 * Only the identifier after the slash is kept; it is pushed as a
 * single-segment PropertyPathExpression.
 *
 * @return bool False when no lambda variable has been seen yet or the input
 *              does not match.
 */
private function tokenizeLambdaProperty(): bool
{
    // Look backwards through the token history for the latest declaration.
    $variable = null;
    for ($i = count($this->tokens) - 1; $i >= 0; $i--) {
        if ($this->tokens[$i]['type'] === 'lambda_variable') {
            $variable = $this->tokens[$i]['value'];
            break;
        }
    }

    if ($variable === null) {
        return false;
    }

    $property = $this->lexer->with(function () use ($variable) {
        $this->lexer->literal($variable . '/');

        return $this->lexer->identifier();
    });

    if ($property !== null) {
        $this->pushOperand(new PropertyPathExpression([$property]));
        $this->tokens[] = ['type' => 'lambda_property', 'value' => $property];

        return true;
    }

    return false;
}
489:
490: // ── Navigation property path (identifier + '/') ────────────────────
491:
/**
 * Consume an identifier followed by '/' and push it as a single-segment
 * PropertyPathExpression.
 *
 * Identifiers that are keys of OPERATORS (compared case-insensitively) are
 * rejected. A lambda operator read afterwards pops this operand and uses it
 * as its navigation property (see tokenizeOperator()).
 *
 * @return bool False when the input does not match or the identifier is an
 *              operator keyword.
 */
private function tokenizeNavigationPropertyPath(): bool
{
    $segment = $this->lexer->with(function () {
        $name = $this->lexer->identifier();
        $this->lexer->char('/');

        if (isset(self::OPERATORS[strtolower($name)])) {
            return null;
        }

        return $name;
    });

    if ($segment !== null) {
        $this->pushOperand(new PropertyPathExpression([$segment]));
        $this->tokens[] = ['type' => 'navigation', 'value' => $segment];

        return true;
    }

    return false;
}
511:
512: // ── Identifier (property name) ──────────────────────────────────────
513:
/**
 * Consume a bare identifier and push it as a single-segment
 * PropertyPathExpression.
 *
 * An identifier that is a key of OPERATORS is accepted only when it names a
 * function and peekChar('(') reports false, so that names such as 'date' or
 * 'year' can still be used as properties. Every other operator keyword is
 * rejected.
 *
 * @return bool False when no property name was read.
 */
private function tokenizeIdentifier(): bool
{
    $name = $this->lexer->with(function () {
        $candidate = $this->lexer->identifier();
        $definition = self::OPERATORS[strtolower($candidate)] ?? null;

        // Not a reserved word at all: plain property name.
        if ($definition === null) {
            return $candidate;
        }

        [, , , $isFunction] = $definition;

        // peekChar() is consulted only for function names.
        if ($isFunction && !$this->lexer->peekChar('(')) {
            return $candidate;
        }

        return null;
    });

    if ($name === null) {
        return false;
    }

    $this->pushOperand(new PropertyPathExpression([$name]));
    $this->tokens[] = ['type' => 'property', 'value' => $name];

    return true;
}
547:
548: // ── Operator tokenizer ──────────────────────────────────────────────
549:
/**
 * Consume an operator, function name or lambda keyword and place it on the
 * operator stack.
 *
 * Symbols are tried in OPERATORS declaration order. Functions and lambdas are
 * matched with the lexer's func(), operators flagged unary-only with
 * unaryOperator(), everything else with operator(). The stored symbol is the
 * canonical name from CANONICAL_NAMES when one exists.
 *
 * For a lambda keyword the operand on top of the operand stack is popped; if
 * it is a PropertyPathExpression with at least one segment, its first segment
 * becomes the lambda's navigation property.
 *
 * Before pushing, stacked operators of higher or equal precedence are applied,
 * stopping at a group marker. The prefix operator `not` never triggers such a
 * reduction, because its operand has not been read yet.
 *
 * @return bool False when no symbol matched.
 *
 * @throws BadRequestException Propagated from applyOperator() during reduction.
 */
private function tokenizeOperator(): bool
{
    foreach (self::OPERATORS as $symbol => $def) {
        [$prec, $isUnary, $isBinary, $isFunc, $isLambda] = $def;

        if ($isFunc || $isLambda) {
            $matched = $this->lexer->func($symbol);
        } elseif ($isUnary && !$isBinary) {
            $matched = $this->lexer->unaryOperator($symbol);
        } else {
            $matched = $this->lexer->operator($symbol);
        }

        if ($matched === null) {
            continue;
        }

        $o1 = [
            'symbol' => self::CANONICAL_NAMES[$symbol] ?? $symbol,
            'prec' => $prec,
            'isUnary' => $isUnary,
            'isBinary' => $isBinary,
            'isFunc' => $isFunc,
            'isLambda' => $isLambda,
            'args' => [],
            'attachedOp' => null,
            'navProp' => null,
            'lambdaVar' => null,
        ];

        // For lambda operators: pop the navigation property from operands
        if ($isLambda && $this->operands !== []) {
            $navOperand = array_pop($this->operands);
            if ($navOperand instanceof PropertyPathExpression && $navOperand->segments !== []) {
                $o1['navProp'] = $navOperand->segments[0];
            }
        }

        // `not` precedes its operand. Reducing a stacked operator when it
        // arrives would apply that operator too early: "not not x" used to
        // apply the first `not` with nothing on the operand stack and fail
        // with "Missing operand for 'not'".
        $isPrefix = $symbol === 'not';

        // Shunting Yard: pop higher-or-equal precedence operators
        while (!$isPrefix && $this->operators !== []) {
            $o2 = end($this->operators);
            if ($o2['symbol'] === '(') {
                break;
            }

            // An operator flagged unary (`in`) may only reduce another unary one.
            $mayReduce = !$isUnary || $o2['isUnary'];
            if (!$mayReduce || $o2['prec'] < $prec) {
                break;
            }

            $this->applyOperator(array_pop($this->operators));
        }

        $this->operators[] = $o1;
        $this->tokens[] = ['type' => 'operator', 'value' => $o1['symbol']];
        return true;
    }

    return false;
}
612:
613: // ── Operator application ────────────────────────────────────────────
614:
/**
 * Apply one operator-stack entry to the operand stack.
 *
 *  - function: pushes a FunctionCallExpression with the arguments already
 *    stored on the entry;
 *  - lambda: pops the body (a null literal when the stack is empty) and
 *    pushes a LambdaExpression;
 *  - `not`: pops one operand and pushes a UnaryExpression;
 *  - `in`: always an error here, because tokenizeRightParen() builds the
 *    expression whenever `in` is followed by a parenthesised list;
 *  - anything else: pops the right then the left operand and pushes a
 *    BinaryExpression.
 *
 * @param array $op Operator entry as stored on the operator stack.
 *
 * @throws BadRequestException When an operand is missing, `in` has no
 *                             parenthesised list, or the symbol is not a
 *                             known binary operator.
 */
private function applyOperator(array $op): void
{
    $symbol = $op['symbol'];

    if ($op['isFunc']) {
        // Function: just push it (arguments handled via parens)
        $this->pushOperand(new FunctionCallExpression($symbol, $op['args']));
        return;
    }

    if ($op['isLambda']) {
        // Lambda without parentheses (shouldn't happen in valid OData, but handle gracefully)
        $lambdaOp = self::LAMBDA_OP_MAP[$symbol];
        $navProp = $op['navProp'] ?? '';
        $variable = $op['lambdaVar'] ?? '';
        $body = array_pop($this->operands) ?? new NullLiteralExpression();
        $collection = new PropertyPathExpression([$navProp]);
        $this->pushOperand(new LambdaExpression($collection, $variable, $body, $lambdaOp));
        return;
    }

    if ($op['isUnary'] && $symbol === 'not') {
        $operand = array_pop($this->operands);
        if ($operand === null) {
            throw new BadRequestException('parse_error', "Missing operand for 'not'");
        }
        $this->pushOperand(new UnaryExpression(UnaryOperator::Not, $operand));
        return;
    }

    if ($op['isUnary'] && $symbol === 'in') {
        // `in` is only completed while closing its parenthesised list. Getting
        // here means no list followed it; ignoring that would leave both of
        // its operands stranded on the stack.
        throw new BadRequestException('parse_error', "Operator 'in' requires a parenthesised list");
    }

    // Binary operator
    if (!isset(self::BINARY_OP_MAP[$symbol])) {
        throw new BadRequestException('parse_error', "Unknown operator: {$symbol}");
    }

    // The right operand was pushed last, so it comes off first.
    $right = array_pop($this->operands);
    $left = array_pop($this->operands);

    if ($left === null || $right === null) {
        throw new BadRequestException('parse_error', "Missing operand for '{$symbol}'");
    }

    $this->pushOperand(new BinaryExpression($left, self::BINARY_OP_MAP[$symbol], $right));
}
665:
/**
 * Append an expression to the top of the operand stack.
 *
 * @param FilterExpression $expr Expression to push.
 */
private function pushOperand(FilterExpression $expr): void
{
    array_push($this->operands, $expr);
}
670: }
671: