001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 *
019 */
020 package org.apache.directory.shared.ldap.filter;
021
022
023 import java.text.ParseException;
024
025 import org.apache.directory.shared.i18n.I18n;
026 import org.apache.directory.shared.ldap.entry.BinaryValue;
027 import org.apache.directory.shared.ldap.entry.Value;
028 import org.apache.directory.shared.ldap.util.AttributeUtils;
029 import org.apache.directory.shared.ldap.util.Position;
030 import org.apache.directory.shared.ldap.util.StringTools;
031
032
033 /**
034 * This class parse a Ldap filter. The grammar is given in RFC 4515
035 *
036 * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
037 * @version $Rev$, $Date$
038 */
039 public class FilterParser
040 {
041 /**
042 * Creates a filter parser implementation.
043 */
044 public FilterParser()
045 {
046 }
047
048
049 /**
050 * Parse an extensible
051 *
052 * extensible = ( attr [":dn"] [':' oid] ":=" assertionvalue )
053 * / ( [":dn"] ':' oid ":=" assertionvalue )
054 * matchingrule = ":" oid
055 */
056 private static ExprNode parseExtensible( String attr, String filter, Position pos ) throws ParseException
057 {
058 ExtensibleNode node = new ExtensibleNode( attr );
059
060 if ( attr != null )
061 {
062 // First check if we have a ":dn"
063 if ( StringTools.areEquals( filter, pos.start, "dn" ) )
064 {
065 // Set the dnAttributes flag and move forward in the string
066 node.setDnAttributes( true );
067 pos.start += 2;
068 }
069 else
070 {
071 // Push back the ':'
072 pos.start--;
073 }
074
075 // Do we have a MatchingRule ?
076 if ( StringTools.charAt( filter, pos.start ) == ':' )
077 {
078 pos.start++;
079 int start = pos.start;
080
081 if ( StringTools.charAt( filter, pos.start ) == '=' )
082 {
083 pos.start++;
084
085 // Get the assertionValue
086 node.setValue( parseAssertionValue( filter, pos, true ) );
087
088 return node;
089 }
090 else
091 {
092 AttributeUtils.parseAttribute( filter, pos, false );
093
094 node.setMatchingRuleId( filter.substring( start, pos.start ) );
095
096 if ( StringTools.areEquals( filter, pos.start, ":=" ) )
097 {
098 pos.start += 2;
099
100 // Get the assertionValue
101 node.setValue( parseAssertionValue( filter, pos, true ) );
102
103 return node;
104 }
105 else
106 {
107 throw new ParseException( I18n.err( I18n.ERR_04146 ), pos.start );
108 }
109 }
110 }
111 else
112 {
113 throw new ParseException( I18n.err( I18n.ERR_04147 ), pos.start );
114 }
115 }
116 else
117 {
118 boolean oidRequested = false;
119
120 // First check if we have a ":dn"
121 if ( StringTools.areEquals( filter, pos.start, ":dn" ) )
122 {
123 // Set the dnAttributes flag and move forward in the string
124 node.setDnAttributes( true );
125 pos.start += 3;
126 }
127 else
128 {
129 oidRequested = true;
130 }
131
132 // Do we have a MatchingRule ?
133 if ( StringTools.charAt( filter, pos.start ) == ':' )
134 {
135 pos.start++;
136 int start = pos.start;
137
138 if ( StringTools.charAt( filter, pos.start ) == '=' )
139 {
140 if ( oidRequested )
141 {
142 throw new ParseException( I18n.err( I18n.ERR_04148 ), pos.start );
143 }
144
145 pos.start++;
146
147 // Get the assertionValue
148 node.setValue( parseAssertionValue( filter, pos, true ) );
149
150 return node;
151 }
152 else
153 {
154 AttributeUtils.parseAttribute( filter, pos, false );
155
156 node.setMatchingRuleId( filter.substring( start, pos.start ) );
157
158 if ( StringTools.areEquals( filter, pos.start, ":=" ) )
159 {
160 pos.start += 2;
161
162 // Get the assertionValue
163 node.setValue( parseAssertionValue( filter, pos, true ) );
164
165 return node;
166 }
167 else
168 {
169 throw new ParseException( I18n.err( I18n.ERR_04146 ), pos.start );
170 }
171 }
172 }
173 else
174 {
175 throw new ParseException( I18n.err( I18n.ERR_04147 ), pos.start );
176 }
177 }
178 }
179
180
181 /**
182 * An assertion value :
183 * assertionvalue = valueencoding
184 * valueencoding = 0*(normal / escaped)
185 * normal = UTF1SUBSET / UTFMB
186 * escaped = '\' HEX HEX
187 * HEX = '0'-'9' / 'A'-'F' / 'a'-'f'
188 * UTF1SUBSET = %x01-27 / %x2B-5B / %x5D-7F (Everything but '\0', '*', '(', ')' and '\')
189 * UTFMB = UTF2 / UTF3 / UTF4
190 * UTF0 = %x80-BF
191 * UTF2 = %xC2-DF UTF0
192 * UTF3 = %xE0 %xA0-BF UTF0 / %xE1-EC UTF0 UTF0 / %xED %x80-9F UTF0 / %xEE-EF UTF0 UTF0
193 * UTF4 = %xF0 %x90-BF UTF0 UTF0 / %xF1-F3 UTF0 UTF0 UTF0 / %xF4 %x80-8F UTF0 UTF0
194 *
195 * With the specific constraints (RFC 4515):
196 * "The <valueencoding> rule ensures that the entire filter string is a"
197 * "valid UTF-8 string and provides that the octets that represent the"
198 * "ASCII characters "*" (ASCII 0x2a), "(" (ASCII 0x28), ")" (ASCII"
199 * "0x29), "\" (ASCII 0x5c), and NUL (ASCII 0x00) are represented as a"
200 * "backslash "\" (ASCII 0x5c) followed by the two hexadecimal digits"
201 * "representing the value of the encoded octet."
202
203 *
204 * The incomming String is already transformed from UTF-8 to unicode, so we must assume that the
205 * grammar we have to check is the following :
206 *
207 * assertionvalue = valueencoding
208 * valueencoding = 0*(normal / escaped)
209 * normal = unicodeSubset
210 * escaped = '\' HEX HEX
211 * HEX = '0'-'9' / 'A'-'F' / 'a'-'f'
212 * unicodeSubset = %x01-27 / %x2B-5B / %x5D-FFFF
213 */
214 private static Value<?> parseAssertionValue( String filter, Position pos, boolean preserveEscapedChars ) throws ParseException
215 {
216 int start = pos.start;
217 char c = StringTools.charAt( filter, pos.start );
218
219 // Create a buffer big enough to contain the value once converted
220 byte[] value = new byte[ filter.length() - pos.start];
221 int current = 0;
222
223 do
224 {
225 if ( StringTools.isUnicodeSubset( c ) )
226 {
227 value[current++] = (byte)c;
228 pos.start++;
229 }
230 else if ( StringTools.isCharASCII( filter, pos.start, '\\' ) )
231 {
232 // Maybe an escaped
233 pos.start++;
234
235 // First hex
236 if ( StringTools.isHex( filter, pos.start ) )
237 {
238 pos.start++;
239 }
240 else
241 {
242 throw new ParseException( I18n.err( I18n.ERR_04149 ), pos.start );
243 }
244
245 // second hex
246 if ( StringTools.isHex( filter, pos.start ) )
247 {
248 value[current++] = StringTools.getHexValue( filter.charAt( pos.start - 1 ), filter.charAt( pos.start ) );
249 pos.start++;
250 }
251 else
252 {
253 throw new ParseException( I18n.err( I18n.ERR_04149 ), pos.start );
254 }
255 }
256 else
257 {
258 // not a valid char, so let's get out
259 break;
260 }
261 }
262 while ( ( c = StringTools.charAt( filter, pos.start ) ) != '\0' );
263
264 if ( current != 0 )
265 {
266 byte[] result = new byte[ current ];
267 System.arraycopy( value, 0, result, 0, current );
268
269 return new BinaryValue( result );
270 }
271 else
272 {
273 return new BinaryValue();
274 }
275 }
276
277
278 private static Value<?> parseAssertionValue( String filter, Position pos ) throws ParseException
279 {
280 return parseAssertionValue( filter, pos, false );
281 }
282
283
284 /**
285 * Parse a substring
286 */
287 private static ExprNode parseSubstring( String attr, Value<?> initial, String filter, Position pos )
288 throws ParseException
289 {
290 if ( StringTools.isCharASCII( filter, pos.start, '*' ) )
291 {
292 // We have found a '*' : this is a substring
293 SubstringNode node = new SubstringNode( attr );
294
295 if ( initial != null && !initial.isNull() )
296 {
297 // We have a substring starting with a value : val*...
298 // Set the initial value. It must be a String
299 String initialStr = initial.getString();
300 node.setInitial( initialStr );
301 }
302
303 pos.start++;
304
305 //
306 while ( true )
307 {
308 Value<?> assertionValue = parseAssertionValue( filter, pos );
309
310 // Is there anything else but a ')' after the value ?
311 if ( StringTools.isCharASCII( filter, pos.start, ')' ) )
312 {
313 // Nope : as we have had [initial] '*' (any '*' ) *,
314 // this is the final
315 if ( !assertionValue.isNull() )
316 {
317 String finalStr = assertionValue.getString();
318 node.setFinal( finalStr );
319 }
320
321 return node;
322 }
323 else if ( StringTools.isCharASCII( filter, pos.start, '*' ) )
324 {
325 // We have a '*' : it's an any
326 // If the value is empty, that means we have more than
327 // one consecutive '*' : do nothing in this case.
328 if ( !assertionValue.isNull() )
329 {
330 String anyStr = assertionValue.getString();
331 node.addAny( anyStr );
332 }
333
334 pos.start++;
335 }
336 else
337 {
338 // This is an error
339 throw new ParseException( I18n.err( I18n.ERR_04150 ), pos.start );
340 }
341 }
342 }
343 else
344 {
345 // This is an error
346 throw new ParseException( I18n.err( I18n.ERR_04150 ), pos.start );
347 }
348 }
349
350
351 /**
352 * Here is the grammar to parse :
353 *
354 * simple ::= '=' assertionValue
355 * present ::= '=' '*'
356 * substring ::= '=' [initial] any [final]
357 * initial ::= assertionValue
358 * any ::= '*' ( assertionValue '*')*
359 *
360 * As we can see, there is an ambiguity in the grammar : attr=* can be
361 * seen as a present or as a substring. As stated in the RFC :
362 *
363 * "Note that although both the <substring> and <present> productions in"
364 * "the grammar above can produce the "attr=*" construct, this construct"
365 * "is used only to denote a presence filter." (RFC 4515, 3)
366 *
367 * We have also to consider the difference between a substring and the
368 * equality node : this last node does not contain a '*'
369 *
370 * @param attr
371 * @param filter
372 * @param pos
373 * @return
374 */
375 private static ExprNode parsePresenceEqOrSubstring( String attr, String filter, Position pos )
376 throws ParseException
377 {
378 if ( StringTools.isCharASCII( filter, pos.start, '*' ) )
379 {
380 // To be a present node, the next char should be a ')'
381 pos.start++;
382
383 if ( StringTools.isCharASCII( filter, pos.start, ')' ) )
384 {
385 // This is a present node
386 return new PresenceNode( attr );
387 }
388 else
389 {
390 // Definitively a substring with no initial or an error
391 // Push back the '*' on the string
392 pos.start--;
393 return parseSubstring( attr, null, filter, pos );
394 }
395 }
396 else if ( StringTools.isCharASCII( filter, pos.start, ')' ) )
397 {
398 // An empty equality Node
399 return new EqualityNode( attr, new BinaryValue() );
400 }
401 else
402 {
403 // A substring or an equality node
404 Value<?> value = parseAssertionValue( filter, pos );
405
406 // Is there anything else but a ')' after the value ?
407 if ( StringTools.isCharASCII( filter, pos.start, ')' ) )
408 {
409 // This is an equality node
410 return new EqualityNode( attr, value );
411 }
412
413 return parseSubstring( attr, value, filter, pos );
414 }
415 }
416
417
418 /**
419 * Parse the following grammar :
420 * item = simple / present / substring / extensible
421 * simple = attr filtertype assertionvalue
422 * filtertype = '=' / '~=' / '>=' / '<='
423 * present = attr '=' '*'
424 * substring = attr '=' [initial] any [final]
425 * extensible = ( attr [":dn"] [':' oid] ":=" assertionvalue )
426 * / ( [":dn"] ':' oid ":=" assertionvalue )
427 * matchingrule = ":" oid
428 *
429 * An item starts with an attribute or a colon.
430 */
431 private static ExprNode parseItem( String filter, Position pos, char c ) throws ParseException
432 {
433 LeafNode node = null;
434 String attr = null;
435
436 if ( c == '\0' )
437 {
438 throw new ParseException( I18n.err( I18n.ERR_04151 ), pos.start );
439 }
440
441 if ( c == ':' )
442 {
443 // If we have a colon, then the item is an extensible one
444 return parseExtensible( null, filter, pos );
445 }
446 else
447 {
448 // We must have an attribute
449 attr = AttributeUtils.parseAttribute( filter, pos, true );
450
451 // Now, we may have a present, substring, simple or an extensible
452 c = StringTools.charAt( filter, pos.start );
453
454 switch ( c )
455 {
456 case '=':
457 // It can be a presence, an equal or a substring
458 pos.start++;
459 return parsePresenceEqOrSubstring( attr, filter, pos );
460
461 case '~':
462 // Approximate node
463 pos.start++;
464
465 // Check that we have a '='
466 if ( !StringTools.isCharASCII( filter, pos.start, '=' ) )
467 {
468 throw new ParseException( I18n.err( I18n.ERR_04152 ), pos.start );
469 }
470
471 pos.start++;
472
473 // Parse the value and create the node
474 node = new ApproximateNode( attr, parseAssertionValue( filter, pos ) );
475 return node;
476
477 case '>':
478 // Greater or equal node
479 pos.start++;
480
481 // Check that we have a '='
482 if ( !StringTools.isCharASCII( filter, pos.start, '=' ) )
483 {
484 throw new ParseException( I18n.err( I18n.ERR_04152 ), pos.start );
485 }
486
487 pos.start++;
488
489 // Parse the value and create the node
490 node = new GreaterEqNode( attr, parseAssertionValue( filter, pos ) );
491 return node;
492
493 case '<':
494 // Less or equal node
495 pos.start++;
496
497 // Check that we have a '='
498 if ( !StringTools.isCharASCII( filter, pos.start, '=' ) )
499 {
500 throw new ParseException( I18n.err( I18n.ERR_04152 ), pos.start );
501 }
502
503 pos.start++;
504
505 // Parse the value and create the node
506 node = new LessEqNode( attr, parseAssertionValue( filter, pos ) );
507 return node;
508
509 case ':':
510 // An extensible node
511 pos.start++;
512 return parseExtensible( attr, filter, pos );
513
514 default:
515 // This is an error
516 throw new ParseException( I18n.err( I18n.ERR_04153 ), pos.start );
517 }
518 }
519 }
520
521
522 /**
523 * Parse AND, OR and NOT nodes :
524 *
525 * and = '&' filterlist
526 * or = '|' filterlist
527 * not = '!' filter
528 * filterlist = 1*filter
529 *
530 * @return
531 */
532 private static ExprNode parseBranchNode( ExprNode node, String filter, Position pos ) throws ParseException
533 {
534 BranchNode bNode = ( BranchNode ) node;
535
536 // We must have at least one filter
537 ExprNode child = parseFilterInternal( filter, pos );
538
539 // Add the child to the node children
540 bNode.addNode( child );
541
542 // Now, iterate recusively though all the remaining filters, if any
543 while ( ( child = parseFilterInternal( filter, pos ) ) != null )
544 {
545 // Add the child to the node children
546 bNode.addNode( child );
547 }
548
549 return node;
550 }
551
552
553 /**
554 * filtercomp = and / or / not / item
555 * and = '&' filterlist
556 * or = '|' filterlist
557 * not = '!' filter
558 * item = simple / present / substring / extensible
559 * simple = attr filtertype assertionvalue
560 * present = attr EQUALS ASTERISK
561 * substring = attr EQUALS [initial] any [final]
562 * extensible = ( attr [dnattrs]
563 * [matchingrule] COLON EQUALS assertionvalue )
564 * / ( [dnattrs]
565 * matchingrule COLON EQUALS assertionvalue )
566 */
567 private static ExprNode parseFilterComp( String filter, Position pos ) throws ParseException
568 {
569 ExprNode node = null;
570
571 if ( pos.start == pos.length )
572 {
573 throw new ParseException( I18n.err( I18n.ERR_04154 ), pos.start );
574 }
575
576 char c = StringTools.charAt( filter, pos.start );
577
578 switch ( c )
579 {
580 case '&':
581 // This is a AND node
582 pos.start++;
583 node = new AndNode();
584 parseBranchNode( node, filter, pos );
585 break;
586
587 case '|':
588 // This is an OR node
589 pos.start++;
590 node = new OrNode();
591 parseBranchNode( node, filter, pos );
592 break;
593
594 case '!':
595 // This is a NOT node
596 pos.start++;
597 node = new NotNode();
598 parseBranchNode( node, filter, pos );
599 break;
600
601 default:
602 // This is an item
603 node = parseItem( filter, pos, c );
604 break;
605
606 }
607
608 return node;
609 }
610
611
612 /**
613 * Pasre the grammar rule :
614 * filter ::= '(' filterComp ')'
615 */
616 private static ExprNode parseFilterInternal( String filter, Position pos ) throws ParseException
617 {
618 // Check for the left '('
619 if ( !StringTools.isCharASCII( filter, pos.start, '(' ) )
620 {
621 // No more node, get out
622 if ( ( pos.start == 0 ) && ( pos.length != 0 ) )
623 {
624 throw new ParseException( I18n.err( I18n.ERR_04155 ), 0 );
625 }
626 else
627 {
628 return null;
629 }
630 }
631
632 pos.start++;
633
634 // parse the filter component
635 ExprNode node = parseFilterComp( filter, pos );
636
637 if ( node == null )
638 {
639 throw new ParseException( I18n.err( I18n.ERR_04156 ), pos.start );
640 }
641
642 // Check that we have a right ')'
643 if ( !StringTools.isCharASCII( filter, pos.start, ')' ) )
644 {
645 throw new ParseException( I18n.err( I18n.ERR_04157 ), pos.start );
646 }
647
648 pos.start++;
649
650 return node;
651 }
652
653
654 /**
655 * @see FilterParser#parse(String)
656 */
657 public static ExprNode parse( String filter ) throws ParseException
658 {
659 // The filter must not be null. This is a defensive test
660 if ( StringTools.isEmpty( filter ) )
661 {
662 throw new ParseException( I18n.err( I18n.ERR_04158 ), 0 );
663 }
664
665 Position pos = new Position();
666 pos.start = 0;
667 pos.end = 0;
668 pos.length = filter.length();
669
670 return parseFilterInternal( filter, pos );
671 }
672
673
674 public void setFilterParserMonitor( FilterParserMonitor monitor )
675 {
676 }
677 }